cranium 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
@@ -0,0 +1,69 @@
1
+ Feature: Raw Ruby transformation
2
+
3
+ Scenario: A transform block can use the record as a Hash
4
+ Given a "products.csv" data file containing:
5
+ """
6
+ id,name,category
7
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
8
+ CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
9
+ """
10
+ And the following definition:
11
+ """
12
+ source :products do
13
+ field :id, String
14
+ field :name, String
15
+ field :category, String
16
+ end
17
+
18
+ source :transformed_products do
19
+ field :item, String
20
+ field :title, String
21
+ field :category, String
22
+ end
23
+
24
+ transform :products => :transformed_products do |record|
25
+ record[:item] = "*#{record[:id]}*"
26
+ record[:title] = record[:name].chars.first
27
+ output record
28
+ end
29
+ """
30
+ When I execute the definition
31
+ Then the process should exit successfully
32
+ And there should be a "transformed_products.csv" data file in the upload directory containing:
33
+ """
34
+ item,title,category
35
+ *JNI-123*,J,Main category > Subcategory > Sub-subcategory
36
+ *CDI-234*,A,Smart Insight > Cool stuff > Scripts
37
+ """
38
+
39
+
40
+ Scenario: Records can be skipped
41
+ Given a "products.csv" data file containing:
42
+ """
43
+ id
44
+ 1
45
+ 2
46
+ 3
47
+ """
48
+ And the following definition:
49
+ """
50
+ source :products do
51
+ field :id, Integer
52
+ end
53
+
54
+ source :transformed_products do
55
+ field :id, Integer
56
+ end
57
+
58
+ transform :products => :transformed_products do |record|
59
+ output record unless "2" == record[:id]
60
+ end
61
+ """
62
+ When I execute the definition
63
+ Then the process should exit successfully
64
+ And there should be a "transformed_products.csv" data file in the upload directory containing:
65
+ """
66
+ id
67
+ 1
68
+ 3
69
+ """
@@ -0,0 +1,39 @@
1
+ Feature: Split field
2
+
3
+ Scenario: A single field can be split into multiple fields
4
+ Given a "products.csv" data file containing:
5
+ """
6
+ id,name,category
7
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
8
+ CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
9
+ """
10
+ And the following definition:
11
+ """
12
+ source :products do
13
+ field :item, String
14
+ field :title, String
15
+ field :category, String
16
+ end
17
+
18
+ source :transformed_products do
19
+ field :item, String
20
+ field :title, String
21
+ field :main_category, String
22
+ field :sub_category, String
23
+ field :department, String
24
+ end
25
+
26
+ transform :products => :transformed_products do |record|
27
+ record.split_field :category, into: [:category], by: "|"
28
+ record.split_field :category, into: [:main_category, :sub_category, :department], by: ">"
29
+ output record
30
+ end
31
+ """
32
+ When I execute the definition
33
+ Then the process should exit successfully
34
+ And there should be a "transformed_products.csv" data file in the upload directory containing:
35
+ """
36
+ item,title,main_category,sub_category,department
37
+ JNI-123,Just a product name,Main category,Subcategory,Sub-subcategory
38
+ CDI-234,Another product name,Smart Insight,Cool stuff,Cool stuff
39
+ """
@@ -0,0 +1,104 @@
1
# Entry point of a Cranium run: parses the command line, keeps the registry
# of data sources and lifecycle hooks, and loads the user's definition file.
class Cranium::Application

  include Cranium::Logging

  # Registry of the data sources registered by the loaded definition file.
  attr_reader :sources



  # arguments - raw ARGV-style Array, parsed into Cranium::CommandLineOptions.
  def initialize(arguments)
    @sources = Cranium::SourceRegistry.new
    @hooks = {}

    @options = Cranium::CommandLineOptions.new arguments
  end



  # Arguments not prefixed with "cranium-", passed through to the definition.
  def load_arguments
    options.load_arguments
  end



  # Arguments prefixed with "cranium-" (e.g. the :load file), keyed by symbol.
  def cranium_arguments
    options.cranium_arguments
  end



  # Registers a source definition and immediately resolves its file list.
  def register_source(name, &block)
    @sources.register_source(name, &block).resolve_files
  end



  # Validates the definition file named by the cranium :load option, then
  # loads (executes) it. Any raised error — including non-StandardError —
  # is logged and re-raised; :after hooks always run, even on failure.
  def run
    process_file = validate_file options.cranium_arguments[:load]

    begin
      load process_file
    rescue Exception => ex # deliberately broad: log everything, then re-raise
      log :error, ex
      raise
    ensure
      # NOTE(review): this applies :after, not :after_import — confirm a
      # registrar for :after hooks exists elsewhere in the codebase.
      apply_hook :after
    end
  end



  # Registers a block to run after each import (see Cranium::DataImporter).
  def after_import(&block)
    register_hook :after_import, &block
  end



  # Appends a block to the list of hooks stored under +name+.
  def register_hook(name, &block)
    @hooks[name] ||= []
    @hooks[name] << block
  end



  # Calls every block registered under +name+, in registration order.
  # A name with no registered hooks is a no-op.
  def apply_hook(name)
    unless @hooks[name].nil?
      @hooks[name].each do |block|
        block.call
      end
    end
  end



  private

  attr_reader :options



  # Exits the process unless +load_file+ names an existing file; returns it.
  def validate_file(load_file)
    exit_if_no_file_specified load_file
    exit_if_no_such_file_exists load_file
    load_file
  end



  def exit_if_no_file_specified(file)
    if file.nil? || file.empty?
      $stderr.puts "ERROR: No file specified"
      exit 1
    end
  end



  def exit_if_no_such_file_exists(file)
    # File.exists? was deprecated for years and removed in Ruby 3.2;
    # File.exist? is the supported spelling.
    unless File.exist? file
      $stderr.puts "ERROR: File '#{file}' does not exist"
      exit 1
    end
  end

end
@@ -0,0 +1,36 @@
1
require 'fileutils'

# Moves processed upload files into a timestamped archive directory,
# or deletes them outright.
module Cranium::Archiver

  # Moves the given files from the upload directory into the archive
  # directory, creating the archive directory on first use.
  def self.archive(*files)
    create_archive_directory
    archive_files files
  end



  # Deletes the given files from the upload directory.
  def self.remove(*files)
    files.each do |file_name|
      FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
    end
  end



  def self.create_archive_directory
    # Dir.exists? was removed in Ruby 3.2; Dir.exist? is the supported spelling.
    FileUtils.mkpath Cranium.configuration.archive_directory unless Dir.exist? Cranium.configuration.archive_directory
  end



  def self.archive_files(files)
    # One timestamp per batch so every file of a run shares the same prefix.
    archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
    files.each do |file_name|
      FileUtils.mv File.join(Cranium.configuration.upload_path, file_name),
                   File.join(Cranium.configuration.archive_directory, "#{archive_datetime}_#{file_name}")
    end
  end

  # BUG FIX: a bare `private` has no effect on singleton (def self.) methods;
  # private_class_method actually hides these helpers.
  private_class_method :create_archive_directory, :archive_files

end
@@ -0,0 +1,43 @@
1
# Class-level macros that generate DSL-style accessors: calling the
# generated method with no arguments reads the attribute, calling it with
# arguments writes it.
module Cranium::AttributeDSL

  # Defines a scalar attribute: `name` reads it, `name value` writes it.
  def define_attribute(name)
    class_eval <<-attribute_method

      def #{name}(*args)
        return @#{name} if args.count.zero?

        @#{name} = args.first
      end

    attribute_method
  end



  # Defines an array attribute: `name` reads (default []), `name a, b` writes
  # all given values as an Array.
  def define_array_attribute(name)
    class_eval <<-attribute_method

      def #{name}(*args)
        return @#{name} || [] if args.count.zero?

        @#{name} = args
      end

    attribute_method
  end



  # Defines a boolean attribute: `name` reads (coerced with !!), `name value`
  # writes the truthiness of the given value.
  def define_boolean_attribute(name)
    class_eval <<-attribute_method

      def #{name}(*args)
        return !!@#{name} if args.count.zero?

        # BUG FIX: was `!!args` — args is the (non-empty) argument Array,
        # which is always truthy, so the attribute was set to true even for
        # `#{name} false`. Coerce the first argument instead.
        @#{name} = !!args.first
      end

    attribute_method
  end

end
@@ -0,0 +1,27 @@
1
require "slop"

# Splits the parsed command line into cranium-internal arguments (keys
# prefixed with "cranium-") and pass-through load arguments.
class Cranium::CommandLineOptions

  # arguments - raw ARGV-style Array, parsed leniently via Slop autocreate.
  def initialize(arguments)
    @arguments = Slop.parse(arguments, autocreate: true).to_hash
  end



  # Arguments whose key starts with "cranium-", with the prefix stripped
  # and the remainder symbolized. Memoized.
  def cranium_arguments
    @cranium_arguments ||= arguments.each_with_object({}) do |(key, value), result|
      match = /\Acranium\-(.*)/.match(key.to_s)
      result[match[1].to_sym] = value if match
    end
  end



  # All arguments except the cranium-prefixed ones, keys untouched. Memoized.
  def load_arguments
    @load_arguments ||= arguments.each_with_object({}) do |(key, value), result|
      result[key] = value unless key.to_s =~ /\Acranium\-(.*)/
    end
  end



  private

  attr_reader :arguments

end
@@ -0,0 +1,33 @@
1
# Holds the runtime settings of a Cranium run (connection strings, upload
# and archive locations, loggers).
class Cranium::Configuration

  # Name of the default state directory placed under the upload path.
  STORAGE_DIRECTORY_NAME = ".cranium"

  attr_writer :storage_directory
  attr_accessor :archive_directory
  attr_accessor :greenplum_connection_string
  attr_accessor :mysql_connection_string
  attr_accessor :upload_directory
  attr_accessor :gpfdist_home_directory
  attr_accessor :gpfdist_url
  attr_accessor :loggers



  def initialize
    @loggers = []
  end



  # Absolute path of the upload directory under the gpfdist home.
  def upload_path
    File.join gpfdist_home_directory, upload_directory
  end



  # Explicitly configured storage directory, or the default
  # "<upload_path>/.cranium" when none was set.
  def storage_directory
    if @storage_directory.nil?
      File.join upload_path, STORAGE_DIRECTORY_NAME
    else
      @storage_directory
    end
  end

end
@@ -0,0 +1,35 @@
1
# Runs an import definition inside a single database transaction using the
# strategy implied by the definition's settings.
class Cranium::DataImporter

  include Cranium::Logging


  # Imports the data, fires the :after_import hooks within the same
  # transaction, then records the number of imported items as a metric.
  def import(import_definition)
    imported_count = 0
    Cranium::Database.connection.transaction do
      imported_count = importer_for_definition(import_definition).import
      Cranium.application.apply_hook(:after_import)
    end

    record_metric import_definition.name, imported_count.to_s
  end


  private

  # Selects the import strategy. merge_on, delete_insert_on and
  # truncate_insert are mutually exclusive; combining them is an error.
  def importer_for_definition(import_definition)
    merge = !import_definition.merge_fields.empty?
    delete_insert = !import_definition.delete_insert_on.empty?
    truncate = import_definition.truncate_insert

    if [merge, delete_insert, truncate].count(true) > 1
      raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
    end

    if merge
      Cranium::ImportStrategy::Merge.new(import_definition)
    elsif delete_insert
      Cranium::ImportStrategy::DeleteInsert.new(import_definition)
    elsif truncate
      Cranium::ImportStrategy::TruncateInsert.new(import_definition)
    else
      Cranium::ImportStrategy::Delta.new(import_definition)
    end
  end

end
@@ -0,0 +1,48 @@
1
require 'csv'
require 'cranium/extensions/file'

# Streams the CSV files of a source, yielding each data row to a block as a
# Hash keyed by the source's field names.
class Cranium::DataReader

  def initialize(source)
    @source = source
    @source_field_names = @source.fields.keys
  end



  # Reads every file of the source in order, invoking the block once per
  # data row (the header row is skipped).
  def read(&block)
    @source.files.each do |input_file|
      read_input_file File.join(Cranium.configuration.upload_path, input_file), block
    end
  end



  private

  def read_input_file(input_file, read_block)
    Cranium::ProgressOutput.show_progress File.basename(input_file), File.line_count(input_file) do |progress_bar|
      header_seen = false
      CSV.foreach input_file, csv_read_options_for(@source) do |row|
        # The first row is the header: skip it without advancing the progress bar.
        unless header_seen
          header_seen = true
          next
        end

        record = Hash[@source_field_names.zip(row)]
        instance_exec record, &read_block

        progress_bar.inc
      end
    end
  end



  def csv_read_options_for(source_definition)
    {
      encoding: source_definition.encoding,
      col_sep: source_definition.delimiter,
      quote_char: source_definition.quote,
      return_headers: false
    }
  end

end
@@ -0,0 +1,126 @@
1
require 'csv'
require 'cranium/extensions/file'

# Streams the rows of a source CSV through a user-supplied transformation
# block and writes the records the block outputs into the target source's
# CSV file. The block is instance_exec'd against this transformer, so it can
# call #output, #lookup, #insert, #sequence, etc.
class Cranium::DataTransformer

  def initialize(source, target)
    @source, @target = source, target
    @index = Cranium::Transformation::Index.new
    @target_fields = @target.fields.keys
    # A single record object is reused for every input row.
    @record = Cranium::TransformationRecord.new @source.fields.keys, @target_fields
  end



  # Opens the target file (with header row), feeds every data row of every
  # source file through the transformation block, then re-resolves the
  # target's file list so later steps can pick up the generated file.
  def transform(&block)
    # BUG FIX: error message said "overrride" (typo).
    raise StandardError, "Source definition '#{@target.name}' cannot override the file name because it is a transformation target" if @target.file_name_overriden?

    CSV.open "#{Cranium.configuration.upload_path}/#{@target.file}", "w:#{@target.encoding}", csv_write_options_for(@target) do |target_file|
      @target_file = target_file
      @source.files.each do |input_file|
        transform_input_file File.join(Cranium.configuration.upload_path, input_file), block
      end
    end

    @target.resolve_files
  end



  private

  def transform_input_file(input_file, transformation_block)
    Cranium::ProgressOutput.show_progress File.basename(input_file), File.line_count(input_file) do |progress_bar|
      line_number = 0
      CSV.foreach input_file, csv_read_options_for(@source) do |row|
        # Skip the header row (and do not advance the progress bar for it).
        next if 1 == (line_number += 1)

        @record.input_data = row
        self.instance_exec @record, &transformation_block

        progress_bar.inc
      end
    end
  end



  def csv_write_options_for(source_definition)
    {
      col_sep: source_definition.delimiter,
      quote_char: source_definition.quote,
      write_headers: true,
      headers: source_definition.fields.keys
    }
  end



  def csv_read_options_for(source_definition)
    {
      encoding: source_definition.encoding,
      col_sep: source_definition.delimiter,
      quote_char: source_definition.quote,
      return_headers: false
    }
  end



  # Writes a record (TransformationRecord or Hash) to the target file.
  # Raises ArgumentError for any other type.
  def output(record)
    @target_file << prepare_for_output(case record
                                         when Cranium::TransformationRecord
                                           record.data
                                         when Hash
                                           record
                                         else
                                           raise ArgumentError, "Cannot write '#{record.class}' to file as CSV record"
                                       end)
  end



  # Restricts the hash to target fields, orders values by target field order
  # and strips whitespace from string values.
  # NOTE(review): keep_if mutates the given hash in place — when a caller
  # passes its own Hash to #output, that Hash is modified; confirm intended.
  def prepare_for_output(hash)
    hash.
      keep_if { |key| @target_fields.include? key }.
      sort_by { |field, _| @target_fields.index(field) }.
      map { |item| item[1] }.
      map { |value| strip(value) }
  end



  # Strips values that support it (strings); passes everything else through.
  def strip(value)
    return value unless value.respond_to? :strip
    value.strip
  end



  # True when the current record's combination of +fields+ has not been
  # seen before in this run.
  def unique_on_fields?(*fields)
    not Cranium::Transformation::DuplicationIndex[*fields].duplicate? @record
  end



  def lookup(field_name, settings)
    @index.lookup field_name, settings
  end



  def insert(field_name, settings)
    @index.insert field_name, settings
  end



  # Next value of the named database-backed sequence.
  def next_value_in_sequence(name)
    Cranium::Transformation::Sequence.by_name name
  end



  alias_method :sequence, :next_value_in_sequence

end
@@ -0,0 +1,36 @@
1
require 'sequel'
require 'sequel/extensions/connection_validator'

# Holds the default Greenplum connection plus named connections registered
# through the DSL's database definitions.
module Cranium::Database

  # Lazily established connection to the default Greenplum database.
  def self.connection
    @connection ||= setup_connection(Cranium.configuration.greenplum_connection_string)
  end



  # Returns (and caches) the connection for the database registered under
  # +name+ via register_database.
  def self.[](name)
    @connections ||= {}
    @connections[name] ||= setup_connection(@definitions[name].connect_to)
  end



  # Registers a named database definition built from the DSL block.
  def self.register_database(name, &block)
    @definitions ||= Cranium::DefinitionRegistry.new Cranium::DSL::DatabaseDefinition
    @definitions.register_definition name, &block
  end



  def self.setup_connection(connection_string)
    connection = Sequel.connect connection_string, loggers: Cranium.configuration.loggers
    # Validate pooled connections on every checkout (timeout -1) so dropped
    # database connections are transparently re-established.
    connection.extension :connection_validator
    connection.pool.connection_validation_timeout = -1
    connection
  end

  # BUG FIX: a bare `private` has no effect on singleton (def self.) methods;
  # private_class_method actually hides the helper.
  private_class_method :setup_connection

end
@@ -0,0 +1,21 @@
1
# Generic name → definition map. Definitions are created by instantiating a
# configured class and instance-evaluating a DSL block against the instance.
class Cranium::DefinitionRegistry

  # definition_class - the class instantiated for each registered definition.
  def initialize(definition_class)
    @definition_class = definition_class
    @definitions = {}
  end



  # Looks up a previously registered definition; nil when unknown.
  def [](name)
    @definitions[name]
  end



  # Builds a definition named +name+, configures it with the block, and
  # stores it — replacing any earlier definition of the same name.
  def register_definition(name, &block)
    definition = @definition_class.new(name)
    definition.instance_eval(&block)
    @definitions[name] = definition
  end

end
@@ -0,0 +1,65 @@
1
# Buffers dimension rows produced during a transformation and bulk-inserts
# them into their table when the surrounding import finishes.
class Cranium::DimensionManager

  # Rows buffered since the last flush.
  attr_reader :rows



  # Memoized manager per (table name, key fields) combination.
  def self.for(table_name, key_fields)
    @instances ||= {}
    @instances[[table_name, key_fields]] ||= new(table_name, key_fields)
  end



  def initialize(table_name, key_fields)
    @table_name = table_name
    @key_fields = key_fields
    @rows = []

    # Persist whatever has been buffered once the import transaction runs.
    Cranium.application.after_import { flush }
  end



  # Buffers +row+ for insertion and returns the value stored under
  # +target_key+. Raises ArgumentError when that attribute is missing.
  def insert(target_key, row)
    raise ArgumentError, "Required attribute '#{target_key}' missing" unless row.has_key? target_key

    @rows << resolve_sequence_values(row)
    row[target_key]
  end



  # Builds a lookup cache from the table: key-field tuple => value_field.
  def create_cache_for_field(value_field)
    to_multi_key_cache(db.select_map(@key_fields + [value_field]))
  end



  # Bulk-inserts all buffered rows (if any) and empties the buffer.
  def flush
    db.multi_insert(@rows) unless @rows.empty?
    @rows = []
  end



  private

  # Turns [[k1, k2, ..., value], ...] into { [k1, k2, ...] => value }.
  def to_multi_key_cache(table_data)
    table_data.each_with_object({}) { |row, cache| cache[row[0..-2]] = row.last }
  end



  # Replaces Sequence placeholders in the row with their next value;
  # mutates the row in place and returns it.
  def resolve_sequence_values(row)
    row.each do |key, value|
      row[key] = value.next_value if value.is_a? Cranium::Transformation::Sequence
    end
  end



  # Sequel dataset for the managed table.
  def db
    Cranium::Database.connection[@table_name]
  end

end