cranium 0.2.0

Files changed (132)
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0

data/features/transform/raw_ruby_transformation.feature
@@ -0,0 +1,69 @@
+ Feature: Raw Ruby transformation
+
+   Scenario: A transform block can use the record as a Hash
+     Given a "products.csv" data file containing:
+       """
+       id,name,category
+       JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
+       CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
+       """
+     And the following definition:
+       """
+       source :products do
+         field :id, String
+         field :name, String
+         field :category, String
+       end
+
+       source :transformed_products do
+         field :item, String
+         field :title, String
+         field :category, String
+       end
+
+       transform :products => :transformed_products do |record|
+         record[:item] = "*#{record[:id]}*"
+         record[:title] = record[:name].chars.first
+         output record
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "transformed_products.csv" data file in the upload directory containing:
+       """
+       item,title,category
+       *JNI-123*,J,Main category > Subcategory > Sub-subcategory
+       *CDI-234*,A,Smart Insight > Cool stuff > Scripts
+       """
+
+
+   Scenario: Records can be skipped
+     Given a "products.csv" data file containing:
+       """
+       id
+       1
+       2
+       3
+       """
+     And the following definition:
+       """
+       source :products do
+         field :id, Integer
+       end
+
+       source :transformed_products do
+         field :id, Integer
+       end
+
+       transform :products => :transformed_products do |record|
+         output record unless "2" == record[:id]
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "transformed_products.csv" data file in the upload directory containing:
+       """
+       id
+       1
+       3
+       """

data/features/transform/split_field.feature
@@ -0,0 +1,39 @@
+ Feature: Split field
+
+   Scenario: A single field can be split into multiple fields
+     Given a "products.csv" data file containing:
+       """
+       id,name,category
+       JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
+       CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
+       """
+     And the following definition:
+       """
+       source :products do
+         field :item, String
+         field :title, String
+         field :category, String
+       end
+
+       source :transformed_products do
+         field :item, String
+         field :title, String
+         field :main_category, String
+         field :sub_category, String
+         field :department, String
+       end
+
+       transform :products => :transformed_products do |record|
+         record.split_field :category, into: [:category], by: "|"
+         record.split_field :category, into: [:main_category, :sub_category, :department], by: ">"
+         output record
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "transformed_products.csv" data file in the upload directory containing:
+       """
+       item,title,main_category,sub_category,department
+       JNI-123,Just a product name,Main category,Subcategory,Sub-subcategory
+       CDI-234,Another product name,Smart Insight,Cool stuff,Cool stuff
+       """

data/lib/cranium/application.rb
@@ -0,0 +1,104 @@
+ class Cranium::Application
+
+   include Cranium::Logging
+
+   attr_reader :sources
+
+
+
+   def initialize(arguments)
+     @sources = Cranium::SourceRegistry.new
+     @hooks = {}
+
+     @options = Cranium::CommandLineOptions.new arguments
+   end
+
+
+
+   def load_arguments
+     options.load_arguments
+   end
+
+
+
+   def cranium_arguments
+     options.cranium_arguments
+   end
+
+
+
+   def register_source(name, &block)
+     @sources.register_source(name, &block).resolve_files
+   end
+
+
+
+   def run
+     process_file = validate_file options.cranium_arguments[:load]
+
+     begin
+       load process_file
+     rescue Exception => ex
+       log :error, ex
+       raise
+     ensure
+       apply_hook :after
+     end
+   end
+
+
+
+   def after_import(&block)
+     register_hook :after_import, &block
+   end
+
+
+
+   def register_hook(name, &block)
+     @hooks[name] ||= []
+     @hooks[name] << block
+   end
+
+
+
+   def apply_hook(name)
+     unless @hooks[name].nil?
+       @hooks[name].each do |block|
+         block.call
+       end
+     end
+   end
+
+
+
+   private
+
+   attr_reader :options
+
+
+
+   def validate_file(load_file)
+     exit_if_no_file_specified load_file
+     exit_if_no_such_file_exists load_file
+     load_file
+   end
+
+
+
+   def exit_if_no_file_specified(file)
+     if file.nil? || file.empty?
+       $stderr.puts "ERROR: No file specified"
+       exit 1
+     end
+   end
+
+
+
+   def exit_if_no_such_file_exists(file)
+     unless File.exists? file
+       $stderr.puts "ERROR: File '#{file}' does not exist"
+       exit 1
+     end
+   end
+
+ end
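
A brief, illustrative note (not taken from the gem's docs): the application parses its arguments through Cranium::CommandLineOptions, so a cranium-prefixed load argument selects the definition file that run loads, and blocks registered with register_hook fire from apply_hook. The flag name and path below are placeholders:

    # Hypothetical invocation; flag and file path are examples only.
    app = Cranium::Application.new ["--cranium-load", "definitions/products.rb"]

    # register_hook/apply_hook pair: the :after hook runs in run's ensure block.
    app.register_hook(:after) { puts "definition finished" }

    app.run   # loads definitions/products.rb, logging and re-raising on error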

data/lib/cranium/archiver.rb
@@ -0,0 +1,36 @@
+ require 'fileutils'
+
+ module Cranium::Archiver
+
+   def self.archive(*files)
+     create_archive_directory
+     archive_files files
+   end
+
+
+
+   def self.remove(*files)
+     files.each do |file_name|
+       FileUtils.rm File.join(Cranium.configuration.upload_path, file_name)
+     end
+   end
+
+
+
+   private
+
+   def self.create_archive_directory
+     FileUtils.mkpath Cranium.configuration.archive_directory unless Dir.exists? Cranium.configuration.archive_directory
+   end
+
+
+
+   def self.archive_files(files)
+     archive_datetime = Time.now.strftime("%Y-%m-%d_%Hh%Mm%Ss")
+     files.each do |file_name|
+       FileUtils.mv File.join(Cranium.configuration.upload_path, file_name),
+                    File.join(Cranium.configuration.archive_directory, "#{archive_datetime}_#{file_name}")
+     end
+   end
+
+ end
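
For orientation, a hedged usage sketch (file names invented): archive moves upload files into the archive directory under a timestamp prefix, while remove deletes them in place.

    # Assumes Cranium.configuration.upload_path and archive_directory are set.
    Cranium::Archiver.archive "products.csv", "contacts.csv"
    # => moved to <archive_directory>/2015-06-01_09h30m00s_products.csv, ...

    Cranium::Archiver.remove "obsolete_feed.csv"
    # => deletes <upload_path>/obsolete_feed.csv without keeping a copy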

data/lib/cranium/attribute_dsl.rb
@@ -0,0 +1,43 @@
+ module Cranium::AttributeDSL
+
+   def define_attribute(name)
+     class_eval <<-attribute_method
+
+       def #{name}(*args)
+         return @#{name} if args.count.zero?
+
+         @#{name} = args.first
+       end
+
+     attribute_method
+   end
+
+
+
+   def define_array_attribute(name)
+     class_eval <<-attribute_method
+
+       def #{name}(*args)
+         return @#{name} || [] if args.count.zero?
+
+         @#{name} = args
+       end
+
+     attribute_method
+   end
+
+
+
+   def define_boolean_attribute(name)
+     class_eval <<-attribute_method
+
+       def #{name}(*args)
+         return !!@#{name} if args.count.zero?
+
+         @#{name} = !!args
+       end
+
+     attribute_method
+   end
+
+ end
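
Each helper class_evals a combined reader/writer: called with no arguments the generated method returns the stored value, called with arguments it stores them (args.first, the whole argument list, or a double-negated flag). A minimal sketch with an invented class:

    # Hypothetical definition class extending the module above.
    class ReportDefinition
      extend Cranium::AttributeDSL

      define_attribute :name             # stores args.first
      define_array_attribute :columns    # stores all args, reads as [] when unset
      define_boolean_attribute :enabled  # reads as false when unset
    end

    report = ReportDefinition.new
    report.name "daily_sales"
    report.columns :id, :total

    report.name     # => "daily_sales"
    report.columns  # => [:id, :total]
    report.enabled  # => false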

data/lib/cranium/command_line_options.rb
@@ -0,0 +1,27 @@
+ require "slop"
+
+ class Cranium::CommandLineOptions
+
+   def initialize(arguments)
+     @arguments = Slop.parse(arguments, autocreate: true).to_hash
+   end
+
+
+
+   def cranium_arguments
+     @cranium_arguments ||= Hash[arguments.map { |k, v| [$1.to_sym, v] if k.to_s =~ /\Acranium\-(.*)/ }.compact]
+   end
+
+
+
+   def load_arguments
+     @load_arguments ||= Hash[arguments.map { |k, v| [k, v] unless k.to_s =~ /\Acranium\-(.*)/ }.compact]
+   end
+
+
+
+   private
+
+   attr_reader :arguments
+
+ end
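
The two readers split the parsed options on the key prefix: keys matching cranium-* end up in cranium_arguments with the prefix stripped and symbolized, everything else stays in load_arguments. Assuming Slop's autocreate parsing accepts the made-up flags below, the split would look roughly like this:

    # Illustrative flags; exact keys depend on the Slop version in use.
    options = Cranium::CommandLineOptions.new %w[--cranium-load definitions/main.rb --environment test]

    options.cranium_arguments  # => { load: "definitions/main.rb" }
    options.load_arguments     # => { environment: "test" }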

data/lib/cranium/configuration.rb
@@ -0,0 +1,33 @@
+ class Cranium::Configuration
+
+   STORAGE_DIRECTORY_NAME = ".cranium"
+
+   attr_writer :storage_directory
+   attr_accessor :archive_directory
+   attr_accessor :greenplum_connection_string
+   attr_accessor :mysql_connection_string
+   attr_accessor :upload_directory
+   attr_accessor :gpfdist_home_directory
+   attr_accessor :gpfdist_url
+   attr_accessor :loggers
+
+
+
+   def initialize
+     @loggers = []
+   end
+
+
+
+   def upload_path
+     File.join gpfdist_home_directory, upload_directory
+   end
+
+
+
+   def storage_directory
+     return @storage_directory unless @storage_directory.nil?
+     File.join upload_path, STORAGE_DIRECTORY_NAME
+   end
+
+ end
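
A configuration sketch with placeholder values (see data/examples/config.rb for the gem's own example): upload_path is derived from the gpfdist home directory plus the upload directory, and storage_directory defaults to a .cranium folder underneath it unless set explicitly.

    require "logger"

    # Placeholder paths and credentials, for illustration only.
    config = Cranium::Configuration.new
    config.gpfdist_home_directory      = "/data/gpfdist"
    config.upload_directory            = "upload"
    config.archive_directory           = "/data/archive"
    config.greenplum_connection_string = "postgres://user:secret@localhost:5432/dwh"
    config.loggers << Logger.new($stdout)

    config.upload_path       # => "/data/gpfdist/upload"
    config.storage_directory # => "/data/gpfdist/upload/.cranium" (default when unset)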

data/lib/cranium/data_importer.rb
@@ -0,0 +1,35 @@
+ class Cranium::DataImporter
+
+   include Cranium::Logging
+
+
+   def import(import_definition)
+     number_of_items_imported = 0
+     Cranium::Database.connection.transaction do
+       number_of_items_imported = importer_for_definition(import_definition).import
+       Cranium.application.apply_hook(:after_import)
+     end
+
+     record_metric import_definition.name, number_of_items_imported.to_s
+   end
+
+
+   private
+
+   def importer_for_definition(import_definition)
+     if [!import_definition.merge_fields.empty?, !import_definition.delete_insert_on.empty?, import_definition.truncate_insert].count(true) > 1
+       raise StandardError, "Import should not combine merge_on, delete_insert_on and truncate_insert settings"
+     end
+
+     if !import_definition.merge_fields.empty?
+       Cranium::ImportStrategy::Merge.new(import_definition)
+     elsif !import_definition.delete_insert_on.empty?
+       Cranium::ImportStrategy::DeleteInsert.new(import_definition)
+     elsif import_definition.truncate_insert
+       Cranium::ImportStrategy::TruncateInsert.new(import_definition)
+     else
+       Cranium::ImportStrategy::Delta.new(import_definition)
+     end
+   end
+
+ end

data/lib/cranium/data_reader.rb
@@ -0,0 +1,48 @@
+ require 'csv'
+ require 'cranium/extensions/file'
+
+ class Cranium::DataReader
+
+   def initialize(source)
+     @source = source
+     @source_field_names = @source.fields.keys
+   end
+
+
+
+   def read(&block)
+     @source.files.each do |input_file|
+       read_input_file File.join(Cranium.configuration.upload_path, input_file), block
+     end
+   end
+
+
+
+   private
+
+   def read_input_file(input_file, read_block)
+     Cranium::ProgressOutput.show_progress File.basename(input_file), File.line_count(input_file) do |progress_bar|
+       line_number = 0
+       CSV.foreach input_file, csv_read_options_for(@source) do |row|
+         next if 1 == (line_number += 1)
+
+         record = Hash[@source_field_names.zip row]
+         self.instance_exec record, &read_block
+
+         progress_bar.inc
+       end
+     end
+   end
+
+
+
+   def csv_read_options_for(source_definition)
+     {
+       encoding: source_definition.encoding,
+       col_sep: source_definition.delimiter,
+       quote_char: source_definition.quote,
+       return_headers: false
+     }
+   end
+
+ end
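
A reading sketch (the source object and its fields are assumptions): the reader skips the CSV header row, zips each remaining row with the source's field names, and yields the resulting Hash to the block.

    # "source" stands for a source definition (as built with the source DSL shown
    # in the feature files) that has :id and :name fields and resolved input files.
    reader = Cranium::DataReader.new source

    reader.read do |record|
      puts "#{record[:id]} - #{record[:name]}"  # record is a Hash keyed by field name
    end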

data/lib/cranium/data_transformer.rb
@@ -0,0 +1,126 @@
+ require 'csv'
+ require 'cranium/extensions/file'
+
+ class Cranium::DataTransformer
+
+   def initialize(source, target)
+     @source, @target = source, target
+     @index = Cranium::Transformation::Index.new
+     @target_fields = @target.fields.keys
+     @record = Cranium::TransformationRecord.new @source.fields.keys, @target_fields
+   end
+
+
+
+   def transform(&block)
+     raise StandardError, "Source definition '#{@target.name}' cannot overrride the file name because it is a transformation target" if @target.file_name_overriden?
+
+     CSV.open "#{Cranium.configuration.upload_path}/#{@target.file}", "w:#{@target.encoding}", csv_write_options_for(@target) do |target_file|
+       @target_file = target_file
+       @source.files.each do |input_file|
+         transform_input_file File.join(Cranium.configuration.upload_path, input_file), block
+       end
+     end
+
+     @target.resolve_files
+   end
+
+
+
+   private
+
+   def transform_input_file(input_file, transformation_block)
+     Cranium::ProgressOutput.show_progress File.basename(input_file), File.line_count(input_file) do |progress_bar|
+       line_number = 0
+       CSV.foreach input_file, csv_read_options_for(@source) do |row|
+         next if 1 == (line_number += 1)
+
+         @record.input_data = row
+         self.instance_exec @record, &transformation_block
+
+         progress_bar.inc
+       end
+     end
+   end
+
+
+
+   def csv_write_options_for(source_definition)
+     {
+       col_sep: source_definition.delimiter,
+       quote_char: source_definition.quote,
+       write_headers: true,
+       headers: source_definition.fields.keys
+     }
+   end
+
+
+
+   def csv_read_options_for(source_definition)
+     {
+       encoding: source_definition.encoding,
+       col_sep: source_definition.delimiter,
+       quote_char: source_definition.quote,
+       return_headers: false
+     }
+   end
+
+
+
+   def output(record)
+     @target_file << prepare_for_output(case record
+                                          when Cranium::TransformationRecord
+                                            record.data
+                                          when Hash
+                                            record
+                                          else
+                                            raise ArgumentError, "Cannot write '#{record.class}' to file as CSV record"
+                                        end)
+   end
+
+
+
+   def prepare_for_output(hash)
+     hash.
+       keep_if { |key| @target_fields.include? key }.
+       sort_by { |field, _| @target_fields.index(field) }.
+       map { |item| item[1] }.
+       map { |value| strip(value) }
+   end
+
+
+
+   def strip(value)
+     return value unless value.respond_to? :strip
+     value.strip
+   end
+
+
+
+   def unique_on_fields?(*fields)
+     not Cranium::Transformation::DuplicationIndex[*fields].duplicate? @record
+   end
+
+
+
+   def lookup(field_name, settings)
+     @index.lookup field_name, settings
+   end
+
+
+
+   def insert(field_name, settings)
+     @index.insert field_name, settings
+   end
+
+
+
+   def next_value_in_sequence(name)
+     Cranium::Transformation::Sequence.by_name name
+   end
+
+
+
+   alias_method :sequence, :next_value_in_sequence
+
+ end
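
This class is what executes the transform blocks shown in the feature files earlier: each input row is wrapped in a Cranium::TransformationRecord and the block is instance_exec'd against the transformer, which is why helpers like output, lookup and sequence are available inside it (the record itself provides methods such as split_field). A hedged sketch of direct use:

    # Illustrative; source and target are assumed source definitions, and :id is
    # assumed to hold a string value.
    transformer = Cranium::DataTransformer.new source, target

    transformer.transform do |record|
      record[:item] = "*#{record[:id]}*"
      output record   # written to the target CSV in target field order
    end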

data/lib/cranium/database.rb
@@ -0,0 +1,36 @@
+ require 'sequel'
+ require 'sequel/extensions/connection_validator'
+
+ module Cranium::Database
+
+   def self.connection
+     @connection ||= setup_connection(Cranium.configuration.greenplum_connection_string)
+   end
+
+
+
+   def self.[](name)
+     @connections ||= {}
+     @connections[name] ||= setup_connection(@definitions[name].connect_to)
+   end
+
+
+
+   def self.register_database(name, &block)
+     @definitions ||= Cranium::DefinitionRegistry.new Cranium::DSL::DatabaseDefinition
+     @definitions.register_definition name, &block
+   end
+
+
+
+   private
+
+
+   def self.setup_connection(connection_string)
+     connection = Sequel.connect connection_string, loggers: Cranium.configuration.loggers
+     connection.extension :connection_validator
+     connection.pool.connection_validation_timeout = -1
+     return connection
+   end
+
+ end
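
Beyond the default Greenplum connection, additional databases can be registered by name; the block is evaluated against a Cranium::DSL::DatabaseDefinition and its connect_to value is used the first time the connection is requested. A sketch with a placeholder connection string:

    # Placeholder connection string, for illustration only.
    Cranium::Database.register_database :events do
      connect_to "mysql2://reporting:secret@db.example.com/events"
    end

    Cranium::Database[:events]    # connects via Sequel on first use, then caches
    Cranium::Database.connection  # default connection from greenplum_connection_string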

data/lib/cranium/definition_registry.rb
@@ -0,0 +1,21 @@
+ class Cranium::DefinitionRegistry
+
+   def initialize(definition_class)
+     @definition_class = definition_class
+     @definitions = {}
+   end
+
+
+
+   def [](name)
+     @definitions[name]
+   end
+
+
+
+   def register_definition(name, &block)
+     definition = @definition_class.new name
+     definition.instance_eval &block
+     @definitions[name] = definition
+   end
+ end

data/lib/cranium/dimension_manager.rb
@@ -0,0 +1,65 @@
+ class Cranium::DimensionManager
+
+   attr_reader :rows
+
+
+
+   def self.for(table_name, key_fields)
+     @instances ||= {}
+     @instances[[table_name, key_fields]] ||= self.new table_name, key_fields
+   end
+
+
+
+   def initialize(table_name, key_fields)
+     @table_name, @key_fields = table_name, key_fields
+     @rows = []
+
+     Cranium.application.after_import { flush }
+   end
+
+
+
+   def insert(target_key, row)
+     raise ArgumentError, "Required attribute '#{target_key}' missing" unless row.has_key? target_key
+
+     @rows << resolve_sequence_values(row)
+     row[target_key]
+   end
+
+
+
+   def create_cache_for_field(value_field)
+     to_multi_key_cache(db.select_map(@key_fields + [value_field]))
+   end
+
+
+
+   def flush
+     db.multi_insert(@rows) unless @rows.empty?
+     @rows = []
+   end
+
+
+
+   private
+
+   def to_multi_key_cache(table_data)
+     Hash[table_data.map { |row| [row[0..-2], row.last] }]
+   end
+
+
+
+   def resolve_sequence_values(row)
+     row.each do |key, value|
+       row[key] = value.next_value if value.is_a? Cranium::Transformation::Sequence
+     end
+   end
+
+
+
+   def db
+     Cranium::Database.connection[@table_name]
+   end
+
+ end
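
DimensionManager buffers dimension rows in memory and flushes them from the after_import hook it registers in its constructor, so new dimension keys are written in a single multi_insert rather than row by row. A usage sketch with invented table and field names:

    # Requires a running Cranium application (the constructor registers an
    # after_import hook) and a dim_product table; all names here are illustrative.
    manager = Cranium::DimensionManager.for :dim_product, [:product_code]

    # Queues the row for insertion and returns the value of the given key attribute.
    manager.insert :product_key, product_key: 42, product_code: "JNI-123", name: "Just a product"

    cache = manager.create_cache_for_field :product_key
    cache[["JNI-123"]]  # => 42 when that row is already present in the table

    manager.flush       # normally triggered automatically after an import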