cranium 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Import strategy that empties the target table and then copies the selected
# fields of every source row into it.
#
# NOTE(review): database, target_table, target_fields and source_fields are not
# defined in this class; presumably they are provided by Base - confirm there.
class Cranium::ImportStrategy::TruncateInsert < Cranium::ImportStrategy::Base

  # Truncates the target table, inserts the source rows and returns the row
  # count of the source table.
  def import_from(source_table)
    @source_table = source_table

    database[target_table].truncate
    import_new_records

    database[@source_table].count
  end


  private

  # Runs a single INSERT ... SELECT statement built from the two datasets.
  def import_new_records
    target_dataset = database[target_table]
    source_rows = database[@source_table].select(*source_fields)

    database.run target_dataset.insert_sql(target_fields, source_rows)
  end

end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Namespace of the import strategies. Every constant is registered with
# autoload, so its file is only required when the constant is first referenced.
module Cranium::ImportStrategy

  {
    Base:           'cranium/import_strategy/base',
    DeleteInsert:   'cranium/import_strategy/delete_insert',
    TruncateInsert: 'cranium/import_strategy/truncate_insert',
    Delta:          'cranium/import_strategy/delta',
    Merge:          'cranium/import_strategy/merge'
  }.each { |constant_name, path| autoload constant_name, path }

end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Profiles the whole run of the script that loads this file: ruby-prof is
# started in a BEGIN block and the call-stack report is written in an END block.
BEGIN {
  require 'ruby-prof'
  RubyProf.start
}

END {
  profile = RubyProf.stop

  printer = RubyProf::CallStackPrinter.new profile
  # The report is named after the running script, with dots replaced by underscores.
  profile_path = "/tmp/" + File.basename($0).gsub(".", "_") + "_profile.html"
  # Block form closes (and thereby flushes) the report before its path is announced.
  File.open(profile_path, "w") { |report| printer.print report, min_percent: 1 }
  puts "Profiling information saved to: " + profile_path
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
require 'progressbar'
|
|
2
|
+
|
|
3
|
+
# Wraps a task in a progress bar: a real ProgressBar when STDOUT is a terminal,
# a do-nothing stand-in otherwise.
class Cranium::ProgressOutput

  # Stand-in used when STDOUT is not a TTY; inc and finish are no-ops.
  class NullProgressBar

    def inc; end


    def finish; end

  end


  # Creates a progress bar for +title+ / +total+, yields it to the caller's
  # block and calls finish on it once the block has returned.
  def self.show_progress(title, total)
    bar = create_progress_bar(title, total)
    yield bar
    bar.finish
  end


  # Picks the bar implementation based on whether STDOUT is a TTY.
  # A bare `private` does not apply to `def self.` methods, so this method is
  # publicly callable.
  def self.create_progress_bar(title, total)
    return NullProgressBar.new unless STDOUT.tty?

    ProgressBar.new(title, total, STDOUT)
  end

end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require 'sequel'
|
|
2
|
+
|
|
3
|
+
# Hash subclass that can return its keys and/or values wrapped with
# Sequel.qualify.
class Cranium::Sequel::Hash < Hash

  # Returns a plain Hash built from this hash's keys and values, each side
  # qualified with the matching option (:keys_with / :values_with) when given.
  # Raises ArgumentError for any other option key.
  def qualify(options)
    unsupported = options.keys - [:keys_with, :values_with]
    unless unsupported.empty?
      raise ArgumentError, "Unsupported option for qualify: #{unsupported.first}"
    end

    qualified_keys = qualify_fields(options[:keys_with], keys)
    qualified_values = qualify_fields(options[:values_with], values)
    Hash[qualified_keys.zip(qualified_values)]
  end


  # Keys of this hash qualified with +qualifier+.
  def qualified_keys(qualifier)
    qualify_fields(qualifier, keys)
  end


  # Values of this hash qualified with +qualifier+.
  def qualified_values(qualifier)
    qualify_fields(qualifier, values)
  end


  private

  # Returns +fields+ untouched when no qualifier is given, otherwise each field
  # passed through Sequel.qualify.
  def qualify_fields(qualifier, fields)
    if qualifier.nil?
      fields
    else
      fields.map { |field| Sequel.qualify(qualifier, field) }
    end
  end

end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Keeps source definitions by name.
class Cranium::SourceRegistry

  def initialize
    @sources = {}
  end


  # Returns the source registered under +name+; raises a RuntimeError when
  # nothing is registered under that name.
  def [](name)
    source = @sources[name]
    raise "Undefined source '#{name}'" unless source

    source
  end


  # Builds a SourceDefinition for +name+, evaluates +block+ in the context of
  # that definition, stores it and returns it.
  def register_source(name, &block)
    @sources[name] = Cranium::DSL::SourceDefinition.new(name).tap do |definition|
      definition.instance_eval(&block)
    end
  end

end
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Holds the rows of a Cucumber table as an array of hashes, together with
# optional per-column types and cell pattern replacements that are applied
# whenever the data is read.
class Cranium::TestFramework::CucumberTable

  # Builds a table from a Cucumber AST table. A header may end in a type
  # suffix, "(i)", "(n)" or "(s)"; columns whose header starts with "#" are
  # treated as comments and removed.
  def self.from_ast_table(ast_table)
    column_types, hashes = process_ast_table ast_table
    new remove_comment_columns(hashes), column_types
  end


  # array_of_hashes: one hash per row, keyed by field name.
  # column_types:    field name => :integer / :numeric / anything else (left as is).
  def initialize(array_of_hashes, column_types = {})
    @pattern_replacements = {}
    @data = array_of_hashes
    @column_types = column_types
  end


  # Sets the cell replacements (cell value => replacement or Proc) and returns self.
  def with_patterns(pattern_replacements)
    @pattern_replacements = pattern_replacements
    self
  end


  def to_step_definition_arg
    dup
  end


  # Intentionally empty.
  def accept(_)
  end


  # Field names, taken from the first row.
  def fields
    @data.first.keys
  end


  # Returns the evaluated rows. The returned array additionally responds to
  # #columns, which gives the values grouped per field; when there are no
  # values at all it gives one empty array per typed column.
  def data
    column_count = @column_types.count

    evaluate_cells.tap do |rows|
      rows.define_singleton_method(:columns) do
        columns = each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |row, values_by_field|
          row.each { |key, value| values_by_field[key] << value }
        end.values

        columns.empty? ? Array.new(column_count) { [] } : columns
      end
    end
  end


  # First value of every evaluated row.
  def data_array
    data.map { |row| row.values.first }
  end


  private

  # Returns a new array of new hashes with every cell run through evaluate_field.
  def evaluate_cells
    @data.map do |row|
      row.each_with_object({}) do |(key, value), evaluated_row|
        evaluated_row[key] = evaluate_field(key, value)
      end
    end
  end


  # A cell equal to a replacement pattern yields the replacement (called first
  # when it is a Proc); any other cell is converted according to its column type.
  def evaluate_field(key, value)
    if @pattern_replacements.key? value
      replacement = @pattern_replacements[value]
      return replacement.is_a?(Proc) ? replacement.call : replacement
    end

    case @column_types[key]
    when :integer
      value.to_i
    when :numeric
      # Kernel#BigDecimal instead of BigDecimal.new, which newer bigdecimal
      # releases no longer provide.
      BigDecimal(value)
    else
      value
    end
  end


  # Strips the optional type suffix from every header, turning the header into
  # a symbol, and records the column type of every non-comment column.
  # Returns [column_types, rows as hashes].
  def self.process_ast_table(ast_table)
    column_types = {}
    typed_table = ast_table.map_headers do |header|
      header.match(/^(?<name>.*?)(\s+\((?<type>\w{1,2})\))?$/) do |match|
        field_name = match[:name].to_sym
        # Comment columns get no type, so their suffix is never validated.
        column_types[field_name] = column_type(match[:type]) unless comment_field?(field_name)
        field_name
      end
    end

    return column_types, typed_table.hashes
  end


  def self.comment_field?(field_name)
    field_name.to_s.start_with? "#"
  end


  # Maps a header type suffix to a column type; no suffix means :string.
  # Raises StandardError for an unknown suffix.
  def self.column_type(type_specifier)
    case type_specifier
    when "i"
      :integer
    when "n"
      :numeric
    when "s", nil
      :string
    else
      raise StandardError, "Invalid type specified: #{type_specifier}"
    end
  end


  # Deletes the comment columns from every row hash in place and returns the rows.
  def self.remove_comment_columns(hashes)
    hashes.map do |hash|
      hash.delete_if { |key, _value| comment_field?(key) }
    end
  end

end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Base class for database objects created by the test framework. Each class
# keeps its own list of created entities so they can be destroyed afterwards.
class Cranium::TestFramework::DatabaseEntity

  attr_reader :entity_name, :connection


  def initialize(entity, db_connection)
    @entity_name = entity
    @connection = db_connection
  end


  # Entities registered so far on the receiving class (lazily initialised).
  def self.entities_created
    @entities_created ||= []
  end


  # Calls destroy on every registered entity, then resets the list.
  def self.cleanup
    entities_created.each(&:destroy)
    @entities_created = []
  end

end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# A database sequence managed by the test framework.
class Cranium::TestFramework::DatabaseSequence < Cranium::TestFramework::DatabaseEntity

  # Creates the sequence and registers it in the class's list of created entities.
  def create
    connection.run("CREATE sequence #{entity_name}")
    self.class.entities_created.push(self)
  end


  # Drops the sequence.
  def destroy
    connection.run("DROP SEQUENCE #{entity_name}")
  end

end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# A database table managed by the test framework.
class Cranium::TestFramework::DatabaseTable < Cranium::TestFramework::DatabaseEntity

  # Creates the table and registers it in the class's list of created entities.
  # fields: hashes with :field_name and :field_type entries.
  def create(fields)
    column_definitions = fields.map { |field| "#{field[:field_name]} #{field[:field_type]}" }.join(", ")
    connection.run("CREATE TABLE #{entity_name} (#{column_definitions})")
    self.class.entities_created.push(self)
  end


  # Drops the table.
  def destroy
    connection.run("DROP TABLE #{entity_name}")
  end


  # All rows of the table, restricted to +fields+ (every column by default).
  def content(fields = [:*])
    table.select(*fields).all
  end


  # Inserts +data+ with a single multi_insert call.
  def insert(data)
    table.multi_insert(data)
  end


  # Truncates the table.
  def clear
    table.truncate
  end


  private

  # Dataset for this table on the entity's connection.
  def table
    connection[entity_name]
  end

end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
|
|
3
|
+
# File helpers that resolve file names relative to a working directory.
class Cranium::TestFramework::UploadDirectory

  def initialize(working_directory)
    @working_directory = working_directory
  end


  # True when +file_name+ exists inside the working directory.
  def file_exists?(file_name)
    # File.exist? rather than File.exists?, which newer Rubies no longer provide.
    File.exist? path_to(file_name)
  end


  # Returns the content of +file_name+.
  def read_file(file_name)
    File.read path_to(file_name)
  end


  # Writes +content+ to +file_name+ using UTF-8 as the external encoding,
  # replacing any previous content.
  def save_file(file_name, content)
    File.open(path_to(file_name), "w:UTF-8") { |file| file.write content }
  end


  # Deletes +file_name+ from the working directory.
  def delete_file(file_name)
    File.unlink path_to(file_name)
  end


  # Recursively removes +path+. Unlike the other helpers, +path+ is used as
  # given and is not joined to the working directory.
  def remove_directory(path)
    FileUtils.rm_rf path
  end


  private

  # Location of +file_name+ inside the working directory.
  def path_to(file_name)
    File.join(@working_directory, file_name)
  end

end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
require 'open3'
|
|
2
|
+
|
|
3
|
+
# Test-side view of a Cranium run: saves a definition file into the working
# directory, executes it through bin/cranium and exposes the captured output,
# plus factories for database tables and sequences.
class Cranium::TestFramework::World

  DEFINITION_FILE = "definition.rb"

  # Populated by execute_definition.
  attr_reader :output, :error_output, :result_code


  def initialize(working_directory, greenplum_connection)
    @greenplum_connection = greenplum_connection
    @directory = working_directory
  end


  # Memoized UploadDirectory for the working directory.
  def upload_directory
    @upload_directory ||= Cranium::TestFramework::UploadDirectory.new @directory
  end


  # Saves +definition+ as DEFINITION_FILE, prefixed with a configuration block
  # whose values are copied from the current Cranium.configuration.
  def save_definition(definition)
    config = <<-config_string
      require 'logger'

      Cranium.configure do |config|
        config.greenplum_connection_string = "#{Cranium.configuration.greenplum_connection_string}"
        config.gpfdist_url = "#{Cranium.configuration.gpfdist_url}"
        config.gpfdist_home_directory = "#{Cranium.configuration.gpfdist_home_directory}"
        config.upload_directory = "#{Cranium.configuration.upload_directory}"
        config.loggers << Logger.new("log/application.log")
      end
    config_string

    upload_directory.save_file DEFINITION_FILE, config + definition
  end


  # Runs the saved definition through `bundle exec bin/cranium` and stores its
  # stdout, stderr and exit status.
  def execute_definition
    # Argument-vector form: no shell is involved, so a working directory that
    # contains spaces or shell metacharacters is passed through unchanged.
    @output, @error_output, status = Open3.capture3(
      "bundle", "exec", "bin/cranium", "--cranium-load", "#{@directory}/#{DEFINITION_FILE}"
    )
    @result_code = status.exitstatus
  end


  # Summary of the last execution.
  def script_output
    "Output: #{@output}\n" \
      "Error: #{@error_output}\n" \
      "Exit status: #{@result_code}\n"
  end


  # DatabaseTable for +table_name+ (converted to a symbol) on the Greenplum connection.
  def database_table(table_name)
    Cranium::TestFramework::DatabaseTable.new table_name.to_sym, @greenplum_connection
  end


  # DatabaseSequence for +sequence_name+ on the Greenplum connection.
  def database_sequence(sequence_name)
    Cranium::TestFramework::DatabaseSequence.new sequence_name, @greenplum_connection
  end

end
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Namespace of the test framework helpers. Every constant is registered with
# autoload, so its file is only required when the constant is first referenced.
module Cranium::TestFramework

  {
    CucumberTable:    'cranium/test_framework/cucumber_table',
    DatabaseEntity:   'cranium/test_framework/database_entity',
    DatabaseSequence: 'cranium/test_framework/database_sequence',
    DatabaseTable:    'cranium/test_framework/database_table',
    UploadDirectory:  'cranium/test_framework/upload_directory',
    World:            'cranium/test_framework/world'
  }.each { |constant_name, path| autoload constant_name, path }

end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require 'set'
|
|
2
|
+
|
|
3
|
+
# Remembers the value combinations already seen for a set of fields, so that
# repeated records can be detected.
class Cranium::Transformation::DuplicationIndex

  # Returns the index for +fields+, creating it on first use; the same field
  # list always yields the same instance. Raises ArgumentError when no fields
  # are given.
  def self.[](*fields)
    raise ArgumentError, "Cannot build duplication index for empty fieldset" if fields.empty?

    (@instances ||= {})[fields] ||= new(*fields)
  end


  def initialize(*fields)
    @fields = fields
    @fingerprints = Set.new
  end


  # True when a record with the same values in the indexed fields has been
  # checked before; otherwise remembers the record's values and returns false.
  def duplicate?(record)
    # Set#add? returns nil only when the fingerprint was already present.
    @fingerprints.add?(take_fingerprint(record)).nil?
  end


  private

  # Values of the indexed fields, in field order. Raises StandardError when the
  # record lacks one of the fields.
  def take_fingerprint(record)
    @fields.map do |field_name|
      unless record.has_key? field_name
        raise StandardError, "Missing deduplication key from record: #{field_name}"
      end

      record[field_name]
    end
  end

end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Looks up dimension values through per-(table, key fields, value field)
# caches obtained from Cranium::DimensionManager.
class Cranium::Transformation::Index

  def initialize
    @indexes = {}
  end


  # Returns the cached +field_name+ value for the key described by +options+
  # (:from_table plus either :match => {column => value} or
  # :match_column / :to_value).
  #
  # On a cache miss:
  # * :if_not_found_then        - its value is returned (a Proc is called first);
  # * :if_not_found_then_insert - a record is inserted through the dimension
  #                               manager and its result is cached and returned;
  # * neither                   - :not_found is returned.
  def lookup(field_name, options)
    validate options

    cache = cache_for(options[:from_table], key_fields(options), field_name)
    lookup_key = keys(options)

    return cache[lookup_key] if cache.has_key? lookup_key

    if options.key? :if_not_found_then
      fallback = options[:if_not_found_then]
      fallback.is_a?(Proc) ? fallback.call : fallback
    elsif options.key? :if_not_found_then_insert
      dimension = Cranium::DimensionManager.for(options[:from_table], key_fields(options))
      cache[lookup_key] = dimension.insert(field_name, default_value_record(options))
    else
      :not_found
    end
  end


  # Inserts options[:record] into options[:table] through the dimension manager
  # keyed on +field_name+.
  def insert(field_name, options)
    dimension = Cranium::DimensionManager.for(options[:table], [field_name])
    dimension.insert(field_name, options[:record])
  end


  # Raises ArgumentError when both not-found options are present.
  def validate(options)
    return unless options.key?(:if_not_found_then_insert) && options.key?(:if_not_found_then)

    raise ArgumentError, "Cannot specify both :if_not_found_then and :if_not_found_then_insert options"
  end


  private

  # Record to insert on a miss: the :if_not_found_then_insert values merged
  # with the key values being looked up (the key values win on conflict).
  def default_value_record(options)
    key_values = options.key?(:match) ? options[:match] : { options[:match_column] => options[:to_value] }

    options[:if_not_found_then_insert].merge(key_values)
  end


  # Cache for the given table / key fields / value field, created on first use.
  def cache_for(table_name, key_fields, value_field)
    @indexes[[table_name, key_fields, value_field]] ||=
      Cranium::DimensionManager.for(table_name, key_fields).create_cache_for_field(value_field)
  end


  # Names of the columns the lookup matches on.
  def key_fields(options)
    options.key?(:match) ? options[:match].keys : [options[:match_column]]
  end


  # Values the lookup matches with, in the same order as key_fields.
  def keys(options)
    options.key?(:match) ? options[:match].values : [options[:to_value]]
  end

end
|