cranium 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
require_relative '../spec_helper'
|
|
2
|
+
require 'ostruct'
|
|
3
|
+
|
|
4
|
+
# Specs for Cranium::ExternalTable: the SQL statements sent to the connection
# when creating/dropping the external table, and the table name derived from
# the source definition's name.
describe Cranium::ExternalTable do

  let(:connection) { double "Greenplum connection" }

  # A source definition with one field of every Ruby type used below, plus
  # explicit CSV delimiter / quote / escape settings.
  let(:source) do
    definition = Cranium::DSL::SourceDefinition.new(:products)
    definition.file "test_products.csv"
    definition.field :text_field, String
    definition.field :integer_field, Integer
    definition.field :numeric_field, Float
    definition.field :date_field, Date
    definition.field :timestamp_field, Time
    definition.field :boolean_field1, TrueClass
    definition.field :boolean_field2, FalseClass
    definition.delimiter ';'
    definition.quote '"'
    definition.escape "'"
    definition
  end

  let(:external_table) { Cranium::ExternalTable.new source, connection }


  describe "#create" do
    it "should create an external table from the specified source" do
      configuration = OpenStruct.new(
        gpfdist_url: "gpfdist-url",
        gpfdist_home_directory: "/gpfdist-home",
        upload_directory: "upload-dir"
      )
      allow(Cranium).to receive(:configuration).and_return(configuration)

      # The source is stubbed to resolve to two files; both must show up in LOCATION.
      allow(source).to receive(:files).and_return(%w(test_products_a.csv test_products_b.csv))

      # NOTE(review): the statement is matched verbatim, so the leading whitespace
      # of every heredoc line must equal what ExternalTable#create emits — confirm
      # against the implementation.
      expected_statement = <<-sql
CREATE EXTERNAL TABLE "external_products" (
"text_field" TEXT,
"integer_field" INTEGER,
"numeric_field" NUMERIC,
"date_field" DATE,
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
"boolean_field1" BOOLEAN,
"boolean_field2" BOOLEAN
)
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
ENCODING 'UTF8'
      sql
      expect(connection).to receive(:run).with(expected_statement)

      external_table.create
    end
  end


  describe "#destroy" do
    it "should drop the external table" do
      drop_statement = %Q[DROP EXTERNAL TABLE "external_products"]
      expect(connection).to receive(:run).with(drop_statement)

      external_table.destroy
    end
  end


  describe "#name" do
    it "should return the name of the external table based on the source's name" do
      expect(external_table.name).to eq(:external_products)
    end
  end

end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
require_relative '../../spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for Cranium::Extract::Storage: reading and persisting the last
# extracted value of a field in a YAML file under the configured storage
# directory. All filesystem access is stubbed.
describe Cranium::Extract::Storage do

  let(:storage) { Cranium::Extract::Storage.new :extract_name }
  let(:storage_dir) { "/storage/directory/.cranium" }
  let(:storage_file) { "#{storage_dir}/extracts" }

  before do
    configuration = Cranium::Configuration.new
    configuration.storage_directory = storage_dir
    allow(Cranium).to receive(:configuration).and_return(configuration)
  end

  # Makes File.read of the storage file return the YAML dump of +content+.
  def stub_stored_content(content)
    allow(File).to receive(:read).with(storage_file).and_return(YAML.dump(content))
  end

  # Expects the storage file to be written with the YAML dump of +content+.
  def expect_stored_content(content)
    expect(File).to receive(:write).with(storage_file, YAML.dump(content))
  end

  describe "#last_value_of" do
    context "when storage file doesn't exist" do
      it "should return nil if no storage file was created yet" do
        allow(File).to receive(:exists?).with(storage_file).and_return(false)

        expect(storage.last_value_of(:field)).to eq(nil)
      end
    end

    context "when storage file already exists" do
      before do
        allow(File).to receive(:exists?).with(storage_file).and_return(true)
      end

      it "should return nil if no value was saved for this extract yet" do
        stub_stored_content(other_extract_name: { last_values: {} })

        expect(storage.last_value_of(:field)).to eq(nil)
      end

      it "should return nil if no value was saved for the field" do
        stub_stored_content(extract_name: { last_values: {} })

        expect(storage.last_value_of(:field)).to eq(nil)
      end

      it "should return the last saved value of the specified field" do
        stub_stored_content(extract_name: { last_values: { field: 15 } })

        expect(storage.last_value_of(:field)).to eq(15)
      end
    end
  end


  describe "#save_last_value_of" do
    context "when storage file doesn't exist" do
      before do
        allow(File).to receive(:exists?).with(storage_file).and_return(false)
      end

      it "should create the storage file and save the specified value if the storage directory already exists" do
        allow(Dir).to receive(:exists?).with(storage_dir).and_return(true)

        expect_stored_content(extract_name: { last_values: { field: 15 } })

        storage.save_last_value_of(:field, 15)
      end

      it "should create the storage directory if it doesn't exist yet" do
        allow(Dir).to receive(:exists?).with(storage_dir).and_return(false)
        # The written content is irrelevant here; only the mkdir_p call is asserted.
        allow(File).to receive :write

        expect(FileUtils).to receive(:mkdir_p).with(storage_dir)

        storage.save_last_value_of(:field, 15)
      end
    end

    context "when there are previously saved values" do
      before do
        allow(Dir).to receive(:exists?).with(storage_dir).and_return(true)
        allow(File).to receive(:exists?).with(storage_file).and_return(true)
      end

      it "should overwrite the specified field's value and preserve all others" do
        stub_stored_content(
          extract_name: {
            last_values: { field1: 1, field2: 2, field3: 3 }
          }
        )

        expect_stored_content(
          extract_name: {
            last_values: { field1: 1, field2: 5, field3: 3 }
          }
        )

        storage.save_last_value_of(:field2, 5)
      end

      it "should create the new entry if it doesn't exist yet" do
        previously_stored = { field1: 1, field2: 2, field3: 3 }
        stub_stored_content(
          other_extract_name: { last_values: previously_stored }
        )

        # Key order matters for the YAML dump: the existing extract stays first,
        # the new one is appended.
        expect_stored_content(
          other_extract_name: { last_values: { field1: 1, field2: 2, field3: 3 } },
          extract_name: { last_values: { field2: 5 } }
        )

        storage.save_last_value_of(:field2, 5)
      end
    end
  end

end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
require_relative '../spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for the Cranium::Logging mixin: every message must be forwarded to
# each logger returned by Cranium.configuration.loggers.
describe Cranium::Logging do

  # A bare object extended with the mixin under test (Object#extend returns the receiver).
  let(:logging_object) { Object.new.extend(Cranium::Logging) }
  let(:loggers) { [double("Logger 1"), double("Logger 2")] }

  before(:each) do
    allow(Cranium).to receive_message_chain(:configuration, :loggers).and_return(loggers)
  end


  # Sets a message expectation on every registered logger double.
  def all_loggers_should_receive(level, message)
    loggers.each do |logger|
      expect(logger).to receive(level).with(message)
    end
  end


  describe "#record_metric" do
    it "should record an arbitrary metric in every registered logger" do
      all_loggers_should_receive(:info, "[metrics/products] 1234")

      logging_object.record_metric("products", 1234)
    end
  end


  describe "#log" do
    it "should log a message with the specified reporting level in every registered logger" do
      all_loggers_should_receive(:error, "error message")

      logging_object.log(:error, "error message")
    end
  end

end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
require_relative '../../spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for Cranium::Sequel::Hash: a Hash whose keys and/or values can be
# qualified through Sequel.qualify.
describe Cranium::Sequel::Hash do

  let(:source_hash) { { field1: :field2, field3: :field4 } }
  let(:sequel_hash) { Cranium::Sequel::Hash[source_hash] }

  before(:each) do
    # Sequel.qualify is replaced by a stub producing :"<qualifier>_<field>"
    # symbols, so the expectations below can use plain symbols.
    allow(Sequel).to receive(:qualify) do |qualifier, field|
      "#{qualifier}_#{field}".to_sym
    end
  end


  it "should be a Hash" do
    expect(Cranium::Sequel::Hash.new).to be_a(Hash)
  end


  describe "#qualify" do
    context "when called with 'keys_with'" do
      it "should qualify only the key fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1)

        expect(qualified).to eq({ table1_field1: :field2, table1_field3: :field4 })
      end
    end

    context "when called with 'values_with'" do
      it "should qualify only the value fields of the hash" do
        qualified = sequel_hash.qualify(values_with: :table1)

        expect(qualified).to eq({ field1: :table1_field2, field3: :table1_field4 })
      end
    end

    context "when called with both 'keys_with' and 'values_with'" do
      it "should qualify both keys and value fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1, values_with: :table2)

        expect(qualified).to eq({ table1_field1: :table2_field2, table1_field3: :table2_field4 })
      end
    end

    it "should raise an error if called with unsupported options" do
      # :key_with (singular) is deliberately not one of the supported option names.
      expect { sequel_hash.qualify(key_with: :table) }
        .to raise_error(ArgumentError, "Unsupported option for qualify: key_with")
    end
  end


  describe "#qualified_keys" do
    it "should return an array with the hash's keys qualified with the specified qualifier" do
      expect(sequel_hash.qualified_keys(:table)).to eq(%i[table_field1 table_field3])
    end
  end


  describe "#qualified_values" do
    it "should return an array with the hash's values qualified with the specified qualifier" do
      expect(sequel_hash.qualified_values(:table)).to eq(%i[table_field2 table_field4])
    end
  end

end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
require_relative '../spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for Cranium::SourceRegistry: registering source definitions by name
# and looking them up again.
describe Cranium::SourceRegistry do

  let(:registry) { Cranium::SourceRegistry.new }

  describe "#[]" do
    it "should raise an error if a source with the specified name wasn't registered yet" do
      lookup = -> { registry[:name] }

      expect(&lookup).to raise_error("Undefined source 'name'")
    end
  end


  describe "#register_source" do
    it "should register a new source and configure it through the block passed" do
      # Reference definition built by hand; the registered one must compare equal to it.
      expected_source = Cranium::DSL::SourceDefinition.new(:test_source)
      expected_source.field(:test_field, String)

      registry.register_source(:test_source) do
        field :test_field, String
      end

      expect(registry[:test_source]).to eq(expected_source)
    end

    it "should return the newly registered source" do
      registered = registry.register_source(:test_source) {}

      expect(registered).to be_a(Cranium::DSL::SourceDefinition)
    end
  end

end
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
require_relative "../../spec_helper"
|
|
2
|
+
require 'cucumber/ast/table'
|
|
3
|
+
require 'date'
|
|
4
|
+
|
|
5
|
+
# Specs for Cranium::TestFramework::CucumberTable: building a table from a
# Cucumber AST table (header parsing, type specifiers, comment columns) and
# reading its data back with replacement patterns and typed columns applied.
module Cranium::TestFramework
  describe CucumberTable do

    context "class method" do
      # The group label names the method the examples actually call
      # (CucumberTable.from_ast_table).
      describe ".from_ast_table" do

        # Lazily built, so each example must assign @table_data before first
        # referencing `table`.
        let(:table) { CucumberTable.from_ast_table(Cucumber::Ast::Table.new(@table_data)) }

        it "should return a CucumberTable" do
          @table_data = [{ "column" => "value" }]

          expect(table).to be_a CucumberTable
        end


        it "should convert header values to symbols" do
          @table_data = [{ "column1" => "value1", "column2" => "value2" }]

          expect(table.fields).to eq([:column1, :column2])
        end


        it "should discard comment columns" do
          # Headers starting with '#' are expected to be dropped before construction.
          @table_data = [{ "column" => "value1", "#comment column" => "value2" }]
          expect(CucumberTable).to receive(:new).with([{ column: "value1" }], { column: :string })

          table
        end


        it "should discard type specifiers in column names" do
          @table_data = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]

          expect(table.fields).to match_array([:integer_column, :string_column, :numeric_column, :some_column])
        end


        it "should raise an exception if invalid type is specified" do
          @table_data = [{ "column (x)" => "value" }]

          expect { table.fields }.to raise_error StandardError, "Invalid type specified: x"
        end


        it "should instantiate the new table with the correct column types" do
          @table_data = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]
          # Columns without a type specifier are expected to be typed as :string.
          expect(CucumberTable).to receive(:new).with(
            [{
              integer_column: "one",
              string_column: "two",
              numeric_column: "five",
              some_column: "else"
            }],
            {
              integer_column: :integer,
              string_column: :string,
              numeric_column: :numeric,
              some_column: :string
            }
          )

          table
        end
      end
    end


    context "instance methods" do
      let(:data) { [{ "one" => "two", "three" => "four" }, { "five" => "six" }] }

      describe "#fields" do
        it "should return the keys of the first row" do
          expect(CucumberTable.new(data).fields).to eq(%w[one three])
        end
      end


      describe "#with_patterns" do
        it "should set replacement patterns and return the object" do
          table = CucumberTable.new(data)
          table_with_patterns = table.with_patterns({ "a" => "b" })

          # be_equal checks object identity (equal?), not just value equality.
          expect(table_with_patterns).to be_equal table
        end
      end


      describe "#data" do
        it "should return all data as an array of hashes" do
          expect(CucumberTable.new(data).data).to eq(data)
        end


        it "should make all substitutions set up as replacement patterns" do
          table = CucumberTable.new [{ first: "NULL", second: "apple", third: "something else entirely" }]
          # A pattern may map to a plain value or to a callable producing the value.
          table.with_patterns(
            "NULL" => nil,
            "apple" => lambda { "pear" }
          )

          expect(table.data).to eq([first: nil, second: "pear", third: "something else entirely"])
        end


        it "should evaluate integer fields" do
          table = CucumberTable.new([{ integer_column: "20" }], { integer_column: :integer })

          expect(table.data).to eq([{ integer_column: 20 }])
        end


        # Nested under "#data" because #columns is called on the object returned by #data.
        describe "#columns" do
          it "should return an array of empty arrays if there are no data rows" do
            table = CucumberTable.new [], { argument: :string }

            expect(table.data.columns).to eq([[]])
          end


          it "should return the data in columns as an array of arrays, discarding all header information" do
            table = CucumberTable.new [{ header1: "value1", header2: "value2" }, { header1: "value3", header2: "value4" }]

            expect(table.data.columns).to eq([%w[value1 value3], %w[value2 value4]])
          end
        end
      end

    end

  end
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require_relative '../../spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for Cranium::Transformation::DuplicationIndex: memoized lookup of
# indexes by fieldset (.[]) and duplicate detection on records (#duplicate?).
describe Cranium::Transformation::DuplicationIndex do

  let(:index) { Cranium::Transformation::DuplicationIndex.new :field1, :field2 }
  let(:record) { Cranium::TransformationRecord.new [:field1, :field2, :field3], [:field1, :field2, :field3] }

  describe ".[]" do
    # Reset the class-level @instances variable so examples don't affect each other.
    before(:each) do
      Cranium::Transformation::DuplicationIndex.instance_variable_set(:@instances, nil)
    end

    it "should return a DuplicationIndex instance for the specified fields" do
      allow(Cranium::Transformation::DuplicationIndex).to receive(:new).with(:field1, :field2).and_return(index)

      looked_up = Cranium::Transformation::DuplicationIndex[:field1, :field2]

      expect(looked_up).to eq index
    end

    it "should memoize the previously created instances" do
      first_lookup = Cranium::Transformation::DuplicationIndex[:field1, :field2]
      second_lookup = Cranium::Transformation::DuplicationIndex[:field1, :field2]

      expect(first_lookup).to eq(second_lookup)
    end

    it "should raise an error if empty fieldset was passed" do
      expect { Cranium::Transformation::DuplicationIndex[] }
        .to raise_error(ArgumentError, "Cannot build duplication index for empty fieldset")
    end
  end


  describe "#duplicate?" do
    it "should return false for the first entry" do
      record.input_data = %w[one two three]

      expect(index.duplicate?(record)).to be_falsey
    end

    it "should return true the second time it's called for the same record" do
      record.input_data = %w[one two three]
      index.duplicate?(record)

      expect(index.duplicate?(record)).to be_truthy
    end

    it "should only use the specified fieldset for duplication detection" do
      first_record = record
      second_record = record.clone
      single_field_index = Cranium::Transformation::DuplicationIndex.new(:field1)

      first_record.input_data = %w[one two three]
      single_field_index.duplicate?(first_record)

      # Only :field1 is indexed, so differing :field2 / :field3 values are ignored.
      second_record.input_data = %w[one four five]
      expect(single_field_index.duplicate?(second_record)).to be_truthy
    end

    it "should handle multiple fields for detection" do
      first_record = record
      second_record = record.clone
      third_record = record.clone
      two_field_index = Cranium::Transformation::DuplicationIndex.new(:field1, :field2)

      first_record.input_data = %w[one two three]
      two_field_index.duplicate?(first_record)

      # Differs in :field2 -> not a duplicate.
      second_record.input_data = %w[one four five]
      expect(two_field_index.duplicate?(second_record)).to be_falsey

      # Same :field1 and :field2 as the first record -> duplicate.
      third_record.input_data = %w[one two five]
      expect(two_field_index.duplicate?(third_record)).to be_truthy
    end

    it "should raise an error if record fieldset doesn't contain index fieldset" do
      record.input_data = %w[one two three]
      unknown_field_index = Cranium::Transformation::DuplicationIndex.new(:field5)

      expect { unknown_field_index.duplicate?(record) }
        .to raise_error(StandardError, "Missing deduplication key from record: field5")
    end
  end

end
|