cranium 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
@@ -0,0 +1,71 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
describe Cranium::ExternalTable do
  # Test double standing in for the database connection passed to the table.
  let(:connection) { double "Greenplum connection" }

  # A :products source declaring one field per Ruby type used below,
  # together with non-default delimiter, quote and escape characters.
  let(:source) do
    definition = Cranium::DSL::SourceDefinition.new(:products)
    definition.file "test_products.csv"
    {
      text_field: String,
      integer_field: Integer,
      numeric_field: Float,
      date_field: Date,
      timestamp_field: Time,
      boolean_field1: TrueClass,
      boolean_field2: FalseClass
    }.each { |field_name, field_type| definition.field field_name, field_type }
    definition.delimiter ';'
    definition.quote '"'
    definition.escape "'"
    definition
  end

  let(:external_table) { Cranium::ExternalTable.new source, connection }

  describe "#create" do
    it "should create an external table from the specified source" do
      configuration = OpenStruct.new(
        gpfdist_url: "gpfdist-url",
        gpfdist_home_directory: "/gpfdist-home",
        upload_directory: "upload-dir"
      )
      allow(Cranium).to receive(:configuration).and_return(configuration)
      allow(source).to receive(:files).and_return(%w[test_products_a.csv test_products_b.csv])

      # NOTE(review): `<<-` keeps the leading whitespace of every heredoc line,
      # so the text below must match the statement issued by #create exactly.
      # Confirm the indentation against the implementation before reformatting.
      expected_statement = <<-sql
CREATE EXTERNAL TABLE "external_products" (
"text_field" TEXT,
"integer_field" INTEGER,
"numeric_field" NUMERIC,
"date_field" DATE,
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
"boolean_field1" BOOLEAN,
"boolean_field2" BOOLEAN
)
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
ENCODING 'UTF8'
      sql

      expect(connection).to receive(:run).with(expected_statement)

      external_table.create
    end
  end

  describe "#destroy" do
    it "should drop the external table" do
      drop_statement = 'DROP EXTERNAL TABLE "external_products"'
      expect(connection).to receive(:run).with(drop_statement)

      external_table.destroy
    end
  end

  describe "#name" do
    it "should return the name of the external table based on the source's name" do
      table_name = external_table.name

      expect(table_name).to eq(:external_products)
    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
|
3
|
+
describe Cranium::Extract::Storage do
  let(:storage_dir) { "/storage/directory/.cranium" }
  let(:storage_file) { "#{storage_dir}/extracts" }
  let(:storage) { Cranium::Extract::Storage.new :extract_name }

  # Point the global configuration at the fake storage directory.
  before do
    configuration = Cranium::Configuration.new
    configuration.storage_directory = storage_dir
    allow(Cranium).to receive(:configuration).and_return(configuration)
  end

  # NOTE(review): File.exists? / Dir.exists? were removed in Ruby 3.2. The stubs
  # below presumably mirror the calls made by Storage itself, so they can only be
  # renamed together with the implementation - confirm before changing.

  # Stubs the existence check of the storage file.
  def stub_storage_file_exists(present)
    allow(File).to receive(:exists?).with(storage_file).and_return(present)
  end

  # Stubs the existence check of the storage directory.
  def stub_storage_dir_exists(present)
    allow(Dir).to receive(:exists?).with(storage_dir).and_return(present)
  end

  # Makes reading the storage file return the YAML dump of +contents+.
  def stub_stored_contents(contents)
    allow(File).to receive(:read).with(storage_file).and_return(YAML.dump(contents))
  end

  # Expects the YAML dump of +contents+ to be written to the storage file.
  def expect_contents_written(contents)
    expect(File).to receive(:write).with(storage_file, YAML.dump(contents))
  end

  describe "#last_value_of" do
    context "when storage file doesn't exist" do
      it "should return nil if no storage file was created yet" do
        stub_storage_file_exists false

        expect(storage.last_value_of(:field)).to be_nil
      end
    end

    context "when storage file already exists" do
      before { stub_storage_file_exists true }

      it "should return nil if no value was saved for this extract yet" do
        stub_stored_contents({ other_extract_name: { last_values: {} } })

        expect(storage.last_value_of(:field)).to be_nil
      end

      it "should return nil if no value was saved for the field" do
        stub_stored_contents({ extract_name: { last_values: {} } })

        expect(storage.last_value_of(:field)).to be_nil
      end

      it "should return the last saved value of the specified field" do
        stub_stored_contents({ extract_name: { last_values: { field: 15 } } })

        expect(storage.last_value_of(:field)).to eq(15)
      end
    end
  end

  describe "#save_last_value_of" do
    context "when storage file doesn't exist" do
      before { stub_storage_file_exists false }

      it "should create the storage file and save the specified value if the storage directory already exists" do
        stub_storage_dir_exists true

        expect_contents_written({ extract_name: { last_values: { field: 15 } } })

        storage.save_last_value_of(:field, 15)
      end

      it "should create the storage directory if it doesn't exist yet" do
        stub_storage_dir_exists false
        allow(File).to receive(:write)

        expect(FileUtils).to receive(:mkdir_p).with(storage_dir)

        storage.save_last_value_of(:field, 15)
      end
    end

    context "when there are previously saved values" do
      before do
        stub_storage_dir_exists true
        stub_storage_file_exists true
      end

      it "should overwrite the specified field's value and preserve all others" do
        previously_stored = {
          extract_name: {
            last_values: { field1: 1, field2: 2, field3: 3 }
          }
        }
        expected_after_save = {
          extract_name: {
            last_values: { field1: 1, field2: 5, field3: 3 }
          }
        }
        stub_stored_contents previously_stored

        expect_contents_written expected_after_save

        storage.save_last_value_of(:field2, 5)
      end

      it "should create the new entry if it doesn't exist yet" do
        other_extract_values = { last_values: { field1: 1, field2: 2, field3: 3 } }
        stub_stored_contents({ other_extract_name: other_extract_values })

        expect_contents_written({
          other_extract_name: { last_values: { field1: 1, field2: 2, field3: 3 } },
          extract_name: { last_values: { field2: 5 } }
        })

        storage.save_last_value_of(:field2, 5)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
|
3
|
+
describe Cranium::Logging do
  # Two logger doubles returned by the stubbed configuration.
  let(:loggers) { [double("Logger 1"), double("Logger 2")] }

  # A plain object extended with the module under test.
  let(:logging_object) do
    host = Object.new
    host.extend Cranium::Logging
    host
  end

  before(:each) do
    allow(Cranium).to receive_message_chain(:configuration, :loggers) { loggers }
  end

  # Sets an expectation on every logger double that it receives +level+
  # with exactly +message+.
  def all_loggers_should_receive(level, message)
    loggers.each do |logger|
      expect(logger).to receive(level).with(message)
    end
  end

  describe "#record_metric" do
    it "should record an arbitrary metric in every registered logger" do
      all_loggers_should_receive(:info, "[metrics/products] 1234")

      logging_object.record_metric("products", 1234)
    end
  end

  describe "#log" do
    it "should log a message with the specified reporting level in every registered logger" do
      all_loggers_should_receive(:error, "error message")

      logging_object.log(:error, "error message")
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
|
3
|
+
describe Cranium::Sequel::Hash do
  let(:source_hash) { { field1: :field2, field3: :field4 } }
  let(:sequel_hash) { Cranium::Sequel::Hash[source_hash] }

  # Replace Sequel.qualify with a stub that joins qualifier and field into a
  # single symbol, so the expectations below can use plain symbols.
  before(:each) do
    allow(Sequel).to receive(:qualify) do |qualifier, field|
      :"#{qualifier}_#{field}"
    end
  end

  it "should be a Hash" do
    instance = Cranium::Sequel::Hash.new

    expect(instance).to be_a Hash
  end

  describe "#qualify" do
    context "when called with 'keys_with'" do
      it "should qualify only the key fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1)

        expect(qualified).to eq({ table1_field1: :field2, table1_field3: :field4 })
      end
    end

    context "when called with 'values_with'" do
      it "should qualify only the value fields of the hash" do
        qualified = sequel_hash.qualify(values_with: :table1)

        expect(qualified).to eq({ field1: :table1_field2, field3: :table1_field4 })
      end
    end

    context "when called with both 'keys_with' and 'values_with'" do
      it "should qualify both keys and value fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1, values_with: :table2)

        expect(qualified).to eq({ table1_field1: :table2_field2, table1_field3: :table2_field4 })
      end
    end

    it "should raise an error if called with unsupported options" do
      expect do
        sequel_hash.qualify key_with: :table
      end.to raise_error ArgumentError, "Unsupported option for qualify: key_with"
    end
  end

  describe "#qualified_keys" do
    it "should return an array with the hash's keys qualified with the specified qualifier" do
      keys = sequel_hash.qualified_keys(:table)

      expect(keys).to eq(%i[table_field1 table_field3])
    end
  end

  describe "#qualified_values" do
    it "should return an array with the hash's values qualified with the specified qualifier" do
      values = sequel_hash.qualified_values(:table)

      expect(values).to eq(%i[table_field2 table_field4])
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
|
3
|
+
describe Cranium::SourceRegistry do
  let(:registry) { Cranium::SourceRegistry.new }

  describe "#[]" do
    it "should raise an error if a source with the specified name wasn't registered yet" do
      lookup = -> { registry[:name] }

      expect(&lookup).to raise_error "Undefined source 'name'"
    end
  end

  describe "#register_source" do
    it "should register a new source and configure it through the block passed" do
      # Build by hand the definition the registry is expected to produce.
      expected_source = Cranium::DSL::SourceDefinition.new(:test_source).tap do |definition|
        definition.field :test_field, String
      end

      registry.register_source(:test_source) { field :test_field, String }

      expect(registry[:test_source]).to eq(expected_source)
    end

    it "should return the newly registered source" do
      registered = registry.register_source(:test_source) {}

      expect(registered).to be_a Cranium::DSL::SourceDefinition
    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require_relative "../../spec_helper"
|
2
|
+
require 'cucumber/ast/table'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
module Cranium::TestFramework
  describe CucumberTable do

    context "class method" do
      # The group label now names the method the examples actually call
      # (it previously read ".from_cucumber_table").
      describe ".from_ast_table" do

        # Builds a CucumberTable from +rows+ by way of a Cucumber AST table.
        # The conversion happens when this helper is called, so examples can
        # set expectations on CucumberTable.new (or wrap the call in an
        # `expect { }` block) before the table is built.
        def table_from(rows)
          CucumberTable.from_ast_table(Cucumber::Ast::Table.new(rows))
        end

        it "should return a CucumberTable" do
          table = table_from([{ "column" => "value" }])

          expect(table).to be_a CucumberTable
        end

        it "should convert header values to symbols" do
          table = table_from([{ "column1" => "value1", "column2" => "value2" }])

          expect(table.fields).to eq([:column1, :column2])
        end

        it "should discard comment columns" do
          rows = [{ "column" => "value1", "#comment column" => "value2" }]
          expect(CucumberTable).to receive(:new).with([{ column: "value1" }], { column: :string })

          table_from(rows)
        end

        it "should discard type specifiers in column names" do
          rows = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]

          expect(table_from(rows).fields).to match_array([:integer_column, :string_column, :numeric_column, :some_column])
        end

        it "should raise an exception if invalid type is specified" do
          rows = [{ "column (x)" => "value" }]

          expect { table_from(rows).fields }.to raise_error StandardError, "Invalid type specified: x"
        end

        it "should instantiate the new table with the correct column types" do
          rows = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]
          expected_rows = [{
            integer_column: "one",
            string_column: "two",
            numeric_column: "five",
            some_column: "else"
          }]
          expected_types = {
            integer_column: :integer,
            string_column: :string,
            numeric_column: :numeric,
            some_column: :string
          }
          expect(CucumberTable).to receive(:new).with(expected_rows, expected_types)

          table_from(rows)
        end
      end
    end

    context "instance methods" do
      let(:data) { [{ "one" => "two", "three" => "four" }, { "five" => "six" }] }

      describe "#fields" do
        it "should return the keys of the first row" do
          expect(CucumberTable.new(data).fields).to eq(%w[one three])
        end
      end

      describe "#with_patterns" do
        it "should set replacement patterns and return the object" do
          table = CucumberTable.new(data)
          table_with_patterns = table.with_patterns({ "a" => "b" })

          # Identity check: the very same object must come back.
          expect(table_with_patterns).to equal(table)
        end
      end

      describe "#data" do
        it "should return all data as an array of hashes" do
          expect(CucumberTable.new(data).data).to eq(data)
        end

        it "should make all substitutions set up as replacement patterns" do
          table = CucumberTable.new([{ first: "NULL", second: "apple", third: "something else entirely" }])
          table.with_patterns(
            "NULL" => nil,
            "apple" => -> { "pear" }
          )

          expect(table.data).to eq([{ first: nil, second: "pear", third: "something else entirely" }])
        end

        it "should evaluate integer fields" do
          table = CucumberTable.new([{ integer_column: "20" }], { integer_column: :integer })

          expect(table.data).to eq([{ integer_column: 20 }])
        end

        # These examples call #columns on the value returned by #data, which is
        # why the group is nested here.
        describe "#columns" do
          it "should return an array of empty arrays if there are no data rows" do
            table = CucumberTable.new([], { argument: :string })

            expect(table.data.columns).to eq([[]])
          end

          it "should return the data in columns as an array of arrays, discarding all header information" do
            table = CucumberTable.new([{ header1: "value1", header2: "value2" }, { header1: "value3", header2: "value4" }])

            expect(table.data.columns).to eq([%w[value1 value3], %w[value2 value4]])
          end
        end
      end

    end

  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
|
3
|
+
describe Cranium::Transformation::DuplicationIndex do
  let(:index) { Cranium::Transformation::DuplicationIndex.new :field1, :field2 }

  # Record built from the same three field names passed for both constructor arguments.
  let(:record) do
    field_names = [:field1, :field2, :field3]
    Cranium::TransformationRecord.new field_names, field_names.dup
  end

  describe ".[]" do
    # Clear the class-level @instances variable before every example.
    before(:each) do
      Cranium::Transformation::DuplicationIndex.instance_variable_set(:@instances, nil)
    end

    it "should return a DuplicationIndex instance for the specified fields" do
      # Build the instance before .new is stubbed so the stub can return it.
      prebuilt_index = index
      allow(Cranium::Transformation::DuplicationIndex).to receive(:new).with(:field1, :field2).and_return(prebuilt_index)

      expect(Cranium::Transformation::DuplicationIndex[:field1, :field2]).to eq prebuilt_index
    end

    it "should memoize the previously created instances" do
      first_lookup = Cranium::Transformation::DuplicationIndex[:field1, :field2]
      second_lookup = Cranium::Transformation::DuplicationIndex[:field1, :field2]

      expect(first_lookup).to eq(second_lookup)
    end

    it "should raise an error if empty fieldset was passed" do
      expect do
        Cranium::Transformation::DuplicationIndex[]
      end.to raise_error ArgumentError, "Cannot build duplication index for empty fieldset"
    end
  end

  describe "#duplicate?" do
    it "should return false for the first entry" do
      record.input_data = %w[one two three]

      expect(index.duplicate?(record)).to be_falsey
    end

    it "should return true the second time it's called for the same record" do
      record.input_data = %w[one two three]
      index.duplicate?(record)

      expect(index.duplicate?(record)).to be_truthy
    end

    it "should only use the specified fieldset for duplication detection" do
      first_record = record
      second_record = record.clone
      single_field_index = Cranium::Transformation::DuplicationIndex.new(:field1)

      first_record.input_data = %w[one two three]
      single_field_index.duplicate?(first_record)

      second_record.input_data = %w[one four five]
      expect(single_field_index.duplicate?(second_record)).to be_truthy
    end

    it "should handle multiple fields for detection" do
      first_record = record
      second_record = record.clone
      third_record = record.clone
      two_field_index = Cranium::Transformation::DuplicationIndex.new(:field1, :field2)

      first_record.input_data = %w[one two three]
      two_field_index.duplicate?(first_record)

      second_record.input_data = %w[one four five]
      expect(two_field_index.duplicate?(second_record)).to be_falsey

      third_record.input_data = %w[one two five]
      expect(two_field_index.duplicate?(third_record)).to be_truthy
    end

    it "should raise an error if record fieldset doesn't contain index fieldset" do
      record.input_data = %w[one two three]
      unknown_field_index = Cranium::Transformation::DuplicationIndex.new(:field5)

      expect do
        unknown_field_index.duplicate?(record)
      end.to raise_error StandardError, "Missing deduplication key from record: field5"
    end
  end
end
|