cranium 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
@@ -0,0 +1,71 @@
1
+ require_relative '../spec_helper'
2
+ require 'ostruct'
3
+
4
# Specs for Cranium::ExternalTable, constructed from a source definition and a
# connection double. Three behaviours are covered:
#   #create  - expects a single `run` call on the connection carrying the
#              CREATE EXTERNAL TABLE statement for the :products source
#   #destroy - expects `run` with DROP EXTERNAL TABLE "external_products"
#   #name    - expects the symbol :external_products
+ describe Cranium::ExternalTable do
5
+
6
+ let(:connection) { double "Greenplum connection" }
7
# Source fixture: seven fields (String, Integer, Float, Date, Time, TrueClass,
# FalseClass) matching the seven columns of the expected statement in #create,
# plus a custom delimiter, quote and escape character.
+ let(:source) do
8
+ Cranium::DSL::SourceDefinition.new(:products).tap do |source|
9
+ source.file "test_products.csv"
10
+ source.field :text_field, String
11
+ source.field :integer_field, Integer
12
+ source.field :numeric_field, Float
13
+ source.field :date_field, Date
14
+ source.field :timestamp_field, Time
15
+ source.field :boolean_field1, TrueClass
16
+ source.field :boolean_field2, FalseClass
17
+ source.delimiter ';'
18
+ source.quote '"'
19
+ source.escape "'"
20
+ end
21
+ end
22
+ let(:external_table) { Cranium::ExternalTable.new source, connection }
23
+
24
+
25
+ describe "#create" do
26
+ it "should create an external table from the specified source" do
27
# Replace the global configuration with a plain OpenStruct. Only gpfdist_url
# and upload_directory show up in the expected LOCATION clause below;
# gpfdist_home_directory does not appear in the expected statement.
+ allow(Cranium).to receive_messages configuration: OpenStruct.new(
28
+ gpfdist_url: "gpfdist-url",
29
+ gpfdist_home_directory: "/gpfdist-home",
30
+ upload_directory: "upload-dir"
31
+ )
32
+
33
# The source is stubbed to report two files; each one is expected as its own
# gpfdist:// URL in the LOCATION clause.
+ allow(source).to receive_messages files: %w(test_products_a.csv test_products_b.csv)
34
+
35
# The escape character "'" configured on the source is expected as '''' in the
# FORMAT clause of the statement.
# NOTE(review): the heredoc body is matched verbatim by `with`, so its leading
# whitespace is significant. This listing shows no indentation - confirm
# against the actual spec file before touching the SQL text.
+ expect(connection).to receive(:run).with(<<-sql
36
+ CREATE EXTERNAL TABLE "external_products" (
37
+ "text_field" TEXT,
38
+ "integer_field" INTEGER,
39
+ "numeric_field" NUMERIC,
40
+ "date_field" DATE,
41
+ "timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
42
+ "boolean_field1" BOOLEAN,
43
+ "boolean_field2" BOOLEAN
44
+ )
45
+ LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
46
+ FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
47
+ ENCODING 'UTF8'
48
+ sql
49
+ )
50
+
51
+ external_table.create
52
+ end
53
+ end
54
+
55
+
56
+ describe "#destroy" do
57
+ it "should drop the external table" do
58
+ expect(connection).to receive(:run).with(%Q[DROP EXTERNAL TABLE "external_products"])
59
+
60
+ external_table.destroy
61
+ end
62
+ end
63
+
64
+
65
+ describe "#name" do
66
+ it "should return the name of the external table based on the source's name" do
67
+ expect(external_table.name).to eq(:external_products)
68
+ end
69
+ end
70
+
71
+ end
@@ -0,0 +1,125 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specs for Cranium::Extract::Storage: reading the last saved value of a field
# for a named extract, and saving a new one.
# The examples stub File.exists?, File.read and Dir.exists?, and set message
# expectations on File.write and FileUtils.mkdir_p. Stored contents are
# expressed as YAML dumps of nested hashes keyed by extract name.
describe Cranium::Extract::Storage do

  let(:storage_dir) { "/storage/directory/.cranium" }
  let(:storage_file) { "#{storage_dir}/extracts" }
  let(:storage) { Cranium::Extract::Storage.new :extract_name }

  before do
    configuration = Cranium::Configuration.new
    configuration.storage_directory = storage_dir
    allow(Cranium).to receive(:configuration).and_return(configuration)
  end

  # Stubs whether the storage file is reported as existing.
  def stub_storage_file_existence(exists)
    allow(File).to receive(:exists?).with(storage_file).and_return(exists)
  end

  # Stubs whether the storage directory is reported as existing.
  def stub_storage_dir_existence(exists)
    allow(Dir).to receive(:exists?).with(storage_dir).and_return(exists)
  end

  # Makes reading the storage file return the YAML dump of +contents+.
  def stub_stored_contents(contents)
    allow(File).to receive(:read).with(storage_file).and_return(YAML.dump(contents))
  end

  # Expects the YAML dump of +contents+ to be written to the storage file.
  def expect_contents_to_be_written(contents)
    expect(File).to receive(:write).with(storage_file, YAML.dump(contents))
  end


  describe "#last_value_of" do
    context "when storage file doesn't exist" do
      it "should return nil if no storage file was created yet" do
        stub_storage_file_existence(false)

        expect(storage.last_value_of(:field)).to eq(nil)
      end
    end

    context "when storage file already exists" do
      before { stub_storage_file_existence(true) }

      it "should return nil if no value was saved for this extract yet" do
        stub_stored_contents({ other_extract_name: { last_values: {} } })

        expect(storage.last_value_of(:field)).to eq(nil)
      end

      it "should return nil if no value was saved for the field" do
        stub_stored_contents({ extract_name: { last_values: {} } })

        expect(storage.last_value_of(:field)).to eq(nil)
      end

      it "should return the last saved value of the specified field" do
        stub_stored_contents({ extract_name: { last_values: { field: 15 } } })

        expect(storage.last_value_of(:field)).to eq(15)
      end
    end
  end


  describe "#save_last_value_of" do
    context "when storage file doesn't exist" do
      before { stub_storage_file_existence(false) }

      it "should create the storage file and save the specified value if the storage directory already exists" do
        stub_storage_dir_existence(true)

        expect_contents_to_be_written({ extract_name: { last_values: { field: 15 } } })

        storage.save_last_value_of(:field, 15)
      end

      it "should create the storage directory if it doesn't exist yet" do
        stub_storage_dir_existence(false)
        allow(File).to receive(:write)

        expect(FileUtils).to receive(:mkdir_p).with(storage_dir)

        storage.save_last_value_of(:field, 15)
      end
    end

    context "when there are previously saved values" do
      before do
        stub_storage_dir_existence(true)
        stub_storage_file_existence(true)
      end

      it "should overwrite the specified field's value and preserve all others" do
        stored = { extract_name: { last_values: { field1: 1, field2: 2, field3: 3 } } }
        updated = { extract_name: { last_values: { field1: 1, field2: 5, field3: 3 } } }
        stub_stored_contents(stored)

        expect_contents_to_be_written(updated)

        storage.save_last_value_of(:field2, 5)
      end

      it "should create the new entry if it doesn't exist yet" do
        other_values = { last_values: { field1: 1, field2: 2, field3: 3 } }
        stub_stored_contents({ other_extract_name: other_values })

        expect_contents_to_be_written({
          other_extract_name: other_values,
          extract_name: { last_values: { field2: 5 } }
        })

        storage.save_last_value_of(:field2, 5)
      end
    end
  end

end
@@ -0,0 +1,37 @@
1
+ require_relative '../spec_helper'
2
+
3
# Specs for the Cranium::Logging mixin: both #record_metric and #log are
# expected to forward a message to every logger returned by
# Cranium.configuration.loggers (stubbed here with two doubles).
describe Cranium::Logging do

  let(:loggers) { [double("Logger 1"), double("Logger 2")] }
  let(:logging_object) { Object.new.extend(Cranium::Logging) }

  before do
    allow(Cranium).to receive_message_chain(:configuration, :loggers).and_return(loggers)
  end


  # Sets a message expectation on each logger double: it must receive
  # +level+ with exactly +message+.
  def all_loggers_should_receive(level, message)
    loggers.each do |logger|
      expect(logger).to receive(level).with(message)
    end
  end


  describe "#record_metric" do
    it "should record an arbitrary metric in every registered logger" do
      all_loggers_should_receive(:info, "[metrics/products] 1234")

      logging_object.record_metric("products", 1234)
    end
  end


  describe "#log" do
    it "should log a message with the specified reporting level in every registered logger" do
      all_loggers_should_receive(:error, "error message")

      logging_object.log(:error, "error message")
    end
  end

end
@@ -0,0 +1,56 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specs for Cranium::Sequel::Hash: a Hash whose keys and/or values can be
# qualified through Sequel.qualify. Sequel.qualify itself is stubbed, so the
# expectations only show which side of each pair was passed through it.
describe Cranium::Sequel::Hash do

  let(:source_hash) { { field1: :field2, field3: :field4 } }
  let(:sequel_hash) { Cranium::Sequel::Hash[source_hash] }

  before do
    # Stand-in for Sequel.qualify: joins qualifier and field with an underscore
    # so that qualified names can be asserted as plain symbols.
    allow(Sequel).to receive(:qualify) { |qualifier, field| "#{qualifier}_#{field}".to_sym }
  end


  it "should be a Hash" do
    expect(Cranium::Sequel::Hash.new).to be_a(Hash)
  end


  describe "#qualify" do
    context "when called with 'keys_with'" do
      it "should qualify only the key fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1)

        expect(qualified).to eq({ table1_field1: :field2, table1_field3: :field4 })
      end
    end

    context "when called with 'values_with'" do
      it "should qualify only the value fields of the hash" do
        qualified = sequel_hash.qualify(values_with: :table1)

        expect(qualified).to eq({ field1: :table1_field2, field3: :table1_field4 })
      end
    end

    context "when called with both 'keys_with' and 'values_with'" do
      it "should qualify both keys and value fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1, values_with: :table2)

        expect(qualified).to eq({ table1_field1: :table2_field2, table1_field3: :table2_field4 })
      end
    end

    it "should raise an error if called with unsupported options" do
      expect { sequel_hash.qualify(key_with: :table) }
        .to raise_error(ArgumentError, "Unsupported option for qualify: key_with")
    end
  end


  describe "#qualified_keys" do
    it "should return an array with the hash's keys qualified with the specified qualifier" do
      expect(sequel_hash.qualified_keys(:table)).to eq(%i[table_field1 table_field3])
    end
  end


  describe "#qualified_values" do
    it "should return an array with the hash's values qualified with the specified qualifier" do
      expect(sequel_hash.qualified_values(:table)).to eq(%i[table_field2 table_field4])
    end
  end

end
@@ -0,0 +1,31 @@
1
+ require_relative '../spec_helper'
2
+
3
# Specs for Cranium::SourceRegistry: looking up a source by name with #[]
# and registering one through #register_source with a configuration block.
describe Cranium::SourceRegistry do

  let(:registry) { Cranium::SourceRegistry.new }

  describe "#[]" do
    it "should raise an error if a source with the specified name wasn't registered yet" do
      expect { registry[:name] }.to raise_error("Undefined source 'name'")
    end
  end


  describe "#register_source" do
    it "should register a new source and configure it through the block passed" do
      # Reference definition built by hand; the registered one is expected
      # to compare equal to it.
      expected_source = Cranium::DSL::SourceDefinition.new(:test_source).tap do |definition|
        definition.field :test_field, String
      end

      registry.register_source(:test_source) { field :test_field, String }

      expect(registry[:test_source]).to eq(expected_source)
    end

    it "should return the newly registered source" do
      registered = registry.register_source(:test_source) {}

      expect(registered).to be_a(Cranium::DSL::SourceDefinition)
    end
  end

end
@@ -0,0 +1,144 @@
1
+ require_relative "../../spec_helper"
2
+ require 'cucumber/ast/table'
3
+ require 'date'
4
+
5
# Specs for Cranium::TestFramework::CucumberTable: construction from a
# Cucumber AST table (header parsing, comment columns, type specifiers) and the
# instance-level accessors #fields, #with_patterns and #data (including
# #columns on the returned data).
module Cranium::TestFramework
  describe CucumberTable do

    context "class method" do
      # The group label names the method actually exercised below
      # (CucumberTable.from_ast_table).
      describe ".from_ast_table" do

        # Lazy on purpose: every example assigns @table_data (and, where
        # needed, sets expectations on CucumberTable.new) before it first
        # references +table+.
        let(:table) { CucumberTable.from_ast_table(Cucumber::Ast::Table.new(@table_data)) }

        it "should return a CucumberTable" do
          @table_data = [{ "column" => "value" }]

          expect(table).to be_a CucumberTable
        end


        it "should convert header values to symbols" do
          @table_data = [{ "column1" => "value1", "column2" => "value2" }]

          expect(table.fields).to eq([:column1, :column2])
        end


        it "should discard comment columns" do
          @table_data = [{ "column" => "value1", "#comment column" => "value2" }]
          expect(CucumberTable).to receive(:new).with([{ column: "value1" }], { column: :string })

          table
        end


        it "should discard type specifiers in column names" do
          @table_data = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]

          expect(table.fields).to match_array([:integer_column, :string_column, :numeric_column, :some_column])
        end


        it "should raise an exception if invalid type is specified" do
          @table_data = [{ "column (x)" => "value" }]

          expect { table.fields }.to raise_error StandardError, "Invalid type specified: x"
        end


        it "should instantiate the new table with the correct column types" do
          @table_data = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]
          # A column without a type specifier is expected to be typed :string.
          expect(CucumberTable).to receive(:new).with(
            [{
              integer_column: "one",
              string_column: "two",
              numeric_column: "five",
              some_column: "else"
            }],
            {
              integer_column: :integer,
              string_column: :string,
              numeric_column: :numeric,
              some_column: :string
            }
          )

          table
        end
      end
    end


    context "instance methods" do
      let(:data) { [{ "one" => "two", "three" => "four" }, { "five" => "six" }] }

      describe "#fields" do
        it "should return the keys of the first row" do
          expect(CucumberTable.new(data).fields).to eq(%w[one three])
        end
      end


      describe "#with_patterns" do
        it "should set replacement patterns and return the object" do
          table = CucumberTable.new(data)
          table_with_patterns = table.with_patterns({ "a" => "b" })

          # Identity check: the very same object must come back.
          expect(table_with_patterns).to equal(table)
        end
      end


      describe "#data" do
        it "should return all data as an array of hashes" do
          expect(CucumberTable.new(data).data).to eq(data)
        end


        it "should make all substitutions set up as replacement patterns" do
          table = CucumberTable.new [{ first: "NULL", second: "apple", third: "something else entirely" }]
          # A pattern value may be a plain replacement or a lambda whose
          # result is expected in place of the matched cell.
          table.with_patterns(
            "NULL" => nil,
            "apple" => lambda { "pear" }
          )

          expect(table.data).to eq([first: nil, second: "pear", third: "something else entirely"])
        end


        it "should evaluate integer fields" do
          table = CucumberTable.new([{ integer_column: "20" }], { integer_column: :integer })
          expect(table.data).to eq([{ integer_column: 20 }])
        end


        # Nested under #data because #columns is called on the value
        # returned by #data.
        describe "#columns" do
          it "should return an array of empty arrays if there are no data rows" do
            table = CucumberTable.new [], { argument: :string }

            expect(table.data.columns).to eq([[]])
          end


          it "should return the data in columns as an array of arrays, discarding all header information" do
            table = CucumberTable.new [{ header1: "value1", header2: "value2" }, { header1: "value3", header2: "value4" }]

            expect(table.data.columns).to eq([%w[value1 value3], %w[value2 value4]])
          end
        end
      end

    end

  end
end
@@ -0,0 +1,75 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specs for Cranium::Transformation::DuplicationIndex: the memoizing class-level
# accessor .[] and the per-record check #duplicate?.
describe Cranium::Transformation::DuplicationIndex do

  let(:index) { Cranium::Transformation::DuplicationIndex.new :field1, :field2 }
  let(:record) { Cranium::TransformationRecord.new [:field1, :field2, :field3], [:field1, :field2, :field3] }

  describe ".[]" do
    # Clear the class-level @instances variable so every example starts
    # without previously created indexes.
    before(:each) { Cranium::Transformation::DuplicationIndex.instance_variable_set :@instances, nil }

    it "should return a DuplicationIndex instance for the specified fields" do
      allow(Cranium::Transformation::DuplicationIndex).to receive(:new).with(:field1, :field2).and_return(index)

      expect(Cranium::Transformation::DuplicationIndex[:field1, :field2]).to eq index
    end

    it "should memoize the previously created instances" do
      first_lookup = Cranium::Transformation::DuplicationIndex[:field1, :field2]
      second_lookup = Cranium::Transformation::DuplicationIndex[:field1, :field2]

      # Memoization means the very same object comes back, so compare by
      # identity rather than by value equality.
      expect(second_lookup).to be(first_lookup)
    end

    it "should raise an error if empty fieldset was passed" do
      expect { Cranium::Transformation::DuplicationIndex[] }.to raise_error ArgumentError, "Cannot build duplication index for empty fieldset"
    end
  end


  describe "#duplicate?" do
    it "should return false for the first entry" do
      record.input_data = ["one", "two", "three"]
      expect(index.duplicate?(record)).to be_falsey
    end

    it "should return true the second time it's called for the same record" do
      record.input_data = ["one", "two", "three"]
      index.duplicate?(record)
      expect(index.duplicate?(record)).to be_truthy
    end

    it "should only use the specified fieldset for duplication detection" do
      record1 = record
      record2 = record.clone
      # Named differently from the let(:index) helper to avoid shadowing it.
      single_field_index = Cranium::Transformation::DuplicationIndex.new :field1

      record1.input_data = ["one", "two", "three"]
      single_field_index.duplicate? record1

      # Only field1 is indexed, so differing field2/field3 values still count
      # as a duplicate.
      record2.input_data = ["one", "four", "five"]
      expect(single_field_index.duplicate?(record2)).to be_truthy
    end

    it "should handle multiple fields for detection" do
      record1 = record
      record2 = record.clone
      record3 = record.clone
      two_field_index = Cranium::Transformation::DuplicationIndex.new :field1, :field2

      record1.input_data = ["one", "two", "three"]
      two_field_index.duplicate? record1

      record2.input_data = ["one", "four", "five"]
      expect(two_field_index.duplicate?(record2)).to be_falsey

      record3.input_data = ["one", "two", "five"]
      expect(two_field_index.duplicate?(record3)).to be_truthy
    end

    it "should raise an error if record fieldset doesn't contain index fieldset" do
      record.input_data = ["one", "two", "three"]
      missing_field_index = Cranium::Transformation::DuplicationIndex.new :field5

      expect { missing_field_index.duplicate? record }.to raise_error StandardError, "Missing deduplication key from record: field5"
    end
  end

end
+ end