cranium 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
@@ -0,0 +1,71 @@
1
+ require_relative '../spec_helper'
2
+ require 'ostruct'
3
+
4
# Specification for Cranium::ExternalTable. Verifies the statements sent to the
# database connection by #create and #destroy, and the table name derived from
# the source definition's name.
describe Cranium::ExternalTable do

  # Stand-in for the database connection; the examples only verify #run calls.
  let(:connection) { double "Greenplum connection" }

  # A :products source declaring one field per Ruby type for which the #create
  # example expects a column, plus custom delimiter, quote and escape characters.
  let(:source) do
    definition = Cranium::DSL::SourceDefinition.new(:products)
    definition.file "test_products.csv"
    definition.field :text_field, String
    definition.field :integer_field, Integer
    definition.field :numeric_field, Float
    definition.field :date_field, Date
    definition.field :timestamp_field, Time
    definition.field :boolean_field1, TrueClass
    definition.field :boolean_field2, FalseClass
    definition.delimiter ';'
    definition.quote '"'
    definition.escape "'"
    definition
  end

  let(:external_table) { described_class.new(source, connection) }


  describe "#create" do
    it "should create an external table from the specified source" do
      configuration = OpenStruct.new(
        gpfdist_url: "gpfdist-url",
        gpfdist_home_directory: "/gpfdist-home",
        upload_directory: "upload-dir"
      )
      allow(Cranium).to receive(:configuration).and_return(configuration)
      allow(source).to receive(:files).and_return(%w[test_products_a.csv test_products_b.csv])

      # NOTE(review): the heredoc's leading whitespace could not be recovered
      # from this view. The text below must match the statement produced by
      # ExternalTable#create character for character - confirm before relying on it.
      expected_statement = <<-sql
CREATE EXTERNAL TABLE "external_products" (
"text_field" TEXT,
"integer_field" INTEGER,
"numeric_field" NUMERIC,
"date_field" DATE,
"timestamp_field" TIMESTAMP WITHOUT TIME ZONE,
"boolean_field1" BOOLEAN,
"boolean_field2" BOOLEAN
)
LOCATION ('gpfdist://gpfdist-url/upload-dir/test_products_a.csv', 'gpfdist://gpfdist-url/upload-dir/test_products_b.csv')
FORMAT 'CSV' (DELIMITER ';' ESCAPE '''' QUOTE '"' HEADER)
ENCODING 'UTF8'
      sql

      expect(connection).to receive(:run).with(expected_statement)

      external_table.create
    end
  end


  describe "#destroy" do
    it "should drop the external table" do
      expect(connection).to receive(:run).with('DROP EXTERNAL TABLE "external_products"')

      external_table.destroy
    end
  end


  describe "#name" do
    it "should return the name of the external table based on the source's name" do
      expect(external_table.name).to eq :external_products
    end
  end

end
@@ -0,0 +1,125 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specification for Cranium::Extract::Storage. All file system access is
# stubbed: the examples control what File/Dir report and assert on the YAML
# document handed to File.write.
describe Cranium::Extract::Storage do

  let(:storage_dir) { "/storage/directory/.cranium" }
  let(:storage_file) { "#{storage_dir}/extracts" }
  let(:storage) { described_class.new :extract_name }

  before do
    configuration = Cranium::Configuration.new
    configuration.storage_directory = storage_dir
    allow(Cranium).to receive(:configuration).and_return(configuration)
  end

  # Makes File.read return the YAML dump of +contents+ for the storage file.
  def stub_stored_extracts(contents)
    allow(File).to receive(:read).with(storage_file).and_return(YAML.dump(contents))
  end

  # Expects the YAML dump of +contents+ to be written to the storage file.
  def expect_extracts_to_be_written(contents)
    expect(File).to receive(:write).with(storage_file, YAML.dump(contents))
  end

  describe "#last_value_of" do
    context "when storage file doesn't exist" do
      it "should return nil if no storage file was created yet" do
        allow(File).to receive(:exists?).with(storage_file).and_return(false)

        expect(storage.last_value_of(:field)).to be_nil
      end
    end

    context "when storage file already exists" do
      before do
        allow(File).to receive(:exists?).with(storage_file).and_return(true)
      end

      it "should return nil if no value was saved for this extract yet" do
        stub_stored_extracts(other_extract_name: { last_values: {} })

        expect(storage.last_value_of(:field)).to be_nil
      end

      it "should return nil if no value was saved for the field" do
        stub_stored_extracts(extract_name: { last_values: {} })

        expect(storage.last_value_of(:field)).to be_nil
      end

      it "should return the last saved value of the specified field" do
        stub_stored_extracts(extract_name: { last_values: { field: 15 } })

        expect(storage.last_value_of(:field)).to eq 15
      end
    end
  end


  describe "#save_last_value_of" do
    context "when storage file doesn't exist" do
      before do
        allow(File).to receive(:exists?).with(storage_file).and_return(false)
      end

      it "should create the storage file and save the specified value if the storage directory already exists" do
        allow(Dir).to receive(:exists?).with(storage_dir).and_return(true)

        expect_extracts_to_be_written(extract_name: { last_values: { field: 15 } })

        storage.save_last_value_of(:field, 15)
      end

      it "should create the storage directory if it doesn't exist yet" do
        allow(Dir).to receive(:exists?).with(storage_dir).and_return(false)
        # The written content is irrelevant here; only the mkdir_p call is checked.
        allow(File).to receive(:write)

        expect(FileUtils).to receive(:mkdir_p).with(storage_dir)

        storage.save_last_value_of(:field, 15)
      end
    end

    context "when there are previously saved values" do
      before do
        allow(Dir).to receive(:exists?).with(storage_dir).and_return(true)
        allow(File).to receive(:exists?).with(storage_file).and_return(true)
      end

      it "should overwrite the specified field's value and preserve all others" do
        stub_stored_extracts(
          extract_name: {
            last_values: { field1: 1, field2: 2, field3: 3 }
          }
        )

        expect_extracts_to_be_written(
          extract_name: {
            last_values: { field1: 1, field2: 5, field3: 3 }
          }
        )

        storage.save_last_value_of(:field2, 5)
      end

      it "should create the new entry if it doesn't exist yet" do
        stub_stored_extracts(
          other_extract_name: {
            last_values: { field1: 1, field2: 2, field3: 3 }
          }
        )

        # Key order matters: the expectation compares the dumped YAML text.
        expect_extracts_to_be_written(
          other_extract_name: {
            last_values: { field1: 1, field2: 2, field3: 3 }
          },
          extract_name: {
            last_values: { field2: 5 }
          }
        )

        storage.save_last_value_of(:field2, 5)
      end
    end
  end

end
@@ -0,0 +1,37 @@
1
+ require_relative '../spec_helper'
2
+
3
# Specification for the Cranium::Logging mixin. Extends a plain object with the
# module and checks that messages are forwarded to every logger returned by
# Cranium.configuration.loggers.
describe Cranium::Logging do

  let(:loggers) { [double("Logger 1"), double("Logger 2")] }
  let(:logging_object) { Object.new.extend(Cranium::Logging) }

  before do
    allow(Cranium).to receive_message_chain(:configuration, :loggers).and_return loggers
  end



  # Sets the expectation that each configured logger receives +message+ at +level+.
  def all_loggers_should_receive(level, message)
    loggers.each do |logger|
      expect(logger).to receive(level).with(message)
    end
  end



  describe "#record_metric" do
    it "should record an arbitrary metric in every registered logger" do
      all_loggers_should_receive(:info, "[metrics/products] 1234")

      logging_object.record_metric("products", 1234)
    end
  end


  describe "#log" do
    it "should log a message with the specified reporting level in every registered logger" do
      all_loggers_should_receive(:error, "error message")

      logging_object.log(:error, "error message")
    end
  end

end
@@ -0,0 +1,56 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specification for Cranium::Sequel::Hash. Sequel.qualify is stubbed to return
# the symbol :"<qualifier>_<field>", so the expectations can be written with
# plain symbols.
describe Cranium::Sequel::Hash do

  let(:source_hash) { { field1: :field2, field3: :field4 } }
  let(:sequel_hash) { described_class[source_hash] }

  before do
    allow(Sequel).to receive(:qualify) do |qualifier, field|
      :"#{qualifier}_#{field}"
    end
  end


  it "should be a Hash" do
    expect(described_class.new).to be_a(Hash)
  end


  describe "#qualify" do
    context "when called with 'keys_with'" do
      it "should qualify only the key fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1)

        expect(qualified).to eq(table1_field1: :field2, table1_field3: :field4)
      end
    end

    context "when called with 'values_with'" do
      it "should qualify only the value fields of the hash" do
        qualified = sequel_hash.qualify(values_with: :table1)

        expect(qualified).to eq(field1: :table1_field2, field3: :table1_field4)
      end
    end

    context "when called with both 'keys_with' and 'values_with'" do
      it "should qualify both keys and value fields of the hash" do
        qualified = sequel_hash.qualify(keys_with: :table1, values_with: :table2)

        expect(qualified).to eq(table1_field1: :table2_field2, table1_field3: :table2_field4)
      end
    end

    it "should raise an error if called with unsupported options" do
      # :key_with (singular) is the unsupported option being exercised.
      expect { sequel_hash.qualify(key_with: :table) }
        .to raise_error(ArgumentError, "Unsupported option for qualify: key_with")
    end
  end


  describe "#qualified_keys" do
    it "should return an array with the hash's keys qualified with the specified qualifier" do
      expect(sequel_hash.qualified_keys(:table)).to eq %i[table_field1 table_field3]
    end
  end


  describe "#qualified_values" do
    it "should return an array with the hash's values qualified with the specified qualifier" do
      expect(sequel_hash.qualified_values(:table)).to eq %i[table_field2 table_field4]
    end
  end

end
@@ -0,0 +1,31 @@
1
+ require_relative '../spec_helper'
2
+
3
# Specification for Cranium::SourceRegistry: looking up an unregistered source
# and registering a source configured through a block.
describe Cranium::SourceRegistry do

  let(:registry) { described_class.new }

  describe "#[]" do
    it "should raise an error if a source with the specified name wasn't registered yet" do
      expect { registry[:name] }.to raise_error("Undefined source 'name'")
    end
  end


  describe "#register_source" do
    it "should register a new source and configure it through the block passed" do
      # Build by hand the definition that the registration block should produce.
      expected_source = Cranium::DSL::SourceDefinition.new(:test_source)
      expected_source.field :test_field, String

      registry.register_source(:test_source) { field :test_field, String }

      expect(registry[:test_source]).to eq expected_source
    end

    it "should return the newly registered source" do
      registered = registry.register_source(:test_source) {}

      expect(registered).to be_a(Cranium::DSL::SourceDefinition)
    end
  end

end
@@ -0,0 +1,144 @@
1
+ require_relative "../../spec_helper"
2
+ require 'cucumber/ast/table'
3
+ require 'date'
4
+
5
module Cranium::TestFramework
  # Specification for CucumberTable: building a table from a Cucumber AST table
  # (header parsing, comment columns, type specifiers) and reading its data back
  # with replacement patterns and typed columns applied.
  describe CucumberTable do

    context "class method" do
      # The group is labelled after the method the examples actually call.
      describe ".from_ast_table" do

        # Built lazily, so each example assigns @table_data before touching `table`.
        let(:table) { CucumberTable.from_ast_table(Cucumber::Ast::Table.new(@table_data)) }

        it "should return a CucumberTable" do
          @table_data = [{ "column" => "value" }]

          expect(table).to be_a CucumberTable
        end


        it "should convert header values to symbols" do
          @table_data = [{ "column1" => "value1", "column2" => "value2" }]

          expect(table.fields).to eq([:column1, :column2])
        end


        it "should discard comment columns" do
          @table_data = [{ "column" => "value1", "#comment column" => "value2" }]
          expect(CucumberTable).to receive(:new).with([{ column: "value1" }], { column: :string })

          table
        end


        it "should discard type specifiers in column names" do
          @table_data = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]

          expect(table.fields).to match_array([:integer_column, :string_column, :numeric_column, :some_column])
        end


        it "should raise an exception if invalid type is specified" do
          @table_data = [{ "column (x)" => "value" }]

          expect { table.fields }.to raise_error StandardError, "Invalid type specified: x"
        end


        it "should instantiate the new table with the correct column types" do
          @table_data = [{
            "integer_column (i)" => "one",
            "string_column (s)" => "two",
            "numeric_column (n)" => "five",
            "some_column" => "else"
          }]
          # Columns without a type specifier are expected to be passed as :string.
          expect(CucumberTable).to receive(:new).with(
            [{
              integer_column: "one",
              string_column: "two",
              numeric_column: "five",
              some_column: "else"
            }],
            {
              integer_column: :integer,
              string_column: :string,
              numeric_column: :numeric,
              some_column: :string
            }
          )

          table
        end
      end
    end


    context "instance methods" do
      let(:data) { [{ "one" => "two", "three" => "four" }, { "five" => "six" }] }

      describe "#fields" do
        it "should return the keys of the first row" do
          expect(CucumberTable.new(data).fields).to eq(%w[one three])
        end
      end


      describe "#with_patterns" do
        it "should set replacement patterns and return the object" do
          table = CucumberTable.new(data)
          table_with_patterns = table.with_patterns({ "a" => "b" })

          # Identity check: the very same object must be returned.
          expect(table_with_patterns).to equal(table)
        end
      end


      describe "#data" do
        it "should return all data as an array of hashes" do
          expect(CucumberTable.new(data).data).to eq(data)
        end


        it "should make all substitutions set up as replacement patterns" do
          table = CucumberTable.new [{ first: "NULL", second: "apple", third: "something else entirely" }]
          table.with_patterns(
            "NULL" => nil,
            "apple" => lambda { "pear" }
          )

          expect(table.data).to eq([first: nil, second: "pear", third: "something else entirely"])
        end


        it "should evaluate integer fields" do
          table = CucumberTable.new([{ integer_column: "20" }], { integer_column: :integer })
          expect(table.data).to eq([{ integer_column: 20 }])
        end


        # Nested under "#data" because #columns is called on the value returned by #data.
        describe "#columns" do
          it "should return an array of empty arrays if there are no data rows" do
            table = CucumberTable.new [], { argument: :string }

            expect(table.data.columns).to eq([[]])
          end


          it "should return the data in columns as an array of arrays, discarding all header information" do
            table = CucumberTable.new [{ header1: "value1", header2: "value2" }, { header1: "value3", header2: "value4" }]

            expect(table.data.columns).to eq([%w[value1 value3], %w[value2 value4]])
          end
        end
      end

    end

  end
end
@@ -0,0 +1,75 @@
1
+ require_relative '../../spec_helper'
2
+
3
# Specification for Cranium::Transformation::DuplicationIndex: the memoizing
# .[] lookup and duplicate detection over a chosen subset of record fields.
describe Cranium::Transformation::DuplicationIndex do

  let(:index) { described_class.new :field1, :field2 }
  let(:record) do
    Cranium::TransformationRecord.new [:field1, :field2, :field3], [:field1, :field2, :field3]
  end

  describe ".[]" do
    # Clear the class-level @instances variable so examples do not share state.
    before { described_class.instance_variable_set :@instances, nil }

    it "should return a DuplicationIndex instance for the specified fields" do
      allow(described_class).to receive(:new).with(:field1, :field2).and_return(index)

      expect(described_class[:field1, :field2]).to eq index
    end

    it "should memoize the previously created instances" do
      expect(described_class[:field1, :field2]).to eq(described_class[:field1, :field2])
    end

    it "should raise an error if empty fieldset was passed" do
      expect { described_class[] }
        .to raise_error(ArgumentError, "Cannot build duplication index for empty fieldset")
    end
  end


  describe "#duplicate?" do
    it "should return false for the first entry" do
      record.input_data = %w[one two three]

      expect(index.duplicate?(record)).to be_falsey
    end

    it "should return true the second time it's called for the same record" do
      record.input_data = %w[one two three]
      index.duplicate?(record)

      expect(index.duplicate?(record)).to be_truthy
    end

    it "should only use the specified fieldset for duplication detection" do
      first_record = record
      second_record = record.clone
      single_field_index = described_class.new :field1

      first_record.input_data = %w[one two three]
      single_field_index.duplicate?(first_record)

      # Only :field1 is indexed, so the differing other fields must be ignored.
      second_record.input_data = %w[one four five]
      expect(single_field_index.duplicate?(second_record)).to be_truthy
    end

    it "should handle multiple fields for detection" do
      first_record = record
      second_record = record.clone
      third_record = record.clone
      two_field_index = described_class.new :field1, :field2

      first_record.input_data = %w[one two three]
      two_field_index.duplicate?(first_record)

      second_record.input_data = %w[one four five]
      expect(two_field_index.duplicate?(second_record)).to be_falsey

      third_record.input_data = %w[one two five]
      expect(two_field_index.duplicate?(third_record)).to be_truthy
    end

    it "should raise an error if record fieldset doesn't contain index fieldset" do
      record.input_data = %w[one two three]
      unknown_field_index = described_class.new :field5

      expect { unknown_field_index.duplicate?(record) }
        .to raise_error(StandardError, "Missing deduplication key from record: field5")
    end
  end

end