cranium 0.2.0

Files changed (132)
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
data/features/restore_database_connection.feature
@@ -0,0 +1,55 @@
+ Feature: Sequel database connections are fault tolerant
+
+   Scenario:
+     Given a database table called "dim_product" with the following fields:
+       | field_name | field_type |
+       | item       | TEXT       |
+       | title      | TEXT       |
+     And a "products.csv" data file containing:
+       """
+       id,name,category
+       JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
+       CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
+       """
+     And the following definition:
+       """
+       require 'sequel'
+
+       def terminate_connections
+         connection_string = ENV['GREENPLUM_AS_ADMIN_URL'] || "postgres://database_administrator:emarsys@192.168.56.43:5432/cranium"
+         connection = Sequel.connect connection_string, loggers: Cranium.configuration.loggers
+         connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
+       end
+
+       source :products do
+         encoding "UTF-8"
+         delimiter ','
+
+         field :id, String
+         field :name, String
+       end
+
+       source :transformed_products do
+         field :id, String
+         field :name, String
+       end
+
+       transform :products => :transformed_products do |record|
+         output record
+       end
+
+       terminate_connections
+
+       import :transformed_products do
+         into :dim_product
+
+         put :id => :item
+         put :name => :title
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And the "dim_product" table should contain:
+       | item    | title                |
+       | JNI-123 | Just a product name  |
+       | CDI-234 | Another product name |
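The feature above kills every backend connection mid-definition and still expects a clean exit, i.e. Cranium is expected to reconnect transparently. As a point of comparison, here is a minimal sketch of retry-on-disconnect with plain Sequel (the `with_reconnect` helper and its one-retry policy are illustrative, not Cranium's implementation):

```ruby
require 'sequel'

# Hypothetical helper: retry a block once when the backend connection drops.
# Sequel signals a dead connection with Sequel::DatabaseDisconnectError;
# flushing the pool lets the next attempt open a fresh connection.
def with_reconnect(db)
  attempts = 0
  begin
    yield
  rescue Sequel::DatabaseDisconnectError
    attempts += 1
    raise if attempts > 1
    db.disconnect # drop dead pooled connections
    retry
  end
end

db = Sequel.connect(ENV.fetch('GREENPLUM_URL', 'postgres://cranium:cranium@localhost:5432/cranium'))
with_reconnect(db) { db.run "SELECT 1" }
```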
data/features/step_definitions/database_table_steps.rb
@@ -0,0 +1,40 @@
+ Given(/^a database table called "([^"]*)" with the following fields:$/) do |table_name, fields|
+   database_table(table_name).create(fields.data)
+ end
+
+
+ Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
+   database_table(table_name).clear
+   step %Q(the following new rows in the "#{table_name}" database table:), rows
+ end
+
+
+ Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
+   database_table(table_name).insert rows.data
+ end
+
+
+ Given(/^the current value in sequence "([^"]*)" is (\d+)$/) do |sequence_name, current_value|
+   Cranium::Database.connection.run "SELECT setval('#{sequence_name}', #{current_value})"
+ end
+
+
+ Given(/^a sequence called "([^"]*)" starting from (\d+)$/) do |sequence_name, start_value|
+   database_sequence(sequence_name).create
+
+   step %Q[the current value in sequence "#{sequence_name}" is #{start_value}]
+ end
+
+
+ Then(/^the "([^"]*)" table should contain:$/) do |table_name, data|
+   expected_data = []
+   data.data.each do |hash|
+     new_row = {}
+     hash.each_key do |key|
+       new_row[key.to_sym] = hash[key]
+     end
+     expected_data << new_row
+   end
+
+   expect(database_table(table_name).content(data.fields)).to match_array expected_data
+ end
data/features/step_definitions/definition_steps.rb
@@ -0,0 +1,3 @@
+ Given(/^the following definition:$/) do |definition|
+   save_definition definition
+ end
data/features/step_definitions/execution_steps.rb
@@ -0,0 +1,23 @@
+ Given /^the definition is executed(?: again)?$/ do
+   step "I execute the definition"
+ end
+
+
+ When /^I execute the definition(?: again)?$/ do
+   execute_definition
+ end
+
+
+ Then /^the process should exit successfully$/ do
+   expect(result_code).to eq(0), "Expected script exit code to be 0, but received #{result_code}\n\n#{script_output}\n"
+ end
+
+
+ Then /^the process should exit with an error$/ do
+   expect(result_code).to eq(1), "Expected script exit code to be 1, but received #{result_code}\n\n#{script_output}\n"
+ end
+
+
+ Then /^the error message should contain:$/ do |message|
+   expect(error_output).to include message
+ end
data/features/step_definitions/file_steps.rb
@@ -0,0 +1,39 @@
+ Given /^no "([^"]*)" directory/ do |dir_path|
+   upload_directory.remove_directory dir_path
+ end
+
+
+ Given /^an empty "([^"]*)" data file$/ do |file_name|
+   step %Q(a "#{file_name}" data file containing:), ""
+ end
+
+
+ Given /^an? "([^"]*)" data file containing:$/ do |file_name, content|
+   upload_directory.save_file file_name, content
+ end
+
+
+ Given /^the "([^"]*)" file is deleted$/ do |file_name|
+   upload_directory.delete_file file_name
+ end
+
+
+ Then /^there should be a "([^"]*)" data file in the upload directory containing:$/ do |file_name, content|
+   expect(upload_directory.file_exists?(file_name)).to be_truthy, "expected file '#{file_name}' to exist"
+   expect(upload_directory.read_file(file_name).chomp).to eq content
+ end
+
+
+ Then /^the "([^"]*)" directory should contain the following files:$/ do |directory_path, files|
+   expect(Dir.exists?(directory_path)).to be_truthy, "expected directory '#{directory_path}' to exist"
+   files_in_dir = Dir["#{directory_path}/*"].map { |file_name| File.basename file_name }.sort
+   expect(files_in_dir.count).to eq files.data.count
+   0.upto files.data.count - 1 do |index|
+     expect(files_in_dir[index]).to match Regexp.new(files.data[index][:filename])
+   end
+ end
+
+
+ Then /^the upload directory should contain the following files:$/ do |files|
+   step %Q(the "#{Cranium.configuration.upload_path}" directory should contain the following files:), files
+ end
data/features/support/class_extensions.rb
@@ -0,0 +1,24 @@
+ module Cucumber::Ast
+
+   module MultilineArgument
+     class << self
+
+       alias_method :from_old, :from
+
+
+
+       def from(argument)
+         original_result = from_old(argument)
+         if original_result.is_a? Cucumber::Ast::Table
+           Cranium::TestFramework::CucumberTable.from_ast_table(original_result).with_patterns(
+             "NULL" => nil
+           )
+         else
+           original_result
+         end
+       end
+
+     end
+   end
+
+ end
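This extension is a textbook alias-and-wrap monkey patch: the original `from` stays reachable under `from_old`, and the redefined `from` post-processes its result. A self-contained sketch of the pattern (the `Greeter` class is purely illustrative):

```ruby
class Greeter
  def greet(name)
    "Hello, #{name}"
  end
end

class Greeter
  # Keep the original implementation reachable under a new name...
  alias_method :greet_old, :greet

  # ...then redefine the method to wrap the original's result.
  def greet(name)
    "#{greet_old(name)}!"
  end
end

puts Greeter.new.greet("world") # => "Hello, world!"
```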
data/features/support/env.rb
@@ -0,0 +1,27 @@
+ require 'fileutils'
+ require_relative "../../lib/cranium"
+
+ FileUtils.mkdir_p("log") unless Dir.exists?("log")
+
+ Cranium.configure do |config|
+   config.greenplum_connection_string = ENV['GREENPLUM_URL'] || "postgres://cranium:cranium@192.168.56.43:5432/cranium"
+   config.gpfdist_url = ENV['GPFDIST_URL'] || "192.168.56.43:8123"
+   config.gpfdist_home_directory = "tmp/custdata"
+   config.upload_directory = "cranium_build"
+   config.loggers << Logger.new("log/cucumber.log")
+ end
+
+
+ Before do
+   FileUtils.rm_rf Cranium.configuration.upload_path
+   FileUtils.mkdir_p Cranium.configuration.upload_path
+ end
+
+ After do
+   Cranium::TestFramework::DatabaseTable.cleanup
+   Cranium::TestFramework::DatabaseSequence.cleanup
+ end
+
+ World do
+   Cranium::TestFramework::World.new Cranium.configuration.upload_path, Cranium::Database.connection
+ end
data/features/support/randomize.rb
@@ -0,0 +1,22 @@
+ cucumber_seed = ENV['CUCUMBER_SEED'] ? ENV['CUCUMBER_SEED'].to_i : srand % 0xFFFF
+ cucumber_dry_run = nil
+
+
+ AfterConfiguration do |cucumber_config|
+   original_files = cucumber_config.feature_files
+   cucumber_dry_run = cucumber_config.dry_run?
+
+   config_eigenclass = class << cucumber_config
+     self
+   end
+   config_eigenclass.send :undef_method, :feature_files
+   config_eigenclass.send(:define_method, :feature_files) do
+     Kernel.srand cucumber_seed
+     original_files.sort_by { Kernel.rand original_files.count }
+   end
+ end
+
+
+ at_exit do
+   puts("Cucumber randomized with seed #{cucumber_seed.inspect}") unless cucumber_dry_run
+ end
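Reseeding the RNG immediately before generating the sort keys is what makes the shuffle reproducible: the same `CUCUMBER_SEED` always yields the same feature order. A minimal sketch of the idea (the file names are illustrative):

```ruby
def shuffled_with_seed(items, seed)
  # Seeding right before drawing the random sort keys makes the order deterministic.
  Kernel.srand seed
  items.sort_by { Kernel.rand items.count }
end

files = %w[a.feature b.feature c.feature d.feature]
p shuffled_with_seed(files, 1234) == shuffled_with_seed(files, 1234) # => true
```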
data/features/support/stop_on_first_error.rb
@@ -0,0 +1,5 @@
+ unless ENV["STOP_ON_FIRST_ERROR"] == "no"
+   After do |scenario|
+     Cucumber.wants_to_quit = true if scenario.failed?
+   end
+ end
data/features/transform/deduplication.feature
@@ -0,0 +1,37 @@
+ Feature: Deduplicate data in CSV file
+
+   Scenario: Single file transformation
+     Given a "sales_items.csv" data file containing:
+       """
+       order_id,item,item_name
+       1,Item1,Item name 1
+       2,Item1,Item name 1
+       3,Item2,Item name 2
+       4,Item2,Item name 2
+       5,Item3,Item name 3
+       """
+     And the following definition:
+       """
+       source :sales_items do
+         file "sales_items.csv"
+         field :order_id, String
+         field :item, String
+         field :item_name, String
+       end
+
+       source :products do
+         field :item, String
+         field :item_name, String
+       end
+
+       deduplicate :sales_items, into: :products, by: [:item]
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "products.csv" data file in the upload directory containing:
+       """
+       item,item_name
+       Item1,Item name 1
+       Item2,Item name 2
+       Item3,Item name 3
+       """
1
+ Feature: Empty transformation
2
+
3
+ Scenario: Empty transformation between the same structures from the default CSV format simply copies the file
4
+ Given a "products.csv" data file containing:
5
+ """
6
+ id,name,category
7
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
8
+ CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
9
+ """
10
+ And the following definition:
11
+ """
12
+ source :products do
13
+ field :id, String
14
+ field :name, String
15
+ field :category, String
16
+ end
17
+
18
+ source :products_copy do
19
+ field :id, String
20
+ field :name, String
21
+ field :category, String
22
+ end
23
+
24
+ transform :products => :products_copy do |record|
25
+ output record
26
+ end
27
+ """
28
+ When I execute the definition
29
+ Then the process should exit successfully
30
+ And there should be a "products_copy.csv" data file in the upload directory containing:
31
+ """
32
+ id,name,category
33
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
34
+ CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
35
+ """
36
+
37
+
38
+ Scenario: Empty transformation between the same structures but from a custom CSV format converts quotes and delimiters to the default format
39
+ Given a "products.csv" data file containing:
40
+ """
41
+ 'id';'name';'category'
42
+ 'JNI-123';'Just a product name';'Main category > Subcategory > Sub-subcategory'
43
+ 'CDI-234';'Another 12" product name';'Smart Insight > Cool stuff > Scripts'
44
+ """
45
+ And the following definition:
46
+ """
47
+ source :products do
48
+ delimiter ';'
49
+ quote "'"
50
+ field :id, String
51
+ field :name, String
52
+ field :category, String
53
+ end
54
+
55
+ source :products_converted do
56
+ field :id, String
57
+ field :name, String
58
+ field :category, String
59
+ end
60
+
61
+ transform :products => :products_converted do |record|
62
+ output record
63
+ end
64
+ """
65
+ When I execute the definition
66
+ Then the process should exit successfully
67
+ And there should be a "products_converted.csv" data file in the upload directory containing:
68
+ """
69
+ id,name,category
70
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
71
+ CDI-234,"Another 12"" product name",Smart Insight > Cool stuff > Scripts
72
+ """
data/features/transform/join.feature
@@ -0,0 +1,180 @@
+ Feature: Joining CSV files
+
+   Scenario: Single file transformation
+     Given an "orders.csv" data file containing:
+       """
+       id,order_date,customer_id,total_price
+       order_1,2011-01-01,customer_1,100
+       order_2,2011-02-02,customer_1,200
+       order_3,2011-03-03,customer_2,300
+       """
+     Given an "order_items.csv" data file containing:
+       """
+       order_id,item_id,item_name,item_category,quantity,sales_amount,comment
+       order_1,item_1,first item,clothing,1,5,some useful comment
+       order_1,item_2,second item,communication,2,6,not so useful comment
+       order_2,item_2,second item,communication,5,12,very misleading comment
+       """
+     And the following definition:
+       """
+       source :orders_file do
+         file "orders.csv"
+
+         field :id, String
+         field :order_date, Date
+         field :customer_id, String
+         field :total_price, Integer
+       end
+
+       source :order_items_file do
+         file "order_items.csv"
+
+         field :order_id, String
+         field :item_id, String
+         field :item_name, String
+         field :item_category, String
+         field :quantity, Integer
+         field :sales_amount, Integer
+         field :comment, String
+       end
+
+       source :sales_items do
+         field :order_id, String
+         field :order_date, String
+         field :new_field, String
+         field :customer_id, String
+         field :item_id, String
+         field :item_name, String
+         field :item_category, String
+         field :quantity, String
+         field :sales_amount, String
+       end
+
+       join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "sales_items.csv" data file in the upload directory containing:
+       """
+       order_id,order_date,new_field,customer_id,item_id,item_name,item_category,quantity,sales_amount
+       order_1,2011-01-01,,customer_1,item_1,first item,clothing,1,5
+       order_1,2011-01-01,,customer_1,item_2,second item,communication,2,6
+       order_2,2011-02-02,,customer_1,item_2,second item,communication,5,12
+       """
+
+
+   Scenario: File transformation with left join
+     Given an "orders.csv" data file containing:
+       """
+       id,order_date,customer_id,total_price
+       order_1,2011-01-01,customer_1,100
+       order_2,2011-02-02,customer_1,200
+       order_3,2011-03-03,customer_2,300
+       """
+     Given an "order_items.csv" data file containing:
+       """
+       order_id,item_id,item_name,item_category,quantity,sales_amount,comment
+       order_1,item_1,first item,clothing,1,5,some useful comment
+       order_1,item_2,second item,communication,2,6,not so useful comment
+       order_2,item_2,second item,communication,5,12,very misleading comment
+       """
+     And the following definition:
+       """
+       source :orders_file do
+         file "orders.csv"
+
+         field :id, String
+         field :order_date, Date
+         field :customer_id, String
+         field :total_price, Integer
+       end
+
+       source :order_items_file do
+         file "order_items.csv"
+
+         field :order_id, String
+         field :item_id, String
+         field :item_name, String
+         field :item_category, String
+         field :quantity, Integer
+         field :sales_amount, Integer
+         field :comment, String
+       end
+
+       source :sales_items do
+         field :id, String
+         field :item_id, String
+         field :item_name, String
+         field :order_date, String
+         field :customer_id, String
+         field :item_category, String
+         field :quantity, String
+         field :sales_amount, String
+       end
+
+       join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "sales_items.csv" data file in the upload directory containing:
+       """
+       id,item_id,item_name,order_date,customer_id,item_category,quantity,sales_amount
+       order_1,item_1,first item,2011-01-01,customer_1,clothing,1,5
+       order_1,item_2,second item,2011-01-01,customer_1,communication,2,6
+       order_2,item_2,second item,2011-02-02,customer_1,communication,5,12
+       order_3,,,2011-03-03,customer_2,,,
+       """
+
+
+   Scenario: Close file after join
+     Given an "orders.csv" data file containing:
+       """
+       id,order_date
+       order_1,2011-01-01
+       """
+     Given an "order_items.csv" data file containing:
+       """
+       order_id,item_id
+       order_1,item_1
+       """
+     And the following definition:
+       """
+       source :orders_file do
+         file "orders.csv"
+
+         field :id, String
+         field :order_date, Date
+       end
+
+       source :order_items_file do
+         file "order_items.csv"
+
+         field :order_id, String
+         field :item_id, String
+       end
+
+       source :sales_items do
+         field :id, String
+         field :item_id, String
+         field :order_date, String
+       end
+
+       source :sales_items_transformed do
+         field :id, String
+         field :item_id, String
+         field :order_date, String
+       end
+
+       join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
+
+       transform :sales_items => :sales_items_transformed do |record|
+         output record
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "sales_items_transformed.csv" data file in the upload directory containing:
+       """
+       id,item_id,order_date
+       order_1,item_1,2011-01-01
+       """
1
+ Feature: Join multiple files into one output file
2
+
3
+ Scenario: Successful transformation
4
+ Given a "products1.csv" data file containing:
5
+ """
6
+ id,name,category
7
+ PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
8
+ PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
9
+ """
10
+ And a "products2.csv" data file containing:
11
+ """
12
+ id,name,category
13
+ PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
14
+ PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
15
+ """
16
+ And the following definition:
17
+ """
18
+ source :products do
19
+ file "products*.csv"
20
+ field :id, String
21
+ field :name, String
22
+ field :category, String
23
+ end
24
+
25
+ source :transformed_products do
26
+ field :item, String
27
+ field :title, String
28
+ field :category, String
29
+ end
30
+
31
+ transform :products => :transformed_products do |record|
32
+ record[:item] = record[:id]
33
+ record[:title] = record[:name]
34
+ output record
35
+ end
36
+ """
37
+ When I execute the definition
38
+ Then the process should exit successfully
39
+ And there should be a "transformed_products.csv" data file in the upload directory containing:
40
+ """
41
+ item,title,category
42
+ PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
43
+ PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
44
+ PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
45
+ PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
46
+ """
data/features/transform/output_rows.feature
@@ -0,0 +1,70 @@
+ Feature: Output rows to file
+
+   Background:
+     Given a "products.csv" data file containing:
+       """
+       id,name
+       1,Product 1
+       2, Product 2
+       """
+
+   Scenario: Output Hash instead of record
+     Given the following definition:
+       """
+       source :products do
+         field :id, String
+         field :name, String
+       end
+
+       source :products_copy do
+         field :id, String
+         field :name, String
+       end
+
+       transform :products => :products_copy do |record|
+         output name: record[:name],
+                id: record[:id]
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "products_copy.csv" data file in the upload directory containing:
+       """
+       id,name
+       1,Product 1
+       2,Product 2
+       """
+
+
+   Scenario: Output multiple records for each input row
+     Given the following definition:
+       """
+       source :products do
+         field :id, String
+         field :name, String
+       end
+
+       source :products_doubled do
+         field :id, String
+         field :name, String
+         field :counter, Integer
+       end
+
+       transform :products => :products_doubled do |record|
+         record[:counter] = 1
+         output record
+         record[:counter] = 2
+         output record
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "products_doubled.csv" data file in the upload directory containing:
+       """
+       id,name,counter
+       1,Product 1,1
+       1,Product 1,2
+       2,Product 2,1
+       2,Product 2,2
+       """
+
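Note what the second scenario implies: the transform mutates one record object and outputs it twice, yet both counter values appear in the file, so `output` must capture the record's values at call time rather than holding a reference. A sketch of that pattern (hypothetical, not Cranium's actual `output`):

```ruby
emitted = []

# Capturing a copy at call time means later mutations of `record`
# cannot retroactively change rows that were already emitted.
output = ->(record) { emitted << record.dup }

record = { id: "1", name: "Product 1" }
record[:counter] = 1
output.call(record)
record[:counter] = 2
output.call(record)

p emitted.map { |row| row[:counter] } # => [1, 2]
```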
data/features/transform/projection.feature
@@ -0,0 +1,34 @@
+ Feature: Projection
+
+   Scenario: Empty transformation projects down if the source structure is a superset of the target structure
+     Given a "products.csv" data file containing:
+       """
+       id,name,category
+       JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
+       CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
+       """
+     And the following definition:
+       """
+       source :products do
+         field :id, String
+         field :name, String
+         field :category, String
+       end
+
+       source :products_projected do
+         field :id, String
+         field :category, String
+       end
+
+       transform :products => :products_projected do |record|
+         output record
+       end
+       """
+     When I execute the definition
+     Then the process should exit successfully
+     And there should be a "products_projected.csv" data file in the upload directory containing:
+       """
+       id,category
+       JNI-123,Main category > Subcategory > Sub-subcategory
+       CDI-234,Smart Insight > Cool stuff > Scripts
+       """