cranium 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
@@ -0,0 +1,55 @@
1
+ Feature: Sequel database connections are fault tolerant
2
+
3
+ Scenario:
4
+ Given a database table called "dim_product" with the following fields:
5
+ | field_name | field_type |
6
+ | item | TEXT |
7
+ | title | TEXT |
8
+ And a "products.csv" data file containing:
9
+ """
10
+ id,name,category
11
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
12
+ CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
13
+ """
14
+ And the following definition:
15
+ """
16
+ require 'sequel'
17
+
18
+ def terminate_connections
19
+ connection_string = ENV['GREENPLUM_AS_ADMIN_URL'] || "postgres://database_administrator:emarsys@192.168.56.43:5432/cranium"
20
+ connection = Sequel.connect connection_string, loggers: Cranium.configuration.loggers
21
+ connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
22
+ end
23
+
24
+ source :products do
25
+ encoding "UTF-8"
26
+ delimiter ','
27
+
28
+ field :id, String
29
+ field :name, String
30
+ end
31
+
32
+ source :transformed_products do
33
+ field :id, String
34
+ field :name, String
35
+ end
36
+
37
+ transform :products => :transformed_products do |record|
38
+ output record
39
+ end
40
+
41
+ terminate_connections
42
+
43
+ import :transformed_products do
44
+ into :dim_product
45
+
46
+ put :id => :item
47
+ put :name => :title
48
+ end
49
+ """
50
+ When I execute the definition
51
+ Then the process should exit successfully
52
+ And the "dim_product" table should contain:
53
+ | item | title |
54
+ | JNI-123 | Just a product name |
55
+ | CDI-234 | Another product name |
@@ -0,0 +1,40 @@
# Step definitions for creating, populating and asserting on database tables.

# Creates a table with the columns described in the Cucumber data table.
Given(/^a database table called "([^"]*)" with the following fields:$/) do |table_name, fields|
  database_table(table_name).create(fields.data)
end


# Replaces the table's contents with exactly the given rows.
Given(/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
  database_table(table_name).clear
  step %Q(the following new rows in the "#{table_name}" database table:), rows
end


# Appends the given rows to the table without clearing it first.
Given(/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
  database_table(table_name).insert rows.data
end


# Positions a sequence at the given current value.
Given(/^the current value in sequence "([^"]*)" is (\d+)$/) do |sequence_name, current_value|
  Cranium::Database.connection.run "SELECT setval('#{sequence_name}', #{current_value})"
end


# Creates a sequence and positions it at the given starting value.
Given(/^a sequence called "([^"]*)" starting from (\d+)$/) do |sequence_name, start_value|
  database_sequence(sequence_name).create

  step %Q[the current value in sequence "#{sequence_name}" is #{start_value}]
end


# Asserts that the table contains exactly the expected rows, in any order.
Then(/^the "([^"]*)" table should contain:$/) do |table_name, data|
  # Symbolize each expected row's keys so they match the symbol column names
  # returned by database_table(...).content.
  expected_data = data.data.map do |hash|
    hash.each_with_object({}) { |(key, value), row| row[key.to_sym] = value }
  end

  expect(database_table(table_name).content(data.fields)).to match_array expected_data
end
@@ -0,0 +1,3 @@
# Saves the given Cranium definition script so a later execution step can run it.
Given(/^the following definition:$/) do |definition|
  save_definition definition
end
@@ -0,0 +1,23 @@
# Step definitions that run a saved Cranium definition and assert on the outcome.

# Given-position alias for running the previously saved definition.
Given(/^the definition is executed(?: again)?$/) do
  step "I execute the definition"
end


# Executes the previously saved definition script.
When(/^I execute the definition(?: again)?$/) do
  execute_definition
end


# The script must have exited with status 0; on failure the message
# includes the script's captured output for easier debugging.
Then(/^the process should exit successfully$/) do
  expect(result_code).to eq(0), "Expected script exit code to be 0, but received #{result_code}\n\n#{script_output}\n"
end


# The script must have exited with status 1; on failure the message
# includes the script's captured output for easier debugging.
Then(/^the process should exit with an error$/) do
  expect(result_code).to eq(1), "Expected script exit code to be 1, but received #{result_code}\n\n#{script_output}\n"
end


# The captured error output must include the given message fragment.
Then(/^the error message should contain:$/) do |message|
  expect(error_output).to include message
end
@@ -0,0 +1,39 @@
# Step definitions for manipulating and asserting on files in the upload directory.

# Removes the given directory (and its contents) if present.
Given(/^no "([^"]*)" directory/) do |dir_path|
  upload_directory.remove_directory dir_path
end


# Creates a zero-length data file in the upload directory.
Given(/^an empty "([^"]*)" data file$/) do |file_name|
  step %Q(a "#{file_name}" data file containing:), ""
end


# Creates a data file in the upload directory with the given content.
Given(/^an? "([^"]*)" data file containing:$/) do |file_name, content|
  upload_directory.save_file file_name, content
end


# Deletes the given file from the upload directory.
Given(/^the "([^"]*)" file is deleted$/) do |file_name|
  upload_directory.delete_file file_name
end


# Asserts a file exists in the upload directory with exactly the given
# content (a single trailing newline is ignored via chomp).
Then(/^there should be a "([^"]*)" data file in the upload directory containing:$/) do |file_name, content|
  expect(upload_directory.file_exists?(file_name)).to be_truthy, "expected file '#{file_name}' to exist"
  expect(upload_directory.read_file(file_name).chomp).to eq content
end


# Asserts the directory contains exactly the listed files; each :filename cell
# is treated as a regular expression so generated names (e.g. timestamps) match.
Then(/^the "([^"]*)" directory should contain the following files:$/) do |directory_path, files|
  # Dir.exist? replaces Dir.exists?, which was deprecated and removed in Ruby 3.2.
  expect(Dir.exist?(directory_path)).to be_truthy, "expected directory '#{directory_path}' to exist"
  files_in_dir = Dir["#{directory_path}/*"].map { |file_name| File.basename file_name }.sort
  expect(files_in_dir.count).to eq files.data.count
  # Counts are equal at this point, so pairwise matching via zip is safe.
  files_in_dir.zip(files.data) do |actual_name, expected_row|
    expect(actual_name).to match Regexp.new(expected_row[:filename])
  end
end


# Convenience wrapper asserting on the configured upload directory.
Then(/^the upload directory should contain the following files:$/) do |files|
  step %Q(the "#{Cranium.configuration.upload_path}" directory should contain the following files:), files
end
@@ -0,0 +1,24 @@
# Monkey-patches Cucumber's multiline-argument factory so that every Ast::Table
# handed to a step definition is wrapped in Cranium's CucumberTable, with the
# literal cell value "NULL" translated to Ruby nil.
module Cucumber::Ast

  module MultilineArgument
    class << self

      # Keep a handle on the original factory so it can still be called below.
      alias_method :from_old, :from



      # Builds the multiline argument as Cucumber normally would, then upgrades
      # plain tables to CucumberTable; non-table arguments pass through untouched.
      def from(argument)
        original_result = from_old(argument)
        if original_result.is_a? Cucumber::Ast::Table
          Cranium::TestFramework::CucumberTable.from_ast_table(original_result).with_patterns(
            "NULL" => nil
          )
        else
          original_result
        end
      end

    end
  end

end
@@ -0,0 +1,27 @@
# Cucumber environment setup: configures Cranium and resets state around scenarios.
require 'fileutils'
require_relative "../../lib/cranium"

# mkdir_p is idempotent, so no existence guard is needed (the former guard also
# used Dir.exists?, which was deprecated and removed in Ruby 3.2).
FileUtils.mkdir_p("log")

Cranium.configure do |config|
  config.greenplum_connection_string = ENV['GREENPLUM_URL'] || "postgres://cranium:cranium@192.168.56.43:5432/cranium"
  config.gpfdist_url = ENV['GPFDIST_URL'] || "192.168.56.43:8123"
  config.gpfdist_home_directory = "tmp/custdata"
  config.upload_directory = "cranium_build"
  config.loggers << Logger.new("log/cucumber.log")
end


# Start every scenario with an empty upload directory.
Before do
  FileUtils.rm_rf Cranium.configuration.upload_path
  FileUtils.mkdir_p Cranium.configuration.upload_path
end

# Drop the tables and sequences created during the scenario.
After do
  Cranium::TestFramework::DatabaseTable.cleanup
  Cranium::TestFramework::DatabaseSequence.cleanup
end

# Expose the test-framework helpers (database_table, upload_directory, ...) to steps.
World do
  Cranium::TestFramework::World.new Cranium.configuration.upload_path, Cranium::Database.connection
end
@@ -0,0 +1,22 @@
# Randomizes the order in which Cucumber runs feature files, printing the seed
# so a failing order can be reproduced via the CUCUMBER_SEED env variable.

# srand returns the previous seed, giving a fresh random value per run;
# masking with 0xFFFF keeps the reported seed short.
cucumber_seed = ENV['CUCUMBER_SEED'] ? ENV['CUCUMBER_SEED'].to_i : srand % 0xFFFF
cucumber_dry_run = nil


AfterConfiguration do |cucumber_config|
  original_files = cucumber_config.feature_files
  cucumber_dry_run = cucumber_config.dry_run?

  # Redefine feature_files on this one configuration object (via its
  # eigenclass) so every later call yields the same seeded shuffle.
  config_eigenclass = class << cucumber_config;
    self
  end
  config_eigenclass.send :undef_method, :feature_files
  config_eigenclass.send(:define_method, :feature_files) do
    # Re-seed on every call so the shuffle is identical each time.
    Kernel.srand cucumber_seed
    original_files.sort_by { Kernel.rand original_files.count }
  end
end


at_exit do
  puts("Cucumber randomized with seed #{cucumber_seed.inspect}") unless cucumber_dry_run
end
@@ -0,0 +1,5 @@
# Abort the whole Cucumber run after the first failing scenario, unless the
# STOP_ON_FIRST_ERROR environment variable is explicitly set to "no".
unless ENV["STOP_ON_FIRST_ERROR"] == "no"
  After do |scenario|
    Cucumber.wants_to_quit = true if scenario.failed?
  end
end
@@ -0,0 +1,37 @@
1
+ Feature: Deduplicate data in CSV file
2
+
3
+ Scenario: Single file transformation
4
+ Given a "sales_items.csv" data file containing:
5
+ """
6
+ order_id,item,item_name
7
+ 1,Item1,Item name 1
8
+ 2,Item1,Item name 1
9
+ 3,Item2,Item name 2
10
+ 4,Item2,Item name 2
11
+ 5,Item3,Item name 3
12
+ """
13
+ And the following definition:
14
+ """
15
+ source :sales_items do
16
+ file "sales_items.csv"
17
+ field :order_id, String
18
+ field :item, String
19
+ field :item_name, String
20
+ end
21
+
22
+ source :products do
23
+ field :item, String
24
+ field :item_name, String
25
+ end
26
+
27
+ deduplicate :sales_items, into: :products, by: [:item]
28
+ """
29
+ When I execute the definition
30
+ Then the process should exit successfully
31
+ And there should be a "products.csv" data file in the upload directory containing:
32
+ """
33
+ item,item_name
34
+ Item1,Item name 1
35
+ Item2,Item name 2
36
+ Item3,Item name 3
37
+ """
@@ -0,0 +1,72 @@
1
+ Feature: Empty transformation
2
+
3
+ Scenario: Empty transformation between the same structures from the default CSV format simply copies the file
4
+ Given a "products.csv" data file containing:
5
+ """
6
+ id,name,category
7
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
8
+ CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
9
+ """
10
+ And the following definition:
11
+ """
12
+ source :products do
13
+ field :id, String
14
+ field :name, String
15
+ field :category, String
16
+ end
17
+
18
+ source :products_copy do
19
+ field :id, String
20
+ field :name, String
21
+ field :category, String
22
+ end
23
+
24
+ transform :products => :products_copy do |record|
25
+ output record
26
+ end
27
+ """
28
+ When I execute the definition
29
+ Then the process should exit successfully
30
+ And there should be a "products_copy.csv" data file in the upload directory containing:
31
+ """
32
+ id,name,category
33
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
34
+ CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
35
+ """
36
+
37
+
38
+ Scenario: Empty transformation between the same structures but from a custom CSV format converts quotes and delimiters to the default format
39
+ Given a "products.csv" data file containing:
40
+ """
41
+ 'id';'name';'category'
42
+ 'JNI-123';'Just a product name';'Main category > Subcategory > Sub-subcategory'
43
+ 'CDI-234';'Another 12" product name';'Smart Insight > Cool stuff > Scripts'
44
+ """
45
+ And the following definition:
46
+ """
47
+ source :products do
48
+ delimiter ';'
49
+ quote "'"
50
+ field :id, String
51
+ field :name, String
52
+ field :category, String
53
+ end
54
+
55
+ source :products_converted do
56
+ field :id, String
57
+ field :name, String
58
+ field :category, String
59
+ end
60
+
61
+ transform :products => :products_converted do |record|
62
+ output record
63
+ end
64
+ """
65
+ When I execute the definition
66
+ Then the process should exit successfully
67
+ And there should be a "products_converted.csv" data file in the upload directory containing:
68
+ """
69
+ id,name,category
70
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
71
+ CDI-234,"Another 12"" product name",Smart Insight > Cool stuff > Scripts
72
+ """
@@ -0,0 +1,180 @@
1
+ Feature: Joining CSV files
2
+
3
+ Scenario: Single file transformation
4
+ Given an "orders.csv" data file containing:
5
+ """
6
+ id,order_date,customer_id,total_price
7
+ order_1,2011-01-01,customer_1,100
8
+ order_2,2011-02-02,customer_1,200
9
+ order_3,2011-03-03,customer_2,300
10
+ """
11
+ Given an "order_items.csv" data file containing:
12
+ """
13
+ order_id,item_id,item_name,item_category,quantity,sales_amount,comment
14
+ order_1,item_1,first item,clothing,1,5,some useful comment
15
+ order_1,item_2,second item,communication,2,6,not so useful comment
16
+ order_2,item_2,second item,communication,5,12,very misleading comment
17
+ """
18
+ And the following definition:
19
+ """
20
+ source :orders_file do
21
+ file "orders.csv"
22
+
23
+ field :id, String
24
+ field :order_date, Date
25
+ field :customer_id, String
26
+ field :total_price, Integer
27
+ end
28
+
29
+ source :order_items_file do
30
+ file "order_items.csv"
31
+
32
+ field :order_id, String
33
+ field :item_id, String
34
+ field :item_name, String
35
+ field :item_category, String
36
+ field :quantity, Integer
37
+ field :sales_amount, Integer
38
+ field :comment, String
39
+ end
40
+
41
+ source :sales_items do
42
+ field :order_id, String
43
+ field :order_date, String
44
+ field :new_field, String
45
+ field :customer_id, String
46
+ field :item_id, String
47
+ field :item_name, String
48
+ field :item_category, String
49
+ field :quantity, String
50
+ field :sales_amount, String
51
+ end
52
+
53
+ join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }
54
+ """
55
+ When I execute the definition
56
+ Then the process should exit successfully
57
+ And there should be a "sales_items.csv" data file in the upload directory containing:
58
+ """
59
+ order_id,order_date,new_field,customer_id,item_id,item_name,item_category,quantity,sales_amount
60
+ order_1,2011-01-01,,customer_1,item_1,first item,clothing,1,5
61
+ order_1,2011-01-01,,customer_1,item_2,second item,communication,2,6
62
+ order_2,2011-02-02,,customer_1,item_2,second item,communication,5,12
63
+ """
64
+
65
+
66
+ Scenario: File transformation with left join
67
+ Given an "orders.csv" data file containing:
68
+ """
69
+ id,order_date,customer_id,total_price
70
+ order_1,2011-01-01,customer_1,100
71
+ order_2,2011-02-02,customer_1,200
72
+ order_3,2011-03-03,customer_2,300
73
+ """
74
+ Given an "order_items.csv" data file containing:
75
+ """
76
+ order_id,item_id,item_name,item_category,quantity,sales_amount,comment
77
+ order_1,item_1,first item,clothing,1,5,some useful comment
78
+ order_1,item_2,second item,communication,2,6,not so useful comment
79
+ order_2,item_2,second item,communication,5,12,very misleading comment
80
+ """
81
+ And the following definition:
82
+ """
83
+ source :orders_file do
84
+ file "orders.csv"
85
+
86
+ field :id, String
87
+ field :order_date, Date
88
+ field :customer_id, String
89
+ field :total_price, Integer
90
+ end
91
+
92
+ source :order_items_file do
93
+ file "order_items.csv"
94
+
95
+ field :order_id, String
96
+ field :item_id, String
97
+ field :item_name, String
98
+ field :item_category, String
99
+ field :quantity, Integer
100
+ field :sales_amount, Integer
101
+ field :comment, String
102
+ end
103
+
104
+ source :sales_items do
105
+ field :id, String
106
+ field :item_id, String
107
+ field :item_name, String
108
+ field :order_date, String
109
+ field :customer_id, String
110
+ field :item_category, String
111
+ field :quantity, String
112
+ field :sales_amount, String
113
+ end
114
+
115
+ join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
116
+ """
117
+ When I execute the definition
118
+ Then the process should exit successfully
119
+ And there should be a "sales_items.csv" data file in the upload directory containing:
120
+ """
121
+ id,item_id,item_name,order_date,customer_id,item_category,quantity,sales_amount
122
+ order_1,item_1,first item,2011-01-01,customer_1,clothing,1,5
123
+ order_1,item_2,second item,2011-01-01,customer_1,communication,2,6
124
+ order_2,item_2,second item,2011-02-02,customer_1,communication,5,12
125
+ order_3,,,2011-03-03,customer_2,,,
126
+ """
127
+
128
+
129
+ Scenario: Close file after join
130
+ Given an "orders.csv" data file containing:
131
+ """
132
+ id,order_date
133
+ order_1,2011-01-01
134
+ """
135
+ Given an "order_items.csv" data file containing:
136
+ """
137
+ order_id,item_id
138
+ order_1,item_1
139
+ """
140
+ And the following definition:
141
+ """
142
+ source :orders_file do
143
+ file "orders.csv"
144
+
145
+ field :id, String
146
+ field :order_date, Date
147
+ end
148
+
149
+ source :order_items_file do
150
+ file "order_items.csv"
151
+
152
+ field :order_id, String
153
+ field :item_id, String
154
+ end
155
+
156
+ source :sales_items do
157
+ field :id, String
158
+ field :item_id, String
159
+ field :order_date, String
160
+ end
161
+
162
+ source :sales_items_transformed do
163
+ field :id, String
164
+ field :item_id, String
165
+ field :order_date, String
166
+ end
167
+
168
+ join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
169
+
170
+ transform :sales_items => :sales_items_transformed do |record|
171
+ output record
172
+ end
173
+ """
174
+ When I execute the definition
175
+ Then the process should exit successfully
176
+ And there should be a "sales_items_transformed.csv" data file in the upload directory containing:
177
+ """
178
+ id,item_id,order_date
179
+ order_1,item_1,2011-01-01
180
+ """
@@ -0,0 +1,46 @@
1
+ Feature: Join multiple files into one output file
2
+
3
+ Scenario: Successful transformation
4
+ Given a "products1.csv" data file containing:
5
+ """
6
+ id,name,category
7
+ PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
8
+ PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
9
+ """
10
+ And a "products2.csv" data file containing:
11
+ """
12
+ id,name,category
13
+ PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
14
+ PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
15
+ """
16
+ And the following definition:
17
+ """
18
+ source :products do
19
+ file "products*.csv"
20
+ field :id, String
21
+ field :name, String
22
+ field :category, String
23
+ end
24
+
25
+ source :transformed_products do
26
+ field :item, String
27
+ field :title, String
28
+ field :category, String
29
+ end
30
+
31
+ transform :products => :transformed_products do |record|
32
+ record[:item] = record[:id]
33
+ record[:title] = record[:name]
34
+ output record
35
+ end
36
+ """
37
+ When I execute the definition
38
+ Then the process should exit successfully
39
+ And there should be a "transformed_products.csv" data file in the upload directory containing:
40
+ """
41
+ item,title,category
42
+ PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
43
+ PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
44
+ PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
45
+ PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
46
+ """
@@ -0,0 +1,70 @@
1
+ Feature: Output rows to file
2
+
3
+ Background:
4
+ Given a "products.csv" data file containing:
5
+ """
6
+ id,name
7
+ 1,Product 1
8
+ 2, Product 2
9
+ """
10
+
11
+ Scenario: Output Hash instead of record
12
+ Given the following definition:
13
+ """
14
+ source :products do
15
+ field :id, String
16
+ field :name, String
17
+ end
18
+
19
+ source :products_copy do
20
+ field :id, String
21
+ field :name, String
22
+ end
23
+
24
+ transform :products => :products_copy do |record|
25
+ output name: record[:name],
26
+ id: record[:id]
27
+ end
28
+ """
29
+ When I execute the definition
30
+ Then the process should exit successfully
31
+ And there should be a "products_copy.csv" data file in the upload directory containing:
32
+ """
33
+ id,name
34
+ 1,Product 1
35
+ 2,Product 2
36
+ """
37
+
38
+
39
+ Scenario: Output multiple records for each input row
40
+ Given the following definition:
41
+ """
42
+ source :products do
43
+ field :id, String
44
+ field :name, String
45
+ end
46
+
47
+ source :products_doubled do
48
+ field :id, String
49
+ field :name, String
50
+ field :counter, Integer
51
+ end
52
+
53
+ transform :products => :products_doubled do |record|
54
+ record[:counter] = 1
55
+ output record
56
+ record[:counter] = 2
57
+ output record
58
+ end
59
+ """
60
+ When I execute the definition
61
+ Then the process should exit successfully
62
+ And there should be a "products_doubled.csv" data file in the upload directory containing:
63
+ """
64
+ id,name,counter
65
+ 1,Product 1,1
66
+ 1,Product 1,2
67
+ 2,Product 2,1
68
+ 2,Product 2,2
69
+ """
70
+
@@ -0,0 +1,34 @@
1
+ Feature: Projection
2
+
3
+ Scenario: Empty transformation projects down if the source structure is a superset of the target structure
4
+ Given a "products.csv" data file containing:
5
+ """
6
+ id,name,category
7
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
8
+ CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
9
+ """
10
+ And the following definition:
11
+ """
12
+ source :products do
13
+ field :id, String
14
+ field :name, String
15
+ field :category, String
16
+ end
17
+
18
+ source :products_projected do
19
+ field :id, String
20
+ field :category, String
21
+ end
22
+
23
+ transform :products => :products_projected do |record|
24
+ output record
25
+ end
26
+ """
27
+ When I execute the definition
28
+ Then the process should exit successfully
29
+ And there should be a "products_projected.csv" data file in the upload directory containing:
30
+ """
31
+ id,category
32
+ JNI-123,Main category > Subcategory > Sub-subcategory
33
+ CDI-234,Smart Insight > Cool stuff > Scripts
34
+ """