cranium 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
Feature: Sequel database connections are fault tolerant
|
|
2
|
+
|
|
3
|
+
Scenario:
|
|
4
|
+
Given a database table called "dim_product" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| item | TEXT |
|
|
7
|
+
| title | TEXT |
|
|
8
|
+
And a "products.csv" data file containing:
|
|
9
|
+
"""
|
|
10
|
+
id,name,category
|
|
11
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
|
|
12
|
+
CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
|
|
13
|
+
"""
|
|
14
|
+
And the following definition:
|
|
15
|
+
"""
|
|
16
|
+
require 'sequel'
|
|
17
|
+
|
|
18
|
+
def terminate_connections
|
|
19
|
+
connection_string = ENV['GREENPLUM_AS_ADMIN_URL'] || "postgres://database_administrator:emarsys@192.168.56.43:5432/cranium"
|
|
20
|
+
connection = Sequel.connect connection_string, loggers: Cranium.configuration.loggers
|
|
21
|
+
connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
source :products do
|
|
25
|
+
encoding "UTF-8"
|
|
26
|
+
delimiter ','
|
|
27
|
+
|
|
28
|
+
field :id, String
|
|
29
|
+
field :name, String
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
source :transformed_products do
|
|
33
|
+
field :id, String
|
|
34
|
+
field :name, String
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
transform :products => :transformed_products do |record|
|
|
38
|
+
output record
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
terminate_connections
|
|
42
|
+
|
|
43
|
+
import :transformed_products do
|
|
44
|
+
into :dim_product
|
|
45
|
+
|
|
46
|
+
put :id => :item
|
|
47
|
+
put :name => :title
|
|
48
|
+
end
|
|
49
|
+
"""
|
|
50
|
+
When I execute the definition
|
|
51
|
+
Then the process should exit successfully
|
|
52
|
+
And the "dim_product" table should contain:
|
|
53
|
+
| item | title |
|
|
54
|
+
| JNI-123 | Just a product name |
|
|
55
|
+
| CDI-234 | Another product name |
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
Given(/^a database table called "([^"]*)" with the following fields:$/) do |table_name, fields|
|
|
2
|
+
database_table(table_name).create(fields.data)
|
|
3
|
+
end
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Given (/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
|
7
|
+
database_table(table_name).clear
|
|
8
|
+
step %Q(the following new rows in the "#{table_name}" database table:), rows
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
Given (/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
|
13
|
+
database_table(table_name).insert rows.data
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
Given(/^the current value in sequence "([^"]*)" is (\d+)$/) do |sequence_name, current_value|
|
|
18
|
+
Cranium::Database.connection.run "SELECT setval('#{sequence_name}', #{current_value})"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
Given(/^a sequence called "([^"]*)" starting from (\d+)$/) do |sequence_name, start_value|
|
|
23
|
+
database_sequence(sequence_name).create
|
|
24
|
+
|
|
25
|
+
step %Q[the current value in sequence "#{sequence_name}" is #{start_value}]
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
Then(/^the "([^"]*)" table should contain:$/) do |table_name, data|
|
|
30
|
+
expected_data, hashes = [], data.data
|
|
31
|
+
hashes.map do |hash|
|
|
32
|
+
new_row = {}
|
|
33
|
+
hash.each_key do |key|
|
|
34
|
+
new_row[key.to_sym] = hash[key]
|
|
35
|
+
end
|
|
36
|
+
expected_data << new_row
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
expect(database_table(table_name).content(data.fields)).to match_array expected_data
|
|
40
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Given /^the definition is executed(?: again)?$/ do
|
|
2
|
+
step "I execute the definition"
|
|
3
|
+
end
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
When /^I execute the definition(?: again)?$/ do
|
|
7
|
+
execute_definition
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
Then /^the process should exit successfully$/ do
|
|
12
|
+
expect(result_code).to eq(0), "Expected script exit code to be 0, but received #{result_code}\n\n#{script_output}\n"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
Then /^the process should exit with an error$/ do
|
|
17
|
+
expect(result_code).to eq(1), "Expected script exit code to be 1, but received #{result_code}\n\n#{script_output}\n"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
Then /^the error message should contain:$/ do |message|
|
|
22
|
+
expect(error_output).to include message
|
|
23
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
Given /^no "([^"]*)" directory/ do |dir_path|
|
|
2
|
+
upload_directory.remove_directory dir_path
|
|
3
|
+
end
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Given /^an empty "([^"]*)" data file$/ do |file_name|
|
|
7
|
+
step %Q(a "#{file_name}" data file containing:), ""
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
Given /^an? "([^"]*)" data file containing:$/ do |file_name, content|
|
|
12
|
+
upload_directory.save_file file_name, content
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
Given /^the "([^"]*)" file is deleted$/ do |file_name|
|
|
17
|
+
upload_directory.delete_file file_name
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
Then /^there should be a "([^"]*)" data file in the upload directory containing:$/ do |file_name, content|
|
|
22
|
+
expect(upload_directory.file_exists?(file_name)).to be_truthy, "expected file '#{file_name}' to exist"
|
|
23
|
+
expect(upload_directory.read_file(file_name).chomp).to eq content
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Then /^the "([^"]*)" directory should contain the following files:$/ do |directory_path, files|
|
|
28
|
+
# Dir.exist? replaces the deprecated Dir.exists? alias, which was removed in Ruby 3.2.
expect(Dir.exist?(directory_path)).to be_truthy, "expected directory '#{directory_path}' to exist"
|
|
29
|
+
files_in_dir = Dir["#{directory_path}/*"].map { |file_name| File.basename file_name }.sort
|
|
30
|
+
expect(files_in_dir.count).to eq files.data.count
|
|
31
|
+
0.upto files.data.count-1 do |index|
|
|
32
|
+
expect(files_in_dir[index]).to match Regexp.new(files.data[index][:filename])
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Then /^the upload directory should contain the following files:$/ do |files|
|
|
38
|
+
step %Q(the "#{Cranium.configuration.upload_path}" directory should contain the following files:), files
|
|
39
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
module Cucumber::Ast
|
|
2
|
+
|
|
3
|
+
module MultilineArgument
|
|
4
|
+
class << self
|
|
5
|
+
|
|
6
|
+
alias_method :from_old, :from
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def from(argument)
|
|
11
|
+
original_result = from_old(argument)
|
|
12
|
+
if original_result.is_a? Cucumber::Ast::Table
|
|
13
|
+
Cranium::TestFramework::CucumberTable.from_ast_table(original_result).with_patterns(
|
|
14
|
+
"NULL" => nil
|
|
15
|
+
)
|
|
16
|
+
else
|
|
17
|
+
original_result
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
require_relative "../../lib/cranium"
|
|
3
|
+
|
|
4
|
+
# Make sure the log directory exists before the logger below opens a file in it.
# mkdir_p is a no-op when the directory is already present, so no guard is needed
# (the previous guard used Dir.exists?, which was removed in Ruby 3.2).
FileUtils.mkdir_p("log")
|
|
5
|
+
|
|
6
|
+
Cranium.configure do |config|
|
|
7
|
+
config.greenplum_connection_string = ENV['GREENPLUM_URL'] || "postgres://cranium:cranium@192.168.56.43:5432/cranium"
|
|
8
|
+
config.gpfdist_url = ENV['GPFDIST_URL'] || "192.168.56.43:8123"
|
|
9
|
+
config.gpfdist_home_directory = "tmp/custdata"
|
|
10
|
+
config.upload_directory = "cranium_build"
|
|
11
|
+
config.loggers << Logger.new("log/cucumber.log")
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
Before do
|
|
16
|
+
FileUtils.rm_rf Cranium.configuration.upload_path
|
|
17
|
+
FileUtils.mkdir_p Cranium.configuration.upload_path
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
After do
|
|
21
|
+
Cranium::TestFramework::DatabaseTable.cleanup
|
|
22
|
+
Cranium::TestFramework::DatabaseSequence.cleanup
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
World do
|
|
26
|
+
Cranium::TestFramework::World.new Cranium.configuration.upload_path, Cranium::Database.connection
|
|
27
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
cucumber_seed = ENV['CUCUMBER_SEED'] ? ENV['CUCUMBER_SEED'].to_i : srand % 0xFFFF
|
|
2
|
+
cucumber_dry_run = nil
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
AfterConfiguration do |cucumber_config|
|
|
6
|
+
original_files = cucumber_config.feature_files
|
|
7
|
+
cucumber_dry_run = cucumber_config.dry_run?
|
|
8
|
+
|
|
9
|
+
config_eigenclass = class << cucumber_config;
|
|
10
|
+
self
|
|
11
|
+
end
|
|
12
|
+
config_eigenclass.send :undef_method, :feature_files
|
|
13
|
+
config_eigenclass.send(:define_method, :feature_files) do
|
|
14
|
+
Kernel.srand cucumber_seed
|
|
15
|
+
original_files.sort_by { Kernel.rand original_files.count }
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
at_exit do
|
|
21
|
+
puts("Cucumber randomized with seed #{cucumber_seed.inspect}") unless cucumber_dry_run
|
|
22
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Feature: Deduplicate data in CSV file
|
|
2
|
+
|
|
3
|
+
Scenario: Single file transformation
|
|
4
|
+
Given a "sales_items.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
order_id,item,item_name
|
|
7
|
+
1,Item1,Item name 1
|
|
8
|
+
2,Item1,Item name 1
|
|
9
|
+
3,Item2,Item name 2
|
|
10
|
+
4,Item2,Item name 2
|
|
11
|
+
5,Item3,Item name 3
|
|
12
|
+
"""
|
|
13
|
+
And the following definition:
|
|
14
|
+
"""
|
|
15
|
+
source :sales_items do
|
|
16
|
+
file "sales_items.csv"
|
|
17
|
+
field :order_id, String
|
|
18
|
+
field :item, String
|
|
19
|
+
field :item_name, String
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
source :products do
|
|
23
|
+
field :item, String
|
|
24
|
+
field :item_name, String
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
deduplicate :sales_items, into: :products, by: [:item]
|
|
28
|
+
"""
|
|
29
|
+
When I execute the definition
|
|
30
|
+
Then the process should exit successfully
|
|
31
|
+
And there should be a "products.csv" data file in the upload directory containing:
|
|
32
|
+
"""
|
|
33
|
+
item,item_name
|
|
34
|
+
Item1,Item name 1
|
|
35
|
+
Item2,Item name 2
|
|
36
|
+
Item3,Item name 3
|
|
37
|
+
"""
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Feature: Empty transformation
|
|
2
|
+
|
|
3
|
+
Scenario: Empty transformation between the same structures from the default CSV format simply copies the file
|
|
4
|
+
Given a "products.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
id,name,category
|
|
7
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
|
8
|
+
CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
|
|
9
|
+
"""
|
|
10
|
+
And the following definition:
|
|
11
|
+
"""
|
|
12
|
+
source :products do
|
|
13
|
+
field :id, String
|
|
14
|
+
field :name, String
|
|
15
|
+
field :category, String
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
source :products_copy do
|
|
19
|
+
field :id, String
|
|
20
|
+
field :name, String
|
|
21
|
+
field :category, String
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
transform :products => :products_copy do |record|
|
|
25
|
+
output record
|
|
26
|
+
end
|
|
27
|
+
"""
|
|
28
|
+
When I execute the definition
|
|
29
|
+
Then the process should exit successfully
|
|
30
|
+
And there should be a "products_copy.csv" data file in the upload directory containing:
|
|
31
|
+
"""
|
|
32
|
+
id,name,category
|
|
33
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
|
34
|
+
CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
Scenario: Empty transformation between the same structures but from a custom CSV format converts quotes and delimiters to the default format
|
|
39
|
+
Given a "products.csv" data file containing:
|
|
40
|
+
"""
|
|
41
|
+
'id';'name';'category'
|
|
42
|
+
'JNI-123';'Just a product name';'Main category > Subcategory > Sub-subcategory'
|
|
43
|
+
'CDI-234';'Another 12" product name';'Smart Insight > Cool stuff > Scripts'
|
|
44
|
+
"""
|
|
45
|
+
And the following definition:
|
|
46
|
+
"""
|
|
47
|
+
source :products do
|
|
48
|
+
delimiter ';'
|
|
49
|
+
quote "'"
|
|
50
|
+
field :id, String
|
|
51
|
+
field :name, String
|
|
52
|
+
field :category, String
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
source :products_converted do
|
|
56
|
+
field :id, String
|
|
57
|
+
field :name, String
|
|
58
|
+
field :category, String
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
transform :products => :products_converted do |record|
|
|
62
|
+
output record
|
|
63
|
+
end
|
|
64
|
+
"""
|
|
65
|
+
When I execute the definition
|
|
66
|
+
Then the process should exit successfully
|
|
67
|
+
And there should be a "products_converted.csv" data file in the upload directory containing:
|
|
68
|
+
"""
|
|
69
|
+
id,name,category
|
|
70
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
|
71
|
+
CDI-234,"Another 12"" product name",Smart Insight > Cool stuff > Scripts
|
|
72
|
+
"""
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Feature: Joining CSV files
|
|
2
|
+
|
|
3
|
+
Scenario: Single file transformation
|
|
4
|
+
Given an "orders.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
id,order_date,customer_id,total_price
|
|
7
|
+
order_1,2011-01-01,customer_1,100
|
|
8
|
+
order_2,2011-02-02,customer_1,200
|
|
9
|
+
order_3,2011-03-03,customer_2,300
|
|
10
|
+
"""
|
|
11
|
+
Given an "order_items.csv" data file containing:
|
|
12
|
+
"""
|
|
13
|
+
order_id,item_id,item_name,item_category,quantity,sales_amount,comment
|
|
14
|
+
order_1,item_1,first item,clothing,1,5,some useful comment
|
|
15
|
+
order_1,item_2,second item,communication,2,6,not so useful comment
|
|
16
|
+
order_2,item_2,second item,communication,5,12,very misleading comment
|
|
17
|
+
"""
|
|
18
|
+
And the following definition:
|
|
19
|
+
"""
|
|
20
|
+
source :orders_file do
|
|
21
|
+
file "orders.csv"
|
|
22
|
+
|
|
23
|
+
field :id, String
|
|
24
|
+
field :order_date, Date
|
|
25
|
+
field :customer_id, String
|
|
26
|
+
field :total_price, Integer
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
source :order_items_file do
|
|
30
|
+
file "order_items.csv"
|
|
31
|
+
|
|
32
|
+
field :order_id, String
|
|
33
|
+
field :item_id, String
|
|
34
|
+
field :item_name, String
|
|
35
|
+
field :item_category, String
|
|
36
|
+
field :quantity, Integer
|
|
37
|
+
field :sales_amount, Integer
|
|
38
|
+
field :comment, String
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
source :sales_items do
|
|
42
|
+
field :order_id, String
|
|
43
|
+
field :order_date, String
|
|
44
|
+
field :new_field, String
|
|
45
|
+
field :customer_id, String
|
|
46
|
+
field :item_id, String
|
|
47
|
+
field :item_name, String
|
|
48
|
+
field :item_category, String
|
|
49
|
+
field :quantity, String
|
|
50
|
+
field :sales_amount, String
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }
|
|
54
|
+
"""
|
|
55
|
+
When I execute the definition
|
|
56
|
+
Then the process should exit successfully
|
|
57
|
+
And there should be a "sales_items.csv" data file in the upload directory containing:
|
|
58
|
+
"""
|
|
59
|
+
order_id,order_date,new_field,customer_id,item_id,item_name,item_category,quantity,sales_amount
|
|
60
|
+
order_1,2011-01-01,,customer_1,item_1,first item,clothing,1,5
|
|
61
|
+
order_1,2011-01-01,,customer_1,item_2,second item,communication,2,6
|
|
62
|
+
order_2,2011-02-02,,customer_1,item_2,second item,communication,5,12
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
Scenario: File transformation with left join
|
|
67
|
+
Given an "orders.csv" data file containing:
|
|
68
|
+
"""
|
|
69
|
+
id,order_date,customer_id,total_price
|
|
70
|
+
order_1,2011-01-01,customer_1,100
|
|
71
|
+
order_2,2011-02-02,customer_1,200
|
|
72
|
+
order_3,2011-03-03,customer_2,300
|
|
73
|
+
"""
|
|
74
|
+
Given an "order_items.csv" data file containing:
|
|
75
|
+
"""
|
|
76
|
+
order_id,item_id,item_name,item_category,quantity,sales_amount,comment
|
|
77
|
+
order_1,item_1,first item,clothing,1,5,some useful comment
|
|
78
|
+
order_1,item_2,second item,communication,2,6,not so useful comment
|
|
79
|
+
order_2,item_2,second item,communication,5,12,very misleading comment
|
|
80
|
+
"""
|
|
81
|
+
And the following definition:
|
|
82
|
+
"""
|
|
83
|
+
source :orders_file do
|
|
84
|
+
file "orders.csv"
|
|
85
|
+
|
|
86
|
+
field :id, String
|
|
87
|
+
field :order_date, Date
|
|
88
|
+
field :customer_id, String
|
|
89
|
+
field :total_price, Integer
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
source :order_items_file do
|
|
93
|
+
file "order_items.csv"
|
|
94
|
+
|
|
95
|
+
field :order_id, String
|
|
96
|
+
field :item_id, String
|
|
97
|
+
field :item_name, String
|
|
98
|
+
field :item_category, String
|
|
99
|
+
field :quantity, Integer
|
|
100
|
+
field :sales_amount, Integer
|
|
101
|
+
field :comment, String
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
source :sales_items do
|
|
105
|
+
field :id, String
|
|
106
|
+
field :item_id, String
|
|
107
|
+
field :item_name, String
|
|
108
|
+
field :order_date, String
|
|
109
|
+
field :customer_id, String
|
|
110
|
+
field :item_category, String
|
|
111
|
+
field :quantity, String
|
|
112
|
+
field :sales_amount, String
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
|
|
116
|
+
"""
|
|
117
|
+
When I execute the definition
|
|
118
|
+
Then the process should exit successfully
|
|
119
|
+
And there should be a "sales_items.csv" data file in the upload directory containing:
|
|
120
|
+
"""
|
|
121
|
+
id,item_id,item_name,order_date,customer_id,item_category,quantity,sales_amount
|
|
122
|
+
order_1,item_1,first item,2011-01-01,customer_1,clothing,1,5
|
|
123
|
+
order_1,item_2,second item,2011-01-01,customer_1,communication,2,6
|
|
124
|
+
order_2,item_2,second item,2011-02-02,customer_1,communication,5,12
|
|
125
|
+
order_3,,,2011-03-03,customer_2,,,
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
Scenario: Close file after join
|
|
130
|
+
Given an "orders.csv" data file containing:
|
|
131
|
+
"""
|
|
132
|
+
id,order_date
|
|
133
|
+
order_1,2011-01-01
|
|
134
|
+
"""
|
|
135
|
+
Given an "order_items.csv" data file containing:
|
|
136
|
+
"""
|
|
137
|
+
order_id,item_id
|
|
138
|
+
order_1,item_1
|
|
139
|
+
"""
|
|
140
|
+
And the following definition:
|
|
141
|
+
"""
|
|
142
|
+
source :orders_file do
|
|
143
|
+
file "orders.csv"
|
|
144
|
+
|
|
145
|
+
field :id, String
|
|
146
|
+
field :order_date, Date
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
source :order_items_file do
|
|
150
|
+
file "order_items.csv"
|
|
151
|
+
|
|
152
|
+
field :order_id, String
|
|
153
|
+
field :item_id, String
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
source :sales_items do
|
|
157
|
+
field :id, String
|
|
158
|
+
field :item_id, String
|
|
159
|
+
field :order_date, String
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
source :sales_items_transformed do
|
|
163
|
+
field :id, String
|
|
164
|
+
field :item_id, String
|
|
165
|
+
field :order_date, String
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
|
|
169
|
+
|
|
170
|
+
transform :sales_items => :sales_items_transformed do |record|
|
|
171
|
+
output record
|
|
172
|
+
end
|
|
173
|
+
"""
|
|
174
|
+
When I execute the definition
|
|
175
|
+
Then the process should exit successfully
|
|
176
|
+
And there should be a "sales_items_transformed.csv" data file in the upload directory containing:
|
|
177
|
+
"""
|
|
178
|
+
id,item_id,order_date
|
|
179
|
+
order_1,item_1,2011-01-01
|
|
180
|
+
"""
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
Feature: Join multiple files into one output file
|
|
2
|
+
|
|
3
|
+
Scenario: Successful transformation
|
|
4
|
+
Given a "products1.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
id,name,category
|
|
7
|
+
PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
|
|
8
|
+
PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
|
|
9
|
+
"""
|
|
10
|
+
And a "products2.csv" data file containing:
|
|
11
|
+
"""
|
|
12
|
+
id,name,category
|
|
13
|
+
PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
|
|
14
|
+
PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
|
|
15
|
+
"""
|
|
16
|
+
And the following definition:
|
|
17
|
+
"""
|
|
18
|
+
source :products do
|
|
19
|
+
file "products*.csv"
|
|
20
|
+
field :id, String
|
|
21
|
+
field :name, String
|
|
22
|
+
field :category, String
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
source :transformed_products do
|
|
26
|
+
field :item, String
|
|
27
|
+
field :title, String
|
|
28
|
+
field :category, String
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
transform :products => :transformed_products do |record|
|
|
32
|
+
record[:item] = record[:id]
|
|
33
|
+
record[:title] = record[:name]
|
|
34
|
+
output record
|
|
35
|
+
end
|
|
36
|
+
"""
|
|
37
|
+
When I execute the definition
|
|
38
|
+
Then the process should exit successfully
|
|
39
|
+
And there should be a "transformed_products.csv" data file in the upload directory containing:
|
|
40
|
+
"""
|
|
41
|
+
item,title,category
|
|
42
|
+
PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
|
|
43
|
+
PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
|
|
44
|
+
PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
|
|
45
|
+
PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
|
|
46
|
+
"""
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Feature: Output rows to file
|
|
2
|
+
|
|
3
|
+
Background:
|
|
4
|
+
Given a "products.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
id,name
|
|
7
|
+
1,Product 1
|
|
8
|
+
2, Product 2
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
Scenario: Output Hash instead of record
|
|
12
|
+
Given the following definition:
|
|
13
|
+
"""
|
|
14
|
+
source :products do
|
|
15
|
+
field :id, String
|
|
16
|
+
field :name, String
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
source :products_copy do
|
|
20
|
+
field :id, String
|
|
21
|
+
field :name, String
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
transform :products => :products_copy do |record|
|
|
25
|
+
output name: record[:name],
|
|
26
|
+
id: record[:id]
|
|
27
|
+
end
|
|
28
|
+
"""
|
|
29
|
+
When I execute the definition
|
|
30
|
+
Then the process should exit successfully
|
|
31
|
+
And there should be a "products_copy.csv" data file in the upload directory containing:
|
|
32
|
+
"""
|
|
33
|
+
id,name
|
|
34
|
+
1,Product 1
|
|
35
|
+
2,Product 2
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
Scenario: Output multiple records for each input row
|
|
40
|
+
Given the following definition:
|
|
41
|
+
"""
|
|
42
|
+
source :products do
|
|
43
|
+
field :id, String
|
|
44
|
+
field :name, String
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
source :products_doubled do
|
|
48
|
+
field :id, String
|
|
49
|
+
field :name, String
|
|
50
|
+
field :counter, Integer
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
transform :products => :products_doubled do |record|
|
|
54
|
+
record[:counter] = 1
|
|
55
|
+
output record
|
|
56
|
+
record[:counter] = 2
|
|
57
|
+
output record
|
|
58
|
+
end
|
|
59
|
+
"""
|
|
60
|
+
When I execute the definition
|
|
61
|
+
Then the process should exit successfully
|
|
62
|
+
And there should be a "products_doubled.csv" data file in the upload directory containing:
|
|
63
|
+
"""
|
|
64
|
+
id,name,counter
|
|
65
|
+
1,Product 1,1
|
|
66
|
+
1,Product 1,2
|
|
67
|
+
2,Product 2,1
|
|
68
|
+
2,Product 2,2
|
|
69
|
+
"""
|
|
70
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Feature: Projection
|
|
2
|
+
|
|
3
|
+
Scenario: Empty transformation projects down if the source structure is a superset of the target structure
|
|
4
|
+
Given a "products.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
id,name,category
|
|
7
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
|
8
|
+
CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
|
|
9
|
+
"""
|
|
10
|
+
And the following definition:
|
|
11
|
+
"""
|
|
12
|
+
source :products do
|
|
13
|
+
field :id, String
|
|
14
|
+
field :name, String
|
|
15
|
+
field :category, String
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
source :products_projected do
|
|
19
|
+
field :id, String
|
|
20
|
+
field :category, String
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
transform :products => :products_projected do |record|
|
|
24
|
+
output record
|
|
25
|
+
end
|
|
26
|
+
"""
|
|
27
|
+
When I execute the definition
|
|
28
|
+
Then the process should exit successfully
|
|
29
|
+
And there should be a "products_projected.csv" data file in the upload directory containing:
|
|
30
|
+
"""
|
|
31
|
+
id,category
|
|
32
|
+
JNI-123,Main category > Subcategory > Sub-subcategory
|
|
33
|
+
CDI-234,Smart Insight > Cool stuff > Scripts
|
|
34
|
+
"""
|