cranium 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
@@ -0,0 +1,55 @@
|
|
1
|
+
Feature: Sequel database connections are fault tolerant
|
2
|
+
|
3
|
+
Scenario:
|
4
|
+
Given a database table called "dim_product" with the following fields:
|
5
|
+
| field_name | field_type |
|
6
|
+
| item | TEXT |
|
7
|
+
| title | TEXT |
|
8
|
+
And a "products.csv" data file containing:
|
9
|
+
"""
|
10
|
+
id,name,category
|
11
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
|
12
|
+
CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
|
13
|
+
"""
|
14
|
+
And the following definition:
|
15
|
+
"""
|
16
|
+
require 'sequel'
|
17
|
+
|
18
|
+
def terminate_connections
|
19
|
+
connection_string = ENV['GREENPLUM_AS_ADMIN_URL'] || "postgres://database_administrator:emarsys@192.168.56.43:5432/cranium"
|
20
|
+
connection = Sequel.connect connection_string, loggers: Cranium.configuration.loggers
|
21
|
+
connection.run("SELECT pg_terminate_backend(procpid) FROM pg_stat_activity WHERE procpid <> pg_backend_pid() AND datname = 'cranium'")
|
22
|
+
end
|
23
|
+
|
24
|
+
source :products do
|
25
|
+
encoding "UTF-8"
|
26
|
+
delimiter ','
|
27
|
+
|
28
|
+
field :id, String
|
29
|
+
field :name, String
|
30
|
+
end
|
31
|
+
|
32
|
+
source :transformed_products do
|
33
|
+
field :id, String
|
34
|
+
field :name, String
|
35
|
+
end
|
36
|
+
|
37
|
+
transform :products => :transformed_products do |record|
|
38
|
+
output record
|
39
|
+
end
|
40
|
+
|
41
|
+
terminate_connections
|
42
|
+
|
43
|
+
import :transformed_products do
|
44
|
+
into :dim_product
|
45
|
+
|
46
|
+
put :id => :item
|
47
|
+
put :name => :title
|
48
|
+
end
|
49
|
+
"""
|
50
|
+
When I execute the definition
|
51
|
+
Then the process should exit successfully
|
52
|
+
And the "dim_product" table should contain:
|
53
|
+
| item | title |
|
54
|
+
| JNI-123 | Just a product name |
|
55
|
+
| CDI-234 | Another product name |
|
@@ -0,0 +1,40 @@
|
|
1
|
+
Given(/^a database table called "([^"]*)" with the following fields:$/) do |table_name, fields|
|
2
|
+
database_table(table_name).create(fields.data)
|
3
|
+
end
|
4
|
+
|
5
|
+
|
6
|
+
Given (/^only the following rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
7
|
+
database_table(table_name).clear
|
8
|
+
step %Q(the following new rows in the "#{table_name}" database table:), rows
|
9
|
+
end
|
10
|
+
|
11
|
+
|
12
|
+
Given (/^the following new rows in the "([^"]*)" database table:$/) do |table_name, rows|
|
13
|
+
database_table(table_name).insert rows.data
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
Given(/^the current value in sequence "([^"]*)" is (\d+)$/) do |sequence_name, current_value|
|
18
|
+
Cranium::Database.connection.run "SELECT setval('#{sequence_name}', #{current_value})"
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
Given(/^a sequence called "([^"]*)" starting from (\d+)$/) do |sequence_name, start_value|
|
23
|
+
database_sequence(sequence_name).create
|
24
|
+
|
25
|
+
step %Q[the current value in sequence "#{sequence_name}" is #{start_value}]
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
Then(/^the "([^"]*)" table should contain:$/) do |table_name, data|
|
30
|
+
expected_data, hashes = [], data.data
|
31
|
+
hashes.map do |hash|
|
32
|
+
new_row = {}
|
33
|
+
hash.each_key do |key|
|
34
|
+
new_row[key.to_sym] = hash[key]
|
35
|
+
end
|
36
|
+
expected_data << new_row
|
37
|
+
end
|
38
|
+
|
39
|
+
expect(database_table(table_name).content(data.fields)).to match_array expected_data
|
40
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
Given /^the definition is executed(?: again)?$/ do
|
2
|
+
step "I execute the definition"
|
3
|
+
end
|
4
|
+
|
5
|
+
|
6
|
+
When /^I execute the definition(?: again)?$/ do
|
7
|
+
execute_definition
|
8
|
+
end
|
9
|
+
|
10
|
+
|
11
|
+
Then /^the process should exit successfully$/ do
|
12
|
+
expect(result_code).to eq(0), "Expected script exit code to be 0, but received #{result_code}\n\n#{script_output}\n"
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
Then /^the process should exit with an error$/ do
|
17
|
+
expect(result_code).to eq(1), "Expected script exit code to be 1, but received #{result_code}\n\n#{script_output}\n"
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
Then /^the error message should contain:$/ do |message|
|
22
|
+
expect(error_output).to include message
|
23
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
Given /^no "([^"]*)" directory/ do |dir_path|
|
2
|
+
upload_directory.remove_directory dir_path
|
3
|
+
end
|
4
|
+
|
5
|
+
|
6
|
+
Given /^an empty "([^"]*)" data file$/ do |file_name|
|
7
|
+
step %Q(a "#{file_name}" data file containing:), ""
|
8
|
+
end
|
9
|
+
|
10
|
+
|
11
|
+
Given /^an? "([^"]*)" data file containing:$/ do |file_name, content|
|
12
|
+
upload_directory.save_file file_name, content
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
Given /^the "([^"]*)" file is deleted$/ do |file_name|
|
17
|
+
upload_directory.delete_file file_name
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
Then /^there should be a "([^"]*)" data file in the upload directory containing:$/ do |file_name, content|
|
22
|
+
expect(upload_directory.file_exists?(file_name)).to be_truthy, "expected file '#{file_name}' to exist"
|
23
|
+
expect(upload_directory.read_file(file_name).chomp).to eq content
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
Then /^the "([^"]*)" directory should contain the following files:$/ do |directory_path, files|
|
28
|
+
expect(Dir.exist?(directory_path)).to be_truthy, "expected directory '#{directory_path}' to exist" # Dir.exists? was removed in Ruby 3.2
|
29
|
+
files_in_dir = Dir["#{directory_path}/*"].map { |file_name| File.basename file_name }.sort
|
30
|
+
expect(files_in_dir.count).to eq files.data.count
|
31
|
+
0.upto files.data.count-1 do |index|
|
32
|
+
expect(files_in_dir[index]).to match Regexp.new(files.data[index][:filename])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
Then /^the upload directory should contain the following files:$/ do |files|
|
38
|
+
step %Q(the "#{Cranium.configuration.upload_path}" directory should contain the following files:), files
|
39
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Cucumber::Ast
|
2
|
+
|
3
|
+
module MultilineArgument
|
4
|
+
class << self
|
5
|
+
|
6
|
+
alias_method :from_old, :from
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
def from(argument)
|
11
|
+
original_result = from_old(argument)
|
12
|
+
if original_result.is_a? Cucumber::Ast::Table
|
13
|
+
Cranium::TestFramework::CucumberTable.from_ast_table(original_result).with_patterns(
|
14
|
+
"NULL" => nil
|
15
|
+
)
|
16
|
+
else
|
17
|
+
original_result
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require_relative "../../lib/cranium"
|
3
|
+
|
4
|
+
# Ensure the log directory exists before the Logger below opens "log/cucumber.log".
# mkdir_p is a no-op for an existing directory, so the old Dir.exists? guard
# (a method removed in Ruby 3.2) is not needed.
FileUtils.mkdir_p("log")
|
5
|
+
|
6
|
+
Cranium.configure do |config|
|
7
|
+
config.greenplum_connection_string = ENV['GREENPLUM_URL'] || "postgres://cranium:cranium@192.168.56.43:5432/cranium"
|
8
|
+
config.gpfdist_url = ENV['GPFDIST_URL'] || "192.168.56.43:8123"
|
9
|
+
config.gpfdist_home_directory = "tmp/custdata"
|
10
|
+
config.upload_directory = "cranium_build"
|
11
|
+
config.loggers << Logger.new("log/cucumber.log")
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
Before do
|
16
|
+
FileUtils.rm_rf Cranium.configuration.upload_path
|
17
|
+
FileUtils.mkdir_p Cranium.configuration.upload_path
|
18
|
+
end
|
19
|
+
|
20
|
+
After do
|
21
|
+
Cranium::TestFramework::DatabaseTable.cleanup
|
22
|
+
Cranium::TestFramework::DatabaseSequence.cleanup
|
23
|
+
end
|
24
|
+
|
25
|
+
World do
|
26
|
+
Cranium::TestFramework::World.new Cranium.configuration.upload_path, Cranium::Database.connection
|
27
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
cucumber_seed = ENV['CUCUMBER_SEED'] ? ENV['CUCUMBER_SEED'].to_i : srand % 0xFFFF
|
2
|
+
cucumber_dry_run = nil
|
3
|
+
|
4
|
+
|
5
|
+
AfterConfiguration do |cucumber_config|
|
6
|
+
original_files = cucumber_config.feature_files
|
7
|
+
cucumber_dry_run = cucumber_config.dry_run?
|
8
|
+
|
9
|
+
config_eigenclass = class << cucumber_config;
|
10
|
+
self
|
11
|
+
end
|
12
|
+
config_eigenclass.send :undef_method, :feature_files
|
13
|
+
config_eigenclass.send(:define_method, :feature_files) do
|
14
|
+
Kernel.srand cucumber_seed
|
15
|
+
original_files.sort_by { Kernel.rand original_files.count }
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
|
20
|
+
at_exit do
|
21
|
+
puts("Cucumber randomized with seed #{cucumber_seed.inspect}") unless cucumber_dry_run
|
22
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
Feature: Deduplicate data in CSV file
|
2
|
+
|
3
|
+
Scenario: Single file transformation
|
4
|
+
Given a "sales_items.csv" data file containing:
|
5
|
+
"""
|
6
|
+
order_id,item,item_name
|
7
|
+
1,Item1,Item name 1
|
8
|
+
2,Item1,Item name 1
|
9
|
+
3,Item2,Item name 2
|
10
|
+
4,Item2,Item name 2
|
11
|
+
5,Item3,Item name 3
|
12
|
+
"""
|
13
|
+
And the following definition:
|
14
|
+
"""
|
15
|
+
source :sales_items do
|
16
|
+
file "sales_items.csv"
|
17
|
+
field :order_id, String
|
18
|
+
field :item, String
|
19
|
+
field :item_name, String
|
20
|
+
end
|
21
|
+
|
22
|
+
source :products do
|
23
|
+
field :item, String
|
24
|
+
field :item_name, String
|
25
|
+
end
|
26
|
+
|
27
|
+
deduplicate :sales_items, into: :products, by: [:item]
|
28
|
+
"""
|
29
|
+
When I execute the definition
|
30
|
+
Then the process should exit successfully
|
31
|
+
And there should be a "products.csv" data file in the upload directory containing:
|
32
|
+
"""
|
33
|
+
item,item_name
|
34
|
+
Item1,Item name 1
|
35
|
+
Item2,Item name 2
|
36
|
+
Item3,Item name 3
|
37
|
+
"""
|
@@ -0,0 +1,72 @@
|
|
1
|
+
Feature: Empty transformation
|
2
|
+
|
3
|
+
Scenario: Empty transformation between the same structures from the default CSV format simply copies the file
|
4
|
+
Given a "products.csv" data file containing:
|
5
|
+
"""
|
6
|
+
id,name,category
|
7
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
8
|
+
CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
|
9
|
+
"""
|
10
|
+
And the following definition:
|
11
|
+
"""
|
12
|
+
source :products do
|
13
|
+
field :id, String
|
14
|
+
field :name, String
|
15
|
+
field :category, String
|
16
|
+
end
|
17
|
+
|
18
|
+
source :products_copy do
|
19
|
+
field :id, String
|
20
|
+
field :name, String
|
21
|
+
field :category, String
|
22
|
+
end
|
23
|
+
|
24
|
+
transform :products => :products_copy do |record|
|
25
|
+
output record
|
26
|
+
end
|
27
|
+
"""
|
28
|
+
When I execute the definition
|
29
|
+
Then the process should exit successfully
|
30
|
+
And there should be a "products_copy.csv" data file in the upload directory containing:
|
31
|
+
"""
|
32
|
+
id,name,category
|
33
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
34
|
+
CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
|
35
|
+
"""
|
36
|
+
|
37
|
+
|
38
|
+
Scenario: Empty transformation between the same structures but from a custom CSV format converts quotes and delimiters to the default format
|
39
|
+
Given a "products.csv" data file containing:
|
40
|
+
"""
|
41
|
+
'id';'name';'category'
|
42
|
+
'JNI-123';'Just a product name';'Main category > Subcategory > Sub-subcategory'
|
43
|
+
'CDI-234';'Another 12" product name';'Smart Insight > Cool stuff > Scripts'
|
44
|
+
"""
|
45
|
+
And the following definition:
|
46
|
+
"""
|
47
|
+
source :products do
|
48
|
+
delimiter ';'
|
49
|
+
quote "'"
|
50
|
+
field :id, String
|
51
|
+
field :name, String
|
52
|
+
field :category, String
|
53
|
+
end
|
54
|
+
|
55
|
+
source :products_converted do
|
56
|
+
field :id, String
|
57
|
+
field :name, String
|
58
|
+
field :category, String
|
59
|
+
end
|
60
|
+
|
61
|
+
transform :products => :products_converted do |record|
|
62
|
+
output record
|
63
|
+
end
|
64
|
+
"""
|
65
|
+
When I execute the definition
|
66
|
+
Then the process should exit successfully
|
67
|
+
And there should be a "products_converted.csv" data file in the upload directory containing:
|
68
|
+
"""
|
69
|
+
id,name,category
|
70
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
71
|
+
CDI-234,"Another 12"" product name",Smart Insight > Cool stuff > Scripts
|
72
|
+
"""
|
@@ -0,0 +1,180 @@
|
|
1
|
+
Feature: Joining CSV files
|
2
|
+
|
3
|
+
Scenario: Single file transformation
|
4
|
+
Given an "orders.csv" data file containing:
|
5
|
+
"""
|
6
|
+
id,order_date,customer_id,total_price
|
7
|
+
order_1,2011-01-01,customer_1,100
|
8
|
+
order_2,2011-02-02,customer_1,200
|
9
|
+
order_3,2011-03-03,customer_2,300
|
10
|
+
"""
|
11
|
+
Given an "order_items.csv" data file containing:
|
12
|
+
"""
|
13
|
+
order_id,item_id,item_name,item_category,quantity,sales_amount,comment
|
14
|
+
order_1,item_1,first item,clothing,1,5,some useful comment
|
15
|
+
order_1,item_2,second item,communication,2,6,not so useful comment
|
16
|
+
order_2,item_2,second item,communication,5,12,very misleading comment
|
17
|
+
"""
|
18
|
+
And the following definition:
|
19
|
+
"""
|
20
|
+
source :orders_file do
|
21
|
+
file "orders.csv"
|
22
|
+
|
23
|
+
field :id, String
|
24
|
+
field :order_date, Date
|
25
|
+
field :customer_id, String
|
26
|
+
field :total_price, Integer
|
27
|
+
end
|
28
|
+
|
29
|
+
source :order_items_file do
|
30
|
+
file "order_items.csv"
|
31
|
+
|
32
|
+
field :order_id, String
|
33
|
+
field :item_id, String
|
34
|
+
field :item_name, String
|
35
|
+
field :item_category, String
|
36
|
+
field :quantity, Integer
|
37
|
+
field :sales_amount, Integer
|
38
|
+
field :comment, String
|
39
|
+
end
|
40
|
+
|
41
|
+
source :sales_items do
|
42
|
+
field :order_id, String
|
43
|
+
field :order_date, String
|
44
|
+
field :new_field, String
|
45
|
+
field :customer_id, String
|
46
|
+
field :item_id, String
|
47
|
+
field :item_name, String
|
48
|
+
field :item_category, String
|
49
|
+
field :quantity, String
|
50
|
+
field :sales_amount, String
|
51
|
+
end
|
52
|
+
|
53
|
+
join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }
|
54
|
+
"""
|
55
|
+
When I execute the definition
|
56
|
+
Then the process should exit successfully
|
57
|
+
And there should be a "sales_items.csv" data file in the upload directory containing:
|
58
|
+
"""
|
59
|
+
order_id,order_date,new_field,customer_id,item_id,item_name,item_category,quantity,sales_amount
|
60
|
+
order_1,2011-01-01,,customer_1,item_1,first item,clothing,1,5
|
61
|
+
order_1,2011-01-01,,customer_1,item_2,second item,communication,2,6
|
62
|
+
order_2,2011-02-02,,customer_1,item_2,second item,communication,5,12
|
63
|
+
"""
|
64
|
+
|
65
|
+
|
66
|
+
Scenario: File transformation with left join
|
67
|
+
Given an "orders.csv" data file containing:
|
68
|
+
"""
|
69
|
+
id,order_date,customer_id,total_price
|
70
|
+
order_1,2011-01-01,customer_1,100
|
71
|
+
order_2,2011-02-02,customer_1,200
|
72
|
+
order_3,2011-03-03,customer_2,300
|
73
|
+
"""
|
74
|
+
Given an "order_items.csv" data file containing:
|
75
|
+
"""
|
76
|
+
order_id,item_id,item_name,item_category,quantity,sales_amount,comment
|
77
|
+
order_1,item_1,first item,clothing,1,5,some useful comment
|
78
|
+
order_1,item_2,second item,communication,2,6,not so useful comment
|
79
|
+
order_2,item_2,second item,communication,5,12,very misleading comment
|
80
|
+
"""
|
81
|
+
And the following definition:
|
82
|
+
"""
|
83
|
+
source :orders_file do
|
84
|
+
file "orders.csv"
|
85
|
+
|
86
|
+
field :id, String
|
87
|
+
field :order_date, Date
|
88
|
+
field :customer_id, String
|
89
|
+
field :total_price, Integer
|
90
|
+
end
|
91
|
+
|
92
|
+
source :order_items_file do
|
93
|
+
file "order_items.csv"
|
94
|
+
|
95
|
+
field :order_id, String
|
96
|
+
field :item_id, String
|
97
|
+
field :item_name, String
|
98
|
+
field :item_category, String
|
99
|
+
field :quantity, Integer
|
100
|
+
field :sales_amount, Integer
|
101
|
+
field :comment, String
|
102
|
+
end
|
103
|
+
|
104
|
+
source :sales_items do
|
105
|
+
field :id, String
|
106
|
+
field :item_id, String
|
107
|
+
field :item_name, String
|
108
|
+
field :order_date, String
|
109
|
+
field :customer_id, String
|
110
|
+
field :item_category, String
|
111
|
+
field :quantity, String
|
112
|
+
field :sales_amount, String
|
113
|
+
end
|
114
|
+
|
115
|
+
join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
|
116
|
+
"""
|
117
|
+
When I execute the definition
|
118
|
+
Then the process should exit successfully
|
119
|
+
And there should be a "sales_items.csv" data file in the upload directory containing:
|
120
|
+
"""
|
121
|
+
id,item_id,item_name,order_date,customer_id,item_category,quantity,sales_amount
|
122
|
+
order_1,item_1,first item,2011-01-01,customer_1,clothing,1,5
|
123
|
+
order_1,item_2,second item,2011-01-01,customer_1,communication,2,6
|
124
|
+
order_2,item_2,second item,2011-02-02,customer_1,communication,5,12
|
125
|
+
order_3,,,2011-03-03,customer_2,,,
|
126
|
+
"""
|
127
|
+
|
128
|
+
|
129
|
+
Scenario: Close file after join
|
130
|
+
Given an "orders.csv" data file containing:
|
131
|
+
"""
|
132
|
+
id,order_date
|
133
|
+
order_1,2011-01-01
|
134
|
+
"""
|
135
|
+
Given an "order_items.csv" data file containing:
|
136
|
+
"""
|
137
|
+
order_id,item_id
|
138
|
+
order_1,item_1
|
139
|
+
"""
|
140
|
+
And the following definition:
|
141
|
+
"""
|
142
|
+
source :orders_file do
|
143
|
+
file "orders.csv"
|
144
|
+
|
145
|
+
field :id, String
|
146
|
+
field :order_date, Date
|
147
|
+
end
|
148
|
+
|
149
|
+
source :order_items_file do
|
150
|
+
file "order_items.csv"
|
151
|
+
|
152
|
+
field :order_id, String
|
153
|
+
field :item_id, String
|
154
|
+
end
|
155
|
+
|
156
|
+
source :sales_items do
|
157
|
+
field :id, String
|
158
|
+
field :item_id, String
|
159
|
+
field :order_date, String
|
160
|
+
end
|
161
|
+
|
162
|
+
source :sales_items_transformed do
|
163
|
+
field :id, String
|
164
|
+
field :item_id, String
|
165
|
+
field :order_date, String
|
166
|
+
end
|
167
|
+
|
168
|
+
join :orders_file, with: :order_items_file, into: :sales_items, match_on: { :order_id => :id }, type: :left
|
169
|
+
|
170
|
+
transform :sales_items => :sales_items_transformed do |record|
|
171
|
+
output record
|
172
|
+
end
|
173
|
+
"""
|
174
|
+
When I execute the definition
|
175
|
+
Then the process should exit successfully
|
176
|
+
And there should be a "sales_items_transformed.csv" data file in the upload directory containing:
|
177
|
+
"""
|
178
|
+
id,item_id,order_date
|
179
|
+
order_1,item_1,2011-01-01
|
180
|
+
"""
|
@@ -0,0 +1,46 @@
|
|
1
|
+
Feature: Join multiple files into one output file
|
2
|
+
|
3
|
+
Scenario: Successful transformation
|
4
|
+
Given a "products1.csv" data file containing:
|
5
|
+
"""
|
6
|
+
id,name,category
|
7
|
+
PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
|
8
|
+
PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
|
9
|
+
"""
|
10
|
+
And a "products2.csv" data file containing:
|
11
|
+
"""
|
12
|
+
id,name,category
|
13
|
+
PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
|
14
|
+
PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
|
15
|
+
"""
|
16
|
+
And the following definition:
|
17
|
+
"""
|
18
|
+
source :products do
|
19
|
+
file "products*.csv"
|
20
|
+
field :id, String
|
21
|
+
field :name, String
|
22
|
+
field :category, String
|
23
|
+
end
|
24
|
+
|
25
|
+
source :transformed_products do
|
26
|
+
field :item, String
|
27
|
+
field :title, String
|
28
|
+
field :category, String
|
29
|
+
end
|
30
|
+
|
31
|
+
transform :products => :transformed_products do |record|
|
32
|
+
record[:item] = record[:id]
|
33
|
+
record[:title] = record[:name]
|
34
|
+
output record
|
35
|
+
end
|
36
|
+
"""
|
37
|
+
When I execute the definition
|
38
|
+
Then the process should exit successfully
|
39
|
+
And there should be a "transformed_products.csv" data file in the upload directory containing:
|
40
|
+
"""
|
41
|
+
item,title,category
|
42
|
+
PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
|
43
|
+
PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
|
44
|
+
PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
|
45
|
+
PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
|
46
|
+
"""
|
@@ -0,0 +1,70 @@
|
|
1
|
+
Feature: Output rows to file
|
2
|
+
|
3
|
+
Background:
|
4
|
+
Given a "products.csv" data file containing:
|
5
|
+
"""
|
6
|
+
id,name
|
7
|
+
1,Product 1
|
8
|
+
2, Product 2
|
9
|
+
"""
|
10
|
+
|
11
|
+
Scenario: Output Hash instead of record
|
12
|
+
Given the following definition:
|
13
|
+
"""
|
14
|
+
source :products do
|
15
|
+
field :id, String
|
16
|
+
field :name, String
|
17
|
+
end
|
18
|
+
|
19
|
+
source :products_copy do
|
20
|
+
field :id, String
|
21
|
+
field :name, String
|
22
|
+
end
|
23
|
+
|
24
|
+
transform :products => :products_copy do |record|
|
25
|
+
output name: record[:name],
|
26
|
+
id: record[:id]
|
27
|
+
end
|
28
|
+
"""
|
29
|
+
When I execute the definition
|
30
|
+
Then the process should exit successfully
|
31
|
+
And there should be a "products_copy.csv" data file in the upload directory containing:
|
32
|
+
"""
|
33
|
+
id,name
|
34
|
+
1,Product 1
|
35
|
+
2,Product 2
|
36
|
+
"""
|
37
|
+
|
38
|
+
|
39
|
+
Scenario: Output multiple records for each input row
|
40
|
+
Given the following definition:
|
41
|
+
"""
|
42
|
+
source :products do
|
43
|
+
field :id, String
|
44
|
+
field :name, String
|
45
|
+
end
|
46
|
+
|
47
|
+
source :products_doubled do
|
48
|
+
field :id, String
|
49
|
+
field :name, String
|
50
|
+
field :counter, Integer
|
51
|
+
end
|
52
|
+
|
53
|
+
transform :products => :products_doubled do |record|
|
54
|
+
record[:counter] = 1
|
55
|
+
output record
|
56
|
+
record[:counter] = 2
|
57
|
+
output record
|
58
|
+
end
|
59
|
+
"""
|
60
|
+
When I execute the definition
|
61
|
+
Then the process should exit successfully
|
62
|
+
And there should be a "products_doubled.csv" data file in the upload directory containing:
|
63
|
+
"""
|
64
|
+
id,name,counter
|
65
|
+
1,Product 1,1
|
66
|
+
1,Product 1,2
|
67
|
+
2,Product 2,1
|
68
|
+
2,Product 2,2
|
69
|
+
"""
|
70
|
+
|
@@ -0,0 +1,34 @@
|
|
1
|
+
Feature: Projection
|
2
|
+
|
3
|
+
Scenario: Empty transformation projects down if the source structure is a superset of the target structure
|
4
|
+
Given a "products.csv" data file containing:
|
5
|
+
"""
|
6
|
+
id,name,category
|
7
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory
|
8
|
+
CDI-234,Another product name,Smart Insight > Cool stuff > Scripts
|
9
|
+
"""
|
10
|
+
And the following definition:
|
11
|
+
"""
|
12
|
+
source :products do
|
13
|
+
field :id, String
|
14
|
+
field :name, String
|
15
|
+
field :category, String
|
16
|
+
end
|
17
|
+
|
18
|
+
source :products_projected do
|
19
|
+
field :id, String
|
20
|
+
field :category, String
|
21
|
+
end
|
22
|
+
|
23
|
+
transform :products => :products_projected do |record|
|
24
|
+
output record
|
25
|
+
end
|
26
|
+
"""
|
27
|
+
When I execute the definition
|
28
|
+
Then the process should exit successfully
|
29
|
+
And there should be a "products_projected.csv" data file in the upload directory containing:
|
30
|
+
"""
|
31
|
+
id,category
|
32
|
+
JNI-123,Main category > Subcategory > Sub-subcategory
|
33
|
+
CDI-234,Smart Insight > Cool stuff > Scripts
|
34
|
+
"""
|