cranium 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/.ruby-version +1 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +3 -0
- data/Vagrantfile +24 -0
- data/bin/cranium +9 -0
- data/config/cucumber.yml +9 -0
- data/cranium.gemspec +26 -0
- data/db/setup.sql +8 -0
- data/docker-compose.yml +8 -0
- data/examples/config.rb +14 -0
- data/examples/deduplication.rb +27 -0
- data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
- data/examples/incremental_extract.rb +17 -0
- data/examples/lookup_with_multiple_fields.rb +25 -0
- data/features/archive.feature +49 -0
- data/features/extract/incremental_extract.feature +56 -0
- data/features/extract/simple_extract.feature +85 -0
- data/features/import/import_csv_to_database_as_delta.feature +38 -0
- data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
- data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
- data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
- data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
- data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
- data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
- data/features/import/import_csv_with_transformation.feature +55 -0
- data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
- data/features/import/import_with_load_id_from_sequence.feature +53 -0
- data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
- data/features/read.feature +56 -0
- data/features/remove.feature +44 -0
- data/features/restore_database_connection.feature +55 -0
- data/features/step_definitions/database_table_steps.rb +40 -0
- data/features/step_definitions/definition_steps.rb +3 -0
- data/features/step_definitions/execution_steps.rb +23 -0
- data/features/step_definitions/file_steps.rb +39 -0
- data/features/support/class_extensions.rb +24 -0
- data/features/support/env.rb +27 -0
- data/features/support/randomize.rb +22 -0
- data/features/support/stop_on_first_error.rb +5 -0
- data/features/transform/deduplication.feature +37 -0
- data/features/transform/empty_transformation.feature +72 -0
- data/features/transform/join.feature +180 -0
- data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
- data/features/transform/output_rows.feature +70 -0
- data/features/transform/projection.feature +34 -0
- data/features/transform/raw_ruby_transformation.feature +69 -0
- data/features/transform/split_field.feature +39 -0
- data/lib/cranium/application.rb +104 -0
- data/lib/cranium/archiver.rb +36 -0
- data/lib/cranium/attribute_dsl.rb +43 -0
- data/lib/cranium/command_line_options.rb +27 -0
- data/lib/cranium/configuration.rb +33 -0
- data/lib/cranium/data_importer.rb +35 -0
- data/lib/cranium/data_reader.rb +48 -0
- data/lib/cranium/data_transformer.rb +126 -0
- data/lib/cranium/database.rb +36 -0
- data/lib/cranium/definition_registry.rb +21 -0
- data/lib/cranium/dimension_manager.rb +65 -0
- data/lib/cranium/dsl/database_definition.rb +23 -0
- data/lib/cranium/dsl/extract_definition.rb +28 -0
- data/lib/cranium/dsl/import_definition.rb +50 -0
- data/lib/cranium/dsl/source_definition.rb +67 -0
- data/lib/cranium/dsl.rb +100 -0
- data/lib/cranium/extensions/file.rb +7 -0
- data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
- data/lib/cranium/external_table.rb +75 -0
- data/lib/cranium/extract/data_extractor.rb +11 -0
- data/lib/cranium/extract/storage.rb +57 -0
- data/lib/cranium/extract/strategy/base.rb +27 -0
- data/lib/cranium/extract/strategy/incremental.rb +16 -0
- data/lib/cranium/extract/strategy/simple.rb +9 -0
- data/lib/cranium/extract/strategy.rb +7 -0
- data/lib/cranium/extract.rb +7 -0
- data/lib/cranium/import_strategy/base.rb +55 -0
- data/lib/cranium/import_strategy/delete_insert.rb +40 -0
- data/lib/cranium/import_strategy/delta.rb +8 -0
- data/lib/cranium/import_strategy/merge.rb +50 -0
- data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
- data/lib/cranium/import_strategy.rb +9 -0
- data/lib/cranium/logging.rb +15 -0
- data/lib/cranium/profiling.rb +13 -0
- data/lib/cranium/progress_output.rb +37 -0
- data/lib/cranium/sequel/hash.rb +32 -0
- data/lib/cranium/sequel.rb +5 -0
- data/lib/cranium/source_registry.rb +21 -0
- data/lib/cranium/test_framework/cucumber_table.rb +140 -0
- data/lib/cranium/test_framework/database_entity.rb +29 -0
- data/lib/cranium/test_framework/database_sequence.rb +16 -0
- data/lib/cranium/test_framework/database_table.rb +33 -0
- data/lib/cranium/test_framework/upload_directory.rb +39 -0
- data/lib/cranium/test_framework/world.rb +66 -0
- data/lib/cranium/test_framework.rb +10 -0
- data/lib/cranium/transformation/duplication_index.rb +42 -0
- data/lib/cranium/transformation/index.rb +83 -0
- data/lib/cranium/transformation/join.rb +141 -0
- data/lib/cranium/transformation/sequence.rb +42 -0
- data/lib/cranium/transformation.rb +8 -0
- data/lib/cranium/transformation_record.rb +45 -0
- data/lib/cranium.rb +57 -0
- data/rake/test.rake +31 -0
- data/spec/cranium/application_spec.rb +166 -0
- data/spec/cranium/archiver_spec.rb +44 -0
- data/spec/cranium/command_line_options_spec.rb +32 -0
- data/spec/cranium/configuration_spec.rb +31 -0
- data/spec/cranium/data_importer_spec.rb +55 -0
- data/spec/cranium/data_transformer_spec.rb +16 -0
- data/spec/cranium/database_spec.rb +69 -0
- data/spec/cranium/definition_registry_spec.rb +45 -0
- data/spec/cranium/dimension_manager_spec.rb +63 -0
- data/spec/cranium/dsl/database_definition_spec.rb +23 -0
- data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
- data/spec/cranium/dsl/import_definition_spec.rb +153 -0
- data/spec/cranium/dsl/source_definition_spec.rb +84 -0
- data/spec/cranium/dsl_spec.rb +119 -0
- data/spec/cranium/external_table_spec.rb +71 -0
- data/spec/cranium/extract/storage_spec.rb +125 -0
- data/spec/cranium/logging_spec.rb +37 -0
- data/spec/cranium/sequel/hash_spec.rb +56 -0
- data/spec/cranium/source_registry_spec.rb +31 -0
- data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
- data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
- data/spec/cranium/transformation/index_spec.rb +178 -0
- data/spec/cranium/transformation/join_spec.rb +43 -0
- data/spec/cranium/transformation/sequence_spec.rb +83 -0
- data/spec/cranium/transformation_record_spec.rb +78 -0
- data/spec/cranium_spec.rb +53 -0
- data/spec/spec_helper.rb +1 -0
- metadata +362 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
Feature: Import a CSV file into the database with merging
|
|
2
|
+
|
|
3
|
+
The merge_on property can be used to specify an id field that is used to detect duplicates while importing.
|
|
4
|
+
Duplicates are updated and new items are added.
|
|
5
|
+
|
|
6
|
+
Scenario: Successful import with merged items
|
|
7
|
+
Given a database table called "dim_product" with the following fields:
|
|
8
|
+
| field_name | field_type |
|
|
9
|
+
| item | TEXT |
|
|
10
|
+
| title | TEXT |
|
|
11
|
+
| description | TEXT |
|
|
12
|
+
And only the following rows in the "dim_product" database table:
|
|
13
|
+
| item | title | description |
|
|
14
|
+
| JNI-123 | Just a product name | Very interesting description |
|
|
15
|
+
| CDI-234 | Another product name | Yet another cool description |
|
|
16
|
+
And a "products.csv" data file containing:
|
|
17
|
+
"""
|
|
18
|
+
id,name,description
|
|
19
|
+
JNI-123,Just a product name,"Very interesting description, updated"
|
|
20
|
+
CDI-234,Updated product name,Yet another cool description
|
|
21
|
+
KLM-987,Inserted product name,This is the best product
|
|
22
|
+
"""
|
|
23
|
+
And the following definition:
|
|
24
|
+
"""
|
|
25
|
+
source :products do
|
|
26
|
+
field :id, String
|
|
27
|
+
field :name, String
|
|
28
|
+
field :description, String
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
import :products do
|
|
32
|
+
into :dim_product
|
|
33
|
+
put :id => :item
|
|
34
|
+
put :name => :title
|
|
35
|
+
put :description => :description
|
|
36
|
+
|
|
37
|
+
merge_on :id => :item
|
|
38
|
+
end
|
|
39
|
+
"""
|
|
40
|
+
When I execute the definition
|
|
41
|
+
Then the process should exit successfully
|
|
42
|
+
And the "dim_product" table should contain:
|
|
43
|
+
| item | title | description |
|
|
44
|
+
| JNI-123 | Just a product name | Very interesting description, updated |
|
|
45
|
+
| CDI-234 | Updated product name | Yet another cool description |
|
|
46
|
+
| KLM-987 | Inserted product name | This is the best product |
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Feature: Import a CSV file into the database with new dimension values always inserted
|
|
2
|
+
|
|
3
|
+
Scenario: Successful import
|
|
4
|
+
Given a database table called "dim_contact" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| contact_key | SERIAL |
|
|
7
|
+
| user_id | TEXT |
|
|
8
|
+
| name | TEXT |
|
|
9
|
+
And only the following rows in the "dim_contact" database table:
|
|
10
|
+
| contact_key (i) | user_id | name |
|
|
11
|
+
| 10 | 1 | Alma |
|
|
12
|
+
And the current value in sequence "dim_contact_contact_key_seq" is 10
|
|
13
|
+
And a database table called "fct_purchases" with the following fields:
|
|
14
|
+
| field_name | field_type |
|
|
15
|
+
| contact_key | INTEGER |
|
|
16
|
+
| amount | TEXT |
|
|
17
|
+
And a "purchases.csv" data file containing:
|
|
18
|
+
"""
|
|
19
|
+
user_id,amount
|
|
20
|
+
1,100
|
|
21
|
+
NA,200
|
|
22
|
+
NA,300
|
|
23
|
+
"""
|
|
24
|
+
And the following definition:
|
|
25
|
+
"""
|
|
26
|
+
source :purchases do
|
|
27
|
+
field :user_id, String
|
|
28
|
+
field :amount, String
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
source :transformed_purchases do
|
|
32
|
+
field :contact_key, Integer
|
|
33
|
+
field :amount, String
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
transform :purchases => :transformed_purchases do |record|
|
|
37
|
+
record[:contact_key] = insert :contact_key,
|
|
38
|
+
table: :dim_contact,
|
|
39
|
+
record: {contact_key: next_value_in_sequence("dim_contact_contact_key_seq"), user_id: record[:user_id], name: "Unknown contact #{record[:user_id]}"}
|
|
40
|
+
output record
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
import :transformed_purchases do
|
|
44
|
+
into :fct_purchases
|
|
45
|
+
put :contact_key
|
|
46
|
+
put :amount
|
|
47
|
+
end
|
|
48
|
+
"""
|
|
49
|
+
When I execute the definition
|
|
50
|
+
Then the process should exit successfully
|
|
51
|
+
And the "fct_purchases" table should contain:
|
|
52
|
+
| contact_key (i) | amount |
|
|
53
|
+
| 11 | 100 |
|
|
54
|
+
| 12 | 200 |
|
|
55
|
+
| 13 | 300 |
|
|
56
|
+
And the "dim_contact" table should contain:
|
|
57
|
+
| contact_key (i) | user_id | name |
|
|
58
|
+
| 10 | 1 | Alma |
|
|
59
|
+
| 11 | 1 | Unknown contact 1 |
|
|
60
|
+
| 12 | NA | Unknown contact NA |
|
|
61
|
+
| 13 | NA | Unknown contact NA |
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
Scenario: Example use case for the insert
|
|
65
|
+
If a purchase is made by a predefined contact identifier (NA in this case), do not look it up; always insert a new contact.
|
|
66
|
+
Otherwise, use lookup to find or create that contact.
|
|
67
|
+
|
|
68
|
+
Given a database table called "dim_contact" with the following fields:
|
|
69
|
+
| field_name | field_type |
|
|
70
|
+
| contact_key | SERIAL |
|
|
71
|
+
| user_id | TEXT |
|
|
72
|
+
| name | TEXT |
|
|
73
|
+
And only the following rows in the "dim_contact" database table:
|
|
74
|
+
| contact_key (i) | user_id | name |
|
|
75
|
+
| 10 | 1 | Alma |
|
|
76
|
+
And the current value in sequence "dim_contact_contact_key_seq" is 10
|
|
77
|
+
And a database table called "fct_purchases" with the following fields:
|
|
78
|
+
| field_name | field_type |
|
|
79
|
+
| contact_key | INTEGER |
|
|
80
|
+
| amount | TEXT |
|
|
81
|
+
And a "purchases.csv" data file containing:
|
|
82
|
+
"""
|
|
83
|
+
user_id,amount
|
|
84
|
+
1,100
|
|
85
|
+
NA,200
|
|
86
|
+
NA,300
|
|
87
|
+
2,400
|
|
88
|
+
2,500
|
|
89
|
+
"""
|
|
90
|
+
And the following definition:
|
|
91
|
+
"""
|
|
92
|
+
source :purchases do
|
|
93
|
+
field :user_id, String
|
|
94
|
+
field :amount, String
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
source :transformed_purchases do
|
|
98
|
+
field :contact_key, Integer
|
|
99
|
+
field :amount, String
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
transform :purchases => :transformed_purchases do |record|
|
|
103
|
+
if record[:user_id] == 'NA'
|
|
104
|
+
record[:contact_key] = insert :contact_key,
|
|
105
|
+
table: :dim_contact,
|
|
106
|
+
record: {contact_key: next_value_in_sequence("dim_contact_contact_key_seq"), user_id: record[:user_id], name: "Unknown contact #{record[:user_id]}"}
|
|
107
|
+
else
|
|
108
|
+
record[:contact_key] = lookup :contact_key,
|
|
109
|
+
from_table: :dim_contact,
|
|
110
|
+
match_column: :user_id,
|
|
111
|
+
to_value: record[:user_id],
|
|
112
|
+
if_not_found_then_insert: {contact_key: next_value_in_sequence("dim_contact_contact_key_seq"), name: "Unknown contact #{record[:user_id]}"}
|
|
113
|
+
end
|
|
114
|
+
output record
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
import :transformed_purchases do
|
|
118
|
+
into :fct_purchases
|
|
119
|
+
put :contact_key
|
|
120
|
+
put :amount
|
|
121
|
+
end
|
|
122
|
+
"""
|
|
123
|
+
When I execute the definition
|
|
124
|
+
Then the process should exit successfully
|
|
125
|
+
And the "fct_purchases" table should contain:
|
|
126
|
+
| contact_key (i) | amount |
|
|
127
|
+
| 10 | 100 |
|
|
128
|
+
| 11 | 200 |
|
|
129
|
+
| 12 | 300 |
|
|
130
|
+
| 13 | 400 |
|
|
131
|
+
| 13 | 500 |
|
|
132
|
+
And the "dim_contact" table should contain:
|
|
133
|
+
| contact_key (i) | user_id | name |
|
|
134
|
+
| 10 | 1 | Alma |
|
|
135
|
+
| 11 | NA | Unknown contact NA |
|
|
136
|
+
| 12 | NA | Unknown contact NA |
|
|
137
|
+
| 13 | 2 | Unknown contact 2 |
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
Feature: Import a CSV file into the database with new dimension values inserted when not found during lookup
|
|
2
|
+
|
|
3
|
+
Scenario: Successful import
|
|
4
|
+
Given a database table called "dim_contact" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| contact_key | SERIAL |
|
|
7
|
+
| user_id | TEXT |
|
|
8
|
+
| name | TEXT |
|
|
9
|
+
And only the following rows in the "dim_contact" database table:
|
|
10
|
+
| contact_key (i) | user_id | name |
|
|
11
|
+
| 10 | 1 | Alma |
|
|
12
|
+
And the current value in sequence "dim_contact_contact_key_seq" is 10
|
|
13
|
+
And a database table called "fct_purchases" with the following fields:
|
|
14
|
+
| field_name | field_type |
|
|
15
|
+
| contact_key | INTEGER |
|
|
16
|
+
| amount | TEXT |
|
|
17
|
+
And a "purchases.csv" data file containing:
|
|
18
|
+
"""
|
|
19
|
+
user_id,amount
|
|
20
|
+
1,100
|
|
21
|
+
2,200
|
|
22
|
+
2,300
|
|
23
|
+
"""
|
|
24
|
+
And the following definition:
|
|
25
|
+
"""
|
|
26
|
+
source :purchases do
|
|
27
|
+
field :user_id, String
|
|
28
|
+
field :amount, String
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
source :transformed_purchases do
|
|
32
|
+
field :contact_key, Integer
|
|
33
|
+
field :amount, String
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
transform :purchases => :transformed_purchases do |record|
|
|
37
|
+
record[:contact_key] = lookup :contact_key,
|
|
38
|
+
from_table: :dim_contact,
|
|
39
|
+
match_column: :user_id,
|
|
40
|
+
to_value: record[:user_id],
|
|
41
|
+
if_not_found_then_insert: { contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
|
|
42
|
+
name: "Unknown contact #{record[:user_id]}" }
|
|
43
|
+
output record
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
import :transformed_purchases do
|
|
47
|
+
into :fct_purchases
|
|
48
|
+
put :contact_key
|
|
49
|
+
put :amount
|
|
50
|
+
end
|
|
51
|
+
"""
|
|
52
|
+
When I execute the definition
|
|
53
|
+
Then the process should exit successfully
|
|
54
|
+
And the "fct_purchases" table should contain:
|
|
55
|
+
| contact_key (i) | amount |
|
|
56
|
+
| 10 | 100 |
|
|
57
|
+
| 11 | 200 |
|
|
58
|
+
| 11 | 300 |
|
|
59
|
+
And the "dim_contact" table should contain:
|
|
60
|
+
| contact_key (i) | user_id | name |
|
|
61
|
+
| 10 | 1 | Alma |
|
|
62
|
+
| 11 | 2 | Unknown contact 2 |
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Feature: Import a CSV file into the database with IDs looked up from the database
|
|
2
|
+
|
|
3
|
+
Scenario: Successful import
|
|
4
|
+
Given a database table called "dim_contact" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| contact_key | SERIAL |
|
|
7
|
+
| user_id | TEXT |
|
|
8
|
+
| name | TEXT |
|
|
9
|
+
And only the following rows in the "dim_contact" database table:
|
|
10
|
+
| contact_key (i) | user_id | name |
|
|
11
|
+
| 10 | 1 | Alma |
|
|
12
|
+
| 20 | 2 | Korte |
|
|
13
|
+
And a database table called "fct_purchases" with the following fields:
|
|
14
|
+
| field_name | field_type |
|
|
15
|
+
| contact_key | INTEGER |
|
|
16
|
+
| amount | TEXT |
|
|
17
|
+
And a "purchases.csv" data file containing:
|
|
18
|
+
"""
|
|
19
|
+
user_id,amount
|
|
20
|
+
1,100
|
|
21
|
+
2,200
|
|
22
|
+
3,300
|
|
23
|
+
"""
|
|
24
|
+
And the following definition:
|
|
25
|
+
"""
|
|
26
|
+
source :purchases do
|
|
27
|
+
field :user_id, String
|
|
28
|
+
field :amount, String
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
source :transformed_purchases do
|
|
32
|
+
field :contact_key, Integer
|
|
33
|
+
field :amount, String
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
transform :purchases => :transformed_purchases do |record|
|
|
37
|
+
record[:contact_key] = lookup :contact_key,
|
|
38
|
+
from_table: :dim_contact,
|
|
39
|
+
match_column: :user_id,
|
|
40
|
+
to_value: record[:user_id],
|
|
41
|
+
if_not_found_then: -1
|
|
42
|
+
output record
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
import :transformed_purchases do
|
|
46
|
+
into :fct_purchases
|
|
47
|
+
put :contact_key => :contact_key
|
|
48
|
+
put :amount => :amount
|
|
49
|
+
end
|
|
50
|
+
"""
|
|
51
|
+
When I execute the definition
|
|
52
|
+
Then the process should exit successfully
|
|
53
|
+
And the "fct_purchases" table should contain:
|
|
54
|
+
| contact_key (i) | amount |
|
|
55
|
+
| 10 | 100 |
|
|
56
|
+
| 20 | 200 |
|
|
57
|
+
| -1 | 300 |
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
Scenario: Multiple fields looked up by one key
|
|
61
|
+
Given a database table called "dim_contact" with the following fields:
|
|
62
|
+
| field_name | field_type |
|
|
63
|
+
| contact_key_1 | INTEGER |
|
|
64
|
+
| contact_key_2 | INTEGER |
|
|
65
|
+
| user_id | TEXT |
|
|
66
|
+
| name | TEXT |
|
|
67
|
+
And only the following rows in the "dim_contact" database table:
|
|
68
|
+
| contact_key_1 (i) | contact_key_2 (i) | user_id | name |
|
|
69
|
+
| 10 | 100 | 1 | Alma |
|
|
70
|
+
| 20 | 200 | 2 | Korte |
|
|
71
|
+
And a database table called "fct_purchases" with the following fields:
|
|
72
|
+
| field_name | field_type |
|
|
73
|
+
| contact_key_1 | INTEGER |
|
|
74
|
+
| contact_key_2 | INTEGER |
|
|
75
|
+
| amount | TEXT |
|
|
76
|
+
And a "purchases.csv" data file containing:
|
|
77
|
+
"""
|
|
78
|
+
user_id,amount
|
|
79
|
+
1,100
|
|
80
|
+
2,200
|
|
81
|
+
3,300
|
|
82
|
+
"""
|
|
83
|
+
And the following definition:
|
|
84
|
+
"""
|
|
85
|
+
source :purchases do
|
|
86
|
+
field :user_id, String
|
|
87
|
+
field :amount, String
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
source :transformed_purchases do
|
|
91
|
+
field :contact_key_1, Integer
|
|
92
|
+
field :contact_key_2, Integer
|
|
93
|
+
field :amount, String
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
transform :purchases => :transformed_purchases do |record|
|
|
97
|
+
record[:contact_key_1] = lookup :contact_key_1,
|
|
98
|
+
from_table: :dim_contact,
|
|
99
|
+
match_column: :user_id,
|
|
100
|
+
to_value: record[:user_id],
|
|
101
|
+
if_not_found_then: -1
|
|
102
|
+
|
|
103
|
+
record[:contact_key_2] = lookup :contact_key_2,
|
|
104
|
+
from_table: :dim_contact,
|
|
105
|
+
match_column: :user_id,
|
|
106
|
+
to_value: record[:user_id],
|
|
107
|
+
if_not_found_then: -2
|
|
108
|
+
|
|
109
|
+
output record
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
import :transformed_purchases do
|
|
113
|
+
into :fct_purchases
|
|
114
|
+
put :contact_key_1 => :contact_key_1
|
|
115
|
+
put :contact_key_2 => :contact_key_2
|
|
116
|
+
put :amount => :amount
|
|
117
|
+
end
|
|
118
|
+
"""
|
|
119
|
+
When I execute the definition
|
|
120
|
+
Then the process should exit successfully
|
|
121
|
+
And the "fct_purchases" table should contain:
|
|
122
|
+
| contact_key_1 (i) | contact_key_2 (i) | amount |
|
|
123
|
+
| 10 | 100 | 100 |
|
|
124
|
+
| 20 | 200 | 200 |
|
|
125
|
+
| -1 | -2 | 300 |
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
Feature: Import a CSV file into the database with a split transformation
|
|
2
|
+
|
|
3
|
+
Scenario: Successful import
|
|
4
|
+
Given a database table called "dim_product" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| item | TEXT |
|
|
7
|
+
| title | TEXT |
|
|
8
|
+
| category1 | TEXT |
|
|
9
|
+
| category2 | TEXT |
|
|
10
|
+
| category3 | TEXT |
|
|
11
|
+
And a "products.csv" data file containing:
|
|
12
|
+
"""
|
|
13
|
+
id,name,category
|
|
14
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
|
|
15
|
+
CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
|
|
16
|
+
"""
|
|
17
|
+
And the following definition:
|
|
18
|
+
"""
|
|
19
|
+
source :products do
|
|
20
|
+
encoding "UTF-8"
|
|
21
|
+
delimiter ','
|
|
22
|
+
field :id, String
|
|
23
|
+
field :name, String
|
|
24
|
+
field :category, String
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
source :transformed_products do
|
|
28
|
+
field :id, String
|
|
29
|
+
field :name, String
|
|
30
|
+
field :main_category, String
|
|
31
|
+
field :sub_category, String
|
|
32
|
+
field :department, String
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
transform :products => :transformed_products do |record|
|
|
36
|
+
record.split_field :category, into: [:category], by: "|"
|
|
37
|
+
record.split_field :category, into: [:main_category, :sub_category, :department], by: ">"
|
|
38
|
+
output record
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
import :transformed_products do
|
|
42
|
+
into :dim_product
|
|
43
|
+
put :id => :item
|
|
44
|
+
put :name => :title
|
|
45
|
+
put :main_category => :category1
|
|
46
|
+
put :sub_category => :category2
|
|
47
|
+
put :department => :category3
|
|
48
|
+
end
|
|
49
|
+
"""
|
|
50
|
+
When I execute the definition
|
|
51
|
+
Then the process should exit successfully
|
|
52
|
+
And the "dim_product" table should contain:
|
|
53
|
+
| item | title | category1 | category2 | category3 |
|
|
54
|
+
| JNI-123 | Just a product name | Main category | Subcategory | Sub-subcategory |
|
|
55
|
+
| CDI-234 | Another product name | Smart Insight | Cool stuff | Cool stuff |
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Feature: Import multiple CSV files into the database without any transformations
|
|
2
|
+
|
|
3
|
+
Scenario: Successful import
|
|
4
|
+
Given a database table called "dim_product" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| item | TEXT |
|
|
7
|
+
| title | TEXT |
|
|
8
|
+
| category | TEXT |
|
|
9
|
+
And a "products1.csv" data file containing:
|
|
10
|
+
"""
|
|
11
|
+
id,name,category
|
|
12
|
+
PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
|
|
13
|
+
PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
|
|
14
|
+
"""
|
|
15
|
+
And a "products2.csv" data file containing:
|
|
16
|
+
"""
|
|
17
|
+
id,name,category
|
|
18
|
+
PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
|
|
19
|
+
PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
|
|
20
|
+
"""
|
|
21
|
+
And the following definition:
|
|
22
|
+
"""
|
|
23
|
+
source :products do
|
|
24
|
+
file "products*.csv"
|
|
25
|
+
field :id, String
|
|
26
|
+
field :name, String
|
|
27
|
+
field :category, String
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
import :products do
|
|
31
|
+
into :dim_product
|
|
32
|
+
put :id => :item
|
|
33
|
+
put :name => :title
|
|
34
|
+
put :category => :category
|
|
35
|
+
end
|
|
36
|
+
"""
|
|
37
|
+
When I execute the definition
|
|
38
|
+
Then the process should exit successfully
|
|
39
|
+
And the "dim_product" table should contain:
|
|
40
|
+
| item | title | category |
|
|
41
|
+
| PROD-1 | product name 1 | Main category > Subcategory > Sub-subcategory |
|
|
42
|
+
| PROD-2 | product name 2 | Main category > Subcategory > Sub-subcategory |
|
|
43
|
+
| PROD-3 | product name 3 | Main category > Subcategory > Sub-subcategory |
|
|
44
|
+
| PROD-4 | product name 4 | Main category > Subcategory > Sub-subcategory |
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Feature: Import data and assign a load id (audit information) from a sequence to all records
|
|
2
|
+
|
|
3
|
+
Scenario: Successful import
|
|
4
|
+
Given a database table called "dim_product" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| load_id | INTEGER |
|
|
7
|
+
| item | TEXT |
|
|
8
|
+
| title | TEXT |
|
|
9
|
+
And a sequence called "some_sequence" starting from 33
|
|
10
|
+
And a "products.csv" data file containing:
|
|
11
|
+
"""
|
|
12
|
+
id,name,category
|
|
13
|
+
JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
|
|
14
|
+
CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
|
|
15
|
+
"""
|
|
16
|
+
And the following definition:
|
|
17
|
+
"""
|
|
18
|
+
LOAD_ID = sequence("some_sequence").next_value
|
|
19
|
+
|
|
20
|
+
source :products do
|
|
21
|
+
encoding "UTF-8"
|
|
22
|
+
delimiter ','
|
|
23
|
+
|
|
24
|
+
field :id, String
|
|
25
|
+
field :name, String
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
source :transformed_products do
|
|
29
|
+
field :load_id, Integer
|
|
30
|
+
|
|
31
|
+
field :id, String
|
|
32
|
+
field :name, String
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
transform :products => :transformed_products do |record|
|
|
36
|
+
record[:load_id] = LOAD_ID
|
|
37
|
+
output record
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
import :transformed_products do
|
|
41
|
+
into :dim_product
|
|
42
|
+
|
|
43
|
+
put :load_id
|
|
44
|
+
put :id => :item
|
|
45
|
+
put :name => :title
|
|
46
|
+
end
|
|
47
|
+
"""
|
|
48
|
+
When I execute the definition
|
|
49
|
+
Then the process should exit successfully
|
|
50
|
+
And the "dim_product" table should contain:
|
|
51
|
+
| load_id (i) | item | title |
|
|
52
|
+
| 34 | JNI-123 | Just a product name |
|
|
53
|
+
| 34 | CDI-234 | Another product name |
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
Feature: Import a CSV file into the database with IDs looked up from multiple columns of the database
|
|
2
|
+
|
|
3
|
+
Scenario: Successful import
|
|
4
|
+
Given a database table called "dim_contact" with the following fields:
|
|
5
|
+
| field_name | field_type |
|
|
6
|
+
| contact_key | SERIAL |
|
|
7
|
+
| user_id_part_1 | TEXT |
|
|
8
|
+
| user_id_part_2 | TEXT |
|
|
9
|
+
| name | TEXT |
|
|
10
|
+
And only the following rows in the "dim_contact" database table:
|
|
11
|
+
| contact_key (i) | user_id_part_1 | user_id_part_2 | name |
|
|
12
|
+
| 11 | 1 | 1 | Alma |
|
|
13
|
+
| 12 | 1 | 2 | Korte |
|
|
14
|
+
| 21 | 2 | 1 | Szilva |
|
|
15
|
+
| 22 | 2 | 2 | Barack |
|
|
16
|
+
And a database table called "fct_purchases" with the following fields:
|
|
17
|
+
| field_name | field_type |
|
|
18
|
+
| contact_key | INTEGER |
|
|
19
|
+
| amount | TEXT |
|
|
20
|
+
And a "purchases.csv" data file containing:
|
|
21
|
+
"""
|
|
22
|
+
user_id_1,user_id_2,amount
|
|
23
|
+
1,1,100
|
|
24
|
+
1,2,200
|
|
25
|
+
2,1,300
|
|
26
|
+
2,2,400
|
|
27
|
+
3,1,500
|
|
28
|
+
"""
|
|
29
|
+
And the following definition:
|
|
30
|
+
"""
|
|
31
|
+
source :purchases do
|
|
32
|
+
field :user_id_1, String
|
|
33
|
+
field :user_id_2, String
|
|
34
|
+
field :amount, String
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
source :transformed_purchases do
|
|
38
|
+
field :contact_key, Integer
|
|
39
|
+
field :amount, String
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
transform :purchases => :transformed_purchases do |record|
|
|
43
|
+
record[:contact_key] = lookup :contact_key,
|
|
44
|
+
from_table: :dim_contact,
|
|
45
|
+
match: { :user_id_part_1 => record[:user_id_1], :user_id_part_2 => record[:user_id_2] },
|
|
46
|
+
if_not_found_then: -1
|
|
47
|
+
output record
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
import :transformed_purchases do
|
|
51
|
+
into :fct_purchases
|
|
52
|
+
put :contact_key => :contact_key
|
|
53
|
+
put :amount => :amount
|
|
54
|
+
end
|
|
55
|
+
"""
|
|
56
|
+
When I execute the definition
|
|
57
|
+
Then the process should exit successfully
|
|
58
|
+
And the "fct_purchases" table should contain:
|
|
59
|
+
| contact_key (i) | amount |
|
|
60
|
+
| 11 | 100 |
|
|
61
|
+
| 12 | 200 |
|
|
62
|
+
| 21 | 300 |
|
|
63
|
+
| 22 | 400 |
|
|
64
|
+
| -1 | 500 |
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
Feature: Read source files
|
|
2
|
+
|
|
3
|
+
Scenario:
|
|
4
|
+
Given a "forms.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
id,name
|
|
7
|
+
1,Landing form
|
|
8
|
+
2,Other form
|
|
9
|
+
"""
|
|
10
|
+
And a "contacts_extract.csv" data file containing:
|
|
11
|
+
"""
|
|
12
|
+
id,created,form_id
|
|
13
|
+
1,2001-01-01,1
|
|
14
|
+
2,2002-02-02,2
|
|
15
|
+
3,2003-03-03,1
|
|
16
|
+
"""
|
|
17
|
+
And the following definition:
|
|
18
|
+
"""
|
|
19
|
+
source :forms do
|
|
20
|
+
field :id, Integer
|
|
21
|
+
field :name, String
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
source :contacts_extract do
|
|
25
|
+
field :id, Integer
|
|
26
|
+
field :created, String
|
|
27
|
+
field :form_id, Integer
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
source :contacts do
|
|
31
|
+
field :id, Integer
|
|
32
|
+
field :created, String
|
|
33
|
+
field :form, String
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
form_mapping = {}
|
|
37
|
+
|
|
38
|
+
read :forms do |record|
|
|
39
|
+
form_mapping[record[:id]] = record[:name]
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
transform :contacts_extract => :contacts do |record|
|
|
43
|
+
record[:form] = form_mapping[record[:form_id]]
|
|
44
|
+
output record
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
"""
|
|
48
|
+
When I execute the definition
|
|
49
|
+
Then the process should exit successfully
|
|
50
|
+
And there should be a "contacts.csv" data file in the upload directory containing:
|
|
51
|
+
"""
|
|
52
|
+
id,created,form
|
|
53
|
+
1,2001-01-01,Landing form
|
|
54
|
+
2,2002-02-02,Other form
|
|
55
|
+
3,2003-03-03,Landing form
|
|
56
|
+
"""
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Feature: Remove source files
|
|
2
|
+
|
|
3
|
+
Scenario:
|
|
4
|
+
Given a "contacts_extract_1.csv" data file containing:
|
|
5
|
+
"""
|
|
6
|
+
"""
|
|
7
|
+
And a "contacts_extract_2.csv" data file containing:
|
|
8
|
+
"""
|
|
9
|
+
"""
|
|
10
|
+
And a "clicks_extract_1.csv" data file containing:
|
|
11
|
+
"""
|
|
12
|
+
"""
|
|
13
|
+
And a "products.csv" data file containing:
|
|
14
|
+
"""
|
|
15
|
+
"""
|
|
16
|
+
And the following definition:
|
|
17
|
+
"""
|
|
18
|
+
source :contacts_extract do
|
|
19
|
+
file "contacts_extract_*.csv"
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
source :clicks_extract do
|
|
23
|
+
file "clicks_extract_*.csv"
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
source :products do
|
|
27
|
+
file "products.csv"
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
source :products_transformed do end
|
|
31
|
+
|
|
32
|
+
transform :products => :products_transformed do |record|
|
|
33
|
+
output record
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
remove :contacts_extract, :clicks_extract
|
|
37
|
+
"""
|
|
38
|
+
When I execute the definition
|
|
39
|
+
Then the process should exit successfully
|
|
40
|
+
And the upload directory should contain the following files:
|
|
41
|
+
| filename |
|
|
42
|
+
| definition.rb |
|
|
43
|
+
| products.csv |
|
|
44
|
+
| products_transformed.csv |
|