cranium 0.2.0

Files changed (132)
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
data/features/import/import_csv_to_database_with_update_merging.feature
@@ -0,0 +1,46 @@
+ Feature: Import a CSV file into the database with merging
+
+ The merge_on property can be used to specify an id field that is used to detect duplicates while importing.
+ Duplicates are updated and new items are added.
+
+ Scenario: Successful import with merged items
+ Given a database table called "dim_product" with the following fields:
+ | field_name | field_type |
+ | item | TEXT |
+ | title | TEXT |
+ | description | TEXT |
+ And only the following rows in the "dim_product" database table:
+ | item | title | description |
+ | JNI-123 | Just a product name | Very interesting description |
+ | CDI-234 | Another product name | Yet another cool description |
+ And a "products.csv" data file containing:
+ """
+ id,name,description
+ JNI-123,Just a product name,"Very interesting description, updated"
+ CDI-234,Updated product name,Yet another cool description
+ KLM-987,Inserted product name,This is the best product
+ """
+ And the following definition:
+ """
+ source :products do
+ field :id, String
+ field :name, String
+ field :description, String
+ end
+
+ import :products do
+ into :dim_product
+ put :id => :item
+ put :name => :title
+ put :description => :description
+
+ merge_on :id => :item
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "dim_product" table should contain:
+ | item | title | description |
+ | JNI-123 | Just a product name | Very interesting description, updated |
+ | CDI-234 | Updated product name | Yet another cool description |
+ | KLM-987 | Inserted product name | This is the best product |
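The update-then-insert behaviour that merge_on describes can be pictured with a small Sequel sketch. This is only an illustration of the idea, not Cranium's actual merge strategy; the stage_products staging table is a hypothetical holding area for the freshly loaded CSV rows, while dim_product and its columns mirror the scenario above.

    require "sequel"

    DB = Sequel.connect(ENV.fetch("DATABASE_URL"))

    # Hypothetical staging table holding the rows loaded from products.csv.
    DB[:stage_products].each do |row|
      # Try to update an existing product matched on the merge key (id => item).
      updated = DB[:dim_product]
                  .where(item: row[:id])
                  .update(title: row[:name], description: row[:description])

      # No row matched, so this is a new product: insert it.
      DB[:dim_product].insert(item: row[:id], title: row[:name], description: row[:description]) if updated.zero?
    end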
data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature
@@ -0,0 +1,137 @@
+ Feature: Import a CSV file into the database with new dimension values always inserted
+
+ Scenario: Successful import
+ Given a database table called "dim_contact" with the following fields:
+ | field_name | field_type |
+ | contact_key | SERIAL |
+ | user_id | TEXT |
+ | name | TEXT |
+ And only the following rows in the "dim_contact" database table:
+ | contact_key (i) | user_id | name |
+ | 10 | 1 | Alma |
+ And the current value in sequence "dim_contact_contact_key_seq" is 10
+ And a database table called "fct_purchases" with the following fields:
+ | field_name | field_type |
+ | contact_key | INTEGER |
+ | amount | TEXT |
+ And a "purchases.csv" data file containing:
+ """
+ user_id,amount
+ 1,100
+ NA,200
+ NA,300
+ """
+ And the following definition:
+ """
+ source :purchases do
+ field :user_id, String
+ field :amount, String
+ end
+
+ source :transformed_purchases do
+ field :contact_key, Integer
+ field :amount, String
+ end
+
+ transform :purchases => :transformed_purchases do |record|
+ record[:contact_key] = insert :contact_key,
+ table: :dim_contact,
+ record: {contact_key: next_value_in_sequence("dim_contact_contact_key_seq"), user_id: record[:user_id], name: "Unknown contact #{record[:user_id]}"}
+ output record
+ end
+
+ import :transformed_purchases do
+ into :fct_purchases
+ put :contact_key
+ put :amount
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "fct_purchases" table should contain:
+ | contact_key (i) | amount |
+ | 11 | 100 |
+ | 12 | 200 |
+ | 13 | 300 |
+ And the "dim_contact" table should contain:
+ | contact_key (i) | user_id | name |
+ | 10 | 1 | Alma |
+ | 11 | 1 | Unknown contact 1 |
+ | 12 | NA | Unknown contact NA |
+ | 13 | NA | Unknown contact NA |
+
+
+ Scenario: Example use case for the insert
+ If a purchase was made with the predefined placeholder contact identifier (NA in this case), do not look it up, just insert a new contact row.
+ Otherwise use lookup to find or create that contact.
+
+ Given a database table called "dim_contact" with the following fields:
+ | field_name | field_type |
+ | contact_key | SERIAL |
+ | user_id | TEXT |
+ | name | TEXT |
+ And only the following rows in the "dim_contact" database table:
+ | contact_key (i) | user_id | name |
+ | 10 | 1 | Alma |
+ And the current value in sequence "dim_contact_contact_key_seq" is 10
+ And a database table called "fct_purchases" with the following fields:
+ | field_name | field_type |
+ | contact_key | INTEGER |
+ | amount | TEXT |
+ And a "purchases.csv" data file containing:
+ """
+ user_id,amount
+ 1,100
+ NA,200
+ NA,300
+ 2,400
+ 2,500
+ """
+ And the following definition:
+ """
+ source :purchases do
+ field :user_id, String
+ field :amount, String
+ end
+
+ source :transformed_purchases do
+ field :contact_key, Integer
+ field :amount, String
+ end
+
+ transform :purchases => :transformed_purchases do |record|
+ if record[:user_id] == 'NA'
+ record[:contact_key] = insert :contact_key,
+ table: :dim_contact,
+ record: {contact_key: next_value_in_sequence("dim_contact_contact_key_seq"), user_id: record[:user_id], name: "Unknown contact #{record[:user_id]}"}
+ else
+ record[:contact_key] = lookup :contact_key,
+ from_table: :dim_contact,
+ match_column: :user_id,
+ to_value: record[:user_id],
+ if_not_found_then_insert: {contact_key: next_value_in_sequence("dim_contact_contact_key_seq"), name: "Unknown contact #{record[:user_id]}"}
+ end
+ output record
+ end
+
+ import :transformed_purchases do
+ into :fct_purchases
+ put :contact_key
+ put :amount
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "fct_purchases" table should contain:
+ | contact_key (i) | amount |
+ | 10 | 100 |
+ | 11 | 200 |
+ | 12 | 300 |
+ | 13 | 400 |
+ | 13 | 500 |
+ And the "dim_contact" table should contain:
+ | contact_key (i) | user_id | name |
+ | 10 | 1 | Alma |
+ | 11 | NA | Unknown contact NA |
+ | 12 | NA | Unknown contact NA |
+ | 13 | 2 | Unknown contact 2 |
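Both branches above ultimately do the same low-level work: draw the next surrogate key from the dimension's sequence and, when needed, add a placeholder contact row. A rough Sequel sketch of that idea, assuming a PostgreSQL-style nextval() and the tables from the scenario (this is not the gem's own implementation of insert or lookup):

    require "sequel"

    DB = Sequel.connect(ENV.fetch("DATABASE_URL"))

    # Draw the next surrogate key from the dimension's sequence.
    def next_contact_key
      DB["SELECT nextval('dim_contact_contact_key_seq') AS key"].first[:key].to_i
    end

    # Add a placeholder contact and return its freshly assigned key.
    def insert_unknown_contact(user_id)
      key = next_contact_key
      DB[:dim_contact].insert(contact_key: key, user_id: user_id, name: "Unknown contact #{user_id}")
      key
    end

    # Resolve a user_id to a contact_key, creating the contact if it is unknown.
    def lookup_contact_key(user_id)
      row = DB[:dim_contact].where(user_id: user_id).first
      row ? row[:contact_key] : insert_unknown_contact(user_id)
    end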
data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature
@@ -0,0 +1,62 @@
+ Feature: Import a CSV file into the database with new dimension values inserted when not found during lookup
+
+ Scenario: Successful import
+ Given a database table called "dim_contact" with the following fields:
+ | field_name | field_type |
+ | contact_key | SERIAL |
+ | user_id | TEXT |
+ | name | TEXT |
+ And only the following rows in the "dim_contact" database table:
+ | contact_key (i) | user_id | name |
+ | 10 | 1 | Alma |
+ And the current value in sequence "dim_contact_contact_key_seq" is 10
+ And a database table called "fct_purchases" with the following fields:
+ | field_name | field_type |
+ | contact_key | INTEGER |
+ | amount | TEXT |
+ And a "purchases.csv" data file containing:
+ """
+ user_id,amount
+ 1,100
+ 2,200
+ 2,300
+ """
+ And the following definition:
+ """
+ source :purchases do
+ field :user_id, String
+ field :amount, String
+ end
+
+ source :transformed_purchases do
+ field :contact_key, Integer
+ field :amount, String
+ end
+
+ transform :purchases => :transformed_purchases do |record|
+ record[:contact_key] = lookup :contact_key,
+ from_table: :dim_contact,
+ match_column: :user_id,
+ to_value: record[:user_id],
+ if_not_found_then_insert: { contact_key: next_value_in_sequence("dim_contact_contact_key_seq"),
+ name: "Unknown contact #{record[:user_id]}" }
+ output record
+ end
+
+ import :transformed_purchases do
+ into :fct_purchases
+ put :contact_key
+ put :amount
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "fct_purchases" table should contain:
+ | contact_key (i) | amount |
+ | 10 | 100 |
+ | 11 | 200 |
+ | 11 | 300 |
+ And the "dim_contact" table should contain:
+ | contact_key (i) | user_id | name |
+ | 10 | 1 | Alma |
+ | 11 | 2 | Unknown contact 2 |
data/features/import/import_csv_with_field_lookup_transformation.feature
@@ -0,0 +1,125 @@
+ Feature: Import a CSV file into the database with IDs looked up from the database
+
+ Scenario: Successful import
+ Given a database table called "dim_contact" with the following fields:
+ | field_name | field_type |
+ | contact_key | SERIAL |
+ | user_id | TEXT |
+ | name | TEXT |
+ And only the following rows in the "dim_contact" database table:
+ | contact_key (i) | user_id | name |
+ | 10 | 1 | Alma |
+ | 20 | 2 | Korte |
+ And a database table called "fct_purchases" with the following fields:
+ | field_name | field_type |
+ | contact_key | INTEGER |
+ | amount | TEXT |
+ And a "purchases.csv" data file containing:
+ """
+ user_id,amount
+ 1,100
+ 2,200
+ 3,300
+ """
+ And the following definition:
+ """
+ source :purchases do
+ field :user_id, String
+ field :amount, String
+ end
+
+ source :transformed_purchases do
+ field :contact_key, Integer
+ field :amount, String
+ end
+
+ transform :purchases => :transformed_purchases do |record|
+ record[:contact_key] = lookup :contact_key,
+ from_table: :dim_contact,
+ match_column: :user_id,
+ to_value: record[:user_id],
+ if_not_found_then: -1
+ output record
+ end
+
+ import :transformed_purchases do
+ into :fct_purchases
+ put :contact_key => :contact_key
+ put :amount => :amount
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "fct_purchases" table should contain:
+ | contact_key (i) | amount |
+ | 10 | 100 |
+ | 20 | 200 |
+ | -1 | 300 |
+
+
+ Scenario: Multiple fields looked up by one key
+ Given a database table called "dim_contact" with the following fields:
+ | field_name | field_type |
+ | contact_key_1 | INTEGER |
+ | contact_key_2 | INTEGER |
+ | user_id | TEXT |
+ | name | TEXT |
+ And only the following rows in the "dim_contact" database table:
+ | contact_key_1 (i) | contact_key_2 (i) | user_id | name |
+ | 10 | 100 | 1 | Alma |
+ | 20 | 200 | 2 | Korte |
+ And a database table called "fct_purchases" with the following fields:
+ | field_name | field_type |
+ | contact_key_1 | INTEGER |
+ | contact_key_2 | INTEGER |
+ | amount | TEXT |
+ And a "purchases.csv" data file containing:
+ """
+ user_id,amount
+ 1,100
+ 2,200
+ 3,300
+ """
+ And the following definition:
+ """
+ source :purchases do
+ field :user_id, String
+ field :amount, String
+ end
+
+ source :transformed_purchases do
+ field :contact_key_1, Integer
+ field :contact_key_2, Integer
+ field :amount, String
+ end
+
+ transform :purchases => :transformed_purchases do |record|
+ record[:contact_key_1] = lookup :contact_key_1,
+ from_table: :dim_contact,
+ match_column: :user_id,
+ to_value: record[:user_id],
+ if_not_found_then: -1
+
+ record[:contact_key_2] = lookup :contact_key_2,
+ from_table: :dim_contact,
+ match_column: :user_id,
+ to_value: record[:user_id],
+ if_not_found_then: -2
+
+ output record
+ end
+
+ import :transformed_purchases do
+ into :fct_purchases
+ put :contact_key_1 => :contact_key_1
+ put :contact_key_2 => :contact_key_2
+ put :amount => :amount
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "fct_purchases" table should contain:
+ | contact_key_1 (i) | contact_key_2 (i) | amount |
+ | 10 | 100 | 100 |
+ | 20 | 200 | 200 |
+ | -1 | -2 | 300 |
data/features/import/import_csv_with_transformation.feature
@@ -0,0 +1,55 @@
+ Feature: Import a CSV file into the database with a split transformation
+
+ Scenario: Successful import
+ Given a database table called "dim_product" with the following fields:
+ | field_name | field_type |
+ | item | TEXT |
+ | title | TEXT |
+ | category1 | TEXT |
+ | category2 | TEXT |
+ | category3 | TEXT |
+ And a "products.csv" data file containing:
+ """
+ id,name,category
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
+ CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
+ """
+ And the following definition:
+ """
+ source :products do
+ encoding "UTF-8"
+ delimiter ','
+ field :id, String
+ field :name, String
+ field :category, String
+ end
+
+ source :transformed_products do
+ field :id, String
+ field :name, String
+ field :main_category, String
+ field :sub_category, String
+ field :department, String
+ end
+
+ transform :products => :transformed_products do |record|
+ record.split_field :category, into: [:category], by: "|"
+ record.split_field :category, into: [:main_category, :sub_category, :department], by: ">"
+ output record
+ end
+
+ import :transformed_products do
+ into :dim_product
+ put :id => :item
+ put :name => :title
+ put :main_category => :category1
+ put :sub_category => :category2
+ put :department => :category3
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "dim_product" table should contain:
+ | item | title | category1 | category2 | category3 |
+ | JNI-123 | Just a product name | Main category | Subcategory | Sub-subcategory |
+ | CDI-234 | Another product name | Smart Insight | Cool stuff | Cool stuff |
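The chained split_field calls above come down to ordinary string splitting: keep the part before the first "|", then break that on ">" into up to three named pieces. A plain-Ruby sketch of the happy path (record here is just a Hash, not Cranium's record object; note that Cranium's own split_field also copes with fewer pieces than target fields, as the second expected row shows):

    record = {
      id:       "JNI-123",
      name:     "Just a product name",
      category: "Main category > Subcategory > Sub-subcategory > Ultra-subcategory"
    }

    # Keep only the part before the first "|" (if the delimiter is present at all).
    category = record[:category].split("|").first.to_s.strip

    # Break that on ">" and map the pieces onto the three target fields;
    # extra pieces (a fourth level and beyond) are simply dropped.
    main, sub, department = category.split(">").map(&:strip)
    record.update(main_category: main, sub_category: sub, department: department)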
data/features/import/import_multiple_csv_files_without_transformations.feature
@@ -0,0 +1,44 @@
+ Feature: Import multiple CSV files into the database without any transformations
+
+ Scenario: Successful import
+ Given a database table called "dim_product" with the following fields:
+ | field_name | field_type |
+ | item | TEXT |
+ | title | TEXT |
+ | category | TEXT |
+ And a "products1.csv" data file containing:
+ """
+ id,name,category
+ PROD-1,product name 1,Main category > Subcategory > Sub-subcategory
+ PROD-2,product name 2,Main category > Subcategory > Sub-subcategory
+ """
+ And a "products2.csv" data file containing:
+ """
+ id,name,category
+ PROD-3,product name 3,Main category > Subcategory > Sub-subcategory
+ PROD-4,product name 4,Main category > Subcategory > Sub-subcategory
+ """
+ And the following definition:
+ """
+ source :products do
+ file "products*.csv"
+ field :id, String
+ field :name, String
+ field :category, String
+ end
+
+ import :products do
+ into :dim_product
+ put :id => :item
+ put :name => :title
+ put :category => :category
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "dim_product" table should contain:
+ | item | title | category |
+ | PROD-1 | product name 1 | Main category > Subcategory > Sub-subcategory |
+ | PROD-2 | product name 2 | Main category > Subcategory > Sub-subcategory |
+ | PROD-3 | product name 3 | Main category > Subcategory > Sub-subcategory |
+ | PROD-4 | product name 4 | Main category > Subcategory > Sub-subcategory |
data/features/import/import_with_load_id_from_sequence.feature
@@ -0,0 +1,53 @@
+ Feature: Import data and assign a load id (audit information) from a sequence to all records
+
+ Scenario: Successful import
+ Given a database table called "dim_product" with the following fields:
+ | field_name | field_type |
+ | load_id | INTEGER |
+ | item | TEXT |
+ | title | TEXT |
+ And a sequence called "some_sequence" starting from 33
+ And a "products.csv" data file containing:
+ """
+ id,name,category
+ JNI-123,Just a product name,Main category > Subcategory > Sub-subcategory > Ultra-subcategory
+ CDI-234,Another product name,Smart Insight > Cool stuff | 3dim > 2dim > 1dim
+ """
+ And the following definition:
+ """
+ LOAD_ID = sequence("some_sequence").next_value
+
+ source :products do
+ encoding "UTF-8"
+ delimiter ','
+
+ field :id, String
+ field :name, String
+ end
+
+ source :transformed_products do
+ field :load_id, Integer
+
+ field :id, String
+ field :name, String
+ end
+
+ transform :products => :transformed_products do |record|
+ record[:load_id] = LOAD_ID
+ output record
+ end
+
+ import :transformed_products do
+ into :dim_product
+
+ put :load_id
+ put :id => :item
+ put :name => :title
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "dim_product" table should contain:
+ | load_id (i) | item | title |
+ | 34 | JNI-123 | Just a product name |
+ | 34 | CDI-234 | Another product name |
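The point of the definition above is that the sequence is consulted once per run, before any records flow through the transform, so every row in the batch gets the same load_id. A minimal standalone sketch of that pattern with Sequel and the standard CSV library (assumed names; not Cranium's sequence API):

    require "sequel"
    require "csv"

    DB = Sequel.connect(ENV.fetch("DATABASE_URL"))

    # One value per run: every row loaded in this batch shares the same load id.
    load_id = DB["SELECT nextval('some_sequence') AS id"].first[:id].to_i

    CSV.foreach("products.csv", headers: true) do |row|
      DB[:dim_product].insert(load_id: load_id, item: row["id"], title: row["name"])
    end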
data/features/import/import_with_lookup_from_multiple_fields.feature
@@ -0,0 +1,64 @@
+ Feature: Import a CSV file into the database with IDs looked up from multiple columns of the database
+
+ Scenario: Successful import
+ Given a database table called "dim_contact" with the following fields:
+ | field_name | field_type |
+ | contact_key | SERIAL |
+ | user_id_part_1 | TEXT |
+ | user_id_part_2 | TEXT |
+ | name | TEXT |
+ And only the following rows in the "dim_contact" database table:
+ | contact_key (i) | user_id_part_1 | user_id_part_2 | name |
+ | 11 | 1 | 1 | Alma |
+ | 12 | 1 | 2 | Korte |
+ | 21 | 2 | 1 | Szilva |
+ | 22 | 2 | 2 | Barack |
+ And a database table called "fct_purchases" with the following fields:
+ | field_name | field_type |
+ | contact_key | INTEGER |
+ | amount | TEXT |
+ And a "purchases.csv" data file containing:
+ """
+ user_id_1,user_id_2,amount
+ 1,1,100
+ 1,2,200
+ 2,1,300
+ 2,2,400
+ 3,1,500
+ """
+ And the following definition:
+ """
+ source :purchases do
+ field :user_id_1, String
+ field :user_id_2, String
+ field :amount, String
+ end
+
+ source :transformed_purchases do
+ field :contact_key, Integer
+ field :amount, String
+ end
+
+ transform :purchases => :transformed_purchases do |record|
+ record[:contact_key] = lookup :contact_key,
+ from_table: :dim_contact,
+ match: { :user_id_part_1 => record[:user_id_1], :user_id_part_2 => record[:user_id_2] },
+ if_not_found_then: -1
+ output record
+ end
+
+ import :transformed_purchases do
+ into :fct_purchases
+ put :contact_key => :contact_key
+ put :amount => :amount
+ end
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the "fct_purchases" table should contain:
+ | contact_key (i) | amount |
+ | 11 | 100 |
+ | 12 | 200 |
+ | 21 | 300 |
+ | 22 | 400 |
+ | -1 | 500 |
data/features/read.feature
@@ -0,0 +1,56 @@
+ Feature: Read data from a source file
+
+ Scenario:
+ Given a "forms.csv" data file containing:
+ """
+ id,name
+ 1,Landing form
+ 2,Other form
+ """
+ And a "contacts_extract.csv" data file containing:
+ """
+ id,created,form_id
+ 1,2001-01-01,1
+ 2,2002-02-02,2
+ 3,2003-03-03,1
+ """
+ And the following definition:
+ """
+ source :forms do
+ field :id, Integer
+ field :name, String
+ end
+
+ source :contacts_extract do
+ field :id, Integer
+ field :created, String
+ field :form_id, Integer
+ end
+
+ source :contacts do
+ field :id, Integer
+ field :created, String
+ field :form, String
+ end
+
+ form_mapping = {}
+
+ read :forms do |record|
+ form_mapping[record[:id]] = record[:name]
+ end
+
+ transform :contacts_extract => :contacts do |record|
+ record[:form] = form_mapping[record[:form_id]]
+ output record
+ end
+
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And there should be a "contacts.csv" data file in the upload directory containing:
+ """
+ id,created,form
+ 1,2001-01-01,Landing form
+ 2,2002-02-02,Other form
+ 3,2003-03-03,Landing form
+ """
data/features/remove.feature
@@ -0,0 +1,44 @@
+ Feature: Remove source files
+
+ Scenario:
+ Given a "contacts_extract_1.csv" data file containing:
+ """
+ """
+ And a "contacts_extract_2.csv" data file containing:
+ """
+ """
+ And a "clicks_extract_1.csv" data file containing:
+ """
+ """
+ And a "products.csv" data file containing:
+ """
+ """
+ And the following definition:
+ """
+ source :contacts_extract do
+ file "contacts_extract_*.csv"
+ end
+
+ source :clicks_extract do
+ file "clicks_extract_*.csv"
+ end
+
+ source :products do
+ file "products.csv"
+ end
+
+ source :products_transformed do end
+
+ transform :products => :products_transformed do |record|
+ output record
+ end
+
+ remove :contacts_extract, :clicks_extract
+ """
+ When I execute the definition
+ Then the process should exit successfully
+ And the upload directory should contain the following files:
+ | filename |
+ | definition.rb |
+ | products.csv |
+ | products_transformed.csv |