cranium 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
metadata ADDED
@@ -0,0 +1,362 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cranium
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Emarsys Technologies
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: progressbar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: sequel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '4'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '4'
55
+ - !ruby/object:Gem::Dependency
56
+ name: slop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '10'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '10'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3'
111
+ - !ruby/object:Gem::Dependency
112
+ name: ruby-prof
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: cucumber
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '1'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '1'
139
+ description: Provides Extract, Transform and Load functionality for loading data from
140
+ CSV files to a Greenplum database.
141
+ email:
142
+ - smart-insight-dev@emarsys.com
143
+ executables:
144
+ - cranium
145
+ extensions: []
146
+ extra_rdoc_files: []
147
+ files:
148
+ - ".gitignore"
149
+ - ".ruby-version"
150
+ - Gemfile
151
+ - LICENSE.txt
152
+ - README.md
153
+ - Rakefile
154
+ - Vagrantfile
155
+ - bin/cranium
156
+ - config/cucumber.yml
157
+ - cranium.gemspec
158
+ - db/setup.sql
159
+ - docker-compose.yml
160
+ - examples/config.rb
161
+ - examples/deduplication.rb
162
+ - examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb
163
+ - examples/incremental_extract.rb
164
+ - examples/lookup_with_multiple_fields.rb
165
+ - features/archive.feature
166
+ - features/extract/incremental_extract.feature
167
+ - features/extract/simple_extract.feature
168
+ - features/import/import_csv_to_database_as_delta.feature
169
+ - features/import/import_csv_to_database_with_delete_insert_merging.feature
170
+ - features/import/import_csv_to_database_with_truncate_insert.feature
171
+ - features/import/import_csv_to_database_with_update_merging.feature
172
+ - features/import/import_csv_with_always_inserting_new_dimension_keys.feature
173
+ - features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature
174
+ - features/import/import_csv_with_field_lookup_transformation.feature
175
+ - features/import/import_csv_with_transformation.feature
176
+ - features/import/import_multiple_csv_files_without_transformations.feature
177
+ - features/import/import_with_load_id_from_sequence.feature
178
+ - features/import/import_with_lookup_from_multiple_fields.feature
179
+ - features/read.feature
180
+ - features/remove.feature
181
+ - features/restore_database_connection.feature
182
+ - features/step_definitions/database_table_steps.rb
183
+ - features/step_definitions/definition_steps.rb
184
+ - features/step_definitions/execution_steps.rb
185
+ - features/step_definitions/file_steps.rb
186
+ - features/support/class_extensions.rb
187
+ - features/support/env.rb
188
+ - features/support/randomize.rb
189
+ - features/support/stop_on_first_error.rb
190
+ - features/transform/deduplication.feature
191
+ - features/transform/empty_transformation.feature
192
+ - features/transform/join.feature
193
+ - features/transform/join_multiple_files_into_one_output_file.feature
194
+ - features/transform/output_rows.feature
195
+ - features/transform/projection.feature
196
+ - features/transform/raw_ruby_transformation.feature
197
+ - features/transform/split_field.feature
198
+ - lib/cranium.rb
199
+ - lib/cranium/application.rb
200
+ - lib/cranium/archiver.rb
201
+ - lib/cranium/attribute_dsl.rb
202
+ - lib/cranium/command_line_options.rb
203
+ - lib/cranium/configuration.rb
204
+ - lib/cranium/data_importer.rb
205
+ - lib/cranium/data_reader.rb
206
+ - lib/cranium/data_transformer.rb
207
+ - lib/cranium/database.rb
208
+ - lib/cranium/definition_registry.rb
209
+ - lib/cranium/dimension_manager.rb
210
+ - lib/cranium/dsl.rb
211
+ - lib/cranium/dsl/database_definition.rb
212
+ - lib/cranium/dsl/extract_definition.rb
213
+ - lib/cranium/dsl/import_definition.rb
214
+ - lib/cranium/dsl/source_definition.rb
215
+ - lib/cranium/extensions/file.rb
216
+ - lib/cranium/extensions/sequel_greenplum.rb
217
+ - lib/cranium/external_table.rb
218
+ - lib/cranium/extract.rb
219
+ - lib/cranium/extract/data_extractor.rb
220
+ - lib/cranium/extract/storage.rb
221
+ - lib/cranium/extract/strategy.rb
222
+ - lib/cranium/extract/strategy/base.rb
223
+ - lib/cranium/extract/strategy/incremental.rb
224
+ - lib/cranium/extract/strategy/simple.rb
225
+ - lib/cranium/import_strategy.rb
226
+ - lib/cranium/import_strategy/base.rb
227
+ - lib/cranium/import_strategy/delete_insert.rb
228
+ - lib/cranium/import_strategy/delta.rb
229
+ - lib/cranium/import_strategy/merge.rb
230
+ - lib/cranium/import_strategy/truncate_insert.rb
231
+ - lib/cranium/logging.rb
232
+ - lib/cranium/profiling.rb
233
+ - lib/cranium/progress_output.rb
234
+ - lib/cranium/sequel.rb
235
+ - lib/cranium/sequel/hash.rb
236
+ - lib/cranium/source_registry.rb
237
+ - lib/cranium/test_framework.rb
238
+ - lib/cranium/test_framework/cucumber_table.rb
239
+ - lib/cranium/test_framework/database_entity.rb
240
+ - lib/cranium/test_framework/database_sequence.rb
241
+ - lib/cranium/test_framework/database_table.rb
242
+ - lib/cranium/test_framework/upload_directory.rb
243
+ - lib/cranium/test_framework/world.rb
244
+ - lib/cranium/transformation.rb
245
+ - lib/cranium/transformation/duplication_index.rb
246
+ - lib/cranium/transformation/index.rb
247
+ - lib/cranium/transformation/join.rb
248
+ - lib/cranium/transformation/sequence.rb
249
+ - lib/cranium/transformation_record.rb
250
+ - rake/test.rake
251
+ - spec/cranium/application_spec.rb
252
+ - spec/cranium/archiver_spec.rb
253
+ - spec/cranium/command_line_options_spec.rb
254
+ - spec/cranium/configuration_spec.rb
255
+ - spec/cranium/data_importer_spec.rb
256
+ - spec/cranium/data_transformer_spec.rb
257
+ - spec/cranium/database_spec.rb
258
+ - spec/cranium/definition_registry_spec.rb
259
+ - spec/cranium/dimension_manager_spec.rb
260
+ - spec/cranium/dsl/database_definition_spec.rb
261
+ - spec/cranium/dsl/extract_definition_spec.rb
262
+ - spec/cranium/dsl/import_definition_spec.rb
263
+ - spec/cranium/dsl/source_definition_spec.rb
264
+ - spec/cranium/dsl_spec.rb
265
+ - spec/cranium/external_table_spec.rb
266
+ - spec/cranium/extract/storage_spec.rb
267
+ - spec/cranium/logging_spec.rb
268
+ - spec/cranium/sequel/hash_spec.rb
269
+ - spec/cranium/source_registry_spec.rb
270
+ - spec/cranium/test_framework/cucumber_table_spec.rb
271
+ - spec/cranium/transformation/duplication_index_spec.rb
272
+ - spec/cranium/transformation/index_spec.rb
273
+ - spec/cranium/transformation/join_spec.rb
274
+ - spec/cranium/transformation/sequence_spec.rb
275
+ - spec/cranium/transformation_record_spec.rb
276
+ - spec/cranium_spec.rb
277
+ - spec/spec_helper.rb
278
+ homepage: https://github.com/emartech/cranium
279
+ licenses:
280
+ - MIT
281
+ metadata: {}
282
+ post_install_message:
283
+ rdoc_options: []
284
+ require_paths:
285
+ - lib
286
+ required_ruby_version: !ruby/object:Gem::Requirement
287
+ requirements:
288
+ - - ">="
289
+ - !ruby/object:Gem::Version
290
+ version: '0'
291
+ required_rubygems_version: !ruby/object:Gem::Requirement
292
+ requirements:
293
+ - - ">="
294
+ - !ruby/object:Gem::Version
295
+ version: '0'
296
+ requirements: []
297
+ rubyforge_project:
298
+ rubygems_version: 2.5.1
299
+ signing_key:
300
+ specification_version: 4
301
+ summary: Pure Ruby ETL framework
302
+ test_files:
303
+ - features/archive.feature
304
+ - features/extract/incremental_extract.feature
305
+ - features/extract/simple_extract.feature
306
+ - features/import/import_csv_to_database_as_delta.feature
307
+ - features/import/import_csv_to_database_with_delete_insert_merging.feature
308
+ - features/import/import_csv_to_database_with_truncate_insert.feature
309
+ - features/import/import_csv_to_database_with_update_merging.feature
310
+ - features/import/import_csv_with_always_inserting_new_dimension_keys.feature
311
+ - features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature
312
+ - features/import/import_csv_with_field_lookup_transformation.feature
313
+ - features/import/import_csv_with_transformation.feature
314
+ - features/import/import_multiple_csv_files_without_transformations.feature
315
+ - features/import/import_with_load_id_from_sequence.feature
316
+ - features/import/import_with_lookup_from_multiple_fields.feature
317
+ - features/read.feature
318
+ - features/remove.feature
319
+ - features/restore_database_connection.feature
320
+ - features/step_definitions/database_table_steps.rb
321
+ - features/step_definitions/definition_steps.rb
322
+ - features/step_definitions/execution_steps.rb
323
+ - features/step_definitions/file_steps.rb
324
+ - features/support/class_extensions.rb
325
+ - features/support/env.rb
326
+ - features/support/randomize.rb
327
+ - features/support/stop_on_first_error.rb
328
+ - features/transform/deduplication.feature
329
+ - features/transform/empty_transformation.feature
330
+ - features/transform/join.feature
331
+ - features/transform/join_multiple_files_into_one_output_file.feature
332
+ - features/transform/output_rows.feature
333
+ - features/transform/projection.feature
334
+ - features/transform/raw_ruby_transformation.feature
335
+ - features/transform/split_field.feature
336
+ - spec/cranium/application_spec.rb
337
+ - spec/cranium/archiver_spec.rb
338
+ - spec/cranium/command_line_options_spec.rb
339
+ - spec/cranium/configuration_spec.rb
340
+ - spec/cranium/data_importer_spec.rb
341
+ - spec/cranium/data_transformer_spec.rb
342
+ - spec/cranium/database_spec.rb
343
+ - spec/cranium/definition_registry_spec.rb
344
+ - spec/cranium/dimension_manager_spec.rb
345
+ - spec/cranium/dsl/database_definition_spec.rb
346
+ - spec/cranium/dsl/extract_definition_spec.rb
347
+ - spec/cranium/dsl/import_definition_spec.rb
348
+ - spec/cranium/dsl/source_definition_spec.rb
349
+ - spec/cranium/dsl_spec.rb
350
+ - spec/cranium/external_table_spec.rb
351
+ - spec/cranium/extract/storage_spec.rb
352
+ - spec/cranium/logging_spec.rb
353
+ - spec/cranium/sequel/hash_spec.rb
354
+ - spec/cranium/source_registry_spec.rb
355
+ - spec/cranium/test_framework/cucumber_table_spec.rb
356
+ - spec/cranium/transformation/duplication_index_spec.rb
357
+ - spec/cranium/transformation/index_spec.rb
358
+ - spec/cranium/transformation/join_spec.rb
359
+ - spec/cranium/transformation/sequence_spec.rb
360
+ - spec/cranium/transformation_record_spec.rb
361
+ - spec/cranium_spec.rb
362
+ - spec/spec_helper.rb