cranium 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +3 -0
  8. data/Vagrantfile +24 -0
  9. data/bin/cranium +9 -0
  10. data/config/cucumber.yml +9 -0
  11. data/cranium.gemspec +26 -0
  12. data/db/setup.sql +8 -0
  13. data/docker-compose.yml +8 -0
  14. data/examples/config.rb +14 -0
  15. data/examples/deduplication.rb +27 -0
  16. data/examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb +26 -0
  17. data/examples/incremental_extract.rb +17 -0
  18. data/examples/lookup_with_multiple_fields.rb +25 -0
  19. data/features/archive.feature +49 -0
  20. data/features/extract/incremental_extract.feature +56 -0
  21. data/features/extract/simple_extract.feature +85 -0
  22. data/features/import/import_csv_to_database_as_delta.feature +38 -0
  23. data/features/import/import_csv_to_database_with_delete_insert_merging.feature +51 -0
  24. data/features/import/import_csv_to_database_with_truncate_insert.feature +49 -0
  25. data/features/import/import_csv_to_database_with_update_merging.feature +46 -0
  26. data/features/import/import_csv_with_always_inserting_new_dimension_keys.feature +137 -0
  27. data/features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature +62 -0
  28. data/features/import/import_csv_with_field_lookup_transformation.feature +125 -0
  29. data/features/import/import_csv_with_transformation.feature +55 -0
  30. data/features/import/import_multiple_csv_files_without_transformations.feature +44 -0
  31. data/features/import/import_with_load_id_from_sequence.feature +53 -0
  32. data/features/import/import_with_lookup_from_multiple_fields.feature +64 -0
  33. data/features/read.feature +56 -0
  34. data/features/remove.feature +44 -0
  35. data/features/restore_database_connection.feature +55 -0
  36. data/features/step_definitions/database_table_steps.rb +40 -0
  37. data/features/step_definitions/definition_steps.rb +3 -0
  38. data/features/step_definitions/execution_steps.rb +23 -0
  39. data/features/step_definitions/file_steps.rb +39 -0
  40. data/features/support/class_extensions.rb +24 -0
  41. data/features/support/env.rb +27 -0
  42. data/features/support/randomize.rb +22 -0
  43. data/features/support/stop_on_first_error.rb +5 -0
  44. data/features/transform/deduplication.feature +37 -0
  45. data/features/transform/empty_transformation.feature +72 -0
  46. data/features/transform/join.feature +180 -0
  47. data/features/transform/join_multiple_files_into_one_output_file.feature +46 -0
  48. data/features/transform/output_rows.feature +70 -0
  49. data/features/transform/projection.feature +34 -0
  50. data/features/transform/raw_ruby_transformation.feature +69 -0
  51. data/features/transform/split_field.feature +39 -0
  52. data/lib/cranium/application.rb +104 -0
  53. data/lib/cranium/archiver.rb +36 -0
  54. data/lib/cranium/attribute_dsl.rb +43 -0
  55. data/lib/cranium/command_line_options.rb +27 -0
  56. data/lib/cranium/configuration.rb +33 -0
  57. data/lib/cranium/data_importer.rb +35 -0
  58. data/lib/cranium/data_reader.rb +48 -0
  59. data/lib/cranium/data_transformer.rb +126 -0
  60. data/lib/cranium/database.rb +36 -0
  61. data/lib/cranium/definition_registry.rb +21 -0
  62. data/lib/cranium/dimension_manager.rb +65 -0
  63. data/lib/cranium/dsl/database_definition.rb +23 -0
  64. data/lib/cranium/dsl/extract_definition.rb +28 -0
  65. data/lib/cranium/dsl/import_definition.rb +50 -0
  66. data/lib/cranium/dsl/source_definition.rb +67 -0
  67. data/lib/cranium/dsl.rb +100 -0
  68. data/lib/cranium/extensions/file.rb +7 -0
  69. data/lib/cranium/extensions/sequel_greenplum.rb +30 -0
  70. data/lib/cranium/external_table.rb +75 -0
  71. data/lib/cranium/extract/data_extractor.rb +11 -0
  72. data/lib/cranium/extract/storage.rb +57 -0
  73. data/lib/cranium/extract/strategy/base.rb +27 -0
  74. data/lib/cranium/extract/strategy/incremental.rb +16 -0
  75. data/lib/cranium/extract/strategy/simple.rb +9 -0
  76. data/lib/cranium/extract/strategy.rb +7 -0
  77. data/lib/cranium/extract.rb +7 -0
  78. data/lib/cranium/import_strategy/base.rb +55 -0
  79. data/lib/cranium/import_strategy/delete_insert.rb +40 -0
  80. data/lib/cranium/import_strategy/delta.rb +8 -0
  81. data/lib/cranium/import_strategy/merge.rb +50 -0
  82. data/lib/cranium/import_strategy/truncate_insert.rb +19 -0
  83. data/lib/cranium/import_strategy.rb +9 -0
  84. data/lib/cranium/logging.rb +15 -0
  85. data/lib/cranium/profiling.rb +13 -0
  86. data/lib/cranium/progress_output.rb +37 -0
  87. data/lib/cranium/sequel/hash.rb +32 -0
  88. data/lib/cranium/sequel.rb +5 -0
  89. data/lib/cranium/source_registry.rb +21 -0
  90. data/lib/cranium/test_framework/cucumber_table.rb +140 -0
  91. data/lib/cranium/test_framework/database_entity.rb +29 -0
  92. data/lib/cranium/test_framework/database_sequence.rb +16 -0
  93. data/lib/cranium/test_framework/database_table.rb +33 -0
  94. data/lib/cranium/test_framework/upload_directory.rb +39 -0
  95. data/lib/cranium/test_framework/world.rb +66 -0
  96. data/lib/cranium/test_framework.rb +10 -0
  97. data/lib/cranium/transformation/duplication_index.rb +42 -0
  98. data/lib/cranium/transformation/index.rb +83 -0
  99. data/lib/cranium/transformation/join.rb +141 -0
  100. data/lib/cranium/transformation/sequence.rb +42 -0
  101. data/lib/cranium/transformation.rb +8 -0
  102. data/lib/cranium/transformation_record.rb +45 -0
  103. data/lib/cranium.rb +57 -0
  104. data/rake/test.rake +31 -0
  105. data/spec/cranium/application_spec.rb +166 -0
  106. data/spec/cranium/archiver_spec.rb +44 -0
  107. data/spec/cranium/command_line_options_spec.rb +32 -0
  108. data/spec/cranium/configuration_spec.rb +31 -0
  109. data/spec/cranium/data_importer_spec.rb +55 -0
  110. data/spec/cranium/data_transformer_spec.rb +16 -0
  111. data/spec/cranium/database_spec.rb +69 -0
  112. data/spec/cranium/definition_registry_spec.rb +45 -0
  113. data/spec/cranium/dimension_manager_spec.rb +63 -0
  114. data/spec/cranium/dsl/database_definition_spec.rb +23 -0
  115. data/spec/cranium/dsl/extract_definition_spec.rb +76 -0
  116. data/spec/cranium/dsl/import_definition_spec.rb +153 -0
  117. data/spec/cranium/dsl/source_definition_spec.rb +84 -0
  118. data/spec/cranium/dsl_spec.rb +119 -0
  119. data/spec/cranium/external_table_spec.rb +71 -0
  120. data/spec/cranium/extract/storage_spec.rb +125 -0
  121. data/spec/cranium/logging_spec.rb +37 -0
  122. data/spec/cranium/sequel/hash_spec.rb +56 -0
  123. data/spec/cranium/source_registry_spec.rb +31 -0
  124. data/spec/cranium/test_framework/cucumber_table_spec.rb +144 -0
  125. data/spec/cranium/transformation/duplication_index_spec.rb +75 -0
  126. data/spec/cranium/transformation/index_spec.rb +178 -0
  127. data/spec/cranium/transformation/join_spec.rb +43 -0
  128. data/spec/cranium/transformation/sequence_spec.rb +83 -0
  129. data/spec/cranium/transformation_record_spec.rb +78 -0
  130. data/spec/cranium_spec.rb +53 -0
  131. data/spec/spec_helper.rb +1 -0
  132. metadata +362 -0
metadata ADDED
@@ -0,0 +1,362 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cranium
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Emarsys Technologies
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: progressbar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: sequel
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '4'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '4'
55
+ - !ruby/object:Gem::Dependency
56
+ name: slop
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '10'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '10'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3'
111
+ - !ruby/object:Gem::Dependency
112
+ name: ruby-prof
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: cucumber
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '1'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '1'
139
+ description: Provides Extract, Transform and Load functionality for loading data from
140
+ CSV files to a Greenplum database.
141
+ email:
142
+ - smart-insight-dev@emarsys.com
143
+ executables:
144
+ - cranium
145
+ extensions: []
146
+ extra_rdoc_files: []
147
+ files:
148
+ - ".gitignore"
149
+ - ".ruby-version"
150
+ - Gemfile
151
+ - LICENSE.txt
152
+ - README.md
153
+ - Rakefile
154
+ - Vagrantfile
155
+ - bin/cranium
156
+ - config/cucumber.yml
157
+ - cranium.gemspec
158
+ - db/setup.sql
159
+ - docker-compose.yml
160
+ - examples/config.rb
161
+ - examples/deduplication.rb
162
+ - examples/import_csv_with_field_lookup_inserting_new_dimension_keys.rb
163
+ - examples/incremental_extract.rb
164
+ - examples/lookup_with_multiple_fields.rb
165
+ - features/archive.feature
166
+ - features/extract/incremental_extract.feature
167
+ - features/extract/simple_extract.feature
168
+ - features/import/import_csv_to_database_as_delta.feature
169
+ - features/import/import_csv_to_database_with_delete_insert_merging.feature
170
+ - features/import/import_csv_to_database_with_truncate_insert.feature
171
+ - features/import/import_csv_to_database_with_update_merging.feature
172
+ - features/import/import_csv_with_always_inserting_new_dimension_keys.feature
173
+ - features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature
174
+ - features/import/import_csv_with_field_lookup_transformation.feature
175
+ - features/import/import_csv_with_transformation.feature
176
+ - features/import/import_multiple_csv_files_without_transformations.feature
177
+ - features/import/import_with_load_id_from_sequence.feature
178
+ - features/import/import_with_lookup_from_multiple_fields.feature
179
+ - features/read.feature
180
+ - features/remove.feature
181
+ - features/restore_database_connection.feature
182
+ - features/step_definitions/database_table_steps.rb
183
+ - features/step_definitions/definition_steps.rb
184
+ - features/step_definitions/execution_steps.rb
185
+ - features/step_definitions/file_steps.rb
186
+ - features/support/class_extensions.rb
187
+ - features/support/env.rb
188
+ - features/support/randomize.rb
189
+ - features/support/stop_on_first_error.rb
190
+ - features/transform/deduplication.feature
191
+ - features/transform/empty_transformation.feature
192
+ - features/transform/join.feature
193
+ - features/transform/join_multiple_files_into_one_output_file.feature
194
+ - features/transform/output_rows.feature
195
+ - features/transform/projection.feature
196
+ - features/transform/raw_ruby_transformation.feature
197
+ - features/transform/split_field.feature
198
+ - lib/cranium.rb
199
+ - lib/cranium/application.rb
200
+ - lib/cranium/archiver.rb
201
+ - lib/cranium/attribute_dsl.rb
202
+ - lib/cranium/command_line_options.rb
203
+ - lib/cranium/configuration.rb
204
+ - lib/cranium/data_importer.rb
205
+ - lib/cranium/data_reader.rb
206
+ - lib/cranium/data_transformer.rb
207
+ - lib/cranium/database.rb
208
+ - lib/cranium/definition_registry.rb
209
+ - lib/cranium/dimension_manager.rb
210
+ - lib/cranium/dsl.rb
211
+ - lib/cranium/dsl/database_definition.rb
212
+ - lib/cranium/dsl/extract_definition.rb
213
+ - lib/cranium/dsl/import_definition.rb
214
+ - lib/cranium/dsl/source_definition.rb
215
+ - lib/cranium/extensions/file.rb
216
+ - lib/cranium/extensions/sequel_greenplum.rb
217
+ - lib/cranium/external_table.rb
218
+ - lib/cranium/extract.rb
219
+ - lib/cranium/extract/data_extractor.rb
220
+ - lib/cranium/extract/storage.rb
221
+ - lib/cranium/extract/strategy.rb
222
+ - lib/cranium/extract/strategy/base.rb
223
+ - lib/cranium/extract/strategy/incremental.rb
224
+ - lib/cranium/extract/strategy/simple.rb
225
+ - lib/cranium/import_strategy.rb
226
+ - lib/cranium/import_strategy/base.rb
227
+ - lib/cranium/import_strategy/delete_insert.rb
228
+ - lib/cranium/import_strategy/delta.rb
229
+ - lib/cranium/import_strategy/merge.rb
230
+ - lib/cranium/import_strategy/truncate_insert.rb
231
+ - lib/cranium/logging.rb
232
+ - lib/cranium/profiling.rb
233
+ - lib/cranium/progress_output.rb
234
+ - lib/cranium/sequel.rb
235
+ - lib/cranium/sequel/hash.rb
236
+ - lib/cranium/source_registry.rb
237
+ - lib/cranium/test_framework.rb
238
+ - lib/cranium/test_framework/cucumber_table.rb
239
+ - lib/cranium/test_framework/database_entity.rb
240
+ - lib/cranium/test_framework/database_sequence.rb
241
+ - lib/cranium/test_framework/database_table.rb
242
+ - lib/cranium/test_framework/upload_directory.rb
243
+ - lib/cranium/test_framework/world.rb
244
+ - lib/cranium/transformation.rb
245
+ - lib/cranium/transformation/duplication_index.rb
246
+ - lib/cranium/transformation/index.rb
247
+ - lib/cranium/transformation/join.rb
248
+ - lib/cranium/transformation/sequence.rb
249
+ - lib/cranium/transformation_record.rb
250
+ - rake/test.rake
251
+ - spec/cranium/application_spec.rb
252
+ - spec/cranium/archiver_spec.rb
253
+ - spec/cranium/command_line_options_spec.rb
254
+ - spec/cranium/configuration_spec.rb
255
+ - spec/cranium/data_importer_spec.rb
256
+ - spec/cranium/data_transformer_spec.rb
257
+ - spec/cranium/database_spec.rb
258
+ - spec/cranium/definition_registry_spec.rb
259
+ - spec/cranium/dimension_manager_spec.rb
260
+ - spec/cranium/dsl/database_definition_spec.rb
261
+ - spec/cranium/dsl/extract_definition_spec.rb
262
+ - spec/cranium/dsl/import_definition_spec.rb
263
+ - spec/cranium/dsl/source_definition_spec.rb
264
+ - spec/cranium/dsl_spec.rb
265
+ - spec/cranium/external_table_spec.rb
266
+ - spec/cranium/extract/storage_spec.rb
267
+ - spec/cranium/logging_spec.rb
268
+ - spec/cranium/sequel/hash_spec.rb
269
+ - spec/cranium/source_registry_spec.rb
270
+ - spec/cranium/test_framework/cucumber_table_spec.rb
271
+ - spec/cranium/transformation/duplication_index_spec.rb
272
+ - spec/cranium/transformation/index_spec.rb
273
+ - spec/cranium/transformation/join_spec.rb
274
+ - spec/cranium/transformation/sequence_spec.rb
275
+ - spec/cranium/transformation_record_spec.rb
276
+ - spec/cranium_spec.rb
277
+ - spec/spec_helper.rb
278
+ homepage: https://github.com/emartech/cranium
279
+ licenses:
280
+ - MIT
281
+ metadata: {}
282
+ post_install_message:
283
+ rdoc_options: []
284
+ require_paths:
285
+ - lib
286
+ required_ruby_version: !ruby/object:Gem::Requirement
287
+ requirements:
288
+ - - ">="
289
+ - !ruby/object:Gem::Version
290
+ version: '0'
291
+ required_rubygems_version: !ruby/object:Gem::Requirement
292
+ requirements:
293
+ - - ">="
294
+ - !ruby/object:Gem::Version
295
+ version: '0'
296
+ requirements: []
297
+ rubyforge_project:
298
+ rubygems_version: 2.5.1
299
+ signing_key:
300
+ specification_version: 4
301
+ summary: Pure Ruby ETL framework
302
+ test_files:
303
+ - features/archive.feature
304
+ - features/extract/incremental_extract.feature
305
+ - features/extract/simple_extract.feature
306
+ - features/import/import_csv_to_database_as_delta.feature
307
+ - features/import/import_csv_to_database_with_delete_insert_merging.feature
308
+ - features/import/import_csv_to_database_with_truncate_insert.feature
309
+ - features/import/import_csv_to_database_with_update_merging.feature
310
+ - features/import/import_csv_with_always_inserting_new_dimension_keys.feature
311
+ - features/import/import_csv_with_field_lookup_inserting_new_dimension_keys.feature
312
+ - features/import/import_csv_with_field_lookup_transformation.feature
313
+ - features/import/import_csv_with_transformation.feature
314
+ - features/import/import_multiple_csv_files_without_transformations.feature
315
+ - features/import/import_with_load_id_from_sequence.feature
316
+ - features/import/import_with_lookup_from_multiple_fields.feature
317
+ - features/read.feature
318
+ - features/remove.feature
319
+ - features/restore_database_connection.feature
320
+ - features/step_definitions/database_table_steps.rb
321
+ - features/step_definitions/definition_steps.rb
322
+ - features/step_definitions/execution_steps.rb
323
+ - features/step_definitions/file_steps.rb
324
+ - features/support/class_extensions.rb
325
+ - features/support/env.rb
326
+ - features/support/randomize.rb
327
+ - features/support/stop_on_first_error.rb
328
+ - features/transform/deduplication.feature
329
+ - features/transform/empty_transformation.feature
330
+ - features/transform/join.feature
331
+ - features/transform/join_multiple_files_into_one_output_file.feature
332
+ - features/transform/output_rows.feature
333
+ - features/transform/projection.feature
334
+ - features/transform/raw_ruby_transformation.feature
335
+ - features/transform/split_field.feature
336
+ - spec/cranium/application_spec.rb
337
+ - spec/cranium/archiver_spec.rb
338
+ - spec/cranium/command_line_options_spec.rb
339
+ - spec/cranium/configuration_spec.rb
340
+ - spec/cranium/data_importer_spec.rb
341
+ - spec/cranium/data_transformer_spec.rb
342
+ - spec/cranium/database_spec.rb
343
+ - spec/cranium/definition_registry_spec.rb
344
+ - spec/cranium/dimension_manager_spec.rb
345
+ - spec/cranium/dsl/database_definition_spec.rb
346
+ - spec/cranium/dsl/extract_definition_spec.rb
347
+ - spec/cranium/dsl/import_definition_spec.rb
348
+ - spec/cranium/dsl/source_definition_spec.rb
349
+ - spec/cranium/dsl_spec.rb
350
+ - spec/cranium/external_table_spec.rb
351
+ - spec/cranium/extract/storage_spec.rb
352
+ - spec/cranium/logging_spec.rb
353
+ - spec/cranium/sequel/hash_spec.rb
354
+ - spec/cranium/source_registry_spec.rb
355
+ - spec/cranium/test_framework/cucumber_table_spec.rb
356
+ - spec/cranium/transformation/duplication_index_spec.rb
357
+ - spec/cranium/transformation/index_spec.rb
358
+ - spec/cranium/transformation/join_spec.rb
359
+ - spec/cranium/transformation/sequence_spec.rb
360
+ - spec/cranium/transformation_record_spec.rb
361
+ - spec/cranium_spec.rb
362
+ - spec/spec_helper.rb