darrell-activewarehouse-etl 0.9.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83):
  1. data/CHANGELOG +198 -0
  2. data/LICENSE +7 -0
  3. data/README +99 -0
  4. data/Rakefile +175 -0
  5. data/TODO +28 -0
  6. data/bin/etl +28 -0
  7. data/bin/etl.cmd +8 -0
  8. data/examples/database.example.yml +16 -0
  9. data/lib/etl/batch/batch.rb +111 -0
  10. data/lib/etl/batch/directives.rb +55 -0
  11. data/lib/etl/batch.rb +2 -0
  12. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  13. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  14. data/lib/etl/builder.rb +2 -0
  15. data/lib/etl/commands/etl.rb +89 -0
  16. data/lib/etl/control/control.rb +405 -0
  17. data/lib/etl/control/destination/database_destination.rb +97 -0
  18. data/lib/etl/control/destination/file_destination.rb +126 -0
  19. data/lib/etl/control/destination.rb +448 -0
  20. data/lib/etl/control/source/database_source.rb +220 -0
  21. data/lib/etl/control/source/enumerable_source.rb +11 -0
  22. data/lib/etl/control/source/file_source.rb +90 -0
  23. data/lib/etl/control/source/model_source.rb +39 -0
  24. data/lib/etl/control/source.rb +109 -0
  25. data/lib/etl/control.rb +3 -0
  26. data/lib/etl/core_ext/time/calculations.rb +42 -0
  27. data/lib/etl/core_ext/time.rb +5 -0
  28. data/lib/etl/core_ext.rb +1 -0
  29. data/lib/etl/engine.rb +556 -0
  30. data/lib/etl/execution/base.rb +9 -0
  31. data/lib/etl/execution/batch.rb +8 -0
  32. data/lib/etl/execution/job.rb +8 -0
  33. data/lib/etl/execution/migration.rb +85 -0
  34. data/lib/etl/execution.rb +19 -0
  35. data/lib/etl/generator/generator.rb +20 -0
  36. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  37. data/lib/etl/generator.rb +2 -0
  38. data/lib/etl/http_tools.rb +139 -0
  39. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  40. data/lib/etl/parser/delimited_parser.rb +74 -0
  41. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  42. data/lib/etl/parser/parser.rb +41 -0
  43. data/lib/etl/parser/sax_parser.rb +218 -0
  44. data/lib/etl/parser/xml_parser.rb +65 -0
  45. data/lib/etl/parser.rb +11 -0
  46. data/lib/etl/processor/block_processor.rb +14 -0
  47. data/lib/etl/processor/bulk_import_processor.rb +83 -0
  48. data/lib/etl/processor/check_exist_processor.rb +80 -0
  49. data/lib/etl/processor/check_unique_processor.rb +35 -0
  50. data/lib/etl/processor/copy_field_processor.rb +26 -0
  51. data/lib/etl/processor/encode_processor.rb +55 -0
  52. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  53. data/lib/etl/processor/print_row_processor.rb +12 -0
  54. data/lib/etl/processor/processor.rb +25 -0
  55. data/lib/etl/processor/rename_processor.rb +24 -0
  56. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  57. data/lib/etl/processor/row_processor.rb +17 -0
  58. data/lib/etl/processor/sequence_processor.rb +23 -0
  59. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  60. data/lib/etl/processor/truncate_processor.rb +35 -0
  61. data/lib/etl/processor.rb +11 -0
  62. data/lib/etl/row.rb +20 -0
  63. data/lib/etl/screen/row_count_screen.rb +20 -0
  64. data/lib/etl/screen.rb +14 -0
  65. data/lib/etl/transform/block_transform.rb +13 -0
  66. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  67. data/lib/etl/transform/decode_transform.rb +51 -0
  68. data/lib/etl/transform/default_transform.rb +20 -0
  69. data/lib/etl/transform/foreign_key_lookup_transform.rb +169 -0
  70. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  71. data/lib/etl/transform/ordinalize_transform.rb +12 -0
  72. data/lib/etl/transform/sha1_transform.rb +13 -0
  73. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  74. data/lib/etl/transform/string_to_datetime_transform.rb +14 -0
  75. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  76. data/lib/etl/transform/transform.rb +61 -0
  77. data/lib/etl/transform/trim_transform.rb +26 -0
  78. data/lib/etl/transform/type_transform.rb +35 -0
  79. data/lib/etl/transform.rb +2 -0
  80. data/lib/etl/util.rb +59 -0
  81. data/lib/etl/version.rb +9 -0
  82. data/lib/etl.rb +83 -0
  83. metadata +245 -0
data/lib/etl.rb ADDED
@@ -0,0 +1,83 @@
1
+ # This source file requires all of the necessary gems and source files for ActiveWarehouse ETL. If you
2
+ # load this source file, all of the other required files and gems will also be brought into the
3
+ # runtime.
4
+
5
+ #--
6
+ # Copyright (c) 2006-2007 Anthony Eden
7
+ #
8
+ # Permission is hereby granted, free of charge, to any person obtaining
9
+ # a copy of this software and associated documentation files (the
10
+ # "Software"), to deal in the Software without restriction, including
11
+ # without limitation the rights to use, copy, modify, merge, publish,
12
+ # distribute, sublicense, and/or sell copies of the Software, and to
13
+ # permit persons to whom the Software is furnished to do so, subject to
14
+ # the following conditions:
15
+ #
16
+ # The above copyright notice and this permission notice shall be
17
+ # included in all copies or substantial portions of the Software.
18
+ #
19
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
+ #++
27
+
28
+ require 'logger'
29
+ require 'yaml'
30
+ require 'erb'
31
+
32
+ require 'rubygems'
33
+
34
+ unless defined?(REXML::VERSION)
35
+ require 'rexml/rexml'
36
+ REXML::VERSION = REXML::Version
37
+ end
38
+
39
+ require 'active_support'
40
+ require 'active_record'
41
+ require 'adapter_extensions'
42
+
43
+ if RUBY_VERSION < '1.9'
44
+ require 'faster_csv'
45
+ else
46
+ require 'csv'
47
+ end
48
+
49
+ $:.unshift(File.dirname(__FILE__))
50
+
51
+ require 'etl/core_ext'
52
+ require 'etl/util'
53
+ require 'etl/http_tools'
54
+ require 'etl/builder'
55
+ require 'etl/version'
56
+ require 'etl/engine'
57
+ require 'etl/control'
58
+ require 'etl/batch'
59
+ require 'etl/row'
60
+ require 'etl/parser'
61
+ require 'etl/transform'
62
+ require 'etl/processor'
63
+ require 'etl/generator'
64
+ require 'etl/screen'
65
+
66
+ module ETL #:nodoc:
67
+ class ETLError < StandardError #:nodoc:
68
+ end
69
+ class ControlError < ETLError #:nodoc:
70
+ end
71
+ class DefinitionError < ControlError #:nodoc:
72
+ end
73
+ class ConfigurationError < ControlError #:nodoc:
74
+ end
75
+ class MismatchError < ETLError #:nodoc:
76
+ end
77
+ class ResolverError < ETLError #:nodoc:
78
+ end
79
+ class ScreenError < ETLError #:nodoc:
80
+ end
81
+ class FatalScreenError < ScreenError #:nodoc:
82
+ end
83
+ end
metadata ADDED
@@ -0,0 +1,245 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: darrell-activewarehouse-etl
3
+ version: !ruby/object:Gem::Version
4
+ hash: 11
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 9
9
+ - 1
10
+ - 4
11
+ version: 0.9.1.4
12
+ platform: ruby
13
+ authors:
14
+ - Anthony Eden
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2008-08-14 00:00:00 -07:00
20
+ default_executable: etl
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: rake
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ">="
29
+ - !ruby/object:Gem::Version
30
+ hash: 1
31
+ segments:
32
+ - 0
33
+ - 7
34
+ - 1
35
+ version: 0.7.1
36
+ type: :runtime
37
+ version_requirements: *id001
38
+ - !ruby/object:Gem::Dependency
39
+ name: activesupport
40
+ prerelease: false
41
+ requirement: &id002 !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ hash: 25
47
+ segments:
48
+ - 1
49
+ - 3
50
+ - 1
51
+ version: 1.3.1
52
+ type: :runtime
53
+ version_requirements: *id002
54
+ - !ruby/object:Gem::Dependency
55
+ name: activerecord
56
+ prerelease: false
57
+ requirement: &id003 !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ hash: 39
63
+ segments:
64
+ - 1
65
+ - 14
66
+ - 4
67
+ version: 1.14.4
68
+ type: :runtime
69
+ version_requirements: *id003
70
+ - !ruby/object:Gem::Dependency
71
+ name: fastercsv
72
+ prerelease: false
73
+ requirement: &id004 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ hash: 31
79
+ segments:
80
+ - 1
81
+ - 2
82
+ - 0
83
+ version: 1.2.0
84
+ type: :runtime
85
+ version_requirements: *id004
86
+ - !ruby/object:Gem::Dependency
87
+ name: adapter_extensions
88
+ prerelease: false
89
+ requirement: &id005 !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ hash: 27
95
+ segments:
96
+ - 0
97
+ - 1
98
+ - 0
99
+ version: 0.1.0
100
+ type: :runtime
101
+ version_requirements: *id005
102
+ - !ruby/object:Gem::Dependency
103
+ name: httpclient
104
+ prerelease: false
105
+ requirement: &id006 !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ hash: 11
111
+ segments:
112
+ - 2
113
+ - 1
114
+ - 0
115
+ version: 2.1.0
116
+ type: :runtime
117
+ version_requirements: *id006
118
+ description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
119
+ email: anthonyeden@gmail.com
120
+ executables:
121
+ - etl
122
+ extensions: []
123
+
124
+ extra_rdoc_files: []
125
+
126
+ files:
127
+ - CHANGELOG
128
+ - LICENSE
129
+ - README
130
+ - TODO
131
+ - Rakefile
132
+ - bin/etl
133
+ - bin/etl.cmd
134
+ - lib/etl.rb
135
+ - lib/etl/batch.rb
136
+ - lib/etl/builder.rb
137
+ - lib/etl/control.rb
138
+ - lib/etl/core_ext.rb
139
+ - lib/etl/engine.rb
140
+ - lib/etl/execution.rb
141
+ - lib/etl/generator.rb
142
+ - lib/etl/http_tools.rb
143
+ - lib/etl/parser.rb
144
+ - lib/etl/processor.rb
145
+ - lib/etl/row.rb
146
+ - lib/etl/screen.rb
147
+ - lib/etl/transform.rb
148
+ - lib/etl/util.rb
149
+ - lib/etl/version.rb
150
+ - lib/etl/batch/batch.rb
151
+ - lib/etl/batch/directives.rb
152
+ - lib/etl/builder/date_dimension_builder.rb
153
+ - lib/etl/builder/time_dimension_builder.rb
154
+ - lib/etl/commands/etl.rb
155
+ - lib/etl/control/control.rb
156
+ - lib/etl/control/destination.rb
157
+ - lib/etl/control/source.rb
158
+ - lib/etl/control/destination/database_destination.rb
159
+ - lib/etl/control/destination/file_destination.rb
160
+ - lib/etl/control/source/database_source.rb
161
+ - lib/etl/control/source/enumerable_source.rb
162
+ - lib/etl/control/source/file_source.rb
163
+ - lib/etl/control/source/model_source.rb
164
+ - lib/etl/core_ext/time.rb
165
+ - lib/etl/core_ext/time/calculations.rb
166
+ - lib/etl/execution/base.rb
167
+ - lib/etl/execution/batch.rb
168
+ - lib/etl/execution/job.rb
169
+ - lib/etl/execution/migration.rb
170
+ - lib/etl/generator/generator.rb
171
+ - lib/etl/generator/surrogate_key_generator.rb
172
+ - lib/etl/parser/apache_combined_log_parser.rb
173
+ - lib/etl/parser/delimited_parser.rb
174
+ - lib/etl/parser/fixed_width_parser.rb
175
+ - lib/etl/parser/parser.rb
176
+ - lib/etl/parser/sax_parser.rb
177
+ - lib/etl/parser/xml_parser.rb
178
+ - lib/etl/processor/block_processor.rb
179
+ - lib/etl/processor/bulk_import_processor.rb
180
+ - lib/etl/processor/check_exist_processor.rb
181
+ - lib/etl/processor/check_unique_processor.rb
182
+ - lib/etl/processor/copy_field_processor.rb
183
+ - lib/etl/processor/encode_processor.rb
184
+ - lib/etl/processor/hierarchy_exploder_processor.rb
185
+ - lib/etl/processor/print_row_processor.rb
186
+ - lib/etl/processor/processor.rb
187
+ - lib/etl/processor/rename_processor.rb
188
+ - lib/etl/processor/require_non_blank_processor.rb
189
+ - lib/etl/processor/row_processor.rb
190
+ - lib/etl/processor/sequence_processor.rb
191
+ - lib/etl/processor/surrogate_key_processor.rb
192
+ - lib/etl/processor/truncate_processor.rb
193
+ - lib/etl/screen/row_count_screen.rb
194
+ - lib/etl/transform/block_transform.rb
195
+ - lib/etl/transform/date_to_string_transform.rb
196
+ - lib/etl/transform/decode_transform.rb
197
+ - lib/etl/transform/default_transform.rb
198
+ - lib/etl/transform/foreign_key_lookup_transform.rb
199
+ - lib/etl/transform/hierarchy_lookup_transform.rb
200
+ - lib/etl/transform/ordinalize_transform.rb
201
+ - lib/etl/transform/sha1_transform.rb
202
+ - lib/etl/transform/string_to_date_transform.rb
203
+ - lib/etl/transform/string_to_datetime_transform.rb
204
+ - lib/etl/transform/string_to_time_transform.rb
205
+ - lib/etl/transform/transform.rb
206
+ - lib/etl/transform/trim_transform.rb
207
+ - lib/etl/transform/type_transform.rb
208
+ - examples/database.example.yml
209
+ has_rdoc: true
210
+ homepage: http://activewarehouse.rubyforge.org/etl
211
+ licenses: []
212
+
213
+ post_install_message:
214
+ rdoc_options:
215
+ - --exclude
216
+ - .
217
+ require_paths:
218
+ - lib
219
+ required_ruby_version: !ruby/object:Gem::Requirement
220
+ none: false
221
+ requirements:
222
+ - - ">="
223
+ - !ruby/object:Gem::Version
224
+ hash: 3
225
+ segments:
226
+ - 0
227
+ version: "0"
228
+ required_rubygems_version: !ruby/object:Gem::Requirement
229
+ none: false
230
+ requirements:
231
+ - - ">="
232
+ - !ruby/object:Gem::Version
233
+ hash: 3
234
+ segments:
235
+ - 0
236
+ version: "0"
237
+ requirements: []
238
+
239
+ rubyforge_project: activewarehouse
240
+ rubygems_version: 1.3.7
241
+ signing_key:
242
+ specification_version: 2
243
+ summary: Pure Ruby ETL package.
244
+ test_files: []
245
+