jayzes-activewarehouse-etl 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. data/CHANGELOG +198 -0
  2. data/LICENSE +7 -0
  3. data/README +85 -0
  4. data/Rakefile +153 -0
  5. data/TODO +28 -0
  6. data/bin/etl +28 -0
  7. data/bin/etl.cmd +8 -0
  8. data/examples/database.example.yml +16 -0
  9. data/lib/etl/batch/batch.rb +111 -0
  10. data/lib/etl/batch/directives.rb +55 -0
  11. data/lib/etl/batch.rb +2 -0
  12. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  13. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  14. data/lib/etl/builder.rb +2 -0
  15. data/lib/etl/commands/etl.rb +89 -0
  16. data/lib/etl/control/control.rb +405 -0
  17. data/lib/etl/control/destination/database_destination.rb +95 -0
  18. data/lib/etl/control/destination/file_destination.rb +124 -0
  19. data/lib/etl/control/destination.rb +420 -0
  20. data/lib/etl/control/source/database_source.rb +220 -0
  21. data/lib/etl/control/source/enumerable_source.rb +11 -0
  22. data/lib/etl/control/source/file_source.rb +90 -0
  23. data/lib/etl/control/source/model_source.rb +39 -0
  24. data/lib/etl/control/source.rb +109 -0
  25. data/lib/etl/control.rb +3 -0
  26. data/lib/etl/core_ext/time/calculations.rb +42 -0
  27. data/lib/etl/core_ext/time.rb +5 -0
  28. data/lib/etl/core_ext.rb +1 -0
  29. data/lib/etl/engine.rb +556 -0
  30. data/lib/etl/execution/base.rb +9 -0
  31. data/lib/etl/execution/batch.rb +8 -0
  32. data/lib/etl/execution/job.rb +8 -0
  33. data/lib/etl/execution/migration.rb +85 -0
  34. data/lib/etl/execution.rb +20 -0
  35. data/lib/etl/generator/generator.rb +20 -0
  36. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  37. data/lib/etl/generator.rb +2 -0
  38. data/lib/etl/http_tools.rb +139 -0
  39. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  40. data/lib/etl/parser/delimited_parser.rb +74 -0
  41. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  42. data/lib/etl/parser/parser.rb +41 -0
  43. data/lib/etl/parser/sax_parser.rb +218 -0
  44. data/lib/etl/parser/xml_parser.rb +65 -0
  45. data/lib/etl/parser.rb +11 -0
  46. data/lib/etl/processor/block_processor.rb +14 -0
  47. data/lib/etl/processor/bulk_import_processor.rb +81 -0
  48. data/lib/etl/processor/check_exist_processor.rb +80 -0
  49. data/lib/etl/processor/check_unique_processor.rb +35 -0
  50. data/lib/etl/processor/copy_field_processor.rb +26 -0
  51. data/lib/etl/processor/encode_processor.rb +55 -0
  52. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  53. data/lib/etl/processor/print_row_processor.rb +12 -0
  54. data/lib/etl/processor/processor.rb +25 -0
  55. data/lib/etl/processor/rename_processor.rb +24 -0
  56. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  57. data/lib/etl/processor/row_processor.rb +17 -0
  58. data/lib/etl/processor/sequence_processor.rb +23 -0
  59. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  60. data/lib/etl/processor/truncate_processor.rb +35 -0
  61. data/lib/etl/processor.rb +11 -0
  62. data/lib/etl/row.rb +20 -0
  63. data/lib/etl/screen/row_count_screen.rb +20 -0
  64. data/lib/etl/screen.rb +14 -0
  65. data/lib/etl/transform/block_transform.rb +13 -0
  66. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  67. data/lib/etl/transform/decode_transform.rb +51 -0
  68. data/lib/etl/transform/default_transform.rb +20 -0
  69. data/lib/etl/transform/foreign_key_lookup_transform.rb +151 -0
  70. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  71. data/lib/etl/transform/ordinalize_transform.rb +12 -0
  72. data/lib/etl/transform/sha1_transform.rb +13 -0
  73. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  74. data/lib/etl/transform/string_to_datetime_transform.rb +14 -0
  75. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  76. data/lib/etl/transform/transform.rb +61 -0
  77. data/lib/etl/transform/trim_transform.rb +26 -0
  78. data/lib/etl/transform/type_transform.rb +35 -0
  79. data/lib/etl/transform.rb +2 -0
  80. data/lib/etl/util.rb +59 -0
  81. data/lib/etl/version.rb +9 -0
  82. data/lib/etl.rb +78 -0
  83. metadata +194 -0
data/lib/etl.rb ADDED
@@ -0,0 +1,78 @@
1
+ # This source file requires all of the necessary gems and source files for ActiveWarehouse ETL. If you
2
+ # load this source file, all of the other required files and gems will also be brought into the
3
+ # runtime.
4
+
5
+ #--
6
+ # Copyright (c) 2006-2007 Anthony Eden
7
+ #
8
+ # Permission is hereby granted, free of charge, to any person obtaining
9
+ # a copy of this software and associated documentation files (the
10
+ # "Software"), to deal in the Software without restriction, including
11
+ # without limitation the rights to use, copy, modify, merge, publish,
12
+ # distribute, sublicense, and/or sell copies of the Software, and to
13
+ # permit persons to whom the Software is furnished to do so, subject to
14
+ # the following conditions:
15
+ #
16
+ # The above copyright notice and this permission notice shall be
17
+ # included in all copies or substantial portions of the Software.
18
+ #
19
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
+ #++
27
+
28
+ require 'logger'
29
+ require 'yaml'
30
+ require 'erb'
31
+
32
+ require 'rubygems'
33
+
34
+ unless defined?(REXML::VERSION) # compatibility shim: runs only when REXML::VERSION is not defined at this point
35
+ require 'rexml/rexml' # load REXML's core file so that REXML::Version can be referenced below
36
+ REXML::VERSION = REXML::Version # expose Version under the upper-case name; presumably a dependency reads REXML::VERSION -- TODO confirm. NOTE(review): the defined? check runs before the require, so if rexml/rexml itself defines VERSION this re-assigns the constant -- verify against the REXML in use
37
+ end
38
+
39
+ require 'active_support'
40
+ require 'active_record'
41
+ require 'adapter_extensions'
42
+ require 'faster_csv'
43
+
44
+ $:.unshift(File.dirname(__FILE__)) # prepend this file's directory to the load path ($:) before the 'etl/...' requires that follow
45
+
46
+ require 'etl/core_ext'
47
+ require 'etl/util'
48
+ require 'etl/http_tools'
49
+ require 'etl/builder'
50
+ require 'etl/version'
51
+ require 'etl/engine'
52
+ require 'etl/control'
53
+ require 'etl/batch'
54
+ require 'etl/row'
55
+ require 'etl/parser'
56
+ require 'etl/transform'
57
+ require 'etl/processor'
58
+ require 'etl/generator'
59
+ require 'etl/screen'
60
+
61
+ module ETL #:nodoc:
62
+ class ETLError < StandardError #:nodoc:
63
+ end # ETLError: root of the exception hierarchy declared in this module; descends from StandardError
64
+ class ControlError < ETLError #:nodoc:
65
+ end # ControlError: an ETLError; parent of DefinitionError and ConfigurationError
66
+ class DefinitionError < ControlError #:nodoc:
67
+ end # DefinitionError: a ControlError (rescuing ControlError or ETLError also catches it)
68
+ class ConfigurationError < ControlError #:nodoc:
69
+ end # ConfigurationError: a ControlError (rescuing ControlError or ETLError also catches it)
70
+ class MismatchError < ETLError #:nodoc:
71
+ end # MismatchError: direct subclass of ETLError
72
+ class ResolverError < ETLError #:nodoc:
73
+ end # ResolverError: direct subclass of ETLError
74
+ class ScreenError < ETLError #:nodoc:
75
+ end # ScreenError: an ETLError; parent of FatalScreenError
76
+ class FatalScreenError < ScreenError #:nodoc:
77
+ end # FatalScreenError: a ScreenError (rescuing ScreenError or ETLError also catches it)
78
+ end # module ETL
metadata ADDED
@@ -0,0 +1,194 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jayzes-activewarehouse-etl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.1
5
+ platform: ruby
6
+ authors:
7
+ - Anthony Eden
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-08-14 00:00:00 -07:00
13
+ default_executable: etl
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.7.1
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: activesupport
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: 1.3.1
32
+ version:
33
+ - !ruby/object:Gem::Dependency
34
+ name: activerecord
35
+ version_requirement:
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.14.4
41
+ version:
42
+ - !ruby/object:Gem::Dependency
43
+ name: fastercsv
44
+ version_requirement:
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 1.2.0
50
+ version:
51
+ - !ruby/object:Gem::Dependency
52
+ name: adapter_extensions
53
+ version_requirement:
54
+ version_requirements: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: 0.1.0
59
+ version:
60
+ description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
61
+ email: anthonyeden@gmail.com
62
+ executables:
63
+ - etl
64
+ extensions: []
65
+
66
+ extra_rdoc_files: []
67
+
68
+ files:
69
+ - CHANGELOG
70
+ - LICENSE
71
+ - README
72
+ - TODO
73
+ - Rakefile
74
+ - bin/etl
75
+ - bin/etl.cmd
76
+ - lib/etl
77
+ - lib/etl.rb
78
+ - lib/etl/batch
79
+ - lib/etl/batch.rb
80
+ - lib/etl/builder
81
+ - lib/etl/builder.rb
82
+ - lib/etl/commands
83
+ - lib/etl/control
84
+ - lib/etl/control.rb
85
+ - lib/etl/core_ext
86
+ - lib/etl/core_ext.rb
87
+ - lib/etl/engine.rb
88
+ - lib/etl/execution
89
+ - lib/etl/execution.rb
90
+ - lib/etl/generator
91
+ - lib/etl/generator.rb
92
+ - lib/etl/http_tools.rb
93
+ - lib/etl/parser
94
+ - lib/etl/parser.rb
95
+ - lib/etl/processor
96
+ - lib/etl/processor.rb
97
+ - lib/etl/row.rb
98
+ - lib/etl/screen
99
+ - lib/etl/screen.rb
100
+ - lib/etl/transform
101
+ - lib/etl/transform.rb
102
+ - lib/etl/util.rb
103
+ - lib/etl/version.rb
104
+ - lib/etl/batch/batch.rb
105
+ - lib/etl/batch/directives.rb
106
+ - lib/etl/builder/date_dimension_builder.rb
107
+ - lib/etl/builder/time_dimension_builder.rb
108
+ - lib/etl/commands/etl.rb
109
+ - lib/etl/control/control.rb
110
+ - lib/etl/control/destination
111
+ - lib/etl/control/destination.rb
112
+ - lib/etl/control/source
113
+ - lib/etl/control/source.rb
114
+ - lib/etl/control/destination/database_destination.rb
115
+ - lib/etl/control/destination/file_destination.rb
116
+ - lib/etl/control/source/database_source.rb
117
+ - lib/etl/control/source/enumerable_source.rb
118
+ - lib/etl/control/source/file_source.rb
119
+ - lib/etl/control/source/model_source.rb
120
+ - lib/etl/core_ext/time
121
+ - lib/etl/core_ext/time.rb
122
+ - lib/etl/core_ext/time/calculations.rb
123
+ - lib/etl/execution/base.rb
124
+ - lib/etl/execution/batch.rb
125
+ - lib/etl/execution/job.rb
126
+ - lib/etl/execution/migration.rb
127
+ - lib/etl/generator/generator.rb
128
+ - lib/etl/generator/surrogate_key_generator.rb
129
+ - lib/etl/parser/apache_combined_log_parser.rb
130
+ - lib/etl/parser/delimited_parser.rb
131
+ - lib/etl/parser/fixed_width_parser.rb
132
+ - lib/etl/parser/parser.rb
133
+ - lib/etl/parser/sax_parser.rb
134
+ - lib/etl/parser/xml_parser.rb
135
+ - lib/etl/processor/block_processor.rb
136
+ - lib/etl/processor/bulk_import_processor.rb
137
+ - lib/etl/processor/check_exist_processor.rb
138
+ - lib/etl/processor/check_unique_processor.rb
139
+ - lib/etl/processor/copy_field_processor.rb
140
+ - lib/etl/processor/encode_processor.rb
141
+ - lib/etl/processor/hierarchy_exploder_processor.rb
142
+ - lib/etl/processor/print_row_processor.rb
143
+ - lib/etl/processor/processor.rb
144
+ - lib/etl/processor/rename_processor.rb
145
+ - lib/etl/processor/require_non_blank_processor.rb
146
+ - lib/etl/processor/row_processor.rb
147
+ - lib/etl/processor/sequence_processor.rb
148
+ - lib/etl/processor/surrogate_key_processor.rb
149
+ - lib/etl/processor/truncate_processor.rb
150
+ - lib/etl/screen/row_count_screen.rb
151
+ - lib/etl/transform/block_transform.rb
152
+ - lib/etl/transform/date_to_string_transform.rb
153
+ - lib/etl/transform/decode_transform.rb
154
+ - lib/etl/transform/default_transform.rb
155
+ - lib/etl/transform/foreign_key_lookup_transform.rb
156
+ - lib/etl/transform/hierarchy_lookup_transform.rb
157
+ - lib/etl/transform/ordinalize_transform.rb
158
+ - lib/etl/transform/sha1_transform.rb
159
+ - lib/etl/transform/string_to_date_transform.rb
160
+ - lib/etl/transform/string_to_datetime_transform.rb
161
+ - lib/etl/transform/string_to_time_transform.rb
162
+ - lib/etl/transform/transform.rb
163
+ - lib/etl/transform/trim_transform.rb
164
+ - lib/etl/transform/type_transform.rb
165
+ - examples/database.example.yml
166
+ has_rdoc: false
167
+ homepage: http://activewarehouse.rubyforge.org/etl
168
+ post_install_message:
169
+ rdoc_options:
170
+ - --exclude
171
+ - .
172
+ require_paths:
173
+ - lib
174
+ required_ruby_version: !ruby/object:Gem::Requirement
175
+ requirements:
176
+ - - ">="
177
+ - !ruby/object:Gem::Version
178
+ version: "0"
179
+ version:
180
+ required_rubygems_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ">="
183
+ - !ruby/object:Gem::Version
184
+ version: "0"
185
+ version:
186
+ requirements: []
187
+
188
+ rubyforge_project: activewarehouse
189
+ rubygems_version: 1.2.0
190
+ signing_key:
191
+ specification_version: 2
192
+ summary: Pure Ruby ETL package.
193
+ test_files: []
194
+