activewarehouse-etl 0.9.1 → 0.9.5.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (135) hide show
  1. data/.gitignore +7 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +182 -150
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +9 -0
  6. data/README +18 -2
  7. data/Rakefile +35 -91
  8. data/active_support_logger.patch +78 -0
  9. data/activewarehouse-etl.gemspec +30 -0
  10. data/lib/etl.rb +10 -2
  11. data/lib/etl/batch/directives.rb +11 -1
  12. data/lib/etl/control/control.rb +2 -2
  13. data/lib/etl/control/destination.rb +27 -7
  14. data/lib/etl/control/destination/database_destination.rb +8 -6
  15. data/lib/etl/control/destination/excel_destination.rb +91 -0
  16. data/lib/etl/control/destination/file_destination.rb +6 -4
  17. data/lib/etl/control/destination/insert_update_database_destination.rb +133 -0
  18. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  19. data/lib/etl/control/source.rb +3 -2
  20. data/lib/etl/control/source/database_source.rb +14 -10
  21. data/lib/etl/control/source/file_source.rb +2 -2
  22. data/lib/etl/engine.rb +17 -15
  23. data/lib/etl/execution.rb +0 -1
  24. data/lib/etl/execution/batch.rb +3 -1
  25. data/lib/etl/execution/migration.rb +5 -0
  26. data/lib/etl/parser/delimited_parser.rb +20 -1
  27. data/lib/etl/parser/excel_parser.rb +112 -0
  28. data/lib/etl/processor/bulk_import_processor.rb +4 -2
  29. data/lib/etl/processor/database_join_processor.rb +68 -0
  30. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  31. data/lib/etl/processor/filter_row_processor.rb +51 -0
  32. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  33. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  34. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  35. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  36. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  37. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  38. data/lib/etl/processor/zip_file_processor.rb +27 -0
  39. data/lib/etl/transform/calculation_transform.rb +71 -0
  40. data/lib/etl/transform/foreign_key_lookup_transform.rb +25 -7
  41. data/lib/etl/transform/ordinalize_transform.rb +3 -1
  42. data/lib/etl/transform/split_fields_transform.rb +27 -0
  43. data/lib/etl/version.rb +1 -7
  44. data/test-matrix.yml +10 -0
  45. data/test/.gitignore +1 -0
  46. data/test/.ignore +2 -0
  47. data/test/all.ebf +6 -0
  48. data/test/apache_combined_log.ctl +11 -0
  49. data/test/batch_test.rb +41 -0
  50. data/test/batch_with_error.ebf +6 -0
  51. data/test/batched1.ctl +0 -0
  52. data/test/batched2.ctl +0 -0
  53. data/test/block_processor.ctl +6 -0
  54. data/test/block_processor_error.ctl +1 -0
  55. data/test/block_processor_pre_post_process.ctl +4 -0
  56. data/test/block_processor_remove_rows.ctl +5 -0
  57. data/test/block_processor_test.rb +38 -0
  58. data/test/config/Gemfile.rails-2.3.x +3 -0
  59. data/test/config/Gemfile.rails-2.3.x.lock +38 -0
  60. data/test/config/Gemfile.rails-3.0.x +3 -0
  61. data/test/config/Gemfile.rails-3.0.x.lock +49 -0
  62. data/test/config/common.rb +21 -0
  63. data/test/connection/mysql/connection.rb +9 -0
  64. data/test/connection/mysql/schema.sql +36 -0
  65. data/test/connection/postgresql/connection.rb +13 -0
  66. data/test/connection/postgresql/schema.sql +39 -0
  67. data/test/control_test.rb +43 -0
  68. data/test/data/apache_combined_log.txt +3 -0
  69. data/test/data/bulk_import.txt +3 -0
  70. data/test/data/bulk_import_with_empties.txt +3 -0
  71. data/test/data/decode.txt +3 -0
  72. data/test/data/delimited.txt +3 -0
  73. data/test/data/encode_source_latin1.txt +2 -0
  74. data/test/data/excel.xls +0 -0
  75. data/test/data/excel2.xls +0 -0
  76. data/test/data/fixed_width.txt +3 -0
  77. data/test/data/multiple_delimited_1.txt +3 -0
  78. data/test/data/multiple_delimited_2.txt +3 -0
  79. data/test/data/people.txt +3 -0
  80. data/test/data/sax.xml +14 -0
  81. data/test/data/xml.xml +16 -0
  82. data/test/date_dimension_builder_test.rb +96 -0
  83. data/test/delimited.ctl +30 -0
  84. data/test/delimited_absolute.ctl +33 -0
  85. data/test/delimited_destination_db.ctl +25 -0
  86. data/test/delimited_excel.ctl +31 -0
  87. data/test/delimited_insert_update.ctl +34 -0
  88. data/test/delimited_update.ctl +34 -0
  89. data/test/delimited_with_bulk_load.ctl +34 -0
  90. data/test/destination_test.rb +275 -0
  91. data/test/directive_test.rb +23 -0
  92. data/test/encode_processor_test.rb +32 -0
  93. data/test/engine_test.rb +32 -0
  94. data/test/errors.ctl +24 -0
  95. data/test/etl_test.rb +42 -0
  96. data/test/excel.ctl +24 -0
  97. data/test/excel2.ctl +25 -0
  98. data/test/fixed_width.ctl +35 -0
  99. data/test/generator_test.rb +14 -0
  100. data/test/inline_parser.ctl +17 -0
  101. data/test/mocks/mock_destination.rb +26 -0
  102. data/test/mocks/mock_source.rb +25 -0
  103. data/test/model_source.ctl +14 -0
  104. data/test/multiple_delimited.ctl +22 -0
  105. data/test/multiple_source_delimited.ctl +39 -0
  106. data/test/parser_test.rb +224 -0
  107. data/test/performance/delimited.ctl +30 -0
  108. data/test/processor_test.rb +44 -0
  109. data/test/row_processor_test.rb +17 -0
  110. data/test/sax.ctl +26 -0
  111. data/test/scd/1.txt +1 -0
  112. data/test/scd/2.txt +1 -0
  113. data/test/scd/3.txt +1 -0
  114. data/test/scd_test.rb +257 -0
  115. data/test/scd_test_type_1.ctl +43 -0
  116. data/test/scd_test_type_2.ctl +34 -0
  117. data/test/screen_test.rb +9 -0
  118. data/test/screen_test_error.ctl +3 -0
  119. data/test/screen_test_fatal.ctl +3 -0
  120. data/test/source_test.rb +139 -0
  121. data/test/test_helper.rb +34 -0
  122. data/test/transform_test.rb +101 -0
  123. data/test/vendor/adapter_extensions-0.5.0/CHANGELOG +26 -0
  124. data/test/vendor/adapter_extensions-0.5.0/LICENSE +16 -0
  125. data/test/vendor/adapter_extensions-0.5.0/README +7 -0
  126. data/test/vendor/adapter_extensions-0.5.0/Rakefile +158 -0
  127. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions.rb +12 -0
  128. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/abstract_adapter.rb +44 -0
  129. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/mysql_adapter.rb +63 -0
  130. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/postgresql_adapter.rb +52 -0
  131. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/sqlserver_adapter.rb +44 -0
  132. data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/version.rb +10 -0
  133. data/test/xml.ctl +31 -0
  134. metadata +229 -70
  135. data/lib/etl/execution/record.rb +0 -18
@@ -0,0 +1,7 @@
1
+ pkg/*
2
+ source_data
3
+ test/output/*
4
+ rdoc
5
+ .rvmrc
6
+ .bundle
7
+ *.gem
@@ -0,0 +1,6 @@
1
+ The 0.9 revision of ActiveWarehouse ETL significantly changes how connections are maintained. This release is not backwards compatible.
2
+
3
+ To upgrade, you must do the following:
4
+
5
+ 1.) All database connections used in ETL control files must be declared in database.yml in the directory that contains your ETL control files.
6
+ 2.) All sources, destinations, transforms and processors that use a database connection must include the configuration name/value pair of :target => 'name' where name is replaced with the connection name defined in database.yml. Connection information should no longer be included in control files.
data/CHANGELOG CHANGED
@@ -1,89 +1,113 @@
1
- 0.1.0 - Dec 6, 2006
2
- * Initial release
1
+ 0.9.5 - unreleased
2
+ * Fixes for Rails 3 (gkfabs)
3
+ * Fixes for Ruby 1.9.2 (jlecour, byrnejb, thbar)
4
+ * improvements on CalculationTransform (gkfabs)
5
+ * batch can have children (gkfabs)
6
+ * read all columns by default in DelimitedSource (smeyfroi)
7
+ * Turn db config into a HashWithIndifferentAccess (smeyfroi)
8
+ * DatabaseJoinProcessor (gkfabs)
9
+ * ImapattachmentDownloaderProcessor (gkfabs)
10
+ * Pop3attachmentDownloaderProcessor (gkfabs)
11
+ * Enhancements and fixes on EscapeCSVProcessor (gkfabs)
12
+ * Bug correction on hash initialization DatabaseSource (gkfabs)
13
+ * FilterRowProcessor won't return nil (gkfabs)
14
+ * DatabaseSource fixes (gkfabs)
15
+ * New UpdateDatabaseDestination (gkfabs)
16
+ * New InsertUpdateDatabaseDestination (gkfabs)
17
+ * New Excel destination (gkfabs)
18
+ * New Excel parser (gkfabs)
19
+ * Add ability to use a query in DatabaseSource (gkfabs)
20
+ * Modified SQLResolver to allow for multiple key lookups on dimension
21
+ table (cdimartino)
22
+ * Add scd_required_fields, which removes the need to specify the scd fields when specifying :unique in a destination (darrell)
23
+ * More SCD debug logging (darrell)
24
+ * Make database destination table quoting database agnostic (darrell)
25
+ * Allow absolute paths in file destinations and file bulk imports (darrell)
26
+ * Bug fixes on engine, destination (sasikumargn, cdimartino, darrell)
27
+ * Code and tests clean-up (mainej, aeden, thbar)
28
+ * No more failure on tests (thbar, gkfabs)
29
+ * Add the ability to automatically run the tests on a matrix of configurations including Rails 2/3, Ruby 1.8.7/1.9.2, MySQL/Postgresql (thbar)
30
+ * Use bundler for gem packaging and release (thbar)
31
+
32
+ 0.9.1 - January 14, 2009
33
+ * SQLResolver now uses ETL::Engine.table so it may utilize temp tables. (aeden)
34
+ * Added Thibaut Barrère's encode processor.
35
+ * Added MockSource and MockDestination test helpers (thbar)
36
+ * Added the block processor. Can call a block once (pre/post processor)
37
+ or once for each row (after_read/before_write row processor) (thbar)
38
+ * Changed temp table to use new AdapterExtension copy_table method (aeden)
39
+ * Added bin/etl.cmd windows batch - just add the bin folder to your PATH
40
+ and it will let you call etl on an unpacked/pistoned version of AW-ETL (thbar)
41
+ * Upgraded to support Rails 2.1. No longer compatible with older versions of Rails.
42
+ * Added ETL::Builder::TimeDimensionBuilder
43
+ * Added :default option to ForeignKeyLookupTransform that will be used if no
44
+ foreign key is found.
45
+ * Added :cache option to ForeignKeyLookupTransform that will preload the FK
46
+ mappings if the underlying resolver supports it. Currently supported by
47
+ SQLResolver.
48
+ * A Class extending ETL::Transform::Transform may now be passed as a transformer.
49
+ For example, in the control file you would define the transform as:
50
+ transform :a_field, MyTransform, {:option1 => 'option1'}.
51
+ * Support Ruby 1.9 CSV library
3
52
 
4
- 0.2.0 - Dec 7, 2006
5
- * Added an XML parser for source parsing
6
- * Added support for compound key constraints in destinations via the
7
- :unique => [] option
8
- * Added ability to declare explicit columns in bulk import
9
- * Added support for generators in destinations
10
- * Added a SurrogateKeyGenerator for cases where the database doesn't support
11
- auto generated surrogate keys
53
+ 0.9.0 - August 9, 2007
54
+ * Added support for batch processing through .ebf files. These files are
55
+ essentially control files that apply settings to an entire ETL process.
56
+ * Implemented support for screen blocks. These blocks can be used to test
57
+ the data and raise an error if the screens do not pass.
58
+ * Connections are now cached in a Hash available through
59
+ ETL::Engine.connection(name). This should be used rather than including
60
+ connection information in the control files.
61
+ * Implemented temp table support throughout.
62
+ * DateDimensionBuilder now included in ActiveWarehouse ETL directly.
63
+ * Time calculations for fiscal year now included in ActiveWarehouse ETL.
12
64
 
13
- 0.3.0 - Dec 19, 2006
14
- * Added support for calculated values in virtual fields with Proc
65
+ 0.8.4 - May 24, 2007
66
+ * Added fix for backslash in file writer
15
67
 
16
- 0.4.0 - Jan 11, 2006
17
- * Added :skip_lines option to file source configurations, which can be used
18
- to skip the first n lines in the source data file
19
- * Added better error handling in delimited parser - an error is now raised
20
- if the expected and actual field lengths do not match
21
- * Added :truncate option for database destination. Set to true to truncate
22
- before importing data.
23
- * Added support for :unique => [] option and virtual fields for the database
24
- destination
68
+ 0.8.3 - May 13, 2007
69
+ * Added patches from Andy Triboletti
25
70
 
26
- 0.5.0 - Feb 17, 2007
27
- * Changed require_gem to gem and added alias to allow for older versions of
28
- rubygems.
29
- * Added support for Hash in the source configuration where :name => :parser_name
30
- defines the parser to use and :options => {} defines options to pass to the
31
- parser.
32
- * Added support for passing a custom Parser class in the source configuration.
33
- * Removed the need to include Enumerable in each parser implementation.
34
- * Added new date_to_string and string_to_date transformers.
35
- * Implemented foreign_key_lookup transform including an ActiveRecordResolver.
36
- * Added real time activity logging which is called when the etl bin script is
37
- invoked.
38
- * Improved error handling.
39
- * Default logger level is now WARN.
71
+ 0.8.2 - April 15, 2007
72
+ * Fixed bug with premature destination closing.
73
+ * Added indexes to execution records table.
74
+ * Added a PrintRowProcessor.
75
+ * Added support for conditions and "group by" in the database source.
76
+ * Added after_initialize hook in Processor base class.
77
+ * Added examples directory
40
78
 
41
- 0.5.1 - Feb 18, 2007
42
- * Fixed up truncate processor.
43
- * Updated HOW_TO_RELEASE doc.
79
+ 0.8.1 - Apr 12, 2007
80
+ * Added EnumerableSource
81
+ * Added :type configuration option to the source directive, allowing the source
82
+ type to be explicitly specified. The source type can be a string or symbol
83
+ (in which case the class will be constructed by appending Source to the type
84
+ name), a class (which will be instantiated and passed the control,
85
+ configuration and mapping) and finally an actual Source instance.
44
86
 
45
- 0.5.2 - Feb 19, 2007
46
- * Added error threshold.
47
- * Fixed problem with transform error handling.
87
+ 0.8.0 - Apr 12, 2007
88
+ * Source now available through the current row source accessor.
89
+ * Added new_rows_only configuration option to DatabaseSource. A date field must
90
+ be specified and only records that are greater than the date value in that
91
+ field, relative to the last successful
92
+ execution, will be returned from the source.
93
+ * Added an (untested) count feature which returns the number of rows for
94
+ processing.
95
+ * If no natural key is defined then an empty array will now be used, resulting
96
+ in the row being written to the output without going through change checks.
97
+ * Mapping argument in destination is now optional. An empty hash will be used
98
+ if the mapping hash is not specified. If the mapping hash is not specified
99
+ then the order will be determined using the originating source's order.
100
+ * ActiveRecord configurations loaded from database.yml by the etl tool will be
101
+ merged with ActiveRecord::Base.configurations.
102
+ * Fixed several bugs in how record change detection was implemented.
103
+ * Fixed how the read_locally functionality was implemented so that it will find
104
+ the last completed local source copy using the source's trigger file (untested).
48
105
 
49
- 0.6.0 - Mar 8, 2007
50
- * Fixed missing method problem in validate in Control class.
51
- * Removed control validation for now (source could be code in the control file).
52
- * Transform interface now defined as taking 3 arguments, the field name, field
53
- value and the row. This is not backwards compatible.
54
- * Added HierarchyLookupTransform.
55
- * Added DefaultTransform which will return a specified value if the initial
56
- value is blank.
57
- * Added row-level processing.
58
- * Added HierarchyExploderProcessor which takes a single hierarchy row and
59
- explodes it to multiple rows as used in a hierarchy bridge.
60
- * Added ApacheCombinedLogParser which parses Apache Combined Log format,
61
- including parsing of the
62
- user agent string and the URI, returning a Hash.
63
- * Fixed bug in SAX parser so that attributes are now set when the start_element
64
- event is received.
65
- * Added an HttpTools module which provides some parsing methods (for user agent
66
- and URI).
67
- * Database source now uses its own class for establishing an ActiveRecord
68
- connection.
69
- * Log files are now timestamped.
70
- * Source files are now archived automatically during the extraction process
71
- * Added a :condition option to the destination configuration Hash that accepts
72
- a Proc with a single argument passed to it (the row).
73
- * Added an :append_rows option to the destination configuration Hash that
74
- accepts either a Hash (to append a single row) or an Array of Hashes (to
75
- append multiple rows).
76
- * Only print the read and written row counts if there is at least one source
77
- and one destination respectively.
78
- * Added a depends_on directive that accepts a list of arguments of either strings
79
- or symbols. Each symbol is converted to a string and .ctl is appended;
80
- strings are passed through directly. The dependencies are executed in the order
81
- they are specified.
82
- * The default field separator in the bulk loader is now a comma (was a tab).
106
+ 0.7.2 - Apr 8, 2007
107
+ * Fixed quoting bug in CheckExistProcessor
83
108
 
84
- 0.6.1 - Mar 22, 2007
85
- * Added support for absolute paths in file sources
86
- * Added CopyFieldProcessor
109
+ 0.7.1 - Apr 8, 2007
110
+ * Fixed source caching
87
111
 
88
112
  0.7 - Apr 8, 2007
89
113
  * Job execution is now tracked in a database. This means that ActiveRecord is
@@ -118,81 +142,89 @@
118
142
  This is useful if you need to recover from a failed ETL process.
119
143
  * Updated README
120
144
 
121
- 0.7.1 - Apr 8, 2007
122
- * Fixed source caching
145
+ 0.6.1 - Mar 22, 2007
146
+ * Added support for absolute paths in file sources
147
+ * Added CopyFieldProcessor
123
148
 
124
- 0.7.2 - Apr 8, 2007
125
- * Fixed quoting bug in CheckExistProcessor
149
+ 0.6.0 - Mar 8, 2007
150
+ * Fixed missing method problem in validate in Control class.
151
+ * Removed control validation for now (source could be code in the control file).
152
+ * Transform interface now defined as taking 3 arguments, the field name, field
153
+ value and the row. This is not backwards compatible.
154
+ * Added HierarchyLookupTransform.
155
+ * Added DefaultTransform which will return a specified value if the initial
156
+ value is blank.
157
+ * Added row-level processing.
158
+ * Added HierarchyExploderProcessor which takes a single hierarchy row and
159
+ explodes it to multiple rows as used in a hierarchy bridge.
160
+ * Added ApacheCombinedLogParser which parses Apache Combined Log format,
161
+ including parsing of the
162
+ user agent string and the URI, returning a Hash.
163
+ * Fixed bug in SAX parser so that attributes are now set when the start_element
164
+ event is received.
165
+ * Added an HttpTools module which provides some parsing methods (for user agent
166
+ and URI).
167
+ * Database source now uses its own class for establishing an ActiveRecord
168
+ connection.
169
+ * Log files are now timestamped.
170
+ * Source files are now archived automatically during the extraction process
171
+ * Added a :condition option to the destination configuration Hash that accepts
172
+ a Proc with a single argument passed to it (the row).
173
+ * Added an :append_rows option to the destination configuration Hash that
174
+ accepts either a Hash (to append a single row) or an Array of Hashes (to
175
+ append multiple rows).
176
+ * Only print the read and written row counts if there is at least one source
177
+ and one destination respectively.
178
+ * Added a depends_on directive that accepts a list of arguments of either strings
179
+ or symbols. Each symbol is converted to a string and .ctl is appended;
180
+ strings are passed through directly. The dependencies are executed in the order
181
+ they are specified.
182
+ * The default field separator in the bulk loader is now a comma (was a tab).
126
183
 
127
- 0.8.0 - Apr 12, 2007
128
- * Source now available through the current row source accessor.
129
- * Added new_rows_only configuration option to DatabaseSource. A date field must
130
- be specified and only records that are greater than the date value in that
131
- field, relative to the last successful
132
- execution, will be returned from the source.
133
- * Added an (untested) count feature which returns the number of rows for
134
- processing.
135
- * If no natural key is defined then an empty array will now be used, resulting
136
- in the row being written to the output without going through change checks.
137
- * Mapping argument in destination is now optional. An empty hash will be used
138
- if the mapping hash is not specified. If the mapping hash is not specified
139
- then the order will be determined using the originating source's order.
140
- * ActiveRecord configurations loaded from database.yml by the etl tool will be
141
- merged with ActiveRecord::Base.configurations.
142
- * Fixed several bugs in how record change detection was implemented.
143
- * Fixed how the read_locally functionality was implemented so that it will find
144
- the last completed local source copy using the source's trigger file (untested).
145
-
146
- 0.8.1 - Apr 12, 2007
147
- * Added EnumerableSource
148
- * Added :type configuration option to the source directive, allowing the source
149
- type to be explicitly specified. The source type can be a string or symbol
150
- (in which case the class will be constructed by appending Source to the type
151
- name), a class (which will be instantiated and passed the control,
152
- configuration and mapping) and finally an actual Source instance.
153
-
154
- 0.8.2 - April 15, 2007
155
- * Fixed bug with premature destination closing.
156
- * Added indexes to execution records table.
157
- * Added a PrintRowProcessor.
158
- * Added support for conditions and "group by" in the database source.
159
- * Added after_initialize hook in Processor base class.
160
- * Added examples directory
184
+ 0.5.2 - Feb 19, 2007
185
+ * Added error threshold.
186
+ * Fixed problem with transform error handling.
161
187
 
162
- 0.8.3 - May 13, 2007
163
- * Added patches from Andy Triboletti
188
+ 0.5.1 - Feb 18, 2007
189
+ * Fixed up truncate processor.
190
+ * Updated HOW_TO_RELEASE doc.
164
191
 
165
- 0.8.4 - May 24, 2007
166
- * Added fix for backslash in file writer
192
+ 0.5.0 - Feb 17, 2007
193
+ * Changed require_gem to gem and added alias to allow for older versions of
194
+ rubygems.
195
+ * Added support for Hash in the source configuration where :name => :parser_name
196
+ defines the parser to use and :options => {} defines options to pass to the
197
+ parser.
198
+ * Added support for passing a custom Parser class in the source configuration.
199
+ * Removed the need to include Enumerable in each parser implementation.
200
+ * Added new date_to_string and string_to_date transformers.
201
+ * Implemented foreign_key_lookup transform including an ActiveRecordResolver.
202
+ * Added real time activity logging which is called when the etl bin script is
203
+ invoked.
204
+ * Improved error handling.
205
+ * Default logger level is now WARN.
167
206
 
168
- 0.9.0 - August 9, 2007
169
- * Added support for batch processing through .ebf files. These files are
170
- essentially control files that apply settings to an entire ETL process.
171
- * Implemented support for screen blocks. These blocks can be used to test
172
- the data and raise an error if the screens do not pass.
173
- * Connections are now cached in a Hash available through
174
- ETL::Engine.connection(name). This should be used rather than including
175
- connection information in the control files.
176
- * Implemented temp table support throughout.
177
- * DateDimensionBuilder now included in ActiveWarehouse ETL directly.
178
- * Time calculations for fiscal year now included in ActiveWarehouse ETL.
207
+ 0.4.0 - Jan 11, 2006
208
+ * Added :skip_lines option to file source configurations, which can be used
209
+ to skip the first n lines in the source data file
210
+ * Added better error handling in delimited parser - an error is now raised
211
+ if the expected and actual field lengths do not match
212
+ * Added :truncate option for database destination. Set to true to truncate
213
+ before importing data.
214
+ * Added support for :unique => [] option and virtual fields for the database
215
+ destination
179
216
 
180
- 0.9.1 -
181
- * SQLResolver now uses ETL::Engine.table so it may utilize temp tables. (aeden)
182
- * Added Thibaut Barrère's encode processor.
183
- * Added MockSource and MockDestination test helpers (thbar)
184
- * Added the block processor. Can call a block once (pre/post processor)
185
- or once for each row (after_read/before_write row processor) (thbar)
186
- * Changed temp table to use new AdapterExtension copy_table method (aeden)
187
- * Added bin/etl.cmd windows batch - just add the bin folder to your PATH
188
- and it will let you call etl on an unpacked/pistoned version of AW-ETL (thbar)
189
- * Upgraded to support Rails 2.1. No longer compatible with older versions of Rails.
190
- * Added ETL::Builder::TimeDimensionBuilder
191
- * Added :default option to ForeignKeyLookupTransform that will be used if no
192
- foreign key is found.
193
- * Added :cache option to ForeignKeyLookupTransform that will preload the FK
194
- mappings if the underlying resolver supports it. Currently supported by
195
- SQLResolver.
196
- * A Class extending ETL::Transform::Transform may now be passed as a transformer.
197
- For example, in the control file you would define the transform as:
198
- transform :a_field, MyTransform, {:option1 => 'option1'}.
217
+ 0.3.0 - Dec 19, 2006
218
+ * Added support for calculated values in virtual fields with Proc
219
+
220
+ 0.2.0 - Dec 7, 2006
221
+ * Added an XML parser for source parsing
222
+ * Added support for compound key constraints in destinations via the
223
+ :unique => [] option
224
+ * Added ability to declare explicit columns in bulk import
225
+ * Added support for generators in destinations
226
+ * Added a SurrogateKeyGenerator for cases where the database doesn't support
227
+ auto generated surrogate keys
228
+
229
+ 0.1.0 - Dec 6, 2006
230
+ * Initial release
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in activewarehouse-etl.gemspec
4
+ gemspec
@@ -0,0 +1,9 @@
1
+ * update lib/etl/version
2
+ * push your changes
3
+ * then use bundler to build + git tag + push to rubygems
4
+
5
+ rake release
6
+
7
+ * you can list changes using github:
8
+
9
+ https://github.com/activewarehouse/activewarehouse-etl/compare/release-0.9.1...master
data/README CHANGED
@@ -7,7 +7,7 @@ Ruby Extract-Transform-Load (ETL) tool.
7
7
 
8
8
  == Online Documentation
9
9
 
10
- Available at http://activewarehouse.rubyforge.org/docs/activewarehouse-etl.html
10
+ Available at https://github.com/activewarehouse/activewarehouse-etl/wiki
11
11
 
12
12
  == Features
13
13
 
@@ -77,7 +77,23 @@ Command line options:
77
77
  Control file examples can be found in the examples directory.
78
78
 
79
79
  == Running Tests
80
- The tests require Shoulda 1.x.
80
+
81
+ Current state:
82
+ - 11 failures on MySQL
83
+ - 1 failure on Postgres
84
+
85
+ The tests require:
86
+ - gem install shoulda
87
+ - gem install flexmock
88
+ - gem install pg (if you want to run the tests on pg)
89
+ - gem install spreadsheet
90
+ - gem install tmail
91
+
92
+ The test subfolder contains example database.yml files for mysql and postgres.
93
+
94
+ To run the tests:
95
+ - rake test DB=postgresql (for postgres)
96
+ - otherwise just rake test
81
97
 
82
98
  == Feedback
83
99
  This is a work in progress. Comments should be made on the
data/Rakefile CHANGED
@@ -1,21 +1,41 @@
1
+ require 'bundler/gem_tasks'
2
+
1
3
  require 'rake'
2
4
  require 'rake/testtask'
3
- require 'rake/rdoctask'
4
- require 'rake/packagetask'
5
- require 'rake/gempackagetask'
6
- require 'rake/contrib/rubyforgepublisher'
7
-
8
- require File.join(File.dirname(__FILE__), 'lib/etl', 'version')
9
-
10
- module AWETL
11
- PKG_BUILD = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
12
- PKG_NAME = 'activewarehouse-etl'
13
- PKG_VERSION = ETL::VERSION::STRING + PKG_BUILD
14
- PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
15
- PKG_DESTINATION = ENV["PKG_DESTINATION"] || "../#{PKG_NAME}"
5
+ require 'rdoc'
6
+ require 'rdoc/task'
7
+
8
+ namespace :test do
9
+
10
+ def run_tests(rvm, rails, database)
11
+ database_yml = File.dirname(__FILE__) + "/test/config/database.#{database}.yml"
12
+ FileUtils.cp(database_yml, 'test/database.yml')
13
+
14
+ puts
15
+ puts "============ Ruby #{rvm} - Rails #{rails} - Db #{database} ============="
16
+ puts
17
+
18
+ sh <<-BASH
19
+ BUNDLE_GEMFILE=test/config/Gemfile.rails-#{rails} bundle install > null
20
+ BUNDLE_GEMFILE=test/config/Gemfile.rails-#{rails} rvm #{rvm} rake test
21
+ BASH
22
+ end
23
+
24
+ desc 'Run the tests in all combinations described in test-matrix.yml'
25
+ task :matrix do
26
+ # a la travis
27
+ require 'yaml'
28
+ data = YAML.load(IO.read(File.dirname(__FILE__) + '/test-matrix.yml'))
29
+ data['rvm'].each do |rvm|
30
+ data['rails'].each do |rails|
31
+ data['database'].each do |database|
32
+ run_tests(rvm, rails, database)
33
+ end
34
+ end
35
+ end
36
+ end
16
37
  end
17
38
 
18
- desc 'Default: run unit tests.'
19
39
  task :default => :test
20
40
 
21
41
  desc 'Test the ETL application.'
@@ -46,62 +66,6 @@ namespace :rcov do
46
66
  end
47
67
  end
48
68
 
49
- # Gem Spec
50
-
51
- module AWETL
52
- def self.package_files(package_prefix)
53
- FileList[
54
- "#{package_prefix}CHANGELOG",
55
- "#{package_prefix}LICENSE",
56
- "#{package_prefix}README",
57
- "#{package_prefix}TODO",
58
- "#{package_prefix}Rakefile",
59
- "#{package_prefix}bin/**/*",
60
- "#{package_prefix}doc/**/*",
61
- "#{package_prefix}lib/**/*",
62
- "#{package_prefix}examples/**/*",
63
- ] - [ "#{package_prefix}test" ]
64
- end
65
-
66
- def self.spec(package_prefix = '')
67
- Gem::Specification.new do |s|
68
- s.name = 'activewarehouse-etl'
69
- s.version = AWETL::PKG_VERSION
70
- s.summary = "Pure Ruby ETL package."
71
- s.description = <<-EOF
72
- ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
73
- EOF
74
-
75
- s.add_dependency('rake', '>= 0.7.1')
76
- s.add_dependency('activesupport', '>= 1.3.1')
77
- s.add_dependency('activerecord', '>= 1.14.4')
78
- s.add_dependency('fastercsv', '>= 1.2.0')
79
- s.add_dependency('adapter_extensions', '>= 0.1.0')
80
-
81
- s.rdoc_options << '--exclude' << '.'
82
- s.has_rdoc = false
83
-
84
- s.files = package_files(package_prefix).to_a.delete_if {|f| f.include?('.svn')}
85
- s.require_path = 'lib'
86
-
87
- s.bindir = "#{package_prefix}bin" # Use these for applications.
88
- s.executables = ['etl']
89
- s.default_executable = "etl"
90
-
91
- s.author = "Anthony Eden"
92
- s.email = "anthonyeden@gmail.com"
93
- s.homepage = "http://activewarehouse.rubyforge.org/etl"
94
- s.rubyforge_project = "activewarehouse"
95
- end
96
- end
97
- end
98
-
99
- Rake::GemPackageTask.new(AWETL.spec) do |pkg|
100
- pkg.gem_spec = AWETL.spec
101
- pkg.need_tar = true
102
- pkg.need_zip = true
103
- end
104
-
105
69
  desc "Generate code statistics"
106
70
  task :lines do
107
71
  lines, codelines, total_lines, total_codelines = 0, 0, 0, 0
@@ -127,27 +91,7 @@ task :lines do
127
91
  puts "Total: Lines #{total_lines}, LOC #{total_codelines}"
128
92
  end
129
93
 
130
- desc "Publish the release files to RubyForge."
131
- task :release => [ :package ] do
132
- `rubyforge login`
133
-
134
- for ext in %w( gem tgz zip )
135
- release_command = "rubyforge add_release activewarehouse #{AWETL::PKG_NAME} 'REL #{AWETL::PKG_VERSION}' pkg/#{AWETL::PKG_NAME}-#{AWETL::PKG_VERSION}.#{ext}"
136
- puts release_command
137
- system(release_command)
138
- end
139
- end
140
-
141
- desc "Publish the API documentation"
94
+ desc "Publish the API documentation (UNTESTED CURRENTLY)"
142
95
  task :pdoc => [:rdoc] do
143
96
  Rake::SshDirPublisher.new("aeden@rubyforge.org", "/var/www/gforge-projects/activewarehouse/etl/rdoc", "rdoc").upload
144
97
  end
145
-
146
- desc "Reinstall the gem from a local package copy"
147
- task :reinstall => [:package] do
148
- windows = RUBY_PLATFORM =~ /mswin/
149
- sudo = windows ? '' : 'sudo'
150
- gem = windows ? 'gem.bat' : 'gem'
151
- `#{sudo} #{gem} uninstall #{AWETL::PKG_NAME} -x`
152
- `#{sudo} #{gem} install pkg/#{AWETL::PKG_NAME}-#{AWETL::PKG_VERSION}`
153
- end