activewarehouse-etl 0.9.1 → 0.9.5.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/0.9-UPGRADE +6 -0
- data/CHANGELOG +182 -150
- data/Gemfile +4 -0
- data/HOW_TO_RELEASE +9 -0
- data/README +18 -2
- data/Rakefile +35 -91
- data/active_support_logger.patch +78 -0
- data/activewarehouse-etl.gemspec +30 -0
- data/lib/etl.rb +10 -2
- data/lib/etl/batch/directives.rb +11 -1
- data/lib/etl/control/control.rb +2 -2
- data/lib/etl/control/destination.rb +27 -7
- data/lib/etl/control/destination/database_destination.rb +8 -6
- data/lib/etl/control/destination/excel_destination.rb +91 -0
- data/lib/etl/control/destination/file_destination.rb +6 -4
- data/lib/etl/control/destination/insert_update_database_destination.rb +133 -0
- data/lib/etl/control/destination/update_database_destination.rb +109 -0
- data/lib/etl/control/source.rb +3 -2
- data/lib/etl/control/source/database_source.rb +14 -10
- data/lib/etl/control/source/file_source.rb +2 -2
- data/lib/etl/engine.rb +17 -15
- data/lib/etl/execution.rb +0 -1
- data/lib/etl/execution/batch.rb +3 -1
- data/lib/etl/execution/migration.rb +5 -0
- data/lib/etl/parser/delimited_parser.rb +20 -1
- data/lib/etl/parser/excel_parser.rb +112 -0
- data/lib/etl/processor/bulk_import_processor.rb +4 -2
- data/lib/etl/processor/database_join_processor.rb +68 -0
- data/lib/etl/processor/escape_csv_processor.rb +77 -0
- data/lib/etl/processor/filter_row_processor.rb +51 -0
- data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
- data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
- data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
- data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
- data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
- data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
- data/lib/etl/processor/zip_file_processor.rb +27 -0
- data/lib/etl/transform/calculation_transform.rb +71 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +25 -7
- data/lib/etl/transform/ordinalize_transform.rb +3 -1
- data/lib/etl/transform/split_fields_transform.rb +27 -0
- data/lib/etl/version.rb +1 -7
- data/test-matrix.yml +10 -0
- data/test/.gitignore +1 -0
- data/test/.ignore +2 -0
- data/test/all.ebf +6 -0
- data/test/apache_combined_log.ctl +11 -0
- data/test/batch_test.rb +41 -0
- data/test/batch_with_error.ebf +6 -0
- data/test/batched1.ctl +0 -0
- data/test/batched2.ctl +0 -0
- data/test/block_processor.ctl +6 -0
- data/test/block_processor_error.ctl +1 -0
- data/test/block_processor_pre_post_process.ctl +4 -0
- data/test/block_processor_remove_rows.ctl +5 -0
- data/test/block_processor_test.rb +38 -0
- data/test/config/Gemfile.rails-2.3.x +3 -0
- data/test/config/Gemfile.rails-2.3.x.lock +38 -0
- data/test/config/Gemfile.rails-3.0.x +3 -0
- data/test/config/Gemfile.rails-3.0.x.lock +49 -0
- data/test/config/common.rb +21 -0
- data/test/connection/mysql/connection.rb +9 -0
- data/test/connection/mysql/schema.sql +36 -0
- data/test/connection/postgresql/connection.rb +13 -0
- data/test/connection/postgresql/schema.sql +39 -0
- data/test/control_test.rb +43 -0
- data/test/data/apache_combined_log.txt +3 -0
- data/test/data/bulk_import.txt +3 -0
- data/test/data/bulk_import_with_empties.txt +3 -0
- data/test/data/decode.txt +3 -0
- data/test/data/delimited.txt +3 -0
- data/test/data/encode_source_latin1.txt +2 -0
- data/test/data/excel.xls +0 -0
- data/test/data/excel2.xls +0 -0
- data/test/data/fixed_width.txt +3 -0
- data/test/data/multiple_delimited_1.txt +3 -0
- data/test/data/multiple_delimited_2.txt +3 -0
- data/test/data/people.txt +3 -0
- data/test/data/sax.xml +14 -0
- data/test/data/xml.xml +16 -0
- data/test/date_dimension_builder_test.rb +96 -0
- data/test/delimited.ctl +30 -0
- data/test/delimited_absolute.ctl +33 -0
- data/test/delimited_destination_db.ctl +25 -0
- data/test/delimited_excel.ctl +31 -0
- data/test/delimited_insert_update.ctl +34 -0
- data/test/delimited_update.ctl +34 -0
- data/test/delimited_with_bulk_load.ctl +34 -0
- data/test/destination_test.rb +275 -0
- data/test/directive_test.rb +23 -0
- data/test/encode_processor_test.rb +32 -0
- data/test/engine_test.rb +32 -0
- data/test/errors.ctl +24 -0
- data/test/etl_test.rb +42 -0
- data/test/excel.ctl +24 -0
- data/test/excel2.ctl +25 -0
- data/test/fixed_width.ctl +35 -0
- data/test/generator_test.rb +14 -0
- data/test/inline_parser.ctl +17 -0
- data/test/mocks/mock_destination.rb +26 -0
- data/test/mocks/mock_source.rb +25 -0
- data/test/model_source.ctl +14 -0
- data/test/multiple_delimited.ctl +22 -0
- data/test/multiple_source_delimited.ctl +39 -0
- data/test/parser_test.rb +224 -0
- data/test/performance/delimited.ctl +30 -0
- data/test/processor_test.rb +44 -0
- data/test/row_processor_test.rb +17 -0
- data/test/sax.ctl +26 -0
- data/test/scd/1.txt +1 -0
- data/test/scd/2.txt +1 -0
- data/test/scd/3.txt +1 -0
- data/test/scd_test.rb +257 -0
- data/test/scd_test_type_1.ctl +43 -0
- data/test/scd_test_type_2.ctl +34 -0
- data/test/screen_test.rb +9 -0
- data/test/screen_test_error.ctl +3 -0
- data/test/screen_test_fatal.ctl +3 -0
- data/test/source_test.rb +139 -0
- data/test/test_helper.rb +34 -0
- data/test/transform_test.rb +101 -0
- data/test/vendor/adapter_extensions-0.5.0/CHANGELOG +26 -0
- data/test/vendor/adapter_extensions-0.5.0/LICENSE +16 -0
- data/test/vendor/adapter_extensions-0.5.0/README +7 -0
- data/test/vendor/adapter_extensions-0.5.0/Rakefile +158 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions.rb +12 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/abstract_adapter.rb +44 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/mysql_adapter.rb +63 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/postgresql_adapter.rb +52 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/sqlserver_adapter.rb +44 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/version.rb +10 -0
- data/test/xml.ctl +31 -0
- metadata +229 -70
- data/lib/etl/execution/record.rb +0 -18
data/.gitignore
ADDED
data/0.9-UPGRADE
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
The 0.9 revision of ActiveWarehouse ETL significantly changes how connections are maintained. This release is not backwards compatible.
|
2
|
+
|
3
|
+
To upgrade, you must do the following:
|
4
|
+
|
5
|
+
1.) All database connections used in ETL control files must be declared in database.yml in the directory that contains your ETL control files.
|
6
|
+
2.) All sources, destinations, transforms and processors that use a database connection must include the configuration name/value pair of :target => 'name' where name is replaced with the connection name defined in database.yml. Connection information should no longer be included in control files.
|
data/CHANGELOG
CHANGED
@@ -1,89 +1,113 @@
|
|
1
|
-
0.
|
2
|
-
*
|
1
|
+
0.9.5 - unreleased
|
2
|
+
* Fixes for Rails 3 (gkfabs)
|
3
|
+
* Fixes for Ruby 1.9.2 (jlecour, byrnejb, thbar)
|
4
|
+
* improvements on CalculationTransform (gkfabs)
|
5
|
+
* batch can have children (gkfabs)
|
6
|
+
* read all columns by default in DelimitedSource (smeyfroi)
|
7
|
+
* Turn db config into a HashWithIndifferentAccess (smeyfroi)
|
8
|
+
* DatabaseJoinProcessor (gkfabs)
|
9
|
+
* ImapattachmentDownloaderProcessor (gkfabs)
|
10
|
+
* Pop3attachmentDownloaderProcessor (gkfabs)
|
11
|
+
* Enhancements and fixes on EscapeCSVProcessor (gkfabs)
|
12
|
+
* Bug correction on hash initialization DatabaseSource (gkfabs)
|
13
|
+
* FilterRowProcessor won't return nil (gkfabs)
|
14
|
+
* DatabaseSource fixes (gkfabs)
|
15
|
+
* New UpdateDatabaseDestination (gkfabs)
|
16
|
+
* New InsertUpdateDatabaseDestination (gkfabs)
|
17
|
+
* New Excel destination (gkfabs)
|
18
|
+
* New Excel parser (gkfabs)
|
19
|
+
* Add ability to use a query in DatabaseSource (gkfabs)
|
20
|
+
* Modified SQLResolver to allow for multiple key lookups on dimension
|
21
|
+
table (cdimartino)
|
22
|
+
* Add scd_required_fields, which lets you avoid specifying the SCD fields when specifying :unique in a destination (darrell)
|
23
|
+
* More SCD debug logging (darrell)
|
24
|
+
* Make database destination table quoting database agnostic (darrell)
|
25
|
+
* Allow absolute paths in file destinations and file bulk imports (darrell)
|
26
|
+
* Bug fixes on engine, destination (sasikumargn, cdimartino, darrell)
|
27
|
+
* Code and tests clean-up (mainej, aeden, thbar)
|
28
|
+
* No more test failures (thbar, gkfabs)
|
29
|
+
* Add the ability to automatically run the tests on a matrix of configuration including Rails 2/3, Ruby 1.8.7/1.9.2, MySQL/Postgresql (thbar)
|
30
|
+
* Use bundler for gem packaging and release (thbar)
|
31
|
+
|
32
|
+
0.9.1 - January 14, 2009
|
33
|
+
* SQLResolver now uses ETL::Engine.table so it may utilize temp tables. (aeden)
|
34
|
+
* Added Thibaut Barrère's encode processor.
|
35
|
+
* Added MockSource and MockDestination test helpers (thbar)
|
36
|
+
* Added the block processor. Can call a block once (pre/post processor)
|
37
|
+
or once for each row (after_read/before_write row processor) (thbar)
|
38
|
+
* Changed temp table to use new AdapterExtension copy_table method (aeden)
|
39
|
+
* Added bin/etl.cmd windows batch - just add the bin folder to your PATH
|
40
|
+
and it will let you call etl on an unpacked/pistoned version of AW-ETL (thbar)
|
41
|
+
* Upgraded to support Rails 2.1. No longer compatible with older versions of Rails.
|
42
|
+
* Added ETL::Builder::TimeDimensionBuilder
|
43
|
+
* Added :default option to ForeignKeyLookupTransform that will be used if no
|
44
|
+
foreign key is found.
|
45
|
+
* Added :cache option to ForeignKeyLookupTransform that will preload the FK
|
46
|
+
mappings if the underlying resolver supports it. Currently supported by
|
47
|
+
SQLResolver.
|
48
|
+
* A Class extending ETL::Transform::Transform may now be passed as a transformer.
|
49
|
+
For example, in the control file you would define the transform as:
|
50
|
+
transform :a_field, MyTransform, {:option1 => 'option1'}.
|
51
|
+
* Support Ruby 1.9 CSV library
|
3
52
|
|
4
|
-
0.
|
5
|
-
* Added
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
*
|
10
|
-
|
11
|
-
|
53
|
+
0.9.0 - August 9, 2007
|
54
|
+
* Added support for batch processing through .ebf files. These files are
|
55
|
+
essentially control files that apply settings to an entire ETL process.
|
56
|
+
* Implemented support for screen blocks. These blocks can be used to test
|
57
|
+
the data and raise an error if the screens do not pass.
|
58
|
+
* Connections are now cached in a Hash available through
|
59
|
+
ETL::Engine.connection(name). This should be used rather than including
|
60
|
+
connection information in the control files.
|
61
|
+
* Implemented temp table support throughout.
|
62
|
+
* DateDimensionBuilder now included in ActiveWarehouse ETL directly.
|
63
|
+
* Time calculations for fiscal year now included in ActiveWarehouse ETL.
|
12
64
|
|
13
|
-
0.
|
14
|
-
* Added
|
65
|
+
0.8.4 - May 24, 2007
|
66
|
+
* Added fix for backslash in file writer
|
15
67
|
|
16
|
-
0.
|
17
|
-
* Added
|
18
|
-
to skip the first n lines in the source data file
|
19
|
-
* Added better error handling in delimited parser - an error is now raised
|
20
|
-
if the expected and actual field lengths do not match
|
21
|
-
* Added :truncate option for database destination. Set to true to truncate
|
22
|
-
before importing data.
|
23
|
-
* Added support for :unique => [] option and virtual fields for the database
|
24
|
-
destination
|
68
|
+
0.8.3 - May 13, 2007
|
69
|
+
* Added patches from Andy Triboletti
|
25
70
|
|
26
|
-
0.
|
27
|
-
*
|
28
|
-
|
29
|
-
* Added
|
30
|
-
|
31
|
-
|
32
|
-
* Added
|
33
|
-
* Removed the need to include Enumerable in each parser implementation.
|
34
|
-
* Added new date_to_string and string_to_date transformers.
|
35
|
-
* Implemented foreign_key_lookup transform including an ActiveRecordResolver.
|
36
|
-
* Added real time activity logging which is called when the etl bin script is
|
37
|
-
invoked.
|
38
|
-
* Improved error handling.
|
39
|
-
* Default logger level is now WARN.
|
71
|
+
0.8.2 - April 15, 2007
|
72
|
+
* Fixed bug with premature destination closing.
|
73
|
+
* Added indexes to execution records table.
|
74
|
+
* Added a PrintRowProcessor.
|
75
|
+
* Added support for conditions and "group by" in the database source.
|
76
|
+
* Added after_initialize hook in Processor base class.
|
77
|
+
* Added examples directory
|
40
78
|
|
41
|
-
0.
|
42
|
-
*
|
43
|
-
*
|
79
|
+
0.8.1 - Apr 12, 2007
|
80
|
+
* Added EnumerableSource
|
81
|
+
* Added :type configuration option to the source directive, allowing the source
|
82
|
+
type to be explicitly specified. The source type can be a string or symbol
|
83
|
+
(in which case the class will be constructed by appending Source to the type
|
84
|
+
name), a class (which will be instantiated and passed the control,
|
85
|
+
configuration and mapping) and finally an actual Source instance.
|
44
86
|
|
45
|
-
0.
|
46
|
-
*
|
47
|
-
*
|
87
|
+
0.8.0 - Apr 12, 2007
|
88
|
+
* Source now available through the current row source accessor.
|
89
|
+
* Added new_rows_only configuration option to DatabaseSource. A date field must
|
90
|
+
be specified and only records that are greater than the date value in that
|
91
|
+
field, relative to the last successful
|
92
|
+
execution, will be returned from the source.
|
93
|
+
* Added an (untested) count feature which returns the number of rows for
|
94
|
+
processing.
|
95
|
+
* If no natural key is defined then an empty array will now be used, resulting
|
96
|
+
in the row being written to the output without going through change checks.
|
97
|
+
* Mapping argument in destination is now optional. An empty hash will be used
|
98
|
+
if the mapping hash is not specified. If the mapping hash is not specified
|
99
|
+
then the order will be determined using the originating source's order.
|
100
|
+
* ActiveRecord configurations loaded from database.yml by the etl tool will be
|
101
|
+
merged with ActiveRecord::Base.configurations.
|
102
|
+
* Fixed several bugs in how record change detection was implemented.
|
103
|
+
* Fixed how the read_locally functionality was implemented so that it will find
|
104
|
+
the last completed local source copy using the source's trigger file (untested).
|
48
105
|
|
49
|
-
0.
|
50
|
-
* Fixed
|
51
|
-
* Removed control validation for now (source could be code in the control file).
|
52
|
-
* Transform interface now defined as taking 3 arguments, the field name, field
|
53
|
-
value and the row. This is not backwards compatible.
|
54
|
-
* Added HierarchyLookupTransform.
|
55
|
-
* Added DefaultTransform which will return a specified value if the initial
|
56
|
-
value is blank.
|
57
|
-
* Added row-level processing.
|
58
|
-
* Added HierarchyExploderProcessor which takes a single hierarchy row and
|
59
|
-
explodes it to multiple rows as used in a hierarchy bridge.
|
60
|
-
* Added ApacheCombinedLogParser which parses Apache Combined Log format,
|
61
|
-
including parsing of the
|
62
|
-
user agent string and the URI, returning a Hash.
|
63
|
-
* Fixed bug in SAX parser so that attributes are now set when the start_element
|
64
|
-
event is received.
|
65
|
-
* Added an HttpTools module which provides some parsing methods (for user agent
|
66
|
-
and URI).
|
67
|
-
* Database source now uses its own class for establishing an ActiveRecord
|
68
|
-
connection.
|
69
|
-
* Log files are now timestamped.
|
70
|
-
* Source files are now archived automatically during the extraction process
|
71
|
-
* Added a :condition option to the destination configuration Hash that accepts
|
72
|
-
a Proc with a single argument passed to it (the row).
|
73
|
-
* Added an :append_rows option to the destination configuration Hash that
|
74
|
-
accepts either a Hash (to append a single row) or an Array of Hashes (to
|
75
|
-
append multiple rows).
|
76
|
-
* Only print the read and written row counts if there is at least one source
|
77
|
-
and one destination respectively.
|
78
|
-
* Added a depends_on directive that accepts a list of arguments of either strings
|
79
|
-
or symbols. Each symbol is converted to a string and .ctl is appended;
|
80
|
-
strings are passed through directly. The dependencies are executed in the order
|
81
|
-
they are specified.
|
82
|
-
* The default field separator in the bulk loader is now a comma (was a tab).
|
106
|
+
0.7.2 - Apr 8, 2007
|
107
|
+
* Fixed quoting bug in CheckExistProcessor
|
83
108
|
|
84
|
-
0.
|
85
|
-
*
|
86
|
-
* Added CopyFieldProcessor
|
109
|
+
0.7.1 - Apr 8, 2007
|
110
|
+
* Fixed source caching
|
87
111
|
|
88
112
|
0.7 - Apr 8, 2007
|
89
113
|
* Job execution is now tracked in a database. This means that ActiveRecord is
|
@@ -118,81 +142,89 @@
|
|
118
142
|
This is useful if you need to recover from a failed ETL process.
|
119
143
|
* Updated README
|
120
144
|
|
121
|
-
0.
|
122
|
-
*
|
145
|
+
0.6.1 - Mar 22, 2007
|
146
|
+
* Added support for absolute paths in file sources
|
147
|
+
* Added CopyFieldProcessor
|
123
148
|
|
124
|
-
0.
|
125
|
-
* Fixed
|
149
|
+
0.6.0 - Mar 8, 2007
|
150
|
+
* Fixed missing method problem in validate in Control class.
|
151
|
+
* Removed control validation for now (source could be code in the control file).
|
152
|
+
* Transform interface now defined as taking 3 arguments, the field name, field
|
153
|
+
value and the row. This is not backwards compatible.
|
154
|
+
* Added HierarchyLookupTransform.
|
155
|
+
* Added DefaultTransform which will return a specified value if the initial
|
156
|
+
value is blank.
|
157
|
+
* Added row-level processing.
|
158
|
+
* Added HierarchyExploderProcessor which takes a single hierarchy row and
|
159
|
+
explodes it to multiple rows as used in a hierarchy bridge.
|
160
|
+
* Added ApacheCombinedLogParser which parses Apache Combined Log format,
|
161
|
+
including parsing of the
|
162
|
+
user agent string and the URI, returning a Hash.
|
163
|
+
* Fixed bug in SAX parser so that attributes are now set when the start_element
|
164
|
+
event is received.
|
165
|
+
* Added an HttpTools module which provides some parsing methods (for user agent
|
166
|
+
and URI).
|
167
|
+
* Database source now uses its own class for establishing an ActiveRecord
|
168
|
+
connection.
|
169
|
+
* Log files are now timestamped.
|
170
|
+
* Source files are now archived automatically during the extraction process
|
171
|
+
* Added a :condition option to the destination configuration Hash that accepts
|
172
|
+
a Proc with a single argument passed to it (the row).
|
173
|
+
* Added an :append_rows option to the destination configuration Hash that
|
174
|
+
accepts either a Hash (to append a single row) or an Array of Hashes (to
|
175
|
+
append multiple rows).
|
176
|
+
* Only print the read and written row counts if there is at least one source
|
177
|
+
and one destination respectively.
|
178
|
+
* Added a depends_on directive that accepts a list of arguments of either strings
|
179
|
+
or symbols. Each symbol is converted to a string and .ctl is appended;
|
180
|
+
strings are passed through directly. The dependencies are executed in the order
|
181
|
+
they are specified.
|
182
|
+
* The default field separator in the bulk loader is now a comma (was a tab).
|
126
183
|
|
127
|
-
0.
|
128
|
-
*
|
129
|
-
*
|
130
|
-
be specified and only records that are greater than the date value in that
|
131
|
-
field, relative to the last successful
|
132
|
-
execution, will be returned from the source.
|
133
|
-
* Added an (untested) count feature which returns the number of rows for
|
134
|
-
processing.
|
135
|
-
* If no natural key is defined then an empty array will now be used, resulting
|
136
|
-
in the row being written to the output without going through change checks.
|
137
|
-
* Mapping argument in destination is now optional. An empty hash will be used
|
138
|
-
if the mapping hash is not specified. If the mapping hash is not specified
|
139
|
-
then the order will be determined using the originating source's order.
|
140
|
-
* ActiveRecord configurations loaded from database.yml by the etl tool will be
|
141
|
-
merged with ActiveRecord::Base.configurations.
|
142
|
-
* Fixed several bugs in how record change detection was implemented.
|
143
|
-
* Fixed how the read_locally functionality was implemented so that it will find
|
144
|
-
the last completed local source copy using the source's trigger file (untested).
|
145
|
-
|
146
|
-
0.8.1 - Apr 12, 2007
|
147
|
-
* Added EnumerableSource
|
148
|
-
* Added :type configuration option to the source directive, allowing the source
|
149
|
-
type to be explicitly specified. The source type can be a string or symbol
|
150
|
-
(in which case the class will be constructed by appending Source to the type
|
151
|
-
name), a class (which will be instantiated and passed the control,
|
152
|
-
configuration and mapping) and finally an actual Source instance.
|
153
|
-
|
154
|
-
0.8.2 - April 15, 2007
|
155
|
-
* Fixed bug with premature destination closing.
|
156
|
-
* Added indexes to execution records table.
|
157
|
-
* Added a PrintRowProcessor.
|
158
|
-
* Added support for conditions and "group by" in the database source.
|
159
|
-
* Added after_initialize hook in Processor base class.
|
160
|
-
* Added examples directory
|
184
|
+
0.5.2 - Feb 19, 2007
|
185
|
+
* Added error threshold.
|
186
|
+
* Fixed problem with transform error handling.
|
161
187
|
|
162
|
-
0.
|
163
|
-
*
|
188
|
+
0.5.1 - Feb 18, 2007
|
189
|
+
* Fixed up truncate processor.
|
190
|
+
* Updated HOW_TO_RELEASE doc.
|
164
191
|
|
165
|
-
0.
|
166
|
-
*
|
192
|
+
0.5.0 - Feb 17, 2007
|
193
|
+
* Changed require_gem to gem and added alias to allow for older versions of
|
194
|
+
rubygems.
|
195
|
+
* Added support for Hash in the source configuration where :name => :parser_name
|
196
|
+
defines the parser to use and :options => {} defines options to pass to the
|
197
|
+
parser.
|
198
|
+
* Added support for passing a custom Parser class in the source configuration.
|
199
|
+
* Removed the need to include Enumerable in each parser implementation.
|
200
|
+
* Added new date_to_string and string_to_date transformers.
|
201
|
+
* Implemented foreign_key_lookup transform including an ActiveRecordResolver.
|
202
|
+
* Added real time activity logging which is called when the etl bin script is
|
203
|
+
invoked.
|
204
|
+
* Improved error handling.
|
205
|
+
* Default logger level is now WARN.
|
167
206
|
|
168
|
-
0.
|
169
|
-
* Added
|
170
|
-
|
171
|
-
*
|
172
|
-
the
|
173
|
-
*
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
* DateDimensionBuilder now included in ActiveWarehouse ETL directly.
|
178
|
-
* Time calculations for fiscal year now included in ActiveWarehouse ETL.
|
207
|
+
0.4.0 - Jan 11, 2007
|
208
|
+
* Added :skip_lines option to file source configurations, which can be used
|
209
|
+
to skip the first n lines in the source data file
|
210
|
+
* Added better error handling in delimited parser - an error is now raised
|
211
|
+
if the expected and actual field lengths do not match
|
212
|
+
* Added :truncate option for database destination. Set to true to truncate
|
213
|
+
before importing data.
|
214
|
+
* Added support for :unique => [] option and virtual fields for the database
|
215
|
+
destination
|
179
216
|
|
180
|
-
0.
|
181
|
-
*
|
182
|
-
|
183
|
-
|
184
|
-
* Added
|
185
|
-
|
186
|
-
|
187
|
-
* Added
|
188
|
-
|
189
|
-
*
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
*
|
194
|
-
mappings if the underlying resolver supports it. Currently supported by
|
195
|
-
SQLResolver.
|
196
|
-
* A Class extending ETL::Transform::Transform may now be passed as a transformer.
|
197
|
-
For example, in the control file you would define the transform as:
|
198
|
-
transform :a_field, MyTransform, {:option1 => 'option1'}.
|
217
|
+
0.3.0 - Dec 19, 2006
|
218
|
+
* Added support for calculated values in virtual fields with Proc
|
219
|
+
|
220
|
+
0.2.0 - Dec 7, 2006
|
221
|
+
* Added an XML parser for source parsing
|
222
|
+
* Added support for compound key constraints in destinations via the
|
223
|
+
:unique => [] option
|
224
|
+
* Added ability to declare explicit columns in bulk import
|
225
|
+
* Added support for generators in destinations
|
226
|
+
* Added a SurrogateKeyGenerator for cases where the database doesn't support
|
227
|
+
auto generated surrogate keys
|
228
|
+
|
229
|
+
0.1.0 - Dec 6, 2006
|
230
|
+
* Initial release
|
data/Gemfile
ADDED
data/HOW_TO_RELEASE
ADDED
data/README
CHANGED
@@ -7,7 +7,7 @@ Ruby Extract-Transform-Load (ETL) tool.
|
|
7
7
|
|
8
8
|
== Online Documentation
|
9
9
|
|
10
|
-
Available at
|
10
|
+
Available at https://github.com/activewarehouse/activewarehouse-etl/wiki
|
11
11
|
|
12
12
|
== Features
|
13
13
|
|
@@ -77,7 +77,23 @@ Command line options:
|
|
77
77
|
Control file examples can be found in the examples directory.
|
78
78
|
|
79
79
|
== Running Tests
|
80
|
-
|
80
|
+
|
81
|
+
Current state:
|
82
|
+
- 11 failures on MySQL
|
83
|
+
- 1 failure on Postgres
|
84
|
+
|
85
|
+
The tests require:
|
86
|
+
- gem install shoulda
|
87
|
+
- gem install flexmock
|
88
|
+
- gem install pg (if you want to run the tests on pg)
|
89
|
+
- gem install spreadsheet
|
90
|
+
- gem install tmail
|
91
|
+
|
92
|
+
The test subfolder contains example database.yml files for mysql and postgres.
|
93
|
+
|
94
|
+
To run the tests:
|
95
|
+
- rake test DB=postgresql (for postgres)
|
96
|
+
- otherwise just rake test
|
81
97
|
|
82
98
|
== Feedback
|
83
99
|
This is a work in progress. Comments should be made on the
|
data/Rakefile
CHANGED
@@ -1,21 +1,41 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
|
1
3
|
require 'rake'
|
2
4
|
require 'rake/testtask'
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
5
|
+
require 'rdoc'
|
6
|
+
require 'rdoc/task'
|
7
|
+
|
8
|
+
namespace :test do
|
9
|
+
|
10
|
+
def run_tests(rvm, rails, database)
|
11
|
+
database_yml = File.dirname(__FILE__) + "/test/config/database.#{database}.yml"
|
12
|
+
FileUtils.cp(database_yml, 'test/database.yml')
|
13
|
+
|
14
|
+
puts
|
15
|
+
puts "============ Ruby #{rvm} - Rails #{rails} - Db #{database} ============="
|
16
|
+
puts
|
17
|
+
|
18
|
+
sh <<-BASH
|
19
|
+
BUNDLE_GEMFILE=test/config/Gemfile.rails-#{rails} bundle install > null
|
20
|
+
BUNDLE_GEMFILE=test/config/Gemfile.rails-#{rails} rvm #{rvm} rake test
|
21
|
+
BASH
|
22
|
+
end
|
23
|
+
|
24
|
+
desc 'Run the tests in all combinations described in test-matrix.yml'
|
25
|
+
task :matrix do
|
26
|
+
# a la travis
|
27
|
+
require 'yaml'
|
28
|
+
data = YAML.load(IO.read(File.dirname(__FILE__) + '/test-matrix.yml'))
|
29
|
+
data['rvm'].each do |rvm|
|
30
|
+
data['rails'].each do |rails|
|
31
|
+
data['database'].each do |database|
|
32
|
+
run_tests(rvm, rails, database)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
16
37
|
end
|
17
38
|
|
18
|
-
desc 'Default: run unit tests.'
|
19
39
|
task :default => :test
|
20
40
|
|
21
41
|
desc 'Test the ETL application.'
|
@@ -46,62 +66,6 @@ namespace :rcov do
|
|
46
66
|
end
|
47
67
|
end
|
48
68
|
|
49
|
-
# Gem Spec
|
50
|
-
|
51
|
-
module AWETL
|
52
|
-
def self.package_files(package_prefix)
|
53
|
-
FileList[
|
54
|
-
"#{package_prefix}CHANGELOG",
|
55
|
-
"#{package_prefix}LICENSE",
|
56
|
-
"#{package_prefix}README",
|
57
|
-
"#{package_prefix}TODO",
|
58
|
-
"#{package_prefix}Rakefile",
|
59
|
-
"#{package_prefix}bin/**/*",
|
60
|
-
"#{package_prefix}doc/**/*",
|
61
|
-
"#{package_prefix}lib/**/*",
|
62
|
-
"#{package_prefix}examples/**/*",
|
63
|
-
] - [ "#{package_prefix}test" ]
|
64
|
-
end
|
65
|
-
|
66
|
-
def self.spec(package_prefix = '')
|
67
|
-
Gem::Specification.new do |s|
|
68
|
-
s.name = 'activewarehouse-etl'
|
69
|
-
s.version = AWETL::PKG_VERSION
|
70
|
-
s.summary = "Pure Ruby ETL package."
|
71
|
-
s.description = <<-EOF
|
72
|
-
ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
|
73
|
-
EOF
|
74
|
-
|
75
|
-
s.add_dependency('rake', '>= 0.7.1')
|
76
|
-
s.add_dependency('activesupport', '>= 1.3.1')
|
77
|
-
s.add_dependency('activerecord', '>= 1.14.4')
|
78
|
-
s.add_dependency('fastercsv', '>= 1.2.0')
|
79
|
-
s.add_dependency('adapter_extensions', '>= 0.1.0')
|
80
|
-
|
81
|
-
s.rdoc_options << '--exclude' << '.'
|
82
|
-
s.has_rdoc = false
|
83
|
-
|
84
|
-
s.files = package_files(package_prefix).to_a.delete_if {|f| f.include?('.svn')}
|
85
|
-
s.require_path = 'lib'
|
86
|
-
|
87
|
-
s.bindir = "#{package_prefix}bin" # Use these for applications.
|
88
|
-
s.executables = ['etl']
|
89
|
-
s.default_executable = "etl"
|
90
|
-
|
91
|
-
s.author = "Anthony Eden"
|
92
|
-
s.email = "anthonyeden@gmail.com"
|
93
|
-
s.homepage = "http://activewarehouse.rubyforge.org/etl"
|
94
|
-
s.rubyforge_project = "activewarehouse"
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
Rake::GemPackageTask.new(AWETL.spec) do |pkg|
|
100
|
-
pkg.gem_spec = AWETL.spec
|
101
|
-
pkg.need_tar = true
|
102
|
-
pkg.need_zip = true
|
103
|
-
end
|
104
|
-
|
105
69
|
desc "Generate code statistics"
|
106
70
|
task :lines do
|
107
71
|
lines, codelines, total_lines, total_codelines = 0, 0, 0, 0
|
@@ -127,27 +91,7 @@ task :lines do
|
|
127
91
|
puts "Total: Lines #{total_lines}, LOC #{total_codelines}"
|
128
92
|
end
|
129
93
|
|
130
|
-
desc "Publish the
|
131
|
-
task :release => [ :package ] do
|
132
|
-
`rubyforge login`
|
133
|
-
|
134
|
-
for ext in %w( gem tgz zip )
|
135
|
-
release_command = "rubyforge add_release activewarehouse #{AWETL::PKG_NAME} 'REL #{AWETL::PKG_VERSION}' pkg/#{AWETL::PKG_NAME}-#{AWETL::PKG_VERSION}.#{ext}"
|
136
|
-
puts release_command
|
137
|
-
system(release_command)
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
desc "Publish the API documentation"
|
94
|
+
desc "Publish the API documentation (UNTESTED CURRENTLY)"
|
142
95
|
task :pdoc => [:rdoc] do
|
143
96
|
Rake::SshDirPublisher.new("aeden@rubyforge.org", "/var/www/gforge-projects/activewarehouse/etl/rdoc", "rdoc").upload
|
144
97
|
end
|
145
|
-
|
146
|
-
desc "Reinstall the gem from a local package copy"
|
147
|
-
task :reinstall => [:package] do
|
148
|
-
windows = RUBY_PLATFORM =~ /mswin/
|
149
|
-
sudo = windows ? '' : 'sudo'
|
150
|
-
gem = windows ? 'gem.bat' : 'gem'
|
151
|
-
`#{sudo} #{gem} uninstall #{AWETL::PKG_NAME} -x`
|
152
|
-
`#{sudo} #{gem} install pkg/#{AWETL::PKG_NAME}-#{AWETL::PKG_VERSION}`
|
153
|
-
end
|