activewarehouse-etl-sgonyea 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/0.9-UPGRADE +6 -0
- data/CHANGELOG +236 -0
- data/Gemfile +4 -0
- data/HOW_TO_RELEASE +13 -0
- data/LICENSE +7 -0
- data/README.textile +111 -0
- data/Rakefile +103 -0
- data/TODO +28 -0
- data/active_support_logger.patch +78 -0
- data/activewarehouse-etl.gemspec +36 -0
- data/bin/etl +28 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +16 -0
- data/lib/etl.rb +97 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +65 -0
- data/lib/etl/builder.rb +2 -0
- data/lib/etl/builder/date_dimension_builder.rb +96 -0
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +89 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +405 -0
- data/lib/etl/control/destination.rb +438 -0
- data/lib/etl/control/destination/csv_destination.rb +113 -0
- data/lib/etl/control/destination/database_destination.rb +97 -0
- data/lib/etl/control/destination/excel_destination.rb +91 -0
- data/lib/etl/control/destination/file_destination.rb +126 -0
- data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
- data/lib/etl/control/destination/update_database_destination.rb +109 -0
- data/lib/etl/control/destination/yaml_destination.rb +74 -0
- data/lib/etl/control/source.rb +132 -0
- data/lib/etl/control/source/database_source.rb +224 -0
- data/lib/etl/control/source/enumerable_source.rb +11 -0
- data/lib/etl/control/source/file_source.rb +90 -0
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +42 -0
- data/lib/etl/engine.rb +582 -0
- data/lib/etl/execution.rb +19 -0
- data/lib/etl/execution/base.rb +8 -0
- data/lib/etl/execution/batch.rb +10 -0
- data/lib/etl/execution/job.rb +8 -0
- data/lib/etl/execution/migration.rb +90 -0
- data/lib/etl/generator.rb +2 -0
- data/lib/etl/generator/generator.rb +20 -0
- data/lib/etl/generator/surrogate_key_generator.rb +39 -0
- data/lib/etl/http_tools.rb +139 -0
- data/lib/etl/parser.rb +11 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
- data/lib/etl/parser/csv_parser.rb +93 -0
- data/lib/etl/parser/excel_parser.rb +112 -0
- data/lib/etl/parser/fixed_width_parser.rb +65 -0
- data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
- data/lib/etl/parser/parser.rb +41 -0
- data/lib/etl/parser/sax_parser.rb +218 -0
- data/lib/etl/parser/xml_parser.rb +65 -0
- data/lib/etl/processor.rb +11 -0
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +94 -0
- data/lib/etl/processor/check_exist_processor.rb +80 -0
- data/lib/etl/processor/check_unique_processor.rb +39 -0
- data/lib/etl/processor/copy_field_processor.rb +26 -0
- data/lib/etl/processor/database_join_processor.rb +82 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
- data/lib/etl/processor/escape_csv_processor.rb +77 -0
- data/lib/etl/processor/filter_row_processor.rb +51 -0
- data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
- data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
- data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
- data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +25 -0
- data/lib/etl/processor/rename_processor.rb +24 -0
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/row_processor.rb +27 -0
- data/lib/etl/processor/sequence_processor.rb +23 -0
- data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
- data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
- data/lib/etl/processor/surrogate_key_processor.rb +53 -0
- data/lib/etl/processor/truncate_processor.rb +40 -0
- data/lib/etl/processor/zip_file_processor.rb +27 -0
- data/lib/etl/row.rb +20 -0
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +20 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/block_transform.rb +13 -0
- data/lib/etl/transform/calculation_transform.rb +71 -0
- data/lib/etl/transform/date_to_string_transform.rb +20 -0
- data/lib/etl/transform/decode_transform.rb +51 -0
- data/lib/etl/transform/default_transform.rb +20 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
- data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
- data/lib/etl/transform/md5_transform.rb +13 -0
- data/lib/etl/transform/ordinalize_transform.rb +14 -0
- data/lib/etl/transform/sha1_transform.rb +13 -0
- data/lib/etl/transform/split_fields_transform.rb +27 -0
- data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
- data/lib/etl/transform/string_to_date_transform.rb +16 -0
- data/lib/etl/transform/string_to_time_transform.rb +11 -0
- data/lib/etl/transform/transform.rb +61 -0
- data/lib/etl/transform/trim_transform.rb +26 -0
- data/lib/etl/transform/type_transform.rb +35 -0
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +3 -0
- data/test-matrix.yml +10 -0
- data/test/.gitignore +1 -0
- data/test/.ignore +2 -0
- data/test/all.ebf +6 -0
- data/test/apache_combined_log.ctl +11 -0
- data/test/batch_test.rb +41 -0
- data/test/batch_with_error.ebf +6 -0
- data/test/batched1.ctl +0 -0
- data/test/batched2.ctl +0 -0
- data/test/block_processor.ctl +6 -0
- data/test/block_processor_error.ctl +1 -0
- data/test/block_processor_pre_post_process.ctl +4 -0
- data/test/block_processor_remove_rows.ctl +5 -0
- data/test/block_processor_test.rb +38 -0
- data/test/check_exist_processor_test.rb +92 -0
- data/test/check_unique_processor_test.rb +40 -0
- data/test/config/Gemfile.rails-2.3.x +3 -0
- data/test/config/Gemfile.rails-2.3.x.lock +53 -0
- data/test/config/Gemfile.rails-3.0.x +3 -0
- data/test/config/Gemfile.rails-3.0.x.lock +61 -0
- data/test/config/common.rb +29 -0
- data/test/connection/mysql/connection.rb +9 -0
- data/test/connection/mysql/schema.sql +37 -0
- data/test/connection/postgresql/connection.rb +13 -0
- data/test/connection/postgresql/schema.sql +40 -0
- data/test/control_test.rb +43 -0
- data/test/data/apache_combined_log.txt +3 -0
- data/test/data/bulk_import.txt +3 -0
- data/test/data/bulk_import_with_empties.txt +3 -0
- data/test/data/decode.txt +3 -0
- data/test/data/delimited.txt +3 -0
- data/test/data/encode_source_latin1.txt +2 -0
- data/test/data/excel.xls +0 -0
- data/test/data/excel2.xls +0 -0
- data/test/data/fixed_width.txt +3 -0
- data/test/data/multiple_delimited_1.txt +3 -0
- data/test/data/multiple_delimited_2.txt +3 -0
- data/test/data/nokogiri.xml +38 -0
- data/test/data/people.txt +3 -0
- data/test/data/sax.xml +14 -0
- data/test/data/xml.xml +16 -0
- data/test/database_join_processor_test.rb +43 -0
- data/test/date_dimension_builder_test.rb +96 -0
- data/test/delimited.ctl +30 -0
- data/test/delimited_absolute.ctl +31 -0
- data/test/delimited_destination_db.ctl +23 -0
- data/test/delimited_excel.ctl +31 -0
- data/test/delimited_insert_update.ctl +34 -0
- data/test/delimited_update.ctl +34 -0
- data/test/delimited_with_bulk_load.ctl +34 -0
- data/test/destination_test.rb +275 -0
- data/test/directive_test.rb +23 -0
- data/test/encode_processor_test.rb +32 -0
- data/test/engine_test.rb +78 -0
- data/test/ensure_fields_presence_processor_test.rb +28 -0
- data/test/errors.ctl +24 -0
- data/test/etl_test.rb +42 -0
- data/test/excel.ctl +24 -0
- data/test/excel2.ctl +25 -0
- data/test/fixed_width.ctl +35 -0
- data/test/foreign_key_lookup_transform_test.rb +50 -0
- data/test/generator_test.rb +14 -0
- data/test/inline_parser.ctl +17 -0
- data/test/mocks/mock_destination.rb +26 -0
- data/test/mocks/mock_source.rb +25 -0
- data/test/model_source.ctl +14 -0
- data/test/multiple_delimited.ctl +22 -0
- data/test/multiple_source_delimited.ctl +39 -0
- data/test/nokogiri_all.ctl +35 -0
- data/test/nokogiri_select.ctl +35 -0
- data/test/nokogiri_test.rb +35 -0
- data/test/parser_test.rb +224 -0
- data/test/performance/delimited.ctl +30 -0
- data/test/processor_test.rb +44 -0
- data/test/row_processor_test.rb +17 -0
- data/test/sax.ctl +26 -0
- data/test/scd/1.txt +1 -0
- data/test/scd/2.txt +1 -0
- data/test/scd/3.txt +1 -0
- data/test/scd_test.rb +257 -0
- data/test/scd_test_type_1.ctl +43 -0
- data/test/scd_test_type_2.ctl +34 -0
- data/test/screen_test.rb +9 -0
- data/test/screen_test_error.ctl +3 -0
- data/test/screen_test_fatal.ctl +3 -0
- data/test/source_test.rb +154 -0
- data/test/test_helper.rb +37 -0
- data/test/transform_test.rb +101 -0
- data/test/truncate_processor_test.rb +37 -0
- data/test/xml.ctl +31 -0
- metadata +370 -0
data/TODO
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
TODO
|
2
|
+
|
3
|
+
* Add built-in support for audit_dimension
|
4
|
+
* Do not rerun the processing if it isn't needed, i.e. the source and control files have not been modified (allow forced override)
|
5
|
+
* Provide greater control in error handling
|
6
|
+
** Allow an error threshold
|
7
|
+
** Don't die completely on a parse error; just stop processing that specific file if the error threshold is reached
|
8
|
+
** Allow mismatch row length error in delimited parser to be ignored
|
9
|
+
* Improve error messages throughout, but especially in problems with the control files
|
10
|
+
* Add support for partitioned views during the insert process. Use specifiable columns as the trigger columns for determining the data output destination.
|
11
|
+
* Check if a temp table exists and the last job run was successful, in which case skip during the current run
|
12
|
+
* Create models for each of the tables in each of the databases defined in ETL::Engine.connections
|
13
|
+
|
14
|
+
Audit Record
|
15
|
+
|
16
|
+
Process-Level
|
17
|
+
* Start Time
|
18
|
+
* End Time
|
19
|
+
* (Duration)
|
20
|
+
* Rows Read
|
21
|
+
* Rows Written
|
22
|
+
* Rows Rejected
|
23
|
+
* Errors
|
24
|
+
* Destination
|
25
|
+
Record-Level
|
26
|
+
* Source
|
27
|
+
* Timestamp
|
28
|
+
* Transformation Log
|
@@ -0,0 +1,78 @@
|
|
1
|
+
Index: lib/active_support/clean_logger.rb
|
2
|
+
===================================================================
|
3
|
+
--- lib/active_support/clean_logger.rb (revision 5963)
|
4
|
+
+++ lib/active_support/clean_logger.rb (working copy)
|
5
|
+
@@ -1,10 +1,21 @@
|
6
|
+
require 'logger'
|
7
|
+
require File.dirname(__FILE__) + '/core_ext/class/attribute_accessors'
|
8
|
+
|
9
|
+
-class Logger #:nodoc:
|
10
|
+
+# Extensions to the built in Ruby logger.
|
11
|
+
+#
|
12
|
+
+# If you want to use the default log formatter as defined in the Ruby core, then you
|
13
|
+
+# will need to set the formatter for the logger as in:
|
14
|
+
+#
|
15
|
+
+# logger.formatter = Formatter.new
|
16
|
+
+#
|
17
|
+
+# You can then specify the datetime format, for example:
|
18
|
+
+#
|
19
|
+
+# logger.datetime_format = "%Y-%m-%d"
|
20
|
+
+class Logger
|
21
|
+
+ # Set to false to disable the silencer
|
22
|
+
cattr_accessor :silencer
|
23
|
+
self.silencer = true
|
24
|
+
-
|
25
|
+
+
|
26
|
+
# Silences the logger for the duration of the block.
|
27
|
+
def silence(temporary_level = Logger::ERROR)
|
28
|
+
if silencer
|
29
|
+
@@ -18,6 +29,35 @@
|
30
|
+
yield self
|
31
|
+
end
|
32
|
+
end
|
33
|
+
+
|
34
|
+
+ alias :old_datetime_format= :datetime_format=
|
35
|
+
+ # Logging date-time format (string passed to +strftime+). Ignored if the formatter
|
36
|
+
+ # does not respond to datetime_format=.
|
37
|
+
+ def datetime_format=(datetime_format)
|
38
|
+
+ formatter.datetime_format = datetime_format if formatter.respond_to?(:datetime_format=)
|
39
|
+
+ end
|
40
|
+
+
|
41
|
+
+ alias :old_datetime_format :datetime_format
|
42
|
+
+ # Get the logging datetime format. Returns nil if the formatter does not support
|
43
|
+
+ # datetime formatting.
|
44
|
+
+ def datetime_format
|
45
|
+
+ formatter.datetime_format if formatter.respond_to?(:datetime_format)
|
46
|
+
+ end
|
47
|
+
+
|
48
|
+
+ alias :old_formatter :formatter
|
49
|
+
+ # Get the current formatter. The default formatter is a SimpleFormatter which only
|
50
|
+
+ # displays the log message
|
51
|
+
+ def formatter
|
52
|
+
+ @formatter ||= SimpleFormatter.new
|
53
|
+
+ end
|
54
|
+
+
|
55
|
+
+ # Simple formatter which only displays the message.
|
56
|
+
+ class SimpleFormatter < Logger::Formatter
|
57
|
+
+ # This method is invoked when a log event occurs
|
58
|
+
+ def call(severity, timestamp, progname, msg)
|
59
|
+
+ "#{msg}\n"
|
60
|
+
+ end
|
61
|
+
+ end
|
62
|
+
|
63
|
+
private
|
64
|
+
alias old_format_message format_message
|
65
|
+
@@ -28,11 +68,11 @@
|
66
|
+
# with Logger from 1.8.3 and vice versa.
|
67
|
+
if method_defined?(:formatter=)
|
68
|
+
def format_message(severity, timestamp, progname, msg)
|
69
|
+
- "#{msg}\n"
|
70
|
+
+ formatter.call(severity, timestamp, progname, msg)
|
71
|
+
end
|
72
|
+
else
|
73
|
+
def format_message(severity, timestamp, msg, progname)
|
74
|
+
- "#{msg}\n"
|
75
|
+
+ formatter.call(severity, timestamp, progname, msg)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib/', __FILE__)
|
3
|
+
$:.unshift lib unless $:.include?(lib)
|
4
|
+
|
5
|
+
require 'etl/version'
|
6
|
+
|
7
|
+
# Gem packaging metadata for the activewarehouse-etl-sgonyea release.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'activewarehouse-etl-sgonyea'
  spec.version     = '0.9.6'
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['Anthony Eden', 'Thibaut Barrère']
  spec.email       = ['thibaut.barrere@gmail.com']
  spec.homepage    = 'https://github.com/activewarehouse/activewarehouse-etl'
  spec.summary     = 'Pure Ruby ETL package.'
  spec.description = 'ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.'

  spec.required_rubygems_version = '>= 1.3.6'

  # Runtime dependencies, declared in the same order as before.
  [
    ['rake',               '>= 0.8.3'],
    ['activesupport',      '>= 2.1.0'],
    ['activerecord',       '>= 2.1.0'],
    ['fastercsv',          '>= 1.2.0'],
    ['adapter_extensions', '>= 0.9.5.rc1']
  ].each do |dependency|
    spec.add_runtime_dependency(*dependency)
  end

  # Development-only dependencies; rdoc carries no version constraint.
  [
    ['shoulda',     '~>2.11.3'],
    ['flexmock',    '~>0.9.0'],
    ['mysql',       '~>2.8.1'],
    ['mysql2',      '~>0.3.7'],
    ['rdoc'],
    ['spreadsheet', '~>0.6.5.4']
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end

  # The packaged file lists are taken from whatever git currently tracks.
  spec.files        = `git ls-files`.split("\n")
  spec.test_files   = `git ls-files -- {test}/*`.split("\n")
  spec.executables  = ['etl']
  spec.require_path = 'lib'
end
|
data/bin/etl
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2006 Anthony Eden
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib/')
|
27
|
+
require 'etl'
|
28
|
+
require 'etl/commands/etl'
|
data/bin/etl.cmd
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
@echo off
|
2
|
+
|
3
|
+
rem The purpose of this Windows script is to let you use the etl command line with a non-gem version of AW-ETL (eg: unpacked gem, pistoned trunk).
|
4
|
+
rem Just add the current folder on top of your PATH variable to use it instead of the etl command provided with the gem release.
|
5
|
+
|
6
|
+
rem %~dp0 returns the absolute path where the current script is. We just append 'etl' to it, and forward all the arguments with %*
|
7
|
+
|
8
|
+
ruby "%~dp0etl" %*
|
@@ -0,0 +1,16 @@
|
|
1
|
+
etl_execution:
|
2
|
+
adapter: mysql
|
3
|
+
username: root
|
4
|
+
host: localhost
|
5
|
+
database: etl_execution
|
6
|
+
encoding: utf8
|
7
|
+
datawarehouse:
|
8
|
+
adapter: mysql
|
9
|
+
username: root
|
10
|
+
host: localhost
|
11
|
+
database: datawarehouse_development
|
12
|
+
operational:
|
13
|
+
adapter: mysql
|
14
|
+
username: root
|
15
|
+
host: localhost
|
16
|
+
database: operational_production
|
data/lib/etl.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
# This source file requires all of the necessary gems and source files for ActiveWarehouse ETL. If you
|
2
|
+
# load this source file all of the other required files and gems will also be brought into the
|
3
|
+
# runtime.
|
4
|
+
|
5
|
+
#--
|
6
|
+
# Copyright (c) 2006-2007 Anthony Eden
|
7
|
+
#
|
8
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
9
|
+
# a copy of this software and associated documentation files (the
|
10
|
+
# "Software"), to deal in the Software without restriction, including
|
11
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
12
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
13
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
14
|
+
# the following conditions:
|
15
|
+
#
|
16
|
+
# The above copyright notice and this permission notice shall be
|
17
|
+
# included in all copies or substantial portions of the Software.
|
18
|
+
#
|
19
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
22
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
23
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
24
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
25
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
26
|
+
#++
|
27
|
+
|
28
|
+
require 'logger'
|
29
|
+
require 'yaml'
|
30
|
+
require 'erb'
|
31
|
+
|
32
|
+
require 'rubygems'
|
33
|
+
|
34
|
+
unless defined?(REXML::VERSION)
|
35
|
+
require 'rexml/rexml'
|
36
|
+
unless defined?(REXML::VERSION)
|
37
|
+
REXML::VERSION = REXML::Version
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
require 'active_support'
|
42
|
+
require 'active_record'
|
43
|
+
require 'adapter_extensions'
|
44
|
+
|
45
|
+
if RUBY_VERSION < '1.9'
|
46
|
+
require 'faster_csv'
|
47
|
+
CSV = FasterCSV unless defined?(CSV)
|
48
|
+
else
|
49
|
+
require 'csv'
|
50
|
+
end
|
51
|
+
|
52
|
+
# patch for https://github.com/activewarehouse/activewarehouse-etl/issues/24
|
53
|
+
# allow components to require optional gems
|
54
|
+
class Object
  # Try to load +feature+ without failing when it is not installed.
  #
  # Returns the result of Kernel#require when the feature could be loaded,
  # or nil when loading raised a LoadError. Any other error propagates.
  def optional_require(feature)
    require feature
  rescue LoadError
    nil
  end
end
|
62
|
+
|
63
|
+
$:.unshift(File.dirname(__FILE__))
|
64
|
+
|
65
|
+
require 'etl/core_ext'
|
66
|
+
require 'etl/util'
|
67
|
+
require 'etl/http_tools'
|
68
|
+
require 'etl/builder'
|
69
|
+
require 'etl/version'
|
70
|
+
require 'etl/engine'
|
71
|
+
require 'etl/control'
|
72
|
+
require 'etl/batch'
|
73
|
+
require 'etl/row'
|
74
|
+
require 'etl/parser'
|
75
|
+
require 'etl/transform'
|
76
|
+
require 'etl/processor'
|
77
|
+
require 'etl/generator'
|
78
|
+
require 'etl/screen'
|
79
|
+
|
80
|
+
module ETL #:nodoc:
  # Root of the ETL exception hierarchy.
  class ETLError < StandardError; end

  # Control-related errors and their two specialisations.
  class ControlError < ETLError; end
  class DefinitionError < ControlError; end
  class ConfigurationError < ControlError; end

  # Other direct descendants of ETLError.
  class MismatchError < ETLError; end
  class ResolverError < ETLError; end

  # Screen errors; FatalScreenError is a kind of ScreenError.
  class ScreenError < ETLError; end
  class FatalScreenError < ScreenError; end
end
|
data/lib/etl/batch.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Batch
|
3
|
+
# Scope in which a batch file is evaluated. The methods defined here are the
# commands a batch file can call; each one forwards to the wrapped batch.
class Context
  # The batch object that receives the forwarded commands
  attr_reader :batch

  # Wrap +batch+ in a new context and return that context's binding, which
  # is what the batch file is evaluated against.
  def self.create(batch)
    Context.new(batch).get_binding
  end

  def initialize(batch)
    @batch = batch
  end

  # The file reported by the wrapped batch
  def file
    batch.file
  end

  # Binding whose self is this context
  def get_binding
    binding
  end

  # Forward a run command. +file+ is appended to the directory of the
  # batch's own file, so it is resolved relative to that directory.
  def run(file)
    directory = File.dirname(self.file)
    batch.run(directory + "/" + file)
  end

  # Forward a use_temp_tables command to the batch
  def use_temp_tables(value=true)
    batch.use_temp_tables(value)
  end
end
|
34
|
+
# A batch: an ordered list of directives collected by evaluating a batch file,
# executed one after another.
class Batch
  # Path of the batch file this batch was built from
  attr_accessor :file
  # Engine assigned by Batch.resolve; execute reports progress through it
  attr_accessor :engine

  class << self
    # Resolve the given object to an ETL::Batch::Batch instance and assign
    # +engine+ to it. Acceptable arguments are:
    # * The path to a batch file as a String
    # * A File object referencing the batch file
    # * An ETL::Batch::Batch object (which will just be returned)
    #
    # Raises a RuntimeError if any other type is given
    def resolve(batch, engine)
      resolved = do_resolve(batch)
      resolved.engine = engine
      resolved
    end

    protected

    # Build a batch by evaluating the batch file at +batch_file+ (a path or
    # a File) inside a Context bound to the new batch.
    def parse(batch_file)
      # Accept File subclasses too, matching the +when File+ dispatch below.
      batch_file = batch_file.path if batch_file.is_a?(File)
      batch = new(batch_file)
      # Read the file verbatim: joining readlines with "\n" would double
      # every line break and skew the line numbers reported for errors.
      # NOTE: the batch file is executed as Ruby code, so it must be trusted.
      eval(File.read(batch_file), Context.create(batch), batch_file)
      batch
    end

    # Dispatch on the type of +batch+; see resolve for the accepted types.
    def do_resolve(batch)
      case batch
      when String, File
        # Pass the path straight through instead of opening a File handle
        # that would never be closed.
        parse(batch)
      when Batch
        batch
      else
        raise RuntimeError, "Batch must be a String, File or Batch object"
      end
    end
  end

  def initialize(file)
    @file = file
  end

  # Queue a directive that runs the given control file
  def run(file)
    directives << Run.new(self, file)
  end

  # Queue a directive that switches temp tables on. Nothing is queued when
  # +value+ is false or nil, so "use_temp_tables false" no longer enables them.
  def use_temp_tables(value = true)
    directives << UseTempTables.new(self) if value
  end

  # Execute every queued directive in order, reporting progress via the engine
  def execute
    engine.say "Executing batch"
    before_execute
    directives.each do |directive|
      directive.execute
    end
    engine.say "Finishing batch"
    after_execute
    engine.say "Batch complete"
  end

  # The queued directives, created lazily
  def directives
    @directives ||= []
  end

  # Hook invoked before the directives run; does nothing here
  def before_execute

  end

  # Hook invoked after the directives have run
  def after_execute
    ETL::Engine.finish # TODO: should be moved to the directive?
    ETL::Engine.use_temp_tables = false # reset the temp tables
  end
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Batch #:nodoc:
|
3
|
+
# Common superclass for batch directives. It stores the owning batch and
# exposes #execute; subclasses provide the actual work in #do_execute.
class Directive
  # The batch this directive belongs to
  attr_reader :batch

  # Remember the batch the directive was created for
  def initialize(batch)
    @batch = batch
  end

  # Public entry point: runs the subclass-specific #do_execute
  def execute
    do_execute
  end

  protected

  # Subclasses must override this; the base version always raises a
  # RuntimeError.
  def do_execute
    raise "Directive must implement do_execute method"
  end
end
|
24
|
+
|
25
|
+
# Directive that runs one ETL control file through the batch's engine.
class Run < Directive
  # The control file to process
  attr_reader :file

  # Create the directive for +batch+, remembering the +file+ to run
  def initialize(batch, file)
    super(batch)
    @file = file
  end

  protected

  # Process the file, then put the previously current engine batch back.
  def do_execute
    parent = ETL::Engine.batch
    batch.engine.process(file)

    # Whatever ETL::Engine.batch holds after processing is inspected here;
    # presumably process may have replaced it - confirm against the engine.
    child = ETL::Engine.batch
    if child.is_a?(ETL::Execution::Batch) && parent[:id] != child[:id]
      # Record the previously current batch's id on the new one and save it.
      child[:batch_id] = parent[:id]
      child.save!
    end

    ETL::Engine.batch = parent
  end
end
|
53
|
+
|
54
|
+
# Directive that switches the engine over to temp tables. Construction is
# inherited unchanged from Directive (a single batch argument).
class UseTempTables < Directive
  protected

  # Turn on the engine-wide temp table flag
  def do_execute
    ETL::Engine.use_temp_tables = true
  end
end
|
64
|
+
end
|
65
|
+
end
|