activewarehouse-etl 0.9.1 → 0.9.5.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/0.9-UPGRADE +6 -0
- data/CHANGELOG +182 -150
- data/Gemfile +4 -0
- data/HOW_TO_RELEASE +9 -0
- data/README +18 -2
- data/Rakefile +35 -91
- data/active_support_logger.patch +78 -0
- data/activewarehouse-etl.gemspec +30 -0
- data/lib/etl.rb +10 -2
- data/lib/etl/batch/directives.rb +11 -1
- data/lib/etl/control/control.rb +2 -2
- data/lib/etl/control/destination.rb +27 -7
- data/lib/etl/control/destination/database_destination.rb +8 -6
- data/lib/etl/control/destination/excel_destination.rb +91 -0
- data/lib/etl/control/destination/file_destination.rb +6 -4
- data/lib/etl/control/destination/insert_update_database_destination.rb +133 -0
- data/lib/etl/control/destination/update_database_destination.rb +109 -0
- data/lib/etl/control/source.rb +3 -2
- data/lib/etl/control/source/database_source.rb +14 -10
- data/lib/etl/control/source/file_source.rb +2 -2
- data/lib/etl/engine.rb +17 -15
- data/lib/etl/execution.rb +0 -1
- data/lib/etl/execution/batch.rb +3 -1
- data/lib/etl/execution/migration.rb +5 -0
- data/lib/etl/parser/delimited_parser.rb +20 -1
- data/lib/etl/parser/excel_parser.rb +112 -0
- data/lib/etl/processor/bulk_import_processor.rb +4 -2
- data/lib/etl/processor/database_join_processor.rb +68 -0
- data/lib/etl/processor/escape_csv_processor.rb +77 -0
- data/lib/etl/processor/filter_row_processor.rb +51 -0
- data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
- data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
- data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
- data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
- data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
- data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
- data/lib/etl/processor/zip_file_processor.rb +27 -0
- data/lib/etl/transform/calculation_transform.rb +71 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +25 -7
- data/lib/etl/transform/ordinalize_transform.rb +3 -1
- data/lib/etl/transform/split_fields_transform.rb +27 -0
- data/lib/etl/version.rb +1 -7
- data/test-matrix.yml +10 -0
- data/test/.gitignore +1 -0
- data/test/.ignore +2 -0
- data/test/all.ebf +6 -0
- data/test/apache_combined_log.ctl +11 -0
- data/test/batch_test.rb +41 -0
- data/test/batch_with_error.ebf +6 -0
- data/test/batched1.ctl +0 -0
- data/test/batched2.ctl +0 -0
- data/test/block_processor.ctl +6 -0
- data/test/block_processor_error.ctl +1 -0
- data/test/block_processor_pre_post_process.ctl +4 -0
- data/test/block_processor_remove_rows.ctl +5 -0
- data/test/block_processor_test.rb +38 -0
- data/test/config/Gemfile.rails-2.3.x +3 -0
- data/test/config/Gemfile.rails-2.3.x.lock +38 -0
- data/test/config/Gemfile.rails-3.0.x +3 -0
- data/test/config/Gemfile.rails-3.0.x.lock +49 -0
- data/test/config/common.rb +21 -0
- data/test/connection/mysql/connection.rb +9 -0
- data/test/connection/mysql/schema.sql +36 -0
- data/test/connection/postgresql/connection.rb +13 -0
- data/test/connection/postgresql/schema.sql +39 -0
- data/test/control_test.rb +43 -0
- data/test/data/apache_combined_log.txt +3 -0
- data/test/data/bulk_import.txt +3 -0
- data/test/data/bulk_import_with_empties.txt +3 -0
- data/test/data/decode.txt +3 -0
- data/test/data/delimited.txt +3 -0
- data/test/data/encode_source_latin1.txt +2 -0
- data/test/data/excel.xls +0 -0
- data/test/data/excel2.xls +0 -0
- data/test/data/fixed_width.txt +3 -0
- data/test/data/multiple_delimited_1.txt +3 -0
- data/test/data/multiple_delimited_2.txt +3 -0
- data/test/data/people.txt +3 -0
- data/test/data/sax.xml +14 -0
- data/test/data/xml.xml +16 -0
- data/test/date_dimension_builder_test.rb +96 -0
- data/test/delimited.ctl +30 -0
- data/test/delimited_absolute.ctl +33 -0
- data/test/delimited_destination_db.ctl +25 -0
- data/test/delimited_excel.ctl +31 -0
- data/test/delimited_insert_update.ctl +34 -0
- data/test/delimited_update.ctl +34 -0
- data/test/delimited_with_bulk_load.ctl +34 -0
- data/test/destination_test.rb +275 -0
- data/test/directive_test.rb +23 -0
- data/test/encode_processor_test.rb +32 -0
- data/test/engine_test.rb +32 -0
- data/test/errors.ctl +24 -0
- data/test/etl_test.rb +42 -0
- data/test/excel.ctl +24 -0
- data/test/excel2.ctl +25 -0
- data/test/fixed_width.ctl +35 -0
- data/test/generator_test.rb +14 -0
- data/test/inline_parser.ctl +17 -0
- data/test/mocks/mock_destination.rb +26 -0
- data/test/mocks/mock_source.rb +25 -0
- data/test/model_source.ctl +14 -0
- data/test/multiple_delimited.ctl +22 -0
- data/test/multiple_source_delimited.ctl +39 -0
- data/test/parser_test.rb +224 -0
- data/test/performance/delimited.ctl +30 -0
- data/test/processor_test.rb +44 -0
- data/test/row_processor_test.rb +17 -0
- data/test/sax.ctl +26 -0
- data/test/scd/1.txt +1 -0
- data/test/scd/2.txt +1 -0
- data/test/scd/3.txt +1 -0
- data/test/scd_test.rb +257 -0
- data/test/scd_test_type_1.ctl +43 -0
- data/test/scd_test_type_2.ctl +34 -0
- data/test/screen_test.rb +9 -0
- data/test/screen_test_error.ctl +3 -0
- data/test/screen_test_fatal.ctl +3 -0
- data/test/source_test.rb +139 -0
- data/test/test_helper.rb +34 -0
- data/test/transform_test.rb +101 -0
- data/test/vendor/adapter_extensions-0.5.0/CHANGELOG +26 -0
- data/test/vendor/adapter_extensions-0.5.0/LICENSE +16 -0
- data/test/vendor/adapter_extensions-0.5.0/README +7 -0
- data/test/vendor/adapter_extensions-0.5.0/Rakefile +158 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions.rb +12 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/abstract_adapter.rb +44 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/mysql_adapter.rb +63 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/postgresql_adapter.rb +52 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/sqlserver_adapter.rb +44 -0
- data/test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/version.rb +10 -0
- data/test/xml.ctl +31 -0
- metadata +229 -70
- data/lib/etl/execution/record.rb +0 -18
@@ -0,0 +1,12 @@
|
|
1
|
+
# Extensions to the Rails ActiveRecord adapters.
|
2
|
+
#
|
3
|
+
# Requiring this file will require all of the necessary files to function.
|
4
|
+
|
5
|
+
# Announce on standard output that the extensions are being loaded.
puts "Using AdapterExtensions"

require 'rubygems'
require 'active_support'
require 'active_record'

# Make this directory requirable, then load every Ruby file found beneath
# adapter_extensions/. The file list is sorted because Dir[] does not promise
# any particular ordering on older Rubies; sorting keeps the load order the
# same on every platform and filesystem.
$:.unshift(File.dirname(__FILE__))
Dir[File.join(File.dirname(__FILE__), "adapter_extensions", "**", "*.rb")].sort.each { |file| require(file) }
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# This source file contains extensions to the abstract adapter.
module ActiveRecord #:nodoc:
  module ConnectionAdapters #:nodoc:
    # Extensions to the AbstractAdapter. In some cases a default implementation
    # is provided, in others it is adapter-dependent and the method will
    # raise a NotImplementedError if the adapter does not implement that method
    class AbstractAdapter
      # Truncate the specified table by executing "TRUNCATE TABLE <table_name>".
      # The table name is interpolated into the statement as given.
      def truncate(table_name)
        execute("TRUNCATE TABLE #{table_name}")
      end

      # Bulk loading interface. Load the data from the specified file into the
      # given table. Note that options will be adapter-dependent.
      #
      # * <tt>file</tt> -- Path of the file to load; it must exist
      # * <tt>table_name</tt> -- Name of the target table (String or Symbol);
      #   it must be listed by +tables+
      # * <tt>options</tt> -- Passed through unchanged to +do_bulk_load+
      #
      # Raises ArgumentError if the file or the table does not exist, and
      # NotImplementedError if the adapter does not implement +do_bulk_load+.
      def bulk_load(file, table_name, options={})
        raise ArgumentError, "#{file} does not exist" unless File.exist?(file)
        # Compare by string so that a Symbol table name is not rejected when
        # +tables+ lists the names as Strings.
        raise ArgumentError, "#{table_name} does not exist" unless tables.include?(table_name.to_s)
        do_bulk_load(file, table_name, options)
      end

      # SQL select into statement constructs a new table from the results
      # of a select. It is used to select data from a table and create a new
      # table with its result set at the same time. Note that this method
      # name does not necessarily match the implementation. E.g. MySQL's
      # version of this is 'CREATE TABLE ... AS SELECT ...'
      #
      # Returns false here; adapters that support the feature override it.
      def support_select_into_table?
        false
      end

      # Add a chunk of SQL to the given query that will create a new table and
      # execute the select into that table.
      #
      # Abstract: always raises NotImplementedError in this class.
      def add_select_into_table(new_table_name, sql_query)
        raise NotImplementedError, "add_select_into_table is an abstract method"
      end

      protected

      # For subclasses to implement; +bulk_load+ calls this after validating
      # its arguments. Always raises NotImplementedError in this class.
      def do_bulk_load(file, table_name, options={})
        raise NotImplementedError, "do_bulk_load is an abstract method"
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Source code for the MysqlAdapter extensions.
module ActiveRecord #:nodoc:
  module ConnectionAdapters #:nodoc:
    # Adds new functionality to ActiveRecord MysqlAdapter.
    class MysqlAdapter < AbstractAdapter

      # This adapter can create a table from the result of a select.
      def support_select_into_table?
        true
      end

      # Returns sql_query prefixed with "CREATE TABLE new_table_name ", i.e. the
      # CREATE TABLE ... SELECT form rather than an INTO clause.
      def add_select_into_table(new_table_name, sql_query)
        "CREATE TABLE #{new_table_name} " + sql_query
      end

      # Copy the specified table: create new_table_name LIKE old_table_name,
      # then insert every row of the old table into it.
      def copy_table(old_table_name, new_table_name)
        transaction do
          execute "CREATE TABLE #{new_table_name} LIKE #{old_table_name}"
          execute "INSERT INTO #{new_table_name} SELECT * FROM #{old_table_name}"
        end
      end

      protected
      # Call +bulk_load+, as that method wraps this method.
      #
      # Bulk load the data in the specified file. This implementation always uses the LOCAL keyword
      # so the file must be found locally, not on the remote server, to be loaded.
      # Returns without doing anything when the file is empty.
      #
      # Options:
      # * <tt>:ignore</tt> -- Ignore the specified number of lines from the source file
      # * <tt>:columns</tt> -- Array of column names defining the source file column order
      # * <tt>:fields</tt> -- Hash of options for fields:
      #   * <tt>:delimited_by</tt> -- The field delimiter
      #   * <tt>:enclosed_by</tt> -- The field enclosure
      def do_bulk_load(file, table_name, options={})
        return if File.size(file) == 0

        # an unfortunate hack - setting the bulk load option after the connection has been
        # established does not seem to have any effect, and since the connection is made when
        # active-record is loaded, there's no chance for us to sneak it in earlier. So we
        # disconnect, set the option, then reconnect - fortunately, this only needs to happen once.
        unless @bulk_load_enabled
          disconnect!
          @connection.options(Mysql::OPT_LOCAL_INFILE, true)
          connect
          @bulk_load_enabled = true
        end

        # The path sits inside a quoted SQL string literal, so it is escaped with
        # the adapter's quote_string; an apostrophe or backslash in the path would
        # otherwise break or silently alter the statement.
        q = "LOAD DATA LOCAL INFILE '#{quote_string(file.to_s)}' INTO TABLE #{table_name}"
        if options[:fields]
          # Delimiter and enclosure are passed through verbatim so that callers
          # can keep using MySQL escape sequences such as \t.
          q << " FIELDS"
          q << " TERMINATED BY '#{options[:fields][:delimited_by]}'" if options[:fields][:delimited_by]
          q << " ENCLOSED BY '#{options[:fields][:enclosed_by]}'" if options[:fields][:enclosed_by]
        end
        q << " IGNORE #{options[:ignore]} LINES" if options[:ignore]
        q << " (#{options[:columns].join(',')})" if options[:columns]
        execute(q)
      end

    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Source code for the PostgreSQLAdapter extensions.
module ActiveRecord #:nodoc:
  module ConnectionAdapters #:nodoc:
    # Adds new functionality to ActiveRecord PostgreSQLAdapter.
    class PostgreSQLAdapter < AbstractAdapter
      # This adapter can create a table from the result of a select.
      def support_select_into_table?
        true
      end

      # Inserts an INTO table_name clause to the sql_query, in front of the
      # first standalone FROM keyword. The word boundaries keep identifiers that
      # merely contain "from" (e.g. a from_date column) from being matched.
      def add_select_into_table(new_table_name, sql_query)
        sql_query.sub(/\bFROM\b/i, "INTO #{new_table_name} FROM")
      end

      # Copy the specified table using SELECT * INTO new_table_name.
      def copy_table(old_table_name, new_table_name)
        execute add_select_into_table(new_table_name, "SELECT * FROM #{old_table_name}")
      end

      protected
      # Call +bulk_load+, as that method wraps this method.
      #
      # Bulk load the data in the specified file using a COPY ... FROM statement.
      #
      # Options:
      # * <tt>:ignore</tt> -- Ignore the specified number of lines from the source file. In the case of PostgreSQL
      #   only the first line will be ignored from the source file regardless of the number of lines specified.
      #   Honoured whether or not <tt>:fields</tt> is given.
      # * <tt>:columns</tt> -- Array of column names defining the source file column order
      # * <tt>:fields</tt> -- Hash of options for fields:
      #   * <tt>:delimited_by</tt> -- The field delimiter
      #   * <tt>:null_string</tt> -- The string that should be interpreted as NULL (in addition to \N)
      #   * <tt>:enclosed_by</tt> -- The field enclosure
      def do_bulk_load(file, table_name, options={})
        fields = options[:fields] || {}
        skip_header = options[:ignore].to_i > 0

        q = "COPY #{table_name} "
        q << "(#{options[:columns].join(',')}) " if options[:columns]
        # The path sits inside a quoted SQL string literal, so escape it.
        q << "FROM '#{quote_string(File.expand_path(file))}' "

        # Collect the clauses separately and join them with single spaces so
        # that no two clauses can run into each other.
        clauses = []
        clauses << "DELIMITER '#{fields[:delimited_by]}'" if fields[:delimited_by]
        clauses << "NULL '#{fields[:null_string]}'" if fields[:null_string]
        if fields[:enclosed_by] || skip_header
          # HEADER and QUOTE are only emitted as part of CSV mode.
          clauses << "CSV"
          clauses << "HEADER" if skip_header
          clauses << "QUOTE '#{fields[:enclosed_by]}'" if fields[:enclosed_by]
        end
        q << "WITH #{clauses.join(' ')}" unless clauses.empty?

        execute(q)
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Source code for the SQLServerAdapter extensions.
module ActiveRecord #:nodoc:
  module ConnectionAdapters #:nodoc:
    # Adds new functionality to ActiveRecord SQLServerAdapter.
    class SQLServerAdapter < AbstractAdapter
      # This adapter can create a table from the result of a select.
      def support_select_into_table?
        true
      end

      # Inserts an INTO table_name clause to the sql_query, in front of the
      # first standalone FROM keyword. The word boundaries keep identifiers that
      # merely contain "from" (e.g. a from_date column) from being matched.
      def add_select_into_table(new_table_name, sql_query)
        sql_query.sub(/\bFROM\b/i, "INTO #{new_table_name} FROM")
      end

      # Copy the specified table using SELECT * INTO new_table_name.
      def copy_table(old_table_name, new_table_name)
        execute add_select_into_table(new_table_name, "SELECT * FROM #{old_table_name}")
      end

      protected
      # Call +bulk_load+, as that method wraps this method.
      #
      # Bulk load the data in the specified file. This implementation relies
      # on bcp being in your PATH. Connection details are read from
      # ActiveRecord::Base.configurations for the selected environment.
      # Returns the output of the bcp command.
      #
      # Options:
      # * <tt>:env</tt> -- Name of the configuration to use; defaults to RAILS_ENV
      #   (the constant if defined, otherwise the environment variable)
      # * <tt>:fields</tt> -- Hash of options for fields:
      #   * <tt>:delimited_by</tt> -- The field delimiter
      # * <tt>:delimited_by</tt> -- Top-level form of the field delimiter, still
      #   accepted for backwards compatibility
      #
      # The <tt>:ignore</tt>, <tt>:columns</tt> and <tt>:enclosed_by</tt> options
      # accepted by other adapters are not used by this implementation.
      #
      # Raises ArgumentError if no configuration exists for the environment.
      def do_bulk_load(file, table_name, options={})
        env_name = options[:env] || (defined?(RAILS_ENV) ? RAILS_ENV : ENV['RAILS_ENV'])
        config = ActiveRecord::Base.configurations[env_name]
        unless config
          raise ArgumentError, "no database configuration found for environment #{env_name.inspect}"
        end

        field_options = options[:fields] || {}
        delimiter = field_options[:delimited_by] || options[:delimited_by]

        puts "Loading table \"#{table_name}\" from file \"#{file}\""

        # NOTE(review): every value below is interpolated into a shell command
        # line, including the password. Only call this with trusted values.
        cmd = "bcp \"#{config['database']}.dbo.#{table_name}\" in "
        cmd << "\"#{file}\" -S \"#{config['host']}\" -c "
        # Leave -t off when no delimiter was given so bcp uses its own default.
        cmd << "-t \"#{delimiter}\" " if delimiter
        cmd << "-b10000 -a8192 -q -E -U \"#{config['username']}\" "
        cmd << "-P \"#{config['password']}\" -e \"#{file}.in.errors\""
        `#{cmd}`
      end
    end
  end
end
|
data/test/xml.ctl
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Control file for the XML parser test: reads person records from data/xml.xml
# and writes selected fields to output/xml.txt.

# Fields taken from each person element. Plain symbols name a field directly;
# hashes carry extra settings (an explicit xpath for :ssn, a type for :age).
person_fields = [
  :first_name,
  :last_name,
  { :name => :ssn, :xpath => 'social_security_number' },
  { :name => :age, :type => :integer }
]

source :in,
  { :file => 'data/xml.xml', :parser => :xml },
  { :collection => 'people/person', :fields => person_fields }

# Only these three fields are written out, in this order.
destination :out,
  { :file => 'output/xml.txt' },
  { :order => [:first_name, :last_name, :ssn] }

# Two transforms are registered for :ssn: the :sha1 transform and a block that
# returns v[0,24].
transform :ssn, :sha1
transform(:ssn) { |v| v[0,24] }
|
metadata
CHANGED
@@ -1,69 +1,122 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: activewarehouse-etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 15424175
|
5
|
+
prerelease: 6
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 9
|
9
|
+
- 5
|
10
|
+
- rc
|
11
|
+
- 1
|
12
|
+
version: 0.9.5.rc1
|
5
13
|
platform: ruby
|
6
14
|
authors:
|
7
15
|
- Anthony Eden
|
16
|
+
- "Thibaut Barr\xC3\xA8re"
|
8
17
|
autorequire:
|
9
18
|
bindir: bin
|
10
19
|
cert_chain: []
|
11
20
|
|
12
|
-
date:
|
13
|
-
default_executable: etl
|
21
|
+
date: 2011-06-10 00:00:00 Z
|
14
22
|
dependencies:
|
15
23
|
- !ruby/object:Gem::Dependency
|
16
24
|
name: rake
|
17
|
-
|
18
|
-
|
19
|
-
|
25
|
+
prerelease: false
|
26
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
27
|
+
none: false
|
20
28
|
requirements:
|
21
29
|
- - ">="
|
22
30
|
- !ruby/object:Gem::Version
|
23
|
-
|
24
|
-
|
31
|
+
hash: 57
|
32
|
+
segments:
|
33
|
+
- 0
|
34
|
+
- 8
|
35
|
+
- 3
|
36
|
+
version: 0.8.3
|
37
|
+
type: :runtime
|
38
|
+
version_requirements: *id001
|
25
39
|
- !ruby/object:Gem::Dependency
|
26
40
|
name: activesupport
|
27
|
-
|
28
|
-
|
29
|
-
|
41
|
+
prerelease: false
|
42
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
30
44
|
requirements:
|
31
45
|
- - ">="
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
|
34
|
-
|
47
|
+
hash: 11
|
48
|
+
segments:
|
49
|
+
- 2
|
50
|
+
- 1
|
51
|
+
- 0
|
52
|
+
version: 2.1.0
|
53
|
+
type: :runtime
|
54
|
+
version_requirements: *id002
|
35
55
|
- !ruby/object:Gem::Dependency
|
36
56
|
name: activerecord
|
37
|
-
|
38
|
-
|
39
|
-
|
57
|
+
prerelease: false
|
58
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
40
60
|
requirements:
|
41
61
|
- - ">="
|
42
62
|
- !ruby/object:Gem::Version
|
43
|
-
|
44
|
-
|
63
|
+
hash: 11
|
64
|
+
segments:
|
65
|
+
- 2
|
66
|
+
- 1
|
67
|
+
- 0
|
68
|
+
version: 2.1.0
|
69
|
+
type: :runtime
|
70
|
+
version_requirements: *id003
|
45
71
|
- !ruby/object:Gem::Dependency
|
46
72
|
name: fastercsv
|
47
|
-
|
48
|
-
|
49
|
-
|
73
|
+
prerelease: false
|
74
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
50
76
|
requirements:
|
51
77
|
- - ">="
|
52
78
|
- !ruby/object:Gem::Version
|
79
|
+
hash: 31
|
80
|
+
segments:
|
81
|
+
- 1
|
82
|
+
- 2
|
83
|
+
- 0
|
53
84
|
version: 1.2.0
|
54
|
-
|
85
|
+
type: :runtime
|
86
|
+
version_requirements: *id004
|
55
87
|
- !ruby/object:Gem::Dependency
|
56
88
|
name: adapter_extensions
|
89
|
+
prerelease: false
|
90
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
hash: 11
|
96
|
+
segments:
|
97
|
+
- 0
|
98
|
+
- 5
|
99
|
+
- 0
|
100
|
+
version: 0.5.0
|
57
101
|
type: :runtime
|
58
|
-
|
59
|
-
|
102
|
+
version_requirements: *id005
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: spreadsheet
|
105
|
+
prerelease: false
|
106
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
60
108
|
requirements:
|
61
109
|
- - ">="
|
62
110
|
- !ruby/object:Gem::Version
|
63
|
-
|
64
|
-
|
111
|
+
hash: 3
|
112
|
+
segments:
|
113
|
+
- 0
|
114
|
+
version: "0"
|
115
|
+
type: :runtime
|
116
|
+
version_requirements: *id006
|
65
117
|
description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
|
66
|
-
email:
|
118
|
+
email:
|
119
|
+
- thibaut.barrere@gmail.com
|
67
120
|
executables:
|
68
121
|
- etl
|
69
122
|
extensions: []
|
@@ -71,86 +124,94 @@ extensions: []
|
|
71
124
|
extra_rdoc_files: []
|
72
125
|
|
73
126
|
files:
|
127
|
+
- .gitignore
|
128
|
+
- 0.9-UPGRADE
|
74
129
|
- CHANGELOG
|
130
|
+
- Gemfile
|
131
|
+
- HOW_TO_RELEASE
|
75
132
|
- LICENSE
|
76
133
|
- README
|
77
|
-
- TODO
|
78
134
|
- Rakefile
|
135
|
+
- TODO
|
136
|
+
- active_support_logger.patch
|
137
|
+
- activewarehouse-etl.gemspec
|
79
138
|
- bin/etl
|
80
139
|
- bin/etl.cmd
|
81
|
-
-
|
82
|
-
- lib/etl
|
140
|
+
- examples/database.example.yml
|
141
|
+
- lib/etl.rb
|
142
|
+
- lib/etl/batch.rb
|
83
143
|
- lib/etl/batch/batch.rb
|
84
144
|
- lib/etl/batch/directives.rb
|
85
|
-
- lib/etl/
|
86
|
-
- lib/etl/builder
|
145
|
+
- lib/etl/builder.rb
|
87
146
|
- lib/etl/builder/date_dimension_builder.rb
|
88
147
|
- lib/etl/builder/time_dimension_builder.rb
|
89
|
-
- lib/etl/builder.rb
|
90
|
-
- lib/etl/commands
|
91
148
|
- lib/etl/commands/etl.rb
|
92
|
-
- lib/etl/control
|
149
|
+
- lib/etl/control.rb
|
93
150
|
- lib/etl/control/control.rb
|
94
|
-
- lib/etl/control/destination
|
151
|
+
- lib/etl/control/destination.rb
|
95
152
|
- lib/etl/control/destination/database_destination.rb
|
153
|
+
- lib/etl/control/destination/excel_destination.rb
|
96
154
|
- lib/etl/control/destination/file_destination.rb
|
97
|
-
- lib/etl/control/destination.rb
|
98
|
-
- lib/etl/control/
|
155
|
+
- lib/etl/control/destination/insert_update_database_destination.rb
|
156
|
+
- lib/etl/control/destination/update_database_destination.rb
|
157
|
+
- lib/etl/control/source.rb
|
99
158
|
- lib/etl/control/source/database_source.rb
|
100
159
|
- lib/etl/control/source/enumerable_source.rb
|
101
160
|
- lib/etl/control/source/file_source.rb
|
102
161
|
- lib/etl/control/source/model_source.rb
|
103
|
-
- lib/etl/control/source.rb
|
104
|
-
- lib/etl/control.rb
|
105
|
-
- lib/etl/core_ext
|
106
|
-
- lib/etl/core_ext/time
|
107
|
-
- lib/etl/core_ext/time/calculations.rb
|
108
|
-
- lib/etl/core_ext/time.rb
|
109
162
|
- lib/etl/core_ext.rb
|
163
|
+
- lib/etl/core_ext/time.rb
|
164
|
+
- lib/etl/core_ext/time/calculations.rb
|
110
165
|
- lib/etl/engine.rb
|
111
|
-
- lib/etl/execution
|
166
|
+
- lib/etl/execution.rb
|
112
167
|
- lib/etl/execution/base.rb
|
113
168
|
- lib/etl/execution/batch.rb
|
114
169
|
- lib/etl/execution/job.rb
|
115
170
|
- lib/etl/execution/migration.rb
|
116
|
-
- lib/etl/
|
117
|
-
- lib/etl/execution.rb
|
118
|
-
- lib/etl/generator
|
171
|
+
- lib/etl/generator.rb
|
119
172
|
- lib/etl/generator/generator.rb
|
120
173
|
- lib/etl/generator/surrogate_key_generator.rb
|
121
|
-
- lib/etl/generator.rb
|
122
174
|
- lib/etl/http_tools.rb
|
123
|
-
- lib/etl/parser
|
175
|
+
- lib/etl/parser.rb
|
124
176
|
- lib/etl/parser/apache_combined_log_parser.rb
|
125
177
|
- lib/etl/parser/delimited_parser.rb
|
178
|
+
- lib/etl/parser/excel_parser.rb
|
126
179
|
- lib/etl/parser/fixed_width_parser.rb
|
127
180
|
- lib/etl/parser/parser.rb
|
128
181
|
- lib/etl/parser/sax_parser.rb
|
129
182
|
- lib/etl/parser/xml_parser.rb
|
130
|
-
- lib/etl/
|
131
|
-
- lib/etl/processor
|
183
|
+
- lib/etl/processor.rb
|
132
184
|
- lib/etl/processor/block_processor.rb
|
133
185
|
- lib/etl/processor/bulk_import_processor.rb
|
134
186
|
- lib/etl/processor/check_exist_processor.rb
|
135
187
|
- lib/etl/processor/check_unique_processor.rb
|
136
188
|
- lib/etl/processor/copy_field_processor.rb
|
189
|
+
- lib/etl/processor/database_join_processor.rb
|
137
190
|
- lib/etl/processor/encode_processor.rb
|
191
|
+
- lib/etl/processor/escape_csv_processor.rb
|
192
|
+
- lib/etl/processor/filter_row_processor.rb
|
193
|
+
- lib/etl/processor/ftp_downloader_processor.rb
|
194
|
+
- lib/etl/processor/ftp_uploader_processor.rb
|
138
195
|
- lib/etl/processor/hierarchy_exploder_processor.rb
|
196
|
+
- lib/etl/processor/imapattachment_downloader_processor.rb
|
197
|
+
- lib/etl/processor/pop3attachment_downloader_processor.rb
|
139
198
|
- lib/etl/processor/print_row_processor.rb
|
140
199
|
- lib/etl/processor/processor.rb
|
141
200
|
- lib/etl/processor/rename_processor.rb
|
142
201
|
- lib/etl/processor/require_non_blank_processor.rb
|
143
202
|
- lib/etl/processor/row_processor.rb
|
144
203
|
- lib/etl/processor/sequence_processor.rb
|
204
|
+
- lib/etl/processor/sftp_downloader_processor.rb
|
205
|
+
- lib/etl/processor/sftp_uploader_processor.rb
|
145
206
|
- lib/etl/processor/surrogate_key_processor.rb
|
146
207
|
- lib/etl/processor/truncate_processor.rb
|
147
|
-
- lib/etl/processor.rb
|
208
|
+
- lib/etl/processor/zip_file_processor.rb
|
148
209
|
- lib/etl/row.rb
|
149
|
-
- lib/etl/screen
|
150
|
-
- lib/etl/screen/row_count_screen.rb
|
151
210
|
- lib/etl/screen.rb
|
152
|
-
- lib/etl/
|
211
|
+
- lib/etl/screen/row_count_screen.rb
|
212
|
+
- lib/etl/transform.rb
|
153
213
|
- lib/etl/transform/block_transform.rb
|
214
|
+
- lib/etl/transform/calculation_transform.rb
|
154
215
|
- lib/etl/transform/date_to_string_transform.rb
|
155
216
|
- lib/etl/transform/decode_transform.rb
|
156
217
|
- lib/etl/transform/default_transform.rb
|
@@ -158,43 +219,141 @@ files:
|
|
158
219
|
- lib/etl/transform/hierarchy_lookup_transform.rb
|
159
220
|
- lib/etl/transform/ordinalize_transform.rb
|
160
221
|
- lib/etl/transform/sha1_transform.rb
|
222
|
+
- lib/etl/transform/split_fields_transform.rb
|
161
223
|
- lib/etl/transform/string_to_date_transform.rb
|
162
224
|
- lib/etl/transform/string_to_datetime_transform.rb
|
163
225
|
- lib/etl/transform/string_to_time_transform.rb
|
164
226
|
- lib/etl/transform/transform.rb
|
165
227
|
- lib/etl/transform/trim_transform.rb
|
166
228
|
- lib/etl/transform/type_transform.rb
|
167
|
-
- lib/etl/transform.rb
|
168
229
|
- lib/etl/util.rb
|
169
230
|
- lib/etl/version.rb
|
170
|
-
-
|
171
|
-
-
|
172
|
-
|
173
|
-
|
231
|
+
- test-matrix.yml
|
232
|
+
- test/.gitignore
|
233
|
+
- test/.ignore
|
234
|
+
- test/all.ebf
|
235
|
+
- test/apache_combined_log.ctl
|
236
|
+
- test/batch_test.rb
|
237
|
+
- test/batch_with_error.ebf
|
238
|
+
- test/batched1.ctl
|
239
|
+
- test/batched2.ctl
|
240
|
+
- test/block_processor.ctl
|
241
|
+
- test/block_processor_error.ctl
|
242
|
+
- test/block_processor_pre_post_process.ctl
|
243
|
+
- test/block_processor_remove_rows.ctl
|
244
|
+
- test/block_processor_test.rb
|
245
|
+
- test/config/Gemfile.rails-2.3.x
|
246
|
+
- test/config/Gemfile.rails-2.3.x.lock
|
247
|
+
- test/config/Gemfile.rails-3.0.x
|
248
|
+
- test/config/Gemfile.rails-3.0.x.lock
|
249
|
+
- test/config/common.rb
|
250
|
+
- test/config/database.example.yml
|
251
|
+
- test/connection/mysql/connection.rb
|
252
|
+
- test/connection/mysql/schema.sql
|
253
|
+
- test/connection/postgresql/connection.rb
|
254
|
+
- test/connection/postgresql/schema.sql
|
255
|
+
- test/control_test.rb
|
256
|
+
- test/data/apache_combined_log.txt
|
257
|
+
- test/data/bulk_import.txt
|
258
|
+
- test/data/bulk_import_with_empties.txt
|
259
|
+
- test/data/decode.txt
|
260
|
+
- test/data/delimited.txt
|
261
|
+
- test/data/encode_source_latin1.txt
|
262
|
+
- test/data/excel.xls
|
263
|
+
- test/data/excel2.xls
|
264
|
+
- test/data/fixed_width.txt
|
265
|
+
- test/data/multiple_delimited_1.txt
|
266
|
+
- test/data/multiple_delimited_2.txt
|
267
|
+
- test/data/people.txt
|
268
|
+
- test/data/sax.xml
|
269
|
+
- test/data/xml.xml
|
270
|
+
- test/date_dimension_builder_test.rb
|
271
|
+
- test/delimited.ctl
|
272
|
+
- test/delimited_absolute.ctl
|
273
|
+
- test/delimited_destination_db.ctl
|
274
|
+
- test/delimited_excel.ctl
|
275
|
+
- test/delimited_insert_update.ctl
|
276
|
+
- test/delimited_update.ctl
|
277
|
+
- test/delimited_with_bulk_load.ctl
|
278
|
+
- test/destination_test.rb
|
279
|
+
- test/directive_test.rb
|
280
|
+
- test/encode_processor_test.rb
|
281
|
+
- test/engine_test.rb
|
282
|
+
- test/errors.ctl
|
283
|
+
- test/etl_test.rb
|
284
|
+
- test/excel.ctl
|
285
|
+
- test/excel2.ctl
|
286
|
+
- test/fixed_width.ctl
|
287
|
+
- test/generator_test.rb
|
288
|
+
- test/inline_parser.ctl
|
289
|
+
- test/mocks/mock_destination.rb
|
290
|
+
- test/mocks/mock_source.rb
|
291
|
+
- test/model_source.ctl
|
292
|
+
- test/multiple_delimited.ctl
|
293
|
+
- test/multiple_source_delimited.ctl
|
294
|
+
- test/output/.ignore
|
295
|
+
- test/parser_test.rb
|
296
|
+
- test/performance/delimited.ctl
|
297
|
+
- test/processor_test.rb
|
298
|
+
- test/row_processor_test.rb
|
299
|
+
- test/sax.ctl
|
300
|
+
- test/scd/1.txt
|
301
|
+
- test/scd/2.txt
|
302
|
+
- test/scd/3.txt
|
303
|
+
- test/scd_test.rb
|
304
|
+
- test/scd_test_type_1.ctl
|
305
|
+
- test/scd_test_type_2.ctl
|
306
|
+
- test/screen_test.rb
|
307
|
+
- test/screen_test_error.ctl
|
308
|
+
- test/screen_test_fatal.ctl
|
309
|
+
- test/source_test.rb
|
310
|
+
- test/test_helper.rb
|
311
|
+
- test/transform_test.rb
|
312
|
+
- test/vendor/adapter_extensions-0.5.0/CHANGELOG
|
313
|
+
- test/vendor/adapter_extensions-0.5.0/LICENSE
|
314
|
+
- test/vendor/adapter_extensions-0.5.0/README
|
315
|
+
- test/vendor/adapter_extensions-0.5.0/Rakefile
|
316
|
+
- test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions.rb
|
317
|
+
- test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/abstract_adapter.rb
|
318
|
+
- test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/mysql_adapter.rb
|
319
|
+
- test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/postgresql_adapter.rb
|
320
|
+
- test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/connection_adapters/sqlserver_adapter.rb
|
321
|
+
- test/vendor/adapter_extensions-0.5.0/lib/adapter_extensions/version.rb
|
322
|
+
- test/xml.ctl
|
323
|
+
homepage: https://github.com/activewarehouse/activewarehouse-etl
|
324
|
+
licenses: []
|
325
|
+
|
174
326
|
post_install_message:
|
175
|
-
rdoc_options:
|
176
|
-
|
177
|
-
- .
|
327
|
+
rdoc_options: []
|
328
|
+
|
178
329
|
require_paths:
|
179
330
|
- lib
|
180
331
|
required_ruby_version: !ruby/object:Gem::Requirement
|
332
|
+
none: false
|
181
333
|
requirements:
|
182
334
|
- - ">="
|
183
335
|
- !ruby/object:Gem::Version
|
336
|
+
hash: 3
|
337
|
+
segments:
|
338
|
+
- 0
|
184
339
|
version: "0"
|
185
|
-
version:
|
186
340
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
341
|
+
none: false
|
187
342
|
requirements:
|
188
343
|
- - ">="
|
189
344
|
- !ruby/object:Gem::Version
|
190
|
-
|
191
|
-
|
345
|
+
hash: 23
|
346
|
+
segments:
|
347
|
+
- 1
|
348
|
+
- 3
|
349
|
+
- 6
|
350
|
+
version: 1.3.6
|
192
351
|
requirements: []
|
193
352
|
|
194
|
-
rubyforge_project:
|
195
|
-
rubygems_version: 1.
|
353
|
+
rubyforge_project:
|
354
|
+
rubygems_version: 1.8.5
|
196
355
|
signing_key:
|
197
|
-
specification_version:
|
356
|
+
specification_version: 3
|
198
357
|
summary: Pure Ruby ETL package.
|
199
358
|
test_files: []
|
200
359
|
|