activewarehouse-etl 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +41 -13
- data/README +1 -1
- data/Rakefile +14 -4
- data/TODO +17 -1
- data/bin/etl +3 -1
- data/lib/etl.rb +11 -7
- data/lib/etl/commands/etl.rb +0 -1
- data/lib/etl/control/control.rb +113 -36
- data/lib/etl/control/destination.rb +13 -1
- data/lib/etl/control/destination/database_destination.rb +3 -1
- data/lib/etl/control/destination/file_destination.rb +5 -2
- data/lib/etl/control/source.rb +36 -0
- data/lib/etl/control/source/database_source.rb +63 -8
- data/lib/etl/control/source/file_source.rb +25 -4
- data/lib/etl/engine.rb +128 -14
- data/lib/etl/generator/surrogate_key_generator.rb +1 -0
- data/lib/etl/http_tools.rb +119 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +47 -0
- data/lib/etl/parser/sax_parser.rb +18 -6
- data/lib/etl/processor.rb +1 -0
- data/lib/etl/processor/bulk_import_processor.rb +12 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +54 -0
- data/lib/etl/processor/processor.rb +1 -5
- data/lib/etl/processor/row_processor.rb +17 -0
- data/lib/etl/transform/date_to_string_transform.rb +1 -1
- data/lib/etl/transform/decode_transform.rb +1 -1
- data/lib/etl/transform/default_transform.rb +15 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +1 -1
- data/lib/etl/transform/hierarchy_lookup_transform.rb +56 -0
- data/lib/etl/transform/sha1_transform.rb +1 -1
- data/lib/etl/transform/string_to_date_transform.rb +3 -3
- data/lib/etl/transform/string_to_datetime_transform.rb +17 -0
- data/lib/etl/transform/string_to_time_transform.rb +14 -0
- data/lib/etl/transform/transform.rb +8 -4
- data/lib/etl/transform/type_transform.rb +2 -2
- data/lib/etl/version.rb +2 -2
- metadata +21 -8
- data/lib/etl/active_record_ext.rb +0 -1
- data/lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb +0 -34
@@ -5,9 +5,9 @@ module ETL #:nodoc:
|
|
5
5
|
def initialize(control, configuration={})
|
6
6
|
super
|
7
7
|
end
|
8
|
-
# Transform the value using
|
9
|
-
def transform(value)
|
10
|
-
|
8
|
+
# Transform the value using Date.parse
|
9
|
+
def transform(name, value, row)
|
10
|
+
Date.parse(value)
|
11
11
|
end
|
12
12
|
end
|
13
13
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Transform #:nodoc:
|
3
|
+
# Transform a String representation of a date to a DateTime instance
|
4
|
+
class StringToDateTimeTransform < ETL::Transform::Transform
|
5
|
+
def initialize(control, configuration={})
|
6
|
+
super
|
7
|
+
end
|
8
|
+
# Transform the value using DateTime.parse.
|
9
|
+
#
|
10
|
+
# WARNING: This transform is slow (due to the Ruby implementation), so use it only if you need to
|
11
|
+
# parse timestamps before or after the range of values supported by Time.parse.
|
12
|
+
def transform(name, value, row)
|
13
|
+
DateTime.parse(value)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Transform #:nodoc:
|
3
|
+
# Transform a String representation of a date to a Time instance
|
4
|
+
class StringToTimeTransform < ETL::Transform::Transform
|
5
|
+
def initialize(control, configuration={})
|
6
|
+
super
|
7
|
+
end
|
8
|
+
# Transform the value using Time.parse
|
9
|
+
def transform(name, value, row)
|
10
|
+
Time.parse(value)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -20,14 +20,14 @@ module ETL#:nodoc:
|
|
20
20
|
# Transform the specified value using the given transforms. The transforms can either be
|
21
21
|
# Proc objects or objects which extend from Transform and implement the method <tt>transform(name, value, row)</tt>.
|
22
22
|
# Any other object will result in a ControlError being raised.
|
23
|
-
def transform(name, value, transforms)
|
24
|
-
# logger.debug "Transforming field #{name}" if transforms.length > 0
|
23
|
+
def transform(name, value, row, transforms)
|
25
24
|
transforms.each do |transform|
|
25
|
+
Engine.logger.debug "Transforming field #{name} with #{transform.inspect}"
|
26
26
|
case transform
|
27
27
|
when Proc
|
28
|
-
value = transform.call(value)
|
28
|
+
value = transform.call([name, value, row])
|
29
29
|
when Transform
|
30
|
-
value = transform.transform(value)
|
30
|
+
value = transform.transform(name, value, row)
|
31
31
|
else
|
32
32
|
raise ControlError, "Unsupported transform configuration type: #{transform}"
|
33
33
|
end
|
@@ -43,6 +43,10 @@ module ETL#:nodoc:
|
|
43
43
|
@control = control
|
44
44
|
@configuration = configuration
|
45
45
|
end
|
46
|
+
|
47
|
+
def transform(name, value, row)
|
48
|
+
raise "transform is an abstract method"
|
49
|
+
end
|
46
50
|
end
|
47
51
|
end
|
48
52
|
end
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.5.2
|
7
|
-
date: 2007-
|
6
|
+
version: 0.6.0
|
7
|
+
date: 2007-03-08 00:00:00 -05:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,14 +36,13 @@ files:
|
|
36
36
|
- bin/etl
|
37
37
|
- lib/etl
|
38
38
|
- lib/etl.rb
|
39
|
-
- lib/etl/active_record_ext
|
40
|
-
- lib/etl/active_record_ext.rb
|
41
39
|
- lib/etl/commands
|
42
40
|
- lib/etl/control
|
43
41
|
- lib/etl/control.rb
|
44
42
|
- lib/etl/engine.rb
|
45
43
|
- lib/etl/generator
|
46
44
|
- lib/etl/generator.rb
|
45
|
+
- lib/etl/http_tools.rb
|
47
46
|
- lib/etl/parser
|
48
47
|
- lib/etl/parser.rb
|
49
48
|
- lib/etl/processor
|
@@ -51,8 +50,6 @@ files:
|
|
51
50
|
- lib/etl/transform
|
52
51
|
- lib/etl/transform.rb
|
53
52
|
- lib/etl/version.rb
|
54
|
-
- lib/etl/active_record_ext/connection_adapters
|
55
|
-
- lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb
|
56
53
|
- lib/etl/commands/etl.rb
|
57
54
|
- lib/etl/control/control.rb
|
58
55
|
- lib/etl/control/destination
|
@@ -65,19 +62,26 @@ files:
|
|
65
62
|
- lib/etl/control/source/file_source.rb
|
66
63
|
- lib/etl/generator/generator.rb
|
67
64
|
- lib/etl/generator/surrogate_key_generator.rb
|
65
|
+
- lib/etl/parser/apache_combined_log_parser.rb
|
68
66
|
- lib/etl/parser/delimited_parser.rb
|
69
67
|
- lib/etl/parser/fixed_width_parser.rb
|
70
68
|
- lib/etl/parser/parser.rb
|
71
69
|
- lib/etl/parser/sax_parser.rb
|
72
70
|
- lib/etl/parser/xml_parser.rb
|
73
71
|
- lib/etl/processor/bulk_import_processor.rb
|
72
|
+
- lib/etl/processor/hierarchy_exploder_processor.rb
|
74
73
|
- lib/etl/processor/processor.rb
|
74
|
+
- lib/etl/processor/row_processor.rb
|
75
75
|
- lib/etl/processor/truncate_processor.rb
|
76
76
|
- lib/etl/transform/date_to_string_transform.rb
|
77
77
|
- lib/etl/transform/decode_transform.rb
|
78
|
+
- lib/etl/transform/default_transform.rb
|
78
79
|
- lib/etl/transform/foreign_key_lookup_transform.rb
|
80
|
+
- lib/etl/transform/hierarchy_lookup_transform.rb
|
79
81
|
- lib/etl/transform/sha1_transform.rb
|
80
82
|
- lib/etl/transform/string_to_date_transform.rb
|
83
|
+
- lib/etl/transform/string_to_datetime_transform.rb
|
84
|
+
- lib/etl/transform/string_to_time_transform.rb
|
81
85
|
- lib/etl/transform/transform.rb
|
82
86
|
- lib/etl/transform/type_transform.rb
|
83
87
|
test_files: []
|
@@ -110,7 +114,7 @@ dependencies:
|
|
110
114
|
requirements:
|
111
115
|
- - ">="
|
112
116
|
- !ruby/object:Gem::Version
|
113
|
-
version: 1.3.1
|
117
|
+
version: 1.3.1
|
114
118
|
version:
|
115
119
|
- !ruby/object:Gem::Dependency
|
116
120
|
name: activerecord
|
@@ -119,7 +123,7 @@ dependencies:
|
|
119
123
|
requirements:
|
120
124
|
- - ">="
|
121
125
|
- !ruby/object:Gem::Version
|
122
|
-
version: 1.14.4
|
126
|
+
version: 1.14.4
|
123
127
|
version:
|
124
128
|
- !ruby/object:Gem::Dependency
|
125
129
|
name: fastercsv
|
@@ -130,3 +134,12 @@ dependencies:
|
|
130
134
|
- !ruby/object:Gem::Version
|
131
135
|
version: 1.0.0
|
132
136
|
version:
|
137
|
+
- !ruby/object:Gem::Dependency
|
138
|
+
name: adapter_extensions
|
139
|
+
version_requirement:
|
140
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ">="
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: 0.1.0
|
145
|
+
version:
|
@@ -1 +0,0 @@
|
|
1
|
-
require 'etl/active_record_ext/connection_adapters/mysql_adapter'
|
@@ -1,34 +0,0 @@
|
|
1
|
-
require 'active_record/connection_adapters/abstract_adapter'
|
2
|
-
|
3
|
-
module ActiveRecord #:nodoc:
|
4
|
-
module ConnectionAdapters #:nodoc:
|
5
|
-
class MysqlAdapter < AbstractAdapter
|
6
|
-
# Execute a truncate statement on the table. Note that in MySQL a truncate will *NOT* reset
|
7
|
-
# the auto_increment
|
8
|
-
def truncate(table_name)
|
9
|
-
execute("TRUNCATE #{table_name}")
|
10
|
-
end
|
11
|
-
|
12
|
-
# Bulk load the data in the specified file. This implementation always uses the LOCAL keyword
|
13
|
-
# so the file must be found locally, not on the remote server, to be loaded.
|
14
|
-
#
|
15
|
-
# Options:
|
16
|
-
# * <tt>:ignore</tt> -- Ignore the specified number of lines from the source file
|
17
|
-
# * <tt>:columns</tt> -- Array of column names defining the source file column order
|
18
|
-
# * <tt>:fields</tt> -- Hash of options for fields:
|
19
|
-
# ** <tt>:delimited_by</tt> -- The field delimiter
|
20
|
-
# ** <tt>:enclosed_by</tt> -- The field enclosure
|
21
|
-
def bulk_load(file, table_name, options={})
|
22
|
-
q = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{table_name}"
|
23
|
-
if options[:fields]
|
24
|
-
q << " FIELDS"
|
25
|
-
q << " TERMINATED BY '#{options[:fields][:delimited_by]}'" if options[:fields][:delimited_by]
|
26
|
-
q << " ENCLOSED BY '#{options[:fields][:enclosed_by]}'" if options[:fields][:enclosed_by]
|
27
|
-
end
|
28
|
-
q << " IGNORE #{options[:ignore]} LINES" if options[:ignore]
|
29
|
-
q << " (#{options[:columns].join(',')})" if options[:columns]
|
30
|
-
execute(q)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|