activewarehouse-etl 0.5.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +41 -13
- data/README +1 -1
- data/Rakefile +14 -4
- data/TODO +17 -1
- data/bin/etl +3 -1
- data/lib/etl.rb +11 -7
- data/lib/etl/commands/etl.rb +0 -1
- data/lib/etl/control/control.rb +113 -36
- data/lib/etl/control/destination.rb +13 -1
- data/lib/etl/control/destination/database_destination.rb +3 -1
- data/lib/etl/control/destination/file_destination.rb +5 -2
- data/lib/etl/control/source.rb +36 -0
- data/lib/etl/control/source/database_source.rb +63 -8
- data/lib/etl/control/source/file_source.rb +25 -4
- data/lib/etl/engine.rb +128 -14
- data/lib/etl/generator/surrogate_key_generator.rb +1 -0
- data/lib/etl/http_tools.rb +119 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +47 -0
- data/lib/etl/parser/sax_parser.rb +18 -6
- data/lib/etl/processor.rb +1 -0
- data/lib/etl/processor/bulk_import_processor.rb +12 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +54 -0
- data/lib/etl/processor/processor.rb +1 -5
- data/lib/etl/processor/row_processor.rb +17 -0
- data/lib/etl/transform/date_to_string_transform.rb +1 -1
- data/lib/etl/transform/decode_transform.rb +1 -1
- data/lib/etl/transform/default_transform.rb +15 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +1 -1
- data/lib/etl/transform/hierarchy_lookup_transform.rb +56 -0
- data/lib/etl/transform/sha1_transform.rb +1 -1
- data/lib/etl/transform/string_to_date_transform.rb +3 -3
- data/lib/etl/transform/string_to_datetime_transform.rb +17 -0
- data/lib/etl/transform/string_to_time_transform.rb +14 -0
- data/lib/etl/transform/transform.rb +8 -4
- data/lib/etl/transform/type_transform.rb +2 -2
- data/lib/etl/version.rb +2 -2
- metadata +21 -8
- data/lib/etl/active_record_ext.rb +0 -1
- data/lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb +0 -34
@@ -5,9 +5,9 @@ module ETL #:nodoc:
|
|
5
5
|
def initialize(control, configuration={})
|
6
6
|
super
|
7
7
|
end
|
8
|
-
# Transform the value using
|
9
|
-
def transform(value)
|
10
|
-
|
8
|
+
# Transform the value using Date.parse
|
9
|
+
def transform(name, value, row)
|
10
|
+
Date.parse(value)
|
11
11
|
end
|
12
12
|
end
|
13
13
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Transform #:nodoc:
|
3
|
+
# Transform a String representation of a date to a DateTime instance
|
4
|
+
class StringToDateTimeTransform < ETL::Transform::Transform
|
5
|
+
def initialize(control, configuration={})
|
6
|
+
super
|
7
|
+
end
|
8
|
+
# Transform the value using DateTime.parse.
|
9
|
+
#
|
10
|
+
# WARNING: This transform is slow (due to the Ruby implementation), but if you need to
|
11
|
+
# parse timestamps before or after the values supported by Time.parse, it is the one to use.
|
12
|
+
def transform(name, value, row)
|
13
|
+
DateTime.parse(value)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Transform #:nodoc:
|
3
|
+
# Transform a String representation of a date to a Time instance
|
4
|
+
class StringToTimeTransform < ETL::Transform::Transform
|
5
|
+
def initialize(control, configuration={})
|
6
|
+
super
|
7
|
+
end
|
8
|
+
# Transform the value using Time.parse
|
9
|
+
def transform(name, value, row)
|
10
|
+
Time.parse(value)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -20,14 +20,14 @@ module ETL#:nodoc:
|
|
20
20
|
# Transform the specified value using the given transforms. The transforms can either be
|
21
21
|
# Proc objects or objects which extend from Transform and implement the method <tt>transform(name, value, row)</tt>.
|
22
22
|
# Any other object will result in a ControlError being raised.
|
23
|
-
def transform(name, value, transforms)
|
24
|
-
# logger.debug "Transforming field #{name}" if transforms.length > 0
|
23
|
+
def transform(name, value, row, transforms)
|
25
24
|
transforms.each do |transform|
|
25
|
+
Engine.logger.debug "Transforming field #{name} with #{transform.inspect}"
|
26
26
|
case transform
|
27
27
|
when Proc
|
28
|
-
value = transform.call(value)
|
28
|
+
value = transform.call([name, value, row])
|
29
29
|
when Transform
|
30
|
-
value = transform.transform(value)
|
30
|
+
value = transform.transform(name, value, row)
|
31
31
|
else
|
32
32
|
raise ControlError, "Unsupported transform configuration type: #{transform}"
|
33
33
|
end
|
@@ -43,6 +43,10 @@ module ETL#:nodoc:
|
|
43
43
|
@control = control
|
44
44
|
@configuration = configuration
|
45
45
|
end
|
46
|
+
|
47
|
+
def transform(name, value, row)
|
48
|
+
raise "transform is an abstract method"
|
49
|
+
end
|
46
50
|
end
|
47
51
|
end
|
48
52
|
end
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.5.2
|
7
|
-
date: 2007-
|
6
|
+
version: 0.6.0
|
7
|
+
date: 2007-03-08 00:00:00 -05:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,14 +36,13 @@ files:
|
|
36
36
|
- bin/etl
|
37
37
|
- lib/etl
|
38
38
|
- lib/etl.rb
|
39
|
-
- lib/etl/active_record_ext
|
40
|
-
- lib/etl/active_record_ext.rb
|
41
39
|
- lib/etl/commands
|
42
40
|
- lib/etl/control
|
43
41
|
- lib/etl/control.rb
|
44
42
|
- lib/etl/engine.rb
|
45
43
|
- lib/etl/generator
|
46
44
|
- lib/etl/generator.rb
|
45
|
+
- lib/etl/http_tools.rb
|
47
46
|
- lib/etl/parser
|
48
47
|
- lib/etl/parser.rb
|
49
48
|
- lib/etl/processor
|
@@ -51,8 +50,6 @@ files:
|
|
51
50
|
- lib/etl/transform
|
52
51
|
- lib/etl/transform.rb
|
53
52
|
- lib/etl/version.rb
|
54
|
-
- lib/etl/active_record_ext/connection_adapters
|
55
|
-
- lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb
|
56
53
|
- lib/etl/commands/etl.rb
|
57
54
|
- lib/etl/control/control.rb
|
58
55
|
- lib/etl/control/destination
|
@@ -65,19 +62,26 @@ files:
|
|
65
62
|
- lib/etl/control/source/file_source.rb
|
66
63
|
- lib/etl/generator/generator.rb
|
67
64
|
- lib/etl/generator/surrogate_key_generator.rb
|
65
|
+
- lib/etl/parser/apache_combined_log_parser.rb
|
68
66
|
- lib/etl/parser/delimited_parser.rb
|
69
67
|
- lib/etl/parser/fixed_width_parser.rb
|
70
68
|
- lib/etl/parser/parser.rb
|
71
69
|
- lib/etl/parser/sax_parser.rb
|
72
70
|
- lib/etl/parser/xml_parser.rb
|
73
71
|
- lib/etl/processor/bulk_import_processor.rb
|
72
|
+
- lib/etl/processor/hierarchy_exploder_processor.rb
|
74
73
|
- lib/etl/processor/processor.rb
|
74
|
+
- lib/etl/processor/row_processor.rb
|
75
75
|
- lib/etl/processor/truncate_processor.rb
|
76
76
|
- lib/etl/transform/date_to_string_transform.rb
|
77
77
|
- lib/etl/transform/decode_transform.rb
|
78
|
+
- lib/etl/transform/default_transform.rb
|
78
79
|
- lib/etl/transform/foreign_key_lookup_transform.rb
|
80
|
+
- lib/etl/transform/hierarchy_lookup_transform.rb
|
79
81
|
- lib/etl/transform/sha1_transform.rb
|
80
82
|
- lib/etl/transform/string_to_date_transform.rb
|
83
|
+
- lib/etl/transform/string_to_datetime_transform.rb
|
84
|
+
- lib/etl/transform/string_to_time_transform.rb
|
81
85
|
- lib/etl/transform/transform.rb
|
82
86
|
- lib/etl/transform/type_transform.rb
|
83
87
|
test_files: []
|
@@ -110,7 +114,7 @@ dependencies:
|
|
110
114
|
requirements:
|
111
115
|
- - ">="
|
112
116
|
- !ruby/object:Gem::Version
|
113
|
-
version: 1.3.1
|
117
|
+
version: 1.3.1
|
114
118
|
version:
|
115
119
|
- !ruby/object:Gem::Dependency
|
116
120
|
name: activerecord
|
@@ -119,7 +123,7 @@ dependencies:
|
|
119
123
|
requirements:
|
120
124
|
- - ">="
|
121
125
|
- !ruby/object:Gem::Version
|
122
|
-
version: 1.14.4
|
126
|
+
version: 1.14.4
|
123
127
|
version:
|
124
128
|
- !ruby/object:Gem::Dependency
|
125
129
|
name: fastercsv
|
@@ -130,3 +134,12 @@ dependencies:
|
|
130
134
|
- !ruby/object:Gem::Version
|
131
135
|
version: 1.0.0
|
132
136
|
version:
|
137
|
+
- !ruby/object:Gem::Dependency
|
138
|
+
name: adapter_extensions
|
139
|
+
version_requirement:
|
140
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ">="
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: 0.1.0
|
145
|
+
version:
|
@@ -1 +0,0 @@
|
|
1
|
-
require 'etl/active_record_ext/connection_adapters/mysql_adapter'
|
@@ -1,34 +0,0 @@
|
|
1
|
-
require 'active_record/connection_adapters/abstract_adapter'
|
2
|
-
|
3
|
-
module ActiveRecord #:nodoc:
|
4
|
-
module ConnectionAdapters #:nodoc:
|
5
|
-
class MysqlAdapter < AbstractAdapter
|
6
|
-
# Execute a truncate statement on the table. Note that in MySQL a truncate will *NOT* reset
|
7
|
-
# the auto_increment
|
8
|
-
def truncate(table_name)
|
9
|
-
execute("TRUNCATE #{table_name}")
|
10
|
-
end
|
11
|
-
|
12
|
-
# Bulk load the data in the specified file. This implementation always uses the LOCAL keyword
|
13
|
-
# so the file must be found locally, not on the remote server, to be loaded.
|
14
|
-
#
|
15
|
-
# Options:
|
16
|
-
# * <tt>:ignore</tt> -- Ignore the specified number of lines from the source file
|
17
|
-
# * <tt>:columns</tt> -- Array of column names defining the source file column order
|
18
|
-
# * <tt>:fields</tt> -- Hash of options for fields:
|
19
|
-
# ** <tt>:delimited_by</tt> -- The field delimiter
|
20
|
-
# ** <tt>:enclosed_by</tt> -- The field enclosure
|
21
|
-
def bulk_load(file, table_name, options={})
|
22
|
-
q = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{table_name}"
|
23
|
-
if options[:fields]
|
24
|
-
q << " FIELDS"
|
25
|
-
q << " TERMINATED BY '#{options[:fields][:delimited_by]}'" if options[:fields][:delimited_by]
|
26
|
-
q << " ENCLOSED BY '#{options[:fields][:enclosed_by]}'" if options[:fields][:enclosed_by]
|
27
|
-
end
|
28
|
-
q << " IGNORE #{options[:ignore]} LINES" if options[:ignore]
|
29
|
-
q << " (#{options[:columns].join(',')})" if options[:columns]
|
30
|
-
execute(q)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|