activewarehouse-etl 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -1
- data/lib/etl/control/control.rb +21 -0
- data/lib/etl/engine.rb +19 -4
- data/lib/etl/transform/foreign_key_lookup_transform.rb +15 -4
- data/lib/etl/transform/transform.rb +7 -11
- data/lib/etl/version.rb +1 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
data/lib/etl/control/control.rb
CHANGED
@@ -9,14 +9,20 @@ module ETL #:nodoc:
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
+
# Initialize the context
|
12
13
|
def initialize(control)
|
13
14
|
@control = control
|
14
15
|
end
|
15
16
|
|
17
|
+
# Get the control file
|
16
18
|
def file
|
17
19
|
@control.file
|
18
20
|
end
|
19
21
|
|
22
|
+
def set_error_threshold(error_threshold)
|
23
|
+
@control.error_threshold = error_threshold
|
24
|
+
end
|
25
|
+
|
20
26
|
# Define a source.
|
21
27
|
def source(name, configuration={}, definition={})
|
22
28
|
source_types = [:file, :db]
|
@@ -48,6 +54,7 @@ module ETL #:nodoc:
|
|
48
54
|
@control.destinations
|
49
55
|
end
|
50
56
|
|
57
|
+
# Define a transform
|
51
58
|
def transform(name, transformer=nil, configuration={}, &block)
|
52
59
|
transforms[name] ||= []
|
53
60
|
if transformer
|
@@ -65,10 +72,12 @@ module ETL #:nodoc:
|
|
65
72
|
end
|
66
73
|
end
|
67
74
|
|
75
|
+
# Get the defined transforms
|
68
76
|
def transforms
|
69
77
|
@control.transforms
|
70
78
|
end
|
71
79
|
|
80
|
+
# Define a pre-processor
|
72
81
|
def pre_process(name, configuration={})
|
73
82
|
class_name = "#{name.to_s.classify}Processor"
|
74
83
|
begin
|
@@ -79,10 +88,12 @@ module ETL #:nodoc:
|
|
79
88
|
end
|
80
89
|
end
|
81
90
|
|
91
|
+
# Get the defined pre-processors
|
82
92
|
def pre_processors
|
83
93
|
@control.pre_processors
|
84
94
|
end
|
85
95
|
|
96
|
+
# Define a post-processor
|
86
97
|
def post_process(name, configuration={})
|
87
98
|
class_name = "#{name.to_s.classify}Processor"
|
88
99
|
begin
|
@@ -93,10 +104,12 @@ module ETL #:nodoc:
|
|
93
104
|
end
|
94
105
|
end
|
95
106
|
|
107
|
+
# Get the defined post-processors
|
96
108
|
def post_processors
|
97
109
|
@control.post_processors
|
98
110
|
end
|
99
111
|
|
112
|
+
# Get the binding object
|
100
113
|
def get_binding
|
101
114
|
binding
|
102
115
|
end
|
@@ -119,6 +132,9 @@ module ETL #:nodoc:
|
|
119
132
|
# The File object
|
120
133
|
attr_reader :file
|
121
134
|
|
135
|
+
# The error threshold
|
136
|
+
attr_accessor :error_threshold
|
137
|
+
|
122
138
|
class << self
|
123
139
|
# Parse a control file and return a Control instance
|
124
140
|
def parse(control_file)
|
@@ -187,6 +203,11 @@ module ETL #:nodoc:
|
|
187
203
|
@transforms ||= {}
|
188
204
|
end
|
189
205
|
|
206
|
+
# Get the error threshold. Defaults to 100.
|
207
|
+
def error_threshold
|
208
|
+
@error_threshold ||= 100
|
209
|
+
end
|
210
|
+
|
190
211
|
# Validate the control file
|
191
212
|
def validate
|
192
213
|
unless sources.length > 0
|
data/lib/etl/engine.rb
CHANGED
@@ -51,6 +51,10 @@ module ETL #:nodoc:
|
|
51
51
|
def say_on_own_line(message)
|
52
52
|
say("\n" + message)
|
53
53
|
end
|
54
|
+
|
55
|
+
def errors
|
56
|
+
@errors ||= []
|
57
|
+
end
|
54
58
|
|
55
59
|
# Process a control file or object. Acceptable values for control are:
|
56
60
|
# * Path to a file
|
@@ -73,7 +77,7 @@ module ETL #:nodoc:
|
|
73
77
|
say "Source: #{source}"
|
74
78
|
source.each_with_index do |row, index|
|
75
79
|
Engine.current_source_row = index + 1
|
76
|
-
if Engine.realtime_activity && index % 1000 == 0
|
80
|
+
if Engine.realtime_activity && index > 0 && index % 1000 == 0
|
77
81
|
say_without_newline "."
|
78
82
|
end
|
79
83
|
|
@@ -84,8 +88,9 @@ module ETL #:nodoc:
|
|
84
88
|
end
|
85
89
|
rescue => e
|
86
90
|
msg = "Error transforming from #{source} on line #{index}: #{e}"
|
87
|
-
|
91
|
+
errors << msg
|
88
92
|
Engine.logger.error msg
|
93
|
+
break if exceeded_error_threshold?(control)
|
89
94
|
end
|
90
95
|
|
91
96
|
begin
|
@@ -96,11 +101,17 @@ module ETL #:nodoc:
|
|
96
101
|
end
|
97
102
|
rescue
|
98
103
|
msg = "Error writing to #{destination} on line #{index}"
|
99
|
-
|
104
|
+
errors << msg
|
100
105
|
Engine.logger.error msg
|
106
|
+
break if exceeded_error_threshold?(control)
|
101
107
|
end
|
102
108
|
end
|
103
|
-
|
109
|
+
if exceeded_error_threshold?(control)
|
110
|
+
say_on_own_line "Exiting due to exceeding error threshold: #{control.error_threshold}"
|
111
|
+
else
|
112
|
+
say_on_own_line "Processed #{Engine.current_source_row} rows in #{distance_of_time_in_words(start_time)} with #{errors} errors."
|
113
|
+
end
|
114
|
+
|
104
115
|
destinations.each do |destination|
|
105
116
|
destination.close
|
106
117
|
end
|
@@ -112,6 +123,10 @@ module ETL #:nodoc:
|
|
112
123
|
end
|
113
124
|
|
114
125
|
private
|
126
|
+
def exceeded_error_threshold?(control)
|
127
|
+
errors.length > control.error_threshold
|
128
|
+
end
|
129
|
+
|
115
130
|
# Execute all preprocessors
|
116
131
|
def pre_process(control)
|
117
132
|
control.pre_processors.each do |processor|
|
@@ -2,6 +2,9 @@ module ETL #:nodoc:
|
|
2
2
|
module Transform #:nodoc:
|
3
3
|
# Transform which looks up the value and replaces it with a foreign key reference
|
4
4
|
class ForeignKeyLookupTransform < ETL::Transform::Transform
|
5
|
+
# The resolver to use if the foreign key is not found in the collection
|
6
|
+
attr_accessor :resolver
|
7
|
+
|
5
8
|
# Initialize the foreign key lookup transform.
|
6
9
|
#
|
7
10
|
# Configuration options:
|
@@ -21,20 +24,28 @@ module ETL #:nodoc:
|
|
21
24
|
def transform(value)
|
22
25
|
fk = @collection[value]
|
23
26
|
unless fk
|
24
|
-
raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless
|
25
|
-
raise ResolverError, "Resolver does not appear to respond to resolve method" unless
|
26
|
-
fk =
|
27
|
+
raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
|
28
|
+
raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
|
29
|
+
fk = resolver.resolve(value)
|
27
30
|
raise ResolverError, "Unable to resolve #{value} to foreign key" unless fk
|
28
31
|
@collection[value] = fk
|
29
32
|
end
|
30
33
|
fk
|
31
34
|
end
|
32
35
|
end
|
36
|
+
# Alias class name for the ForeignKeyLookupTransform.
|
37
|
+
class FkLookupTransform < ForeignKeyLookupTransform; end
|
33
38
|
end
|
34
39
|
end
|
35
40
|
|
36
41
|
# Resolver which resolves using ActiveRecord.
|
37
42
|
class ActiveRecordResolver
|
43
|
+
# The ActiveRecord class to use
|
44
|
+
attr_accessor :ar_class
|
45
|
+
|
46
|
+
# The find method to use (as a symbol)
|
47
|
+
attr_accessor :find_method
|
48
|
+
|
38
49
|
# Initialize the resolver. The ar_class argument should extend from ActiveRecord::Base. The find_method argument
|
39
50
|
# must be a symbol for the finder method used. For example:
|
40
51
|
#
|
@@ -47,7 +58,7 @@ class ActiveRecordResolver
|
|
47
58
|
end
|
48
59
|
# Resolve the value
|
49
60
|
def resolve(value)
|
50
|
-
rec =
|
61
|
+
rec = ar_class.__send__(find_method, value)
|
51
62
|
rec.nil? ? nil : rec.id
|
52
63
|
end
|
53
64
|
end
|
@@ -23,17 +23,13 @@ module ETL#:nodoc:
|
|
23
23
|
def transform(name, value, transforms)
|
24
24
|
# logger.debug "Transforming field #{name}" if transforms.length > 0
|
25
25
|
transforms.each do |transform|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
raise ControlError, "Unsupported transform configuration type: #{transform}"
|
34
|
-
end
|
35
|
-
rescue
|
36
|
-
raise TransformError, "Error transforming #{value} with #{transform}"
|
26
|
+
case transform
|
27
|
+
when Proc
|
28
|
+
value = transform.call(value)
|
29
|
+
when Transform
|
30
|
+
value = transform.transform(value)
|
31
|
+
else
|
32
|
+
raise ControlError, "Unsupported transform configuration type: #{transform}"
|
37
33
|
end
|
38
34
|
end
|
39
35
|
value
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.5.1
|
7
|
-
date: 2007-02-
|
6
|
+
version: 0.5.2
|
7
|
+
date: 2007-02-19 00:00:00 -05:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|