activewarehouse-etl 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -1
- data/lib/etl/control/control.rb +21 -0
- data/lib/etl/engine.rb +19 -4
- data/lib/etl/transform/foreign_key_lookup_transform.rb +15 -4
- data/lib/etl/transform/transform.rb +7 -11
- data/lib/etl/version.rb +1 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
data/lib/etl/control/control.rb
CHANGED
@@ -9,14 +9,20 @@ module ETL #:nodoc:
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
+
# Initialize the context
|
12
13
|
def initialize(control)
|
13
14
|
@control = control
|
14
15
|
end
|
15
16
|
|
17
|
+
# Get the control file
|
16
18
|
def file
|
17
19
|
@control.file
|
18
20
|
end
|
19
21
|
|
22
|
+
def set_error_threshold(error_threshold)
|
23
|
+
@control.error_threshold = error_threshold
|
24
|
+
end
|
25
|
+
|
20
26
|
# Define a source.
|
21
27
|
def source(name, configuration={}, definition={})
|
22
28
|
source_types = [:file, :db]
|
@@ -48,6 +54,7 @@ module ETL #:nodoc:
|
|
48
54
|
@control.destinations
|
49
55
|
end
|
50
56
|
|
57
|
+
# Define a transform
|
51
58
|
def transform(name, transformer=nil, configuration={}, &block)
|
52
59
|
transforms[name] ||= []
|
53
60
|
if transformer
|
@@ -65,10 +72,12 @@ module ETL #:nodoc:
|
|
65
72
|
end
|
66
73
|
end
|
67
74
|
|
75
|
+
# Get the defined transforms
|
68
76
|
def transforms
|
69
77
|
@control.transforms
|
70
78
|
end
|
71
79
|
|
80
|
+
# Define a pre-processor
|
72
81
|
def pre_process(name, configuration={})
|
73
82
|
class_name = "#{name.to_s.classify}Processor"
|
74
83
|
begin
|
@@ -79,10 +88,12 @@ module ETL #:nodoc:
|
|
79
88
|
end
|
80
89
|
end
|
81
90
|
|
91
|
+
# Get the defined pre-processors
|
82
92
|
def pre_processors
|
83
93
|
@control.pre_processors
|
84
94
|
end
|
85
95
|
|
96
|
+
# Define a post-processor
|
86
97
|
def post_process(name, configuration={})
|
87
98
|
class_name = "#{name.to_s.classify}Processor"
|
88
99
|
begin
|
@@ -93,10 +104,12 @@ module ETL #:nodoc:
|
|
93
104
|
end
|
94
105
|
end
|
95
106
|
|
107
|
+
# Get the defined post-processors
|
96
108
|
def post_processors
|
97
109
|
@control.post_processors
|
98
110
|
end
|
99
111
|
|
112
|
+
# Get the binding object
|
100
113
|
def get_binding
|
101
114
|
binding
|
102
115
|
end
|
@@ -119,6 +132,9 @@ module ETL #:nodoc:
|
|
119
132
|
# The File object
|
120
133
|
attr_reader :file
|
121
134
|
|
135
|
+
# The error threshold
|
136
|
+
attr_accessor :error_threshold
|
137
|
+
|
122
138
|
class << self
|
123
139
|
# Parse a control file and return a Control instance
|
124
140
|
def parse(control_file)
|
@@ -187,6 +203,11 @@ module ETL #:nodoc:
|
|
187
203
|
@transforms ||= {}
|
188
204
|
end
|
189
205
|
|
206
|
+
# Get the error threshold. Defaults to 100.
|
207
|
+
def error_threshold
|
208
|
+
@error_threshold ||= 100
|
209
|
+
end
|
210
|
+
|
190
211
|
# Validate the control file
|
191
212
|
def validate
|
192
213
|
unless sources.length > 0
|
data/lib/etl/engine.rb
CHANGED
@@ -51,6 +51,10 @@ module ETL #:nodoc:
|
|
51
51
|
def say_on_own_line(message)
|
52
52
|
say("\n" + message)
|
53
53
|
end
|
54
|
+
|
55
|
+
def errors
|
56
|
+
@errors ||= []
|
57
|
+
end
|
54
58
|
|
55
59
|
# Process a control file or object. Acceptable values for control are:
|
56
60
|
# * Path to a file
|
@@ -73,7 +77,7 @@ module ETL #:nodoc:
|
|
73
77
|
say "Source: #{source}"
|
74
78
|
source.each_with_index do |row, index|
|
75
79
|
Engine.current_source_row = index + 1
|
76
|
-
if Engine.realtime_activity && index % 1000 == 0
|
80
|
+
if Engine.realtime_activity && index > 0 && index % 1000 == 0
|
77
81
|
say_without_newline "."
|
78
82
|
end
|
79
83
|
|
@@ -84,8 +88,9 @@ module ETL #:nodoc:
|
|
84
88
|
end
|
85
89
|
rescue => e
|
86
90
|
msg = "Error transforming from #{source} on line #{index}: #{e}"
|
87
|
-
|
91
|
+
errors << msg
|
88
92
|
Engine.logger.error msg
|
93
|
+
break if exceeded_error_threshold?(control)
|
89
94
|
end
|
90
95
|
|
91
96
|
begin
|
@@ -96,11 +101,17 @@ module ETL #:nodoc:
|
|
96
101
|
end
|
97
102
|
rescue
|
98
103
|
msg = "Error writing to #{destination} on line #{index}"
|
99
|
-
|
104
|
+
errors << msg
|
100
105
|
Engine.logger.error msg
|
106
|
+
break if exceeded_error_threshold?(control)
|
101
107
|
end
|
102
108
|
end
|
103
|
-
|
109
|
+
if exceeded_error_threshold?(control)
|
110
|
+
say_on_own_line "Exiting due to exceeding error threshold: #{control.error_threshold}"
|
111
|
+
else
|
112
|
+
say_on_own_line "Processed #{Engine.current_source_row} rows in #{distance_of_time_in_words(start_time)} with #{errors} errors."
|
113
|
+
end
|
114
|
+
|
104
115
|
destinations.each do |destination|
|
105
116
|
destination.close
|
106
117
|
end
|
@@ -112,6 +123,10 @@ module ETL #:nodoc:
|
|
112
123
|
end
|
113
124
|
|
114
125
|
private
|
126
|
+
def exceeded_error_threshold?(control)
|
127
|
+
errors.length > control.error_threshold
|
128
|
+
end
|
129
|
+
|
115
130
|
# Execute all preprocessors
|
116
131
|
def pre_process(control)
|
117
132
|
control.pre_processors.each do |processor|
|
data/lib/etl/transform/foreign_key_lookup_transform.rb
CHANGED
@@ -2,6 +2,9 @@ module ETL #:nodoc:
|
|
2
2
|
module Transform #:nodoc:
|
3
3
|
# Transform which looks up the value and replaces it with a foreign key reference
|
4
4
|
class ForeignKeyLookupTransform < ETL::Transform::Transform
|
5
|
+
# The resolver to use if the foreign key is not found in the collection
|
6
|
+
attr_accessor :resolver
|
7
|
+
|
5
8
|
# Initialize the foreign key lookup transform.
|
6
9
|
#
|
7
10
|
# Configuration options:
|
@@ -21,20 +24,28 @@ module ETL #:nodoc:
|
|
21
24
|
def transform(value)
|
22
25
|
fk = @collection[value]
|
23
26
|
unless fk
|
24
|
-
raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless
|
25
|
-
raise ResolverError, "Resolver does not appear to respond to resolve method" unless
|
26
|
-
fk =
|
27
|
+
raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
|
28
|
+
raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
|
29
|
+
fk = resolver.resolve(value)
|
27
30
|
raise ResolverError, "Unable to resolve #{value} to foreign key" unless fk
|
28
31
|
@collection[value] = fk
|
29
32
|
end
|
30
33
|
fk
|
31
34
|
end
|
32
35
|
end
|
36
|
+
# Alias class name for the ForeignKeyLookupTransform.
|
37
|
+
class FkLookupTransform < ForeignKeyLookupTransform; end
|
33
38
|
end
|
34
39
|
end
|
35
40
|
|
36
41
|
# Resolver which resolves using ActiveRecord.
|
37
42
|
class ActiveRecordResolver
|
43
|
+
# The ActiveRecord class to use
|
44
|
+
attr_accessor :ar_class
|
45
|
+
|
46
|
+
# The find method to use (as a symbol)
|
47
|
+
attr_accessor :find_method
|
48
|
+
|
38
49
|
# Initialize the resolver. The ar_class argument should extend from ActiveRecord::Base. The find_method argument
|
39
50
|
# must be a symbol for the finder method used. For example:
|
40
51
|
#
|
@@ -47,7 +58,7 @@ class ActiveRecordResolver
|
|
47
58
|
end
|
48
59
|
# Resolve the value
|
49
60
|
def resolve(value)
|
50
|
-
rec =
|
61
|
+
rec = ar_class.__send__(find_method, value)
|
51
62
|
rec.nil? ? nil : rec.id
|
52
63
|
end
|
53
64
|
end
|
data/lib/etl/transform/transform.rb
CHANGED
@@ -23,17 +23,13 @@ module ETL#:nodoc:
|
|
23
23
|
def transform(name, value, transforms)
|
24
24
|
# logger.debug "Transforming field #{name}" if transforms.length > 0
|
25
25
|
transforms.each do |transform|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
raise ControlError, "Unsupported transform configuration type: #{transform}"
|
34
|
-
end
|
35
|
-
rescue
|
36
|
-
raise TransformError, "Error transforming #{value} with #{transform}"
|
26
|
+
case transform
|
27
|
+
when Proc
|
28
|
+
value = transform.call(value)
|
29
|
+
when Transform
|
30
|
+
value = transform.transform(value)
|
31
|
+
else
|
32
|
+
raise ControlError, "Unsupported transform configuration type: #{transform}"
|
37
33
|
end
|
38
34
|
end
|
39
35
|
value
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.5.1
|
7
|
-
date: 2007-02-
|
6
|
+
version: 0.5.2
|
7
|
+
date: 2007-02-19 00:00:00 -05:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|