activewarehouse-etl 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -31,4 +31,8 @@
31
31
 
32
32
  0.5.1 - Feb 18, 2007
33
33
  * Fixed up truncate processor
34
- * Updated HOW_TO_RELEASE doc
34
+ * Updated HOW_TO_RELEASE doc
35
+
36
+ 0.5.2 - Feb 19, 2007
37
+ * Added error threshold
38
+ * Fixed problem with transform error handling
@@ -9,14 +9,20 @@ module ETL #:nodoc:
9
9
  end
10
10
  end
11
11
 
12
+ # Initialize the context
12
13
  def initialize(control)
13
14
  @control = control
14
15
  end
15
16
 
17
+ # Get the control file
16
18
  def file
17
19
  @control.file
18
20
  end
19
21
 
22
+ def set_error_threshold(error_threshold)
23
+ @control.error_threshold = error_threshold
24
+ end
25
+
20
26
  # Define a source.
21
27
  def source(name, configuration={}, definition={})
22
28
  source_types = [:file, :db]
@@ -48,6 +54,7 @@ module ETL #:nodoc:
48
54
  @control.destinations
49
55
  end
50
56
 
57
+ # Define a transform
51
58
  def transform(name, transformer=nil, configuration={}, &block)
52
59
  transforms[name] ||= []
53
60
  if transformer
@@ -65,10 +72,12 @@ module ETL #:nodoc:
65
72
  end
66
73
  end
67
74
 
75
+ # Get the defined transforms
68
76
  def transforms
69
77
  @control.transforms
70
78
  end
71
79
 
80
+ # Define a pre-processor
72
81
  def pre_process(name, configuration={})
73
82
  class_name = "#{name.to_s.classify}Processor"
74
83
  begin
@@ -79,10 +88,12 @@ module ETL #:nodoc:
79
88
  end
80
89
  end
81
90
 
91
+ # Get the defined pre-processors
82
92
  def pre_processors
83
93
  @control.pre_processors
84
94
  end
85
95
 
96
+ # Define a post-processor
86
97
  def post_process(name, configuration={})
87
98
  class_name = "#{name.to_s.classify}Processor"
88
99
  begin
@@ -93,10 +104,12 @@ module ETL #:nodoc:
93
104
  end
94
105
  end
95
106
 
107
+ # Get the defined post-processors
96
108
  def post_processors
97
109
  @control.post_processors
98
110
  end
99
111
 
112
+ # Get the binding object
100
113
  def get_binding
101
114
  binding
102
115
  end
@@ -119,6 +132,9 @@ module ETL #:nodoc:
119
132
  # The File object
120
133
  attr_reader :file
121
134
 
135
+ # The error threshold
136
+ attr_accessor :error_threshold
137
+
122
138
  class << self
123
139
  # Parse a control file and return a Control instance
124
140
  def parse(control_file)
@@ -187,6 +203,11 @@ module ETL #:nodoc:
187
203
  @transforms ||= {}
188
204
  end
189
205
 
206
+ # Get the error threshold. Defaults to 100.
207
+ def error_threshold
208
+ @error_threshold ||= 100
209
+ end
210
+
190
211
  # Validate the control file
191
212
  def validate
192
213
  unless sources.length > 0
@@ -51,6 +51,10 @@ module ETL #:nodoc:
51
51
  def say_on_own_line(message)
52
52
  say("\n" + message)
53
53
  end
54
+
55
+ def errors
56
+ @errors ||= []
57
+ end
54
58
 
55
59
  # Process a control file or object. Acceptable values for control are:
56
60
  # * Path to a file
@@ -73,7 +77,7 @@ module ETL #:nodoc:
73
77
  say "Source: #{source}"
74
78
  source.each_with_index do |row, index|
75
79
  Engine.current_source_row = index + 1
76
- if Engine.realtime_activity && index % 1000 == 0
80
+ if Engine.realtime_activity && index > 0 && index % 1000 == 0
77
81
  say_without_newline "."
78
82
  end
79
83
 
@@ -84,8 +88,9 @@ module ETL #:nodoc:
84
88
  end
85
89
  rescue => e
86
90
  msg = "Error transforming from #{source} on line #{index}: #{e}"
87
- source.errors << msg
91
+ errors << msg
88
92
  Engine.logger.error msg
93
+ break if exceeded_error_threshold?(control)
89
94
  end
90
95
 
91
96
  begin
@@ -96,11 +101,17 @@ module ETL #:nodoc:
96
101
  end
97
102
  rescue
98
103
  msg = "Error writing to #{destination} on line #{index}"
99
- destination.errors << msg
104
+ errors << msg
100
105
  Engine.logger.error msg
106
+ break if exceeded_error_threshold?(control)
101
107
  end
102
108
  end
103
- say_on_own_line "Processed #{Engine.current_source_row} rows in #{distance_of_time_in_words(start_time)}"
109
+ if exceeded_error_threshold?(control)
110
+ say_on_own_line "Exiting due to exceeding error threshold: #{control.error_threshold}"
111
+ else
112
+ say_on_own_line "Processed #{Engine.current_source_row} rows in #{distance_of_time_in_words(start_time)} with #{errors} errors."
113
+ end
114
+
104
115
  destinations.each do |destination|
105
116
  destination.close
106
117
  end
@@ -112,6 +123,10 @@ module ETL #:nodoc:
112
123
  end
113
124
 
114
125
  private
126
+ def exceeded_error_threshold?(control)
127
+ errors.length > control.error_threshold
128
+ end
129
+
115
130
  # Execute all preprocessors
116
131
  def pre_process(control)
117
132
  control.pre_processors.each do |processor|
@@ -2,6 +2,9 @@ module ETL #:nodoc:
2
2
  module Transform #:nodoc:
3
3
  # Transform which looks up the value and replaces it with a foreign key reference
4
4
  class ForeignKeyLookupTransform < ETL::Transform::Transform
5
+ # The resolver to use if the foreign key is not found in the collection
6
+ attr_accessor :resolver
7
+
5
8
  # Initialize the foreign key lookup transform.
6
9
  #
7
10
  # Configuration options:
@@ -21,20 +24,28 @@ module ETL #:nodoc:
21
24
  def transform(value)
22
25
  fk = @collection[value]
23
26
  unless fk
24
- raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless @resolver
25
- raise ResolverError, "Resolver does not appear to respond to resolve method" unless @resolver.respond_to?(:resolve)
26
- fk = @resolver.resolve(value)
27
+ raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
28
+ raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
29
+ fk = resolver.resolve(value)
27
30
  raise ResolverError, "Unable to resolve #{value} to foreign key" unless fk
28
31
  @collection[value] = fk
29
32
  end
30
33
  fk
31
34
  end
32
35
  end
36
+ # Alias class name for the ForeignKeyLookupTransform.
37
+ class FkLookupTransform < ForeignKeyLookupTransform; end
33
38
  end
34
39
  end
35
40
 
36
41
  # Resolver which resolves using ActiveRecord.
37
42
  class ActiveRecordResolver
43
+ # The ActiveRecord class to use
44
+ attr_accessor :ar_class
45
+
46
+ # The find method to use (as a symbol)
47
+ attr_accessor :find_method
48
+
38
49
  # Initialize the resolver. The ar_class argument should extend from ActiveRecord::Base. The find_method argument
39
50
  # must be a symbol for the finder method used. For example:
40
51
  #
@@ -47,7 +58,7 @@ class ActiveRecordResolver
47
58
  end
48
59
  # Resolve the value
49
60
  def resolve(value)
50
- rec = @ar_class.__send__(@find_method, value)
61
+ rec = ar_class.__send__(find_method, value)
51
62
  rec.nil? ? nil : rec.id
52
63
  end
53
64
  end
@@ -23,17 +23,13 @@ module ETL#:nodoc:
23
23
  def transform(name, value, transforms)
24
24
  # logger.debug "Transforming field #{name}" if transforms.length > 0
25
25
  transforms.each do |transform|
26
- begin
27
- case transform
28
- when Proc
29
- value = transform.call(value)
30
- when Transform
31
- value = transform.transform(value)
32
- else
33
- raise ControlError, "Unsupported transform configuration type: #{transform}"
34
- end
35
- rescue
36
- raise TransformError, "Error transforming #{value} with #{transform}"
26
+ case transform
27
+ when Proc
28
+ value = transform.call(value)
29
+ when Transform
30
+ value = transform.transform(value)
31
+ else
32
+ raise ControlError, "Unsupported transform configuration type: #{transform}"
37
33
  end
38
34
  end
39
35
  value
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 5
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.5.1
7
- date: 2007-02-18 00:00:00 -05:00
6
+ version: 0.5.2
7
+ date: 2007-02-19 00:00:00 -05:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib