activewarehouse-etl 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -31,4 +31,8 @@
31
31
 
32
32
  0.5.1 - Feb 18, 2007
33
33
  * Fixed up truncate processor
34
- * Updated HOW_TO_RELEASE doc
34
+ * Updated HOW_TO_RELEASE doc
35
+
36
+ 0.5.2 - Feb 19, 2007
37
+ * Added error threshold
38
+ * Fixed problem with transform error handling
@@ -9,14 +9,20 @@ module ETL #:nodoc:
9
9
  end
10
10
  end
11
11
 
12
+ # Initialize the context
12
13
  def initialize(control)
13
14
  @control = control
14
15
  end
15
16
 
17
+ # Get the control file
16
18
  def file
17
19
  @control.file
18
20
  end
19
21
 
22
+ def set_error_threshold(error_threshold)
23
+ @control.error_threshold = error_threshold
24
+ end
25
+
20
26
  # Define a source.
21
27
  def source(name, configuration={}, definition={})
22
28
  source_types = [:file, :db]
@@ -48,6 +54,7 @@ module ETL #:nodoc:
48
54
  @control.destinations
49
55
  end
50
56
 
57
+ # Define a transform
51
58
  def transform(name, transformer=nil, configuration={}, &block)
52
59
  transforms[name] ||= []
53
60
  if transformer
@@ -65,10 +72,12 @@ module ETL #:nodoc:
65
72
  end
66
73
  end
67
74
 
75
+ # Get the defined transforms
68
76
  def transforms
69
77
  @control.transforms
70
78
  end
71
79
 
80
+ # Define a pre-processor
72
81
  def pre_process(name, configuration={})
73
82
  class_name = "#{name.to_s.classify}Processor"
74
83
  begin
@@ -79,10 +88,12 @@ module ETL #:nodoc:
79
88
  end
80
89
  end
81
90
 
91
+ # Get the defined pre-processors
82
92
  def pre_processors
83
93
  @control.pre_processors
84
94
  end
85
95
 
96
+ # Define a post-processor
86
97
  def post_process(name, configuration={})
87
98
  class_name = "#{name.to_s.classify}Processor"
88
99
  begin
@@ -93,10 +104,12 @@ module ETL #:nodoc:
93
104
  end
94
105
  end
95
106
 
107
+ # Get the defined post-processors
96
108
  def post_processors
97
109
  @control.post_processors
98
110
  end
99
111
 
112
+ # Get the binding object
100
113
  def get_binding
101
114
  binding
102
115
  end
@@ -119,6 +132,9 @@ module ETL #:nodoc:
119
132
  # The File object
120
133
  attr_reader :file
121
134
 
135
+ # The error threshold
136
+ attr_accessor :error_threshold
137
+
122
138
  class << self
123
139
  # Parse a control file and return a Control instance
124
140
  def parse(control_file)
@@ -187,6 +203,11 @@ module ETL #:nodoc:
187
203
  @transforms ||= {}
188
204
  end
189
205
 
206
+ # Get the error threshold. Defaults to 100.
207
+ def error_threshold
208
+ @error_threshold ||= 100
209
+ end
210
+
190
211
  # Validate the control file
191
212
  def validate
192
213
  unless sources.length > 0
@@ -51,6 +51,10 @@ module ETL #:nodoc:
51
51
  def say_on_own_line(message)
52
52
  say("\n" + message)
53
53
  end
54
+
55
+ def errors
56
+ @errors ||= []
57
+ end
54
58
 
55
59
  # Process a control file or object. Acceptable values for control are:
56
60
  # * Path to a file
@@ -73,7 +77,7 @@ module ETL #:nodoc:
73
77
  say "Source: #{source}"
74
78
  source.each_with_index do |row, index|
75
79
  Engine.current_source_row = index + 1
76
- if Engine.realtime_activity && index % 1000 == 0
80
+ if Engine.realtime_activity && index > 0 && index % 1000 == 0
77
81
  say_without_newline "."
78
82
  end
79
83
 
@@ -84,8 +88,9 @@ module ETL #:nodoc:
84
88
  end
85
89
  rescue => e
86
90
  msg = "Error transforming from #{source} on line #{index}: #{e}"
87
- source.errors << msg
91
+ errors << msg
88
92
  Engine.logger.error msg
93
+ break if exceeded_error_threshold?(control)
89
94
  end
90
95
 
91
96
  begin
@@ -96,11 +101,17 @@ module ETL #:nodoc:
96
101
  end
97
102
  rescue
98
103
  msg = "Error writing to #{destination} on line #{index}"
99
- destination.errors << msg
104
+ errors << msg
100
105
  Engine.logger.error msg
106
+ break if exceeded_error_threshold?(control)
101
107
  end
102
108
  end
103
- say_on_own_line "Processed #{Engine.current_source_row} rows in #{distance_of_time_in_words(start_time)}"
109
+ if exceeded_error_threshold?(control)
110
+ say_on_own_line "Exiting due to exceeding error threshold: #{control.error_threshold}"
111
+ else
112
+ say_on_own_line "Processed #{Engine.current_source_row} rows in #{distance_of_time_in_words(start_time)} with #{errors} errors."
113
+ end
114
+
104
115
  destinations.each do |destination|
105
116
  destination.close
106
117
  end
@@ -112,6 +123,10 @@ module ETL #:nodoc:
112
123
  end
113
124
 
114
125
  private
126
+ def exceeded_error_threshold?(control)
127
+ errors.length > control.error_threshold
128
+ end
129
+
115
130
  # Execute all preprocessors
116
131
  def pre_process(control)
117
132
  control.pre_processors.each do |processor|
@@ -2,6 +2,9 @@ module ETL #:nodoc:
2
2
  module Transform #:nodoc:
3
3
  # Transform which looks up the value and replaces it with a foreign key reference
4
4
  class ForeignKeyLookupTransform < ETL::Transform::Transform
5
+ # The resolver to use if the foreign key is not found in the collection
6
+ attr_accessor :resolver
7
+
5
8
  # Initialize the foreign key lookup transform.
6
9
  #
7
10
  # Configuration options:
@@ -21,20 +24,28 @@ module ETL #:nodoc:
21
24
  def transform(value)
22
25
  fk = @collection[value]
23
26
  unless fk
24
- raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless @resolver
25
- raise ResolverError, "Resolver does not appear to respond to resolve method" unless @resolver.respond_to?(:resolve)
26
- fk = @resolver.resolve(value)
27
+ raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
28
+ raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
29
+ fk = resolver.resolve(value)
27
30
  raise ResolverError, "Unable to resolve #{value} to foreign key" unless fk
28
31
  @collection[value] = fk
29
32
  end
30
33
  fk
31
34
  end
32
35
  end
36
+ # Alias class name for the ForeignKeyLookupTransform.
37
+ class FkLookupTransform < ForeignKeyLookupTransform; end
33
38
  end
34
39
  end
35
40
 
36
41
  # Resolver which resolves using ActiveRecord.
37
42
  class ActiveRecordResolver
43
+ # The ActiveRecord class to use
44
+ attr_accessor :ar_class
45
+
46
+ # The find method to use (as a symbol)
47
+ attr_accessor :find_method
48
+
38
49
  # Initialize the resolver. The ar_class argument should extend from ActiveRecord::Base. The find_method argument
39
50
  # must be a symbol for the finder method used. For example:
40
51
  #
@@ -47,7 +58,7 @@ class ActiveRecordResolver
47
58
  end
48
59
  # Resolve the value
49
60
  def resolve(value)
50
- rec = @ar_class.__send__(@find_method, value)
61
+ rec = ar_class.__send__(find_method, value)
51
62
  rec.nil? ? nil : rec.id
52
63
  end
53
64
  end
@@ -23,17 +23,13 @@ module ETL#:nodoc:
23
23
  def transform(name, value, transforms)
24
24
  # logger.debug "Transforming field #{name}" if transforms.length > 0
25
25
  transforms.each do |transform|
26
- begin
27
- case transform
28
- when Proc
29
- value = transform.call(value)
30
- when Transform
31
- value = transform.transform(value)
32
- else
33
- raise ControlError, "Unsupported transform configuration type: #{transform}"
34
- end
35
- rescue
36
- raise TransformError, "Error transforming #{value} with #{transform}"
26
+ case transform
27
+ when Proc
28
+ value = transform.call(value)
29
+ when Transform
30
+ value = transform.transform(value)
31
+ else
32
+ raise ControlError, "Unsupported transform configuration type: #{transform}"
37
33
  end
38
34
  end
39
35
  value
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 5
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.5.1
7
- date: 2007-02-18 00:00:00 -05:00
6
+ version: 0.5.2
7
+ date: 2007-02-19 00:00:00 -05:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib