activewarehouse-etl 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -1
- data/README +1 -1
- data/Rakefile +1 -0
- data/examples/database.example.yml +6 -0
- data/lib/etl/control/destination.rb +8 -6
- data/lib/etl/control/source/database_source.rb +14 -1
- data/lib/etl/control/source/enumerable_source.rb +4 -5
- data/lib/etl/engine.rb +11 -4
- data/lib/etl/execution/migration.rb +8 -1
- data/lib/etl/execution/record.rb +10 -0
- data/lib/etl/processor/check_exist_processor.rb +1 -1
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +1 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +13 -1
- data/lib/etl/version.rb +1 -1
- metadata +4 -2
data/CHANGELOG
CHANGED
@@ -125,4 +125,12 @@
|
|
125
125
|
* Added :type configuration option to the source directive, allowing the source type to be
|
126
126
|
explicitly specified. The source type can be a string or symbol (in which case the class will
|
127
127
|
be constructed by appending Source to the type name), a class (which will be instantiated
|
128
|
-
and passed the control, configuration and mapping) and finally an actual Source instance.
|
128
|
+
and passed the control, configuration and mapping) and finally an actual Source instance.
|
129
|
+
|
130
|
+
0.8.2 -
|
131
|
+
* Fixed bug with premature destination closing.
|
132
|
+
* Added indexes to execution records table.
|
133
|
+
* Added a PrintRowProcessor.
|
134
|
+
* Added support for conditions and "group by" in the database source.
|
135
|
+
* Added after_initialize hook in Processor base class.
|
136
|
+
* Added examples directory
|
data/README
CHANGED
@@ -65,7 +65,7 @@ Command line options:
|
|
65
65
|
* <tt>--read-locally</tt>: Read from the local cache (skip source extraction)
|
66
66
|
|
67
67
|
== Control File Examples
|
68
|
-
Control file examples can be found in the
|
68
|
+
Control file examples can be found in the examples directory.
|
69
69
|
|
70
70
|
== Feedback
|
71
71
|
This is a work in progress. Comments should be made on the
|
data/Rakefile
CHANGED
@@ -263,12 +263,14 @@ module ETL #:nodoc:
|
|
263
263
|
|
264
264
|
# Record the record
|
265
265
|
if ETL::Engine.job # only record the execution if there is a job
|
266
|
-
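ETL::Execution::Record.create!(
|
267
|
-
:control_file => control.file,
|
268
|
-
:natural_key => nk,
|
269
|
-
:crc => crc,
|
270
|
-
:job_id => ETL::Engine.job.id
|
271
|
-
)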
|
266
|
+
ETL::Execution::Record.time_spent += Benchmark.realtime do
|
267
|
+
ETL::Execution::Record.create!(
|
268
|
+
:control_file => control.file,
|
269
|
+
:natural_key => nk,
|
270
|
+
:crc => crc,
|
271
|
+
:job_id => ETL::Engine.job.id
|
272
|
+
)
|
273
|
+
end
|
272
274
|
end
|
273
275
|
end
|
274
276
|
end
|
@@ -60,6 +60,11 @@ module ETL #:nodoc:
|
|
60
60
|
configuration[:select] || '*'
|
61
61
|
end
|
62
62
|
|
63
|
+
# Get the group by part of the query, defaults to nil
|
64
|
+
def group
|
65
|
+
configuration[:group]
|
66
|
+
end
|
67
|
+
|
63
68
|
# Get the order for the query, defaults to nil
|
64
69
|
def order
|
65
70
|
configuration[:order]
|
@@ -165,16 +170,24 @@ module ETL #:nodoc:
|
|
165
170
|
q = "SELECT #{select} FROM #{configuration[:table]}"
|
166
171
|
q << " #{join}" if join
|
167
172
|
|
173
|
+
conditions = []
|
168
174
|
if new_records_only
|
169
175
|
last_completed = ETL::Execution::Job.maximum('created_at',
|
170
176
|
:conditions => ['control_file = ? and completed_at is not null', control.file]
|
171
177
|
)
|
172
178
|
if last_completed
|
173
|
-
|
179
|
+
conditions << "#{new_records_only} > #{connection.quote(last_completed.to_s(:db))}"
|
174
180
|
end
|
175
181
|
end
|
176
182
|
|
183
|
+
conditions << configuration[:conditions] if configuration[:conditions]
|
184
|
+
if conditions.length > 0
|
185
|
+
q << " WHERE #{conditions.join(' AND ')}"
|
186
|
+
end
|
187
|
+
|
188
|
+
q << " GROUP BY #{group}" if group
|
177
189
|
q << " ORDER BY #{order}" if order
|
190
|
+
|
178
191
|
if ETL::Engine.limit || ETL::Engine.offset
|
179
192
|
options = {}
|
180
193
|
options[:limit] = ETL::Engine.limit if ETL::Engine.limit
|
@@ -1,9 +1,8 @@
|
|
1
|
-
module ETL
|
2
|
-
module Control
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Control #:nodoc:
|
3
|
+
# Use an Enumerable as a source
|
3
4
|
class EnumerableSource < ETL::Control::Source
|
4
|
-
|
5
|
-
super
|
6
|
-
end
|
5
|
+
# Iterate through the enumerable
|
7
6
|
def each(&block)
|
8
7
|
configuration[:enumerable].each(&block)
|
9
8
|
end
|
data/lib/etl/engine.rb
CHANGED
@@ -237,7 +237,11 @@ module ETL #:nodoc:
|
|
237
237
|
errors << msg
|
238
238
|
Engine.logger.error(msg)
|
239
239
|
e.backtrace.each { |line| Engine.logger.error(line) }
|
240
|
-
|
240
|
+
begin
|
241
|
+
exceeded_error_threshold?(control) ? break : next
|
242
|
+
rescue => inner_error
|
243
|
+
puts inner_error
|
244
|
+
end
|
241
245
|
end
|
242
246
|
end
|
243
247
|
|
@@ -286,9 +290,10 @@ module ETL #:nodoc:
|
|
286
290
|
return
|
287
291
|
end
|
288
292
|
|
289
|
-
|
290
|
-
|
291
|
-
|
293
|
+
end
|
294
|
+
|
295
|
+
destinations.each do |destination|
|
296
|
+
destination.close
|
292
297
|
end
|
293
298
|
|
294
299
|
say_on_own_line "Executing post processes"
|
@@ -310,6 +315,8 @@ module ETL #:nodoc:
|
|
310
315
|
say "Avg before_writes: #{Engine.rows_read/benchmarks[:before_writes]} rows/sec" if benchmarks[:before_writes] > 0
|
311
316
|
say "Avg transforms: #{Engine.rows_read/benchmarks[:transforms]} rows/sec" if benchmarks[:transforms] > 0
|
312
317
|
say "Avg writes: #{Engine.rows_read/benchmarks[:writes]} rows/sec" if benchmarks[:writes] > 0
|
318
|
+
|
319
|
+
say "Avg time writing execution records: #{ETL::Execution::Record.average_time_spent}"
|
313
320
|
|
314
321
|
# ETL::Transform::Transform.benchmarks.each do |klass, t|
|
315
322
|
# say "Avg #{klass}: #{Engine.rows_read/t} rows/sec"
|
@@ -24,7 +24,7 @@ module ETL #:nodoc:
|
|
24
24
|
|
25
25
|
# Get the final target version number
|
26
26
|
def target
|
27
|
-
|
27
|
+
2
|
28
28
|
end
|
29
29
|
|
30
30
|
private
|
@@ -43,6 +43,13 @@ module ETL #:nodoc:
|
|
43
43
|
end
|
44
44
|
update_schema_info(1)
|
45
45
|
end
|
46
|
+
|
47
|
+
def migration_2 #:nodoc:
|
48
|
+
connection.add_index :records, :control_file
|
49
|
+
connection.add_index :records, :natural_key
|
50
|
+
connection.add_index :records, :job_id
|
51
|
+
update_schema_info(2)
|
52
|
+
end
|
46
53
|
|
47
54
|
# Update the schema info table, setting the version value
|
48
55
|
def update_schema_info(version)
|
data/lib/etl/execution/record.rb
CHANGED
@@ -3,6 +3,16 @@ module ETL #:nodoc:
|
|
3
3
|
# Represents a single record
|
4
4
|
class Record < ETL::Execution::Base
|
5
5
|
belongs_to :table
|
6
|
+
class << self
|
7
|
+
attr_accessor :time_spent
|
8
|
+
def time_spent
|
9
|
+
@time_spent ||= 0
|
10
|
+
end
|
11
|
+
def average_time_spent
|
12
|
+
return 0 if time_spent == 0
|
13
|
+
ETL::Engine.rows_read / time_spent
|
14
|
+
end
|
15
|
+
end
|
6
16
|
end
|
7
17
|
end
|
8
18
|
end
|
@@ -27,7 +27,7 @@ module ETL #:nodoc:
|
|
27
27
|
raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
|
28
28
|
raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
|
29
29
|
fk = resolver.resolve(value)
|
30
|
-
raise ResolverError, "Unable to resolve #{value} to foreign key" unless fk
|
30
|
+
raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}" unless fk
|
31
31
|
@collection[value] = fk
|
32
32
|
end
|
33
33
|
fk
|
@@ -61,4 +61,16 @@ class ActiveRecordResolver
|
|
61
61
|
rec = ar_class.__send__(find_method, value)
|
62
62
|
rec.nil? ? nil : rec.id
|
63
63
|
end
|
64
|
+
end
|
65
|
+
|
66
|
+
class SQLResolver
|
67
|
+
def initialize(table, field, connection=nil)
|
68
|
+
@table = table
|
69
|
+
@field = field
|
70
|
+
@connection = connection
|
71
|
+
end
|
72
|
+
def resolve(value)
|
73
|
+
conn = @connection ||= ActiveRecord::Base.connection
|
74
|
+
conn.select_value("SELECT id FROM #{table} WHERE #{field} = #{conn.quote(value)}")
|
75
|
+
end
|
64
76
|
end
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.8.1
|
7
|
-
date: 2007-04-
|
6
|
+
version: 0.8.2
|
7
|
+
date: 2007-04-15 00:00:00 -04:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- lib/etl/processor/check_unique_processor.rb
|
84
84
|
- lib/etl/processor/copy_field_processor.rb
|
85
85
|
- lib/etl/processor/hierarchy_exploder_processor.rb
|
86
|
+
- lib/etl/processor/print_row_processor.rb
|
86
87
|
- lib/etl/processor/processor.rb
|
87
88
|
- lib/etl/processor/rename_processor.rb
|
88
89
|
- lib/etl/processor/row_processor.rb
|
@@ -104,6 +105,7 @@ files:
|
|
104
105
|
- lib/etl/transform/transform.rb
|
105
106
|
- lib/etl/transform/trim_transform.rb
|
106
107
|
- lib/etl/transform/type_transform.rb
|
108
|
+
- examples/database.example.yml
|
107
109
|
test_files: []
|
108
110
|
|
109
111
|
rdoc_options:
|