activewarehouse-etl 0.8.1 → 0.8.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -1
- data/README +1 -1
- data/Rakefile +1 -0
- data/examples/database.example.yml +6 -0
- data/lib/etl/control/destination.rb +8 -6
- data/lib/etl/control/source/database_source.rb +14 -1
- data/lib/etl/control/source/enumerable_source.rb +4 -5
- data/lib/etl/engine.rb +11 -4
- data/lib/etl/execution/migration.rb +8 -1
- data/lib/etl/execution/record.rb +10 -0
- data/lib/etl/processor/check_exist_processor.rb +1 -1
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +1 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +13 -1
- data/lib/etl/version.rb +1 -1
- metadata +4 -2
data/CHANGELOG
CHANGED
@@ -125,4 +125,12 @@
|
|
125
125
|
* Added :type configuration option to the source directive, allowing the source type to be
|
126
126
|
explicitly specified. The source type can be a string or symbol (in which case the class will
|
127
127
|
be constructed by appending Source to the type name), a class (which will be instantiated
|
128
|
-
and passed the control, configuration and mapping) and finally an actual Source instance.
|
128
|
+
and passed the control, configuration and mapping) and finally an actual Source instance.
|
129
|
+
|
130
|
+
0.8.2 -
|
131
|
+
* Fixed bug with premature destination closing.
|
132
|
+
* Added indexes to execution records table.
|
133
|
+
* Added a PrintRowProcessor.
|
134
|
+
* Added support for conditions and "group by" in the database source.
|
135
|
+
* Added after_initialize hook in Processor base class.
|
136
|
+
* Added examples directory
|
data/README
CHANGED
@@ -65,7 +65,7 @@ Command line options:
|
|
65
65
|
* <tt>--read-locally</tt>: Read from the local cache (skip source extraction)
|
66
66
|
|
67
67
|
== Control File Examples
|
68
|
-
Control file examples can be found in the
|
68
|
+
Control file examples can be found in the examples directory.
|
69
69
|
|
70
70
|
== Feedback
|
71
71
|
This is a work in progress. Comments should be made on the
|
data/Rakefile
CHANGED
@@ -263,12 +263,14 @@ module ETL #:nodoc:
|
|
263
263
|
|
264
264
|
# Record the record
|
265
265
|
if ETL::Engine.job # only record the execution if there is a job
|
266
|
-
ETL::Execution::Record.
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
266
|
+
ETL::Execution::Record.time_spent += Benchmark.realtime do
|
267
|
+
ETL::Execution::Record.create!(
|
268
|
+
:control_file => control.file,
|
269
|
+
:natural_key => nk,
|
270
|
+
:crc => crc,
|
271
|
+
:job_id => ETL::Engine.job.id
|
272
|
+
)
|
273
|
+
end
|
272
274
|
end
|
273
275
|
end
|
274
276
|
end
|
@@ -60,6 +60,11 @@ module ETL #:nodoc:
|
|
60
60
|
configuration[:select] || '*'
|
61
61
|
end
|
62
62
|
|
63
|
+
# Get the group by part of the query, defaults to nil
|
64
|
+
def group
|
65
|
+
configuration[:group]
|
66
|
+
end
|
67
|
+
|
63
68
|
# Get the order for the query, defaults to nil
|
64
69
|
def order
|
65
70
|
configuration[:order]
|
@@ -165,16 +170,24 @@ module ETL #:nodoc:
|
|
165
170
|
q = "SELECT #{select} FROM #{configuration[:table]}"
|
166
171
|
q << " #{join}" if join
|
167
172
|
|
173
|
+
conditions = []
|
168
174
|
if new_records_only
|
169
175
|
last_completed = ETL::Execution::Job.maximum('created_at',
|
170
176
|
:conditions => ['control_file = ? and completed_at is not null', control.file]
|
171
177
|
)
|
172
178
|
if last_completed
|
173
|
-
|
179
|
+
conditions << "#{new_records_only} > #{connection.quote(last_completed.to_s(:db))}"
|
174
180
|
end
|
175
181
|
end
|
176
182
|
|
183
|
+
conditions << configuration[:conditions] if configuration[:conditions]
|
184
|
+
if conditions.length > 0
|
185
|
+
q << " WHERE #{conditions.join(' AND ')}"
|
186
|
+
end
|
187
|
+
|
188
|
+
q << " GROUP BY #{group}" if group
|
177
189
|
q << " ORDER BY #{order}" if order
|
190
|
+
|
178
191
|
if ETL::Engine.limit || ETL::Engine.offset
|
179
192
|
options = {}
|
180
193
|
options[:limit] = ETL::Engine.limit if ETL::Engine.limit
|
@@ -1,9 +1,8 @@
|
|
1
|
-
module ETL
|
2
|
-
module Control
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Control #:nodoc:
|
3
|
+
# Use an Enumerable as a source
|
3
4
|
class EnumerableSource < ETL::Control::Source
|
4
|
-
|
5
|
-
super
|
6
|
-
end
|
5
|
+
# Iterate through the enumerable
|
7
6
|
def each(&block)
|
8
7
|
configuration[:enumerable].each(&block)
|
9
8
|
end
|
data/lib/etl/engine.rb
CHANGED
@@ -237,7 +237,11 @@ module ETL #:nodoc:
|
|
237
237
|
errors << msg
|
238
238
|
Engine.logger.error(msg)
|
239
239
|
e.backtrace.each { |line| Engine.logger.error(line) }
|
240
|
-
|
240
|
+
begin
|
241
|
+
exceeded_error_threshold?(control) ? break : next
|
242
|
+
rescue => inner_error
|
243
|
+
puts inner_error
|
244
|
+
end
|
241
245
|
end
|
242
246
|
end
|
243
247
|
|
@@ -286,9 +290,10 @@ module ETL #:nodoc:
|
|
286
290
|
return
|
287
291
|
end
|
288
292
|
|
289
|
-
|
290
|
-
|
291
|
-
|
293
|
+
end
|
294
|
+
|
295
|
+
destinations.each do |destination|
|
296
|
+
destination.close
|
292
297
|
end
|
293
298
|
|
294
299
|
say_on_own_line "Executing post processes"
|
@@ -310,6 +315,8 @@ module ETL #:nodoc:
|
|
310
315
|
say "Avg before_writes: #{Engine.rows_read/benchmarks[:before_writes]} rows/sec" if benchmarks[:before_writes] > 0
|
311
316
|
say "Avg transforms: #{Engine.rows_read/benchmarks[:transforms]} rows/sec" if benchmarks[:transforms] > 0
|
312
317
|
say "Avg writes: #{Engine.rows_read/benchmarks[:writes]} rows/sec" if benchmarks[:writes] > 0
|
318
|
+
|
319
|
+
say "Avg time writing execution records: #{ETL::Execution::Record.average_time_spent}"
|
313
320
|
|
314
321
|
# ETL::Transform::Transform.benchmarks.each do |klass, t|
|
315
322
|
# say "Avg #{klass}: #{Engine.rows_read/t} rows/sec"
|
@@ -24,7 +24,7 @@ module ETL #:nodoc:
|
|
24
24
|
|
25
25
|
# Get the final target version number
|
26
26
|
def target
|
27
|
-
|
27
|
+
2
|
28
28
|
end
|
29
29
|
|
30
30
|
private
|
@@ -43,6 +43,13 @@ module ETL #:nodoc:
|
|
43
43
|
end
|
44
44
|
update_schema_info(1)
|
45
45
|
end
|
46
|
+
|
47
|
+
def migration_2 #:nodoc:
|
48
|
+
connection.add_index :records, :control_file
|
49
|
+
connection.add_index :records, :natural_key
|
50
|
+
connection.add_index :records, :job_id
|
51
|
+
update_schema_info(2)
|
52
|
+
end
|
46
53
|
|
47
54
|
# Update the schema info table, setting the version value
|
48
55
|
def update_schema_info(version)
|
data/lib/etl/execution/record.rb
CHANGED
@@ -3,6 +3,16 @@ module ETL #:nodoc:
|
|
3
3
|
# Represents a single record
|
4
4
|
class Record < ETL::Execution::Base
|
5
5
|
belongs_to :table
|
6
|
+
class << self
|
7
|
+
attr_accessor :time_spent
|
8
|
+
def time_spent
|
9
|
+
@time_spent ||= 0
|
10
|
+
end
|
11
|
+
def average_time_spent
|
12
|
+
return 0 if time_spent == 0
|
13
|
+
ETL::Engine.rows_read / time_spent
|
14
|
+
end
|
15
|
+
end
|
6
16
|
end
|
7
17
|
end
|
8
18
|
end
|
@@ -27,7 +27,7 @@ module ETL #:nodoc:
|
|
27
27
|
raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
|
28
28
|
raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
|
29
29
|
fk = resolver.resolve(value)
|
30
|
-
raise ResolverError, "Unable to resolve #{value} to foreign key" unless fk
|
30
|
+
raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}" unless fk
|
31
31
|
@collection[value] = fk
|
32
32
|
end
|
33
33
|
fk
|
@@ -61,4 +61,16 @@ class ActiveRecordResolver
|
|
61
61
|
rec = ar_class.__send__(find_method, value)
|
62
62
|
rec.nil? ? nil : rec.id
|
63
63
|
end
|
64
|
+
end
|
65
|
+
|
66
|
+
class SQLResolver
|
67
|
+
def initialize(table, field, connection=nil)
|
68
|
+
@table = table
|
69
|
+
@field = field
|
70
|
+
@connection = connection
|
71
|
+
end
|
72
|
+
def resolve(value)
|
73
|
+
conn = @connection ||= ActiveRecord::Base.connection
|
74
|
+
conn.select_value("SELECT id FROM #{table} WHERE #{field} = #{conn.quote(value)}")
|
75
|
+
end
|
64
76
|
end
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.8.1
|
7
|
-
date: 2007-04-
|
6
|
+
version: 0.8.2
|
7
|
+
date: 2007-04-15 00:00:00 -04:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- lib/etl/processor/check_unique_processor.rb
|
84
84
|
- lib/etl/processor/copy_field_processor.rb
|
85
85
|
- lib/etl/processor/hierarchy_exploder_processor.rb
|
86
|
+
- lib/etl/processor/print_row_processor.rb
|
86
87
|
- lib/etl/processor/processor.rb
|
87
88
|
- lib/etl/processor/rename_processor.rb
|
88
89
|
- lib/etl/processor/row_processor.rb
|
@@ -104,6 +105,7 @@ files:
|
|
104
105
|
- lib/etl/transform/transform.rb
|
105
106
|
- lib/etl/transform/trim_transform.rb
|
106
107
|
- lib/etl/transform/type_transform.rb
|
108
|
+
- examples/database.example.yml
|
107
109
|
test_files: []
|
108
110
|
|
109
111
|
rdoc_options:
|