activewarehouse-etl 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG
CHANGED
@@ -127,10 +127,13 @@
|
|
127
127
|
be constructed by appending Source to the type name), a class (which will be instantiated
|
128
128
|
and passed the control, configuration and mapping) and finally an actual Source instance.
|
129
129
|
|
130
|
-
0.8.2 -
|
130
|
+
0.8.2 - April 15, 2007
|
131
131
|
* Fixed bug with premature destination closing.
|
132
132
|
* Added indexes to execution records table.
|
133
133
|
* Added a PrintRowProcessor.
|
134
134
|
* Added support for conditions and "group by" in the database source.
|
135
135
|
* Added after_initialize hook in Processor base class.
|
136
|
-
* Added examples directory
|
136
|
+
* Added examples directory
|
137
|
+
|
138
|
+
0.8.3 - May 13, 2007
|
139
|
+
* Added patches from Andy Triboletti
|
@@ -218,6 +218,11 @@ module ETL #:nodoc:
|
|
218
218
|
original_record = ETL::Row[result.symbolize_keys!]
|
219
219
|
original_record[scd_end_date_field] = timestamp
|
220
220
|
ETL::Engine.logger.debug "writing original record"
|
221
|
+
|
222
|
+
# if there is no truncate then the row will exist twice in the database
|
223
|
+
# need to figure out how to delete that old record before inserting the
|
224
|
+
# updated version of the record
|
225
|
+
|
221
226
|
buffer << original_record
|
222
227
|
end
|
223
228
|
|
@@ -57,7 +57,7 @@ module ETL #:nodoc:
|
|
57
57
|
values = []
|
58
58
|
order.each do |name|
|
59
59
|
names << name
|
60
|
-
values <<
|
60
|
+
values << conn.quote(row[name]) # TODO: this is probably not database agnostic
|
61
61
|
end
|
62
62
|
q = "INSERT INTO #{configuration[:table]} (#{names.join(',')}) VALUES (#{values.join(',')})"
|
63
63
|
ETL::Engine.logger.debug("Executing insert: #{q}")
|
data/lib/etl/engine.rb
CHANGED
@@ -204,7 +204,7 @@ module ETL #:nodoc:
|
|
204
204
|
# rows rather than the single row
|
205
205
|
rows = [row]
|
206
206
|
|
207
|
-
|
207
|
+
t = Benchmark.realtime do
|
208
208
|
begin
|
209
209
|
Engine.logger.debug "Processing after read"
|
210
210
|
control.after_read_processors.each do |processor|
|
@@ -221,8 +221,9 @@ module ETL #:nodoc:
|
|
221
221
|
exceeded_error_threshold?(control) ? break : next
|
222
222
|
end
|
223
223
|
end
|
224
|
+
benchmarks[:after_reads] += t unless t.nil?
|
224
225
|
|
225
|
-
|
226
|
+
t = Benchmark.realtime do
|
226
227
|
begin
|
227
228
|
# execute transforms
|
228
229
|
Engine.logger.debug "Executing transforms"
|
@@ -244,8 +245,9 @@ module ETL #:nodoc:
|
|
244
245
|
end
|
245
246
|
end
|
246
247
|
end
|
248
|
+
benchmarks[:transforms] += t unless t.nil?
|
247
249
|
|
248
|
-
|
250
|
+
t = Benchmark.realtime do
|
249
251
|
begin
|
250
252
|
# execute row-level "before write" processing
|
251
253
|
Engine.logger.debug "Processing before write"
|
@@ -264,8 +266,9 @@ module ETL #:nodoc:
|
|
264
266
|
exceeded_error_threshold?(control) ? break : next
|
265
267
|
end
|
266
268
|
end
|
269
|
+
benchmarks[:before_writes] += t unless t.nil?
|
267
270
|
|
268
|
-
|
271
|
+
t = Benchmark.realtime do
|
269
272
|
begin
|
270
273
|
# write the row to the destination
|
271
274
|
destinations.each_with_index do |destination, index|
|
@@ -283,6 +286,7 @@ module ETL #:nodoc:
|
|
283
286
|
exceeded_error_threshold?(control) ? break : next
|
284
287
|
end
|
285
288
|
end
|
289
|
+
benchmarks[:writes] += t unless t.nil?
|
286
290
|
end
|
287
291
|
|
288
292
|
if exceeded_error_threshold?(control)
|
data/lib/etl/http_tools.rb
CHANGED
@@ -7,7 +7,8 @@ module HttpTools
|
|
7
7
|
# Code taken from http://gemtacular.com/gems/ParseUserAgent
|
8
8
|
def parse_user_agent(user_agent)
|
9
9
|
if '-' == user_agent
|
10
|
-
raise 'Invalid User Agent'
|
10
|
+
#raise 'Invalid User Agent'
|
11
|
+
puts 'Invalid User Agent'
|
11
12
|
end
|
12
13
|
|
13
14
|
browser, browser_version_major, browser_version_minor, ostype, os, os_version = nil
|
@@ -106,12 +107,17 @@ module HttpTools
|
|
106
107
|
end
|
107
108
|
|
108
109
|
def parse_uri(uri_string)
|
109
|
-
if uri_string
|
110
|
-
uri
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
110
|
+
if uri_string
|
111
|
+
#attempt to parse uri --if it's not a valid uri then catch the problem and set everything to nil
|
112
|
+
begin
|
113
|
+
uri = URI.parse(uri_string)
|
114
|
+
results = {:scheme => uri.scheme, :host => uri.host, :port => uri.port, :uri_path => uri.path}
|
115
|
+
results[:domain] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
|
116
|
+
results
|
117
|
+
rescue
|
118
|
+
{:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
|
119
|
+
end
|
120
|
+
|
115
121
|
else
|
116
122
|
{:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
|
117
123
|
end
|
@@ -31,10 +31,10 @@ module ETL #:nodoc:
|
|
31
31
|
:user_agent => $9,
|
32
32
|
}
|
33
33
|
#fields[:timestamp] =~ r%{(\d\d)/(\w\w\w)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) -(\d\d\d\d)}
|
34
|
-
d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S')
|
35
|
-
fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction])
|
36
|
-
|
37
|
-
fields.merge!(parse_user_agent(fields[:user_agent]))
|
34
|
+
d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S') unless fields[:timestamp].nil?
|
35
|
+
fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction]) unless d.nil?
|
36
|
+
|
37
|
+
fields.merge!(parse_user_agent(fields[:user_agent])) unless fields[:user_agent].nil?
|
38
38
|
fields.merge!(parse_uri(fields[:referrer]))
|
39
39
|
|
40
40
|
fields.each do |key, value|
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.8.2
|
7
|
-
date: 2007-
|
6
|
+
version: 0.8.3
|
7
|
+
date: 2007-05-13 00:00:00 -04:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,48 +36,41 @@ files:
|
|
36
36
|
- Rakefile
|
37
37
|
- bin/etl
|
38
38
|
- lib/etl
|
39
|
-
- lib/etl.rb
|
40
39
|
- lib/etl/commands
|
41
|
-
- lib/etl/control
|
42
|
-
- lib/etl/control.rb
|
43
|
-
- lib/etl/engine.rb
|
44
|
-
- lib/etl/execution
|
45
|
-
- lib/etl/execution.rb
|
46
|
-
- lib/etl/generator
|
47
|
-
- lib/etl/generator.rb
|
48
|
-
- lib/etl/http_tools.rb
|
49
|
-
- lib/etl/parser
|
50
|
-
- lib/etl/parser.rb
|
51
|
-
- lib/etl/processor
|
52
|
-
- lib/etl/processor.rb
|
53
|
-
- lib/etl/row.rb
|
54
|
-
- lib/etl/screen
|
55
|
-
- lib/etl/transform
|
56
|
-
- lib/etl/transform.rb
|
57
|
-
- lib/etl/version.rb
|
58
40
|
- lib/etl/commands/etl.rb
|
41
|
+
- lib/etl/control
|
59
42
|
- lib/etl/control/control.rb
|
60
43
|
- lib/etl/control/destination
|
61
|
-
- lib/etl/control/destination.rb
|
62
|
-
- lib/etl/control/source
|
63
|
-
- lib/etl/control/source.rb
|
64
44
|
- lib/etl/control/destination/database_destination.rb
|
65
45
|
- lib/etl/control/destination/file_destination.rb
|
46
|
+
- lib/etl/control/destination.rb
|
47
|
+
- lib/etl/control/source
|
66
48
|
- lib/etl/control/source/database_source.rb
|
67
49
|
- lib/etl/control/source/enumerable_source.rb
|
68
50
|
- lib/etl/control/source/file_source.rb
|
51
|
+
- lib/etl/control/source.rb
|
52
|
+
- lib/etl/control.rb
|
53
|
+
- lib/etl/engine.rb
|
54
|
+
- lib/etl/execution
|
69
55
|
- lib/etl/execution/base.rb
|
70
56
|
- lib/etl/execution/job.rb
|
71
57
|
- lib/etl/execution/migration.rb
|
72
58
|
- lib/etl/execution/record.rb
|
59
|
+
- lib/etl/execution.rb
|
60
|
+
- lib/etl/generator
|
73
61
|
- lib/etl/generator/generator.rb
|
74
62
|
- lib/etl/generator/surrogate_key_generator.rb
|
63
|
+
- lib/etl/generator.rb
|
64
|
+
- lib/etl/http_tools.rb
|
65
|
+
- lib/etl/parser
|
75
66
|
- lib/etl/parser/apache_combined_log_parser.rb
|
76
67
|
- lib/etl/parser/delimited_parser.rb
|
77
68
|
- lib/etl/parser/fixed_width_parser.rb
|
78
69
|
- lib/etl/parser/parser.rb
|
79
70
|
- lib/etl/parser/sax_parser.rb
|
80
71
|
- lib/etl/parser/xml_parser.rb
|
72
|
+
- lib/etl/parser.rb
|
73
|
+
- lib/etl/processor
|
81
74
|
- lib/etl/processor/bulk_import_processor.rb
|
82
75
|
- lib/etl/processor/check_exist_processor.rb
|
83
76
|
- lib/etl/processor/check_unique_processor.rb
|
@@ -90,7 +83,11 @@ files:
|
|
90
83
|
- lib/etl/processor/sequence_processor.rb
|
91
84
|
- lib/etl/processor/surrogate_key_processor.rb
|
92
85
|
- lib/etl/processor/truncate_processor.rb
|
86
|
+
- lib/etl/processor.rb
|
87
|
+
- lib/etl/row.rb
|
88
|
+
- lib/etl/screen
|
93
89
|
- lib/etl/screen/row_count_screen.rb
|
90
|
+
- lib/etl/transform
|
94
91
|
- lib/etl/transform/block_transform.rb
|
95
92
|
- lib/etl/transform/date_to_string_transform.rb
|
96
93
|
- lib/etl/transform/decode_transform.rb
|
@@ -105,6 +102,9 @@ files:
|
|
105
102
|
- lib/etl/transform/transform.rb
|
106
103
|
- lib/etl/transform/trim_transform.rb
|
107
104
|
- lib/etl/transform/type_transform.rb
|
105
|
+
- lib/etl/transform.rb
|
106
|
+
- lib/etl/version.rb
|
107
|
+
- lib/etl.rb
|
108
108
|
- examples/database.example.yml
|
109
109
|
test_files: []
|
110
110
|
|