activewarehouse-etl 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG
CHANGED
@@ -127,10 +127,13 @@
|
|
127
127
|
be constructed by appending Source to the type name), a class (which will be instantiated
|
128
128
|
and passed the control, configuration and mapping) and finally an actual Source instance.
|
129
129
|
|
130
|
-
0.8.2 -
|
130
|
+
0.8.2 - April 15, 2007
|
131
131
|
* Fixed bug with premature destination closing.
|
132
132
|
* Added indexes to execution records table.
|
133
133
|
* Added a PrintRowProcessor.
|
134
134
|
* Added support for conditions and "group by" in the database source.
|
135
135
|
* Added after_initialize hook in Processor base class.
|
136
|
-
* Added examples directory
|
136
|
+
* Added examples directory
|
137
|
+
|
138
|
+
0.8.3 - May 13, 2007
|
139
|
+
* Added patches from Andy Triboletti
|
@@ -218,6 +218,11 @@ module ETL #:nodoc:
|
|
218
218
|
original_record = ETL::Row[result.symbolize_keys!]
|
219
219
|
original_record[scd_end_date_field] = timestamp
|
220
220
|
ETL::Engine.logger.debug "writing original record"
|
221
|
+
|
222
|
+
# if there is no truncate then the row will exist twice in the database
|
223
|
+
# need to figure out how to delete that old record before inserting the
|
224
|
+
# updated version of the record
|
225
|
+
|
221
226
|
buffer << original_record
|
222
227
|
end
|
223
228
|
|
@@ -57,7 +57,7 @@ module ETL #:nodoc:
|
|
57
57
|
values = []
|
58
58
|
order.each do |name|
|
59
59
|
names << name
|
60
|
-
values <<
|
60
|
+
values << conn.quote(row[name]) # TODO: this is probably not database agnostic
|
61
61
|
end
|
62
62
|
q = "INSERT INTO #{configuration[:table]} (#{names.join(',')}) VALUES (#{values.join(',')})"
|
63
63
|
ETL::Engine.logger.debug("Executing insert: #{q}")
|
data/lib/etl/engine.rb
CHANGED
@@ -204,7 +204,7 @@ module ETL #:nodoc:
|
|
204
204
|
# rows rather than the single row
|
205
205
|
rows = [row]
|
206
206
|
|
207
|
-
|
207
|
+
t = Benchmark.realtime do
|
208
208
|
begin
|
209
209
|
Engine.logger.debug "Processing after read"
|
210
210
|
control.after_read_processors.each do |processor|
|
@@ -221,8 +221,9 @@ module ETL #:nodoc:
|
|
221
221
|
exceeded_error_threshold?(control) ? break : next
|
222
222
|
end
|
223
223
|
end
|
224
|
+
benchmarks[:after_reads] += t unless t.nil?
|
224
225
|
|
225
|
-
|
226
|
+
t = Benchmark.realtime do
|
226
227
|
begin
|
227
228
|
# execute transforms
|
228
229
|
Engine.logger.debug "Executing transforms"
|
@@ -244,8 +245,9 @@ module ETL #:nodoc:
|
|
244
245
|
end
|
245
246
|
end
|
246
247
|
end
|
248
|
+
benchmarks[:transforms] += t unless t.nil?
|
247
249
|
|
248
|
-
|
250
|
+
t = Benchmark.realtime do
|
249
251
|
begin
|
250
252
|
# execute row-level "before write" processing
|
251
253
|
Engine.logger.debug "Processing before write"
|
@@ -264,8 +266,9 @@ module ETL #:nodoc:
|
|
264
266
|
exceeded_error_threshold?(control) ? break : next
|
265
267
|
end
|
266
268
|
end
|
269
|
+
benchmarks[:before_writes] += t unless t.nil?
|
267
270
|
|
268
|
-
|
271
|
+
t = Benchmark.realtime do
|
269
272
|
begin
|
270
273
|
# write the row to the destination
|
271
274
|
destinations.each_with_index do |destination, index|
|
@@ -283,6 +286,7 @@ module ETL #:nodoc:
|
|
283
286
|
exceeded_error_threshold?(control) ? break : next
|
284
287
|
end
|
285
288
|
end
|
289
|
+
benchmarks[:writes] += t unless t.nil?
|
286
290
|
end
|
287
291
|
|
288
292
|
if exceeded_error_threshold?(control)
|
data/lib/etl/http_tools.rb
CHANGED
@@ -7,7 +7,8 @@ module HttpTools
|
|
7
7
|
# Code taken from http://gemtacular.com/gems/ParseUserAgent
|
8
8
|
def parse_user_agent(user_agent)
|
9
9
|
if '-' == user_agent
|
10
|
-
raise 'Invalid User Agent'
|
10
|
+
#raise 'Invalid User Agent'
|
11
|
+
puts 'Invalid User Agent'
|
11
12
|
end
|
12
13
|
|
13
14
|
browser, browser_version_major, browser_version_minor, ostype, os, os_version = nil
|
@@ -106,12 +107,17 @@ module HttpTools
|
|
106
107
|
end
|
107
108
|
|
108
109
|
def parse_uri(uri_string)
|
109
|
-
if uri_string
|
110
|
-
uri
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
110
|
+
if uri_string
|
111
|
+
# attempt to parse the uri -- if it is not a valid uri, rescue the error and set everything to nil
|
112
|
+
begin
|
113
|
+
uri = URI.parse(uri_string)
|
114
|
+
results = {:scheme => uri.scheme, :host => uri.host, :port => uri.port, :uri_path => uri.path}
|
115
|
+
results[:domain] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
|
116
|
+
results
|
117
|
+
rescue
|
118
|
+
{:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
|
119
|
+
end
|
120
|
+
|
115
121
|
else
|
116
122
|
{:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
|
117
123
|
end
|
@@ -31,10 +31,10 @@ module ETL #:nodoc:
|
|
31
31
|
:user_agent => $9,
|
32
32
|
}
|
33
33
|
#fields[:timestamp] =~ r%{(\d\d)/(\w\w\w)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) -(\d\d\d\d)}
|
34
|
-
d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S')
|
35
|
-
fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction])
|
36
|
-
|
37
|
-
fields.merge!(parse_user_agent(fields[:user_agent]))
|
34
|
+
d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S') unless fields[:timestamp].nil?
|
35
|
+
fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction]) unless d.nil?
|
36
|
+
|
37
|
+
fields.merge!(parse_user_agent(fields[:user_agent])) unless fields[:user_agent].nil?
|
38
38
|
fields.merge!(parse_uri(fields[:referrer]))
|
39
39
|
|
40
40
|
fields.each do |key, value|
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.8.
|
7
|
-
date: 2007-
|
6
|
+
version: 0.8.3
|
7
|
+
date: 2007-05-13 00:00:00 -04:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,48 +36,41 @@ files:
|
|
36
36
|
- Rakefile
|
37
37
|
- bin/etl
|
38
38
|
- lib/etl
|
39
|
-
- lib/etl.rb
|
40
39
|
- lib/etl/commands
|
41
|
-
- lib/etl/control
|
42
|
-
- lib/etl/control.rb
|
43
|
-
- lib/etl/engine.rb
|
44
|
-
- lib/etl/execution
|
45
|
-
- lib/etl/execution.rb
|
46
|
-
- lib/etl/generator
|
47
|
-
- lib/etl/generator.rb
|
48
|
-
- lib/etl/http_tools.rb
|
49
|
-
- lib/etl/parser
|
50
|
-
- lib/etl/parser.rb
|
51
|
-
- lib/etl/processor
|
52
|
-
- lib/etl/processor.rb
|
53
|
-
- lib/etl/row.rb
|
54
|
-
- lib/etl/screen
|
55
|
-
- lib/etl/transform
|
56
|
-
- lib/etl/transform.rb
|
57
|
-
- lib/etl/version.rb
|
58
40
|
- lib/etl/commands/etl.rb
|
41
|
+
- lib/etl/control
|
59
42
|
- lib/etl/control/control.rb
|
60
43
|
- lib/etl/control/destination
|
61
|
-
- lib/etl/control/destination.rb
|
62
|
-
- lib/etl/control/source
|
63
|
-
- lib/etl/control/source.rb
|
64
44
|
- lib/etl/control/destination/database_destination.rb
|
65
45
|
- lib/etl/control/destination/file_destination.rb
|
46
|
+
- lib/etl/control/destination.rb
|
47
|
+
- lib/etl/control/source
|
66
48
|
- lib/etl/control/source/database_source.rb
|
67
49
|
- lib/etl/control/source/enumerable_source.rb
|
68
50
|
- lib/etl/control/source/file_source.rb
|
51
|
+
- lib/etl/control/source.rb
|
52
|
+
- lib/etl/control.rb
|
53
|
+
- lib/etl/engine.rb
|
54
|
+
- lib/etl/execution
|
69
55
|
- lib/etl/execution/base.rb
|
70
56
|
- lib/etl/execution/job.rb
|
71
57
|
- lib/etl/execution/migration.rb
|
72
58
|
- lib/etl/execution/record.rb
|
59
|
+
- lib/etl/execution.rb
|
60
|
+
- lib/etl/generator
|
73
61
|
- lib/etl/generator/generator.rb
|
74
62
|
- lib/etl/generator/surrogate_key_generator.rb
|
63
|
+
- lib/etl/generator.rb
|
64
|
+
- lib/etl/http_tools.rb
|
65
|
+
- lib/etl/parser
|
75
66
|
- lib/etl/parser/apache_combined_log_parser.rb
|
76
67
|
- lib/etl/parser/delimited_parser.rb
|
77
68
|
- lib/etl/parser/fixed_width_parser.rb
|
78
69
|
- lib/etl/parser/parser.rb
|
79
70
|
- lib/etl/parser/sax_parser.rb
|
80
71
|
- lib/etl/parser/xml_parser.rb
|
72
|
+
- lib/etl/parser.rb
|
73
|
+
- lib/etl/processor
|
81
74
|
- lib/etl/processor/bulk_import_processor.rb
|
82
75
|
- lib/etl/processor/check_exist_processor.rb
|
83
76
|
- lib/etl/processor/check_unique_processor.rb
|
@@ -90,7 +83,11 @@ files:
|
|
90
83
|
- lib/etl/processor/sequence_processor.rb
|
91
84
|
- lib/etl/processor/surrogate_key_processor.rb
|
92
85
|
- lib/etl/processor/truncate_processor.rb
|
86
|
+
- lib/etl/processor.rb
|
87
|
+
- lib/etl/row.rb
|
88
|
+
- lib/etl/screen
|
93
89
|
- lib/etl/screen/row_count_screen.rb
|
90
|
+
- lib/etl/transform
|
94
91
|
- lib/etl/transform/block_transform.rb
|
95
92
|
- lib/etl/transform/date_to_string_transform.rb
|
96
93
|
- lib/etl/transform/decode_transform.rb
|
@@ -105,6 +102,9 @@ files:
|
|
105
102
|
- lib/etl/transform/transform.rb
|
106
103
|
- lib/etl/transform/trim_transform.rb
|
107
104
|
- lib/etl/transform/type_transform.rb
|
105
|
+
- lib/etl/transform.rb
|
106
|
+
- lib/etl/version.rb
|
107
|
+
- lib/etl.rb
|
108
108
|
- examples/database.example.yml
|
109
109
|
test_files: []
|
110
110
|
|