activewarehouse-etl 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -127,10 +127,13 @@
127
127
  be constructed by appending Source to the type name), a class (which will be instantiated
128
128
  and passed the control, configuration and mapping) and finally an actual Source instance.
129
129
 
130
- 0.8.2 -
130
+ 0.8.2 - April 15, 2007
131
131
  * Fixed bug with premature destination closing.
132
132
  * Added indexes to execution records table.
133
133
  * Added a PrintRowProcessor.
134
134
  * Added support for conditions and "group by" in the database source.
135
135
  * Added after_initialize hook in Processor base class.
136
- * Added examples directory
136
+ * Added examples directory
137
+
138
+ 0.8.3 - May 13, 2007
139
+ * Added patches from Andy Triboletti
@@ -218,6 +218,11 @@ module ETL #:nodoc:
218
218
  original_record = ETL::Row[result.symbolize_keys!]
219
219
  original_record[scd_end_date_field] = timestamp
220
220
  ETL::Engine.logger.debug "writing original record"
221
+
222
+ # if there is no truncate then the row will exist twice in the database
223
+ # need to figure out how to delete that old record before inserting the
224
+ # updated version of the record
225
+
221
226
  buffer << original_record
222
227
  end
223
228
 
@@ -57,7 +57,7 @@ module ETL #:nodoc:
57
57
  values = []
58
58
  order.each do |name|
59
59
  names << name
60
- values << "'#{row[name]}'" # TODO: this is probably not database agnostic
60
+ values << conn.quote(row[name]) # TODO: this is probably not database agnostic
61
61
  end
62
62
  q = "INSERT INTO #{configuration[:table]} (#{names.join(',')}) VALUES (#{values.join(',')})"
63
63
  ETL::Engine.logger.debug("Executing insert: #{q}")
data/lib/etl/engine.rb CHANGED
@@ -204,7 +204,7 @@ module ETL #:nodoc:
204
204
  # rows rather than the single row
205
205
  rows = [row]
206
206
 
207
- benchmarks[:after_reads] += Benchmark.realtime do
207
+ t = Benchmark.realtime do
208
208
  begin
209
209
  Engine.logger.debug "Processing after read"
210
210
  control.after_read_processors.each do |processor|
@@ -221,8 +221,9 @@ module ETL #:nodoc:
221
221
  exceeded_error_threshold?(control) ? break : next
222
222
  end
223
223
  end
224
+ benchmarks[:after_reads] += t unless t.nil?
224
225
 
225
- benchmarks[:transforms] += Benchmark.realtime do
226
+ t = Benchmark.realtime do
226
227
  begin
227
228
  # execute transforms
228
229
  Engine.logger.debug "Executing transforms"
@@ -244,8 +245,9 @@ module ETL #:nodoc:
244
245
  end
245
246
  end
246
247
  end
248
+ benchmarks[:transforms] += t unless t.nil?
247
249
 
248
- benchmarks[:before_writes] += Benchmark.realtime do
250
+ t = Benchmark.realtime do
249
251
  begin
250
252
  # execute row-level "before write" processing
251
253
  Engine.logger.debug "Processing before write"
@@ -264,8 +266,9 @@ module ETL #:nodoc:
264
266
  exceeded_error_threshold?(control) ? break : next
265
267
  end
266
268
  end
269
+ benchmarks[:before_writes] += t unless t.nil?
267
270
 
268
- benchmarks[:writes] += Benchmark.realtime do
271
+ t = Benchmark.realtime do
269
272
  begin
270
273
  # write the row to the destination
271
274
  destinations.each_with_index do |destination, index|
@@ -283,6 +286,7 @@ module ETL #:nodoc:
283
286
  exceeded_error_threshold?(control) ? break : next
284
287
  end
285
288
  end
289
+ benchmarks[:writes] += t unless t.nil?
286
290
  end
287
291
 
288
292
  if exceeded_error_threshold?(control)
@@ -7,7 +7,8 @@ module HttpTools
7
7
  # Code taken from http://gemtacular.com/gems/ParseUserAgent
8
8
  def parse_user_agent(user_agent)
9
9
  if '-' == user_agent
10
- raise 'Invalid User Agent'
10
+ #raise 'Invalid User Agent'
11
+ puts 'Invalid User Agent'
11
12
  end
12
13
 
13
14
  browser, browser_version_major, browser_version_minor, ostype, os, os_version = nil
@@ -106,12 +107,17 @@ module HttpTools
106
107
  end
107
108
 
108
109
  def parse_uri(uri_string)
109
- if uri_string
110
- uri = URI.parse(uri_string)
111
-
112
- results = {:scheme => uri.scheme, :host => uri.host, :port => uri.port, :uri_path => uri.path}
113
- results[:domain] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
114
- results
110
+ if uri_string
111
+ #attempt to parse uri --if it's a uri then catch the problem and set everything to nil
112
+ begin
113
+ uri = URI.parse(uri_string)
114
+ results = {:scheme => uri.scheme, :host => uri.host, :port => uri.port, :uri_path => uri.path}
115
+ results[:domain] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
116
+ results
117
+ rescue
118
+ {:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
119
+ end
120
+
115
121
  else
116
122
  {:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
117
123
  end
@@ -31,10 +31,10 @@ module ETL #:nodoc:
31
31
  :user_agent => $9,
32
32
  }
33
33
  #fields[:timestamp] =~ r%{(\d\d)/(\w\w\w)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) -(\d\d\d\d)}
34
- d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S')
35
- fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction])
36
-
37
- fields.merge!(parse_user_agent(fields[:user_agent]))
34
+ d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S') unless fields[:timestamp].nil?
35
+ fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction]) unless d.nil?
36
+
37
+ fields.merge!(parse_user_agent(fields[:user_agent])) unless fields[:user_agent].nil?
38
38
  fields.merge!(parse_uri(fields[:referrer]))
39
39
 
40
40
  fields.each do |key, value|
data/lib/etl/version.rb CHANGED
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 8
5
- TINY = 2
5
+ TINY = 3
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0.10
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.8.2
7
- date: 2007-04-15 00:00:00 -04:00
6
+ version: 0.8.3
7
+ date: 2007-05-13 00:00:00 -04:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib
@@ -36,48 +36,41 @@ files:
36
36
  - Rakefile
37
37
  - bin/etl
38
38
  - lib/etl
39
- - lib/etl.rb
40
39
  - lib/etl/commands
41
- - lib/etl/control
42
- - lib/etl/control.rb
43
- - lib/etl/engine.rb
44
- - lib/etl/execution
45
- - lib/etl/execution.rb
46
- - lib/etl/generator
47
- - lib/etl/generator.rb
48
- - lib/etl/http_tools.rb
49
- - lib/etl/parser
50
- - lib/etl/parser.rb
51
- - lib/etl/processor
52
- - lib/etl/processor.rb
53
- - lib/etl/row.rb
54
- - lib/etl/screen
55
- - lib/etl/transform
56
- - lib/etl/transform.rb
57
- - lib/etl/version.rb
58
40
  - lib/etl/commands/etl.rb
41
+ - lib/etl/control
59
42
  - lib/etl/control/control.rb
60
43
  - lib/etl/control/destination
61
- - lib/etl/control/destination.rb
62
- - lib/etl/control/source
63
- - lib/etl/control/source.rb
64
44
  - lib/etl/control/destination/database_destination.rb
65
45
  - lib/etl/control/destination/file_destination.rb
46
+ - lib/etl/control/destination.rb
47
+ - lib/etl/control/source
66
48
  - lib/etl/control/source/database_source.rb
67
49
  - lib/etl/control/source/enumerable_source.rb
68
50
  - lib/etl/control/source/file_source.rb
51
+ - lib/etl/control/source.rb
52
+ - lib/etl/control.rb
53
+ - lib/etl/engine.rb
54
+ - lib/etl/execution
69
55
  - lib/etl/execution/base.rb
70
56
  - lib/etl/execution/job.rb
71
57
  - lib/etl/execution/migration.rb
72
58
  - lib/etl/execution/record.rb
59
+ - lib/etl/execution.rb
60
+ - lib/etl/generator
73
61
  - lib/etl/generator/generator.rb
74
62
  - lib/etl/generator/surrogate_key_generator.rb
63
+ - lib/etl/generator.rb
64
+ - lib/etl/http_tools.rb
65
+ - lib/etl/parser
75
66
  - lib/etl/parser/apache_combined_log_parser.rb
76
67
  - lib/etl/parser/delimited_parser.rb
77
68
  - lib/etl/parser/fixed_width_parser.rb
78
69
  - lib/etl/parser/parser.rb
79
70
  - lib/etl/parser/sax_parser.rb
80
71
  - lib/etl/parser/xml_parser.rb
72
+ - lib/etl/parser.rb
73
+ - lib/etl/processor
81
74
  - lib/etl/processor/bulk_import_processor.rb
82
75
  - lib/etl/processor/check_exist_processor.rb
83
76
  - lib/etl/processor/check_unique_processor.rb
@@ -90,7 +83,11 @@ files:
90
83
  - lib/etl/processor/sequence_processor.rb
91
84
  - lib/etl/processor/surrogate_key_processor.rb
92
85
  - lib/etl/processor/truncate_processor.rb
86
+ - lib/etl/processor.rb
87
+ - lib/etl/row.rb
88
+ - lib/etl/screen
93
89
  - lib/etl/screen/row_count_screen.rb
90
+ - lib/etl/transform
94
91
  - lib/etl/transform/block_transform.rb
95
92
  - lib/etl/transform/date_to_string_transform.rb
96
93
  - lib/etl/transform/decode_transform.rb
@@ -105,6 +102,9 @@ files:
105
102
  - lib/etl/transform/transform.rb
106
103
  - lib/etl/transform/trim_transform.rb
107
104
  - lib/etl/transform/type_transform.rb
105
+ - lib/etl/transform.rb
106
+ - lib/etl/version.rb
107
+ - lib/etl.rb
108
108
  - examples/database.example.yml
109
109
  test_files: []
110
110