activewarehouse-etl 0.8.2 → 0.8.3

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -127,10 +127,13 @@
127
127
  be constructed by appending Source to the type name), a class (which will be instantiated
128
128
  and passed the control, configuration and mapping) and finally an actual Source instance.
129
129
 
130
- 0.8.2 -
130
+ 0.8.2 - April 15, 2007
131
131
  * Fixed bug with premature destination closing.
132
132
  * Added indexes to execution records table.
133
133
  * Added a PrintRowProcessor.
134
134
  * Added support for conditions and "group by" in the database source.
135
135
  * Added after_initialize hook in Processor base class.
136
- * Added examples directory
136
+ * Added examples directory
137
+
138
+ 0.8.3 - May 13, 2007
139
+ * Added patches from Andy Triboletti
@@ -218,6 +218,11 @@ module ETL #:nodoc:
218
218
  original_record = ETL::Row[result.symbolize_keys!]
219
219
  original_record[scd_end_date_field] = timestamp
220
220
  ETL::Engine.logger.debug "writing original record"
221
+
222
+ # if there is no truncate then the row will exist twice in the database
223
+ # need to figure out how to delete that old record before inserting the
224
+ # updated version of the record
225
+
221
226
  buffer << original_record
222
227
  end
223
228
 
@@ -57,7 +57,7 @@ module ETL #:nodoc:
57
57
  values = []
58
58
  order.each do |name|
59
59
  names << name
60
- values << "'#{row[name]}'" # TODO: this is probably not database agnostic
60
+ values << conn.quote(row[name]) # TODO: this is probably not database agnostic
61
61
  end
62
62
  q = "INSERT INTO #{configuration[:table]} (#{names.join(',')}) VALUES (#{values.join(',')})"
63
63
  ETL::Engine.logger.debug("Executing insert: #{q}")
data/lib/etl/engine.rb CHANGED
@@ -204,7 +204,7 @@ module ETL #:nodoc:
204
204
  # rows rather than the single row
205
205
  rows = [row]
206
206
 
207
- benchmarks[:after_reads] += Benchmark.realtime do
207
+ t = Benchmark.realtime do
208
208
  begin
209
209
  Engine.logger.debug "Processing after read"
210
210
  control.after_read_processors.each do |processor|
@@ -221,8 +221,9 @@ module ETL #:nodoc:
221
221
  exceeded_error_threshold?(control) ? break : next
222
222
  end
223
223
  end
224
+ benchmarks[:after_reads] += t unless t.nil?
224
225
 
225
- benchmarks[:transforms] += Benchmark.realtime do
226
+ t = Benchmark.realtime do
226
227
  begin
227
228
  # execute transforms
228
229
  Engine.logger.debug "Executing transforms"
@@ -244,8 +245,9 @@ module ETL #:nodoc:
244
245
  end
245
246
  end
246
247
  end
248
+ benchmarks[:transforms] += t unless t.nil?
247
249
 
248
- benchmarks[:before_writes] += Benchmark.realtime do
250
+ t = Benchmark.realtime do
249
251
  begin
250
252
  # execute row-level "before write" processing
251
253
  Engine.logger.debug "Processing before write"
@@ -264,8 +266,9 @@ module ETL #:nodoc:
264
266
  exceeded_error_threshold?(control) ? break : next
265
267
  end
266
268
  end
269
+ benchmarks[:before_writes] += t unless t.nil?
267
270
 
268
- benchmarks[:writes] += Benchmark.realtime do
271
+ t = Benchmark.realtime do
269
272
  begin
270
273
  # write the row to the destination
271
274
  destinations.each_with_index do |destination, index|
@@ -283,6 +286,7 @@ module ETL #:nodoc:
283
286
  exceeded_error_threshold?(control) ? break : next
284
287
  end
285
288
  end
289
+ benchmarks[:writes] += t unless t.nil?
286
290
  end
287
291
 
288
292
  if exceeded_error_threshold?(control)
@@ -7,7 +7,8 @@ module HttpTools
7
7
  # Code taken from http://gemtacular.com/gems/ParseUserAgent
8
8
  def parse_user_agent(user_agent)
9
9
  if '-' == user_agent
10
- raise 'Invalid User Agent'
10
+ #raise 'Invalid User Agent'
11
+ puts 'Invalid User Agent'
11
12
  end
12
13
 
13
14
  browser, browser_version_major, browser_version_minor, ostype, os, os_version = nil
@@ -106,12 +107,17 @@ module HttpTools
106
107
  end
107
108
 
108
109
  def parse_uri(uri_string)
109
- if uri_string
110
- uri = URI.parse(uri_string)
111
-
112
- results = {:scheme => uri.scheme, :host => uri.host, :port => uri.port, :uri_path => uri.path}
113
- results[:domain] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
114
- results
110
+ if uri_string
111
+ #attempt to parse uri -- if it's not a valid uri then catch the problem and set everything to nil
112
+ begin
113
+ uri = URI.parse(uri_string)
114
+ results = {:scheme => uri.scheme, :host => uri.host, :port => uri.port, :uri_path => uri.path}
115
+ results[:domain] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
116
+ results
117
+ rescue
118
+ {:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
119
+ end
120
+
115
121
  else
116
122
  {:scheme => nil, :host => nil, :port => nil, :uri_path => nil, :domain => nil}
117
123
  end
@@ -31,10 +31,10 @@ module ETL #:nodoc:
31
31
  :user_agent => $9,
32
32
  }
33
33
  #fields[:timestamp] =~ r%{(\d\d)/(\w\w\w)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) -(\d\d\d\d)}
34
- d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S')
35
- fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction])
36
-
37
- fields.merge!(parse_user_agent(fields[:user_agent]))
34
+ d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S') unless fields[:timestamp].nil?
35
+ fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction]) unless d.nil?
36
+
37
+ fields.merge!(parse_user_agent(fields[:user_agent])) unless fields[:user_agent].nil?
38
38
  fields.merge!(parse_uri(fields[:referrer]))
39
39
 
40
40
  fields.each do |key, value|
data/lib/etl/version.rb CHANGED
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 8
5
- TINY = 2
5
+ TINY = 3
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.0.10
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.8.2
7
- date: 2007-04-15 00:00:00 -04:00
6
+ version: 0.8.3
7
+ date: 2007-05-13 00:00:00 -04:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib
@@ -36,48 +36,41 @@ files:
36
36
  - Rakefile
37
37
  - bin/etl
38
38
  - lib/etl
39
- - lib/etl.rb
40
39
  - lib/etl/commands
41
- - lib/etl/control
42
- - lib/etl/control.rb
43
- - lib/etl/engine.rb
44
- - lib/etl/execution
45
- - lib/etl/execution.rb
46
- - lib/etl/generator
47
- - lib/etl/generator.rb
48
- - lib/etl/http_tools.rb
49
- - lib/etl/parser
50
- - lib/etl/parser.rb
51
- - lib/etl/processor
52
- - lib/etl/processor.rb
53
- - lib/etl/row.rb
54
- - lib/etl/screen
55
- - lib/etl/transform
56
- - lib/etl/transform.rb
57
- - lib/etl/version.rb
58
40
  - lib/etl/commands/etl.rb
41
+ - lib/etl/control
59
42
  - lib/etl/control/control.rb
60
43
  - lib/etl/control/destination
61
- - lib/etl/control/destination.rb
62
- - lib/etl/control/source
63
- - lib/etl/control/source.rb
64
44
  - lib/etl/control/destination/database_destination.rb
65
45
  - lib/etl/control/destination/file_destination.rb
46
+ - lib/etl/control/destination.rb
47
+ - lib/etl/control/source
66
48
  - lib/etl/control/source/database_source.rb
67
49
  - lib/etl/control/source/enumerable_source.rb
68
50
  - lib/etl/control/source/file_source.rb
51
+ - lib/etl/control/source.rb
52
+ - lib/etl/control.rb
53
+ - lib/etl/engine.rb
54
+ - lib/etl/execution
69
55
  - lib/etl/execution/base.rb
70
56
  - lib/etl/execution/job.rb
71
57
  - lib/etl/execution/migration.rb
72
58
  - lib/etl/execution/record.rb
59
+ - lib/etl/execution.rb
60
+ - lib/etl/generator
73
61
  - lib/etl/generator/generator.rb
74
62
  - lib/etl/generator/surrogate_key_generator.rb
63
+ - lib/etl/generator.rb
64
+ - lib/etl/http_tools.rb
65
+ - lib/etl/parser
75
66
  - lib/etl/parser/apache_combined_log_parser.rb
76
67
  - lib/etl/parser/delimited_parser.rb
77
68
  - lib/etl/parser/fixed_width_parser.rb
78
69
  - lib/etl/parser/parser.rb
79
70
  - lib/etl/parser/sax_parser.rb
80
71
  - lib/etl/parser/xml_parser.rb
72
+ - lib/etl/parser.rb
73
+ - lib/etl/processor
81
74
  - lib/etl/processor/bulk_import_processor.rb
82
75
  - lib/etl/processor/check_exist_processor.rb
83
76
  - lib/etl/processor/check_unique_processor.rb
@@ -90,7 +83,11 @@ files:
90
83
  - lib/etl/processor/sequence_processor.rb
91
84
  - lib/etl/processor/surrogate_key_processor.rb
92
85
  - lib/etl/processor/truncate_processor.rb
86
+ - lib/etl/processor.rb
87
+ - lib/etl/row.rb
88
+ - lib/etl/screen
93
89
  - lib/etl/screen/row_count_screen.rb
90
+ - lib/etl/transform
94
91
  - lib/etl/transform/block_transform.rb
95
92
  - lib/etl/transform/date_to_string_transform.rb
96
93
  - lib/etl/transform/decode_transform.rb
@@ -105,6 +102,9 @@ files:
105
102
  - lib/etl/transform/transform.rb
106
103
  - lib/etl/transform/trim_transform.rb
107
104
  - lib/etl/transform/type_transform.rb
105
+ - lib/etl/transform.rb
106
+ - lib/etl/version.rb
107
+ - lib/etl.rb
108
108
  - examples/database.example.yml
109
109
  test_files: []
110
110