activewarehouse-etl 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,6 +12,7 @@ module ETL #:nodoc:
12
12
  end
13
13
  # Transform the value using strftime
14
14
  def transform(name, value, row)
15
+ return value unless value.respond_to?(:strftime)
15
16
  value.strftime(@format)
16
17
  end
17
18
  end
@@ -5,6 +5,9 @@ module ETL #:nodoc:
5
5
  # The resolver to use if the foreign key is not found in the collection
6
6
  attr_accessor :resolver
7
7
 
8
+ # The default foreign key to use if none is found.
9
+ attr_accessor :default
10
+
8
11
  # Initialize the foreign key lookup transform.
9
12
  #
10
13
  # Configuration options:
@@ -12,12 +15,21 @@ module ETL #:nodoc:
12
15
  # an empty Hash will be used. This Hash will be used to cache values that have been resolved already
13
16
  # for future use.
14
17
  # *<tt>:resolver</tt>: Object or Class which implements the method resolve(value)
18
+ # *<tt>:default</tt>: A default foreign key to use if no foreign key is found
15
19
  def initialize(control, name, configuration={})
16
20
  super
17
21
 
18
22
  @collection = (configuration[:collection] || {})
19
23
  @resolver = configuration[:resolver]
20
24
  @resolver = @resolver.new if @resolver.is_a?(Class)
25
+ @default = configuration[:default]
26
+ if configuration[:cache] ||= true
27
+ if resolver.respond_to?(:load_cache)
28
+ resolver.load_cache
29
+ else
30
+ ETL::Engine.logger.info "#{resolver.class.name} does not support caching"
31
+ end
32
+ end
21
33
  end
22
34
 
23
35
  # Transform the value by resolving it to a foriegn key
@@ -27,7 +39,8 @@ module ETL #:nodoc:
27
39
  raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
28
40
  raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
29
41
  fk = resolver.resolve(value)
30
- raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}" unless fk
42
+ fk ||= @default
43
+ raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}. You may want to specify a :default value." unless fk
31
44
  @collection[value] = fk
32
45
  end
33
46
  fk
@@ -81,6 +94,58 @@ class SQLResolver
81
94
  @connection ||= ActiveRecord::Base.connection
82
95
  end
83
96
  def resolve(value)
84
- @connection.select_value("SELECT id FROM #{@table} WHERE #{@field} = #{@connection.quote(value)}")
97
+ if @use_cache
98
+ cache[value]
99
+ else
100
+ q = "SELECT id FROM #{table_name} WHERE #{@field} = #{@connection.quote(value)}"
101
+ ETL::Engine.logger.debug("Executing query: #{q}")
102
+ @connection.select_value(q)
103
+ end
104
+ end
105
+ def table_name
106
+ ETL::Engine.table(@table, @connection)
107
+ end
108
+ def cache
109
+ @cache ||= {}
110
+ end
111
+ def load_cache
112
+ @use_cache = true
113
+ q = "SELECT id, #{@field} FROM #{table_name}"
114
+ @connection.select_all(q).each do |record|
115
+ cache[record[@field]] = record['id']
116
+ end
117
+ end
118
+ end
119
+
120
+ class FlatFileResolver
121
+ # Initialize the flat file resolver. Expects to open a comma-delimited file.
122
+ # Returns the column with the given result_field_index.
123
+ #
124
+ # The matches argument is a Hash with the key as the column index to search and
125
+ # the value of the Hash as a String to match exactly. It will only match the first
126
+ # result.
127
+ def initialize(file, match_index, result_field_index)
128
+ @file = file
129
+ @match_index = match_index
130
+ @result_field_index = result_field_index
131
+ end
132
+
133
+ # Get the rows from the file specified in the initializer.
134
+ def rows
135
+ @rows ||= FasterCSV.read(@file)
136
+ end
137
+ protected :rows
138
+
139
+ # Match the row field from the column indicated by the match_index with the given
140
+ # value and return the field value from the column identified by the result_field_index.
141
+ def resolve(value)
142
+ rows.each do |row|
143
+ #puts "checking #{row.inspect} for #{value}"
144
+ if row[@match_index] == value
145
+ #puts "match found!, returning #{row[@result_field_index]}"
146
+ return row[@result_field_index]
147
+ end
148
+ end
149
+ nil
85
150
  end
86
151
  end
@@ -4,7 +4,12 @@ module ETL #:nodoc:
4
4
  class StringToDateTransform < ETL::Transform::Transform
5
5
  # Transform the value using Date.parse
6
6
  def transform(name, value, row)
7
- Date.parse(value)
7
+ return value if value.nil?
8
+ begin
9
+ Date.parse(value)
10
+ rescue => e
11
+ return value
12
+ end
8
13
  end
9
14
  end
10
15
  end
@@ -7,7 +7,7 @@ module ETL #:nodoc:
7
7
  # WARNING: This transform is slow (due to the Ruby implementation), but if you need to
8
8
  # parse timestamps before or after the values supported by the Time.parse.
9
9
  def transform(name, value, row)
10
- DateTime.parse(value)
10
+ DateTime.parse(value) unless value.nil?
11
11
  end
12
12
  end
13
13
  end
@@ -4,7 +4,7 @@ module ETL #:nodoc:
4
4
  class StringToTimeTransform < ETL::Transform::Transform
5
5
  # Transform the value using Time.parse
6
6
  def transform(name, value, row)
7
- Time.parse(value)
7
+ Time.parse(value) unless value.nil?
8
8
  end
9
9
  end
10
10
  end
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 9
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -1,33 +1,75 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
3
- specification_version: 1
4
2
  name: activewarehouse-etl
5
3
  version: !ruby/object:Gem::Version
6
- version: 0.9.0
7
- date: 2007-08-09 00:00:00 -04:00
8
- summary: Pure Ruby ETL package.
9
- require_paths:
10
- - lib
11
- email: anthonyeden@gmail.com
12
- homepage: http://activewarehouse.rubyforge.org/etl
13
- rubyforge_project: activewarehouse
14
- description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
15
- autorequire:
16
- default_executable: etl
17
- bindir: bin
18
- has_rdoc: false
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 0.9.1
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
6
  authors:
30
7
  - Anthony Eden
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-14 00:00:00 -05:00
13
+ default_executable: etl
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.7.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: activesupport
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.3.1
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: activerecord
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 1.14.4
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: fastercsv
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.2.0
54
+ version:
55
+ - !ruby/object:Gem::Dependency
56
+ name: adapter_extensions
57
+ type: :runtime
58
+ version_requirement:
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 0.1.0
64
+ version:
65
+ description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
66
+ email: anthonyeden@gmail.com
67
+ executables:
68
+ - etl
69
+ extensions: []
70
+
71
+ extra_rdoc_files: []
72
+
31
73
  files:
32
74
  - CHANGELOG
33
75
  - LICENSE
@@ -35,6 +77,7 @@ files:
35
77
  - TODO
36
78
  - Rakefile
37
79
  - bin/etl
80
+ - bin/etl.cmd
38
81
  - lib/etl
39
82
  - lib/etl/batch
40
83
  - lib/etl/batch/batch.rb
@@ -42,6 +85,7 @@ files:
42
85
  - lib/etl/batch.rb
43
86
  - lib/etl/builder
44
87
  - lib/etl/builder/date_dimension_builder.rb
88
+ - lib/etl/builder/time_dimension_builder.rb
45
89
  - lib/etl/builder.rb
46
90
  - lib/etl/commands
47
91
  - lib/etl/commands/etl.rb
@@ -85,10 +129,12 @@ files:
85
129
  - lib/etl/parser/xml_parser.rb
86
130
  - lib/etl/parser.rb
87
131
  - lib/etl/processor
132
+ - lib/etl/processor/block_processor.rb
88
133
  - lib/etl/processor/bulk_import_processor.rb
89
134
  - lib/etl/processor/check_exist_processor.rb
90
135
  - lib/etl/processor/check_unique_processor.rb
91
136
  - lib/etl/processor/copy_field_processor.rb
137
+ - lib/etl/processor/encode_processor.rb
92
138
  - lib/etl/processor/hierarchy_exploder_processor.rb
93
139
  - lib/etl/processor/print_row_processor.rb
94
140
  - lib/etl/processor/processor.rb
@@ -123,62 +169,32 @@ files:
123
169
  - lib/etl/version.rb
124
170
  - lib/etl.rb
125
171
  - examples/database.example.yml
126
- test_files: []
127
-
172
+ has_rdoc: false
173
+ homepage: http://activewarehouse.rubyforge.org/etl
174
+ post_install_message:
128
175
  rdoc_options:
129
176
  - --exclude
130
177
  - .
131
- extra_rdoc_files: []
132
-
133
- executables:
134
- - etl
135
- extensions: []
136
-
178
+ require_paths:
179
+ - lib
180
+ required_ruby_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ">="
183
+ - !ruby/object:Gem::Version
184
+ version: "0"
185
+ version:
186
+ required_rubygems_version: !ruby/object:Gem::Requirement
187
+ requirements:
188
+ - - ">="
189
+ - !ruby/object:Gem::Version
190
+ version: "0"
191
+ version:
137
192
  requirements: []
138
193
 
139
- dependencies:
140
- - !ruby/object:Gem::Dependency
141
- name: rake
142
- version_requirement:
143
- version_requirements: !ruby/object:Gem::Version::Requirement
144
- requirements:
145
- - - ">="
146
- - !ruby/object:Gem::Version
147
- version: 0.7.1
148
- version:
149
- - !ruby/object:Gem::Dependency
150
- name: activesupport
151
- version_requirement:
152
- version_requirements: !ruby/object:Gem::Version::Requirement
153
- requirements:
154
- - - ">="
155
- - !ruby/object:Gem::Version
156
- version: 1.3.1
157
- version:
158
- - !ruby/object:Gem::Dependency
159
- name: activerecord
160
- version_requirement:
161
- version_requirements: !ruby/object:Gem::Version::Requirement
162
- requirements:
163
- - - ">="
164
- - !ruby/object:Gem::Version
165
- version: 1.14.4
166
- version:
167
- - !ruby/object:Gem::Dependency
168
- name: fastercsv
169
- version_requirement:
170
- version_requirements: !ruby/object:Gem::Version::Requirement
171
- requirements:
172
- - - ">="
173
- - !ruby/object:Gem::Version
174
- version: 1.2.0
175
- version:
176
- - !ruby/object:Gem::Dependency
177
- name: adapter_extensions
178
- version_requirement:
179
- version_requirements: !ruby/object:Gem::Version::Requirement
180
- requirements:
181
- - - ">="
182
- - !ruby/object:Gem::Version
183
- version: 0.1.0
184
- version:
194
+ rubyforge_project: activewarehouse
195
+ rubygems_version: 1.3.1
196
+ signing_key:
197
+ specification_version: 2
198
+ summary: Pure Ruby ETL package.
199
+ test_files: []
200
+