activewarehouse-etl 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ module ETL #:nodoc:
12
12
  end
13
13
  # Transform the value using strftime
14
14
  def transform(name, value, row)
15
+ return value unless value.respond_to?(:strftime)
15
16
  value.strftime(@format)
16
17
  end
17
18
  end
@@ -5,6 +5,9 @@ module ETL #:nodoc:
5
5
  # The resolver to use if the foreign key is not found in the collection
6
6
  attr_accessor :resolver
7
7
 
8
+ # The default foreign key to use if none is found.
9
+ attr_accessor :default
10
+
8
11
  # Initialize the foreign key lookup transform.
9
12
  #
10
13
  # Configuration options:
@@ -12,12 +15,21 @@ module ETL #:nodoc:
12
15
  # an empty Hash will be used. This Hash will be used to cache values that have been resolved already
13
16
  # for future use.
14
17
  # *<tt>:resolver</tt>: Object or Class which implements the method resolve(value)
18
+ # *<tt>:default</tt>: A default foreign key to use if no foreign key is found
15
19
  def initialize(control, name, configuration={})
16
20
  super
17
21
 
18
22
  @collection = (configuration[:collection] || {})
19
23
  @resolver = configuration[:resolver]
20
24
  @resolver = @resolver.new if @resolver.is_a?(Class)
25
+ @default = configuration[:default]
26
+ if configuration[:cache] ||= true
27
+ if resolver.respond_to?(:load_cache)
28
+ resolver.load_cache
29
+ else
30
+ ETL::Engine.logger.info "#{resolver.class.name} does not support caching"
31
+ end
32
+ end
21
33
  end
22
34
 
23
35
  # Transform the value by resolving it to a foreign key
@@ -27,7 +39,8 @@ module ETL #:nodoc:
27
39
  raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
28
40
  raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)
29
41
  fk = resolver.resolve(value)
30
- raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}" unless fk
42
+ fk ||= @default
43
+ raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}. You may want to specify a :default value." unless fk
31
44
  @collection[value] = fk
32
45
  end
33
46
  fk
@@ -81,6 +94,58 @@ class SQLResolver
81
94
  @connection ||= ActiveRecord::Base.connection
82
95
  end
83
96
  def resolve(value)
84
- @connection.select_value("SELECT id FROM #{@table} WHERE #{@field} = #{@connection.quote(value)}")
97
+ if @use_cache
98
+ cache[value]
99
+ else
100
+ q = "SELECT id FROM #{table_name} WHERE #{@field} = #{@connection.quote(value)}"
101
+ ETL::Engine.logger.debug("Executing query: #{q}")
102
+ @connection.select_value(q)
103
+ end
104
+ end
105
+ def table_name
106
+ ETL::Engine.table(@table, @connection)
107
+ end
108
+ def cache
109
+ @cache ||= {}
110
+ end
111
+ def load_cache
112
+ @use_cache = true
113
+ q = "SELECT id, #{@field} FROM #{table_name}"
114
+ @connection.select_all(q).each do |record|
115
+ cache[record[@field]] = record['id']
116
+ end
117
+ end
118
+ end
119
+
120
+ class FlatFileResolver
121
+ # Initialize the flat file resolver. Expects to open a comma-delimited file.
122
+ # Returns the column with the given result_field_index.
123
+ #
124
+ # The match_index argument is the index of the column to search; that column is
125
+ # compared against the value being resolved for exact equality, and only the first
126
+ # matching row is used.
127
+ def initialize(file, match_index, result_field_index)
128
+ @file = file
129
+ @match_index = match_index
130
+ @result_field_index = result_field_index
131
+ end
132
+
133
+ # Get the rows from the file specified in the initializer.
134
+ def rows
135
+ @rows ||= FasterCSV.read(@file)
136
+ end
137
+ protected :rows
138
+
139
+ # Match the row field from the column indicated by the match_index with the given
140
+ # value and return the field value from the column identified by the result_field_index.
141
+ def resolve(value)
142
+ rows.each do |row|
143
+ #puts "checking #{row.inspect} for #{value}"
144
+ if row[@match_index] == value
145
+ #puts "match found!, returning #{row[@result_field_index]}"
146
+ return row[@result_field_index]
147
+ end
148
+ end
149
+ nil
85
150
  end
86
151
  end
@@ -4,7 +4,12 @@ module ETL #:nodoc:
4
4
  class StringToDateTransform < ETL::Transform::Transform
5
5
  # Transform the value using Date.parse
6
6
  def transform(name, value, row)
7
- Date.parse(value)
7
+ return value if value.nil?
8
+ begin
9
+ Date.parse(value)
10
+ rescue => e
11
+ return value
12
+ end
8
13
  end
9
14
  end
10
15
  end
@@ -7,7 +7,7 @@ module ETL #:nodoc:
7
7
  # WARNING: This transform is slow (due to the Ruby implementation), but it is useful if you need to
8
8
  # parse timestamps before or after the values supported by Time.parse.
9
9
  def transform(name, value, row)
10
- DateTime.parse(value)
10
+ DateTime.parse(value) unless value.nil?
11
11
  end
12
12
  end
13
13
  end
@@ -4,7 +4,7 @@ module ETL #:nodoc:
4
4
  class StringToTimeTransform < ETL::Transform::Transform
5
5
  # Transform the value using Time.parse
6
6
  def transform(name, value, row)
7
- Time.parse(value)
7
+ Time.parse(value) unless value.nil?
8
8
  end
9
9
  end
10
10
  end
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 9
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -1,33 +1,75 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
3
- specification_version: 1
4
2
  name: activewarehouse-etl
5
3
  version: !ruby/object:Gem::Version
6
- version: 0.9.0
7
- date: 2007-08-09 00:00:00 -04:00
8
- summary: Pure Ruby ETL package.
9
- require_paths:
10
- - lib
11
- email: anthonyeden@gmail.com
12
- homepage: http://activewarehouse.rubyforge.org/etl
13
- rubyforge_project: activewarehouse
14
- description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
15
- autorequire:
16
- default_executable: etl
17
- bindir: bin
18
- has_rdoc: false
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 0.9.1
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
6
  authors:
30
7
  - Anthony Eden
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-14 00:00:00 -05:00
13
+ default_executable: etl
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.7.1
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: activesupport
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.3.1
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: activerecord
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 1.14.4
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: fastercsv
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.2.0
54
+ version:
55
+ - !ruby/object:Gem::Dependency
56
+ name: adapter_extensions
57
+ type: :runtime
58
+ version_requirement:
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 0.1.0
64
+ version:
65
+ description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
66
+ email: anthonyeden@gmail.com
67
+ executables:
68
+ - etl
69
+ extensions: []
70
+
71
+ extra_rdoc_files: []
72
+
31
73
  files:
32
74
  - CHANGELOG
33
75
  - LICENSE
@@ -35,6 +77,7 @@ files:
35
77
  - TODO
36
78
  - Rakefile
37
79
  - bin/etl
80
+ - bin/etl.cmd
38
81
  - lib/etl
39
82
  - lib/etl/batch
40
83
  - lib/etl/batch/batch.rb
@@ -42,6 +85,7 @@ files:
42
85
  - lib/etl/batch.rb
43
86
  - lib/etl/builder
44
87
  - lib/etl/builder/date_dimension_builder.rb
88
+ - lib/etl/builder/time_dimension_builder.rb
45
89
  - lib/etl/builder.rb
46
90
  - lib/etl/commands
47
91
  - lib/etl/commands/etl.rb
@@ -85,10 +129,12 @@ files:
85
129
  - lib/etl/parser/xml_parser.rb
86
130
  - lib/etl/parser.rb
87
131
  - lib/etl/processor
132
+ - lib/etl/processor/block_processor.rb
88
133
  - lib/etl/processor/bulk_import_processor.rb
89
134
  - lib/etl/processor/check_exist_processor.rb
90
135
  - lib/etl/processor/check_unique_processor.rb
91
136
  - lib/etl/processor/copy_field_processor.rb
137
+ - lib/etl/processor/encode_processor.rb
92
138
  - lib/etl/processor/hierarchy_exploder_processor.rb
93
139
  - lib/etl/processor/print_row_processor.rb
94
140
  - lib/etl/processor/processor.rb
@@ -123,62 +169,32 @@ files:
123
169
  - lib/etl/version.rb
124
170
  - lib/etl.rb
125
171
  - examples/database.example.yml
126
- test_files: []
127
-
172
+ has_rdoc: false
173
+ homepage: http://activewarehouse.rubyforge.org/etl
174
+ post_install_message:
128
175
  rdoc_options:
129
176
  - --exclude
130
177
  - .
131
- extra_rdoc_files: []
132
-
133
- executables:
134
- - etl
135
- extensions: []
136
-
178
+ require_paths:
179
+ - lib
180
+ required_ruby_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ">="
183
+ - !ruby/object:Gem::Version
184
+ version: "0"
185
+ version:
186
+ required_rubygems_version: !ruby/object:Gem::Requirement
187
+ requirements:
188
+ - - ">="
189
+ - !ruby/object:Gem::Version
190
+ version: "0"
191
+ version:
137
192
  requirements: []
138
193
 
139
- dependencies:
140
- - !ruby/object:Gem::Dependency
141
- name: rake
142
- version_requirement:
143
- version_requirements: !ruby/object:Gem::Version::Requirement
144
- requirements:
145
- - - ">="
146
- - !ruby/object:Gem::Version
147
- version: 0.7.1
148
- version:
149
- - !ruby/object:Gem::Dependency
150
- name: activesupport
151
- version_requirement:
152
- version_requirements: !ruby/object:Gem::Version::Requirement
153
- requirements:
154
- - - ">="
155
- - !ruby/object:Gem::Version
156
- version: 1.3.1
157
- version:
158
- - !ruby/object:Gem::Dependency
159
- name: activerecord
160
- version_requirement:
161
- version_requirements: !ruby/object:Gem::Version::Requirement
162
- requirements:
163
- - - ">="
164
- - !ruby/object:Gem::Version
165
- version: 1.14.4
166
- version:
167
- - !ruby/object:Gem::Dependency
168
- name: fastercsv
169
- version_requirement:
170
- version_requirements: !ruby/object:Gem::Version::Requirement
171
- requirements:
172
- - - ">="
173
- - !ruby/object:Gem::Version
174
- version: 1.2.0
175
- version:
176
- - !ruby/object:Gem::Dependency
177
- name: adapter_extensions
178
- version_requirement:
179
- version_requirements: !ruby/object:Gem::Version::Requirement
180
- requirements:
181
- - - ">="
182
- - !ruby/object:Gem::Version
183
- version: 0.1.0
184
- version:
194
+ rubyforge_project: activewarehouse
195
+ rubygems_version: 1.3.1
196
+ signing_key:
197
+ specification_version: 2
198
+ summary: Pure Ruby ETL package.
199
+ test_files: []
200
+