activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,2 @@
1
+ require 'etl/transform/transform'
2
+ Dir[File.dirname(__FILE__) + "/transform/*.rb"].each { |file| require(file) }
@@ -0,0 +1,13 @@
1
module ETL
  module Transform
    # Transform whose behaviour is supplied by the caller as a callable.
    class BlockTransform < ETL::Transform::Transform
      # Initialize the transform.
      #
      # Configuration options:
      # * <tt>:block</tt>: The callable to invoke for each value. It receives
      #   the field name, the current value and the row.
      def initialize(control, name, configuration)
        super
        @block = configuration[:block]
      end

      # Pass the field name, value and row to the configured callable and
      # return whatever it returns.
      def transform(name, value, row)
        callable = @block
        callable.call(name, value, row)
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
module ETL
  module Transform
    # Transform which derives a field from other row fields using one of a
    # fixed set of named functions.
    #
    # Supported <tt>:function</tt> values:
    # * <tt>"A + B"</tt>: concatenate every entry of <tt>:fields</tt>. String
    #   entries are treated as literals, any other entry is used as a row key.
    # * <tt>"date A"</tt>: parse the first field with Time.parse.
    # * <tt>"trim A"</tt>: strip the first field.
    # * <tt>"lower A"</tt>: downcase the first field.
    # * <tt>"upper A"</tt>: upcase the first field.
    # * <tt>"encoding A"</tt>: convert the first field with Iconv.conv, passing
    #   <tt>fields[1]</tt> and <tt>fields[2]</tt> as its first two arguments.
    #
    # Any other function leaves the row untouched.
    class CalculationTransform < ETL::Transform::Transform
      attr_reader :function
      attr_reader :fields

      # Initialize the transform.
      #
      # Configuration options:
      # * <tt>:function</tt>: One of the function names listed on the class
      # * <tt>:fields</tt>: Array of row keys (and, for "A + B", literals)
      def initialize(control, name, configuration)
        # Set before calling super, exactly as the original ordering did.
        @function = configuration[:function]
        @fields = configuration[:fields]
        super
      end

      # Apply the configured function, store the result in row[name] and
      # return row[name].
      #
      # Returns nil without touching the row when the row is nil or the first
      # configured field is nil in the row.
      def transform(name, value, row)
        return nil if row.nil?
        return nil if row[@fields[0]].nil?

        first = row[@fields[0]]

        if @function.eql?("A + B")
          row[name] = concatenate_fields(row)
        elsif @function.eql?("date A")
          row[name] = Time.parse(first)
        elsif @function.eql?("trim A")
          row[name] = first.strip
        elsif @function.eql?("lower A")
          row[name] = first.downcase
        elsif @function.eql?("upper A")
          row[name] = first.upcase
        elsif @function.eql?("encoding A")
          # Bug from ruby 1.8 http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
          # A trailing space is appended before conversion and removed afterwards.
          # NOTE(review): Iconv is not part of the standard library on Ruby >= 2.0;
          # confirm it is provided elsewhere before relying on this branch.
          row[name] = Iconv.conv(@fields[1], @fields[2], first + ' ')[0..-2]
        end

        row[name]
      end

      private

      # Build the "A + B" result: literals and row values joined in order,
      # skipping nil entries. Row values are converted with to_s so numeric
      # columns no longer raise a TypeError during concatenation.
      def concatenate_fields(row)
        pieces = []
        @fields.each do |field|
          next if field.nil?

          piece = field.is_a?(String) ? interpolate_literal(field) : row[field]
          pieces << piece.to_s unless piece.nil?
        end
        pieces.join
      end

      # Evaluate a literal as a double-quoted Ruby string so escapes and
      # interpolation work; fall back to the raw text when that fails.
      #
      # SECURITY: this evals text taken from the transform configuration. It
      # must never be fed values that originate from untrusted input.
      def interpolate_literal(literal)
        eval('"' + literal + '"')
      rescue StandardError, SyntaxError
        # SyntaxError is not a StandardError, so it has to be named explicitly
        # for literals such as 'a"b' to fall back instead of crashing.
        literal
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which renders a Date or Time (anything responding to
    # strftime) as a formatted string.
    class DateToStringTransform < ETL::Transform::Transform
      # Initialize the transformer.
      #
      # Configuration options:
      # * <tt>:format</tt>: A format passed to strftime. Defaults to %Y-%m-%d
      def initialize(control, name, configuration={})
        super
        @format = configuration[:format] || "%Y-%m-%d"
      end

      # Format the value with strftime. Values that do not respond to
      # strftime are returned unchanged.
      def transform(name, value, row)
        if value.respond_to?(:strftime)
          value.strftime(@format)
        else
          value
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which decodes coded values using a delimited decode table file.
    #
    # Each line of the decode table is "code<delimiter>value". A line whose
    # code is empty (for example ":Unknown") supplies the default value.
    class DecodeTransform < ETL::Transform::Transform
      # Path of the decode table file
      attr_accessor :decode_table_path

      # Delimiter separating code and value on each decode table line
      attr_accessor :decode_table_delimiter

      # Value returned when a code is not present in the decode table
      attr_accessor :default_value

      # Initialize the transformer
      #
      # Configuration options:
      # * <tt>:decode_table_path</tt>: The path to the decode table (defaults to 'decode.txt')
      # * <tt>:decode_table_delimiter</tt>: The decode table delimiter (defaults to ':')
      # * <tt>:default_value</tt>: The default value to use (defaults to 'No Value')
      def initialize(control, name, configuration={})
        super

        # A configured path is resolved relative to the control file's
        # directory; the configuration hash is updated in place.
        if configuration[:decode_table_path]
          configuration[:decode_table_path] = File.join(File.dirname(control.file), configuration[:decode_table_path])
        end

        @decode_table_path = (configuration[:decode_table_path] || 'decode.txt')
        @decode_table_delimiter = (configuration[:decode_table_delimiter] || ':')
        @default_value = (configuration[:default_value] || 'No Value')
      end

      # Transform the value: return the decoded value, or the default value
      # when the code is unknown. The table is consulted first so a default
      # declared inside the table file is already in effect.
      def transform(name, value, row)
        decode_table[value] || default_value
      end

      # Get the decode table, loading it from disk on first use.
      #
      # The table is only memoized after a successful load, so a missing or
      # unreadable file raises on every call instead of leaving an empty
      # table behind.
      def decode_table
        @decode_table ||= load_decode_table
      end

      private

      # Read the decode table file into a Hash of code => value.
      def load_decode_table
        table = {}
        # File.foreach closes the file once iteration finishes.
        File.foreach(decode_table_path) do |line|
          code, value = line.strip.split(decode_table_delimiter)
          if code && code.length > 0
            table[code] = value
          elsif !value.nil?
            # Empty code with a value: this line declares the default.
            # Blank lines yield no value and must not reset the default.
            @default_value = value
          end
        end
        table
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which substitutes a configured value for blank input.
    class DefaultTransform < Transform
      # The value returned in place of a blank input
      attr_accessor :default_value

      # Initialize the transform
      #
      # Configuration options:
      # * <tt>:default_value</tt>: The default value to use if the incoming value is blank
      def initialize(control, name, configuration)
        super
        @default_value = configuration[:default_value]
      end

      # Return the default value when the incoming value is blank (as decided
      # by value.blank?), otherwise return the value unchanged.
      def transform(name, value, row)
        if value.blank?
          default_value
        else
          value
        end
      end
    end
  end
end
@@ -0,0 +1,211 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which looks up the value and replaces it with a foreign key reference
    class ForeignKeyLookupTransform < ETL::Transform::Transform
      # The resolver consulted when the value is not yet in the collection
      attr_accessor :resolver

      # The foreign key used when the resolver finds nothing
      attr_accessor :default

      # Initialize the foreign key lookup transform.
      #
      # Configuration options:
      # *<tt>:collection</tt>: A Hash of natural keys mapped to surrogate keys. If this is not
      #  specified then an empty Hash is used. Resolved values are stored in this Hash so they
      #  are not resolved again.
      # *<tt>:resolver</tt>: Object or Class which implements the method resolve(value). A Class
      #  is instantiated with no arguments.
      # *<tt>:default</tt>: A default foreign key to use if no foreign key is found
      # *<tt>:cache</tt>: If true (the default when not given) and the resolver responds to
      #  load_cache, load_cache will be called
      def initialize(control, name, configuration={})
        super

        @collection = (configuration[:collection] || {})
        @default = configuration[:default]

        resolver_option = configuration[:resolver]
        @resolver = resolver_option.is_a?(Class) ? resolver_option.new : resolver_option

        # An unspecified :cache option is written back as true.
        configuration[:cache] = true if configuration[:cache].nil?
        return unless configuration[:cache]

        if resolver.respond_to?(:load_cache)
          resolver.load_cache
        else
          ETL::Engine.logger.info "#{resolver.class.name} does not support caching"
        end
      end

      # Transform the value by resolving it to a foreign key.
      #
      # Known values come straight from the collection. Otherwise the resolver
      # is asked, falling back to the default, and the result is remembered.
      # Raises ResolverError when there is no usable resolver or nothing could
      # be resolved.
      def transform(name, value, row)
        known = @collection[value]
        return known if known

        raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
        raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)

        resolved = resolver.resolve(value) || @default
        raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}. You may want to specify a :default value." unless resolved

        @collection[value] = resolved
      end
    end

    # Alias class name for the ForeignKeyLookupTransform.
    class FkLookupTransform < ForeignKeyLookupTransform; end
  end
end
57
+
58
# Resolver which finds foreign keys by calling a finder method on a class.
class ActiveRecordResolver
  # The class the finder is invoked on
  attr_accessor :ar_class

  # The name of the finder method (as a symbol)
  attr_accessor :find_method

  # Initialize the resolver. The ar_class argument should extend from
  # ActiveRecord::Base and find_method must be the symbol naming the finder
  # to call on it. For example:
  #
  #   ActiveRecordResolver.new(Person, :find_by_name)
  #
  # The finder is called with exactly one argument: the value to resolve.
  def initialize(ar_class, find_method)
    @ar_class = ar_class
    @find_method = find_method
  end

  # Resolve the value: returns the id of the record returned by the finder,
  # or nil when the finder returns nil.
  def resolve(value)
    record = ar_class.__send__(find_method, value)
    return nil if record.nil?

    record.id
  end
end
84
+
85
# Resolver which finds surrogate keys by querying a table, optionally from an
# in-memory cache populated by load_cache.
class SQLResolver
  # Initialize the SQL resolver. Use the given table and field name to search
  # for the appropriate foreign key. The field should be the name of a natural
  # key that is used to locate the surrogate key for the record; an Array of
  # names may be given for a composite natural key.
  #
  # The connection argument is optional. If specified it can be either a symbol
  # referencing a connection defined in the ETL database.yml file or an actual
  # ActiveRecord connection instance (detected by responding to quote). If the
  # connection is not specified then the ActiveRecord::Base.connection will be
  # used.
  def initialize(atable, afield, connection=nil)
    @table = atable
    @field = afield
    @connection = (connection.respond_to?(:quote) ? connection : ETL::Engine.connection(connection)) if connection
    @connection ||= ActiveRecord::Base.connection
  end

  # Resolve the natural key value to the row id. Returns nil for a nil value.
  # After load_cache has run only the cache is consulted; otherwise a SELECT
  # is issued for every call.
  def resolve(value)
    return nil if value.nil?
    return cache[value] if @use_cache

    @connection.select_value("SELECT id FROM #{table_name} WHERE #{wheres(value)}")
  end

  # The table name as returned by ETL::Engine.table for this connection.
  def table_name
    ETL::Engine.table(@table, @connection)
  end

  # Hash of natural key => id. Composite keys are stored as Arrays.
  def cache
    @cache ||= {}
  end

  # Load every id / natural key pair of the table into the cache and switch
  # resolve over to cache-only lookups.
  def load_cache
    # Result rows are indexed by String column names (as record['id'] below
    # already assumes), so field names given as Symbols are converted first.
    columns = field.map { |column| column.to_s }
    q = "SELECT id, #{columns.join(', ')} FROM #{table_name}"
    @connection.select_all(q).each do |record|
      key_values = record.values_at(*columns)
      ck = @field.kind_of?(Array) ? key_values : key_values.first
      cache[ck] = record['id']
    end
    @use_cache = true
  end

  private

  # The configured field name(s), always as an Array.
  def field
    @field.kind_of?(Array) ? @field : [ @field ]
  end

  # Build the WHERE conditions pairing each field with its quoted value.
  def wheres(value)
    value = [ value ] unless value.kind_of?(Array)
    field.zip(value).collect { |column, column_value|
      "#{column} = #{@connection.quote(column_value)}"
    }.join(' AND ')
  end
end
154
+
155
# SQL resolver which fills its cache one value at a time: each value is
# queried the first time it is seen and served from the cache afterwards.
class IncrementalCacheSQLResolver < SQLResolver

  # Takes the same arguments as SQLResolver#initialize.
  def initialize(atable, afield, connection=nil)
    super
  end

  # Resolve the value, consulting the cache before the database. Only
  # successful lookups are cached, so unresolved values are queried again on
  # every call. Returns nil for a nil value.
  def resolve(value)
    return nil if value.nil?

    cached = cache[value]
    return cached if cached

    found = @connection.select_value("SELECT id FROM #{table_name} WHERE #{wheres(value)}")
    cache[value] = found if found
    found
  end

  # Reset the cache to an empty Hash instead of pre-loading the table.
  def load_cache
    @cache = {}
  end

end
179
+
180
# Resolver which looks values up in a comma-delimited file.
class FlatFileResolver
  # Initialize the flat file resolver.
  #
  # * file: path of the file, read with CSV.read on first use
  # * match_index: index of the column compared against the value to resolve
  # * result_field_index: index of the column returned for the matching row
  def initialize(file, match_index, result_field_index)
    @file = file
    @match_index = match_index
    @result_field_index = result_field_index
  end

  # Return the result column of the first row whose match column equals the
  # given value exactly, or nil when no row matches.
  def resolve(value)
    matching_row = rows.find { |candidate| candidate[@match_index] == value }
    matching_row.nil? ? nil : matching_row[@result_field_index]
  end

  protected

  # The parsed rows of the file; read once and memoized.
  def rows
    @rows ||= CSV.read(@file)
  end
end
@@ -0,0 +1,49 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which walks up the hierarchy tree, following the parent ID
    # field, until an ancestor with a value for the field is found.
    #
    # TODO: Let the resolver be implemented in a class so different resolution methods are
    # possible.
    class HierarchyLookupTransform < ETL::Transform::Transform
      # The name of the field to use for the parent ID
      attr_accessor :parent_id_field

      # The target connection name
      attr_accessor :target

      # Initialize the transform
      #
      # Configuration options:
      # * <tt>:target</tt>: The target connection name (required)
      # * <tt>:parent_id_field</tt>: The name of the field to use for the parent ID (defaults to :parent_id)
      def initialize(control, name, configuration={})
        super
        @parent_id_field = configuration[:parent_id_field] || :parent_id
        @target = configuration[:target]
      end

      # Transform the value.
      #
      # When the row has no parent ID the incoming value is returned as is.
      # Otherwise ancestors are looked up one at a time until one has a value,
      # the root is reached, a parent row is missing, or a parent ID repeats
      # (a cycle); the value found (possibly nil) is returned.
      #
      # NOTE(review): when the row has a parent ID the incoming value is
      # replaced by the lookup result even if it was not nil - confirm that
      # this is intended.
      def transform(name, value, row)
        parent_id = row[parent_id_field]
        return value unless parent_id

        # TODO: should use more than just the first source out of the control
        table = control.sources.first.configuration[:table]

        visited = {}
        loop do
          visited[parent_id] = true
          parent_id, value = lookup(name, table, parent_id, parent_id_field)
          # Stop on a value, at the root, or when an ID repeats so that a
          # cyclic hierarchy cannot loop forever.
          break if value || parent_id.nil? || visited.key?(parent_id)
        end
        value
      end

      # Lookup the parent value.
      #
      # Returns a two element Array: the parent's own parent ID and the
      # parent's value for the field. Both are nil when no row has the given
      # ID.
      def lookup(field, table, parent_id, parent_id_field)
        connection = ETL::Engine.connection(target)
        # The ID comes from row data, so it is quoted rather than interpolated raw.
        q = "SELECT #{parent_id_field}, #{field} FROM #{table} WHERE id = #{connection.quote(parent_id)}"
        row = connection.select_one(q)
        return nil, nil if row.nil?

        return row[parent_id_field.to_s], row[field.to_s]
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'digest/md5'
2
+
3
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which replaces the original value with its MD5 hash
    class Md5Transform < ETL::Transform::Transform
      # Return the hexadecimal MD5 digest of the value. The name and row
      # arguments are not used.
      def transform(name, value, row)
        digest = Digest::MD5.hexdigest(value)
        digest
      end
    end
  end
end