activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,34 @@
1
+ infile = 'data/people.txt'
2
+ outfile = 'output/people.txt'
3
+
4
+ source :in, {
5
+ :file => infile,
6
+ :parser => {
7
+ :name => :csv
8
+ }
9
+ },
10
+ [
11
+ :first_name,
12
+ :last_name,
13
+ ]
14
+
15
+ before_write :surrogate_key, :target => :data_warehouse, :table => 'person_dimension', :column => 'id'
16
+ before_write :check_exist, {
17
+ :target => :data_warehouse,
18
+ :table => 'person_dimension',
19
+ :columns => [:first_name, :last_name]
20
+ }
21
+
22
+ destination :out, {
23
+ :file => outfile
24
+ },
25
+ {
26
+ :order => [:id, :first_name, :last_name]
27
+ }
28
+
29
+ post_process :bulk_import, {
30
+ :file => outfile,
31
+ :target => :data_warehouse,
32
+ :table => 'person_dimension',
33
+ :order => [:id, :first_name, :last_name]
34
+ }
@@ -0,0 +1,275 @@
1
+ require 'rubygems'
2
+ require 'spreadsheet'
3
+ require File.dirname(__FILE__) + '/test_helper'
4
+
5
+ class Person < ActiveRecord::Base
6
+ end
7
+
8
+ class BadDestination < ETL::Control::Destination
9
+ def initialize(control, configuration, mapping)
10
+ super
11
+ end
12
+ end
13
+
14
+ # Test the functionality of destinations
15
+ class DestinationTest < Test::Unit::TestCase
16
+ # Test a file destination
17
+ def test_file_destination
18
+ outfile = 'output/test_file_destination.txt'
19
+ outfile2 = 'output/test_file_destination.2.txt'
20
+ row = ETL::Row[ :address => '123 SW 1st Street', :city => 'Melbourne',
21
+ :state => 'Florida', :country => 'United States' ]
22
+ row_needs_escape = ETL::Row[ :address => "Allen's Way",
23
+ :city => 'Some City', :state => 'Some State', :country => 'Mexico' ]
24
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
25
+ '/delimited.ctl')
26
+
27
+ # First define a basic configuration to check defaults
28
+ configuration = {
29
+ :file => outfile,
30
+ :buffer_size => 0,
31
+ }
32
+ mapping = {
33
+ :order => [:address, :city, :state, :country, :country_code],
34
+ :virtual => {
35
+ :country_code => Proc.new do |r|
36
+ {
37
+ 'United States' => 'US',
38
+ 'Mexico' => 'MX'
39
+ }[r[:country]]
40
+ end
41
+ }
42
+ }
43
+
44
+ dest = ETL::Control::FileDestination.new(control, configuration, mapping)
45
+ dest.write(row)
46
+ dest.write(row_needs_escape)
47
+ dest.close
48
+
49
+ configuration[:file] = outfile2
50
+ configuration[:separator] = '|'
51
+ configuration[:eol] = "[EOL]\n"
52
+
53
+ dest = ETL::Control::FileDestination.new(control, configuration, mapping)
54
+ dest.write(row)
55
+ dest.write(row_needs_escape)
56
+ dest.close
57
+
58
+ # Read back the resulting
59
+ lines = open(File.join(File.dirname(__FILE__), outfile), 'r').readlines
60
+ assert_equal "123 SW 1st Street,Melbourne,Florida,United States,US\n", lines[0]
61
+ assert_equal "Allen's Way,Some City,Some State,Mexico,MX\n", lines[1]
62
+
63
+ lines = open(File.join(File.dirname(__FILE__), outfile2), 'r').readlines
64
+ assert_equal "123 SW 1st Street|Melbourne|Florida|United States|US[EOL]\n", lines[0]
65
+ assert_equal "Allen's Way|Some City|Some State|Mexico|MX[EOL]\n", lines[1]
66
+ end
67
+
68
+ # Test a database destination
69
+ def test_database_destination
70
+ row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
71
+ row_needs_escape = ETL::Row[:id => 2, :first_name => "Foo's", :last_name => "Bar", :ssn => '000000000' ]
72
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
73
+ '/delimited.ctl')
74
+
75
+ Person.delete_all
76
+ assert_equal 0, Person.count
77
+
78
+ # First define a basic configuration to check defaults
79
+ configuration = {
80
+ :target => :data_warehouse,
81
+ :database => 'etl_unittest',
82
+ :table => 'people',
83
+ :buffer_size => 0
84
+ }
85
+ mapping = { :order => [:id, :first_name, :last_name, :ssn] }
86
+ dest = ETL::Control::DatabaseDestination.new(control, configuration, mapping)
87
+ dest.write(row)
88
+ dest.close
89
+
90
+ assert_equal 1, Person.find(:all).length
91
+ end
92
+
93
+ def test_database_destination_with_control
94
+ row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
95
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
96
+ '/delimited_destination_db.ctl')
97
+ Person.delete_all
98
+ assert_equal 0, Person.count
99
+ d = control.destinations.first
100
+ dest = ETL::Control::DatabaseDestination.new(control, d.configuration, d.mapping)
101
+ dest.write(row)
102
+ dest.close
103
+ assert_equal 1, Person.count
104
+ end
105
+
106
+ def test_unique
107
+ row1 = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
108
+ row2 = ETL::Row[:id => 2, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
109
+ row3 = ETL::Row[:id => 3, :first_name => 'John', :last_name => 'Smith', :ssn => '000112222']
110
+
111
+ outfile = 'output/test_unique.txt'
112
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
113
+
114
+ # First define a basic configuration to check defaults
115
+ configuration = { :file => outfile, :buffer_size => 0, :unique => [:ssn]}
116
+ mapping = {
117
+ :order => [:first_name, :last_name, :ssn]
118
+ }
119
+ dest = ETL::Control::FileDestination.new(control, configuration, mapping)
120
+ dest.write(row1)
121
+ dest.write(row2)
122
+ dest.write(row3)
123
+
124
+ # Close (and flush) the destination
125
+ dest.close
126
+
127
+ # Read back the resulting
128
+ lines = open(File.join(File.dirname(__FILE__), outfile), 'r').readlines
129
+ assert_equal "Bob,Smith,111234444\n", lines[0]
130
+ assert_equal "John,Smith,000112222\n", lines[1]
131
+ end
132
+
133
+ def test_multiple_unique
134
+ row1 = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
135
+ row2 = ETL::Row[:id => 2, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
136
+ row3 = ETL::Row[:id => 3, :first_name => 'Bob', :last_name => 'Smith', :ssn => '000112222']
137
+
138
+ outfile = 'output/test_multiple_unique.txt'
139
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
140
+
141
+ # First define a basic configuration to check defaults
142
+ configuration = { :file => outfile, :buffer_size => 0, :unique => [:last_name,:first_name]}
143
+ mapping = {
144
+ :order => [:first_name, :last_name, :ssn]
145
+ }
146
+ dest = ETL::Control::FileDestination.new(control, configuration, mapping)
147
+ dest.write(row1)
148
+ dest.write(row2)
149
+ dest.write(row3)
150
+
151
+ # Close (and flush) the destination
152
+ dest.close
153
+
154
+ # Read back the resulting
155
+ lines = open(File.join(File.dirname(__FILE__), outfile), 'r').readlines
156
+ assert_equal "Bob,Smith,111234444\n", lines[0]
157
+ assert_equal 1, lines.length
158
+ end
159
+
160
+ def test_bad_destination
161
+ control = ETL::Control::Control.parse_text('')
162
+ configuration = {}
163
+ mapping = {}
164
+ dest = BadDestination.new(control, configuration, mapping)
165
+ dest.write(nil)
166
+ assert_raise NotImplementedError do
167
+ dest.flush
168
+ end
169
+ assert_raise NotImplementedError do
170
+ dest.close
171
+ end
172
+ end
173
+
174
+ def test_excel_destination
175
+ outfile = 'output/test_excel_destination.xls'
176
+ row = ETL::Row[ :address => '123 SW 1st Street', :city => 'Melbourne',
177
+ :state => 'Florida', :country => 'United States' ]
178
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
179
+ '/delimited_excel.ctl')
180
+
181
+ # First define a basic configuration to check defaults
182
+ configuration = {
183
+ :file => outfile,
184
+ :buffer_size => 0,
185
+ }
186
+ mapping = {
187
+ :order => [:address, :city, :state, :country, :country_code],
188
+ :virtual => {
189
+ :country_code => Proc.new do |r|
190
+ {
191
+ 'United States' => 'US',
192
+ 'Mexico' => 'MX'
193
+ }[r[:country]]
194
+ end
195
+ }
196
+ }
197
+
198
+ dest = ETL::Control::ExcelDestination.new(control, configuration, mapping)
199
+ dest.write(row)
200
+ dest.close
201
+
202
+ # Read back the resulting
203
+ book = Spreadsheet.open File.join(File.dirname(__FILE__), outfile)
204
+ sheet = book.worksheet(0)
205
+
206
+ assert_equal "123 SW 1st Street", sheet[0, 0]
207
+ assert_equal "Melbourne", sheet[0, 1]
208
+ assert_equal "Florida", sheet[0, 2]
209
+ assert_equal "United States", sheet[0, 3]
210
+ assert_equal "US", sheet[0, 4]
211
+ end
212
+
213
+ # Test an update database destination
214
+ def test_update_database_destination
215
+ row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
216
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
217
+ '/delimited_update.ctl')
218
+
219
+ Person.delete_all
220
+ assert_equal 0, Person.count
221
+ test_database_destination
222
+
223
+ # First define a basic configuration to check defaults
224
+ configuration = {
225
+ :type => :update_database,
226
+ :target => :data_warehouse,
227
+ :database => 'etl_unittest',
228
+ :table => 'people',
229
+ :buffer_size => 0
230
+ }
231
+ mapping = {
232
+ :conditions => [{:field => "\#{conn.quote_column_name(:id)}", :value => "\#{conn.quote(row[:id])}", :comp => "="}],
233
+ :order => [:id, :first_name, :last_name, :ssn]
234
+ }
235
+ dest = ETL::Control::UpdateDatabaseDestination.new(control, configuration, mapping)
236
+ dest.write(row)
237
+ dest.close
238
+
239
+ assert_equal 1, Person.find(:all).length
240
+
241
+ end
242
+
243
+ # Test an insert/update database destination
244
+ def test_insert_update_database_destination
245
+ row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
246
+ row_needs_escape = ETL::Row[:id => 2, :first_name => "Foo's", :last_name => "Bar", :ssn => '000000000' ]
247
+ row_needs_update = ETL::Row[:id => 1, :first_name => "Sean", :last_name => "Toon", :ssn => '000000000' ]
248
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
249
+ '/delimited_insert_update.ctl')
250
+
251
+ Person.delete_all
252
+ assert_equal 0, Person.count
253
+
254
+ # First define a basic configuration to check defaults
255
+ configuration = {
256
+ :type => :insert_update_database,
257
+ :target => :data_warehouse,
258
+ :database => 'etl_unittest',
259
+ :table => 'people',
260
+ :buffer_size => 0
261
+ }
262
+ mapping = {
263
+ :primarykey => [:id],
264
+ :order => [:id, :first_name, :last_name, :ssn]
265
+ }
266
+ dest = ETL::Control::InsertUpdateDatabaseDestination.new(control, configuration, mapping)
267
+ dest.write(row)
268
+ dest.write(row_needs_escape)
269
+ dest.write(row_needs_update)
270
+ dest.close
271
+
272
+ assert_equal 2, Person.find(:all).length
273
+ end
274
+
275
+ end
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class BadDirective < ETL::Batch::Directive
4
+
5
+ end
6
+
7
+ class BatchTest < Test::Unit::TestCase
8
+
9
+ attr_reader :file
10
+ attr_reader :engine
11
+ def setup
12
+ @file = File.dirname(__FILE__) + '/all.ebf'
13
+ @engine = ETL::Engine.new
14
+ end
15
+
16
+ def test_directive_without_implementation_should_fail
17
+ batch = ETL::Batch::Batch.resolve(file, engine)
18
+ assert_raise RuntimeError do
19
+ d = BadDirective.new(batch)
20
+ d.execute
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,32 @@
1
+ # encoding: utf-8
2
+ require File.dirname(__FILE__) + '/test_helper'
3
+
4
+ class EncodeProcessorTest < Test::Unit::TestCase
5
+
6
+ SOURCE = 'data/encode_source_latin1.txt'
7
+ TARGET = 'output/encode_destination_utf-8.txt'
8
+
9
+ def setup
10
+ @control = flexmock("control")
11
+ @control.should_receive(:file).twice.and_return(File.dirname(__FILE__) + '/fake-control.ctl')
12
+ end
13
+
14
+ def test_should_transform_a_latin1_file_to_utf8_with_grace
15
+ configuration = { :source_file => SOURCE, :source_encoding => 'latin1', :target_file => TARGET, :target_encoding => 'utf-8' }
16
+ ETL::Processor::EncodeProcessor.new(@control, configuration).process
17
+ assert_equal "éphémère has accents.\nlet's encode them.", IO.read(File.join(File.dirname(__FILE__),TARGET))
18
+ end
19
+
20
+ def test_should_throw_exception_on_unsupported_encoding
21
+ configuration = { :source_file => SOURCE, :source_encoding => 'acme-encoding', :target_file => TARGET, :target_encoding => 'utf-8' }
22
+ error = assert_raise(ETL::ControlError) { ETL::Processor::EncodeProcessor.new(@control, configuration) }
23
+ assert_equal "Either the source encoding 'acme-encoding' or the target encoding 'utf-8' is not supported", error.message
24
+ end
25
+
26
+ def test_should_throw_exception_when_target_and_source_are_the_same
27
+ configuration = { :source_file => SOURCE, :source_encoding => 'latin1', :target_file => SOURCE, :target_encoding => 'utf-8' }
28
+ error = assert_raise(ETL::ControlError) { ETL::Processor::EncodeProcessor.new(@control, configuration) }
29
+ assert_equal "Source and target file cannot currently point to the same file", error.message
30
+ end
31
+
32
+ end
@@ -0,0 +1,78 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class EngineTest < Test::Unit::TestCase
4
+
5
+ context 'process' do
6
+
7
+ should 'raise an error when a file which does not exist is given' do
8
+ error = assert_raise(Errno::ENOENT) do
9
+ ETL::Engine.process('foo-bar.ctl')
10
+ end
11
+
12
+ assert_equal "No such file or directory - foo-bar.ctl", error.message
13
+ end
14
+
15
+ should 'raise an error when an unknown file type is given' do
16
+ error = assert_raise(RuntimeError) do
17
+ ETL::Engine.process(__FILE__)
18
+ end
19
+
20
+ assert_match /Unsupported file type/, error.message
21
+ end
22
+
23
+ should_eventually 'stop as soon as the error threshold is reached' do
24
+ engine = ETL::Engine.new
25
+
26
+ assert_equal 0, engine.errors.size
27
+
28
+ engine.process ETL::Control::Control.parse_text <<CTL
29
+ set_error_threshold 1
30
+ source :in, { :type => :enumerable, :enumerable => (1..100) }
31
+ after_read { |row| raise "Failure" }
32
+ CTL
33
+
34
+ assert_equal 1, engine.errors.size
35
+ end
36
+
37
+ end
38
+
39
+ context 'connection' do
40
+
41
+ should 'return an ActiveRecord configuration by name' do
42
+ assert_not_nil ETL::Engine.connection(:data_warehouse)
43
+ end
44
+
45
+ should 'raise an error on non existent connection' do
46
+ error = assert_raise(ETL::ETLError) do
47
+ ETL::Engine.connection(:does_not_exist)
48
+ end
49
+ assert_equal "Cannot find connection named :does_not_exist", error.message
50
+ end
51
+
52
+ should 'raise an error when requesting a connection with no name' do
53
+ error = assert_raise(ETL::ETLError) do
54
+ ETL::Engine.connection(" ")
55
+ end
56
+ assert_equal "Connection with no name requested. Is there a missing :target parameter somewhere?", error.message
57
+ end
58
+ end
59
+
60
+ context 'temp tables' do
61
+ attr_reader :connection
62
+
63
+ setup do
64
+ @connection = ETL::Engine.connection(:data_warehouse)
65
+ end
66
+
67
+ should 'return unmodified table name when temp tables are disabled' do
68
+ assert_equal 'foo', ETL::Engine.table('foo', ETL::Engine.connection(:data_warehouse))
69
+ end
70
+
71
+ should 'return temp table name instead of table name when temp tables are enabled' do
72
+ ETL::Engine.use_temp_tables = true
73
+ assert_equal 'tmp_people', ETL::Engine.table('people', connection)
74
+ ETL::Engine.use_temp_tables = false
75
+ end
76
+ end
77
+
78
+ end
@@ -0,0 +1,28 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class EnsureFieldsPresenceProcessorTest < Test::Unit::TestCase
4
+
5
+ def new_processor(options)
6
+ ETL::Processor::EnsureFieldsPresenceProcessor.new(nil, options)
7
+ end
8
+
9
+ should 'raise an error unless :fields is specified' do
10
+ error = assert_raises(ETL::ControlError) { new_processor({}) }
11
+ assert_equal ":fields must be specified", error.message
12
+ end
13
+
14
+ should 'raise an error if a field is missing in the row' do
15
+ error = assert_raise(ETL::ControlError) do
16
+ processor = new_processor(:fields => [:key])
17
+ processor.process(ETL::Row[])
18
+ end
19
+
20
+ assert_match /missing required field\(s\)/, error.message
21
+ end
22
+
23
+ should 'return the row if the required fields are in the row' do
24
+ row = ETL::Row[:first => nil, :second => "Barry"]
25
+ assert_equal row, new_processor(:fields => [:first, :second]).process(row)
26
+ end
27
+
28
+ end