activewarehouse-etl-sgonyea 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/0.9-UPGRADE +6 -0
- data/CHANGELOG +236 -0
- data/Gemfile +4 -0
- data/HOW_TO_RELEASE +13 -0
- data/LICENSE +7 -0
- data/README.textile +111 -0
- data/Rakefile +103 -0
- data/TODO +28 -0
- data/active_support_logger.patch +78 -0
- data/activewarehouse-etl.gemspec +36 -0
- data/bin/etl +28 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +16 -0
- data/lib/etl.rb +97 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +65 -0
- data/lib/etl/builder.rb +2 -0
- data/lib/etl/builder/date_dimension_builder.rb +96 -0
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +89 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +405 -0
- data/lib/etl/control/destination.rb +438 -0
- data/lib/etl/control/destination/csv_destination.rb +113 -0
- data/lib/etl/control/destination/database_destination.rb +97 -0
- data/lib/etl/control/destination/excel_destination.rb +91 -0
- data/lib/etl/control/destination/file_destination.rb +126 -0
- data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
- data/lib/etl/control/destination/update_database_destination.rb +109 -0
- data/lib/etl/control/destination/yaml_destination.rb +74 -0
- data/lib/etl/control/source.rb +132 -0
- data/lib/etl/control/source/database_source.rb +224 -0
- data/lib/etl/control/source/enumerable_source.rb +11 -0
- data/lib/etl/control/source/file_source.rb +90 -0
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +42 -0
- data/lib/etl/engine.rb +582 -0
- data/lib/etl/execution.rb +19 -0
- data/lib/etl/execution/base.rb +8 -0
- data/lib/etl/execution/batch.rb +10 -0
- data/lib/etl/execution/job.rb +8 -0
- data/lib/etl/execution/migration.rb +90 -0
- data/lib/etl/generator.rb +2 -0
- data/lib/etl/generator/generator.rb +20 -0
- data/lib/etl/generator/surrogate_key_generator.rb +39 -0
- data/lib/etl/http_tools.rb +139 -0
- data/lib/etl/parser.rb +11 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
- data/lib/etl/parser/csv_parser.rb +93 -0
- data/lib/etl/parser/excel_parser.rb +112 -0
- data/lib/etl/parser/fixed_width_parser.rb +65 -0
- data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
- data/lib/etl/parser/parser.rb +41 -0
- data/lib/etl/parser/sax_parser.rb +218 -0
- data/lib/etl/parser/xml_parser.rb +65 -0
- data/lib/etl/processor.rb +11 -0
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +94 -0
- data/lib/etl/processor/check_exist_processor.rb +80 -0
- data/lib/etl/processor/check_unique_processor.rb +39 -0
- data/lib/etl/processor/copy_field_processor.rb +26 -0
- data/lib/etl/processor/database_join_processor.rb +82 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
- data/lib/etl/processor/escape_csv_processor.rb +77 -0
- data/lib/etl/processor/filter_row_processor.rb +51 -0
- data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
- data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
- data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
- data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +25 -0
- data/lib/etl/processor/rename_processor.rb +24 -0
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/row_processor.rb +27 -0
- data/lib/etl/processor/sequence_processor.rb +23 -0
- data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
- data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
- data/lib/etl/processor/surrogate_key_processor.rb +53 -0
- data/lib/etl/processor/truncate_processor.rb +40 -0
- data/lib/etl/processor/zip_file_processor.rb +27 -0
- data/lib/etl/row.rb +20 -0
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +20 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/block_transform.rb +13 -0
- data/lib/etl/transform/calculation_transform.rb +71 -0
- data/lib/etl/transform/date_to_string_transform.rb +20 -0
- data/lib/etl/transform/decode_transform.rb +51 -0
- data/lib/etl/transform/default_transform.rb +20 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
- data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
- data/lib/etl/transform/md5_transform.rb +13 -0
- data/lib/etl/transform/ordinalize_transform.rb +14 -0
- data/lib/etl/transform/sha1_transform.rb +13 -0
- data/lib/etl/transform/split_fields_transform.rb +27 -0
- data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
- data/lib/etl/transform/string_to_date_transform.rb +16 -0
- data/lib/etl/transform/string_to_time_transform.rb +11 -0
- data/lib/etl/transform/transform.rb +61 -0
- data/lib/etl/transform/trim_transform.rb +26 -0
- data/lib/etl/transform/type_transform.rb +35 -0
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +3 -0
- data/test-matrix.yml +10 -0
- data/test/.gitignore +1 -0
- data/test/.ignore +2 -0
- data/test/all.ebf +6 -0
- data/test/apache_combined_log.ctl +11 -0
- data/test/batch_test.rb +41 -0
- data/test/batch_with_error.ebf +6 -0
- data/test/batched1.ctl +0 -0
- data/test/batched2.ctl +0 -0
- data/test/block_processor.ctl +6 -0
- data/test/block_processor_error.ctl +1 -0
- data/test/block_processor_pre_post_process.ctl +4 -0
- data/test/block_processor_remove_rows.ctl +5 -0
- data/test/block_processor_test.rb +38 -0
- data/test/check_exist_processor_test.rb +92 -0
- data/test/check_unique_processor_test.rb +40 -0
- data/test/config/Gemfile.rails-2.3.x +3 -0
- data/test/config/Gemfile.rails-2.3.x.lock +53 -0
- data/test/config/Gemfile.rails-3.0.x +3 -0
- data/test/config/Gemfile.rails-3.0.x.lock +61 -0
- data/test/config/common.rb +29 -0
- data/test/connection/mysql/connection.rb +9 -0
- data/test/connection/mysql/schema.sql +37 -0
- data/test/connection/postgresql/connection.rb +13 -0
- data/test/connection/postgresql/schema.sql +40 -0
- data/test/control_test.rb +43 -0
- data/test/data/apache_combined_log.txt +3 -0
- data/test/data/bulk_import.txt +3 -0
- data/test/data/bulk_import_with_empties.txt +3 -0
- data/test/data/decode.txt +3 -0
- data/test/data/delimited.txt +3 -0
- data/test/data/encode_source_latin1.txt +2 -0
- data/test/data/excel.xls +0 -0
- data/test/data/excel2.xls +0 -0
- data/test/data/fixed_width.txt +3 -0
- data/test/data/multiple_delimited_1.txt +3 -0
- data/test/data/multiple_delimited_2.txt +3 -0
- data/test/data/nokogiri.xml +38 -0
- data/test/data/people.txt +3 -0
- data/test/data/sax.xml +14 -0
- data/test/data/xml.xml +16 -0
- data/test/database_join_processor_test.rb +43 -0
- data/test/date_dimension_builder_test.rb +96 -0
- data/test/delimited.ctl +30 -0
- data/test/delimited_absolute.ctl +31 -0
- data/test/delimited_destination_db.ctl +23 -0
- data/test/delimited_excel.ctl +31 -0
- data/test/delimited_insert_update.ctl +34 -0
- data/test/delimited_update.ctl +34 -0
- data/test/delimited_with_bulk_load.ctl +34 -0
- data/test/destination_test.rb +275 -0
- data/test/directive_test.rb +23 -0
- data/test/encode_processor_test.rb +32 -0
- data/test/engine_test.rb +78 -0
- data/test/ensure_fields_presence_processor_test.rb +28 -0
- data/test/errors.ctl +24 -0
- data/test/etl_test.rb +42 -0
- data/test/excel.ctl +24 -0
- data/test/excel2.ctl +25 -0
- data/test/fixed_width.ctl +35 -0
- data/test/foreign_key_lookup_transform_test.rb +50 -0
- data/test/generator_test.rb +14 -0
- data/test/inline_parser.ctl +17 -0
- data/test/mocks/mock_destination.rb +26 -0
- data/test/mocks/mock_source.rb +25 -0
- data/test/model_source.ctl +14 -0
- data/test/multiple_delimited.ctl +22 -0
- data/test/multiple_source_delimited.ctl +39 -0
- data/test/nokogiri_all.ctl +35 -0
- data/test/nokogiri_select.ctl +35 -0
- data/test/nokogiri_test.rb +35 -0
- data/test/parser_test.rb +224 -0
- data/test/performance/delimited.ctl +30 -0
- data/test/processor_test.rb +44 -0
- data/test/row_processor_test.rb +17 -0
- data/test/sax.ctl +26 -0
- data/test/scd/1.txt +1 -0
- data/test/scd/2.txt +1 -0
- data/test/scd/3.txt +1 -0
- data/test/scd_test.rb +257 -0
- data/test/scd_test_type_1.ctl +43 -0
- data/test/scd_test_type_2.ctl +34 -0
- data/test/screen_test.rb +9 -0
- data/test/screen_test_error.ctl +3 -0
- data/test/screen_test_fatal.ctl +3 -0
- data/test/source_test.rb +154 -0
- data/test/test_helper.rb +37 -0
- data/test/transform_test.rb +101 -0
- data/test/truncate_processor_test.rb +37 -0
- data/test/xml.ctl +31 -0
- metadata +370 -0
@@ -0,0 +1,34 @@
|
|
1
|
+
# ETL control script. Declares one CSV-parsed file source, two before_write
# processors (:surrogate_key and :check_exist) against person_dimension, a
# file destination, and a :bulk_import post-process step for the written file.
people_csv   = 'data/people.txt'
staging_file = 'output/people.txt'

# Source: the people file, parsed with the :csv parser, two named fields.
source :in,
       { :file => people_csv, :parser => { :name => :csv } },
       [:first_name, :last_name]

# Processors registered to run before rows are written.
before_write :surrogate_key,
             :target => :data_warehouse,
             :table  => 'person_dimension',
             :column => 'id'

before_write :check_exist, {
  :target  => :data_warehouse,
  :table   => 'person_dimension',
  :columns => [:first_name, :last_name]
}

# Destination: flat file with the columns in this order.
destination :out,
            { :file => staging_file },
            { :order => [:id, :first_name, :last_name] }

# Post-process: bulk import of the file written by the destination above.
post_process :bulk_import, {
  :file   => staging_file,
  :target => :data_warehouse,
  :table  => 'person_dimension',
  :order  => [:id, :first_name, :last_name]
}
|
@@ -0,0 +1,275 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'spreadsheet'
|
3
|
+
require File.dirname(__FILE__) + '/test_helper'
|
4
|
+
|
5
|
+
# Bare ActiveRecord model; the destination tests in this file use it to clear
# and count rows.
class Person < ActiveRecord::Base; end
|
7
|
+
|
8
|
+
# Destination subclass that adds no behaviour of its own: its constructor only
# forwards to the parent. test_bad_destination expects flush and close on an
# instance to raise NotImplementedError.
class BadDestination < ETL::Control::Destination
  def initialize(control, configuration, mapping)
    super(control, configuration, mapping)
  end
end
|
13
|
+
|
14
|
+
# Test the functionality of destinations
|
15
|
+
class DestinationTest < Test::Unit::TestCase
|
16
|
+
# Test a file destination
|
17
|
+
# Writes the same two rows through two FileDestination instances -- first with
# only :file and :buffer_size set, then with a custom separator and
# end-of-line marker -- and checks the lines found in each output file.
def test_file_destination
  outfile = 'output/test_file_destination.txt'
  outfile2 = 'output/test_file_destination.2.txt'
  row = ETL::Row[ :address => '123 SW 1st Street', :city => 'Melbourne',
    :state => 'Florida', :country => 'United States' ]
  row_needs_escape = ETL::Row[ :address => "Allen's Way",
    :city => 'Some City', :state => 'Some State', :country => 'Mexico' ]
  control = ETL::Control::Control.parse(File.dirname(__FILE__) +
    '/delimited.ctl')

  # First define a basic configuration to check defaults
  configuration = {
    :file => outfile,
    :buffer_size => 0
  }
  mapping = {
    :order => [:address, :city, :state, :country, :country_code],
    :virtual => {
      # Virtual field: looked up from the row's :country value.
      :country_code => Proc.new do |r|
        {
          'United States' => 'US',
          'Mexico' => 'MX'
        }[r[:country]]
      end
    }
  }

  dest = ETL::Control::FileDestination.new(control, configuration, mapping)
  dest.write(row)
  dest.write(row_needs_escape)
  dest.close

  # Second pass: same rows, different file, separator and end-of-line marker.
  configuration[:file] = outfile2
  configuration[:separator] = '|'
  configuration[:eol] = "[EOL]\n"

  dest = ETL::Control::FileDestination.new(control, configuration, mapping)
  dest.write(row)
  dest.write(row_needs_escape)
  dest.close

  # Read back the resulting files. File.readlines opens and closes the file
  # itself, so no handle is left open the way open(...).readlines leaves one.
  lines = File.readlines(File.join(File.dirname(__FILE__), outfile))
  assert_equal "123 SW 1st Street,Melbourne,Florida,United States,US\n", lines[0]
  assert_equal "Allen's Way,Some City,Some State,Mexico,MX\n", lines[1]

  lines = File.readlines(File.join(File.dirname(__FILE__), outfile2))
  assert_equal "123 SW 1st Street|Melbourne|Florida|United States|US[EOL]\n", lines[0]
  assert_equal "Allen's Way|Some City|Some State|Mexico|MX[EOL]\n", lines[1]
end
|
67
|
+
|
68
|
+
# Test a database destination
|
69
|
+
# Writes one row through a DatabaseDestination and checks that exactly one
# Person record exists afterwards. test_update_database_destination also calls
# this method to seed the table, so it must leave exactly that one row behind.
def test_database_destination
  row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  control = ETL::Control::Control.parse(File.dirname(__FILE__) +
    '/delimited.ctl')

  # Start from an empty table so the final count is meaningful.
  Person.delete_all
  assert_equal 0, Person.count

  # First define a basic configuration to check defaults
  configuration = {
    :target => :data_warehouse,
    :database => 'etl_unittest',
    :table => 'people',
    :buffer_size => 0
  }
  mapping = { :order => [:id, :first_name, :last_name, :ssn] }
  dest = ETL::Control::DatabaseDestination.new(control, configuration, mapping)
  dest.write(row)
  dest.close

  # Person.count matches the other assertions in this class and avoids the
  # deprecated find(:all) finder.
  assert_equal 1, Person.count
end
|
92
|
+
|
93
|
+
# Builds a DatabaseDestination from the configuration and mapping of the first
# destination declared in delimited_destination_db.ctl, writes one row and
# checks that one Person record exists afterwards.
def test_database_destination_with_control
  control_path = File.dirname(__FILE__) + '/delimited_destination_db.ctl'
  person_row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  control = ETL::Control::Control.parse(control_path)

  Person.delete_all
  assert_equal 0, Person.count

  declared = control.destinations.first
  destination = ETL::Control::DatabaseDestination.new(
    control, declared.configuration, declared.mapping
  )
  destination.write(person_row)
  destination.close

  assert_equal 1, Person.count
end
|
105
|
+
|
106
|
+
# Writes three rows, two of which share an :ssn, through a FileDestination
# configured with :unique => [:ssn], and checks that only the two rows with
# distinct :ssn values reach the output file.
def test_unique
  row1 = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  row2 = ETL::Row[:id => 2, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  row3 = ETL::Row[:id => 3, :first_name => 'John', :last_name => 'Smith', :ssn => '000112222']

  outfile = 'output/test_unique.txt'
  control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')

  # First define a basic configuration to check defaults
  configuration = { :file => outfile, :buffer_size => 0, :unique => [:ssn]}
  mapping = {
    :order => [:first_name, :last_name, :ssn]
  }
  dest = ETL::Control::FileDestination.new(control, configuration, mapping)
  dest.write(row1)
  dest.write(row2)
  dest.write(row3)

  # Close (and flush) the destination
  dest.close

  # Read back the resulting file. File.readlines closes the file itself,
  # unlike open(...).readlines, which leaves the handle open.
  lines = File.readlines(File.join(File.dirname(__FILE__), outfile))
  assert_equal "Bob,Smith,111234444\n", lines[0]
  assert_equal "John,Smith,000112222\n", lines[1]
  # Pin the line count as test_multiple_unique does, so stray extra lines
  # cannot go unnoticed.
  assert_equal 2, lines.length
end
|
132
|
+
|
133
|
+
# Writes three rows that all share the same first and last name through a
# FileDestination configured with :unique => [:last_name, :first_name], and
# checks that exactly one line reaches the output file.
def test_multiple_unique
  row1 = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  row2 = ETL::Row[:id => 2, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  row3 = ETL::Row[:id => 3, :first_name => 'Bob', :last_name => 'Smith', :ssn => '000112222']

  outfile = 'output/test_multiple_unique.txt'
  control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')

  # First define a basic configuration to check defaults
  configuration = { :file => outfile, :buffer_size => 0, :unique => [:last_name,:first_name]}
  mapping = {
    :order => [:first_name, :last_name, :ssn]
  }
  dest = ETL::Control::FileDestination.new(control, configuration, mapping)
  dest.write(row1)
  dest.write(row2)
  dest.write(row3)

  # Close (and flush) the destination
  dest.close

  # Read back the resulting file. File.readlines closes the file itself,
  # unlike open(...).readlines, which leaves the handle open.
  lines = File.readlines(File.join(File.dirname(__FILE__), outfile))
  assert_equal "Bob,Smith,111234444\n", lines[0]
  assert_equal 1, lines.length
end
|
159
|
+
|
160
|
+
# A destination subclass with no flush implementation: write(nil) is expected
# to go through, while flush and close must each raise NotImplementedError.
def test_bad_destination
  empty_control = ETL::Control::Control.parse_text('')
  dest = BadDestination.new(empty_control, {}, {})

  dest.write(nil)

  assert_raise(NotImplementedError) { dest.flush }
  assert_raise(NotImplementedError) { dest.close }
end
|
173
|
+
|
174
|
+
# Writes one row (plus a virtual :country_code field) through an
# ExcelDestination, then opens the workbook with Spreadsheet and checks the
# five cells of the first row of the first worksheet.
def test_excel_destination
  outfile = 'output/test_excel_destination.xls'
  control = ETL::Control::Control.parse(File.dirname(__FILE__) +
    '/delimited_excel.ctl')
  row = ETL::Row[ :address => '123 SW 1st Street', :city => 'Melbourne',
    :state => 'Florida', :country => 'United States' ]

  # First define a basic configuration to check defaults
  configuration = { :file => outfile, :buffer_size => 0 }

  # Virtual field: looked up from the row's :country value.
  country_code = Proc.new { |r|
    { 'United States' => 'US', 'Mexico' => 'MX' }[r[:country]]
  }
  mapping = {
    :order => [:address, :city, :state, :country, :country_code],
    :virtual => { :country_code => country_code }
  }

  dest = ETL::Control::ExcelDestination.new(control, configuration, mapping)
  dest.write(row)
  dest.close

  # Read back the resulting workbook
  book = Spreadsheet.open File.join(File.dirname(__FILE__), outfile)
  sheet = book.worksheet(0)

  expected_cells = ["123 SW 1st Street", "Melbourne", "Florida", "United States", "US"]
  expected_cells.each_with_index do |expected, column|
    assert_equal expected, sheet[0, column]
  end
end
|
212
|
+
|
213
|
+
# Test an update database destination
|
214
|
+
# Seeds the people table with one row by running test_database_destination,
# then writes the same row through an UpdateDatabaseDestination and checks
# that the table still holds exactly one Person.
def test_update_database_destination
  row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  control = ETL::Control::Control.parse(File.dirname(__FILE__) +
    '/delimited_update.ctl')

  Person.delete_all
  assert_equal 0, Person.count
  # Reuse the insert test as a fixture: it leaves one Person (id 1) behind.
  test_database_destination

  # First define a basic configuration to check defaults
  configuration = {
    :type => :update_database,
    :target => :data_warehouse,
    :database => 'etl_unittest',
    :table => 'people',
    :buffer_size => 0
  }
  mapping = {
    # The \#{...} sequences are escaped on purpose, so these strings contain
    # literal "#{...}" text rather than being interpolated here.
    # NOTE(review): presumably the destination evaluates them per row -- confirm.
    :conditions => [{:field => "\#{conn.quote_column_name(:id)}", :value => "\#{conn.quote(row[:id])}", :comp => "="}],
    :order => [:id, :first_name, :last_name, :ssn]
  }
  dest = ETL::Control::UpdateDatabaseDestination.new(control, configuration, mapping)
  dest.write(row)
  dest.close

  # Person.count matches the other assertions in this class and avoids the
  # deprecated find(:all) finder.
  assert_equal 1, Person.count
end
|
242
|
+
|
243
|
+
# Test an insert/update database destination
|
244
|
+
# Writes three rows through an InsertUpdateDatabaseDestination keyed on :id.
# Two of the rows share id 1, so the test expects two Person records at the
# end rather than three.
def test_insert_update_database_destination
  row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
  row_needs_escape = ETL::Row[:id => 2, :first_name => "Foo's", :last_name => "Bar", :ssn => '000000000' ]
  row_needs_update = ETL::Row[:id => 1, :first_name => "Sean", :last_name => "Toon", :ssn => '000000000' ]
  control = ETL::Control::Control.parse(File.dirname(__FILE__) +
    '/delimited_insert_update.ctl')

  Person.delete_all
  assert_equal 0, Person.count

  # First define a basic configuration to check defaults
  configuration = {
    :type => :insert_update_database,
    :target => :data_warehouse,
    :database => 'etl_unittest',
    :table => 'people',
    :buffer_size => 0
  }
  mapping = {
    :primarykey => [:id],
    :order => [:id, :first_name, :last_name, :ssn]
  }
  dest = ETL::Control::InsertUpdateDatabaseDestination.new(control, configuration, mapping)
  dest.write(row)
  dest.write(row_needs_escape)
  dest.write(row_needs_update)
  dest.close

  # Person.count matches the other assertions in this class and avoids the
  # deprecated find(:all) finder.
  assert_equal 2, Person.count
end
|
274
|
+
|
275
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Directive subclass with an empty body; the test in this file expects
# execute on an instance to raise RuntimeError.
class BadDirective < ETL::Batch::Directive; end
|
6
|
+
|
7
|
+
# Checks that executing a directive subclass with no implementation fails.
# NOTE(review): this class appears to live in directive_test.rb yet is named
# BatchTest; if another loaded test file defines BatchTest too, the two
# definitions would merge into one class -- confirm and consider renaming.
class BatchTest < Test::Unit::TestCase
  attr_reader :file, :engine

  # Points at the all.ebf batch file next to this test and builds a new engine.
  def setup
    @engine = ETL::Engine.new
    @file = File.dirname(__FILE__) + '/all.ebf'
  end

  def test_directive_without_implementation_should_fail
    batch = ETL::Batch::Batch.resolve(file, engine)

    assert_raise(RuntimeError) { BadDirective.new(batch).execute }
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require File.dirname(__FILE__) + '/test_helper'
|
3
|
+
|
4
|
+
# Tests for ETL::Processor::EncodeProcessor: one successful latin1 -> utf-8
# conversion and two configurations that the constructor must reject.
class EncodeProcessorTest < Test::Unit::TestCase
  SOURCE = 'data/encode_source_latin1.txt'
  TARGET = 'output/encode_destination_utf-8.txt'

  # The processor only gets a mocked control; the mock expects :file to be
  # requested twice and answers with a path next to this test file.
  def setup
    control_path = File.dirname(__FILE__) + '/fake-control.ctl'
    @control = flexmock("control")
    @control.should_receive(:file).twice.and_return(control_path)
  end

  def test_should_transform_a_latin1_file_to_utf8_with_grace
    options = {
      :source_file => SOURCE, :source_encoding => 'latin1',
      :target_file => TARGET, :target_encoding => 'utf-8'
    }
    ETL::Processor::EncodeProcessor.new(@control, options).process

    written = IO.read(File.join(File.dirname(__FILE__),TARGET))
    assert_equal "éphémère has accents.\nlet's encode them.", written
  end

  def test_should_throw_exception_on_unsupported_encoding
    options = {
      :source_file => SOURCE, :source_encoding => 'acme-encoding',
      :target_file => TARGET, :target_encoding => 'utf-8'
    }
    raised = assert_raise(ETL::ControlError) do
      ETL::Processor::EncodeProcessor.new(@control, options)
    end
    assert_equal "Either the source encoding 'acme-encoding' or the target encoding 'utf-8' is not supported", raised.message
  end

  def test_should_throw_exception_when_target_and_source_are_the_same
    options = {
      :source_file => SOURCE, :source_encoding => 'latin1',
      :target_file => SOURCE, :target_encoding => 'utf-8'
    }
    raised = assert_raise(ETL::ControlError) do
      ETL::Processor::EncodeProcessor.new(@control, options)
    end
    assert_equal "Source and target file cannot currently point to the same file", raised.message
  end
end
|
data/test/engine_test.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Tests for ETL::Engine, grouped in three contexts: processing of bad input
# files, lookup of named connections, and temp table name resolution.
class EngineTest < Test::Unit::TestCase

  context 'process' do

    should 'raise an error when a file which does not exist is given' do
      error = assert_raise(Errno::ENOENT) do
        ETL::Engine.process('foo-bar.ctl')
      end

      # Match instead of comparing for equality: the exact Errno::ENOENT text
      # is not the same on every Ruby version (newer ones add the name of the
      # failing call before the path).
      assert_match(/\ANo such file or directory .*- foo-bar\.ctl\z/, error.message)
    end

    should 'raise an error when an unknown file type is given' do
      # This test file itself is passed in as the unsupported file.
      error = assert_raise(RuntimeError) do
        ETL::Engine.process(__FILE__)
      end

      assert_match(/Unsupported file type/, error.message)
    end

    should_eventually 'stop as soon as the error threshold is reached' do
      engine = ETL::Engine.new

      assert_equal 0, engine.errors.size

      # The heredoc body and its terminator stay at column 0: a plain <<CTL
      # terminator must not be indented.
      engine.process(ETL::Control::Control.parse_text(<<CTL))
set_error_threshold 1
source :in, { :type => :enumerable, :enumerable => (1..100) }
after_read { |row| raise "Failure" }
CTL

      assert_equal 1, engine.errors.size
    end

  end

  context 'connection' do

    should 'return an ActiveRecord configuration by name' do
      assert_not_nil ETL::Engine.connection(:data_warehouse)
    end

    should 'raise an error on non existent connection' do
      error = assert_raise(ETL::ETLError) do
        ETL::Engine.connection(:does_not_exist)
      end
      assert_equal "Cannot find connection named :does_not_exist", error.message
    end

    should 'raise an error when requesting a connection with no name' do
      error = assert_raise(ETL::ETLError) do
        ETL::Engine.connection(" ")
      end
      assert_equal "Connection with no name requested. Is there a missing :target parameter somewhere?", error.message
    end
  end

  context 'temp tables' do
    attr_reader :connection

    setup do
      @connection = ETL::Engine.connection(:data_warehouse)
    end

    should 'return unmodified table name when temp tables are disabled' do
      assert_equal 'foo', ETL::Engine.table('foo', ETL::Engine.connection(:data_warehouse))
    end

    should 'return temp table name instead of table name when temp tables are enabled' do
      ETL::Engine.use_temp_tables = true
      begin
        assert_equal 'tmp_people', ETL::Engine.table('people', connection)
      ensure
        # The flag is set on ETL::Engine itself, so restore it even when the
        # assertion fails; otherwise later tests would run with it enabled.
        ETL::Engine.use_temp_tables = false
      end
    end
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Tests for ETL::Processor::EnsureFieldsPresenceProcessor: the :fields option
# is mandatory, rows lacking a listed field are rejected, and rows that carry
# every listed field (even with a nil value) are returned unchanged.
class EnsureFieldsPresenceProcessorTest < Test::Unit::TestCase

  # Builds the processor under test with a nil control and the given options.
  def new_processor(options)
    ETL::Processor::EnsureFieldsPresenceProcessor.new(nil, options)
  end

  should 'raise an error unless :fields is specified' do
    # assert_raise, not assert_raises, to match the other tests in this class.
    error = assert_raise(ETL::ControlError) { new_processor({}) }
    assert_equal ":fields must be specified", error.message
  end

  should 'raise an error if a field is missing in the row' do
    error = assert_raise(ETL::ControlError) do
      processor = new_processor(:fields => [:key])
      processor.process(ETL::Row[])
    end

    # Parentheses keep the regexp literal from being parsed ambiguously.
    assert_match(/missing required field\(s\)/, error.message)
  end

  should 'return the row if the required fields are in the row' do
    # :first is present with a nil value; the row is still expected back.
    row = ETL::Row[:first => nil, :second => "Barry"]
    assert_equal row, new_processor(:fields => [:first, :second]).process(row)
  end

end
|