etl 0.9.5.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +12 -0
- data/.yardopts +5 -0
- data/0.9-UPGRADE +6 -0
- data/CHANGELOG +236 -0
- data/Gemfile +4 -0
- data/HOW_TO_RELEASE +13 -0
- data/LICENSE +7 -0
- data/README.textile +111 -0
- data/Rakefile +105 -0
- data/TODO +28 -0
- data/activewarehouse-etl.gemspec +38 -0
- data/bin/etl +28 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +16 -0
- data/lib/etl.rb +97 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +65 -0
- data/lib/etl/builder.rb +2 -0
- data/lib/etl/builder/date_dimension_builder.rb +96 -0
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +89 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +405 -0
- data/lib/etl/control/destination.rb +438 -0
- data/lib/etl/control/destination/csv_destination.rb +113 -0
- data/lib/etl/control/destination/database_destination.rb +97 -0
- data/lib/etl/control/destination/excel_destination.rb +91 -0
- data/lib/etl/control/destination/file_destination.rb +126 -0
- data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
- data/lib/etl/control/destination/update_database_destination.rb +109 -0
- data/lib/etl/control/destination/yaml_destination.rb +74 -0
- data/lib/etl/control/source.rb +132 -0
- data/lib/etl/control/source/database_source.rb +224 -0
- data/lib/etl/control/source/enumerable_source.rb +11 -0
- data/lib/etl/control/source/file_source.rb +90 -0
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +42 -0
- data/lib/etl/engine.rb +582 -0
- data/lib/etl/execution.rb +19 -0
- data/lib/etl/execution/base.rb +8 -0
- data/lib/etl/execution/batch.rb +10 -0
- data/lib/etl/execution/job.rb +8 -0
- data/lib/etl/execution/migration.rb +90 -0
- data/lib/etl/generator.rb +2 -0
- data/lib/etl/generator/generator.rb +20 -0
- data/lib/etl/generator/surrogate_key_generator.rb +39 -0
- data/lib/etl/http_tools.rb +139 -0
- data/lib/etl/parser.rb +11 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
- data/lib/etl/parser/csv_parser.rb +93 -0
- data/lib/etl/parser/excel_parser.rb +112 -0
- data/lib/etl/parser/fixed_width_parser.rb +65 -0
- data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
- data/lib/etl/parser/parser.rb +41 -0
- data/lib/etl/parser/sax_parser.rb +218 -0
- data/lib/etl/parser/xml_parser.rb +65 -0
- data/lib/etl/processor.rb +11 -0
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +94 -0
- data/lib/etl/processor/check_exist_processor.rb +80 -0
- data/lib/etl/processor/check_unique_processor.rb +39 -0
- data/lib/etl/processor/copy_field_processor.rb +26 -0
- data/lib/etl/processor/database_join_processor.rb +82 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
- data/lib/etl/processor/escape_csv_processor.rb +77 -0
- data/lib/etl/processor/filter_row_processor.rb +51 -0
- data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
- data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
- data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
- data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +25 -0
- data/lib/etl/processor/rename_processor.rb +24 -0
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/row_processor.rb +27 -0
- data/lib/etl/processor/sequence_processor.rb +23 -0
- data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
- data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
- data/lib/etl/processor/surrogate_key_processor.rb +53 -0
- data/lib/etl/processor/truncate_processor.rb +40 -0
- data/lib/etl/processor/zip_file_processor.rb +27 -0
- data/lib/etl/row.rb +20 -0
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +20 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/block_transform.rb +13 -0
- data/lib/etl/transform/calculation_transform.rb +71 -0
- data/lib/etl/transform/date_to_string_transform.rb +20 -0
- data/lib/etl/transform/decode_transform.rb +51 -0
- data/lib/etl/transform/default_transform.rb +20 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
- data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
- data/lib/etl/transform/md5_transform.rb +13 -0
- data/lib/etl/transform/ordinalize_transform.rb +14 -0
- data/lib/etl/transform/sha1_transform.rb +13 -0
- data/lib/etl/transform/split_fields_transform.rb +27 -0
- data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
- data/lib/etl/transform/string_to_date_transform.rb +16 -0
- data/lib/etl/transform/string_to_time_transform.rb +11 -0
- data/lib/etl/transform/transform.rb +61 -0
- data/lib/etl/transform/trim_transform.rb +26 -0
- data/lib/etl/transform/type_transform.rb +35 -0
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +3 -0
- data/spec/fixtures/all.ebf +6 -0
- data/spec/fixtures/apache_combined_log.ctl +11 -0
- data/spec/fixtures/batch_with_error.ebf +6 -0
- data/spec/fixtures/batched1.ctl +0 -0
- data/spec/fixtures/batched2.ctl +0 -0
- data/spec/fixtures/block_processor.ctl +6 -0
- data/spec/fixtures/block_processor_error.ctl +1 -0
- data/spec/fixtures/block_processor_pre_post_process.ctl +4 -0
- data/spec/fixtures/block_processor_remove_rows.ctl +5 -0
- data/spec/fixtures/data/apache_combined_log.txt +3 -0
- data/spec/fixtures/data/bulk_import.txt +3 -0
- data/spec/fixtures/data/bulk_import_with_empties.txt +3 -0
- data/spec/fixtures/data/decode.txt +3 -0
- data/spec/fixtures/data/delimited.txt +3 -0
- data/spec/fixtures/data/encode_source_latin1.txt +2 -0
- data/spec/fixtures/data/excel.xls +0 -0
- data/spec/fixtures/data/excel2.xls +0 -0
- data/spec/fixtures/data/fixed_width.txt +3 -0
- data/spec/fixtures/data/multiple_delimited_1.txt +3 -0
- data/spec/fixtures/data/multiple_delimited_2.txt +3 -0
- data/spec/fixtures/data/nokogiri.xml +38 -0
- data/spec/fixtures/data/people.txt +3 -0
- data/spec/fixtures/data/sax.xml +14 -0
- data/spec/fixtures/data/xml.xml +16 -0
- data/spec/fixtures/delimited.ctl +30 -0
- data/spec/fixtures/delimited_absolute.ctl +31 -0
- data/spec/fixtures/delimited_destination_db.ctl +23 -0
- data/spec/fixtures/delimited_excel.ctl +31 -0
- data/spec/fixtures/delimited_insert_update.ctl +34 -0
- data/spec/fixtures/delimited_update.ctl +34 -0
- data/spec/fixtures/delimited_with_bulk_load.ctl +34 -0
- data/spec/fixtures/errors.ctl +24 -0
- data/spec/fixtures/excel.ctl +24 -0
- data/spec/fixtures/excel2.ctl +25 -0
- data/spec/fixtures/fixed_width.ctl +35 -0
- data/spec/fixtures/inline_parser.ctl +17 -0
- data/spec/fixtures/model_source.ctl +14 -0
- data/spec/fixtures/multiple_delimited.ctl +22 -0
- data/spec/fixtures/multiple_source_delimited.ctl +39 -0
- data/spec/fixtures/nokogiri_all.ctl +35 -0
- data/spec/fixtures/nokogiri_select.ctl +35 -0
- data/spec/fixtures/output/.ignore +1 -0
- data/spec/fixtures/output/delimited.txt +3 -0
- data/spec/fixtures/output/encode_destination_utf-8.txt +2 -0
- data/spec/fixtures/output/fixed_width.txt +3 -0
- data/spec/fixtures/output/inline_parser.txt +3 -0
- data/spec/fixtures/output/multiple_source_delimited.txt +6 -0
- data/spec/fixtures/output/test_excel_destination.xls +0 -0
- data/spec/fixtures/output/test_file_destination.2.txt +2 -0
- data/spec/fixtures/output/test_file_destination.txt +2 -0
- data/spec/fixtures/output/test_multiple_unique.txt +1 -0
- data/spec/fixtures/output/test_unique.txt +2 -0
- data/spec/fixtures/sax.ctl +26 -0
- data/spec/fixtures/scd/1.txt +1 -0
- data/spec/fixtures/scd/2.txt +1 -0
- data/spec/fixtures/scd/3.txt +1 -0
- data/spec/fixtures/scd_test_type_1.ctl +43 -0
- data/spec/fixtures/scd_test_type_2.ctl +34 -0
- data/spec/fixtures/screen_test_error.ctl +3 -0
- data/spec/fixtures/screen_test_fatal.ctl +3 -0
- data/spec/fixtures/xml.ctl +31 -0
- data/spec/quality_spec.rb +11 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/support/custom_fixtures.rb +54 -0
- data/spec/support/custom_matchers.rb +54 -0
- data/test-matrix.yml +10 -0
- data/test/.gitignore +1 -0
- data/test/.ignore +2 -0
- data/test/batch_test.rb +41 -0
- data/test/block_processor_test.rb +38 -0
- data/test/check_exist_processor_test.rb +92 -0
- data/test/check_unique_processor_test.rb +40 -0
- data/test/config/Gemfile.rails-2.3.x +3 -0
- data/test/config/Gemfile.rails-2.3.x.lock +53 -0
- data/test/config/Gemfile.rails-3.0.x +3 -0
- data/test/config/Gemfile.rails-3.0.x.lock +61 -0
- data/test/config/common.rb +29 -0
- data/test/connection/mysql/connection.rb +9 -0
- data/test/connection/mysql/schema.sql +37 -0
- data/test/connection/postgresql/connection.rb +13 -0
- data/test/connection/postgresql/schema.sql +40 -0
- data/test/control_test.rb +43 -0
- data/test/database_join_processor_test.rb +43 -0
- data/test/date_dimension_builder_test.rb +96 -0
- data/test/destination_test.rb +275 -0
- data/test/directive_test.rb +23 -0
- data/test/encode_processor_test.rb +32 -0
- data/test/engine_test.rb +78 -0
- data/test/ensure_fields_presence_processor_test.rb +28 -0
- data/test/etl_test.rb +42 -0
- data/test/foreign_key_lookup_transform_test.rb +50 -0
- data/test/generator_test.rb +14 -0
- data/test/mocks/mock_destination.rb +26 -0
- data/test/mocks/mock_source.rb +25 -0
- data/test/nokogiri_test.rb +35 -0
- data/test/parser_test.rb +224 -0
- data/test/performance/delimited.ctl +30 -0
- data/test/processor_test.rb +44 -0
- data/test/row_processor_test.rb +17 -0
- data/test/scd_test.rb +257 -0
- data/test/screen_test.rb +9 -0
- data/test/source_test.rb +154 -0
- data/test/test_helper.rb +37 -0
- data/test/transform_test.rb +101 -0
- data/test/truncate_processor_test.rb +37 -0
- metadata +510 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
# puts "executing delimited.ctl"
|
2
|
+
|
3
|
+
source :in, {
|
4
|
+
:file => 'delimited.txt',
|
5
|
+
:parser => :csv
|
6
|
+
},
|
7
|
+
[
|
8
|
+
:first_name,
|
9
|
+
:last_name,
|
10
|
+
:ssn,
|
11
|
+
{
|
12
|
+
:name => :age,
|
13
|
+
:type => :integer
|
14
|
+
},
|
15
|
+
:sex
|
16
|
+
]
|
17
|
+
|
18
|
+
transform :ssn, :sha1
|
19
|
+
transform(:ssn){ |v| v[0,24] }
|
20
|
+
transform :sex, :decode, {:decode_table_path => 'delimited_decode.txt'}
|
21
|
+
|
22
|
+
destination :out, {
|
23
|
+
:file => 'delimited.out.txt'
|
24
|
+
},
|
25
|
+
{
|
26
|
+
:order => [:first_name, :last_name, :name, :ssn, :age, :sex],
|
27
|
+
:virtual => {
|
28
|
+
:name => Proc.new { |row| "#{row[:first_name]} #{row[:last_name]}" }
|
29
|
+
}
|
30
|
+
}
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class Person < ActiveRecord::Base
|
4
|
+
end
|
5
|
+
|
6
|
+
# Test pre- and post-processors
|
7
|
+
class ProcessorTest < Test::Unit::TestCase
|
8
|
+
# Test bulk import functionality
|
9
|
+
|
10
|
+
context "the bulk import processor" do
|
11
|
+
should "should import successfully" do
|
12
|
+
assert_nothing_raised { do_bulk_import }
|
13
|
+
assert_equal 3, Person.count
|
14
|
+
assert_equal "Foxworthy", Person.find(2).last_name
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_bulk_import_with_empties
|
19
|
+
# this test ensures that one column with an empty value will still allow
|
20
|
+
# the row to be imported
|
21
|
+
# this doesn't apply to the id column though - untested
|
22
|
+
assert_nothing_raised { do_bulk_import('bulk_import_with_empties.txt') }
|
23
|
+
assert_equal 3, Person.count
|
24
|
+
assert Person.find(2).last_name.blank?
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_truncate
|
28
|
+
# TODO: implement test
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def do_bulk_import(file = 'bulk_import.txt')
|
34
|
+
control = ETL::Control::Control.new(File.join(File.dirname(__FILE__), 'delimited.ctl'))
|
35
|
+
configuration = {
|
36
|
+
:file => "data/#{file}",
|
37
|
+
:truncate => true,
|
38
|
+
:target => :data_warehouse,
|
39
|
+
:table => 'people'
|
40
|
+
}
|
41
|
+
processor = ETL::Processor::BulkImportProcessor.new(control, configuration)
|
42
|
+
processor.process
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Test row processors
|
4
|
+
class RowProcessorTest < Test::Unit::TestCase
|
5
|
+
def test_copy_field_processor
|
6
|
+
|
7
|
+
end
|
8
|
+
def test_hierarchy_exploder_processor
|
9
|
+
|
10
|
+
end
|
11
|
+
def test_rename_processor
|
12
|
+
|
13
|
+
end
|
14
|
+
def test_sequence_processor
|
15
|
+
|
16
|
+
end
|
17
|
+
end
|
data/test/scd_test.rb
ADDED
@@ -0,0 +1,257 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class ScdTest < Test::Unit::TestCase
|
4
|
+
context "when working with a slowly changing dimension" do
|
5
|
+
setup do
|
6
|
+
@connection = ETL::Engine.connection(:data_warehouse)
|
7
|
+
@connection.delete("DELETE FROM person_dimension")
|
8
|
+
@end_of_time = DateTime.parse('9999-12-31 00:00:00')
|
9
|
+
end
|
10
|
+
context "of type 1" do
|
11
|
+
context "on run 1" do
|
12
|
+
setup do
|
13
|
+
do_type_1_run(1)
|
14
|
+
end
|
15
|
+
should "insert record" do
|
16
|
+
assert_equal 1, count_bobs
|
17
|
+
end
|
18
|
+
should "set the original address" do
|
19
|
+
assert_boston_address(find_bobs.first)
|
20
|
+
end
|
21
|
+
should "set the original id" do
|
22
|
+
assert_equal 1, find_bobs.first.id
|
23
|
+
end
|
24
|
+
should "skip the load if there is no change" do
|
25
|
+
do_type_1_run(1)
|
26
|
+
lines = lines_for('scd_test_type_1.txt')
|
27
|
+
assert lines.empty?, "scheduled load expected to be empty, was #{lines.size} records"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
context "on run 2" do
|
31
|
+
setup do
|
32
|
+
do_type_1_run(1)
|
33
|
+
do_type_1_run(2)
|
34
|
+
end
|
35
|
+
should "delete the old record" do
|
36
|
+
assert_equal 1, count_bobs, "new record created, but old not deleted: #{find_bobs.inspect}"
|
37
|
+
end
|
38
|
+
should "update the address" do
|
39
|
+
assert_los_angeles_address(find_bobs.last)
|
40
|
+
end
|
41
|
+
should "keep id" do
|
42
|
+
assert_equal 1, find_bobs.first.id
|
43
|
+
end
|
44
|
+
should "only change once even if run again" do
|
45
|
+
do_type_1_run(2)
|
46
|
+
assert_equal 1, count_bobs
|
47
|
+
lines = lines_for('scd_test_type_1.txt')
|
48
|
+
assert lines.empty?, "scheduled load expected to be empty, was #{lines.size} records"
|
49
|
+
end
|
50
|
+
should "revert address on new record" do
|
51
|
+
do_type_1_run(1)
|
52
|
+
assert_boston_address(find_bobs.first)
|
53
|
+
end
|
54
|
+
should "keep record on revert" do
|
55
|
+
do_type_1_run(1)
|
56
|
+
assert_equal 1, count_bobs
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
context "of type 2" do
|
61
|
+
context "on run 1" do
|
62
|
+
setup do
|
63
|
+
do_type_2_run(1)
|
64
|
+
end
|
65
|
+
should "insert record" do
|
66
|
+
assert_equal 1, count_bobs
|
67
|
+
end
|
68
|
+
should "set the original record" do
|
69
|
+
assert_boston_address(find_bobs.first)
|
70
|
+
end
|
71
|
+
should "set the original id" do
|
72
|
+
assert_equal 1, find_bobs.first.id
|
73
|
+
end
|
74
|
+
should "set the effective date" do
|
75
|
+
# doing comparison on strings, as comparison on objects
|
76
|
+
# doesn't consider things equal for some yet to be understood
|
77
|
+
# reason
|
78
|
+
assert_equal current_datetime.to_s, find_bobs.first.effective_date.to_s
|
79
|
+
end
|
80
|
+
should "set the end date" do
|
81
|
+
assert_equal @end_of_time, find_bobs.first.end_date
|
82
|
+
end
|
83
|
+
should "set the latest version flag" do
|
84
|
+
assert find_bobs.first.latest_version?
|
85
|
+
end
|
86
|
+
should "skip the load if there is no change" do
|
87
|
+
do_type_2_run(1)
|
88
|
+
assert_equal 1, find_bobs.last.id, "scheduled load expected to be empty"
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
context "on run 2" do
|
93
|
+
setup do
|
94
|
+
do_type_2_run(1)
|
95
|
+
do_type_2_run(2)
|
96
|
+
end
|
97
|
+
should "insert new record" do
|
98
|
+
assert_equal 2, count_bobs
|
99
|
+
end
|
100
|
+
should "keep the primary key of the original version" do
|
101
|
+
assert_not_nil find_bobs.detect { |bob| 1 == bob.id }
|
102
|
+
end
|
103
|
+
should "increment the primary key for the new version" do
|
104
|
+
assert_not_nil find_bobs.detect { |bob| 2 == bob.id }
|
105
|
+
end
|
106
|
+
should "expire the old record" do
|
107
|
+
original_bob = find_bobs.detect { |bob| 1 == bob.id }
|
108
|
+
new_bob = find_bobs.detect { |bob| 2 == bob.id }
|
109
|
+
assert_equal new_bob.effective_date, original_bob.end_date
|
110
|
+
end
|
111
|
+
should "keep the address for the expired record" do
|
112
|
+
assert_boston_address(find_bobs.detect { |bob| 1 == bob.id })
|
113
|
+
end
|
114
|
+
should "update the address on the new record" do
|
115
|
+
assert_los_angeles_address(find_bobs.detect { |bob| 2 == bob.id })
|
116
|
+
end
|
117
|
+
should "activate the new record" do
|
118
|
+
# doing comparison on strings, as comparison on objects
|
119
|
+
# doesn't consider things equal for some yet to be understood
|
120
|
+
# reason
|
121
|
+
assert_equal current_datetime.to_s, find_bobs.detect { |bob| 2 == bob.id }.effective_date.to_s
|
122
|
+
end
|
123
|
+
should "set the end date for the new record" do
|
124
|
+
assert_equal @end_of_time, find_bobs.detect { |bob| 2 == bob.id }.end_date
|
125
|
+
end
|
126
|
+
should "shift the latest version" do
|
127
|
+
original_bob = find_bobs.detect { |bob| 1 == bob.id }
|
128
|
+
new_bob = find_bobs.detect { |bob| 2 == bob.id }
|
129
|
+
assert !original_bob.latest_version?
|
130
|
+
assert new_bob.latest_version?
|
131
|
+
end
|
132
|
+
should "only execute a change once" do
|
133
|
+
do_type_2_run(2)
|
134
|
+
assert_equal 2, count_bobs, "scheduled load expected to be empty"
|
135
|
+
end
|
136
|
+
should "insert new records on revert" do
|
137
|
+
do_type_2_run(1)
|
138
|
+
assert_equal 3, count_bobs
|
139
|
+
end
|
140
|
+
should "update address on new record on revert" do
|
141
|
+
do_type_2_run(1)
|
142
|
+
assert_boston_address(find_bobs.detect { |bob| 3 == bob.id })
|
143
|
+
end
|
144
|
+
should "only delete one row on an scd change" do
|
145
|
+
# Two records right now
|
146
|
+
assert_equal 2, count_bobs
|
147
|
+
do_type_2_run(1) # put third version in (same as first version, but that's irrelevant)
|
148
|
+
# was failing because first and second versions were being deleted.
|
149
|
+
assert_equal 3, count_bobs
|
150
|
+
end
|
151
|
+
end
|
152
|
+
context "on non sdc fields that change" do
|
153
|
+
setup do
|
154
|
+
do_type_2_run_with_only_city_state_zip_scd(1)
|
155
|
+
do_type_2_run_with_only_city_state_zip_scd(2)
|
156
|
+
end
|
157
|
+
should "not create an extra record" do
|
158
|
+
do_type_2_run_with_only_city_state_zip_scd(3)
|
159
|
+
assert_equal 2, count_bobs
|
160
|
+
end
|
161
|
+
should "keep id" do
|
162
|
+
do_type_2_run_with_only_city_state_zip_scd(3)
|
163
|
+
assert_not_nil find_bobs.detect { |bob| 2 == bob.id }
|
164
|
+
end
|
165
|
+
should "keep dates" do
|
166
|
+
old_bob = find_bobs.detect { |bob| 2 == bob.id }
|
167
|
+
do_type_2_run_with_only_city_state_zip_scd(3)
|
168
|
+
new_bob = find_bobs.detect { |bob| 2 == bob.id }
|
169
|
+
assert_equal old_bob.end_date, new_bob.end_date
|
170
|
+
assert_equal old_bob.effective_date, new_bob.effective_date
|
171
|
+
end
|
172
|
+
should "keep the latest version flag" do
|
173
|
+
do_type_2_run_with_only_city_state_zip_scd(3)
|
174
|
+
assert find_bobs.detect { |bob| 2 == bob.id }.latest_version?
|
175
|
+
end
|
176
|
+
should "treat non scd fields like type 1 fields" do
|
177
|
+
do_type_2_run_with_only_city_state_zip_scd(3)
|
178
|
+
assert_los_angeles_address(find_bobs.detect { |bob| 2 == bob.id }, "280 Pine Street")
|
179
|
+
end
|
180
|
+
should "skip load when there is no change" do
|
181
|
+
do_type_2_run_with_only_city_state_zip_scd(2)
|
182
|
+
assert_equal 2, count_bobs, "scheduled load expected to be empty"
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
def do_type_2_run(run_num)
|
189
|
+
ENV['run_number'] = run_num.to_s
|
190
|
+
assert_nothing_raised do
|
191
|
+
run_ctl_file("scd_test_type_2.ctl")
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
def do_type_2_run_with_only_city_state_zip_scd(run_num)
|
196
|
+
ENV['type_2_scd_fields'] = Marshal.dump([:city, :state, :zip_code])
|
197
|
+
do_type_2_run(run_num)
|
198
|
+
end
|
199
|
+
|
200
|
+
def do_type_1_run(run_num)
|
201
|
+
ENV['run_number'] = run_num.to_s
|
202
|
+
assert_nothing_raised do
|
203
|
+
run_ctl_file("scd_test_type_1.ctl")
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def lines_for(file)
|
208
|
+
File.readlines(File.dirname(__FILE__) + "/output/#{file}")
|
209
|
+
end
|
210
|
+
|
211
|
+
def run_ctl_file(file)
|
212
|
+
ETL::Engine.process(File.dirname(__FILE__) + "/#{file}")
|
213
|
+
end
|
214
|
+
|
215
|
+
def count_bobs
|
216
|
+
@connection.select_value(
|
217
|
+
"SELECT count(*) FROM person_dimension WHERE first_name = 'Bob' and last_name = 'Smith'").to_i
|
218
|
+
end
|
219
|
+
|
220
|
+
def find_bobs
|
221
|
+
bobs = @connection.select_all(
|
222
|
+
"SELECT * FROM person_dimension WHERE first_name = 'Bob' and last_name = 'Smith'")
|
223
|
+
bobs.each do |bob|
|
224
|
+
def bob.id
|
225
|
+
self["id"].to_i
|
226
|
+
end
|
227
|
+
def bob.effective_date
|
228
|
+
DateTime.parse(self["effective_date"])
|
229
|
+
end
|
230
|
+
def bob.end_date
|
231
|
+
DateTime.parse(self["end_date"])
|
232
|
+
end
|
233
|
+
def bob.latest_version?
|
234
|
+
ActiveRecord::ConnectionAdapters::Column.value_to_boolean(self["latest_version"])
|
235
|
+
end
|
236
|
+
end
|
237
|
+
bobs
|
238
|
+
end
|
239
|
+
|
240
|
+
def current_datetime
|
241
|
+
DateTime.parse(Time.now.to_s(:db))
|
242
|
+
end
|
243
|
+
|
244
|
+
def assert_boston_address(bob, street = "200 South Drive")
|
245
|
+
assert_equal street, bob['address'], bob.inspect
|
246
|
+
assert_equal "Boston", bob['city'], bob.inspect
|
247
|
+
assert_equal "MA", bob['state'], bob.inspect
|
248
|
+
assert_equal "32123", bob['zip_code'], bob.inspect
|
249
|
+
end
|
250
|
+
|
251
|
+
def assert_los_angeles_address(bob, street = "1010 SW 23rd St")
|
252
|
+
assert_equal street, bob['address'], bob.inspect
|
253
|
+
assert_equal "Los Angeles", bob['city'], bob.inspect
|
254
|
+
assert_equal "CA", bob['state'], bob.inspect
|
255
|
+
assert_equal "90392", bob['zip_code'], bob.inspect
|
256
|
+
end
|
257
|
+
end
|
data/test/screen_test.rb
ADDED
data/test/source_test.rb
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class Person < ActiveRecord::Base
|
4
|
+
end
|
5
|
+
class SourceTest < Test::Unit::TestCase
|
6
|
+
|
7
|
+
context "source" do
|
8
|
+
should "set store_locally to true by default" do
|
9
|
+
assert_equal true, Source.new(nil, {}, nil).store_locally # empty config, so the default is what gets asserted
|
10
|
+
end
|
11
|
+
|
12
|
+
should "let the user set store_locally to true" do
|
13
|
+
assert_equal true, Source.new(nil, { :store_locally => true }, nil).store_locally
|
14
|
+
end
|
15
|
+
|
16
|
+
should "let the user set store_locally to false" do
|
17
|
+
assert_equal false, Source.new(nil, { :store_locally => false }, nil).store_locally
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
context "a file source" do
|
22
|
+
context "with delimited data" do
|
23
|
+
setup do
|
24
|
+
control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
|
25
|
+
configuration = {
|
26
|
+
:file => 'data/delimited.txt',
|
27
|
+
:parser => :csv
|
28
|
+
}
|
29
|
+
definition = self.definition + [:sex]
|
30
|
+
|
31
|
+
source = ETL::Control::FileSource.new(control, configuration, definition)
|
32
|
+
@rows = source.collect { |row| row }
|
33
|
+
end
|
34
|
+
should "find 3 rows in the delimited file" do
|
35
|
+
assert_equal 3, @rows.length
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
context "a file source with a glob" do
|
41
|
+
setup do
|
42
|
+
control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/multiple_delimited.ctl')
|
43
|
+
configuration = {
|
44
|
+
:file => 'data/multiple_delimited_*.txt',
|
45
|
+
:parser => :csv
|
46
|
+
}
|
47
|
+
|
48
|
+
source = ETL::Control::FileSource.new(control, configuration, definition)
|
49
|
+
@rows = source.collect { |row| row }
|
50
|
+
end
|
51
|
+
should "find 6 rows in total" do
|
52
|
+
assert_equal 6, @rows.length
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context "a file source with an absolute path" do
|
57
|
+
setup do
|
58
|
+
FileUtils.cp(File.dirname(__FILE__) + '/data/delimited.txt', '/tmp/delimited_abs.txt')
|
59
|
+
|
60
|
+
control = ETL::Control::Control.parse(File.dirname(__FILE__) +
|
61
|
+
'/delimited_absolute.ctl')
|
62
|
+
configuration = {
|
63
|
+
:file => '/tmp/delimited_abs.txt',
|
64
|
+
:parser => :csv
|
65
|
+
}
|
66
|
+
definition = self.definition + [:sex]
|
67
|
+
|
68
|
+
source = ETL::Control::FileSource.new(control, configuration, definition)
|
69
|
+
@rows = source.collect { |row| row }
|
70
|
+
end
|
71
|
+
should "find 3 rows" do
|
72
|
+
assert_equal 3, @rows.length
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context "multiple sources" do
|
77
|
+
setup do
|
78
|
+
control = ETL::Control::Control.parse(File.dirname(__FILE__) +
|
79
|
+
'/multiple_source_delimited.ctl')
|
80
|
+
@rows = control.sources.collect { |source| source.collect { |row| row }}.flatten # non-bang: flatten! returns nil when nothing was flattened
|
81
|
+
end
|
82
|
+
should "find 12 rows" do
|
83
|
+
assert_equal 12, @rows.length
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
context "a database source" do
|
88
|
+
setup do
|
89
|
+
control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
|
90
|
+
configuration = {
|
91
|
+
:database => 'etl_unittest',
|
92
|
+
:target => :operational_database,
|
93
|
+
:table => 'people',
|
94
|
+
}
|
95
|
+
definition = [
|
96
|
+
:first_name,
|
97
|
+
:last_name,
|
98
|
+
:ssn,
|
99
|
+
]
|
100
|
+
@source = ETL::Control::DatabaseSource.new(control, configuration, definition)
|
101
|
+
end
|
102
|
+
should "set the local file for extraction storage" do
|
103
|
+
assert_match %r{source_data/localhost/etl_unittest/people/\d+\.csv}, @source.local_file.to_s
|
104
|
+
end
|
105
|
+
should "find 1 row" do
|
106
|
+
Person.delete_all
|
107
|
+
assert_equal 0, Person.count
|
108
|
+
Person.create!(:first_name => 'Bob', :last_name => 'Smith', :ssn => '123456789')
|
109
|
+
assert_equal 1, Person.count
|
110
|
+
rows = @source.collect { |row| row }
|
111
|
+
assert_equal 1, rows.length
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
context "a file source with an xml parser" do
|
116
|
+
setup do
|
117
|
+
control = ETL::Control::Control.parse(File.dirname(__FILE__) +
|
118
|
+
'/xml.ctl')
|
119
|
+
@rows = control.sources.collect{ |source| source.collect { |row| row }}.flatten # non-bang: flatten! returns nil when nothing was flattened
|
120
|
+
end
|
121
|
+
should "find 2 rows" do
|
122
|
+
assert_equal 2, @rows.length
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
context "a model source" do
|
127
|
+
setup do
|
128
|
+
control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/model_source.ctl')
|
129
|
+
configuration = {
|
130
|
+
|
131
|
+
}
|
132
|
+
definition = [
|
133
|
+
:first_name,
|
134
|
+
:last_name,
|
135
|
+
:ssn
|
136
|
+
]
|
137
|
+
end
|
138
|
+
should_eventually "find n rows" do
|
139
|
+
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
def definition
|
144
|
+
[
|
145
|
+
:first_name,
|
146
|
+
:last_name,
|
147
|
+
:ssn,
|
148
|
+
{
|
149
|
+
:name => :age,
|
150
|
+
:type => :integer
|
151
|
+
}
|
152
|
+
]
|
153
|
+
end
|
154
|
+
end
|