etl 0.9.5.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. data/.gitignore +12 -0
  2. data/.yardopts +5 -0
  3. data/0.9-UPGRADE +6 -0
  4. data/CHANGELOG +236 -0
  5. data/Gemfile +4 -0
  6. data/HOW_TO_RELEASE +13 -0
  7. data/LICENSE +7 -0
  8. data/README.textile +111 -0
  9. data/Rakefile +105 -0
  10. data/TODO +28 -0
  11. data/activewarehouse-etl.gemspec +38 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/spec/fixtures/all.ebf +6 -0
  111. data/spec/fixtures/apache_combined_log.ctl +11 -0
  112. data/spec/fixtures/batch_with_error.ebf +6 -0
  113. data/spec/fixtures/batched1.ctl +0 -0
  114. data/spec/fixtures/batched2.ctl +0 -0
  115. data/spec/fixtures/block_processor.ctl +6 -0
  116. data/spec/fixtures/block_processor_error.ctl +1 -0
  117. data/spec/fixtures/block_processor_pre_post_process.ctl +4 -0
  118. data/spec/fixtures/block_processor_remove_rows.ctl +5 -0
  119. data/spec/fixtures/data/apache_combined_log.txt +3 -0
  120. data/spec/fixtures/data/bulk_import.txt +3 -0
  121. data/spec/fixtures/data/bulk_import_with_empties.txt +3 -0
  122. data/spec/fixtures/data/decode.txt +3 -0
  123. data/spec/fixtures/data/delimited.txt +3 -0
  124. data/spec/fixtures/data/encode_source_latin1.txt +2 -0
  125. data/spec/fixtures/data/excel.xls +0 -0
  126. data/spec/fixtures/data/excel2.xls +0 -0
  127. data/spec/fixtures/data/fixed_width.txt +3 -0
  128. data/spec/fixtures/data/multiple_delimited_1.txt +3 -0
  129. data/spec/fixtures/data/multiple_delimited_2.txt +3 -0
  130. data/spec/fixtures/data/nokogiri.xml +38 -0
  131. data/spec/fixtures/data/people.txt +3 -0
  132. data/spec/fixtures/data/sax.xml +14 -0
  133. data/spec/fixtures/data/xml.xml +16 -0
  134. data/spec/fixtures/delimited.ctl +30 -0
  135. data/spec/fixtures/delimited_absolute.ctl +31 -0
  136. data/spec/fixtures/delimited_destination_db.ctl +23 -0
  137. data/spec/fixtures/delimited_excel.ctl +31 -0
  138. data/spec/fixtures/delimited_insert_update.ctl +34 -0
  139. data/spec/fixtures/delimited_update.ctl +34 -0
  140. data/spec/fixtures/delimited_with_bulk_load.ctl +34 -0
  141. data/spec/fixtures/errors.ctl +24 -0
  142. data/spec/fixtures/excel.ctl +24 -0
  143. data/spec/fixtures/excel2.ctl +25 -0
  144. data/spec/fixtures/fixed_width.ctl +35 -0
  145. data/spec/fixtures/inline_parser.ctl +17 -0
  146. data/spec/fixtures/model_source.ctl +14 -0
  147. data/spec/fixtures/multiple_delimited.ctl +22 -0
  148. data/spec/fixtures/multiple_source_delimited.ctl +39 -0
  149. data/spec/fixtures/nokogiri_all.ctl +35 -0
  150. data/spec/fixtures/nokogiri_select.ctl +35 -0
  151. data/spec/fixtures/output/.ignore +1 -0
  152. data/spec/fixtures/output/delimited.txt +3 -0
  153. data/spec/fixtures/output/encode_destination_utf-8.txt +2 -0
  154. data/spec/fixtures/output/fixed_width.txt +3 -0
  155. data/spec/fixtures/output/inline_parser.txt +3 -0
  156. data/spec/fixtures/output/multiple_source_delimited.txt +6 -0
  157. data/spec/fixtures/output/test_excel_destination.xls +0 -0
  158. data/spec/fixtures/output/test_file_destination.2.txt +2 -0
  159. data/spec/fixtures/output/test_file_destination.txt +2 -0
  160. data/spec/fixtures/output/test_multiple_unique.txt +1 -0
  161. data/spec/fixtures/output/test_unique.txt +2 -0
  162. data/spec/fixtures/sax.ctl +26 -0
  163. data/spec/fixtures/scd/1.txt +1 -0
  164. data/spec/fixtures/scd/2.txt +1 -0
  165. data/spec/fixtures/scd/3.txt +1 -0
  166. data/spec/fixtures/scd_test_type_1.ctl +43 -0
  167. data/spec/fixtures/scd_test_type_2.ctl +34 -0
  168. data/spec/fixtures/screen_test_error.ctl +3 -0
  169. data/spec/fixtures/screen_test_fatal.ctl +3 -0
  170. data/spec/fixtures/xml.ctl +31 -0
  171. data/spec/quality_spec.rb +11 -0
  172. data/spec/spec_helper.rb +10 -0
  173. data/spec/support/custom_fixtures.rb +54 -0
  174. data/spec/support/custom_matchers.rb +54 -0
  175. data/test-matrix.yml +10 -0
  176. data/test/.gitignore +1 -0
  177. data/test/.ignore +2 -0
  178. data/test/batch_test.rb +41 -0
  179. data/test/block_processor_test.rb +38 -0
  180. data/test/check_exist_processor_test.rb +92 -0
  181. data/test/check_unique_processor_test.rb +40 -0
  182. data/test/config/Gemfile.rails-2.3.x +3 -0
  183. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  184. data/test/config/Gemfile.rails-3.0.x +3 -0
  185. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  186. data/test/config/common.rb +29 -0
  187. data/test/connection/mysql/connection.rb +9 -0
  188. data/test/connection/mysql/schema.sql +37 -0
  189. data/test/connection/postgresql/connection.rb +13 -0
  190. data/test/connection/postgresql/schema.sql +40 -0
  191. data/test/control_test.rb +43 -0
  192. data/test/database_join_processor_test.rb +43 -0
  193. data/test/date_dimension_builder_test.rb +96 -0
  194. data/test/destination_test.rb +275 -0
  195. data/test/directive_test.rb +23 -0
  196. data/test/encode_processor_test.rb +32 -0
  197. data/test/engine_test.rb +78 -0
  198. data/test/ensure_fields_presence_processor_test.rb +28 -0
  199. data/test/etl_test.rb +42 -0
  200. data/test/foreign_key_lookup_transform_test.rb +50 -0
  201. data/test/generator_test.rb +14 -0
  202. data/test/mocks/mock_destination.rb +26 -0
  203. data/test/mocks/mock_source.rb +25 -0
  204. data/test/nokogiri_test.rb +35 -0
  205. data/test/parser_test.rb +224 -0
  206. data/test/performance/delimited.ctl +30 -0
  207. data/test/processor_test.rb +44 -0
  208. data/test/row_processor_test.rb +17 -0
  209. data/test/scd_test.rb +257 -0
  210. data/test/screen_test.rb +9 -0
  211. data/test/source_test.rb +154 -0
  212. data/test/test_helper.rb +37 -0
  213. data/test/transform_test.rb +101 -0
  214. data/test/truncate_processor_test.rb +37 -0
  215. metadata +510 -0
data/test/performance/delimited.ctl ADDED
@@ -0,0 +1,30 @@
1
+ # puts "executing delimited.ctl"
2
+
3
+ source :in, {
4
+ :file => 'delimited.txt',
5
+ :parser => :csv
6
+ },
7
+ [
8
+ :first_name,
9
+ :last_name,
10
+ :ssn,
11
+ {
12
+ :name => :age,
13
+ :type => :integer
14
+ },
15
+ :sex
16
+ ]
17
+
18
+ transform :ssn, :sha1
19
+ transform(:ssn){ |v| v[0,24] }
20
+ transform :sex, :decode, {:decode_table_path => 'delimited_decode.txt'}
21
+
22
+ destination :out, {
23
+ :file => 'delimited.out.txt'
24
+ },
25
+ {
26
+ :order => [:first_name, :last_name, :name, :ssn, :age, :sex],
27
+ :virtual => {
28
+ :name => Proc.new { |row| "#{row[:first_name]} #{row[:last_name]}" }
29
+ }
30
+ }
data/test/processor_test.rb ADDED
@@ -0,0 +1,44 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class Person < ActiveRecord::Base
4
+ end
5
+
6
+ # Test pre- and post-processors
7
+ class ProcessorTest < Test::Unit::TestCase
8
+ # Test bulk import functionality
9
+
10
+ context "the bulk import processor" do
11
+ should "should import successfully" do
12
+ assert_nothing_raised { do_bulk_import }
13
+ assert_equal 3, Person.count
14
+ assert_equal "Foxworthy", Person.find(2).last_name
15
+ end
16
+ end
17
+
18
+ def test_bulk_import_with_empties
19
+ # This test ensures that a row with an empty value in one column is
20
+ # still imported (three rows are expected; record 2 has a blank last_name).
21
+ # This doesn't apply to the id column, though - that case is untested.
22
+ assert_nothing_raised { do_bulk_import('bulk_import_with_empties.txt') }
23
+ assert_equal 3, Person.count
24
+ assert Person.find(2).last_name.blank?
25
+ end
26
+
27
+ def test_truncate
28
+ # TODO: implement test
29
+ end
30
+
31
+ private
32
+
33
+ def do_bulk_import(file = 'bulk_import.txt')
34
+ control = ETL::Control::Control.new(File.join(File.dirname(__FILE__), 'delimited.ctl'))
35
+ configuration = {
36
+ :file => "data/#{file}",
37
+ :truncate => true,
38
+ :target => :data_warehouse,
39
+ :table => 'people'
40
+ }
41
+ processor = ETL::Processor::BulkImportProcessor.new(control, configuration)
42
+ processor.process
43
+ end
44
+ end
data/test/row_processor_test.rb ADDED
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ # Test row processors
4
+ class RowProcessorTest < Test::Unit::TestCase
5
+ def test_copy_field_processor
6
+
7
+ end
8
+ def test_hierarchy_exploder_processor
9
+
10
+ end
11
+ def test_rename_processor
12
+
13
+ end
14
+ def test_sequence_processor
15
+
16
+ end
17
+ end
data/test/scd_test.rb ADDED
@@ -0,0 +1,257 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class ScdTest < Test::Unit::TestCase
4
+ context "when working with a slowly changing dimension" do
5
+ setup do
6
+ @connection = ETL::Engine.connection(:data_warehouse)
7
+ @connection.delete("DELETE FROM person_dimension")
8
+ @end_of_time = DateTime.parse('9999-12-31 00:00:00')
9
+ end
10
+ context "of type 1" do
11
+ context "on run 1" do
12
+ setup do
13
+ do_type_1_run(1)
14
+ end
15
+ should "insert record" do
16
+ assert_equal 1, count_bobs
17
+ end
18
+ should "set the original address" do
19
+ assert_boston_address(find_bobs.first)
20
+ end
21
+ should "set the original id" do
22
+ assert_equal 1, find_bobs.first.id
23
+ end
24
+ should "skip the load if there is no change" do
25
+ do_type_1_run(1)
26
+ lines = lines_for('scd_test_type_1.txt')
27
+ assert lines.empty?, "scheduled load expected to be empty, was #{lines.size} records"
28
+ end
29
+ end
30
+ context "on run 2" do
31
+ setup do
32
+ do_type_1_run(1)
33
+ do_type_1_run(2)
34
+ end
35
+ should "delete the old record" do
36
+ assert_equal 1, count_bobs, "new record created, but old not deleted: #{find_bobs.inspect}"
37
+ end
38
+ should "update the address" do
39
+ assert_los_angeles_address(find_bobs.last)
40
+ end
41
+ should "keep id" do
42
+ assert_equal 1, find_bobs.first.id
43
+ end
44
+ should "only change once even if run again" do
45
+ do_type_1_run(2)
46
+ assert_equal 1, count_bobs
47
+ lines = lines_for('scd_test_type_1.txt')
48
+ assert lines.empty?, "scheduled load expected to be empty, was #{lines.size} records"
49
+ end
50
+ should "revert address on new record" do
51
+ do_type_1_run(1)
52
+ assert_boston_address(find_bobs.first)
53
+ end
54
+ should "keep record on revert" do
55
+ do_type_1_run(1)
56
+ assert_equal 1, count_bobs
57
+ end
58
+ end
59
+ end
60
+ context "of type 2" do
61
+ context "on run 1" do
62
+ setup do
63
+ do_type_2_run(1)
64
+ end
65
+ should "insert record" do
66
+ assert_equal 1, count_bobs
67
+ end
68
+ should "set the original record" do
69
+ assert_boston_address(find_bobs.first)
70
+ end
71
+ should "set the original id" do
72
+ assert_equal 1, find_bobs.first.id
73
+ end
74
+ should "set the effective date" do
75
+ # doing comparison on strings, as comparison on objects
76
+ # doesn't consider things equal for some yet to be understood
77
+ # reason
78
+ assert_equal current_datetime.to_s, find_bobs.first.effective_date.to_s
79
+ end
80
+ should "set the end date" do
81
+ assert_equal @end_of_time, find_bobs.first.end_date
82
+ end
83
+ should "set the latest version flag" do
84
+ assert find_bobs.first.latest_version?
85
+ end
86
+ should "skip the load if there is no change" do
87
+ do_type_2_run(1)
88
+ assert_equal 1, find_bobs.last.id, "scheduled load expected to be empty"
89
+ end
90
+
91
+ end
92
+ context "on run 2" do
93
+ setup do
94
+ do_type_2_run(1)
95
+ do_type_2_run(2)
96
+ end
97
+ should "insert new record" do
98
+ assert_equal 2, count_bobs
99
+ end
100
+ should "keep the primary key of the original version" do
101
+ assert_not_nil find_bobs.detect { |bob| 1 == bob.id }
102
+ end
103
+ should "increment the primary key for the new version" do
104
+ assert_not_nil find_bobs.detect { |bob| 2 == bob.id }
105
+ end
106
+ should "expire the old record" do
107
+ original_bob = find_bobs.detect { |bob| 1 == bob.id }
108
+ new_bob = find_bobs.detect { |bob| 2 == bob.id }
109
+ assert_equal new_bob.effective_date, original_bob.end_date
110
+ end
111
+ should "keep the address for the expired record" do
112
+ assert_boston_address(find_bobs.detect { |bob| 1 == bob.id })
113
+ end
114
+ should "update the address on the new record" do
115
+ assert_los_angeles_address(find_bobs.detect { |bob| 2 == bob.id })
116
+ end
117
+ should "activate the new record" do
118
+ # doing comparison on strings, as comparison on objects
119
+ # doesn't consider things equal for some yet to be understood
120
+ # reason
121
+ assert_equal current_datetime.to_s, find_bobs.detect { |bob| 2 == bob.id }.effective_date.to_s
122
+ end
123
+ should "set the end date for the new record" do
124
+ assert_equal @end_of_time, find_bobs.detect { |bob| 2 == bob.id }.end_date
125
+ end
126
+ should "shift the latest version" do
127
+ original_bob = find_bobs.detect { |bob| 1 == bob.id }
128
+ new_bob = find_bobs.detect { |bob| 2 == bob.id }
129
+ assert !original_bob.latest_version?
130
+ assert new_bob.latest_version?
131
+ end
132
+ should "only execute a change once" do
133
+ do_type_2_run(2)
134
+ assert_equal 2, count_bobs, "scheduled load expected to be empty"
135
+ end
136
+ should "insert new records on revert" do
137
+ do_type_2_run(1)
138
+ assert_equal 3, count_bobs
139
+ end
140
+ should "update address on new record on revert" do
141
+ do_type_2_run(1)
142
+ assert_boston_address(find_bobs.detect { |bob| 3 == bob.id })
143
+ end
144
+ should "only delete one row on an scd change" do
145
+ # Two records right now
146
+ assert_equal 2, count_bobs
147
+ do_type_2_run(1) # put third version in (same as first version, but that's irrelevant)
148
+ # was failing because first and second versions were being deleted.
149
+ assert_equal 3, count_bobs
150
+ end
151
+ end
152
+ context "on non sdc fields that change" do
153
+ setup do
154
+ do_type_2_run_with_only_city_state_zip_scd(1)
155
+ do_type_2_run_with_only_city_state_zip_scd(2)
156
+ end
157
+ should "not create an extra record" do
158
+ do_type_2_run_with_only_city_state_zip_scd(3)
159
+ assert_equal 2, count_bobs
160
+ end
161
+ should "keep id" do
162
+ do_type_2_run_with_only_city_state_zip_scd(3)
163
+ assert_not_nil find_bobs.detect { |bob| 2 == bob.id }
164
+ end
165
+ should "keep dates" do
166
+ old_bob = find_bobs.detect { |bob| 2 == bob.id }
167
+ do_type_2_run_with_only_city_state_zip_scd(3)
168
+ new_bob = find_bobs.detect { |bob| 2 == bob.id }
169
+ assert_equal old_bob.end_date, new_bob.end_date
170
+ assert_equal old_bob.effective_date, new_bob.effective_date
171
+ end
172
+ should "keep the latest version flag" do
173
+ do_type_2_run_with_only_city_state_zip_scd(3)
174
+ assert find_bobs.detect { |bob| 2 == bob.id }.latest_version?
175
+ end
176
+ should "treat non scd fields like type 1 fields" do
177
+ do_type_2_run_with_only_city_state_zip_scd(3)
178
+ assert_los_angeles_address(find_bobs.detect { |bob| 2 == bob.id }, "280 Pine Street")
179
+ end
180
+ should "skip load when there is no change" do
181
+ do_type_2_run_with_only_city_state_zip_scd(2)
182
+ assert_equal 2, count_bobs, "scheduled load expected to be empty"
183
+ end
184
+ end
185
+ end
186
+ end
187
+
188
+ def do_type_2_run(run_num)
189
+ ENV['run_number'] = run_num.to_s
190
+ assert_nothing_raised do
191
+ run_ctl_file("scd_test_type_2.ctl")
192
+ end
193
+ end
194
+
195
+ def do_type_2_run_with_only_city_state_zip_scd(run_num)
196
+ ENV['type_2_scd_fields'] = Marshal.dump([:city, :state, :zip_code])
197
+ do_type_2_run(run_num)
198
+ end
199
+
200
+ def do_type_1_run(run_num)
201
+ ENV['run_number'] = run_num.to_s
202
+ assert_nothing_raised do
203
+ run_ctl_file("scd_test_type_1.ctl")
204
+ end
205
+ end
206
+
207
+ def lines_for(file)
208
+ File.readlines(File.dirname(__FILE__) + "/output/#{file}")
209
+ end
210
+
211
+ def run_ctl_file(file)
212
+ ETL::Engine.process(File.dirname(__FILE__) + "/#{file}")
213
+ end
214
+
215
+ def count_bobs
216
+ @connection.select_value(
217
+ "SELECT count(*) FROM person_dimension WHERE first_name = 'Bob' and last_name = 'Smith'").to_i
218
+ end
219
+
220
+ def find_bobs
221
+ bobs = @connection.select_all(
222
+ "SELECT * FROM person_dimension WHERE first_name = 'Bob' and last_name = 'Smith'")
223
+ bobs.each do |bob|
224
+ def bob.id
225
+ self["id"].to_i
226
+ end
227
+ def bob.effective_date
228
+ DateTime.parse(self["effective_date"])
229
+ end
230
+ def bob.end_date
231
+ DateTime.parse(self["end_date"])
232
+ end
233
+ def bob.latest_version?
234
+ ActiveRecord::ConnectionAdapters::Column.value_to_boolean(self["latest_version"])
235
+ end
236
+ end
237
+ bobs
238
+ end
239
+
240
+ def current_datetime
241
+ DateTime.parse(Time.now.to_s(:db))
242
+ end
243
+
244
+ def assert_boston_address(bob, street = "200 South Drive")
245
+ assert_equal street, bob['address'], bob.inspect
246
+ assert_equal "Boston", bob['city'], bob.inspect
247
+ assert_equal "MA", bob['state'], bob.inspect
248
+ assert_equal "32123", bob['zip_code'], bob.inspect
249
+ end
250
+
251
+ def assert_los_angeles_address(bob, street = "1010 SW 23rd St")
252
+ assert_equal street, bob['address'], bob.inspect
253
+ assert_equal "Los Angeles", bob['city'], bob.inspect
254
+ assert_equal "CA", bob['state'], bob.inspect
255
+ assert_equal "90392", bob['zip_code'], bob.inspect
256
+ end
257
+ end
data/test/screen_test.rb ADDED
@@ -0,0 +1,9 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class ScreenTest < Test::Unit::TestCase
4
+ def test_screen
5
+ assert_raises(SystemExit) do
6
+ ETL::Engine.process(File.dirname(__FILE__) + '/screen_test_fatal.ctl')
7
+ end
8
+ end
9
+ end
data/test/source_test.rb ADDED
@@ -0,0 +1,154 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class Person < ActiveRecord::Base
4
+ end
5
+ class SourceTest < Test::Unit::TestCase
6
+
7
+ context "source" do
8
+ should "set store_locally to true by default" do
9
+ assert_equal true, Source.new(nil, { :store_locally => true }, nil).store_locally
10
+ end
11
+
12
+ should "let the user set store_locally to true" do
13
+ assert_equal true, Source.new(nil, { :store_locally => true }, nil).store_locally
14
+ end
15
+
16
+ should "let the user set store_locally to false" do
17
+ assert_equal false, Source.new(nil, { :store_locally => false }, nil).store_locally
18
+ end
19
+ end
20
+
21
+ context "a file source" do
22
+ context "with delimited data" do
23
+ setup do
24
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
25
+ configuration = {
26
+ :file => 'data/delimited.txt',
27
+ :parser => :csv
28
+ }
29
+ definition = self.definition + [:sex]
30
+
31
+ source = ETL::Control::FileSource.new(control, configuration, definition)
32
+ @rows = source.collect { |row| row }
33
+ end
34
+ should "find 3 rows in the delimited file" do
35
+ assert_equal 3, @rows.length
36
+ end
37
+ end
38
+ end
39
+
40
+ context "a file source with a glob" do
41
+ setup do
42
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/multiple_delimited.ctl')
43
+ configuration = {
44
+ :file => 'data/multiple_delimited_*.txt',
45
+ :parser => :csv
46
+ }
47
+
48
+ source = ETL::Control::FileSource.new(control, configuration, definition)
49
+ @rows = source.collect { |row| row }
50
+ end
51
+ should "find 6 rows in total" do
52
+ assert_equal 6, @rows.length
53
+ end
54
+ end
55
+
56
+ context "a file source with an absolute path" do
57
+ setup do
58
+ FileUtils.cp(File.dirname(__FILE__) + '/data/delimited.txt', '/tmp/delimited_abs.txt')
59
+
60
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
61
+ '/delimited_absolute.ctl')
62
+ configuration = {
63
+ :file => '/tmp/delimited_abs.txt',
64
+ :parser => :csv
65
+ }
66
+ definition = self.definition + [:sex]
67
+
68
+ source = ETL::Control::FileSource.new(control, configuration, definition)
69
+ @rows = source.collect { |row| row }
70
+ end
71
+ should "find 3 rows" do
72
+ assert_equal 3, @rows.length
73
+ end
74
+ end
75
+
76
+ context "multiple sources" do
77
+ setup do
78
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
79
+ '/multiple_source_delimited.ctl')
80
+ @rows = control.sources.collect { |source| source.collect { |row| row }}.flatten!
81
+ end
82
+ should "find 12 rows" do
83
+ assert_equal 12, @rows.length
84
+ end
85
+ end
86
+
87
+ context "a database source" do
88
+ setup do
89
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
90
+ configuration = {
91
+ :database => 'etl_unittest',
92
+ :target => :operational_database,
93
+ :table => 'people',
94
+ }
95
+ definition = [
96
+ :first_name,
97
+ :last_name,
98
+ :ssn,
99
+ ]
100
+ @source = ETL::Control::DatabaseSource.new(control, configuration, definition)
101
+ end
102
+ should "set the local file for extraction storage" do
103
+ assert_match %r{source_data/localhost/etl_unittest/people/\d+.csv}, @source.local_file.to_s
104
+ end
105
+ should "find 1 row" do
106
+ Person.delete_all
107
+ assert_equal 0, Person.count
108
+ Person.create!(:first_name => 'Bob', :last_name => 'Smith', :ssn => '123456789')
109
+ assert_equal 1, Person.count
110
+ rows = @source.collect { |row| row }
111
+ assert_equal 1, rows.length
112
+ end
113
+ end
114
+
115
+ context "a file source with an xml parser" do
116
+ setup do
117
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
118
+ '/xml.ctl')
119
+ @rows = control.sources.collect{ |source| source.collect { |row| row }}.flatten!
120
+ end
121
+ should "find 2 rows" do
122
+ assert_equal 2, @rows.length
123
+ end
124
+ end
125
+
126
+ context "a model source" do
127
+ setup do
128
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/model_source.ctl')
129
+ configuration = {
130
+
131
+ }
132
+ definition = [
133
+ :first_name,
134
+ :last_name,
135
+ :ssn
136
+ ]
137
+ end
138
+ should_eventually "find n rows" do
139
+
140
+ end
141
+ end
142
+
143
+ def definition
144
+ [
145
+ :first_name,
146
+ :last_name,
147
+ :ssn,
148
+ {
149
+ :name => :age,
150
+ :type => :integer
151
+ }
152
+ ]
153
+ end
154
+ end