activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,43 @@
1
+ source :in, {  # CSV input for the type 1 SCD test
2
+ :file => "scd/#{ENV['run_number']}.txt",  # file name is built from the run_number environment variable
3
+ :parser => :csv
4
+ },
5
+ [
6
+ :first_name,
7
+ :last_name,
8
+ :address,
9
+ :city,
10
+ :state,
11
+ :zip_code
12
+ ]
13
+
14
+ # NOTE: These are not usually required for a type 1 SCD dimension, but since
15
+ # we're sharing this table with the type 2 tests, they're necessary.
16
+ transform :effective_date, :default, :default_value => Time.now.to_s(:db)  # Time.now is evaluated once, when this line runs
17
+ transform :end_date, :default, :default_value => '9999-12-31 00:00:00'
18
+ transform :latest_version, :default, :default_value => true
19
+
20
+ destination :out, {
21
+ :file => 'output/scd_test_type_1.txt',  # same path the bulk_import step below reads
22
+ :natural_key => [:first_name, :last_name],
23
+ :scd => {
24
+ :type => 1,
25
+ :dimension_target => :data_warehouse,
26
+ :dimension_table => 'person_dimension'
27
+ },
28
+ :scd_fields => [:address, :city, :state, :zip_code]
29
+ },
30
+ {
31
+ :order => [
32
+ :id, :first_name, :last_name, :address, :city, :state, :zip_code, :effective_date, :end_date, :latest_version
33
+ ],
34
+ :virtual => {
35
+ :id => ETL::Generator::SurrogateKeyGenerator.new(:target => :data_warehouse, :table => 'person_dimension')  # :id is not in the source definition; presumably filled in by this generator
36
+ }
37
+ }
38
+
39
+ post_process :bulk_import, {  # loads the file written by the destination above into person_dimension
40
+ :file => 'output/scd_test_type_1.txt',
41
+ :target => :data_warehouse,
42
+ :table => 'person_dimension'
43
+ }
@@ -0,0 +1,34 @@
1
+ source :in, {  # CSV input for the type 2 SCD test
2
+ :file => "scd/#{ENV['run_number']}.txt",  # file name is built from the run_number environment variable
3
+ :parser => :csv
4
+ },
5
+ [
6
+ :first_name,
7
+ :last_name,
8
+ :address,
9
+ :city,
10
+ :state,
11
+ :zip_code
12
+ ]
13
+
14
+ destination :out, {  # writes to the person_dimension table directly (no intermediate file or bulk import in this script)
15
+ :type => :database,
16
+ :target => :data_warehouse,
17
+ :database => 'etl_unittest',
18
+ :table => 'person_dimension',
19
+ :natural_key => [:first_name, :last_name],
20
+ :scd => {
21
+ :type => 2,
22
+ :dimension_target => :data_warehouse,
23
+ :dimension_table => 'person_dimension'
24
+ },
25
+ :scd_fields => ENV['type_2_scd_fields'] ? Marshal.load(ENV['type_2_scd_fields']) : [:address, :city, :state, :zip_code]  # NOTE(review): Marshal.load on an environment variable can instantiate arbitrary objects; only safe if the test harness is the sole writer of type_2_scd_fields - confirm
26
+ },
27
+ {
28
+ :order => [
29
+ :id, :first_name, :last_name, :address, :city, :state, :zip_code, :effective_date, :end_date, :latest_version
30
+ ],
31
+ :virtual => {
32
+ :id => ETL::Generator::SurrogateKeyGenerator.new(:target => :data_warehouse, :table => 'person_dimension')  # :id is not in the source definition; presumably filled in by this generator
33
+ }
34
+ }
@@ -0,0 +1,9 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class ScreenTest < Test::Unit::TestCase  # checks engine behaviour when a :fatal screen is declared
4
+ def test_screen
5
+ assert_raises(SystemExit) do  # processing the fatal control file is expected to exit the process
6
+ ETL::Engine.process(File.dirname(__FILE__) + '/screen_test_fatal.ctl')
7
+ end
8
+ end  # NOTE(review): screen_test_error.ctl is not exercised by this test - confirm whether that is intended
9
+ end
@@ -0,0 +1,3 @@
1
+ screen(:error){  # declares a screen at :error level
2
+ ETL::Screen::RowCountScreen.new(self, :rows => 1)  # presumably checks for exactly one processed row - verify against RowCountScreen
3
+ }
@@ -0,0 +1,3 @@
1
+ screen(:fatal){  # declares a screen at :fatal level; screen_test.rb expects processing this file to raise SystemExit
2
+ ETL::Screen::RowCountScreen.new(self, :rows => 1)  # presumably checks for exactly one processed row - verify against RowCountScreen
3
+ }
@@ -0,0 +1,154 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class Person < ActiveRecord::Base  # bare model used by the "a database source" context to clear and seed rows
4
+ end
5
+ class SourceTest < Test::Unit::TestCase
6
+
7
+ context "source" do
8
+ should "set store_locally to true by default" do
9
+ assert_equal true, Source.new(nil, { :store_locally => true }, nil).store_locally
10
+ end
11
+
12
+ should "let the user set store_locally to true" do
13
+ assert_equal true, Source.new(nil, { :store_locally => true }, nil).store_locally
14
+ end
15
+
16
+ should "let the user set store_locally to false" do
17
+ assert_equal false, Source.new(nil, { :store_locally => false }, nil).store_locally
18
+ end
19
+ end
20
+
21
+ context "a file source" do  # reads data/delimited.txt through FileSource with the CSV parser
22
+ context "with delimited data" do
23
+ setup do
24
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
25
+ configuration = {
26
+ :file => 'data/delimited.txt',
27
+ :parser => :csv
28
+ }
29
+ definition = self.definition + [:sex]  # shared field list from #definition plus one extra :sex column
30
+
31
+ source = ETL::Control::FileSource.new(control, configuration, definition)
32
+ @rows = source.collect { |row| row }  # materialise every row the source yields
33
+ end
34
+ should "find 3 rows in the delimited file" do
35
+ assert_equal 3, @rows.length
36
+ end
37
+ end
38
+ end
39
+
40
+ context "a file source with a glob" do  # the :file value contains a * wildcard
41
+ setup do
42
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/multiple_delimited.ctl')
43
+ configuration = {
44
+ :file => 'data/multiple_delimited_*.txt',
45
+ :parser => :csv
46
+ }
47
+
48
+ source = ETL::Control::FileSource.new(control, configuration, definition)  # no local named definition here, so this calls the #definition helper method
49
+ @rows = source.collect { |row| row }
50
+ end
51
+ should "find 6 rows in total" do
52
+ assert_equal 6, @rows.length  # total across all files matched by the pattern
53
+ end
54
+ end
55
+
56
+ context "a file source with an absolute path" do  # same data as the delimited test, addressed by absolute path
57
+ setup do
58
+ FileUtils.cp(File.dirname(__FILE__) + '/data/delimited.txt', '/tmp/delimited_abs.txt')  # copies the fixture to a fixed /tmp location before each test
59
+
60
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
61
+ '/delimited_absolute.ctl')
62
+ configuration = {
63
+ :file => '/tmp/delimited_abs.txt',
64
+ :parser => :csv
65
+ }
66
+ definition = self.definition + [:sex]  # shared field list from #definition plus one extra :sex column
67
+
68
+ source = ETL::Control::FileSource.new(control, configuration, definition)
69
+ @rows = source.collect { |row| row }
70
+ end
71
+ should "find 3 rows" do
72
+ assert_equal 3, @rows.length
73
+ end
74
+ end
75
+
76
+ context "multiple sources" do
77
+ setup do
78
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
79
+ '/multiple_source_delimited.ctl')
80
+ @rows = control.sources.collect { |source| source.collect { |row| row }}.flatten!
81
+ end
82
+ should "find 12 rows" do
83
+ assert_equal 12, @rows.length
84
+ end
85
+ end
86
+
87
+ context "a database source" do
88
+ setup do
89
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
90
+ configuration = {
91
+ :database => 'etl_unittest',
92
+ :target => :operational_database,
93
+ :table => 'people',
94
+ }
95
+ definition = [
96
+ :first_name,
97
+ :last_name,
98
+ :ssn,
99
+ ]
100
+ @source = ETL::Control::DatabaseSource.new(control, configuration, definition)
101
+ end
102
+ should "set the local file for extraction storage" do
103
+ assert_match %r{source_data/localhost/etl_unittest/people/\d+.csv}, @source.local_file.to_s
104
+ end
105
+ should "find 1 row" do
106
+ Person.delete_all
107
+ assert_equal 0, Person.count
108
+ Person.create!(:first_name => 'Bob', :last_name => 'Smith', :ssn => '123456789')
109
+ assert_equal 1, Person.count
110
+ rows = @source.collect { |row| row }
111
+ assert_equal 1, rows.length
112
+ end
113
+ end
114
+
115
+ context "a file source with an xml parser" do
116
+ setup do
117
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
118
+ '/xml.ctl')
119
+ @rows = control.sources.collect{ |source| source.collect { |row| row }}.flatten!
120
+ end
121
+ should "find 2 rows" do
122
+ assert_equal 2, @rows.length
123
+ end
124
+ end
125
+
126
+ context "a model source" do  # placeholder: no source is built and the only test is pending
127
+ setup do
128
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/model_source.ctl')
129
+ configuration = {
130
+
131
+ }
132
+ definition = [
133
+ :first_name,
134
+ :last_name,
135
+ :ssn
136
+ ]
137
+ end  # NOTE(review): control, configuration and definition are assigned but never used in this context
138
+ should_eventually "find n rows" do  # declared with should_eventually and an empty body
139
+
140
+ end
141
+ end
142
+
143
+ def definition  # shared field list for the file source contexts; returns a new array on each call
144
+ [
145
+ :first_name,
146
+ :last_name,
147
+ :ssn,
148
+ {
149
+ :name => :age,  # the only field given as a hash, with an explicit :type
150
+ :type => :integer
151
+ }
152
+ ]
153
+ end
154
+ end
@@ -0,0 +1,37 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+
4
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
5
+ $:.unshift(File.dirname(__FILE__))
6
+
7
+ require 'test/unit'
8
+ require 'pp'
9
+ require 'etl'
10
+ require 'shoulda'
11
+ require 'flexmock/test_unit'
12
+
13
+ database_yml = File.dirname(__FILE__) + '/config/database.yml'  # connection settings live next to the tests
14
+ ETL::Engine.init(:config => database_yml)
15
+ ETL::Engine.logger = Logger.new(STDOUT)
16
+ # ETL::Engine.logger.level = Logger::DEBUG
17
+ ETL::Engine.logger.level = Logger::FATAL  # only FATAL messages are logged during test runs
18
+
19
+ db = YAML::load(IO.read(database_yml))['operational_database']['adapter']  # adapter name decides which connection script is required below
20
+ # allow both mysql2 and mysql adapters
21
+ db = db.gsub('mysql2', 'mysql')
22
+ raise "Unsupported test db '#{db}'" unless ['mysql', 'postgresql'].include?(db)
23
+
24
+ require "connection/#{db}/connection"  # resolved through the test directory added to $: above
25
+ ActiveRecord::Base.establish_connection :operational_database
26
+ ETL::Execution::Job.delete_all  # clear recorded jobs when the helper is loaded
27
+
28
+ require 'mocks/mock_source'
29
+ require 'mocks/mock_destination'
30
+
31
+ # shortcut to launch a ctl file
32
+ def process(file)
33
+ Engine.process(File.join(File.dirname(__FILE__), file))
34
+ end
35
+
36
+ puts "ActiveRecord::VERSION = #{ActiveRecord::VERSION::STRING}"
37
+
@@ -0,0 +1,101 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class MyResolver  # stub resolver passed as :resolver in test_foreign_key_lookup_transform
4
+ def resolve(value)  # ignores value and always returns 4
5
+ 4
6
+ end
7
+ end
8
+
9
+ class TransformTest < Test::Unit::TestCase  # exercises the individual ETL::Transform classes by calling #transform directly
10
+ def test_sha1_transform
11
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
12
+ digest_value = 'a9993e364706816aba3e25717850c26c9cd0d89d'  # SHA-1 hex digest of 'abc'
13
+ assert_equal digest_value, ETL::Transform::Sha1Transform.new(
14
+ control, nil
15
+ ).transform('test', 'abc', [])
16
+ end
17
+ def test_block_transform  # placeholder: both statements below are commented out, so nothing is asserted
18
+ #transforms = [Proc.new(){|name, value, row| value[0,2]}]
19
+ #assert_equal '11', ETL::Transform::Transform.transform(:ssn, '1111223333', [], transforms)
20
+ end
21
+ def test_decode_transform
22
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
23
+ configuration = {:decode_table_path => 'data/decode.txt'}
24
+
25
+ t = ETL::Transform::DecodeTransform.new(control, nil, configuration)
26
+
27
+ assert_equal 'Male', t.transform(nil, 'M', [])
28
+ assert_equal 'Female', t.transform(nil, 'F', [])
29
+ assert_equal 'Unknown', t.transform(nil, '', [])  # empty and unrecognised codes are both expected to decode to 'Unknown'
30
+ assert_equal 'Unknown', t.transform(nil, 'blah', [])
31
+ end
32
+ def test_string_to_date_transform
33
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
34
+ t = ETL::Transform::StringToDateTransform.new(control, nil)
35
+
36
+ assert_equal Date.parse('2005-01-01'), t.transform(nil, '2005-01-01', [])
37
+ assert_equal Date.parse('2004-10-20 20:30:00'), t.transform(nil, '2004-10-20', [])  # Date.parse drops the time part, so the expected value is the date 2004-10-20
38
+ end
39
+ def test_date_to_string_transform
40
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
41
+ t = ETL::Transform::DateToStringTransform.new(control, nil)
42
+
43
+ d1 = Date.parse('2005-01-01')
44
+ t1 = Time.parse('2004-10-20 23:03:23')
45
+ assert_equal '2005-01-01', t.transform(nil, d1, [])  # with no :format given, the expected output is YYYY-MM-DD
46
+ assert_equal '2004-10-20', t.transform(nil, t1, [])
47
+
48
+ t = ETL::Transform::DateToStringTransform.new(control, nil, {:format => '%m/%d/%Y'})
49
+
50
+ assert_equal '01/01/2005', t.transform(nil, d1, [])
51
+ assert_equal '10/20/2004', t.transform(nil, t1, [])
52
+ end
53
+ def test_string_to_datetime_transform
54
+ v = '1/1/1900 04:34:30'
55
+ t = ETL::Transform::StringToDateTimeTransform.new(flexmock(:control), nil)  # a flexmock stands in for the control object here
56
+ assert_equal DateTime.parse(v), t.transform(nil, v, nil)
57
+ end
58
+ def test_foreign_key_lookup_transform
59
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
60
+ configuration = {:collection => {'foo' => 1, 'bar' => 2, 'baz' => 3}}
61
+ t = ETL::Transform::ForeignKeyLookupTransform.new(control, nil, configuration)
62
+
63
+ assert_equal 1, t.transform(nil, 'foo', nil)
64
+ assert_equal 2, t.transform(nil, 'bar', nil)
65
+ assert_equal 3, t.transform(nil, 'baz', nil)
66
+ assert_raises(ETL::ResolverError, 'Foreign key for bing not found and no resolver specified') do  # no :resolver configured, so an unknown key must raise
67
+ assert_equal 4, t.transform(nil, 'bing', nil)  # the inner assert_equal never completes when transform raises as expected
68
+ end
69
+
70
+ configuration = {:collection => {'foo' => 1, 'bar' => 2, 'baz' => 3}, :resolver => MyResolver}  # the resolver is passed as a class, not an instance
71
+ t = ETL::Transform::ForeignKeyLookupTransform.new(control, nil, configuration)
72
+ assert_equal 1, t.transform(nil, 'foo', nil)
73
+ assert_equal 2, t.transform(nil, 'bar', nil)
74
+ assert_equal 3, t.transform(nil, 'baz', nil)
75
+ assert_equal 4, t.transform(nil, 'bing', nil)  # 4 is the constant MyResolver#resolve returns
76
+ end
77
+ def test_type_transform
78
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) + '/delimited.ctl')
79
+ assert_equal 10, ETL::Transform::TypeTransform.new(control, nil, {:type => :number}).transform(nil, '10', nil)
80
+
81
+ assert_equal BigDecimal::ROUND_HALF_UP, BigDecimal.mode(BigDecimal::ROUND_MODE)  # guards the rounding mode in effect before the decimal check
82
+ decimal_transformed = ETL::Transform::TypeTransform.new(
83
+ control, nil, {:type => :decimal, :scale => 4}
84
+ ).transform(nil, '10.0000000000000000000000000000000001', nil)
85
+ assert_equal '10.0000000000000000000000000000000001', decimal_transformed.to_s('F')  # NOTE(review): expected value keeps full precision although :scale => 4 is passed - confirm that is intended
86
+ end
87
+ def test_non_existent_transformer  # placeholder: empty body, asserts nothing
88
+
89
+ end
90
+ def test_default_transform
91
+ t = ETL::Transform::DefaultTransform.new(flexmock('control'), nil, {:default_value => 'foo'})
92
+ assert_equal 'foo', t.transform(nil, '', nil)  # empty string and nil are both replaced by the default
93
+ assert_equal 'foo', t.transform(nil, nil, nil)
94
+ assert_equal 'bar', t.transform(nil, 'bar', nil)  # a present value is passed through unchanged
95
+ end
96
+ def test_ordinalize_transform
97
+ t = ETL::Transform::OrdinalizeTransform.new(flexmock('control'), nil, {})
98
+ assert_equal '1st', t.transform(nil, 1, nil)
99
+ assert_equal '10th', t.transform(nil, 10, nil)
100
+ end
101
+ end
@@ -0,0 +1,37 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ include ETL::Processor
4
+
5
+ class TruncateTest < ActiveRecord::Base  # model over the truncate_test table, used to insert rows and read back their ids
6
+ set_table_name 'truncate_test'
7
+ end
8
+
9
+ class TruncateProcessorTest < Test::Unit::TestCase  # checks how TruncateProcessor affects the id given to the next inserted row
10
+
11
+ def create_item!  # inserts one row and returns the created record
12
+ TruncateTest.create!(:x => 'ABC')
13
+ end
14
+
15
+ def truncate!(options=nil)  # runs TruncateProcessor on the truncate_test table; options is passed through as :options
16
+ TruncateProcessor.new(nil,
17
+ :target => :data_warehouse,
18
+ :table => TruncateTest.table_name,
19
+ :options => options
20
+ ).process
21
+ end
22
+
23
+ should 'reset ids by default' do
24
+ create_item!
25
+ truncate!
26
+ assert_equal 1, create_item!.id  # first row after a plain truncate is expected to get id 1 again
27
+ end
28
+
29
+ if ETL::Engine.connection(:data_warehouse).class.name =~ /postgres/i  # evaluated while the class body loads, so the test below is only defined on a postgres connection
30
+ should 'allow disabling id reset for postgres' do
31
+ truncate!
32
+ create_item!
33
+ truncate!('CONTINUE IDENTITY')
34
+ assert_equal 2, create_item!.id  # the id sequence continues past the row created before the second truncate
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,31 @@
1
+ # puts "executing xml.ctl"
2
+
3
+ source :in, {  # reads data/xml.xml with the :xml parser
4
+ :file => 'data/xml.xml',
5
+ :parser => :xml
6
+ },
7
+ {
8
+ :collection => 'people/person',  # presumably the path selecting one element per row - verify against the XML parser
9
+ :fields => [
10
+ :first_name,
11
+ :last_name,
12
+ {
13
+ :name => :ssn,
14
+ :xpath => 'social_security_number'  # the :ssn field is read from an element with a different name
15
+ },
16
+ {
17
+ :name => :age,
18
+ :type => :integer
19
+ }
20
+ ]
21
+ }
22
+
23
+ destination :out, {
24
+ :file => 'output/xml.txt'
25
+ },
26
+ {
27
+ :order => [:first_name, :last_name, :ssn]  # :age is parsed above but not listed in the output order
28
+ }
29
+
30
+ transform :ssn, :sha1  # first transform declared for :ssn
31
+ transform(:ssn){ |v| v[0,24] }  # second transform declared for :ssn: keeps the first 24 characters of the value