activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,3 @@
1
+ 1,Chris,Smith,111223333
2
+ 2,Jim,,444332222
3
+ 3,Brian,Collingsworth,123443435
@@ -0,0 +1,3 @@
1
+ M:Male
2
+ F:Female
3
+ :Unknown
@@ -0,0 +1,3 @@
1
+ Chris,Smith,111223333,24,M
2
+ Jim,Foxworthy,444332222,51,M
3
+ Brian,Collingsworth,123443435,10,M
@@ -0,0 +1,2 @@
1
+ éphémère has accents.
2
+ let's encode them.
Binary file
Binary file
@@ -0,0 +1,3 @@
1
+ Bob Smith 12344555523
2
+ Jane Doe 98766211145
3
+ AbcdefghiJklmnopqrstu12345678920
@@ -0,0 +1,3 @@
1
+ Chris,Smith,111223333,24
2
+ Jim,Foxworthy,444332222,51
3
+ Brian,Collingsworth,123443435,10
@@ -0,0 +1,3 @@
1
+ Bob,Jones,444223333,28
2
+ Tom,Allen,324001232,33
3
+ Jesse,Baker,555443333,21
@@ -0,0 +1,38 @@
1
+ <?xml version="1.0" encoding="US-ASCII" standalone="yes"?>
2
+ <!DOCTYPE people >
3
+ <people>
4
+ <person ssn="123456789" type="employee">
5
+ <first_name>Bob</first_name>
6
+ <last_name>Smith</last_name>
7
+ <email>bsmith@foo.com</email>
8
+ <colours>
9
+ <eyes>brown</eyes>
10
+ <hair>black</hair>
11
+ <skin>fair</skin>
12
+ </colours>
13
+ <age>24</age>
14
+ </person>
15
+ <person ssn="111223333" type="client">
16
+ <first_name>Jane</first_name>
17
+ <last_name>Doe</last_name>
18
+ <email>jdoe@bar.com</email>
19
+ <colours>
20
+ <eyes>blue</eyes>
21
+ <hair>blond</hair>
22
+ <skin>medium</skin>
23
+ </colours>
24
+ <age>45</age>
25
+ </person>
26
+ <person ssn="133244566" type="client">
27
+ <first_name>Jake</first_name>
28
+ <last_name>Smithsonian</last_name>
29
+ <email>jake@example.com</email>
30
+ <colours>
31
+ <eyes>brown</eyes>
32
+ <hair>black</hair>
33
+ <skin>dark</skin>
34
+ </colours>
35
+ <age>37</age>
36
+ </person>
37
+ </people>
38
+
@@ -0,0 +1,3 @@
1
+ Bob,Smith
2
+ Jane,Doe
3
+ Chris,Cornell
@@ -0,0 +1,14 @@
1
+ <?xml version="1.0"?>
2
+
3
+ <people>
4
+ <person age="24">
5
+ <first_name>Bob</first_name>
6
+ <last_name>Smith</last_name>
7
+ <social_security_number>123456789</social_security_number>
8
+ </person>
9
+ <person age="31">
10
+ <first_name>John</first_name>
11
+ <last_name>Doe</last_name>
12
+ <social_security_number>222114545</social_security_number>
13
+ </person>
14
+ </people>
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0"?>
2
+
3
+ <people>
4
+ <person>
5
+ <first_name>Bob</first_name>
6
+ <last_name>Smith</last_name>
7
+ <social_security_number>123456789</social_security_number>
8
+ <age>24</age>
9
+ </person>
10
+ <person>
11
+ <first_name>John</first_name>
12
+ <last_name>Doe</last_name>
13
+ <social_security_number>222114545</social_security_number>
14
+ <age>31</age>
15
+ </person>
16
+ </people>
@@ -0,0 +1,43 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class DatabaseJoinProcessorTest < Test::Unit::TestCase
4
+
5
+ def new_processor(options)
6
+ ETL::Processor::DatabaseJoinProcessor.new(nil, options)
7
+ end
8
+
9
+ should 'raise an error unless :fields is specified' do
10
+ error = assert_raises(ETL::ControlError) { new_processor({}) }
11
+ assert_equal ":target must be specified", error.message
12
+ end
13
+
14
+ should 'return the row and the database result' do
15
+ row = ETL::Row[:id => 1, :first_name => 'Bob', :last_name => 'Smith', :ssn => '111234444']
16
+ control = ETL::Control::Control.parse(File.dirname(__FILE__) +
17
+ '/delimited.ctl')
18
+
19
+ Person.delete_all
20
+ assert_equal 0, Person.count
21
+
22
+ # First define a basic configuration to check defaults
23
+ configuration = {
24
+ :target => :data_warehouse,
25
+ :database => 'etl_unittest',
26
+ :table => 'people',
27
+ :buffer_size => 0
28
+ }
29
+ mapping = { :order => [:id, :first_name, :last_name, :ssn] }
30
+ dest = ETL::Control::DatabaseDestination.new(control, configuration, mapping)
31
+ dest.write(row)
32
+ dest.close
33
+
34
+ assert_equal 1, Person.find(:all).length
35
+
36
+ row = ETL::Row[:last_name => "Smith"]
37
+ processor = new_processor(:target => :data_warehouse,
38
+ :query => "SELECT first_name FROM people WHERE last_name = \#{connection.quote(row[:last_name])}",
39
+ :fields => ["first_name"]).process(row)
40
+ assert_equal row[:first_name], "Bob"
41
+ end
42
+
43
+ end
@@ -0,0 +1,96 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class DateDimensionBuilderTest < Test::Unit::TestCase
4
+
5
+ context "the DateDimensionBuilder" do
6
+ context "when initialized with defaults" do
7
+ setup do
8
+ @builder = ETL::Builder::DateDimensionBuilder.new
9
+ end
10
+ should "have a start date of 5 years ago" do
11
+ assert_equal Time.now.years_ago(5).to_date, @builder.start_date.to_date
12
+ end
13
+ should "have an end date of now" do
14
+ assert_equal Time.now.to_date, @builder.end_date.to_date
15
+ end
16
+ should "have an empty of array of holiday indicators" do
17
+ assert_equal [], @builder.holiday_indicators
18
+ end
19
+ end
20
+ context "when initialized with arguments" do
21
+ setup do
22
+ @start_date = Time.now.years_ago(2)
23
+ @end_date = Time.now.years_ago(1)
24
+ @builder = ETL::Builder::DateDimensionBuilder.new(@start_date, @end_date)
25
+ end
26
+ should "respect a custom start date" do
27
+ assert_equal @start_date.to_date, @builder.start_date.to_date
28
+ end
29
+ should "respect a custom end date" do
30
+ assert_equal @end_date.to_date, @builder.end_date.to_date
31
+ end
32
+ end
33
+ context "when building a date dimension using the default settings" do
34
+ setup do
35
+ # specific dates required when testing, because leap years affect
36
+ # how many records are built
37
+ @start_date = Date.parse('2002-05-19').to_time
38
+ @end_date = Date.parse('2007-05-19').to_time
39
+ @builder = ETL::Builder::DateDimensionBuilder.new(@start_date, @end_date)
40
+ @records = @builder.build
41
+ end
42
+ should "build a dimension with the correct number of records" do
43
+ assert_equal 1827, @records.length
44
+ end
45
+ should "have the correct first date" do
46
+ assert_date_dimension_record_equal(@builder.start_date, @records.first)
47
+ end
48
+ end
49
+ context "when building a date dimension with a fiscal year offset month" do
50
+ should_eventually "respect the fiscal year offset month" do
51
+
52
+ end
53
+ end
54
+ end
55
+
56
+ def assert_date_dimension_record_equal(date, record)
57
+ real_date = date
58
+ date = date.to_time
59
+ assert_equal date.strftime("%m/%d/%Y"), record[:date]
60
+ assert_equal date.strftime("%B %d,%Y"), record[:full_date_description]
61
+ assert_equal date.strftime("%A"), record[:day_of_week]
62
+ assert_equal date.day, record[:day_number_in_calendar_month]
63
+ assert_equal date.yday, record[:day_number_in_calendar_year]
64
+ assert_equal date.day, record[:day_number_in_fiscal_month]
65
+ assert_equal date.fiscal_year_yday, record[:day_number_in_fiscal_year]
66
+ assert_equal "Week #{date.week}", record[:calendar_week]
67
+ assert_equal date.week, record[:calendar_week_number_in_year]
68
+ assert_equal date.strftime("%B"), record[:calendar_month_name]
69
+ assert_equal date.month, record[:calendar_month_number_in_year]
70
+ assert_equal date.strftime("%Y-%m"), record[:calendar_year_month]
71
+ assert_equal "Q#{date.quarter}", record[:calendar_quarter]
72
+ assert_equal date.quarter, record[:calendar_quarter_number_in_year]
73
+ assert_equal "#{date.strftime('%Y')}-#{record[:calendar_quarter]}", record[:calendar_year_quarter]
74
+ assert_equal "#{date.year}", record[:calendar_year]
75
+ assert_equal "FY Week #{date.fiscal_year_week}", record[:fiscal_week]
76
+ assert_equal date.fiscal_year_week, record[:fiscal_week_number_in_year]
77
+ assert_equal date.fiscal_year_month, record[:fiscal_month]
78
+ assert_equal date.fiscal_year_month, record[:fiscal_month_number_in_year]
79
+ assert_equal "FY#{date.fiscal_year}-" + date.fiscal_year_month.to_s.rjust(2, '0'), record[:fiscal_year_month]
80
+ assert_equal "FY Q#{date.fiscal_year_quarter}", record[:fiscal_quarter]
81
+ assert_equal "FY#{date.fiscal_year}-Q#{date.fiscal_year_quarter}", record[:fiscal_year_quarter]
82
+ assert_equal date.fiscal_year_quarter, record[:fiscal_year_quarter_number]
83
+ assert_equal "FY#{date.fiscal_year}", record[:fiscal_year]
84
+ assert_equal date.fiscal_year, record[:fiscal_year_number]
85
+ assert_equal 'Nonholiday', record[:holiday_indicator]
86
+ assert_equal weekday_indicators[date.wday], record[:weekday_indicator]
87
+ assert_equal 'None', record[:selling_season]
88
+ assert_equal 'None', record[:major_event]
89
+ assert_equal record[:sql_date_stamp], real_date
90
+ end
91
+
92
+ private
93
+ def weekday_indicators
94
+ ['Weekend','Weekday','Weekday','Weekday','Weekday','Weekday','Weekend']
95
+ end
96
+ end
@@ -0,0 +1,30 @@
1
+ source :in, { # Declares the :in source: data/delimited.txt read with the :csv parser.
2
+ :file => 'data/delimited.txt',
3
+ :parser => {
4
+ :name => :csv
5
+ }
6
+ },
7
+ [ # Field list for the source; presumably names the parsed columns in order - confirm
8
+ :first_name,
9
+ :last_name,
10
+ :ssn,
11
+ :age,
12
+ :sex
13
+ ]
14
+
15
+ #transform :age, :type, :type => :number
16
+ transform :ssn, :sha1 # Apply the :sha1 transform to the ssn field.
17
+ transform(:ssn){ |n, v, row| v[0,24] } # Then keep the first 24 characters of the value (assumes v is a String).
18
+ transform :sex, :decode, {:decode_table_path => 'data/decode.txt'} # Apply the :decode transform to sex using the table file data/decode.txt.
19
+
20
+ destination :out, { # Declares the :out destination; no :type is given, presumably a plain file destination - confirm
21
+ :file => 'output/delimited.txt'
22
+ },
23
+ {
24
+ :order => [:id, :first_name, :last_name, :ssn, :age, :sex, :test, :calc_test], # Output field order, including the three virtual fields below.
25
+ :virtual => { # Fields that are not in the source field list above.
26
+ :id => :surrogate_key,
27
+ :test => "test!",
28
+ :calc_test => Time.now # Evaluated once, when this control file is evaluated - not per row.
29
+ },
30
+ }
@@ -0,0 +1,31 @@
1
+ source :in, { # Declares the :in source, read from an absolute path with the :csv parser.
2
+ :file => '/tmp/delimited_abs.txt',
3
+ :parser => {
4
+ :name => :csv
5
+ }
6
+ },
7
+ [ # Field list for the source; entries are either a bare name or a hash with :name and :type.
8
+ :first_name,
9
+ :last_name,
10
+ :ssn,
11
+ {
12
+ :name => :age,
13
+ :type => :integer # presumably makes age an integer rather than a string - confirm
14
+ },
15
+ :sex
16
+ ]
17
+
18
+ transform :ssn, :sha1 # Apply the :sha1 transform to the ssn field.
19
+ transform(:ssn){ |n, v, row| v[0,24] } # Then keep the first 24 characters of the value (assumes v is a String).
20
+ transform :sex, :decode, {:decode_table_path => 'data/decode.txt'} # Apply the :decode transform to sex using the table file data/decode.txt.
21
+
22
+ destination :out, { # Declares the :out destination; unlike the source, its path is relative.
23
+ :file => 'data/delimited_abs.txt'
24
+ },
25
+ {
26
+ :order => [:first_name, :last_name, :ssn, :age, :sex, :test, :calc_test], # Output field order; no :id field in this script.
27
+ :virtual => { # Fields that are not in the source field list above.
28
+ :test => "test!",
29
+ :calc_test => Time.now # Evaluated once, when this control file is evaluated - not per row.
30
+ }
31
+ }
@@ -0,0 +1,23 @@
1
+ source :in, {
2
+ :file => 'data/delimited.txt',
3
+ :parser => :csv
4
+ },
5
+ [
6
+ :id,
7
+ :first_name,
8
+ :last_name,
9
+ :ssn
10
+ ]
11
+
12
+ transform :ssn, :sha1
13
+ transform(:ssn){ |v| v[0,24] }
14
+
15
+ destination :out, {
16
+ :type => :database,
17
+ :target => :data_warehouse,
18
+ :database => 'etl_unittest',
19
+ :table => 'people',
20
+ },
21
+ {
22
+ :order => [:id, :first_name, :last_name, :ssn]
23
+ }
@@ -0,0 +1,31 @@
1
+ source :in, { # Declares the :in source: data/delimited.txt read with the :csv parser.
2
+ :file => 'data/delimited.txt',
3
+ :parser => {
4
+ :name => :csv
5
+ }
6
+ },
7
+ [ # Field list for the source; presumably names the parsed columns in order - confirm
8
+ :first_name,
9
+ :last_name,
10
+ :ssn,
11
+ :age,
12
+ :sex
13
+ ]
14
+
15
+ #transform :age, :type, :type => :number
16
+ transform :ssn, :sha1 # Apply the :sha1 transform to the ssn field.
17
+ transform(:ssn){ |n, v, row| v[0,24] } # Then keep the first 24 characters of the value (assumes v is a String).
18
+ transform :sex, :decode, {:decode_table_path => 'data/decode.txt'} # Apply the :decode transform to sex using the table file data/decode.txt.
19
+
20
+ destination :out, { # Declares the :out destination with :type => :excel, writing output/delimited_excel.xls.
21
+ :type => :excel,
22
+ :file => 'output/delimited_excel.xls'
23
+ },
24
+ {
25
+ :order => [:id, :first_name, :last_name, :ssn, :age, :sex, :test, :calc_test], # Output field order, including the three virtual fields below.
26
+ :virtual => { # Fields that are not in the source field list above.
27
+ :id => :surrogate_key,
28
+ :test => "test!",
29
+ :calc_test => Time.now # Evaluated once, when this control file is evaluated - not per row.
30
+ },
31
+ }
@@ -0,0 +1,34 @@
1
+ source :in, { # Declares the :in source: data/delimited.txt read with the :csv parser.
2
+ :file => 'data/delimited.txt',
3
+ :parser => {
4
+ :name => :csv
5
+ }
6
+ },
7
+ [ # Field list for the source; presumably names the parsed columns in order - confirm
8
+ :first_name,
9
+ :last_name,
10
+ :ssn,
11
+ :age,
12
+ :sex
13
+ ]
14
+
15
+ #transform :age, :type, :type => :number
16
+ transform :ssn, :sha1 # Apply the :sha1 transform to the ssn field.
17
+ transform(:ssn){ |n, v, row| v[0,24] } # Then keep the first 24 characters of the value (assumes v is a String).
18
+ transform :sex, :decode, {:decode_table_path => 'data/decode.txt'} # Apply the :decode transform to sex using the table file data/decode.txt.
19
+
20
+ destination :out, { # Declares the :out destination with :type => :insert_update_database on the people table.
21
+ :type => :insert_update_database,
22
+ :target => :data_warehouse,
23
+ :database => 'etl_unittest',
24
+ :table => 'people'
25
+ },
26
+ {
27
+ :primarykey => [:id], # presumably the key used to choose between insert and update - confirm against the destination
28
+ :order => [:id, :first_name, :last_name, :ssn, :age, :sex, :test, :calc_test], # Output field order, including the three virtual fields below.
29
+ :virtual => { # Fields that are not in the source field list above.
30
+ :id => :surrogate_key,
31
+ :test => "test!",
32
+ :calc_test => Time.now # Evaluated once, when this control file is evaluated - not per row.
33
+ },
34
+ }
@@ -0,0 +1,34 @@
1
+ source :in, { # Declares the :in source: data/delimited.txt read with the :csv parser.
2
+ :file => 'data/delimited.txt',
3
+ :parser => {
4
+ :name => :csv
5
+ }
6
+ },
7
+ [ # Field list for the source; presumably names the parsed columns in order - confirm
8
+ :first_name,
9
+ :last_name,
10
+ :ssn,
11
+ :age,
12
+ :sex
13
+ ]
14
+
15
+ #transform :age, :type, :type => :number
16
+ transform :ssn, :sha1 # Apply the :sha1 transform to the ssn field.
17
+ transform(:ssn){ |n, v, row| v[0,24] } # Then keep the first 24 characters of the value (assumes v is a String).
18
+ transform :sex, :decode, {:decode_table_path => 'data/decode.txt'} # Apply the :decode transform to sex using the table file data/decode.txt.
19
+
20
+ destination :out, { # Declares the :out destination with :type => :update_database on the people table.
21
+ :type => :update_database,
22
+ :target => :data_warehouse,
23
+ :database => 'etl_unittest',
24
+ :table => 'people'
25
+ },
26
+ {
27
+ :conditions => [{:field => "\#{conn.quote_column_name(:id)}", :value => "\#{conn.quote(row[:id])}", :comp => "="}], # \# keeps each #{...} literal here; presumably the destination evaluates them per row with conn and row in scope - confirm
28
+ :order => [:id, :first_name, :last_name, :ssn, :age, :sex, :test, :calc_test], # Output field order, including the three virtual fields below.
29
+ :virtual => { # Fields that are not in the source field list above.
30
+ :id => :surrogate_key,
31
+ :test => "test!",
32
+ :calc_test => Time.now # Evaluated once, when this control file is evaluated - not per row.
33
+ },
34
+ }