activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,24 @@
# Row processor whose #process raises for every row it is given. The control
# file registers it with after_read so that processing generates errors.
class ErrorProcessor < ETL::Processor::RowProcessor
  # Forwards both arguments unchanged to the parent constructor.
  def initialize(control, configuration)
    super(control, configuration)
  end

  # Always raises RuntimeError with the message "Generated error"; the row
  # itself is never inspected.
  def process(row)
    raise "Generated error"
  end
end
9
+
# Error threshold for this control file is set to 1.
set_error_threshold 1

# Two in-memory rows exposed through the :enumerable source type, with the
# fields first_name and last_name.
source :in,
       {
         :type => :enumerable,
         :enumerable => [
           { :first_name => 'Bob', :last_name => 'Smith' },
           { :first_name => 'Joe', :last_name => 'Thompson' }
         ]
       },
       [:first_name, :last_name]

# Register the always-raising processor as an after_read processor.
after_read ErrorProcessor
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
# This is an integration test
class ETLTest < Test::Unit::TestCase
  # Test end-to-end integration of ETL engine processing for the delimited.ctl control file
  def test_delimited_single_file_load
    #ETL::Engine.logger = Logger.new(STDOUT)
    #ETL::Engine.logger.level = Logger::DEBUG

    ETL::Engine.init(:config => File.dirname(__FILE__) + '/database.yml')
    ETL::Engine.process(File.dirname(__FILE__) + '/delimited.ctl')
    lines = output_lines('delimited.txt')
    assert_equal 3, lines.length

    assert_person_row ['1', 'Chris', 'Smith', '23cc5914d48b146f0fbb73c4', '24', 'Male', 'test!'], lines[0]
    assert_person_row ['2', 'Jim', 'Foxworthy', '596e3534978b8c2b47851e37', '51', 'Male', 'test!'], lines[1]
  end

  # Test end-to-end integration of ETL engine processing for the fixed_width.ctl control file
  def test_fixed_width_single_file_load
    ETL::Engine.process(File.dirname(__FILE__) + '/fixed_width.ctl')
    # fixed_width.ctl writes to output/fixed_width.txt; reading delimited.txt
    # here would only re-check the output of the delimited test.
    lines = output_lines('fixed_width.txt')
    assert_equal 3, lines.length
  end

  private

  # Returns the lines of the named file in this directory's output/ folder.
  # File.readlines closes the file itself, so no handle is left open.
  def output_lines(name)
    File.readlines(File.join(File.dirname(__FILE__), 'output', name))
  end

  # Asserts that the leading comma-separated columns of +line+ equal
  # +expected+ (in order) and that the eighth column parses as a time.
  def assert_person_row(expected, line)
    data = line.split(',')
    expected.each_with_index do |value, index|
      assert_equal value, data[index]
    end
    assert_nothing_raised { Time.parse(data[7]) }
  end
end
@@ -0,0 +1,24 @@
# Reads data/excel.xls with the :excel parser (blank lines are not ignored)
# into the fields first_name, last_name, ssn and age.
source :in,
       { :file => 'data/excel.xls', :parser => :excel },
       {
         :ignore_blank_line => false,
         :fields => [:first_name, :last_name, :ssn, :age]
       }

# Apply the :sha1 transform to ssn, then keep only its first 24 characters.
transform :ssn, :sha1
transform(:ssn) { |_name, value, _row| value[0, 24] }

# Write the four fields, in this order, to output/excel.out.txt.
destination :out,
            { :file => 'output/excel.out.txt' },
            { :order => [:first_name, :last_name, :ssn, :age] }
@@ -0,0 +1,25 @@
# Reads data/excel2.xls with the :excel parser, ignoring blank lines and
# restricting :worksheets to [1], into first_name, last_name, ssn and age.
source :in,
       { :file => 'data/excel2.xls', :parser => :excel },
       {
         :ignore_blank_line => true,
         :worksheets => [1],
         :fields => [:first_name, :last_name, :ssn, :age]
       }

# Apply the :sha1 transform to ssn, then keep only its first 24 characters.
transform :ssn, :sha1
transform(:ssn) { |_name, value, _row| value[0, 24] }

# Write the four fields, in this order, to output/excel2.out.txt.
destination :out,
            { :file => 'output/excel2.out.txt' },
            { :order => [:first_name, :last_name, :ssn, :age] }
@@ -0,0 +1,35 @@
# puts "executing fixed_width.ctl"

# Reads data/fixed_width.txt with the :fixed_width parser. Each field is
# described by its :start position and :length; age is additionally typed
# as :integer.
source :in,
       { :file => 'data/fixed_width.txt', :parser => :fixed_width },
       {
         :first_name => { :start => 1, :length => 9 },
         :last_name => { :start => 10, :length => 12 },
         :ssn => { :start => 22, :length => 9 },
         :age => { :start => 31, :length => 2, :type => :integer }
       }

# Apply the :sha1 transform to ssn, then keep only its first 24 characters.
transform :ssn, :sha1
transform(:ssn) { |_name, value, _row| value[0, 24] }

# Write the four fields, in this order, to output/fixed_width.txt.
destination :out,
            { :file => 'output/fixed_width.txt' },
            { :order => [:first_name, :last_name, :ssn, :age] }
@@ -0,0 +1,50 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
# TODO - use flexmock instead, but I'm not sure how to handle the respond_to part yet
#
# Minimal stand-in resolver that only records whether #load_cache was called.
class TestResolver
  # false until #load_cache has been called (or the flag is set directly).
  attr_accessor :cache_loaded

  def initialize
    self.cache_loaded = false
  end

  # Marks the cache as loaded.
  def load_cache
    self.cache_loaded = true
  end
end
15
+
# Checks how the :cache option of ForeignKeyLookupTransform affects whether
# the resolver's load_cache is called during construction.
class ForeignKeyLookupTransformTest < Test::Unit::TestCase

  context 'configuration' do

    should 'enable cache by default' do
      resolver = TestResolver.new
      options = {:resolver => resolver}

      # Only the construction side effect on the resolver is of interest.
      ETL::Transform::ForeignKeyLookupTransform.new(nil, 'name', options)

      assert_equal true, resolver.cache_loaded
    end

    should 'allow to disable cache' do
      resolver = TestResolver.new
      options = {:resolver => resolver, :cache => false}

      ETL::Transform::ForeignKeyLookupTransform.new(nil, 'name', options)

      assert_equal false, resolver.cache_loaded
    end

    should 'allow to enable cache' do
      resolver = TestResolver.new
      options = {:resolver => resolver, :cache => true}

      ETL::Transform::ForeignKeyLookupTransform.new(nil, 'name', options)

      assert_equal true, resolver.cache_loaded
    end

  end

end
@@ -0,0 +1,14 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
# Test generators
class GeneratorTest < Test::Unit::TestCase
  # The :surrogate_key name must resolve to SurrogateKeyGenerator, and a
  # freshly built generator must return 1 through 10 from successive
  # calls to #next.
  def test_surrogate_key_generator
    klass = ETL::Generator::Generator.class_for_name(:surrogate_key)
    assert_equal ETL::Generator::SurrogateKeyGenerator, klass

    sequence = klass.new
    (1..10).each { |expected| assert_equal expected, sequence.next }
  end
end
@@ -0,0 +1,17 @@
# Parser defined inline in the control file. It ignores any input and yields
# three fixed rows whose :name values are 'foo', 'bar' and 'baz'.
class MyParser < ETL::Parser::Parser
  # Yields each fixed row hash to the given block, in order.
  def each
    rows = %w(foo bar baz).map { |value| { :name => value } }
    rows.each { |row| yield row }
  end
end
8
+
# The source passes the MyParser class itself as :parser and an empty :file
# string; the only field is :name.
source :in,
       { :file => '', :parser => MyParser },
       [:name]

# Write the name field to output/inline_parser.txt.
destination :out,
            { :file => 'output/inline_parser.txt' },
            { :order => [:name] }
@@ -0,0 +1,26 @@
module ETL
  module Control
    # Destination that collects written rows in a class-level registry so a
    # test can inspect them afterwards.
    #
    # Usage:
    # - declare in the ctl file:
    #     destination :out, { :type => :mock, :name => :my_mock_output }
    # - run the .ctl from your test
    # - then assert the content of the rows
    #     assert_equal [{:name => 'John Barry'},{:name => 'Gary Moore'}], MockDestination[:my_mock_output]
    class MockDestination < Destination
      # Registers a row list under configuration[:name] (default
      # 'mock_destination'). A list already registered under that name is
      # kept, so rows from earlier instances with the same name remain.
      def initialize(control, configuration, mapping={})
        super
        @mock_destination_name = configuration[:name] || 'mock_destination'
        @@registry ||= {}
        @@registry[@mock_destination_name] ||= []
      end

      # Returns the rows written under +mock_destination_name+, or nil when
      # nothing is registered under that name. The registry is created on
      # demand so a lookup made before any instance exists returns nil
      # instead of raising NameError.
      def self.[](mock_destination_name)
        @@registry ||= {}
        @@registry[mock_destination_name]
      end

      # Appends +row+ to this destination's registered row list.
      def write(row)
        @@registry[@mock_destination_name] << row
      end

      # the presence of close is asserted - just do nothing
      def close; end
    end
  end
end
@@ -0,0 +1,25 @@
module ETL
  module Control
    # Source that serves rows previously registered under a name in a
    # class-level registry.
    #
    # Usage:
    # - first set the data in your test setup
    #     MockSource[:my_input] = [ { :first_name => 'John', :last_name => 'Barry' }, { ...} ]
    # - then declare in the ctl file:
    #     source :in, { :type => :mock, :name => :my_input }
    class MockSource < EnumerableSource
      # Looks up the data registered under configuration[:name] (default
      # 'mock_source') and stores it in configuration[:enumerable].
      #
      # Raises RuntimeError when no data has been registered under that name.
      def initialize(control, configuration, definition)
        super
        mock_source_name = configuration[:name] || 'mock_source'
        mock_source_data = self.class[mock_source_name]
        # raise, not throw: throw is for catch/throw control flow and would
        # surface this message as an uncaught-throw error.
        if mock_source_data.nil?
          raise "No mock source data set for mock source '#{mock_source_name}'"
        end
        configuration[:enumerable] = mock_source_data
      end

      # Registers +mock_source_data+ under +mock_source_name+.
      def self.[]=(mock_source_name,mock_source_data)
        @@registry ||= {}
        @@registry[mock_source_name] = mock_source_data
      end

      # Returns the data registered under +mock_source_name+, or nil. The
      # registry is created on demand so a lookup made before anything was
      # registered returns nil instead of raising NameError.
      def self.[](mock_source_name)
        @@registry ||= {}
        @@registry[mock_source_name]
      end
    end
  end
end
25
+
@@ -0,0 +1,14 @@
# Source of type :model with the fields first_name and last_name.
source :in,
       { :type => :model },
       [:first_name, :last_name]

# Write both fields, in this order, to data/model_out.txt.
destination :out,
            { :file => 'data/model_out.txt' },
            { :order => [:first_name, :last_name] }
@@ -0,0 +1,22 @@
# puts "executing multiple_delimited.ctl"

# Reads every file matching data/multiple_delimited_*.txt with the :csv
# parser; age is typed as :integer.
source :in,
       { :file => 'data/multiple_delimited_*.txt', :parser => :csv },
       [
         :first_name,
         :last_name,
         :ssn,
         { :name => :age, :type => :integer }
       ]

# Write the four fields, in this order, to output/multiple_delimited.txt.
destination :out,
            { :file => 'output/multiple_delimited.txt' },
            { :order => [:first_name, :last_name, :ssn, :age] }
@@ -0,0 +1,39 @@
# puts "executing multiple_source_delimited.ctl"

# Two sources that both read data/multiple_delimited_*.txt with the :csv
# parser and declare the same fields; age is typed as :integer.
source :source1, {
  :file => 'data/multiple_delimited_*.txt',
  :parser => :csv
},
[
  :first_name,
  :last_name,
  :ssn,
  {
    :name => :age,
    :type => :integer
  }
]

source :source2, {
  :file => 'data/multiple_delimited_*.txt',
  :parser => :csv
},
[
  :first_name,
  :last_name,
  :ssn,
  {
    :name => :age,
    :type => :integer
  }
]

# Apply the :sha1 transform to ssn, then keep only its first 24 characters.
# The block takes (name, value, row) like the other control files in this
# suite; declared as |v| it would not receive the field value.
# NOTE(review): assumes the engine calls transform blocks with name, value
# and row - confirm against the block transform implementation.
transform :ssn, :sha1
transform(:ssn){ |n, v, r| v[0,24] }

# Write the four fields, in this order, to
# output/multiple_source_delimited.txt.
destination :out, {
  :file => 'output/multiple_source_delimited.txt'
},
{
  :order => [:first_name, :last_name, :ssn, :age]
}
@@ -0,0 +1,35 @@
# puts "executing nokogiri_all.ctl"

# Reads every people/person element of data/nokogiri.xml with the
# :nokogiri_xml parser. ssn and hair_colour are located through explicit
# :xpath entries; age is typed as :integer.
source :in, {
  :file => 'data/nokogiri.xml',
  :parser => :nokogiri_xml
},
{
  :collection => 'people/person',
  :fields => [
    :first_name,
    :last_name,
    {
      :name => :ssn,
      :xpath => '@ssn'
    },
    {
      :name => :age,
      :type => :integer
    },
    {
      :name => :hair_colour,
      :xpath => 'colours/hair'
    }
  ]
}

# Only first_name, last_name and ssn are written to output/xml.txt.
destination :out, {
  :file => 'output/xml.txt'
},
{
  :order => [:first_name, :last_name, :ssn]
}

# Apply the :sha1 transform to ssn, then keep only its first 24 characters.
# The block takes (name, value, row) like the other control files in this
# suite; declared as |v| it would not receive the field value.
# NOTE(review): assumes the engine calls transform blocks with name, value
# and row - confirm against the block transform implementation.
transform :ssn, :sha1
transform(:ssn){ |n, v, r| v[0,24] }
@@ -0,0 +1,35 @@
# puts "executing nokogiri_select.ctl"

# Reads only the people/person elements of data/nokogiri.xml whose type
# attribute is "client", using the :nokogiri_xml parser. ssn and hair_colour
# are located through explicit :xpath entries; age is typed as :integer.
source :in, {
  :file => 'data/nokogiri.xml',
  :parser => :nokogiri_xml
},
{
  :collection => 'people/person[@type="client"]',
  :fields => [
    :first_name,
    :last_name,
    {
      :name => :ssn,
      :xpath => '@ssn'
    },
    {
      :name => :age,
      :type => :integer
    },
    {
      :name => :hair_colour,
      :xpath => 'colours/hair'
    }
  ]
}

# Only first_name, last_name and ssn are written to output/xml.txt.
destination :out, {
  :file => 'output/xml.txt'
},
{
  :order => [:first_name, :last_name, :ssn]
}

# Apply the :sha1 transform to ssn, then keep only its first 24 characters.
# The block takes (name, value, row) like the other control files in this
# suite; declared as |v| it would not receive the field value.
# NOTE(review): assumes the engine calls transform blocks with name, value
# and row - confirm against the block transform implementation.
transform :ssn, :sha1
transform(:ssn){ |n, v, r| v[0,24] }