activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,14 @@
1
+ require 'active_support/core_ext/integer/inflections.rb'
2
+
3
module ETL #:nodoc:
  module Transform #:nodoc:
    # Turns a number into its ordinalized form by calling +ordinalize+ on the
    # value (an ActiveSupport core extension).
    class OrdinalizeTransform < ETL::Transform::Transform
      # Returns the result of +value.ordinalize+. The +name+ and +row+
      # arguments are accepted for interface compatibility and are not used.
      def transform(name, value, row)
        ordinalized = value.ordinalize
        ordinalized
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'digest/sha1'
2
+
3
module ETL #:nodoc:
  module Transform #:nodoc:
    # Replaces a value with the hexadecimal SHA-1 digest of that value.
    class Sha1Transform < ETL::Transform::Transform
      # Returns the SHA-1 hex digest of +value+. The +name+ and +row+
      # arguments are not used.
      def transform(name, value, row)
        hex_digest = Digest::SHA1.hexdigest(value)
        hex_digest
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module ETL
  module Transform
    # Splits the content of one field on a delimiter and stores the pieces
    # in other fields of the same row.
    #
    # Configuration options:
    # * <tt>:delimiter</tt>: The separator passed to +split+. Default is ','.
    # * <tt>:new_fields</tt>: The row keys that receive the pieces, in order.
    #   When omitted, no additional fields are written.
    class SplitFieldsTransform < ETL::Transform::Transform
      attr_reader :delimiter
      attr_reader :new_fields

      # Reads <tt>:delimiter</tt> and <tt>:new_fields</tt> from the
      # configuration and then delegates to the base initializer.
      def initialize(control, name, configuration)
        @delimiter = configuration[:delimiter] || ','
        @new_fields = configuration[:new_fields]
        super
      end

      # Splits <tt>row[name]</tt> and assigns the n-th piece to the n-th entry
      # of +new_fields+ (nil when there are fewer pieces than fields).
      #
      # Returns nil when the row or the source field is nil; otherwise returns
      # the unchanged source field value. Note that the field is read from
      # +row+, not from the +value+ argument.
      def transform(name, value, row)
        return nil if row.nil?
        return nil if row[name].nil?

        pieces = row[name].split(@delimiter)
        # Array() lets a missing :new_fields option act as "no target fields"
        # instead of raising NoMethodError on nil.
        Array(@new_fields).each_with_index do |field_name, index|
          row[field_name] = pieces[index]
        end

        row[name]
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform a String representation of a date to a DateTime instance
    class StringToDateTimeTransform < ETL::Transform::Transform
      # Parses +value+ with DateTime.parse; a nil value yields nil.
      #
      # WARNING: This transform is slow (due to the Ruby implementation), but
      # it is the one to use if you need to parse timestamps before or after
      # the values supported by Time.parse.
      def transform(name, value, row)
        return nil if value.nil?

        DateTime.parse(value)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform a String representation of a date to a Date instance
    class StringToDateTransform < ETL::Transform::Transform
      # Parses +value+ with Date.parse. A nil value is returned as is, and a
      # value that makes Date.parse raise a StandardError is returned
      # unchanged instead of propagating the error.
      def transform(name, value, row)
        return value if value.nil?

        begin
          Date.parse(value)
        rescue StandardError
          value
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform a String representation of a date to a Time instance
    class StringToTimeTransform < ETL::Transform::Transform
      # Parses +value+ with Time.parse; a nil value yields nil.
      def transform(name, value, row)
        value.nil? ? nil : Time.parse(value)
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Base class for transforms.
    #
    # A transform converts one value to another value using some sort of algorithm.
    #
    # A simple transform has two arguments, the field to transform and the name of the transform:
    #
    #   transform :ssn, :sha1
    #
    # Transforms can also be blocks:
    #
    #   transform(:ssn){ |v| v[0,24] }
    #
    # Finally, a transform can include a configuration hash:
    #
    #   transform :sex, :decode, {:decode_table_path => 'delimited_decode.txt'}
    class Transform
      class << self
        # Runs +value+ through every entry of +transforms+, in order, feeding
        # the result of one step into the next, and returns the final value.
        #
        # Each entry is either a Proc, which is called with the single array
        # <tt>[name, value, row]</tt>, or a Transform instance, whose
        # <tt>transform(name, value, row)</tt> method is called. Any other
        # object raises a ControlError.
        #
        # The wall-clock time of each step is added to +benchmarks+ under the
        # class of the step.
        def transform(name, value, row, transforms)
          transforms.each do |step|
            benchmarks[step.class] ||= 0
            benchmarks[step.class] += Benchmark.realtime do
              Engine.logger.debug "Transforming field #{name} with #{step.inspect}"
              value =
                case step
                when Proc then step.call([name, value, row])
                when Transform then step.transform(name, value, row)
                else raise ControlError, "Unsupported transform configuration type: #{step}"
                end
            end
          end
          value
        end

        # Hash of accumulated running time, keyed by transform class.
        def benchmarks
          @benchmarks ||= {}
        end
      end

      attr_reader :control, :name, :configuration

      # Stores the control object, the field name and the configuration hash
      # (empty by default) on the new transform.
      def initialize(control, name, configuration={})
        @control, @name, @configuration = control, name, configuration
      end

      # Abstract: subclasses must override this and return the new value.
      def transform(name, value, row)
        raise "transform is an abstract method"
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform to trim string
    class TrimTransform < ETL::Transform::Transform
      # Configuration options:
      # * <tt>:type</tt>: :left, :right or :both. Default is :both
      def initialize(control, name, configuration={})
        super
        @type = (configuration[:type] || :both).to_sym
      end

      # Strips whitespace from the left, the right or both sides of +value+,
      # depending on the configured type.
      #
      # A nil value is returned as nil rather than raising NoMethodError, in
      # line with the nil handling of the other string transforms. An
      # unsupported trim type raises a RuntimeError, even for a nil value.
      def transform(name, value, row)
        strip_method =
          case @type
          when :left then :lstrip
          when :right then :rstrip
          when :both then :strip
          else raise "Trim type, if specified, must be :left, :right or :both"
          end
        return nil if value.nil?

        value.send(strip_method)
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform from one type to another
    class TypeTransform < ETL::Transform::Transform
      # Initialize the transformer.
      #
      # Configuration options:
      # * <tt>:type</tt>: The type to convert to. Supported types:
      # ** :string
      # ** :number,:integer
      # ** :float
      # ** :decimal
      # * <tt>:significant</tt>: Passed as the second argument when a
      #   BigDecimal is built for the :decimal type. Default is 0 (the default
      #   is also written back into the configuration hash).
      def initialize(control, name, configuration={})
        super
        @type = configuration[:type]
        @significant = configuration[:significant] ||= 0
      end

      # Converts +value+ to the configured type. Raises a RuntimeError when
      # the configured type is not one of the supported types.
      def transform(name, value, row)
        case @type
        when :string
          value.to_s
        when :number, :integer
          value.to_i
        when :float
          value.to_f
        when :decimal
          to_decimal(value)
        else
          raise "Unsupported type: #{@type}"
        end
      end

      private

      # Builds a BigDecimal from the string form of +value+.
      #
      # BigDecimal.new is not available in newer bigdecimal releases, so the
      # Kernel#BigDecimal conversion method is used whenever the class no
      # longer responds to +new+. Where +new+ still exists it is kept, so
      # results on older runtimes stay exactly as before.
      def to_decimal(value)
        text = value.to_s
        return BigDecimal.new(text, @significant) if BigDecimal.respond_to?(:new)

        BigDecimal(text, @significant)
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
module ETL
  # Helpers that describe an elapsed time span in words.
  module Util
    # Return the exact distance of time in words from the given from_time to
    # the specified to_time, e.g. "1 days, 2 hours, 3 minutes, 4 seconds".
    # If to_time is not specified then Time.now is used.
    #
    # Day, hour and minute parts are left out when they are zero; the seconds
    # part is always present. Arguments that respond to +to_time+ are
    # converted with it first.
    def distance_of_time_in_words(from_time, to_time=Time.now)
      from_time = from_time.to_time if from_time.respond_to?(:to_time)
      to_time = to_time.to_time if to_time.respond_to?(:to_time)

      total_seconds = (to_time - from_time).round
      days, remainder = total_seconds.divmod(60 * 60 * 24)
      hours, remainder = remainder.divmod(60 * 60)
      minutes, seconds = remainder.divmod(60)

      # Joining the parts keeps the ", " separator uniform; the days part used
      # to be emitted as "N days," without the trailing space.
      parts = []
      parts << "#{days} days" if days > 0
      parts << "#{hours} hours" if hours > 0
      parts << "#{minutes} minutes" if minutes > 0
      parts << "#{seconds} seconds"
      parts.join(', ')
    end

    # Get the approximate distance of time in words from the given from_time
    # to the given to_time. If to_time is not specified then it is set
    # to Time.now.
    #
    # include_seconds only matters for distances that round to 0 or 1 minute:
    # when true (the default) the result is refined by seconds ("less than 5
    # seconds", "half a minute", ...); when false it is 'less than a minute'
    # or '1 minute'.
    def approximate_distance_of_time_in_words(from_time, to_time=Time.now, include_seconds=true)
      from_time = from_time.to_time if from_time.respond_to?(:to_time)
      to_time = to_time.to_time if to_time.respond_to?(:to_time)
      distance_in_minutes = (((to_time - from_time).abs)/60).round
      distance_in_seconds = ((to_time - from_time).abs).round

      case distance_in_minutes
      when 0..1
        return (distance_in_minutes == 0) ? 'less than a minute' : '1 minute' unless include_seconds
        case distance_in_seconds
        when 0..4 then 'less than 5 seconds'
        when 5..9 then 'less than 10 seconds'
        when 10..19 then 'less than 20 seconds'
        when 20..39 then 'half a minute'
        when 40..59 then 'less than a minute'
        else '1 minute'
        end
      when 2..44 then "#{distance_in_minutes} minutes"
      when 45..89 then 'about 1 hour'
      when 90..1439 then "about #{(distance_in_minutes.to_f / 60.0).round} hours"
      when 1440..2879 then '1 day'
      when 2880..43199 then "#{(distance_in_minutes / 1440).round} days"
      when 43200..86399 then 'about 1 month'
      when 86400..525959 then "#{(distance_in_minutes / 43200).round} months"
      when 525960..1051919 then 'about 1 year'
      else "over #{(distance_in_minutes / 525960).round} years"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module ETL#:nodoc:
2
+ VERSION = "0.9.5.rc1" # NOTE(review): the package header above lists 0.9.6 - confirm this constant is current
3
+ end
@@ -0,0 +1,10 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ # - jruby-1.6.2
5
+ rails:
6
+ - 2.3.x
7
+ - 3.0.x
8
+ database:
9
+ - mysql
10
+ - postgresql
@@ -0,0 +1 @@
1
+ database*.yml
@@ -0,0 +1,2 @@
1
+ database.yml
2
+ *.txt
@@ -0,0 +1,6 @@
1
+ # This is an ETL Batch File and defines a means for executing
2
+ # a collection of ETL scripts as a single process.
3
+
4
+ use_temp_tables
5
+ run 'batched1.ctl'
6
+ run 'batched2.ctl'
@@ -0,0 +1,11 @@
1
+ source :in, {
2
+ :file => 'data/apache_combined_log.txt',
3
+ :parser => :apache_combined_log
4
+ }
5
+
6
+ destination :out, {
7
+ :file => 'output/apache_combined_log.txt'
8
+ },
9
+ {
10
+ :order => []
11
+ }
@@ -0,0 +1,41 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Exercises ETL::Batch::Batch.resolve with each kind of argument it is given
# here: a path string, a File, a Batch instance and an unsupported object.
class BatchTest < Test::Unit::TestCase
  attr_reader :file, :db_yaml, :engine

  # Points the test at the all.ebf batch file and database.yml next to this
  # file, and creates a fresh engine.
  def setup
    @file = File.dirname(__FILE__) + '/all.ebf'
    @db_yaml = File.dirname(__FILE__) + '/database.yml'
    @engine = ETL::Engine.new
  end

  def teardown
  end

  # Placeholder: the command line invocation is commented out.
  def test_etl_batch_file
    #`etl #{file} -c #{db_yaml}`
  end

  # A batch resolved from a path string executes without raising.
  def test_batch
    assert_nothing_raised do
      ETL::Batch::Batch.resolve(file, engine).execute
    end
  end

  # A batch resolved from a File object executes without raising.
  def test_batch_with_file
    assert_nothing_raised do
      ETL::Batch::Batch.resolve(File.new(file), engine).execute
    end
  end

  # A batch resolved from an existing Batch instance executes without raising.
  def test_batch_with_batch_object
    assert_nothing_raised do
      prepared = ETL::Batch::Batch.new(File.new(file))
      prepared.engine = engine
      ETL::Batch::Batch.resolve(prepared, engine).execute
    end
  end

  # Resolving an unsupported object raises a RuntimeError.
  def test_batch_with_object_should_fail
    assert_raise(RuntimeError) do
      ETL::Batch::Batch.resolve(0, engine)
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # This is an ETL Batch File and defines a means for executing
2
+ # a collection of ETL scripts as a single process.
3
+
4
+ use_temp_tables
5
+ run 'delimited_with_bulk_load.ctl'
6
+ run 'screen_test_fatal.ctl'
File without changes
File without changes
@@ -0,0 +1,6 @@
1
+ source :in, { :type => :mock, :name => :block_processed_input }
2
+
3
+ after_read { |row| row[:added_by_after_read] = "after-" +row[:first_name]; row }
4
+ before_write { |row| row[:added_by_before_write] = "Row #{Engine.current_source_row}"; [row,{:new_row => 'added by post_processor'}] }
5
+
6
+ destination :out, { :type => :mock, :name => :block_processed_output }
@@ -0,0 +1 @@
1
+ pre_process { raise ControlError.new( "Cough!") }
@@ -0,0 +1,4 @@
1
+ source :in, { :type => :mock, :name => :another_input }
2
+ pre_process { TestWitness.call("I'm called from pre_process") }
3
+ post_process { TestWitness.call("I'm called from post_process") }
4
+ destination :out, { :type => :mock, :name => :another_output }
@@ -0,0 +1,5 @@
1
+ source :in, { :type => :mock, :name => :block_input }
2
+
3
+ before_write { |row| row[:obsolete] == true ? nil : row }
4
+
5
+ destination :out, { :type => :mock, :name => :block_output }
@@ -0,0 +1,38 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ include ETL
3
+ include ETL::Control
4
+
5
+ class TestWitness # Empty placeholder; BlockProcessorTest stubs TestWitness.call with flexmock
6
+ end
7
+
8
# Runs the block_processor*.ctl control files against mock sources and
# destinations and checks the rows that reach the destination.
class BlockProcessorTest < Test::Unit::TestCase

  # after_read and before_write blocks both run; before_write also emits an
  # extra row after each source row.
  def test_block_processor_should_work_as_both_after_read_and_before_write_row_processor
    extra_row = { :new_row => 'added by post_processor' }
    expected_rows = [
      { :first_name => 'John', :added_by_after_read => 'after-John', :added_by_before_write => "Row 1" },
      extra_row,
      { :first_name => 'Gary', :added_by_after_read => 'after-Gary', :added_by_before_write => "Row 2" },
      extra_row
    ]

    MockSource[:block_processed_input] = [{ :first_name => 'John' }, { :first_name => 'Gary' }]
    process 'block_processor.ctl'

    assert_equal 4, MockDestination[:block_processed_output].size
    expected_rows.each_with_index do |expected, index|
      assert_equal(expected, MockDestination[:block_processed_output][index])
    end
  end

  # A before_write block that returns nil drops the row.
  def test_block_processor_should_let_rows_be_removed_by_setting_it_to_nil
    obsolete_row = { :obsolete => true, :name => 'John' }
    current_row = { :obsolete => false, :name => 'Gary' }

    MockSource[:block_input] = [obsolete_row, current_row]
    process 'block_processor_remove_rows.ctl'

    # only one record should be kept
    assert_equal([{ :obsolete => false, :name => 'Gary' }], MockDestination[:block_output])
  end

  # pre_process and post_process blocks run, and rows pass through untouched.
  def test_block_processor_should_work_as_pre_or_post_processor
    flexmock(TestWitness).should_receive(:call).with("I'm called from pre_process")
    flexmock(TestWitness).should_receive(:call).with("I'm called from post_process")

    MockSource[:another_input] = [
      { :obsolete => true, :name => 'John' },
      { :obsolete => false, :name => 'Gary' }
    ]
    process 'block_processor_pre_post_process.ctl'

    assert_equal(MockSource[:another_input], MockDestination[:another_output])
  end

  # An error raised inside a block reaches the caller of process.
  def test_block_error_should_be_propagated
    assert_raise(ControlError) do
      process 'block_processor_error.ctl'
    end
  end

end