activewarehouse-etl-sgonyea 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. data/.gitignore +9 -0
  2. data/0.9-UPGRADE +6 -0
  3. data/CHANGELOG +236 -0
  4. data/Gemfile +4 -0
  5. data/HOW_TO_RELEASE +13 -0
  6. data/LICENSE +7 -0
  7. data/README.textile +111 -0
  8. data/Rakefile +103 -0
  9. data/TODO +28 -0
  10. data/active_support_logger.patch +78 -0
  11. data/activewarehouse-etl.gemspec +36 -0
  12. data/bin/etl +28 -0
  13. data/bin/etl.cmd +8 -0
  14. data/examples/database.example.yml +16 -0
  15. data/lib/etl.rb +97 -0
  16. data/lib/etl/batch.rb +2 -0
  17. data/lib/etl/batch/batch.rb +111 -0
  18. data/lib/etl/batch/directives.rb +65 -0
  19. data/lib/etl/builder.rb +2 -0
  20. data/lib/etl/builder/date_dimension_builder.rb +96 -0
  21. data/lib/etl/builder/time_dimension_builder.rb +31 -0
  22. data/lib/etl/commands/etl.rb +89 -0
  23. data/lib/etl/control.rb +3 -0
  24. data/lib/etl/control/control.rb +405 -0
  25. data/lib/etl/control/destination.rb +438 -0
  26. data/lib/etl/control/destination/csv_destination.rb +113 -0
  27. data/lib/etl/control/destination/database_destination.rb +97 -0
  28. data/lib/etl/control/destination/excel_destination.rb +91 -0
  29. data/lib/etl/control/destination/file_destination.rb +126 -0
  30. data/lib/etl/control/destination/insert_update_database_destination.rb +136 -0
  31. data/lib/etl/control/destination/update_database_destination.rb +109 -0
  32. data/lib/etl/control/destination/yaml_destination.rb +74 -0
  33. data/lib/etl/control/source.rb +132 -0
  34. data/lib/etl/control/source/database_source.rb +224 -0
  35. data/lib/etl/control/source/enumerable_source.rb +11 -0
  36. data/lib/etl/control/source/file_source.rb +90 -0
  37. data/lib/etl/control/source/model_source.rb +39 -0
  38. data/lib/etl/core_ext.rb +1 -0
  39. data/lib/etl/core_ext/time.rb +5 -0
  40. data/lib/etl/core_ext/time/calculations.rb +42 -0
  41. data/lib/etl/engine.rb +582 -0
  42. data/lib/etl/execution.rb +19 -0
  43. data/lib/etl/execution/base.rb +8 -0
  44. data/lib/etl/execution/batch.rb +10 -0
  45. data/lib/etl/execution/job.rb +8 -0
  46. data/lib/etl/execution/migration.rb +90 -0
  47. data/lib/etl/generator.rb +2 -0
  48. data/lib/etl/generator/generator.rb +20 -0
  49. data/lib/etl/generator/surrogate_key_generator.rb +39 -0
  50. data/lib/etl/http_tools.rb +139 -0
  51. data/lib/etl/parser.rb +11 -0
  52. data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
  53. data/lib/etl/parser/csv_parser.rb +93 -0
  54. data/lib/etl/parser/excel_parser.rb +112 -0
  55. data/lib/etl/parser/fixed_width_parser.rb +65 -0
  56. data/lib/etl/parser/nokogiri_xml_parser.rb +83 -0
  57. data/lib/etl/parser/parser.rb +41 -0
  58. data/lib/etl/parser/sax_parser.rb +218 -0
  59. data/lib/etl/parser/xml_parser.rb +65 -0
  60. data/lib/etl/processor.rb +11 -0
  61. data/lib/etl/processor/block_processor.rb +14 -0
  62. data/lib/etl/processor/bulk_import_processor.rb +94 -0
  63. data/lib/etl/processor/check_exist_processor.rb +80 -0
  64. data/lib/etl/processor/check_unique_processor.rb +39 -0
  65. data/lib/etl/processor/copy_field_processor.rb +26 -0
  66. data/lib/etl/processor/database_join_processor.rb +82 -0
  67. data/lib/etl/processor/encode_processor.rb +55 -0
  68. data/lib/etl/processor/ensure_fields_presence_processor.rb +24 -0
  69. data/lib/etl/processor/escape_csv_processor.rb +77 -0
  70. data/lib/etl/processor/filter_row_processor.rb +51 -0
  71. data/lib/etl/processor/ftp_downloader_processor.rb +68 -0
  72. data/lib/etl/processor/ftp_uploader_processor.rb +65 -0
  73. data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
  74. data/lib/etl/processor/imapattachment_downloader_processor.rb +91 -0
  75. data/lib/etl/processor/pop3attachment_downloader_processor.rb +90 -0
  76. data/lib/etl/processor/print_row_processor.rb +12 -0
  77. data/lib/etl/processor/processor.rb +25 -0
  78. data/lib/etl/processor/rename_processor.rb +24 -0
  79. data/lib/etl/processor/require_non_blank_processor.rb +26 -0
  80. data/lib/etl/processor/row_processor.rb +27 -0
  81. data/lib/etl/processor/sequence_processor.rb +23 -0
  82. data/lib/etl/processor/sftp_downloader_processor.rb +63 -0
  83. data/lib/etl/processor/sftp_uploader_processor.rb +63 -0
  84. data/lib/etl/processor/surrogate_key_processor.rb +53 -0
  85. data/lib/etl/processor/truncate_processor.rb +40 -0
  86. data/lib/etl/processor/zip_file_processor.rb +27 -0
  87. data/lib/etl/row.rb +20 -0
  88. data/lib/etl/screen.rb +14 -0
  89. data/lib/etl/screen/row_count_screen.rb +20 -0
  90. data/lib/etl/transform.rb +2 -0
  91. data/lib/etl/transform/block_transform.rb +13 -0
  92. data/lib/etl/transform/calculation_transform.rb +71 -0
  93. data/lib/etl/transform/date_to_string_transform.rb +20 -0
  94. data/lib/etl/transform/decode_transform.rb +51 -0
  95. data/lib/etl/transform/default_transform.rb +20 -0
  96. data/lib/etl/transform/foreign_key_lookup_transform.rb +211 -0
  97. data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
  98. data/lib/etl/transform/md5_transform.rb +13 -0
  99. data/lib/etl/transform/ordinalize_transform.rb +14 -0
  100. data/lib/etl/transform/sha1_transform.rb +13 -0
  101. data/lib/etl/transform/split_fields_transform.rb +27 -0
  102. data/lib/etl/transform/string_to_date_time_transform.rb +14 -0
  103. data/lib/etl/transform/string_to_date_transform.rb +16 -0
  104. data/lib/etl/transform/string_to_time_transform.rb +11 -0
  105. data/lib/etl/transform/transform.rb +61 -0
  106. data/lib/etl/transform/trim_transform.rb +26 -0
  107. data/lib/etl/transform/type_transform.rb +35 -0
  108. data/lib/etl/util.rb +59 -0
  109. data/lib/etl/version.rb +3 -0
  110. data/test-matrix.yml +10 -0
  111. data/test/.gitignore +1 -0
  112. data/test/.ignore +2 -0
  113. data/test/all.ebf +6 -0
  114. data/test/apache_combined_log.ctl +11 -0
  115. data/test/batch_test.rb +41 -0
  116. data/test/batch_with_error.ebf +6 -0
  117. data/test/batched1.ctl +0 -0
  118. data/test/batched2.ctl +0 -0
  119. data/test/block_processor.ctl +6 -0
  120. data/test/block_processor_error.ctl +1 -0
  121. data/test/block_processor_pre_post_process.ctl +4 -0
  122. data/test/block_processor_remove_rows.ctl +5 -0
  123. data/test/block_processor_test.rb +38 -0
  124. data/test/check_exist_processor_test.rb +92 -0
  125. data/test/check_unique_processor_test.rb +40 -0
  126. data/test/config/Gemfile.rails-2.3.x +3 -0
  127. data/test/config/Gemfile.rails-2.3.x.lock +53 -0
  128. data/test/config/Gemfile.rails-3.0.x +3 -0
  129. data/test/config/Gemfile.rails-3.0.x.lock +61 -0
  130. data/test/config/common.rb +29 -0
  131. data/test/connection/mysql/connection.rb +9 -0
  132. data/test/connection/mysql/schema.sql +37 -0
  133. data/test/connection/postgresql/connection.rb +13 -0
  134. data/test/connection/postgresql/schema.sql +40 -0
  135. data/test/control_test.rb +43 -0
  136. data/test/data/apache_combined_log.txt +3 -0
  137. data/test/data/bulk_import.txt +3 -0
  138. data/test/data/bulk_import_with_empties.txt +3 -0
  139. data/test/data/decode.txt +3 -0
  140. data/test/data/delimited.txt +3 -0
  141. data/test/data/encode_source_latin1.txt +2 -0
  142. data/test/data/excel.xls +0 -0
  143. data/test/data/excel2.xls +0 -0
  144. data/test/data/fixed_width.txt +3 -0
  145. data/test/data/multiple_delimited_1.txt +3 -0
  146. data/test/data/multiple_delimited_2.txt +3 -0
  147. data/test/data/nokogiri.xml +38 -0
  148. data/test/data/people.txt +3 -0
  149. data/test/data/sax.xml +14 -0
  150. data/test/data/xml.xml +16 -0
  151. data/test/database_join_processor_test.rb +43 -0
  152. data/test/date_dimension_builder_test.rb +96 -0
  153. data/test/delimited.ctl +30 -0
  154. data/test/delimited_absolute.ctl +31 -0
  155. data/test/delimited_destination_db.ctl +23 -0
  156. data/test/delimited_excel.ctl +31 -0
  157. data/test/delimited_insert_update.ctl +34 -0
  158. data/test/delimited_update.ctl +34 -0
  159. data/test/delimited_with_bulk_load.ctl +34 -0
  160. data/test/destination_test.rb +275 -0
  161. data/test/directive_test.rb +23 -0
  162. data/test/encode_processor_test.rb +32 -0
  163. data/test/engine_test.rb +78 -0
  164. data/test/ensure_fields_presence_processor_test.rb +28 -0
  165. data/test/errors.ctl +24 -0
  166. data/test/etl_test.rb +42 -0
  167. data/test/excel.ctl +24 -0
  168. data/test/excel2.ctl +25 -0
  169. data/test/fixed_width.ctl +35 -0
  170. data/test/foreign_key_lookup_transform_test.rb +50 -0
  171. data/test/generator_test.rb +14 -0
  172. data/test/inline_parser.ctl +17 -0
  173. data/test/mocks/mock_destination.rb +26 -0
  174. data/test/mocks/mock_source.rb +25 -0
  175. data/test/model_source.ctl +14 -0
  176. data/test/multiple_delimited.ctl +22 -0
  177. data/test/multiple_source_delimited.ctl +39 -0
  178. data/test/nokogiri_all.ctl +35 -0
  179. data/test/nokogiri_select.ctl +35 -0
  180. data/test/nokogiri_test.rb +35 -0
  181. data/test/parser_test.rb +224 -0
  182. data/test/performance/delimited.ctl +30 -0
  183. data/test/processor_test.rb +44 -0
  184. data/test/row_processor_test.rb +17 -0
  185. data/test/sax.ctl +26 -0
  186. data/test/scd/1.txt +1 -0
  187. data/test/scd/2.txt +1 -0
  188. data/test/scd/3.txt +1 -0
  189. data/test/scd_test.rb +257 -0
  190. data/test/scd_test_type_1.ctl +43 -0
  191. data/test/scd_test_type_2.ctl +34 -0
  192. data/test/screen_test.rb +9 -0
  193. data/test/screen_test_error.ctl +3 -0
  194. data/test/screen_test_fatal.ctl +3 -0
  195. data/test/source_test.rb +154 -0
  196. data/test/test_helper.rb +37 -0
  197. data/test/transform_test.rb +101 -0
  198. data/test/truncate_processor_test.rb +37 -0
  199. data/test/xml.ctl +31 -0
  200. metadata +370 -0
@@ -0,0 +1,24 @@
1
module ETL
  module Processor
    # Row processor that checks every configured field is present as a key
    # of each row and raises when at least one is absent.
    class EnsureFieldsPresenceProcessor < ETL::Processor::RowProcessor

      # Build the processor.
      #
      # Configuration options:
      # * <tt>:fields</tt>: An array of keys whose presence should be verified in each row
      #
      # Raises ControlError when <tt>:fields</tt> is not given.
      def initialize(control, configuration)
        super
        @fields = configuration[:fields]
        @fields || raise(ControlError, ":fields must be specified")
      end

      # Return the row untouched when it has all required keys; otherwise
      # raise ETL::ControlError naming the missing and the available keys.
      def process(row)
        absent = configuration[:fields] - row.keys
        return row if absent.empty?

        raise(ETL::ControlError,
          "Row missing required field(s) #{absent.join(',')} in row. Available fields are : #{row.keys.join(',')}")
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'fileutils'
2
+
3
module ETL #:nodoc:
  module Processor #:nodoc:
    # Processor that copies a text file line by line, applying regular
    # expression replacements to every line and optionally dropping lines
    # that do not contain the expected number of given characters.
    class EscapeCsvProcessor < ETL::Processor::Processor

      # The file to load from
      attr_reader :source_file
      # The file to write to
      attr_reader :target_file
      # whether to use a temporary file or not
      attr_reader :use_temp_file

      # Array of <tt>{:replace => pattern, :result => replacement}</tt> hashes
      attr_reader :filters
      # Array of <tt>{:char => chars, :count => n}</tt> hashes, or nil
      attr_reader :charcount

      # Initialize the processor.
      #
      # Configuration options:
      # * <tt>:source_file</tt>: The file to load data from
      # * <tt>:target_file</tt>: The file to write data to
      # * <tt>:file</tt>: short-cut which will set the same value to both source_file and target_file
      #   (the data is written to a <tt>.tmp</tt> sibling which then replaces the original)
      # * <tt>:filters</tt>: replacements applied to each line, in order
      #   (default: replace every <tt>"</tt> with <tt>""</tt>)
      # * <tt>:charcount</tt>: optional rules; a line is only written when
      #   <tt>String#count</tt> of each <tt>:char</tt> equals its <tt>:count</tt>
      #
      # Raises ControlError when the source or target file is missing, or
      # when both resolve to the same path.
      def initialize(control, configuration)
        super
        if configuration[:file]
          @use_temp_file = true
          configuration[:source_file] = configuration[:file]
          configuration[:target_file] = configuration[:file] + '.tmp'
        end

        # Validate before building Pathnames: Pathname.new(nil) raises a
        # TypeError, which used to make these checks unreachable.
        raise ControlError, "Source file must be specified" if configuration[:source_file].nil?
        raise ControlError, "Target file must be specified" if configuration[:target_file].nil?

        @source_file = resolve_path(configuration[:source_file])
        @target_file = resolve_path(configuration[:target_file])
        @filters = configuration[:filters] || [{:replace => '\"', :result => '""'}]
        @charcount = configuration[:charcount]
        raise ControlError, "Source and target file cannot currently point to the same file" if @source_file == @target_file
      end

      # Execute the processor
      def process
        # Block forms guarantee both handles are closed even when a filter raises.
        File.open(@source_file, 'r') do |reader|
          File.open(@target_file, 'w') do |writer|
            reader.each_line do |line|
              escaped = apply_filters(line)
              writer.write(escaped) if expected_char_counts?(escaped)
            end
          end
        end

        if use_temp_file
          FileUtils.rm(source_file)
          FileUtils.mv(target_file, source_file)
        end
      end

      private

      # Turn a configured file name into an absolute Pathname. Relative names
      # are expanded against the current working directory; joining the
      # expanded directory with the relative name again (as was done before)
      # duplicated directory components, e.g. "a/b.csv" => "<cwd>/a/a/b.csv".
      def resolve_path(name)
        path = Pathname.new(name)
        path.absolute? ? path : Pathname.new(File.expand_path(name))
      end

      # Apply every complete filter (both :replace and :result present) to the line.
      def apply_filters(line)
        return line if @filters.nil?

        @filters.inject(line) do |text, filter|
          if filter[:replace].nil? || filter[:result].nil?
            text
          else
            text.gsub(Regexp.new(filter[:replace]), filter[:result])
          end
        end
      end

      # True when the line satisfies every complete :charcount rule. Stops at
      # the first failing rule, so later rules are never run against a line
      # that has already been rejected.
      def expected_char_counts?(line)
        return true if @charcount.nil?

        @charcount.all? do |rule|
          rule[:char].nil? || rule[:count].nil? || line.count(rule[:char]) == rule[:count]
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module ETL
  module Processor
    # Row processor that evaluates a configured condition for each row and
    # then runs one of two optional Ruby snippets depending on the outcome.
    #
    # Configuration options:
    # * <tt>:condition</tt>: an array <tt>[operator, first, second]</tt>; array
    #   operands are evaluated recursively, the pieces are then concatenated
    #   into a string and passed to +eval+ (operator first when it is "!")
    # * <tt>:outtrue</tt>: string passed to +eval+ when the condition holds; when
    #   it is nil the processor returns an empty array instead of the row
    # * <tt>:outfalse</tt>: string passed to +eval+ when the condition does not hold
    #
    # NOTE: all three options end up in +eval+ with +row+ in scope, so they
    # must only ever come from trusted control files.
    class FilterRowProcessor < ETL::Processor::RowProcessor
      attr_reader :condition, :outtrue, :outfalse

      def initialize(control, configuration)
        @condition = configuration[:condition]
        @outtrue = configuration[:outtrue]
        @outfalse = configuration[:outfalse]
        super
      end

      # Returns nil for a nil row, [] when the condition holds and no
      # :outtrue snippet is configured, and the row in every other case.
      def process(row)
        return nil if row.nil?

        unless eval_condition(row, @condition)
          eval(@outfalse) unless @outfalse.nil?
          return row
        end

        return [] if @outtrue.nil?

        eval(@outtrue)
        row
      end

      private

      # Evaluate +cond+ against +row+. Any StandardError raised while
      # building or evaluating the expression makes the condition false.
      # The local names here stay as they are because evaluated snippets
      # can see them.
      def eval_condition(row, cond)
        first = cond[1].instance_of?(Array) ? eval_condition(row, cond[1]) : cond[1]
        second = cond[2].instance_of?(Array) ? eval_condition(row, cond[2]) : cond[2]

        if cond[0] == "!"
          eval("#{cond[0]}#{first}#{second}")
        else
          eval("#{first}#{cond[0]}#{second}")
        end
      rescue
        false
      end

    end
  end
end
@@ -0,0 +1,68 @@
1
+ # Written by Susan Potter under open source MIT license.
2
+ # August 12, 2007.
3
+
4
+ require 'net/ftp'
5
+
6
module ETL
  module Processor
    # Custom processor to download files via FTP
    class FtpDownloaderProcessor < ETL::Processor::Processor
      attr_reader :host
      attr_reader :port
      attr_reader :remote_dir
      attr_reader :files
      attr_reader :username
      attr_reader :local_dir

      # configuration options include:
      # * host - hostname or IP address of FTP server (required)
      # * port - port number for FTP server (default: 21)
      # * remote_dir - remote path on FTP server (default: /)
      # * files - list of files to download from FTP server (default: [])
      # * username - username for FTP server authentication (default: anonymous)
      # * password - password for FTP server authentication (default: nil)
      # * local_dir - local output directory to save downloaded files (default: '')
      #
      # As an example you might write something like the following in your control process file:
      #  pre_process :ftp_downloader, {
      #    :host => 'ftp.sec.gov',
      #    :remote_dir => 'edgar/Feed/2007/QTR2',
      #    :files => ['20070402.nc.tar.gz', '20070403.nc.tar.gz', '20070404.nc.tar.gz',
      #               '20070405.nc.tar.gz', '20070406.nc.tar.gz'],
      #    :local_dir => '/data/sec/2007/04',
      #  }
      # The above example will anonymously download via FTP the first week's worth of SEC filing feed data
      # from the second quarter of 2007 and download the files to the local directory +/data/sec/2007/04+.
      def initialize(control, configuration)
        # Let the base processor record control/configuration, as the other
        # processors in this library do.
        super
        @host = configuration[:host]
        @port = configuration[:port] || 21
        @remote_dir = configuration[:remote_dir] || '/'
        @files = configuration[:files] || []
        @username = configuration[:username] || 'anonymous'
        @password = configuration[:password]
        @local_dir = configuration[:local_dir] || ''
      end

      # Connect, log in and fetch every configured file in binary mode.
      # The connection is always closed, even when a transfer fails.
      def process
        # Net::FTP.open(host) would already connect on the default port, so a
        # following #connect opened a second socket and leaked the first one.
        # Start unconnected and connect exactly once on the configured port.
        conn = Net::FTP.new
        begin
          conn.connect(@host, @port)
          conn.login(@username, @password)
          @files.each do |f|
            conn.getbinaryfile(remote_file(f), local_file(f))
          end
        ensure
          conn.close unless conn.closed?
        end
      end

      private
      attr_accessor :password

      # Path of +name+ inside the local output directory.
      def local_file(name)
        File.join(@local_dir, name)
      end

      # Path of +name+ inside the remote directory.
      def remote_file(name)
        File.join(@remote_dir, name)
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'net/ftp'
2
+
3
module ETL
  module Processor
    # Custom processor to upload files via FTP
    class FtpUploaderProcessor < ETL::Processor::Processor
      attr_reader :host
      attr_reader :port
      attr_reader :remote_dir
      attr_reader :files
      attr_reader :username
      attr_reader :local_dir

      # configuration options include:
      # * host - hostname or IP address of FTP server (required)
      # * port - port number for FTP server (default: 21)
      # * remote_dir - remote path on FTP server to upload into (default: /)
      # * files - list of files to upload to FTP server (default: [])
      # * username - username for FTP server authentication (default: anonymous)
      # * password - password for FTP server authentication (default: nil)
      # * local_dir - local directory the files are read from (default: '')
      #
      # As an example you might write something like the following in your control process file:
      #  post_process :ftp_uploader, {
      #    :host => 'ftp.example.com',
      #    :remote_dir => 'incoming/2007/QTR2',
      #    :files => ['20070402.nc.tar.gz', '20070403.nc.tar.gz'],
      #    :local_dir => '/data/sec/2007/04',
      #  }
      # The above example will anonymously upload the two listed files from the
      # local directory +/data/sec/2007/04+ to +incoming/2007/QTR2+ on the server.
      def initialize(control, configuration)
        # Let the base processor record control/configuration, as the other
        # processors in this library do.
        super
        @host = configuration[:host]
        @port = configuration[:port] || 21
        @remote_dir = configuration[:remote_dir] || '/'
        @files = configuration[:files] || []
        @username = configuration[:username] || 'anonymous'
        @password = configuration[:password]
        @local_dir = configuration[:local_dir] || ''
      end

      # Connect, log in and send every configured file in binary mode.
      # The connection is always closed, even when a transfer fails.
      def process
        # Net::FTP.open(host) would already connect on the default port, so a
        # following #connect opened a second socket and leaked the first one.
        # Start unconnected and connect exactly once on the configured port.
        conn = Net::FTP.new
        begin
          conn.connect(@host, @port)
          conn.login(@username, @password)
          @files.each do |f|
            conn.putbinaryfile(local_file(f), remote_file(f))
          end
        ensure
          conn.close unless conn.closed?
        end
      end

      private
      attr_accessor :password

      # Path of +name+ inside the local directory.
      def local_file(name)
        File.join(@local_dir, name)
      end

      # Path of +name+ inside the remote directory.
      def remote_file(name)
        File.join(@remote_dir, name)
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module ETL #:nodoc:
  module Processor #:nodoc:
    # Row-level processor that turns one row into several rows, one per node
    # reachable from it, intended for insertion into a hierarchy bridge table.
    class HierarchyExploderProcessor < ETL::Processor::RowProcessor
      attr_accessor :id_field
      attr_accessor :parent_id_field

      # Initialize the processor
      #
      # Configuration options:
      # * <tt>:target</tt>: The name handed to ETL::Engine.connection to obtain the connection
      # * <tt>:table</tt>: The table that is queried for child ids
      # * <tt>:id_field</tt>: The name of the id column (defaults to 'id')
      # * <tt>:parent_id_field</tt>: The name of the parent id column (defaults to 'parent_id')
      #
      # TODO: Allow resolver to be implemented in a customizable fashion, i.e. don't rely
      # on AR as the only resolution method.
      def initialize(control, configuration={})
        @id_field = configuration[:id_field] || 'id'
        @parent_id_field = configuration[:parent_id_field] || 'parent_id'
        super
      end

      # Process the row expanding it into hierarchy values. The walk starts
      # at <tt>row[:id]</tt>; the start node counts as top-level when
      # <tt>row[:parent_id]</tt> is nil.
      def process(row)
        exploded = []
        connection = ETL::Engine.connection(configuration[:target])
        start_id = row[:id]
        build_rows([start_id], start_id, start_id, row[:parent_id].nil?, 0,
                   exploded, configuration[:table], connection)
        exploded
      end

      protected
      # Depth-first walk: append one bridge row per id in +ids+, then recurse
      # into that id's children one level deeper. +row_id+ is the ancestor
      # all generated rows point at; +parent_id+ is accepted but not used.
      def build_rows(ids, parent_id, row_id, root, level, rows, table, conn)
        top_flag = root ? 1 : 0
        ids.each do |id|
          child_ids = conn.select_values("SELECT #{id_field} FROM #{table} WHERE #{parent_id_field} = #{id}")
          bottom_flag = child_ids.empty? ? 1 : 0

          rows << {
            :parent_id => row_id,
            :child_id => id,
            :num_levels_from_parent => level,
            :is_bottom => bottom_flag,
            :is_top => top_flag
          }

          build_rows(child_ids, id, row_id, false, level + 1, rows, table, conn)
        end
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
+ optional_require 'net/imap'
2
+ optional_require 'tmail'
3
+
4
module ETL
  module Processor
    # Custom processor to download files via Imap Attachment
    class ImapattachmentDownloaderProcessor < ETL::Processor::Processor
      attr_reader :host
      attr_reader :ssl
      attr_reader :port
      attr_reader :delete
      attr_reader :filters
      attr_reader :folder
      attr_reader :username
      attr_reader :local_dir

      # configuration options include:
      # * host - hostname or IP address of IMAP server (required)
      # * ssl - activate encryption (default false)
      # * port - port number for IMAP server (default: 143, or 993 with ssl)
      # * delete - delete message after reading (default false)
      # * filters - filter mails, as a nested [operator, first, second] array (default [])
      # * folder - folder to select mails from (default INBOX)
      # * username - username for IMAP server authentication (default: anonymous)
      # * password - password for IMAP server authentication (default: nil)
      # * local_dir - local output directory to save downloaded files (default: '')
      #
      def initialize(control, configuration)
        # Let the base processor record control/configuration, as the other
        # processors in this library do.
        super
        @host = configuration[:host]
        @ssl = configuration[:ssl] || false
        # 143 is the registered IMAP port; the previous default of 220 is IMAP3.
        @port = configuration[:port] || (@ssl ? 993 : 143)
        @delete = configuration[:delete] || false
        @filters = configuration[:filters] || []
        @folder = configuration[:folder] || 'INBOX'
        @username = configuration[:username] || 'anonymous'
        @password = configuration[:password]
        @local_dir = configuration[:local_dir] || ''
      end

      # Save the attachments of every non-deleted message in the folder that
      # passes the filters, optionally flagging those messages as deleted.
      # The connection is always dropped, even when processing fails.
      def process
        conn = Net::IMAP.new(@host, @port, @ssl)
        begin
          conn.login(@username, @password)

          conn.select(@folder)
          conn.uid_search(["NOT", "DELETED"]).each do |msguuid|
            mail = TMail::Mail.parse(conn.uid_fetch(msguuid, 'RFC822').first.attr['RFC822'])
            next if mail.attachments.blank?
            next unless applyfilter(mail, @filters)

            mail.attachments.each do |attachment|
              # Binary mode: attachments are arbitrary bytes, not text.
              File.open(local_file(attachment.original_filename), "wb") do |f|
                f << attachment.gets(nil)
              end
            end

            # msguuid comes from uid_search, so it must be addressed with
            # uid_store; plain store interprets it as a sequence number and
            # would flag a different message.
            conn.uid_store(msguuid, "+FLAGS", [:Deleted]) if @delete
          end
          conn.expunge
          conn.close
        ensure
          conn.disconnect unless conn.disconnected?
        end
      end

      private
      attr_accessor :password

      # Path of +name+ inside the local output directory.
      def local_file(name)
        File.join(@local_dir, name)
      end

      # Evaluate a filter of the form [operator, first, second] against
      # +mail+. Array operands are evaluated recursively; the pieces are
      # then concatenated (operator first when it is "!") and evaluated as
      # Ruby with +mail+ in scope. Filters with fewer than three elements
      # match everything; any StandardError makes the filter fail.
      #
      # NOTE: this relies on +eval+, so filters must only come from trusted
      # control files.
      def applyfilter(mail, cond)
        return true if (cond.nil? or cond.size < 3)

        first = cond[1]
        if cond[1].is_a?(Array)
          first = applyfilter(mail, cond[1])
        end

        second = cond[2]
        if cond[2].is_a?(Array)
          second = applyfilter(mail, cond[2])
        end

        return eval("#{cond[0]}#{first}#{second}") if cond[0] == "!"

        eval("#{first}#{cond[0]}#{second}")
      rescue
        false
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ optional_require 'net/pop'
2
+ optional_require 'tmail'
3
+
4
module ETL
  module Processor
    # Custom processor to download files via Pop3 Attachment
    class Pop3attachmentDownloaderProcessor < ETL::Processor::Processor
      attr_reader :host
      attr_reader :ssl
      attr_reader :port
      attr_reader :delete
      attr_reader :filters
      attr_reader :username
      attr_reader :local_dir

      # configuration options include:
      # * host - hostname or IP address of POP3 server (required)
      # * ssl - activate encryption (default false)
      # * port - port number for POP3 server (default: Net::POP3.default_port or Net::POP3.default_pop3s_port)
      # * delete - delete message after reading (default false)
      # * filters - filter mails, as a nested [operator, first, second] array (default [])
      # * username - username for POP3 server authentication (default: anonymous)
      # * password - password for POP3 server authentication (default: nil)
      # * local_dir - local output directory to save downloaded files (default: '')
      #
      def initialize(control, configuration)
        # Let the base processor record control/configuration, as the other
        # processors in this library do.
        super
        @host = configuration[:host]
        @ssl = configuration[:ssl] || false
        @port = configuration[:port] || (@ssl ? Net::POP3.default_pop3s_port : Net::POP3.default_port )
        @delete = configuration[:delete] || false
        @filters = configuration[:filters] || []
        @username = configuration[:username] || 'anonymous'
        @password = configuration[:password]
        @local_dir = configuration[:local_dir] || ''
      end

      # Save the attachments of every message that passes the filters,
      # optionally deleting those messages. The session is always finished,
      # even when processing fails.
      def process
        conn = Net::POP3.new(@host, @port)
        # Enable SSL on this connection only. The class-level
        # Net::POP3.enable_ssl switches SSL on for every POP3 connection
        # created afterwards in the process, including non-SSL processors.
        # NOTE(review): VERIFY_NONE skips certificate verification; kept for
        # compatibility with servers using self-signed certificates.
        conn.enable_ssl(OpenSSL::SSL::VERIFY_NONE) if @ssl
        begin
          conn.start(@username, @password)
          conn.each_mail do |message|
            mail = TMail::Mail.parse(message.pop)
            next if mail.attachments.blank?
            next unless applyfilter(mail, @filters)

            mail.attachments.each do |attachment|
              # Binary mode: attachments are arbitrary bytes, not text.
              File.open(local_file(attachment.original_filename), "wb") do |f|
                f << attachment.gets(nil)
              end
            end

            message.delete if @delete
          end
        ensure
          conn.finish if conn.started?
        end
      end

      private
      attr_accessor :password

      # Path of +name+ inside the local output directory.
      def local_file(name)
        File.join(@local_dir, name)
      end

      # Evaluate a filter of the form [operator, first, second] against
      # +mail+. Array operands are evaluated recursively; the pieces are
      # then concatenated (operator first when it is "!") and evaluated as
      # Ruby with +mail+ in scope. Filters with fewer than three elements
      # match everything; any StandardError makes the filter fail.
      #
      # NOTE: this relies on +eval+, so filters must only come from trusted
      # control files.
      def applyfilter(mail, cond)
        return true if (cond.nil? or cond.size < 3)

        first = cond[1]
        if cond[1].is_a?(Array)
          first = applyfilter(mail, cond[1])
        end

        second = cond[2]
        if cond[2].is_a?(Array)
          second = applyfilter(mail, cond[2])
        end

        return eval("#{cond[0]}#{first}#{second}") if cond[0] == "!"

        eval("#{first}#{cond[0]}#{second}")
      rescue
        false
      end
    end
  end
end
+ end