activewarehouse-etl 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +22 -2
- data/README +12 -0
- data/Rakefile +64 -59
- data/bin/etl +0 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +11 -1
- data/lib/etl.rb +9 -21
- data/lib/etl/builder.rb +2 -1
- data/lib/etl/builder/date_dimension_builder.rb +67 -54
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +1 -2
- data/lib/etl/control/control.rb +46 -18
- data/lib/etl/control/destination.rb +201 -138
- data/lib/etl/control/destination/database_destination.rb +10 -5
- data/lib/etl/control/source.rb +1 -1
- data/lib/etl/control/source/database_source.rb +8 -10
- data/lib/etl/core_ext/time/calculations.rb +4 -2
- data/lib/etl/engine.rb +35 -10
- data/lib/etl/execution/migration.rb +21 -9
- data/lib/etl/generator/generator.rb +1 -1
- data/lib/etl/http_tools.rb +21 -7
- data/lib/etl/parser/apache_combined_log_parser.rb +3 -1
- data/lib/etl/parser/delimited_parser.rb +1 -1
- data/lib/etl/parser/parser.rb +1 -1
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +5 -1
- data/lib/etl/processor/check_exist_processor.rb +1 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/transform/date_to_string_transform.rb +1 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +67 -2
- data/lib/etl/transform/string_to_date_transform.rb +6 -1
- data/lib/etl/transform/string_to_datetime_transform.rb +1 -1
- data/lib/etl/transform/string_to_time_transform.rb +1 -1
- data/lib/etl/version.rb +1 -1
- metadata +94 -78
@@ -47,10 +47,7 @@ module ETL #:nodoc:
|
|
47
47
|
|
48
48
|
# Flush the currently buffered data
|
49
49
|
def flush
|
50
|
-
conn = ETL::Engine.connection(target)
|
51
50
|
conn.transaction do
|
52
|
-
conn.truncate(table_name) if truncate
|
53
|
-
|
54
51
|
buffer.flatten.each do |row|
|
55
52
|
# check to see if this row's compound key constraint already exists
|
56
53
|
# note that the compound key constraint may not utilize virtual fields
|
@@ -62,10 +59,10 @@ module ETL #:nodoc:
|
|
62
59
|
names = []
|
63
60
|
values = []
|
64
61
|
order.each do |name|
|
65
|
-
names << name
|
62
|
+
names << "`#{name}`"
|
66
63
|
values << conn.quote(row[name]) # TODO: this is probably not database agnostic
|
67
64
|
end
|
68
|
-
q = "INSERT INTO
|
65
|
+
q = "INSERT INTO `#{table_name}` (#{names.join(',')}) VALUES (#{values.join(',')})"
|
69
66
|
ETL::Engine.logger.debug("Executing insert: #{q}")
|
70
67
|
conn.insert(q, "Insert row #{current_row}")
|
71
68
|
@current_row += 1
|
@@ -81,6 +78,14 @@ module ETL #:nodoc:
|
|
81
78
|
end
|
82
79
|
|
83
80
|
private
|
81
|
+
def conn
|
82
|
+
@conn ||= begin
|
83
|
+
conn = ETL::Engine.connection(target)
|
84
|
+
conn.truncate(table_name) if truncate
|
85
|
+
conn
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
84
89
|
def table_name
|
85
90
|
ETL::Engine.table(table, ETL::Engine.connection(target))
|
86
91
|
end
|
data/lib/etl/control/source.rb
CHANGED
@@ -23,7 +23,7 @@ module ETL #:nodoc:
|
|
23
23
|
# For example if name is :database then this will return a
|
24
24
|
# DatabaseSource class
|
25
25
|
def class_for_name(name)
|
26
|
-
ETL::Control.const_get("#{name.to_s.
|
26
|
+
ETL::Control.const_get("#{name.to_s.camelize}Source")
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
@@ -93,14 +93,8 @@ module ETL #:nodoc:
|
|
93
93
|
# Get the list of columns to read. This is defined in the source
|
94
94
|
# definition as either an Array or Hash
|
95
95
|
def columns
|
96
|
-
|
97
|
-
|
98
|
-
definition.collect(&:to_sym)
|
99
|
-
when Hash
|
100
|
-
definition.keys.collect(&:to_sym)
|
101
|
-
else
|
102
|
-
raise "Definition must be either an Array or a Hash"
|
103
|
-
end
|
96
|
+
# when the query returns no rows, fall back to a single empty column name; this odd default is required for writing to cache correctly
|
97
|
+
@columns ||= query_rows.any? ? query_rows.first.keys : ['']
|
104
98
|
end
|
105
99
|
|
106
100
|
# Returns each row from the source. If read_locally is specified then
|
@@ -118,7 +112,7 @@ module ETL #:nodoc:
|
|
118
112
|
write_local(file)
|
119
113
|
read_rows(file, &block)
|
120
114
|
else
|
121
|
-
|
115
|
+
query_rows.each do |row|
|
122
116
|
row = ETL::Row.new(row.symbolize_keys)
|
123
117
|
row.source = self
|
124
118
|
yield row
|
@@ -158,7 +152,7 @@ module ETL #:nodoc:
|
|
158
152
|
t = Benchmark.realtime do
|
159
153
|
FasterCSV.open(file, 'w') do |f|
|
160
154
|
f << columns
|
161
|
-
|
155
|
+
query_rows.each do |row|
|
162
156
|
f << columns.collect { |column| row[column.to_s] }
|
163
157
|
lines += 1
|
164
158
|
end
|
@@ -204,6 +198,10 @@ module ETL #:nodoc:
|
|
204
198
|
@query = q
|
205
199
|
end
|
206
200
|
|
201
|
+
def query_rows
|
202
|
+
@query_rows ||= connection.select_all(query)
|
203
|
+
end
|
204
|
+
|
207
205
|
# Get the database connection to use
|
208
206
|
def connection
|
209
207
|
ETL::Engine.connection(target)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
#Updated by Jack Hong on 04/05/08
|
2
|
+
|
1
3
|
module ETL #:nodoc:
|
2
4
|
module CoreExtensions #:nodoc:
|
3
5
|
module Time #:nodoc:
|
@@ -18,7 +20,7 @@ module ETL #:nodoc:
|
|
18
20
|
end
|
19
21
|
def fiscal_year_month(offset_month=10)
|
20
22
|
shifted_month = month - (offset_month - 1)
|
21
|
-
shifted_month += 12 if shifted_month
|
23
|
+
shifted_month += 12 if shifted_month <= 0
|
22
24
|
shifted_month
|
23
25
|
end
|
24
26
|
def fiscal_year_quarter(offset_month=10)
|
@@ -37,4 +39,4 @@ module ETL #:nodoc:
|
|
37
39
|
end
|
38
40
|
end
|
39
41
|
end
|
40
|
-
end
|
42
|
+
end
|
data/lib/etl/engine.rb
CHANGED
@@ -19,7 +19,7 @@ module ETL #:nodoc:
|
|
19
19
|
# * <tt>:rails_root</tt>: Set to the rails root to boot rails
|
20
20
|
def init(options={})
|
21
21
|
unless @initialized
|
22
|
-
puts "initializing ETL engine"
|
22
|
+
puts "initializing ETL engine\n\n"
|
23
23
|
@limit = options[:limit]
|
24
24
|
@offset = options[:offset]
|
25
25
|
@log_write_mode = 'w' if options[:newlog]
|
@@ -28,8 +28,8 @@ module ETL #:nodoc:
|
|
28
28
|
@rails_root = options[:rails_root]
|
29
29
|
|
30
30
|
require File.join(@rails_root, 'config/environment') if @rails_root
|
31
|
-
|
32
31
|
options[:config] ||= 'database.yml'
|
32
|
+
options[:config] = 'config/database.yml' unless File.exist?(options[:config])
|
33
33
|
database_configuration = YAML::load(ERB.new(IO.read(options[:config])).result + "\n")
|
34
34
|
ActiveRecord::Base.configurations.merge!(database_configuration)
|
35
35
|
ETL::Base.configurations = database_configuration
|
@@ -177,9 +177,7 @@ module ETL #:nodoc:
|
|
177
177
|
if temp_tables[temp_table_name].nil?
|
178
178
|
# Create the temp table and add it to the mapping
|
179
179
|
begin connection.drop_table(temp_table_name); rescue; end
|
180
|
-
connection.
|
181
|
-
connection.add_select_into_table(temp_table_name, "SELECT * FROM #{table_name}")
|
182
|
-
)
|
180
|
+
connection.copy_table(table_name, temp_table_name)
|
183
181
|
temp_tables[temp_table_name] = {
|
184
182
|
:table => table_name,
|
185
183
|
:connection => connection
|
@@ -274,6 +272,7 @@ module ETL #:nodoc:
|
|
274
272
|
# Process the specified batch file
|
275
273
|
def process_batch(batch)
|
276
274
|
batch = ETL::Batch::Batch.resolve(batch, self)
|
275
|
+
say "Processing batch #{batch.file}"
|
277
276
|
|
278
277
|
ETL::Engine.batch = ETL::Execution::Batch.create!(
|
279
278
|
:batch_file => batch.file,
|
@@ -290,10 +289,12 @@ module ETL #:nodoc:
|
|
290
289
|
# Process the specified control file
|
291
290
|
def process_control(control)
|
292
291
|
control = ETL::Control::Control.resolve(control)
|
292
|
+
say_on_own_line "Processing control #{control.file}"
|
293
293
|
|
294
294
|
ETL::Engine.job = ETL::Execution::Job.create!(
|
295
295
|
:control_file => control.file,
|
296
|
-
:status => 'executing'
|
296
|
+
:status => 'executing',
|
297
|
+
:batch_id => ETL::Engine.batch ? ETL::Engine.batch.id : nil
|
297
298
|
)
|
298
299
|
|
299
300
|
execute_dependencies(control)
|
@@ -357,11 +358,15 @@ module ETL #:nodoc:
|
|
357
358
|
row[name] = transform.transform(name, row[name], row)
|
358
359
|
end
|
359
360
|
end
|
361
|
+
rescue ResolverError => e
|
362
|
+
Engine.logger.error(e.message)
|
363
|
+
errors << e.message
|
360
364
|
rescue => e
|
361
365
|
msg = "Error transforming from #{Engine.current_source} on line #{Engine.current_source_row}: #{e}"
|
362
366
|
errors << msg
|
363
367
|
Engine.logger.error(msg)
|
364
368
|
e.backtrace.each { |line| Engine.logger.error(line) }
|
369
|
+
ensure
|
365
370
|
begin
|
366
371
|
exceeded_error_threshold?(control) ? break : next
|
367
372
|
rescue => inner_error
|
@@ -422,7 +427,7 @@ module ETL #:nodoc:
|
|
422
427
|
destination.close
|
423
428
|
end
|
424
429
|
|
425
|
-
say_on_own_line "Executing screens"
|
430
|
+
say_on_own_line "Executing before post-process screens"
|
426
431
|
begin
|
427
432
|
execute_screens(control)
|
428
433
|
rescue FatalScreenError => e
|
@@ -443,7 +448,21 @@ module ETL #:nodoc:
|
|
443
448
|
if destinations.length > 0
|
444
449
|
say "Wrote #{Engine.rows_written} lines to destinations"
|
445
450
|
end
|
446
|
-
|
451
|
+
|
452
|
+
say_on_own_line "Executing after post-process screens"
|
453
|
+
begin
|
454
|
+
execute_screens(control, :after_post_process)
|
455
|
+
rescue FatalScreenError => e
|
456
|
+
say "Fatal screen error during job execution: #{e.message}"
|
457
|
+
exit
|
458
|
+
rescue ScreenError => e
|
459
|
+
say "Screen error during job execution: #{e.message}"
|
460
|
+
return
|
461
|
+
else
|
462
|
+
say "Screens passed"
|
463
|
+
end
|
464
|
+
|
465
|
+
say_on_own_line "Completed #{control.file} in #{distance_of_time_in_words(start_time)} with #{errors.length} errors."
|
447
466
|
say "Processing average: #{Engine.average_rows_per_second} rows/sec)"
|
448
467
|
|
449
468
|
say "Avg after_reads: #{Engine.rows_read/benchmarks[:after_reads]} rows/sec" if benchmarks[:after_reads] > 0
|
@@ -509,9 +528,15 @@ module ETL #:nodoc:
|
|
509
528
|
end
|
510
529
|
|
511
530
|
# Execute all screens
|
512
|
-
def execute_screens(control)
|
531
|
+
def execute_screens(control, timing = :before_post_process)
|
532
|
+
screens = case timing
|
533
|
+
when :after_post_process
|
534
|
+
control.after_post_process_screens
|
535
|
+
else # default to before post-process screens
|
536
|
+
control.screens
|
537
|
+
end
|
513
538
|
[:fatal,:error,:warn].each do |type|
|
514
|
-
|
539
|
+
screens[type].each do |block|
|
515
540
|
begin
|
516
541
|
block.call
|
517
542
|
rescue => e
|
@@ -4,20 +4,28 @@ module ETL #:nodoc:
|
|
4
4
|
# for the ETL engine
|
5
5
|
class Migration
|
6
6
|
class << self
|
7
|
+
protected
|
8
|
+
# Get the schema info table name
|
9
|
+
def schema_info_table_name
|
10
|
+
ActiveRecord::Migrator.schema_migrations_table_name
|
11
|
+
end
|
12
|
+
alias :schema_migrations_table_name :schema_info_table_name
|
13
|
+
|
14
|
+
public
|
7
15
|
# Execute the migrations
|
8
16
|
def migrate
|
9
|
-
connection.
|
10
|
-
|
11
|
-
v.upto(target - 1) do |i|
|
17
|
+
connection.initialize_schema_migrations_table
|
18
|
+
last_migration.upto(target - 1) do |i|
|
12
19
|
__send__("migration_#{i+1}".to_sym)
|
13
|
-
|
20
|
+
connection.assume_migrated_upto_version(i+1)
|
14
21
|
end
|
15
22
|
end
|
23
|
+
|
16
24
|
protected
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
25
|
+
def last_migration
|
26
|
+
connection.select_values(
|
27
|
+
"SELECT version FROM #{schema_migrations_table_name}"
|
28
|
+
).map(&:to_i).sort.last || 0
|
21
29
|
end
|
22
30
|
|
23
31
|
# Get the connection to use during migration
|
@@ -27,7 +35,7 @@ module ETL #:nodoc:
|
|
27
35
|
|
28
36
|
# Get the final target version number
|
29
37
|
def target
|
30
|
-
|
38
|
+
4
|
31
39
|
end
|
32
40
|
|
33
41
|
private
|
@@ -62,6 +70,10 @@ module ETL #:nodoc:
|
|
62
70
|
connection.add_column :jobs, :batch_id, :integer
|
63
71
|
connection.add_index :jobs, :batch_id
|
64
72
|
end
|
73
|
+
|
74
|
+
def migration_4
|
75
|
+
connection.drop_table :records
|
76
|
+
end
|
65
77
|
|
66
78
|
# Update the schema info table, setting the version value
|
67
79
|
def update_schema_info(version)
|
@@ -7,7 +7,7 @@ module ETL #:nodoc:
|
|
7
7
|
#
|
8
8
|
# For example, if name is :surrogate_key then a SurrogateKeyGenerator class is returned
|
9
9
|
def class_for_name(name)
|
10
|
-
ETL::Generator.const_get("#{name.to_s.
|
10
|
+
ETL::Generator.const_get("#{name.to_s.camelize}Generator")
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
data/lib/etl/http_tools.rb
CHANGED
@@ -106,20 +106,34 @@ module HttpTools
|
|
106
106
|
result
|
107
107
|
end
|
108
108
|
|
109
|
-
|
110
|
-
|
109
|
+
# Parse a URI. If options[:prefix] is set then prepend it to the keys for the hash that
|
110
|
+
# is returned.
|
111
|
+
def parse_uri(uri_string, options={})
|
112
|
+
prefix = options[:prefix] ||= ''
|
113
|
+
empty_hash = {
|
114
|
+
"#{prefix}scheme".to_sym => nil,
|
115
|
+
"#{prefix}host".to_sym => nil,
|
116
|
+
"#{prefix}port".to_sym => nil,
|
117
|
+
"#{prefix}uri_path".to_sym => nil,
|
118
|
+
"#{prefix}domain".to_sym => nil
|
119
|
+
}
|
120
|
+
if uri_string
|
111
121
|
# attempt to parse uri -- if it's not a valid uri then catch the problem and set everything to nil
|
112
122
|
begin
|
113
123
|
uri = URI.parse(uri_string)
|
114
|
-
results = {
|
115
|
-
|
124
|
+
results = {
|
125
|
+
"#{prefix}scheme".to_sym => uri.scheme,
|
126
|
+
"#{prefix}host".to_sym => uri.host,
|
127
|
+
"#{prefix}port".to_sym => uri.port,
|
128
|
+
"#{prefix}uri_path".to_sym => uri.path
|
129
|
+
}
|
130
|
+
results["#{prefix}domain".to_sym] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
|
116
131
|
results
|
117
132
|
rescue
|
118
|
-
|
133
|
+
empty_hash
|
119
134
|
end
|
120
|
-
|
121
135
|
else
|
122
|
-
|
136
|
+
empty_hash
|
123
137
|
end
|
124
138
|
end
|
125
139
|
end
|
@@ -33,9 +33,11 @@ module ETL #:nodoc:
|
|
33
33
|
#fields[:timestamp] =~ r%{(\d\d)/(\w\w\w)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) -(\d\d\d\d)}
|
34
34
|
d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S') unless fields[:timestamp].nil?
|
35
35
|
fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction]) unless d.nil?
|
36
|
+
|
37
|
+
fields[:method], fields[:path] = fields[:request].split(/\s/)
|
36
38
|
|
37
39
|
fields.merge!(parse_user_agent(fields[:user_agent])) unless fields[:user_agent].nil?
|
38
|
-
fields.merge!(parse_uri(fields[:referrer]))
|
40
|
+
fields.merge!(parse_uri(fields[:referrer], :prefix => 'referrer_'))
|
39
41
|
|
40
42
|
fields.each do |key, value|
|
41
43
|
fields[key] = nil if value == '-'
|
@@ -44,7 +44,7 @@ module ETL #:nodoc:
|
|
44
44
|
ETL::Engine.logger.debug "validating line #{line} in file #{file}"
|
45
45
|
if row.length != fields.length
|
46
46
|
raise_with_info( MismatchError,
|
47
|
-
"The number of
|
47
|
+
"The number of columns from the source (#{row.length}) does not match the number of columns in the definition (#{fields.length})",
|
48
48
|
line, file
|
49
49
|
)
|
50
50
|
end
|
data/lib/etl/parser/parser.rb
CHANGED
@@ -11,7 +11,7 @@ module ETL #:nodoc:
|
|
11
11
|
# Example:
|
12
12
|
# <tt>class_for_name(:fixed_width)</tt> returns a FixedWidthParser class
|
13
13
|
def class_for_name(name)
|
14
|
-
ETL::Parser.const_get("#{name.to_s.
|
14
|
+
ETL::Parser.const_get("#{name.to_s.camelize}Parser")
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module ETL
|
2
|
+
module Processor
|
3
|
+
# This processor can be used either as a RowProcessor (called on each row with after_read) or as a Processor (called once on pre_process or post_process)
|
4
|
+
class BlockProcessor < ETL::Processor::RowProcessor
|
5
|
+
def initialize(control, configuration)
|
6
|
+
super
|
7
|
+
@block = configuration[:block]
|
8
|
+
end
|
9
|
+
def process(row=nil)
|
10
|
+
@block.call(row)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -21,6 +21,8 @@ module ETL #:nodoc:
|
|
21
21
|
attr_accessor :field_enclosure
|
22
22
|
# The line separator (defaults to a newline)
|
23
23
|
attr_accessor :line_separator
|
24
|
+
# The string that indicates a NULL (defaults to an empty string)
|
25
|
+
attr_accessor :null_string
|
24
26
|
|
25
27
|
# Initialize the processor.
|
26
28
|
#
|
@@ -43,6 +45,7 @@ module ETL #:nodoc:
|
|
43
45
|
@columns = configuration[:columns]
|
44
46
|
@field_separator = (configuration[:field_separator] || ',')
|
45
47
|
@line_separator = (configuration[:line_separator] || "\n")
|
48
|
+
@null_string = (configuration[:null_string] || "")
|
46
49
|
@field_enclosure = configuration[:field_enclosure]
|
47
50
|
|
48
51
|
raise ControlError, "Target must be specified" unless @target
|
@@ -59,8 +62,9 @@ module ETL #:nodoc:
|
|
59
62
|
conn.truncate(table_name) if truncate
|
60
63
|
options = {}
|
61
64
|
options[:columns] = columns
|
62
|
-
if field_separator || field_enclosure
|
65
|
+
if field_separator || field_enclosure || line_separator || null_string
|
63
66
|
options[:fields] = {}
|
67
|
+
options[:fields][:null_string] = null_string if null_string
|
64
68
|
options[:fields][:delimited_by] = field_separator if field_separator
|
65
69
|
options[:fields][:enclosed_by] = field_enclosure if field_enclosure
|
66
70
|
options[:fields][:terminated_by] = line_separator if line_separator
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
3
|
+
module ETL #:nodoc:
|
4
|
+
module Processor #:nodoc:
|
5
|
+
# The encode processor uses Iconv to convert a file from one encoding (e.g. utf-8) to another (e.g. latin1), line by line.
|
6
|
+
class EncodeProcessor < ETL::Processor::Processor
|
7
|
+
|
8
|
+
# The file to load from
|
9
|
+
attr_reader :source_file
|
10
|
+
# The file to write to
|
11
|
+
attr_reader :target_file
|
12
|
+
# The source file encoding
|
13
|
+
attr_reader :source_encoding
|
14
|
+
# The target file encoding
|
15
|
+
attr_reader :target_encoding
|
16
|
+
|
17
|
+
# Initialize the processor.
|
18
|
+
#
|
19
|
+
# Configuration options:
|
20
|
+
# * <tt>:source_file</tt>: The file to load data from
|
21
|
+
# * <tt>:source_encoding</tt>: The source file encoding (e.g. 'latin1', 'utf-8'), as supported by Iconv
|
22
|
+
# * <tt>:target_file</tt>: The file to write data to
|
23
|
+
# * <tt>:target_encoding</tt>: The target file encoding
|
24
|
+
def initialize(control, configuration)
|
25
|
+
super
|
26
|
+
raise ControlError, "Source file must be specified" if configuration[:source_file].nil?
|
27
|
+
raise ControlError, "Target file must be specified" if configuration[:target_file].nil?
|
28
|
+
@source_file = File.join(File.dirname(control.file), configuration[:source_file])
|
29
|
+
@source_encoding = configuration[:source_encoding]
|
30
|
+
@target_file = File.join(File.dirname(control.file), configuration[:target_file])
|
31
|
+
@target_encoding = configuration[:target_encoding]
|
32
|
+
raise ControlError, "Source and target file cannot currently point to the same file" if source_file == target_file
|
33
|
+
begin
|
34
|
+
@iconv = Iconv.new(target_encoding,source_encoding)
|
35
|
+
rescue Iconv::InvalidEncoding
|
36
|
+
raise ControlError, "Either the source encoding '#{source_encoding}' or the target encoding '#{target_encoding}' is not supported"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Execute the processor
|
41
|
+
def process
|
42
|
+
# operate line by line to handle large files without loading them in-memory
|
43
|
+
# could be replaced by a system iconv call when available, for greater performance
|
44
|
+
File.open(source_file) do |source|
|
45
|
+
#puts "Opening #{target_file}"
|
46
|
+
File.open(target_file,'w') do |target|
|
47
|
+
source.each_line do |line|
|
48
|
+
target << @iconv.iconv(line)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|