activewarehouse-etl 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +22 -2
- data/README +12 -0
- data/Rakefile +64 -59
- data/bin/etl +0 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +11 -1
- data/lib/etl.rb +9 -21
- data/lib/etl/builder.rb +2 -1
- data/lib/etl/builder/date_dimension_builder.rb +67 -54
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +1 -2
- data/lib/etl/control/control.rb +46 -18
- data/lib/etl/control/destination.rb +201 -138
- data/lib/etl/control/destination/database_destination.rb +10 -5
- data/lib/etl/control/source.rb +1 -1
- data/lib/etl/control/source/database_source.rb +8 -10
- data/lib/etl/core_ext/time/calculations.rb +4 -2
- data/lib/etl/engine.rb +35 -10
- data/lib/etl/execution/migration.rb +21 -9
- data/lib/etl/generator/generator.rb +1 -1
- data/lib/etl/http_tools.rb +21 -7
- data/lib/etl/parser/apache_combined_log_parser.rb +3 -1
- data/lib/etl/parser/delimited_parser.rb +1 -1
- data/lib/etl/parser/parser.rb +1 -1
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +5 -1
- data/lib/etl/processor/check_exist_processor.rb +1 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/transform/date_to_string_transform.rb +1 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +67 -2
- data/lib/etl/transform/string_to_date_transform.rb +6 -1
- data/lib/etl/transform/string_to_datetime_transform.rb +1 -1
- data/lib/etl/transform/string_to_time_transform.rb +1 -1
- data/lib/etl/version.rb +1 -1
- metadata +94 -78
@@ -47,10 +47,7 @@ module ETL #:nodoc:
|
|
47
47
|
|
48
48
|
# Flush the currently buffered data
|
49
49
|
def flush
|
50
|
-
conn = ETL::Engine.connection(target)
|
51
50
|
conn.transaction do
|
52
|
-
conn.truncate(table_name) if truncate
|
53
|
-
|
54
51
|
buffer.flatten.each do |row|
|
55
52
|
# check to see if this row's compound key constraint already exists
|
56
53
|
# note that the compound key constraint may not utilize virtual fields
|
@@ -62,10 +59,10 @@ module ETL #:nodoc:
|
|
62
59
|
names = []
|
63
60
|
values = []
|
64
61
|
order.each do |name|
|
65
|
-
names << name
|
62
|
+
names << "`#{name}`"
|
66
63
|
values << conn.quote(row[name]) # TODO: this is probably not database agnostic
|
67
64
|
end
|
68
|
-
q = "INSERT INTO
|
65
|
+
q = "INSERT INTO `#{table_name}` (#{names.join(',')}) VALUES (#{values.join(',')})"
|
69
66
|
ETL::Engine.logger.debug("Executing insert: #{q}")
|
70
67
|
conn.insert(q, "Insert row #{current_row}")
|
71
68
|
@current_row += 1
|
@@ -81,6 +78,14 @@ module ETL #:nodoc:
|
|
81
78
|
end
|
82
79
|
|
83
80
|
private
|
81
|
+
def conn
|
82
|
+
@conn ||= begin
|
83
|
+
conn = ETL::Engine.connection(target)
|
84
|
+
conn.truncate(table_name) if truncate
|
85
|
+
conn
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
84
89
|
def table_name
|
85
90
|
ETL::Engine.table(table, ETL::Engine.connection(target))
|
86
91
|
end
|
data/lib/etl/control/source.rb
CHANGED
@@ -23,7 +23,7 @@ module ETL #:nodoc:
|
|
23
23
|
# For example if name is :database then this will return a
|
24
24
|
# DatabaseSource class
|
25
25
|
def class_for_name(name)
|
26
|
-
ETL::Control.const_get("#{name.to_s.
|
26
|
+
ETL::Control.const_get("#{name.to_s.camelize}Source")
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
@@ -93,14 +93,8 @@ module ETL #:nodoc:
|
|
93
93
|
# Get the list of columns to read. This is defined in the source
|
94
94
|
# definition as either an Array or Hash
|
95
95
|
def columns
|
96
|
-
|
97
|
-
|
98
|
-
definition.collect(&:to_sym)
|
99
|
-
when Hash
|
100
|
-
definition.keys.collect(&:to_sym)
|
101
|
-
else
|
102
|
-
raise "Definition must be either an Array or a Hash"
|
103
|
-
end
|
96
|
+
# weird default is required for writing to cache correctly
|
97
|
+
@columns ||= query_rows.any? ? query_rows.first.keys : ['']
|
104
98
|
end
|
105
99
|
|
106
100
|
# Returns each row from the source. If read_locally is specified then
|
@@ -118,7 +112,7 @@ module ETL #:nodoc:
|
|
118
112
|
write_local(file)
|
119
113
|
read_rows(file, &block)
|
120
114
|
else
|
121
|
-
|
115
|
+
query_rows.each do |row|
|
122
116
|
row = ETL::Row.new(row.symbolize_keys)
|
123
117
|
row.source = self
|
124
118
|
yield row
|
@@ -158,7 +152,7 @@ module ETL #:nodoc:
|
|
158
152
|
t = Benchmark.realtime do
|
159
153
|
FasterCSV.open(file, 'w') do |f|
|
160
154
|
f << columns
|
161
|
-
|
155
|
+
query_rows.each do |row|
|
162
156
|
f << columns.collect { |column| row[column.to_s] }
|
163
157
|
lines += 1
|
164
158
|
end
|
@@ -204,6 +198,10 @@ module ETL #:nodoc:
|
|
204
198
|
@query = q
|
205
199
|
end
|
206
200
|
|
201
|
+
def query_rows
|
202
|
+
@query_rows ||= connection.select_all(query)
|
203
|
+
end
|
204
|
+
|
207
205
|
# Get the database connection to use
|
208
206
|
def connection
|
209
207
|
ETL::Engine.connection(target)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
#Updated by Jack Hong on 04/05/08
|
2
|
+
|
1
3
|
module ETL #:nodoc:
|
2
4
|
module CoreExtensions #:nodoc:
|
3
5
|
module Time #:nodoc:
|
@@ -18,7 +20,7 @@ module ETL #:nodoc:
|
|
18
20
|
end
|
19
21
|
def fiscal_year_month(offset_month=10)
|
20
22
|
shifted_month = month - (offset_month - 1)
|
21
|
-
shifted_month += 12 if shifted_month
|
23
|
+
shifted_month += 12 if shifted_month <= 0
|
22
24
|
shifted_month
|
23
25
|
end
|
24
26
|
def fiscal_year_quarter(offset_month=10)
|
@@ -37,4 +39,4 @@ module ETL #:nodoc:
|
|
37
39
|
end
|
38
40
|
end
|
39
41
|
end
|
40
|
-
end
|
42
|
+
end
|
data/lib/etl/engine.rb
CHANGED
@@ -19,7 +19,7 @@ module ETL #:nodoc:
|
|
19
19
|
# * <tt>:rails_root</tt>: Set to the rails root to boot rails
|
20
20
|
def init(options={})
|
21
21
|
unless @initialized
|
22
|
-
puts "initializing ETL engine"
|
22
|
+
puts "initializing ETL engine\n\n"
|
23
23
|
@limit = options[:limit]
|
24
24
|
@offset = options[:offset]
|
25
25
|
@log_write_mode = 'w' if options[:newlog]
|
@@ -28,8 +28,8 @@ module ETL #:nodoc:
|
|
28
28
|
@rails_root = options[:rails_root]
|
29
29
|
|
30
30
|
require File.join(@rails_root, 'config/environment') if @rails_root
|
31
|
-
|
32
31
|
options[:config] ||= 'database.yml'
|
32
|
+
options[:config] = 'config/database.yml' unless File.exist?(options[:config])
|
33
33
|
database_configuration = YAML::load(ERB.new(IO.read(options[:config])).result + "\n")
|
34
34
|
ActiveRecord::Base.configurations.merge!(database_configuration)
|
35
35
|
ETL::Base.configurations = database_configuration
|
@@ -177,9 +177,7 @@ module ETL #:nodoc:
|
|
177
177
|
if temp_tables[temp_table_name].nil?
|
178
178
|
# Create the temp table and add it to the mapping
|
179
179
|
begin connection.drop_table(temp_table_name); rescue; end
|
180
|
-
connection.
|
181
|
-
connection.add_select_into_table(temp_table_name, "SELECT * FROM #{table_name}")
|
182
|
-
)
|
180
|
+
connection.copy_table(table_name, temp_table_name)
|
183
181
|
temp_tables[temp_table_name] = {
|
184
182
|
:table => table_name,
|
185
183
|
:connection => connection
|
@@ -274,6 +272,7 @@ module ETL #:nodoc:
|
|
274
272
|
# Process the specified batch file
|
275
273
|
def process_batch(batch)
|
276
274
|
batch = ETL::Batch::Batch.resolve(batch, self)
|
275
|
+
say "Processing batch #{batch.file}"
|
277
276
|
|
278
277
|
ETL::Engine.batch = ETL::Execution::Batch.create!(
|
279
278
|
:batch_file => batch.file,
|
@@ -290,10 +289,12 @@ module ETL #:nodoc:
|
|
290
289
|
# Process the specified control file
|
291
290
|
def process_control(control)
|
292
291
|
control = ETL::Control::Control.resolve(control)
|
292
|
+
say_on_own_line "Processing control #{control.file}"
|
293
293
|
|
294
294
|
ETL::Engine.job = ETL::Execution::Job.create!(
|
295
295
|
:control_file => control.file,
|
296
|
-
:status => 'executing'
|
296
|
+
:status => 'executing',
|
297
|
+
:batch_id => ETL::Engine.batch ? ETL::Engine.batch.id : nil
|
297
298
|
)
|
298
299
|
|
299
300
|
execute_dependencies(control)
|
@@ -357,11 +358,15 @@ module ETL #:nodoc:
|
|
357
358
|
row[name] = transform.transform(name, row[name], row)
|
358
359
|
end
|
359
360
|
end
|
361
|
+
rescue ResolverError => e
|
362
|
+
Engine.logger.error(e.message)
|
363
|
+
errors << e.message
|
360
364
|
rescue => e
|
361
365
|
msg = "Error transforming from #{Engine.current_source} on line #{Engine.current_source_row}: #{e}"
|
362
366
|
errors << msg
|
363
367
|
Engine.logger.error(msg)
|
364
368
|
e.backtrace.each { |line| Engine.logger.error(line) }
|
369
|
+
ensure
|
365
370
|
begin
|
366
371
|
exceeded_error_threshold?(control) ? break : next
|
367
372
|
rescue => inner_error
|
@@ -422,7 +427,7 @@ module ETL #:nodoc:
|
|
422
427
|
destination.close
|
423
428
|
end
|
424
429
|
|
425
|
-
say_on_own_line "Executing screens"
|
430
|
+
say_on_own_line "Executing before post-process screens"
|
426
431
|
begin
|
427
432
|
execute_screens(control)
|
428
433
|
rescue FatalScreenError => e
|
@@ -443,7 +448,21 @@ module ETL #:nodoc:
|
|
443
448
|
if destinations.length > 0
|
444
449
|
say "Wrote #{Engine.rows_written} lines to destinations"
|
445
450
|
end
|
446
|
-
|
451
|
+
|
452
|
+
say_on_own_line "Executing after post-process screens"
|
453
|
+
begin
|
454
|
+
execute_screens(control, :after_post_process)
|
455
|
+
rescue FatalScreenError => e
|
456
|
+
say "Fatal screen error during job execution: #{e.message}"
|
457
|
+
exit
|
458
|
+
rescue ScreenError => e
|
459
|
+
say "Screen error during job execution: #{e.message}"
|
460
|
+
return
|
461
|
+
else
|
462
|
+
say "Screens passed"
|
463
|
+
end
|
464
|
+
|
465
|
+
say_on_own_line "Completed #{control.file} in #{distance_of_time_in_words(start_time)} with #{errors.length} errors."
|
447
466
|
say "Processing average: #{Engine.average_rows_per_second} rows/sec)"
|
448
467
|
|
449
468
|
say "Avg after_reads: #{Engine.rows_read/benchmarks[:after_reads]} rows/sec" if benchmarks[:after_reads] > 0
|
@@ -509,9 +528,15 @@ module ETL #:nodoc:
|
|
509
528
|
end
|
510
529
|
|
511
530
|
# Execute all screens
|
512
|
-
def execute_screens(control)
|
531
|
+
def execute_screens(control, timing = :before_post_process)
|
532
|
+
screens = case timing
|
533
|
+
when :after_post_process
|
534
|
+
control.after_post_process_screens
|
535
|
+
else # default to before post-process screens
|
536
|
+
control.screens
|
537
|
+
end
|
513
538
|
[:fatal,:error,:warn].each do |type|
|
514
|
-
|
539
|
+
screens[type].each do |block|
|
515
540
|
begin
|
516
541
|
block.call
|
517
542
|
rescue => e
|
@@ -4,20 +4,28 @@ module ETL #:nodoc:
|
|
4
4
|
# for the ETL engine
|
5
5
|
class Migration
|
6
6
|
class << self
|
7
|
+
protected
|
8
|
+
# Get the schema info table name
|
9
|
+
def schema_info_table_name
|
10
|
+
ActiveRecord::Migrator.schema_migrations_table_name
|
11
|
+
end
|
12
|
+
alias :schema_migrations_table_name :schema_info_table_name
|
13
|
+
|
14
|
+
public
|
7
15
|
# Execute the migrations
|
8
16
|
def migrate
|
9
|
-
connection.
|
10
|
-
|
11
|
-
v.upto(target - 1) do |i|
|
17
|
+
connection.initialize_schema_migrations_table
|
18
|
+
last_migration.upto(target - 1) do |i|
|
12
19
|
__send__("migration_#{i+1}".to_sym)
|
13
|
-
|
20
|
+
connection.assume_migrated_upto_version(i+1)
|
14
21
|
end
|
15
22
|
end
|
23
|
+
|
16
24
|
protected
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
25
|
+
def last_migration
|
26
|
+
connection.select_values(
|
27
|
+
"SELECT version FROM #{schema_migrations_table_name}"
|
28
|
+
).map(&:to_i).sort.last || 0
|
21
29
|
end
|
22
30
|
|
23
31
|
# Get the connection to use during migration
|
@@ -27,7 +35,7 @@ module ETL #:nodoc:
|
|
27
35
|
|
28
36
|
# Get the final target version number
|
29
37
|
def target
|
30
|
-
|
38
|
+
4
|
31
39
|
end
|
32
40
|
|
33
41
|
private
|
@@ -62,6 +70,10 @@ module ETL #:nodoc:
|
|
62
70
|
connection.add_column :jobs, :batch_id, :integer
|
63
71
|
connection.add_index :jobs, :batch_id
|
64
72
|
end
|
73
|
+
|
74
|
+
def migration_4
|
75
|
+
connection.drop_table :records
|
76
|
+
end
|
65
77
|
|
66
78
|
# Update the schema info table, setting the version value
|
67
79
|
def update_schema_info(version)
|
@@ -7,7 +7,7 @@ module ETL #:nodoc:
|
|
7
7
|
#
|
8
8
|
# For example, if name is :surrogate_key then a SurrogateKeyGenerator class is returned
|
9
9
|
def class_for_name(name)
|
10
|
-
ETL::Generator.const_get("#{name.to_s.
|
10
|
+
ETL::Generator.const_get("#{name.to_s.camelize}Generator")
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
data/lib/etl/http_tools.rb
CHANGED
@@ -106,20 +106,34 @@ module HttpTools
|
|
106
106
|
result
|
107
107
|
end
|
108
108
|
|
109
|
-
|
110
|
-
|
109
|
+
# Parse a URI. If options[:prefix] is set then prepend it to the keys for the hash that
|
110
|
+
# is returned.
|
111
|
+
def parse_uri(uri_string, options={})
|
112
|
+
prefix = options[:prefix] ||= ''
|
113
|
+
empty_hash = {
|
114
|
+
"#{prefix}scheme".to_sym => nil,
|
115
|
+
"#{prefix}host".to_sym => nil,
|
116
|
+
"#{prefix}port".to_sym => nil,
|
117
|
+
"#{prefix}uri_path".to_sym => nil,
|
118
|
+
"#{prefix}domain".to_sym => nil
|
119
|
+
}
|
120
|
+
if uri_string
|
111
121
|
#attempt to parse uri --if it's not a valid uri then catch the problem and set everything to nil
|
112
122
|
begin
|
113
123
|
uri = URI.parse(uri_string)
|
114
|
-
results = {
|
115
|
-
|
124
|
+
results = {
|
125
|
+
"#{prefix}scheme".to_sym => uri.scheme,
|
126
|
+
"#{prefix}host".to_sym => uri.host,
|
127
|
+
"#{prefix}port".to_sym => uri.port,
|
128
|
+
"#{prefix}uri_path".to_sym => uri.path
|
129
|
+
}
|
130
|
+
results["#{prefix}domain".to_sym] = $1 if uri.host =~ /\.?([^\.]+\.[^\.]+$)/
|
116
131
|
results
|
117
132
|
rescue
|
118
|
-
|
133
|
+
empty_hash
|
119
134
|
end
|
120
|
-
|
121
135
|
else
|
122
|
-
|
136
|
+
empty_hash
|
123
137
|
end
|
124
138
|
end
|
125
139
|
end
|
@@ -33,9 +33,11 @@ module ETL #:nodoc:
|
|
33
33
|
#fields[:timestamp] =~ r%{(\d\d)/(\w\w\w)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) -(\d\d\d\d)}
|
34
34
|
d = Date._strptime(fields[:timestamp], '%d/%b/%Y:%H:%M:%S') unless fields[:timestamp].nil?
|
35
35
|
fields[:timestamp] = Time.mktime(d[:year], d[:mon], d[:mday], d[:hour], d[:min], d[:sec], d[:sec_fraction]) unless d.nil?
|
36
|
+
|
37
|
+
fields[:method], fields[:path] = fields[:request].split(/\s/)
|
36
38
|
|
37
39
|
fields.merge!(parse_user_agent(fields[:user_agent])) unless fields[:user_agent].nil?
|
38
|
-
fields.merge!(parse_uri(fields[:referrer]))
|
40
|
+
fields.merge!(parse_uri(fields[:referrer], :prefix => 'referrer_'))
|
39
41
|
|
40
42
|
fields.each do |key, value|
|
41
43
|
fields[key] = nil if value == '-'
|
@@ -44,7 +44,7 @@ module ETL #:nodoc:
|
|
44
44
|
ETL::Engine.logger.debug "validating line #{line} in file #{file}"
|
45
45
|
if row.length != fields.length
|
46
46
|
raise_with_info( MismatchError,
|
47
|
-
"The number of
|
47
|
+
"The number of columns from the source (#{row.length}) does not match the number of columns in the definition (#{fields.length})",
|
48
48
|
line, file
|
49
49
|
)
|
50
50
|
end
|
data/lib/etl/parser/parser.rb
CHANGED
@@ -11,7 +11,7 @@ module ETL #:nodoc:
|
|
11
11
|
# Example:
|
12
12
|
# <tt>class_for_name(:fixed_width)</tt> returns a FixedWidthParser class
|
13
13
|
def class_for_name(name)
|
14
|
-
ETL::Parser.const_get("#{name.to_s.
|
14
|
+
ETL::Parser.const_get("#{name.to_s.camelize}Parser")
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module ETL
|
2
|
+
module Processor
|
3
|
+
# This processor can be used either as a RowProcessor (called on each row with after_read) or as a Processor (called once on pre_process or post_process)
|
4
|
+
class BlockProcessor < ETL::Processor::RowProcessor
|
5
|
+
def initialize(control, configuration)
|
6
|
+
super
|
7
|
+
@block = configuration[:block]
|
8
|
+
end
|
9
|
+
def process(row=nil)
|
10
|
+
@block.call(row)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -21,6 +21,8 @@ module ETL #:nodoc:
|
|
21
21
|
attr_accessor :field_enclosure
|
22
22
|
# The line separator (defaults to a newline)
|
23
23
|
attr_accessor :line_separator
|
24
|
+
# The string that indicates a NULL (defaults to an empty string)
|
25
|
+
attr_accessor :null_string
|
24
26
|
|
25
27
|
# Initialize the processor.
|
26
28
|
#
|
@@ -43,6 +45,7 @@ module ETL #:nodoc:
|
|
43
45
|
@columns = configuration[:columns]
|
44
46
|
@field_separator = (configuration[:field_separator] || ',')
|
45
47
|
@line_separator = (configuration[:line_separator] || "\n")
|
48
|
+
@null_string = (configuration[:null_string] || "")
|
46
49
|
@field_enclosure = configuration[:field_enclosure]
|
47
50
|
|
48
51
|
raise ControlError, "Target must be specified" unless @target
|
@@ -59,8 +62,9 @@ module ETL #:nodoc:
|
|
59
62
|
conn.truncate(table_name) if truncate
|
60
63
|
options = {}
|
61
64
|
options[:columns] = columns
|
62
|
-
if field_separator || field_enclosure
|
65
|
+
if field_separator || field_enclosure || line_separator || null_string
|
63
66
|
options[:fields] = {}
|
67
|
+
options[:fields][:null_string] = null_string if null_string
|
64
68
|
options[:fields][:delimited_by] = field_separator if field_separator
|
65
69
|
options[:fields][:enclosed_by] = field_enclosure if field_enclosure
|
66
70
|
options[:fields][:terminated_by] = line_separator if line_separator
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
3
|
+
module ETL #:nodoc:
|
4
|
+
module Processor #:nodoc:
|
5
|
+
# The encode processor uses Iconv to convert a file from one encoding (eg: utf-8) to another (eg: latin1), line by line.
|
6
|
+
class EncodeProcessor < ETL::Processor::Processor
|
7
|
+
|
8
|
+
# The file to load from
|
9
|
+
attr_reader :source_file
|
10
|
+
# The file to write to
|
11
|
+
attr_reader :target_file
|
12
|
+
# The source file encoding
|
13
|
+
attr_reader :source_encoding
|
14
|
+
# The target file encoding
|
15
|
+
attr_reader :target_encoding
|
16
|
+
|
17
|
+
# Initialize the processor.
|
18
|
+
#
|
19
|
+
# Configuration options:
|
20
|
+
# * <tt>:source_file</tt>: The file to load data from
|
21
|
+
# * <tt>:source_encoding</tt>: The source file encoding (eg: 'latin1','utf-8'), as supported by Iconv
|
22
|
+
# * <tt>:target_file</tt>: The file to write data to
|
23
|
+
# * <tt>:target_encoding</tt>: The target file encoding
|
24
|
+
def initialize(control, configuration)
|
25
|
+
super
|
26
|
+
raise ControlError, "Source file must be specified" if configuration[:source_file].nil?
|
27
|
+
raise ControlError, "Target file must be specified" if configuration[:target_file].nil?
|
28
|
+
@source_file = File.join(File.dirname(control.file), configuration[:source_file])
|
29
|
+
@source_encoding = configuration[:source_encoding]
|
30
|
+
@target_file = File.join(File.dirname(control.file), configuration[:target_file])
|
31
|
+
@target_encoding = configuration[:target_encoding]
|
32
|
+
raise ControlError, "Source and target file cannot currently point to the same file" if source_file == target_file
|
33
|
+
begin
|
34
|
+
@iconv = Iconv.new(target_encoding,source_encoding)
|
35
|
+
rescue Iconv::InvalidEncoding
|
36
|
+
raise ControlError, "Either the source encoding '#{source_encoding}' or the target encoding '#{target_encoding}' is not supported"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Execute the processor
|
41
|
+
def process
|
42
|
+
# operate line by line to handle large files without loading them in-memory
|
43
|
+
# could be replaced by a system iconv call when available, for greater performance
|
44
|
+
File.open(source_file) do |source|
|
45
|
+
#puts "Opening #{target_file}"
|
46
|
+
File.open(target_file,'w') do |target|
|
47
|
+
source.each_line do |line|
|
48
|
+
target << @iconv.iconv(line)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|