activewarehouse-etl 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -1
- data/lib/etl/control/source/file_source.rb +4 -3
- data/lib/etl/engine.rb +9 -2
- data/lib/etl/parser/delimited_parser.rb +5 -6
- data/lib/etl/parser/fixed_width_parser.rb +7 -5
- data/lib/etl/parser/parser.rb +3 -18
- data/lib/etl/parser/xml_parser.rb +4 -5
- data/lib/etl/processor/copy_field_processor.rb +10 -0
- data/lib/etl/transform/type_transform.rb +6 -1
- data/lib/etl/version.rb +1 -1
- metadata +3 -2
data/CHANGELOG
CHANGED
@@ -63,4 +63,8 @@
|
|
63
63
|
* Added a depends_on directive that accepts a list of arguments of either strings or symbols. Each
|
64
64
|
symbol is converted to a string and .ctl is appended; strings are passed through directly. The
|
65
65
|
dependencies are executed in the order they are specified.
|
66
|
-
* The default field separator in the bulk loader is now a comma (was a tab).
|
66
|
+
* The default field separator in the bulk loader is now a comma (was a tab).
|
67
|
+
|
68
|
+
0.6.1 - Mar 22, 2007
|
69
|
+
* Added support for absolute paths in file sources
|
70
|
+
* Added CopyFieldProcessor
|
@@ -44,9 +44,10 @@ module ETL #:nodoc:
|
|
44
44
|
private
|
45
45
|
# Copy source data to a local directory structure
|
46
46
|
def copy_sources
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
path = Pathname.new(configuration[:file])
|
48
|
+
path = path.absolute? ? path : Pathname.new(File.dirname(control.file)) + path
|
49
|
+
Pathname.glob(path).each do |f|
|
50
|
+
next if f.directory?
|
50
51
|
FileUtils.cp(f, local_file)
|
51
52
|
end
|
52
53
|
end
|
data/lib/etl/engine.rb
CHANGED
@@ -18,13 +18,20 @@ module ETL #:nodoc:
|
|
18
18
|
new().process(control_file)
|
19
19
|
end
|
20
20
|
|
21
|
+
attr_accessor :timestamped_log
|
22
|
+
|
21
23
|
# A logger for the engine
|
22
24
|
attr_accessor :logger
|
23
25
|
|
24
26
|
def logger #:nodoc:
|
25
27
|
unless @logger
|
26
|
-
|
27
|
-
|
28
|
+
if timestamped_log
|
29
|
+
@logger = Logger.new("etl_#{timestamp}.log")
|
30
|
+
else
|
31
|
+
@logger = Logger.new(File.open('etl.log', 'a'))
|
32
|
+
end
|
33
|
+
@logger.level = Logger::ERROR
|
34
|
+
@logger.formatter = Logger::Formatter.new
|
28
35
|
end
|
29
36
|
@logger
|
30
37
|
end
|
@@ -25,9 +25,9 @@ module ETL #:nodoc:
|
|
25
25
|
line += 1
|
26
26
|
row = {}
|
27
27
|
validate_row(raw_row, line, file)
|
28
|
-
raw_row.each_with_index do |
|
28
|
+
raw_row.each_with_index do |value, index|
|
29
29
|
f = fields[index]
|
30
|
-
row[f.name] =
|
30
|
+
row[f.name] = value
|
31
31
|
end
|
32
32
|
yield row
|
33
33
|
end
|
@@ -56,7 +56,7 @@ module ETL #:nodoc:
|
|
56
56
|
when Symbol
|
57
57
|
fields << Field.new(options)
|
58
58
|
when Hash
|
59
|
-
fields << Field.new(options[:name]
|
59
|
+
fields << Field.new(options[:name])
|
60
60
|
else
|
61
61
|
raise DefinitionError, "Each field definition must either be a symbol or a hash"
|
62
62
|
end
|
@@ -64,10 +64,9 @@ module ETL #:nodoc:
|
|
64
64
|
end
|
65
65
|
|
66
66
|
class Field #:nodoc:
|
67
|
-
attr_reader :name
|
68
|
-
def initialize(name
|
67
|
+
attr_reader :name
|
68
|
+
def initialize(name)
|
69
69
|
@name = name
|
70
|
-
@type = type
|
71
70
|
end
|
72
71
|
end
|
73
72
|
end
|
@@ -22,7 +22,7 @@ module ETL #:nodoc:
|
|
22
22
|
next
|
23
23
|
end
|
24
24
|
# TODO make strip optional?
|
25
|
-
row[name] =
|
25
|
+
row[name] = line[f.field_start, f.field_length].strip
|
26
26
|
end
|
27
27
|
yield row
|
28
28
|
end
|
@@ -37,16 +37,18 @@ module ETL #:nodoc:
|
|
37
37
|
private
|
38
38
|
def configure
|
39
39
|
source.definition.each do |field, options|
|
40
|
-
fields[field] = FixedWidthField.new(
|
40
|
+
fields[field] = FixedWidthField.new(
|
41
|
+
options[:name], options[:start], options[:end], options[:length]
|
42
|
+
)
|
41
43
|
end
|
42
44
|
end
|
43
45
|
end
|
44
46
|
|
45
47
|
class FixedWidthField #:nodoc:
|
46
|
-
attr_reader :name, :field_start, :field_end, :field_length
|
47
|
-
|
48
|
+
attr_reader :name, :field_start, :field_end, :field_length
|
49
|
+
# Initialize the field.
|
50
|
+
def initialize(name, field_start, field_end=nil, field_length=nil)
|
48
51
|
@name = name
|
49
|
-
@type = type ||= :string
|
50
52
|
@field_start = field_start - 1
|
51
53
|
if field_end
|
52
54
|
@field_end = field_end
|
data/lib/etl/parser/parser.rb
CHANGED
@@ -23,26 +23,11 @@ module ETL
|
|
23
23
|
@options = options || {}
|
24
24
|
end
|
25
25
|
|
26
|
-
# Convert the value to the specified type.
|
27
|
-
#
|
28
|
-
# Parameters:
|
29
|
-
# * <tt>name</tt>: The name of the field
|
30
|
-
# * <tt>value</tt>: The value
|
31
|
-
# * <tt>type</tt>: The type name (:integer, :float, :string)
|
32
|
-
def convert(name, value, type)
|
33
|
-
case type
|
34
|
-
when :integer
|
35
|
-
value.to_i
|
36
|
-
when :float
|
37
|
-
value.to_f
|
38
|
-
else
|
39
|
-
value
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
26
|
protected
|
44
27
|
def file
|
45
|
-
|
28
|
+
path = Pathname.new(source.configuration[:file])
|
29
|
+
path = path.absolute? ? path : Pathname.new(File.dirname(source.control.file)) + path
|
30
|
+
path
|
46
31
|
end
|
47
32
|
|
48
33
|
def raise_with_info(error, message, file, line)
|
@@ -23,7 +23,7 @@ module ETL
|
|
23
23
|
row = {}
|
24
24
|
fields.each do |f|
|
25
25
|
value = element.text(f.xpath)
|
26
|
-
row[f.name] =
|
26
|
+
row[f.name] = value
|
27
27
|
end
|
28
28
|
yield row
|
29
29
|
end
|
@@ -46,7 +46,7 @@ module ETL
|
|
46
46
|
fields << Field.new(options, options.to_s)
|
47
47
|
when Hash
|
48
48
|
options[:xpath] ||= options[:name]
|
49
|
-
fields << Field.new(options[:name], options[:xpath].to_s
|
49
|
+
fields << Field.new(options[:name], options[:xpath].to_s)
|
50
50
|
else
|
51
51
|
raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
|
52
52
|
end
|
@@ -54,11 +54,10 @@ module ETL
|
|
54
54
|
end
|
55
55
|
|
56
56
|
class Field
|
57
|
-
attr_reader :name, :xpath
|
58
|
-
def initialize(name, xpath
|
57
|
+
attr_reader :name, :xpath
|
58
|
+
def initialize(name, xpath)
|
59
59
|
@name = name
|
60
60
|
@xpath = xpath
|
61
|
-
@type = type
|
62
61
|
end
|
63
62
|
end
|
64
63
|
end
|
@@ -5,14 +5,19 @@ module ETL #:nodoc:
|
|
5
5
|
def initialize(control, configuration={})
|
6
6
|
super
|
7
7
|
@type = configuration[:type]
|
8
|
+
@significant = configuration[:significant] ||= 0
|
8
9
|
end
|
9
10
|
# Transform the value
|
10
11
|
def transform(name, value, row)
|
11
12
|
case @type
|
12
13
|
when :string
|
13
14
|
value.to_s
|
14
|
-
when :number
|
15
|
+
when :number, :integer
|
15
16
|
value.to_i
|
17
|
+
when :float
|
18
|
+
value.to_f
|
19
|
+
when :decimal
|
20
|
+
BigDecimal.new(value.to_s, @significant)
|
16
21
|
else
|
17
22
|
raise "Unsupported type: #{@type}"
|
18
23
|
end
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.6.0
|
7
|
-
date: 2007-03-
|
6
|
+
version: 0.6.1
|
7
|
+
date: 2007-03-22 00:00:00 -04:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- lib/etl/parser/sax_parser.rb
|
70
70
|
- lib/etl/parser/xml_parser.rb
|
71
71
|
- lib/etl/processor/bulk_import_processor.rb
|
72
|
+
- lib/etl/processor/copy_field_processor.rb
|
72
73
|
- lib/etl/processor/hierarchy_exploder_processor.rb
|
73
74
|
- lib/etl/processor/processor.rb
|
74
75
|
- lib/etl/processor/row_processor.rb
|