activewarehouse-etl 0.6.0 → 0.6.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -1
- data/lib/etl/control/source/file_source.rb +4 -3
- data/lib/etl/engine.rb +9 -2
- data/lib/etl/parser/delimited_parser.rb +5 -6
- data/lib/etl/parser/fixed_width_parser.rb +7 -5
- data/lib/etl/parser/parser.rb +3 -18
- data/lib/etl/parser/xml_parser.rb +4 -5
- data/lib/etl/processor/copy_field_processor.rb +10 -0
- data/lib/etl/transform/type_transform.rb +6 -1
- data/lib/etl/version.rb +1 -1
- metadata +3 -2
data/CHANGELOG
CHANGED
@@ -63,4 +63,8 @@
|
|
63
63
|
* Added a depends_on directive that accepts a list of arguments of either strings or symbols. Each
|
64
64
|
symbol is converted to a string and .ctl is appended; strings are passed through directly. The
|
65
65
|
dependencies are executed in the order they are specified.
|
66
|
-
* The default field separator in the bulk loader is now a comma (was a tab).
|
66
|
+
* The default field separator in the bulk loader is now a comma (was a tab).
|
67
|
+
|
68
|
+
0.6.1 - Mar 22, 2007
|
69
|
+
* Added support for absolute paths in file sources
|
70
|
+
* Added CopyFieldProcessor
|
data/lib/etl/control/source/file_source.rb
CHANGED
@@ -44,9 +44,10 @@ module ETL #:nodoc:
|
|
44
44
|
private
|
45
45
|
# Copy source data to a local directory structure
|
46
46
|
def copy_sources
|
47
|
-
|
48
|
-
|
49
|
-
|
47
|
+
path = Pathname.new(configuration[:file])
|
48
|
+
path = path.absolute? ? path : Pathname.new(File.dirname(control.file)) + path
|
49
|
+
Pathname.glob(path).each do |f|
|
50
|
+
next if f.directory?
|
50
51
|
FileUtils.cp(f, local_file)
|
51
52
|
end
|
52
53
|
end
|
data/lib/etl/engine.rb
CHANGED
@@ -18,13 +18,20 @@ module ETL #:nodoc:
|
|
18
18
|
new().process(control_file)
|
19
19
|
end
|
20
20
|
|
21
|
+
attr_accessor :timestamped_log
|
22
|
+
|
21
23
|
# A logger for the engine
|
22
24
|
attr_accessor :logger
|
23
25
|
|
24
26
|
def logger #:nodoc:
|
25
27
|
unless @logger
|
26
|
-
|
27
|
-
|
28
|
+
if timestamped_log
|
29
|
+
@logger = Logger.new("etl_#{timestamp}.log")
|
30
|
+
else
|
31
|
+
@logger = Logger.new(File.open('etl.log', 'a'))
|
32
|
+
end
|
33
|
+
@logger.level = Logger::ERROR
|
34
|
+
@logger.formatter = Logger::Formatter.new
|
28
35
|
end
|
29
36
|
@logger
|
30
37
|
end
|
data/lib/etl/parser/delimited_parser.rb
CHANGED
@@ -25,9 +25,9 @@ module ETL #:nodoc:
|
|
25
25
|
line += 1
|
26
26
|
row = {}
|
27
27
|
validate_row(raw_row, line, file)
|
28
|
-
raw_row.each_with_index do |
|
28
|
+
raw_row.each_with_index do |value, index|
|
29
29
|
f = fields[index]
|
30
|
-
row[f.name] =
|
30
|
+
row[f.name] = value
|
31
31
|
end
|
32
32
|
yield row
|
33
33
|
end
|
@@ -56,7 +56,7 @@ module ETL #:nodoc:
|
|
56
56
|
when Symbol
|
57
57
|
fields << Field.new(options)
|
58
58
|
when Hash
|
59
|
-
fields << Field.new(options[:name]
|
59
|
+
fields << Field.new(options[:name])
|
60
60
|
else
|
61
61
|
raise DefinitionError, "Each field definition must either be a symbol or a hash"
|
62
62
|
end
|
@@ -64,10 +64,9 @@ module ETL #:nodoc:
|
|
64
64
|
end
|
65
65
|
|
66
66
|
class Field #:nodoc:
|
67
|
-
attr_reader :name
|
68
|
-
def initialize(name
|
67
|
+
attr_reader :name
|
68
|
+
def initialize(name)
|
69
69
|
@name = name
|
70
|
-
@type = type
|
71
70
|
end
|
72
71
|
end
|
73
72
|
end
|
data/lib/etl/parser/fixed_width_parser.rb
CHANGED
@@ -22,7 +22,7 @@ module ETL #:nodoc:
|
|
22
22
|
next
|
23
23
|
end
|
24
24
|
# TODO make strip optional?
|
25
|
-
row[name] =
|
25
|
+
row[name] = line[f.field_start, f.field_length].strip
|
26
26
|
end
|
27
27
|
yield row
|
28
28
|
end
|
@@ -37,16 +37,18 @@ module ETL #:nodoc:
|
|
37
37
|
private
|
38
38
|
def configure
|
39
39
|
source.definition.each do |field, options|
|
40
|
-
fields[field] = FixedWidthField.new(
|
40
|
+
fields[field] = FixedWidthField.new(
|
41
|
+
options[:name], options[:start], options[:end], options[:length]
|
42
|
+
)
|
41
43
|
end
|
42
44
|
end
|
43
45
|
end
|
44
46
|
|
45
47
|
class FixedWidthField #:nodoc:
|
46
|
-
attr_reader :name, :field_start, :field_end, :field_length
|
47
|
-
|
48
|
+
attr_reader :name, :field_start, :field_end, :field_length
|
49
|
+
# Initialize the field.
|
50
|
+
def initialize(name, field_start, field_end=nil, field_length=nil)
|
48
51
|
@name = name
|
49
|
-
@type = type ||= :string
|
50
52
|
@field_start = field_start - 1
|
51
53
|
if field_end
|
52
54
|
@field_end = field_end
|
data/lib/etl/parser/parser.rb
CHANGED
@@ -23,26 +23,11 @@ module ETL
|
|
23
23
|
@options = options || {}
|
24
24
|
end
|
25
25
|
|
26
|
-
# Convert the value to the specified type.
|
27
|
-
#
|
28
|
-
# Parameters:
|
29
|
-
# * <tt>name</tt>: The name of the field
|
30
|
-
# * <tt>value</tt>: The value
|
31
|
-
# * <tt>type</tt>: The type name (:integer, :float, :string)
|
32
|
-
def convert(name, value, type)
|
33
|
-
case type
|
34
|
-
when :integer
|
35
|
-
value.to_i
|
36
|
-
when :float
|
37
|
-
value.to_f
|
38
|
-
else
|
39
|
-
value
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
26
|
protected
|
44
27
|
def file
|
45
|
-
|
28
|
+
path = Pathname.new(source.configuration[:file])
|
29
|
+
path = path.absolute? ? path : Pathname.new(File.dirname(source.control.file)) + path
|
30
|
+
path
|
46
31
|
end
|
47
32
|
|
48
33
|
def raise_with_info(error, message, file, line)
|
data/lib/etl/parser/xml_parser.rb
CHANGED
@@ -23,7 +23,7 @@ module ETL
|
|
23
23
|
row = {}
|
24
24
|
fields.each do |f|
|
25
25
|
value = element.text(f.xpath)
|
26
|
-
row[f.name] =
|
26
|
+
row[f.name] = value
|
27
27
|
end
|
28
28
|
yield row
|
29
29
|
end
|
@@ -46,7 +46,7 @@ module ETL
|
|
46
46
|
fields << Field.new(options, options.to_s)
|
47
47
|
when Hash
|
48
48
|
options[:xpath] ||= options[:name]
|
49
|
-
fields << Field.new(options[:name], options[:xpath].to_s
|
49
|
+
fields << Field.new(options[:name], options[:xpath].to_s)
|
50
50
|
else
|
51
51
|
raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
|
52
52
|
end
|
@@ -54,11 +54,10 @@ module ETL
|
|
54
54
|
end
|
55
55
|
|
56
56
|
class Field
|
57
|
-
attr_reader :name, :xpath
|
58
|
-
def initialize(name, xpath
|
57
|
+
attr_reader :name, :xpath
|
58
|
+
def initialize(name, xpath)
|
59
59
|
@name = name
|
60
60
|
@xpath = xpath
|
61
|
-
@type = type
|
62
61
|
end
|
63
62
|
end
|
64
63
|
end
|
data/lib/etl/transform/type_transform.rb
CHANGED
@@ -5,14 +5,19 @@ module ETL #:nodoc:
|
|
5
5
|
def initialize(control, configuration={})
|
6
6
|
super
|
7
7
|
@type = configuration[:type]
|
8
|
+
@significant = configuration[:significant] ||= 0
|
8
9
|
end
|
9
10
|
# Transform the value
|
10
11
|
def transform(name, value, row)
|
11
12
|
case @type
|
12
13
|
when :string
|
13
14
|
value.to_s
|
14
|
-
when :number
|
15
|
+
when :number, :integer
|
15
16
|
value.to_i
|
17
|
+
when :float
|
18
|
+
value.to_f
|
19
|
+
when :decimal
|
20
|
+
BigDecimal.new(value.to_s, @significant)
|
16
21
|
else
|
17
22
|
raise "Unsupported type: #{@type}"
|
18
23
|
end
|
data/lib/etl/version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
|
|
3
3
|
specification_version: 1
|
4
4
|
name: activewarehouse-etl
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.6.0
|
7
|
-
date: 2007-03-
|
6
|
+
version: 0.6.1
|
7
|
+
date: 2007-03-22 00:00:00 -04:00
|
8
8
|
summary: Pure Ruby ETL package.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -69,6 +69,7 @@ files:
|
|
69
69
|
- lib/etl/parser/sax_parser.rb
|
70
70
|
- lib/etl/parser/xml_parser.rb
|
71
71
|
- lib/etl/processor/bulk_import_processor.rb
|
72
|
+
- lib/etl/processor/copy_field_processor.rb
|
72
73
|
- lib/etl/processor/hierarchy_exploder_processor.rb
|
73
74
|
- lib/etl/processor/processor.rb
|
74
75
|
- lib/etl/processor/row_processor.rb
|