activewarehouse-etl 0.6.0 → 0.6.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -63,4 +63,8 @@
63
63
  * Added a depends_on directive that accepts a list of arguments of either strings or symbols. Each
64
64
  symbol is converted to a string and .ctl is appended; strings are passed through directly. The
65
65
  dependencies are executed in the order they are specified.
66
- * The default field separator in the bulk loader is now a comma (was a tab).
66
+ * The default field separator in the bulk loader is now a comma (was a tab).
67
+
68
+ 0.6.1 - Mar 22, 2007
69
+ * Added support for absolute paths in file sources
70
+ * Added CopyFieldProcessor
@@ -44,9 +44,10 @@ module ETL #:nodoc:
44
44
  private
45
45
  # Copy source data to a local directory structure
46
46
  def copy_sources
47
- source_file = File.join(File.dirname(control.file), configuration[:file])
48
- Dir.glob(source_file).each do |f|
49
- next if File.directory?(f)
47
+ path = Pathname.new(configuration[:file])
48
+ path = path.absolute? ? path : Pathname.new(File.dirname(control.file)) + path
49
+ Pathname.glob(path).each do |f|
50
+ next if f.directory?
50
51
  FileUtils.cp(f, local_file)
51
52
  end
52
53
  end
data/lib/etl/engine.rb CHANGED
@@ -18,13 +18,20 @@ module ETL #:nodoc:
18
18
  new().process(control_file)
19
19
  end
20
20
 
21
+ attr_accessor :timestamped_log
22
+
21
23
  # A logger for the engine
22
24
  attr_accessor :logger
23
25
 
24
26
  def logger #:nodoc:
25
27
  unless @logger
26
- @logger = Logger.new("etl_#{timestamp}.log")
27
- @logger.level = Logger::WARN
28
+ if timestamped_log
29
+ @logger = Logger.new("etl_#{timestamp}.log")
30
+ else
31
+ @logger = Logger.new(File.open('etl.log', 'a'))
32
+ end
33
+ @logger.level = Logger::ERROR
34
+ @logger.formatter = Logger::Formatter.new
28
35
  end
29
36
  @logger
30
37
  end
@@ -25,9 +25,9 @@ module ETL #:nodoc:
25
25
  line += 1
26
26
  row = {}
27
27
  validate_row(raw_row, line, file)
28
- raw_row.each_with_index do |record, index|
28
+ raw_row.each_with_index do |value, index|
29
29
  f = fields[index]
30
- row[f.name] = convert(f.name, record, f.type)
30
+ row[f.name] = value
31
31
  end
32
32
  yield row
33
33
  end
@@ -56,7 +56,7 @@ module ETL #:nodoc:
56
56
  when Symbol
57
57
  fields << Field.new(options)
58
58
  when Hash
59
- fields << Field.new(options[:name], options[:type])
59
+ fields << Field.new(options[:name])
60
60
  else
61
61
  raise DefinitionError, "Each field definition must either be a symbol or a hash"
62
62
  end
@@ -64,10 +64,9 @@ module ETL #:nodoc:
64
64
  end
65
65
 
66
66
  class Field #:nodoc:
67
- attr_reader :name, :type
68
- def initialize(name, type=:string)
67
+ attr_reader :name
68
+ def initialize(name)
69
69
  @name = name
70
- @type = type
71
70
  end
72
71
  end
73
72
  end
@@ -22,7 +22,7 @@ module ETL #:nodoc:
22
22
  next
23
23
  end
24
24
  # TODO make strip optional?
25
- row[name] = convert(name, line[f.field_start, f.field_length].strip, f.type)
25
+ row[name] = line[f.field_start, f.field_length].strip
26
26
  end
27
27
  yield row
28
28
  end
@@ -37,16 +37,18 @@ module ETL #:nodoc:
37
37
  private
38
38
  def configure
39
39
  source.definition.each do |field, options|
40
- fields[field] = FixedWidthField.new(options[:name], options[:start], options[:end], options[:length], options[:type])
40
+ fields[field] = FixedWidthField.new(
41
+ options[:name], options[:start], options[:end], options[:length]
42
+ )
41
43
  end
42
44
  end
43
45
  end
44
46
 
45
47
  class FixedWidthField #:nodoc:
46
- attr_reader :name, :field_start, :field_end, :field_length, :type
47
- def initialize(name, field_start, field_end=nil, field_length=nil, type=nil)
48
+ attr_reader :name, :field_start, :field_end, :field_length
49
+ # Initialize the field.
50
+ def initialize(name, field_start, field_end=nil, field_length=nil)
48
51
  @name = name
49
- @type = type ||= :string
50
52
  @field_start = field_start - 1
51
53
  if field_end
52
54
  @field_end = field_end
@@ -23,26 +23,11 @@ module ETL
23
23
  @options = options || {}
24
24
  end
25
25
 
26
- # Convert the value to the specified type.
27
- #
28
- # Parameters:
29
- # * <tt>name</tt>: The name of the field
30
- # * <tt>value</tt>: The value
31
- # * <tt>type</tt>: The type name (:integer, :float, :string)
32
- def convert(name, value, type)
33
- case type
34
- when :integer
35
- value.to_i
36
- when :float
37
- value.to_f
38
- else
39
- value
40
- end
41
- end
42
-
43
26
  protected
44
27
  def file
45
- File.join(File.dirname(source.control.file), source.configuration[:file])
28
+ path = Pathname.new(source.configuration[:file])
29
+ path = path.absolute? ? path : Pathname.new(File.dirname(source.control.file)) + path
30
+ path
46
31
  end
47
32
 
48
33
  def raise_with_info(error, message, file, line)
@@ -23,7 +23,7 @@ module ETL
23
23
  row = {}
24
24
  fields.each do |f|
25
25
  value = element.text(f.xpath)
26
- row[f.name] = convert(f.name, value, f.type)
26
+ row[f.name] = value
27
27
  end
28
28
  yield row
29
29
  end
@@ -46,7 +46,7 @@ module ETL
46
46
  fields << Field.new(options, options.to_s)
47
47
  when Hash
48
48
  options[:xpath] ||= options[:name]
49
- fields << Field.new(options[:name], options[:xpath].to_s, options[:type])
49
+ fields << Field.new(options[:name], options[:xpath].to_s)
50
50
  else
51
51
  raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
52
52
  end
@@ -54,11 +54,10 @@ module ETL
54
54
  end
55
55
 
56
56
  class Field
57
- attr_reader :name, :xpath, :type
58
- def initialize(name, xpath, type=:string)
57
+ attr_reader :name, :xpath
58
+ def initialize(name, xpath)
59
59
  @name = name
60
60
  @xpath = xpath
61
- @type = type
62
61
  end
63
62
  end
64
63
  end
@@ -0,0 +1,10 @@
1
+ module ETL
2
+ module Processor
3
+ class CopyField < ETL::Processor::RowProcessor
4
+ def process(row)
5
+ row[configuration[:destination]] = row[configuration[:source]].dup
6
+ row
7
+ end
8
+ end
9
+ end
10
+ end
@@ -5,14 +5,19 @@ module ETL #:nodoc:
5
5
  def initialize(control, configuration={})
6
6
  super
7
7
  @type = configuration[:type]
8
+ @significant = configuration[:significant] ||= 0
8
9
  end
9
10
  # Transform the value
10
11
  def transform(name, value, row)
11
12
  case @type
12
13
  when :string
13
14
  value.to_s
14
- when :number
15
+ when :number, :integer
15
16
  value.to_i
17
+ when :float
18
+ value.to_f
19
+ when :decimal
20
+ BigDecimal.new(value.to_s, @significant)
16
21
  else
17
22
  raise "Unsupported type: #{@type}"
18
23
  end
data/lib/etl/version.rb CHANGED
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 6
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.6.0
7
- date: 2007-03-08 00:00:00 -05:00
6
+ version: 0.6.1
7
+ date: 2007-03-22 00:00:00 -04:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib
@@ -69,6 +69,7 @@ files:
69
69
  - lib/etl/parser/sax_parser.rb
70
70
  - lib/etl/parser/xml_parser.rb
71
71
  - lib/etl/processor/bulk_import_processor.rb
72
+ - lib/etl/processor/copy_field_processor.rb
72
73
  - lib/etl/processor/hierarchy_exploder_processor.rb
73
74
  - lib/etl/processor/processor.rb
74
75
  - lib/etl/processor/row_processor.rb