activewarehouse-etl 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -63,4 +63,8 @@
63
63
  * Added a depends_on directive that accepts a list of arguments of either strings or symbols. Each
64
64
  symbol is converted to a string and .ctl is appended; strings are passed through directly. The
65
65
  dependencies are executed in the order they are specified.
66
- * The default field separator in the bulk loader is now a comma (was a tab).
66
+ * The default field separator in the bulk loader is now a comma (was a tab).
67
+
68
+ 0.6.1 - Mar 22, 2007
69
+ * Added support for absolute paths in file sources
70
+ * Added CopyFieldProcessor
@@ -44,9 +44,10 @@ module ETL #:nodoc:
44
44
  private
45
45
  # Copy source data to a local directory structure
46
46
  def copy_sources
47
- source_file = File.join(File.dirname(control.file), configuration[:file])
48
- Dir.glob(source_file).each do |f|
49
- next if File.directory?(f)
47
+ path = Pathname.new(configuration[:file])
48
+ path = path.absolute? ? path : Pathname.new(File.dirname(control.file)) + path
49
+ Pathname.glob(path).each do |f|
50
+ next if f.directory?
50
51
  FileUtils.cp(f, local_file)
51
52
  end
52
53
  end
data/lib/etl/engine.rb CHANGED
@@ -18,13 +18,20 @@ module ETL #:nodoc:
18
18
  new().process(control_file)
19
19
  end
20
20
 
21
+ attr_accessor :timestamped_log
22
+
21
23
  # A logger for the engine
22
24
  attr_accessor :logger
23
25
 
24
26
  def logger #:nodoc:
25
27
  unless @logger
26
- @logger = Logger.new("etl_#{timestamp}.log")
27
- @logger.level = Logger::WARN
28
+ if timestamped_log
29
+ @logger = Logger.new("etl_#{timestamp}.log")
30
+ else
31
+ @logger = Logger.new(File.open('etl.log', 'a'))
32
+ end
33
+ @logger.level = Logger::ERROR
34
+ @logger.formatter = Logger::Formatter.new
28
35
  end
29
36
  @logger
30
37
  end
@@ -25,9 +25,9 @@ module ETL #:nodoc:
25
25
  line += 1
26
26
  row = {}
27
27
  validate_row(raw_row, line, file)
28
- raw_row.each_with_index do |record, index|
28
+ raw_row.each_with_index do |value, index|
29
29
  f = fields[index]
30
- row[f.name] = convert(f.name, record, f.type)
30
+ row[f.name] = value
31
31
  end
32
32
  yield row
33
33
  end
@@ -56,7 +56,7 @@ module ETL #:nodoc:
56
56
  when Symbol
57
57
  fields << Field.new(options)
58
58
  when Hash
59
- fields << Field.new(options[:name], options[:type])
59
+ fields << Field.new(options[:name])
60
60
  else
61
61
  raise DefinitionError, "Each field definition must either be a symbol or a hash"
62
62
  end
@@ -64,10 +64,9 @@ module ETL #:nodoc:
64
64
  end
65
65
 
66
66
  class Field #:nodoc:
67
- attr_reader :name, :type
68
- def initialize(name, type=:string)
67
+ attr_reader :name
68
+ def initialize(name)
69
69
  @name = name
70
- @type = type
71
70
  end
72
71
  end
73
72
  end
@@ -22,7 +22,7 @@ module ETL #:nodoc:
22
22
  next
23
23
  end
24
24
  # TODO make strip optional?
25
- row[name] = convert(name, line[f.field_start, f.field_length].strip, f.type)
25
+ row[name] = line[f.field_start, f.field_length].strip
26
26
  end
27
27
  yield row
28
28
  end
@@ -37,16 +37,18 @@ module ETL #:nodoc:
37
37
  private
38
38
  def configure
39
39
  source.definition.each do |field, options|
40
- fields[field] = FixedWidthField.new(options[:name], options[:start], options[:end], options[:length], options[:type])
40
+ fields[field] = FixedWidthField.new(
41
+ options[:name], options[:start], options[:end], options[:length]
42
+ )
41
43
  end
42
44
  end
43
45
  end
44
46
 
45
47
  class FixedWidthField #:nodoc:
46
- attr_reader :name, :field_start, :field_end, :field_length, :type
47
- def initialize(name, field_start, field_end=nil, field_length=nil, type=nil)
48
+ attr_reader :name, :field_start, :field_end, :field_length
49
+ # Initialize the field.
50
+ def initialize(name, field_start, field_end=nil, field_length=nil)
48
51
  @name = name
49
- @type = type ||= :string
50
52
  @field_start = field_start - 1
51
53
  if field_end
52
54
  @field_end = field_end
@@ -23,26 +23,11 @@ module ETL
23
23
  @options = options || {}
24
24
  end
25
25
 
26
- # Convert the value to the specified type.
27
- #
28
- # Parameters:
29
- # * <tt>name</tt>: The name of the field
30
- # * <tt>value</tt>: The value
31
- # * <tt>type</tt>: The type name (:integer, :float, :string)
32
- def convert(name, value, type)
33
- case type
34
- when :integer
35
- value.to_i
36
- when :float
37
- value.to_f
38
- else
39
- value
40
- end
41
- end
42
-
43
26
  protected
44
27
  def file
45
- File.join(File.dirname(source.control.file), source.configuration[:file])
28
+ path = Pathname.new(source.configuration[:file])
29
+ path = path.absolute? ? path : Pathname.new(File.dirname(source.control.file)) + path
30
+ path
46
31
  end
47
32
 
48
33
  def raise_with_info(error, message, file, line)
@@ -23,7 +23,7 @@ module ETL
23
23
  row = {}
24
24
  fields.each do |f|
25
25
  value = element.text(f.xpath)
26
- row[f.name] = convert(f.name, value, f.type)
26
+ row[f.name] = value
27
27
  end
28
28
  yield row
29
29
  end
@@ -46,7 +46,7 @@ module ETL
46
46
  fields << Field.new(options, options.to_s)
47
47
  when Hash
48
48
  options[:xpath] ||= options[:name]
49
- fields << Field.new(options[:name], options[:xpath].to_s, options[:type])
49
+ fields << Field.new(options[:name], options[:xpath].to_s)
50
50
  else
51
51
  raise DefinitionError, "Each field definition must either be an symbol or a hash of options for the field"
52
52
  end
@@ -54,11 +54,10 @@ module ETL
54
54
  end
55
55
 
56
56
  class Field
57
- attr_reader :name, :xpath, :type
58
- def initialize(name, xpath, type=:string)
57
+ attr_reader :name, :xpath
58
+ def initialize(name, xpath)
59
59
  @name = name
60
60
  @xpath = xpath
61
- @type = type
62
61
  end
63
62
  end
64
63
  end
@@ -0,0 +1,10 @@
1
+ module ETL
2
+ module Processor
3
+ class CopyField < ETL::Processor::RowProcessor
4
+ def process(row)
5
+ row[configuration[:destination]] = row[configuration[:source]].dup
6
+ row
7
+ end
8
+ end
9
+ end
10
+ end
@@ -5,14 +5,19 @@ module ETL #:nodoc:
5
5
  def initialize(control, configuration={})
6
6
  super
7
7
  @type = configuration[:type]
8
+ @significant = configuration[:significant] ||= 0
8
9
  end
9
10
  # Transform the value
10
11
  def transform(name, value, row)
11
12
  case @type
12
13
  when :string
13
14
  value.to_s
14
- when :number
15
+ when :number, :integer
15
16
  value.to_i
17
+ when :float
18
+ value.to_f
19
+ when :decimal
20
+ BigDecimal.new(value.to_s, @significant)
16
21
  else
17
22
  raise "Unsupported type: #{@type}"
18
23
  end
data/lib/etl/version.rb CHANGED
@@ -2,7 +2,7 @@ module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 6
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.6.0
7
- date: 2007-03-08 00:00:00 -05:00
6
+ version: 0.6.1
7
+ date: 2007-03-22 00:00:00 -04:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib
@@ -69,6 +69,7 @@ files:
69
69
  - lib/etl/parser/sax_parser.rb
70
70
  - lib/etl/parser/xml_parser.rb
71
71
  - lib/etl/processor/bulk_import_processor.rb
72
+ - lib/etl/processor/copy_field_processor.rb
72
73
  - lib/etl/processor/hierarchy_exploder_processor.rb
73
74
  - lib/etl/processor/processor.rb
74
75
  - lib/etl/processor/row_processor.rb