activewarehouse-etl 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. data/CHANGELOG +41 -13
  2. data/README +1 -1
  3. data/Rakefile +14 -4
  4. data/TODO +17 -1
  5. data/bin/etl +3 -1
  6. data/lib/etl.rb +11 -7
  7. data/lib/etl/commands/etl.rb +0 -1
  8. data/lib/etl/control/control.rb +113 -36
  9. data/lib/etl/control/destination.rb +13 -1
  10. data/lib/etl/control/destination/database_destination.rb +3 -1
  11. data/lib/etl/control/destination/file_destination.rb +5 -2
  12. data/lib/etl/control/source.rb +36 -0
  13. data/lib/etl/control/source/database_source.rb +63 -8
  14. data/lib/etl/control/source/file_source.rb +25 -4
  15. data/lib/etl/engine.rb +128 -14
  16. data/lib/etl/generator/surrogate_key_generator.rb +1 -0
  17. data/lib/etl/http_tools.rb +119 -0
  18. data/lib/etl/parser/apache_combined_log_parser.rb +47 -0
  19. data/lib/etl/parser/sax_parser.rb +18 -6
  20. data/lib/etl/processor.rb +1 -0
  21. data/lib/etl/processor/bulk_import_processor.rb +12 -0
  22. data/lib/etl/processor/hierarchy_exploder_processor.rb +54 -0
  23. data/lib/etl/processor/processor.rb +1 -5
  24. data/lib/etl/processor/row_processor.rb +17 -0
  25. data/lib/etl/transform/date_to_string_transform.rb +1 -1
  26. data/lib/etl/transform/decode_transform.rb +1 -1
  27. data/lib/etl/transform/default_transform.rb +15 -0
  28. data/lib/etl/transform/foreign_key_lookup_transform.rb +1 -1
  29. data/lib/etl/transform/hierarchy_lookup_transform.rb +56 -0
  30. data/lib/etl/transform/sha1_transform.rb +1 -1
  31. data/lib/etl/transform/string_to_date_transform.rb +3 -3
  32. data/lib/etl/transform/string_to_datetime_transform.rb +17 -0
  33. data/lib/etl/transform/string_to_time_transform.rb +14 -0
  34. data/lib/etl/transform/transform.rb +8 -4
  35. data/lib/etl/transform/type_transform.rb +2 -2
  36. data/lib/etl/version.rb +2 -2
  37. metadata +21 -8
  38. data/lib/etl/active_record_ext.rb +0 -1
  39. data/lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb +0 -34
@@ -8,7 +8,7 @@ module ETL #:nodoc:
8
8
  super
9
9
  end
10
10
  # Transform the value with a SHA1 digest algorithm.
11
- def transform(value)
11
+ def transform(name, value, row)
12
12
  Digest::SHA1.hexdigest(value)
13
13
  end
14
14
  end
@@ -5,9 +5,9 @@ module ETL #:nodoc:
5
5
  def initialize(control, configuration={})
6
6
  super
7
7
  end
8
- # Transform the value using Time.parse
9
- def transform(value)
10
- t = Date.parse(value)
8
+ # Transform the value using Date.parse
9
+ def transform(name, value, row)
10
+ Date.parse(value)
11
11
  end
12
12
  end
13
13
  end
@@ -0,0 +1,17 @@
1
+ module ETL #:nodoc:
2
+ module Transform #:nodoc:
3
+ # Transform a String representation of a date to a DateTime instance
4
+ class StringToDateTimeTransform < ETL::Transform::Transform
5
+ def initialize(control, configuration={})
6
+ super
7
+ end
8
+ # Transform the value using DateTime.parse.
9
+ #
10
+ # WARNING: This transform is slow (due to the Ruby implementation), so use it only if you need to
11
+ # parse timestamps before or after the values supported by Time.parse.
12
+ def transform(name, value, row)
13
+ DateTime.parse(value)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ module ETL #:nodoc:
2
+ module Transform #:nodoc:
3
+ # Transform a String representation of a date to a Time instance
4
+ class StringToTimeTransform < ETL::Transform::Transform
5
+ def initialize(control, configuration={})
6
+ super
7
+ end
8
+ # Transform the value using Time.parse
9
+ def transform(name, value, row)
10
+ Time.parse(value)
11
+ end
12
+ end
13
+ end
14
+ end
@@ -20,14 +20,14 @@ module ETL#:nodoc:
20
20
  # Transform the specified value using the given transforms. The transforms can either be
21
21
  # Proc objects or objects which extend from Transform and implement the method <tt>transform(value)</tt>.
22
22
  # Any other object will result in a ControlError being raised.
23
- def transform(name, value, transforms)
24
- # logger.debug "Transforming field #{name}" if transforms.length > 0
23
+ def transform(name, value, row, transforms)
25
24
  transforms.each do |transform|
25
+ Engine.logger.debug "Transforming field #{name} with #{transform.inspect}"
26
26
  case transform
27
27
  when Proc
28
- value = transform.call(value)
28
+ value = transform.call([name, value, row])
29
29
  when Transform
30
- value = transform.transform(value)
30
+ value = transform.transform(name, value, row)
31
31
  else
32
32
  raise ControlError, "Unsupported transform configuration type: #{transform}"
33
33
  end
@@ -43,6 +43,10 @@ module ETL#:nodoc:
43
43
  @control = control
44
44
  @configuration = configuration
45
45
  end
46
+
47
+ def transform(name, value, row)
48
+ raise "transform is an abstract method"
49
+ end
46
50
  end
47
51
  end
48
52
  end
@@ -6,8 +6,8 @@ module ETL #:nodoc:
6
6
  super
7
7
  @type = configuration[:type]
8
8
  end
9
- # Transform the value using Time.parse
10
- def transform(value)
9
+ # Transform the value
10
+ def transform(name, value, row)
11
11
  case @type
12
12
  when :string
13
13
  value.to_s
data/lib/etl/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 5
5
- TINY = 2
4
+ MINOR = 6
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.5.2
7
- date: 2007-02-19 00:00:00 -05:00
6
+ version: 0.6.0
7
+ date: 2007-03-08 00:00:00 -05:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib
@@ -36,14 +36,13 @@ files:
36
36
  - bin/etl
37
37
  - lib/etl
38
38
  - lib/etl.rb
39
- - lib/etl/active_record_ext
40
- - lib/etl/active_record_ext.rb
41
39
  - lib/etl/commands
42
40
  - lib/etl/control
43
41
  - lib/etl/control.rb
44
42
  - lib/etl/engine.rb
45
43
  - lib/etl/generator
46
44
  - lib/etl/generator.rb
45
+ - lib/etl/http_tools.rb
47
46
  - lib/etl/parser
48
47
  - lib/etl/parser.rb
49
48
  - lib/etl/processor
@@ -51,8 +50,6 @@ files:
51
50
  - lib/etl/transform
52
51
  - lib/etl/transform.rb
53
52
  - lib/etl/version.rb
54
- - lib/etl/active_record_ext/connection_adapters
55
- - lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb
56
53
  - lib/etl/commands/etl.rb
57
54
  - lib/etl/control/control.rb
58
55
  - lib/etl/control/destination
@@ -65,19 +62,26 @@ files:
65
62
  - lib/etl/control/source/file_source.rb
66
63
  - lib/etl/generator/generator.rb
67
64
  - lib/etl/generator/surrogate_key_generator.rb
65
+ - lib/etl/parser/apache_combined_log_parser.rb
68
66
  - lib/etl/parser/delimited_parser.rb
69
67
  - lib/etl/parser/fixed_width_parser.rb
70
68
  - lib/etl/parser/parser.rb
71
69
  - lib/etl/parser/sax_parser.rb
72
70
  - lib/etl/parser/xml_parser.rb
73
71
  - lib/etl/processor/bulk_import_processor.rb
72
+ - lib/etl/processor/hierarchy_exploder_processor.rb
74
73
  - lib/etl/processor/processor.rb
74
+ - lib/etl/processor/row_processor.rb
75
75
  - lib/etl/processor/truncate_processor.rb
76
76
  - lib/etl/transform/date_to_string_transform.rb
77
77
  - lib/etl/transform/decode_transform.rb
78
+ - lib/etl/transform/default_transform.rb
78
79
  - lib/etl/transform/foreign_key_lookup_transform.rb
80
+ - lib/etl/transform/hierarchy_lookup_transform.rb
79
81
  - lib/etl/transform/sha1_transform.rb
80
82
  - lib/etl/transform/string_to_date_transform.rb
83
+ - lib/etl/transform/string_to_datetime_transform.rb
84
+ - lib/etl/transform/string_to_time_transform.rb
81
85
  - lib/etl/transform/transform.rb
82
86
  - lib/etl/transform/type_transform.rb
83
87
  test_files: []
@@ -110,7 +114,7 @@ dependencies:
110
114
  requirements:
111
115
  - - ">="
112
116
  - !ruby/object:Gem::Version
113
- version: 1.3.1.5618
117
+ version: 1.3.1
114
118
  version:
115
119
  - !ruby/object:Gem::Dependency
116
120
  name: activerecord
@@ -119,7 +123,7 @@ dependencies:
119
123
  requirements:
120
124
  - - ">="
121
125
  - !ruby/object:Gem::Version
122
- version: 1.14.4.5618
126
+ version: 1.14.4
123
127
  version:
124
128
  - !ruby/object:Gem::Dependency
125
129
  name: fastercsv
@@ -130,3 +134,12 @@ dependencies:
130
134
  - !ruby/object:Gem::Version
131
135
  version: 1.0.0
132
136
  version:
137
+ - !ruby/object:Gem::Dependency
138
+ name: adapter_extensions
139
+ version_requirement:
140
+ version_requirements: !ruby/object:Gem::Version::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: 0.1.0
145
+ version:
@@ -1 +0,0 @@
1
- require 'etl/active_record_ext/connection_adapters/mysql_adapter'
@@ -1,34 +0,0 @@
1
- require 'active_record/connection_adapters/abstract_adapter'
2
-
3
- module ActiveRecord #:nodoc:
4
- module ConnectionAdapters #:nodoc:
5
- class MysqlAdapter < AbstractAdapter
6
- # Execute a truncate statement on the table. Note that in MySQL a truncate will *NOT* reset
7
- # the auto_increment
8
- def truncate(table_name)
9
- execute("TRUNCATE #{table_name}")
10
- end
11
-
12
- # Bulk load the data in the specified file. This implementation always uses the LOCAL keyword
13
- # so the file must be found locally, not on the remote server, to be loaded.
14
- #
15
- # Options:
16
- # * <tt>:ignore</tt> -- Ignore the specified number of lines from the source file
17
- # * <tt>:columns</tt> -- Array of column names defining the source file column order
18
- # * <tt>:fields</tt> -- Hash of options for fields:
19
- # ** <tt>:delimited_by</tt> -- The field delimiter
20
- # ** <tt>:enclosed_by</tt> -- The field enclosure
21
- def bulk_load(file, table_name, options={})
22
- q = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{table_name}"
23
- if options[:fields]
24
- q << " FIELDS"
25
- q << " TERMINATED BY '#{options[:fields][:delimited_by]}'" if options[:fields][:delimited_by]
26
- q << " ENCLOSED BY '#{options[:fields][:enclosed_by]}'" if options[:fields][:enclosed_by]
27
- end
28
- q << " IGNORE #{options[:ignore]} LINES" if options[:ignore]
29
- q << " (#{options[:columns].join(',')})" if options[:columns]
30
- execute(q)
31
- end
32
- end
33
- end
34
- end