activewarehouse-etl 0.5.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/CHANGELOG +41 -13
  2. data/README +1 -1
  3. data/Rakefile +14 -4
  4. data/TODO +17 -1
  5. data/bin/etl +3 -1
  6. data/lib/etl.rb +11 -7
  7. data/lib/etl/commands/etl.rb +0 -1
  8. data/lib/etl/control/control.rb +113 -36
  9. data/lib/etl/control/destination.rb +13 -1
  10. data/lib/etl/control/destination/database_destination.rb +3 -1
  11. data/lib/etl/control/destination/file_destination.rb +5 -2
  12. data/lib/etl/control/source.rb +36 -0
  13. data/lib/etl/control/source/database_source.rb +63 -8
  14. data/lib/etl/control/source/file_source.rb +25 -4
  15. data/lib/etl/engine.rb +128 -14
  16. data/lib/etl/generator/surrogate_key_generator.rb +1 -0
  17. data/lib/etl/http_tools.rb +119 -0
  18. data/lib/etl/parser/apache_combined_log_parser.rb +47 -0
  19. data/lib/etl/parser/sax_parser.rb +18 -6
  20. data/lib/etl/processor.rb +1 -0
  21. data/lib/etl/processor/bulk_import_processor.rb +12 -0
  22. data/lib/etl/processor/hierarchy_exploder_processor.rb +54 -0
  23. data/lib/etl/processor/processor.rb +1 -5
  24. data/lib/etl/processor/row_processor.rb +17 -0
  25. data/lib/etl/transform/date_to_string_transform.rb +1 -1
  26. data/lib/etl/transform/decode_transform.rb +1 -1
  27. data/lib/etl/transform/default_transform.rb +15 -0
  28. data/lib/etl/transform/foreign_key_lookup_transform.rb +1 -1
  29. data/lib/etl/transform/hierarchy_lookup_transform.rb +56 -0
  30. data/lib/etl/transform/sha1_transform.rb +1 -1
  31. data/lib/etl/transform/string_to_date_transform.rb +3 -3
  32. data/lib/etl/transform/string_to_datetime_transform.rb +17 -0
  33. data/lib/etl/transform/string_to_time_transform.rb +14 -0
  34. data/lib/etl/transform/transform.rb +8 -4
  35. data/lib/etl/transform/type_transform.rb +2 -2
  36. data/lib/etl/version.rb +2 -2
  37. metadata +21 -8
  38. data/lib/etl/active_record_ext.rb +0 -1
  39. data/lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb +0 -34
@@ -8,7 +8,7 @@ module ETL #:nodoc:
8
8
  super
9
9
  end
10
10
  # Transform the value with a SHA1 digest algorithm.
11
- def transform(value)
11
+ def transform(name, value, row)
12
12
  Digest::SHA1.hexdigest(value)
13
13
  end
14
14
  end
@@ -5,9 +5,9 @@ module ETL #:nodoc:
5
5
  def initialize(control, configuration={})
6
6
  super
7
7
  end
8
- # Transform the value using Time.parse
9
- def transform(value)
10
- t = Date.parse(value)
8
+ # Transform the value using Date.parse
9
+ def transform(name, value, row)
10
+ Date.parse(value)
11
11
  end
12
12
  end
13
13
  end
@@ -0,0 +1,17 @@
1
+ module ETL #:nodoc:
2
+ module Transform #:nodoc:
3
+ # Transform a String representation of a date to a DateTime instance
4
+ class StringToDateTimeTransform < ETL::Transform::Transform
5
+ def initialize(control, configuration={})
6
+ super
7
+ end
8
+ # Transform the value using DateTime.parse.
9
+ #
10
+ # WARNING: This transform is slow (due to the Ruby implementation), so use it only if you need to
11
+ # parse timestamps before or after the range of values supported by Time.parse.
12
+ def transform(name, value, row)
13
+ DateTime.parse(value)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ module ETL #:nodoc:
2
+ module Transform #:nodoc:
3
+ # Transform a String representation of a date to a Time instance
4
+ class StringToTimeTransform < ETL::Transform::Transform
5
+ def initialize(control, configuration={})
6
+ super
7
+ end
8
+ # Transform the value using Time.parse
9
+ def transform(name, value, row)
10
+ Time.parse(value)
11
+ end
12
+ end
13
+ end
14
+ end
@@ -20,14 +20,14 @@ module ETL#:nodoc:
20
20
  # Transform the specified value using the given transforms. The transforms can either be
21
21
  # Proc objects or objects which extend from Transform and implement the method <tt>transform(name, value, row)</tt>.
22
22
  # Any other object will result in a ControlError being raised.
23
- def transform(name, value, transforms)
24
- # logger.debug "Transforming field #{name}" if transforms.length > 0
23
+ def transform(name, value, row, transforms)
25
24
  transforms.each do |transform|
25
+ Engine.logger.debug "Transforming field #{name} with #{transform.inspect}"
26
26
  case transform
27
27
  when Proc
28
- value = transform.call(value)
28
+ value = transform.call([name, value, row])
29
29
  when Transform
30
- value = transform.transform(value)
30
+ value = transform.transform(name, value, row)
31
31
  else
32
32
  raise ControlError, "Unsupported transform configuration type: #{transform}"
33
33
  end
@@ -43,6 +43,10 @@ module ETL#:nodoc:
43
43
  @control = control
44
44
  @configuration = configuration
45
45
  end
46
+
47
+ def transform(name, value, row)
48
+ raise "transform is an abstract method"
49
+ end
46
50
  end
47
51
  end
48
52
  end
@@ -6,8 +6,8 @@ module ETL #:nodoc:
6
6
  super
7
7
  @type = configuration[:type]
8
8
  end
9
- # Transform the value using Time.parse
10
- def transform(value)
9
+ # Transform the value
10
+ def transform(name, value, row)
11
11
  case @type
12
12
  when :string
13
13
  value.to_s
data/lib/etl/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module ETL#:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 5
5
- TINY = 2
4
+ MINOR = 6
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0.10
3
3
  specification_version: 1
4
4
  name: activewarehouse-etl
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.5.2
7
- date: 2007-02-19 00:00:00 -05:00
6
+ version: 0.6.0
7
+ date: 2007-03-08 00:00:00 -05:00
8
8
  summary: Pure Ruby ETL package.
9
9
  require_paths:
10
10
  - lib
@@ -36,14 +36,13 @@ files:
36
36
  - bin/etl
37
37
  - lib/etl
38
38
  - lib/etl.rb
39
- - lib/etl/active_record_ext
40
- - lib/etl/active_record_ext.rb
41
39
  - lib/etl/commands
42
40
  - lib/etl/control
43
41
  - lib/etl/control.rb
44
42
  - lib/etl/engine.rb
45
43
  - lib/etl/generator
46
44
  - lib/etl/generator.rb
45
+ - lib/etl/http_tools.rb
47
46
  - lib/etl/parser
48
47
  - lib/etl/parser.rb
49
48
  - lib/etl/processor
@@ -51,8 +50,6 @@ files:
51
50
  - lib/etl/transform
52
51
  - lib/etl/transform.rb
53
52
  - lib/etl/version.rb
54
- - lib/etl/active_record_ext/connection_adapters
55
- - lib/etl/active_record_ext/connection_adapters/mysql_adapter.rb
56
53
  - lib/etl/commands/etl.rb
57
54
  - lib/etl/control/control.rb
58
55
  - lib/etl/control/destination
@@ -65,19 +62,26 @@ files:
65
62
  - lib/etl/control/source/file_source.rb
66
63
  - lib/etl/generator/generator.rb
67
64
  - lib/etl/generator/surrogate_key_generator.rb
65
+ - lib/etl/parser/apache_combined_log_parser.rb
68
66
  - lib/etl/parser/delimited_parser.rb
69
67
  - lib/etl/parser/fixed_width_parser.rb
70
68
  - lib/etl/parser/parser.rb
71
69
  - lib/etl/parser/sax_parser.rb
72
70
  - lib/etl/parser/xml_parser.rb
73
71
  - lib/etl/processor/bulk_import_processor.rb
72
+ - lib/etl/processor/hierarchy_exploder_processor.rb
74
73
  - lib/etl/processor/processor.rb
74
+ - lib/etl/processor/row_processor.rb
75
75
  - lib/etl/processor/truncate_processor.rb
76
76
  - lib/etl/transform/date_to_string_transform.rb
77
77
  - lib/etl/transform/decode_transform.rb
78
+ - lib/etl/transform/default_transform.rb
78
79
  - lib/etl/transform/foreign_key_lookup_transform.rb
80
+ - lib/etl/transform/hierarchy_lookup_transform.rb
79
81
  - lib/etl/transform/sha1_transform.rb
80
82
  - lib/etl/transform/string_to_date_transform.rb
83
+ - lib/etl/transform/string_to_datetime_transform.rb
84
+ - lib/etl/transform/string_to_time_transform.rb
81
85
  - lib/etl/transform/transform.rb
82
86
  - lib/etl/transform/type_transform.rb
83
87
  test_files: []
@@ -110,7 +114,7 @@ dependencies:
110
114
  requirements:
111
115
  - - ">="
112
116
  - !ruby/object:Gem::Version
113
- version: 1.3.1.5618
117
+ version: 1.3.1
114
118
  version:
115
119
  - !ruby/object:Gem::Dependency
116
120
  name: activerecord
@@ -119,7 +123,7 @@ dependencies:
119
123
  requirements:
120
124
  - - ">="
121
125
  - !ruby/object:Gem::Version
122
- version: 1.14.4.5618
126
+ version: 1.14.4
123
127
  version:
124
128
  - !ruby/object:Gem::Dependency
125
129
  name: fastercsv
@@ -130,3 +134,12 @@ dependencies:
130
134
  - !ruby/object:Gem::Version
131
135
  version: 1.0.0
132
136
  version:
137
+ - !ruby/object:Gem::Dependency
138
+ name: adapter_extensions
139
+ version_requirement:
140
+ version_requirements: !ruby/object:Gem::Version::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: 0.1.0
145
+ version:
@@ -1 +0,0 @@
1
- require 'etl/active_record_ext/connection_adapters/mysql_adapter'
@@ -1,34 +0,0 @@
1
- require 'active_record/connection_adapters/abstract_adapter'
2
-
3
- module ActiveRecord #:nodoc:
4
- module ConnectionAdapters #:nodoc:
5
- class MysqlAdapter < AbstractAdapter
6
- # Execute a truncate statement on the table. Note that in MySQL a truncate will *NOT* reset
7
- # the auto_increment
8
- def truncate(table_name)
9
- execute("TRUNCATE #{table_name}")
10
- end
11
-
12
- # Bulk load the data in the specified file. This implementation always uses the LOCAL keyword
13
- # so the file must be found locally, not on the remote server, to be loaded.
14
- #
15
- # Options:
16
- # * <tt>:ignore</tt> -- Ignore the specified number of lines from the source file
17
- # * <tt>:columns</tt> -- Array of column names defining the source file column order
18
- # * <tt>:fields</tt> -- Hash of options for fields:
19
- # ** <tt>:delimited_by</tt> -- The field delimiter
20
- # ** <tt>:enclosed_by</tt> -- The field enclosure
21
- def bulk_load(file, table_name, options={})
22
- q = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{table_name}"
23
- if options[:fields]
24
- q << " FIELDS"
25
- q << " TERMINATED BY '#{options[:fields][:delimited_by]}'" if options[:fields][:delimited_by]
26
- q << " ENCLOSED BY '#{options[:fields][:enclosed_by]}'" if options[:fields][:enclosed_by]
27
- end
28
- q << " IGNORE #{options[:ignore]} LINES" if options[:ignore]
29
- q << " (#{options[:columns].join(',')})" if options[:columns]
30
- execute(q)
31
- end
32
- end
33
- end
34
- end