factorylabs-activewarehouse-etl 0.9.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +198 -0
- data/LICENSE +7 -0
- data/README +85 -0
- data/Rakefile +153 -0
- data/TODO +28 -0
- data/bin/etl +28 -0
- data/bin/etl.cmd +8 -0
- data/examples/database.example.yml +16 -0
- data/lib/etl.rb +78 -0
- data/lib/etl/batch.rb +2 -0
- data/lib/etl/batch/batch.rb +111 -0
- data/lib/etl/batch/directives.rb +55 -0
- data/lib/etl/builder.rb +2 -0
- data/lib/etl/builder/date_dimension_builder.rb +96 -0
- data/lib/etl/builder/time_dimension_builder.rb +31 -0
- data/lib/etl/commands/etl.rb +89 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +405 -0
- data/lib/etl/control/destination.rb +420 -0
- data/lib/etl/control/destination/database_destination.rb +95 -0
- data/lib/etl/control/destination/file_destination.rb +124 -0
- data/lib/etl/control/source.rb +109 -0
- data/lib/etl/control/source/database_source.rb +220 -0
- data/lib/etl/control/source/enumerable_source.rb +11 -0
- data/lib/etl/control/source/file_source.rb +90 -0
- data/lib/etl/control/source/model_source.rb +39 -0
- data/lib/etl/core_ext.rb +1 -0
- data/lib/etl/core_ext/time.rb +5 -0
- data/lib/etl/core_ext/time/calculations.rb +42 -0
- data/lib/etl/engine.rb +556 -0
- data/lib/etl/execution.rb +20 -0
- data/lib/etl/execution/base.rb +9 -0
- data/lib/etl/execution/batch.rb +8 -0
- data/lib/etl/execution/job.rb +8 -0
- data/lib/etl/execution/migration.rb +85 -0
- data/lib/etl/execution/record.rb +18 -0
- data/lib/etl/generator.rb +2 -0
- data/lib/etl/generator/generator.rb +20 -0
- data/lib/etl/generator/surrogate_key_generator.rb +39 -0
- data/lib/etl/http_tools.rb +139 -0
- data/lib/etl/parser.rb +11 -0
- data/lib/etl/parser/apache_combined_log_parser.rb +49 -0
- data/lib/etl/parser/delimited_parser.rb +74 -0
- data/lib/etl/parser/fixed_width_parser.rb +65 -0
- data/lib/etl/parser/parser.rb +41 -0
- data/lib/etl/parser/sax_parser.rb +218 -0
- data/lib/etl/parser/xml_parser.rb +65 -0
- data/lib/etl/processor.rb +11 -0
- data/lib/etl/processor/block_processor.rb +14 -0
- data/lib/etl/processor/bulk_import_processor.rb +81 -0
- data/lib/etl/processor/check_exist_processor.rb +80 -0
- data/lib/etl/processor/check_unique_processor.rb +35 -0
- data/lib/etl/processor/copy_field_processor.rb +26 -0
- data/lib/etl/processor/encode_processor.rb +55 -0
- data/lib/etl/processor/hierarchy_exploder_processor.rb +55 -0
- data/lib/etl/processor/print_row_processor.rb +12 -0
- data/lib/etl/processor/processor.rb +25 -0
- data/lib/etl/processor/rename_processor.rb +24 -0
- data/lib/etl/processor/require_non_blank_processor.rb +26 -0
- data/lib/etl/processor/row_processor.rb +17 -0
- data/lib/etl/processor/sequence_processor.rb +23 -0
- data/lib/etl/processor/surrogate_key_processor.rb +53 -0
- data/lib/etl/processor/truncate_processor.rb +35 -0
- data/lib/etl/row.rb +20 -0
- data/lib/etl/screen.rb +14 -0
- data/lib/etl/screen/row_count_screen.rb +20 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/block_transform.rb +13 -0
- data/lib/etl/transform/date_to_string_transform.rb +20 -0
- data/lib/etl/transform/decode_transform.rb +51 -0
- data/lib/etl/transform/default_transform.rb +20 -0
- data/lib/etl/transform/foreign_key_lookup_transform.rb +151 -0
- data/lib/etl/transform/hierarchy_lookup_transform.rb +49 -0
- data/lib/etl/transform/ordinalize_transform.rb +12 -0
- data/lib/etl/transform/sha1_transform.rb +13 -0
- data/lib/etl/transform/string_to_date_transform.rb +16 -0
- data/lib/etl/transform/string_to_datetime_transform.rb +14 -0
- data/lib/etl/transform/string_to_time_transform.rb +11 -0
- data/lib/etl/transform/transform.rb +61 -0
- data/lib/etl/transform/trim_transform.rb +26 -0
- data/lib/etl/transform/type_transform.rb +35 -0
- data/lib/etl/util.rb +59 -0
- data/lib/etl/version.rb +9 -0
- metadata +195 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
module ETL #:nodoc:
  module Processor #:nodoc:
    # Processor that truncates a table. Intended to be used as a
    # pre-processor that cleans out a table prior to loading.
    class TruncateProcessor < ETL::Processor::Processor
      # The table to truncate and the database connection to use
      attr_reader :table, :target

      # Initialize the processor
      #
      # Options:
      # * <tt>:target</tt>: The target connection (an empty Hash is used when omitted)
      # * <tt>:table</tt>: The table name
      def initialize(control, configuration)
        super(control, configuration)
        @table = configuration[:table]
        @target = configuration[:target] || {}
      end

      # Truncate the configured table through the target connection.
      def process
        ETL::Engine.connection(target).truncate(table_name)
      end

      private

      # The table name as returned by ETL::Engine.table for the target connection.
      def table_name
        connection = ETL::Engine.connection(target)
        ETL::Engine.table(table, connection)
      end
    end
  end
end
|
data/lib/etl/row.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# This source file contains the ETL::Row class.
|
2
|
+
|
3
|
+
module ETL #:nodoc:
  # This class represents a single row currently passing through the ETL
  # pipeline. It is a Hash, with the originating source and the change type
  # stored alongside the Hash contents.
  class Row < Hash
    # All change types
    CHANGE_TYPES = [:insert, :update, :delete].freeze

    # Accessor for the originating source
    attr_accessor :source

    # Writer for the row's change type. Only the writer is generated here:
    # the reader is written out below so that it can supply a default, and
    # generating it as well would just define a method that is immediately
    # redefined.
    attr_writer :change_type

    # Get the change type, defaults to :insert
    def change_type
      @change_type ||= :insert
    end
  end
end
|
data/lib/etl/screen.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# This source file contains the ETL::Screen module and requires all of the
|
2
|
+
# screens
|
3
|
+
|
4
|
+
module ETL #:nodoc:
  # Namespace for the pre-built screens, which are useful for checking the
  # ETL state during execution. A screen has one of three severities:
  #
  # * fatal: results in termination of the ETL process
  # * error: results in termination of just the current ETL control file
  # * warning: results in a warning message
  module Screen
  end
end
|
13
|
+
|
14
|
+
# Require every file in the screen/ directory next to this file. The glob
# result is sorted so that the load order does not depend on the order in
# which the filesystem returns directory entries.
Dir[File.join(File.dirname(__FILE__), "screen", "*.rb")].sort.each { |file| require(file) }
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ETL
  module Screen
    # Screen which validates the number of rows to be bulk loaded against
    # an expected row count (for example the result of a row count query).
    # The screen does not pass when the two differ.
    class RowCountScreen
      attr_accessor :control, :configuration

      # Store the control and configuration, then run the check immediately.
      #
      # Configuration options:
      # * <tt>:rows</tt>: The row count that Engine.rows_written is compared to
      def initialize(control, configuration={})
        @control = control
        @configuration = configuration
        execute
      end

      # Raise a RuntimeError unless Engine.rows_written equals the
      # configured <tt>:rows</tt> value.
      def execute
        written = Engine.rows_written
        expected = configuration[:rows]
        return if written == expected

        raise "Rows written (#{written}) does not match expected rows (#{expected})"
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module ETL
  module Transform
    # Transform which hands the work over to a callable taken from the
    # configuration.
    class BlockTransform < ETL::Transform::Transform
      # Initialize the transform
      #
      # Configuration options:
      # * <tt>:block</tt>: Object which is invoked as call(name, value, row)
      def initialize(control, name, configuration)
        super(control, name, configuration)
        @block = configuration[:block]
      end

      # Transform the value by returning whatever the configured block
      # returns for the given name, value and row.
      def transform(name, value, row)
        @block.call(name, value, row)
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which formats a Date or Time as a String
    class DateToStringTransform < ETL::Transform::Transform
      # Initialize the transformer.
      #
      # Configuration options:
      # * <tt>:format</tt>: A format passed to strftime. Defaults to %Y-%m-%d
      def initialize(control, name, configuration={})
        super(control, name, configuration)
        @format = configuration[:format] || "%Y-%m-%d"
      end

      # Format the value with strftime. Values which do not respond to
      # strftime are returned as they are.
      def transform(name, value, row)
        value.respond_to?(:strftime) ? value.strftime(@format) : value
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which decodes coded values using a decode table read from a
    # file. Each line of the file holds a code and its decoded value,
    # separated by the decode table delimiter. A line whose code is empty
    # supplies the default value instead.
    class DecodeTransform < ETL::Transform::Transform
      # The path to the decode table file
      attr_accessor :decode_table_path

      # The delimiter separating code and value in the decode table file
      attr_accessor :decode_table_delimiter

      # The value returned when a code is not in the decode table
      attr_accessor :default_value

      # Initialize the transformer
      #
      # Configuration options:
      # * <tt>:decode_table_path</tt>: The path to the decode table, relative to the
      #   directory of the control file (defaults to 'decode.txt')
      # * <tt>:decode_table_delimiter</tt>: The decode table delimiter (defaults to ':')
      # * <tt>:default_value</tt>: The default value to use (defaults to 'No Value')
      def initialize(control, name, configuration={})
        super

        # The configured path is rewritten in place so that the stored
        # configuration holds the resolved path as well.
        if configuration[:decode_table_path]
          configuration[:decode_table_path] = File.join(File.dirname(control.file), configuration[:decode_table_path])
        end

        @decode_table_path = (configuration[:decode_table_path] || 'decode.txt')
        @decode_table_delimiter = (configuration[:decode_table_delimiter] || ':')
        @default_value = (configuration[:default_value] || 'No Value')
      end

      # Transform the value: return the decoded value for the given code, or
      # the default value when the code is not in the decode table.
      def transform(name, value, row)
        decode_table[value] || default_value
      end

      # Get the decode table. The file is read on first use and the
      # resulting Hash is kept for later calls. The table is only memoized
      # once the file has been read successfully, so a failed read is
      # retried (and raises again) on the next call.
      def decode_table
        @decode_table ||= load_decode_table
      end

      private

      # Read the decode table file into a Hash of code => value.
      def load_decode_table
        table = {}
        # Block form so the file handle is closed when reading finishes.
        File.open(decode_table_path) do |file|
          file.each_line do |line|
            entry = line.strip
            # A blank line carries neither a code nor a value; skipping it
            # keeps it from replacing the default value with nil.
            next if entry.empty?

            code, value = entry.split(decode_table_delimiter)
            if code && code.length > 0
              table[code] = value
            else
              # An empty code marks the line that defines the default value.
              @default_value = value
            end
          end
        end
        table
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which will replace nil or empty values with a specified value.
    #
    # The superclass is spelled out in full, as in the other transforms, so
    # the reference does not depend on how the bare name Transform resolves
    # inside the ETL::Transform module.
    class DefaultTransform < ETL::Transform::Transform
      # The value substituted for blank incoming values
      attr_accessor :default_value

      # Initialize the transform
      #
      # Configuration options:
      # * <tt>:default_value</tt>: The default value to use if the incoming value is blank
      def initialize(control, name, configuration)
        super
        @default_value = configuration[:default_value]
      end

      # Transform the value: return the default value when the incoming
      # value is blank, otherwise the incoming value unchanged.
      #
      # NOTE(review): relies on blank? being defined for the value,
      # presumably by ActiveSupport's core extensions - confirm.
      def transform(name, value, row)
        value.blank? ? default_value : value
      end
    end
  end
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which looks up the value and replaces it with a foreign key reference
    class ForeignKeyLookupTransform < ETL::Transform::Transform
      # The resolver to use if the foreign key is not found in the collection
      attr_accessor :resolver

      # The default foreign key to use if none is found.
      attr_accessor :default

      # Initialize the foreign key lookup transform.
      #
      # Configuration options:
      # * <tt>:collection</tt>: A Hash of natural keys mapped to surrogate keys. If this is not
      #   specified then an empty Hash will be used. This Hash will be used to cache values that
      #   have been resolved already for future use.
      # * <tt>:resolver</tt>: Object or Class which implements the method resolve(value). A Class
      #   is instantiated with no arguments.
      # * <tt>:default</tt>: A default foreign key to use if no foreign key is found
      # * <tt>:cache</tt>: Whether to ask the resolver to preload its cache. Defaults to true;
      #   pass false to skip preloading.
      def initialize(control, name, configuration={})
        super

        @collection = (configuration[:collection] || {})
        @resolver = configuration[:resolver]
        @resolver = @resolver.new if @resolver.is_a?(Class)
        @default = configuration[:default]

        # Only fill in the default when the option is absent, so that an
        # explicit :cache => false is honoured.
        configuration[:cache] = true if configuration[:cache].nil?
        preload_resolver_cache if configuration[:cache]
      end

      # Transform the value by resolving it to a foreign key.
      #
      # The collection is consulted first. On a miss the resolver is asked,
      # then the default is used, and the result is stored in the
      # collection for later rows.
      #
      # Raises ResolverError when the key is not in the collection and there
      # is no resolver, when the resolver does not respond to resolve, or
      # when neither the resolver nor the default yields a foreign key.
      def transform(name, value, row)
        fk = @collection[value]
        return fk if fk

        raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver
        raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve)

        fk = resolver.resolve(value) || @default
        raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}. You may want to specify a :default value." unless fk

        @collection[value] = fk
      end

      private

      # Ask the resolver to load its cache when it supports doing so.
      def preload_resolver_cache
        # Without a resolver there is nothing to preload or to report on.
        return unless resolver

        if resolver.respond_to?(:load_cache)
          resolver.load_cache
        else
          ETL::Engine.logger.info "#{resolver.class.name} does not support caching"
        end
      end
    end

    # Alias class name for the ForeignKeyLookupTransform.
    class FkLookupTransform < ForeignKeyLookupTransform; end
  end
end
|
53
|
+
|
54
|
+
# Resolver which looks a record up through a finder method on an
# ActiveRecord class and answers with the id of the record found.
class ActiveRecordResolver
  # The ActiveRecord class to use
  attr_accessor :ar_class

  # The find method to use (as a symbol)
  attr_accessor :find_method

  # Initialize the resolver. The ar_class argument should extend from
  # ActiveRecord::Base. The find_method argument must be a symbol naming
  # the finder method to call, which must take a single argument.
  # For example:
  #
  #   ActiveRecordResolver.new(Person, :find_by_name)
  def initialize(ar_class, find_method)
    @ar_class, @find_method = ar_class, find_method
  end

  # Resolve the value: call the finder with it and return the id of the
  # record it returns, or nil when the finder returns nil.
  def resolve(value)
    record = ar_class.__send__(find_method, value)
    return nil if record.nil?

    record.id
  end
end
|
80
|
+
|
81
|
+
# Resolver which finds the foreign key with a SQL query against a table.
class SQLResolver
  # Initialize the SQL resolver. Use the given table and field name to search
  # for the appropriate foreign key. The field should be the name of a natural
  # key that is used to locate the surrogate key for the record.
  #
  # The connection argument is optional. If specified it can be either a symbol
  # referencing a connection defined in the ETL database.yml file or an actual
  # ActiveRecord connection instance. If the connection is not specified then
  # the ActiveRecord::Base.connection will be used.
  def initialize(table, field, connection=nil)
    @table = table
    @field = field
    # Anything that responds to quote is used as the connection itself;
    # anything else is looked up through ETL::Engine.connection.
    @connection = (connection.respond_to?(:quote) ? connection : ETL::Engine.connection(connection)) if connection
    @connection ||= ActiveRecord::Base.connection
  end

  # Resolve the value to the id of the matching record.
  #
  # Once load_cache has been called the answer comes from the cache only
  # (nil for a value that is not cached); otherwise a query is executed
  # for every call.
  def resolve(value)
    return cache[value] if @use_cache

    q = "SELECT id FROM #{table_name} WHERE #{@field} = #{@connection.quote(value)}"
    ETL::Engine.logger.debug("Executing query: #{q}")
    @connection.select_value(q)
  end

  # The table name as returned by ETL::Engine.table for the connection.
  def table_name
    ETL::Engine.table(@table, @connection)
  end

  # The Hash of field values mapped to ids filled by load_cache.
  def cache
    @cache ||= {}
  end

  # Read the id and field of every record in the table into the cache and
  # switch resolve over to using the cache.
  def load_cache
    @use_cache = true
    # Result rows are indexed by column name as a String here (as with
    # 'id' below), so a field given as a Symbol has to be converted or
    # every record would be cached under nil.
    column = @field.to_s
    q = "SELECT id, #{@field} FROM #{table_name}"
    @connection.select_all(q).each do |record|
      cache[record[column]] = record['id']
    end
  end
end
|
119
|
+
|
120
|
+
# Resolver which finds the foreign key in a comma-delimited flat file.
class FlatFileResolver
  # Initialize the flat file resolver. Expects to open a comma-delimited file.
  #
  # The match_index argument is the index of the column which is compared
  # with the value being resolved; the comparison is an exact match. The
  # result_field_index argument is the index of the column whose field is
  # returned for the matching row.
  def initialize(file, match_index, result_field_index)
    @file, @match_index, @result_field_index = file, match_index, result_field_index
  end

  # Return the field in the result column of the first row whose match
  # column equals the given value, or nil when no row matches.
  def resolve(value)
    match = rows.find { |row| row[@match_index] == value }
    match && match[@result_field_index]
  end

  protected

  # Get the rows from the file specified in the initializer. The file is
  # read once and the rows are kept for later calls.
  def rows
    @rows ||= FasterCSV.read(@file)
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which walks up the hierarchy tree to find a value if the
    # current level's value is nil.
    #
    # TODO: Let the resolver be implemented in a class so different resolution methods are
    # possible.
    class HierarchyLookupTransform < ETL::Transform::Transform
      # The name of the field to use for the parent ID
      attr_accessor :parent_id_field

      # The target connection name
      attr_accessor :target

      # Initialize the transform
      #
      # Configuration options:
      # * <tt>:target</tt>: The target connection name (required)
      # * <tt>:parent_id_field</tt>: The name of the field to use for the parent ID (defaults to :parent_id)
      def initialize(control, name, configuration={})
        super
        @parent_id_field = configuration[:parent_id_field] || :parent_id
        @target = configuration[:target]
      end

      # Transform the value.
      #
      # A row without a parent ID keeps its value. Otherwise the parent
      # chain is followed, one lookup per level, until a level supplies a
      # value, the chain ends (no parent ID, or the parent row is missing),
      # or a parent ID comes up a second time.
      #
      # NOTE(review): when the row has a parent ID the first lookup replaces
      # the incoming value even if that value is not nil. That is the
      # existing behaviour and is kept here; confirm whether it is intended.
      def transform(name, value, row)
        parent_id = row[parent_id_field]
        return value unless parent_id

        # TODO: should use more than just the first source out of the control
        table = control.sources.first.configuration[:table]

        # Parent IDs already looked up, so that a cycle in the hierarchy
        # ends the walk instead of looping forever.
        visited = {}
        loop do
          visited[parent_id] = true
          parent_id, value = lookup(name, table, parent_id, parent_id_field)
          break if value || parent_id.nil? || visited.key?(parent_id)
        end
        value
      end

      # Lookup the parent value.
      #
      # Returns the parent's own parent ID and the parent's value for the
      # field, or two nils when no row with the given ID exists.
      def lookup(field, table, parent_id, parent_id_field)
        connection = ETL::Engine.connection(target)
        # The ID comes from row data, so it is quoted by the connection
        # instead of being interpolated as-is.
        q = "SELECT #{parent_id_field}, #{field} FROM #{table} WHERE id = #{connection.quote(parent_id)}"
        row = connection.select_one(q)
        return nil, nil unless row

        return row[parent_id_field.to_s], row[field.to_s]
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform a number to an ordinalized version using the ActiveSupport ordinalize
    # core extension
    class OrdinalizeTransform < ETL::Transform::Transform
      # Transform the value from a number to an ordinalized number. A nil
      # value is returned as nil, matching the other transforms which pass
      # nil through rather than raising on it.
      def transform(name, value, row)
        return value if value.nil?

        value.ordinalize
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which replaces the original value with its SHA-1 hash
    class Sha1Transform < ETL::Transform::Transform
      # Return the hexadecimal SHA-1 digest of the value.
      def transform(name, value, row)
        Digest::SHA1.new.update(value).hexdigest
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform a String representation of a date to a Date instance
    class StringToDateTransform < ETL::Transform::Transform
      # Transform the value using Date.parse.
      #
      # A nil value is returned as nil. When Date.parse raises a
      # StandardError the original value is returned unchanged instead of
      # the error being propagated.
      def transform(name, value, row)
        return value if value.nil?

        Date.parse(value)
      rescue StandardError
        value
      end
    end
  end
end
|