activewarehouse-etl 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +2 -0
- data/README +27 -0
- data/Rakefile +117 -0
- data/bin/etl +26 -0
- data/lib/etl.rb +58 -0
- data/lib/etl/commands/etl.rb +45 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +134 -0
- data/lib/etl/control/destination.rb +62 -0
- data/lib/etl/control/destination/database_destination.rb +47 -0
- data/lib/etl/control/destination/file_destination.rb +63 -0
- data/lib/etl/control/source.rb +27 -0
- data/lib/etl/control/source/database_source.rb +30 -0
- data/lib/etl/control/source/file_source.rb +19 -0
- data/lib/etl/engine.rb +61 -0
- data/lib/etl/parser.rb +2 -0
- data/lib/etl/parser/delimited_parser.rb +56 -0
- data/lib/etl/parser/fixed_width_parser.rb +59 -0
- data/lib/etl/parser/parser.rb +43 -0
- data/lib/etl/processor.rb +2 -0
- data/lib/etl/processor/bulk_import_processor.rb +39 -0
- data/lib/etl/processor/processor.rb +18 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/decode_transform.rb +37 -0
- data/lib/etl/transform/sha1_transform.rb +15 -0
- data/lib/etl/transform/transform.rb +29 -0
- data/lib/etl/version.rb +9 -0
- metadata +116 -0
data/CHANGELOG
ADDED
data/README
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Ruby ETL parser.
|
2
|
+
|
3
|
+
== Features
|
4
|
+
|
5
|
+
Current supported features:
|
6
|
+
|
7
|
+
* ETL Domain Specific Language (DSL) - Control files are specified in a Ruby-based DSL
|
8
|
+
* Multiple source types - fixed-width and delimited text files currently supported
|
9
|
+
* Multiple destination types - file and database destinations
|
10
|
+
* Support for extracting from multiple sources
|
11
|
+
* Support for loading to multiple destinations
|
12
|
+
* Extensible transformations - comes with a built-in SHA1 one-way hash example
|
13
|
+
* Pre/post processing - export to files and then post process with the bulk import processor for large amounts of data
|
14
|
+
* Virtual fields - Add a field to the destination data which doesn't exist in the source data
|
15
|
+
|
16
|
+
|
17
|
+
== Requirements
|
18
|
+
|
19
|
+
* ActiveSupport Gem
|
20
|
+
* ActiveRecord Gem
|
21
|
+
* FasterCSV Gem
|
22
|
+
|
23
|
+
== Examples
|
24
|
+
Examples can be found in the test directory.
|
25
|
+
|
26
|
+
== Feedback
|
27
|
+
This is a work in progress. Comments should be made on the activewarehouse-discuss mailing list at the moment.
|
data/Rakefile
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rake/packagetask'
|
5
|
+
require 'rake/gempackagetask'
|
6
|
+
require 'rake/contrib/rubyforgepublisher'
|
7
|
+
|
8
|
+
require File.join(File.dirname(__FILE__), 'lib/etl', 'version')
|
9
|
+
|
10
|
+
PKG_BUILD = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
|
11
|
+
PKG_NAME = 'activewarehouse-etl'
|
12
|
+
PKG_VERSION = ETL::VERSION::STRING + PKG_BUILD
|
13
|
+
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
|
14
|
+
PKG_DESTINATION = ENV["PKG_DESTINATION"] || "../#{PKG_NAME}"
|
15
|
+
|
16
|
+
RELEASE_NAME = "REL #{PKG_VERSION}"
|
17
|
+
|
18
|
+
RUBY_FORGE_PROJECT = "activewarehouse"
|
19
|
+
RUBY_FORGE_USER = "aeden"
|
20
|
+
|
21
|
+
desc 'Default: run unit tests.'
|
22
|
+
task :default => :test
|
23
|
+
|
24
|
+
desc 'Test the ETL application.'
|
25
|
+
Rake::TestTask.new(:test) do |t|
|
26
|
+
t.libs << 'lib'
|
27
|
+
t.pattern = 'test/**/*_test.rb'
|
28
|
+
t.verbose = true
|
29
|
+
# TODO: reset the database
|
30
|
+
end
|
31
|
+
|
32
|
+
desc 'Generate documentation for the ETL application.'
|
33
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
34
|
+
rdoc.rdoc_dir = 'rdoc'
|
35
|
+
rdoc.title = 'ActiveWarehouse ETL'
|
36
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
37
|
+
rdoc.rdoc_files.include('README')
|
38
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
39
|
+
end
|
40
|
+
|
41
|
+
PKG_FILES = FileList[
|
42
|
+
'CHANGELOG',
|
43
|
+
'README',
|
44
|
+
'Rakefile',
|
45
|
+
'bin/**/*',
|
46
|
+
'doc/**/*',
|
47
|
+
'lib/**/*',
|
48
|
+
] - [ 'test' ]
|
49
|
+
|
50
|
+
spec = Gem::Specification.new do |s|
|
51
|
+
s.name = 'activewarehouse-etl'
|
52
|
+
s.version = PKG_VERSION
|
53
|
+
s.summary = "Pure Ruby ETL package."
|
54
|
+
s.description = <<-EOF
|
55
|
+
ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
|
56
|
+
EOF
|
57
|
+
|
58
|
+
s.add_dependency('rake', '>= 0.7.1')
|
59
|
+
s.add_dependency('activesupport', '>= 1.3.1.5618')
|
60
|
+
s.add_dependency('activerecord', '>= 1.14.4.5618')
|
61
|
+
s.add_dependency('fastercsv', '>= 1.0.0')
|
62
|
+
|
63
|
+
s.rdoc_options << '--exclude' << '.'
|
64
|
+
s.has_rdoc = false
|
65
|
+
|
66
|
+
s.files = PKG_FILES.to_a.delete_if {|f| f.include?('.svn')}
|
67
|
+
s.require_path = 'lib'
|
68
|
+
|
69
|
+
s.bindir = "bin" # Use these for applications.
|
70
|
+
s.executables = ['etl']
|
71
|
+
s.default_executable = "etl"
|
72
|
+
|
73
|
+
s.author = "Anthony Eden"
|
74
|
+
s.email = "anthonyeden@gmail.com"
|
75
|
+
s.homepage = "http://activewarehouse.rubyforge.org/etl"
|
76
|
+
s.rubyforge_project = "activewarehouse"
|
77
|
+
end
|
78
|
+
|
79
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
80
|
+
pkg.gem_spec = spec
|
81
|
+
end
|
82
|
+
|
83
|
+
desc "Generate code statistics"
|
84
|
+
# Print line statistics for every Ruby file under lib/ (skipping paths that
# contain "vendor"), followed by a grand total. "L" counts every line in a
# file; "LOC" leaves out blank lines and comment-only lines.
task :lines do
  total_lines = 0
  total_codelines = 0

  FileList["lib/**/*.rb"].each do |file_name|
    next if file_name =~ /vendor/

    lines = 0
    codelines = 0

    # File.foreach closes the file once iteration finishes
    File.foreach(file_name) do |line|
      lines += 1
      next if line =~ /^\s*$/ || line =~ /^\s*#/
      codelines += 1
    end

    puts "L: #{sprintf("%4d", lines)}, LOC #{sprintf("%4d", codelines)} | #{file_name}"

    total_lines += lines
    total_codelines += codelines
  end

  puts "Total: Lines #{total_lines}, LOC #{total_codelines}"
end
|
107
|
+
|
108
|
+
desc "Publish the release files to RubyForge."
|
109
|
+
# Log in to RubyForge, then add one release file per package format.
# The command is assembled from the constants declared at the top of this
# Rakefile (RUBY_FORGE_PROJECT, PKG_NAME, RELEASE_NAME, PKG_FILE_NAME) so the
# project name and release label are defined in a single place.
task :release => [ :package ] do
  `rubyforge login`

  %w( gem tgz zip ).each do |ext|
    release_command = "rubyforge add_release #{RUBY_FORGE_PROJECT} #{PKG_NAME} '#{RELEASE_NAME}' pkg/#{PKG_FILE_NAME}.#{ext}"
    # Echo the command before running it
    puts release_command
    system(release_command)
  end
end
|
data/bin/etl
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2006 Anthony Eden
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require File.dirname(__FILE__) + "/../lib/etl/commands/etl"
|
data/lib/etl.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006 Anthony Eden
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
unless defined?(Logger)
|
25
|
+
require 'logger'
|
26
|
+
end
|
27
|
+
|
28
|
+
require 'rubygems'
|
29
|
+
unless defined?(ActiveSupport)
|
30
|
+
require_gem 'activesupport'
|
31
|
+
require 'active_support'
|
32
|
+
end
|
33
|
+
|
34
|
+
unless defined?(ActiveRecord)
|
35
|
+
require_gem 'activerecord'
|
36
|
+
require 'active_record'
|
37
|
+
end
|
38
|
+
|
39
|
+
require_gem 'fastercsv'
|
40
|
+
require 'faster_csv'
|
41
|
+
|
42
|
+
$:.unshift(File.dirname(__FILE__))
|
43
|
+
|
44
|
+
require 'etl/version'
|
45
|
+
require 'etl/engine'
|
46
|
+
require 'etl/control'
|
47
|
+
require 'etl/parser'
|
48
|
+
require 'etl/transform'
|
49
|
+
require 'etl/processor'
|
50
|
+
|
51
|
+
module ETL #:nodoc:
  # Root of the ETL exception hierarchy
  class ETLError < StandardError; end #:nodoc:

  # Problem with a control file or a control object
  class ControlError < ETLError; end #:nodoc:

  # Problem with a field or layout definition
  class DefinitionError < ControlError; end #:nodoc:
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006 Anthony Eden
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'benchmark'
|
25
|
+
require File.dirname(__FILE__) + '/../../etl'
|
26
|
+
|
27
|
+
# Print a usage statement
|
28
|
+
# Write the command-line synopsis to standard output
def usage #:nodoc:
  $stdout.puts "Usage: etl ctl_file [ctl_file2 ctl_file3 ...]"
end
|
31
|
+
|
32
|
+
# Script entry point: with no arguments print the usage text, otherwise hand
# each argument to ETL::Engine.process in order and report the total
# wall-clock time for the whole run.
if ARGV.empty?
  usage
else
  puts "Starting ETL process"

  elapsed = Benchmark.realtime do
    ARGV.each do |control_file|
      puts "Processing #{control_file}"
      ETL::Engine.process(control_file)
    end
  end

  puts "ETL process complete in #{'%.3f' % elapsed} seconds"
end
|
data/lib/etl/control.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Object representation of a control file. The control file is Ruby code
    # evaluated against the binding of a Control instance, so the public
    # methods of this class (+source+, +destination+, +transform+,
    # +pre_process+, +post_process+) are the statements available inside it.
    class Control
      # The control file path this object was created with
      attr_reader :file

      class << self
        # Parse a control file and return a validated Control instance.
        # * <tt>control_file</tt>: A path String or a File
        #
        # Raises ControlError if the file declares no source or no destination.
        def parse(control_file)
          control_file = control_file.path if control_file.is_a?(File)
          control = ETL::Control::Control.new(control_file)
          # NOTE: the control file is executed as Ruby code; only run control
          # files that come from a trusted origin.
          # The contents are evaluated verbatim and the path is passed to eval
          # so that errors report the control file name and the real line.
          eval(File.read(control_file), control.get_binding, control_file.to_s)
          control.validate
          control
        end

        # Turn the argument into a Control instance.
        # * <tt>control</tt>: A path String or a File (both are parsed), or a
        #   Control object (returned unchanged)
        #
        # Raises ControlError for any other kind of argument.
        def resolve(control)
          case control
          when String, File
            # parse accepts a path directly, so no File needs to be opened here
            ETL::Control::Control.parse(control)
          when ETL::Control::Control
            control
          else
            raise ControlError, "Control must be a String, File or Control object"
          end
        end
      end

      # * <tt>file</tt>: The control file path
      def initialize(file)
        @file = file
      end

      # Define a source. One source object is created for every supported
      # source type (see +source_types+) that has a value in +configuration+.
      # * <tt>name</tt>: The source name (not used by this method)
      # * <tt>configuration</tt>: The configuration hash
      # * <tt>definition</tt>: The source layout definition
      def source(name, configuration={}, definition={})
        source_types.each do |source_type|
          next unless configuration[source_type]
          source_class = ETL::Control::Source.class_for_name(source_type)
          sources << source_class.new(self, configuration, definition)
        end
      end

      # Get the defined sources
      def sources
        @sources ||= []
      end

      # Define a destination. One destination object is created for every
      # supported destination type (see +destination_types+) that has a value
      # in +configuration+.
      # * <tt>name</tt>: The destination name (not used by this method)
      # * <tt>configuration</tt>: The configuration hash
      # * <tt>mapping</tt>: The output mapping
      def destination(name, configuration={}, mapping={})
        destination_types.each do |dest_type|
          next unless configuration[dest_type]
          dest_class = ETL::Control::Destination.class_for_name(dest_type)
          destinations << dest_class.new(self, configuration, mapping)
        end
      end

      # Get the defined destinations
      def destinations
        @destinations ||= []
      end

      # Register a transform for the named field. Either name a transformer
      # (<tt>:sha1</tt> looks up ETL::Transform::Sha1Transform) or pass a block.
      #
      # Raises ControlError if neither a transformer nor a block is given.
      def transform(name, transformer=nil, configuration={}, &block)
        transforms[name] ||= []
        if transformer
          transform_class = ETL::Transform.const_get("#{transformer.to_s.classify}Transform")
          transforms[name] << transform_class.new(self, configuration)
        elsif block_given?
          transforms[name] << block
        else
          raise ControlError, "Either a transformer or a block must be specified"
        end
      end

      # Get the array of transforms registered for the named field (an empty
      # array is stored and returned when there are none)
      def get_transform(name)
        transforms[name] ||= []
      end

      # Register a pre-processor. <tt>:bulk_import</tt> looks up
      # ETL::Processor::BulkImportProcessor.
      def pre_process(name, configuration={})
        processor_class = ETL::Processor.const_get("#{name.to_s.classify}Processor")
        pre_processors << processor_class.new(self, configuration)
      end

      # Get the defined pre-processors
      def pre_processors
        @pre_processors ||= []
      end

      # Register a post-processor, looked up by name the same way as
      # +pre_process+
      def post_process(name, configuration={})
        processor_class = ETL::Processor.const_get("#{name.to_s.classify}Processor")
        post_processors << processor_class.new(self, configuration)
      end

      # Get the defined post-processors
      def post_processors
        @post_processors ||= []
      end

      # Get the binding of this object; control files are evaluated against it
      def get_binding
        binding
      end

      # Get a map of all transforms for this control, keyed by field name
      def transforms
        @transforms ||= {}
      end

      # Validate the control file.
      #
      # Raises ControlError unless at least one source and at least one
      # destination have been defined.
      def validate
        unless sources.length > 0
          raise ControlError, "Configuration must include one of the following for the source: #{source_types.join(',')}"
        end
        unless destinations.length > 0
          raise ControlError, "Configuration must include one of the following for the destination: #{destination_types.join(',')}"
        end
      end

      protected
      # Get an array of supported source types
      def source_types
        [:file, :database]
      end

      # Get an array of supported destination types
      def destination_types
        [:file, :database]
      end

    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Base class for destinations. Rows passed to +write+ are collected in a
    # buffer which is flushed whenever it reaches +buffer_size+ rows.
    # Subclasses must implement +flush+ and +close+.
    class Destination
      attr_reader :control, :configuration, :mapping
      attr_accessor :buffer_size
      attr_writer :current_row

      class << self
        # Convert the name (string or symbol) to a destination class.
        #
        # Example:
        # <tt>class_for_name(:file)</tt> returns the FileDestination class
        def class_for_name(name)
          ETL::Control.const_get("#{name.to_s.classify}Destination")
        end
      end

      # Initialize the destination.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration hash. <tt>:buffer_size</tt>
      #   defaults to 1000 and the default is stored back into the hash.
      # * <tt>mapping</tt>: The output mapping
      def initialize(control, configuration, mapping)
        @control = control
        @configuration = configuration
        @mapping = mapping
        @buffer_size = configuration[:buffer_size] ||= 1000
      end

      # Get the current row number; starts at 1
      def current_row
        @current_row ||= 1
      end

      # Add the row to the buffer, flushing once the buffer holds
      # +buffer_size+ rows
      def write(row)
        buffer << row
        flush if buffer.length >= buffer_size
      end

      # Abstract method
      def flush
        raise NotImplementedError, "flush method must be implemented by subclasses"
      end

      # Abstract method
      def close
        raise NotImplementedError, "close method must be implemented by subclasses"
      end

      protected
      # Get the buffer of rows which have not been flushed yet
      def buffer
        @buffer ||= []
      end

      # Get the order of elements from the definition of the first source.
      # Returns nil when the control has no source yet, so callers can report
      # a missing order instead of failing on nil.
      def order_from_source
        source = control.sources.first
        return nil unless source

        definition = source.definition
        # A Hash definition (the form the fixed-width parser iterates as
        # field name => options) is ordered by its keys
        return definition.keys if definition.is_a?(Hash)

        definition.collect do |item|
          item.is_a?(Hash) ? item[:name] : item
        end
      end
    end
  end
end
|
61
|
+
|
62
|
+
Dir[File.dirname(__FILE__) + "/destination/*.rb"].each { |file| require(file) }
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module ETL
  module Control
    # Destination which inserts rows into a database table through the
    # ActiveRecord connection.
    class DatabaseDestination < Destination
      # The field names, in the order they are written
      attr_reader :order

      # Initialize the destination and connect to the database.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration hash (<tt>:table</tt>,
      #   <tt>:database</tt>, <tt>:adapter</tt>, <tt>:username</tt>,
      #   <tt>:host</tt>, <tt>:password</tt>)
      # * <tt>mapping</tt>: The output mapping; <tt>:order</tt> overrides the
      #   order taken from the first source
      #
      # Raises ControlError if no order can be determined.
      def initialize(control, configuration, mapping)
        super
        @order = mapping[:order] || order_from_source
        raise ControlError, "Order required in mapping" unless @order
        connect
      end

      # Insert every buffered row inside a single transaction, then empty the
      # buffer.
      def flush
        conn = ActiveRecord::Base.connection
        conn.transaction do
          buffer.each do |row|
            # Values are still written as string literals, but quoted by the
            # connection so embedded quotes cannot break the statement
            values = order.collect { |name| conn.quote(row[name].to_s) }
            q = "INSERT INTO #{configuration[:table]} (#{order.join(',')}) VALUES (#{values.join(',')})"
            ETL::Engine.logger.debug("Query: #{q}")
            conn.execute(q, "Insert row #{current_row}")
            self.current_row += 1
          end
          buffer.clear
        end
      end

      # Flush the remaining rows and disconnect from the database
      def close
        flush
        ActiveRecord::Base.connection.disconnect!
      end

      private
      # Establish the ActiveRecord connection from the configuration.
      # Defaults: mysql adapter, user 'root', host 'localhost'.
      def connect
        ActiveRecord::Base.establish_connection(
          :adapter => (configuration[:adapter] || :mysql),
          :username => (configuration[:username] || 'root'),
          :host => (configuration[:host] || 'localhost'),
          :password => configuration[:password],
          :database => configuration[:database]
        )
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # File as the final destination.
    class FileDestination < Destination
      attr_reader :file, :order
      attr_accessor :append, :separator, :eol, :enclose

      # Initialize the object.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map. <tt>:file</tt> is
      #   resolved relative to the directory of the control file. Defaults:
      #   <tt>:append</tt> false, <tt>:separator</tt> ',', <tt>:eol</tt> "\n",
      #   <tt>:enclose</tt> nil
      # * <tt>mapping</tt>: The output mapping; <tt>:order</tt> overrides the
      #   order taken from the first source
      #
      # Raises ControlError if no order can be determined.
      def initialize(control, configuration, mapping)
        super
        @file = File.join(File.dirname(control.file), configuration[:file])
        @append = configuration[:append] ||= false
        @separator = configuration[:separator] ||= ','
        @eol = configuration[:eol] ||= "\n"
        @enclose = configuration[:enclose] ||= nil

        @order = mapping[:order] || order_from_source
        raise ControlError, "Order required in mapping" unless @order
      end

      # Close the destination. This will flush the buffer and close the underlying stream or connection.
      def close
        flush
        f.close
      end

      # Write every buffered row to the file as one line, then empty the buffer
      def flush
        buffer.each do |row|
          add_virtuals(row)
          values = order.collect { |name| row[name] }
          values.collect! { |v| enclosed(v) } unless enclose.nil?
          f.write(values.join(separator))
          f.write(eol)
        end
        buffer.clear
      end

      private
      # Get the open file stream
      def f
        @f ||= File.open(file, mode)
      end

      # Get the appropriate mode to open the file stream
      def mode
        append ? 'a' : 'w'
      end

      # Wrap the value in the enclose string, putting a backslash before
      # every occurrence of the enclose string inside the value
      def enclosed(value)
        # A String pattern is matched literally, so an enclose string made of
        # regular expression metacharacters is handled correctly
        escaped = value.to_s.gsub(enclose) { |match| "\\#{match}" }
        enclose + escaped + enclose
      end

      # Copy each <tt>:virtual</tt> mapping entry into the row
      def add_virtuals(row)
        if mapping[:virtual]
          mapping[:virtual].each do |key,value|
            row[key] = value
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Base class for ETL sources. Enumerable is mixed in, so subclasses
    # supply the <tt>each</tt> method.
    class Source
      include Enumerable

      attr_accessor :control, :configuration, :definition

      # Convert the name (string or symbol) to a source class.
      #
      # Example:
      # <tt>class_for_name(:file)</tt> returns the FileSource class
      def self.class_for_name(name)
        ETL::Control.const_get("#{name.to_s.classify}Source")
      end

      # Create the source.
      # * <tt>control</tt>: The control object
      # * <tt>configuration</tt>: The configuration hash
      # * <tt>definition</tt>: The source layout definition
      def initialize(control, configuration, definition)
        @control, @configuration, @definition = control, configuration, definition
      end
    end
  end
end
|
26
|
+
|
27
|
+
Dir[File.dirname(__FILE__) + "/source/*.rb"].each { |file| require(file) }
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module ETL
  module Control
    # Source which reads every row of a database table through the
    # ActiveRecord connection.
    class DatabaseSource < Source
      # Create the source and establish the database connection.
      # * <tt>control</tt>: The control object
      # * <tt>configuration</tt>: The configuration hash (<tt>:table</tt>,
      #   <tt>:database</tt>, <tt>:adapter</tt>, <tt>:username</tt>,
      #   <tt>:host</tt>, <tt>:password</tt>)
      # * <tt>definition</tt>: The source layout definition
      def initialize(control, configuration, definition)
        super
        connect
      end

      # Yields each row selected from the configured table
      def each
        rows = ActiveRecord::Base.connection.select_all("SELECT * FROM #{configuration[:table]}")
        rows.each { |row| yield row }
      end

      private
      # Set up the DB connection.
      # Defaults: mysql adapter, user 'root', host 'localhost'.
      def connect
        settings = {
          :adapter => (configuration[:adapter] || :mysql),
          :username => (configuration[:username] || 'root'),
          :host => (configuration[:host] || 'localhost'),
          :password => configuration[:password],
          :database => configuration[:database]
        }
        ActiveRecord::Base.establish_connection(settings)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module ETL
  module Control
    # Source which reads rows from files through the parser named by the
    # <tt>:parser</tt> configuration entry.
    class FileSource < Source
      # Create the source and build its parser.
      # * <tt>control</tt>: The control object
      # * <tt>configuration</tt>: The configuration hash
      # * <tt>definition</tt>: The source layout definition
      def initialize(control, configuration, definition)
        super
        configure
      end

      # Yields each row produced by the parser
      def each
        @parser.each do |row|
          yield row
        end
      end

      private
      # Look up the parser class for <tt>configuration[:parser]</tt> and
      # create an instance bound to this source
      def configure
        parser_class = ETL::Parser::Parser.class_for_name(@configuration[:parser])
        @parser = parser_class.new(self)
      end
    end
  end
end
|
data/lib/etl/engine.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
module ETL
  # Runs an ETL process: pre-processors, then every row of every source
  # through the transforms and into every destination, then post-processors.
  class Engine

    class << self
      # Process a control file or object with a new Engine instance
      def process(control_file)
        new.process(control_file)
      end

      # Replace the logger used by the engine
      attr_writer :logger

      # Get the logger. Unless one has been assigned, a logger writing to
      # 'etl.log' at DEBUG level is created on first use.
      def logger
        unless @logger
          @logger = Logger.new('etl.log')
          @logger.level = Logger::DEBUG
        end
        @logger
      end
    end

    # Process a control file or object.
    # * <tt>control</tt>: Anything accepted by ETL::Control::Control.resolve
    def process(control)
      control = ETL::Control::Control.resolve(control)

      pre_process(control)

      destinations = control.destinations

      control.sources.each do |source|
        source.each do |row|
          # execute transforms
          row.each do |name, value|
            row[name] = ETL::Transform::Transform.transform(name, value, control.get_transform(name))
          end
          # write the row to the destination
          destinations.each { |destination| destination.write(row) }
        end
      end

      # Close only after every source has been read, so rows from a second
      # or later source are not written to an already-closed destination
      destinations.each { |destination| destination.close }

      post_process(control)
    end

    private
    # Run every pre-processor defined in the control
    def pre_process(control)
      control.pre_processors.each do |processor|
        processor.process
      end
    end

    # Run every post-processor defined in the control
    def post_process(control)
      control.post_processors.each do |processor|
        processor.process
      end
    end
  end
end
|
data/lib/etl/parser.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
module ETL #:nodoc:
  module Parser #:nodoc:
    # Parser for delimited files, read with FasterCSV
    class DelimitedParser < ETL::Parser::Parser
      include Enumerable

      # Create the parser and build its field list
      # * <tt>source</tt>: The Source object
      def initialize(source)
        super
        configure
      end

      # Yields one hash per record of every file matching the source file
      # pattern. Values are assigned to fields by column position and
      # converted to the field type.
      def each
        options = {}
        Dir.glob(file).each do |path|
          FasterCSV.foreach(path, options) do |raw_row|
            row = {}
            raw_row.each_with_index do |record, index|
              field = fields[index]
              row[field.name] = convert(field.name, record, field.type)
            end
            yield row
          end
        end
      end

      # Get an array of defined fields
      def fields
        @fields ||= []
      end

      private
      # Build the field list from the source definition. Each entry is either
      # a Symbol (the field name) or a Hash with <tt>:name</tt> and
      # <tt>:type</tt>.
      def configure
        source.definition.each do |entry|
          fields << case entry
                    when Symbol
                      Field.new(entry)
                    when Hash
                      Field.new(entry[:name], entry[:type])
                    else
                      raise DefinitionError, "Each field definition must either be a symbol or a hash"
                    end
        end
      end

      # Name and type of a single delimited field
      class Field
        attr_reader :name, :type

        # * <tt>name</tt>: The field name
        # * <tt>type</tt>: The type name; :string when omitted
        def initialize(name, type=:string)
          @name, @type = name, type
        end
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Parser #:nodoc:
|
3
|
+
# Parser for fixed-width files
class FixedWidthParser < ETL::Parser::Parser
  include Enumerable

  # Initialize the parser
  # * <tt>source</tt>: The source object
  def initialize(source)
    super
    configure
  end

  # Yields one hash per line of every file matching the source file
  # pattern. Each value is the slice of the line described by the field,
  # stripped of surrounding whitespace and converted to the field type.
  def each
    Dir.glob(file).each do |path|
      # File.foreach closes each file once it has been read
      File.foreach(path) do |line|
        row = {}
        fields.each do |name, f|
          # TODO make strip optional?
          # A line which ends before the field starts slices to nil;
          # treat that as an empty value
          raw = line[f.field_start, f.field_length].to_s.strip
          row[name] = convert(name, raw, f.type)
        end
        yield row
      end
    end
  end

  # Return a map of defined fields
  def fields
    @fields ||= {}
  end

  private
  # Build the field map from the source definition, a hash of field name
  # to options (<tt>:name</tt>, <tt>:start</tt>, <tt>:end</tt>,
  # <tt>:length</tt>, <tt>:type</tt>)
  def configure
    source.definition.each do |field, options|
      fields[field] = FixedWidthField.new(options[:name], options[:start], options[:end], options[:length], options[:type])
    end
  end
end
|
40
|
+
|
41
|
+
# Position and type of a single field in a fixed-width record.
class FixedWidthField
  attr_reader :name, :field_start, :field_end, :field_length, :type

  # * <tt>name</tt>: The field name
  # * <tt>field_start</tt>: The start position; stored minus one
  # * <tt>field_end</tt>: The end position (takes precedence over the length)
  # * <tt>field_length</tt>: The field length, used when no end is given
  # * <tt>type</tt>: The type name; :string when nil
  #
  # Raises DefinitionError if neither an end nor a length is given.
  def initialize(name, field_start, field_end=nil, field_length=nil, type=nil)
    @name = name
    @type = type || :string
    @field_start = field_start - 1

    # Whichever of end/length is missing is derived from the other
    @field_end = field_end || (field_length && @field_start + field_length)
    raise DefinitionError, "Either field_end or field_length required" unless @field_end
    @field_length = field_end ? @field_end - @field_start : field_length
  end
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module ETL
  module Parser
    # Base class for parsers
    class Parser
      # Convert the name (string or symbol) to a parser class.
      #
      # Example:
      # <tt>class_for_name(:fixed_width)</tt> returns a FixedWidthParser class
      def self.class_for_name(name)
        ETL::Parser.const_get("#{name.to_s.classify}Parser")
      end

      # The source this parser reads for
      attr_reader :source

      # * <tt>source</tt>: The source object
      def initialize(source)
        @source = source
      end

      # Convert the value to the specified type.
      #
      # Parameters:
      # * <tt>name</tt>: The name of the field
      # * <tt>value</tt>: The value
      # * <tt>type</tt>: The type name (:integer, :float, :string)
      #
      # Any type other than :integer or :float returns the value unchanged.
      def convert(name, value, type)
        return value.to_i if :integer == type
        return value.to_f if :float == type
        value
      end

      protected
      # Get the source file path (or glob pattern): the <tt>:file</tt>
      # configuration entry resolved against the control file's directory
      def file
        control_dir = File.dirname(source.control.file)
        File.join(control_dir, source.configuration[:file])
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module ETL
  module Processor
    # Processor which loads a file into a database table with
    # LOAD DATA LOCAL INFILE.
    class BulkImportProcessor < ETL::Processor::Processor
      attr_reader :file, :target

      # Initialize the processor and connect to the target database.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration hash. <tt>:file</tt> is
      #   resolved relative to the directory of the control file;
      #   <tt>:target</tt> is a hash with <tt>:table</tt>, <tt>:database</tt>
      #   and optionally <tt>:adapter</tt>, <tt>:username</tt>,
      #   <tt>:host</tt>, <tt>:password</tt>
      #
      # Raises ControlError if <tt>:file</tt> or <tt>:target</tt> is missing.
      def initialize(control, configuration)
        super
        raise ControlError, "File required for bulk import" unless configuration[:file]
        raise ControlError, "Target required for bulk import" unless configuration[:target]
        @file = File.join(File.dirname(control.file), configuration[:file])
        @target = configuration[:target]
        connect
      end

      # Load the file into the target table inside a transaction
      def process
        conn = ActiveRecord::Base.connection
        conn.transaction do
          # Since LOCAL is used this must be allowed by both the client and server
          # The path is quoted by the connection so quote characters in it
          # cannot break the statement
          conn.execute("LOAD DATA LOCAL INFILE #{conn.quote(file)} INTO TABLE #{target[:table]}")
        end
      end

      private
      # Get a DEBUG level logger which writes to STDOUT
      def log
        unless @log
          @log = Logger.new(STDOUT)
          @log.level = Logger::DEBUG
        end
        @log
      end

      # Connect to the database.
      # Defaults: mysql adapter, user 'root', host 'localhost'.
      def connect
        ActiveRecord::Base.establish_connection(
          :adapter => (target[:adapter] || :mysql),
          :username => (target[:username] || 'root'),
          :host => (target[:host] || 'localhost'),
          :password => target[:password],
          :database => target[:database]
        )
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module ETL #:nodoc:
  module Processor #:nodoc:
    # Base class for pre and post processors
    class Processor
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration hash
      def initialize(control, configuration)
        @control, @configuration = control, configuration
      end

      protected
      # Readers for the constructor arguments, available to subclasses
      attr_reader :control, :configuration
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which decodes coded values using a lookup table read from a
    # text file with one <tt>code<delimiter>value</tt> entry per line. An
    # entry with an empty code sets the default value.
    class DecodeTransform < ETL::Transform::Transform
      attr_accessor :decode_table_path, :decode_table_delimiter, :default_value

      # Initialize the transform.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration hash.
      #   <tt>:decode_table_path</tt> is resolved relative to the directory of
      #   the control file and defaults to 'decode.txt';
      #   <tt>:decode_table_delimiter</tt> defaults to ':';
      #   <tt>:default_value</tt> defaults to 'No Value'
      def initialize(control, configuration={})
        super

        # Resolve into a local so the caller's hash is left untouched and a
        # reused configuration is not joined to the directory twice
        path = configuration[:decode_table_path]
        path = File.join(File.dirname(control.file), path) if path

        @decode_table_path = (path || 'decode.txt')
        @decode_table_delimiter = (configuration[:decode_table_delimiter] || ':')
        @default_value = (configuration[:default_value] || 'No Value')
      end

      # Return the decoded value for the code, or the default value when the
      # code is not in the decode table
      def transform(value)
        decode_table[value] || default_value
      end

      # Get the decode table, reading it from +decode_table_path+ on first
      # use. Blank lines are skipped. Only the first delimiter on a line
      # separates code from value, so values may contain the delimiter.
      def decode_table
        unless @decode_table
          table = {}
          # File.foreach closes the file once it has been read
          File.foreach(decode_table_path) do |line|
            line = line.strip
            next if line.empty?
            code, value = line.split(decode_table_delimiter, 2)
            if code && code.length > 0
              table[code] = value
            else
              @default_value = value
            end
          end
          # Assigned only after a complete read, so a failed read is retried
          # on the next call instead of leaving an empty table cached
          @decode_table = table
        end
        @decode_table
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform that replaces a value with the hexadecimal form of its
    # SHA-1 digest.
    class Sha1Transform < ETL::Transform::Transform
      # Hands +control+ and +configuration+ to the superclass unchanged.
      def initialize(control, configuration={})
        super(control, configuration)
      end

      # Returns the SHA-1 hex digest of +value+.
      def transform(value)
        hex_digest = Digest::SHA1.hexdigest(value)
        hex_digest
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ETL
  module Transform
    # Base class for transforms. Subclasses implement an instance method
    # <tt>transform(value)</tt> returning the transformed value.
    class Transform
      class << self
        # Apply each entry of +transforms+, in order, to +value+ and return
        # the final result.
        #
        # * +name+: Name of the field being transformed (currently unused)
        # * +value+: The initial value
        # * +transforms+: Collection of Procs, Transform instances, or any
        #   other object that responds to +call+ (for example a Method)
        #
        # Raises ControlError when an entry is none of the supported kinds.
        def transform(name, value, transforms)
          # logger.debug "Transforming field #{name}" if transforms.length > 0
          transforms.each do |transform|
            value =
              case transform
              when Proc
                transform.call(value)
              when Transform
                transform.transform(value)
              else
                # Accept any remaining callable; everything else is a
                # configuration error.
                unless transform.respond_to?(:call)
                  raise ControlError, "Unsupported transform configuration type: #{transform}"
                end
                transform.call(value)
              end
          end
          value
        end
      end

      attr_reader :control, :configuration

      # Store the control object and the (optional) configuration hash.
      def initialize(control, configuration={})
        @control = control
        @configuration = configuration
      end
    end
  end
end
|
data/lib/etl/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: activewarehouse-etl
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2006-12-06 00:00:00 -05:00
|
8
|
+
summary: Pure Ruby ETL package.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: anthonyeden@gmail.com
|
12
|
+
homepage: http://activewarehouse.rubyforge.org/etl
|
13
|
+
rubyforge_project: activewarehouse
|
14
|
+
description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
|
15
|
+
autorequire:
|
16
|
+
default_executable: etl
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Anthony Eden
|
31
|
+
files:
|
32
|
+
- CHANGELOG
|
33
|
+
- README
|
34
|
+
- Rakefile
|
35
|
+
- bin/etl
|
36
|
+
- lib/etl
|
37
|
+
- lib/etl.rb
|
38
|
+
- lib/etl/commands
|
39
|
+
- lib/etl/control
|
40
|
+
- lib/etl/control.rb
|
41
|
+
- lib/etl/engine.rb
|
42
|
+
- lib/etl/parser
|
43
|
+
- lib/etl/parser.rb
|
44
|
+
- lib/etl/processor
|
45
|
+
- lib/etl/processor.rb
|
46
|
+
- lib/etl/transform
|
47
|
+
- lib/etl/transform.rb
|
48
|
+
- lib/etl/version.rb
|
49
|
+
- lib/etl/commands/etl.rb
|
50
|
+
- lib/etl/control/control.rb
|
51
|
+
- lib/etl/control/destination
|
52
|
+
- lib/etl/control/destination.rb
|
53
|
+
- lib/etl/control/source
|
54
|
+
- lib/etl/control/source.rb
|
55
|
+
- lib/etl/control/destination/database_destination.rb
|
56
|
+
- lib/etl/control/destination/file_destination.rb
|
57
|
+
- lib/etl/control/source/database_source.rb
|
58
|
+
- lib/etl/control/source/file_source.rb
|
59
|
+
- lib/etl/parser/delimited_parser.rb
|
60
|
+
- lib/etl/parser/fixed_width_parser.rb
|
61
|
+
- lib/etl/parser/parser.rb
|
62
|
+
- lib/etl/processor/bulk_import_processor.rb
|
63
|
+
- lib/etl/processor/processor.rb
|
64
|
+
- lib/etl/transform/decode_transform.rb
|
65
|
+
- lib/etl/transform/sha1_transform.rb
|
66
|
+
- lib/etl/transform/transform.rb
|
67
|
+
test_files: []
|
68
|
+
|
69
|
+
rdoc_options:
|
70
|
+
- --exclude
|
71
|
+
- .
|
72
|
+
extra_rdoc_files: []
|
73
|
+
|
74
|
+
executables:
|
75
|
+
- etl
|
76
|
+
extensions: []
|
77
|
+
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
dependencies:
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: rake
|
83
|
+
version_requirement:
|
84
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 0.7.1
|
89
|
+
version:
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
name: activesupport
|
92
|
+
version_requirement:
|
93
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: 1.3.1.5618
|
98
|
+
version:
|
99
|
+
- !ruby/object:Gem::Dependency
|
100
|
+
name: activerecord
|
101
|
+
version_requirement:
|
102
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: 1.14.4.5618
|
107
|
+
version:
|
108
|
+
- !ruby/object:Gem::Dependency
|
109
|
+
name: fastercsv
|
110
|
+
version_requirement:
|
111
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: 1.0.0
|
116
|
+
version:
|