activewarehouse-etl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +2 -0
- data/README +27 -0
- data/Rakefile +117 -0
- data/bin/etl +26 -0
- data/lib/etl.rb +58 -0
- data/lib/etl/commands/etl.rb +45 -0
- data/lib/etl/control.rb +3 -0
- data/lib/etl/control/control.rb +134 -0
- data/lib/etl/control/destination.rb +62 -0
- data/lib/etl/control/destination/database_destination.rb +47 -0
- data/lib/etl/control/destination/file_destination.rb +63 -0
- data/lib/etl/control/source.rb +27 -0
- data/lib/etl/control/source/database_source.rb +30 -0
- data/lib/etl/control/source/file_source.rb +19 -0
- data/lib/etl/engine.rb +61 -0
- data/lib/etl/parser.rb +2 -0
- data/lib/etl/parser/delimited_parser.rb +56 -0
- data/lib/etl/parser/fixed_width_parser.rb +59 -0
- data/lib/etl/parser/parser.rb +43 -0
- data/lib/etl/processor.rb +2 -0
- data/lib/etl/processor/bulk_import_processor.rb +39 -0
- data/lib/etl/processor/processor.rb +18 -0
- data/lib/etl/transform.rb +2 -0
- data/lib/etl/transform/decode_transform.rb +37 -0
- data/lib/etl/transform/sha1_transform.rb +15 -0
- data/lib/etl/transform/transform.rb +29 -0
- data/lib/etl/version.rb +9 -0
- metadata +116 -0
data/CHANGELOG
ADDED
data/README
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Ruby ETL parser.
|
2
|
+
|
3
|
+
== Features
|
4
|
+
|
5
|
+
Current supported features:
|
6
|
+
|
7
|
+
* ETL Domain Specific Language (DSL) - Control files are specified in a Ruby-based DSL
|
8
|
+
* Multiple source types - fixed-width and delimited text files currently supported
|
9
|
+
* Multiple destination types - file and database destinations
|
10
|
+
* Support for extracting from multiple sources
|
11
|
+
* Support for loading to multiple destinations
|
12
|
+
* Extensible transformations - comes with a built-in SHA1 one-way hash example
|
13
|
+
* Pre/post processing - export to files and then post process with the bulk import processor for large amounts of data
|
14
|
+
* Virtual fields - Add a field to the destination data which doesn't exist in the source data
|
15
|
+
|
16
|
+
|
17
|
+
== Requirements
|
18
|
+
|
19
|
+
* ActiveSupport Gem
|
20
|
+
* ActiveRecord Gem
|
21
|
+
* FasterCSV Gem
|
22
|
+
|
23
|
+
== Examples
|
24
|
+
Examples can be found in the test directory.
|
25
|
+
|
26
|
+
== Feedback
|
27
|
+
This is a work in progress. Comments should be made on the activewarehouse-discuss mailing list at the moment.
|
data/Rakefile
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rake/packagetask'
|
5
|
+
require 'rake/gempackagetask'
|
6
|
+
require 'rake/contrib/rubyforgepublisher'
|
7
|
+
|
8
|
+
require File.join(File.dirname(__FILE__), 'lib/etl', 'version')
|
9
|
+
|
10
|
+
PKG_BUILD = ENV['PKG_BUILD'] ? '.' + ENV['PKG_BUILD'] : ''
|
11
|
+
PKG_NAME = 'activewarehouse-etl'
|
12
|
+
PKG_VERSION = ETL::VERSION::STRING + PKG_BUILD
|
13
|
+
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
|
14
|
+
PKG_DESTINATION = ENV["PKG_DESTINATION"] || "../#{PKG_NAME}"
|
15
|
+
|
16
|
+
RELEASE_NAME = "REL #{PKG_VERSION}"
|
17
|
+
|
18
|
+
RUBY_FORGE_PROJECT = "activewarehouse"
|
19
|
+
RUBY_FORGE_USER = "aeden"
|
20
|
+
|
21
|
+
desc 'Default: run unit tests.'
|
22
|
+
task :default => :test
|
23
|
+
|
24
|
+
desc 'Test the ETL application.'
|
25
|
+
Rake::TestTask.new(:test) do |t|
|
26
|
+
t.libs << 'lib'
|
27
|
+
t.pattern = 'test/**/*_test.rb'
|
28
|
+
t.verbose = true
|
29
|
+
# TODO: reset the database
|
30
|
+
end
|
31
|
+
|
32
|
+
desc 'Generate documentation for the ETL application.'
|
33
|
+
Rake::RDocTask.new(:rdoc) do |rdoc|
|
34
|
+
rdoc.rdoc_dir = 'rdoc'
|
35
|
+
rdoc.title = 'ActiveWarehouse ETL'
|
36
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
37
|
+
rdoc.rdoc_files.include('README')
|
38
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
39
|
+
end
|
40
|
+
|
41
|
+
PKG_FILES = FileList[
|
42
|
+
'CHANGELOG',
|
43
|
+
'README',
|
44
|
+
'Rakefile',
|
45
|
+
'bin/**/*',
|
46
|
+
'doc/**/*',
|
47
|
+
'lib/**/*',
|
48
|
+
] - [ 'test' ]
|
49
|
+
|
50
|
+
spec = Gem::Specification.new do |s|
|
51
|
+
s.name = 'activewarehouse-etl'
|
52
|
+
s.version = PKG_VERSION
|
53
|
+
s.summary = "Pure Ruby ETL package."
|
54
|
+
s.description = <<-EOF
|
55
|
+
ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
|
56
|
+
EOF
|
57
|
+
|
58
|
+
s.add_dependency('rake', '>= 0.7.1')
|
59
|
+
s.add_dependency('activesupport', '>= 1.3.1.5618')
|
60
|
+
s.add_dependency('activerecord', '>= 1.14.4.5618')
|
61
|
+
s.add_dependency('fastercsv', '>= 1.0.0')
|
62
|
+
|
63
|
+
s.rdoc_options << '--exclude' << '.'
|
64
|
+
s.has_rdoc = false
|
65
|
+
|
66
|
+
s.files = PKG_FILES.to_a.delete_if {|f| f.include?('.svn')}
|
67
|
+
s.require_path = 'lib'
|
68
|
+
|
69
|
+
s.bindir = "bin" # Use these for applications.
|
70
|
+
s.executables = ['etl']
|
71
|
+
s.default_executable = "etl"
|
72
|
+
|
73
|
+
s.author = "Anthony Eden"
|
74
|
+
s.email = "anthonyeden@gmail.com"
|
75
|
+
s.homepage = "http://activewarehouse.rubyforge.org/etl"
|
76
|
+
s.rubyforge_project = "activewarehouse"
|
77
|
+
end
|
78
|
+
|
79
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
80
|
+
pkg.gem_spec = spec
|
81
|
+
end
|
82
|
+
|
83
|
+
desc "Generate code statistics"
# Prints, for every Ruby file under lib/ (paths matching /vendor/ are skipped),
# the total number of lines and the number of code lines (lines which are
# neither blank nor comment-only), followed by the grand totals.
task :lines do
  total_lines = 0
  total_codelines = 0

  FileList["lib/**/*.rb"].each do |file_name|
    next if file_name =~ /vendor/

    lines = 0
    codelines = 0

    # File.foreach closes the file once iteration finishes, so no handle is
    # left open per file scanned.
    File.foreach(file_name) do |line|
      lines += 1
      next if line =~ /^\s*$/
      next if line =~ /^\s*#/
      codelines += 1
    end
    puts "L: #{sprintf("%4d", lines)}, LOC #{sprintf("%4d", codelines)} | #{file_name}"

    total_lines += lines
    total_codelines += codelines
  end

  puts "Total: Lines #{total_lines}, LOC #{total_codelines}"
end
|
107
|
+
|
108
|
+
desc "Publish the release files to RubyForge."
# Logs in to RubyForge and uploads the gem, tgz and zip packages found in
# pkg/ as a single release. Depends on the :package task.
task :release => [ :package ] do
  `rubyforge login`

  %w( gem tgz zip ).each do |ext|
    # Built from the constants declared at the top of this Rakefile so the
    # project, release name and package file name are defined in one place.
    release_command = "rubyforge add_release #{RUBY_FORGE_PROJECT} #{PKG_NAME} '#{RELEASE_NAME}' pkg/#{PKG_FILE_NAME}.#{ext}"
    puts release_command
    # Report a failed upload instead of ignoring it; keep going so the
    # remaining package types are still attempted.
    warn "Release command failed: #{release_command}" unless system(release_command)
  end
end
|
data/bin/etl
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#--
|
4
|
+
# Copyright (c) 2006 Anthony Eden
|
5
|
+
#
|
6
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
7
|
+
# a copy of this software and associated documentation files (the
|
8
|
+
# "Software"), to deal in the Software without restriction, including
|
9
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
10
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
11
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
12
|
+
# the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be
|
15
|
+
# included in all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
19
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
21
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
22
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
23
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
24
|
+
#++
|
25
|
+
|
26
|
+
require File.dirname(__FILE__) + "/../lib/etl/commands/etl"
|
data/lib/etl.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006 Anthony Eden
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
# Load the runtime dependencies. Libraries which the host application has
# already loaded (Logger, ActiveSupport, ActiveRecord) are not loaded again.
require 'logger' unless defined?(Logger)

require 'rubygems'

# Kernel#gem activates the installed gem; it replaces require_gem, which is
# no longer provided by RubyGems.
unless defined?(ActiveSupport)
  gem 'activesupport'
  require 'active_support'
end

unless defined?(ActiveRecord)
  gem 'activerecord'
  require 'active_record'
end

gem 'fastercsv'
require 'faster_csv'
|
41
|
+
|
42
|
+
$:.unshift(File.dirname(__FILE__))
|
43
|
+
|
44
|
+
require 'etl/version'
|
45
|
+
require 'etl/engine'
|
46
|
+
require 'etl/control'
|
47
|
+
require 'etl/parser'
|
48
|
+
require 'etl/transform'
|
49
|
+
require 'etl/processor'
|
50
|
+
|
51
|
+
module ETL #:nodoc:
  # Base class of every error raised by the ETL library.
  class ETLError < StandardError; end #:nodoc:

  # Raised when a control file or control object is invalid.
  class ControlError < ETLError; end #:nodoc:

  # Raised when a source field definition is invalid.
  class DefinitionError < ControlError; end #:nodoc:
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006 Anthony Eden
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'benchmark'
|
25
|
+
require File.dirname(__FILE__) + '/../../etl'
|
26
|
+
|
27
|
+
# Print the command line usage statement to standard output.
def usage #:nodoc:
  puts "Usage: etl ctl_file [ctl_file2 ctl_file3 ...]"
end

# Without arguments there is nothing to process, so only show the usage.
# Otherwise run every control file given on the command line, in order, and
# report the total wall-clock time.
if ARGV.length >= 1
  puts "Starting ETL process"

  elapsed = Benchmark.realtime do
    ARGV.each do |control_file|
      puts "Processing #{control_file}"
      ETL::Engine.process(control_file)
    end
  end

  puts "ETL process complete in #{sprintf('%.3f', elapsed)} seconds"
else
  usage
end
|
data/lib/etl/control.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Object representation of a control file.
    #
    # A control file is Ruby code evaluated in the context of a Control
    # instance, so the public instance methods below (+source+,
    # +destination+, +transform+, +pre_process+ and +post_process+) are the
    # statements available to control file authors.
    class Control
      # The path of the control file
      attr_reader :file

      class << self
        # Parse a control file and return a Control instance.
        # * <tt>control_file</tt>: The path of the control file, or a File
        #   whose path is used
        #
        # Raises a ControlError if the control file does not define at least
        # one source and one destination.
        def parse(control_file)
          control_file = control_file.path if control_file.instance_of?(File)
          control = ETL::Control::Control.new(control_file)
          # The control file is executable Ruby and must come from a trusted
          # location. The content is evaluated verbatim and the file name is
          # handed to eval so that errors raised by the control file report
          # the control file and the line where they occurred.
          eval(IO.read(control_file), control.get_binding, control_file.to_s)
          control.validate
          control
        end

        # Resolve the argument to a Control instance.
        # * <tt>control</tt>: A control file path (String), a File or a
        #   Control instance
        #
        # Raises a ControlError for any other kind of argument.
        def resolve(control)
          case control
          when String, File
            # parse only needs the path, so no file handle is opened here
            ETL::Control::Control.parse(control)
          when ETL::Control::Control
            control
          else
            raise ControlError, "Control must be a String, File or Control object"
          end
        end
      end

      # Initialize the control object.
      # * <tt>file</tt>: The path of the control file
      def initialize(file)
        @file = file
      end

      # Define a source. A source object is created for every supported
      # source type (see +source_types+) present as a key in the
      # configuration.
      def source(name, configuration={}, definition={})
        source_types.each do |source_type|
          if configuration[source_type]
            source_class = ETL::Control::Source.class_for_name(source_type)
            sources << source_class.new(self, configuration, definition)
          end
        end
      end

      # Get the defined sources
      def sources
        @sources ||= []
      end

      # Define a destination. A destination object is created for every
      # supported destination type (see +destination_types+) present as a
      # key in the configuration.
      def destination(name, configuration={}, mapping={})
        destination_types.each do |dest_type|
          if configuration[dest_type]
            dest_class = ETL::Control::Destination.class_for_name(dest_type)
            destinations << dest_class.new(self, configuration, mapping)
          end
        end
      end

      # Get the defined destinations
      def destinations
        @destinations ||= []
      end

      # Define a transform for the named field. Either the name of a
      # transformer (resolved to <tt>ETL::Transform::<Name>Transform</tt>) or
      # a block must be given; when both are given the transformer is used.
      #
      # Raises a ControlError if neither is given.
      def transform(name, transformer=nil, configuration={}, &block)
        transforms[name] ||= []
        if transformer
          transform_class = ETL::Transform.const_get("#{transformer.to_s.classify}Transform")
          transforms[name] << transform_class.new(self, configuration)
        elsif block_given?
          transforms[name] << block
        else
          raise ControlError, "Either a transformer or a block must be specified"
        end
      end

      # Get the array of transforms defined for the named field. The array
      # is empty if no transform was defined.
      def get_transform(name)
        transforms[name] ||= []
      end

      # Define a pre-processor. The name is resolved to
      # <tt>ETL::Processor::<Name>Processor</tt>.
      def pre_process(name, configuration={})
        processor_class = ETL::Processor.const_get("#{name.to_s.classify}Processor")
        pre_processors << processor_class.new(self, configuration)
      end

      # Get the defined pre-processors
      def pre_processors
        @pre_processors ||= []
      end

      # Define a post-processor. The name is resolved to
      # <tt>ETL::Processor::<Name>Processor</tt>.
      def post_process(name, configuration={})
        processor_class = ETL::Processor.const_get("#{name.to_s.classify}Processor")
        post_processors << processor_class.new(self, configuration)
      end

      # Get the defined post-processors
      def post_processors
        @post_processors ||= []
      end

      # Get the binding of this instance. The control file is evaluated
      # against it.
      def get_binding
        binding
      end

      # Get a map of all transforms for this control
      def transforms
        @transforms ||= {}
      end

      # Validate the control file.
      #
      # Raises a ControlError unless at least one source and at least one
      # destination have been defined.
      def validate
        unless sources.length > 0
          raise ControlError, "Configuration must include one of the following for the source: #{source_types.join(',')}"
        end
        unless destinations.length > 0
          raise ControlError, "Configuration must include one of the following for the destination: #{destination_types.join(',')}"
        end
      end

      protected
      # Get an array of supported source types
      def source_types
        [:file, :database]
      end

      # Get an array of supported destination types
      def destination_types
        [:file, :database]
      end

    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Base class for destinations. Rows are buffered and handed to the
    # subclass through +flush+ once +buffer_size+ rows have been collected.
    # Subclasses must implement +flush+ and +close+.
    class Destination
      attr_reader :control, :configuration, :mapping
      attr_accessor :buffer_size
      attr_writer :current_row

      class << self
        # Convert the name (string or symbol) to a destination class, for
        # example <tt>:file</tt> to FileDestination.
        def class_for_name(name)
          ETL::Control.const_get("#{name.to_s.classify}Destination")
        end
      end

      # Initialize the destination.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map. The
      #   <tt>:buffer_size</tt> entry defaults to 1000 and the default is
      #   written back to the map.
      # * <tt>mapping</tt>: The output mapping
      def initialize(control, configuration, mapping)
        @control = control
        @configuration = configuration
        @mapping = mapping
        @buffer_size = configuration[:buffer_size] ||= 1000
      end

      # Get the current row number, starting at 1
      def current_row
        @current_row ||= 1
      end

      # Add the row to the buffer and flush the buffer once it holds
      # +buffer_size+ rows.
      def write(row)
        buffer << row
        flush if buffer.length >= buffer_size
      end

      # Abstract method
      def flush
        raise NotImplementedError, "flush method must be implemented by subclasses"
      end

      # Abstract method
      def close
        raise NotImplementedError, "close method must be implemented by subclasses"
      end

      protected
      # Get the buffer of rows which have not been flushed yet
      def buffer
        @buffer ||= []
      end

      # Get the order of elements from the definition of the first source.
      # The definition items may be plain names, hashes with a
      # <tt>:name</tt> entry, or name/options pairs, which is what
      # iterating a Hash definition yields.
      def order_from_source
        control.sources.first.definition.map do |item|
          case item
          when Hash
            item[:name]
          when Array
            # name => options pair; the rows are keyed by the name
            item.first
          else
            item
          end
        end
      end
    end
  end
end
|
61
|
+
|
62
|
+
Dir[File.dirname(__FILE__) + "/destination/*.rb"].each { |file| require(file) }
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module ETL
  module Control
    # Destination which inserts each row into the database table named by
    # the <tt>:table</tt> configuration entry.
    class DatabaseDestination < Destination
      # The ordered list of column names to write
      attr_reader :order

      # Initialize the destination and connect to the database.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map (<tt>:table</tt>,
      #   <tt>:database</tt>, <tt>:adapter</tt>, <tt>:username</tt>,
      #   <tt>:password</tt>, <tt>:host</tt>)
      # * <tt>mapping</tt>: The output mapping. <tt>:order</tt> lists the
      #   columns; the order of the first source is used when it is missing.
      def initialize(control, configuration, mapping)
        super
        @order = mapping[:order] || order_from_source
        raise ControlError, "Order required in mapping" unless @order
        connect
      end

      # Insert all buffered rows inside a single transaction and clear the
      # buffer.
      def flush
        conn = ActiveRecord::Base.connection
        conn.transaction do
          buffer.each do |row|
            # Values are written as strings, as before, but are now escaped
            # by the connection so that quotes in the data cannot break or
            # alter the statement.
            values = order.map { |name| conn.quote(row[name].to_s) }
            q = "INSERT INTO #{configuration[:table]} (#{order.join(',')}) VALUES (#{values.join(',')})"
            ETL::Engine.logger.debug("Query: #{q}")
            conn.execute(q, "Insert row #{current_row}")
            # go through the accessors so the counter is initialized
            self.current_row += 1
          end
          buffer.clear
        end
      end

      # Flush the buffer and disconnect from the database
      def close
        flush
        ActiveRecord::Base.connection.disconnect!
      end

      private
      # Connect to the database described by the configuration. The adapter
      # defaults to mysql, the user to root and the host to localhost.
      def connect
        ActiveRecord::Base.establish_connection(
          :adapter => (configuration[:adapter] || :mysql),
          :username => (configuration[:username] || 'root'),
          :host => (configuration[:host] || 'localhost'),
          :password => configuration[:password],
          :database => configuration[:database]
        )
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # File as the final destination.
    class FileDestination < Destination
      attr_reader :file, :order
      attr_accessor :append, :separator, :eol, :enclose

      # Initialize the object.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map. <tt>:file</tt> is
      #   resolved relative to the directory of the control file.
      #   <tt>:append</tt> (false), <tt>:separator</tt> (','),
      #   <tt>:eol</tt> ("\n") and <tt>:enclose</tt> (nil) are optional;
      #   their defaults are written back to the map.
      # * <tt>mapping</tt>: The output mapping. <tt>:order</tt> lists the
      #   fields to write; the order of the first source is used when it is
      #   missing. <tt>:virtual</tt> is a map of additional field values.
      def initialize(control, configuration, mapping)
        super
        @file = File.join(File.dirname(control.file), configuration[:file])
        @append = configuration[:append] ||= false
        @separator = configuration[:separator] ||= ','
        @eol = configuration[:eol] ||= "\n"
        @enclose = configuration[:enclose] ||= nil

        @order = mapping[:order] || order_from_source
        raise ControlError, "Order required in mapping" unless @order
      end

      # Close the destination. This will flush the buffer and close the underlying stream or connection.
      def close
        flush
        f.close
      end

      # Write all buffered rows to the file and clear the buffer
      def flush
        buffer.each do |row|
          add_virtuals(row)
          values = order.collect { |name| row[name] }
          values.collect! { |v| enclose_value(v) } unless enclose.nil?
          f.write(values.join(separator))
          f.write(eol)
        end
        buffer.clear
      end

      private
      # Get the open file stream. File.open is used so that the configured
      # path is always treated as a file name.
      def f
        @f ||= File.open(file, mode)
      end

      # Get the appropriate mode to open the file stream
      def mode
        append ? 'a' : 'w'
      end

      # Wrap the value in the enclosure string. Occurrences of the
      # enclosure inside the value are prefixed with a backslash; the
      # enclosure is matched literally, so characters which are special in
      # regular expressions can be used as the enclosure.
      def enclose_value(value)
        enclose + value.to_s.gsub(enclose) { |match| "\\" + match } + enclose
      end

      # Add the virtual fields of the mapping to the row
      def add_virtuals(row)
        if mapping[:virtual]
          mapping[:virtual].each do |key,value|
            row[key] = value
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module ETL #:nodoc:
  module Control #:nodoc:
    # Base class of all ETL sources. A subclass supplies <tt>each</tt>,
    # yielding one row at a time; Enumerable provides everything else.
    class Source
      include Enumerable

      attr_accessor :control, :configuration, :definition

      # Look up the source class for a name (string or symbol), for example
      # <tt>:file</tt> gives FileSource.
      def self.class_for_name(name)
        class_name = "#{name.to_s.classify}Source"
        ETL::Control.const_get(class_name)
      end

      # Create the source.
      # * <tt>control</tt>: The control object
      # * <tt>configuration</tt>: The configuration hash
      # * <tt>definition</tt>: The source layout definition
      def initialize(control, configuration, definition)
        @control, @configuration, @definition = control, configuration, definition
      end
    end
  end
end
|
26
|
+
|
27
|
+
Dir[File.dirname(__FILE__) + "/source/*.rb"].each { |file| require(file) }
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module ETL
  module Control
    # Source which reads every row of the table named by the
    # <tt>:table</tt> configuration entry.
    class DatabaseSource < Source
      # Create the source and establish the database connection
      def initialize(control, configuration, definition)
        super
        connect
      end

      # Yields each row selected from the source table
      def each
        rows = ActiveRecord::Base.connection.select_all("SELECT * FROM #{configuration[:table]}")
        rows.each { |row| yield row }
      end

      private
      # Establish the connection. The adapter, user and host fall back to
      # mysql, root and localhost.
      def connect
        settings = {
          :adapter => (configuration[:adapter] || :mysql),
          :username => (configuration[:username] || 'root'),
          :host => (configuration[:host] || 'localhost'),
          :password => configuration[:password],
          :database => configuration[:database]
        }
        ActiveRecord::Base.establish_connection(settings)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module ETL
  module Control
    # Source which reads its rows from files through the parser named by
    # the <tt>:parser</tt> configuration entry.
    class FileSource < Source
      # Create the source and its parser
      def initialize(control, configuration, definition)
        super
        configure
      end

      # Yields each row produced by the parser
      def each
        @parser.each do |row|
          yield row
        end
      end

      private
      # Instantiate the configured parser for this source
      def configure
        parser_class = ETL::Parser::Parser.class_for_name(@configuration[:parser])
        @parser = parser_class.new(self)
      end
    end
  end
end
|
data/lib/etl/engine.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
module ETL
  # The engine runs an ETL process: it executes the pre-processors, reads
  # every row from every source, applies the field transforms, writes the
  # rows to every destination and finally executes the post-processors.
  class Engine

    class << self
      # Process a control file or object with a new engine instance
      def process(control_file)
        new.process(control_file)
      end

      # Set the logger used by the ETL library
      attr_writer :logger

      # Get the logger. Unless one has been assigned, a logger writing
      # debug output to etl.log is created on first use.
      def logger
        unless @logger
          @logger = Logger.new('etl.log')
          @logger.level = Logger::DEBUG
        end
        @logger
      end
    end

    # Process a control file or object.
    # * <tt>control</tt>: A control file path, a File or a Control object
    def process(control)
      control = ETL::Control::Control.resolve(control)

      pre_process(control)

      destinations = control.destinations

      control.sources.each do |source|
        source.each do |row|
          row.each do |name, value|
            # execute transforms
            row[name] = ETL::Transform::Transform.transform(name, value, control.get_transform(name))
          end
          # write the row to the destination
          destinations.each do |destination|
            destination.write(row)
          end
        end
      end

      # Close the destinations only after every source has been read, so
      # that rows of the second and later sources are not written to
      # destinations which have already been closed.
      destinations.each do |destination|
        destination.close
      end

      post_process(control)
    end

    private
    # Execute the pre-processors of the control
    def pre_process(control)
      control.pre_processors.each do |processor|
        processor.process
      end
    end

    # Execute the post-processors of the control
    def post_process(control)
      control.post_processors.each do |processor|
        processor.process
      end
    end
  end
end
|
data/lib/etl/parser.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
module ETL #:nodoc:
  module Parser #:nodoc:
    # Parser for delimited files. Every record of a line is assigned, by
    # position, to the field defined at the same index.
    class DelimitedParser < ETL::Parser::Parser
      include Enumerable

      # Create the parser and read the field definitions of the source.
      # * <tt>source</tt>: The Source object
      def initialize(source)
        super
        configure
      end

      # Yields one hash per line of every file matching the source file
      # pattern
      def each
        options = {}
        Dir.glob(file).each do |path|
          FasterCSV.foreach(path, options) do |raw_row|
            row = {}
            raw_row.each_with_index do |record, position|
              field = fields[position]
              row[field.name] = convert(field.name, record, field.type)
            end
            yield row
          end
        end
      end

      # The list of defined fields, in definition order
      def fields
        @fields ||= []
      end

      private
      # Build the field list from the source definition. An item is either
      # a symbol (the field name) or a hash with :name and :type entries.
      def configure
        source.definition.each do |item|
          if Symbol === item
            fields << Field.new(item)
          elsif Hash === item
            fields << Field.new(item[:name], item[:type])
          else
            raise DefinitionError, "Each field definition must either be a symbol or a hash"
          end
        end
      end

      # A named, typed field of a delimited file
      class Field
        attr_reader :name, :type

        def initialize(name, type=:string)
          @name, @type = name, type
        end
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module ETL #:nodoc:
|
2
|
+
module Parser #:nodoc:
|
3
|
+
# Parser for fixed-width files
class FixedWidthParser < ETL::Parser::Parser
  include Enumerable

  # Initialize the parser
  # * <tt>source</tt>: The source object
  def initialize(source)
    super
    configure
  end

  # Return each row. One hash, keyed by field name, is yielded per line of
  # every file matching the source file pattern.
  def each
    Dir.glob(file).each do |path|
      # File.foreach closes the file once all lines have been read
      File.foreach(path) do |line|
        row = {}
        fields.each do |name, f|
          # A line which ends before the field starts slices to nil; such a
          # field is read as an empty string.
          # TODO make strip optional?
          row[name] = convert(name, line[f.field_start, f.field_length].to_s.strip, f.type)
        end
        yield row
      end
    end
  end

  # Return a map of defined fields
  def fields
    @fields ||= {}
  end

  private
  # Build the field map from the source definition, which maps each field
  # name to its options (:name, :start, :end, :length, :type).
  def configure
    source.definition.each do |field, options|
      fields[field] = FixedWidthField.new(options[:name], options[:start], options[:end], options[:length], options[:type])
    end
  end
end
|
40
|
+
|
41
|
+
# A field of a fixed-width file. The start and end positions given to the
# constructor are 1-based; +field_start+ is stored 0-based.
class FixedWidthField
  attr_reader :name, :field_start, :field_end, :field_length, :type

  # Create the field.
  # * <tt>name</tt>: The field name
  # * <tt>field_start</tt>: The 1-based start position
  # * <tt>field_end</tt>: The end position (takes precedence over the length)
  # * <tt>field_length</tt>: The field length
  # * <tt>type</tt>: The type name, :string if omitted
  #
  # Raises a DefinitionError if neither end nor length is given.
  def initialize(name, field_start, field_end=nil, field_length=nil, type=nil)
    @name = name
    @type = type || :string
    @field_start = field_start - 1

    unless field_end || field_length
      raise DefinitionError, "Either field_end or field_length required"
    end

    if field_end
      @field_end = field_end
      @field_length = field_end - @field_start
    else
      @field_length = field_length
      @field_end = @field_start + field_length
    end
  end
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module ETL
  module Parser
    # Base class of the file parsers
    class Parser
      # Look up the parser class for a name (string or symbol).
      #
      # Example:
      #   <tt>class_for_name(:fixed_width)</tt> returns a FixedWidthParser class
      def self.class_for_name(name)
        class_name = "#{name.to_s.classify}Parser"
        ETL::Parser.const_get(class_name)
      end

      # The source this parser reads for
      attr_reader :source

      def initialize(source)
        @source = source
      end

      # Convert the value to the specified type. Values of any type other
      # than :integer and :float are returned unchanged.
      #
      # Parameters:
      # * <tt>name</tt>: The name of the field
      # * <tt>value</tt>: The value
      # * <tt>type</tt>: The type name (:integer, :float, :string)
      def convert(name, value, type)
        return value.to_i if :integer === type
        return value.to_f if :float === type
        value
      end

      protected
      # The source file (or file pattern), resolved relative to the
      # directory of the control file
      def file
        control_dir = File.dirname(source.control.file)
        File.join(control_dir, source.configuration[:file])
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module ETL
  module Processor
    # Processor which loads a file into a database table with a
    # <tt>LOAD DATA LOCAL INFILE</tt> statement.
    class BulkImportProcessor < ETL::Processor::Processor
      attr_reader :file, :target

      # Initialize the processor and connect to the target database.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map. <tt>:file</tt> is
      #   resolved relative to the directory of the control file and
      #   <tt>:target</tt> is a map describing the target (<tt>:table</tt>,
      #   <tt>:database</tt>, <tt>:adapter</tt>, <tt>:username</tt>,
      #   <tt>:password</tt>, <tt>:host</tt>).
      def initialize(control, configuration)
        super
        @file = File.join(File.dirname(control.file), configuration[:file])
        @target = configuration[:target]
        connect
      end

      # Load the file into the target table inside a transaction
      def process
        conn = ActiveRecord::Base.connection
        conn.transaction do
          # Since LOCAL is used this must be allowed by both the client and server.
          # The path is quoted by the connection so that quotes or
          # backslashes in it cannot break the statement.
          conn.execute("LOAD DATA LOCAL INFILE #{conn.quote(file)} INTO TABLE #{target[:table]}")
        end
      end

      private
      # Get a logger which writes debug output to standard output
      def log
        unless @log
          @log = Logger.new(STDOUT)
          @log.level = Logger::DEBUG
        end
        @log
      end

      # Connect to the database
      def connect
        ActiveRecord::Base.establish_connection(
          :adapter => (target[:adapter] || :mysql),
          :username => (target[:username] || 'root'),
          :host => (target[:host] || 'localhost'),
          :password => target[:password],
          :database => target[:database]
        )
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module ETL #:nodoc:
  module Processor #:nodoc:
    # Common base of the pre and post processors. It keeps the control and
    # the configuration and exposes both to subclasses only.
    class Processor
      def initialize(control, configuration)
        @control, @configuration = control, configuration
      end

      protected
      # Readers for the control object and the configuration map
      attr_reader :control, :configuration
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # Transform which decodes coded values
    class DecodeTransform < ETL::Transform::Transform
      attr_accessor :decode_table_path, :decode_table_delimiter, :default_value

      # Initialize the transform.
      # * <tt>control</tt>: The Control object
      # * <tt>configuration</tt>: The configuration map.
      #   <tt>:decode_table_path</tt> is resolved relative to the directory
      #   of the control file (the resolved path is written back to the
      #   map) and defaults to 'decode.txt';
      #   <tt>:decode_table_delimiter</tt> defaults to ':' and
      #   <tt>:default_value</tt> to 'No Value'.
      def initialize(control, configuration={})
        super

        if configuration[:decode_table_path]
          configuration[:decode_table_path] = File.join(File.dirname(control.file), configuration[:decode_table_path])
        end

        @decode_table_path = (configuration[:decode_table_path] || 'decode.txt')
        @decode_table_delimiter = (configuration[:decode_table_delimiter] || ':')
        @default_value = (configuration[:default_value] || 'No Value')
      end

      # Return the decoded value, or the default value if the code is not
      # in the decode table.
      def transform(value)
        decode_table[value] || default_value
      end

      # Get the decode table, a map of code to value. It is read from the
      # decode table file on first use. Each line holds a code and a value
      # separated by the delimiter; a line with an empty code sets the
      # default value.
      def decode_table
        unless @decode_table
          # Build the table locally so a failed read is not cached as an
          # empty table.
          table = {}
          # File.foreach closes the file once all lines have been read
          File.foreach(decode_table_path) do |line|
            line = line.strip
            # blank lines carry no mapping and must not reset the default
            next if line.empty?
            # split once only, so the value may contain the delimiter
            code, value = line.split(decode_table_delimiter, 2)
            if code && code.length > 0
              table[code] = value
            else
              @default_value = value
            end
          end
          @decode_table = table
        end
        @decode_table
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
module ETL #:nodoc:
  module Transform #:nodoc:
    # One-way transform which replaces a value with the hexadecimal SHA-1
    # digest of that value
    class Sha1Transform < ETL::Transform::Transform
      # Both arguments are handed to the base class unchanged
      def initialize(control, configuration={})
        super(control, configuration)
      end

      # Return the SHA-1 hex digest of +value+
      def transform(value)
        Digest::SHA1.hexdigest(value)
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ETL
  module Transform
    # Base class for transforms. Instances keep the control object and the
    # configuration hash they were built with.
    class Transform
      # Pass +value+ through every entry of +transforms+ in order, feeding each
      # result into the next entry, and return the final result. A Proc entry
      # is invoked with +call+, a Transform entry with +transform+; any other
      # entry raises ControlError. +name+ is the field name and is only
      # referenced by the commented-out debug line.
      def self.transform(name, value, transforms)
        # logger.debug "Transforming field #{name}" if transforms.length > 0
        transforms.inject(value) do |current, step|
          case step
          when Proc
            step.call(current)
          when Transform
            step.transform(current)
          else
            raise ControlError, "Unsupported transform configuration type: #{step}"
          end
        end
      end

      attr_reader :control, :configuration

      # Store the control object and the (optional) configuration hash
      def initialize(control, configuration={})
        @control, @configuration = control, configuration
      end
    end
  end
end
|
data/lib/etl/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: activewarehouse-etl
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2006-12-06 00:00:00 -05:00
|
8
|
+
summary: Pure Ruby ETL package.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: anthonyeden@gmail.com
|
12
|
+
homepage: http://activewarehouse.rubyforge.org/etl
|
13
|
+
rubyforge_project: activewarehouse
|
14
|
+
description: ActiveWarehouse ETL is a pure Ruby Extract-Transform-Load application for loading data into a database.
|
15
|
+
autorequire:
|
16
|
+
default_executable: etl
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: false
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Anthony Eden
|
31
|
+
files:
|
32
|
+
- CHANGELOG
|
33
|
+
- README
|
34
|
+
- Rakefile
|
35
|
+
- bin/etl
|
36
|
+
- lib/etl
|
37
|
+
- lib/etl.rb
|
38
|
+
- lib/etl/commands
|
39
|
+
- lib/etl/control
|
40
|
+
- lib/etl/control.rb
|
41
|
+
- lib/etl/engine.rb
|
42
|
+
- lib/etl/parser
|
43
|
+
- lib/etl/parser.rb
|
44
|
+
- lib/etl/processor
|
45
|
+
- lib/etl/processor.rb
|
46
|
+
- lib/etl/transform
|
47
|
+
- lib/etl/transform.rb
|
48
|
+
- lib/etl/version.rb
|
49
|
+
- lib/etl/commands/etl.rb
|
50
|
+
- lib/etl/control/control.rb
|
51
|
+
- lib/etl/control/destination
|
52
|
+
- lib/etl/control/destination.rb
|
53
|
+
- lib/etl/control/source
|
54
|
+
- lib/etl/control/source.rb
|
55
|
+
- lib/etl/control/destination/database_destination.rb
|
56
|
+
- lib/etl/control/destination/file_destination.rb
|
57
|
+
- lib/etl/control/source/database_source.rb
|
58
|
+
- lib/etl/control/source/file_source.rb
|
59
|
+
- lib/etl/parser/delimited_parser.rb
|
60
|
+
- lib/etl/parser/fixed_width_parser.rb
|
61
|
+
- lib/etl/parser/parser.rb
|
62
|
+
- lib/etl/processor/bulk_import_processor.rb
|
63
|
+
- lib/etl/processor/processor.rb
|
64
|
+
- lib/etl/transform/decode_transform.rb
|
65
|
+
- lib/etl/transform/sha1_transform.rb
|
66
|
+
- lib/etl/transform/transform.rb
|
67
|
+
test_files: []
|
68
|
+
|
69
|
+
rdoc_options:
|
70
|
+
- --exclude
|
71
|
+
- .
|
72
|
+
extra_rdoc_files: []
|
73
|
+
|
74
|
+
executables:
|
75
|
+
- etl
|
76
|
+
extensions: []
|
77
|
+
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
dependencies:
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: rake
|
83
|
+
version_requirement:
|
84
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 0.7.1
|
89
|
+
version:
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
name: activesupport
|
92
|
+
version_requirement:
|
93
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: 1.3.1.5618
|
98
|
+
version:
|
99
|
+
- !ruby/object:Gem::Dependency
|
100
|
+
name: activerecord
|
101
|
+
version_requirement:
|
102
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: 1.14.4.5618
|
107
|
+
version:
|
108
|
+
- !ruby/object:Gem::Dependency
|
109
|
+
name: fastercsv
|
110
|
+
version_requirement:
|
111
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: 1.0.0
|
116
|
+
version:
|