simple_etl 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +20 -0
- data/.rspec +1 -0
- data/Gemfile +14 -0
- data/Guardfile +24 -0
- data/README.md +117 -0
- data/Rakefile +11 -0
- data/lib/simple_etl/source/base.rb +72 -0
- data/lib/simple_etl/source/base_context.rb +47 -0
- data/lib/simple_etl/source/errorable.rb +7 -0
- data/lib/simple_etl/source/exceptions.rb +11 -0
- data/lib/simple_etl/source/field_caster.rb +51 -0
- data/lib/simple_etl/source/fixed_width/context.rb +19 -0
- data/lib/simple_etl/source/fixed_width/parser.rb +19 -0
- data/lib/simple_etl/source/parse_result.rb +28 -0
- data/lib/simple_etl/source/row.rb +21 -0
- data/lib/simple_etl/source.rb +29 -0
- data/lib/simple_etl/version.rb +3 -0
- data/lib/simple_etl.rb +7 -0
- data/simple_etl.gemspec +26 -0
- data/spec/fixtures/sample.stl +3 -0
- data/spec/lib/simple_etl/source/base_context_spec.rb +57 -0
- data/spec/lib/simple_etl/source/base_spec.rb +108 -0
- data/spec/lib/simple_etl/source/field_caster_spec.rb +67 -0
- data/spec/lib/simple_etl/source/fixed_width/context_spec.rb +48 -0
- data/spec/lib/simple_etl/source/fixed_width/parser_spec.rb +25 -0
- data/spec/lib/simple_etl/source/parse_result_spec.rb +18 -0
- data/spec/lib/simple_etl/source/row_spec.rb +12 -0
- data/spec/lib/simple_etl/source_spec.rb +33 -0
- data/spec/spec_helper.rb +12 -0
- metadata +172 -0
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
# Guard configuration: maps changed files to the spec paths to re-run.
guard('rspec', :version => 2) do
  # Specs themselves, library files and the shared helper.
  watch(%r{^spec/.+_spec\.rb$})
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/lib/#{match[1]}_spec.rb"
  end
  watch('spec/spec_helper.rb') do
    "spec"
  end

  # Rails example
  watch(%r{^app/(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end
  watch(%r{^app/(.*)(\.erb|\.haml)$}) do |match|
    "spec/#{match[1]}#{match[2]}_spec.rb"
  end
  watch(%r{^app/controllers/(.+)_(controller)\.rb$}) do |match|
    [
      "spec/routing/#{match[1]}_routing_spec.rb",
      "spec/#{match[2]}s/#{match[1]}_#{match[2]}_spec.rb",
      "spec/acceptance/#{match[1]}_spec.rb"
    ]
  end
  watch(%r{^spec/support/(.+)\.rb$}) do
    "spec"
  end
  watch('config/routes.rb') do
    "spec/routing"
  end
  watch('app/controllers/application_controller.rb') do
    "spec/controllers"
  end

  # Capybara request specs
  watch(%r{^app/views/(.+)/.*\.(erb|haml)$}) do |match|
    "spec/requests/#{match[1]}_spec.rb"
  end

  # Turnip features and steps
  watch(%r{^spec/acceptance/(.+)\.feature$})
  watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |match|
    feature = Dir[File.join("**/#{match[1]}.feature")].first
    feature || 'spec/acceptance'
  end
end
|
24
|
+
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
simple_etl
|
2
|
+
==========
|
3
|
+
|
4
|
+
An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
|
5
|
+
|
6
|
+
Simple ETL 'would be' (:D) framework-agnostic and easy to use.
|
7
|
+
|
8
|
+
|
9
|
+
## Source
|
10
|
+
|
11
|
+
The Source namespace is responsible for parsing input files.
|
12
|
+
|
13
|
+
First of all you have to define a "source template" inside a definition file (for example _my_template.stl_):
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
define :format_name do
|
17
|
+
field :name
|
18
|
+
field :surname
|
19
|
+
end
|
20
|
+
```
|
21
|
+
|
22
|
+
Then you will load the template with the following code:
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
my_template = SimpleEtl::Source.load './etl/my_template.stl'
|
26
|
+
```
|
27
|
+
|
28
|
+
At this point you can parse a source and process the result, as in the following code:
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
my_template.parse '....', :type => :inline # load data inline
|
32
|
+
result = my_template.parse 'source.dat' # load from file
|
33
|
+
|
34
|
+
if result.valid?
|
35
|
+
result.rows.each do |row|
|
36
|
+
puts "|\t#{row.name}\t|\t#{row.surname}\t|"
|
37
|
+
end
|
38
|
+
puts "Parse Completed!"
|
39
|
+
else
|
40
|
+
result.errors.each do |error|
|
41
|
+
puts "Error while parsing line #{error.row_index}: #{error.message}"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
```
|
45
|
+
|
46
|
+
As you can see the result is valid if there are no errors.
|
47
|
+
|
48
|
+
The rows array contains all the parsed rows. Each row contains the parsed attributes as accessors.
|
49
|
+
|
50
|
+
The errors array contains all the generated errors. Each error is an object with 'row_index', 'message' and 'exception' properties.
|
51
|
+
|
52
|
+
## Structure of the template definition
|
53
|
+
|
54
|
+
A template definition is composed of three layers:
|
55
|
+
- raw fields
|
56
|
+
- transformations
|
57
|
+
- generators
|
58
|
+
|
59
|
+
### Fields
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
field :name
|
63
|
+
field :surname, :type => :string, :required => true
|
64
|
+
```
|
65
|
+
|
66
|
+
By default the type is 'object', which means the value is not converted in any way. Other possible types are:
|
67
|
+
|
68
|
+
- *string*: surrounding whitespace is stripped from the field;
|
69
|
+
|
70
|
+
- *integer*: field is stripped. If the input value is nil or empty, nil is returned; it is converted to an integer if the value contains only digits; a CastError is raised otherwise;
|
71
|
+
|
72
|
+
- *float*: field is stripped. If the input value is nil or empty, nil is returned; it is converted to a float if the value is a number with an optional decimal part (either '.' or ',' as separator); a CastError is raised otherwise;
|
73
|
+
|
74
|
+
- *boolean*: field is stripped. If the input value is nil or empty, nil is returned; it is converted to a boolean if the input value is one of true, false, 1, 0; a CastError is raised otherwise;
|
75
|
+
|
76
|
+
The template definition provides a helper for each defined type, so you can write:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
string :name
|
80
|
+
integer :age
|
81
|
+
```
|
82
|
+
|
83
|
+
For each helper, an additional 'required' helper will also be available:
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
required_string :name
|
87
|
+
required_integer :age
|
88
|
+
```
|
89
|
+
|
90
|
+
Remember: *every format plugin will define its own field syntax, so remember to read the [Wiki](https://github.com/nicolaracco/simple_etl/wiki)*
|
91
|
+
|
92
|
+
|
93
|
+
### Transformers and generators
|
94
|
+
|
95
|
+
They are functions that help you manipulate the parsed raw data:
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
transform(:name) { |name| name.downcase } # => the name field is converted to lowercase
|
99
|
+
|
100
|
+
# a full_name field will be present in the row
|
101
|
+
generate :full_name do |row|
|
102
|
+
"#{row.name} #{row.surname}"
|
103
|
+
end
|
104
|
+
|
105
|
+
generate :company do |row|
|
106
|
+
if cmp = Company.find(row.company_id)
|
107
|
+
cmp
|
108
|
+
else
|
109
|
+
raise ParseError.new "Cannot find a company with id #{row.company_id}"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
```
|
113
|
+
|
114
|
+
A transformer is a code block that transforms a particular value. It is executed as soon as the input value is parsed (if it is valid).
|
115
|
+
|
116
|
+
A generator is a code block that generates a new property for the current row.
|
117
|
+
All the generators are executed when the entire row has been read and transformed.
|
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
|
4
|
+
# Define the `spec` task directly with RSpec's Rake integration.
# The task used to be created inside the body of another task that was
# also named :spec, so the real RSpec action was only registered while the
# task was already running; defining it at load time removes that
# dependence on Rake picking up actions appended mid-run.
desc "Run those specs"
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = %w{--colour --format progress}
  t.pattern    = 'spec/**/*_spec.rb'
  t.rspec_path = 'bundle exec rspec'
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Abstract parser: turns raw rows into Row objects using the fields,
    # transformations and generators declared on its context.
    #
    # Subclasses must implement #fetch_field_from_row, which extracts the raw
    # value of one field from one raw row.
    class Base
      attr_reader :errors

      attr_reader :context

      # context - object exposing +fields+, +transformations+ and +generators+.
      # block   - optional template definition, evaluated with +context+ as self.
      def initialize context, &block
        @errors  = []
        @context = context
        context.instance_eval(&block) if block
      end

      # Extracts the raw value of +field+ from +row+. Must be overridden.
      def fetch_field_from_row row, field
        raise 'Abstract Method'
      end

      # Parses a single raw row and appends the resulting Row to the result.
      #
      # args[:row_index] - index used in error messages (optional).
      # args[:result]    - ParseResult to append to (a new one by default).
      #
      # ParseError raised while parsing a field or running a generator is
      # recorded on the result instead of being propagated.
      # Returns the ParseResult.
      def parse_row row, args = {}
        row_index = args[:row_index]
        result    = args[:result] || ParseResult.new
        row_obj   = Row.new
        # Remember how many errors existed before this row, so that failures
        # of previous rows sharing the same result do not prevent the
        # generators of this row from running.
        errors_before = result.errors.size

        context.fields.each do |field|
          begin
            row_obj.attributes[field[:name]] = parse_field row, field, row_obj
          rescue SimpleEtl::Source::ParseError => e
            row_info = row_index && "row #{row_index}" || ''
            result.append_error row_index, "Error parsing #{row_info}, column #{field[:name]}: #{e.message}", e
          end
        end

        # Generators only run when every field of *this* row was parsed.
        if result.errors.size == errors_before
          context.generators.each do |field|
            begin
              row_obj.attributes[field[:name]] = generate_field field, row_obj
            rescue SimpleEtl::Source::ParseError => e
              row_info = row_index && "for row #{row_index}" || ''
              result.append_error row_index, "Error generating #{field[:name]} #{row_info}: #{e.message}", e
            end
          end
        end

        result.rows << row_obj
        result
      end

      # Parses every line of +src+.
      #
      # src          - inline data when args[:type] == :inline, otherwise a
      #                path read with File.readlines.
      # args[:result] - ParseResult to append to (a new one by default).
      #
      # Returns the ParseResult.
      def parse src, args = {}
        result = args[:result] || ParseResult.new
        lines  = args[:type] == :inline ? src.lines : File.readlines(src)
        lines.map(&:chomp).each_with_index do |row, index|
          parse_row row, :row_index => index, :result => result
        end
        result
      end

      # Fetches, casts and (when a transformation is registered for the field)
      # transforms the value of +field+ in +row+. The transformation runs with
      # +row_obj+ as self and receives the cast value.
      #
      # Raises FieldRequiredError when a required field is nil or ''.
      def parse_field row, field, row_obj
        value = FieldCaster.send "parse_#{field[:type]}", fetch_field_from_row(row, field)
        raise FieldRequiredError if field[:required] && (value.nil? || value == '')

        transformer = context.transformations[field[:name]]
        value = row_obj.instance_exec(value, &transformer) if transformer
        value
      end

      # Runs the generator block of +field+ with +row_obj+ as self.
      #
      # Blocks that declare parameters also receive the row as first argument
      # (the `generate :x do |row| ... end` form). Blocks without parameters
      # are called with no arguments so that zero-arity lambdas keep working.
      def generate_field field, row_obj
        block = field[:block]
        if block.arity.zero?
          row_obj.instance_exec(&block)
        else
          row_obj.instance_exec(row_obj, &block)
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Collects the declarations of a source template: raw fields,
    # per-field transformations and row generators.
    #
    # Besides #field, one helper per FieldCaster type is available through
    # method_missing (+string+, +integer+, ...), each with a +required_+
    # variant (+required_string+, ...).
    class BaseContext
      # Splits a helper name into its optional "required_" prefix and type.
      HELPER_PATTERN = /\A(required_)?(\w+)\z/

      attr_reader :fields
      attr_reader :transformations
      attr_reader :generators

      def initialize
        @fields = []
        @transformations = {}
        @generators = []
      end

      # Declares a field.
      #
      # args - options merged over the defaults :required => false and
      #        :type => :object.
      #
      # Raises FieldArgumentError when FieldCaster has no parser for the type.
      def field name, args = {}
        args = {:required => false, :type => :object}.merge args
        unless FieldCaster.respond_to? "parse_#{args[:type]}"
          raise FieldArgumentError.new "#{name}:type (#{args[:type]}) is unknown"
        end
        fields << { :name => name }.merge(args)
      end

      # Registers +block+ as the transformation of an already declared field.
      #
      # Raises FieldNotFoundError when no field with that name was declared.
      def transform field, &block
        field = field.to_sym
        raise FieldNotFoundError.new(field) unless
          fields.detect { |d| d[:name] == field }
        transformations[field] = block
      end

      # Registers a generator: +block+ produces the value of attribute +name+.
      def generate name, args = {}, &block
        generators << args.merge(:name => name, :block => block)
      end

      # Implements the typed helpers by delegating to #field with :type (and
      # :required for the +required_+ variants) added to the options.
      def method_missing name, *params, &block
        type, required = helper_type_for name
        return super unless type

        # Work on a copy so the caller's options hash is left untouched.
        options = params.last.is_a?(Hash) ? params.pop.dup : {}
        options[:type] = type
        options[:required] = true if required
        field(*(params << options))
      end

      # Keeps respond_to? consistent with the helpers served by method_missing.
      def respond_to_missing? name, include_private = false
        !helper_type_for(name).nil? || super
      end

      private

      # Returns [type, required] when +name+ is a typed helper known to
      # FieldCaster, nil otherwise.
      def helper_type_for name
        md = HELPER_PATTERN.match name.to_s
        return nil unless md
        type = md[2].to_sym
        FieldCaster.respond_to?("parse_#{type}") ? [type, !md[1].nil?] : nil
      end
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Root of every error raised by the Source namespace.
    # Derives from StandardError (not Exception) so that a plain `rescue`
    # handles it and it is never confused with interpreter-level exceptions.
    class Error < StandardError; end

    # Raised while parsing or generating row data; these are the errors
    # collected per row by the parsers.
    class ParseError < Error; end

    # Raised when a transformation targets a field that was never declared.
    class FieldNotFoundError < Error; end
    # Raised when a field is declared with missing or invalid arguments.
    class FieldArgumentError < ParseError; end
    # Raised when a required field has no value.
    class FieldRequiredError < ParseError; end
    # Raised when a raw value cannot be converted to the declared type.
    class CastError < ParseError; end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Converts raw field values to the declared field type.
    #
    # Every public parse_<type> method doubles as the declaration of a field
    # type: contexts look the methods up by name. Helpers that are not types
    # must therefore stay private and must not be named parse_*.
    module FieldCaster
      extend self

      # Returns the value untouched.
      def parse_object o; o; end

      # Returns the value without surrounding whitespace (nil stays nil).
      def parse_string o
        o && o.strip
      end

      # Returns true for 'true'/'1', false for 'false'/'0' (surrounding
      # whitespace ignored) and nil for nil or blank input.
      #
      # Raises CastError for any other value.
      def parse_boolean o
        return nil if blank_input? o

        case o.strip
        when 'true', '1'  then true
        when 'false', '0' then false
        else raise(CastError.new "Cannot cast '#{o}' to 'boolean'")
        end
      end

      # Returns the Integer for a value made only of digits (surrounding
      # whitespace ignored) and nil for nil or blank input.
      #
      # Raises CastError for any other value.
      def parse_integer o
        return nil if blank_input? o

        # \A and \z anchor the whole value; ^ and $ would accept any single
        # matching line of a multi-line value.
        unless o =~ /\A\s*\d+\s*\z/
          raise(CastError.new "Cannot cast '#{o}' to 'integer'")
        end
        o.to_i
      end

      # Returns the Float for digits with an optional decimal part separated
      # by '.' or ',' (surrounding whitespace ignored) and nil for nil or
      # blank input.
      #
      # Raises CastError for any other value.
      def parse_float o
        return nil if blank_input? o

        unless o =~ /\A\s*\d*([\.\,]\d+)?\s*\z/
          raise(CastError.new "Cannot cast '#{o}' to 'float'")
        end
        o.tr(',', '.').to_f
      end

      private

      # True when the value is nil or contains nothing but whitespace.
      def blank_input? o
        o.nil? || !(o =~ /\A\s*\z/).nil?
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module SimpleEtl
  module Source
    module FixedWidth
      # Template context for fixed-width sources: every field is declared
      # with a start offset and a length (or :eol).
      class Context < SimpleEtl::Source::BaseContext
        # Declares a fixed-width field.
        #
        # start  - required; must be convertible with Integer().
        # length - required; :eol or a value convertible with Integer().
        # args   - extra options, merged over :start and :length.
        #
        # Raises FieldArgumentError when start or length is missing or is
        # not an integer.
        def field(name, start, length, args = {})
          raise(FieldArgumentError.new("#{name}::start required")) unless start
          raise(FieldArgumentError.new("#{name}::length required")) unless length

          start = begin
            Integer(start)
          rescue StandardError
            raise(FieldArgumentError.new("#{name}::start (#{start}) is not integer"))
          end

          unless length == :eol
            length = begin
              Integer(length)
            rescue StandardError
              raise(FieldArgumentError.new("#{name}::length (#{length}) is not integer"))
            end
          end

          position = { :start => start, :length => length }
          super(name, position.merge(args))
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module SimpleEtl
  module Source
    module FixedWidth
      # Parser for fixed-width rows; its template block is evaluated in a
      # FixedWidth::Context.
      class Parser < SimpleEtl::Source::Base
        def initialize(&block)
          super(Context.new, &block)
        end

        # Slices the field out of the row using the field's :start and
        # :length; a :length of :eol reads up to the end of the row.
        def fetch_field_from_row(row, field)
          offset = field[:start]
          width  = field[:length] == :eol ? row.length - offset : field[:length]
          row[offset, width]
        end
      end
    end

    # Make the parser available to Source.define as :fixed_width.
    formats[:fixed_width] = FixedWidth::Parser
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module SimpleEtl
  module Source
    # Outcome of a parse: the rows that were built and the errors that were
    # collected along the way.
    class ParseResult
      attr_reader :errors, :rows

      def initialize
        @errors = []
        @rows   = []
      end

      # True while no error has been recorded.
      def valid?
        errors.empty?
      end

      # Wraps +attributes+ in a Row and stores it.
      def append_row(attributes)
        rows.push Row.new(attributes)
      end

      # Records an error as an object answering to row_index, message and
      # exception.
      def append_error(row_index, message, e)
        details = { :row_index => row_index, :message => message, :exception => e }
        errors.push OpenStruct.new(details)
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # A parsed row. Every key of +attributes+ is exposed as a reader and a
    # writer (row.name / row.name = value) through method_missing, so names
    # that collide with methods already defined on the object are only
    # reachable through +attributes+.
    class Row
      # Splits a method name into attribute name and optional "=" suffix.
      ACCESSOR_PATTERN = /\A(\w+)(=)?\z/

      attr_reader :attributes

      def initialize attributes = {}
        @attributes = attributes
      end

      # Serves readers and writers for existing attribute keys; anything
      # else falls through to the default NoMethodError.
      def method_missing name, *args, &block
        field, writer = attribute_for name
        return super unless field

        writer ? (attributes[field] = args.first) : attributes[field]
      end

      # Keeps respond_to? (and Object#method) consistent with the accessors
      # served by method_missing.
      def respond_to_missing? name, include_private = false
        !attribute_for(name).nil? || super
      end

      private

      # Returns [key, writer?] when +name+ addresses an existing attribute,
      # nil otherwise.
      def attribute_for name
        md = ACCESSOR_PATTERN.match name.to_s
        return nil unless md
        key = md[1].to_sym
        attributes.has_key?(key) ? [key, !md[2].nil?] : nil
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module SimpleEtl
  # Entry point for source templates: keeps the registry of available
  # formats and builds parsers from template definitions.
  module Source
    extend self

    @@formats = {}

    # Registry mapping a format name (e.g. :fixed_width) to its parser class.
    def formats; @@formats; end

    # Builds a parser for +format+, evaluating +block+ as its template.
    #
    # Raises RuntimeError when the format is not registered.
    def define format, &block
      format_class = formats[format]
      raise "Format #{format} not found" unless format_class
      format_class.new(&block)
    end

    # Evaluates the template definition stored in +file+ and returns the
    # value of its last expression (the parser, for a file made of a single
    # +define+ call).
    #
    # SECURITY: the file content is executed as Ruby code inside this
    # module; only load templates from trusted locations.
    #
    # Raises RuntimeError when the file does not exist.
    def load file
      raise "Cannot find file: #{file}" unless File.exist? file
      # Pass the path and first line number so that errors raised by the
      # template point at the template file instead of "(eval)".
      module_eval File.read(file), file.to_s, 1
    end
  end
end
|
21
|
+
|
22
|
+
dir = File.expand_path File.dirname __FILE__
|
23
|
+
%w(exceptions field_caster row parse_result base_context base).each do |file|
|
24
|
+
require File.join dir, "source/#{file}"
|
25
|
+
end
|
26
|
+
|
27
|
+
%w(context parser).each do |file|
|
28
|
+
require File.join dir, "source/fixed_width/#{file}"
|
29
|
+
end
|
data/lib/simple_etl.rb
ADDED
data/simple_etl.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "simple_etl/version"
|
4
|
+
|
5
|
+
# Gem metadata and packaging rules for simple_etl.
Gem::Specification.new do |s|
  s.name        = "simple_etl"
  s.version     = SimpleEtl::VERSION
  s.authors     = ["Nicola Racco"]
  s.email       = ["nicola@nicolaracco.com"]
  # NOTE(review): the homepage was empty; this URL is derived from the wiki
  # link in the README — confirm it is the canonical project page.
  s.homepage    = "https://github.com/nicolaracco/simple_etl"
  s.summary     = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
  s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}

  s.rubyforge_project = "simple_etl"

  # Package whatever is tracked by git.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  #s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency 'rake'
  s.add_development_dependency 'guard'
  s.add_development_dependency 'growl'
  s.add_development_dependency 'guard-rspec'
  s.add_development_dependency 'rspec'
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
  module Source
    # Specs for the template DSL collected by BaseContext.
    describe BaseContext do
      subject { BaseContext.new }

      describe '#field' do
        it 'should append the field definition in the fields list' do
          subject.field(:name)
          definition = { :name => :name, :required => false, :type => :object }
          subject.fields.should =~ [definition]
        end

        it 'should raise error if type is present and unknown' do
          expect do
            subject.field(:name, :type => 'pippo')
          end.to raise_error(FieldArgumentError)
        end
      end

      describe 'field helpers' do
        it 'should have an helper for each caster' do
          subject.string(:name)
          definition = { :name => :name, :type => :string, :required => false }
          subject.fields.should =~ [definition]
        end

        it 'should have a required helper for each caster' do
          subject.required_string(:name)
          definition = { :name => :name, :type => :string, :required => true }
          subject.fields.should =~ [definition]
        end
      end

      describe 'transform' do
        it 'should append to transformations' do
          subject.field(:name)
          expect do
            subject.transform(:name) { }
          end.to change(subject.transformations, :size).by(1)
        end

        it 'should raise error if field is not specified' do
          expect do
            subject.transform(:name) { }
          end.to raise_error(FieldNotFoundError)
        end
      end

      describe 'generate' do
        it 'should append to generators' do
          expect do
            subject.generate(:name) { }
          end.to change(subject.generators, :size).by(1)
        end
      end
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
  module Source
    # Specs for the abstract parser. fetch_field_from_row is stubbed to
    # return the raw row, so every field value equals the row itself.
    describe Base do
      subject { Base.new BaseContext.new }

      describe '#parse_field' do
        before do
          subject.stub :fetch_field_from_row do |row, field|
            row
          end
        end

        it 'should socks at brush' do
          expect {
            subject.parse_field 'ITM', { :name => 'IT', :type => :object }, Row.new
          }.to_not raise_error
        end

        context 'when there is a transformation' do
          it 'should apply it' do
            subject.context.field :foo
            subject.context.transform :foo do |s|
              "pippo"
            end
            subject.parse_field('ITM', { :name => :foo, :type => :object }, Row.new).should == "pippo"
          end

          it 'should set the context to the current row object' do
            context = subject.context
            context.field :foo
            context.transform(:foo) { |s| self.class.should == Row }
            subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
          end

          it 'passes the field value as argument' do
            subject.stub(:fetch_field_from_row) { |row, field| 'pippo' }
            subject.context.field :foo
            subject.context.transform(:foo) { |s| s.should == 'pippo' }
            subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
          end
        end
      end

      describe '#generate_field' do
        let(:r) { Row.new :foo => 'w' }

        it 'should set the context to the current row' do
          block = lambda { self.class.should == Row }
          subject.generate_field({ :name => :mio, :block => block }, r)
        end

        it 'should socks at brush' do
          subject.generate_field({ :name => :mio, :block => lambda { foo } }, r).should == r.foo
        end

        it 'can raise ParseError without specifing path' do
          block = lambda { raise ParseError }
          expect {
            subject.generate_field({ :name => :mio, :block => block }, r)
          }.to raise_error ParseError
        end
      end

      describe '#parse_row' do
        before do
          subject.stub :fetch_field_from_row do |row, field|
            row
          end
        end

        it 'should return a ParseResult object' do
          subject.parse_row('').should be_kind_of ParseResult
        end

        it 'should insert attributes in row' do
          subject.context.field :sample
          subject.parse_row('ITM').rows.first.sample.should == 'ITM'
        end

        context 'when row is not valid' do
          it 'should not execute generators' do
            subject.stub :fetch_field_from_row do |row, field|
              raise ParseError
            end
            subject.context.field :sample
            subject.context.generate :foo do |s|
              "pippo"
            end
            # Inspect the parsed row itself: a generator that ran would have
            # stored its value under :foo in the row attributes. (Checking
            # the public methods of the ParseResult could never fail.)
            subject.parse_row('ITM').rows.first.attributes.should_not have_key :foo
          end
        end

        context 'when row is valid' do
          it 'launches the generator giving the row instance' do
            subject.context.field :sample
            # it will raise error if s not have sample
            subject.context.generate :foo do
              sample
            end
            expect { subject.parse_row('ITM') }.to_not raise_error
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
  module Source
    # Specs for the raw-value casters.
    describe FieldCaster do
      subject { FieldCaster }

      describe '::parse_integer' do
        it 'returns nil if input is blank' do
          subject.parse_integer(nil).should be_nil
          subject.parse_integer('').should be_nil
        end

        it 'returns a number if input has only digits' do
          subject.parse_integer('43').should == 43
          subject.parse_integer('00048').should == 48
        end

        it 'automatically strips spaces' do
          subject.parse_integer(' 043 ').should == 43
        end

        it 'returns error with chars' do
          expect {
            subject.parse_integer 'a'
          }.to raise_error CastError
        end

        it 'returns error with commas' do
          expect {
            subject.parse_integer '1.2'
          }.to raise_error CastError
        end
      end

      describe '::parse_float' do
        it 'returns nil if input is blank' do
          subject.parse_float(nil).should be_nil
          subject.parse_float('').should be_nil
        end

        it 'returns a number if input has only digits' do
          subject.parse_float('43').should == 43.0
          subject.parse_float('00048').should == 48.0
        end

        it 'returns a number if input has also ONE separator' do
          subject.parse_float('43.1').should == 43.1
          subject.parse_float('43,1').should == 43.1
          expect {
            subject.parse_float '43.2.1'
          }.to raise_error CastError
        end

        # The two examples below used to call parse_integer, leaving the
        # float caster untested for these cases.
        it 'automatically strips spaces' do
          subject.parse_float(' 043 ').should == 43.0
        end

        it 'returns error in every other situation' do
          expect {
            subject.parse_float 'a'
          }.to raise_error CastError
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
  module Source
    module FixedWidth
      # Specs for the argument validation of fixed-width field declarations.
      describe Context do
        subject { FixedWidth::Parser.new.context }

        describe '#field' do
          it 'should append the field definition in the fields list' do
            subject.field(:name, 10, 20)
            definition = {
              :name => :name, :start => 10, :length => 20,
              :required => false, :type => :object
            }
            subject.fields.should =~ [definition]
          end

          it 'should raise error if start arg is missing' do
            expect do
              subject.field(:name, nil, 10)
            end.to raise_error(FieldArgumentError)
          end

          it 'should raise error if length arg is missing' do
            expect do
              subject.field(:name, 10, nil)
            end.to raise_error(FieldArgumentError)
          end

          it 'should raise error if start is not an integer' do
            expect do
              subject.field(:name, 'pippo', 20)
            end.to raise_error(FieldArgumentError)
          end

          it 'should raise error if length is not an integer' do
            expect do
              subject.field(:name, 5, 'pippo')
            end.to raise_error(FieldArgumentError)
          end

          it 'should accept the special :eol length' do
            expect do
              subject.field(:name, 5, :eol)
            end.to_not raise_error
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
  module Source
    module FixedWidth
      # Specs for the fixed-width parser.
      describe Parser do
        subject { FixedWidth::Parser.new }

        describe '::new' do
          it 'should execute the block in the context' do
            FixedWidth::Parser.new do
              self.class.should == FixedWidth::Context
            end
          end
        end

        describe '#fetch_field_from_row' do
          it 'should use field attributes to fetch a field' do
            field = { :start => 0, :length => 2 }
            subject.fetch_field_from_row('ITM', field).should == 'IT'
          end

          it 'should fetch till the end of line if length is :eol' do
            field = { :start => 1, :length => :eol }
            subject.fetch_field_from_row('ITM', field).should == 'TM'
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
  module Source
    # Specs for the validity flag of a parse result.
    describe ParseResult do
      subject { ParseResult.new }

      describe '#valid?' do
        it 'is true if there are no errors' do
          subject.should be_valid
        end

        it 'is false if there are errors' do
          subject.append_error(0, 'm', Exception.new)
          subject.should_not be_valid
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
  # Specs for the template entry points. The failure examples name the
  # expected error class and message: a bare raise_error would also pass on
  # unrelated failures such as a NameError caused by a typo.
  describe Source do
    describe '::define' do
      it 'should return a new template' do
        tpl = SimpleEtl::Source.define :fixed_width do
          integer :name, 10, 12
        end
        tpl.context.fields.size.should == 1
      end

      it 'should raise error if format not exist' do
        expect {
          SimpleEtl::Source.define :piipo do; end
        }.to raise_error RuntimeError, /not found/
      end
    end

    describe '::load' do
      it 'should raise an error if file not exist' do
        expect {
          SimpleEtl::Source.load 'pappopappo'
        }.to raise_error RuntimeError, /Cannot find file/
      end

      it 'should load correctly' do
        file = File.join FIXTURES_PATH, "sample.stl"
        SimpleEtl::Source.load(file).should be_kind_of SimpleEtl::Source::Base
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Load the library under test
|
2
|
+
require File.dirname(__FILE__) + '/../lib/simple_etl'
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
Dir[File.expand_path(File.join(File.dirname(__FILE__),'support','**','*.rb'))].each {|f| require f}
|
6
|
+
|
7
|
+
FIXTURES_PATH = File.expand_path File.join File.dirname(__FILE__), "fixtures"
|
8
|
+
|
9
|
+
RSpec.configure do |conf|
|
10
|
+
conf.before :suite do
|
11
|
+
end
|
12
|
+
end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple_etl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Nicola Racco
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-07-03 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rake
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: guard
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 3
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: growl
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
type: :development
|
61
|
+
version_requirements: *id003
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: guard-rspec
|
64
|
+
prerelease: false
|
65
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :development
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: rspec
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
type: :development
|
89
|
+
version_requirements: *id005
|
90
|
+
description: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
|
91
|
+
email:
|
92
|
+
- nicola@nicolaracco.com
|
93
|
+
executables: []
|
94
|
+
|
95
|
+
extensions: []
|
96
|
+
|
97
|
+
extra_rdoc_files: []
|
98
|
+
|
99
|
+
files:
|
100
|
+
- .gitignore
|
101
|
+
- .rspec
|
102
|
+
- Gemfile
|
103
|
+
- Guardfile
|
104
|
+
- README.md
|
105
|
+
- Rakefile
|
106
|
+
- lib/simple_etl.rb
|
107
|
+
- lib/simple_etl/source.rb
|
108
|
+
- lib/simple_etl/source/base.rb
|
109
|
+
- lib/simple_etl/source/base_context.rb
|
110
|
+
- lib/simple_etl/source/errorable.rb
|
111
|
+
- lib/simple_etl/source/exceptions.rb
|
112
|
+
- lib/simple_etl/source/field_caster.rb
|
113
|
+
- lib/simple_etl/source/fixed_width/context.rb
|
114
|
+
- lib/simple_etl/source/fixed_width/parser.rb
|
115
|
+
- lib/simple_etl/source/parse_result.rb
|
116
|
+
- lib/simple_etl/source/row.rb
|
117
|
+
- lib/simple_etl/version.rb
|
118
|
+
- simple_etl.gemspec
|
119
|
+
- spec/fixtures/sample.stl
|
120
|
+
- spec/lib/simple_etl/source/base_context_spec.rb
|
121
|
+
- spec/lib/simple_etl/source/base_spec.rb
|
122
|
+
- spec/lib/simple_etl/source/field_caster_spec.rb
|
123
|
+
- spec/lib/simple_etl/source/fixed_width/context_spec.rb
|
124
|
+
- spec/lib/simple_etl/source/fixed_width/parser_spec.rb
|
125
|
+
- spec/lib/simple_etl/source/parse_result_spec.rb
|
126
|
+
- spec/lib/simple_etl/source/row_spec.rb
|
127
|
+
- spec/lib/simple_etl/source_spec.rb
|
128
|
+
- spec/spec_helper.rb
|
129
|
+
homepage: ""
|
130
|
+
licenses: []
|
131
|
+
|
132
|
+
post_install_message:
|
133
|
+
rdoc_options: []
|
134
|
+
|
135
|
+
require_paths:
|
136
|
+
- lib
|
137
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
138
|
+
none: false
|
139
|
+
requirements:
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
hash: 3
|
143
|
+
segments:
|
144
|
+
- 0
|
145
|
+
version: "0"
|
146
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
|
+
none: false
|
148
|
+
requirements:
|
149
|
+
- - ">="
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
hash: 3
|
152
|
+
segments:
|
153
|
+
- 0
|
154
|
+
version: "0"
|
155
|
+
requirements: []
|
156
|
+
|
157
|
+
rubyforge_project: simple_etl
|
158
|
+
rubygems_version: 1.8.23
|
159
|
+
signing_key:
|
160
|
+
specification_version: 3
|
161
|
+
summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
|
162
|
+
test_files:
|
163
|
+
- spec/fixtures/sample.stl
|
164
|
+
- spec/lib/simple_etl/source/base_context_spec.rb
|
165
|
+
- spec/lib/simple_etl/source/base_spec.rb
|
166
|
+
- spec/lib/simple_etl/source/field_caster_spec.rb
|
167
|
+
- spec/lib/simple_etl/source/fixed_width/context_spec.rb
|
168
|
+
- spec/lib/simple_etl/source/fixed_width/parser_spec.rb
|
169
|
+
- spec/lib/simple_etl/source/parse_result_spec.rb
|
170
|
+
- spec/lib/simple_etl/source/row_spec.rb
|
171
|
+
- spec/lib/simple_etl/source_spec.rb
|
172
|
+
- spec/spec_helper.rb
|