simple_etl 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +20 -0
- data/.rspec +1 -0
- data/Gemfile +14 -0
- data/Guardfile +24 -0
- data/README.md +117 -0
- data/Rakefile +11 -0
- data/lib/simple_etl/source/base.rb +72 -0
- data/lib/simple_etl/source/base_context.rb +47 -0
- data/lib/simple_etl/source/errorable.rb +7 -0
- data/lib/simple_etl/source/exceptions.rb +11 -0
- data/lib/simple_etl/source/field_caster.rb +51 -0
- data/lib/simple_etl/source/fixed_width/context.rb +19 -0
- data/lib/simple_etl/source/fixed_width/parser.rb +19 -0
- data/lib/simple_etl/source/parse_result.rb +28 -0
- data/lib/simple_etl/source/row.rb +21 -0
- data/lib/simple_etl/source.rb +29 -0
- data/lib/simple_etl/version.rb +3 -0
- data/lib/simple_etl.rb +7 -0
- data/simple_etl.gemspec +26 -0
- data/spec/fixtures/sample.stl +3 -0
- data/spec/lib/simple_etl/source/base_context_spec.rb +57 -0
- data/spec/lib/simple_etl/source/base_spec.rb +108 -0
- data/spec/lib/simple_etl/source/field_caster_spec.rb +67 -0
- data/spec/lib/simple_etl/source/fixed_width/context_spec.rb +48 -0
- data/spec/lib/simple_etl/source/fixed_width/parser_spec.rb +25 -0
- data/spec/lib/simple_etl/source/parse_result_spec.rb +18 -0
- data/spec/lib/simple_etl/source/row_spec.rb +12 -0
- data/spec/lib/simple_etl/source_spec.rb +33 -0
- data/spec/spec_helper.rb +12 -0
- metadata +172 -0
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
guard 'rspec', :version => 2 do
|
5
|
+
watch(%r{^spec/.+_spec\.rb$})
|
6
|
+
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
|
7
|
+
watch('spec/spec_helper.rb') { "spec" }
|
8
|
+
|
9
|
+
# Rails example
|
10
|
+
watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
11
|
+
watch(%r{^app/(.*)(\.erb|\.haml)$}) { |m| "spec/#{m[1]}#{m[2]}_spec.rb" }
|
12
|
+
watch(%r{^app/controllers/(.+)_(controller)\.rb$}) { |m| ["spec/routing/#{m[1]}_routing_spec.rb", "spec/#{m[2]}s/#{m[1]}_#{m[2]}_spec.rb", "spec/acceptance/#{m[1]}_spec.rb"] }
|
13
|
+
watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
|
14
|
+
watch('config/routes.rb') { "spec/routing" }
|
15
|
+
watch('app/controllers/application_controller.rb') { "spec/controllers" }
|
16
|
+
|
17
|
+
# Capybara request specs
|
18
|
+
watch(%r{^app/views/(.+)/.*\.(erb|haml)$}) { |m| "spec/requests/#{m[1]}_spec.rb" }
|
19
|
+
|
20
|
+
# Turnip features and steps
|
21
|
+
watch(%r{^spec/acceptance/(.+)\.feature$})
|
22
|
+
watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) { |m| Dir[File.join("**/#{m[1]}.feature")][0] || 'spec/acceptance' }
|
23
|
+
end
|
24
|
+
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
simple_etl
|
2
|
+
==========
|
3
|
+
|
4
|
+
An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
|
5
|
+
|
6
|
+
Simple ETL 'would be' (:D) framework-agnostic and easy to use.
|
7
|
+
|
8
|
+
|
9
|
+
## Source
|
10
|
+
|
11
|
+
The Source namespace is responsible for parsing input files.
|
12
|
+
|
13
|
+
First of all you have to define a "source template" inside a definition file (for example _my_template.stl_):
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
define :format_name do
|
17
|
+
field :name
|
18
|
+
field :surname
|
19
|
+
end
|
20
|
+
```
|
21
|
+
|
22
|
+
Then you will load the template with the following code:
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
my_template = SimpleEtl::Source.load './etl/my_template.stl'
|
26
|
+
```
|
27
|
+
|
28
|
+
At this point you can parse a source and process the result with the following code:
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
my_template.parse '....', :type => :inline # load data inline
|
32
|
+
result = my_template.parse 'source.dat' # load from file
|
33
|
+
|
34
|
+
if result.valid?
|
35
|
+
result.rows.each do |row|
|
36
|
+
puts "|\t#{row.name}\t|\t#{row.surname}\t|"
|
37
|
+
end
|
38
|
+
puts "Parse Completed!"
|
39
|
+
else
|
40
|
+
result.errors.each do |error|
|
41
|
+
puts "Error while parsing line #{error.row_index}: #{error.message}"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
```
|
45
|
+
|
46
|
+
As you can see the result is valid if there are no errors.
|
47
|
+
|
48
|
+
The rows array contains all the parsed rows. Each row contains the parsed attributes as accessors.
|
49
|
+
|
50
|
+
The errors array contains all the generated errors. Each error is an object with 'row_index', 'message' and 'exception' properties.
|
51
|
+
|
52
|
+
## Structure of the template definition
|
53
|
+
|
54
|
+
A template definition is composed of three layers:
|
55
|
+
- raw fields
|
56
|
+
- transformations
|
57
|
+
- generators
|
58
|
+
|
59
|
+
### Fields
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
field :name
|
63
|
+
field :surname, :type => :string, :required => true
|
64
|
+
```
|
65
|
+
|
66
|
+
By default the type is 'object', which means the value is not converted at all. Other possible types are:
|
67
|
+
|
68
|
+
- *string*: the field is stripped of leading and trailing whitespace;
|
69
|
+
|
70
|
+
- *integer*: the field is stripped. If the input value is nil or empty, nil is returned; it is converted to an integer if the value contains only digits; a CastError is raised otherwise;
|
71
|
+
|
72
|
+
- *float*: the field is stripped. If the input value is nil or empty, nil is returned; it is converted to a float if the value is a number (using '.' or ',' as the decimal separator); a CastError is raised otherwise;
|
73
|
+
|
74
|
+
- *boolean*: the field is stripped. If the input value is nil or empty, nil is returned; it is converted to a boolean if the input value is one of true, false, 1, 0; a CastError is raised otherwise;
|
75
|
+
|
76
|
+
The template definition provides a helper for each defined type, so you can write:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
string :name
|
80
|
+
integer :age
|
81
|
+
```
|
82
|
+
|
83
|
+
For each helper, an additional 'required' helper will also be available:
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
required_string :name
|
87
|
+
required_integer :age
|
88
|
+
```
|
89
|
+
|
90
|
+
Remember: *every format plugin will define its own field syntax, so remember to read the [Wiki](https://github.com/nicolaracco/simple_etl/wiki)*
|
91
|
+
|
92
|
+
|
93
|
+
### Transformers and generators
|
94
|
+
|
95
|
+
They are functions that help you manipulate the parsed raw data:
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
transform(:name) { |name| name.downcase } # => the name field is converted to lowercase
|
99
|
+
|
100
|
+
# a full_name field will be present in the row
|
101
|
+
generate :full_name do |row|
|
102
|
+
"#{row.name} #{row.surname}"
|
103
|
+
end
|
104
|
+
|
105
|
+
generate :company do |row|
|
106
|
+
if cmp = Company.find(row.company_id)
|
107
|
+
cmp
|
108
|
+
else
|
109
|
+
raise ParseError.new "Cannot find a company with id #{row.company_id}"
|
110
|
+
end
|
111
|
+
end
|
112
|
+
```
|
113
|
+
|
114
|
+
A transformer is a code block that transforms a particular value. It is executed as soon as the input value is parsed (if it's valid).
|
115
|
+
|
116
|
+
A generator is a code block that generates a new property for the current row.
|
117
|
+
All the generators are executed when the entire row has been read and transformed.
|
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
|
4
|
+
# Registers the `spec` task at load time.
#
# The RSpec task used to be created inside the body of a plain `task :spec`,
# which means it was only registered while `spec` was already executing.
# Defining it directly makes `rake spec` run the RSpec task itself.
desc "Run those specs"
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = %w{--colour --format progress}
  t.pattern = 'spec/**/*_spec.rb'
  t.rspec_path = 'bundle exec rspec'
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Format-independent parsing engine.
    #
    # A Base instance holds a context (the template definition exposing
    # +fields+, +transformations+ and +generators+) and turns raw rows into
    # Row objects collected in a ParseResult. Subclasses must implement
    # #fetch_field_from_row to extract the raw value of a field from a row.
    class Base
      attr_reader :errors

      attr_reader :context

      # context - the template definition object.
      # block   - optional template body, evaluated with +context+ as self.
      def initialize context, &block
        @errors  = []
        @context = context
        context.instance_eval(&block) if block
      end

      # Extracts the raw value of +field+ from +row+.
      # Must be overridden by every concrete format.
      def fetch_field_from_row row, field
        raise 'Abstract Method'
      end

      # Parses a single raw row and appends the resulting Row to the result.
      #
      # args[:row_index] - index used in error reports (optional).
      # args[:result]    - ParseResult to append to (a new one by default).
      #
      # ParseError raised while parsing a field or running a generator is
      # recorded in the result instead of being propagated. Generators run
      # only when every field of THIS row was parsed without errors.
      #
      # Returns the ParseResult.
      def parse_row row, args = {}
        row_index = args[:row_index]
        result    = args[:result] || ParseResult.new
        row_obj   = Row.new
        # Validity is judged per row: errors collected for earlier rows of a
        # shared result must not disable the generators of this row.
        errors_before = result.errors.size

        context.fields.each do |field|
          begin
            row_obj.attributes[field[:name]] = parse_field row, field, row_obj
          rescue SimpleEtl::Source::ParseError => e
            row_info = row_index && "row #{row_index}" || ''
            result.append_error row_index, "Error parsing #{row_info}, column #{field[:name]}: #{e.message}", e
          end
        end

        if result.errors.size == errors_before
          context.generators.each do |field|
            begin
              row_obj.attributes[field[:name]] = generate_field field, row_obj
            rescue SimpleEtl::Source::ParseError => e
              row_info = row_index && "for row #{row_index}" || ''
              result.append_error row_index, "Error generating #{field[:name]} #{row_info}: #{e.message}", e
            end
          end
        end

        # The row is appended even when it has errors, as before.
        result.rows << row_obj
        result
      end

      # Parses a whole source, one row per line.
      #
      # src          - a file path, or the data itself when
      #                args[:type] == :inline.
      # args[:result] - ParseResult to append to (a new one by default).
      #
      # Returns the ParseResult; row indexes are zero-based.
      def parse src, args = {}
        result = args[:result] || ParseResult.new
        raw    = args[:type] == :inline ? src.lines : File.readlines(src)
        raw.map(&:chomp).each_with_index do |row, index|
          parse_row row, :row_index => index, :result => result
        end
        result
      end

      # Fetches, casts, validates and transforms the value of one field.
      #
      # Raises FieldRequiredError when a required field is nil or empty;
      # the caster may raise its own errors.
      def parse_field row, field, row_obj
        value = FieldCaster.send "parse_#{field[:type]}", fetch_field_from_row(row, field)
        raise FieldRequiredError if field[:required] && (value.nil? || value == '')

        transformer = context.transformations[field[:name]]
        # The transformer runs with the row as self and the value as argument.
        value = row_obj.instance_exec value, &transformer if transformer
        value
      end

      # Runs the generator block of +field+ with the row as self.
      #
      # Blocks that declare parameters also receive the row as argument
      # (`generate :x do |row| ... end`); zero-arity blocks and lambdas are
      # called without arguments so strict lambdas keep working.
      def generate_field field, row_obj
        block = field[:block]
        if block && !block.arity.zero?
          row_obj.instance_exec row_obj, &block
        else
          row_obj.instance_exec(&block)
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Template definition DSL shared by every format.
    #
    # Collects field definitions, per-field transformations and generators.
    # Besides #field, one helper per FieldCaster type is available
    # (`string :name`), plus a `required_` variant (`required_string :name`).
    class BaseContext
      attr_reader :fields
      attr_reader :transformations
      attr_reader :generators

      def initialize
        @fields = []
        @transformations = {}
        @generators = []
      end

      # Registers a field.
      #
      # args - options hash; :required defaults to false, :type to :object.
      #
      # Raises FieldArgumentError when FieldCaster has no parser for the type.
      def field name, args = {}
        args = {:required => false, :type => :object}.merge args
        unless FieldCaster.respond_to? "parse_#{args[:type]}"
          raise FieldArgumentError.new "#{name}:type (#{args[:type]}) is unknown"
        end
        fields << { :name => name }.merge(args)
      end

      # Registers +block+ as the transformation of an already defined field.
      #
      # Raises FieldNotFoundError when the field has not been defined.
      def transform field, &block
        field = field.to_sym
        raise FieldNotFoundError.new(field) unless
          fields.detect { |d| d[:name] == field }
        transformations[field] = block
      end

      # Registers a generator: +block+ computes the attribute +name+.
      def generate name, args = {}, &block
        generators << args.merge(:name => name, :block => block)
      end

      # Implements the typed helpers (`integer`, `required_integer`, ...).
      # Any other name is handled by the default method_missing.
      def method_missing name, *params, &block
        type, required = typed_helper name
        return super unless type

        # Work on a copy so the options hash given by the caller is not
        # modified.
        options = params.last.is_a?(Hash) ? params.pop.dup : {}
        options[:type] = type
        options[:required] = true if required
        field(*(params + [options]))
      end

      # Keeps respond_to? consistent with the helpers served by
      # method_missing.
      def respond_to_missing? name, include_private = false
        !typed_helper(name).nil? || super
      end

      private

      # Returns [type, required] when +name+ is a typed helper backed by a
      # FieldCaster parser, nil otherwise.
      def typed_helper name
        md = name.to_s.match(/\A(required_)?(\w+)\z/)
        return nil unless md
        type = md[2].to_sym
        FieldCaster.respond_to?("parse_#{type}") ? [type, !md[1].nil?] : nil
      end
    end
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Root of every error raised by SimpleEtl::Source.
    #
    # It derives from StandardError (not Exception) so that a plain `rescue`
    # in client code catches library errors, as with any other application
    # error.
    class Error < StandardError; end

    # Raised while parsing a source; collected in the ParseResult by the
    # parser instead of being propagated.
    class ParseError < Error; end

    # Raised when a transformation refers to a field that is not defined.
    class FieldNotFoundError < Error; end
    # Raised when a field definition has missing or invalid arguments.
    class FieldArgumentError < ParseError; end
    # Raised when a required field has no value.
    class FieldRequiredError < ParseError; end
    # Raised when a raw value cannot be converted to the requested type.
    class CastError < ParseError; end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # Stateless helpers converting the raw text of a field into a typed
    # value. Every public `parse_<type>` method is a type usable in a
    # template definition.
    #
    # All patterns are anchored with \A and \z: ^ and $ match at every line
    # boundary, so multi-line input such as "abc\n12" used to be accepted as
    # a number (and silently converted to 0) or treated as blank.
    module FieldCaster
      extend self

      # Returns the value untouched.
      def parse_object o; o; end

      # Returns the value without leading/trailing whitespace (nil stays nil).
      def parse_string o
        o && o.strip
      end

      # Returns true for "true"/"1", false for "false"/"0", nil for blank
      # input.
      #
      # Raises CastError for anything else.
      def parse_boolean o
        return nil if blank? o
        case o.strip
        when 'true', '1'  then true
        when 'false', '0' then false
        else raise(CastError.new "Cannot cast '#{o}' to 'boolean'")
        end
      end

      # Returns the Integer for a digits-only value, nil for blank input.
      #
      # Raises CastError for anything else.
      def parse_integer o
        return nil if blank? o
        unless o =~ /\A\s*\d+\s*\z/
          raise(CastError.new "Cannot cast '#{o}' to 'integer'")
        end
        o.to_i
      end

      # Returns the Float for a number with at most one '.' or ','
      # separator, nil for blank input.
      #
      # Raises CastError for anything else.
      def parse_float o
        return nil if blank? o
        unless o =~ /\A\s*\d*([\.\,]\d+)?\s*\z/
          raise(CastError.new "Cannot cast '#{o}' to 'float'")
        end
        # A comma is accepted as decimal separator.
        o.gsub(/\,/, '.').to_f
      end

      private

      # True when the value is nil or made only of whitespace.
      def blank? o
        o.nil? || !(o =~ /\A\s*\z/).nil?
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module SimpleEtl
  module Source
    module FixedWidth
      # Template definition for fixed-width sources: every field is
      # described by its start offset and its length.
      class Context < SimpleEtl::Source::BaseContext
        # Registers a fixed-width field.
        #
        # start  - offset of the field in the row; must be convertible with
        #          Integer().
        # length - number of characters, or :eol to read up to the end of
        #          the row.
        # args   - same options accepted by BaseContext#field.
        #
        # Raises FieldArgumentError when start or length are missing or are
        # not integers.
        def field name, start, length, args = {}
          raise(FieldArgumentError.new "#{name}::start required") unless start
          raise(FieldArgumentError.new "#{name}::length required") unless length
          start  = integer_argument name, :start, start
          length = integer_argument name, :length, length unless length == :eol
          super name, { :start => start, :length => length }.merge(args)
        end

        private

        # Converts +value+ with Integer(), reporting failures as
        # FieldArgumentError. Only conversion errors are rescued, instead of
        # the catch-all inline `rescue` modifier.
        def integer_argument name, label, value
          Integer(value)
        rescue ArgumentError, TypeError, RangeError
          raise(FieldArgumentError.new "#{name}::#{label} (#{value}) is not integer")
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module SimpleEtl
|
2
|
+
module Source
|
3
|
+
module FixedWidth
|
4
|
+
class Parser < SimpleEtl::Source::Base
|
5
|
+
def initialize &block
|
6
|
+
super Context.new, &block
|
7
|
+
end
|
8
|
+
|
9
|
+
def fetch_field_from_row row, field
|
10
|
+
length = field[:length]
|
11
|
+
length = row.length - field[:start] if length == :eol
|
12
|
+
row[field[:start], length]
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
formats[:fixed_width] = FixedWidth::Parser
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
module Source
|
5
|
+
class ParseResult
|
6
|
+
attr_reader :errors, :rows
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@errors = []
|
10
|
+
@rows = []
|
11
|
+
end
|
12
|
+
|
13
|
+
def valid?; @errors.empty?; end
|
14
|
+
|
15
|
+
def append_row attributes
|
16
|
+
@rows << Row.new(attributes)
|
17
|
+
end
|
18
|
+
|
19
|
+
def append_error row_index, message, e
|
20
|
+
@errors << OpenStruct.new({
|
21
|
+
:row_index => row_index,
|
22
|
+
:message => message,
|
23
|
+
:exception => e
|
24
|
+
})
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module SimpleEtl
  module Source
    # A parsed row: a hash of attributes, each one also reachable through a
    # reader (`row.name`) and a writer (`row.name = value`).
    #
    # Accessors exist only for keys already present in +attributes+; any
    # other message raises NoMethodError as usual.
    class Row
      attr_reader :attributes

      def initialize attributes = {}
        @attributes = attributes
      end

      # Serves the dynamic attribute readers and writers.
      def method_missing name, *args, &block
        field, writer = _attribute_accessor name
        return super unless field

        if writer
          attributes[field] = args.first
        else
          attributes[field]
        end
      end

      # Keeps respond_to? consistent with the accessors served by
      # method_missing.
      def respond_to_missing? name, include_private = false
        !_attribute_accessor(name).nil? || super
      end

      private

      # Returns [field, writer?] when +name+ is the reader or writer of an
      # existing attribute, nil otherwise.
      def _attribute_accessor name
        md = name.to_s.match(/\A(\w+)(=)?\z/)
        return nil unless md
        field = md[1].to_sym
        attributes.has_key?(field) ? [field, !md[2].nil?] : nil
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module SimpleEtl
  module Source
    extend self

    # Registry of the available formats: format name => parser class.
    @@formats = {}

    # Returns the format registry; format plugins add themselves to it.
    def formats; @@formats; end

    # Builds a template for +format+, evaluating +block+ as its definition.
    #
    # Raises RuntimeError when the format is not registered.
    def define format, &block
      format_class = formats[format]
      raise "Format #{format} not found" unless format_class
      format_class.new(&block)
    end

    # Loads a template definition file and returns the value of its last
    # expression (the template, when the file ends with a `define` call).
    #
    # NOTE: the file content is evaluated as Ruby code inside this module;
    # load only trusted template files.
    #
    # Raises RuntimeError when the file does not exist.
    def load file
      raise "Cannot find file" unless File.exist? file
      # The file name is passed along so that errors raised by the template
      # point to the template file instead of "(eval)".
      module_eval File.read(file), file.to_s
    end
  end
end
|
21
|
+
|
22
|
+
dir = File.expand_path File.dirname __FILE__
|
23
|
+
%w(exceptions field_caster row parse_result base_context base).each do |file|
|
24
|
+
require File.join dir, "source/#{file}"
|
25
|
+
end
|
26
|
+
|
27
|
+
%w(context parser).each do |file|
|
28
|
+
require File.join dir, "source/fixed_width/#{file}"
|
29
|
+
end
|
data/lib/simple_etl.rb
ADDED
data/simple_etl.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "simple_etl/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "simple_etl"
|
7
|
+
s.version = SimpleEtl::VERSION
|
8
|
+
s.authors = ["Nicola Racco"]
|
9
|
+
s.email = ["nicola@nicolaracco.com"]
|
10
|
+
s.homepage = ""
|
11
|
+
s.summary = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
|
12
|
+
s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}
|
13
|
+
|
14
|
+
s.rubyforge_project = "simple_etl"
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
#s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_development_dependency 'rake'
|
22
|
+
s.add_development_dependency 'guard'
|
23
|
+
s.add_development_dependency 'growl'
|
24
|
+
s.add_development_dependency 'guard-rspec'
|
25
|
+
s.add_development_dependency 'rspec'
|
26
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
module Source
|
5
|
+
describe BaseContext do
|
6
|
+
subject { BaseContext.new }
|
7
|
+
|
8
|
+
describe '#field' do
|
9
|
+
it 'should append the field definition in the fields list' do
|
10
|
+
subject.field :name
|
11
|
+
subject.fields.should =~ [{:name => :name, :required => false, :type => :object}]
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'should raise error if type is present and unknown' do
|
15
|
+
expect{
|
16
|
+
subject.field :name, :type => 'pippo'
|
17
|
+
}.to raise_error FieldArgumentError
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
describe 'field helpers' do
|
22
|
+
it 'should have an helper for each caster' do
|
23
|
+
subject.string :name
|
24
|
+
subject.fields.should =~ [{:name => :name, :type => :string, :required => false}]
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should have a required helper for each caster' do
|
28
|
+
subject.required_string :name
|
29
|
+
subject.fields.should =~ [{:name => :name, :type => :string, :required => true}]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'transform' do
|
34
|
+
it 'should append to transformations' do
|
35
|
+
subject.field :name
|
36
|
+
expect {
|
37
|
+
subject.transform :name do; end
|
38
|
+
}.to change(subject.transformations, :size).by 1
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'should raise error if field is not specified' do
|
42
|
+
expect {
|
43
|
+
subject.transform :name do; end
|
44
|
+
}.to raise_error FieldNotFoundError
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe 'generate' do
|
49
|
+
it 'should append to generators' do
|
50
|
+
expect {
|
51
|
+
subject.generate :name do; end
|
52
|
+
}.to change(subject.generators, :size).by 1
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
module Source
|
5
|
+
describe Base do
|
6
|
+
subject { Base.new BaseContext.new }
|
7
|
+
|
8
|
+
describe '#parse_field' do
|
9
|
+
before do
|
10
|
+
subject.stub :fetch_field_from_row do |row, field|
|
11
|
+
row
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should socks at brush' do
|
16
|
+
expect {
|
17
|
+
subject.parse_field 'ITM', { :name => 'IT', :type => :object }, Row.new
|
18
|
+
}.to_not raise_error
|
19
|
+
end
|
20
|
+
|
21
|
+
context 'when there is a transformation' do
|
22
|
+
it 'should apply it' do
|
23
|
+
subject.context.field :foo
|
24
|
+
subject.context.transform :foo do |s|
|
25
|
+
"pippo"
|
26
|
+
end
|
27
|
+
subject.parse_field('ITM', { :name => :foo, :type => :object }, Row.new).should == "pippo"
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'should set the context to the current row object' do
|
31
|
+
context = subject.context
|
32
|
+
context.field :foo
|
33
|
+
context.transform(:foo) { |s| self.class.should == Row }
|
34
|
+
subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'passes the field value as argument' do
|
38
|
+
subject.stub(:fetch_field_from_row) { |row, field| 'pippo' }
|
39
|
+
subject.context.field :foo
|
40
|
+
subject.context.transform(:foo) { |s| s.should == 'pippo' }
|
41
|
+
subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
describe '#generate_field' do
|
47
|
+
let(:r) { Row.new :foo => 'w' }
|
48
|
+
|
49
|
+
it 'should set the context to the current row' do
|
50
|
+
block = lambda { self.class.should == Row }
|
51
|
+
subject.generate_field({ :name => :mio, :block => block }, r)
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'should socks at brush' do
|
55
|
+
subject.generate_field({ :name => :mio, :block => lambda { foo } }, r).should == r.foo
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'can raise ParseError without specifing path' do
|
59
|
+
block = lambda { raise ParseError }
|
60
|
+
expect {
|
61
|
+
subject.generate_field({ :name => :mio, :block => block }, r)
|
62
|
+
}.to raise_error ParseError
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe '#parse_row' do
|
67
|
+
before do
|
68
|
+
subject.stub :fetch_field_from_row do |row, field|
|
69
|
+
row
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'should return a ParseResult object' do
|
74
|
+
subject.parse_row('').should be_kind_of ParseResult
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'should insert attributes in row' do
|
78
|
+
subject.context.field :sample
|
79
|
+
subject.parse_row('ITM').rows.first.sample.should == 'ITM'
|
80
|
+
end
|
81
|
+
|
82
|
+
context 'when row is not valid' do
|
83
|
+
it 'should not execute generators' do
|
84
|
+
subject.stub :fetch_field_from_row do |row, field|
|
85
|
+
raise ParseError
|
86
|
+
end
|
87
|
+
subject.context.field :sample
|
88
|
+
subject.context.generate :foo do |s|
|
89
|
+
"pippo"
|
90
|
+
end
|
91
|
+
subject.parse_row('ITM').public_methods.should_not include :foo
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
context 'when row is valid' do
|
96
|
+
it 'launches the generator giving the row instance' do
|
97
|
+
subject.context.field :sample
|
98
|
+
# it will raise error if s not have sample
|
99
|
+
subject.context.generate :foo do
|
100
|
+
sample
|
101
|
+
end
|
102
|
+
expect { subject.parse_row('ITM') }.to_not raise_error
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
module Source
|
5
|
+
describe FieldCaster do
|
6
|
+
subject { FieldCaster }
|
7
|
+
|
8
|
+
describe '::parse_integer' do
|
9
|
+
it 'returns nil if input is blank' do
|
10
|
+
subject.parse_integer(nil).should be_nil
|
11
|
+
subject.parse_integer('').should be_nil
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'returns a number if input has only digits' do
|
15
|
+
subject.parse_integer('43').should == 43
|
16
|
+
subject.parse_integer('00048').should == 48
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'automatically strips spaces' do
|
20
|
+
subject.parse_integer(' 043 ').should == 43
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'returns error with chars' do
|
24
|
+
expect {
|
25
|
+
subject.parse_integer 'a'
|
26
|
+
}.to raise_error CastError
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'returns error with commas' do
|
30
|
+
expect {
|
31
|
+
subject.parse_integer '1.2'
|
32
|
+
}.to raise_error CastError
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
describe '::parse_float' do
|
37
|
+
it 'returns nil if input is blank' do
|
38
|
+
subject.parse_float(nil).should be_nil
|
39
|
+
subject.parse_float('').should be_nil
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'returns a number if input has only digits' do
|
43
|
+
subject.parse_float('43').should == 43.0
|
44
|
+
subject.parse_float('00048').should == 48.0
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'returns a number if input has also ONE separator' do
|
48
|
+
subject.parse_float('43.1').should == 43.1
|
49
|
+
subject.parse_float('43,1').should == 43.1
|
50
|
+
expect {
|
51
|
+
subject.parse_float '43.2.1'
|
52
|
+
}.to raise_error CastError
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'automatically strips spaces' do
|
56
|
+
subject.parse_integer(' 043 ').should == 43
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'returns error in every other situation' do
|
60
|
+
expect {
|
61
|
+
subject.parse_integer 'a'
|
62
|
+
}.to raise_error CastError
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
module Source
|
5
|
+
module FixedWidth
|
6
|
+
describe Context do
|
7
|
+
subject { FixedWidth::Parser.new.context }
|
8
|
+
|
9
|
+
describe '#field' do
|
10
|
+
it 'should append the field definition in the fields list' do
|
11
|
+
subject.field :name, 10, 20
|
12
|
+
subject.fields.should =~ [{:name => :name, :start => 10, :length => 20, :required => false, :type => :object}]
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should raise error if start arg is missing' do
|
16
|
+
expect{
|
17
|
+
subject.field :name, nil, 10
|
18
|
+
}.to raise_error FieldArgumentError
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should raise error if length arg is missing' do
|
22
|
+
expect{
|
23
|
+
subject.field :name, 10, nil
|
24
|
+
}.to raise_error FieldArgumentError
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should raise error if start is not an integer' do
|
28
|
+
expect {
|
29
|
+
subject.field :name, 'pippo', 20
|
30
|
+
}.to raise_error FieldArgumentError
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'should raise error if length is not an integer' do
|
34
|
+
expect {
|
35
|
+
subject.field :name, 5, 'pippo'
|
36
|
+
}.to raise_error FieldArgumentError
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should accept the special :eol length' do
|
40
|
+
expect {
|
41
|
+
subject.field :name, 5, :eol
|
42
|
+
}.to_not raise_error
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
module Source
|
5
|
+
module FixedWidth
|
6
|
+
describe Parser do
|
7
|
+
describe '::new' do
|
8
|
+
it 'should execute the block in the context' do
|
9
|
+
FixedWidth::Parser.new { self.class.should == FixedWidth::Context }
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe '#fetch_field_from_row' do
|
14
|
+
it 'should use field attributes to fetch a field' do
|
15
|
+
subject.fetch_field_from_row('ITM', { :start => 0, :length => 2 }).should == 'IT'
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should fetch till the end of line if length is :eol' do
|
19
|
+
subject.fetch_field_from_row('ITM', { :start => 1, :length => :eol }).should == 'TM'
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
module Source
|
5
|
+
describe ParseResult do
|
6
|
+
describe '#valid?' do
|
7
|
+
it 'is true if there are no errors' do
|
8
|
+
subject.should be_valid
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'is false if there are errors' do
|
12
|
+
subject.append_error 0, 'm', Exception.new
|
13
|
+
subject.should_not be_valid
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module SimpleEtl
|
4
|
+
describe Source do
|
5
|
+
describe '::define' do
|
6
|
+
it 'should return a new template' do
|
7
|
+
tpl = SimpleEtl::Source.define :fixed_width do
|
8
|
+
integer :name, 10, 12
|
9
|
+
end
|
10
|
+
tpl.context.fields.size.should == 1
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'should raise error if format not exist' do
|
14
|
+
expect {
|
15
|
+
SimpleEtl::Source.define :piipo do; end
|
16
|
+
}.to raise_error
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '::load' do
|
21
|
+
it 'should raise an error if file not exist' do
|
22
|
+
expect {
|
23
|
+
SimpleEtl::Source.load 'pappopappo'
|
24
|
+
}.to raise_error
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should load correctly' do
|
28
|
+
file = File.join FIXTURES_PATH, "sample.stl"
|
29
|
+
SimpleEtl::Source.load(file).should be_kind_of SimpleEtl::Source::Base
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Load the library under test
|
2
|
+
require File.dirname(__FILE__) + '/../lib/simple_etl'
|
3
|
+
require 'rspec'
|
4
|
+
|
5
|
+
Dir[File.expand_path(File.join(File.dirname(__FILE__),'support','**','*.rb'))].each {|f| require f}
|
6
|
+
|
7
|
+
FIXTURES_PATH = File.expand_path File.join File.dirname(__FILE__), "fixtures"
|
8
|
+
|
9
|
+
RSpec.configure do |conf|
|
10
|
+
conf.before :suite do
|
11
|
+
end
|
12
|
+
end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple_etl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Nicola Racco
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-07-03 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rake
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: guard
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 3
|
43
|
+
segments:
|
44
|
+
- 0
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id002
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: growl
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
type: :development
|
61
|
+
version_requirements: *id003
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: guard-rspec
|
64
|
+
prerelease: false
|
65
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :development
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: rspec
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
type: :development
|
89
|
+
version_requirements: *id005
|
90
|
+
description: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
|
91
|
+
email:
|
92
|
+
- nicola@nicolaracco.com
|
93
|
+
executables: []
|
94
|
+
|
95
|
+
extensions: []
|
96
|
+
|
97
|
+
extra_rdoc_files: []
|
98
|
+
|
99
|
+
files:
|
100
|
+
- .gitignore
|
101
|
+
- .rspec
|
102
|
+
- Gemfile
|
103
|
+
- Guardfile
|
104
|
+
- README.md
|
105
|
+
- Rakefile
|
106
|
+
- lib/simple_etl.rb
|
107
|
+
- lib/simple_etl/source.rb
|
108
|
+
- lib/simple_etl/source/base.rb
|
109
|
+
- lib/simple_etl/source/base_context.rb
|
110
|
+
- lib/simple_etl/source/errorable.rb
|
111
|
+
- lib/simple_etl/source/exceptions.rb
|
112
|
+
- lib/simple_etl/source/field_caster.rb
|
113
|
+
- lib/simple_etl/source/fixed_width/context.rb
|
114
|
+
- lib/simple_etl/source/fixed_width/parser.rb
|
115
|
+
- lib/simple_etl/source/parse_result.rb
|
116
|
+
- lib/simple_etl/source/row.rb
|
117
|
+
- lib/simple_etl/version.rb
|
118
|
+
- simple_etl.gemspec
|
119
|
+
- spec/fixtures/sample.stl
|
120
|
+
- spec/lib/simple_etl/source/base_context_spec.rb
|
121
|
+
- spec/lib/simple_etl/source/base_spec.rb
|
122
|
+
- spec/lib/simple_etl/source/field_caster_spec.rb
|
123
|
+
- spec/lib/simple_etl/source/fixed_width/context_spec.rb
|
124
|
+
- spec/lib/simple_etl/source/fixed_width/parser_spec.rb
|
125
|
+
- spec/lib/simple_etl/source/parse_result_spec.rb
|
126
|
+
- spec/lib/simple_etl/source/row_spec.rb
|
127
|
+
- spec/lib/simple_etl/source_spec.rb
|
128
|
+
- spec/spec_helper.rb
|
129
|
+
homepage: ""
|
130
|
+
licenses: []
|
131
|
+
|
132
|
+
post_install_message:
|
133
|
+
rdoc_options: []
|
134
|
+
|
135
|
+
require_paths:
|
136
|
+
- lib
|
137
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
138
|
+
none: false
|
139
|
+
requirements:
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
hash: 3
|
143
|
+
segments:
|
144
|
+
- 0
|
145
|
+
version: "0"
|
146
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
|
+
none: false
|
148
|
+
requirements:
|
149
|
+
- - ">="
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
hash: 3
|
152
|
+
segments:
|
153
|
+
- 0
|
154
|
+
version: "0"
|
155
|
+
requirements: []
|
156
|
+
|
157
|
+
rubyforge_project: simple_etl
|
158
|
+
rubygems_version: 1.8.23
|
159
|
+
signing_key:
|
160
|
+
specification_version: 3
|
161
|
+
summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
|
162
|
+
test_files:
|
163
|
+
- spec/fixtures/sample.stl
|
164
|
+
- spec/lib/simple_etl/source/base_context_spec.rb
|
165
|
+
- spec/lib/simple_etl/source/base_spec.rb
|
166
|
+
- spec/lib/simple_etl/source/field_caster_spec.rb
|
167
|
+
- spec/lib/simple_etl/source/fixed_width/context_spec.rb
|
168
|
+
- spec/lib/simple_etl/source/fixed_width/parser_spec.rb
|
169
|
+
- spec/lib/simple_etl/source/parse_result_spec.rb
|
170
|
+
- spec/lib/simple_etl/source/row_spec.rb
|
171
|
+
- spec/lib/simple_etl/source_spec.rb
|
172
|
+
- spec/spec_helper.rb
|