simple_etl 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
19
+ Gemfile.lock
20
+ pkg/*
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
4
+
5
+ group :development do
6
+ gem 'rake'
7
+ gem 'guard'
8
+ gem 'growl'
9
+ gem 'guard-rspec'
10
+ end
11
+
12
+ group :development, :test do
13
+ gem 'rspec'
14
+ end
data/Guardfile ADDED
@@ -0,0 +1,24 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard 'rspec', :version => 2 do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec" }
8
+
9
+ # Rails example
10
+ watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
11
+ watch(%r{^app/(.*)(\.erb|\.haml)$}) { |m| "spec/#{m[1]}#{m[2]}_spec.rb" }
12
+ watch(%r{^app/controllers/(.+)_(controller)\.rb$}) { |m| ["spec/routing/#{m[1]}_routing_spec.rb", "spec/#{m[2]}s/#{m[1]}_#{m[2]}_spec.rb", "spec/acceptance/#{m[1]}_spec.rb"] }
13
+ watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
14
+ watch('config/routes.rb') { "spec/routing" }
15
+ watch('app/controllers/application_controller.rb') { "spec/controllers" }
16
+
17
+ # Capybara request specs
18
+ watch(%r{^app/views/(.+)/.*\.(erb|haml)$}) { |m| "spec/requests/#{m[1]}_spec.rb" }
19
+
20
+ # Turnip features and steps
21
+ watch(%r{^spec/acceptance/(.+)\.feature$})
22
+ watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) { |m| Dir[File.join("**/#{m[1]}.feature")][0] || 'spec/acceptance' }
23
+ end
24
+
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ simple_etl
2
+ ==========
3
+
4
+ An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
5
+
6
+ Simple ETL 'would be' (:D) framework-agnostic and easy to use.
7
+
8
+
9
+ ## Source
10
+
11
+ The Source namespace is responsible for parsing input files.
12
+
13
+ First of all you have to define a "source template" inside a definition file (for example _my_template.stl_):
14
+
15
+ ```ruby
16
+ define :format_name do
17
+ field :name
18
+ field :surname
19
+ end
20
+ ```
21
+
22
+ Then you will load the template with the following code:
23
+
24
+ ```ruby
25
+ my_template = SimpleEtl::Source.load './etl/my_template.stl'
26
+ ```
27
+
28
+ At this point you can parse a source and process the result, as in the following code:
29
+
30
+ ```ruby
31
+ my_template.parse '....', :type => :inline # load data inline
32
+ result = my_template.parse 'source.dat' # load from file
33
+
34
+ if result.valid?
35
+ result.rows.each do |row|
36
+ puts "|\t#{row.name}\t|\t#{row.surname}\t|"
37
+ end
38
+ puts "Parse Completed!"
39
+ else
40
+ result.errors.each do |error|
41
+ puts "Error while parsing line #{error.row_index}: #{error.message}"
42
+ end
43
+ end
44
+ ```
45
+
46
+ As you can see the result is valid if there are no errors.
47
+
48
+ The rows array contains all the parsed rows. Each row contains the parsed attributes as accessors.
49
+
50
+ The errors array contains all the generated errors. Each error is an object with 'row_index', 'message' and 'exception' properties.
51
+
52
+ ## Structure of the template definition
53
+
54
+ A template definition is composed of three layers:
55
+ - raw fields
56
+ - transformations
57
+ - generators
58
+
59
+ ### Fields
60
+
61
+ ```ruby
62
+ field :name
63
+ field :surname, :type => :string, :required => true
64
+ ```
65
+
66
+ By default the type is 'object', which means the value is not converted in any way. Other possible types are:
67
+
68
+ - *string*: the field is stripped of leading and trailing whitespace;
69
+
70
+ - *integer*: the field is stripped. If the input value is nil or empty, nil is returned; it is converted to an integer if the value contains only digits; a CastError is raised otherwise;
71
+
72
+ - *float*: the field is stripped. If the input value is nil or empty, nil is returned; it is converted to a float if the value is numeric (either '.' or ',' is accepted as the decimal separator); a CastError is raised otherwise;
73
+
74
+ - *boolean*: the field is stripped. If the input value is nil or empty, nil is returned; it is converted to a boolean if the input value is one of true, false, 1, 0; a CastError is raised otherwise;
75
+
76
+ The template definition provides a helper for each defined type, so you can write:
77
+
78
+ ```ruby
79
+ string :name
80
+ integer :age
81
+ ```
82
+
83
+ For each helper, an additional 'required' helper will also be available:
84
+
85
+ ```ruby
86
+ required_string :name
87
+ required_integer :age
88
+ ```
89
+
90
+ Remember: *every format plugin will define its own field syntax, so remember to read the [Wiki](https://github.com/nicolaracco/simple_etl/wiki)*
91
+
92
+
93
+ ### Transformers and generators
94
+
95
+ They are functions that help you manipulate the parsed raw data:
96
+
97
+ ```ruby
98
+ transform(:name) { |name| name.downcase } # => the name field is downcased
99
+
100
+ # a full_name field will be present in the row
101
+ generate :full_name do |row|
102
+ "#{row.name} #{row.surname}"
103
+ end
104
+
105
+ generate :company do |row|
106
+ if cmp = Company.find(row.company_id)
107
+ cmp
108
+ else
109
+ raise ParseError.new "Cannot find a company with id #{row.company_id}"
110
+ end
111
+ end
112
+ ```
113
+
114
+ A transformer is a code block that transforms a particular value. It is executed as soon as the input value has been parsed (if it is valid).
115
+
116
+ A generator is a code block that generates a new property for the current row.
117
+ All the generators are executed when the entire row has been read and transformed.
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ desc "Run those specs"
5
+ task :spec do
6
+ RSpec::Core::RakeTask.new(:spec) do |t|
7
+ t.rspec_opts = %w{--colour --format progress}
8
+ t.pattern = 'spec/**/*_spec.rb'
9
+ t.rspec_path = 'bundle exec rspec'
10
+ end
11
+ end
@@ -0,0 +1,72 @@
module SimpleEtl
  module Source
    # Format-independent parsing engine.
    #
    # A concrete parser subclasses Base and implements #fetch_field_from_row,
    # which extracts the raw value of one field from one input row. Base then
    # casts the value, applies transformations and generators taken from the
    # template context, and collects rows and errors into a ParseResult.
    class Base
      # Set to an empty array by the constructor; no method of this class
      # appends to it (parse errors are stored on the ParseResult instead).
      attr_reader :errors

      # The template context; it must expose +fields+, +transformations+ and
      # +generators+.
      attr_reader :context

      # context - the template context object.
      # block   - optional template definition, evaluated with +context+ as self.
      def initialize context, &block
        @errors  = []
        @context = context
        context.instance_eval(&block) if block
      end

      # Extracts the raw value of +field+ from +row+. Must be overridden.
      #
      # Raises RuntimeError ('Abstract Method') when not overridden.
      def fetch_field_from_row row, field
        raise 'Abstract Method'
      end

      # Parses a single row and appends the resulting Row to the result.
      #
      # row  - the raw row, handed untouched to #fetch_field_from_row.
      # args - options hash:
      #        :row_index - index reported in error entries and messages.
      #        :result    - ParseResult to append to (a new one by default).
      #
      # A ParseError raised while parsing a field or running a generator is
      # recorded with result.append_error instead of propagating. Generators
      # run only when every field of THIS row parsed cleanly; errors recorded
      # for earlier rows in a shared result do not disable them.
      #
      # Returns the ParseResult. The Row is appended even when it had errors.
      def parse_row row, args = {}
        row_index = args[:row_index]
        result    = args[:result] || ParseResult.new
        row_obj   = Row.new
        # Tracked per row: result.valid? reflects every row parsed so far.
        row_valid = true

        context.fields.each do |field|
          begin
            row_obj.attributes[field[:name]] = parse_field row, field, row_obj
          rescue SimpleEtl::Source::ParseError => e
            row_valid = false
            row_info  = row_index && "row #{row_index}" || ''
            result.append_error row_index, "Error parsing #{row_info}, column #{field[:name]}: #{e.message}", e
          end
        end

        if row_valid
          context.generators.each do |generator|
            begin
              row_obj.attributes[generator[:name]] = generate_field generator, row_obj
            rescue SimpleEtl::Source::ParseError => e
              row_info = row_index && "for row #{row_index}" || ''
              result.append_error row_index, "Error generating #{generator[:name]} #{row_info}: #{e.message}", e
            end
          end
        end

        result.rows << row_obj
        result
      end

      # Parses every line of a source.
      #
      # src  - the data itself when args[:type] == :inline, otherwise the path
      #        of a file to read.
      # args - options hash:
      #        :type   - :inline to treat +src+ as the data.
      #        :result - ParseResult to append to (a new one by default).
      #
      # Each line is chomped and passed to #parse_row with its zero-based index.
      #
      # Returns the ParseResult.
      def parse src, args = {}
        result = args[:result] || ParseResult.new
        lines  = args[:type] == :inline ? src.lines : File.readlines(src)
        lines.each_with_index do |line, index|
          parse_row line.chomp, :row_index => index, :result => result
        end
        result
      end

      # Fetches, casts and transforms the value of one field.
      #
      # row     - the raw row.
      # field   - field definition hash (reads :name, :type and :required).
      # row_obj - the Row being built; a transformation block runs with it as
      #           self and receives the cast value as its argument.
      #
      # Raises FieldRequiredError when the field is required and the cast value
      # is nil or an empty string. Errors raised by the caster propagate.
      #
      # Returns the final value.
      def parse_field row, field, row_obj
        value = FieldCaster.send "parse_#{field[:type]}", fetch_field_from_row(row, field)
        raise FieldRequiredError if field[:required] && (value.nil? || value == '')

        transformer = context.transformations[field[:name]]
        value = row_obj.instance_exec(value, &transformer) if transformer
        value
      end

      # Runs a generator block with +row_obj+ as self.
      #
      # field   - generator definition hash (reads :block).
      # row_obj - the Row the generator works on.
      #
      # A block that declares parameters also receives the row as its first
      # argument, so both `generate(:x) { name }` and
      # `generate(:x) { |row| row.name }` work. Zero-arity blocks get no
      # argument, which keeps zero-arity lambdas usable.
      #
      # Returns the value produced by the block.
      def generate_field field, row_obj
        block = field[:block]
        if block.arity.zero?
          row_obj.instance_exec(&block)
        else
          row_obj.instance_exec(row_obj, &block)
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
module SimpleEtl
  module Source
    # Holds a template definition: the declared fields, the per-field
    # transformation blocks and the generators. Template blocks are evaluated
    # against an instance of this class (or a format-specific subclass).
    class BaseContext
      # Array of field definition hashes, in declaration order.
      attr_reader :fields
      # Hash mapping a field name (Symbol) to its transformation block.
      attr_reader :transformations
      # Array of generator definition hashes, in declaration order.
      attr_reader :generators

      def initialize
        @fields          = []
        @transformations = {}
        @generators      = []
      end

      # Declares a field.
      #
      # name - field name.
      # args - options; defaults are :required => false and :type => :object.
      #        Any other key is stored as given.
      #
      # Raises FieldArgumentError when FieldCaster has no parse_<type> method.
      def field name, args = {}
        args = { :required => false, :type => :object }.merge args
        unless FieldCaster.respond_to? "parse_#{args[:type]}"
          raise FieldArgumentError, "#{name}:type (#{args[:type]}) is unknown"
        end
        fields << { :name => name }.merge(args)
      end

      # Registers a transformation block for an already declared field.
      #
      # Raises FieldNotFoundError when no declared field has that name.
      def transform field, &block
        field = field.to_sym
        raise FieldNotFoundError.new(field) unless fields.detect { |d| d[:name] == field }
        transformations[field] = block
      end

      # Registers a generator: a block that computes an additional attribute.
      #
      # name - name of the generated attribute.
      # args - extra options stored alongside :name and :block.
      def generate name, args = {}, &block
        generators << args.merge(:name => name, :block => block)
      end

      # Provides the `<type>` and `required_<type>` helpers (for example
      # `string :name`, `required_integer :age`) for every parse_<type> method
      # FieldCaster responds to. The call is forwarded to #field with :type
      # (and :required => true for the required_ variant) set in the trailing
      # options hash. The caller's own hash is copied, never modified.
      def method_missing name, *params, &block
        type, required = helper_signature name
        return super unless type

        options = params.last.is_a?(Hash) ? params.pop.dup : {}
        options[:type]     = type
        options[:required] = true if required
        field(*params, options)
      end

      # Keeps respond_to? consistent with the helpers served by method_missing.
      def respond_to_missing? name, include_private = false
        !helper_signature(name).nil? || super
      end

      private

      # Returns [type, required] when +name+ is a valid field helper name,
      # nil otherwise.
      def helper_signature name
        md = name.to_s.match(/\A(required_)?(\w+)\z/)
        return nil unless md

        type = md[2].to_sym
        FieldCaster.respond_to?("parse_#{type}") ? [type, !md[1].nil?] : nil
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ module SimpleEtl
2
+ module Source
3
+ module Errorable
4
+
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,11 @@
module SimpleEtl
  module Source
    # Root of every error raised by SimpleEtl::Source. It derives from
    # StandardError (not Exception) so that a plain `rescue` catches it.
    class Error < StandardError; end

    # Base class of the errors that occur while parsing a source.
    class ParseError < Error; end

    # A transformation was requested for a field that was never declared.
    class FieldNotFoundError < Error; end
    # A field was declared with an invalid or unknown argument.
    class FieldArgumentError < ParseError; end
    # A required field turned out to be nil or empty.
    class FieldRequiredError < ParseError; end
    # A raw value could not be cast to the declared field type.
    class CastError < ParseError; end
  end
end
@@ -0,0 +1,51 @@
module SimpleEtl
  module Source
    # Casts raw field values to the declared field type. Each supported type
    # has a parse_<type> method; callers look the method up by that name.
    module FieldCaster
      extend self

      # Returns the value untouched.
      def parse_object o; o; end

      # Returns the value stripped of surrounding whitespace, or nil for nil.
      def parse_string o
        o && o.strip
      end

      # Returns nil for nil/blank input, true for 'true' or '1', false for
      # 'false' or '0' (surrounding whitespace ignored).
      #
      # Raises CastError for any other input.
      def parse_boolean o
        return nil if blank? o

        case o.strip
        when 'true', '1'  then true
        when 'false', '0' then false
        else raise CastError, "Cannot cast '#{o}' to 'boolean'"
        end
      end

      # Returns nil for nil/blank input, otherwise the Integer value of a
      # string made only of digits, with an optional leading sign and
      # surrounding whitespace. Leading zeros are read as decimal.
      #
      # Raises CastError for any other input.
      def parse_integer o
        return nil if blank? o
        # \A and \z anchor the whole string: with ^ and $ a multi-line value
        # such as "abc\n12" would pass validation and then be cast to 0.
        raise CastError, "Cannot cast '#{o}' to 'integer'" unless o =~ /\A\s*[+-]?\d+\s*\z/

        o.to_i
      end

      # Returns nil for nil/blank input, otherwise the Float value of a string
      # such as '43', '43.1', '43,1' or '.5', with an optional leading sign
      # and surrounding whitespace. Both '.' and ',' are accepted as the
      # decimal separator.
      #
      # Raises CastError for any other input.
      def parse_float o
        return nil if blank? o
        unless o =~ /\A\s*[+-]?(?:\d+(?:[.,]\d+)?|[.,]\d+)\s*\z/
          raise CastError, "Cannot cast '#{o}' to 'float'"
        end

        o.tr(',', '.').to_f
      end

      private

      # True when the value is nil or contains nothing but whitespace.
      def blank? o
        o.nil? || !(o =~ /\A\s*\z/).nil?
      end
    end
  end
end
@@ -0,0 +1,19 @@
module SimpleEtl
  module Source
    module FixedWidth
      # Template context for fixed-width sources: every field is declared
      # with a start offset and a length.
      class Context < SimpleEtl::Source::BaseContext
        # Declares a fixed-width field.
        #
        # name   - field name.
        # start  - offset of the field inside the row; anything Integer()
        #          accepts.
        # length - width of the field (anything Integer() accepts), or the
        #          special value :eol, which is stored as is.
        # args   - extra options forwarded to BaseContext#field.
        #
        # Raises FieldArgumentError when start or length is missing or cannot
        # be converted to an integer.
        def field name, start, length, args = {}
          raise FieldArgumentError, "#{name}::start required" unless start
          raise FieldArgumentError, "#{name}::length required" unless length

          start  = integer_argument name, :start, start
          length = integer_argument name, :length, length unless length == :eol
          super name, { :start => start, :length => length }.merge(args)
        end

        private

        # Converts +value+ with Integer(), turning the conversion errors
        # Integer() raises into a FieldArgumentError. Other errors propagate
        # instead of being swallowed by a blanket rescue modifier.
        def integer_argument field_name, label, value
          Integer(value)
        rescue ArgumentError, TypeError, RangeError
          raise FieldArgumentError, "#{field_name}::#{label} (#{value}) is not integer"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
module SimpleEtl
  module Source
    module FixedWidth
      # Parser for fixed-width sources. It evaluates the template block
      # against a FixedWidth::Context and slices each field out of the row.
      class Parser < SimpleEtl::Source::Base
        # block - optional template definition, evaluated in a new Context.
        def initialize &block
          super Context.new, &block
        end

        # Returns the slice of +row+ described by field[:start] and
        # field[:length]. A length of :eol means "up to the end of the row".
        def fetch_field_from_row row, field
          offset = field[:start]
          width  = field[:length] == :eol ? row.length - offset : field[:length]
          row[offset, width]
        end
      end
    end

    # Register the parser under the :fixed_width format name.
    formats[:fixed_width] = FixedWidth::Parser
  end
end
@@ -0,0 +1,28 @@
1
+ require 'ostruct'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ class ParseResult
6
+ attr_reader :errors, :rows
7
+
8
+ def initialize
9
+ @errors = []
10
+ @rows = []
11
+ end
12
+
13
+ def valid?; @errors.empty?; end
14
+
15
+ def append_row attributes
16
+ @rows << Row.new(attributes)
17
+ end
18
+
19
+ def append_error row_index, message, e
20
+ @errors << OpenStruct.new({
21
+ :row_index => row_index,
22
+ :message => message,
23
+ :exception => e
24
+ })
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,21 @@
module SimpleEtl
  module Source
    # A parsed row. Every key of the attributes hash is exposed as a reader
    # (row.name) and a writer (row.name = value) through method_missing.
    class Row
      # Hash of attribute name (Symbol) => value.
      attr_reader :attributes

      # attributes - initial attributes hash (empty by default).
      def initialize attributes = {}
        @attributes = attributes
      end

      # Serves `row.<attr>` and `row.<attr> = value` for existing attribute
      # keys. Anything else goes to super, which raises NoMethodError.
      #
      # Returns the (possibly just assigned) attribute value.
      def method_missing name, *args, &block
        field, setter = attribute_reference name
        return super unless field

        attributes[field] = args.first if setter
        attributes[field]
      end

      # Keeps respond_to? consistent with the accessors served by
      # method_missing.
      def respond_to_missing? name, include_private = false
        !attribute_reference(name).nil? || super
      end

      private

      # Returns [attribute_key, setter?] when +name+ reads or writes an
      # existing attribute, nil otherwise.
      def attribute_reference name
        md = name.to_s.match(/\A(\w+)(=)?\z/)
        return nil unless md

        key = md[1].to_sym
        attributes.has_key?(key) ? [key, !md[2].nil?] : nil
      end
    end
  end
end
@@ -0,0 +1,29 @@
module SimpleEtl
  # Entry point for source templates: keeps the registry of available
  # formats and builds parsers from template definitions.
  module Source
    extend self

    # Registry of format name => parser class.
    @@formats = {}

    # Returns the format registry (a Hash). Parsers register themselves by
    # adding an entry to it.
    def formats; @@formats; end

    # Builds a parser for a registered format.
    #
    # format - key of the format in the registry.
    # block  - template definition, handed to the parser class constructor.
    #
    # Raises RuntimeError when the format is not registered.
    #
    # Returns the new parser instance.
    def define format, &block
      format_class = formats[format]
      raise "Format #{format} not found" unless format_class
      format_class.new(&block)
    end

    # Loads a template definition file.
    #
    # The file content is evaluated as Ruby code in the context of this
    # module, so only trusted files must be loaded. The path is passed to
    # module_eval so that errors raised inside the template report the
    # template file and line in their backtrace.
    #
    # Raises RuntimeError when the file does not exist.
    #
    # Returns the value of the last expression of the file.
    def load file
      raise "Cannot find file" unless File.exist? file
      module_eval File.read(file), file, 1
    end
  end
end
21
+
22
+ dir = File.expand_path File.dirname __FILE__
23
+ %w(exceptions field_caster row parse_result base_context base).each do |file|
24
+ require File.join dir, "source/#{file}"
25
+ end
26
+
27
+ %w(context parser).each do |file|
28
+ require File.join dir, "source/fixed_width/#{file}"
29
+ end
@@ -0,0 +1,3 @@
1
+ module SimpleEtl
2
+ VERSION = "0.0.1"
3
+ end
data/lib/simple_etl.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'simple_etl/version'
2
+
3
+ require 'simple_etl/source'
4
+
5
+ module SimpleEtl
6
+ # Your code goes here...
7
+ end
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "simple_etl/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "simple_etl"
7
+ s.version = SimpleEtl::VERSION
8
+ s.authors = ["Nicola Racco"]
9
+ s.email = ["nicola@nicolaracco.com"]
10
+ s.homepage = ""
11
+ s.summary = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
12
+ s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}
13
+
14
+ s.rubyforge_project = "simple_etl"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ #s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ s.add_development_dependency 'rake'
22
+ s.add_development_dependency 'guard'
23
+ s.add_development_dependency 'growl'
24
+ s.add_development_dependency 'guard-rspec'
25
+ s.add_development_dependency 'rspec'
26
+ end
@@ -0,0 +1,3 @@
1
+ define :fixed_width do
2
+ integer :sample, 10, 12
3
+ end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ describe BaseContext do
6
+ subject { BaseContext.new }
7
+
8
+ describe '#field' do
9
+ it 'should append the field definition in the fields list' do
10
+ subject.field :name
11
+ subject.fields.should =~ [{:name => :name, :required => false, :type => :object}]
12
+ end
13
+
14
+ it 'should raise error if type is present and unknown' do
15
+ expect{
16
+ subject.field :name, :type => 'pippo'
17
+ }.to raise_error FieldArgumentError
18
+ end
19
+ end
20
+
21
+ describe 'field helpers' do
22
+ it 'should have an helper for each caster' do
23
+ subject.string :name
24
+ subject.fields.should =~ [{:name => :name, :type => :string, :required => false}]
25
+ end
26
+
27
+ it 'should have a required helper for each caster' do
28
+ subject.required_string :name
29
+ subject.fields.should =~ [{:name => :name, :type => :string, :required => true}]
30
+ end
31
+ end
32
+
33
+ describe 'transform' do
34
+ it 'should append to transformations' do
35
+ subject.field :name
36
+ expect {
37
+ subject.transform :name do; end
38
+ }.to change(subject.transformations, :size).by 1
39
+ end
40
+
41
+ it 'should raise error if field is not specified' do
42
+ expect {
43
+ subject.transform :name do; end
44
+ }.to raise_error FieldNotFoundError
45
+ end
46
+ end
47
+
48
+ describe 'generate' do
49
+ it 'should append to generators' do
50
+ expect {
51
+ subject.generate :name do; end
52
+ }.to change(subject.generators, :size).by 1
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,108 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ describe Base do
6
+ subject { Base.new BaseContext.new }
7
+
8
+ describe '#parse_field' do
9
+ before do
10
+ subject.stub :fetch_field_from_row do |row, field|
11
+ row
12
+ end
13
+ end
14
+
15
+ it 'should socks at brush' do
16
+ expect {
17
+ subject.parse_field 'ITM', { :name => 'IT', :type => :object }, Row.new
18
+ }.to_not raise_error
19
+ end
20
+
21
+ context 'when there is a transformation' do
22
+ it 'should apply it' do
23
+ subject.context.field :foo
24
+ subject.context.transform :foo do |s|
25
+ "pippo"
26
+ end
27
+ subject.parse_field('ITM', { :name => :foo, :type => :object }, Row.new).should == "pippo"
28
+ end
29
+
30
+ it 'should set the context to the current row object' do
31
+ context = subject.context
32
+ context.field :foo
33
+ context.transform(:foo) { |s| self.class.should == Row }
34
+ subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
35
+ end
36
+
37
+ it 'passes the field value as argument' do
38
+ subject.stub(:fetch_field_from_row) { |row, field| 'pippo' }
39
+ subject.context.field :foo
40
+ subject.context.transform(:foo) { |s| s.should == 'pippo' }
41
+ subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
42
+ end
43
+ end
44
+ end
45
+
46
+ describe '#generate_field' do
47
+ let(:r) { Row.new :foo => 'w' }
48
+
49
+ it 'should set the context to the current row' do
50
+ block = lambda { self.class.should == Row }
51
+ subject.generate_field({ :name => :mio, :block => block }, r)
52
+ end
53
+
54
+ it 'should socks at brush' do
55
+ subject.generate_field({ :name => :mio, :block => lambda { foo } }, r).should == r.foo
56
+ end
57
+
58
+ it 'can raise ParseError without specifing path' do
59
+ block = lambda { raise ParseError }
60
+ expect {
61
+ subject.generate_field({ :name => :mio, :block => block }, r)
62
+ }.to raise_error ParseError
63
+ end
64
+ end
65
+
66
+ describe '#parse_row' do
67
+ before do
68
+ subject.stub :fetch_field_from_row do |row, field|
69
+ row
70
+ end
71
+ end
72
+
73
+ it 'should return a ParseResult object' do
74
+ subject.parse_row('').should be_kind_of ParseResult
75
+ end
76
+
77
+ it 'should insert attributes in row' do
78
+ subject.context.field :sample
79
+ subject.parse_row('ITM').rows.first.sample.should == 'ITM'
80
+ end
81
+
82
+ context 'when row is not valid' do
83
+ it 'should not execute generators' do
84
+ subject.stub :fetch_field_from_row do |row, field|
85
+ raise ParseError
86
+ end
87
+ subject.context.field :sample
88
+ subject.context.generate :foo do |s|
89
+ "pippo"
90
+ end
91
+ subject.parse_row('ITM').public_methods.should_not include :foo
92
+ end
93
+ end
94
+
95
+ context 'when row is valid' do
96
+ it 'launches the generator giving the row instance' do
97
+ subject.context.field :sample
98
+ # it will raise error if s not have sample
99
+ subject.context.generate :foo do
100
+ sample
101
+ end
102
+ expect { subject.parse_row('ITM') }.to_not raise_error
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,67 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ describe FieldCaster do
6
+ subject { FieldCaster }
7
+
8
+ describe '::parse_integer' do
9
+ it 'returns nil if input is blank' do
10
+ subject.parse_integer(nil).should be_nil
11
+ subject.parse_integer('').should be_nil
12
+ end
13
+
14
+ it 'returns a number if input has only digits' do
15
+ subject.parse_integer('43').should == 43
16
+ subject.parse_integer('00048').should == 48
17
+ end
18
+
19
+ it 'automatically strips spaces' do
20
+ subject.parse_integer(' 043 ').should == 43
21
+ end
22
+
23
+ it 'returns error with chars' do
24
+ expect {
25
+ subject.parse_integer 'a'
26
+ }.to raise_error CastError
27
+ end
28
+
29
+ it 'returns error with commas' do
30
+ expect {
31
+ subject.parse_integer '1.2'
32
+ }.to raise_error CastError
33
+ end
34
+ end
35
+
36
+ describe '::parse_float' do
37
+ it 'returns nil if input is blank' do
38
+ subject.parse_float(nil).should be_nil
39
+ subject.parse_float('').should be_nil
40
+ end
41
+
42
+ it 'returns a number if input has only digits' do
43
+ subject.parse_float('43').should == 43.0
44
+ subject.parse_float('00048').should == 48.0
45
+ end
46
+
47
+ it 'returns a number if input has also ONE separator' do
48
+ subject.parse_float('43.1').should == 43.1
49
+ subject.parse_float('43,1').should == 43.1
50
+ expect {
51
+ subject.parse_float '43.2.1'
52
+ }.to raise_error CastError
53
+ end
54
+
55
+ it 'automatically strips spaces' do
56
+ subject.parse_integer(' 043 ').should == 43
57
+ end
58
+
59
+ it 'returns error in every other situation' do
60
+ expect {
61
+ subject.parse_integer 'a'
62
+ }.to raise_error CastError
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ module FixedWidth
6
+ describe Context do
7
+ subject { FixedWidth::Parser.new.context }
8
+
9
+ describe '#field' do
10
+ it 'should append the field definition in the fields list' do
11
+ subject.field :name, 10, 20
12
+ subject.fields.should =~ [{:name => :name, :start => 10, :length => 20, :required => false, :type => :object}]
13
+ end
14
+
15
+ it 'should raise error if start arg is missing' do
16
+ expect{
17
+ subject.field :name, nil, 10
18
+ }.to raise_error FieldArgumentError
19
+ end
20
+
21
+ it 'should raise error if length arg is missing' do
22
+ expect{
23
+ subject.field :name, 10, nil
24
+ }.to raise_error FieldArgumentError
25
+ end
26
+
27
+ it 'should raise error if start is not an integer' do
28
+ expect {
29
+ subject.field :name, 'pippo', 20
30
+ }.to raise_error FieldArgumentError
31
+ end
32
+
33
+ it 'should raise error if length is not an integer' do
34
+ expect {
35
+ subject.field :name, 5, 'pippo'
36
+ }.to raise_error FieldArgumentError
37
+ end
38
+
39
+ it 'should accept the special :eol length' do
40
+ expect {
41
+ subject.field :name, 5, :eol
42
+ }.to_not raise_error
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ module FixedWidth
6
+ describe Parser do
7
+ describe '::new' do
8
+ it 'should execute the block in the context' do
9
+ FixedWidth::Parser.new { self.class.should == FixedWidth::Context }
10
+ end
11
+ end
12
+
13
+ describe '#fetch_field_from_row' do
14
+ it 'should use field attributes to fetch a field' do
15
+ subject.fetch_field_from_row('ITM', { :start => 0, :length => 2 }).should == 'IT'
16
+ end
17
+
18
+ it 'should fetch till the end of line if length is :eol' do
19
+ subject.fetch_field_from_row('ITM', { :start => 1, :length => :eol }).should == 'TM'
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ describe ParseResult do
6
+ describe '#valid?' do
7
+ it 'is true if there are no errors' do
8
+ subject.should be_valid
9
+ end
10
+
11
+ it 'is false if there are errors' do
12
+ subject.append_error 0, 'm', Exception.new
13
+ subject.should_not be_valid
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,12 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ module Source
5
+ describe Row do
6
+ it 'should use reflection over its attributes' do
7
+ r = Row.new :name => 'w'
8
+ r.name.should == 'w'
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,33 @@
1
+ require 'spec_helper'
2
+
3
+ module SimpleEtl
4
+ describe Source do
5
+ describe '::define' do
6
+ it 'should return a new template' do
7
+ tpl = SimpleEtl::Source.define :fixed_width do
8
+ integer :name, 10, 12
9
+ end
10
+ tpl.context.fields.size.should == 1
11
+ end
12
+
13
+ it 'should raise error if format not exist' do
14
+ expect {
15
+ SimpleEtl::Source.define :piipo do; end
16
+ }.to raise_error
17
+ end
18
+ end
19
+
20
+ describe '::load' do
21
+ it 'should raise an error if file not exist' do
22
+ expect {
23
+ SimpleEtl::Source.load 'pappopappo'
24
+ }.to raise_error
25
+ end
26
+
27
+ it 'should load correctly' do
28
+ file = File.join FIXTURES_PATH, "sample.stl"
29
+ SimpleEtl::Source.load(file).should be_kind_of SimpleEtl::Source::Base
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,12 @@
1
+ # Load the library under test
2
+ require File.dirname(__FILE__) + '/../lib/simple_etl'
3
+ require 'rspec'
4
+
5
+ Dir[File.expand_path(File.join(File.dirname(__FILE__),'support','**','*.rb'))].each {|f| require f}
6
+
7
+ FIXTURES_PATH = File.expand_path File.join File.dirname(__FILE__), "fixtures"
8
+
9
+ RSpec.configure do |conf|
10
+ conf.before :suite do
11
+ end
12
+ end
metadata ADDED
@@ -0,0 +1,172 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_etl
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Nicola Racco
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-07-03 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rake
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: guard
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: growl
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ type: :development
61
+ version_requirements: *id003
62
+ - !ruby/object:Gem::Dependency
63
+ name: guard-rspec
64
+ prerelease: false
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :development
75
+ version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: rspec
78
+ prerelease: false
79
+ requirement: &id005 !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ type: :development
89
+ version_requirements: *id005
90
+ description: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
91
+ email:
92
+ - nicola@nicolaracco.com
93
+ executables: []
94
+
95
+ extensions: []
96
+
97
+ extra_rdoc_files: []
98
+
99
+ files:
100
+ - .gitignore
101
+ - .rspec
102
+ - Gemfile
103
+ - Guardfile
104
+ - README.md
105
+ - Rakefile
106
+ - lib/simple_etl.rb
107
+ - lib/simple_etl/source.rb
108
+ - lib/simple_etl/source/base.rb
109
+ - lib/simple_etl/source/base_context.rb
110
+ - lib/simple_etl/source/errorable.rb
111
+ - lib/simple_etl/source/exceptions.rb
112
+ - lib/simple_etl/source/field_caster.rb
113
+ - lib/simple_etl/source/fixed_width/context.rb
114
+ - lib/simple_etl/source/fixed_width/parser.rb
115
+ - lib/simple_etl/source/parse_result.rb
116
+ - lib/simple_etl/source/row.rb
117
+ - lib/simple_etl/version.rb
118
+ - simple_etl.gemspec
119
+ - spec/fixtures/sample.stl
120
+ - spec/lib/simple_etl/source/base_context_spec.rb
121
+ - spec/lib/simple_etl/source/base_spec.rb
122
+ - spec/lib/simple_etl/source/field_caster_spec.rb
123
+ - spec/lib/simple_etl/source/fixed_width/context_spec.rb
124
+ - spec/lib/simple_etl/source/fixed_width/parser_spec.rb
125
+ - spec/lib/simple_etl/source/parse_result_spec.rb
126
+ - spec/lib/simple_etl/source/row_spec.rb
127
+ - spec/lib/simple_etl/source_spec.rb
128
+ - spec/spec_helper.rb
129
+ homepage: ""
130
+ licenses: []
131
+
132
+ post_install_message:
133
+ rdoc_options: []
134
+
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ hash: 3
143
+ segments:
144
+ - 0
145
+ version: "0"
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ none: false
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ hash: 3
152
+ segments:
153
+ - 0
154
+ version: "0"
155
+ requirements: []
156
+
157
+ rubyforge_project: simple_etl
158
+ rubygems_version: 1.8.23
159
+ signing_key:
160
+ specification_version: 3
161
+ summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
162
+ test_files:
163
+ - spec/fixtures/sample.stl
164
+ - spec/lib/simple_etl/source/base_context_spec.rb
165
+ - spec/lib/simple_etl/source/base_spec.rb
166
+ - spec/lib/simple_etl/source/field_caster_spec.rb
167
+ - spec/lib/simple_etl/source/fixed_width/context_spec.rb
168
+ - spec/lib/simple_etl/source/fixed_width/parser_spec.rb
169
+ - spec/lib/simple_etl/source/parse_result_spec.rb
170
+ - spec/lib/simple_etl/source/row_spec.rb
171
+ - spec/lib/simple_etl/source_spec.rb
172
+ - spec/spec_helper.rb