simple_etl 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
19
+ Gemfile.lock
20
+ pkg/*
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime and development dependencies declared in simple_etl.gemspec.
gemspec

# Tooling that is only needed while developing the gem.
group :development do
  gem 'rake'
  gem 'guard'
  gem 'growl'
  gem 'guard-rspec'
end

# The test framework is needed both when developing and when running the suite.
group :development, :test do
  gem 'rspec'
end
data/Guardfile ADDED
@@ -0,0 +1,24 @@
1
# A sample Guardfile
# More info at https://github.com/guard/guard#readme

guard('rspec', :version => 2) do
  # A changed spec file is re-run as is.
  watch(%r{^spec/.+_spec\.rb$})
  # A changed library file maps to the spec mirroring its path under spec/lib.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/lib/#{match[1]}_spec.rb"
  end
  # Touching the shared helper re-runs the whole suite.
  watch('spec/spec_helper.rb') do
    "spec"
  end

  # Rails example
  watch(%r{^app/(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end
  watch(%r{^app/(.*)(\.erb|\.haml)$}) do |match|
    "spec/#{match[1]}#{match[2]}_spec.rb"
  end
  watch(%r{^app/controllers/(.+)_(controller)\.rb$}) do |match|
    [
      "spec/routing/#{match[1]}_routing_spec.rb",
      "spec/#{match[2]}s/#{match[1]}_#{match[2]}_spec.rb",
      "spec/acceptance/#{match[1]}_spec.rb"
    ]
  end
  watch(%r{^spec/support/(.+)\.rb$}) do
    "spec"
  end
  watch('config/routes.rb') do
    "spec/routing"
  end
  watch('app/controllers/application_controller.rb') do
    "spec/controllers"
  end

  # Capybara request specs
  watch(%r{^app/views/(.+)/.*\.(erb|haml)$}) do |match|
    "spec/requests/#{match[1]}_spec.rb"
  end

  # Turnip features and steps
  watch(%r{^spec/acceptance/(.+)\.feature$})
  watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |match|
    # Fall back to the whole acceptance directory when no feature file matches.
    Dir[File.join("**/#{match[1]}.feature")][0] || 'spec/acceptance'
  end
end
24
+
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ simple_etl
2
+ ==========
3
+
4
+ An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
5
+
6
+ Simple ETL 'would be' (:D) framework-agnostic and easy to use.
7
+
8
+
9
+ ## Source
10
+
11
+ The Source namespace is responsible for parsing input files.
12
+
13
+ First of all you have to define a "source template" inside a definition file (for example _my_template.stl_):
14
+
15
+ ```ruby
16
+ define :format_name do
17
+ field :name
18
+ field :surname
19
+ end
20
+ ```
21
+
22
+ Then you will load the template with the following code:
23
+
24
+ ```ruby
25
+ my_template = SimpleEtl::Source.load './etl/my_template.stl'
26
+ ```
27
+
28
+ At this point you can parse a source and process the result with the following code:
29
+
30
+ ```ruby
31
+ my_template.parse '....', :type => :inline # load data inline
32
+ result = my_template.parse 'source.dat' # load from file
33
+
34
+ if result.valid?
35
+ result.rows.each do |row|
36
+ puts "|\t#{row.name}\t|\t#{row.surname}\t|"
37
+ end
38
+ puts "Parse Completed!"
39
+ else
40
+ result.errors.each do |error|
41
+ puts "Error while parsing line #{error.row_index}: #{error.message}"
42
+ end
43
+ end
44
+ ```
45
+
46
+ As you can see the result is valid if there are no errors.
47
+
48
+ The rows array contains all the parsed rows. Each row contains the parsed attributes as accessors.
49
+
50
+ The errors array contains all the generated errors. Each error is an object with 'row_index', 'message' and 'exception' properties.
51
+
52
+ ## Structure of the template definition
53
+
54
+ A template definition is composed of three layers:
55
+ - raw fields
56
+ - transformations
57
+ - generators
58
+
59
+ ### Fields
60
+
61
+ ```ruby
62
+ field :name
63
+ field :surname, :type => :string, :required => true
64
+ ```
65
+
66
+ By default the type is 'object', which means the value is not converted to any format. Other possible types are:
67
+
68
+ - *string*: field is stripped of extra spaces;
69
+
70
+ - *integer*: field is stripped. If the input value is nil or empty, nil is returned; it's converted to an integer if the value contains only digits; a CastError is raised otherwise;
71
+
72
+ - *float*: field is stripped. If the input value is nil or empty, nil is returned; it's converted to a float if the value contains only digits with at most one decimal separator ('.' or ','); a CastError is raised otherwise;
73
+
74
+ - *boolean*: field is stripped. If the input value is nil or empty, nil is returned; it's converted to a boolean if the input value is true, false, 1 or 0; a CastError is raised otherwise;
75
+
76
+ The template definition provides a helper for each defined type, so you can write:
77
+
78
+ ```ruby
79
+ string :name
80
+ integer :age
81
+ ```
82
+
83
+ For each helper, an additional 'required' helper will also be available:
84
+
85
+ ```ruby
86
+ required_string :name
87
+ required_integer :age
88
+ ```
89
+
90
+ Remember: *every format plugin will define its own field syntax, so remember to read the [Wiki](https://github.com/nicolaracco/simple_etl/wiki)*
91
+
92
+
93
+ ### Transformers and generators
94
+
95
+ They are functions that help you manipulate the parsed raw data:
96
+
97
+ ```ruby
98
+ transform(:name) { |name| name.downcase } # => the name field is converted to lowercase
99
+
100
+ # a full_name field will be present in the row
101
+ generate :full_name do |row|
102
+ "#{row.name} #{row.surname}"
103
+ end
104
+
105
+ generate :company do |row|
106
+ if cmp = Company.find(row.company_id)
107
+ cmp
108
+ else
109
+ raise ParseError.new "Cannot find a company with id #{row.company_id}"
110
+ end
111
+ end
112
+ ```
113
+
114
+ A transformer is a code block that transforms a particular value. It's executed as soon as the input value is parsed (if it's valid).
115
+
116
+ A generator is a code block that generates a new property for the current row.
117
+ All the generators are executed when the entire row has been read and transformed.
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
# Defines the `spec` Rake task.
#
# RSpec::Core::RakeTask.new registers the Rake task itself, so it is called
# directly at load time. Wrapping it in another `task :spec do ... end` block
# would only define the real task while the wrapper is already running.
desc "Run those specs"
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = %w{--colour --format progress}
  t.pattern = 'spec/**/*_spec.rb'
  t.rspec_path = 'bundle exec rspec'
end
@@ -0,0 +1,72 @@
1
module SimpleEtl
  module Source
    # Abstract base class for source parsers.
    #
    # A parser owns a context (the template definition: fields, transformations
    # and generators) and turns raw rows into Row objects collected in a
    # ParseResult. Subclasses must implement #fetch_field_from_row.
    class Base
      attr_reader :errors

      attr_reader :context

      # @param context [Object] template definition responding to +fields+,
      #   +transformations+ and +generators+
      # @param block [Proc] optional template body, evaluated with the context as +self+
      def initialize(context, &block)
        @errors  = []
        @context = context
        context.instance_eval(&block) if block
      end

      # Extracts the raw value of +field+ from +row+. Format-specific.
      #
      # @raise [RuntimeError] always; subclasses must override this method
      def fetch_field_from_row(row, field)
        raise 'Abstract Method'
      end

      # Parses a single raw row and appends the resulting Row to the result.
      #
      # Field errors (ParseError and subclasses) are recorded on the result
      # instead of being raised. Generators run only when THIS row produced no
      # field errors; errors recorded for earlier rows do not disable them.
      #
      # @param row [Object] raw row, handed to #fetch_field_from_row
      # @param args [Hash] +:row_index+ (used in error messages) and
      #   +:result+ (a ParseResult to append to; a new one is created otherwise)
      # @return [ParseResult]
      def parse_row(row, args = {})
        row_index = args[:row_index]
        result    = args[:result] || ParseResult.new
        row_obj   = Row.new
        # Remember how many errors existed before this row so that validity is
        # judged per row and not for the whole result.
        errors_before = result.errors.size

        context.fields.each do |field|
          begin
            row_obj.attributes[field[:name]] = parse_field row, field, row_obj
          rescue SimpleEtl::Source::ParseError => e
            row_info = row_index ? "row #{row_index}" : ''
            result.append_error row_index, "Error parsing #{row_info}, column #{field[:name]}: #{e.message}", e
          end
        end

        if result.errors.size == errors_before
          context.generators.each do |generator|
            begin
              row_obj.attributes[generator[:name]] = generate_field generator, row_obj
            rescue SimpleEtl::Source::ParseError => e
              row_info = row_index ? "for row #{row_index}" : ''
              result.append_error row_index, "Error generating #{generator[:name]} #{row_info}: #{e.message}", e
            end
          end
        end

        result.rows << row_obj
        result
      end

      # Parses every line of a source.
      #
      # @param src [String] the data itself when +:type => :inline+, otherwise a file path
      # @param args [Hash] +:type+ (+:inline+ or anything else for a file) and
      #   +:result+ (a ParseResult to append to)
      # @return [ParseResult]
      def parse(src, args = {})
        result    = args[:result] || ParseResult.new
        raw_lines = args[:type] == :inline ? src.lines : File.readlines(src)
        raw_lines.map(&:chomp).each_with_index do |row, index|
          parse_row row, :row_index => index, :result => result
        end
        result
      end

      # Fetches, casts, validates and transforms one field of a row.
      #
      # @param row [Object] raw row
      # @param field [Hash] field definition (+:name+, +:type+, +:required+, ...)
      # @param row_obj [Row] row under construction; transformers run with it as +self+
      # @return [Object] the final field value
      # @raise [FieldRequiredError] when a required field is nil or an empty string
      def parse_field(row, field, row_obj)
        value = FieldCaster.send "parse_#{field[:type]}", fetch_field_from_row(row, field)
        raise FieldRequiredError if field[:required] &&
          (value.nil? || value == '')
        if transformer = context.transformations[field[:name]]
          value = row_obj.instance_exec value, &transformer
        end
        value
      end

      # Runs a generator block with the row as +self+.
      #
      # Blocks that declare a parameter (e.g. +generate :x do |row| ... end+)
      # also receive the row as argument; zero-arity blocks and lambdas are
      # called without arguments, so strict lambdas keep working.
      #
      # @param field [Hash] generator definition holding the +:block+
      # @param row_obj [Row] the row being built
      # @return [Object] the value produced by the block
      def generate_field(field, row_obj)
        block = field[:block]
        if block && !block.arity.zero?
          row_obj.instance_exec(row_obj, &block)
        else
          row_obj.instance_exec(&block)
        end
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module SimpleEtl
  module Source
    # Format-agnostic template definition: collects field definitions,
    # per-field transformations and row generators.
    #
    # For every caster exposed by FieldCaster as +parse_<type>+ the context
    # answers to a +<type>+ and a +required_<type>+ helper that delegate to #field.
    class BaseContext
      attr_reader :fields
      attr_reader :transformations
      attr_reader :generators

      def initialize
        @fields = []
        @transformations = {}
        @generators = []
      end

      # Registers a field.
      #
      # @param name [Symbol] field name
      # @param args [Hash] options; +:type+ defaults to +:object+, +:required+ to +false+
      # @return [Array<Hash>] the list of field definitions
      # @raise [FieldArgumentError] when FieldCaster has no caster for the type
      def field(name, args = {})
        args = { :required => false, :type => :object }.merge args
        unless FieldCaster.respond_to? "parse_#{args[:type]}"
          raise FieldArgumentError.new "#{name}:type (#{args[:type]}) is unknown"
        end
        fields << { :name => name }.merge(args)
      end

      # Registers a transformation block for an already declared field.
      #
      # @param field [#to_sym] field name
      # @raise [FieldNotFoundError] when no field with that name was declared
      def transform(field, &block)
        field = field.to_sym
        raise FieldNotFoundError.new(field) unless
          fields.any? { |definition| definition[:name] == field }
        transformations[field] = block
      end

      # Registers a generator producing an additional row attribute.
      #
      # @param name [Symbol] name of the generated attribute
      # @param args [Hash] extra options stored with the generator
      # @return [Array<Hash>] the list of generators
      def generate(name, args = {}, &block)
        generators << args.merge(:name => name, :block => block)
      end

      # Implements the +<type>+ / +required_<type>+ helpers. The caller's
      # options hash is copied before being completed, so it is never mutated.
      def method_missing(name, *params, &block)
        type, required = typed_helper(name)
        return super unless type

        options = params.last.is_a?(Hash) ? params.pop.dup : {}
        options[:type] = type
        options[:required] = true if required
        field(*params, options)
      end

      # Keeps +respond_to?+ consistent with the helpers served by #method_missing.
      def respond_to_missing?(name, include_private = false)
        !typed_helper(name).nil? || super
      end

      private

      # Splits a helper name into +[type, required]+; returns nil when
      # FieldCaster has no +parse_<type>+ caster for it.
      def typed_helper(name)
        md = name.to_s.match(/\A(required_)?(\w+)\z/)
        return nil unless md

        type = md[2].to_sym
        return nil unless FieldCaster.respond_to?("parse_#{type}")

        [type, !md[1].nil?]
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module SimpleEtl
  module Source
    # Placeholder mixin: it currently declares no methods or constants.
    module Errorable; end
  end
end
@@ -0,0 +1,11 @@
1
module SimpleEtl
  module Source
    # Root of all simple_etl source errors. It derives from StandardError (not
    # Exception) so that a plain +rescue+ catches it and it is never mixed up
    # with interpreter-level exceptions such as SignalException or SystemExit.
    class Error < StandardError; end

    # Raised while parsing or generating a row value.
    class ParseError < Error; end

    # Raised when a transformation refers to a field that was never declared.
    class FieldNotFoundError < Error; end
    # Raised when a field definition has a missing or invalid argument.
    class FieldArgumentError < ParseError; end
    # Raised when a required field is nil or empty.
    class FieldRequiredError < ParseError; end
    # Raised when a raw value cannot be cast to the requested type.
    class CastError < ParseError; end
  end
end
@@ -0,0 +1,51 @@
1
module SimpleEtl
  module Source
    # Converts raw string values into typed values. Every +parse_<type>+ method
    # defines a field type usable in template definitions.
    #
    # All patterns are anchored with \A and \z so the WHOLE value is validated;
    # with ^ and $ a multi-line value could pass (or be treated as blank) because
    # only one of its lines matched.
    module FieldCaster
      extend self

      BLANK_PATTERN   = /\A\s*\z/
      INTEGER_PATTERN = /\A\s*\d+\s*\z/
      # Digits with at most one decimal separator, either '.' or ','.
      FLOAT_PATTERN   = /\A\s*\d*([\.\,]\d+)?\s*\z/

      # @return [Object] the value, untouched
      def parse_object o; o; end

      # @return [String, nil] the value without surrounding whitespace
      def parse_string o
        o && o.strip
      end

      # @return [true, false, nil] nil for nil/blank input
      # @raise [CastError] unless the stripped value is true, false, 1 or 0
      def parse_boolean o
        return nil if o.nil? || o =~ BLANK_PATTERN

        case o.strip
        when 'true', '1'  then true
        when 'false', '0' then false
        else raise(CastError.new "Cannot cast '#{o}' to 'boolean'")
        end
      end

      # @return [Integer, nil] nil for nil/blank input
      # @raise [CastError] unless the value consists of digits only (unsigned)
      def parse_integer o
        return nil if o.nil? || o =~ BLANK_PATTERN
        raise(CastError.new "Cannot cast '#{o}' to 'integer'") unless o =~ INTEGER_PATTERN

        o.to_i
      end

      # @return [Float, nil] nil for nil/blank input
      # @raise [CastError] unless the value is an unsigned decimal number
      def parse_float o
        return nil if o.nil? || o =~ BLANK_PATTERN
        raise(CastError.new "Cannot cast '#{o}' to 'float'") unless o =~ FLOAT_PATTERN

        # Accept a comma as decimal separator.
        o.tr(',', '.').to_f
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module SimpleEtl
  module Source
    module FixedWidth
      # Template definition for fixed-width sources: every field is located by
      # a start offset and a length.
      class Context < SimpleEtl::Source::BaseContext
        # Registers a fixed-width field.
        #
        # @param name [Symbol] field name
        # @param start [Integer, String] offset of the first character
        # @param length [Integer, String, Symbol] number of characters, or
        #   +:eol+ to read up to the end of the line
        # @param args [Hash] additional field options (+:type+, +:required+, ...)
        # @raise [FieldArgumentError] when start or length is missing or not an integer
        def field name, start, length, args = {}
          raise(FieldArgumentError.new "#{name}::start required") unless start
          raise(FieldArgumentError.new "#{name}::length required") unless length
          start  = integer_argument name, :start, start
          length = integer_argument name, :length, length unless length == :eol
          super name, { :start => start, :length => length }.merge(args)
        end

        private

        # Converts +value+ with Kernel#Integer, translating only the conversion
        # failures into a FieldArgumentError instead of rescuing every error.
        def integer_argument field_name, argument, value
          Integer(value)
        rescue ArgumentError, TypeError, RangeError
          raise FieldArgumentError.new "#{field_name}::#{argument} (#{value}) is not integer"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module SimpleEtl
  module Source
    module FixedWidth
      # Parser for fixed-width rows; its template is a FixedWidth::Context.
      class Parser < SimpleEtl::Source::Base
        # @param block [Proc] optional template body, evaluated in a new Context
        def initialize(&block)
          super(Context.new, &block)
        end

        # Slices the field out of the row using its +:start+ and +:length+.
        # A +:length+ of +:eol+ reads from +:start+ to the end of the row.
        def fetch_field_from_row(row, field)
          offset = field[:start]
          width  = field[:length] == :eol ? row.length - offset : field[:length]
          row[offset, width]
        end
      end
    end

    # Make the format available to Source.define as :fixed_width.
    formats[:fixed_width] = FixedWidth::Parser
  end
end
@@ -0,0 +1,28 @@
1
+ require 'ostruct'
2
+
3
module SimpleEtl
  module Source
    # Outcome of a parse: the rows that were built and the errors that were
    # recorded along the way.
    class ParseResult
      attr_reader :errors, :rows

      def initialize
        @errors, @rows = [], []
      end

      # @return [Boolean] true when no error has been recorded
      def valid?
        @errors.empty?
      end

      # Wraps the attributes in a Row and stores it.
      #
      # @return [Array] the rows collected so far
      def append_row(attributes)
        @rows.push Row.new(attributes)
      end

      # Stores an error as an OpenStruct exposing +row_index+, +message+ and +exception+.
      #
      # @return [Array] the errors collected so far
      def append_error(row_index, message, e)
        error = OpenStruct.new(
          :row_index => row_index,
          :message   => message,
          :exception => e
        )
        @errors.push error
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module SimpleEtl
  module Source
    # A parsed row. Every key of the attributes hash is reachable through a
    # reader (+row.name+) and a writer (+row.name = value+).
    class Row
      attr_reader :attributes

      # @param attributes [Hash{Symbol => Object}] initial attribute values
      def initialize attributes = {}
        @attributes = attributes
      end

      # Serves readers and writers for the keys present in #attributes;
      # anything else falls through to the default NoMethodError.
      def method_missing name, *args, &block
        field, writer = attribute_accessor name
        return super unless field

        writer ? (attributes[field] = args.first) : attributes[field]
      end

      # Keeps +respond_to?+ consistent with the accessors served by #method_missing.
      def respond_to_missing? name, include_private = false
        !attribute_accessor(name).nil? || super
      end

      private

      # Maps a method name to +[attribute, writer?]+; returns nil when the name
      # does not refer to an existing attribute.
      def attribute_accessor name
        md = name.to_s.match(/\A(\w+)(=)?\z/)
        return nil unless md

        field = md[1].to_sym
        attributes.has_key?(field) ? [field, !md[2].nil?] : nil
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module SimpleEtl
  # Entry point for source templates: keeps the registry of available formats
  # and builds templates either from a block or from a definition file.
  module Source
    extend self

    @@formats = {}

    # @return [Hash{Symbol => Class}] registered formats, keyed by name
    def formats; @@formats; end

    # Builds a template of the given format.
    #
    # @param format [Symbol] a key of #formats
    # @param block [Proc] template body, handed to the format class
    # @return [Object] a new instance of the format class
    # @raise [RuntimeError] when the format is not registered
    def define format, &block
      format_class = formats[format]
      raise "Format #{format} not found" unless format_class
      format_class.new(&block)
    end

    # Evaluates a template definition file inside this module, so the file can
    # call +define+ directly, and returns the value of its last expression.
    #
    # NOTE: the file content is executed as Ruby code; only load trusted files.
    #
    # @param file [String] path of the definition file
    # @raise [RuntimeError] when the file does not exist
    def load file
      raise "Cannot find file" unless File.exist? file
      # Passing the path makes errors raised by the template point at the
      # template file and line instead of an anonymous "(eval)".
      module_eval File.read(file), file
    end
  end
end
21
+
22
+ dir = File.expand_path File.dirname __FILE__
23
+ %w(exceptions field_caster row parse_result base_context base).each do |file|
24
+ require File.join dir, "source/#{file}"
25
+ end
26
+
27
+ %w(context parser).each do |file|
28
+ require File.join dir, "source/fixed_width/#{file}"
29
+ end
@@ -0,0 +1,3 @@
1
module SimpleEtl
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/lib/simple_etl.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'simple_etl/version'
2
+
3
+ require 'simple_etl/source'
4
+
5
+ module SimpleEtl
6
+ # Your code goes here...
7
+ end
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
$:.push File.expand_path("../lib", __FILE__)
require "simple_etl/version"

# Gem specification for simple_etl.
Gem::Specification.new do |s|
  s.name        = "simple_etl"
  s.version     = SimpleEtl::VERSION
  s.authors     = ["Nicola Racco"]
  s.email       = ["nicola@nicolaracco.com"]
  # Project page (the README links to the wiki of this repository); an empty
  # homepage gives users no way to find the sources or the documentation.
  s.homepage    = "https://github.com/nicolaracco/simple_etl"
  s.summary     = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
  s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}

  s.rubyforge_project = "simple_etl"

  # The packaged files are whatever git tracks, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  #s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency 'rake'
  s.add_development_dependency 'guard'
  s.add_development_dependency 'growl'
  s.add_development_dependency 'guard-rspec'
  s.add_development_dependency 'rspec'
end
@@ -0,0 +1,3 @@
1
# Fixture template: a fixed-width format with one integer field named
# "sample", declared with start 10 and length 12.
define(:fixed_width) do
  integer(:sample, 10, 12)
end
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  module Source
    # Behaviour of the format-agnostic template DSL: field registration,
    # typed helpers, transformations and generators.
    describe BaseContext do
      subject { BaseContext.new }

      describe '#field' do
        it 'should append the field definition in the fields list' do
          definition = { :name => :name, :required => false, :type => :object }
          subject.field(:name)
          subject.fields.should =~ [definition]
        end

        it 'should raise error if type is present and unknown' do
          expect { subject.field(:name, :type => 'pippo') }.to raise_error(FieldArgumentError)
        end
      end

      describe 'field helpers' do
        it 'should have an helper for each caster' do
          definition = { :name => :name, :type => :string, :required => false }
          subject.string(:name)
          subject.fields.should =~ [definition]
        end

        it 'should have a required helper for each caster' do
          definition = { :name => :name, :type => :string, :required => true }
          subject.required_string(:name)
          subject.fields.should =~ [definition]
        end
      end

      describe 'transform' do
        it 'should append to transformations' do
          subject.field(:name)
          expect { subject.transform(:name) { } }.to change(subject.transformations, :size).by(1)
        end

        it 'should raise error if field is not specified' do
          expect { subject.transform(:name) { } }.to raise_error(FieldNotFoundError)
        end
      end

      describe 'generate' do
        it 'should append to generators' do
          expect { subject.generate(:name) { } }.to change(subject.generators, :size).by(1)
        end
      end
    end
  end
end
@@ -0,0 +1,108 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  module Source
    # Behaviour of the abstract parser: field parsing, generators and row parsing.
    describe Base do
      subject { Base.new BaseContext.new }

      describe '#parse_field' do
        before do
          # Base#fetch_field_from_row is abstract: echo the raw row as the field value.
          subject.stub :fetch_field_from_row do |row, field|
            row
          end
        end

        it 'should socks at brush' do
          expect {
            subject.parse_field 'ITM', { :name => 'IT', :type => :object }, Row.new
          }.to_not raise_error
        end

        context 'when there is a transformation' do
          it 'should apply it' do
            subject.context.field :foo
            subject.context.transform :foo do |s|
              "pippo"
            end
            subject.parse_field('ITM', { :name => :foo, :type => :object }, Row.new).should == "pippo"
          end

          it 'should set the context to the current row object' do
            context = subject.context
            context.field :foo
            context.transform(:foo) { |s| self.class.should == Row }
            subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
          end

          it 'passes the field value as argument' do
            subject.stub(:fetch_field_from_row) { |row, field| 'pippo' }
            subject.context.field :foo
            subject.context.transform(:foo) { |s| s.should == 'pippo' }
            subject.parse_field 'ITM', { :name => :foo, :type => :object }, Row.new
          end
        end
      end

      describe '#generate_field' do
        let(:r) { Row.new :foo => 'w' }

        it 'should set the context to the current row' do
          block = lambda { self.class.should == Row }
          subject.generate_field({ :name => :mio, :block => block }, r)
        end

        it 'should socks at brush' do
          subject.generate_field({ :name => :mio, :block => lambda { foo } }, r).should == r.foo
        end

        it 'can raise ParseError without specifing path' do
          block = lambda { raise ParseError }
          expect {
            subject.generate_field({ :name => :mio, :block => block }, r)
          }.to raise_error ParseError
        end
      end

      describe '#parse_row' do
        before do
          subject.stub :fetch_field_from_row do |row, field|
            row
          end
        end

        it 'should return a ParseResult object' do
          subject.parse_row('').should be_kind_of ParseResult
        end

        it 'should insert attributes in row' do
          subject.context.field :sample
          subject.parse_row('ITM').rows.first.sample.should == 'ITM'
        end

        context 'when row is not valid' do
          it 'should not execute generators' do
            subject.stub :fetch_field_from_row do |row, field|
              raise ParseError
            end
            subject.context.field :sample
            subject.context.generate :foo do |s|
              "pippo"
            end
            # Inspect the parsed row itself: a generated value would be stored
            # in its attributes, never among the ParseResult's public methods.
            subject.parse_row('ITM').rows.first.attributes.should_not have_key :foo
          end
        end

        context 'when row is valid' do
          it 'launches the generator giving the row instance' do
            subject.context.field :sample
            # the block raises if the row does not expose `sample`
            subject.context.generate :foo do
              sample
            end
            expect { subject.parse_row('ITM') }.to_not raise_error
          end
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  module Source
    # Casting rules for integers and floats.
    describe FieldCaster do
      subject { FieldCaster }

      describe '::parse_integer' do
        it 'returns nil if input is blank' do
          subject.parse_integer(nil).should be_nil
          subject.parse_integer('').should be_nil
        end

        it 'returns a number if input has only digits' do
          subject.parse_integer('43').should == 43
          subject.parse_integer('00048').should == 48
        end

        it 'automatically strips spaces' do
          subject.parse_integer(' 043 ').should == 43
        end

        it 'returns error with chars' do
          expect {
            subject.parse_integer 'a'
          }.to raise_error CastError
        end

        it 'returns error with commas' do
          expect {
            subject.parse_integer '1.2'
          }.to raise_error CastError
        end
      end

      describe '::parse_float' do
        it 'returns nil if input is blank' do
          subject.parse_float(nil).should be_nil
          subject.parse_float('').should be_nil
        end

        it 'returns a number if input has only digits' do
          subject.parse_float('43').should == 43.0
          subject.parse_float('00048').should == 48.0
        end

        it 'returns a number if input has also ONE separator' do
          subject.parse_float('43.1').should == 43.1
          subject.parse_float('43,1').should == 43.1
          expect {
            subject.parse_float '43.2.1'
          }.to raise_error CastError
        end

        # The two examples below exercise parse_float itself; they used to
        # call parse_integer, leaving these float paths untested.
        it 'automatically strips spaces' do
          subject.parse_float(' 043 ').should == 43.0
        end

        it 'returns error in every other situation' do
          expect {
            subject.parse_float 'a'
          }.to raise_error CastError
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  module Source
    module FixedWidth
      # Argument validation of the fixed-width field definition.
      describe Context do
        subject { FixedWidth::Parser.new.context }

        describe '#field' do
          it 'should append the field definition in the fields list' do
            definition = { :name => :name, :start => 10, :length => 20, :required => false, :type => :object }
            subject.field(:name, 10, 20)
            subject.fields.should =~ [definition]
          end

          it 'should raise error if start arg is missing' do
            expect { subject.field(:name, nil, 10) }.to raise_error(FieldArgumentError)
          end

          it 'should raise error if length arg is missing' do
            expect { subject.field(:name, 10, nil) }.to raise_error(FieldArgumentError)
          end

          it 'should raise error if start is not an integer' do
            expect { subject.field(:name, 'pippo', 20) }.to raise_error(FieldArgumentError)
          end

          it 'should raise error if length is not an integer' do
            expect { subject.field(:name, 5, 'pippo') }.to raise_error(FieldArgumentError)
          end

          it 'should accept the special :eol length' do
            expect { subject.field(:name, 5, :eol) }.to_not raise_error
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  module Source
    module FixedWidth
      # Template evaluation and field slicing of the fixed-width parser.
      describe Parser do
        describe '::new' do
          it 'should execute the block in the context' do
            FixedWidth::Parser.new do
              self.class.should == FixedWidth::Context
            end
          end
        end

        describe '#fetch_field_from_row' do
          it 'should use field attributes to fetch a field' do
            field = { :start => 0, :length => 2 }
            subject.fetch_field_from_row('ITM', field).should == 'IT'
          end

          it 'should fetch till the end of line if length is :eol' do
            field = { :start => 1, :length => :eol }
            subject.fetch_field_from_row('ITM', field).should == 'TM'
          end
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  module Source
    # A result is valid exactly when no error has been recorded.
    describe ParseResult do
      describe '#valid?' do
        it 'is true if there are no errors' do
          subject.should be_valid
        end

        it 'is false if there are errors' do
          failure = Exception.new
          subject.append_error(0, 'm', failure)
          subject.should_not be_valid
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  module Source
    # Attributes of a row are readable as methods.
    describe Row do
      it 'should use reflection over its attributes' do
        row = Row.new(:name => 'w')
        row.name.should == 'w'
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'spec_helper'
2
+
3
module SimpleEtl
  # Template creation from a block (::define) and from a file (::load).
  #
  # The failure examples name the expected error class and message: a bare
  # `raise_error` would also pass on unrelated failures such as a NameError.
  describe Source do
    describe '::define' do
      it 'should return a new template' do
        tpl = SimpleEtl::Source.define :fixed_width do
          integer :name, 10, 12
        end
        tpl.context.fields.size.should == 1
      end

      it 'should raise error if format not exist' do
        expect {
          SimpleEtl::Source.define :piipo do; end
        }.to raise_error(RuntimeError, /not found/)
      end
    end

    describe '::load' do
      it 'should raise an error if file not exist' do
        expect {
          SimpleEtl::Source.load 'pappopappo'
        }.to raise_error(RuntimeError, /Cannot find file/)
      end

      it 'should load correctly' do
        file = File.join FIXTURES_PATH, "sample.stl"
        SimpleEtl::Source.load(file).should be_kind_of SimpleEtl::Source::Base
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
# Load the simple_etl library under test.
require File.join(File.dirname(__FILE__), '..', 'lib', 'simple_etl')
require 'rspec'

# Load every support file found below spec/support.
support_glob = File.expand_path(File.join(File.dirname(__FILE__), 'support', '**', '*.rb'))
Dir[support_glob].each do |support_file|
  require support_file
end

# Directory holding the template fixtures used by the specs.
FIXTURES_PATH = File.expand_path(File.join(File.dirname(__FILE__), "fixtures"))

RSpec.configure do |conf|
  # Suite-level setup hook; intentionally empty.
  conf.before(:suite) do
  end
end
metadata ADDED
@@ -0,0 +1,172 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_etl
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Nicola Racco
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-07-03 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rake
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: guard
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: growl
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ type: :development
61
+ version_requirements: *id003
62
+ - !ruby/object:Gem::Dependency
63
+ name: guard-rspec
64
+ prerelease: false
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :development
75
+ version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: rspec
78
+ prerelease: false
79
+ requirement: &id005 !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ type: :development
89
+ version_requirements: *id005
90
+ description: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
91
+ email:
92
+ - nicola@nicolaracco.com
93
+ executables: []
94
+
95
+ extensions: []
96
+
97
+ extra_rdoc_files: []
98
+
99
+ files:
100
+ - .gitignore
101
+ - .rspec
102
+ - Gemfile
103
+ - Guardfile
104
+ - README.md
105
+ - Rakefile
106
+ - lib/simple_etl.rb
107
+ - lib/simple_etl/source.rb
108
+ - lib/simple_etl/source/base.rb
109
+ - lib/simple_etl/source/base_context.rb
110
+ - lib/simple_etl/source/errorable.rb
111
+ - lib/simple_etl/source/exceptions.rb
112
+ - lib/simple_etl/source/field_caster.rb
113
+ - lib/simple_etl/source/fixed_width/context.rb
114
+ - lib/simple_etl/source/fixed_width/parser.rb
115
+ - lib/simple_etl/source/parse_result.rb
116
+ - lib/simple_etl/source/row.rb
117
+ - lib/simple_etl/version.rb
118
+ - simple_etl.gemspec
119
+ - spec/fixtures/sample.stl
120
+ - spec/lib/simple_etl/source/base_context_spec.rb
121
+ - spec/lib/simple_etl/source/base_spec.rb
122
+ - spec/lib/simple_etl/source/field_caster_spec.rb
123
+ - spec/lib/simple_etl/source/fixed_width/context_spec.rb
124
+ - spec/lib/simple_etl/source/fixed_width/parser_spec.rb
125
+ - spec/lib/simple_etl/source/parse_result_spec.rb
126
+ - spec/lib/simple_etl/source/row_spec.rb
127
+ - spec/lib/simple_etl/source_spec.rb
128
+ - spec/spec_helper.rb
129
+ homepage: ""
130
+ licenses: []
131
+
132
+ post_install_message:
133
+ rdoc_options: []
134
+
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ hash: 3
143
+ segments:
144
+ - 0
145
+ version: "0"
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ none: false
148
+ requirements:
149
+ - - ">="
150
+ - !ruby/object:Gem::Version
151
+ hash: 3
152
+ segments:
153
+ - 0
154
+ version: "0"
155
+ requirements: []
156
+
157
+ rubyforge_project: simple_etl
158
+ rubygems_version: 1.8.23
159
+ signing_key:
160
+ specification_version: 3
161
+ summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
162
+ test_files:
163
+ - spec/fixtures/sample.stl
164
+ - spec/lib/simple_etl/source/base_context_spec.rb
165
+ - spec/lib/simple_etl/source/base_spec.rb
166
+ - spec/lib/simple_etl/source/field_caster_spec.rb
167
+ - spec/lib/simple_etl/source/fixed_width/context_spec.rb
168
+ - spec/lib/simple_etl/source/fixed_width/parser_spec.rb
169
+ - spec/lib/simple_etl/source/parse_result_spec.rb
170
+ - spec/lib/simple_etl/source/row_spec.rb
171
+ - spec/lib/simple_etl/source_spec.rb
172
+ - spec/spec_helper.rb