simple_etl 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +15 -2
- data/lib/simple_etl/source/base.rb +8 -6
- data/lib/simple_etl/source/base_context.rb +6 -0
- data/lib/simple_etl/source/fixed_width/parser.rb +4 -0
- data/lib/simple_etl/version.rb +1 -1
- data/simple_etl.gemspec +1 -1
- data/spec/lib/simple_etl/source/base_spec.rb +12 -0
- metadata +68 -94
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
Simple ETL
|
2
2
|
==========
|
3
3
|
|
4
4
|
An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
|
@@ -114,4 +114,17 @@ They are functions that help you manipulate the parsed raw data:
|
|
114
114
|
A transformer is a code block that transforms a particular value. It's executed as soon as the input value is parsed (if it's valid).
|
115
115
|
|
116
116
|
A generator is a code block that generates a new property for the current row.
|
117
|
-
All the generators are executed when the entire row as been read and transformed.
|
117
|
+
All the generators are executed when the entire row has been read and transformed.
|
118
|
+
|
119
|
+
### Skip rows
|
120
|
+
|
121
|
+
If you need to skip some rows before parsing the file you can use the helper 'skip_rows':
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
define :format do
|
125
|
+
skip_rows 2
|
126
|
+
field :name
|
127
|
+
end
|
128
|
+
```
|
129
|
+
|
130
|
+
This will start the parsing from the third row.
|
@@ -41,15 +41,17 @@ module SimpleEtl
|
|
41
41
|
result
|
42
42
|
end
|
43
43
|
|
44
|
+
def read_rows src, args
|
45
|
+
raise 'Abstract Method'
|
46
|
+
end
|
47
|
+
|
44
48
|
def parse src, args = {}
|
45
49
|
result = args[:result] || ParseResult.new
|
46
|
-
|
47
|
-
lines = src.lines.map &:chomp
|
48
|
-
else
|
49
|
-
lines = File.readlines(src).map &:chomp
|
50
|
-
end
|
50
|
+
lines = read_rows src, args
|
51
51
|
lines.each_with_index do |row, index|
|
52
|
-
|
52
|
+
if index >= context.row_count_to_skip
|
53
|
+
parse_row row, :row_index => index, :result => result
|
54
|
+
end
|
53
55
|
end
|
54
56
|
result
|
55
57
|
end
|
@@ -4,11 +4,13 @@ module SimpleEtl
|
|
4
4
|
attr_reader :fields
|
5
5
|
attr_reader :transformations
|
6
6
|
attr_reader :generators
|
7
|
+
attr_reader :row_count_to_skip
|
7
8
|
|
8
9
|
def initialize
|
9
10
|
@fields = []
|
10
11
|
@transformations = {}
|
11
12
|
@generators = []
|
13
|
+
@row_count_to_skip = 0
|
12
14
|
end
|
13
15
|
|
14
16
|
def field name, args = {}
|
@@ -30,6 +32,10 @@ module SimpleEtl
|
|
30
32
|
generators << args.merge(:name => name, :block => block)
|
31
33
|
end
|
32
34
|
|
35
|
+
def skip_rows row_count
|
36
|
+
@row_count_to_skip = row_count
|
37
|
+
end
|
38
|
+
|
33
39
|
def method_missing name, *params, &block
|
34
40
|
md = name.to_s.match /^(required_)?(\w+)$/
|
35
41
|
type = md && md[2].to_sym
|
data/lib/simple_etl/version.rb
CHANGED
data/simple_etl.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = SimpleEtl::VERSION
|
8
8
|
s.authors = ["Nicola Racco"]
|
9
9
|
s.email = ["nicola@nicolaracco.com"]
|
10
|
-
s.homepage = ""
|
10
|
+
s.homepage = "https://github.com/nicolaracco/simple_etl"
|
11
11
|
s.summary = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
|
12
12
|
s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}
|
13
13
|
|
@@ -103,6 +103,18 @@ module SimpleEtl
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
end
|
106
|
+
|
107
|
+
describe '#parse' do
|
108
|
+
before do
|
109
|
+
subject.stub :read_rows => [[], []]
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'should skip first rows if specified in context' do
|
113
|
+
subject.parse(nil).rows.count.should == 2
|
114
|
+
subject.context.skip_rows 1
|
115
|
+
subject.parse(nil).rows.count.should == 1
|
116
|
+
end
|
117
|
+
end
|
106
118
|
end
|
107
119
|
end
|
108
120
|
end
|
metadata
CHANGED
@@ -1,102 +1,79 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_etl
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- 1
|
10
|
-
version: 0.0.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Nicola Racco
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-07-05 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: rake
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &70352004437540 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: guard
|
36
23
|
prerelease: false
|
37
|
-
|
24
|
+
version_requirements: *70352004437540
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: guard
|
27
|
+
requirement: &70352004437120 !ruby/object:Gem::Requirement
|
38
28
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
segments:
|
44
|
-
- 0
|
45
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
46
33
|
type: :development
|
47
|
-
version_requirements: *id002
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
name: growl
|
50
34
|
prerelease: false
|
51
|
-
|
35
|
+
version_requirements: *70352004437120
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: growl
|
38
|
+
requirement: &70352004436480 !ruby/object:Gem::Requirement
|
52
39
|
none: false
|
53
|
-
requirements:
|
54
|
-
- -
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
segments:
|
58
|
-
- 0
|
59
|
-
version: "0"
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
60
44
|
type: :development
|
61
|
-
version_requirements: *id003
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: guard-rspec
|
64
45
|
prerelease: false
|
65
|
-
|
46
|
+
version_requirements: *70352004436480
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: guard-rspec
|
49
|
+
requirement: &70352004435860 !ruby/object:Gem::Requirement
|
66
50
|
none: false
|
67
|
-
requirements:
|
68
|
-
- -
|
69
|
-
- !ruby/object:Gem::Version
|
70
|
-
|
71
|
-
segments:
|
72
|
-
- 0
|
73
|
-
version: "0"
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
74
55
|
type: :development
|
75
|
-
version_requirements: *id004
|
76
|
-
- !ruby/object:Gem::Dependency
|
77
|
-
name: rspec
|
78
56
|
prerelease: false
|
79
|
-
|
57
|
+
version_requirements: *70352004435860
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: rspec
|
60
|
+
requirement: &70352004432160 !ruby/object:Gem::Requirement
|
80
61
|
none: false
|
81
|
-
requirements:
|
82
|
-
- -
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
|
85
|
-
segments:
|
86
|
-
- 0
|
87
|
-
version: "0"
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
88
66
|
type: :development
|
89
|
-
|
90
|
-
|
91
|
-
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *70352004432160
|
69
|
+
description: An easy-to-use toolkit to help you with ETL (Extract Transform Load)
|
70
|
+
operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
|
71
|
+
email:
|
92
72
|
- nicola@nicolaracco.com
|
93
73
|
executables: []
|
94
|
-
|
95
74
|
extensions: []
|
96
|
-
|
97
75
|
extra_rdoc_files: []
|
98
|
-
|
99
|
-
files:
|
76
|
+
files:
|
100
77
|
- .gitignore
|
101
78
|
- .rspec
|
102
79
|
- Gemfile
|
@@ -126,40 +103,37 @@ files:
|
|
126
103
|
- spec/lib/simple_etl/source/row_spec.rb
|
127
104
|
- spec/lib/simple_etl/source_spec.rb
|
128
105
|
- spec/spec_helper.rb
|
129
|
-
homepage:
|
106
|
+
homepage: https://github.com/nicolaracco/simple_etl
|
130
107
|
licenses: []
|
131
|
-
|
132
108
|
post_install_message:
|
133
109
|
rdoc_options: []
|
134
|
-
|
135
|
-
require_paths:
|
110
|
+
require_paths:
|
136
111
|
- lib
|
137
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
138
113
|
none: false
|
139
|
-
requirements:
|
140
|
-
- -
|
141
|
-
- !ruby/object:Gem::Version
|
142
|
-
|
143
|
-
segments:
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
segments:
|
144
119
|
- 0
|
145
|
-
|
146
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
hash: 3236384581747162330
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
122
|
none: false
|
148
|
-
requirements:
|
149
|
-
- -
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
|
152
|
-
segments:
|
123
|
+
requirements:
|
124
|
+
- - ! '>='
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
segments:
|
153
128
|
- 0
|
154
|
-
|
129
|
+
hash: 3236384581747162330
|
155
130
|
requirements: []
|
156
|
-
|
157
131
|
rubyforge_project: simple_etl
|
158
|
-
rubygems_version: 1.8.
|
132
|
+
rubygems_version: 1.8.10
|
159
133
|
signing_key:
|
160
134
|
specification_version: 3
|
161
135
|
summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
|
162
|
-
test_files:
|
136
|
+
test_files:
|
163
137
|
- spec/fixtures/sample.stl
|
164
138
|
- spec/lib/simple_etl/source/base_context_spec.rb
|
165
139
|
- spec/lib/simple_etl/source/base_spec.rb
|