simple_etl 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +15 -2
- data/lib/simple_etl/source/base.rb +8 -6
- data/lib/simple_etl/source/base_context.rb +6 -0
- data/lib/simple_etl/source/fixed_width/parser.rb +4 -0
- data/lib/simple_etl/version.rb +1 -1
- data/simple_etl.gemspec +1 -1
- data/spec/lib/simple_etl/source/base_spec.rb +12 -0
- metadata +68 -94
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
Simple ETL
|
2
2
|
==========
|
3
3
|
|
4
4
|
An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
|
@@ -114,4 +114,17 @@ They are functions that help you manipulate the parsed raw data:
|
|
114
114
|
A transformer is a code block that transforms a particular value. It's executed as soon as the input value is parsed (if it's valid).
|
115
115
|
|
116
116
|
A generator is a code block that generates a new property for the current row.
|
117
|
-
All the generators are executed when the entire row as been read and transformed.
|
117
|
+
All the generators are executed when the entire row has been read and transformed.
|
118
|
+
|
119
|
+
### Skip rows
|
120
|
+
|
121
|
+
If you need to skip some rows before parsing the file, you can use the 'skip_rows' helper:
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
define :format do
|
125
|
+
skip_rows 2
|
126
|
+
field :name
|
127
|
+
end
|
128
|
+
```
|
129
|
+
|
130
|
+
This will start the parsing from the third row.
|
@@ -41,15 +41,17 @@ module SimpleEtl
|
|
41
41
|
result
|
42
42
|
end
|
43
43
|
|
44
|
+
def read_rows src, args
|
45
|
+
raise 'Abstract Method'
|
46
|
+
end
|
47
|
+
|
44
48
|
def parse src, args = {}
|
45
49
|
result = args[:result] || ParseResult.new
|
46
|
-
|
47
|
-
lines = src.lines.map &:chomp
|
48
|
-
else
|
49
|
-
lines = File.readlines(src).map &:chomp
|
50
|
-
end
|
50
|
+
lines = read_rows src, args
|
51
51
|
lines.each_with_index do |row, index|
|
52
|
-
|
52
|
+
if index >= context.row_count_to_skip
|
53
|
+
parse_row row, :row_index => index, :result => result
|
54
|
+
end
|
53
55
|
end
|
54
56
|
result
|
55
57
|
end
|
@@ -4,11 +4,13 @@ module SimpleEtl
|
|
4
4
|
attr_reader :fields
|
5
5
|
attr_reader :transformations
|
6
6
|
attr_reader :generators
|
7
|
+
attr_reader :row_count_to_skip
|
7
8
|
|
8
9
|
def initialize
|
9
10
|
@fields = []
|
10
11
|
@transformations = {}
|
11
12
|
@generators = []
|
13
|
+
@row_count_to_skip = 0
|
12
14
|
end
|
13
15
|
|
14
16
|
def field name, args = {}
|
@@ -30,6 +32,10 @@ module SimpleEtl
|
|
30
32
|
generators << args.merge(:name => name, :block => block)
|
31
33
|
end
|
32
34
|
|
35
|
+
def skip_rows row_count
|
36
|
+
@row_count_to_skip = row_count
|
37
|
+
end
|
38
|
+
|
33
39
|
def method_missing name, *params, &block
|
34
40
|
md = name.to_s.match /^(required_)?(\w+)$/
|
35
41
|
type = md && md[2].to_sym
|
data/lib/simple_etl/version.rb
CHANGED
data/simple_etl.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.version = SimpleEtl::VERSION
|
8
8
|
s.authors = ["Nicola Racco"]
|
9
9
|
s.email = ["nicola@nicolaracco.com"]
|
10
|
-
s.homepage = ""
|
10
|
+
s.homepage = "https://github.com/nicolaracco/simple_etl"
|
11
11
|
s.summary = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
|
12
12
|
s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}
|
13
13
|
|
@@ -103,6 +103,18 @@ module SimpleEtl
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
end
|
106
|
+
|
107
|
+
describe '#parse' do
|
108
|
+
before do
|
109
|
+
subject.stub :read_rows => [[], []]
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'should skip first rows if specified in context' do
|
113
|
+
subject.parse(nil).rows.count.should == 2
|
114
|
+
subject.context.skip_rows 1
|
115
|
+
subject.parse(nil).rows.count.should == 1
|
116
|
+
end
|
117
|
+
end
|
106
118
|
end
|
107
119
|
end
|
108
120
|
end
|
metadata
CHANGED
@@ -1,102 +1,79 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_etl
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- 1
|
10
|
-
version: 0.0.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Nicola Racco
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-07-05 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: rake
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &70352004437540 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: guard
|
36
23
|
prerelease: false
|
37
|
-
|
24
|
+
version_requirements: *70352004437540
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: guard
|
27
|
+
requirement: &70352004437120 !ruby/object:Gem::Requirement
|
38
28
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
segments:
|
44
|
-
- 0
|
45
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
46
33
|
type: :development
|
47
|
-
version_requirements: *id002
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
name: growl
|
50
34
|
prerelease: false
|
51
|
-
|
35
|
+
version_requirements: *70352004437120
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: growl
|
38
|
+
requirement: &70352004436480 !ruby/object:Gem::Requirement
|
52
39
|
none: false
|
53
|
-
requirements:
|
54
|
-
- -
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
segments:
|
58
|
-
- 0
|
59
|
-
version: "0"
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
60
44
|
type: :development
|
61
|
-
version_requirements: *id003
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: guard-rspec
|
64
45
|
prerelease: false
|
65
|
-
|
46
|
+
version_requirements: *70352004436480
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: guard-rspec
|
49
|
+
requirement: &70352004435860 !ruby/object:Gem::Requirement
|
66
50
|
none: false
|
67
|
-
requirements:
|
68
|
-
- -
|
69
|
-
- !ruby/object:Gem::Version
|
70
|
-
|
71
|
-
segments:
|
72
|
-
- 0
|
73
|
-
version: "0"
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
74
55
|
type: :development
|
75
|
-
version_requirements: *id004
|
76
|
-
- !ruby/object:Gem::Dependency
|
77
|
-
name: rspec
|
78
56
|
prerelease: false
|
79
|
-
|
57
|
+
version_requirements: *70352004435860
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: rspec
|
60
|
+
requirement: &70352004432160 !ruby/object:Gem::Requirement
|
80
61
|
none: false
|
81
|
-
requirements:
|
82
|
-
- -
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
|
85
|
-
segments:
|
86
|
-
- 0
|
87
|
-
version: "0"
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
88
66
|
type: :development
|
89
|
-
|
90
|
-
|
91
|
-
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *70352004432160
|
69
|
+
description: An easy-to-use toolkit to help you with ETL (Extract Transform Load)
|
70
|
+
operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
|
71
|
+
email:
|
92
72
|
- nicola@nicolaracco.com
|
93
73
|
executables: []
|
94
|
-
|
95
74
|
extensions: []
|
96
|
-
|
97
75
|
extra_rdoc_files: []
|
98
|
-
|
99
|
-
files:
|
76
|
+
files:
|
100
77
|
- .gitignore
|
101
78
|
- .rspec
|
102
79
|
- Gemfile
|
@@ -126,40 +103,37 @@ files:
|
|
126
103
|
- spec/lib/simple_etl/source/row_spec.rb
|
127
104
|
- spec/lib/simple_etl/source_spec.rb
|
128
105
|
- spec/spec_helper.rb
|
129
|
-
homepage:
|
106
|
+
homepage: https://github.com/nicolaracco/simple_etl
|
130
107
|
licenses: []
|
131
|
-
|
132
108
|
post_install_message:
|
133
109
|
rdoc_options: []
|
134
|
-
|
135
|
-
require_paths:
|
110
|
+
require_paths:
|
136
111
|
- lib
|
137
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
138
113
|
none: false
|
139
|
-
requirements:
|
140
|
-
- -
|
141
|
-
- !ruby/object:Gem::Version
|
142
|
-
|
143
|
-
segments:
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
segments:
|
144
119
|
- 0
|
145
|
-
|
146
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
hash: 3236384581747162330
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
147
122
|
none: false
|
148
|
-
requirements:
|
149
|
-
- -
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
|
152
|
-
segments:
|
123
|
+
requirements:
|
124
|
+
- - ! '>='
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
segments:
|
153
128
|
- 0
|
154
|
-
|
129
|
+
hash: 3236384581747162330
|
155
130
|
requirements: []
|
156
|
-
|
157
131
|
rubyforge_project: simple_etl
|
158
|
-
rubygems_version: 1.8.
|
132
|
+
rubygems_version: 1.8.10
|
159
133
|
signing_key:
|
160
134
|
specification_version: 3
|
161
135
|
summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
|
162
|
-
test_files:
|
136
|
+
test_files:
|
163
137
|
- spec/fixtures/sample.stl
|
164
138
|
- spec/lib/simple_etl/source/base_context_spec.rb
|
165
139
|
- spec/lib/simple_etl/source/base_spec.rb
|