simple_etl 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- simple_etl
1
+ Simple ETL
2
2
  ==========
3
3
 
4
4
  An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
@@ -114,4 +114,17 @@ They are functions that help you manipulate the parsed raw data:
114
114
  A transformer is a code block that transforms a particular value. It's executed as soon as the input value is parsed (if it's valid).
115
115
 
116
116
  A generator is a code block that generates a new property for the current row.
117
- All the generators are executed when the entire row as been read and transformed.
117
+ All the generators are executed when the entire row has been read and transformed.
118
+
119
+ ### Skip rows
120
+
121
+ If you need to skip some rows before parsing the file you can use the helper 'skip_rows':
122
+
123
+ ```ruby
124
+ define :format do
125
+ skip_rows 2
126
+ field :name
127
+ end
128
+ ```
129
+
130
+ This will start the parsing from the third row.
@@ -41,15 +41,17 @@ module SimpleEtl
41
41
  result
42
42
  end
43
43
 
44
+ def read_rows src, args
45
+ raise 'Abstract Method'
46
+ end
47
+
44
48
  def parse src, args = {}
45
49
  result = args[:result] || ParseResult.new
46
- if args[:type] == :inline
47
- lines = src.lines.map &:chomp
48
- else
49
- lines = File.readlines(src).map &:chomp
50
- end
50
+ lines = read_rows src, args
51
51
  lines.each_with_index do |row, index|
52
- parse_row row, :row_index => index, :result => result
52
+ if index >= context.row_count_to_skip
53
+ parse_row row, :row_index => index, :result => result
54
+ end
53
55
  end
54
56
  result
55
57
  end
@@ -4,11 +4,13 @@ module SimpleEtl
4
4
  attr_reader :fields
5
5
  attr_reader :transformations
6
6
  attr_reader :generators
7
+ attr_reader :row_count_to_skip
7
8
 
8
9
  def initialize
9
10
  @fields = []
10
11
  @transformations = {}
11
12
  @generators = []
13
+ @row_count_to_skip = 0
12
14
  end
13
15
 
14
16
  def field name, args = {}
@@ -30,6 +32,10 @@ module SimpleEtl
30
32
  generators << args.merge(:name => name, :block => block)
31
33
  end
32
34
 
35
+ def skip_rows row_count
36
+ @row_count_to_skip = row_count
37
+ end
38
+
33
39
  def method_missing name, *params, &block
34
40
  md = name.to_s.match /^(required_)?(\w+)$/
35
41
  type = md && md[2].to_sym
@@ -11,6 +11,10 @@ module SimpleEtl
11
11
  length = row.length - field[:start] if length == :eol
12
12
  row[field[:start], length]
13
13
  end
14
+
15
+ def read_rows src, args
16
+ (args[:type] == :inline && src.lines || File.readlines(src)).map &:chomp
17
+ end
14
18
  end
15
19
  end
16
20
 
@@ -1,3 +1,3 @@
1
1
  module SimpleEtl
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/simple_etl.gemspec CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
7
7
  s.version = SimpleEtl::VERSION
8
8
  s.authors = ["Nicola Racco"]
9
9
  s.email = ["nicola@nicolaracco.com"]
10
- s.homepage = ""
10
+ s.homepage = "https://github.com/nicolaracco/simple_etl"
11
11
  s.summary = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
12
12
  s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}
13
13
 
@@ -103,6 +103,18 @@ module SimpleEtl
103
103
  end
104
104
  end
105
105
  end
106
+
107
+ describe '#parse' do
108
+ before do
109
+ subject.stub :read_rows => [[], []]
110
+ end
111
+
112
+ it 'should skip first rows if specified in context' do
113
+ subject.parse(nil).rows.count.should == 2
114
+ subject.context.skip_rows 1
115
+ subject.parse(nil).rows.count.should == 1
116
+ end
117
+ end
106
118
  end
107
119
  end
108
120
  end
metadata CHANGED
@@ -1,102 +1,79 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: simple_etl
3
- version: !ruby/object:Gem::Version
4
- hash: 29
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 1
10
- version: 0.0.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Nicola Racco
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-07-03 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-07-05 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: rake
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &70352004437540 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 3
29
- segments:
30
- - 0
31
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
32
22
  type: :development
33
- version_requirements: *id001
34
- - !ruby/object:Gem::Dependency
35
- name: guard
36
23
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *70352004437540
25
+ - !ruby/object:Gem::Dependency
26
+ name: guard
27
+ requirement: &70352004437120 !ruby/object:Gem::Requirement
38
28
  none: false
39
- requirements:
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- hash: 3
43
- segments:
44
- - 0
45
- version: "0"
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
46
33
  type: :development
47
- version_requirements: *id002
48
- - !ruby/object:Gem::Dependency
49
- name: growl
50
34
  prerelease: false
51
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *70352004437120
36
+ - !ruby/object:Gem::Dependency
37
+ name: growl
38
+ requirement: &70352004436480 !ruby/object:Gem::Requirement
52
39
  none: false
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- hash: 3
57
- segments:
58
- - 0
59
- version: "0"
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
60
44
  type: :development
61
- version_requirements: *id003
62
- - !ruby/object:Gem::Dependency
63
- name: guard-rspec
64
45
  prerelease: false
65
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *70352004436480
47
+ - !ruby/object:Gem::Dependency
48
+ name: guard-rspec
49
+ requirement: &70352004435860 !ruby/object:Gem::Requirement
66
50
  none: false
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- hash: 3
71
- segments:
72
- - 0
73
- version: "0"
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
74
55
  type: :development
75
- version_requirements: *id004
76
- - !ruby/object:Gem::Dependency
77
- name: rspec
78
56
  prerelease: false
79
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *70352004435860
58
+ - !ruby/object:Gem::Dependency
59
+ name: rspec
60
+ requirement: &70352004432160 !ruby/object:Gem::Requirement
80
61
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- hash: 3
85
- segments:
86
- - 0
87
- version: "0"
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
88
66
  type: :development
89
- version_requirements: *id005
90
- description: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
91
- email:
67
+ prerelease: false
68
+ version_requirements: *70352004432160
69
+ description: An easy-to-use toolkit to help you with ETL (Extract Transform Load)
70
+ operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
71
+ email:
92
72
  - nicola@nicolaracco.com
93
73
  executables: []
94
-
95
74
  extensions: []
96
-
97
75
  extra_rdoc_files: []
98
-
99
- files:
76
+ files:
100
77
  - .gitignore
101
78
  - .rspec
102
79
  - Gemfile
@@ -126,40 +103,37 @@ files:
126
103
  - spec/lib/simple_etl/source/row_spec.rb
127
104
  - spec/lib/simple_etl/source_spec.rb
128
105
  - spec/spec_helper.rb
129
- homepage: ""
106
+ homepage: https://github.com/nicolaracco/simple_etl
130
107
  licenses: []
131
-
132
108
  post_install_message:
133
109
  rdoc_options: []
134
-
135
- require_paths:
110
+ require_paths:
136
111
  - lib
137
- required_ruby_version: !ruby/object:Gem::Requirement
112
+ required_ruby_version: !ruby/object:Gem::Requirement
138
113
  none: false
139
- requirements:
140
- - - ">="
141
- - !ruby/object:Gem::Version
142
- hash: 3
143
- segments:
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ segments:
144
119
  - 0
145
- version: "0"
146
- required_rubygems_version: !ruby/object:Gem::Requirement
120
+ hash: 3236384581747162330
121
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
122
  none: false
148
- requirements:
149
- - - ">="
150
- - !ruby/object:Gem::Version
151
- hash: 3
152
- segments:
123
+ requirements:
124
+ - - ! '>='
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ segments:
153
128
  - 0
154
- version: "0"
129
+ hash: 3236384581747162330
155
130
  requirements: []
156
-
157
131
  rubyforge_project: simple_etl
158
- rubygems_version: 1.8.23
132
+ rubygems_version: 1.8.10
159
133
  signing_key:
160
134
  specification_version: 3
161
135
  summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
162
- test_files:
136
+ test_files:
163
137
  - spec/fixtures/sample.stl
164
138
  - spec/lib/simple_etl/source/base_context_spec.rb
165
139
  - spec/lib/simple_etl/source/base_spec.rb