simple_etl 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- simple_etl
1
+ Simple ETL
2
2
  ==========
3
3
 
4
4
  An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations.
@@ -114,4 +114,17 @@ They are functions that help you manipulate the parsed raw data:
114
114
  A transformer is a code block that transforms a particular value. It's executed as soon as the input value is parsed (if it's valid).
115
115
 
116
116
  A generator is a code block that generates a new property for the current row.
117
- All the generators are executed when the entire row has been read and transformed.
117
+ All the generators are executed when the entire row has been read and transformed.
118
+
119
+ ### Skip rows
120
+
121
+ If you need to skip some rows before parsing the file, you can use the helper 'skip_rows':
122
+
123
+ ```ruby
124
+ define :format do
125
+ skip_rows 2
126
+ field :name
127
+ end
128
+ ```
129
+
130
+ This will start the parsing from the third row.
@@ -41,15 +41,17 @@ module SimpleEtl
41
41
  result
42
42
  end
43
43
 
44
+ def read_rows src, args
45
+ raise 'Abstract Method'
46
+ end
47
+
44
48
  def parse src, args = {}
45
49
  result = args[:result] || ParseResult.new
46
- if args[:type] == :inline
47
- lines = src.lines.map &:chomp
48
- else
49
- lines = File.readlines(src).map &:chomp
50
- end
50
+ lines = read_rows src, args
51
51
  lines.each_with_index do |row, index|
52
- parse_row row, :row_index => index, :result => result
52
+ if index >= context.row_count_to_skip
53
+ parse_row row, :row_index => index, :result => result
54
+ end
53
55
  end
54
56
  result
55
57
  end
@@ -4,11 +4,13 @@ module SimpleEtl
4
4
  attr_reader :fields
5
5
  attr_reader :transformations
6
6
  attr_reader :generators
7
+ attr_reader :row_count_to_skip
7
8
 
8
9
  def initialize
9
10
  @fields = []
10
11
  @transformations = {}
11
12
  @generators = []
13
+ @row_count_to_skip = 0
12
14
  end
13
15
 
14
16
  def field name, args = {}
@@ -30,6 +32,10 @@ module SimpleEtl
30
32
  generators << args.merge(:name => name, :block => block)
31
33
  end
32
34
 
35
+ def skip_rows row_count
36
+ @row_count_to_skip = row_count
37
+ end
38
+
33
39
  def method_missing name, *params, &block
34
40
  md = name.to_s.match /^(required_)?(\w+)$/
35
41
  type = md && md[2].to_sym
@@ -11,6 +11,10 @@ module SimpleEtl
11
11
  length = row.length - field[:start] if length == :eol
12
12
  row[field[:start], length]
13
13
  end
14
+
15
+ def read_rows src, args
16
+ (args[:type] == :inline && src.lines || File.readlines(src)).map &:chomp
17
+ end
14
18
  end
15
19
  end
16
20
 
@@ -1,3 +1,3 @@
1
1
  module SimpleEtl
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/simple_etl.gemspec CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
7
7
  s.version = SimpleEtl::VERSION
8
8
  s.authors = ["Nicola Racco"]
9
9
  s.email = ["nicola@nicolaracco.com"]
10
- s.homepage = ""
10
+ s.homepage = "https://github.com/nicolaracco/simple_etl"
11
11
  s.summary = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations}
12
12
  s.description = %q{An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.}
13
13
 
@@ -103,6 +103,18 @@ module SimpleEtl
103
103
  end
104
104
  end
105
105
  end
106
+
107
+ describe '#parse' do
108
+ before do
109
+ subject.stub :read_rows => [[], []]
110
+ end
111
+
112
+ it 'should skip first rows if specified in context' do
113
+ subject.parse(nil).rows.count.should == 2
114
+ subject.context.skip_rows 1
115
+ subject.parse(nil).rows.count.should == 1
116
+ end
117
+ end
106
118
  end
107
119
  end
108
120
  end
metadata CHANGED
@@ -1,102 +1,79 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: simple_etl
3
- version: !ruby/object:Gem::Version
4
- hash: 29
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 1
10
- version: 0.0.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Nicola Racco
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-07-03 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-07-05 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: rake
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &70352004437540 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 3
29
- segments:
30
- - 0
31
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
32
22
  type: :development
33
- version_requirements: *id001
34
- - !ruby/object:Gem::Dependency
35
- name: guard
36
23
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *70352004437540
25
+ - !ruby/object:Gem::Dependency
26
+ name: guard
27
+ requirement: &70352004437120 !ruby/object:Gem::Requirement
38
28
  none: false
39
- requirements:
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- hash: 3
43
- segments:
44
- - 0
45
- version: "0"
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
46
33
  type: :development
47
- version_requirements: *id002
48
- - !ruby/object:Gem::Dependency
49
- name: growl
50
34
  prerelease: false
51
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *70352004437120
36
+ - !ruby/object:Gem::Dependency
37
+ name: growl
38
+ requirement: &70352004436480 !ruby/object:Gem::Requirement
52
39
  none: false
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- hash: 3
57
- segments:
58
- - 0
59
- version: "0"
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
60
44
  type: :development
61
- version_requirements: *id003
62
- - !ruby/object:Gem::Dependency
63
- name: guard-rspec
64
45
  prerelease: false
65
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *70352004436480
47
+ - !ruby/object:Gem::Dependency
48
+ name: guard-rspec
49
+ requirement: &70352004435860 !ruby/object:Gem::Requirement
66
50
  none: false
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- hash: 3
71
- segments:
72
- - 0
73
- version: "0"
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
74
55
  type: :development
75
- version_requirements: *id004
76
- - !ruby/object:Gem::Dependency
77
- name: rspec
78
56
  prerelease: false
79
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *70352004435860
58
+ - !ruby/object:Gem::Dependency
59
+ name: rspec
60
+ requirement: &70352004432160 !ruby/object:Gem::Requirement
80
61
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- hash: 3
85
- segments:
86
- - 0
87
- version: "0"
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
88
66
  type: :development
89
- version_requirements: *id005
90
- description: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
91
- email:
67
+ prerelease: false
68
+ version_requirements: *70352004432160
69
+ description: An easy-to-use toolkit to help you with ETL (Extract Transform Load)
70
+ operations. Simple ETL 'would be' (:D) framework-agnostic and easy to use.
71
+ email:
92
72
  - nicola@nicolaracco.com
93
73
  executables: []
94
-
95
74
  extensions: []
96
-
97
75
  extra_rdoc_files: []
98
-
99
- files:
76
+ files:
100
77
  - .gitignore
101
78
  - .rspec
102
79
  - Gemfile
@@ -126,40 +103,37 @@ files:
126
103
  - spec/lib/simple_etl/source/row_spec.rb
127
104
  - spec/lib/simple_etl/source_spec.rb
128
105
  - spec/spec_helper.rb
129
- homepage: ""
106
+ homepage: https://github.com/nicolaracco/simple_etl
130
107
  licenses: []
131
-
132
108
  post_install_message:
133
109
  rdoc_options: []
134
-
135
- require_paths:
110
+ require_paths:
136
111
  - lib
137
- required_ruby_version: !ruby/object:Gem::Requirement
112
+ required_ruby_version: !ruby/object:Gem::Requirement
138
113
  none: false
139
- requirements:
140
- - - ">="
141
- - !ruby/object:Gem::Version
142
- hash: 3
143
- segments:
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ segments:
144
119
  - 0
145
- version: "0"
146
- required_rubygems_version: !ruby/object:Gem::Requirement
120
+ hash: 3236384581747162330
121
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
122
  none: false
148
- requirements:
149
- - - ">="
150
- - !ruby/object:Gem::Version
151
- hash: 3
152
- segments:
123
+ requirements:
124
+ - - ! '>='
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ segments:
153
128
  - 0
154
- version: "0"
129
+ hash: 3236384581747162330
155
130
  requirements: []
156
-
157
131
  rubyforge_project: simple_etl
158
- rubygems_version: 1.8.23
132
+ rubygems_version: 1.8.10
159
133
  signing_key:
160
134
  specification_version: 3
161
135
  summary: An easy-to-use toolkit to help you with ETL (Extract Transform Load) operations
162
- test_files:
136
+ test_files:
163
137
  - spec/fixtures/sample.stl
164
138
  - spec/lib/simple_etl/source/base_context_spec.rb
165
139
  - spec/lib/simple_etl/source/base_spec.rb