eccsv 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +113 -0
- data/Rakefile +16 -0
- data/eccsv.gemspec +25 -0
- data/lib/eccsv/errors.rb +16 -0
- data/lib/eccsv/parser.rb +465 -0
- data/lib/eccsv/parser.y +264 -0
- data/lib/eccsv/version.rb +3 -0
- data/lib/eccsv.rb +6 -0
- data/test/helper.rb +16 -0
- data/test/test_parser.rb +230 -0
- metadata +115 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8dd728098ee2f3066326be199f2657c34b33facd
|
4
|
+
data.tar.gz: 0472be8f00b3f6c9079cc20808bfd89804ff2212
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9238b550d38a2766e5c53ca152855dfc7f782cc79c98dad60030095138e4a40a68655b14f22c3e376214901294ad81ad3e835e9091c2ee9d810c302e3f744ff3
|
7
|
+
data.tar.gz: 12599cfb6ae428dc134dd67c64fac344fa2a0306ba28de4e5f7a183b2754780d5c3e865ead20f6e20e2979b076e49e13ca83bbd11eb51ddf03587839d648b693
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Vanderbilt University
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# ECCSV
|
2
|
+
|
3
|
+
ECCSV (error-correcting comma-separated values) is a CSV parsing library with
|
4
|
+
advanced error reporting and correcting.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
gem 'eccsv'
|
11
|
+
|
12
|
+
And then execute:
|
13
|
+
|
14
|
+
$ bundle
|
15
|
+
|
16
|
+
Or install it yourself as:
|
17
|
+
|
18
|
+
$ gem install eccsv
|
19
|
+
|
20
|
+
## Basic Usage
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
require 'eccsv'
|
24
|
+
|
25
|
+
data = <<EOF
|
26
|
+
foo,bar
|
27
|
+
baz,qux
|
28
|
+
EOF
|
29
|
+
|
30
|
+
parser = ECCSV::Parser.new
|
31
|
+
parser.parse(data) #=> [["foo", "bar"], ["baz", "qux"]]
|
32
|
+
```
|
33
|
+
|
34
|
+
## Errors
|
35
|
+
|
36
|
+
One of the goals of this project is to give you descriptive error messages.
|
37
|
+
|
38
|
+
Each error type is a subclass of `ECCSV::Error` and contains the exact line
|
39
|
+
number (via `Error#line`) and column number (via `Error#col`) where the error
|
40
|
+
took place.
|
41
|
+
|
42
|
+
* missing closing quote (`ECCSV::UnmatchedQuoteError`)
|
43
|
+
* quote in the wrong place (`ECCSV::StrayQuoteError`)
|
44
|
+
* rows with not enough fields (`ECCSV::MissingFieldsError`)
|
45
|
+
* rows with too many fields (`ECCSV::ExtraFieldsError`)
|
46
|
+
|
47
|
+
Since missing/extra fields do not cause the CSV to be unparsable, they are
|
48
|
+
treated as warnings instead of errors (see example below).
|
49
|
+
|
50
|
+
### Examples
|
51
|
+
|
52
|
+
#### Unmatched quote
|
53
|
+
|
54
|
+
If there was an error, `#parse` will return `nil` and set `#error`.
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
require 'eccsv'
|
58
|
+
|
59
|
+
data = <<EOF
|
60
|
+
foo,"bar
|
61
|
+
baz,qux
|
62
|
+
EOF
|
63
|
+
|
64
|
+
parser = ECCSV::Parser.new
|
65
|
+
parser.parse(data) #=> nil
|
66
|
+
parser.error #=> #<ECCSV::UnmatchedQuoteError: unmatched quote at line 1, column 5>
|
67
|
+
parser.error.line #=> 1
|
68
|
+
parser.error.col #=> 5
|
69
|
+
```
|
70
|
+
|
71
|
+
#### Missing fields
|
72
|
+
|
73
|
+
If there was a warning, `#parse` will return the records and add to `#warnings`.
|
74
|
+
|
75
|
+
```ruby
|
76
|
+
require 'eccsv'
|
77
|
+
|
78
|
+
data = <<EOF
|
79
|
+
foo,bar
|
80
|
+
baz
|
81
|
+
EOF
|
82
|
+
|
83
|
+
parser = ECCSV::Parser.new
|
84
|
+
parser.parse(data) #=> [["foo", "bar"], ["baz"]]
|
85
|
+
parser.warnings #=> [#<ECCSV::MissingFieldsError: expected 1 more fields on line 2>]
|
86
|
+
parser.warnings[0].line #=> 2
|
87
|
+
parser.warnings[0].col #=> 4
|
88
|
+
```
|
89
|
+
|
90
|
+
#### Extra fields
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
require 'eccsv'
|
94
|
+
|
95
|
+
data = <<EOF
|
96
|
+
foo
|
97
|
+
bar,baz
|
98
|
+
EOF
|
99
|
+
|
100
|
+
parser = ECCSV::Parser.new
|
101
|
+
parser.parse(data) #=> [["foo"], ["bar", "baz"]]
|
102
|
+
parser.warnings #=> [#<ECCSV::ExtraFieldsError: 1 extra fields found on line 2, column 4>]
|
103
|
+
parser.warnings[0].line #=> 2
|
104
|
+
parser.warnings[0].col #=> 4
|
105
|
+
```
|
106
|
+
|
107
|
+
## Contributing
|
108
|
+
|
109
|
+
1. Fork it
|
110
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
111
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
112
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
113
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
3
|
+
|
4
|
+
Rake::TestTask.new do |t|
|
5
|
+
t.libs << "test"
|
6
|
+
t.pattern = 'test/**/test*.rb'
|
7
|
+
end
|
8
|
+
task :test => :racc
|
9
|
+
task :default => :test
|
10
|
+
|
11
|
+
desc "Compile racc grammar"
|
12
|
+
task :racc => "lib/eccsv/parser.rb"
|
13
|
+
|
14
|
+
# Regenerate the Ruby parser whenever the Racc grammar is newer than it.
file "lib/eccsv/parser.rb" => "lib/eccsv/parser.y" do |t|
  # Use Rake's `sh` rather than Kernel#system: `sh` fails the task when racc
  # exits with a non-zero status, so the :test task (which depends on :racc)
  # never runs against a stale or half-written parser. Passing the arguments
  # separately also avoids going through a shell.
  sh "racc", "-v", "-o", t.name, t.source
end
|
data/eccsv.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'eccsv/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "eccsv"
|
8
|
+
spec.version = ECCSV::VERSION
|
9
|
+
spec.authors = ["Jeremy Stephens"]
|
10
|
+
spec.email = ["jeremy.f.stephens@vanderbilt.edu"]
|
11
|
+
spec.description = %q{CSV library with advanced error reporting}
|
12
|
+
spec.summary = %q{CSV library with advanced error reporting}
|
13
|
+
spec.homepage = "https://github.com/coupler/eccsv"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "test-unit"
|
24
|
+
spec.add_development_dependency "racc"
|
25
|
+
end
|
data/lib/eccsv/errors.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module ECCSV
  # Base class for every problem reported by the parser.
  #
  # It derives from StandardError (not Exception) so that an ordinary
  # `rescue` / `rescue => e` in calling code catches it; rescuing Exception
  # would also swallow signals and exit requests.
  class Error < StandardError
    # Line and column at which the problem was detected (nil when unknown).
    attr_reader :line, :col

    # msg  - human-readable description of the problem
    # line - line number of the problem, or nil
    # col  - column number of the problem, or nil
    def initialize(msg = nil, line = nil, col = nil)
      super(msg)
      @line = line
      @col = col
    end
  end

  # A quoted field is missing its closing quote.
  class UnmatchedQuoteError < Error; end

  # A quote appears in the wrong place.
  class StrayQuoteError < Error; end

  # A row has fewer fields than expected (reported as a warning).
  class MissingFieldsError < Error; end

  # A row has more fields than expected (reported as a warning).
  class ExtraFieldsError < Error; end
end
|
data/lib/eccsv/parser.rb
ADDED
@@ -0,0 +1,465 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by Racc 1.4.12
|
4
|
+
# from Racc grammer file "".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser.rb'
|
8
|
+
|
9
|
+
require 'strscan'
|
10
|
+
|
11
|
+
module ECCSV
|
12
|
+
class Parser < Racc::Parser
|
13
|
+
|
14
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
15
|
+
class Node
|
16
|
+
attr_reader :value, :token, :line, :col
|
17
|
+
|
18
|
+
def initialize(value = "", token = nil, line = nil, col = nil)
|
19
|
+
@value = value
|
20
|
+
@token = token
|
21
|
+
@line = line
|
22
|
+
@col = col
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class ParentNode < Node
|
27
|
+
def initialize(children = [], line = nil, col = nil)
|
28
|
+
last = children.last
|
29
|
+
if last && last.is_a?(Node)
|
30
|
+
line = last.line
|
31
|
+
col = last.col
|
32
|
+
end
|
33
|
+
super(nil, nil, line, col)
|
34
|
+
@children = children
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class QuotedTextNode < ParentNode
|
39
|
+
def value
|
40
|
+
@value ||= @children.collect(&:value).join
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class FieldNode < ParentNode
|
45
|
+
def value
|
46
|
+
@value ||=
|
47
|
+
if @children[0].token == :TEXT
|
48
|
+
@children[0].value
|
49
|
+
else
|
50
|
+
# quoted text
|
51
|
+
@children[1].value
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class DelimFieldNode < ParentNode
|
57
|
+
def value
|
58
|
+
@value ||= @children[0].value
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
class DelimFieldsNode < ParentNode
|
63
|
+
def value
|
64
|
+
@value ||=
|
65
|
+
if @children.empty?
|
66
|
+
[]
|
67
|
+
else
|
68
|
+
@children[0].value + [@children[1].value]
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
class RecordNode < ParentNode
|
74
|
+
def value
|
75
|
+
# TODO: 'consume' children to produce value to reduce memory footprint
|
76
|
+
@value ||= @children[0].value + [@children[1].value]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
class DelimRecordNode < ParentNode
|
81
|
+
def value
|
82
|
+
@value ||= @children.length == 1 ? [] : @children[0].value
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
class DelimRecordsNode < ParentNode
|
87
|
+
def value
|
88
|
+
if @value.nil?
|
89
|
+
if @children.empty?
|
90
|
+
@value = []
|
91
|
+
else
|
92
|
+
@value = @children[0].value
|
93
|
+
val = @children[1].value
|
94
|
+
if !val.empty?
|
95
|
+
@value += [val]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
@value
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
class RootNode < ParentNode
|
104
|
+
def value
|
105
|
+
if @value.nil?
|
106
|
+
@value = @children[0].value
|
107
|
+
if @children[1]
|
108
|
+
@value += [@children[1].value]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
@value
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
attr_reader :error
|
116
|
+
|
117
|
+
def parse(str)
|
118
|
+
@scanner = StringScanner.new(str)
|
119
|
+
@line = 1
|
120
|
+
@col = 1
|
121
|
+
do_parse
|
122
|
+
end
|
123
|
+
|
124
|
+
def next_token
|
125
|
+
until @scanner.empty?
|
126
|
+
next_line = @line
|
127
|
+
next_col = @col
|
128
|
+
case
|
129
|
+
when match = @scanner.scan(/,/)
|
130
|
+
token = :COMMA
|
131
|
+
when match = @scanner.scan(/"/)
|
132
|
+
token = :QUOTE
|
133
|
+
when match = @scanner.scan(/\n/)
|
134
|
+
token = :NEWLINE
|
135
|
+
next_line += 1
|
136
|
+
next_col = 0
|
137
|
+
when match = @scanner.scan(/[^,\n"]+/)
|
138
|
+
token = :TEXT
|
139
|
+
else
|
140
|
+
raise "can't recognize <#{@scanner.peek(5)}>"
|
141
|
+
end
|
142
|
+
next_col += match.length
|
143
|
+
|
144
|
+
value = node(match, token)
|
145
|
+
@line = next_line
|
146
|
+
@col = next_col
|
147
|
+
|
148
|
+
return [token, value]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
def warnings
|
153
|
+
@warnings ||= []
|
154
|
+
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
def node(value = "", token = nil, line = @line, col = @col)
|
159
|
+
Node.new(value, token, line, col)
|
160
|
+
end
|
161
|
+
|
162
|
+
def quoted_text(children = [], line = @line, col = @col)
|
163
|
+
QuotedTextNode.new(children, line, col)
|
164
|
+
end
|
165
|
+
|
166
|
+
def field(children = [], line = @line, col = @col)
|
167
|
+
FieldNode.new(children, line, col)
|
168
|
+
end
|
169
|
+
|
170
|
+
def delim_field(children = [], line = @line, col = @col)
|
171
|
+
DelimFieldNode.new(children, line, col)
|
172
|
+
end
|
173
|
+
|
174
|
+
def delim_fields(children = [], line = @line, col = @col)
|
175
|
+
DelimFieldsNode.new(children, line, col)
|
176
|
+
end
|
177
|
+
|
178
|
+
def record(children = [], line = @line, col = @col)
|
179
|
+
record = RecordNode.new(children, line, col)
|
180
|
+
value = record.value
|
181
|
+
if defined? @num_fields
|
182
|
+
first = children[0]
|
183
|
+
line = first.line
|
184
|
+
col = first.col
|
185
|
+
if @num_fields > value.length
|
186
|
+
msg = "expected %d more fields on line %d" % [@num_fields - value.length, line]
|
187
|
+
self.warnings.push(MissingFieldsError.new(msg, line, col))
|
188
|
+
elsif @num_fields < value.length
|
189
|
+
msg = "%d extra fields found on line %d, column %d" % [value.length - @num_fields, line, col]
|
190
|
+
self.warnings.push(ExtraFieldsError.new(msg, line, col))
|
191
|
+
end
|
192
|
+
else
|
193
|
+
@num_fields = value.length
|
194
|
+
end
|
195
|
+
|
196
|
+
record
|
197
|
+
end
|
198
|
+
|
199
|
+
def delim_record(children = [], line = @line, col = @col)
|
200
|
+
DelimRecordNode.new(children, line, col)
|
201
|
+
end
|
202
|
+
|
203
|
+
def delim_records(children = [], line = @line, col = @col)
|
204
|
+
DelimRecordsNode.new(children, line, col)
|
205
|
+
end
|
206
|
+
|
207
|
+
def root(children = [], line = @line, col = @col)
|
208
|
+
RootNode.new(children, line, col)
|
209
|
+
end
|
210
|
+
|
211
|
+
def on_error(t, val, stack)
|
212
|
+
#pp t
|
213
|
+
#pp val
|
214
|
+
#pp stack
|
215
|
+
|
216
|
+
# figure out what error we have
|
217
|
+
if t == 0
|
218
|
+
# unexpected EOF
|
219
|
+
type = nil
|
220
|
+
stack.reverse_each do |node|
|
221
|
+
case node
|
222
|
+
when QuotedTextNode
|
223
|
+
type = :unmatched_quote
|
224
|
+
when Node
|
225
|
+
if type == :unmatched_quote && node.token == :QUOTE
|
226
|
+
line = node.line
|
227
|
+
col = node.col
|
228
|
+
@error = UnmatchedQuoteError.new("unmatched quote at line #{line}, column #{col}", line, col)
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
if @error.nil?
|
234
|
+
@error = Error.new("unexpected EOF")
|
235
|
+
end
|
236
|
+
elsif val.is_a?(Node) && val.token == :QUOTE
|
237
|
+
line = val.line
|
238
|
+
col = val.col
|
239
|
+
@error = StrayQuoteError.new("stray quote at line #{line}, column #{col}", line, col)
|
240
|
+
end
|
241
|
+
end
|
242
|
+
...end parser.y/module_eval...
|
243
|
+
##### State transition tables begin ###
|
244
|
+
|
245
|
+
racc_action_table = [
|
246
|
+
18, 17, 19, 16, -1, 9, 6, 13, 12, 8,
|
247
|
+
14, 3 ]
|
248
|
+
|
249
|
+
racc_action_check = [
|
250
|
+
15, 15, 15, 15, 2, 4, 2, 7, 7, 3,
|
251
|
+
10, 1 ]
|
252
|
+
|
253
|
+
racc_action_pointer = [
|
254
|
+
nil, 11, 4, 9, 3, nil, nil, 3, nil, nil,
|
255
|
+
7, nil, nil, nil, nil, -2, nil, nil, nil, nil ]
|
256
|
+
|
257
|
+
racc_action_default = [
|
258
|
+
-3, -17, -8, -17, -2, -4, -5, -17, 20, -6,
|
259
|
+
-7, -9, -13, -12, -10, -17, -11, -14, -15, -16 ]
|
260
|
+
|
261
|
+
racc_goto_table = [
|
262
|
+
1, 2, 4, 5, 7, 10, 11, 15 ]
|
263
|
+
|
264
|
+
racc_goto_check = [
|
265
|
+
1, 2, 3, 4, 5, 6, 7, 8 ]
|
266
|
+
|
267
|
+
racc_goto_pointer = [
|
268
|
+
nil, 0, 1, 0, 1, 2, -2, -1, -5 ]
|
269
|
+
|
270
|
+
racc_goto_default = [
|
271
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil ]
|
272
|
+
|
273
|
+
racc_reduce_table = [
|
274
|
+
0, 0, :racc_error,
|
275
|
+
1, 7, :_reduce_1,
|
276
|
+
2, 7, :_reduce_2,
|
277
|
+
0, 8, :_reduce_3,
|
278
|
+
2, 8, :_reduce_4,
|
279
|
+
1, 10, :_reduce_5,
|
280
|
+
2, 10, :_reduce_6,
|
281
|
+
2, 9, :_reduce_7,
|
282
|
+
0, 11, :_reduce_8,
|
283
|
+
2, 11, :_reduce_9,
|
284
|
+
2, 13, :_reduce_10,
|
285
|
+
3, 12, :_reduce_11,
|
286
|
+
1, 12, :_reduce_12,
|
287
|
+
0, 14, :_reduce_13,
|
288
|
+
2, 14, :_reduce_14,
|
289
|
+
2, 14, :_reduce_15,
|
290
|
+
2, 14, :_reduce_16 ]
|
291
|
+
|
292
|
+
racc_reduce_n = 17
|
293
|
+
|
294
|
+
racc_shift_n = 20
|
295
|
+
|
296
|
+
racc_token_table = {
|
297
|
+
false => 0,
|
298
|
+
:error => 1,
|
299
|
+
:NEWLINE => 2,
|
300
|
+
:COMMA => 3,
|
301
|
+
:TEXT => 4,
|
302
|
+
:QUOTE => 5 }
|
303
|
+
|
304
|
+
racc_nt_base = 6
|
305
|
+
|
306
|
+
racc_use_result_var = true
|
307
|
+
|
308
|
+
Racc_arg = [
|
309
|
+
racc_action_table,
|
310
|
+
racc_action_check,
|
311
|
+
racc_action_default,
|
312
|
+
racc_action_pointer,
|
313
|
+
racc_goto_table,
|
314
|
+
racc_goto_check,
|
315
|
+
racc_goto_default,
|
316
|
+
racc_goto_pointer,
|
317
|
+
racc_nt_base,
|
318
|
+
racc_reduce_table,
|
319
|
+
racc_token_table,
|
320
|
+
racc_shift_n,
|
321
|
+
racc_reduce_n,
|
322
|
+
racc_use_result_var ]
|
323
|
+
|
324
|
+
Racc_token_to_s_table = [
|
325
|
+
"$end",
|
326
|
+
"error",
|
327
|
+
"NEWLINE",
|
328
|
+
"COMMA",
|
329
|
+
"TEXT",
|
330
|
+
"QUOTE",
|
331
|
+
"$start",
|
332
|
+
"root",
|
333
|
+
"delim_records",
|
334
|
+
"record",
|
335
|
+
"delim_record",
|
336
|
+
"delim_fields",
|
337
|
+
"field",
|
338
|
+
"delim_field",
|
339
|
+
"quoted_text" ]
|
340
|
+
|
341
|
+
Racc_debug_parser = true
|
342
|
+
|
343
|
+
##### State transition tables end #####
|
344
|
+
|
345
|
+
# reduce 0 omitted
|
346
|
+
|
347
|
+
module_eval(<<'.,.,', 'parser.y', 4)
|
348
|
+
def _reduce_1(val, _values, result)
|
349
|
+
result = root(val).value
|
350
|
+
result
|
351
|
+
end
|
352
|
+
.,.,
|
353
|
+
|
354
|
+
module_eval(<<'.,.,', 'parser.y', 5)
|
355
|
+
def _reduce_2(val, _values, result)
|
356
|
+
result = root(val).value
|
357
|
+
result
|
358
|
+
end
|
359
|
+
.,.,
|
360
|
+
|
361
|
+
module_eval(<<'.,.,', 'parser.y', 7)
|
362
|
+
def _reduce_3(val, _values, result)
|
363
|
+
result = delim_records
|
364
|
+
result
|
365
|
+
end
|
366
|
+
.,.,
|
367
|
+
|
368
|
+
module_eval(<<'.,.,', 'parser.y', 8)
|
369
|
+
def _reduce_4(val, _values, result)
|
370
|
+
result = delim_records(val)
|
371
|
+
result
|
372
|
+
end
|
373
|
+
.,.,
|
374
|
+
|
375
|
+
module_eval(<<'.,.,', 'parser.y', 10)
|
376
|
+
def _reduce_5(val, _values, result)
|
377
|
+
result = delim_record(val)
|
378
|
+
result
|
379
|
+
end
|
380
|
+
.,.,
|
381
|
+
|
382
|
+
module_eval(<<'.,.,', 'parser.y', 11)
|
383
|
+
def _reduce_6(val, _values, result)
|
384
|
+
result = delim_record(val)
|
385
|
+
result
|
386
|
+
end
|
387
|
+
.,.,
|
388
|
+
|
389
|
+
module_eval(<<'.,.,', 'parser.y', 14)
|
390
|
+
def _reduce_7(val, _values, result)
|
391
|
+
result = record(val)
|
392
|
+
result
|
393
|
+
end
|
394
|
+
.,.,
|
395
|
+
|
396
|
+
module_eval(<<'.,.,', 'parser.y', 16)
|
397
|
+
def _reduce_8(val, _values, result)
|
398
|
+
result = delim_fields
|
399
|
+
result
|
400
|
+
end
|
401
|
+
.,.,
|
402
|
+
|
403
|
+
module_eval(<<'.,.,', 'parser.y', 17)
|
404
|
+
def _reduce_9(val, _values, result)
|
405
|
+
result = delim_fields(val)
|
406
|
+
result
|
407
|
+
end
|
408
|
+
.,.,
|
409
|
+
|
410
|
+
module_eval(<<'.,.,', 'parser.y', 19)
|
411
|
+
def _reduce_10(val, _values, result)
|
412
|
+
result = delim_field(val)
|
413
|
+
result
|
414
|
+
end
|
415
|
+
.,.,
|
416
|
+
|
417
|
+
module_eval(<<'.,.,', 'parser.y', 21)
|
418
|
+
def _reduce_11(val, _values, result)
|
419
|
+
result = field(val)
|
420
|
+
result
|
421
|
+
end
|
422
|
+
.,.,
|
423
|
+
|
424
|
+
module_eval(<<'.,.,', 'parser.y', 22)
|
425
|
+
def _reduce_12(val, _values, result)
|
426
|
+
result = field(val)
|
427
|
+
result
|
428
|
+
end
|
429
|
+
.,.,
|
430
|
+
|
431
|
+
module_eval(<<'.,.,', 'parser.y', 24)
|
432
|
+
def _reduce_13(val, _values, result)
|
433
|
+
result = quoted_text
|
434
|
+
result
|
435
|
+
end
|
436
|
+
.,.,
|
437
|
+
|
438
|
+
module_eval(<<'.,.,', 'parser.y', 25)
|
439
|
+
def _reduce_14(val, _values, result)
|
440
|
+
result = quoted_text(val)
|
441
|
+
result
|
442
|
+
end
|
443
|
+
.,.,
|
444
|
+
|
445
|
+
module_eval(<<'.,.,', 'parser.y', 26)
|
446
|
+
def _reduce_15(val, _values, result)
|
447
|
+
result = quoted_text(val)
|
448
|
+
result
|
449
|
+
end
|
450
|
+
.,.,
|
451
|
+
|
452
|
+
module_eval(<<'.,.,', 'parser.y', 27)
|
453
|
+
def _reduce_16(val, _values, result)
|
454
|
+
result = quoted_text(val)
|
455
|
+
result
|
456
|
+
end
|
457
|
+
.,.,
|
458
|
+
|
459
|
+
def _reduce_none(val, _values, result)
|
460
|
+
val[0]
|
461
|
+
end
|
462
|
+
|
463
|
+
end # class Parser
|
464
|
+
|
465
|
+
end
|
data/lib/eccsv/parser.y
ADDED
@@ -0,0 +1,264 @@
|
|
1
|
+
class Parser
|
2
|
+
token NEWLINE COMMA TEXT QUOTE
|
3
|
+
|
4
|
+
rule
|
5
|
+
root: delim_records { result = root(val).value }
|
6
|
+
| delim_records record { result = root(val).value }
|
7
|
+
|
8
|
+
delim_records: { result = delim_records }
|
9
|
+
| delim_records delim_record { result = delim_records(val) }
|
10
|
+
|
11
|
+
delim_record: NEWLINE { result = delim_record(val) }
|
12
|
+
| record NEWLINE { result = delim_record(val) }
|
13
|
+
|
14
|
+
# TODO: reduce record nodes
|
15
|
+
record: delim_fields field { result = record(val) }
|
16
|
+
|
17
|
+
delim_fields: { result = delim_fields }
|
18
|
+
| delim_fields delim_field { result = delim_fields(val) }
|
19
|
+
|
20
|
+
delim_field: field COMMA { result = delim_field(val) }
|
21
|
+
|
22
|
+
field: QUOTE quoted_text QUOTE { result = field(val) }
|
23
|
+
| TEXT { result = field(val) }
|
24
|
+
|
25
|
+
quoted_text: { result = quoted_text }
|
26
|
+
| quoted_text COMMA { result = quoted_text(val) }
|
27
|
+
| quoted_text NEWLINE { result = quoted_text(val) }
|
28
|
+
| quoted_text TEXT { result = quoted_text(val) }
|
29
|
+
end
|
30
|
+
|
31
|
+
---- header
|
32
|
+
require 'strscan'
|
33
|
+
|
34
|
+
module ECCSV
|
35
|
+
---- inner
|
36
|
+
class Node
|
37
|
+
attr_reader :value, :token, :line, :col
|
38
|
+
|
39
|
+
def initialize(value = "", token = nil, line = nil, col = nil)
|
40
|
+
@value = value
|
41
|
+
@token = token
|
42
|
+
@line = line
|
43
|
+
@col = col
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
class ParentNode < Node
|
48
|
+
def initialize(children = [], line = nil, col = nil)
|
49
|
+
last = children.last
|
50
|
+
if last && last.is_a?(Node)
|
51
|
+
line = last.line
|
52
|
+
col = last.col
|
53
|
+
end
|
54
|
+
super(nil, nil, line, col)
|
55
|
+
@children = children
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
class QuotedTextNode < ParentNode
|
60
|
+
def value
|
61
|
+
@value ||= @children.collect(&:value).join
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class FieldNode < ParentNode
|
66
|
+
def value
|
67
|
+
@value ||=
|
68
|
+
if @children[0].token == :TEXT
|
69
|
+
@children[0].value
|
70
|
+
else
|
71
|
+
# quoted text
|
72
|
+
@children[1].value
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
class DelimFieldNode < ParentNode
|
78
|
+
def value
|
79
|
+
@value ||= @children[0].value
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
class DelimFieldsNode < ParentNode
|
84
|
+
def value
|
85
|
+
@value ||=
|
86
|
+
if @children.empty?
|
87
|
+
[]
|
88
|
+
else
|
89
|
+
@children[0].value + [@children[1].value]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
class RecordNode < ParentNode
|
95
|
+
def value
|
96
|
+
# TODO: 'consume' children to produce value to reduce memory footprint
|
97
|
+
@value ||= @children[0].value + [@children[1].value]
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
class DelimRecordNode < ParentNode
|
102
|
+
def value
|
103
|
+
@value ||= @children.length == 1 ? [] : @children[0].value
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
class DelimRecordsNode < ParentNode
|
108
|
+
def value
|
109
|
+
if @value.nil?
|
110
|
+
if @children.empty?
|
111
|
+
@value = []
|
112
|
+
else
|
113
|
+
@value = @children[0].value
|
114
|
+
val = @children[1].value
|
115
|
+
if !val.empty?
|
116
|
+
@value += [val]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
@value
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
class RootNode < ParentNode
|
125
|
+
def value
|
126
|
+
if @value.nil?
|
127
|
+
@value = @children[0].value
|
128
|
+
if @children[1]
|
129
|
+
@value += [@children[1].value]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
@value
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
attr_reader :error
|
137
|
+
|
138
|
+
# Parses +str+ as CSV and returns whatever the Racc driver (do_parse)
# produces for it.
#
# All per-run state is reset first, so one parser instance can be reused for
# several inputs without the previous run's error, warnings or expected
# field count leaking into the next one.
def parse(str)
  @error = nil
  @warnings = []
  # The record check tests `defined? @num_fields`, so the variable has to be
  # removed (not merely set to nil) for the first record to define the count.
  remove_instance_variable(:@num_fields) if defined? @num_fields

  @scanner = StringScanner.new(str)
  @line = 1
  @col = 1
  do_parse
end
|
144
|
+
|
145
|
+
# Lexer: returns the next [token, node] pair, or nil once the input is
# exhausted.
#
# The node records the line/column at which the token starts; @line and @col
# are then advanced past the token for the next call.
def next_token
  # eos? is the supported spelling; StringScanner#empty? is obsolete.
  return if @scanner.eos?

  token =
    if @scanner.scan(/,/) then :COMMA
    elsif @scanner.scan(/"/) then :QUOTE
    elsif @scanner.scan(/\n/) then :NEWLINE
    elsif @scanner.scan(/[^,\n"]+/) then :TEXT
    else
      # The four patterns above cover every character, so this is only a
      # safeguard against future edits to them.
      raise "can't recognize <#{@scanner.peek(5)}>"
    end
  match = @scanner.matched

  # Build the node before moving the position so it points at the token start.
  value = node(match, token)

  if token == :NEWLINE
    @line += 1
    @col = 1
  else
    @col += match.length
  end

  [token, value]
end
|
172
|
+
|
173
|
+
def warnings
|
174
|
+
@warnings ||= []
|
175
|
+
end
|
176
|
+
|
177
|
+
private
|
178
|
+
|
179
|
+
def node(value = "", token = nil, line = @line, col = @col)
|
180
|
+
Node.new(value, token, line, col)
|
181
|
+
end
|
182
|
+
|
183
|
+
def quoted_text(children = [], line = @line, col = @col)
|
184
|
+
QuotedTextNode.new(children, line, col)
|
185
|
+
end
|
186
|
+
|
187
|
+
def field(children = [], line = @line, col = @col)
|
188
|
+
FieldNode.new(children, line, col)
|
189
|
+
end
|
190
|
+
|
191
|
+
def delim_field(children = [], line = @line, col = @col)
|
192
|
+
DelimFieldNode.new(children, line, col)
|
193
|
+
end
|
194
|
+
|
195
|
+
def delim_fields(children = [], line = @line, col = @col)
|
196
|
+
DelimFieldsNode.new(children, line, col)
|
197
|
+
end
|
198
|
+
|
199
|
+
def record(children = [], line = @line, col = @col)
|
200
|
+
record = RecordNode.new(children, line, col)
|
201
|
+
value = record.value
|
202
|
+
if defined? @num_fields
|
203
|
+
first = children[0]
|
204
|
+
line = first.line
|
205
|
+
col = first.col
|
206
|
+
if @num_fields > value.length
|
207
|
+
msg = "expected %d more fields on line %d" % [@num_fields - value.length, line]
|
208
|
+
self.warnings.push(MissingFieldsError.new(msg, line, col))
|
209
|
+
elsif @num_fields < value.length
|
210
|
+
msg = "%d extra fields found on line %d, column %d" % [value.length - @num_fields, line, col]
|
211
|
+
self.warnings.push(ExtraFieldsError.new(msg, line, col))
|
212
|
+
end
|
213
|
+
else
|
214
|
+
@num_fields = value.length
|
215
|
+
end
|
216
|
+
|
217
|
+
record
|
218
|
+
end
|
219
|
+
|
220
|
+
def delim_record(children = [], line = @line, col = @col)
|
221
|
+
DelimRecordNode.new(children, line, col)
|
222
|
+
end
|
223
|
+
|
224
|
+
def delim_records(children = [], line = @line, col = @col)
|
225
|
+
DelimRecordsNode.new(children, line, col)
|
226
|
+
end
|
227
|
+
|
228
|
+
def root(children = [], line = @line, col = @col)
|
229
|
+
RootNode.new(children, line, col)
|
230
|
+
end
|
231
|
+
|
232
|
+
# Error hook (overrides the Racc runtime's on_error). Instead of raising,
# it classifies the failure and stores it in @error.
#
# t     - internal id of the offending token (0 means end of input)
# val   - value of the offending token (a Node produced by next_token)
# stack - the parser's value stack at the point of failure
#
# @error is always assigned, so a stale error from an earlier failure can
# never mask the current one, and an unexpected token that is not a quote is
# still reported with its position.
def on_error(t, val, stack)
  @error =
    if t == 0
      # Ran out of input: either a quoted field was left open, or the input
      # simply ended where more was required.
      unmatched_quote_error(stack) || Error.new("unexpected EOF")
    elsif val.is_a?(Node) && val.token == :QUOTE
      StrayQuoteError.new("stray quote at line #{val.line}, column #{val.col}", val.line, val.col)
    elsif val.is_a?(Node)
      Error.new("unexpected #{val.token} at line #{val.line}, column #{val.col}", val.line, val.col)
    else
      Error.new("unexpected token")
    end
end

# Walks the value stack from the top looking for quoted text that is still
# open; returns an UnmatchedQuoteError located at its opening quote, or nil
# when the stack holds no open quoted text.
def unmatched_quote_error(stack)
  inside_quotes = false
  stack.reverse_each do |node|
    if node.is_a?(QuotedTextNode)
      inside_quotes = true
    elsif inside_quotes && node.is_a?(Node) && node.token == :QUOTE
      # The first quote below the quoted text is the one that opened it.
      return UnmatchedQuoteError.new("unmatched quote at line #{node.line}, column #{node.col}", node.line, node.col)
    end
  end
  nil
end
|
263
|
+
---- footer
|
264
|
+
end
|
data/lib/eccsv.rb
ADDED
data/test/helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
|
11
|
+
require 'test/unit'
|
12
|
+
require 'byebug'
|
13
|
+
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
|
+
require 'eccsv'
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,230 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for ECCSV::Parser: record/field splitting, quoted fields, and the
# errors and warnings the parser exposes through #error and #warnings.
class TestParser < Test::Unit::TestCase
  # Parses +string+ with a freshly built parser and returns whatever
  # ECCSV::Parser#parse returns.
  def parse(string)
    ECCSV::Parser.new.parse(string)
  end

  test "one record with two fields" do
    expected = [%w[foo bar]]
    assert_equal expected, parse("foo,bar")
  end

  test "one record with one field" do
    expected = [%w[foo]]
    assert_equal expected, parse("foo")
  end

  test "empty records" do
    expected = []
    assert_equal expected, parse("")
  end

  test "empty record is skipped by default" do
    expected = [%w[foo], %w[bar]]
    assert_equal expected, parse("foo\n\nbar")
  end

  test "skipping empty record at end" do
    expected = [%w[foo], %w[bar]]
    assert_equal expected, parse("foo\nbar\n")
  end

=begin
  NOTE(review): disabled tests. They set skip_empty_record and read
  result.value, which none of the active tests do -- confirm the parser
  supports both before re-enabling.

  test "not skipping an empty record" do
    parser = ECCSV::Parser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\n\nbar")
    assert_equal [['foo'], [], ['bar']], result.value
  end

  test "not skipping empty record at end" do
    parser = ECCSV::Parser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\nbar\n")
    assert_equal [['foo'], ['bar'], []], result.value
  end
=end

  test "two records" do
    expected = [%w[foo bar], %w[baz qux]]
    assert_equal expected, parse("foo,bar\nbaz,qux")
  end

  test "quoted field" do
    expected = [["foo,bar"]]
    assert_equal expected, parse(%{"foo,bar"})
  end

  test "missing closing quote" do
    csv = ECCSV::Parser.new
    outcome = csv.parse(%{foo,bar\n"foo})
    assert !outcome

    failure = csv.error
    assert_kind_of ECCSV::UnmatchedQuoteError, failure
    assert_equal 2, failure.line
    assert_equal 1, failure.col
  end

  test "quote inside unquoted field" do
    csv = ECCSV::Parser.new
    outcome = csv.parse(%{f"oo})
    assert !outcome

    failure = csv.error
    assert_kind_of ECCSV::StrayQuoteError, failure
    assert_equal 1, failure.line
    assert_equal 2, failure.col
  end

  test "missing fields gets warning by default" do
    csv = ECCSV::Parser.new
    rows = csv.parse(%{foo,bar\nbaz})
    assert_equal [%w[foo bar], %w[baz]], rows
    assert_equal 1, csv.warnings.length

    first_warning = csv.warnings[0]
    assert_kind_of ECCSV::MissingFieldsError, first_warning
    assert_equal 2, first_warning.line
    assert_equal 4, first_warning.col
  end

=begin
  NOTE(review): disabled test. It relies on allow_uneven_records and
  failure_type, which no active test exercises -- confirm they exist before
  re-enabling.

  test "missing fields when disallowed" do
    parser = ECCSV::Parser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo,bar\nbaz})
    assert !result
    assert_equal :missing_fields, parser.failure_type
  end
=end

  test "extra fields gets warning by default" do
    csv = ECCSV::Parser.new
    rows = csv.parse(%{foo\nbar,baz})
    assert_equal [%w[foo], %w[bar baz]], rows
    assert_equal 1, csv.warnings.length

    first_warning = csv.warnings[0]
    assert_kind_of ECCSV::ExtraFieldsError, first_warning
    assert_equal 2, first_warning.line
    assert_equal 4, first_warning.col
  end

=begin
  NOTE(review): disabled tests. Several reference CsvParser rather than
  ECCSV and use result.value / result.data / failure_reason -- presumably
  written against a different API; verify before re-enabling.

  test "extra fields when disallowed" do
    parser = ECCSV::Parser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo\nbar,baz})
    assert !result
    assert_equal :extra_fields, parser.failure_type
  end

  test "single-character custom field separator" do
    parser = ECCSV::Parser.new
    parser.field_sep = "\t"
    result = parser.parse("foo\tbar")
    assert result, parser.failure_reason
    assert_equal [['foo', 'bar']], result.value
  end

  test "multi-character custom field separator" do
    parser = ECCSV::Parser.new
    parser.field_sep = "foo"
    result = parser.parse("bazfoobar")
    assert result, parser.failure_reason
    assert_equal [['baz', 'bar']], result.value
  end

  test "single-character custom record separator" do
    parser = ECCSV::Parser.new
    parser.record_sep = "x"
    result = parser.parse("fooxbar")
    assert result, parser.failure_reason
    assert_equal [['foo'], ['bar']], result.value
  end

  test "multi-character custom record separator" do
    parser = ECCSV::Parser.new
    parser.record_sep = "foo"
    result = parser.parse("barfoobaz")
    assert result, parser.failure_reason
    assert_equal [['bar'], ['baz']], result.value
  end

  test "custom quote character" do
    parser = ECCSV::Parser.new
    parser.quote_char = "'"
    result = parser.parse("'foo,bar'")
    assert result, parser.failure_reason
    assert_equal [['foo,bar']], result.value
  end

  test "parse helper" do
    result = CsvParser.parse("foo,bar")
    assert_equal [['foo', 'bar']], result.data
  end

  test "parse helper with options" do
    result = CsvParser.parse("foo\tbar", :field_sep => "\t")
    assert_equal [['foo', 'bar']], result.data
  end

  test "parse helper with missing closing quote" do
    error = nil
    begin
      CsvParser.parse(%{"foo})
    rescue CsvParser::MissingQuoteError => error
      assert_equal 1, error.line
      assert_equal 1, error.column
      assert_equal "no ending quote found for quote on line 1, column 1", error.message
    end
    assert error
  end

  test "parse helper with stray quote" do
    error = nil
    begin
      CsvParser.parse(%{f"oo})
    rescue CsvParser::StrayQuoteError => error
      assert_equal 1, error.line
      assert_equal 2, error.column
      assert_equal "invalid quote found on line 1, column 2", error.message
    end
    assert error
  end

  test "parse helper with allowed short records" do
    result = CsvParser.parse(%{foo,bar\nbaz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::MissingFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 4, error.column
    assert_equal "record on line 2 had too few fields", error.message
  end

  test "parse helper with disallowed short records" do
    error = nil
    begin
      CsvParser.parse(%{foo,bar\nbaz}, :allow_uneven_records => false)
    rescue CsvParser::MissingFieldsError => error
      assert_equal 2, error.line
      assert_equal 4, error.column
      assert_equal "record on line 2 had too few fields", error.message
    end
    assert error
  end

  test "parse helper with allowed long records" do
    result = CsvParser.parse(%{foo\nbar,baz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::ExtraFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 5, error.column
    assert_equal "record on line 2 had too many fields", error.message
  end

  test "parse helper with disallowed long records" do
    error = nil
    begin
      CsvParser.parse(%{foo\nbar,baz}, :allow_uneven_records => false)
    rescue CsvParser::ExtraFieldsError => error
      assert_equal 2, error.line
      assert_equal 5, error.column
      assert_equal "record on line 2 had too many fields", error.message
    end
    assert error
  end
=end
end
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: eccsv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeremy Stephens
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: test-unit
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: racc
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: CSV library with advanced error reporting
|
70
|
+
email:
|
71
|
+
- jeremy.f.stephens@vanderbilt.edu
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- Gemfile
|
78
|
+
- LICENSE.txt
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- eccsv.gemspec
|
82
|
+
- lib/eccsv.rb
|
83
|
+
- lib/eccsv/errors.rb
|
84
|
+
- lib/eccsv/parser.rb
|
85
|
+
- lib/eccsv/parser.y
|
86
|
+
- lib/eccsv/version.rb
|
87
|
+
- test/helper.rb
|
88
|
+
- test/test_parser.rb
|
89
|
+
homepage: https://github.com/coupler/eccsv
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata: {}
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
requirements: []
|
108
|
+
rubyforge_project:
|
109
|
+
rubygems_version: 2.2.2
|
110
|
+
signing_key:
|
111
|
+
specification_version: 4
|
112
|
+
summary: CSV library with advanced error reporting
|
113
|
+
test_files:
|
114
|
+
- test/helper.rb
|
115
|
+
- test/test_parser.rb
|