eccsv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +113 -0
- data/Rakefile +16 -0
- data/eccsv.gemspec +25 -0
- data/lib/eccsv/errors.rb +16 -0
- data/lib/eccsv/parser.rb +465 -0
- data/lib/eccsv/parser.y +264 -0
- data/lib/eccsv/version.rb +3 -0
- data/lib/eccsv.rb +6 -0
- data/test/helper.rb +16 -0
- data/test/test_parser.rb +230 -0
- metadata +115 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8dd728098ee2f3066326be199f2657c34b33facd
|
4
|
+
data.tar.gz: 0472be8f00b3f6c9079cc20808bfd89804ff2212
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9238b550d38a2766e5c53ca152855dfc7f782cc79c98dad60030095138e4a40a68655b14f22c3e376214901294ad81ad3e835e9091c2ee9d810c302e3f744ff3
|
7
|
+
data.tar.gz: 12599cfb6ae428dc134dd67c64fac344fa2a0306ba28de4e5f7a183b2754780d5c3e865ead20f6e20e2979b076e49e13ca83bbd11eb51ddf03587839d648b693
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Vanderbilt University
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
# ECCSV
|
2
|
+
|
3
|
+
ECCSV (error-correcting comma-separated values) is a CSV parsing library with
|
4
|
+
advanced error reporting and correcting.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
gem 'eccsv'
|
11
|
+
|
12
|
+
And then execute:
|
13
|
+
|
14
|
+
$ bundle
|
15
|
+
|
16
|
+
Or install it yourself as:
|
17
|
+
|
18
|
+
$ gem install eccsv
|
19
|
+
|
20
|
+
## Basic Usage
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
require 'eccsv'
|
24
|
+
|
25
|
+
data = <<EOF
|
26
|
+
foo,bar
|
27
|
+
baz,qux
|
28
|
+
EOF
|
29
|
+
|
30
|
+
parser = ECCSV::Parser.new
|
31
|
+
parser.parse(data) #=> [["foo", "bar"], ["baz", "qux"]]
|
32
|
+
```
|
33
|
+
|
34
|
+
## Errors
|
35
|
+
|
36
|
+
One of the goals of this project is to give you descriptive error messages.
|
37
|
+
|
38
|
+
Each error type is a subclass of `ECCSV::Error` and contains the exact line
|
39
|
+
number (via `Error#line`) and column number (via `Error#col`) where the error
|
40
|
+
took place.
|
41
|
+
|
42
|
+
* missing closing quote (`ECCSV::UnmatchedQuoteError`)
|
43
|
+
* quote in the wrong place (`ECCSV::StrayQuoteError`)
|
44
|
+
* rows with not enough fields (`ECCSV::MissingFieldsError`)
|
45
|
+
* rows with too many fields (`ECCSV::ExtraFieldsError`)
|
46
|
+
|
47
|
+
Since missing/extra fields do not cause the CSV to be unparsable, they are
|
48
|
+
treated as warnings instead of errors (see example below).
|
49
|
+
|
50
|
+
### Examples
|
51
|
+
|
52
|
+
#### Unmatched quote
|
53
|
+
|
54
|
+
If there was an error, `#parse` will return `nil` and set `#error`.
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
require 'eccsv'
|
58
|
+
|
59
|
+
data = <<EOF
|
60
|
+
foo,"bar
|
61
|
+
baz,qux
|
62
|
+
EOF
|
63
|
+
|
64
|
+
parser = ECCSV::Parser.new
|
65
|
+
parser.parse(data) #=> nil
|
66
|
+
parser.error #=> #<ECCSV::UnmatchedQuoteError: unmatched quote at line 1, column 5>
|
67
|
+
parser.error.line #=> 1
|
68
|
+
parser.error.col #=> 5
|
69
|
+
```
|
70
|
+
|
71
|
+
#### Missing fields
|
72
|
+
|
73
|
+
If there was a warning, `#parse` will return the records and add to `#warnings`.
|
74
|
+
|
75
|
+
```ruby
|
76
|
+
require 'eccsv'
|
77
|
+
|
78
|
+
data = <<EOF
|
79
|
+
foo,bar
|
80
|
+
baz
|
81
|
+
EOF
|
82
|
+
|
83
|
+
parser = ECCSV::Parser.new
|
84
|
+
parser.parse(data) #=> [["foo", "bar"], ["baz"]]
|
85
|
+
parser.warnings #=> [#<ECCSV::MissingFieldsError: expected 1 more fields on line 2>]
|
86
|
+
parser.warnings[0].line #=> 2
|
87
|
+
parser.warnings[0].col #=> 4
|
88
|
+
```
|
89
|
+
|
90
|
+
#### Extra fields
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
require 'eccsv'
|
94
|
+
|
95
|
+
data = <<EOF
|
96
|
+
foo
|
97
|
+
bar,baz
|
98
|
+
EOF
|
99
|
+
|
100
|
+
parser = ECCSV::Parser.new
|
101
|
+
parser.parse(data) #=> [["foo"], ["bar", "baz"]]
|
102
|
+
parser.warnings #=> [#<ECCSV::ExtraFieldsError: 1 extra fields found on line 2, column 4>]
|
103
|
+
parser.warnings[0].line #=> 2
|
104
|
+
parser.warnings[0].col #=> 4
|
105
|
+
```
|
106
|
+
|
107
|
+
## Contributing
|
108
|
+
|
109
|
+
1. Fork it
|
110
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
111
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
112
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
113
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "bundler/gem_tasks"
require "rake/testtask"

# Unit tests live under test/ and are named test*.rb.
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/test*.rb'
end

# Make sure the generated parser is up to date before the tests run.
task :test => :racc
task :default => :test

desc "Compile racc grammar"
task :racc => "lib/eccsv/parser.rb"

# Regenerate lib/eccsv/parser.rb whenever the grammar is newer than it.
file "lib/eccsv/parser.rb" => "lib/eccsv/parser.y" do |t|
  # `sh` (unlike a bare `system`) aborts the task when racc exits non-zero,
  # so a broken grammar can no longer be silently followed by a test run
  # against a stale or missing parser. The argument-list form avoids the shell.
  sh "racc", "-v", "-o", t.name, t.source
end
|
data/eccsv.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'eccsv/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "eccsv"
|
8
|
+
spec.version = ECCSV::VERSION
|
9
|
+
spec.authors = ["Jeremy Stephens"]
|
10
|
+
spec.email = ["jeremy.f.stephens@vanderbilt.edu"]
|
11
|
+
spec.description = %q{CSV library with advanced error reporting}
|
12
|
+
spec.summary = %q{CSV library with advanced error reporting}
|
13
|
+
spec.homepage = "https://github.com/coupler/eccsv"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "test-unit"
|
24
|
+
spec.add_development_dependency "racc"
|
25
|
+
end
|
data/lib/eccsv/errors.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module ECCSV
  # Base class for every problem reported by the parser.
  #
  # Inherits from StandardError (not Exception) so that a plain `rescue`
  # catches it and it is never confused with interpreter-level exceptions
  # such as SignalException or SystemExit.
  class Error < StandardError
    # Position in the input the problem refers to; either may be nil when
    # no position was supplied.
    attr_reader :line, :col

    # msg  - human readable description (optional)
    # line - line number associated with the problem (optional)
    # col  - column number associated with the problem (optional)
    def initialize(msg = nil, line = nil, col = nil)
      super(msg)
      @line = line
      @col = col
    end
  end

  # A quoted field was opened but never closed.
  class UnmatchedQuoteError < Error; end

  # A quote character appeared where none is allowed.
  class StrayQuoteError < Error; end

  # A record has fewer fields than expected.
  class MissingFieldsError < Error; end

  # A record has more fields than expected.
  class ExtraFieldsError < Error; end
end
|
data/lib/eccsv/parser.rb
ADDED
@@ -0,0 +1,465 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by Racc 1.4.12
|
4
|
+
# from Racc grammer file "".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser.rb'
|
8
|
+
|
9
|
+
require 'strscan'
|
10
|
+
|
11
|
+
module ECCSV
|
12
|
+
class Parser < Racc::Parser
|
13
|
+
|
14
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 36)
|
15
|
+
class Node
|
16
|
+
attr_reader :value, :token, :line, :col
|
17
|
+
|
18
|
+
def initialize(value = "", token = nil, line = nil, col = nil)
|
19
|
+
@value = value
|
20
|
+
@token = token
|
21
|
+
@line = line
|
22
|
+
@col = col
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class ParentNode < Node
|
27
|
+
def initialize(children = [], line = nil, col = nil)
|
28
|
+
last = children.last
|
29
|
+
if last && last.is_a?(Node)
|
30
|
+
line = last.line
|
31
|
+
col = last.col
|
32
|
+
end
|
33
|
+
super(nil, nil, line, col)
|
34
|
+
@children = children
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class QuotedTextNode < ParentNode
|
39
|
+
def value
|
40
|
+
@value ||= @children.collect(&:value).join
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
class FieldNode < ParentNode
|
45
|
+
def value
|
46
|
+
@value ||=
|
47
|
+
if @children[0].token == :TEXT
|
48
|
+
@children[0].value
|
49
|
+
else
|
50
|
+
# quoted text
|
51
|
+
@children[1].value
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class DelimFieldNode < ParentNode
|
57
|
+
def value
|
58
|
+
@value ||= @children[0].value
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
class DelimFieldsNode < ParentNode
|
63
|
+
def value
|
64
|
+
@value ||=
|
65
|
+
if @children.empty?
|
66
|
+
[]
|
67
|
+
else
|
68
|
+
@children[0].value + [@children[1].value]
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
class RecordNode < ParentNode
|
74
|
+
def value
|
75
|
+
# TODO: 'consume' children to produce value to reduce memory footprint
|
76
|
+
@value ||= @children[0].value + [@children[1].value]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
class DelimRecordNode < ParentNode
|
81
|
+
def value
|
82
|
+
@value ||= @children.length == 1 ? [] : @children[0].value
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
class DelimRecordsNode < ParentNode
|
87
|
+
def value
|
88
|
+
if @value.nil?
|
89
|
+
if @children.empty?
|
90
|
+
@value = []
|
91
|
+
else
|
92
|
+
@value = @children[0].value
|
93
|
+
val = @children[1].value
|
94
|
+
if !val.empty?
|
95
|
+
@value += [val]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
@value
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
class RootNode < ParentNode
|
104
|
+
def value
|
105
|
+
if @value.nil?
|
106
|
+
@value = @children[0].value
|
107
|
+
if @children[1]
|
108
|
+
@value += [@children[1].value]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
@value
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
attr_reader :error
|
116
|
+
|
117
|
+
def parse(str)
|
118
|
+
@scanner = StringScanner.new(str)
|
119
|
+
@line = 1
|
120
|
+
@col = 1
|
121
|
+
do_parse
|
122
|
+
end
|
123
|
+
|
124
|
+
def next_token
|
125
|
+
until @scanner.empty?
|
126
|
+
next_line = @line
|
127
|
+
next_col = @col
|
128
|
+
case
|
129
|
+
when match = @scanner.scan(/,/)
|
130
|
+
token = :COMMA
|
131
|
+
when match = @scanner.scan(/"/)
|
132
|
+
token = :QUOTE
|
133
|
+
when match = @scanner.scan(/\n/)
|
134
|
+
token = :NEWLINE
|
135
|
+
next_line += 1
|
136
|
+
next_col = 0
|
137
|
+
when match = @scanner.scan(/[^,\n"]+/)
|
138
|
+
token = :TEXT
|
139
|
+
else
|
140
|
+
raise "can't recognize <#{@scanner.peek(5)}>"
|
141
|
+
end
|
142
|
+
next_col += match.length
|
143
|
+
|
144
|
+
value = node(match, token)
|
145
|
+
@line = next_line
|
146
|
+
@col = next_col
|
147
|
+
|
148
|
+
return [token, value]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
def warnings
|
153
|
+
@warnings ||= []
|
154
|
+
end
|
155
|
+
|
156
|
+
private
|
157
|
+
|
158
|
+
def node(value = "", token = nil, line = @line, col = @col)
|
159
|
+
Node.new(value, token, line, col)
|
160
|
+
end
|
161
|
+
|
162
|
+
def quoted_text(children = [], line = @line, col = @col)
|
163
|
+
QuotedTextNode.new(children, line, col)
|
164
|
+
end
|
165
|
+
|
166
|
+
def field(children = [], line = @line, col = @col)
|
167
|
+
FieldNode.new(children, line, col)
|
168
|
+
end
|
169
|
+
|
170
|
+
def delim_field(children = [], line = @line, col = @col)
|
171
|
+
DelimFieldNode.new(children, line, col)
|
172
|
+
end
|
173
|
+
|
174
|
+
def delim_fields(children = [], line = @line, col = @col)
|
175
|
+
DelimFieldsNode.new(children, line, col)
|
176
|
+
end
|
177
|
+
|
178
|
+
def record(children = [], line = @line, col = @col)
|
179
|
+
record = RecordNode.new(children, line, col)
|
180
|
+
value = record.value
|
181
|
+
if defined? @num_fields
|
182
|
+
first = children[0]
|
183
|
+
line = first.line
|
184
|
+
col = first.col
|
185
|
+
if @num_fields > value.length
|
186
|
+
msg = "expected %d more fields on line %d" % [@num_fields - value.length, line]
|
187
|
+
self.warnings.push(MissingFieldsError.new(msg, line, col))
|
188
|
+
elsif @num_fields < value.length
|
189
|
+
msg = "%d extra fields found on line %d, column %d" % [value.length - @num_fields, line, col]
|
190
|
+
self.warnings.push(ExtraFieldsError.new(msg, line, col))
|
191
|
+
end
|
192
|
+
else
|
193
|
+
@num_fields = value.length
|
194
|
+
end
|
195
|
+
|
196
|
+
record
|
197
|
+
end
|
198
|
+
|
199
|
+
def delim_record(children = [], line = @line, col = @col)
|
200
|
+
DelimRecordNode.new(children, line, col)
|
201
|
+
end
|
202
|
+
|
203
|
+
def delim_records(children = [], line = @line, col = @col)
|
204
|
+
DelimRecordsNode.new(children, line, col)
|
205
|
+
end
|
206
|
+
|
207
|
+
def root(children = [], line = @line, col = @col)
|
208
|
+
RootNode.new(children, line, col)
|
209
|
+
end
|
210
|
+
|
211
|
+
def on_error(t, val, stack)
|
212
|
+
#pp t
|
213
|
+
#pp val
|
214
|
+
#pp stack
|
215
|
+
|
216
|
+
# figure out what error we have
|
217
|
+
if t == 0
|
218
|
+
# unexpected EOF
|
219
|
+
type = nil
|
220
|
+
stack.reverse_each do |node|
|
221
|
+
case node
|
222
|
+
when QuotedTextNode
|
223
|
+
type = :unmatched_quote
|
224
|
+
when Node
|
225
|
+
if type == :unmatched_quote && node.token == :QUOTE
|
226
|
+
line = node.line
|
227
|
+
col = node.col
|
228
|
+
@error = UnmatchedQuoteError.new("unmatched quote at line #{line}, column #{col}", line, col)
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
if @error.nil?
|
234
|
+
@error = Error.new("unexpected EOF")
|
235
|
+
end
|
236
|
+
elsif val.is_a?(Node) && val.token == :QUOTE
|
237
|
+
line = val.line
|
238
|
+
col = val.col
|
239
|
+
@error = StrayQuoteError.new("stray quote at line #{line}, column #{col}", line, col)
|
240
|
+
end
|
241
|
+
end
|
242
|
+
...end parser.y/module_eval...
|
243
|
+
##### State transition tables begin ###
|
244
|
+
|
245
|
+
racc_action_table = [
|
246
|
+
18, 17, 19, 16, -1, 9, 6, 13, 12, 8,
|
247
|
+
14, 3 ]
|
248
|
+
|
249
|
+
racc_action_check = [
|
250
|
+
15, 15, 15, 15, 2, 4, 2, 7, 7, 3,
|
251
|
+
10, 1 ]
|
252
|
+
|
253
|
+
racc_action_pointer = [
|
254
|
+
nil, 11, 4, 9, 3, nil, nil, 3, nil, nil,
|
255
|
+
7, nil, nil, nil, nil, -2, nil, nil, nil, nil ]
|
256
|
+
|
257
|
+
racc_action_default = [
|
258
|
+
-3, -17, -8, -17, -2, -4, -5, -17, 20, -6,
|
259
|
+
-7, -9, -13, -12, -10, -17, -11, -14, -15, -16 ]
|
260
|
+
|
261
|
+
racc_goto_table = [
|
262
|
+
1, 2, 4, 5, 7, 10, 11, 15 ]
|
263
|
+
|
264
|
+
racc_goto_check = [
|
265
|
+
1, 2, 3, 4, 5, 6, 7, 8 ]
|
266
|
+
|
267
|
+
racc_goto_pointer = [
|
268
|
+
nil, 0, 1, 0, 1, 2, -2, -1, -5 ]
|
269
|
+
|
270
|
+
racc_goto_default = [
|
271
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil ]
|
272
|
+
|
273
|
+
racc_reduce_table = [
|
274
|
+
0, 0, :racc_error,
|
275
|
+
1, 7, :_reduce_1,
|
276
|
+
2, 7, :_reduce_2,
|
277
|
+
0, 8, :_reduce_3,
|
278
|
+
2, 8, :_reduce_4,
|
279
|
+
1, 10, :_reduce_5,
|
280
|
+
2, 10, :_reduce_6,
|
281
|
+
2, 9, :_reduce_7,
|
282
|
+
0, 11, :_reduce_8,
|
283
|
+
2, 11, :_reduce_9,
|
284
|
+
2, 13, :_reduce_10,
|
285
|
+
3, 12, :_reduce_11,
|
286
|
+
1, 12, :_reduce_12,
|
287
|
+
0, 14, :_reduce_13,
|
288
|
+
2, 14, :_reduce_14,
|
289
|
+
2, 14, :_reduce_15,
|
290
|
+
2, 14, :_reduce_16 ]
|
291
|
+
|
292
|
+
racc_reduce_n = 17
|
293
|
+
|
294
|
+
racc_shift_n = 20
|
295
|
+
|
296
|
+
racc_token_table = {
|
297
|
+
false => 0,
|
298
|
+
:error => 1,
|
299
|
+
:NEWLINE => 2,
|
300
|
+
:COMMA => 3,
|
301
|
+
:TEXT => 4,
|
302
|
+
:QUOTE => 5 }
|
303
|
+
|
304
|
+
racc_nt_base = 6
|
305
|
+
|
306
|
+
racc_use_result_var = true
|
307
|
+
|
308
|
+
Racc_arg = [
|
309
|
+
racc_action_table,
|
310
|
+
racc_action_check,
|
311
|
+
racc_action_default,
|
312
|
+
racc_action_pointer,
|
313
|
+
racc_goto_table,
|
314
|
+
racc_goto_check,
|
315
|
+
racc_goto_default,
|
316
|
+
racc_goto_pointer,
|
317
|
+
racc_nt_base,
|
318
|
+
racc_reduce_table,
|
319
|
+
racc_token_table,
|
320
|
+
racc_shift_n,
|
321
|
+
racc_reduce_n,
|
322
|
+
racc_use_result_var ]
|
323
|
+
|
324
|
+
Racc_token_to_s_table = [
|
325
|
+
"$end",
|
326
|
+
"error",
|
327
|
+
"NEWLINE",
|
328
|
+
"COMMA",
|
329
|
+
"TEXT",
|
330
|
+
"QUOTE",
|
331
|
+
"$start",
|
332
|
+
"root",
|
333
|
+
"delim_records",
|
334
|
+
"record",
|
335
|
+
"delim_record",
|
336
|
+
"delim_fields",
|
337
|
+
"field",
|
338
|
+
"delim_field",
|
339
|
+
"quoted_text" ]
|
340
|
+
|
341
|
+
Racc_debug_parser = true
|
342
|
+
|
343
|
+
##### State transition tables end #####
|
344
|
+
|
345
|
+
# reduce 0 omitted
|
346
|
+
|
347
|
+
module_eval(<<'.,.,', 'parser.y', 4)
|
348
|
+
def _reduce_1(val, _values, result)
|
349
|
+
result = root(val).value
|
350
|
+
result
|
351
|
+
end
|
352
|
+
.,.,
|
353
|
+
|
354
|
+
module_eval(<<'.,.,', 'parser.y', 5)
|
355
|
+
def _reduce_2(val, _values, result)
|
356
|
+
result = root(val).value
|
357
|
+
result
|
358
|
+
end
|
359
|
+
.,.,
|
360
|
+
|
361
|
+
module_eval(<<'.,.,', 'parser.y', 7)
|
362
|
+
def _reduce_3(val, _values, result)
|
363
|
+
result = delim_records
|
364
|
+
result
|
365
|
+
end
|
366
|
+
.,.,
|
367
|
+
|
368
|
+
module_eval(<<'.,.,', 'parser.y', 8)
|
369
|
+
def _reduce_4(val, _values, result)
|
370
|
+
result = delim_records(val)
|
371
|
+
result
|
372
|
+
end
|
373
|
+
.,.,
|
374
|
+
|
375
|
+
module_eval(<<'.,.,', 'parser.y', 10)
|
376
|
+
def _reduce_5(val, _values, result)
|
377
|
+
result = delim_record(val)
|
378
|
+
result
|
379
|
+
end
|
380
|
+
.,.,
|
381
|
+
|
382
|
+
module_eval(<<'.,.,', 'parser.y', 11)
|
383
|
+
def _reduce_6(val, _values, result)
|
384
|
+
result = delim_record(val)
|
385
|
+
result
|
386
|
+
end
|
387
|
+
.,.,
|
388
|
+
|
389
|
+
module_eval(<<'.,.,', 'parser.y', 14)
|
390
|
+
def _reduce_7(val, _values, result)
|
391
|
+
result = record(val)
|
392
|
+
result
|
393
|
+
end
|
394
|
+
.,.,
|
395
|
+
|
396
|
+
module_eval(<<'.,.,', 'parser.y', 16)
|
397
|
+
def _reduce_8(val, _values, result)
|
398
|
+
result = delim_fields
|
399
|
+
result
|
400
|
+
end
|
401
|
+
.,.,
|
402
|
+
|
403
|
+
module_eval(<<'.,.,', 'parser.y', 17)
|
404
|
+
def _reduce_9(val, _values, result)
|
405
|
+
result = delim_fields(val)
|
406
|
+
result
|
407
|
+
end
|
408
|
+
.,.,
|
409
|
+
|
410
|
+
module_eval(<<'.,.,', 'parser.y', 19)
|
411
|
+
def _reduce_10(val, _values, result)
|
412
|
+
result = delim_field(val)
|
413
|
+
result
|
414
|
+
end
|
415
|
+
.,.,
|
416
|
+
|
417
|
+
module_eval(<<'.,.,', 'parser.y', 21)
|
418
|
+
def _reduce_11(val, _values, result)
|
419
|
+
result = field(val)
|
420
|
+
result
|
421
|
+
end
|
422
|
+
.,.,
|
423
|
+
|
424
|
+
module_eval(<<'.,.,', 'parser.y', 22)
|
425
|
+
def _reduce_12(val, _values, result)
|
426
|
+
result = field(val)
|
427
|
+
result
|
428
|
+
end
|
429
|
+
.,.,
|
430
|
+
|
431
|
+
module_eval(<<'.,.,', 'parser.y', 24)
|
432
|
+
def _reduce_13(val, _values, result)
|
433
|
+
result = quoted_text
|
434
|
+
result
|
435
|
+
end
|
436
|
+
.,.,
|
437
|
+
|
438
|
+
module_eval(<<'.,.,', 'parser.y', 25)
|
439
|
+
def _reduce_14(val, _values, result)
|
440
|
+
result = quoted_text(val)
|
441
|
+
result
|
442
|
+
end
|
443
|
+
.,.,
|
444
|
+
|
445
|
+
module_eval(<<'.,.,', 'parser.y', 26)
|
446
|
+
def _reduce_15(val, _values, result)
|
447
|
+
result = quoted_text(val)
|
448
|
+
result
|
449
|
+
end
|
450
|
+
.,.,
|
451
|
+
|
452
|
+
module_eval(<<'.,.,', 'parser.y', 27)
|
453
|
+
def _reduce_16(val, _values, result)
|
454
|
+
result = quoted_text(val)
|
455
|
+
result
|
456
|
+
end
|
457
|
+
.,.,
|
458
|
+
|
459
|
+
def _reduce_none(val, _values, result)
|
460
|
+
val[0]
|
461
|
+
end
|
462
|
+
|
463
|
+
end # class Parser
|
464
|
+
|
465
|
+
end
|
data/lib/eccsv/parser.y
ADDED
@@ -0,0 +1,264 @@
|
|
1
|
+
class Parser
|
2
|
+
token NEWLINE COMMA TEXT QUOTE
|
3
|
+
|
4
|
+
rule
|
5
|
+
root: delim_records { result = root(val).value }
|
6
|
+
| delim_records record { result = root(val).value }
|
7
|
+
|
8
|
+
delim_records: { result = delim_records }
|
9
|
+
| delim_records delim_record { result = delim_records(val) }
|
10
|
+
|
11
|
+
delim_record: NEWLINE { result = delim_record(val) }
|
12
|
+
| record NEWLINE { result = delim_record(val) }
|
13
|
+
|
14
|
+
# TODO: reduce record nodes
|
15
|
+
record: delim_fields field { result = record(val) }
|
16
|
+
|
17
|
+
delim_fields: { result = delim_fields }
|
18
|
+
| delim_fields delim_field { result = delim_fields(val) }
|
19
|
+
|
20
|
+
delim_field: field COMMA { result = delim_field(val) }
|
21
|
+
|
22
|
+
field: QUOTE quoted_text QUOTE { result = field(val) }
|
23
|
+
| TEXT { result = field(val) }
|
24
|
+
|
25
|
+
quoted_text: { result = quoted_text }
|
26
|
+
| quoted_text COMMA { result = quoted_text(val) }
|
27
|
+
| quoted_text NEWLINE { result = quoted_text(val) }
|
28
|
+
| quoted_text TEXT { result = quoted_text(val) }
|
29
|
+
end
|
30
|
+
|
31
|
+
---- header
|
32
|
+
require 'strscan'
|
33
|
+
|
34
|
+
module ECCSV
|
35
|
+
---- inner
|
36
|
+
class Node
|
37
|
+
attr_reader :value, :token, :line, :col
|
38
|
+
|
39
|
+
def initialize(value = "", token = nil, line = nil, col = nil)
|
40
|
+
@value = value
|
41
|
+
@token = token
|
42
|
+
@line = line
|
43
|
+
@col = col
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
class ParentNode < Node
|
48
|
+
def initialize(children = [], line = nil, col = nil)
|
49
|
+
last = children.last
|
50
|
+
if last && last.is_a?(Node)
|
51
|
+
line = last.line
|
52
|
+
col = last.col
|
53
|
+
end
|
54
|
+
super(nil, nil, line, col)
|
55
|
+
@children = children
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
class QuotedTextNode < ParentNode
|
60
|
+
def value
|
61
|
+
@value ||= @children.collect(&:value).join
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class FieldNode < ParentNode
|
66
|
+
def value
|
67
|
+
@value ||=
|
68
|
+
if @children[0].token == :TEXT
|
69
|
+
@children[0].value
|
70
|
+
else
|
71
|
+
# quoted text
|
72
|
+
@children[1].value
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
class DelimFieldNode < ParentNode
|
78
|
+
def value
|
79
|
+
@value ||= @children[0].value
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
class DelimFieldsNode < ParentNode
|
84
|
+
def value
|
85
|
+
@value ||=
|
86
|
+
if @children.empty?
|
87
|
+
[]
|
88
|
+
else
|
89
|
+
@children[0].value + [@children[1].value]
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
class RecordNode < ParentNode
|
95
|
+
def value
|
96
|
+
# TODO: 'consume' children to produce value to reduce memory footprint
|
97
|
+
@value ||= @children[0].value + [@children[1].value]
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
class DelimRecordNode < ParentNode
|
102
|
+
def value
|
103
|
+
@value ||= @children.length == 1 ? [] : @children[0].value
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
class DelimRecordsNode < ParentNode
|
108
|
+
def value
|
109
|
+
if @value.nil?
|
110
|
+
if @children.empty?
|
111
|
+
@value = []
|
112
|
+
else
|
113
|
+
@value = @children[0].value
|
114
|
+
val = @children[1].value
|
115
|
+
if !val.empty?
|
116
|
+
@value += [val]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
@value
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
class RootNode < ParentNode
|
125
|
+
def value
|
126
|
+
if @value.nil?
|
127
|
+
@value = @children[0].value
|
128
|
+
if @children[1]
|
129
|
+
@value += [@children[1].value]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
@value
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
attr_reader :error
|
137
|
+
|
138
|
+
# Parses +str+ and returns whatever the generated parser's do_parse returns.
#
# All per-parse state is reset first, so one parser instance can be reused:
# without this, an error or warnings from an earlier call would still be
# reported, and the remembered field count of the earlier input would be
# used to judge the records of the new one.
def parse(str)
  @scanner = StringScanner.new(str)
  @line = 1
  @col = 1

  @error = nil
  @warnings = []
  # The field count is tested with `defined?`, so the variable has to be
  # removed rather than set to nil.
  remove_instance_variable(:@num_fields) if instance_variable_defined?(:@num_fields)

  do_parse
end
|
144
|
+
|
145
|
+
# Scans the next token from @scanner.
#
# Returns a two-element array [token, node], where token is one of :COMMA,
# :QUOTE, :NEWLINE or :TEXT and node is built by #node for the matched text
# at the position where the token starts. Returns nil once the scanner has
# reached the end of its input.
#
# Raises RuntimeError if nothing matches at the current position.
def next_token
  # eos? replaces the obsolete StringScanner#empty?.
  return if @scanner.eos?

  if (match = @scanner.scan(/,/))
    token = :COMMA
  elsif (match = @scanner.scan(/"/))
    token = :QUOTE
  elsif (match = @scanner.scan(/\n/))
    token = :NEWLINE
  elsif (match = @scanner.scan(/[^,\n"]+/))
    token = :TEXT
  else
    raise "can't recognize <#{@scanner.peek(5)}>"
  end

  # Build the node before moving the position so that it records where the
  # token begins, not where it ends.
  value = node(match, token)

  if token == :NEWLINE
    # A newline moves to the first column of the following line.
    @line += 1
    @col = 1
  else
    @col += match.length
  end

  [token, value]
end
|
172
|
+
|
173
|
+
def warnings
|
174
|
+
@warnings ||= []
|
175
|
+
end
|
176
|
+
|
177
|
+
private
|
178
|
+
|
179
|
+
def node(value = "", token = nil, line = @line, col = @col)
|
180
|
+
Node.new(value, token, line, col)
|
181
|
+
end
|
182
|
+
|
183
|
+
def quoted_text(children = [], line = @line, col = @col)
|
184
|
+
QuotedTextNode.new(children, line, col)
|
185
|
+
end
|
186
|
+
|
187
|
+
def field(children = [], line = @line, col = @col)
|
188
|
+
FieldNode.new(children, line, col)
|
189
|
+
end
|
190
|
+
|
191
|
+
def delim_field(children = [], line = @line, col = @col)
|
192
|
+
DelimFieldNode.new(children, line, col)
|
193
|
+
end
|
194
|
+
|
195
|
+
def delim_fields(children = [], line = @line, col = @col)
|
196
|
+
DelimFieldsNode.new(children, line, col)
|
197
|
+
end
|
198
|
+
|
199
|
+
def record(children = [], line = @line, col = @col)
|
200
|
+
record = RecordNode.new(children, line, col)
|
201
|
+
value = record.value
|
202
|
+
if defined? @num_fields
|
203
|
+
first = children[0]
|
204
|
+
line = first.line
|
205
|
+
col = first.col
|
206
|
+
if @num_fields > value.length
|
207
|
+
msg = "expected %d more fields on line %d" % [@num_fields - value.length, line]
|
208
|
+
self.warnings.push(MissingFieldsError.new(msg, line, col))
|
209
|
+
elsif @num_fields < value.length
|
210
|
+
msg = "%d extra fields found on line %d, column %d" % [value.length - @num_fields, line, col]
|
211
|
+
self.warnings.push(ExtraFieldsError.new(msg, line, col))
|
212
|
+
end
|
213
|
+
else
|
214
|
+
@num_fields = value.length
|
215
|
+
end
|
216
|
+
|
217
|
+
record
|
218
|
+
end
|
219
|
+
|
220
|
+
def delim_record(children = [], line = @line, col = @col)
|
221
|
+
DelimRecordNode.new(children, line, col)
|
222
|
+
end
|
223
|
+
|
224
|
+
def delim_records(children = [], line = @line, col = @col)
|
225
|
+
DelimRecordsNode.new(children, line, col)
|
226
|
+
end
|
227
|
+
|
228
|
+
def root(children = [], line = @line, col = @col)
|
229
|
+
RootNode.new(children, line, col)
|
230
|
+
end
|
231
|
+
|
232
|
+
# Error hook invoked by the generated parser; records what went wrong in
# @error instead of raising.
#
# t     - id of the offending token (0 means end of input)
# val   - value of the offending token (a Node produced by #next_token)
# stack - the parser's value stack at the time of the error
#
# @error is always assigned, so a failed parse never leaves it nil and never
# leaves behind an error from an earlier call:
# * end of input inside a quoted field -> UnmatchedQuoteError at the opening
#   quote; any other premature end of input -> Error "unexpected EOF"
# * a quote where none is allowed      -> StrayQuoteError at that quote
# * any other unexpected token         -> Error describing the token
def on_error(t, val, stack)
  @error =
    if t == 0
      unmatched_quote_error(stack) || Error.new("unexpected EOF")
    elsif val.is_a?(Node) && val.token == :QUOTE
      StrayQuoteError.new("stray quote at line #{val.line}, column #{val.col}", val.line, val.col)
    elsif val.is_a?(Node)
      Error.new("unexpected #{val.token} at line #{val.line}, column #{val.col}", val.line, val.col)
    else
      Error.new("unexpected token")
    end
end

# Walks the value stack from the top looking for a QuotedTextNode followed
# (further down) by the QUOTE token that opened it. Returns an
# UnmatchedQuoteError positioned at that quote, or nil when there is none.
def unmatched_quote_error(stack)
  found = nil
  inside_quotes = false
  stack.reverse_each do |entry|
    case entry
    when QuotedTextNode
      inside_quotes = true
    when Node
      if inside_quotes && entry.token == :QUOTE
        found = UnmatchedQuoteError.new(
          "unmatched quote at line #{entry.line}, column #{entry.col}", entry.line, entry.col
        )
      end
    end
  end
  found
end
|
263
|
+
---- footer
|
264
|
+
end
|
data/lib/eccsv.rb
ADDED
data/test/helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
|
11
|
+
require 'test/unit'
|
12
|
+
require 'byebug'
|
13
|
+
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
|
+
require 'eccsv'
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,230 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for ECCSV::Parser: record/field splitting, quoted fields, quote
# errors, and uneven-record warnings. Tests wrapped in =begin/=end are
# disabled and kept for reference.
class TestParser < Test::Unit::TestCase
  # Parses +string+ with a fresh parser and returns the parse result.
  def parse(string)
    ECCSV::Parser.new.parse(string)
  end

  test "one record with two fields" do
    assert_equal [%w[foo bar]], parse("foo,bar")
  end

  test "one record with one field" do
    assert_equal [%w[foo]], parse("foo")
  end

  test "empty records" do
    assert_equal [], parse("")
  end

  test "empty record is skipped by default" do
    assert_equal [%w[foo], %w[bar]], parse("foo\n\nbar")
  end

  test "skipping empty record at end" do
    assert_equal [%w[foo], %w[bar]], parse("foo\nbar\n")
  end

=begin
  test "not skipping an empty record" do
    parser = ECCSV::Parser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\n\nbar")
    assert_equal [['foo'], [], ['bar']], result.value
  end

  test "not skipping empty record at end" do
    parser = ECCSV::Parser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\nbar\n")
    assert_equal [['foo'], ['bar'], []], result.value
  end
=end

  test "two records" do
    assert_equal [%w[foo bar], %w[baz qux]], parse("foo,bar\nbaz,qux")
  end

  test "quoted field" do
    assert_equal [["foo,bar"]], parse(%{"foo,bar"})
  end

  test "missing closing quote" do
    parser = ECCSV::Parser.new
    outcome = parser.parse(%{foo,bar\n"foo})
    assert !outcome
    assert_kind_of ECCSV::UnmatchedQuoteError, parser.error
    # The error points at the opening quote on the second line.
    assert_equal 2, parser.error.line
    assert_equal 1, parser.error.col
  end

  test "quote inside unquoted field" do
    parser = ECCSV::Parser.new
    outcome = parser.parse(%{f"oo})
    assert !outcome
    assert_kind_of ECCSV::StrayQuoteError, parser.error
    assert_equal 1, parser.error.line
    assert_equal 2, parser.error.col
  end

  test "missing fields gets warning by default" do
    parser = ECCSV::Parser.new
    rows = parser.parse(%{foo,bar\nbaz})
    assert_equal [%w[foo bar], %w[baz]], rows
    assert_equal 1, parser.warnings.length
    warning = parser.warnings[0]
    assert_kind_of ECCSV::MissingFieldsError, warning
    assert_equal 2, warning.line
    assert_equal 4, warning.col
  end

=begin
  test "missing fields when disallowed" do
    parser = ECCSV::Parser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo,bar\nbaz})
    assert !result
    assert_equal :missing_fields, parser.failure_type
  end
=end

  test "extra fields gets warning by default" do
    parser = ECCSV::Parser.new
    rows = parser.parse(%{foo\nbar,baz})
    assert_equal [%w[foo], %w[bar baz]], rows
    assert_equal 1, parser.warnings.length
    warning = parser.warnings[0]
    assert_kind_of ECCSV::ExtraFieldsError, warning
    assert_equal 2, warning.line
    assert_equal 4, warning.col
  end

=begin
  test "extra fields when disallowed" do
    parser = ECCSV::Parser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo\nbar,baz})
    assert !result
    assert_equal :extra_fields, parser.failure_type
  end

  test "single-character custom field separator" do
    parser = ECCSV::Parser.new
    parser.field_sep = "\t"
    result = parser.parse("foo\tbar")
    assert result, parser.failure_reason
    assert_equal [['foo', 'bar']], result.value
  end

  test "multi-character custom field separator" do
    parser = ECCSV::Parser.new
    parser.field_sep = "foo"
    result = parser.parse("bazfoobar")
    assert result, parser.failure_reason
    assert_equal [['baz', 'bar']], result.value
  end

  test "single-character custom record separator" do
    parser = ECCSV::Parser.new
    parser.record_sep = "x"
    result = parser.parse("fooxbar")
    assert result, parser.failure_reason
    assert_equal [['foo'], ['bar']], result.value
  end

  test "multi-character custom record separator" do
    parser = ECCSV::Parser.new
    parser.record_sep = "foo"
    result = parser.parse("barfoobaz")
    assert result, parser.failure_reason
    assert_equal [['bar'], ['baz']], result.value
  end

  test "custom quote character" do
    parser = ECCSV::Parser.new
    parser.quote_char = "'"
    result = parser.parse("'foo,bar'")
    assert result, parser.failure_reason
    assert_equal [['foo,bar']], result.value
  end

  test "parse helper" do
    result = CsvParser.parse("foo,bar")
    assert_equal [['foo', 'bar']], result.data
  end

  test "parse helper with options" do
    result = CsvParser.parse("foo\tbar", :field_sep => "\t")
    assert_equal [['foo', 'bar']], result.data
  end

  test "parse helper with missing closing quote" do
    error = nil
    begin
      CsvParser.parse(%{"foo})
    rescue CsvParser::MissingQuoteError => error
      assert_equal 1, error.line
      assert_equal 1, error.column
      assert_equal "no ending quote found for quote on line 1, column 1", error.message
    end
    assert error
  end

  test "parse helper with stray quote" do
    error = nil
    begin
      CsvParser.parse(%{f"oo})
    rescue CsvParser::StrayQuoteError => error
      assert_equal 1, error.line
      assert_equal 2, error.column
      assert_equal "invalid quote found on line 1, column 2", error.message
    end
    assert error
  end

  test "parse helper with allowed short records" do
    result = CsvParser.parse(%{foo,bar\nbaz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::MissingFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 4, error.column
    assert_equal "record on line 2 had too few fields", error.message
  end

  test "parse helper with disallowed short records" do
    error = nil
    begin
      CsvParser.parse(%{foo,bar\nbaz}, :allow_uneven_records => false)
    rescue CsvParser::MissingFieldsError => error
      assert_equal 2, error.line
      assert_equal 4, error.column
      assert_equal "record on line 2 had too few fields", error.message
    end
    assert error
  end

  test "parse helper with allowed long records" do
    result = CsvParser.parse(%{foo\nbar,baz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::ExtraFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 5, error.column
    assert_equal "record on line 2 had too many fields", error.message
  end

  test "parse helper with disallowed long records" do
    error = nil
    begin
      CsvParser.parse(%{foo\nbar,baz}, :allow_uneven_records => false)
    rescue CsvParser::ExtraFieldsError => error
      assert_equal 2, error.line
      assert_equal 5, error.column
      assert_equal "record on line 2 had too many fields", error.message
    end
    assert error
  end
=end
end
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: eccsv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeremy Stephens
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: test-unit
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: racc
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: CSV library with advanced error reporting
|
70
|
+
email:
|
71
|
+
- jeremy.f.stephens@vanderbilt.edu
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- Gemfile
|
78
|
+
- LICENSE.txt
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- eccsv.gemspec
|
82
|
+
- lib/eccsv.rb
|
83
|
+
- lib/eccsv/errors.rb
|
84
|
+
- lib/eccsv/parser.rb
|
85
|
+
- lib/eccsv/parser.y
|
86
|
+
- lib/eccsv/version.rb
|
87
|
+
- test/helper.rb
|
88
|
+
- test/test_parser.rb
|
89
|
+
homepage: https://github.com/coupler/eccsv
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata: {}
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
requirements: []
|
108
|
+
rubyforge_project:
|
109
|
+
rubygems_version: 2.2.2
|
110
|
+
signing_key:
|
111
|
+
specification_version: 4
|
112
|
+
summary: CSV library with advanced error reporting
|
113
|
+
test_files:
|
114
|
+
- test/helper.rb
|
115
|
+
- test/test_parser.rb
|