csv_parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +64 -0
- data/Rakefile +16 -0
- data/csv_parser.gemspec +25 -0
- data/lib/csv_parser.rb +60 -0
- data/lib/csv_parser.treetop +232 -0
- data/lib/csv_parser/csv_parser.rb +1342 -0
- data/lib/csv_parser/parser_extensions.rb +75 -0
- data/lib/csv_parser/result.rb +9 -0
- data/lib/csv_parser/version.rb +3 -0
- data/test/helper.rb +16 -0
- data/test/test_csv_parser.rb +226 -0
- metadata +116 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0182c5c8d43682eaf967338eab5d249c5036ed54
|
4
|
+
data.tar.gz: 249c47d4015daba1d0def6019449d400b1c8bb2c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 29835c6d4346804386d2882ee8141207a24eef5fc1fd4163b7bb01d0582b63fa091c9968e934e6dff4a425763393f953a40a0e787054c9a69b75c88a056323d0
|
7
|
+
data.tar.gz: 2ce0c0176e2611e7e569c295cf2b511d2266cc52bbb371ab23422186b3b1a11a023508649d922fc7ee75c237e6567300fb7d002b776a38fcf358c70b752e5519
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Vanderbilt University
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# CsvParser
|
2
|
+
|
3
|
+
CsvParser is a CSV parser that focuses on identifying errors.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'csv_parser'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install csv_parser
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
CsvParser's goal is to give you descriptive error messages.
|
22
|
+
|
23
|
+
### Error types
|
24
|
+
|
25
|
+
* missing closing quote (`CsvParser::MissingQuoteError`)
|
26
|
+
* quote in the wrong place (`CsvParser::StrayQuoteError`)
|
27
|
+
* rows with not enough fields (`CsvParser::MissingFieldsError`)
|
28
|
+
* rows with too many fields (`CsvParser::ExtraFieldsError`)
|
29
|
+
|
30
|
+
### Options
|
31
|
+
|
32
|
+
You can pass in an options hash to the `CsvParser.parse` method
|
33
|
+
that contains one or more of the following options:
|
34
|
+
|
35
|
+
* `:field_sep` - specify field separator (default is `","`)
|
36
|
+
* `:record_sep` - specify record separator (default is `"\n"`)
|
37
|
+
* `:quote_char` - specify quote character (default is `"\""`)
|
38
|
+
* `:allow_empty_record` - specify whether empty records are allowed (default is `true`)
|
39
|
+
* `:skip_empty_record` - specify whether empty records are skipped (default is `true`)
|
40
|
+
* `:allow_uneven_records` - specify whether records with differing numbers of fields are allowed (default is `true`)
|
41
|
+
|
42
|
+
### Example
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
require 'csv_parser'
|
46
|
+
|
47
|
+
data = <<EOF
|
48
|
+
foo,"bar
|
49
|
+
baz,quz
|
50
|
+
EOF
|
51
|
+
begin
|
52
|
+
result = CsvParser.parse(data)
|
53
|
+
rescue CsvParser::Error => e
|
54
|
+
# e is a CsvParser::MissingQuoteError
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
## Contributing
|
59
|
+
|
60
|
+
1. Fork it
|
61
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
62
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
63
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
64
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "bundler/gem_tasks"
require "rake/testtask"

# Test task: puts test/ on the load path and runs every file matching
# test/**/test*.rb.
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/test*.rb'
end
# The generated parser has to be up to date before the tests run.
task :test => :treetop
task :default => :test

desc "Compile treetop grammar"
task :treetop => "lib/csv_parser/csv_parser.rb"

# Regenerates the parser whenever the grammar is newer than the generated file.
# `sh` (unlike `system`) aborts the task when `tt` exits with a non-zero
# status, so a failed compile can no longer be followed by a test run against
# a stale or missing parser.
file "lib/csv_parser/csv_parser.rb" => "lib/csv_parser.treetop" do
  sh "tt lib/csv_parser.treetop -o lib/csv_parser/csv_parser.rb"
end
|
data/csv_parser.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for csv_parser.

# Put this checkout's lib/ directory on the load path so the version file
# below can be required without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'csv_parser/version'

Gem::Specification.new do |spec|
  spec.name          = "csv_parser"
  spec.version       = CsvParser::VERSION
  spec.authors       = ["Jeremy Stephens"]
  spec.email         = ["jeremy.f.stephens@vanderbilt.edu"]
  spec.description   = %q{CSV parser with advanced error reporting}
  spec.summary       = %q{CSV parser with advanced error reporting}
  spec.homepage      = "https://github.com/coupler/csv_parser"
  spec.license       = "MIT"

  # The packaged file list is whatever `git ls-files` prints, split on the
  # input record separator ($/), so building requires a git checkout.
  spec.files         = `git ls-files`.split($/)
  # Executables are the basenames of packaged files under bin/.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  # Test files are the packaged files under test/, spec/ or features/.
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependency; no version constraint is declared.
  spec.add_dependency 'treetop'
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "test-unit"
end
|
data/lib/csv_parser.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'treetop'
|
2
|
+
|
3
|
+
require 'csv_parser/version'
|
4
|
+
require 'csv_parser/parser_extensions'
|
5
|
+
require 'csv_parser/result'
|
6
|
+
|
7
|
+
#Treetop.load(File.join(File.dirname(__FILE__), 'csv_parser.treetop'))
|
8
|
+
require 'csv_parser/csv_parser'
|
9
|
+
|
10
|
+
module CsvParser
  # Base class for every error produced by CsvParser.
  #
  # Inherits from StandardError so that a plain `rescue` / `rescue => e`
  # catches it; `rescue CsvParser::Error` continues to work unchanged.
  class Error < StandardError
    # Line and column associated with the problem, as reported by the parser.
    attr_reader :line, :column

    # @param msg [String, nil] human-readable description of the problem
    # @param line [Object] line value reported by the parser
    # @param column [Object] column value reported by the parser
    def initialize(msg, line, column)
      super(msg)
      @line = line
      @column = column
    end
  end

  class MissingQuoteError < Error; end
  class StrayQuoteError < Error; end
  class MissingFieldsError < Error; end
  class ExtraFieldsError < Error; end

  # Parses +data+ and returns a Result holding the parsed value and any
  # warnings (each warning is an Error instance that was not raised).
  #
  # Every key in +options+ is forwarded to the parser as a "key=" setter call.
  # NOTE(review): because keys are dispatched with `send`, the options hash
  # should not be built from untrusted input.
  #
  # @param data [String] text to parse
  # @param options [Hash] parser settings, one setter call per key
  # @return [Result]
  # @raise [CsvParser::Error] (or a subclass) when the parser returns no result
  def self.parse(data, options = {})
    parser = ::CsvParser::CsvParser.new
    options.each_pair do |key, value|
      parser.send("#{key}=", value)
    end

    result = parser.parse(data)
    unless result
      raise error(parser.failure_type, parser.failure_line,
                  parser.failure_column, parser.failure_reason)
    end

    warnings = parser.warnings.collect do |(desc, line, col)|
      error(desc, line, col)
    end
    Result.new(result.value, warnings)
  end

  # Builds (does not raise) the error object matching +type+.
  #
  # Known types get a fixed, descriptive message. Any other type yields a
  # plain Error that keeps the caller-supplied +msg+; previously that message
  # was discarded because the fallback branch reset it to nil.
  #
  # @param type [Symbol, nil] :missing_quote, :stray_quote, :missing_fields,
  #   :extra_fields, or anything else for a generic error
  # @param line [Object] line value to attach to the error
  # @param column [Object] column value to attach to the error
  # @param msg [String, nil] message used only for the generic fallback
  # @return [CsvParser::Error]
  def self.error(type, line, column, msg = nil)
    klass, msg =
      case type
      when :missing_quote
        [MissingQuoteError, "no ending quote found for quote on line #{line}, column #{column}"]
      when :stray_quote
        [StrayQuoteError, "invalid quote found on line #{line}, column #{column}"]
      when :missing_fields
        [MissingFieldsError, "record on line #{line} had too few fields"]
      when :extra_fields
        [ExtraFieldsError, "record on line #{line} had too many fields"]
      else
        [Error, msg]
      end

    klass.new(msg, line, column)
  end
end
|
@@ -0,0 +1,232 @@
|
|
1
|
+
module CsvParser
  # Treetop grammar for CSV text. Each node exposes `value`: an array of
  # records, where a record is an array of field strings.
  #
  # The predicates call skip_empty_record?, allow_empty_record?,
  # allow_uneven_records?, warnings, field_sep, record_sep and quote_char;
  # these are presumably supplied by ParserExtensions (not visible here).
  grammar Csv
    include ParserExtensions

    # Entry rule: one or more records, or empty input.
    rule records
      non_empty_records / empty_records
    end

    # The first record followed by any further records.
    rule non_empty_records
      first_record
      other_records
      {
        def value
          arr = [first_record.value]
          # other_records.value is nil when there are no further records.
          rest = other_records.value
          if rest
            arr.push(*rest)
          end
          arr
        end
      }
    end

    # Matches the empty string; yields an empty list of records.
    rule empty_records
      ''
      {
        def value
          []
        end
      }
    end

    # Parses the first record with @first_record set, then remembers its field
    # count in @first_record_length for comparison with later records.
    rule first_record
      '' &{ |s| @first_record = true; true }
      non_empty_record
      &{ |s| @first_record_length = @record_length; true }
      {
        def value
          non_empty_record.value
        end
      }
    end

    # Records after the first. Clears @first_record, then takes one of two
    # alternatives depending on skip_empty_record?:
    #   * skipping: empty records between and after non-empty ones are
    #     consumed but contribute nothing to the value;
    #   * not skipping: every record, empty or not, is collected.
    rule other_records
      '' &{ |s| @first_record = false; true }
      (
        (
          &{ |s| skip_empty_record? }
          (
            record_sep
            ( empty_record record_sep )*
            non_empty_record
          )*
          (
            &{ |s| skip_empty_record? }
            ( record_sep empty_record )+
          )?
          {
            def value
              # elements[1] is the repetition that follows the leading predicate.
              val = elements[1].elements.collect { |elt| elt.non_empty_record.value }
              val.empty? ? nil : val
            end
          }
        )
        /
        (
          &{ |s| !skip_empty_record? }
          ( record_sep record )*
          {
            def value
              # elements[1] is the repetition that follows the leading predicate.
              val = elements[1].elements.collect { |elt| elt.record.value }
              val.empty? ? nil : val
            end
          }
        )
      )
      {
        def value
          # elements[2] is the alternative group, after '' and the predicate.
          elements[2].value
        end
      }
    end

    rule record
      non_empty_record / empty_record
    end

    # One record: a field followed by zero or more (field_sep field) pairs.
    # @record_length counts the fields seen so far. For records after the
    # first, the count is compared with @first_record_length: depending on
    # allow_uneven_records? a mismatch is either recorded in `warnings` or
    # makes the predicate fail with @failure_type set.
    #
    # NOTE(review): the predicate at the top of `rest` is also evaluated once
    # after the last field, before field_sep is attempted. When the record
    # already has @first_record_length fields and allow_uneven_records? is
    # true, that appears to set @warning to :extra_fields even though no extra
    # field follows, and the closing predicate then appends it to `warnings`.
    # Confirm against the test suite.
    rule non_empty_record
      first:field
      &{ |s| @record_length = 1; @warning = nil; true }
      rest:(
        &{ |s|
          if @first_record || @record_length < @first_record_length
            true
          else
            if allow_uneven_records?
              @warning ||= [:extra_fields, input.line_of(index + 1), input.column_of(index + 1)]
              true
            else
              @failure_type = :extra_fields
              false
            end
          end
        }
        field_sep
        field
        &{ |s| @record_length += 1; true }
      )*
      &{ |s|
        if @first_record || @record_length >= @first_record_length
          if @warning
            warnings << @warning
          end
          true
        else
          if allow_uneven_records?
            warnings << [:missing_fields, input.line_of(index), input.column_of(index)]
            true
          else
            @failure_type = :missing_fields
            false
          end
        end
      }
      {
        def value
          arr = [first.value]
          rest.elements.each do |elt|
            arr << elt.field.value
          end
          arr
        end
      }
    end

    # Matches the empty string when allow_empty_record? is true; otherwise
    # fails and sets @failure_type to :missing_fields. Its value is an empty
    # record.
    rule empty_record
      ''
      &{ |s|
        if allow_empty_record?
          true
        else
          @failure_type = :missing_fields
          false
        end
      }
      {
        def value
          []
        end
      }
    end

    # A field is unquoted text or quoted text. For quoted text the first and
    # last elements (the quote positions) are dropped from the value.
    rule field
      unquoted_text
      {
        def value
          elements.map(&:text_value).join
        end
      }
      /
      quoted_text
      {
        def value
          elements[1..-2].map(&:text_value).join
        end
      }
    end

    # Opening quote, one or more non-quote characters, closing quote. If the
    # closing quote is absent, the second alternative records :missing_quote
    # and the start index; its block returns true, so the negative predicate
    # fails and the rule does not match.
    # NOTE(review): `+` requires at least one character between the quotes, so
    # an empty quoted field presumably does not match here; confirm intended.
    rule quoted_text
      quote
      ( !quote . )+
      (
        quote
        /
        '' !{ |s| @failure_type = :missing_quote; @failure_index = start_index; true }
      )
    end

    # One or more characters that do not start a field separator, a record
    # separator or a quote. On a quote the second alternative records
    # :stray_quote; its block returns true, so the negative predicate fails.
    rule unquoted_text
      (
        !field_sep
        !record_sep
        (
          !quote
          /
          '' !{ |s| @failure_type = :stray_quote; true }
        )
        .
      )+
    end

    # Matches the configured field separator one character at a time:
    # @field_sep_index tracks the position inside the separator string, and
    # the final predicate checks that the consumed text equals the whole
    # separator. Inside the predicates `field_sep` is a Ruby call, presumably
    # returning the separator string.
    rule field_sep
      &{ |s| @field_sep_index = 0; true }
      (
        !record_sep
        !quote
        .
        &{ |s|
          if @field_sep_index < field_sep.length &&
            s[2].text_value == field_sep[@field_sep_index]
            @field_sep_index += 1
            true
          else
            false
          end
        }
      )+
      &{ |s| s.map(&:text_value).join == field_sep }
    end

    # Same character-by-character matching as field_sep, against the value
    # returned by `record_sep`.
    rule record_sep
      &{ |s| @record_sep_index = 0; true }
      (
        .
        &{ |s|
          if @record_sep_index < record_sep.length &&
            s[0].text_value == record_sep[@record_sep_index]
            @record_sep_index += 1
            true
          else
            false
          end
        }
      )+
      &{ |s| s.map(&:text_value).join == record_sep }
    end

    # A single character, not a backslash, whose first character equals the
    # first character of quote_char.
    rule quote
      !"\\" . &{ |s| s[1].text_value[0] == quote_char[0] }
    end
  end
end
|