csv_parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +64 -0
- data/Rakefile +16 -0
- data/csv_parser.gemspec +25 -0
- data/lib/csv_parser.rb +60 -0
- data/lib/csv_parser.treetop +232 -0
- data/lib/csv_parser/csv_parser.rb +1342 -0
- data/lib/csv_parser/parser_extensions.rb +75 -0
- data/lib/csv_parser/result.rb +9 -0
- data/lib/csv_parser/version.rb +3 -0
- data/test/helper.rb +16 -0
- data/test/test_csv_parser.rb +226 -0
- metadata +116 -0
@@ -0,0 +1,75 @@
|
|
1
|
+
module CsvParser
  # Mixin holding the configurable options of a CSV parser (separators,
  # quote character, strictness flags) together with accessors for the
  # failure details and warnings recorded on the object it is mixed into.
  # All state is kept in instance variables of the including object.
  module ParserExtensions
    # Writers for every option; each one simply stores the given value.
    attr_writer :field_sep, :record_sep, :quote_char,
                :allow_empty_record, :skip_empty_record, :allow_uneven_records

    # Current value of @failure_type (nil while nothing has assigned it).
    attr_reader :failure_type

    # String separating fields; falls back to (and remembers) ','.
    def field_sep
      @field_sep = ',' unless @field_sep
      @field_sep
    end

    # String separating records; falls back to (and remembers) "\n".
    def record_sep
      @record_sep = "\n" unless @record_sep
      @record_sep
    end

    # Quote character; falls back to (and remembers) '"'.
    def quote_char
      @quote_char = '"' unless @quote_char
      @quote_char
    end

    # Returns the stored flag once it has been assigned (even to false or
    # nil); until then the answer is true.
    def allow_empty_record?
      instance_variable_defined?(:@allow_empty_record) ? @allow_empty_record : true
    end

    # Returns the stored flag once it has been assigned (even to false or
    # nil); until then the answer is true.
    def skip_empty_record?
      instance_variable_defined?(:@skip_empty_record) ? @skip_empty_record : true
    end

    # Returns the stored flag once it has been assigned (even to false or
    # nil); until then the answer is true.
    def allow_uneven_records?
      instance_variable_defined?(:@allow_uneven_records) ? @allow_uneven_records : true
    end

    # Prefers an explicitly recorded @failure_index; otherwise defers to the
    # failure_index defined further up the ancestor chain of the including
    # class (presumably the generated parser's own implementation -- confirm).
    def failure_index
      return @failure_index if @failure_index

      super
    end

    # Lazily created array of warnings; the same array is returned on every call.
    def warnings
      @warnings = [] unless @warnings
      @warnings
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Test bootstrap: activates the bundle, loads the test framework and the
# debugger, then puts the test directory and ../lib on the load path before
# requiring the library under test.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report Bundler's own message plus a hint, then exit with its status code.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'byebug'

test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_dir)
# Unshifted last, so the library directory ends up first on the load path.
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
require 'csv_parser'
|
@@ -0,0 +1,226 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Exercises CsvParser::CsvParser directly (results, failure types, warnings,
# custom separators/quotes) and the CsvParser.parse helper (data, warnings
# and the error classes it raises).
class TestCsvParser < Test::Unit::TestCase
  # Parses +string+ with a fresh, default-configured parser.
  # Returns [parse result, parser.failure_reason]; the reason is used by the
  # callers as the assertion message when the result is falsy.
  def parse(string)
    parser = CsvParser::CsvParser.new
    [parser.parse(string), parser.failure_reason]
  end

  test "one record with two fields" do
    result, error = parse("foo,bar")
    assert result, error
    assert_equal [['foo', 'bar']], result.value
  end

  test "one record with one field" do
    result, error = parse("foo")
    assert result, error
    assert_equal [['foo']], result.value
  end

  test "empty records" do
    result, error = parse("")
    assert result, error
    assert_equal [], result.value
  end

  test "empty record is skipped by default" do
    result, error = parse("foo\n\nbar")
    assert result, error
    assert_equal [['foo'], ['bar']], result.value
  end

  test "skipping empty record at end" do
    result, error = parse("foo\nbar\n")
    assert result, error
    assert_equal [['foo'], ['bar']], result.value
  end

  test "not skipping an empty record" do
    parser = CsvParser::CsvParser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\n\nbar")
    # Fail with the parser's reason instead of a NoMethodError on a falsy result.
    assert result, parser.failure_reason
    assert_equal [['foo'], [], ['bar']], result.value
  end

  test "not skipping empty record at end" do
    parser = CsvParser::CsvParser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\nbar\n")
    # Fail with the parser's reason instead of a NoMethodError on a falsy result.
    assert result, parser.failure_reason
    assert_equal [['foo'], ['bar'], []], result.value
  end

  test "two records" do
    result, error = parse("foo,bar\nbaz,qux")
    assert result, error
    assert_equal [['foo', 'bar'], ['baz', 'qux']], result.value
  end

  test "quoted field" do
    result, error = parse(%{"foo,bar"})
    assert result, error
    assert_equal [["foo,bar"]], result.value
  end

  test "missing closing quote" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{"foo})
    assert !result
    assert_equal :missing_quote, parser.failure_type
  end

  test "quote inside unquoted field" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{f"oo})
    assert !result
    assert_equal :stray_quote, parser.failure_type
  end

  test "missing fields gets warning by default" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{foo,bar\nbaz})
    assert result, parser.failure_reason
    assert_equal [[:missing_fields, 2, 4]], parser.warnings
  end

  test "missing fields when disallowed" do
    parser = CsvParser::CsvParser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo,bar\nbaz})
    assert !result
    assert_equal :missing_fields, parser.failure_type
  end

  test "extra fields gets warning by default" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{foo\nbar,baz})
    assert result, parser.failure_reason
    assert_equal [[:extra_fields, 2, 5]], parser.warnings
  end

  test "extra fields when disallowed" do
    parser = CsvParser::CsvParser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo\nbar,baz})
    assert !result
    assert_equal :extra_fields, parser.failure_type
  end

  test "single-character custom field separator" do
    parser = CsvParser::CsvParser.new
    parser.field_sep = "\t"
    result = parser.parse("foo\tbar")
    assert result, parser.failure_reason
    assert_equal [['foo', 'bar']], result.value
  end

  test "multi-character custom field separator" do
    parser = CsvParser::CsvParser.new
    parser.field_sep = "foo"
    result = parser.parse("bazfoobar")
    assert result, parser.failure_reason
    assert_equal [['baz', 'bar']], result.value
  end

  test "single-character custom record separator" do
    parser = CsvParser::CsvParser.new
    parser.record_sep = "x"
    result = parser.parse("fooxbar")
    assert result, parser.failure_reason
    assert_equal [['foo'], ['bar']], result.value
  end

  test "multi-character custom record separator" do
    parser = CsvParser::CsvParser.new
    parser.record_sep = "foo"
    result = parser.parse("barfoobaz")
    assert result, parser.failure_reason
    assert_equal [['bar'], ['baz']], result.value
  end

  test "custom quote character" do
    parser = CsvParser::CsvParser.new
    parser.quote_char = "'"
    result = parser.parse("'foo,bar'")
    assert result, parser.failure_reason
    assert_equal [['foo,bar']], result.value
  end

  test "parse helper" do
    result = CsvParser.parse("foo,bar")
    assert_equal [['foo', 'bar']], result.data
  end

  test "parse helper with options" do
    result = CsvParser.parse("foo\tbar", :field_sep => "\t")
    assert_equal [['foo', 'bar']], result.data
  end

  # The raising tests below use assert_raise_kind_of, which (like the
  # `rescue` clause it replaces) also accepts subclasses of the expected
  # error, returns the raised exception, and fails with a descriptive
  # message when nothing is raised.

  test "parse helper with missing closing quote" do
    error = assert_raise_kind_of(CsvParser::MissingQuoteError) do
      CsvParser.parse(%{"foo})
    end
    assert_equal 1, error.line
    assert_equal 1, error.column
    assert_equal "no ending quote found for quote on line 1, column 1", error.message
  end

  test "parse helper with stray quote" do
    error = assert_raise_kind_of(CsvParser::StrayQuoteError) do
      CsvParser.parse(%{f"oo})
    end
    assert_equal 1, error.line
    assert_equal 2, error.column
    assert_equal "invalid quote found on line 1, column 2", error.message
  end

  test "parse helper with allowed short records" do
    result = CsvParser.parse(%{foo,bar\nbaz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::MissingFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 4, error.column
    assert_equal "record on line 2 had too few fields", error.message
  end

  test "parse helper with disallowed short records" do
    error = assert_raise_kind_of(CsvParser::MissingFieldsError) do
      CsvParser.parse(%{foo,bar\nbaz}, :allow_uneven_records => false)
    end
    assert_equal 2, error.line
    assert_equal 4, error.column
    assert_equal "record on line 2 had too few fields", error.message
  end

  test "parse helper with allowed long records" do
    result = CsvParser.parse(%{foo\nbar,baz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::ExtraFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 5, error.column
    assert_equal "record on line 2 had too many fields", error.message
  end

  test "parse helper with disallowed long records" do
    error = assert_raise_kind_of(CsvParser::ExtraFieldsError) do
      CsvParser.parse(%{foo\nbar,baz}, :allow_uneven_records => false)
    end
    assert_equal 2, error.line
    assert_equal 5, error.column
    assert_equal "record on line 2 had too many fields", error.message
  end
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeremy Stephens
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-11-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: treetop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.3'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: test-unit
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: CSV parser with advanced error reporting
|
70
|
+
email:
|
71
|
+
- jeremy.f.stephens@vanderbilt.edu
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- .gitignore
|
77
|
+
- Gemfile
|
78
|
+
- LICENSE.txt
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- csv_parser.gemspec
|
82
|
+
- lib/csv_parser.rb
|
83
|
+
- lib/csv_parser.treetop
|
84
|
+
- lib/csv_parser/csv_parser.rb
|
85
|
+
- lib/csv_parser/parser_extensions.rb
|
86
|
+
- lib/csv_parser/result.rb
|
87
|
+
- lib/csv_parser/version.rb
|
88
|
+
- test/helper.rb
|
89
|
+
- test/test_csv_parser.rb
|
90
|
+
homepage: https://github.com/coupler/csv_parser
|
91
|
+
licenses:
|
92
|
+
- MIT
|
93
|
+
metadata: {}
|
94
|
+
post_install_message:
|
95
|
+
rdoc_options: []
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 2.0.3
|
111
|
+
signing_key:
|
112
|
+
specification_version: 4
|
113
|
+
summary: CSV parser with advanced error reporting
|
114
|
+
test_files:
|
115
|
+
- test/helper.rb
|
116
|
+
- test/test_csv_parser.rb
|