csv_parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +64 -0
- data/Rakefile +16 -0
- data/csv_parser.gemspec +25 -0
- data/lib/csv_parser.rb +60 -0
- data/lib/csv_parser.treetop +232 -0
- data/lib/csv_parser/csv_parser.rb +1342 -0
- data/lib/csv_parser/parser_extensions.rb +75 -0
- data/lib/csv_parser/result.rb +9 -0
- data/lib/csv_parser/version.rb +3 -0
- data/test/helper.rb +16 -0
- data/test/test_csv_parser.rb +226 -0
- metadata +116 -0
data/lib/csv_parser/parser_extensions.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
module CsvParser
  # Option accessors and failure/warning state mixed into the CSV parser.
  #
  # Every option reader falls back to a built-in default until its writer has
  # been called. The failure readers expose instance variables that are set
  # elsewhere (not visible in this module).
  module ParserExtensions
    # Plain writers: each stores the given value in the instance variable of
    # the same name.
    attr_writer :field_sep, :record_sep, :quote_char,
                :allow_empty_record, :skip_empty_record, :allow_uneven_records

    # Value stored in @failure_type, or nil when nothing has been stored.
    attr_reader :failure_type

    # String separating fields within a record. Defaults to ','.
    # A nil/false value is replaced by the default on the next read.
    def field_sep
      @field_sep ||= ','
    end

    # String separating records. Defaults to "\n".
    # A nil/false value is replaced by the default on the next read.
    def record_sep
      @record_sep ||= "\n"
    end

    # String that opens and closes a quoted field. Defaults to '"'.
    # A nil/false value is replaced by the default on the next read.
    def quote_char
      @quote_char ||= '"'
    end

    # Returns the value assigned through #allow_empty_record=, or true when
    # nothing has been assigned yet. `defined?` is used instead of `||=` so an
    # explicit false (or nil) is preserved rather than reset to the default.
    # NOTE(review): presumably controls whether empty records are accepted at
    # all; the consumer of this flag is outside this module.
    def allow_empty_record?
      defined?(@allow_empty_record) ? @allow_empty_record : true
    end

    # Returns the value assigned through #skip_empty_record=, or true when
    # nothing has been assigned yet (explicit false/nil is preserved).
    def skip_empty_record?
      defined?(@skip_empty_record) ? @skip_empty_record : true
    end

    # Returns the value assigned through #allow_uneven_records=, or true when
    # nothing has been assigned yet (explicit false/nil is preserved).
    def allow_uneven_records?
      defined?(@allow_uneven_records) ? @allow_uneven_records : true
    end

    # Returns @failure_index when it is set; otherwise defers to the
    # implementation further up the ancestor chain of the including class.
    def failure_index
      @failure_index || super
    end

    # Lazily created, memoized Array of warnings.
    def warnings
      @warnings ||= []
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Test bootstrap: activates the bundle, loads the test framework and puts the
# test directory and the library directory on the load path.
require 'rubygems'
require 'bundler'
begin
  # Activate the gems of the default and development groups.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the problem and exit with the status code carried by the error.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

require 'test/unit'
require 'byebug'

# Make test/ and lib/ (relative to this file) requirable by bare name.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'csv_parser'
|
data/test/test_csv_parser.rb
ADDED
@@ -0,0 +1,226 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for the low-level parser (CsvParser::CsvParser) and for the
# CsvParser.parse convenience helper.
class TestCsvParser < Test::Unit::TestCase
  # Parses +string+ with a fresh, default-configured parser.
  # Returns [parse result, parser.failure_reason] so callers can use the
  # failure reason as the assertion message.
  def parse(string)
    parser = CsvParser::CsvParser.new
    [parser.parse(string), parser.failure_reason]
  end

  # --- Low-level parser: basic records -------------------------------------

  test "one record with two fields" do
    result, error = parse("foo,bar")
    assert result, error
    assert_equal [['foo', 'bar']], result.value
  end

  test "one record with one field" do
    result, error = parse("foo")
    assert result, error
    assert_equal [['foo']], result.value
  end

  test "empty records" do
    result, error = parse("")
    assert result, error
    assert_equal [], result.value
  end

  # --- Low-level parser: empty records --------------------------------------

  test "empty record is skipped by default" do
    result, error = parse("foo\n\nbar")
    assert result, error
    assert_equal [['foo'], ['bar']], result.value
  end

  test "skipping empty record at end" do
    result, error = parse("foo\nbar\n")
    assert result, error
    assert_equal [['foo'], ['bar']], result.value
  end

  test "not skipping an empty record" do
    parser = CsvParser::CsvParser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\n\nbar")
    # Fail with the parser's own reason instead of a NoMethodError on nil.
    assert result, parser.failure_reason
    assert_equal [['foo'], [], ['bar']], result.value
  end

  test "not skipping empty record at end" do
    parser = CsvParser::CsvParser.new
    parser.skip_empty_record = false
    result = parser.parse("foo\nbar\n")
    # Fail with the parser's own reason instead of a NoMethodError on nil.
    assert result, parser.failure_reason
    assert_equal [['foo'], ['bar'], []], result.value
  end

  test "two records" do
    result, error = parse("foo,bar\nbaz,qux")
    assert result, error
    assert_equal [['foo', 'bar'], ['baz', 'qux']], result.value
  end

  # --- Low-level parser: quoting --------------------------------------------

  test "quoted field" do
    result, error = parse(%{"foo,bar"})
    assert result, error
    assert_equal [["foo,bar"]], result.value
  end

  test "missing closing quote" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{"foo})
    assert !result
    assert_equal :missing_quote, parser.failure_type
  end

  test "quote inside unquoted field" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{f"oo})
    assert !result
    assert_equal :stray_quote, parser.failure_type
  end

  # --- Low-level parser: uneven records -------------------------------------
  # Warning entries look like [type, line, column]; the same line/column
  # values are asserted on the error objects in the helper tests below.

  test "missing fields gets warning by default" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{foo,bar\nbaz})
    assert result, parser.failure_reason
    assert_equal [[:missing_fields, 2, 4]], parser.warnings
  end

  test "missing fields when disallowed" do
    parser = CsvParser::CsvParser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo,bar\nbaz})
    assert !result
    assert_equal :missing_fields, parser.failure_type
  end

  test "extra fields gets warning by default" do
    parser = CsvParser::CsvParser.new
    result = parser.parse(%{foo\nbar,baz})
    assert result, parser.failure_reason
    assert_equal [[:extra_fields, 2, 5]], parser.warnings
  end

  test "extra fields when disallowed" do
    parser = CsvParser::CsvParser.new
    parser.allow_uneven_records = false
    result = parser.parse(%{foo\nbar,baz})
    assert !result
    assert_equal :extra_fields, parser.failure_type
  end

  # --- Low-level parser: custom separators and quote character --------------

  test "single-character custom field separator" do
    parser = CsvParser::CsvParser.new
    parser.field_sep = "\t"
    result = parser.parse("foo\tbar")
    assert result, parser.failure_reason
    assert_equal [['foo', 'bar']], result.value
  end

  test "multi-character custom field separator" do
    parser = CsvParser::CsvParser.new
    parser.field_sep = "foo"
    result = parser.parse("bazfoobar")
    assert result, parser.failure_reason
    assert_equal [['baz', 'bar']], result.value
  end

  test "single-character custom record separator" do
    parser = CsvParser::CsvParser.new
    parser.record_sep = "x"
    result = parser.parse("fooxbar")
    assert result, parser.failure_reason
    assert_equal [['foo'], ['bar']], result.value
  end

  test "multi-character custom record separator" do
    parser = CsvParser::CsvParser.new
    parser.record_sep = "foo"
    result = parser.parse("barfoobaz")
    assert result, parser.failure_reason
    assert_equal [['bar'], ['baz']], result.value
  end

  test "custom quote character" do
    parser = CsvParser::CsvParser.new
    parser.quote_char = "'"
    result = parser.parse("'foo,bar'")
    assert result, parser.failure_reason
    assert_equal [['foo,bar']], result.value
  end

  # --- CsvParser.parse helper ------------------------------------------------

  test "parse helper" do
    result = CsvParser.parse("foo,bar")
    assert_equal [['foo', 'bar']], result.data
  end

  test "parse helper with options" do
    result = CsvParser.parse("foo\tbar", :field_sep => "\t")
    assert_equal [['foo', 'bar']], result.data
  end

  # In the raising tests below, `error` stays nil unless the expected
  # exception is rescued, so the trailing `assert error` fails the test when
  # nothing was raised.

  test "parse helper with missing closing quote" do
    error = nil
    begin
      CsvParser.parse(%{"foo})
    rescue CsvParser::MissingQuoteError => error
      assert_equal 1, error.line
      assert_equal 1, error.column
      assert_equal "no ending quote found for quote on line 1, column 1", error.message
    end
    assert error
  end

  test "parse helper with stray quote" do
    error = nil
    begin
      CsvParser.parse(%{f"oo})
    rescue CsvParser::StrayQuoteError => error
      assert_equal 1, error.line
      assert_equal 2, error.column
      assert_equal "invalid quote found on line 1, column 2", error.message
    end
    assert error
  end

  test "parse helper with allowed short records" do
    result = CsvParser.parse(%{foo,bar\nbaz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::MissingFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 4, error.column
    assert_equal "record on line 2 had too few fields", error.message
  end

  test "parse helper with disallowed short records" do
    error = nil
    begin
      CsvParser.parse(%{foo,bar\nbaz}, :allow_uneven_records => false)
    rescue CsvParser::MissingFieldsError => error
      assert_equal 2, error.line
      assert_equal 4, error.column
      assert_equal "record on line 2 had too few fields", error.message
    end
    assert error
  end

  test "parse helper with allowed long records" do
    result = CsvParser.parse(%{foo\nbar,baz})
    assert_equal 1, result.warnings.length
    assert_kind_of CsvParser::ExtraFieldsError, result.warnings[0]
    error = result.warnings[0]
    assert_equal 2, error.line
    assert_equal 5, error.column
    assert_equal "record on line 2 had too many fields", error.message
  end

  test "parse helper with disallowed long records" do
    error = nil
    begin
      CsvParser.parse(%{foo\nbar,baz}, :allow_uneven_records => false)
    rescue CsvParser::ExtraFieldsError => error
      assert_equal 2, error.line
      assert_equal 5, error.column
      assert_equal "record on line 2 had too many fields", error.message
    end
    assert error
  end
end
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeremy Stephens
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-11-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: treetop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.3'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: test-unit
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: CSV parser with advanced error reporting
|
70
|
+
email:
|
71
|
+
- jeremy.f.stephens@vanderbilt.edu
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- .gitignore
|
77
|
+
- Gemfile
|
78
|
+
- LICENSE.txt
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- csv_parser.gemspec
|
82
|
+
- lib/csv_parser.rb
|
83
|
+
- lib/csv_parser.treetop
|
84
|
+
- lib/csv_parser/csv_parser.rb
|
85
|
+
- lib/csv_parser/parser_extensions.rb
|
86
|
+
- lib/csv_parser/result.rb
|
87
|
+
- lib/csv_parser/version.rb
|
88
|
+
- test/helper.rb
|
89
|
+
- test/test_csv_parser.rb
|
90
|
+
homepage: https://github.com/coupler/csv_parser
|
91
|
+
licenses:
|
92
|
+
- MIT
|
93
|
+
metadata: {}
|
94
|
+
post_install_message:
|
95
|
+
rdoc_options: []
|
96
|
+
require_paths:
|
97
|
+
- lib
|
98
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - '>='
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '0'
|
103
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 2.0.3
|
111
|
+
signing_key:
|
112
|
+
specification_version: 4
|
113
|
+
summary: CSV parser with advanced error reporting
|
114
|
+
test_files:
|
115
|
+
- test/helper.rb
|
116
|
+
- test/test_csv_parser.rb
|