csv_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0182c5c8d43682eaf967338eab5d249c5036ed54
4
+ data.tar.gz: 249c47d4015daba1d0def6019449d400b1c8bb2c
5
+ SHA512:
6
+ metadata.gz: 29835c6d4346804386d2882ee8141207a24eef5fc1fd4163b7bb01d0582b63fa091c9968e934e6dff4a425763393f953a40a0e787054c9a69b75c88a056323d0
7
+ data.tar.gz: 2ce0c0176e2611e7e569c295cf2b511d2266cc52bbb371ab23422186b3b1a11a023508649d922fc7ee75c237e6567300fb7d002b776a38fcf358c70b752e5519
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in csv_parser.gemspec
4
+ gemspec # take the dependency list from csv_parser.gemspec
5
+
6
+ gem 'byebug' # extra gem declared only here, not in the gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Vanderbilt University
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,64 @@
1
+ # CsvParser
2
+
3
+ CsvParser is a CSV parser that focuses on identifying errors.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'csv_parser'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install csv_parser
18
+
19
+ ## Usage
20
+
21
+ CsvParser's goal is to give you descriptive error messages.
22
+
23
+ ### Error types
24
+
25
+ * missing closing quote (`CsvParser::MissingQuoteError`)
26
+ * quote in the wrong place (`CsvParser::StrayQuoteError`)
27
+ * rows with not enough fields (`CsvParser::MissingFieldsError`)
28
+ * rows with too many fields (`CsvParser::ExtraFieldsError`)
29
+
30
+ ### Options
31
+
32
+ You can pass in an options hash to the `CsvParser.parse` method
33
+ that contains one or more of the following options:
34
+
35
+ * `:field_sep` - specify field separator (default is `","`)
36
+ * `:record_sep` - specify record separator (default is `"\n"`)
37
+ * `:quote_char` - specify quote character (default is `"\""`)
38
+ * `:allow_empty_record` - specify whether empty records are allowed (default is `true`)
39
+ * `:skip_empty_record` - specify whether empty records are skipped (default is `true`)
40
+ * `:allow_uneven_records` - specify whether records with differing numbers of fields are allowed (default is `true`)
41
+
42
+ ### Example
43
+
44
+ ```ruby
45
+ require 'csv_parser'
46
+
47
+ data = <<EOF
48
+ foo,"bar
49
+ baz,quz
50
+ EOF
51
+ begin
52
+ result = CsvParser.parse(data)
53
+ rescue CsvParser::Error => e
54
+ # e is a CsvParser::MissingQuoteError
55
+ end
56
+ ```
57
+
58
+ ## Contributing
59
+
60
+ 1. Fork it
61
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
62
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
63
+ 4. Push to the branch (`git push origin my-new-feature`)
64
+ 5. Create a new Pull Request
@@ -0,0 +1,16 @@
1
require "bundler/gem_tasks"
require "rake/testtask"

# Runs every test*.rb file found below test/.
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/test*.rb'
end
# The generated parser has to exist before the tests can load it.
task :test => :treetop
task :default => :test

desc "Compile treetop grammar"
task :treetop => "lib/csv_parser/csv_parser.rb"

# Regenerates the parser from the grammar whenever the grammar is newer.
file "lib/csv_parser/csv_parser.rb" => "lib/csv_parser.treetop" do |t|
  # sh aborts the task when tt exits non-zero (or cannot be started);
  # Kernel#system only returns false/nil, which was silently ignored and
  # let the build carry on with a stale or missing parser.
  sh "tt #{t.source} -o #{t.name}"
end
@@ -0,0 +1,25 @@
1
# coding: utf-8
# Gem specification for csv_parser.

# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'csv_parser/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "csv_parser"
  gem.version  = CsvParser::VERSION
  gem.license  = "MIT"
  gem.homepage = "https://github.com/coupler/csv_parser"

  # Authorship
  gem.authors = ["Jeremy Stephens"]
  gem.email   = ["jeremy.f.stephens@vanderbilt.edu"]

  # Summary and description intentionally carry the same text.
  gem.description = 'CSV parser with advanced error reporting'
  gem.summary     = 'CSV parser with advanced error reporting'

  # Package exactly what git tracks; executables and tests are derived
  # from that list by path prefix.
  tracked = `git ls-files`.split($/)
  gem.files         = tracked
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w[lib]

  # Runtime dependency
  gem.add_dependency 'treetop'

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "test-unit"
end
@@ -0,0 +1,60 @@
1
+ require 'treetop'
2
+
3
+ require 'csv_parser/version'
4
+ require 'csv_parser/parser_extensions'
5
+ require 'csv_parser/result'
6
+
7
+ #Treetop.load(File.join(File.dirname(__FILE__), 'csv_parser.treetop'))
8
+ require 'csv_parser/csv_parser'
9
+
10
# CSV parsing entry point together with the error classes it reports.
module CsvParser
  # Base class for everything the parser reports, both fatal errors and
  # warnings.
  #
  # Inherits from StandardError rather than Exception so that a plain
  # `rescue` catches it and it is not grouped with signals and exits.
  class Error < StandardError
    # Line and column of the input that the problem refers to.
    attr_reader :line, :column

    # @param msg [String, nil] description; nil falls back to the class name
    # @param line line reported by the parser
    # @param column column reported by the parser
    def initialize(msg, line, column)
      super(msg)
      @line = line
      @column = column
    end
  end

  class MissingQuoteError < Error; end
  class StrayQuoteError < Error; end
  class MissingFieldsError < Error; end
  class ExtraFieldsError < Error; end

  # Parses +data+ and returns a Result.
  #
  # @param data [String] CSV text to parse
  # @param options [Hash] parser settings; every key is forwarded to the
  #   parser through its +key=+ writer
  # @return [Result] the parsed value plus the warnings collected on the
  #   way, each converted to an Error instance
  # @raise [Error] (or a subclass) when the parser cannot parse the input
  def self.parse(data, options = {})
    parser = ::CsvParser::CsvParser.new
    options.each_pair do |key, value|
      parser.send("#{key}=", value)
    end

    tree = parser.parse(data)
    unless tree
      raise error(parser.failure_type, parser.failure_line,
                  parser.failure_column, parser.failure_reason)
    end

    # Warnings are stored as [type, line, column] triples.
    warnings = parser.warnings.map do |(type, line, column)|
      error(type, line, column)
    end
    Result.new(tree.value, warnings)
  end

  # Builds (but does not raise) the error object for a failure type.
  #
  # @param type [Symbol] :missing_quote, :stray_quote, :missing_fields,
  #   :extra_fields, or anything else for a generic Error
  # @param line line the problem refers to
  # @param column column the problem refers to
  # @param msg [String, nil] message used only for types without a
  #   dedicated error class
  # @return [Error] instance of the matching error class
  def self.error(type, line, column, msg = nil)
    klass, text =
      case type
      when :missing_quote
        [MissingQuoteError, "no ending quote found for quote on line #{line}, column #{column}"]
      when :stray_quote
        [StrayQuoteError, "invalid quote found on line #{line}, column #{column}"]
      when :missing_fields
        [MissingFieldsError, "record on line #{line} had too few fields"]
      when :extra_fields
        [ExtraFieldsError, "record on line #{line} had too many fields"]
      else
        # Keep the caller-supplied message; destructuring a bare class
        # here would reset it to nil and lose the parser's failure reason.
        [Error, msg]
      end

    klass.new(text, line, column)
  end
end
@@ -0,0 +1,232 @@
1
+ module CsvParser
2
+ grammar Csv # Treetop grammar for CSV text; helper methods used in the predicates presumably come from ParserExtensions
3
+ include ParserExtensions
4
+
5
+ rule records # Input is either one or more records or nothing at all
6
+ non_empty_records / empty_records
7
+ end
8
+
9
+ rule non_empty_records # The first record followed by any further records
10
+ first_record
11
+ other_records
12
+ {
13
+ def value # Rows as an array of arrays, first record first
14
+ arr = [first_record.value]
15
+ rest = other_records.value
16
+ if rest # other_records.value is nil when no further records matched
17
+ arr.push(*rest)
18
+ end
19
+ arr
20
+ end
21
+ }
22
+ end
23
+
24
+ rule empty_records # Matches the empty string and yields no rows
25
+ ''
26
+ {
27
+ def value
28
+ []
29
+ end
30
+ }
31
+ end
32
+
33
+ rule first_record # The first record fixes the expected field count for the rest
34
+ '' &{ |s| @first_record = true; true } # Flag that the first record is being parsed
35
+ non_empty_record
36
+ &{ |s| @first_record_length = @record_length; true } # Remember how many fields the first record had
37
+ {
38
+ def value
39
+ non_empty_record.value
40
+ end
41
+ }
42
+ end
43
+
44
+ rule other_records # Every record after the first, each preceded by a record separator
45
+ '' &{ |s| @first_record = false; true } # From here on records are compared with @first_record_length
46
+ (
47
+ (
48
+ &{ |s| skip_empty_record? } # Alternative 1: empty records are left out of the result
49
+ (
50
+ record_sep
51
+ ( empty_record record_sep )* # Empty records ahead of a real one are consumed and discarded
52
+ non_empty_record
53
+ )*
54
+ (
55
+ &{ |s| skip_empty_record? }
56
+ ( record_sep empty_record )+ # Trailing empty records, also discarded
57
+ )?
58
+ {
59
+ def value
60
+ val = elements[1].elements.collect { |elt| elt.non_empty_record.value } # Only the non_empty_record nodes contribute values
61
+ val.empty? ? nil : val # nil signals that there were no further records
62
+ end
63
+ }
64
+ )
65
+ /
66
+ (
67
+ &{ |s| !skip_empty_record? } # Alternative 2: empty records are kept, each as an empty array
68
+ ( record_sep record )*
69
+ {
70
+ def value
71
+ val = elements[1].elements.collect { |elt| elt.record.value }
72
+ val.empty? ? nil : val
73
+ end
74
+ }
75
+ )
76
+ )
77
+ {
78
+ def value
79
+ elements[2].value # elements[2] is the parenthesised choice above
80
+ end
81
+ }
82
+ end
83
+
84
+ rule record # Any single record, possibly empty
85
+ non_empty_record / empty_record
86
+ end
87
+
88
+ rule non_empty_record # One field followed by zero or more separator-plus-field pairs
89
+ first:field
90
+ &{ |s| @record_length = 1; @warning = nil; true } # Start counting fields and clear any pending warning
91
+ rest:(
92
+ &{ |s| # Runs before each additional field is consumed
93
+ if @first_record || @record_length < @first_record_length
94
+ true
95
+ else
96
+ if allow_uneven_records?
97
+ @warning ||= [:extra_fields, input.line_of(index + 1), input.column_of(index + 1)] # Queue at most one extra_fields warning per record
98
+ true
99
+ else
100
+ @failure_type = :extra_fields # Record why the parse is about to fail
101
+ false
102
+ end
103
+ end
104
+ }
105
+ field_sep
106
+ field
107
+ &{ |s| @record_length += 1; true }
108
+ )*
109
+ &{ |s| # Runs once after the last field of the record
110
+ if @first_record || @record_length >= @first_record_length
111
+ if @warning
112
+ warnings << @warning # Publish the queued extra_fields warning
113
+ end
114
+ true
115
+ else
116
+ if allow_uneven_records?
117
+ warnings << [:missing_fields, input.line_of(index), input.column_of(index)] # Record has fewer fields than the first one
118
+ true
119
+ else
120
+ @failure_type = :missing_fields
121
+ false
122
+ end
123
+ end
124
+ }
125
+ {
126
+ def value # Field values of this record, in order
127
+ arr = [first.value]
128
+ rest.elements.each do |elt|
129
+ arr << elt.field.value
130
+ end
131
+ arr
132
+ end
133
+ }
134
+ end
135
+
136
+ rule empty_record # A record with no content at all
137
+ ''
138
+ &{ |s|
139
+ if allow_empty_record?
140
+ true
141
+ else
142
+ @failure_type = :missing_fields # Disallowed empty records are reported as missing_fields
143
+ false
144
+ end
145
+ }
146
+ {
147
+ def value
148
+ []
149
+ end
150
+ }
151
+ end
152
+
153
+ rule field # A field is either bare text or text wrapped in quotes
154
+ unquoted_text
155
+ {
156
+ def value
157
+ elements.map(&:text_value).join # Raw text of the unquoted field
158
+ end
159
+ }
160
+ /
161
+ quoted_text
162
+ {
163
+ def value
164
+ elements[1..-2].map(&:text_value).join # Drop the first and last nodes, i.e. the surrounding quotes
165
+ end
166
+ }
167
+ end
168
+
169
+ rule quoted_text # Opening quote, non-quote characters, closing quote
170
+ quote
171
+ ( !quote . )+ # NOTE(review): + needs at least one character, so an empty quoted field does not match here - confirm this is intended
172
+ (
173
+ quote
174
+ /
175
+ '' !{ |s| @failure_type = :missing_quote; @failure_index = start_index; true } # No closing quote: note the failure; the negated predicate returns true so this branch never matches
176
+ )
177
+ end
178
+
179
+ rule unquoted_text # One or more characters that are neither a separator nor a quote
180
+ (
181
+ !field_sep
182
+ !record_sep
183
+ (
184
+ !quote
185
+ /
186
+ '' !{ |s| @failure_type = :stray_quote; true } # A quote inside bare text is flagged as stray_quote and the match fails
187
+ )
188
+ .
189
+ )+
190
+ end
191
+
192
+ rule field_sep # Matches the configured field separator one character at a time
193
+ &{ |s| @field_sep_index = 0; true }
194
+ (
195
+ !record_sep
196
+ !quote
197
+ .
198
+ &{ |s|
199
+ if @field_sep_index < field_sep.length &&
200
+ s[2].text_value == field_sep[@field_sep_index] # s[2] is the character consumed by the dot above; field_sep here is the parser method holding the separator string, not this rule
201
+ @field_sep_index += 1
202
+ true
203
+ else
204
+ false
205
+ end
206
+ }
207
+ )+
208
+ &{ |s| s.map(&:text_value).join == field_sep } # The consumed characters must spell the whole separator
209
+ end
210
+
211
+ rule record_sep # Same character-by-character match, for the record separator
212
+ &{ |s| @record_sep_index = 0; true }
213
+ (
214
+ .
215
+ &{ |s|
216
+ if @record_sep_index < record_sep.length &&
217
+ s[0].text_value == record_sep[@record_sep_index] # s[0] is the character consumed by the dot above
218
+ @record_sep_index += 1
219
+ true
220
+ else
221
+ false
222
+ end
223
+ }
224
+ )+
225
+ &{ |s| s.map(&:text_value).join == record_sep }
226
+ end
227
+
228
+ rule quote # A single character equal to the first character of quote_char
229
+ !"\\" . &{ |s| s[1].text_value[0] == quote_char[0] } # NOTE(review): the lookahead only rejects a candidate that is itself a backslash; it does not look at the preceding character
230
+ end
231
+ end
232
+ end