csv_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0182c5c8d43682eaf967338eab5d249c5036ed54
4
+ data.tar.gz: 249c47d4015daba1d0def6019449d400b1c8bb2c
5
+ SHA512:
6
+ metadata.gz: 29835c6d4346804386d2882ee8141207a24eef5fc1fd4163b7bb01d0582b63fa091c9968e934e6dff4a425763393f953a40a0e787054c9a69b75c88a056323d0
7
+ data.tar.gz: 2ce0c0176e2611e7e569c295cf2b511d2266cc52bbb371ab23422186b3b1a11a023508649d922fc7ee75c237e6567300fb7d002b776a38fcf358c70b752e5519
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in csv_parser.gemspec
4
+ gemspec
5
+
6
+ gem 'byebug'
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Vanderbilt University
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,64 @@
1
+ # CsvParser
2
+
3
+ CsvParser is a CSV parser that focuses on identifying errors.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'csv_parser'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install csv_parser
18
+
19
+ ## Usage
20
+
21
+ CsvParser's goal is to give you descriptive error messages.
22
+
23
+ ### Error types
24
+
25
+ * missing closing quote (`CsvParser::MissingQuoteError`)
26
+ * quote in the wrong place (`CsvParser::StrayQuoteError`)
27
+ * rows with too few fields (`CsvParser::MissingFieldsError`)
28
+ * rows with too many fields (`CsvParser::ExtraFieldsError`)
29
+
30
+ ### Options
31
+
32
+ You can pass in an options hash to the `CsvParser.parse` method
33
+ that contains one or more of the following options:
34
+
35
+ * `:field_sep` - specify field separator (default is `","`)
36
+ * `:record_sep` - specify record separator (default is `"\n"`)
37
+ * `:quote_char` - specify quote character (default is `"\""`)
38
+ * `:allow_empty_record` - specify whether empty records are allowed (default is `true`)
39
+ * `:skip_empty_record` - specify whether empty records are skipped (default is `true`)
40
+ * `:allow_uneven_records` - specify whether records with different field lengths are allowed (default is `true`)
41
+
42
+ ### Example
43
+
44
+ ```ruby
45
+ require 'csv_parser'
46
+
47
+ data = <<EOF
48
+ foo,"bar
49
+ baz,quz
50
+ EOF
51
+ begin
52
+ result = CsvParser.parse(data)
53
+ rescue CsvParser::Error => e
54
+ # e is a CsvParser::MissingQuoteError
55
+ end
56
+ ```
57
+
58
+ ## Contributing
59
+
60
+ 1. Fork it
61
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
62
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
63
+ 4. Push to the branch (`git push origin my-new-feature`)
64
+ 5. Create new Pull Request
@@ -0,0 +1,16 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
# Test task: puts test/ on the load path and runs every test/**/test*.rb file.
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/test*.rb'
end

# The tests load the generated parser, so make sure it is compiled first.
task :test => :treetop
task :default => :test

desc "Compile treetop grammar"
task :treetop => "lib/csv_parser/csv_parser.rb"

# Regenerate the Ruby parser whenever the grammar file is newer than it.
file "lib/csv_parser/csv_parser.rb" => "lib/csv_parser.treetop" do |t|
  # `sh` (unlike a bare `system` call whose result is ignored) aborts the
  # build when `tt` exits non-zero, so a grammar that fails to compile can
  # no longer be followed by a test run against a stale or missing parser.
  sh "tt #{t.source} -o #{t.name}"
end
@@ -0,0 +1,25 @@
1
# coding: utf-8
# Gem specification for csv_parser.

# Put this checkout's lib/ directory on the load path (once) so that the
# version file can be required below.
lib_dir = File.expand_path('lib', File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'csv_parser/version'

Gem::Specification.new do |gem|
  # Identity and metadata.
  gem.name        = "csv_parser"
  gem.version     = CsvParser::VERSION
  gem.authors     = ["Jeremy Stephens"]
  gem.email       = ["jeremy.f.stephens@vanderbilt.edu"]
  gem.description = "CSV parser with advanced error reporting"
  gem.summary     = "CSV parser with advanced error reporting"
  gem.homepage    = "https://github.com/coupler/csv_parser"
  gem.license     = "MIT"

  # Package every file tracked by git; executables and test files are
  # picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependency.
  gem.add_runtime_dependency 'treetop'

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
  gem.add_development_dependency "test-unit"
end
@@ -0,0 +1,60 @@
1
+ require 'treetop'
2
+
3
+ require 'csv_parser/version'
4
+ require 'csv_parser/parser_extensions'
5
+ require 'csv_parser/result'
6
+
7
+ #Treetop.load(File.join(File.dirname(__FILE__), 'csv_parser.treetop'))
8
+ require 'csv_parser/csv_parser'
9
+
10
+ module CsvParser
11
+ class Error < Exception
12
+ attr_reader :line, :column
13
+
14
+ def initialize(msg, line, column)
15
+ super(msg)
16
+ @line = line
17
+ @column = column
18
+ end
19
+ end
20
+
21
+ class MissingQuoteError < Error; end
22
+ class StrayQuoteError < Error; end
23
+ class MissingFieldsError < Error; end
24
+ class ExtraFieldsError < Error; end
25
+
26
+ def self.parse(data, options = {})
27
+ parser = ::CsvParser::CsvParser.new
28
+ options.each_pair do |key, value|
29
+ parser.send("#{key}=", value)
30
+ end
31
+ result = parser.parse(data)
32
+ if result
33
+ warnings = parser.warnings.collect do |(desc, line, col)|
34
+ error(desc, line, col)
35
+ end
36
+ Result.new(result.value, warnings)
37
+ else
38
+ raise error(parser.failure_type, parser.failure_line,
39
+ parser.failure_column, parser.failure_reason)
40
+ end
41
+ end
42
+
43
+ def self.error(type, line, column, msg = nil)
44
+ klass, msg =
45
+ case type
46
+ when :missing_quote
47
+ [MissingQuoteError, "no ending quote found for quote on line #{line}, column #{column}"]
48
+ when :stray_quote
49
+ [StrayQuoteError, "invalid quote found on line #{line}, column #{column}"]
50
+ when :missing_fields
51
+ [MissingFieldsError, "record on line #{line} had too few fields"]
52
+ when :extra_fields
53
+ [ExtraFieldsError, "record on line #{line} had too many fields"]
54
+ else
55
+ Error
56
+ end
57
+
58
+ klass.new(msg, line, column)
59
+ end
60
+ end
@@ -0,0 +1,232 @@
1
# Treetop (PEG) grammar for CSV text.
#
# The grammar mixes in ParserExtensions. Helpers used below but not defined
# here (skip_empty_record?, allow_empty_record?, allow_uneven_records?,
# warnings, field_sep, record_sep, quote_char) presumably come from that
# module -- confirm against parser_extensions.rb.
#
# `&{ |s| ... }` / `!{ |s| ... }` are Treetop semantic predicates: they
# consume no input; the `&` form succeeds when the block is truthy, the `!`
# form when it is falsy. Several of them are used purely for their side
# effects on parser instance variables, so their order matters.
+ module CsvParser
2
+ grammar Csv
3
+ include ParserExtensions
4
+
5
# Entry rule: either at least one record, or empty input.
+ rule records
6
+ non_empty_records / empty_records
7
+ end
8
+
9
# First record followed by the remaining records. The value is an array
# holding the first record's value plus every value from other_records
# (which may be nil when there are none).
+ rule non_empty_records
10
+ first_record
11
+ other_records
12
+ {
13
+ def value
14
+ arr = [first_record.value]
15
+ rest = other_records.value
16
+ if rest
17
+ arr.push(*rest)
18
+ end
19
+ arr
20
+ end
21
+ }
22
+ end
23
+
24
# Empty input: matches the empty string and yields no records.
+ rule empty_records
25
+ ''
26
+ {
27
+ def value
28
+ []
29
+ end
30
+ }
31
+ end
32
+
33
# The first record. Sets @first_record before parsing it (so the length
# checks in non_empty_record are bypassed) and afterwards stores its field
# count in @first_record_length, the reference length for later records.
+ rule first_record
34
+ '' &{ |s| @first_record = true; true }
35
+ non_empty_record
36
+ &{ |s| @first_record_length = @record_length; true }
37
+ {
38
+ def value
39
+ non_empty_record.value
40
+ end
41
+ }
42
+ end
43
+
44
# Every record after the first. Clears @first_record, then picks one of two
# alternatives depending on skip_empty_record?.
+ rule other_records
45
+ '' &{ |s| @first_record = false; true }
46
+ (
47
+ (
48
# Alternative 1 (skipping empty records): each repetition is a record
# separator, any number of "empty record + separator" pairs, then a
# non-empty record. Trailing "separator + empty record" pairs are allowed.
# Only the non-empty records contribute to the value; nil when none matched.
+ &{ |s| skip_empty_record? }
49
+ (
50
+ record_sep
51
+ ( empty_record record_sep )*
52
+ non_empty_record
53
+ )*
54
+ (
55
+ &{ |s| skip_empty_record? }
56
+ ( record_sep empty_record )+
57
+ )?
58
+ {
59
+ def value
60
+ val = elements[1].elements.collect { |elt| elt.non_empty_record.value }
61
+ val.empty? ? nil : val
62
+ end
63
+ }
64
+ )
65
+ /
66
+ (
67
# Alternative 2 (keeping empty records): every "separator + record" pair
# contributes its record's value, including [] for empty records.
+ &{ |s| !skip_empty_record? }
68
+ ( record_sep record )*
69
+ {
70
+ def value
71
+ val = elements[1].elements.collect { |elt| elt.record.value }
72
+ val.empty? ? nil : val
73
+ end
74
+ }
75
+ )
76
+ )
77
+ {
78
+ def value
# elements[2] is the alternative group above (after '' and the predicate).
79
+ elements[2].value
80
+ end
81
+ }
82
+ end
83
+
84
# A single record, empty or not.
+ rule record
85
+ non_empty_record / empty_record
86
+ end
87
+
88
# A record with at least one field. Tracks the number of fields in
# @record_length and compares it with @first_record_length (unless this is
# the first record). The value is the array of field values.
+ rule non_empty_record
89
+ first:field
90
+ &{ |s| @record_length = 1; @warning = nil; true }
91
+ rest:(
92
# Checked before each additional field: once the record already has as many
# fields as the first record, a further field is an :extra_fields problem.
# With allow_uneven_records? it is remembered as a pending warning (only the
# first occurrence, via ||=); otherwise the failure type is set and the
# predicate fails.
+ &{ |s|
93
+ if @first_record || @record_length < @first_record_length
94
+ true
95
+ else
96
+ if allow_uneven_records?
97
+ @warning ||= [:extra_fields, input.line_of(index + 1), input.column_of(index + 1)]
98
+ true
99
+ else
100
+ @failure_type = :extra_fields
101
+ false
102
+ end
103
+ end
104
+ }
105
+ field_sep
106
+ field
107
+ &{ |s| @record_length += 1; true }
108
+ )*
109
# Checked after the last field: flush any pending :extra_fields warning, or
# report :missing_fields when the record is shorter than the first record
# (as a warning with allow_uneven_records?, as a failure otherwise).
+ &{ |s|
110
+ if @first_record || @record_length >= @first_record_length
111
+ if @warning
112
+ warnings << @warning
113
+ end
114
+ true
115
+ else
116
+ if allow_uneven_records?
117
+ warnings << [:missing_fields, input.line_of(index), input.column_of(index)]
118
+ true
119
+ else
120
+ @failure_type = :missing_fields
121
+ false
122
+ end
123
+ end
124
+ }
125
+ {
126
+ def value
127
+ arr = [first.value]
128
+ rest.elements.each do |elt|
129
+ arr << elt.field.value
130
+ end
131
+ arr
132
+ end
133
+ }
134
+ end
135
+
136
# A record with no fields: matches the empty string. Fails with
# @failure_type = :missing_fields when allow_empty_record? is false.
+ rule empty_record
137
+ ''
138
+ &{ |s|
139
+ if allow_empty_record?
140
+ true
141
+ else
142
+ @failure_type = :missing_fields
143
+ false
144
+ end
145
+ }
146
+ {
147
+ def value
148
+ []
149
+ end
150
+ }
151
+ end
152
+
153
# A field is unquoted text (value: the matched text) or quoted text
# (value: the text of the elements between the first and last element,
# i.e. without the surrounding quotes).
+ rule field
154
+ unquoted_text
155
+ {
156
+ def value
157
+ elements.map(&:text_value).join
158
+ end
159
+ }
160
+ /
161
+ quoted_text
162
+ {
163
+ def value
164
+ elements[1..-2].map(&:text_value).join
165
+ end
166
+ }
167
+ end
168
+
169
# Opening quote, one or more non-quote characters, closing quote. When the
# closing quote is missing, the fallback alternative records
# @failure_type = :missing_quote and @failure_index, then fails (its `!{}`
# block returns true).
# NOTE(review): `( !quote . )+` needs at least one character, so an empty
# quoted field ("") does not match this rule as written.
+ rule quoted_text
170
+ quote
171
+ ( !quote . )+
172
+ (
173
+ quote
174
+ /
175
+ '' !{ |s| @failure_type = :missing_quote; @failure_index = start_index; true }
176
+ )
177
+ end
178
+
179
# One or more characters that do not start a field separator, a record
# separator or a quote. Meeting a quote here sets
# @failure_type = :stray_quote and fails the match.
+ rule unquoted_text
180
+ (
181
+ !field_sep
182
+ !record_sep
183
+ (
184
+ !quote
185
+ /
186
+ '' !{ |s| @failure_type = :stray_quote; true }
187
+ )
188
+ .
189
+ )+
190
+ end
191
+
192
# Matches the field separator string returned by the field_sep helper,
# one character at a time: @field_sep_index is the position
# of the next expected character, and the final predicate requires the whole
# matched text to equal the separator.
+ rule field_sep
193
+ &{ |s| @field_sep_index = 0; true }
194
+ (
195
+ !record_sep
196
+ !quote
197
+ .
198
+ &{ |s|
199
+ if @field_sep_index < field_sep.length &&
200
+ s[2].text_value == field_sep[@field_sep_index]
201
+ @field_sep_index += 1
202
+ true
203
+ else
204
+ false
205
+ end
206
+ }
207
+ )+
208
+ &{ |s| s.map(&:text_value).join == field_sep }
209
+ end
210
+
211
# Matches the record separator string returned by the record_sep helper,
# using the same character-by-character scheme as field_sep
# (tracked in @record_sep_index).
+ rule record_sep
212
+ &{ |s| @record_sep_index = 0; true }
213
+ (
214
+ .
215
+ &{ |s|
216
+ if @record_sep_index < record_sep.length &&
217
+ s[0].text_value == record_sep[@record_sep_index]
218
+ @record_sep_index += 1
219
+ true
220
+ else
221
+ false
222
+ end
223
+ }
224
+ )+
225
+ &{ |s| s.map(&:text_value).join == record_sep }
226
+ end
227
+
228
# A single character equal to the first character of quote_char.
# NOTE(review): the `!"\\"` lookahead inspects the same character that `.`
# then consumes, so it does not look at a preceding backslash -- confirm
# whether backslash-escaped quotes are meant to be supported.
+ rule quote
229
+ !"\\" . &{ |s| s[1].text_value[0] == quote_char[0] }
230
+ end
231
+ end
232
+ end