fastcsv 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +56 -0
- data/Rakefile +21 -0
- data/USAGE +1 -0
- data/ext/fastcsv/extconf.rb +3 -0
- data/ext/fastcsv/fastcsv.c +697 -0
- data/ext/fastcsv/fastcsv.rl +356 -0
- data/fastcsv.gemspec +24 -0
- data/lib/fastcsv.rb +1 -0
- data/spec/fastcsv_spec.rb +218 -0
- data/spec/fixtures/iso-8859-1.csv +1 -0
- data/spec/fixtures/utf-8.csv +1 -0
- data/spec/spec_helper.rb +14 -0
- metadata +136 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e5d991d026c76068b9e646ba62cefdad823f01e1
|
4
|
+
data.tar.gz: 8815f0bb3b00e01593f2a46320cf58c88926099c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8762ce01e3e5af4cd0395bf541879db46f677f79201e2f44dc5f35dd30514c53fa2c2c5808ec61149898d64bff908a74fe2153c690a04e75c82ba7306794fa15
|
7
|
+
data.tar.gz: e6dd0a3f89f9d330428fbc8d6f1b469d9f3a8ca255561c6fa972d1ad2147422bd090f3c7a373d08d29b07748f781f9b8d70fae27a092a075d183de897094eea5
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2014 Open North Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# FastCSV
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/fastcsv.svg)](http://badge.fury.io/rb/fastcsv)
|
4
|
+
[![Dependency Status](https://gemnasium.com/opennorth/fastcsv.png)](https://gemnasium.com/opennorth/fastcsv)
|
5
|
+
|
6
|
+
A fast [Ragel](http://www.colm.net/open-source/ragel/)-based CSV parser.
|
7
|
+
|
8
|
+
## Usage
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
require 'fastcsv'
|
12
|
+
|
13
|
+
# Read from file.
|
14
|
+
File.open(filename) do |f|
|
15
|
+
FastCSV.raw_parse(f) do |row|
|
16
|
+
# do stuff
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# Read from an IO object.
|
21
|
+
FastCSV.raw_parse(StringIO.new("foo,bar\n")) do |row|
|
22
|
+
# do stuff
|
23
|
+
end
|
24
|
+
|
25
|
+
# Read from a string.
|
26
|
+
FastCSV.raw_parse("foo,bar\n") do |row|
|
27
|
+
# do stuff
|
28
|
+
end
|
29
|
+
|
30
|
+
# Transcode like with the CSV module.
|
31
|
+
FastCSV.raw_parse("\xF1\n", encoding: 'iso-8859-1:utf-8') do |row|
|
32
|
+
# ["ñ"]
|
33
|
+
end
|
34
|
+
```
|
35
|
+
|
36
|
+
## Development
|
37
|
+
|
38
|
+
ragel -G2 ext/fastcsv/fastcsv.rl
|
39
|
+
ragel -Vp ext/fastcsv/fastcsv.rl | dot -Tpng -o machine.png
|
40
|
+
rake compile
|
41
|
+
gem uninstall fastcsv
|
42
|
+
rake install
|
43
|
+
|
44
|
+
## Why?
|
45
|
+
|
46
|
+
We evaluated [many CSV Ruby gems](https://github.com/jpmckinney/csv-benchmark#benchmark), and they were either too slow or had implementation errors. [rcsv](https://github.com/fiksu/rcsv) is fast and [libcsv](http://sourceforge.net/projects/libcsv/)-based, but it skips blank rows (Ruby's CSV module returns an empty array) and silently fails on input with an unclosed quote; nonetheless, it's an excellent alternative if you find errors in FastCSV! We looked for Ragel-based CSV parsers to copy, but they either had implementation errors or could not handle large inputs. [commas](https://github.com/aklt/commas/blob/master/csv.rl) looks good, but it performs a memory check on each character, which is overkill.
|
47
|
+
|
48
|
+
## Bugs? Questions?
|
49
|
+
|
50
|
+
This project's main repository is on GitHub: [http://github.com/opennorth/fastcsv](http://github.com/opennorth/fastcsv), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
|
51
|
+
|
52
|
+
## Acknowledgements
|
53
|
+
|
54
|
+
Started as a Ruby 2.1 fork of MoonWolf <moonwolf@moonwolf.com>'s CSVScan, found in [this commit](https://github.com/nickstenning/csvscan/commit/11ec30f71a27cc673bca09738ee8a63942f416f0.patch). CSVScan uses Ragel code from [HPricot](https://github.com/hpricot/hpricot/blob/master/ext/hpricot_scan/hpricot_scan.rl) from [this commit](https://github.com/hpricot/hpricot/blob/908a4ae64bc8b935c4415c47ca6aea6492c6ce0a/ext/hpricot_scan/hpricot_scan.rl).
|
55
|
+
|
56
|
+
Copyright (c) 2014 Open North Inc., released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Gem packaging tasks (build, install, release).
require 'bundler'
Bundler::GemHelper.install_tasks

# Compile the C extension into lib/fastcsv.
require 'rake/extensiontask'
Rake::ExtensionTask.new('fastcsv') { |ext| ext.lib_dir = 'lib/fastcsv' }

# Run the specs, and make that the default task.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

task default: :spec

# Documentation task; falls back to a helpful abort when YARD is missing.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task(:yard) do
    abort 'YARD is not available. In order to run yard, you must: gem install yard'
  end
end
|
data/USAGE
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
See README.md for full usage details.
|
@@ -0,0 +1,697 @@
|
|
1
|
+
|
2
|
+
#line 1 "ext/fastcsv/fastcsv.rl"
|
3
|
+
#include <ruby.h>
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
// CSV specifications.
|
6
|
+
// http://tools.ietf.org/html/rfc4180
|
7
|
+
// http://w3c.github.io/csvw/syntax/#ebnf
|
8
|
+
|
9
|
+
// CSV implementation.
|
10
|
+
// https://github.com/ruby/ruby/blob/master/lib/csv.rb
|
11
|
+
|
12
|
+
// Ruby C extensions help.
|
13
|
+
// https://github.com/ruby/ruby/blob/trunk/README.EXT
|
14
|
+
// http://rxr.whitequark.org/mri/source
|
15
|
+
|
16
|
+
// Ragel help.
|
17
|
+
// https://www.mail-archive.com/ragel-users@complang.org/
|
18
|
+
|
19
|
+
// Sets the encoding of the freshly built `field` string.
//
// Relies on three locals being in scope at the expansion site: `field`
// (VALUE), `internal_index` (int) and `external_encoding` (rb_encoding *).
//
// - internal_index >= 0: tag the bytes with that encoding, then transcode
//   the string to external_encoding (`field` is rebound to the result).
// - otherwise: just tag the bytes with external_encoding.
//
// Wrapped in do { ... } while (0) so that `ASSOCIATE_INDEX;` behaves as a
// single statement, including inside an unbraced if/else.
#define ASSOCIATE_INDEX \
  do { \
    if (internal_index >= 0) { \
      rb_enc_associate_index(field, internal_index); \
      field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
    } \
    else { \
      rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
    } \
  } while (0)
|
27
|
+
|
28
|
+
// The FastCSV module and its ParseError exception class; both are assigned in Init_fastcsv.
static VALUE mModule, rb_eParseError;
|
29
|
+
// Interned method names ("read" and "to_str") used to probe and call the input object.
static ID s_read, s_to_str;
|
30
|
+
|
31
|
+
|
32
|
+
#line 139 "ext/fastcsv/fastcsv.rl"
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
#line 37 "ext/fastcsv/fastcsv.c"
|
37
|
+
// Ragel-generated: state the scanner starts in (assigned to cs before parsing).
static const int fastcsv_start = 4;
|
38
|
+
// Ragel-generated: finishing the input in a state numbered below this is reported as a parse error.
static const int fastcsv_first_final = 4;
|
39
|
+
// Ragel-generated: number of the error state (the st0 label sets cs to 0 and leaves the machine).
static const int fastcsv_error = 0;
|
40
|
+
|
41
|
+
// Ragel-generated: presumably the entry state of the `main` machine; not referenced in the visible code.
static const int fastcsv_en_main = 4;
|
42
|
+
|
43
|
+
|
44
|
+
#line 142 "ext/fastcsv/fastcsv.rl"
|
45
|
+
|
46
|
+
// Default read-buffer size in bytes, and the step by which the buffer grows when it fills up.
#define BUFSIZE 16384
|
47
|
+
|
48
|
+
// FastCSV.raw_parse(data, options = nil) { |row| ... } -> nil
//
// Scans CSV input with the Ragel-generated state machine below and yields
// each row to the block as an Array. Unquoted empty fields are nil, quoted
// empty fields are "". A row terminator always yields (a blank line yields
// an empty Array); at end of input a trailing row is yielded only if it is
// non-empty. A NUL byte is used as the end-of-input sentinel.
//
// argc/argv - one required argument (the data: anything responding to #read,
//             else anything responding to #to_str) and one optional argument
//             (an options Hash or nil).
//             :encoding is a String, either "name" or "from:to". Fields are
//             tagged with the first name and, when it is known, transcoded
//             to the second (or left as-is when only one name is given).
// self      - the receiver; its @buffer_size ivar, when set and non-nil,
//             overrides BUFSIZE as the initial buffer size for #read input.
//
// Returns Qnil.
// Raises ArgumentError for unusable data/options (or a #read that returns
// more bytes than requested) and rb_eParseError for an unclosed quoted field
// or illegal quoting.
//
// NOTE: this is hand-edited Ragel output; mirror any change in
// ext/fastcsv/fastcsv.rl before regenerating. The original #line directives
// were dropped because they no longer match this file.
// NOTE(review): an exception raised from the block (rb_yield) or from
// transcoding still leaks `buf`; fixing that needs rb_ensure.
VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
  int cs, act, have = 0, curline = 1, io = 0;
  char *ts = 0, *te = 0, *buf = 0, *eof = 0;

  VALUE port, opts;
  VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
  int done = 0, unclosed_line = 0, buffer_size = 0;
  int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding());
  rb_encoding *external_encoding = rb_default_external_encoding();

  VALUE option;
  // Only the double quote is supported for now; :quote_char, :col_sep and
  // :row_sep options are not implemented.
  char quote_char = '"';

  rb_scan_args(argc, argv, "11", &port, &opts);
  io = rb_respond_to(port, s_read);
  if (!io) {
    if (rb_respond_to(port, s_to_str)) {
      port = rb_funcall(port, s_to_str, 0);
      StringValue(port);
    }
    else {
      rb_raise(rb_eArgError, "data has to respond to #read or #to_str");
    }
  }

  if (NIL_P(opts)) {
    opts = rb_hash_new();
  }
  else if (TYPE(opts) != T_HASH) {
    rb_raise(rb_eArgError, "options has to be a Hash or nil");
  }

  option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
  if (TYPE(option) == T_STRING) {
    // @see parse_mode_enc in Ruby's io.c
    const char *string = StringValueCStr(option), *pointer;
    char internal_encoding_name[ENCODING_MAXNAMELEN + 1];

    // Split "first:second" at the last colon; `pointer` ends up on "second".
    pointer = strrchr(string, ':');
    if (pointer) {
      long len = (pointer++) - string;
      if (len == 0 || len > ENCODING_MAXNAMELEN) {
        internal_index = -1;
      }
      else {
        memcpy(internal_encoding_name, string, len);
        internal_encoding_name[len] = '\0';
        string = internal_encoding_name;
        internal_index = rb_enc_find_index(internal_encoding_name);
      }
    }
    else {
      internal_index = rb_enc_find_index(string);
    }

    if (internal_index < 0 && internal_index != -2) {
      rb_warn("Unsupported encoding %s ignored", string);
    }

    if (pointer) {
      external_index = rb_enc_find_index(pointer);
      if (external_index >= 0) {
        external_encoding = rb_enc_from_index(external_index);
      }
      else {
        // Report the name that actually failed to resolve (the second one).
        rb_warn("Unsupported encoding %s ignored", pointer);
      }
    }
    else if (internal_index >= 0) {
      external_encoding = rb_enc_from_index(internal_index);
    }
  }
  else if (!NIL_P(option)) {
    rb_raise(rb_eArgError, ":encoding has to be a String");
  }

  buffer_size = BUFSIZE;
  if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
    bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
    if (!NIL_P(bufsize)) {
      buffer_size = NUM2INT(bufsize);
    }
  }

  // String input is scanned in place; only #read input needs a buffer.
  if (io) {
    buf = ALLOC_N(char, buffer_size);
  }

  // Ragel: initialise the scanner.
  {
    cs = fastcsv_start;
    ts = 0;
    te = 0;
    act = 0;
  }

  while (!done) {
    VALUE str;
    char *p, *pe;
    int len, space = buffer_size - have, tokstart_diff, tokend_diff;

    if (io) {
      if (space == 0) {
        // The pending token fills the buffer: grow it and rebase ts/te.
        tokstart_diff = ts - buf;
        tokend_diff = te - buf;

        buffer_size += BUFSIZE;
        REALLOC_N(buf, char, buffer_size);

        space = buffer_size - have;

        ts = buf + tokstart_diff;
        te = buf + tokend_diff;
      }
      p = buf + have;

      str = rb_funcall(port, s_read, 1, INT2FIX(space));
      if (NIL_P(str)) {
        // StringIO#read returns nil for empty string.
        len = 0;
      }
      else {
        long read_len;

        // Make sure we really have a String before touching its length.
        StringValue(str);
        read_len = RSTRING_LEN(str);
        if (read_len > space) {
          // A misbehaving #read must not overrun the buffer.
          xfree(buf);
          rb_raise(rb_eArgError, "#read returned more bytes than requested");
        }
        len = (int)read_len;
        memcpy(p, RSTRING_PTR(str), len);
      }

      if (len < space) {
        // EOF actions don't work in scanners, so we add a sentinel value.
        // @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
        // @see https://github.com/leeonix/lua-csv-ragel/blob/master/src/csv.rl
        p[len++] = 0;
        done = 1;
      }
    }
    else {
      // Scan the String's own bytes; the sentinel lands on its terminator slot.
      p = RSTRING_PTR(port);
      len = RSTRING_LEN(port);
      p[len++] = 0;
      done = 1;
    }

    pe = p + len;

    // Ragel: generated scanner body. Bytes: 0 = sentinel, 10 = LF, 13 = CR,
    // 34 = double quote, 44 = comma.
    {
      if ( p == pe )
        goto _test_eof;
      switch ( cs )
      {
tr0:
      { switch( act ) {
        case 0:
        {{goto st0;}}
        break;
        default:
        {{p = ((te))-1;}}
        break;
        }
      }
      goto st4;
tr10:
      // End of input: flush the last row if it has content.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) {
          rb_ary_push(row, field);
        }
        if (RARRAY_LEN(row)) {
          rb_yield(row);
        }
      }
      {te = p+1;}
      goto st4;
tr16:
      {te = p;p--;}
      goto st4;
tr17:
      {te = p;p--;}
      goto st4;
tr18:
      // End of input: flush the last row if it has content.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) {
          rb_ary_push(row, field);
        }
        if (RARRAY_LEN(row)) {
          rb_yield(row);
        }
      }
      {te = p+1;}
      goto st4;
tr20:
      {te = p;p--;}
      goto st4;
tr21:
      // End of input: flush the last row if it has content.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) {
          rb_ary_push(row, field);
        }
        if (RARRAY_LEN(row)) {
          rb_yield(row);
        }
      }
      {te = p+1;}
      goto st4;
st4:
      {ts = 0;}
      {act = 0;}
      if ( ++p == pe )
        goto _test_eof4;
case 4:
      {ts = p;}
      switch( (*p) ) {
        case 0: goto tr14;
        case 10: goto tr3;
        case 13: goto tr4;
        case 34: goto tr15;
        case 44: goto tr5;
      }
      goto st1;
st1:
      if ( ++p == pe )
        goto _test_eof1;
case 1:
      switch( (*p) ) {
        case 0: goto tr2;
        case 10: goto tr3;
        case 13: goto tr4;
        case 34: goto tr0;
        case 44: goto tr5;
      }
      goto st1;
tr2:
      {te = p+1;}
      // Read an unquoted field spanning [ts, p).
      {
        if (p == ts) {
          // Unquoted empty fields are nil, not "", in Ruby.
          field = Qnil;
        }
        else if (p > ts) {
          field = rb_str_new(ts, p - ts);
          ASSOCIATE_INDEX;
        }
      }
      // End of input: flush the last row if it has content.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) {
          rb_ary_push(row, field);
        }
        if (RARRAY_LEN(row)) {
          rb_yield(row);
        }
      }
      {act = 3;}
      goto st5;
st5:
      if ( ++p == pe )
        goto _test_eof5;
case 5:
      switch( (*p) ) {
        case 0: goto tr2;
        case 10: goto tr3;
        case 13: goto tr4;
        case 34: goto tr16;
        case 44: goto tr5;
      }
      goto st1;
tr3:
      // Read an unquoted field spanning [ts, p).
      {
        if (p == ts) {
          // Unquoted empty fields are nil, not "", in Ruby.
          field = Qnil;
        }
        else if (p > ts) {
          field = rb_str_new(ts, p - ts);
          ASSOCIATE_INDEX;
        }
      }
      // Row terminator: finish the row, yield it, start a new one.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
          rb_ary_push(row, field);
          field = Qnil;
        }

        rb_yield(row);
        row = rb_ary_new();
      }
      {
        curline++;
      }
      goto st6;
tr19:
      {
        curline++;
      }
      goto st6;
tr11:
      // Row terminator: finish the row, yield it, start a new one.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
          rb_ary_push(row, field);
          field = Qnil;
        }

        rb_yield(row);
        row = rb_ary_new();
      }
      {
        curline++;
      }
      goto st6;
st6:
      if ( ++p == pe )
        goto _test_eof6;
case 6:
      if ( (*p) == 0 )
        goto tr18;
      goto tr17;
tr4:
      // Read an unquoted field spanning [ts, p).
      {
        if (p == ts) {
          // Unquoted empty fields are nil, not "", in Ruby.
          field = Qnil;
        }
        else if (p > ts) {
          field = rb_str_new(ts, p - ts);
          ASSOCIATE_INDEX;
        }
      }
      // Row terminator: finish the row, yield it, start a new one.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
          rb_ary_push(row, field);
          field = Qnil;
        }

        rb_yield(row);
        row = rb_ary_new();
      }
      {
        curline++;
      }
      goto st7;
tr12:
      // Row terminator: finish the row, yield it, start a new one.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
          rb_ary_push(row, field);
          field = Qnil;
        }

        rb_yield(row);
        row = rb_ary_new();
      }
      {
        curline++;
      }
      goto st7;
st7:
      if ( ++p == pe )
        goto _test_eof7;
case 7:
      switch( (*p) ) {
        case 0: goto tr18;
        case 10: goto tr19;
      }
      goto tr17;
tr5:
      // Read an unquoted field spanning [ts, p).
      {
        if (p == ts) {
          // Unquoted empty fields are nil, not "", in Ruby.
          field = Qnil;
        }
        else if (p > ts) {
          field = rb_str_new(ts, p - ts);
          ASSOCIATE_INDEX;
        }
      }
      // Column separator: append the field to the current row.
      {
        rb_ary_push(row, field);
        field = Qnil;
      }
      goto st8;
tr13:
      // Column separator: append the field to the current row.
      {
        rb_ary_push(row, field);
        field = Qnil;
      }
      goto st8;
st8:
      if ( ++p == pe )
        goto _test_eof8;
case 8:
      if ( (*p) == 0 )
        goto tr21;
      goto tr20;
tr14:
      {te = p+1;}
      // End of input: flush the last row if it has content.
      {
        if (!NIL_P(field) || RARRAY_LEN(row)) {
          rb_ary_push(row, field);
        }
        if (RARRAY_LEN(row)) {
          rb_yield(row);
        }
      }
      // Read an unquoted field spanning [ts, p).
      {
        if (p == ts) {
          // Unquoted empty fields are nil, not "", in Ruby.
          field = Qnil;
        }
        else if (p > ts) {
          field = rb_str_new(ts, p - ts);
          ASSOCIATE_INDEX;
        }
      }
      {act = 3;}
      goto st9;
st9:
      if ( ++p == pe )
        goto _test_eof9;
case 9:
      switch( (*p) ) {
        case 10: goto tr16;
        case 13: goto tr16;
        case 34: goto tr16;
        case 44: goto tr16;
      }
      goto st1;
tr8:
      // Line break inside a quoted field.
      {
        curline++;
      }
      goto st2;
tr15:
      // Opening quote: remember the line in case the quote is never closed.
      {
        unclosed_line = curline;
      }
      goto st2;
st2:
      if ( ++p == pe )
        goto _test_eof2;
case 2:
      switch( (*p) ) {
        case 0: goto st0;
        case 10: goto tr8;
        case 13: goto tr8;
        case 34: goto tr9;
      }
      goto st2;
st0:
      cs = 0;
      goto _out;
tr9:
      // Read a quoted field: ts is on the opening quote, p on a later quote.
      {
        if (p == ts) {
          field = rb_str_new2("");
          ASSOCIATE_INDEX;
        }
        // @note If we add an action on '""', we can skip some steps if no '""' is found.
        else if (p > ts) {
          // Unescape into a scratch buffer (operating on ts in place produces
          // odd behavior). The opening quote and the first quote of every
          // doubled pair are skipped; everything else is copied.
          char *copy = ALLOC_N(char, p - ts);
          char *reader = ts, *writer = copy;
          int escaped = 0;

          while (p > reader) {
            if (*reader == quote_char && !escaped) {
              // Skip the escaping character.
              escaped = 1;
            }
            else {
              escaped = 0;
              *writer++ = *reader;
            }
            reader++;
          }

          field = rb_str_new(copy, writer - copy);
          // Release the scratch buffer before transcoding, which may raise.
          xfree(copy);
          ASSOCIATE_INDEX;
        }
      }
      // Closing quote seen.
      {
        unclosed_line = 0;
      }
      goto st3;
st3:
      if ( ++p == pe )
        goto _test_eof3;
case 3:
      switch( (*p) ) {
        case 0: goto tr10;
        case 10: goto tr11;
        case 13: goto tr12;
        case 34: goto st2;
        case 44: goto tr13;
      }
      goto st0;
      }
      _test_eof4: cs = 4; goto _test_eof;
      _test_eof1: cs = 1; goto _test_eof;
      _test_eof5: cs = 5; goto _test_eof;
      _test_eof6: cs = 6; goto _test_eof;
      _test_eof7: cs = 7; goto _test_eof;
      _test_eof8: cs = 8; goto _test_eof;
      _test_eof9: cs = 9; goto _test_eof;
      _test_eof2: cs = 2; goto _test_eof;
      _test_eof3: cs = 3; goto _test_eof;

      _test_eof: {}
      if ( p == eof )
      {
        switch ( cs ) {
          case 1: goto tr0;
          case 5: goto tr16;
          case 6: goto tr17;
          case 7: goto tr17;
          case 8: goto tr20;
          case 9: goto tr16;
        }
      }

      _out: {}
    }

    // Fail as soon as the machine hits its error state, not only on the last
    // chunk: `switch (cs)` has no case for the error state, so another pass
    // would fall through to `_test_eof4` and silently restart the scanner.
    if (cs == fastcsv_error || (done && cs < fastcsv_first_final)) {
      if (buf != NULL) {
        xfree(buf);
      }
      if (unclosed_line) {
        rb_raise(rb_eParseError, "Unclosed quoted field on line %d.", unclosed_line);
      }
      // Ruby raises different errors for illegal quoting, depending on whether
      // a quoted string is followed by a string ("Unclosed quoted field on line
      // %d.") or by a string ending in a quote ("Missing or stray quote in line
      // %d"). These distinctions are kind of bogus, but we can try using $!.
      else {
        rb_raise(rb_eParseError, "Illegal quoting in line %d.", curline);
      }
    }

    if (ts == 0) {
      // No token in progress: the next read can reuse the whole buffer.
      have = 0;
    }
    else if (io) {
      // Slide the unfinished token to the front of the buffer.
      have = pe - ts;
      memmove(buf, ts, have);
      te = buf + (te - ts);
      ts = buf;
    }
  }

  // buf came from ALLOC_N, so it must be released with xfree.
  if (buf != NULL) {
    xfree(buf);
  }

  return Qnil;
}
|
688
|
+
|
689
|
+
void Init_fastcsv() {
|
690
|
+
s_read = rb_intern("read");
|
691
|
+
s_to_str = rb_intern("to_str");
|
692
|
+
|
693
|
+
mModule = rb_define_module("FastCSV");
|
694
|
+
rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
|
695
|
+
rb_define_singleton_method(mModule, "raw_parse", fastcsv, -1);
|
696
|
+
rb_eParseError = rb_define_class_under(mModule, "ParseError", rb_eStandardError);
|
697
|
+
}
|