fastcsv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +56 -0
- data/Rakefile +21 -0
- data/USAGE +1 -0
- data/ext/fastcsv/extconf.rb +3 -0
- data/ext/fastcsv/fastcsv.c +697 -0
- data/ext/fastcsv/fastcsv.rl +356 -0
- data/fastcsv.gemspec +24 -0
- data/lib/fastcsv.rb +1 -0
- data/spec/fastcsv_spec.rb +218 -0
- data/spec/fixtures/iso-8859-1.csv +1 -0
- data/spec/fixtures/utf-8.csv +1 -0
- data/spec/spec_helper.rb +14 -0
- metadata +136 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e5d991d026c76068b9e646ba62cefdad823f01e1
|
4
|
+
data.tar.gz: 8815f0bb3b00e01593f2a46320cf58c88926099c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8762ce01e3e5af4cd0395bf541879db46f677f79201e2f44dc5f35dd30514c53fa2c2c5808ec61149898d64bff908a74fe2153c690a04e75c82ba7306794fa15
|
7
|
+
data.tar.gz: e6dd0a3f89f9d330428fbc8d6f1b469d9f3a8ca255561c6fa972d1ad2147422bd090f3c7a373d08d29b07748f781f9b8d70fae27a092a075d183de897094eea5
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2014 Open North Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# FastCSV
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/fastcsv.svg)](http://badge.fury.io/rb/fastcsv)
|
4
|
+
[![Dependency Status](https://gemnasium.com/opennorth/fastcsv.png)](https://gemnasium.com/opennorth/fastcsv)
|
5
|
+
|
6
|
+
A fast [Ragel](http://www.colm.net/open-source/ragel/)-based CSV parser.
|
7
|
+
|
8
|
+
## Usage
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
require 'fastcsv'
|
12
|
+
|
13
|
+
# Read from file.
|
14
|
+
File.open(filename) do |f|
|
15
|
+
FastCSV.raw_parse(f) do |row|
|
16
|
+
# do stuff
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# Read from an IO object.
|
21
|
+
FastCSV.raw_parse(StringIO.new("foo,bar\n")) do |row|
|
22
|
+
# do stuff
|
23
|
+
end
|
24
|
+
|
25
|
+
# Read from a string.
|
26
|
+
FastCSV.raw_parse("foo,bar\n") do |row|
|
27
|
+
# do stuff
|
28
|
+
end
|
29
|
+
|
30
|
+
# Transcode like with the CSV module.
|
31
|
+
FastCSV.raw_parse("\xF1\n", encoding: 'iso-8859-1:utf-8') do |row|
|
32
|
+
# ["ñ"]
|
33
|
+
end
|
34
|
+
```
|
35
|
+
|
36
|
+
## Development
|
37
|
+
|
38
|
+
ragel -G2 ext/fastcsv/fastcsv.rl
|
39
|
+
ragel -Vp ext/fastcsv/fastcsv.rl | dot -Tpng -o machine.png
|
40
|
+
rake compile
|
41
|
+
gem uninstall fastcsv
|
42
|
+
rake install
|
43
|
+
|
44
|
+
## Why?
|
45
|
+
|
46
|
+
We evaluated [many CSV Ruby gems](https://github.com/jpmckinney/csv-benchmark#benchmark), and they were either too slow or had implementation errors. [rcsv](https://github.com/fiksu/rcsv) is fast and [libcsv](http://sourceforge.net/projects/libcsv/)-based, but it skips blank rows (Ruby's CSV module returns an empty array) and silently fails on input with an unclosed quote; nonetheless, it's an excellent alternative if you find errors in FastCSV! We looked for Ragel-based CSV parsers to copy, but they either had implementation errors or could not handle large inputs. [commas](https://github.com/aklt/commas/blob/master/csv.rl) looks good, but it performs a memory check on each character, which is overkill.
|
47
|
+
|
48
|
+
## Bugs? Questions?
|
49
|
+
|
50
|
+
This project's main repository is on GitHub: [http://github.com/opennorth/fastcsv](http://github.com/opennorth/fastcsv), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
|
51
|
+
|
52
|
+
## Acknowledgements
|
53
|
+
|
54
|
+
Started as a Ruby 2.1 fork of MoonWolf <moonwolf@moonwolf.com>'s CSVScan, found in [this commit](https://github.com/nickstenning/csvscan/commit/11ec30f71a27cc673bca09738ee8a63942f416f0.patch). CSVScan, in turn, uses Ragel code from [HPricot](https://github.com/hpricot/hpricot/blob/master/ext/hpricot_scan/hpricot_scan.rl), specifically as of [this commit](https://github.com/hpricot/hpricot/blob/908a4ae64bc8b935c4415c47ca6aea6492c6ce0a/ext/hpricot_scan/hpricot_scan.rl).
|
55
|
+
|
56
|
+
Copyright (c) 2014 Open North Inc., released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Gem packaging tasks (build, install, release) provided by Bundler.
require 'bundler'
Bundler::GemHelper.install_tasks

# Compilation task for the "fastcsv" C extension; the built library is
# placed under lib/fastcsv.
require 'rake/extensiontask'
Rake::ExtensionTask.new('fastcsv') { |ext| ext.lib_dir = 'lib/fastcsv' }

# `rake spec` runs the RSpec suite, and is what a bare `rake` runs.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

task default: :spec

# YARD is optional: when it cannot be loaded, `rake yard` aborts with
# installation instructions instead of being undefined.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yard do
    abort 'YARD is not available. In order to run yard, you must: gem install yard'
  end
end
|
data/USAGE
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
See README.md for full usage details.
|
@@ -0,0 +1,697 @@
|
|
1
|
+
|
2
|
+
#line 1 "ext/fastcsv/fastcsv.rl"
|
3
|
+
#include <ruby.h>
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
// CSV specifications.
|
6
|
+
// http://tools.ietf.org/html/rfc4180
|
7
|
+
// http://w3c.github.io/csvw/syntax/#ebnf
|
8
|
+
|
9
|
+
// CSV implementation.
|
10
|
+
// https://github.com/ruby/ruby/blob/master/lib/csv.rb
|
11
|
+
|
12
|
+
// Ruby C extensions help.
|
13
|
+
// https://github.com/ruby/ruby/blob/trunk/README.EXT
|
14
|
+
// http://rxr.whitequark.org/mri/source
|
15
|
+
|
16
|
+
// Ragel help.
|
17
|
+
// https://www.mail-archive.com/ragel-users@complang.org/
|
18
|
+
|
19
|
+
# define ASSOCIATE_INDEX \
|
20
|
+
if (internal_index >= 0) { \
|
21
|
+
rb_enc_associate_index(field, internal_index); \
|
22
|
+
field = rb_str_encode(field, rb_enc_from_encoding(external_encoding), 0, Qnil); \
|
23
|
+
} \
|
24
|
+
else { \
|
25
|
+
rb_enc_associate_index(field, rb_enc_to_index(external_encoding)); \
|
26
|
+
}
|
27
|
+
|
28
|
+
static VALUE mModule, rb_eParseError;
|
29
|
+
static ID s_read, s_to_str;
|
30
|
+
|
31
|
+
|
32
|
+
#line 139 "ext/fastcsv/fastcsv.rl"
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
#line 37 "ext/fastcsv/fastcsv.c"
|
37
|
+
static const int fastcsv_start = 4;
|
38
|
+
static const int fastcsv_first_final = 4;
|
39
|
+
static const int fastcsv_error = 0;
|
40
|
+
|
41
|
+
static const int fastcsv_en_main = 4;
|
42
|
+
|
43
|
+
|
44
|
+
#line 142 "ext/fastcsv/fastcsv.rl"
|
45
|
+
|
46
|
+
#define BUFSIZE 16384
|
47
|
+
|
48
|
+
VALUE fastcsv(int argc, VALUE *argv, VALUE self) {
|
49
|
+
int cs, act, have = 0, curline = 1, io = 0;
|
50
|
+
char *ts = 0, *te = 0, *buf = 0, *eof = 0;
|
51
|
+
|
52
|
+
VALUE port, opts;
|
53
|
+
VALUE row = rb_ary_new(), field = Qnil, bufsize = Qnil;
|
54
|
+
int done = 0, unclosed_line = 0, buffer_size = 0, taint = 0;
|
55
|
+
int internal_index = 0, external_index = rb_enc_to_index(rb_default_external_encoding());
|
56
|
+
rb_encoding *external_encoding = rb_default_external_encoding();
|
57
|
+
|
58
|
+
VALUE option;
|
59
|
+
char quote_char = '"'; //, *col_sep = ",", *row_sep = "\r\n";
|
60
|
+
|
61
|
+
rb_scan_args(argc, argv, "11", &port, &opts);
|
62
|
+
taint = OBJ_TAINTED(port);
|
63
|
+
io = rb_respond_to(port, s_read);
|
64
|
+
if (!io) {
|
65
|
+
if (rb_respond_to(port, s_to_str)) {
|
66
|
+
port = rb_funcall(port, s_to_str, 0);
|
67
|
+
StringValue(port);
|
68
|
+
}
|
69
|
+
else {
|
70
|
+
rb_raise(rb_eArgError, "data has to respond to #read or #to_str");
|
71
|
+
}
|
72
|
+
}
|
73
|
+
|
74
|
+
if (NIL_P(opts)) {
|
75
|
+
opts = rb_hash_new();
|
76
|
+
}
|
77
|
+
else if (TYPE(opts) != T_HASH) {
|
78
|
+
rb_raise(rb_eArgError, "options has to be a Hash or nil");
|
79
|
+
}
|
80
|
+
|
81
|
+
// @note Add machines for common CSV dialects, or see if we can use "when"
|
82
|
+
// from Chapter 6 to compare the character to the host program's variable.
|
83
|
+
// option = rb_hash_aref(opts, ID2SYM(rb_intern("quote_char")));
|
84
|
+
// if (TYPE(option) == T_STRING && RSTRING_LEN(option) == 1) {
|
85
|
+
// quote_char = *StringValueCStr(option);
|
86
|
+
// }
|
87
|
+
// else if (!NIL_P(option)) {
|
88
|
+
// rb_raise(rb_eArgError, ":quote_char has to be a single character String");
|
89
|
+
// }
|
90
|
+
|
91
|
+
// option = rb_hash_aref(opts, ID2SYM(rb_intern("col_sep")));
|
92
|
+
// if (TYPE(option) == T_STRING) {
|
93
|
+
// col_sep = StringValueCStr(option);
|
94
|
+
// }
|
95
|
+
// else if (!NIL_P(option)) {
|
96
|
+
// rb_raise(rb_eArgError, ":col_sep has to be a String");
|
97
|
+
// }
|
98
|
+
|
99
|
+
// option = rb_hash_aref(opts, ID2SYM(rb_intern("row_sep")));
|
100
|
+
// if (TYPE(option) == T_STRING) {
|
101
|
+
// row_sep = StringValueCStr(option);
|
102
|
+
// }
|
103
|
+
// else if (!NIL_P(option)) {
|
104
|
+
// rb_raise(rb_eArgError, ":row_sep has to be a String");
|
105
|
+
// }
|
106
|
+
|
107
|
+
option = rb_hash_aref(opts, ID2SYM(rb_intern("encoding")));
|
108
|
+
if (TYPE(option) == T_STRING) {
|
109
|
+
// @see parse_mode_enc in Ruby's io.c
|
110
|
+
const char *string = StringValueCStr(option), *pointer;
|
111
|
+
char internal_encoding_name[ENCODING_MAXNAMELEN + 1];
|
112
|
+
|
113
|
+
pointer = strrchr(string, ':');
|
114
|
+
if (pointer) {
|
115
|
+
long len = (pointer++) - string;
|
116
|
+
if (len == 0 || len > ENCODING_MAXNAMELEN) {
|
117
|
+
internal_index = -1;
|
118
|
+
}
|
119
|
+
else {
|
120
|
+
memcpy(internal_encoding_name, string, len);
|
121
|
+
internal_encoding_name[len] = '\0';
|
122
|
+
string = internal_encoding_name;
|
123
|
+
internal_index = rb_enc_find_index(internal_encoding_name);
|
124
|
+
}
|
125
|
+
}
|
126
|
+
else {
|
127
|
+
internal_index = rb_enc_find_index(string);
|
128
|
+
}
|
129
|
+
|
130
|
+
if (internal_index < 0 && internal_index != -2) {
|
131
|
+
rb_warn("Unsupported encoding %s ignored", string);
|
132
|
+
}
|
133
|
+
|
134
|
+
if (pointer) {
|
135
|
+
external_index = rb_enc_find_index(pointer);
|
136
|
+
if (external_index >= 0) {
|
137
|
+
external_encoding = rb_enc_from_index(external_index);
|
138
|
+
}
|
139
|
+
else {
|
140
|
+
rb_warn("Unsupported encoding %s ignored", string);
|
141
|
+
}
|
142
|
+
}
|
143
|
+
else if (internal_index >= 0) {
|
144
|
+
external_encoding = rb_enc_from_index(internal_index);
|
145
|
+
}
|
146
|
+
}
|
147
|
+
else if (!NIL_P(option)) {
|
148
|
+
rb_raise(rb_eArgError, ":encoding has to be a String");
|
149
|
+
}
|
150
|
+
|
151
|
+
buffer_size = BUFSIZE;
|
152
|
+
if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
|
153
|
+
bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
|
154
|
+
if (!NIL_P(bufsize)) {
|
155
|
+
buffer_size = NUM2INT(bufsize);
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
if (io) {
|
160
|
+
buf = ALLOC_N(char, buffer_size);
|
161
|
+
}
|
162
|
+
|
163
|
+
|
164
|
+
#line 165 "ext/fastcsv/fastcsv.c"
|
165
|
+
{
|
166
|
+
cs = fastcsv_start;
|
167
|
+
ts = 0;
|
168
|
+
te = 0;
|
169
|
+
act = 0;
|
170
|
+
}
|
171
|
+
|
172
|
+
#line 261 "ext/fastcsv/fastcsv.rl"
|
173
|
+
|
174
|
+
while (!done) {
|
175
|
+
VALUE str;
|
176
|
+
char *p, *pe;
|
177
|
+
int len, space = buffer_size - have, tokstart_diff, tokend_diff;
|
178
|
+
|
179
|
+
if (io) {
|
180
|
+
if (space == 0) {
|
181
|
+
tokstart_diff = ts - buf;
|
182
|
+
tokend_diff = te - buf;
|
183
|
+
|
184
|
+
buffer_size += BUFSIZE;
|
185
|
+
REALLOC_N(buf, char, buffer_size);
|
186
|
+
|
187
|
+
space = buffer_size - have;
|
188
|
+
|
189
|
+
ts = buf + tokstart_diff;
|
190
|
+
te = buf + tokend_diff;
|
191
|
+
}
|
192
|
+
p = buf + have;
|
193
|
+
|
194
|
+
str = rb_funcall(port, s_read, 1, INT2FIX(space));
|
195
|
+
if (NIL_P(str)) {
|
196
|
+
// StringIO#read returns nil for empty string.
|
197
|
+
len = 0;
|
198
|
+
}
|
199
|
+
else {
|
200
|
+
len = RSTRING_LEN(str);
|
201
|
+
memcpy(p, StringValuePtr(str), len);
|
202
|
+
}
|
203
|
+
|
204
|
+
if (len < space) {
|
205
|
+
// EOF actions don't work in scanners, so we add a sentinel value.
|
206
|
+
// @see http://www.complang.org/pipermail/ragel-users/2007-May/001516.html
|
207
|
+
// @see https://github.com/leeonix/lua-csv-ragel/blob/master/src/csv.rl
|
208
|
+
p[len++] = 0;
|
209
|
+
done = 1;
|
210
|
+
}
|
211
|
+
}
|
212
|
+
else {
|
213
|
+
p = RSTRING_PTR(port);
|
214
|
+
len = RSTRING_LEN(port);
|
215
|
+
p[len++] = 0;
|
216
|
+
done = 1;
|
217
|
+
}
|
218
|
+
|
219
|
+
pe = p + len;
|
220
|
+
// if (done) {
|
221
|
+
// // This triggers the eof action in the non-scanner version.
|
222
|
+
// eof = pe;
|
223
|
+
// }
|
224
|
+
|
225
|
+
#line 226 "ext/fastcsv/fastcsv.c"
|
226
|
+
{
|
227
|
+
if ( p == pe )
|
228
|
+
goto _test_eof;
|
229
|
+
switch ( cs )
|
230
|
+
{
|
231
|
+
tr0:
|
232
|
+
#line 1 "NONE"
|
233
|
+
{ switch( act ) {
|
234
|
+
case 0:
|
235
|
+
{{goto st0;}}
|
236
|
+
break;
|
237
|
+
default:
|
238
|
+
{{p = ((te))-1;}}
|
239
|
+
break;
|
240
|
+
}
|
241
|
+
}
|
242
|
+
goto st4;
|
243
|
+
tr10:
|
244
|
+
#line 105 "ext/fastcsv/fastcsv.rl"
|
245
|
+
{
|
246
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
247
|
+
rb_ary_push(row, field);
|
248
|
+
}
|
249
|
+
if (RARRAY_LEN(row)) {
|
250
|
+
rb_yield(row);
|
251
|
+
}
|
252
|
+
}
|
253
|
+
#line 129 "ext/fastcsv/fastcsv.rl"
|
254
|
+
{te = p+1;}
|
255
|
+
goto st4;
|
256
|
+
tr16:
|
257
|
+
#line 129 "ext/fastcsv/fastcsv.rl"
|
258
|
+
{te = p;p--;}
|
259
|
+
goto st4;
|
260
|
+
tr17:
|
261
|
+
#line 128 "ext/fastcsv/fastcsv.rl"
|
262
|
+
{te = p;p--;}
|
263
|
+
goto st4;
|
264
|
+
tr18:
|
265
|
+
#line 105 "ext/fastcsv/fastcsv.rl"
|
266
|
+
{
|
267
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
268
|
+
rb_ary_push(row, field);
|
269
|
+
}
|
270
|
+
if (RARRAY_LEN(row)) {
|
271
|
+
rb_yield(row);
|
272
|
+
}
|
273
|
+
}
|
274
|
+
#line 128 "ext/fastcsv/fastcsv.rl"
|
275
|
+
{te = p+1;}
|
276
|
+
goto st4;
|
277
|
+
tr20:
|
278
|
+
#line 127 "ext/fastcsv/fastcsv.rl"
|
279
|
+
{te = p;p--;}
|
280
|
+
goto st4;
|
281
|
+
tr21:
|
282
|
+
#line 105 "ext/fastcsv/fastcsv.rl"
|
283
|
+
{
|
284
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
285
|
+
rb_ary_push(row, field);
|
286
|
+
}
|
287
|
+
if (RARRAY_LEN(row)) {
|
288
|
+
rb_yield(row);
|
289
|
+
}
|
290
|
+
}
|
291
|
+
#line 127 "ext/fastcsv/fastcsv.rl"
|
292
|
+
{te = p+1;}
|
293
|
+
goto st4;
|
294
|
+
st4:
|
295
|
+
#line 1 "NONE"
|
296
|
+
{ts = 0;}
|
297
|
+
#line 1 "NONE"
|
298
|
+
{act = 0;}
|
299
|
+
if ( ++p == pe )
|
300
|
+
goto _test_eof4;
|
301
|
+
case 4:
|
302
|
+
#line 1 "NONE"
|
303
|
+
{ts = p;}
|
304
|
+
#line 305 "ext/fastcsv/fastcsv.c"
|
305
|
+
switch( (*p) ) {
|
306
|
+
case 0: goto tr14;
|
307
|
+
case 10: goto tr3;
|
308
|
+
case 13: goto tr4;
|
309
|
+
case 34: goto tr15;
|
310
|
+
case 44: goto tr5;
|
311
|
+
}
|
312
|
+
goto st1;
|
313
|
+
st1:
|
314
|
+
if ( ++p == pe )
|
315
|
+
goto _test_eof1;
|
316
|
+
case 1:
|
317
|
+
switch( (*p) ) {
|
318
|
+
case 0: goto tr2;
|
319
|
+
case 10: goto tr3;
|
320
|
+
case 13: goto tr4;
|
321
|
+
case 34: goto tr0;
|
322
|
+
case 44: goto tr5;
|
323
|
+
}
|
324
|
+
goto st1;
|
325
|
+
tr2:
|
326
|
+
#line 1 "NONE"
|
327
|
+
{te = p+1;}
|
328
|
+
#line 44 "ext/fastcsv/fastcsv.rl"
|
329
|
+
{
|
330
|
+
if (p == ts) {
|
331
|
+
// Unquoted empty fields are nil, not "", in Ruby.
|
332
|
+
field = Qnil;
|
333
|
+
}
|
334
|
+
else if (p > ts) {
|
335
|
+
field = rb_str_new(ts, p - ts);
|
336
|
+
ASSOCIATE_INDEX;
|
337
|
+
}
|
338
|
+
}
|
339
|
+
#line 105 "ext/fastcsv/fastcsv.rl"
|
340
|
+
{
|
341
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
342
|
+
rb_ary_push(row, field);
|
343
|
+
}
|
344
|
+
if (RARRAY_LEN(row)) {
|
345
|
+
rb_yield(row);
|
346
|
+
}
|
347
|
+
}
|
348
|
+
#line 129 "ext/fastcsv/fastcsv.rl"
|
349
|
+
{act = 3;}
|
350
|
+
goto st5;
|
351
|
+
st5:
|
352
|
+
if ( ++p == pe )
|
353
|
+
goto _test_eof5;
|
354
|
+
case 5:
|
355
|
+
#line 356 "ext/fastcsv/fastcsv.c"
|
356
|
+
switch( (*p) ) {
|
357
|
+
case 0: goto tr2;
|
358
|
+
case 10: goto tr3;
|
359
|
+
case 13: goto tr4;
|
360
|
+
case 34: goto tr16;
|
361
|
+
case 44: goto tr5;
|
362
|
+
}
|
363
|
+
goto st1;
|
364
|
+
tr3:
|
365
|
+
#line 44 "ext/fastcsv/fastcsv.rl"
|
366
|
+
{
|
367
|
+
if (p == ts) {
|
368
|
+
// Unquoted empty fields are nil, not "", in Ruby.
|
369
|
+
field = Qnil;
|
370
|
+
}
|
371
|
+
else if (p > ts) {
|
372
|
+
field = rb_str_new(ts, p - ts);
|
373
|
+
ASSOCIATE_INDEX;
|
374
|
+
}
|
375
|
+
}
|
376
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
377
|
+
{
|
378
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
379
|
+
rb_ary_push(row, field);
|
380
|
+
field = Qnil;
|
381
|
+
}
|
382
|
+
|
383
|
+
rb_yield(row);
|
384
|
+
row = rb_ary_new();
|
385
|
+
}
|
386
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
387
|
+
{
|
388
|
+
curline++;
|
389
|
+
}
|
390
|
+
goto st6;
|
391
|
+
tr19:
|
392
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
393
|
+
{
|
394
|
+
curline++;
|
395
|
+
}
|
396
|
+
goto st6;
|
397
|
+
tr11:
|
398
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
399
|
+
{
|
400
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
401
|
+
rb_ary_push(row, field);
|
402
|
+
field = Qnil;
|
403
|
+
}
|
404
|
+
|
405
|
+
rb_yield(row);
|
406
|
+
row = rb_ary_new();
|
407
|
+
}
|
408
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
409
|
+
{
|
410
|
+
curline++;
|
411
|
+
}
|
412
|
+
goto st6;
|
413
|
+
st6:
|
414
|
+
if ( ++p == pe )
|
415
|
+
goto _test_eof6;
|
416
|
+
case 6:
|
417
|
+
#line 418 "ext/fastcsv/fastcsv.c"
|
418
|
+
if ( (*p) == 0 )
|
419
|
+
goto tr18;
|
420
|
+
goto tr17;
|
421
|
+
tr4:
|
422
|
+
#line 44 "ext/fastcsv/fastcsv.rl"
|
423
|
+
{
|
424
|
+
if (p == ts) {
|
425
|
+
// Unquoted empty fields are nil, not "", in Ruby.
|
426
|
+
field = Qnil;
|
427
|
+
}
|
428
|
+
else if (p > ts) {
|
429
|
+
field = rb_str_new(ts, p - ts);
|
430
|
+
ASSOCIATE_INDEX;
|
431
|
+
}
|
432
|
+
}
|
433
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
434
|
+
{
|
435
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
436
|
+
rb_ary_push(row, field);
|
437
|
+
field = Qnil;
|
438
|
+
}
|
439
|
+
|
440
|
+
rb_yield(row);
|
441
|
+
row = rb_ary_new();
|
442
|
+
}
|
443
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
444
|
+
{
|
445
|
+
curline++;
|
446
|
+
}
|
447
|
+
goto st7;
|
448
|
+
tr12:
|
449
|
+
#line 95 "ext/fastcsv/fastcsv.rl"
|
450
|
+
{
|
451
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
452
|
+
rb_ary_push(row, field);
|
453
|
+
field = Qnil;
|
454
|
+
}
|
455
|
+
|
456
|
+
rb_yield(row);
|
457
|
+
row = rb_ary_new();
|
458
|
+
}
|
459
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
460
|
+
{
|
461
|
+
curline++;
|
462
|
+
}
|
463
|
+
goto st7;
|
464
|
+
st7:
|
465
|
+
if ( ++p == pe )
|
466
|
+
goto _test_eof7;
|
467
|
+
case 7:
|
468
|
+
#line 469 "ext/fastcsv/fastcsv.c"
|
469
|
+
switch( (*p) ) {
|
470
|
+
case 0: goto tr18;
|
471
|
+
case 10: goto tr19;
|
472
|
+
}
|
473
|
+
goto tr17;
|
474
|
+
tr5:
|
475
|
+
#line 44 "ext/fastcsv/fastcsv.rl"
|
476
|
+
{
|
477
|
+
if (p == ts) {
|
478
|
+
// Unquoted empty fields are nil, not "", in Ruby.
|
479
|
+
field = Qnil;
|
480
|
+
}
|
481
|
+
else if (p > ts) {
|
482
|
+
field = rb_str_new(ts, p - ts);
|
483
|
+
ASSOCIATE_INDEX;
|
484
|
+
}
|
485
|
+
}
|
486
|
+
#line 90 "ext/fastcsv/fastcsv.rl"
|
487
|
+
{
|
488
|
+
rb_ary_push(row, field);
|
489
|
+
field = Qnil;
|
490
|
+
}
|
491
|
+
goto st8;
|
492
|
+
tr13:
|
493
|
+
#line 90 "ext/fastcsv/fastcsv.rl"
|
494
|
+
{
|
495
|
+
rb_ary_push(row, field);
|
496
|
+
field = Qnil;
|
497
|
+
}
|
498
|
+
goto st8;
|
499
|
+
st8:
|
500
|
+
if ( ++p == pe )
|
501
|
+
goto _test_eof8;
|
502
|
+
case 8:
|
503
|
+
#line 504 "ext/fastcsv/fastcsv.c"
|
504
|
+
if ( (*p) == 0 )
|
505
|
+
goto tr21;
|
506
|
+
goto tr20;
|
507
|
+
tr14:
|
508
|
+
#line 1 "NONE"
|
509
|
+
{te = p+1;}
|
510
|
+
#line 105 "ext/fastcsv/fastcsv.rl"
|
511
|
+
{
|
512
|
+
if (!NIL_P(field) || RARRAY_LEN(row)) {
|
513
|
+
rb_ary_push(row, field);
|
514
|
+
}
|
515
|
+
if (RARRAY_LEN(row)) {
|
516
|
+
rb_yield(row);
|
517
|
+
}
|
518
|
+
}
|
519
|
+
#line 44 "ext/fastcsv/fastcsv.rl"
|
520
|
+
{
|
521
|
+
if (p == ts) {
|
522
|
+
// Unquoted empty fields are nil, not "", in Ruby.
|
523
|
+
field = Qnil;
|
524
|
+
}
|
525
|
+
else if (p > ts) {
|
526
|
+
field = rb_str_new(ts, p - ts);
|
527
|
+
ASSOCIATE_INDEX;
|
528
|
+
}
|
529
|
+
}
|
530
|
+
#line 129 "ext/fastcsv/fastcsv.rl"
|
531
|
+
{act = 3;}
|
532
|
+
goto st9;
|
533
|
+
st9:
|
534
|
+
if ( ++p == pe )
|
535
|
+
goto _test_eof9;
|
536
|
+
case 9:
|
537
|
+
#line 538 "ext/fastcsv/fastcsv.c"
|
538
|
+
switch( (*p) ) {
|
539
|
+
case 10: goto tr16;
|
540
|
+
case 13: goto tr16;
|
541
|
+
case 34: goto tr16;
|
542
|
+
case 44: goto tr16;
|
543
|
+
}
|
544
|
+
goto st1;
|
545
|
+
tr8:
|
546
|
+
#line 32 "ext/fastcsv/fastcsv.rl"
|
547
|
+
{
|
548
|
+
curline++;
|
549
|
+
}
|
550
|
+
goto st2;
|
551
|
+
tr15:
|
552
|
+
#line 36 "ext/fastcsv/fastcsv.rl"
|
553
|
+
{
|
554
|
+
unclosed_line = curline;
|
555
|
+
}
|
556
|
+
goto st2;
|
557
|
+
st2:
|
558
|
+
if ( ++p == pe )
|
559
|
+
goto _test_eof2;
|
560
|
+
case 2:
|
561
|
+
#line 562 "ext/fastcsv/fastcsv.c"
|
562
|
+
switch( (*p) ) {
|
563
|
+
case 0: goto st0;
|
564
|
+
case 10: goto tr8;
|
565
|
+
case 13: goto tr8;
|
566
|
+
case 34: goto tr9;
|
567
|
+
}
|
568
|
+
goto st2;
|
569
|
+
st0:
|
570
|
+
cs = 0;
|
571
|
+
goto _out;
|
572
|
+
tr9:
|
573
|
+
#line 55 "ext/fastcsv/fastcsv.rl"
|
574
|
+
{
|
575
|
+
if (p == ts) {
|
576
|
+
field = rb_str_new2("");
|
577
|
+
ASSOCIATE_INDEX;
|
578
|
+
}
|
579
|
+
// @note If we add an action on '""', we can skip some steps if no '""' is found.
|
580
|
+
else if (p > ts) {
|
581
|
+
// Operating on ts in-place produces odd behavior, FYI.
|
582
|
+
char *copy = ALLOC_N(char, p - ts);
|
583
|
+
memcpy(copy, ts, p - ts);
|
584
|
+
|
585
|
+
char *reader = ts, *writer = copy;
|
586
|
+
int escaped = 0;
|
587
|
+
|
588
|
+
while (p > reader) {
|
589
|
+
if (*reader == quote_char && !escaped) {
|
590
|
+
// Skip the escaping character.
|
591
|
+
escaped = 1;
|
592
|
+
}
|
593
|
+
else {
|
594
|
+
escaped = 0;
|
595
|
+
*writer++ = *reader;
|
596
|
+
}
|
597
|
+
reader++;
|
598
|
+
}
|
599
|
+
|
600
|
+
field = rb_str_new(copy, writer - copy);
|
601
|
+
ASSOCIATE_INDEX;
|
602
|
+
|
603
|
+
if (copy != NULL) {
|
604
|
+
free(copy);
|
605
|
+
}
|
606
|
+
}
|
607
|
+
}
|
608
|
+
#line 40 "ext/fastcsv/fastcsv.rl"
|
609
|
+
{
|
610
|
+
unclosed_line = 0;
|
611
|
+
}
|
612
|
+
goto st3;
|
613
|
+
st3:
|
614
|
+
if ( ++p == pe )
|
615
|
+
goto _test_eof3;
|
616
|
+
case 3:
|
617
|
+
#line 618 "ext/fastcsv/fastcsv.c"
|
618
|
+
switch( (*p) ) {
|
619
|
+
case 0: goto tr10;
|
620
|
+
case 10: goto tr11;
|
621
|
+
case 13: goto tr12;
|
622
|
+
case 34: goto st2;
|
623
|
+
case 44: goto tr13;
|
624
|
+
}
|
625
|
+
goto st0;
|
626
|
+
}
|
627
|
+
_test_eof4: cs = 4; goto _test_eof;
|
628
|
+
_test_eof1: cs = 1; goto _test_eof;
|
629
|
+
_test_eof5: cs = 5; goto _test_eof;
|
630
|
+
_test_eof6: cs = 6; goto _test_eof;
|
631
|
+
_test_eof7: cs = 7; goto _test_eof;
|
632
|
+
_test_eof8: cs = 8; goto _test_eof;
|
633
|
+
_test_eof9: cs = 9; goto _test_eof;
|
634
|
+
_test_eof2: cs = 2; goto _test_eof;
|
635
|
+
_test_eof3: cs = 3; goto _test_eof;
|
636
|
+
|
637
|
+
_test_eof: {}
|
638
|
+
if ( p == eof )
|
639
|
+
{
|
640
|
+
switch ( cs ) {
|
641
|
+
case 1: goto tr0;
|
642
|
+
case 5: goto tr16;
|
643
|
+
case 6: goto tr17;
|
644
|
+
case 7: goto tr17;
|
645
|
+
case 8: goto tr20;
|
646
|
+
case 9: goto tr16;
|
647
|
+
}
|
648
|
+
}
|
649
|
+
|
650
|
+
_out: {}
|
651
|
+
}
|
652
|
+
|
653
|
+
#line 313 "ext/fastcsv/fastcsv.rl"
|
654
|
+
|
655
|
+
if (done && cs < fastcsv_first_final) {
|
656
|
+
if (buf != NULL) {
|
657
|
+
free(buf);
|
658
|
+
}
|
659
|
+
if (unclosed_line) {
|
660
|
+
rb_raise(rb_eParseError, "Unclosed quoted field on line %d.", unclosed_line);
|
661
|
+
}
|
662
|
+
// Ruby raises different errors for illegal quoting, depending on whether
|
663
|
+
// a quoted string is followed by a string ("Unclosed quoted field on line
|
664
|
+
// %d.") or by a string ending in a quote ("Missing or stray quote in line
|
665
|
+
// %d"). These precisions are kind of bogus, but we can try using $!.
|
666
|
+
else {
|
667
|
+
rb_raise(rb_eParseError, "Illegal quoting in line %d.", curline);
|
668
|
+
}
|
669
|
+
}
|
670
|
+
|
671
|
+
if (ts == 0) {
|
672
|
+
have = 0;
|
673
|
+
}
|
674
|
+
else if (io) {
|
675
|
+
have = pe - ts;
|
676
|
+
memmove(buf, ts, have);
|
677
|
+
te = buf + (te - ts);
|
678
|
+
ts = buf;
|
679
|
+
}
|
680
|
+
}
|
681
|
+
|
682
|
+
if (buf != NULL) {
|
683
|
+
free(buf);
|
684
|
+
}
|
685
|
+
|
686
|
+
return Qnil;
|
687
|
+
}
|
688
|
+
|
689
|
+
void Init_fastcsv() {
|
690
|
+
s_read = rb_intern("read");
|
691
|
+
s_to_str = rb_intern("to_str");
|
692
|
+
|
693
|
+
mModule = rb_define_module("FastCSV");
|
694
|
+
rb_define_attr(rb_singleton_class(mModule), "buffer_size", 1, 1);
|
695
|
+
rb_define_singleton_method(mModule, "raw_parse", fastcsv, -1);
|
696
|
+
rb_eParseError = rb_define_class_under(mModule, "ParseError", rb_eStandardError);
|
697
|
+
}
|