fastcsv 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +6 -6
- data/ext/fastcsv/fastcsv.c +537 -298
- data/ext/fastcsv/fastcsv.rl +49 -31
- data/fastcsv.gemspec +2 -3
- metadata +4 -18
data/ext/fastcsv/fastcsv.rl
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <ruby.h>
|
2
2
|
#include <ruby/encoding.h>
|
3
|
+
#include <stdbool.h>
|
4
|
+
|
3
5
|
// CSV specifications.
|
4
6
|
// http://tools.ietf.org/html/rfc4180
|
5
7
|
// http://w3c.github.io/csvw/syntax/#ebnf
|
@@ -40,6 +42,7 @@ typedef struct {
|
|
40
42
|
|
41
43
|
action open_quote {
|
42
44
|
unclosed_line = curline;
|
45
|
+
in_quoted_field = true;
|
43
46
|
}
|
44
47
|
|
45
48
|
action close_quote {
|
@@ -58,41 +61,16 @@ typedef struct {
|
|
58
61
|
}
|
59
62
|
|
60
63
|
action read_quoted {
|
61
|
-
|
62
|
-
|
63
|
-
ENCODE;
|
64
|
-
}
|
65
|
-
// @note If we add an action on '""', we can skip some steps if no '""' is found.
|
66
|
-
else if (p > ts) {
|
67
|
-
// Operating on ts in-place produces odd behavior, FYI.
|
68
|
-
char *copy = ALLOC_N(char, p - ts);
|
69
|
-
memcpy(copy, ts, p - ts);
|
70
|
-
|
71
|
-
char *reader = ts, *writer = copy;
|
72
|
-
int escaped = 0;
|
73
|
-
|
74
|
-
while (p > reader) {
|
75
|
-
if (*reader == quote_char && !escaped) {
|
76
|
-
// Skip the escaping character.
|
77
|
-
escaped = 1;
|
78
|
-
}
|
79
|
-
else {
|
80
|
-
escaped = 0;
|
81
|
-
*writer++ = *reader;
|
82
|
-
}
|
83
|
-
reader++;
|
84
|
-
}
|
64
|
+
// intentionally blank - see parse_quoted_field
|
65
|
+
}
|
85
66
|
|
86
|
-
|
67
|
+
action new_field {
|
68
|
+
if (in_quoted_field) {
|
69
|
+
parse_quoted_field(&field, encoding, quote_char, ts + 1, p - 1);
|
87
70
|
ENCODE;
|
88
|
-
|
89
|
-
if (copy != NULL) {
|
90
|
-
free(copy);
|
91
|
-
}
|
71
|
+
in_quoted_field = false;
|
92
72
|
}
|
93
|
-
}
|
94
73
|
|
95
|
-
action new_field {
|
96
74
|
rb_ary_push(row, field);
|
97
75
|
field = Qnil;
|
98
76
|
}
|
@@ -126,6 +104,12 @@ typedef struct {
|
|
126
104
|
rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
|
127
105
|
}
|
128
106
|
|
107
|
+
if (in_quoted_field) {
|
108
|
+
parse_quoted_field(&field, encoding, quote_char, ts + 1, p - 1);
|
109
|
+
ENCODE;
|
110
|
+
in_quoted_field = false;
|
111
|
+
}
|
112
|
+
|
129
113
|
if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
|
130
114
|
rb_ary_push(row, field);
|
131
115
|
field = Qnil;
|
@@ -202,6 +186,38 @@ static void rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_enco
|
|
202
186
|
}
|
203
187
|
}
|
204
188
|
|
189
|
+
static void parse_quoted_field(VALUE* field, rb_encoding* encoding, char quote_char, char* quoted_field_start, char *quoted_field_end) {
|
190
|
+
// read the full quoted field, handling any escape sequences
|
191
|
+
if (quoted_field_end == quoted_field_start) {
|
192
|
+
// empty quoted field is an empty string
|
193
|
+
*field = rb_enc_str_new("", 0, encoding);
|
194
|
+
} else {
|
195
|
+
// largest possible buffer. if there's escaping, the resulting string will
|
196
|
+
// not use the entire buffer
|
197
|
+
char *copy = ALLOC_N(char, quoted_field_end - quoted_field_start);
|
198
|
+
char *reader = quoted_field_start, *writer = copy;
|
199
|
+
int escaped = 0;
|
200
|
+
|
201
|
+
while (quoted_field_end > reader) {
|
202
|
+
if (*reader == quote_char && !escaped) {
|
203
|
+
// Skip the escaping character.
|
204
|
+
escaped = 1;
|
205
|
+
}
|
206
|
+
else {
|
207
|
+
escaped = 0;
|
208
|
+
*writer++ = *reader;
|
209
|
+
}
|
210
|
+
reader++;
|
211
|
+
}
|
212
|
+
|
213
|
+
*field = rb_enc_str_new(copy, writer - copy, encoding);
|
214
|
+
|
215
|
+
if (copy != NULL) {
|
216
|
+
free(copy);
|
217
|
+
}
|
218
|
+
}
|
219
|
+
}
|
220
|
+
|
205
221
|
static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
|
206
222
|
int cs, act, have = 0, curline = 1, io = 0;
|
207
223
|
char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = 0;
|
@@ -217,6 +233,8 @@ static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
|
|
217
233
|
VALUE option;
|
218
234
|
char quote_char = '"', col_sep = ',';
|
219
235
|
|
236
|
+
bool in_quoted_field = false;
|
237
|
+
|
220
238
|
rb_scan_args(argc, argv, "11", &port, &opts);
|
221
239
|
taint = OBJ_TAINTED(port);
|
222
240
|
io = rb_respond_to(port, s_read);
|
data/fastcsv.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "fastcsv"
|
5
|
-
s.version = '0.0.8'
|
5
|
+
s.version = '0.0.9'
|
6
6
|
s.platform = Gem::Platform::RUBY
|
7
7
|
s.authors = ["James McKinney"]
|
8
8
|
s.homepage = "https://github.com/jpmckinney/fastcsv"
|
@@ -14,8 +14,7 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
15
15
|
s.require_paths = ["lib"]
|
16
16
|
s.extensions = ["ext/fastcsv/extconf.rb"]
|
17
|
-
|
18
|
-
s.add_runtime_dependency('csv')
|
17
|
+
s.required_ruby_version = '< 2.6'
|
19
18
|
|
20
19
|
s.add_development_dependency('coveralls')
|
21
20
|
s.add_development_dependency('rake')
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastcsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James McKinney
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-06-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: csv
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: coveralls
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -132,9 +118,9 @@ require_paths:
|
|
132
118
|
- lib
|
133
119
|
required_ruby_version: !ruby/object:Gem::Requirement
|
134
120
|
requirements:
|
135
|
-
- - "
|
121
|
+
- - "<"
|
136
122
|
- !ruby/object:Gem::Version
|
137
|
-
version: '
|
123
|
+
version: '2.6'
|
138
124
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
125
|
requirements:
|
140
126
|
- - ">="
|