fastcsv 0.0.7 → 0.0.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/encoding.h>
3
+ #include <stdbool.h>
4
+
3
5
  // CSV specifications.
4
6
  // http://tools.ietf.org/html/rfc4180
5
7
  // http://w3c.github.io/csvw/syntax/#ebnf
@@ -40,6 +42,7 @@ typedef struct {
40
42
 
41
43
  action open_quote {
42
44
  unclosed_line = curline;
45
+ in_quoted_field = true;
43
46
  }
44
47
 
45
48
  action close_quote {
@@ -58,41 +61,16 @@ typedef struct {
58
61
  }
59
62
 
60
63
  action read_quoted {
61
- if (p == ts) {
62
- field = rb_enc_str_new("", 0, encoding);
63
- ENCODE;
64
- }
65
- // @note If we add an action on '""', we can skip some steps if no '""' is found.
66
- else if (p > ts) {
67
- // Operating on ts in-place produces odd behavior, FYI.
68
- char *copy = ALLOC_N(char, p - ts);
69
- memcpy(copy, ts, p - ts);
70
-
71
- char *reader = ts, *writer = copy;
72
- int escaped = 0;
73
-
74
- while (p > reader) {
75
- if (*reader == quote_char && !escaped) {
76
- // Skip the escaping character.
77
- escaped = 1;
78
- }
79
- else {
80
- escaped = 0;
81
- *writer++ = *reader;
82
- }
83
- reader++;
84
- }
64
+ // intentionally blank - see parse_quoted_field
65
+ }
85
66
 
86
- field = rb_enc_str_new(copy, writer - copy, encoding);
67
+ action new_field {
68
+ if (in_quoted_field) {
69
+ parse_quoted_field(&field, encoding, quote_char, ts + 1, p - 1);
87
70
  ENCODE;
88
-
89
- if (copy != NULL) {
90
- free(copy);
91
- }
71
+ in_quoted_field = false;
92
72
  }
93
- }
94
73
 
95
- action new_field {
96
74
  rb_ary_push(row, field);
97
75
  field = Qnil;
98
76
  }
@@ -126,6 +104,12 @@ typedef struct {
126
104
  rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
127
105
  }
128
106
 
107
+ if (in_quoted_field) {
108
+ parse_quoted_field(&field, encoding, quote_char, ts + 1, p - 1);
109
+ ENCODE;
110
+ in_quoted_field = false;
111
+ }
112
+
129
113
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
130
114
  rb_ary_push(row, field);
131
115
  field = Qnil;
@@ -202,6 +186,38 @@ static void rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_enco
202
186
  }
203
187
  }
204
188
 
189
+ static void parse_quoted_field(VALUE* field, rb_encoding* encoding, char quote_char, char* quoted_field_start, char *quoted_field_end) {
190
+ // read the full quoted field, handling any escape sequences
191
+ if (quoted_field_end == quoted_field_start) {
192
+ // empty quoted field is an empty string
193
+ *field = rb_enc_str_new("", 0, encoding);
194
+ } else {
195
+ // largest possible buffer. if there's escaping, the resulting string will
196
+ // not use the entire buffer
197
+ char *copy = ALLOC_N(char, quoted_field_end - quoted_field_start);
198
+ char *reader = quoted_field_start, *writer = copy;
199
+ int escaped = 0;
200
+
201
+ while (quoted_field_end > reader) {
202
+ if (*reader == quote_char && !escaped) {
203
+ // Skip the escaping character.
204
+ escaped = 1;
205
+ }
206
+ else {
207
+ escaped = 0;
208
+ *writer++ = *reader;
209
+ }
210
+ reader++;
211
+ }
212
+
213
+ *field = rb_enc_str_new(copy, writer - copy, encoding);
214
+
215
+ if (copy != NULL) {
216
+ free(copy);
217
+ }
218
+ }
219
+ }
220
+
205
221
  static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
206
222
  int cs, act, have = 0, curline = 1, io = 0;
207
223
  char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = 0;
@@ -217,6 +233,8 @@ static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
217
233
  VALUE option;
218
234
  char quote_char = '"', col_sep = ',';
219
235
 
236
+ bool in_quoted_field = false;
237
+
220
238
  rb_scan_args(argc, argv, "11", &port, &opts);
221
239
  taint = OBJ_TAINTED(port);
222
240
  io = rb_respond_to(port, s_read);
data/fastcsv.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "fastcsv"
5
- s.version = '0.0.7'
5
+ s.version = '0.0.9'
6
6
  s.platform = Gem::Platform::RUBY
7
7
  s.authors = ["James McKinney"]
8
8
  s.homepage = "https://github.com/jpmckinney/fastcsv"
@@ -14,6 +14,7 @@ Gem::Specification.new do |s|
14
14
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
15
15
  s.require_paths = ["lib"]
16
16
  s.extensions = ["ext/fastcsv/extconf.rb"]
17
+ s.required_ruby_version = '< 2.6'
17
18
 
18
19
  s.add_development_dependency('coveralls')
19
20
  s.add_development_dependency('rake')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - James McKinney
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-03 00:00:00.000000000 Z
11
+ date: 2025-06-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: coveralls
@@ -66,8 +66,8 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- description:
70
- email:
69
+ description:
70
+ email:
71
71
  executables: []
72
72
  extensions:
73
73
  - ext/fastcsv/extconf.rb
@@ -112,23 +112,23 @@ homepage: https://github.com/jpmckinney/fastcsv
112
112
  licenses:
113
113
  - MIT
114
114
  metadata: {}
115
- post_install_message:
115
+ post_install_message:
116
116
  rdoc_options: []
117
117
  require_paths:
118
118
  - lib
119
119
  required_ruby_version: !ruby/object:Gem::Requirement
120
120
  requirements:
121
- - - ">="
121
+ - - "<"
122
122
  - !ruby/object:Gem::Version
123
- version: '0'
123
+ version: '2.6'
124
124
  required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  requirements:
126
126
  - - ">="
127
127
  - !ruby/object:Gem::Version
128
128
  version: '0'
129
129
  requirements: []
130
- rubygems_version: 3.0.3.1
131
- signing_key:
130
+ rubygems_version: 3.5.21
131
+ signing_key:
132
132
  specification_version: 4
133
133
  summary: A fast Ragel-based CSV parser, compatible with Ruby's CSV
134
134
  test_files: