fastcsv 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/encoding.h>
3
+ #include <stdbool.h>
4
+
3
5
  // CSV specifications.
4
6
  // http://tools.ietf.org/html/rfc4180
5
7
  // http://w3c.github.io/csvw/syntax/#ebnf
@@ -40,6 +42,7 @@ typedef struct {
40
42
 
41
43
  action open_quote {
42
44
  unclosed_line = curline;
45
+ in_quoted_field = true;
43
46
  }
44
47
 
45
48
  action close_quote {
@@ -58,41 +61,16 @@ typedef struct {
58
61
  }
59
62
 
60
63
  action read_quoted {
61
- if (p == ts) {
62
- field = rb_enc_str_new("", 0, encoding);
63
- ENCODE;
64
- }
65
- // @note If we add an action on '""', we can skip some steps if no '""' is found.
66
- else if (p > ts) {
67
- // Operating on ts in-place produces odd behavior, FYI.
68
- char *copy = ALLOC_N(char, p - ts);
69
- memcpy(copy, ts, p - ts);
70
-
71
- char *reader = ts, *writer = copy;
72
- int escaped = 0;
73
-
74
- while (p > reader) {
75
- if (*reader == quote_char && !escaped) {
76
- // Skip the escaping character.
77
- escaped = 1;
78
- }
79
- else {
80
- escaped = 0;
81
- *writer++ = *reader;
82
- }
83
- reader++;
84
- }
64
+ // intentionally blank - see parse_quoted_field
65
+ }
85
66
 
86
- field = rb_enc_str_new(copy, writer - copy, encoding);
67
+ action new_field {
68
+ if (in_quoted_field) {
69
+ parse_quoted_field(&field, encoding, quote_char, ts + 1, p - 1);
87
70
  ENCODE;
88
-
89
- if (copy != NULL) {
90
- free(copy);
91
- }
71
+ in_quoted_field = false;
92
72
  }
93
- }
94
73
 
95
- action new_field {
96
74
  rb_ary_push(row, field);
97
75
  field = Qnil;
98
76
  }
@@ -126,6 +104,12 @@ typedef struct {
126
104
  rb_ivar_set(self, s_row, rb_str_new(d->start, p - d->start));
127
105
  }
128
106
 
107
+ if (in_quoted_field) {
108
+ parse_quoted_field(&field, encoding, quote_char, ts + 1, p - 1);
109
+ ENCODE;
110
+ in_quoted_field = false;
111
+ }
112
+
129
113
  if (!NIL_P(field) || RARRAY_LEN(row)) { // same as new_field
130
114
  rb_ary_push(row, field);
131
115
  field = Qnil;
@@ -202,6 +186,38 @@ static void rb_io_ext_int_to_encs(rb_encoding *ext, rb_encoding *intern, rb_enco
202
186
  }
203
187
  }
204
188
 
189
+ static void parse_quoted_field(VALUE* field, rb_encoding* encoding, char quote_char, char* quoted_field_start, char *quoted_field_end) {
190
+ // read the full quoted field, handling any escape sequences
191
+ if (quoted_field_end == quoted_field_start) {
192
+ // empty quoted field is an empty string
193
+ *field = rb_enc_str_new("", 0, encoding);
194
+ } else {
195
+ // largest possible buffer. if there's escaping, the resulting string will
196
+ // not use the entire buffer
197
+ char *copy = ALLOC_N(char, quoted_field_end - quoted_field_start);
198
+ char *reader = quoted_field_start, *writer = copy;
199
+ int escaped = 0;
200
+
201
+ while (quoted_field_end > reader) {
202
+ if (*reader == quote_char && !escaped) {
203
+ // Skip the escaping character.
204
+ escaped = 1;
205
+ }
206
+ else {
207
+ escaped = 0;
208
+ *writer++ = *reader;
209
+ }
210
+ reader++;
211
+ }
212
+
213
+ *field = rb_enc_str_new(copy, writer - copy, encoding);
214
+
215
+ if (copy != NULL) {
216
+ free(copy);
217
+ }
218
+ }
219
+ }
220
+
205
221
  static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
206
222
  int cs, act, have = 0, curline = 1, io = 0;
207
223
  char *ts = 0, *te = 0, *buf = 0, *eof = 0, *mark_row_sep = 0, *row_sep = 0;
@@ -217,6 +233,8 @@ static VALUE raw_parse(int argc, VALUE *argv, VALUE self) {
217
233
  VALUE option;
218
234
  char quote_char = '"', col_sep = ',';
219
235
 
236
+ bool in_quoted_field = false;
237
+
220
238
  rb_scan_args(argc, argv, "11", &port, &opts);
221
239
  taint = OBJ_TAINTED(port);
222
240
  io = rb_respond_to(port, s_read);
data/fastcsv.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "fastcsv"
5
- s.version = '0.0.8'
5
+ s.version = '0.0.9'
6
6
  s.platform = Gem::Platform::RUBY
7
7
  s.authors = ["James McKinney"]
8
8
  s.homepage = "https://github.com/jpmckinney/fastcsv"
@@ -14,8 +14,7 @@ Gem::Specification.new do |s|
14
14
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
15
15
  s.require_paths = ["lib"]
16
16
  s.extensions = ["ext/fastcsv/extconf.rb"]
17
-
18
- s.add_runtime_dependency('csv')
17
+ s.required_ruby_version = '< 2.6'
19
18
 
20
19
  s.add_development_dependency('coveralls')
21
20
  s.add_development_dependency('rake')
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastcsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - James McKinney
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-26 00:00:00.000000000 Z
11
+ date: 2025-06-25 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: csv
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: coveralls
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -132,9 +118,9 @@ require_paths:
132
118
  - lib
133
119
  required_ruby_version: !ruby/object:Gem::Requirement
134
120
  requirements:
135
- - - ">="
121
+ - - "<"
136
122
  - !ruby/object:Gem::Version
137
- version: '0'
123
+ version: '2.6'
138
124
  required_rubygems_version: !ruby/object:Gem::Requirement
139
125
  requirements:
140
126
  - - ">="