cisv 0.4.8 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -83,7 +83,6 @@ Built-in transform names:
83
83
  - `trim`
84
84
  - `to_int` (or `int`)
85
85
  - `to_float` (or `float`)
86
- - `hash_sha256` (or `sha256`)
87
86
  - `base64_encode` (or `base64`)
88
87
 
89
88
  ## Examples
package/binding.gyp CHANGED
@@ -21,7 +21,6 @@
21
21
  "cflags_cc!": [ "-fno-exceptions" ],
22
22
  "cflags_cc": ["-O3"],
23
23
  "defines": [
24
- "NAPI_DISABLE_CPP_EXCEPTIONS",
25
24
  "NAPI_VERSION=6"
26
25
  ],
27
26
  "conditions": [
Binary file
@@ -5,8 +5,11 @@
5
5
  #include <memory>
6
6
  #include <string>
7
7
  #include <unordered_map>
8
+ #include <algorithm>
8
9
  #include <chrono>
9
10
  #include <cstdint>
11
+ #include <climits>
12
+ #include <cmath>
10
13
 
11
14
  namespace {
12
15
 
@@ -46,6 +49,126 @@ static void ValidateSingleCharOption(
46
49
  *target = raw[0];
47
50
  }
48
51
 
52
+ static double MaxJsSafeInteger() {
53
+ return 9007199254740991.0;
54
+ }
55
+
56
+ static bool IsWholeNumber(double value) {
57
+ return std::isfinite(value) && std::floor(value) == value;
58
+ }
59
+
60
+ static void ApplyBooleanOption(
61
+ Napi::Env env,
62
+ const Napi::Object &options,
63
+ const char *option_name,
64
+ bool *target
65
+ ) {
66
+ if (!options.Has(option_name)) {
67
+ return;
68
+ }
69
+
70
+ Napi::Value value = options.Get(option_name);
71
+ if (!value.IsBoolean()) {
72
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a boolean");
73
+ }
74
+
75
+ *target = value.As<Napi::Boolean>();
76
+ }
77
+
78
+ static void ApplySizeOption(
79
+ Napi::Env env,
80
+ const Napi::Object &options,
81
+ const char *option_name,
82
+ size_t *target
83
+ ) {
84
+ if (!options.Has(option_name)) {
85
+ return;
86
+ }
87
+
88
+ Napi::Value value = options.Get(option_name);
89
+ if (value.IsNull() || value.IsUndefined()) {
90
+ *target = 0;
91
+ return;
92
+ }
93
+ if (!value.IsNumber()) {
94
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
95
+ }
96
+
97
+ double raw = value.As<Napi::Number>().DoubleValue();
98
+ double max_value = static_cast<double>(SIZE_MAX);
99
+ if (!IsWholeNumber(raw) || raw < 0.0 || raw > max_value || raw > MaxJsSafeInteger()) {
100
+ throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
101
+ }
102
+
103
+ *target = static_cast<size_t>(raw);
104
+ }
105
+
106
+ static void ApplyLineOption(
107
+ Napi::Env env,
108
+ const Napi::Object &options,
109
+ const char *option_name,
110
+ int *target
111
+ ) {
112
+ if (!options.Has(option_name)) {
113
+ return;
114
+ }
115
+
116
+ Napi::Value value = options.Get(option_name);
117
+ if (value.IsNull() || value.IsUndefined()) {
118
+ *target = 0;
119
+ return;
120
+ }
121
+ if (!value.IsNumber()) {
122
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
123
+ }
124
+
125
+ double raw = value.As<Napi::Number>().DoubleValue();
126
+ if (!IsWholeNumber(raw) || raw < 0.0 || raw > static_cast<double>(INT_MAX)) {
127
+ throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
128
+ }
129
+
130
+ *target = static_cast<int>(raw);
131
+ }
132
+
133
+ static void ValidateConfigSemantics(Napi::Env env, const cisv_config &config) {
134
+ if (config.delimiter == config.quote) {
135
+ throw Napi::TypeError::New(env, "delimiter and quote cannot be the same");
136
+ }
137
+ if (config.escape != '\0' && config.escape == config.delimiter) {
138
+ throw Napi::TypeError::New(env, "escape and delimiter cannot be the same");
139
+ }
140
+ if (config.escape != '\0' && config.escape == config.quote) {
141
+ throw Napi::TypeError::New(env, "escape and quote cannot be the same");
142
+ }
143
+ if (config.comment != '\0' &&
144
+ (config.comment == config.delimiter || config.comment == config.quote || config.comment == config.escape)) {
145
+ throw Napi::TypeError::New(env, "comment cannot conflict with delimiter, quote, or escape");
146
+ }
147
+
148
+ int effective_from = config.from_line > 0 ? config.from_line : 1;
149
+ if (config.to_line != 0 && config.to_line < effective_from) {
150
+ throw Napi::RangeError::New(env, "toLine must be >= fromLine");
151
+ }
152
+ }
153
+
154
+ static void ApplyConfigOptions(Napi::Env env, const Napi::Object &options, cisv_config *config) {
155
+ ValidateSingleCharOption(env, options, "delimiter", &config->delimiter);
156
+ ValidateSingleCharOption(env, options, "quote", &config->quote);
157
+ ValidateSingleCharOption(env, options, "escape", &config->escape, true);
158
+ ValidateSingleCharOption(env, options, "comment", &config->comment, true);
159
+
160
+ ApplyBooleanOption(env, options, "skipEmptyLines", &config->skip_empty_lines);
161
+ ApplyBooleanOption(env, options, "trim", &config->trim);
162
+ ApplyBooleanOption(env, options, "relaxed", &config->relaxed);
163
+ ApplyBooleanOption(env, options, "skipLinesWithError", &config->skip_lines_with_error);
164
+
165
+ ApplySizeOption(env, options, "maxRowSize", &config->max_row_size);
166
+ ApplyLineOption(env, options, "fromLine", &config->from_line);
167
+ ApplyLineOption(env, options, "toLine", &config->to_line);
168
+
169
+ ValidateConfigSemantics(env, *config);
170
+ }
171
+
49
172
  // =============================================================================
50
173
  // SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
51
174
  // Invalid UTF-8 data can cause Napi::String::New to throw or crash
@@ -144,7 +267,7 @@ static napi_value SafeNewStringValue(napi_env env, const char* data, size_t len)
144
267
  if (napi_create_string_latin1(env, data, len, &short_value) == napi_ok && short_value) {
145
268
  return short_value;
146
269
  }
147
- } else {
270
+ } else if (isValidUtf8(data, len)) {
148
271
  if (napi_create_string_utf8(env, data, len, &short_value) == napi_ok && short_value) {
149
272
  return short_value;
150
273
  }
@@ -221,6 +344,136 @@ static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
221
344
  return Napi::String(env, SafeNewStringValue(env, data, len));
222
345
  }
223
346
 
347
+ static napi_value NewLatin1StringValue(napi_env env, const char* data, size_t len) {
348
+ napi_value value = nullptr;
349
+ if (napi_create_string_latin1(env, data, len, &value) == napi_ok && value) {
350
+ return value;
351
+ }
352
+ napi_create_string_utf8(env, data, len, &value);
353
+ return value;
354
+ }
355
+
356
+ static napi_value NewCsvStringValue(
357
+ napi_env env,
358
+ const char* data,
359
+ size_t len,
360
+ bool ascii_only
361
+ ) {
362
+ return ascii_only ? NewLatin1StringValue(env, data, len) : SafeNewStringValue(env, data, len);
363
+ }
364
+
365
+ static bool rowsAreAscii(const std::vector<std::vector<std::string>> &rows) {
366
+ for (const auto &row : rows) {
367
+ for (const auto &field : row) {
368
+ if (!isAllAscii(field.data(), field.size())) {
369
+ return false;
370
+ }
371
+ }
372
+ }
373
+ return true;
374
+ }
375
+
376
+ static bool canUseSimpleLfFastPath(const cisv_config &config) {
377
+ return config.escape == '\0' &&
378
+ config.comment == '\0' &&
379
+ !config.trim &&
380
+ !config.skip_empty_lines &&
381
+ !config.relaxed &&
382
+ !config.skip_lines_with_error &&
383
+ config.max_row_size == 0 &&
384
+ config.from_line <= 1 &&
385
+ config.to_line == 0;
386
+ }
387
+
388
+ static bool tryParseSimpleLfToJsRows(
389
+ napi_env env,
390
+ const uint8_t *data,
391
+ size_t len,
392
+ const cisv_config &config,
393
+ napi_value *out
394
+ ) {
395
+ *out = nullptr;
396
+ if (!data || !canUseSimpleLfFastPath(config)) {
397
+ return false;
398
+ }
399
+ if (len >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF) {
400
+ return false;
401
+ }
402
+
403
+ size_t row_count = 0;
404
+ bool saw_data_in_row = false;
405
+ for (size_t i = 0; i < len; i++) {
406
+ const uint8_t c = data[i];
407
+ if (c == static_cast<uint8_t>(config.quote) || c == '\r' || (c & 0x80)) {
408
+ return false;
409
+ }
410
+ if (c == '\n') {
411
+ row_count++;
412
+ saw_data_in_row = false;
413
+ } else {
414
+ saw_data_in_row = true;
415
+ }
416
+ }
417
+ if (saw_data_in_row || (len > 0 && data[len - 1] != '\n')) {
418
+ row_count++;
419
+ }
420
+
421
+ napi_value rows;
422
+ if (napi_create_array_with_length(env, row_count, &rows) != napi_ok) {
423
+ return false;
424
+ }
425
+
426
+ size_t row_idx = 0;
427
+ size_t line_start = 0;
428
+ while (line_start < len && row_idx < row_count) {
429
+ size_t line_end = line_start;
430
+ while (line_end < len && data[line_end] != '\n') {
431
+ line_end++;
432
+ }
433
+
434
+ size_t field_count = 1;
435
+ for (size_t i = line_start; i < line_end; i++) {
436
+ if (data[i] == static_cast<uint8_t>(config.delimiter)) {
437
+ field_count++;
438
+ }
439
+ }
440
+
441
+ napi_value row;
442
+ if (napi_create_array_with_length(env, field_count, &row) != napi_ok) {
443
+ return false;
444
+ }
445
+
446
+ size_t field_idx = 0;
447
+ size_t field_start = line_start;
448
+ for (size_t i = line_start; i <= line_end; i++) {
449
+ if (i == line_end || data[i] == static_cast<uint8_t>(config.delimiter)) {
450
+ napi_value field = NewLatin1StringValue(
451
+ env,
452
+ reinterpret_cast<const char*>(data + field_start),
453
+ i - field_start);
454
+ if (!field || napi_set_element(env, row, field_idx, field) != napi_ok) {
455
+ return false;
456
+ }
457
+ field_idx++;
458
+ field_start = i + 1;
459
+ }
460
+ }
461
+
462
+ if (napi_set_element(env, rows, row_idx, row) != napi_ok) {
463
+ return false;
464
+ }
465
+ row_idx++;
466
+
467
+ if (line_end == len) {
468
+ break;
469
+ }
470
+ line_start = line_end + 1;
471
+ }
472
+
473
+ *out = rows;
474
+ return true;
475
+ }
476
+
224
477
  // Extended RowCollector that handles transforms
225
478
  struct RowCollector {
226
479
  std::vector<std::string> current;
@@ -427,11 +680,16 @@ static bool collectParallelRows(
427
680
 
428
681
  static Napi::Array rowsToJsArray(Napi::Env env, const std::vector<std::vector<std::string>> &rows) {
429
682
  Napi::Array out = Napi::Array::New(env, rows.size());
683
+ const bool ascii_only = rowsAreAscii(rows);
430
684
  for (size_t i = 0; i < rows.size(); i++) {
431
685
  Napi::Array row = Napi::Array::New(env, rows[i].size());
432
686
  for (size_t j = 0; j < rows[i].size(); j++) {
433
687
  const std::string &field = rows[i][j];
434
- row[j] = SafeNewString(env, field.c_str(), field.length());
688
+ napi_set_element(
689
+ env,
690
+ row,
691
+ j,
692
+ NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
435
693
  }
436
694
  out[i] = row;
437
695
  }
@@ -589,7 +847,7 @@ public:
589
847
  }
590
848
 
591
849
  CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
592
- rc_ = new RowCollector();
850
+ rc_ = nullptr;
593
851
  parser_ = nullptr;
594
852
  parse_time_ = 0;
595
853
  total_bytes_ = 0;
@@ -597,6 +855,8 @@ public:
597
855
  iterator_ = nullptr;
598
856
  batch_result_ = nullptr;
599
857
  stream_buffering_active_ = true;
858
+ pending_buffer_data_ = nullptr;
859
+ pending_buffer_size_ = 0;
600
860
 
601
861
  // Initialize configuration with defaults
602
862
  cisv_config_init(&config_);
@@ -609,6 +869,8 @@ public:
609
869
  ApplyConfigFromObject(options);
610
870
  }
611
871
 
872
+ rc_ = new RowCollector();
873
+
612
874
  // Set callbacks
613
875
  config_.field_cb = field_cb;
614
876
  config_.row_cb = row_cb;
@@ -623,51 +885,9 @@ public:
623
885
  // Apply configuration from JavaScript object
624
886
  void ApplyConfigFromObject(Napi::Object options) {
625
887
  Napi::Env env = options.Env();
626
-
627
- // Delimiter
628
- ValidateSingleCharOption(env, options, "delimiter", &config_.delimiter);
629
-
630
- // Quote character
631
- ValidateSingleCharOption(env, options, "quote", &config_.quote);
632
-
633
- // Escape character
634
- ValidateSingleCharOption(env, options, "escape", &config_.escape, true);
635
-
636
- // Comment character
637
- ValidateSingleCharOption(env, options, "comment", &config_.comment, true);
638
-
639
- // Boolean options
640
- if (options.Has("skipEmptyLines")) {
641
- config_.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
642
- }
643
-
644
- if (options.Has("trim")) {
645
- config_.trim = options.Get("trim").As<Napi::Boolean>();
646
- }
647
-
648
- if (options.Has("relaxed")) {
649
- config_.relaxed = options.Get("relaxed").As<Napi::Boolean>();
650
- }
651
-
652
- if (options.Has("skipLinesWithError")) {
653
- config_.skip_lines_with_error = options.Get("skipLinesWithError").As<Napi::Boolean>();
654
- }
655
-
656
- // Numeric options
657
- if (options.Has("maxRowSize")) {
658
- Napi::Value val = options.Get("maxRowSize");
659
- if (!val.IsNull() && !val.IsUndefined()) {
660
- config_.max_row_size = val.As<Napi::Number>().Uint32Value();
661
- }
662
- }
663
-
664
- if (options.Has("fromLine")) {
665
- config_.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
666
- }
667
-
668
- if (options.Has("toLine")) {
669
- config_.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
670
- }
888
+ cisv_config next = config_;
889
+ ApplyConfigOptions(env, options, &next);
890
+ config_ = next;
671
891
  }
672
892
 
673
893
  // Set configuration after creation
@@ -751,6 +971,9 @@ public:
751
971
  rc_ = nullptr;
752
972
  }
753
973
  clearBatchResult();
974
+ clearFastRows();
975
+ pending_stream_.clear();
976
+ clearPendingBuffer();
754
977
  is_destroyed_ = true;
755
978
  }
756
979
  }
@@ -817,16 +1040,28 @@ public:
817
1040
  throw Napi::Error::New(env, "Parser has been destroyed");
818
1041
  }
819
1042
 
820
- if (info.Length() != 1 || !info[0].IsString()) {
821
- throw Napi::TypeError::New(env, "Expected CSV string");
1043
+ if (info.Length() != 1 || (!info[0].IsString() && !info[0].IsBuffer())) {
1044
+ throw Napi::TypeError::New(env, "Expected CSV string or Buffer");
822
1045
  }
823
1046
 
824
- std::string content = info[0].As<Napi::String>();
1047
+ const char *content_data = nullptr;
1048
+ size_t content_len = 0;
1049
+ std::string content_storage;
1050
+
1051
+ if (info[0].IsBuffer()) {
1052
+ auto buffer = info[0].As<Napi::Buffer<char>>();
1053
+ content_data = buffer.Data();
1054
+ content_len = buffer.Length();
1055
+ } else {
1056
+ content_storage = info[0].As<Napi::String>();
1057
+ content_data = content_storage.data();
1058
+ content_len = content_storage.size();
1059
+ }
825
1060
 
826
1061
  resetRowState();
827
1062
 
828
1063
  if (!hasTransforms()) {
829
- cisv_result_t *batch = cisv_parse_string_batch(content.c_str(), content.length(), &config_);
1064
+ cisv_result_t *batch = cisv_parse_string_batch(content_data, content_len, &config_);
830
1065
  if (!batch) {
831
1066
  throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
832
1067
  }
@@ -843,14 +1078,14 @@ public:
843
1078
  ensureParser(env);
844
1079
 
845
1080
  // Write the string content as chunks
846
- cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
1081
+ cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(content_data), content_len);
847
1082
  cisv_parser_end(parser_);
848
1083
 
849
1084
  // Clear the environment reference after parsing
850
1085
  rc_->env = nullptr;
851
1086
  }
852
1087
 
853
- total_bytes_ = content.length();
1088
+ total_bytes_ = content_len;
854
1089
 
855
1090
  return drainRows(env);
856
1091
  }
@@ -916,6 +1151,7 @@ public:
916
1151
 
917
1152
  // Streaming writes produce row-callback data, not batch results.
918
1153
  clearBatchResult();
1154
+ clearFastRows();
919
1155
 
920
1156
  // Set environment for JS transforms
921
1157
  rc_->env = env;
@@ -945,11 +1181,29 @@ public:
945
1181
  // Buffer chunks when no transforms/iterator are active and batch-parse on end().
946
1182
  // If buffered payload exceeds threshold, flush once to parser and continue streaming.
947
1183
  if (!hasTransforms() && iterator_ == nullptr) {
948
- if (chunk_size > SIZE_MAX - pending_stream_.size()) {
949
- throw Napi::Error::New(env, "Buffered stream size would overflow");
950
- }
951
-
952
1184
  if (stream_buffering_active_) {
1185
+ if (chunk_size > 0 && pending_stream_.empty() && pending_buffer_size_ == 0 && info[0].IsBuffer()) {
1186
+ auto buf = info[0].As<Napi::Buffer<uint8_t>>();
1187
+ pending_buffer_ref_ = Napi::Persistent(buf.As<Napi::Object>());
1188
+ pending_buffer_data_ = buf.Data();
1189
+ pending_buffer_size_ = buf.Length();
1190
+ total_bytes_ += chunk_size;
1191
+ return;
1192
+ }
1193
+
1194
+ if (pending_buffer_size_ > 0) {
1195
+ if (pending_buffer_size_ > SIZE_MAX - pending_stream_.size()) {
1196
+ throw Napi::Error::New(env, "Buffered stream size would overflow");
1197
+ }
1198
+ pending_stream_.append(
1199
+ reinterpret_cast<const char*>(pending_buffer_data_),
1200
+ pending_buffer_size_);
1201
+ clearPendingBuffer();
1202
+ }
1203
+
1204
+ if (chunk_size > SIZE_MAX - pending_stream_.size()) {
1205
+ throw Napi::Error::New(env, "Buffered stream size would overflow");
1206
+ }
953
1207
  pending_stream_.append(reinterpret_cast<const char*>(chunk_data), chunk_size);
954
1208
  total_bytes_ += chunk_size;
955
1209
 
@@ -959,7 +1213,7 @@ public:
959
1213
  }
960
1214
  return;
961
1215
  }
962
- } else if (!pending_stream_.empty()) {
1216
+ } else if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
963
1217
  flushPendingStreamToParser();
964
1218
  stream_buffering_active_ = false;
965
1219
  }
@@ -977,6 +1231,21 @@ public:
977
1231
  if (stream_buffering_active_ && !pending_stream_.empty() &&
978
1232
  !hasTransforms() && iterator_ == nullptr &&
979
1233
  rc_ && rc_->rows.empty() && rc_->current.empty()) {
1234
+ napi_value fast_rows = nullptr;
1235
+ if (tryParseSimpleLfToJsRows(
1236
+ info.Env(),
1237
+ reinterpret_cast<const uint8_t*>(pending_stream_.data()),
1238
+ pending_stream_.size(),
1239
+ config_,
1240
+ &fast_rows)) {
1241
+ clearBatchResult();
1242
+ clearFastRows();
1243
+ fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
1244
+ pending_stream_.clear();
1245
+ rc_->env = nullptr;
1246
+ return;
1247
+ }
1248
+
980
1249
  cisv_result_t *batch = cisv_parse_string_batch(
981
1250
  pending_stream_.data(), pending_stream_.size(), &config_);
982
1251
  if (!batch) {
@@ -988,13 +1257,55 @@ public:
988
1257
  throw Napi::Error::New(info.Env(), msg);
989
1258
  }
990
1259
  clearBatchResult();
1260
+ clearFastRows();
991
1261
  batch_result_ = batch;
992
1262
  pending_stream_.clear();
993
1263
  rc_->env = nullptr;
994
1264
  return;
995
1265
  }
996
1266
 
997
- if (!pending_stream_.empty()) {
1267
+ if (stream_buffering_active_ && pending_buffer_size_ > 0 &&
1268
+ pending_stream_.empty() &&
1269
+ !hasTransforms() && iterator_ == nullptr &&
1270
+ rc_ && rc_->rows.empty() && rc_->current.empty()) {
1271
+ napi_value fast_rows = nullptr;
1272
+ if (tryParseSimpleLfToJsRows(
1273
+ info.Env(),
1274
+ pending_buffer_data_,
1275
+ pending_buffer_size_,
1276
+ config_,
1277
+ &fast_rows)) {
1278
+ clearBatchResult();
1279
+ clearFastRows();
1280
+ fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
1281
+ clearPendingBuffer();
1282
+ rc_->env = nullptr;
1283
+ return;
1284
+ }
1285
+
1286
+ cisv_result_t *batch = cisv_parse_string_batch(
1287
+ reinterpret_cast<const char*>(pending_buffer_data_),
1288
+ pending_buffer_size_,
1289
+ &config_);
1290
+ if (!batch) {
1291
+ clearPendingBuffer();
1292
+ throw Napi::Error::New(info.Env(), "parse error: " + std::string(strerror(errno)));
1293
+ }
1294
+ if (batch->error_code != 0) {
1295
+ std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
1296
+ cisv_result_free(batch);
1297
+ clearPendingBuffer();
1298
+ throw Napi::Error::New(info.Env(), msg);
1299
+ }
1300
+ clearBatchResult();
1301
+ clearFastRows();
1302
+ batch_result_ = batch;
1303
+ clearPendingBuffer();
1304
+ rc_->env = nullptr;
1305
+ return;
1306
+ }
1307
+
1308
+ if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
998
1309
  flushPendingStreamToParser();
999
1310
  stream_buffering_active_ = false;
1000
1311
  }
@@ -1016,18 +1327,24 @@ public:
1016
1327
  flushPendingStreamToParser();
1017
1328
  stream_buffering_active_ = false;
1018
1329
  }
1330
+ if (pending_buffer_size_ > 0) {
1331
+ flushPendingStreamToParser();
1332
+ stream_buffering_active_ = false;
1333
+ }
1019
1334
  return drainRows(info.Env());
1020
1335
  }
1021
1336
 
1022
1337
  void Clear(const Napi::CallbackInfo &info) {
1023
1338
  if (!is_destroyed_ && rc_) {
1024
1339
  clearBatchResult();
1340
+ clearFastRows();
1025
1341
  rc_->rows.clear();
1026
1342
  rc_->current.clear();
1027
1343
  rc_->current_field_index = 0;
1028
1344
  total_bytes_ = 0;
1029
1345
  parse_time_ = 0;
1030
1346
  pending_stream_.clear();
1347
+ clearPendingBuffer();
1031
1348
  stream_buffering_active_ = true;
1032
1349
  // Also clear the environment reference
1033
1350
  rc_->env = nullptr;
@@ -1074,8 +1391,6 @@ public:
1074
1391
  type = TRANSFORM_TO_INT;
1075
1392
  } else if (transform_type == "to_float" || transform_type == "float") {
1076
1393
  type = TRANSFORM_TO_FLOAT;
1077
- } else if (transform_type == "hash_sha256" || transform_type == "sha256") {
1078
- type = TRANSFORM_HASH_SHA256;
1079
1394
  } else if (transform_type == "base64_encode" || transform_type == "base64") {
1080
1395
  type = TRANSFORM_BASE64_ENCODE;
1081
1396
  } else {
@@ -1185,8 +1500,6 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
1185
1500
  type = TRANSFORM_TO_INT;
1186
1501
  } else if (transform_type == "to_float" || transform_type == "float") {
1187
1502
  type = TRANSFORM_TO_FLOAT;
1188
- } else if (transform_type == "hash_sha256" || transform_type == "sha256") {
1189
- type = TRANSFORM_HASH_SHA256;
1190
1503
  } else if (transform_type == "base64_encode" || transform_type == "base64") {
1191
1504
  type = TRANSFORM_BASE64_ENCODE;
1192
1505
  } else {
@@ -1355,18 +1668,28 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1355
1668
 
1356
1669
  std::string field_name = info[0].As<Napi::String>();
1357
1670
 
1671
+ int field_index = -1;
1672
+
1358
1673
  // Remove from JavaScript transforms by finding the field index
1359
1674
  if (rc_->pipeline && rc_->pipeline->header_fields) {
1360
1675
  for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
1361
1676
  if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
1362
- rc_->js_transforms.erase(i);
1677
+ field_index = static_cast<int>(i);
1678
+ auto it = rc_->js_transforms.find(field_index);
1679
+ if (it != rc_->js_transforms.end()) {
1680
+ if (!it->second.IsEmpty()) {
1681
+ it->second.Reset();
1682
+ }
1683
+ rc_->js_transforms.erase(it);
1684
+ }
1363
1685
  break;
1364
1686
  }
1365
1687
  }
1366
1688
  }
1367
1689
 
1368
- // TODO: Implement removal of C transforms by name in cisv_transformer.c
1369
- // For now, this only removes JS transforms
1690
+ if (field_index >= 0 && rc_->pipeline) {
1691
+ cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
1692
+ }
1370
1693
 
1371
1694
  return info.This();
1372
1695
  }
@@ -1385,10 +1708,17 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1385
1708
  int field_index = info[0].As<Napi::Number>().Int32Value();
1386
1709
 
1387
1710
  // Remove from JavaScript transforms
1388
- rc_->js_transforms.erase(field_index);
1711
+ auto it = rc_->js_transforms.find(field_index);
1712
+ if (it != rc_->js_transforms.end()) {
1713
+ if (!it->second.IsEmpty()) {
1714
+ it->second.Reset();
1715
+ }
1716
+ rc_->js_transforms.erase(it);
1717
+ }
1389
1718
 
1390
- // TODO: Implement removal of C transforms in cisv_transformer.c
1391
- // For now, this only removes JS transforms
1719
+ if (rc_->pipeline) {
1720
+ cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
1721
+ }
1392
1722
 
1393
1723
  return info.This();
1394
1724
  }
@@ -1515,16 +1845,29 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1515
1845
  result.Set("jsTransformCount", Napi::Number::New(env, js_transform_count));
1516
1846
 
1517
1847
  // List field indices with transforms
1518
- Napi::Array fields = Napi::Array::New(env);
1519
- size_t idx = 0;
1848
+ std::vector<int> field_indices;
1849
+ auto add_field_index = [&field_indices](int field_index) {
1850
+ if (std::find(field_indices.begin(), field_indices.end(), field_index) == field_indices.end()) {
1851
+ field_indices.push_back(field_index);
1852
+ }
1853
+ };
1520
1854
 
1521
- // Add JS transform field indices
1855
+ if (rc_ && rc_->pipeline) {
1856
+ for (size_t i = 0; i < rc_->pipeline->count; i++) {
1857
+ add_field_index(rc_->pipeline->transforms[i].field_index);
1858
+ }
1859
+ }
1522
1860
  if (rc_) {
1523
1861
  for (const auto& pair : rc_->js_transforms) {
1524
- fields[idx++] = Napi::Number::New(env, pair.first);
1862
+ add_field_index(pair.first);
1525
1863
  }
1526
1864
  }
1527
1865
 
1866
+ Napi::Array fields = Napi::Array::New(env, field_indices.size());
1867
+ for (size_t i = 0; i < field_indices.size(); i++) {
1868
+ fields[i] = Napi::Number::New(env, field_indices[i]);
1869
+ }
1870
+
1528
1871
  result.Set("fieldIndices", fields);
1529
1872
 
1530
1873
  return result;
@@ -1540,7 +1883,16 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1540
1883
  Napi::Object stats = Napi::Object::New(env);
1541
1884
  size_t row_count = 0;
1542
1885
  size_t field_count = 0;
1543
- if (batch_result_) {
1886
+ if (!fast_rows_ref_.IsEmpty()) {
1887
+ Napi::Array rows = fast_rows_ref_.Value().As<Napi::Array>();
1888
+ row_count = rows.Length();
1889
+ if (row_count > 0) {
1890
+ Napi::Value first = rows.Get(static_cast<uint32_t>(0));
1891
+ if (first.IsArray()) {
1892
+ field_count = first.As<Napi::Array>().Length();
1893
+ }
1894
+ }
1895
+ } else if (batch_result_) {
1544
1896
  row_count = batch_result_->row_count;
1545
1897
  if (batch_result_->row_count > 0) {
1546
1898
  field_count = batch_result_->rows[0].field_count;
@@ -1590,25 +1942,13 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1590
1942
  cisv_config_init(&config);
1591
1943
 
1592
1944
  // Apply configuration if provided
1945
+ if (info.Length() > 1 && !info[1].IsNull() && !info[1].IsUndefined() && !info[1].IsObject()) {
1946
+ throw Napi::TypeError::New(env, "Config must be an object");
1947
+ }
1948
+
1593
1949
  if (info.Length() > 1 && info[1].IsObject()) {
1594
1950
  Napi::Object options = info[1].As<Napi::Object>();
1595
-
1596
- // Apply same configuration parsing logic
1597
- ValidateSingleCharOption(env, options, "delimiter", &config.delimiter);
1598
- ValidateSingleCharOption(env, options, "quote", &config.quote);
1599
- ValidateSingleCharOption(env, options, "comment", &config.comment, true);
1600
-
1601
- if (options.Has("skipEmptyLines")) {
1602
- config.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
1603
- }
1604
-
1605
- if (options.Has("fromLine")) {
1606
- config.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
1607
- }
1608
-
1609
- if (options.Has("toLine")) {
1610
- config.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
1611
- }
1951
+ ApplyConfigOptions(env, options, &config);
1612
1952
  }
1613
1953
 
1614
1954
  size_t count = cisv_parser_count_rows_with_config(path.c_str(), &config);
@@ -1684,8 +2024,15 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1684
2024
 
1685
2025
  napi_value row;
1686
2026
  napi_create_array_with_length(env, field_count, &row);
2027
+ bool ascii_only = true;
1687
2028
  for (size_t i = 0; i < field_count; i++) {
1688
- napi_set_element(env, row, i, SafeNewStringValue(env, fields[i], lengths[i]));
2029
+ if (!isAllAscii(fields[i], lengths[i])) {
2030
+ ascii_only = false;
2031
+ break;
2032
+ }
2033
+ }
2034
+ for (size_t i = 0; i < field_count; i++) {
2035
+ napi_set_element(env, row, i, NewCsvStringValue(env, fields[i], lengths[i], ascii_only));
1689
2036
  }
1690
2037
 
1691
2038
  return Napi::Value(env, row);
@@ -1734,6 +2081,20 @@ private:
1734
2081
  }
1735
2082
  }
1736
2083
 
2084
+ void clearFastRows() {
2085
+ if (!fast_rows_ref_.IsEmpty()) {
2086
+ fast_rows_ref_.Reset();
2087
+ }
2088
+ }
2089
+
2090
+ void clearPendingBuffer() {
2091
+ if (!pending_buffer_ref_.IsEmpty()) {
2092
+ pending_buffer_ref_.Reset();
2093
+ }
2094
+ pending_buffer_data_ = nullptr;
2095
+ pending_buffer_size_ = 0;
2096
+ }
2097
+
1737
2098
  bool hasTransforms() const {
1738
2099
  bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;
1739
2100
  bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
@@ -1742,7 +2103,9 @@ private:
1742
2103
 
1743
2104
  void resetRowState() {
1744
2105
  clearBatchResult();
2106
+ clearFastRows();
1745
2107
  pending_stream_.clear();
2108
+ clearPendingBuffer();
1746
2109
  stream_buffering_active_ = true;
1747
2110
  if (!rc_) return;
1748
2111
  rc_->rows.clear();
@@ -1751,6 +2114,11 @@ private:
1751
2114
  }
1752
2115
 
1753
2116
  void flushPendingStreamToParser() {
2117
+ if (pending_buffer_size_ > 0) {
2118
+ ensureParser(Env());
2119
+ cisv_parser_write(parser_, pending_buffer_data_, pending_buffer_size_);
2120
+ clearPendingBuffer();
2121
+ }
1754
2122
  if (pending_stream_.empty()) {
1755
2123
  return;
1756
2124
  }
@@ -1779,15 +2147,27 @@ private:
1779
2147
  }
1780
2148
 
1781
2149
  Napi::Value drainRows(Napi::Env env) {
2150
+ if (!fast_rows_ref_.IsEmpty()) {
2151
+ return fast_rows_ref_.Value();
2152
+ }
2153
+
1782
2154
  if (batch_result_) {
1783
2155
  napi_value rows;
1784
2156
  napi_create_array_with_length(env, batch_result_->row_count, &rows);
2157
+ const bool ascii_only =
2158
+ !batch_result_->field_data ||
2159
+ batch_result_->field_data_size == 0 ||
2160
+ isAllAscii(batch_result_->field_data, batch_result_->field_data_size);
1785
2161
  for (size_t i = 0; i < batch_result_->row_count; ++i) {
1786
2162
  const cisv_row_t *src_row = &batch_result_->rows[i];
1787
2163
  napi_value row;
1788
2164
  napi_create_array_with_length(env, src_row->field_count, &row);
1789
2165
  for (size_t j = 0; j < src_row->field_count; ++j) {
1790
- napi_set_element(env, row, j, SafeNewStringValue(env, src_row->fields[j], src_row->field_lengths[j]));
2166
+ napi_set_element(
2167
+ env,
2168
+ row,
2169
+ j,
2170
+ NewCsvStringValue(env, src_row->fields[j], src_row->field_lengths[j], ascii_only));
1791
2171
  }
1792
2172
  napi_set_element(env, rows, i, row);
1793
2173
  }
@@ -1800,6 +2180,7 @@ private:
1800
2180
 
1801
2181
  napi_value rows;
1802
2182
  napi_create_array_with_length(env, rc_->rows.size(), &rows);
2183
+ const bool ascii_only = rowsAreAscii(rc_->rows);
1803
2184
 
1804
2185
  for (size_t i = 0; i < rc_->rows.size(); ++i) {
1805
2186
  napi_value row;
@@ -1807,7 +2188,11 @@ private:
1807
2188
  for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
1808
2189
  // SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
1809
2190
  const std::string& field = rc_->rows[i][j];
1810
- napi_set_element(env, row, j, SafeNewStringValue(env, field.c_str(), field.length()));
2191
+ napi_set_element(
2192
+ env,
2193
+ row,
2194
+ j,
2195
+ NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
1811
2196
  }
1812
2197
  napi_set_element(env, rows, i, row);
1813
2198
  }
@@ -1826,7 +2211,11 @@ private:
1826
2211
  bool is_destroyed_;
1827
2212
  cisv_iterator_t *iterator_; // For row-by-row iteration
1828
2213
  cisv_result_t *batch_result_;
2214
+ Napi::ObjectReference fast_rows_ref_;
1829
2215
  std::string pending_stream_;
2216
+ Napi::ObjectReference pending_buffer_ref_;
2217
+ const uint8_t *pending_buffer_data_;
2218
+ size_t pending_buffer_size_;
1830
2219
  bool stream_buffering_active_;
1831
2220
  static constexpr size_t kStreamBufferLimitBytes = 8 * 1024 * 1024;
1832
2221
  };
@@ -1836,7 +2225,7 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
1836
2225
  CisvParser::Init(env, exports);
1837
2226
 
1838
2227
  // Add version info
1839
- exports.Set("version", Napi::String::New(env, "0.4.8"));
2228
+ exports.Set("version", Napi::String::New(env, "0.4.9"));
1840
2229
 
1841
2230
  // Add transform type constants
1842
2231
  Napi::Object transformTypes = Napi::Object::New(env);
@@ -1845,7 +2234,6 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
1845
2234
  transformTypes.Set("TRIM", Napi::String::New(env, "trim"));
1846
2235
  transformTypes.Set("TO_INT", Napi::String::New(env, "to_int"));
1847
2236
  transformTypes.Set("TO_FLOAT", Napi::String::New(env, "to_float"));
1848
- transformTypes.Set("HASH_SHA256", Napi::String::New(env, "hash_sha256"));
1849
2237
  transformTypes.Set("BASE64_ENCODE", Napi::String::New(env, "base64_encode"));
1850
2238
  exports.Set("TransformType", transformTypes);
1851
2239
 
package/cisv/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  const path = require('path');
2
2
  const addon = require('node-gyp-build')(path.join(__dirname, '..'));
3
+ const { wrapAddon } = require('./wrapper');
3
4
 
4
- module.exports = addon;
5
+ module.exports = wrapAddon(addon);
package/cisv/index.mjs CHANGED
@@ -7,6 +7,10 @@ const require = createRequire(import.meta.url);
7
7
 
8
8
  const gyp = require('node-gyp-build');
9
9
  const addon = gyp(path.join(__dirname, '..'));
10
+ const { wrapAddon } = require('./wrapper.js');
11
+ const wrapped = wrapAddon(addon);
10
12
 
11
- export const cisvParser = addon.cisvParser;
12
- export default addon;
13
+ export const cisvParser = wrapped.cisvParser;
14
+ export const TransformType = wrapped.TransformType;
15
+ export const version = wrapped.version;
16
+ export default wrapped;
@@ -8,7 +8,6 @@ declare module 'cisv' {
8
8
  TRIM = 'trim',
9
9
  TO_INT = 'to_int',
10
10
  TO_FLOAT = 'to_float',
11
- HASH_SHA256 = 'hash_sha256',
12
11
  BASE64_ENCODE = 'base64_encode',
13
12
  CUSTOM = 'custom'
14
13
  }
@@ -88,11 +87,11 @@ declare module 'cisv' {
88
87
  parseParallel(path: string, numThreads?: number): Promise<string[][]>;
89
88
 
90
89
  /**
91
- * Parse CSV string content
92
- * @param content CSV string content
90
+ * Parse CSV string or Buffer content
91
+ * @param content CSV content as string or Buffer
93
92
  * @returns Array of rows with string values
94
93
  */
95
- parseString(content: string): string[][];
94
+ parseString(content: Buffer | string): string[][];
96
95
 
97
96
  /**
98
97
  * Write chunk of CSV data (for streaming)
@@ -0,0 +1,371 @@
1
+ 'use strict';
2
+
3
+ const { isAscii } = require('buffer');
4
+
5
/**
 * Decide whether a parser configuration may use the pure-JavaScript fast path.
 *
 * The fast path only understands a one-character delimiter and quote with
 * every other parsing feature left at its default. Anything else yields
 * `null`, which the wrapper class treats as "always use the native parser".
 *
 * @param {object|null|undefined} options Parser options as passed by the caller.
 * @returns {{delimiter: string, quote: string}|null} The fast-path settings, or
 *   `null` when the options require the native parser.
 */
function fastConfigFromOptions(options) {
  if (options == null) {
    return { delimiter: ',', quote: '"' };
  }
  if (typeof options !== 'object') {
    return null;
  }

  const delimiter = options.delimiter == null ? ',' : options.delimiter;
  const quote = options.quote == null ? '"' : options.quote;
  if (typeof delimiter !== 'string' || delimiter.length !== 1) {
    return null;
  }
  if (typeof quote !== 'string' || quote.length !== 1) {
    return null;
  }
  // A line feed cannot double as the field separator here: the chunk analysis
  // tests for the delimiter before it tests for '\n', while the row splitters
  // always break rows on '\n', so the two would disagree about the layout.
  if (delimiter === '\n') {
    return null;
  }

  // Any escape/comment character changes tokenisation rules the fast path
  // does not implement.
  if (options.escape != null && options.escape !== '') {
    return null;
  }
  if (options.comment != null && options.comment !== '') {
    return null;
  }
  if (options.trim || options.skipEmptyLines || options.relaxed || options.skipLinesWithError) {
    return null;
  }
  if (options.maxRowSize != null && options.maxRowSize !== 0) {
    return null;
  }

  // Only "start at the first line, no upper bound" is supported.
  const fromLine = options.fromLine == null ? 1 : options.fromLine;
  const toLine = options.toLine == null ? 0 : options.toLine;
  if (fromLine !== 0 && fromLine !== 1) {
    return null;
  }
  if (toLine !== 0) {
    return null;
  }

  return { delimiter, quote };
}
46
+
47
/**
 * Turn one buffered chunk into a string.
 *
 * Buffers are decoded as latin1 (one character per byte); any other value is
 * handed back untouched.
 *
 * @param {Buffer|string} chunk
 * @returns {string}
 */
function chunkToLatin1String(chunk) {
  if (!Buffer.isBuffer(chunk)) {
    return chunk;
  }
  return chunk.toString('latin1');
}
50
+
51
/**
 * Join a list of buffered chunks into a single string.
 *
 * Each chunk is converted with `chunkToLatin1String`. A list holding exactly
 * one chunk returns that chunk's conversion directly, without concatenation.
 *
 * @param {Array<Buffer|string>} chunks
 * @returns {string}
 */
function chunksToLatin1String(chunks) {
  if (chunks.length === 1) {
    const [only] = chunks;
    return chunkToLatin1String(only);
  }

  let joined = '';
  for (const piece of chunks) {
    joined += chunkToLatin1String(piece);
  }
  return joined;
}
62
+
63
/**
 * Check whether a string chunk is plain enough for the fast path.
 *
 * A chunk qualifies when it does not begin with the three characters
 * 0xEF 0xBB 0xBF (a UTF-8 byte-order mark as seen through latin1 decoding)
 * and contains no quote character, no carriage return and no code unit
 * above 127.
 *
 * @param {string} data  Chunk contents.
 * @param {string} quote Configured quote character; only its first code unit is used.
 * @returns {boolean}
 */
function isSimpleAsciiLf(data, quote) {
  if (data.startsWith('\xEF\xBB\xBF')) {
    return false;
  }

  const CARRIAGE_RETURN = 13;
  const ASCII_MAX = 127;
  const quoteUnit = quote.charCodeAt(0);
  const total = data.length;

  let idx = 0;
  while (idx < total) {
    const unit = data.charCodeAt(idx);
    const disqualifies = unit > ASCII_MAX || unit === CARRIAGE_RETURN || unit === quoteUnit;
    if (disqualifies) {
      return false;
    }
    idx += 1;
  }
  return true;
}
80
+
81
/**
 * Check whether every buffered chunk is plain enough for the fast path:
 * pure ASCII, no quote character, no carriage return.
 *
 * Buffer chunks are checked byte-wise; string chunks are delegated to
 * `isSimpleAsciiLf`.
 *
 * @param {Array<Buffer|string>} chunks Buffered chunks in write order.
 * @param {string} quote Configured quote character; only its first code unit is used.
 * @returns {boolean} `true` when all chunks qualify (vacuously for an empty list).
 */
function chunksAreSimpleAsciiLf(chunks, quote) {
  const quoteCode = quote.charCodeAt(0);

  // `buffer.isAscii` is only exported by newer Node.js releases (18.15+ / 19.6+).
  // On older runtimes the destructured import is undefined, so fall back to a
  // plain byte scan instead of throwing a TypeError.
  const bufferIsAscii = typeof isAscii === 'function'
    ? isAscii
    : (bytes) => {
      for (let k = 0; k < bytes.length; k++) {
        if (bytes[k] > 127) {
          return false;
        }
      }
      return true;
    };

  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];
    if (Buffer.isBuffer(chunk)) {
      // A leading UTF-8 byte-order mark (EF BB BF) needs no separate test:
      // all three bytes are above 127 and therefore fail the ASCII check.
      if (!bufferIsAscii(chunk) || chunk.indexOf(quoteCode) !== -1 || chunk.indexOf(13) !== -1) {
        return false;
      }
    } else if (!isSimpleAsciiLf(chunk, quote)) {
      return false;
    }
  }
  return true;
}
102
+
103
/**
 * Scan a single chunk once and report whether it is fast-path material and
 * whether all of its rows have the same number of columns.
 *
 * Result fields:
 *  - `simple`:  no leading EF BB BF sequence, no quote, no carriage return,
 *               nothing above 127.
 *  - `uniform`: every line-feed-terminated row, plus an unterminated final
 *               row that has content, has the same column count.
 *  - `rows` / `cols`: row and column counts; both are 0 unless the chunk is
 *               simple and uniform.
 *
 * @param {Buffer|string} chunk
 * @param {string} delimiter Field separator; only its first code unit is used.
 * @param {string} quote     Quote character; only its first code unit is used.
 * @returns {{simple: boolean, uniform: boolean, rows: number, cols: number}}
 */
function analyzeSingleSimpleChunk(chunk, delimiter, quote) {
  const LINE_FEED = 10;
  const CARRIAGE_RETURN = 13;
  const ASCII_MAX = 127;

  const separatorCode = delimiter.charCodeAt(0);
  const quoteCode = quote.charCodeAt(0);
  const verdict = (simple, uniform, rows, cols) => ({ simple, uniform, rows, cols });

  // Buffers are read byte-wise, strings code unit by code unit; everything
  // after this accessor is shared between the two input kinds.
  const unitAt = Buffer.isBuffer(chunk)
    ? (index) => chunk[index]
    : (index) => chunk.charCodeAt(index);
  const size = chunk.length;

  if (size >= 3 && unitAt(0) === 0xEF && unitAt(1) === 0xBB && unitAt(2) === 0xBF) {
    return verdict(false, false, 0, 0);
  }

  let expectedCols = -1;   // column count of the first completed row, -1 until known
  let lineCols = 1;        // columns seen so far on the current line
  let lineHasContent = false;
  let rowTotal = 0;

  for (let index = 0; index < size; index += 1) {
    const unit = unitAt(index);
    if (unit === quoteCode || unit === CARRIAGE_RETURN || unit > ASCII_MAX) {
      return verdict(false, false, 0, 0);
    }

    if (unit === separatorCode) {
      lineCols += 1;
      lineHasContent = true;
      continue;
    }
    if (unit !== LINE_FEED) {
      lineHasContent = true;
      continue;
    }

    // Line feed: close the current row and compare its width with the first.
    if (expectedCols === -1) {
      expectedCols = lineCols;
    } else if (lineCols !== expectedCols) {
      return verdict(true, false, 0, 0);
    }
    rowTotal += 1;
    lineCols = 1;
    lineHasContent = false;
  }

  // An unterminated final line only counts as a row when it has content.
  if (lineHasContent) {
    if (expectedCols !== -1 && lineCols !== expectedCols) {
      return verdict(true, false, 0, 0);
    }
    rowTotal += 1;
    if (expectedCols === -1) {
      expectedCols = lineCols;
    }
  }

  return verdict(true, true, rowTotal, Math.max(expectedCols, 0));
}
178
+
179
/**
 * Split quote-free, LF-terminated CSV text into rows of fields.
 *
 * One trailing line feed is treated as a terminator rather than the start of
 * another row. Empty input yields no rows; input that is only a line feed
 * yields a single row holding one empty field.
 *
 * @param {string} data      CSV text without quotes or carriage returns.
 * @param {string} delimiter Field separator.
 * @returns {string[][]}
 */
function parseSimpleRows(data, delimiter) {
  if (data.length === 0) {
    return [];
  }

  const text = data.endsWith('\n') ? data.slice(0, -1) : data;
  if (text.length === 0) {
    return [['']];
  }

  return text.split('\n').map((line) => line.split(delimiter));
}
197
+
198
/**
 * Split quote-free, LF-terminated CSV text whose rows all have the same
 * number of columns.
 *
 * One trailing line feed is treated as a terminator. When both `rowCount`
 * and `cols` are positive they are trusted and used to pre-size the result;
 * otherwise the column count is taken from the first line and rows are
 * appended as they are found.
 *
 * @param {string} data      CSV text without quotes or carriage returns.
 * @param {string} delimiter Single-character field separator.
 * @param {number} rowCount  Expected number of rows, or 0 when unknown.
 * @param {number} cols      Expected columns per row, or 0 when unknown.
 * @returns {string[][]}
 */
function parseUniformRows(data, delimiter, rowCount, cols) {
  let end = data.length;
  if (end === 0) {
    return [];
  }
  if (data.charCodeAt(end - 1) === 10) {
    end--;
  }
  if (end === 0) {
    return [['']];
  }

  const usePrealloc = rowCount > 0 && cols > 0;
  if (!usePrealloc) {
    // Derive the column count from the first line only; the caller guarantees
    // every other line matches it.
    cols = 1;
    for (let i = 0; i < end && data.charCodeAt(i) !== 10; i++) {
      if (data[i] === delimiter) {
        cols++;
      }
    }
  }

  // Input ending in "\n\n" leaves `pos === end` after the last full row: one
  // more, empty, line is still pending. That line is a valid uniform row only
  // for single-column data, where it must be emitted so the result agrees
  // with parseSimpleRows and with the row count reported by the chunk
  // analysis (otherwise a pre-sized result array is left with a hole).
  const emitTrailingBlankRow = cols === 1;

  const rows = usePrealloc ? new Array(rowCount) : [];
  let rowIdx = 0;
  let pos = 0;
  while (pos < end || (pos === end && emitTrailingBlankRow)) {
    const row = new Array(cols);
    // The first cols-1 fields each end at the next delimiter...
    for (let col = 0; col < cols - 1; col++) {
      const next = data.indexOf(delimiter, pos);
      row[col] = data.slice(pos, next);
      pos = next + 1;
    }

    // ...and the last field runs to the end of the line (or of the data).
    let lineEnd = data.indexOf('\n', pos);
    if (lineEnd === -1 || lineEnd > end) {
      lineEnd = end;
    }
    row[cols - 1] = data.slice(pos, lineEnd);
    if (usePrealloc) {
      rows[rowIdx++] = row;
    } else {
      rows.push(row);
    }
    pos = lineEnd + 1;
  }

  return rows;
}
246
+
247
/**
 * Wrap the native addon so that simple inputs can be parsed in JavaScript.
 *
 * The returned object has every property of `addon`, with `cisvParser`
 * replaced by a subclass of the native parser. The subclass buffers written
 * chunks while a fast-path configuration is active; at `end()` it parses them
 * in JavaScript when they are plain ASCII/LF without quotes, and otherwise
 * replays them into the native parser.
 *
 * @param {object} addon Native module exposing a `cisvParser` class.
 * @returns {object} Shallow copy of `addon` with the wrapped `cisvParser`.
 */
function wrapAddon(addon) {
  const NativeParser = addon.cisvParser;

  // Text at or above this length gets a pre-sized result array.
  const PREALLOC_THRESHOLD = 64 * 1024 * 1024;

  class cisvParser extends NativeParser {
    constructor(options) {
      super(options);
      // null means "fast path disabled for this configuration".
      this._cisvFastConfig = fastConfigFromOptions(options);
      this._cisvFastChunks = [];
      this._cisvFastRows = null;
      // Once true, every chunk goes straight to the native parser.
      this._cisvNativeStream = false;
    }

    /** Replay all buffered chunks into the native parser and switch to native streaming. */
    _flushFastChunksToNative() {
      const buffered = this._cisvFastChunks;
      if (buffered.length === 0) {
        return;
      }
      this._cisvFastChunks = [];
      this._cisvNativeStream = true;
      for (const piece of buffered) {
        super.write(piece);
      }
    }

    /** Buffer a Buffer/string chunk for the fast path, or forward it to the native parser. */
    write(chunk) {
      this._cisvFastRows = null;

      const bufferable = typeof chunk === 'string' || Buffer.isBuffer(chunk);
      if (bufferable && this._cisvFastConfig && !this._cisvNativeStream) {
        this._cisvFastChunks.push(chunk);
        return undefined;
      }

      this._flushFastChunksToNative();
      this._cisvNativeStream = true;
      return super.write(chunk);
    }

    /** Finish the input: parse buffered chunks in JavaScript when possible, else finish natively. */
    end() {
      const buffered = this._cisvFastChunks;
      const fastPathOpen = Boolean(this._cisvFastConfig) &&
        !this._cisvNativeStream &&
        buffered.length > 0;

      if (fastPathOpen) {
        const { delimiter, quote } = this._cisvFastConfig;

        // Only a single chunk gets the full row/column analysis; several
        // chunks are merely checked for simplicity and never treated as uniform.
        const analysis = buffered.length === 1
          ? analyzeSingleSimpleChunk(buffered[0], delimiter, quote)
          : { simple: chunksAreSimpleAsciiLf(buffered, quote), uniform: false, rows: 0, cols: 0 };

        if (analysis.simple) {
          const text = chunksToLatin1String(buffered);
          if (analysis.uniform) {
            const presize = text.length >= PREALLOC_THRESHOLD;
            this._cisvFastRows = parseUniformRows(
              text,
              delimiter,
              presize ? analysis.rows : 0,
              presize ? analysis.cols : 0);
          } else {
            this._cisvFastRows = parseSimpleRows(text, delimiter);
          }
          this._cisvFastChunks = [];
          return undefined;
        }
      }

      this._flushFastChunksToNative();
      this._cisvNativeStream = true;
      return super.end();
    }

    /** Return the rows produced by the fast path if there are any, else the native rows. */
    getRows() {
      return this._cisvFastRows !== null ? this._cisvFastRows : super.getRows();
    }

    /** Drop all wrapper state, re-enable buffering, and clear the native parser. */
    clear() {
      this._cisvNativeStream = false;
      this._cisvFastRows = null;
      this._cisvFastChunks = [];
      return super.clear();
    }

    /** Push pending chunks to the native parser, then re-evaluate fast-path eligibility for the new options. */
    setConfig(options) {
      this._flushFastChunksToNative();
      this._cisvFastRows = null;
      this._cisvFastConfig = fastConfigFromOptions(options);
      return super.setConfig(options);
    }

    /** Registering a transform disables the fast path; pending chunks go to the native parser first. */
    transform(...args) {
      this._flushFastChunksToNative();
      this._cisvFastConfig = null;
      return super.transform(...args);
    }

    /** Same as `transform`, for transforms registered by name. */
    transformByName(...args) {
      this._flushFastChunksToNative();
      this._cisvFastConfig = null;
      return super.transformByName(...args);
    }

    /** Release buffered chunks and fast-path rows, then destroy the native parser. */
    destroy() {
      this._cisvFastRows = null;
      this._cisvFastChunks = [];
      return super.destroy();
    }
  }

  return {
    ...addon,
    cisvParser,
  };
}
370
+
371
+ module.exports = { wrapAddon };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cisv",
3
- "version": "0.4.8",
3
+ "version": "0.4.10",
4
4
  "description": "The csv parser on steroids.",
5
5
  "author": "sanix<s4nixd@gmail.com>",
6
6
  "main": "./build/Release/cisv.node",