cisv 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -344,6 +344,136 @@ static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
344
344
  return Napi::String(env, SafeNewStringValue(env, data, len));
345
345
  }
346
346
 
347
+ static napi_value NewLatin1StringValue(napi_env env, const char* data, size_t len) {
348
+ napi_value value = nullptr;
349
+ if (napi_create_string_latin1(env, data, len, &value) == napi_ok && value) {
350
+ return value;
351
+ }
352
+ napi_create_string_utf8(env, data, len, &value);
353
+ return value;
354
+ }
355
+
356
+ static napi_value NewCsvStringValue(
357
+ napi_env env,
358
+ const char* data,
359
+ size_t len,
360
+ bool ascii_only
361
+ ) {
362
+ return ascii_only ? NewLatin1StringValue(env, data, len) : SafeNewStringValue(env, data, len);
363
+ }
364
+
365
+ static bool rowsAreAscii(const std::vector<std::vector<std::string>> &rows) {
366
+ for (const auto &row : rows) {
367
+ for (const auto &field : row) {
368
+ if (!isAllAscii(field.data(), field.size())) {
369
+ return false;
370
+ }
371
+ }
372
+ }
373
+ return true;
374
+ }
375
+
376
+ static bool canUseSimpleLfFastPath(const cisv_config &config) {
377
+ return config.escape == '\0' &&
378
+ config.comment == '\0' &&
379
+ !config.trim &&
380
+ !config.skip_empty_lines &&
381
+ !config.relaxed &&
382
+ !config.skip_lines_with_error &&
383
+ config.max_row_size == 0 &&
384
+ config.from_line <= 1 &&
385
+ config.to_line == 0;
386
+ }
387
+
388
+ static bool tryParseSimpleLfToJsRows(
389
+ napi_env env,
390
+ const uint8_t *data,
391
+ size_t len,
392
+ const cisv_config &config,
393
+ napi_value *out
394
+ ) {
395
+ *out = nullptr;
396
+ if (!data || !canUseSimpleLfFastPath(config)) {
397
+ return false;
398
+ }
399
+ if (len >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF) {
400
+ return false;
401
+ }
402
+
403
+ size_t row_count = 0;
404
+ bool saw_data_in_row = false;
405
+ for (size_t i = 0; i < len; i++) {
406
+ const uint8_t c = data[i];
407
+ if (c == static_cast<uint8_t>(config.quote) || c == '\r' || (c & 0x80)) {
408
+ return false;
409
+ }
410
+ if (c == '\n') {
411
+ row_count++;
412
+ saw_data_in_row = false;
413
+ } else {
414
+ saw_data_in_row = true;
415
+ }
416
+ }
417
+ if (saw_data_in_row || (len > 0 && data[len - 1] != '\n')) {
418
+ row_count++;
419
+ }
420
+
421
+ napi_value rows;
422
+ if (napi_create_array_with_length(env, row_count, &rows) != napi_ok) {
423
+ return false;
424
+ }
425
+
426
+ size_t row_idx = 0;
427
+ size_t line_start = 0;
428
+ while (line_start < len && row_idx < row_count) {
429
+ size_t line_end = line_start;
430
+ while (line_end < len && data[line_end] != '\n') {
431
+ line_end++;
432
+ }
433
+
434
+ size_t field_count = 1;
435
+ for (size_t i = line_start; i < line_end; i++) {
436
+ if (data[i] == static_cast<uint8_t>(config.delimiter)) {
437
+ field_count++;
438
+ }
439
+ }
440
+
441
+ napi_value row;
442
+ if (napi_create_array_with_length(env, field_count, &row) != napi_ok) {
443
+ return false;
444
+ }
445
+
446
+ size_t field_idx = 0;
447
+ size_t field_start = line_start;
448
+ for (size_t i = line_start; i <= line_end; i++) {
449
+ if (i == line_end || data[i] == static_cast<uint8_t>(config.delimiter)) {
450
+ napi_value field = NewLatin1StringValue(
451
+ env,
452
+ reinterpret_cast<const char*>(data + field_start),
453
+ i - field_start);
454
+ if (!field || napi_set_element(env, row, field_idx, field) != napi_ok) {
455
+ return false;
456
+ }
457
+ field_idx++;
458
+ field_start = i + 1;
459
+ }
460
+ }
461
+
462
+ if (napi_set_element(env, rows, row_idx, row) != napi_ok) {
463
+ return false;
464
+ }
465
+ row_idx++;
466
+
467
+ if (line_end == len) {
468
+ break;
469
+ }
470
+ line_start = line_end + 1;
471
+ }
472
+
473
+ *out = rows;
474
+ return true;
475
+ }
476
+
347
477
  // Extended RowCollector that handles transforms
348
478
  struct RowCollector {
349
479
  std::vector<std::string> current;
@@ -550,11 +680,16 @@ static bool collectParallelRows(
550
680
 
551
681
  static Napi::Array rowsToJsArray(Napi::Env env, const std::vector<std::vector<std::string>> &rows) {
552
682
  Napi::Array out = Napi::Array::New(env, rows.size());
683
+ const bool ascii_only = rowsAreAscii(rows);
553
684
  for (size_t i = 0; i < rows.size(); i++) {
554
685
  Napi::Array row = Napi::Array::New(env, rows[i].size());
555
686
  for (size_t j = 0; j < rows[i].size(); j++) {
556
687
  const std::string &field = rows[i][j];
557
- row[j] = SafeNewString(env, field.c_str(), field.length());
688
+ napi_set_element(
689
+ env,
690
+ row,
691
+ j,
692
+ NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
558
693
  }
559
694
  out[i] = row;
560
695
  }
@@ -720,6 +855,8 @@ public:
720
855
  iterator_ = nullptr;
721
856
  batch_result_ = nullptr;
722
857
  stream_buffering_active_ = true;
858
+ pending_buffer_data_ = nullptr;
859
+ pending_buffer_size_ = 0;
723
860
 
724
861
  // Initialize configuration with defaults
725
862
  cisv_config_init(&config_);
@@ -834,6 +971,9 @@ public:
834
971
  rc_ = nullptr;
835
972
  }
836
973
  clearBatchResult();
974
+ clearFastRows();
975
+ pending_stream_.clear();
976
+ clearPendingBuffer();
837
977
  is_destroyed_ = true;
838
978
  }
839
979
  }
@@ -1011,6 +1151,7 @@ public:
1011
1151
 
1012
1152
  // Streaming writes produce row-callback data, not batch results.
1013
1153
  clearBatchResult();
1154
+ clearFastRows();
1014
1155
 
1015
1156
  // Set environment for JS transforms
1016
1157
  rc_->env = env;
@@ -1040,11 +1181,29 @@ public:
1040
1181
  // Buffer chunks when no transforms/iterator are active and batch-parse on end().
1041
1182
  // If buffered payload exceeds threshold, flush once to parser and continue streaming.
1042
1183
  if (!hasTransforms() && iterator_ == nullptr) {
1043
- if (chunk_size > SIZE_MAX - pending_stream_.size()) {
1044
- throw Napi::Error::New(env, "Buffered stream size would overflow");
1045
- }
1046
-
1047
1184
  if (stream_buffering_active_) {
1185
+ if (chunk_size > 0 && pending_stream_.empty() && pending_buffer_size_ == 0 && info[0].IsBuffer()) {
1186
+ auto buf = info[0].As<Napi::Buffer<uint8_t>>();
1187
+ pending_buffer_ref_ = Napi::Persistent(buf.As<Napi::Object>());
1188
+ pending_buffer_data_ = buf.Data();
1189
+ pending_buffer_size_ = buf.Length();
1190
+ total_bytes_ += chunk_size;
1191
+ return;
1192
+ }
1193
+
1194
+ if (pending_buffer_size_ > 0) {
1195
+ if (pending_buffer_size_ > SIZE_MAX - pending_stream_.size()) {
1196
+ throw Napi::Error::New(env, "Buffered stream size would overflow");
1197
+ }
1198
+ pending_stream_.append(
1199
+ reinterpret_cast<const char*>(pending_buffer_data_),
1200
+ pending_buffer_size_);
1201
+ clearPendingBuffer();
1202
+ }
1203
+
1204
+ if (chunk_size > SIZE_MAX - pending_stream_.size()) {
1205
+ throw Napi::Error::New(env, "Buffered stream size would overflow");
1206
+ }
1048
1207
  pending_stream_.append(reinterpret_cast<const char*>(chunk_data), chunk_size);
1049
1208
  total_bytes_ += chunk_size;
1050
1209
 
@@ -1054,7 +1213,7 @@ public:
1054
1213
  }
1055
1214
  return;
1056
1215
  }
1057
- } else if (!pending_stream_.empty()) {
1216
+ } else if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
1058
1217
  flushPendingStreamToParser();
1059
1218
  stream_buffering_active_ = false;
1060
1219
  }
@@ -1072,6 +1231,21 @@ public:
1072
1231
  if (stream_buffering_active_ && !pending_stream_.empty() &&
1073
1232
  !hasTransforms() && iterator_ == nullptr &&
1074
1233
  rc_ && rc_->rows.empty() && rc_->current.empty()) {
1234
+ napi_value fast_rows = nullptr;
1235
+ if (tryParseSimpleLfToJsRows(
1236
+ info.Env(),
1237
+ reinterpret_cast<const uint8_t*>(pending_stream_.data()),
1238
+ pending_stream_.size(),
1239
+ config_,
1240
+ &fast_rows)) {
1241
+ clearBatchResult();
1242
+ clearFastRows();
1243
+ fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
1244
+ pending_stream_.clear();
1245
+ rc_->env = nullptr;
1246
+ return;
1247
+ }
1248
+
1075
1249
  cisv_result_t *batch = cisv_parse_string_batch(
1076
1250
  pending_stream_.data(), pending_stream_.size(), &config_);
1077
1251
  if (!batch) {
@@ -1083,13 +1257,55 @@ public:
1083
1257
  throw Napi::Error::New(info.Env(), msg);
1084
1258
  }
1085
1259
  clearBatchResult();
1260
+ clearFastRows();
1086
1261
  batch_result_ = batch;
1087
1262
  pending_stream_.clear();
1088
1263
  rc_->env = nullptr;
1089
1264
  return;
1090
1265
  }
1091
1266
 
1092
- if (!pending_stream_.empty()) {
1267
+ if (stream_buffering_active_ && pending_buffer_size_ > 0 &&
1268
+ pending_stream_.empty() &&
1269
+ !hasTransforms() && iterator_ == nullptr &&
1270
+ rc_ && rc_->rows.empty() && rc_->current.empty()) {
1271
+ napi_value fast_rows = nullptr;
1272
+ if (tryParseSimpleLfToJsRows(
1273
+ info.Env(),
1274
+ pending_buffer_data_,
1275
+ pending_buffer_size_,
1276
+ config_,
1277
+ &fast_rows)) {
1278
+ clearBatchResult();
1279
+ clearFastRows();
1280
+ fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
1281
+ clearPendingBuffer();
1282
+ rc_->env = nullptr;
1283
+ return;
1284
+ }
1285
+
1286
+ cisv_result_t *batch = cisv_parse_string_batch(
1287
+ reinterpret_cast<const char*>(pending_buffer_data_),
1288
+ pending_buffer_size_,
1289
+ &config_);
1290
+ if (!batch) {
1291
+ clearPendingBuffer();
1292
+ throw Napi::Error::New(info.Env(), "parse error: " + std::string(strerror(errno)));
1293
+ }
1294
+ if (batch->error_code != 0) {
1295
+ std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
1296
+ cisv_result_free(batch);
1297
+ clearPendingBuffer();
1298
+ throw Napi::Error::New(info.Env(), msg);
1299
+ }
1300
+ clearBatchResult();
1301
+ clearFastRows();
1302
+ batch_result_ = batch;
1303
+ clearPendingBuffer();
1304
+ rc_->env = nullptr;
1305
+ return;
1306
+ }
1307
+
1308
+ if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
1093
1309
  flushPendingStreamToParser();
1094
1310
  stream_buffering_active_ = false;
1095
1311
  }
@@ -1111,18 +1327,24 @@ public:
1111
1327
  flushPendingStreamToParser();
1112
1328
  stream_buffering_active_ = false;
1113
1329
  }
1330
+ if (pending_buffer_size_ > 0) {
1331
+ flushPendingStreamToParser();
1332
+ stream_buffering_active_ = false;
1333
+ }
1114
1334
  return drainRows(info.Env());
1115
1335
  }
1116
1336
 
1117
1337
  void Clear(const Napi::CallbackInfo &info) {
1118
1338
  if (!is_destroyed_ && rc_) {
1119
1339
  clearBatchResult();
1340
+ clearFastRows();
1120
1341
  rc_->rows.clear();
1121
1342
  rc_->current.clear();
1122
1343
  rc_->current_field_index = 0;
1123
1344
  total_bytes_ = 0;
1124
1345
  parse_time_ = 0;
1125
1346
  pending_stream_.clear();
1347
+ clearPendingBuffer();
1126
1348
  stream_buffering_active_ = true;
1127
1349
  // Also clear the environment reference
1128
1350
  rc_->env = nullptr;
@@ -1661,7 +1883,16 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1661
1883
  Napi::Object stats = Napi::Object::New(env);
1662
1884
  size_t row_count = 0;
1663
1885
  size_t field_count = 0;
1664
- if (batch_result_) {
1886
+ if (!fast_rows_ref_.IsEmpty()) {
1887
+ Napi::Array rows = fast_rows_ref_.Value().As<Napi::Array>();
1888
+ row_count = rows.Length();
1889
+ if (row_count > 0) {
1890
+ Napi::Value first = rows.Get(static_cast<uint32_t>(0));
1891
+ if (first.IsArray()) {
1892
+ field_count = first.As<Napi::Array>().Length();
1893
+ }
1894
+ }
1895
+ } else if (batch_result_) {
1665
1896
  row_count = batch_result_->row_count;
1666
1897
  if (batch_result_->row_count > 0) {
1667
1898
  field_count = batch_result_->rows[0].field_count;
@@ -1793,8 +2024,15 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1793
2024
 
1794
2025
  napi_value row;
1795
2026
  napi_create_array_with_length(env, field_count, &row);
2027
+ bool ascii_only = true;
1796
2028
  for (size_t i = 0; i < field_count; i++) {
1797
- napi_set_element(env, row, i, SafeNewStringValue(env, fields[i], lengths[i]));
2029
+ if (!isAllAscii(fields[i], lengths[i])) {
2030
+ ascii_only = false;
2031
+ break;
2032
+ }
2033
+ }
2034
+ for (size_t i = 0; i < field_count; i++) {
2035
+ napi_set_element(env, row, i, NewCsvStringValue(env, fields[i], lengths[i], ascii_only));
1798
2036
  }
1799
2037
 
1800
2038
  return Napi::Value(env, row);
@@ -1843,6 +2081,20 @@ private:
1843
2081
  }
1844
2082
  }
1845
2083
 
2084
+ void clearFastRows() {
2085
+ if (!fast_rows_ref_.IsEmpty()) {
2086
+ fast_rows_ref_.Reset();
2087
+ }
2088
+ }
2089
+
2090
+ void clearPendingBuffer() {
2091
+ if (!pending_buffer_ref_.IsEmpty()) {
2092
+ pending_buffer_ref_.Reset();
2093
+ }
2094
+ pending_buffer_data_ = nullptr;
2095
+ pending_buffer_size_ = 0;
2096
+ }
2097
+
1846
2098
  bool hasTransforms() const {
1847
2099
  bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;
1848
2100
  bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
@@ -1851,7 +2103,9 @@ private:
1851
2103
 
1852
2104
  void resetRowState() {
1853
2105
  clearBatchResult();
2106
+ clearFastRows();
1854
2107
  pending_stream_.clear();
2108
+ clearPendingBuffer();
1855
2109
  stream_buffering_active_ = true;
1856
2110
  if (!rc_) return;
1857
2111
  rc_->rows.clear();
@@ -1860,6 +2114,11 @@ private:
1860
2114
  }
1861
2115
 
1862
2116
  void flushPendingStreamToParser() {
2117
+ if (pending_buffer_size_ > 0) {
2118
+ ensureParser(Env());
2119
+ cisv_parser_write(parser_, pending_buffer_data_, pending_buffer_size_);
2120
+ clearPendingBuffer();
2121
+ }
1863
2122
  if (pending_stream_.empty()) {
1864
2123
  return;
1865
2124
  }
@@ -1888,15 +2147,27 @@ private:
1888
2147
  }
1889
2148
 
1890
2149
  Napi::Value drainRows(Napi::Env env) {
2150
+ if (!fast_rows_ref_.IsEmpty()) {
2151
+ return fast_rows_ref_.Value();
2152
+ }
2153
+
1891
2154
  if (batch_result_) {
1892
2155
  napi_value rows;
1893
2156
  napi_create_array_with_length(env, batch_result_->row_count, &rows);
2157
+ const bool ascii_only =
2158
+ !batch_result_->field_data ||
2159
+ batch_result_->field_data_size == 0 ||
2160
+ isAllAscii(batch_result_->field_data, batch_result_->field_data_size);
1894
2161
  for (size_t i = 0; i < batch_result_->row_count; ++i) {
1895
2162
  const cisv_row_t *src_row = &batch_result_->rows[i];
1896
2163
  napi_value row;
1897
2164
  napi_create_array_with_length(env, src_row->field_count, &row);
1898
2165
  for (size_t j = 0; j < src_row->field_count; ++j) {
1899
- napi_set_element(env, row, j, SafeNewStringValue(env, src_row->fields[j], src_row->field_lengths[j]));
2166
+ napi_set_element(
2167
+ env,
2168
+ row,
2169
+ j,
2170
+ NewCsvStringValue(env, src_row->fields[j], src_row->field_lengths[j], ascii_only));
1900
2171
  }
1901
2172
  napi_set_element(env, rows, i, row);
1902
2173
  }
@@ -1909,6 +2180,7 @@ private:
1909
2180
 
1910
2181
  napi_value rows;
1911
2182
  napi_create_array_with_length(env, rc_->rows.size(), &rows);
2183
+ const bool ascii_only = rowsAreAscii(rc_->rows);
1912
2184
 
1913
2185
  for (size_t i = 0; i < rc_->rows.size(); ++i) {
1914
2186
  napi_value row;
@@ -1916,7 +2188,11 @@ private:
1916
2188
  for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
1917
2189
  // SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
1918
2190
  const std::string& field = rc_->rows[i][j];
1919
- napi_set_element(env, row, j, SafeNewStringValue(env, field.c_str(), field.length()));
2191
+ napi_set_element(
2192
+ env,
2193
+ row,
2194
+ j,
2195
+ NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
1920
2196
  }
1921
2197
  napi_set_element(env, rows, i, row);
1922
2198
  }
@@ -1935,7 +2211,11 @@ private:
1935
2211
  bool is_destroyed_;
1936
2212
  cisv_iterator_t *iterator_; // For row-by-row iteration
1937
2213
  cisv_result_t *batch_result_;
2214
+ Napi::ObjectReference fast_rows_ref_;
1938
2215
  std::string pending_stream_;
2216
+ Napi::ObjectReference pending_buffer_ref_;
2217
+ const uint8_t *pending_buffer_data_;
2218
+ size_t pending_buffer_size_;
1939
2219
  bool stream_buffering_active_;
1940
2220
  static constexpr size_t kStreamBufferLimitBytes = 8 * 1024 * 1024;
1941
2221
  };
package/cisv/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  const path = require('path');
2
2
  const addon = require('node-gyp-build')(path.join(__dirname, '..'));
3
+ const { wrapAddon } = require('./wrapper');
3
4
 
4
- module.exports = addon;
5
+ module.exports = wrapAddon(addon);
package/cisv/index.mjs CHANGED
@@ -7,6 +7,10 @@ const require = createRequire(import.meta.url);
7
7
 
8
8
  const gyp = require('node-gyp-build');
9
9
  const addon = gyp(path.join(__dirname, '..'));
10
+ const { wrapAddon } = require('./wrapper.js');
11
+ const wrapped = wrapAddon(addon);
10
12
 
11
- export const cisvParser = addon.cisvParser;
12
- export default addon;
13
+ export const cisvParser = wrapped.cisvParser;
14
+ export const TransformType = wrapped.TransformType;
15
+ export const version = wrapped.version;
16
+ export default wrapped;
@@ -0,0 +1,388 @@
1
+ 'use strict';
2
+
3
+ const { isAscii } = require('buffer');
4
+
5
/**
 * Derives the JS fast-path configuration from parser options.
 *
 * @param {object|null|undefined} options Parser options as given by the caller.
 * @returns {{delimiter: string, quote: string}|null} The delimiter/quote pair
 *   to use for the fast path, or null when the options rule the fast path out
 *   (non-object options, multi-character or line-break delimiter/quote, any
 *   escape/comment character, trim/skipEmptyLines/relaxed/skipLinesWithError,
 *   a non-zero maxRowSize, or a fromLine/toLine window).
 */
function fastConfigFromOptions(options) {
  if (options == null) {
    return { delimiter: ',', quote: '"' };
  }
  if (typeof options !== 'object') {
    return null;
  }

  const orDefault = (value, fallback) => (value == null ? fallback : value);
  const isSingleChar = (value) => typeof value === 'string' && value.length === 1;
  const isLineBreak = (value) => value === '\n' || value === '\r';
  const isSet = (value) => value != null && value !== '';

  const delimiter = orDefault(options.delimiter, ',');
  const quote = orDefault(options.quote, '"');
  if (!isSingleChar(delimiter)) {
    return null;
  }
  if (!isSingleChar(quote)) {
    return null;
  }
  if (isLineBreak(delimiter) || isLineBreak(quote)) {
    return null;
  }

  if (isSet(options.escape)) {
    return null;
  }
  if (isSet(options.comment)) {
    return null;
  }

  const needsFullParser =
    options.trim || options.skipEmptyLines || options.relaxed || options.skipLinesWithError;
  if (needsFullParser) {
    return null;
  }
  if (options.maxRowSize != null && options.maxRowSize !== 0) {
    return null;
  }

  const fromLine = orDefault(options.fromLine, 1);
  const toLine = orDefault(options.toLine, 0);
  const startsAtFirstLine = fromLine === 0 || fromLine === 1;
  if (!startsAtFirstLine) {
    return null;
  }
  if (toLine !== 0) {
    return null;
  }

  return { delimiter, quote };
}
49
+
50
/**
 * Returns a Buffer chunk decoded as latin1; any other chunk is returned as-is.
 */
function chunkToLatin1String(chunk) {
  if (!Buffer.isBuffer(chunk)) {
    return chunk;
  }
  return chunk.toString('latin1');
}
53
+
54
/**
 * Concatenates all chunks into one string, converting each chunk with
 * chunkToLatin1String. A single chunk is converted directly without
 * concatenation; an empty list yields ''.
 */
function chunksToLatin1String(chunks) {
  if (chunks.length === 1) {
    return chunkToLatin1String(chunks[0]);
  }

  let joined = '';
  for (const piece of chunks) {
    joined += chunkToLatin1String(piece);
  }
  return joined;
}
65
+
66
/**
 * Reports whether a string qualifies for the JS fast path: every code unit is
 * 7-bit ASCII, and neither a carriage return nor the quote character occurs.
 *
 * No separate byte-order-mark test is needed: a decoded BOM (U+FEFF) and the
 * raw BOM bytes read as code units (0xEF 0xBB 0xBF) are all above 127 and are
 * therefore rejected by the loop.
 *
 * @param {string} data  Text to inspect.
 * @param {string} quote Quote character; only its first code unit is used.
 * @returns {boolean} true when `data` is simple ASCII with LF-only line ends.
 */
function isSimpleAsciiLf(data, quote) {
  const quoteCode = quote.charCodeAt(0);
  for (let i = 0; i < data.length; i++) {
    const code = data.charCodeAt(i);
    if (code > 127 || code === 13 || code === quoteCode) {
      return false;
    }
  }
  return true;
}
83
+
84
/**
 * Reports whether every chunk qualifies for the JS fast path: ASCII only, no
 * carriage return, no quote character.
 *
 * Buffer chunks are checked with `buffer.isAscii` plus `indexOf`; string
 * chunks are delegated to isSimpleAsciiLf. A UTF-8 byte-order mark needs no
 * special case because its bytes are non-ASCII and fail the ASCII check.
 *
 * @param {Array<Buffer|string>} chunks Chunks in write order.
 * @param {string} quote Quote character; only its first code unit is used.
 * @returns {boolean} true when all chunks are simple ASCII with LF-only line ends.
 */
function chunksAreSimpleAsciiLf(chunks, quote) {
  const quoteCode = quote.charCodeAt(0);

  // `buffer.isAscii` was added in Node 18.15 / 19.6. On older runtimes the
  // destructured import is undefined, so fall back to a byte scan rather
  // than throwing "isAscii is not a function".
  const bufferIsAscii = typeof isAscii === 'function'
    ? isAscii
    : (buf) => {
      for (let i = 0; i < buf.length; i++) {
        if (buf[i] > 127) {
          return false;
        }
      }
      return true;
    };

  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];
    if (Buffer.isBuffer(chunk)) {
      if (!bufferIsAscii(chunk) || chunk.indexOf(quoteCode) !== -1 || chunk.indexOf(13) !== -1) {
        return false;
      }
    } else if (!isSimpleAsciiLf(chunk, quote)) {
      return false;
    }
  }
  return true;
}
105
+
106
/**
 * Scans a single chunk once and classifies it for the JS fast path.
 *
 * @param {Buffer|string} chunk The only chunk written to the parser.
 * @param {string} delimiter Field separator; only its first code unit is used.
 * @param {string} quote Quote character; only its first code unit is used.
 * @returns {{simple: boolean, uniform: boolean, rows: number, cols: number}}
 *   `simple` is false when the chunk starts with the bytes EF BB BF or contains
 *   the quote character, a carriage return, or a value above 127.
 *   `uniform` is true when every line has the same number of fields.
 *   `rows` / `cols` are only meaningful when both flags are true; otherwise
 *   they are 0.
 */
function analyzeSingleSimpleChunk(chunk, delimiter, quote) {
  const LF = 10;
  const CR = 13;
  const verdict = (simple, uniform, rows, cols) => ({ simple, uniform, rows, cols });

  const delimiterCode = delimiter.charCodeAt(0);
  const quoteCode = quote.charCodeAt(0);
  const isBuf = Buffer.isBuffer(chunk);
  const size = chunk.length;
  const unitAt = (i) => (isBuf ? chunk[i] : chunk.charCodeAt(i));

  if (size >= 3 && unitAt(0) === 0xEF && unitAt(1) === 0xBB && unitAt(2) === 0xBF) {
    return verdict(false, false, 0, 0);
  }

  let expectedCols = -1; // field count of the first completed line, -1 until known
  let lineCols = 1;      // field count of the line being scanned
  let lineHasData = false;
  let rowTotal = 0;

  for (let i = 0; i < size; i++) {
    const code = isBuf ? chunk[i] : chunk.charCodeAt(i);
    if (code === quoteCode || code === CR || code > 127) {
      return verdict(false, false, 0, 0);
    }

    if (code === delimiterCode) {
      lineCols++;
      lineHasData = true;
      continue;
    }
    if (code !== LF) {
      lineHasData = true;
      continue;
    }

    // End of line: the first line fixes the expected width.
    if (expectedCols === -1) {
      expectedCols = lineCols;
    } else if (lineCols !== expectedCols) {
      return verdict(true, false, 0, 0);
    }
    rowTotal++;
    lineCols = 1;
    lineHasData = false;
  }

  // A final line without a terminating LF still counts as a row.
  if (lineHasData) {
    if (expectedCols !== -1 && lineCols !== expectedCols) {
      return verdict(true, false, 0, 0);
    }
    rowTotal++;
    if (expectedCols === -1) {
      expectedCols = lineCols;
    }
  }

  return verdict(true, true, rowTotal, Math.max(expectedCols, 0));
}
181
+
182
/**
 * Splits text into rows on '\n' and each row into fields on `delimiter`.
 *
 * One trailing '\n' is ignored. Empty input yields no rows; input that is
 * just "\n" yields a single row holding one empty field.
 *
 * @param {string} data Text already known to need no quote handling.
 * @param {string} delimiter Field separator.
 * @returns {string[][]} Parsed rows.
 */
function parseSimpleRows(data, delimiter) {
  if (data.length === 0) {
    return [];
  }

  const endsWithLf = data.charCodeAt(data.length - 1) === 10;
  const body = endsWithLf ? data.slice(0, -1) : data;
  if (body.length === 0) {
    return [['']];
  }

  return body.split('\n').map((line) => line.split(delimiter));
}
200
+
201
/**
 * Parses text in which every line has the same number of fields, walking the
 * string with indexOf instead of building intermediate line arrays.
 *
 * The caller must guarantee uniformity: each line is assumed to contain
 * exactly `cols - 1` delimiters.
 *
 * @param {string} data Text already known to need no quote handling.
 * @param {string} delimiter Field separator.
 * @param {number} rowCount Expected number of rows; with `cols`, enables
 *   preallocation of the result when both are > 0.
 * @param {number} cols Fields per row; when preallocation is off it is
 *   recomputed from the first line.
 * @returns {string[][]} Parsed rows.
 */
function parseUniformRows(data, delimiter, rowCount, cols) {
  const LF = 10;
  const total = data.length;
  if (total === 0) {
    return [];
  }

  // Ignore one trailing LF.
  const bodyEnd = data.charCodeAt(total - 1) === LF ? total - 1 : total;
  if (bodyEnd === 0) {
    return [['']];
  }

  // A second LF right before the ignored one means the body ends in an empty line.
  const trailingBlankRow = data.charCodeAt(bodyEnd - 1) === LF;
  const preallocate = rowCount > 0 && cols > 0;

  let width = cols;
  if (!preallocate) {
    width = 1;
    let i = 0;
    while (i < bodyEnd && data.charCodeAt(i) !== LF) {
      if (data[i] === delimiter) {
        width++;
      }
      i++;
    }
  }

  const out = preallocate ? new Array(rowCount) : [];
  let filled = 0;
  const emit = (row) => {
    if (preallocate) {
      out[filled++] = row;
    } else {
      out.push(row);
    }
  };

  let cursor = 0;
  while (cursor < bodyEnd) {
    const fields = new Array(width);
    let col = 0;
    while (col < width - 1) {
      const sep = data.indexOf(delimiter, cursor);
      fields[col] = data.slice(cursor, sep);
      cursor = sep + 1;
      col++;
    }

    const newline = data.indexOf('\n', cursor);
    const stop = newline === -1 || newline > bodyEnd ? bodyEnd : newline;
    fields[width - 1] = data.slice(cursor, stop);
    emit(fields);
    cursor = stop + 1;
  }

  if (trailingBlankRow) {
    emit(new Array(width).fill(''));
  }

  return out;
}
263
+
264
/**
 * Wraps the native addon so that `cisvParser` can answer simple inputs
 * (ASCII, LF line ends, no quote characters, default-like options) with a
 * pure-JS parse, and hands everything else to the native parser.
 *
 * @param {object} addon The loaded native addon; `addon.cisvParser` is the
 *   native parser class.
 * @returns {object} A shallow copy of `addon` whose `cisvParser` is the
 *   wrapping subclass.
 */
function wrapAddon(addon) {
  const NativeParser = addon.cisvParser;

  class cisvParser extends NativeParser {
    constructor(options) {
      super(options);
      // null means "fast path disabled for this configuration".
      this._cisvFastConfig = fastConfigFromOptions(options);
      // Chunks held back until end(). They are stored by reference, not
      // copied, so a Buffer mutated after write() changes what gets parsed.
      this._cisvFastChunks = [];
      // Result of a successful fast-path parse, or null.
      this._cisvFastRows = null;
      // True once any data has been handed to the native parser.
      this._cisvNativeStream = false;
      // True once a transform was registered through this wrapper; the JS
      // fast path never applies transforms, so it must stay off afterwards.
      this._cisvTransformsUsed = false;
    }

    // Replays the held-back chunks into the native parser, in write order.
    _flushFastChunksToNative() {
      if (this._cisvFastChunks.length === 0) {
        return;
      }
      const chunks = this._cisvFastChunks;
      this._cisvFastChunks = [];
      this._cisvNativeStream = true;
      for (let i = 0; i < chunks.length; i++) {
        super.write(chunks[i]);
      }
    }

    write(chunk) {
      this._cisvFastRows = null;
      if (this._cisvFastConfig &&
          !this._cisvNativeStream &&
          (Buffer.isBuffer(chunk) || typeof chunk === 'string')) {
        this._cisvFastChunks.push(chunk);
        return;
      }

      this._flushFastChunksToNative();
      this._cisvNativeStream = true;
      return super.write(chunk);
    }

    end() {
      if (this._cisvFastConfig &&
          !this._cisvNativeStream &&
          this._cisvFastChunks.length > 0) {
        let simple = false;
        let uniform = false;
        let rowCount = 0;
        let colCount = 0;

        if (this._cisvFastChunks.length === 1) {
          const analysis = analyzeSingleSimpleChunk(
            this._cisvFastChunks[0],
            this._cisvFastConfig.delimiter,
            this._cisvFastConfig.quote);
          simple = analysis.simple;
          uniform = analysis.uniform;
          rowCount = analysis.rows;
          colCount = analysis.cols;
        } else {
          // Multi-chunk input is only checked for simplicity; uniformity is
          // not analysed, so it always takes parseSimpleRows.
          simple = chunksAreSimpleAsciiLf(this._cisvFastChunks, this._cisvFastConfig.quote);
        }

        if (simple) {
          const data = chunksToLatin1String(this._cisvFastChunks);
          // Row/column counts are only passed on (enabling preallocation)
          // for inputs of at least 64 MiB.
          const useLargePrealloc = data.length >= 64 * 1024 * 1024;
          this._cisvFastRows = uniform
            ? parseUniformRows(
              data,
              this._cisvFastConfig.delimiter,
              useLargePrealloc ? rowCount : 0,
              useLargePrealloc ? colCount : 0)
            : parseSimpleRows(data, this._cisvFastConfig.delimiter);
          this._cisvFastChunks = [];
          return;
        }
      }

      this._flushFastChunksToNative();
      this._cisvNativeStream = true;
      return super.end();
    }

    getRows() {
      if (this._cisvFastRows !== null) {
        return this._cisvFastRows;
      }
      return super.getRows();
    }

    clear() {
      this._cisvFastChunks = [];
      this._cisvFastRows = null;
      this._cisvNativeStream = false;
      return super.clear();
    }

    setConfig(options) {
      this._flushFastChunksToNative();
      this._cisvFastRows = null;
      // Do not re-enable the fast path once transforms have been registered:
      // it would return rows that bypass them.
      this._cisvFastConfig = this._cisvTransformsUsed
        ? null
        : fastConfigFromOptions(options);
      return super.setConfig(options);
    }

    transform(...args) {
      this._flushFastChunksToNative();
      this._cisvFastConfig = null;
      this._cisvTransformsUsed = true;
      return super.transform(...args);
    }

    transformByName(...args) {
      this._flushFastChunksToNative();
      this._cisvFastConfig = null;
      this._cisvTransformsUsed = true;
      return super.transformByName(...args);
    }

    destroy() {
      this._cisvFastChunks = [];
      this._cisvFastRows = null;
      return super.destroy();
    }
  }

  return {
    ...addon,
    cisvParser,
  };
}
387
+
388
+ module.exports = { wrapAddon };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cisv",
3
- "version": "0.4.9",
3
+ "version": "0.4.11",
4
4
  "description": "The csv parser on steroids.",
5
5
  "author": "sanix<s4nixd@gmail.com>",
6
6
  "main": "./build/Release/cisv.node",