cisv 0.4.9 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/Release/cisv.node +0 -0
- package/cisv/cisv_addon.cc +291 -11
- package/cisv/index.js +2 -1
- package/cisv/index.mjs +6 -2
- package/cisv/wrapper.js +371 -0
- package/package.json +1 -1
package/build/Release/cisv.node
CHANGED
|
Binary file
|
package/cisv/cisv_addon.cc
CHANGED
|
@@ -344,6 +344,136 @@ static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
|
|
|
344
344
|
return Napi::String(env, SafeNewStringValue(env, data, len));
|
|
345
345
|
}
|
|
346
346
|
|
|
347
|
+
// Builds a JS string from `len` bytes at `data`, interpreting them as Latin-1.
// If the Latin-1 constructor does not yield a value, the same bytes are handed
// to napi_create_string_utf8 instead; that call's status is not inspected, so
// the returned handle is whatever it left in `result` (nullptr when nothing
// was written — callers in this file test for that).
static napi_value NewLatin1StringValue(napi_env env, const char* data, size_t len) {
    napi_value result = nullptr;
    const bool latin1_ok =
        napi_create_string_latin1(env, data, len, &result) == napi_ok && result != nullptr;
    if (!latin1_ok) {
        // Best-effort fallback: retry through the UTF-8 constructor.
        napi_create_string_utf8(env, data, len, &result);
    }
    return result;
}
|
|
355
|
+
|
|
356
|
+
// Picks the string constructor for one CSV field.
//   ascii_only == true  -> NewLatin1StringValue
//   ascii_only == false -> SafeNewStringValue
// The caller decides `ascii_only`; this function does not inspect the bytes.
static napi_value NewCsvStringValue(
    napi_env env,
    const char* data,
    size_t len,
    bool ascii_only
) {
    if (ascii_only) {
        return NewLatin1StringValue(env, data, len);
    }
    return SafeNewStringValue(env, data, len);
}
|
|
364
|
+
|
|
365
|
+
static bool rowsAreAscii(const std::vector<std::vector<std::string>> &rows) {
|
|
366
|
+
for (const auto &row : rows) {
|
|
367
|
+
for (const auto &field : row) {
|
|
368
|
+
if (!isAllAscii(field.data(), field.size())) {
|
|
369
|
+
return false;
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
return true;
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
static bool canUseSimpleLfFastPath(const cisv_config &config) {
|
|
377
|
+
return config.escape == '\0' &&
|
|
378
|
+
config.comment == '\0' &&
|
|
379
|
+
!config.trim &&
|
|
380
|
+
!config.skip_empty_lines &&
|
|
381
|
+
!config.relaxed &&
|
|
382
|
+
!config.skip_lines_with_error &&
|
|
383
|
+
config.max_row_size == 0 &&
|
|
384
|
+
config.from_line <= 1 &&
|
|
385
|
+
config.to_line == 0;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
// Attempts to turn `len` bytes at `data` directly into a JS array of row
// arrays, bypassing the general parser.
//
// The attempt is abandoned (returns false, *out stays nullptr) when:
//   - `data` is null or canUseSimpleLfFastPath(config) is false,
//   - the input starts with the bytes EF BB BF,
//   - any byte equals the quote character, is '\r', or has the high bit set,
//   - any Node-API call used to build the result reports failure.
//
// On success *out receives the outer array and true is returned. Rows are
// separated by '\n'; fields are separated by config.delimiter. A final '\n'
// does not start an additional row; an empty line becomes a row holding a
// single empty string.
static bool tryParseSimpleLfToJsRows(
    napi_env env,
    const uint8_t *data,
    size_t len,
    const cisv_config &config,
    napi_value *out
) {
    *out = nullptr;
    if (data == nullptr) {
        return false;
    }
    if (!canUseSimpleLfFastPath(config)) {
        return false;
    }

    const bool starts_with_bom =
        len >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF;
    if (starts_with_bom) {
        return false;
    }

    const uint8_t quote_byte = static_cast<uint8_t>(config.quote);
    const uint8_t delim_byte = static_cast<uint8_t>(config.delimiter);

    // Pass 1: validate every byte and count line terminators.
    size_t total_rows = 0;
    for (const uint8_t *p = data, *stop = data + len; p != stop; ++p) {
        const uint8_t byte = *p;
        if (byte == quote_byte || byte == '\r' || (byte & 0x80) != 0) {
            return false;
        }
        if (byte == '\n') {
            ++total_rows;
        }
    }
    // Input that does not end in '\n' carries one more, unterminated row.
    if (len > 0 && data[len - 1] != '\n') {
        ++total_rows;
    }

    napi_value rows;
    if (napi_create_array_with_length(env, total_rows, &rows) != napi_ok) {
        return false;
    }

    // Pass 2: materialise one JS array per line.
    size_t emitted = 0;
    size_t cursor = 0;
    while (cursor < len && emitted < total_rows) {
        // Locate the end of the line and count its fields in one sweep.
        size_t eol = cursor;
        size_t fields_in_row = 1;
        for (; eol < len && data[eol] != '\n'; ++eol) {
            if (data[eol] == delim_byte) {
                ++fields_in_row;
            }
        }

        napi_value row;
        if (napi_create_array_with_length(env, fields_in_row, &row) != napi_ok) {
            return false;
        }

        size_t slot = 0;
        size_t begin = cursor;
        for (size_t pos = cursor; pos <= eol; ++pos) {
            // `pos == eol` is tested first so data[eol] is never read when
            // eol == len.
            const bool at_boundary = (pos == eol) || data[pos] == delim_byte;
            if (!at_boundary) {
                continue;
            }
            napi_value field = NewLatin1StringValue(
                env,
                reinterpret_cast<const char*>(data + begin),
                pos - begin);
            if (!field) {
                return false;
            }
            if (napi_set_element(env, row, slot, field) != napi_ok) {
                return false;
            }
            ++slot;
            begin = pos + 1;
        }

        if (napi_set_element(env, rows, emitted, row) != napi_ok) {
            return false;
        }
        ++emitted;

        if (eol == len) {
            break;  // Last line had no terminator; nothing follows.
        }
        cursor = eol + 1;
    }

    *out = rows;
    return true;
}
|
|
476
|
+
|
|
347
477
|
// Extended RowCollector that handles transforms
|
|
348
478
|
struct RowCollector {
|
|
349
479
|
std::vector<std::string> current;
|
|
@@ -550,11 +680,16 @@ static bool collectParallelRows(
|
|
|
550
680
|
|
|
551
681
|
static Napi::Array rowsToJsArray(Napi::Env env, const std::vector<std::vector<std::string>> &rows) {
|
|
552
682
|
Napi::Array out = Napi::Array::New(env, rows.size());
|
|
683
|
+
const bool ascii_only = rowsAreAscii(rows);
|
|
553
684
|
for (size_t i = 0; i < rows.size(); i++) {
|
|
554
685
|
Napi::Array row = Napi::Array::New(env, rows[i].size());
|
|
555
686
|
for (size_t j = 0; j < rows[i].size(); j++) {
|
|
556
687
|
const std::string &field = rows[i][j];
|
|
557
|
-
|
|
688
|
+
napi_set_element(
|
|
689
|
+
env,
|
|
690
|
+
row,
|
|
691
|
+
j,
|
|
692
|
+
NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
|
|
558
693
|
}
|
|
559
694
|
out[i] = row;
|
|
560
695
|
}
|
|
@@ -720,6 +855,8 @@ public:
|
|
|
720
855
|
iterator_ = nullptr;
|
|
721
856
|
batch_result_ = nullptr;
|
|
722
857
|
stream_buffering_active_ = true;
|
|
858
|
+
pending_buffer_data_ = nullptr;
|
|
859
|
+
pending_buffer_size_ = 0;
|
|
723
860
|
|
|
724
861
|
// Initialize configuration with defaults
|
|
725
862
|
cisv_config_init(&config_);
|
|
@@ -834,6 +971,9 @@ public:
|
|
|
834
971
|
rc_ = nullptr;
|
|
835
972
|
}
|
|
836
973
|
clearBatchResult();
|
|
974
|
+
clearFastRows();
|
|
975
|
+
pending_stream_.clear();
|
|
976
|
+
clearPendingBuffer();
|
|
837
977
|
is_destroyed_ = true;
|
|
838
978
|
}
|
|
839
979
|
}
|
|
@@ -1011,6 +1151,7 @@ public:
|
|
|
1011
1151
|
|
|
1012
1152
|
// Streaming writes produce row-callback data, not batch results.
|
|
1013
1153
|
clearBatchResult();
|
|
1154
|
+
clearFastRows();
|
|
1014
1155
|
|
|
1015
1156
|
// Set environment for JS transforms
|
|
1016
1157
|
rc_->env = env;
|
|
@@ -1040,11 +1181,29 @@ public:
|
|
|
1040
1181
|
// Buffer chunks when no transforms/iterator are active and batch-parse on end().
|
|
1041
1182
|
// If buffered payload exceeds threshold, flush once to parser and continue streaming.
|
|
1042
1183
|
if (!hasTransforms() && iterator_ == nullptr) {
|
|
1043
|
-
if (chunk_size > SIZE_MAX - pending_stream_.size()) {
|
|
1044
|
-
throw Napi::Error::New(env, "Buffered stream size would overflow");
|
|
1045
|
-
}
|
|
1046
|
-
|
|
1047
1184
|
if (stream_buffering_active_) {
|
|
1185
|
+
if (chunk_size > 0 && pending_stream_.empty() && pending_buffer_size_ == 0 && info[0].IsBuffer()) {
|
|
1186
|
+
auto buf = info[0].As<Napi::Buffer<uint8_t>>();
|
|
1187
|
+
pending_buffer_ref_ = Napi::Persistent(buf.As<Napi::Object>());
|
|
1188
|
+
pending_buffer_data_ = buf.Data();
|
|
1189
|
+
pending_buffer_size_ = buf.Length();
|
|
1190
|
+
total_bytes_ += chunk_size;
|
|
1191
|
+
return;
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
if (pending_buffer_size_ > 0) {
|
|
1195
|
+
if (pending_buffer_size_ > SIZE_MAX - pending_stream_.size()) {
|
|
1196
|
+
throw Napi::Error::New(env, "Buffered stream size would overflow");
|
|
1197
|
+
}
|
|
1198
|
+
pending_stream_.append(
|
|
1199
|
+
reinterpret_cast<const char*>(pending_buffer_data_),
|
|
1200
|
+
pending_buffer_size_);
|
|
1201
|
+
clearPendingBuffer();
|
|
1202
|
+
}
|
|
1203
|
+
|
|
1204
|
+
if (chunk_size > SIZE_MAX - pending_stream_.size()) {
|
|
1205
|
+
throw Napi::Error::New(env, "Buffered stream size would overflow");
|
|
1206
|
+
}
|
|
1048
1207
|
pending_stream_.append(reinterpret_cast<const char*>(chunk_data), chunk_size);
|
|
1049
1208
|
total_bytes_ += chunk_size;
|
|
1050
1209
|
|
|
@@ -1054,7 +1213,7 @@ public:
|
|
|
1054
1213
|
}
|
|
1055
1214
|
return;
|
|
1056
1215
|
}
|
|
1057
|
-
} else if (!pending_stream_.empty()) {
|
|
1216
|
+
} else if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
|
|
1058
1217
|
flushPendingStreamToParser();
|
|
1059
1218
|
stream_buffering_active_ = false;
|
|
1060
1219
|
}
|
|
@@ -1072,6 +1231,21 @@ public:
|
|
|
1072
1231
|
if (stream_buffering_active_ && !pending_stream_.empty() &&
|
|
1073
1232
|
!hasTransforms() && iterator_ == nullptr &&
|
|
1074
1233
|
rc_ && rc_->rows.empty() && rc_->current.empty()) {
|
|
1234
|
+
napi_value fast_rows = nullptr;
|
|
1235
|
+
if (tryParseSimpleLfToJsRows(
|
|
1236
|
+
info.Env(),
|
|
1237
|
+
reinterpret_cast<const uint8_t*>(pending_stream_.data()),
|
|
1238
|
+
pending_stream_.size(),
|
|
1239
|
+
config_,
|
|
1240
|
+
&fast_rows)) {
|
|
1241
|
+
clearBatchResult();
|
|
1242
|
+
clearFastRows();
|
|
1243
|
+
fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
|
|
1244
|
+
pending_stream_.clear();
|
|
1245
|
+
rc_->env = nullptr;
|
|
1246
|
+
return;
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1075
1249
|
cisv_result_t *batch = cisv_parse_string_batch(
|
|
1076
1250
|
pending_stream_.data(), pending_stream_.size(), &config_);
|
|
1077
1251
|
if (!batch) {
|
|
@@ -1083,13 +1257,55 @@ public:
|
|
|
1083
1257
|
throw Napi::Error::New(info.Env(), msg);
|
|
1084
1258
|
}
|
|
1085
1259
|
clearBatchResult();
|
|
1260
|
+
clearFastRows();
|
|
1086
1261
|
batch_result_ = batch;
|
|
1087
1262
|
pending_stream_.clear();
|
|
1088
1263
|
rc_->env = nullptr;
|
|
1089
1264
|
return;
|
|
1090
1265
|
}
|
|
1091
1266
|
|
|
1092
|
-
if (
|
|
1267
|
+
if (stream_buffering_active_ && pending_buffer_size_ > 0 &&
|
|
1268
|
+
pending_stream_.empty() &&
|
|
1269
|
+
!hasTransforms() && iterator_ == nullptr &&
|
|
1270
|
+
rc_ && rc_->rows.empty() && rc_->current.empty()) {
|
|
1271
|
+
napi_value fast_rows = nullptr;
|
|
1272
|
+
if (tryParseSimpleLfToJsRows(
|
|
1273
|
+
info.Env(),
|
|
1274
|
+
pending_buffer_data_,
|
|
1275
|
+
pending_buffer_size_,
|
|
1276
|
+
config_,
|
|
1277
|
+
&fast_rows)) {
|
|
1278
|
+
clearBatchResult();
|
|
1279
|
+
clearFastRows();
|
|
1280
|
+
fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
|
|
1281
|
+
clearPendingBuffer();
|
|
1282
|
+
rc_->env = nullptr;
|
|
1283
|
+
return;
|
|
1284
|
+
}
|
|
1285
|
+
|
|
1286
|
+
cisv_result_t *batch = cisv_parse_string_batch(
|
|
1287
|
+
reinterpret_cast<const char*>(pending_buffer_data_),
|
|
1288
|
+
pending_buffer_size_,
|
|
1289
|
+
&config_);
|
|
1290
|
+
if (!batch) {
|
|
1291
|
+
clearPendingBuffer();
|
|
1292
|
+
throw Napi::Error::New(info.Env(), "parse error: " + std::string(strerror(errno)));
|
|
1293
|
+
}
|
|
1294
|
+
if (batch->error_code != 0) {
|
|
1295
|
+
std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
|
|
1296
|
+
cisv_result_free(batch);
|
|
1297
|
+
clearPendingBuffer();
|
|
1298
|
+
throw Napi::Error::New(info.Env(), msg);
|
|
1299
|
+
}
|
|
1300
|
+
clearBatchResult();
|
|
1301
|
+
clearFastRows();
|
|
1302
|
+
batch_result_ = batch;
|
|
1303
|
+
clearPendingBuffer();
|
|
1304
|
+
rc_->env = nullptr;
|
|
1305
|
+
return;
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
|
|
1093
1309
|
flushPendingStreamToParser();
|
|
1094
1310
|
stream_buffering_active_ = false;
|
|
1095
1311
|
}
|
|
@@ -1111,18 +1327,24 @@ public:
|
|
|
1111
1327
|
flushPendingStreamToParser();
|
|
1112
1328
|
stream_buffering_active_ = false;
|
|
1113
1329
|
}
|
|
1330
|
+
if (pending_buffer_size_ > 0) {
|
|
1331
|
+
flushPendingStreamToParser();
|
|
1332
|
+
stream_buffering_active_ = false;
|
|
1333
|
+
}
|
|
1114
1334
|
return drainRows(info.Env());
|
|
1115
1335
|
}
|
|
1116
1336
|
|
|
1117
1337
|
void Clear(const Napi::CallbackInfo &info) {
|
|
1118
1338
|
if (!is_destroyed_ && rc_) {
|
|
1119
1339
|
clearBatchResult();
|
|
1340
|
+
clearFastRows();
|
|
1120
1341
|
rc_->rows.clear();
|
|
1121
1342
|
rc_->current.clear();
|
|
1122
1343
|
rc_->current_field_index = 0;
|
|
1123
1344
|
total_bytes_ = 0;
|
|
1124
1345
|
parse_time_ = 0;
|
|
1125
1346
|
pending_stream_.clear();
|
|
1347
|
+
clearPendingBuffer();
|
|
1126
1348
|
stream_buffering_active_ = true;
|
|
1127
1349
|
// Also clear the environment reference
|
|
1128
1350
|
rc_->env = nullptr;
|
|
@@ -1661,7 +1883,16 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1661
1883
|
Napi::Object stats = Napi::Object::New(env);
|
|
1662
1884
|
size_t row_count = 0;
|
|
1663
1885
|
size_t field_count = 0;
|
|
1664
|
-
if (
|
|
1886
|
+
if (!fast_rows_ref_.IsEmpty()) {
|
|
1887
|
+
Napi::Array rows = fast_rows_ref_.Value().As<Napi::Array>();
|
|
1888
|
+
row_count = rows.Length();
|
|
1889
|
+
if (row_count > 0) {
|
|
1890
|
+
Napi::Value first = rows.Get(static_cast<uint32_t>(0));
|
|
1891
|
+
if (first.IsArray()) {
|
|
1892
|
+
field_count = first.As<Napi::Array>().Length();
|
|
1893
|
+
}
|
|
1894
|
+
}
|
|
1895
|
+
} else if (batch_result_) {
|
|
1665
1896
|
row_count = batch_result_->row_count;
|
|
1666
1897
|
if (batch_result_->row_count > 0) {
|
|
1667
1898
|
field_count = batch_result_->rows[0].field_count;
|
|
@@ -1793,8 +2024,15 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1793
2024
|
|
|
1794
2025
|
napi_value row;
|
|
1795
2026
|
napi_create_array_with_length(env, field_count, &row);
|
|
2027
|
+
bool ascii_only = true;
|
|
1796
2028
|
for (size_t i = 0; i < field_count; i++) {
|
|
1797
|
-
|
|
2029
|
+
if (!isAllAscii(fields[i], lengths[i])) {
|
|
2030
|
+
ascii_only = false;
|
|
2031
|
+
break;
|
|
2032
|
+
}
|
|
2033
|
+
}
|
|
2034
|
+
for (size_t i = 0; i < field_count; i++) {
|
|
2035
|
+
napi_set_element(env, row, i, NewCsvStringValue(env, fields[i], lengths[i], ascii_only));
|
|
1798
2036
|
}
|
|
1799
2037
|
|
|
1800
2038
|
return Napi::Value(env, row);
|
|
@@ -1843,6 +2081,20 @@ private:
|
|
|
1843
2081
|
}
|
|
1844
2082
|
}
|
|
1845
2083
|
|
|
2084
|
+
void clearFastRows() {
|
|
2085
|
+
if (!fast_rows_ref_.IsEmpty()) {
|
|
2086
|
+
fast_rows_ref_.Reset();
|
|
2087
|
+
}
|
|
2088
|
+
}
|
|
2089
|
+
|
|
2090
|
+
void clearPendingBuffer() {
|
|
2091
|
+
if (!pending_buffer_ref_.IsEmpty()) {
|
|
2092
|
+
pending_buffer_ref_.Reset();
|
|
2093
|
+
}
|
|
2094
|
+
pending_buffer_data_ = nullptr;
|
|
2095
|
+
pending_buffer_size_ = 0;
|
|
2096
|
+
}
|
|
2097
|
+
|
|
1846
2098
|
bool hasTransforms() const {
|
|
1847
2099
|
bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;
|
|
1848
2100
|
bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
|
|
@@ -1851,7 +2103,9 @@ private:
|
|
|
1851
2103
|
|
|
1852
2104
|
void resetRowState() {
|
|
1853
2105
|
clearBatchResult();
|
|
2106
|
+
clearFastRows();
|
|
1854
2107
|
pending_stream_.clear();
|
|
2108
|
+
clearPendingBuffer();
|
|
1855
2109
|
stream_buffering_active_ = true;
|
|
1856
2110
|
if (!rc_) return;
|
|
1857
2111
|
rc_->rows.clear();
|
|
@@ -1860,6 +2114,11 @@ private:
|
|
|
1860
2114
|
}
|
|
1861
2115
|
|
|
1862
2116
|
void flushPendingStreamToParser() {
|
|
2117
|
+
if (pending_buffer_size_ > 0) {
|
|
2118
|
+
ensureParser(Env());
|
|
2119
|
+
cisv_parser_write(parser_, pending_buffer_data_, pending_buffer_size_);
|
|
2120
|
+
clearPendingBuffer();
|
|
2121
|
+
}
|
|
1863
2122
|
if (pending_stream_.empty()) {
|
|
1864
2123
|
return;
|
|
1865
2124
|
}
|
|
@@ -1888,15 +2147,27 @@ private:
|
|
|
1888
2147
|
}
|
|
1889
2148
|
|
|
1890
2149
|
Napi::Value drainRows(Napi::Env env) {
|
|
2150
|
+
if (!fast_rows_ref_.IsEmpty()) {
|
|
2151
|
+
return fast_rows_ref_.Value();
|
|
2152
|
+
}
|
|
2153
|
+
|
|
1891
2154
|
if (batch_result_) {
|
|
1892
2155
|
napi_value rows;
|
|
1893
2156
|
napi_create_array_with_length(env, batch_result_->row_count, &rows);
|
|
2157
|
+
const bool ascii_only =
|
|
2158
|
+
!batch_result_->field_data ||
|
|
2159
|
+
batch_result_->field_data_size == 0 ||
|
|
2160
|
+
isAllAscii(batch_result_->field_data, batch_result_->field_data_size);
|
|
1894
2161
|
for (size_t i = 0; i < batch_result_->row_count; ++i) {
|
|
1895
2162
|
const cisv_row_t *src_row = &batch_result_->rows[i];
|
|
1896
2163
|
napi_value row;
|
|
1897
2164
|
napi_create_array_with_length(env, src_row->field_count, &row);
|
|
1898
2165
|
for (size_t j = 0; j < src_row->field_count; ++j) {
|
|
1899
|
-
napi_set_element(
|
|
2166
|
+
napi_set_element(
|
|
2167
|
+
env,
|
|
2168
|
+
row,
|
|
2169
|
+
j,
|
|
2170
|
+
NewCsvStringValue(env, src_row->fields[j], src_row->field_lengths[j], ascii_only));
|
|
1900
2171
|
}
|
|
1901
2172
|
napi_set_element(env, rows, i, row);
|
|
1902
2173
|
}
|
|
@@ -1909,6 +2180,7 @@ private:
|
|
|
1909
2180
|
|
|
1910
2181
|
napi_value rows;
|
|
1911
2182
|
napi_create_array_with_length(env, rc_->rows.size(), &rows);
|
|
2183
|
+
const bool ascii_only = rowsAreAscii(rc_->rows);
|
|
1912
2184
|
|
|
1913
2185
|
for (size_t i = 0; i < rc_->rows.size(); ++i) {
|
|
1914
2186
|
napi_value row;
|
|
@@ -1916,7 +2188,11 @@ private:
|
|
|
1916
2188
|
for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
|
|
1917
2189
|
// SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
|
|
1918
2190
|
const std::string& field = rc_->rows[i][j];
|
|
1919
|
-
napi_set_element(
|
|
2191
|
+
napi_set_element(
|
|
2192
|
+
env,
|
|
2193
|
+
row,
|
|
2194
|
+
j,
|
|
2195
|
+
NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
|
|
1920
2196
|
}
|
|
1921
2197
|
napi_set_element(env, rows, i, row);
|
|
1922
2198
|
}
|
|
@@ -1935,7 +2211,11 @@ private:
|
|
|
1935
2211
|
bool is_destroyed_;
|
|
1936
2212
|
cisv_iterator_t *iterator_; // For row-by-row iteration
|
|
1937
2213
|
cisv_result_t *batch_result_;
|
|
2214
|
+
Napi::ObjectReference fast_rows_ref_;
|
|
1938
2215
|
std::string pending_stream_;
|
|
2216
|
+
Napi::ObjectReference pending_buffer_ref_;
|
|
2217
|
+
const uint8_t *pending_buffer_data_;
|
|
2218
|
+
size_t pending_buffer_size_;
|
|
1939
2219
|
bool stream_buffering_active_;
|
|
1940
2220
|
static constexpr size_t kStreamBufferLimitBytes = 8 * 1024 * 1024;
|
|
1941
2221
|
};
|
package/cisv/index.js
CHANGED
package/cisv/index.mjs
CHANGED
|
@@ -7,6 +7,10 @@ const require = createRequire(import.meta.url);
|
|
|
7
7
|
|
|
8
8
|
const gyp = require('node-gyp-build');
|
|
9
9
|
const addon = gyp(path.join(__dirname, '..'));
|
|
10
|
+
const { wrapAddon } = require('./wrapper.js');
|
|
11
|
+
const wrapped = wrapAddon(addon);
|
|
10
12
|
|
|
11
|
-
export const cisvParser =
|
|
12
|
-
export
|
|
13
|
+
export const cisvParser = wrapped.cisvParser;
|
|
14
|
+
export const TransformType = wrapped.TransformType;
|
|
15
|
+
export const version = wrapped.version;
|
|
16
|
+
export default wrapped;
|
package/cisv/wrapper.js
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { isAscii } = require('buffer');
|
|
4
|
+
|
|
5
|
+
/**
 * Decides whether parser options are compatible with the JS fast path.
 *
 * @param {object|null|undefined} options Parser options as given by the caller.
 * @returns {{delimiter: string, quote: string}|null} The delimiter and quote to
 *   use on the fast path, or null when any option rules the fast path out
 *   (non-object options, delimiter/quote that is not a one-character string,
 *   a non-empty escape or comment, any of trim / skipEmptyLines / relaxed /
 *   skipLinesWithError, a non-zero maxRowSize, fromLine other than 0 or 1, or a
 *   non-zero toLine).
 */
function fastConfigFromOptions(options) {
  if (options == null) {
    return { delimiter: ',', quote: '"' };
  }
  if (typeof options !== 'object') {
    return null;
  }

  const orDefault = (value, fallback) => (value == null ? fallback : value);
  const isSingleChar = (value) => typeof value === 'string' && value.length === 1;
  const isProvided = (value) => value != null && value !== '';

  const delimiter = orDefault(options.delimiter, ',');
  const quote = orDefault(options.quote, '"');
  if (!isSingleChar(delimiter)) {
    return null;
  }
  if (!isSingleChar(quote)) {
    return null;
  }

  if (isProvided(options.escape) || isProvided(options.comment)) {
    return null;
  }

  const hasBehaviourFlag =
    options.trim || options.skipEmptyLines || options.relaxed || options.skipLinesWithError;
  if (hasBehaviourFlag) {
    return null;
  }

  if (orDefault(options.maxRowSize, 0) !== 0) {
    return null;
  }

  const fromLine = orDefault(options.fromLine, 1);
  const toLine = orDefault(options.toLine, 0);
  const startsAtTop = fromLine === 0 || fromLine === 1;
  if (!startsAtTop || toLine !== 0) {
    return null;
  }

  return { delimiter, quote };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Returns `chunk` as a string: Buffers are decoded as latin1, anything else is
 * returned untouched.
 *
 * @param {Buffer|string} chunk
 * @returns {string}
 */
function chunkToLatin1String(chunk) {
  if (!Buffer.isBuffer(chunk)) {
    return chunk;
  }
  return chunk.toString('latin1');
}
|
|
50
|
+
|
|
51
|
+
/**
 * Joins buffered chunks into one string. Buffer chunks are decoded as latin1;
 * string chunks are used as they are. A single chunk is converted directly
 * without going through concatenation.
 *
 * @param {Array<Buffer|string>} chunks
 * @returns {string}
 */
function chunksToLatin1String(chunks) {
  const decode = (piece) => (Buffer.isBuffer(piece) ? piece.toString('latin1') : piece);

  if (chunks.length === 1) {
    return decode(chunks[0]);
  }

  let joined = '';
  for (const piece of chunks) {
    joined += decode(piece);
  }
  return joined;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Checks that a string can be split naively on LF and the delimiter.
 *
 * @param {string} data Text to inspect.
 * @param {string} quote Quote character; only its first code unit is used.
 * @returns {boolean} false when the text begins with the code units
 *   0xEF 0xBB 0xBF, or contains the quote character, a carriage return, or any
 *   code unit above 127; true otherwise (including for the empty string).
 */
function isSimpleAsciiLf(data, quote) {
  const CARRIAGE_RETURN = 13;
  const ASCII_MAX = 127;

  const startsWithBomBytes =
    data.length >= 3 &&
    data.charCodeAt(0) === 0xEF &&
    data.charCodeAt(1) === 0xBB &&
    data.charCodeAt(2) === 0xBF;
  if (startsWithBomBytes) {
    return false;
  }

  const quoteCode = quote.charCodeAt(0);
  let index = 0;
  while (index < data.length) {
    const unit = data.charCodeAt(index);
    const forbidden = unit === quoteCode || unit === CARRIAGE_RETURN || unit > ASCII_MAX;
    if (forbidden) {
      return false;
    }
    index += 1;
  }
  return true;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Checks that every buffered chunk can be split naively on LF and the
 * delimiter.
 *
 * Buffer chunks must be pure ASCII and contain neither the quote byte nor a
 * carriage return; the first chunk must also not start with the UTF-8 BOM
 * bytes. String chunks are delegated to isSimpleAsciiLf().
 *
 * `buffer.isAscii` is not exported by older Node.js releases. When it is
 * missing, a byte-by-byte scan is used instead of failing with a TypeError.
 *
 * @param {Array<Buffer|string>} chunks Chunks in arrival order.
 * @param {string} quote Quote character; only its first code unit is used.
 * @returns {boolean} true when all chunks pass (also for an empty list).
 */
function chunksAreSimpleAsciiLf(chunks, quote) {
  const quoteCode = quote.charCodeAt(0);

  // Prefer the native check; fall back to a manual scan when unavailable.
  const bufferIsAscii = typeof isAscii === 'function'
    ? isAscii
    : (buf) => {
      for (let j = 0; j < buf.length; j++) {
        if (buf[j] > 127) {
          return false;
        }
      }
      return true;
    };

  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];
    if (Buffer.isBuffer(chunk)) {
      if (i === 0 &&
          chunk.length >= 3 &&
          chunk[0] === 0xEF &&
          chunk[1] === 0xBB &&
          chunk[2] === 0xBF) {
        return false;
      }
      if (!bufferIsAscii(chunk) || chunk.indexOf(quoteCode) !== -1 || chunk.indexOf(13) !== -1) {
        return false;
      }
    } else if (!isSimpleAsciiLf(chunk, quote)) {
      return false;
    }
  }
  return true;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Scans one chunk once and reports whether it qualifies for naive splitting
 * and whether all of its rows have the same number of columns.
 *
 * The whole chunk is always validated. A row whose column count differs from
 * the first row only clears the `uniform` flag; scanning continues so that a
 * quote, carriage return or non-ASCII byte appearing later still yields
 * `simple: false`. (Returning at the first non-uniform row would report
 * `simple: true` for data that was never inspected.)
 *
 * A leading UTF-8 BOM needs no dedicated check: its first byte (0xEF), or the
 * code unit U+FEFF in a string, is above 127 and is rejected by the scan.
 *
 * @param {Buffer|string} chunk Data to analyse.
 * @param {string} delimiter Field delimiter; only its first code unit is used.
 * @param {string} quote Quote character; only its first code unit is used.
 * @returns {{simple: boolean, uniform: boolean, rows: number, cols: number}}
 *   `rows` and `cols` are meaningful only when both flags are true; they are 0
 *   otherwise. A final line without a terminating LF counts as a row.
 */
function analyzeSingleSimpleChunk(chunk, delimiter, quote) {
  const delimiterCode = delimiter.charCodeAt(0);
  const quoteCode = quote.charCodeAt(0);
  const isBuffer = Buffer.isBuffer(chunk);
  const length = chunk.length;

  let cols = -1;        // Column count of the first row, -1 until known.
  let currentCols = 1;  // Column count of the row being scanned.
  let hasData = false;  // True once the current row has seen any byte.
  let rows = 0;
  let uniform = true;

  for (let i = 0; i < length; i++) {
    const code = isBuffer ? chunk[i] : chunk.charCodeAt(i);
    if (code === quoteCode || code === 13 || code > 127) {
      return { simple: false, uniform: false, rows: 0, cols: 0 };
    }
    if (code === delimiterCode) {
      currentCols++;
      hasData = true;
    } else if (code === 10) {
      if (cols === -1) {
        cols = currentCols;
      } else if (currentCols !== cols) {
        // Keep scanning: the rest of the chunk must still be validated.
        uniform = false;
      }
      rows++;
      currentCols = 1;
      hasData = false;
    } else {
      hasData = true;
    }
  }

  // An unterminated final row must match the established column count too.
  if (hasData && cols !== -1 && currentCols !== cols) {
    uniform = false;
  }
  if (!uniform) {
    return { simple: true, uniform: false, rows: 0, cols: 0 };
  }

  if (hasData) {
    rows++;
    if (cols === -1) {
      cols = currentCols;
    }
  }

  return { simple: true, uniform: true, rows, cols: Math.max(cols, 0) };
}
|
|
178
|
+
|
|
179
|
+
/**
 * Splits pre-validated text into rows and fields with plain string splitting.
 *
 * One trailing LF, if present, is dropped before splitting, so it does not
 * create an extra row. Empty input gives no rows; input consisting of a single
 * LF gives one row holding one empty field.
 *
 * @param {string} data Text to split.
 * @param {string} delimiter Field separator passed to String#split.
 * @returns {string[][]}
 */
function parseSimpleRows(data, delimiter) {
  if (data.length === 0) {
    return [];
  }

  const body = data.endsWith('\n') ? data.slice(0, data.length - 1) : data;
  if (body.length === 0) {
    return [['']];
  }

  return body.split('\n').map((line) => line.split(delimiter));
}
|
|
197
|
+
|
|
198
|
+
/**
 * Splits pre-validated text in which every row has the same number of fields.
 *
 * One trailing LF, if present, is ignored. When `rowCount` and `cols` are both
 * positive the result array is preallocated with `rowCount` slots; otherwise
 * the column count is taken from the first line and rows are pushed.
 *
 * The loop runs while `pos <= end` so that an empty final line (for example
 * the second line of "a\n\n") produces its own row. With `pos < end` that row
 * was dropped, which disagreed with parseSimpleRows() and left a hole in the
 * preallocated array.
 *
 * Callers must pass data whose rows really are uniform: the delimiter search
 * is not bounded to the current line.
 *
 * @param {string} data Text to split.
 * @param {string} delimiter Single-character field separator.
 * @param {number} rowCount Expected row count, or 0 to disable preallocation.
 * @param {number} cols Expected column count, or 0 to derive it from line one.
 * @returns {string[][]}
 */
function parseUniformRows(data, delimiter, rowCount, cols) {
  let end = data.length;
  if (end === 0) {
    return [];
  }
  if (data.charCodeAt(end - 1) === 10) {
    end--;
  }
  if (end === 0) {
    return [['']];
  }

  const usePrealloc = rowCount > 0 && cols > 0;
  if (!usePrealloc) {
    // Derive the column count from the first line.
    cols = 1;
    for (let i = 0; i < end && data.charCodeAt(i) !== 10; i++) {
      if (data[i] === delimiter) {
        cols++;
      }
    }
  }

  const rows = usePrealloc ? new Array(rowCount) : [];
  let rowIdx = 0;
  let pos = 0;
  // `<=` so that a line starting exactly at `end` (an empty last line) is kept.
  while (pos <= end) {
    const row = new Array(cols);
    for (let col = 0; col < cols - 1; col++) {
      const next = data.indexOf(delimiter, pos);
      row[col] = data.slice(pos, next);
      pos = next + 1;
    }

    let lineEnd = data.indexOf('\n', pos);
    if (lineEnd === -1 || lineEnd > end) {
      lineEnd = end;
    }
    row[cols - 1] = data.slice(pos, lineEnd);
    if (usePrealloc) {
      rows[rowIdx++] = row;
    } else {
      rows.push(row);
    }
    pos = lineEnd + 1;
  }

  return rows;
}
|
|
246
|
+
|
|
247
|
+
/**
 * Wraps the addon's parser class with a JS-side fast path.
 *
 * While the fast path is open, write() only collects chunks. end() then
 * checks the collected data and, if it is plain ASCII with LF line endings
 * and no quote characters, splits it in JS. In every other case the
 * collected chunks are replayed into the base class and the instance stays on
 * the base-class path until clear() is called.
 *
 * @param {object} addon Loaded addon exports; `addon.cisvParser` is the class
 *   to extend.
 * @returns {object} A shallow copy of `addon` with `cisvParser` replaced by the
 *   wrapping subclass.
 */
function wrapAddon(addon) {
  const NativeParser = addon.cisvParser;

  // Inputs at least this long have their result arrays preallocated.
  const LARGE_INPUT_CHARS = 64 * 1024 * 1024;

  // True while chunks may still be collected in JS for this parser.
  const fastPathOpen = (parser) => Boolean(parser._cisvFastConfig) && !parser._cisvNativeStream;

  class cisvParser extends NativeParser {
    constructor(options) {
      super(options);
      this._cisvFastConfig = fastConfigFromOptions(options);
      this._cisvFastChunks = [];
      this._cisvFastRows = null;
      this._cisvNativeStream = false;
    }

    // Replays collected chunks into the base class and closes the fast path.
    _flushFastChunksToNative() {
      const buffered = this._cisvFastChunks;
      if (buffered.length === 0) {
        return;
      }
      this._cisvFastChunks = [];
      this._cisvNativeStream = true;
      for (const chunk of buffered) {
        super.write(chunk);
      }
    }

    write(chunk) {
      // Any new input invalidates rows produced by an earlier end().
      this._cisvFastRows = null;

      if (fastPathOpen(this) && (Buffer.isBuffer(chunk) || typeof chunk === 'string')) {
        this._cisvFastChunks.push(chunk);
        return;
      }

      this._flushFastChunksToNative();
      this._cisvNativeStream = true;
      return super.write(chunk);
    }

    end() {
      if (fastPathOpen(this) && this._cisvFastChunks.length > 0) {
        const chunks = this._cisvFastChunks;
        const { delimiter, quote } = this._cisvFastConfig;

        // Row/column counts are only available for the single-chunk analysis.
        let analysis;
        if (chunks.length === 1) {
          analysis = analyzeSingleSimpleChunk(chunks[0], delimiter, quote);
        } else {
          analysis = {
            simple: chunksAreSimpleAsciiLf(chunks, quote),
            uniform: false,
            rows: undefined,
            cols: undefined,
          };
        }

        if (analysis.simple) {
          const data = chunksToLatin1String(chunks);
          if (analysis.uniform) {
            const prealloc = data.length >= LARGE_INPUT_CHARS;
            this._cisvFastRows = parseUniformRows(
              data,
              delimiter,
              prealloc ? analysis.rows : 0,
              prealloc ? analysis.cols : 0);
          } else {
            this._cisvFastRows = parseSimpleRows(data, delimiter);
          }
          this._cisvFastChunks = [];
          return;
        }
      }

      this._flushFastChunksToNative();
      this._cisvNativeStream = true;
      return super.end();
    }

    getRows() {
      const fastRows = this._cisvFastRows;
      return fastRows !== null ? fastRows : super.getRows();
    }

    clear() {
      // Re-opens the fast path for the next input.
      this._cisvFastChunks = [];
      this._cisvFastRows = null;
      this._cisvNativeStream = false;
      return super.clear();
    }

    setConfig(options) {
      // Chunks collected so far are handed to the base class before the
      // configuration changes.
      this._flushFastChunksToNative();
      this._cisvFastRows = null;
      this._cisvFastConfig = fastConfigFromOptions(options);
      return super.setConfig(options);
    }

    transform(...args) {
      // Registering a transform turns the fast path off for this instance.
      this._flushFastChunksToNative();
      this._cisvFastConfig = null;
      return super.transform(...args);
    }

    transformByName(...args) {
      this._flushFastChunksToNative();
      this._cisvFastConfig = null;
      return super.transformByName(...args);
    }

    destroy() {
      this._cisvFastChunks = [];
      this._cisvFastRows = null;
      return super.destroy();
    }
  }

  return Object.assign({}, addon, { cisvParser });
}
|
|
370
|
+
|
|
371
|
+
module.exports = { wrapAddon };
|