cisv 0.4.8 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/binding.gyp +0 -1
- package/build/Release/cisv.node +0 -0
- package/cisv/cisv_addon.cc +485 -97
- package/cisv/index.js +2 -1
- package/cisv/index.mjs +6 -2
- package/cisv/types/cisv.d.ts +3 -4
- package/cisv/wrapper.js +371 -0
- package/package.json +1 -1
package/README.md
CHANGED
package/binding.gyp
CHANGED
package/build/Release/cisv.node
CHANGED
|
Binary file
|
package/cisv/cisv_addon.cc
CHANGED
|
@@ -5,8 +5,11 @@
|
|
|
5
5
|
#include <memory>
|
|
6
6
|
#include <string>
|
|
7
7
|
#include <unordered_map>
|
|
8
|
+
#include <algorithm>
|
|
8
9
|
#include <chrono>
|
|
9
10
|
#include <cstdint>
|
|
11
|
+
#include <climits>
|
|
12
|
+
#include <cmath>
|
|
10
13
|
|
|
11
14
|
namespace {
|
|
12
15
|
|
|
@@ -46,6 +49,126 @@ static void ValidateSingleCharOption(
|
|
|
46
49
|
*target = raw[0];
|
|
47
50
|
}
|
|
48
51
|
|
|
52
|
+
// Largest integer a JS Number can hold without losing precision (2^53 - 1),
// returned as a double so it can be compared directly with Number values.
static double MaxJsSafeInteger() {
    const unsigned long long limit = (1ULL << 53) - 1ULL;
    return static_cast<double>(limit);
}
|
|
55
|
+
|
|
56
|
+
// Returns true when `value` is finite and has no fractional part.
// NaN and +/-infinity are rejected.
static bool IsWholeNumber(double value) {
    if (!std::isfinite(value)) {
        return false;
    }
    // For a finite value, truncation leaves it unchanged exactly when it is integral.
    return std::trunc(value) == value;
}
|
|
59
|
+
|
|
60
|
+
// Copies a boolean option from `options` into `*target`.
//
// - Key absent: `*target` is left untouched.
// - Key present but not a JS boolean: throws Napi::TypeError
//   ("<option_name> must be a boolean").
static void ApplyBooleanOption(
    Napi::Env env,
    const Napi::Object &options,
    const char *option_name,
    bool *target
) {
    if (options.Has(option_name)) {
        Napi::Value candidate = options.Get(option_name);
        if (candidate.IsBoolean()) {
            *target = candidate.As<Napi::Boolean>().Value();
            return;
        }
        throw Napi::TypeError::New(env, std::string(option_name) + " must be a boolean");
    }
}
|
|
77
|
+
|
|
78
|
+
// Copies a non-negative integer option from `options` into `*target`.
//
// - Key absent: `*target` is left untouched.
// - null / undefined: `*target` is reset to 0.
// - Not a JS number: throws Napi::TypeError ("<option_name> must be a number").
// - Fractional, non-finite, negative, or larger than both SIZE_MAX and the JS
//   safe-integer limit: throws Napi::RangeError ("<option_name> is out of range").
static void ApplySizeOption(
    Napi::Env env,
    const Napi::Object &options,
    const char *option_name,
    size_t *target
) {
    if (!options.Has(option_name)) {
        return;
    }

    Napi::Value candidate = options.Get(option_name);
    if (candidate.IsNull() || candidate.IsUndefined()) {
        *target = 0;
        return;
    }
    if (!candidate.IsNumber()) {
        throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
    }

    const double requested = candidate.As<Napi::Number>().DoubleValue();
    // The value must fit both the native size type and JS's exact-integer range,
    // so the tighter of the two limits is the effective ceiling.
    const double ceiling = std::min(static_cast<double>(SIZE_MAX), MaxJsSafeInteger());
    const bool acceptable =
        IsWholeNumber(requested) && requested >= 0.0 && requested <= ceiling;
    if (!acceptable) {
        throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
    }

    *target = static_cast<size_t>(requested);
}
|
|
105
|
+
|
|
106
|
+
// Copies a line-number option (e.g. fromLine / toLine) from `options` into `*target`.
//
// - Key absent: `*target` is left untouched.
// - null / undefined: `*target` is reset to 0.
// - Not a JS number: throws Napi::TypeError ("<option_name> must be a number").
// - Fractional, non-finite, negative, or above INT_MAX: throws Napi::RangeError
//   ("<option_name> is out of range").
static void ApplyLineOption(
    Napi::Env env,
    const Napi::Object &options,
    const char *option_name,
    int *target
) {
    if (!options.Has(option_name)) {
        return;
    }

    Napi::Value candidate = options.Get(option_name);
    if (candidate.IsNull() || candidate.IsUndefined()) {
        *target = 0;
        return;
    }
    if (!candidate.IsNumber()) {
        throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
    }

    const double requested = candidate.As<Napi::Number>().DoubleValue();
    const bool acceptable = IsWholeNumber(requested) &&
                            requested >= 0.0 &&
                            requested <= static_cast<double>(INT_MAX);
    if (!acceptable) {
        throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
    }

    *target = static_cast<int>(requested);
}
|
|
132
|
+
|
|
133
|
+
// Cross-field validation of an already-populated config.
//
// Throws Napi::TypeError when two special characters collide (delimiter/quote,
// escape/delimiter, escape/quote, comment vs. any of the others) and
// Napi::RangeError when a non-zero toLine precedes the effective fromLine.
// An escape or comment of '\0' is treated as "not set" and never collides.
static void ValidateConfigSemantics(Napi::Env env, const cisv_config &config) {
    const bool escape_set = config.escape != '\0';
    const bool comment_set = config.comment != '\0';

    if (config.delimiter == config.quote) {
        throw Napi::TypeError::New(env, "delimiter and quote cannot be the same");
    }

    if (escape_set) {
        if (config.escape == config.delimiter) {
            throw Napi::TypeError::New(env, "escape and delimiter cannot be the same");
        }
        if (config.escape == config.quote) {
            throw Napi::TypeError::New(env, "escape and quote cannot be the same");
        }
    }

    if (comment_set) {
        const bool collides = config.comment == config.delimiter ||
                              config.comment == config.quote ||
                              config.comment == config.escape;
        if (collides) {
            throw Napi::TypeError::New(env, "comment cannot conflict with delimiter, quote, or escape");
        }
    }

    // A fromLine of 0 (or less) is treated as line 1 for the ordering check.
    const int first_line = std::max(config.from_line, 1);
    const bool to_line_set = config.to_line != 0;
    if (to_line_set && config.to_line < first_line) {
        throw Napi::RangeError::New(env, "toLine must be >= fromLine");
    }
}
|
|
153
|
+
|
|
154
|
+
// Reads every recognised parser option from `options` into `*config`, then
// runs the cross-field checks. Any of the per-option helpers, or the final
// ValidateConfigSemantics call, may throw; `*config` may be partially updated
// when that happens, so callers wanting all-or-nothing should pass a copy.
static void ApplyConfigOptions(Napi::Env env, const Napi::Object &options, cisv_config *config) {
    cisv_config &cfg = *config;

    // Single-character options. escape/comment pass `true` as the extra flag
    // (presumably "may be empty/disabled" -- confirm against ValidateSingleCharOption).
    ValidateSingleCharOption(env, options, "delimiter", &cfg.delimiter);
    ValidateSingleCharOption(env, options, "quote", &cfg.quote);
    ValidateSingleCharOption(env, options, "escape", &cfg.escape, true);
    ValidateSingleCharOption(env, options, "comment", &cfg.comment, true);

    // Boolean switches, applied in declaration order.
    const struct {
        const char *name;
        bool *slot;
    } flags[] = {
        {"skipEmptyLines", &cfg.skip_empty_lines},
        {"trim", &cfg.trim},
        {"relaxed", &cfg.relaxed},
        {"skipLinesWithError", &cfg.skip_lines_with_error},
    };
    for (const auto &flag : flags) {
        ApplyBooleanOption(env, options, flag.name, flag.slot);
    }

    // Numeric limits.
    ApplySizeOption(env, options, "maxRowSize", &cfg.max_row_size);
    ApplyLineOption(env, options, "fromLine", &cfg.from_line);
    ApplyLineOption(env, options, "toLine", &cfg.to_line);

    ValidateConfigSemantics(env, cfg);
}
|
|
171
|
+
|
|
49
172
|
// =============================================================================
|
|
50
173
|
// SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
|
|
51
174
|
// Invalid UTF-8 data can cause Napi::String::New to throw or crash
|
|
@@ -144,7 +267,7 @@ static napi_value SafeNewStringValue(napi_env env, const char* data, size_t len)
|
|
|
144
267
|
if (napi_create_string_latin1(env, data, len, &short_value) == napi_ok && short_value) {
|
|
145
268
|
return short_value;
|
|
146
269
|
}
|
|
147
|
-
} else {
|
|
270
|
+
} else if (isValidUtf8(data, len)) {
|
|
148
271
|
if (napi_create_string_utf8(env, data, len, &short_value) == napi_ok && short_value) {
|
|
149
272
|
return short_value;
|
|
150
273
|
}
|
|
@@ -221,6 +344,136 @@ static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
|
|
|
221
344
|
return Napi::String(env, SafeNewStringValue(env, data, len));
|
|
222
345
|
}
|
|
223
346
|
|
|
347
|
+
// Creates a JS string from `len` bytes at `data`, interpreting them as Latin-1.
// If the Latin-1 constructor fails, falls back to napi_create_string_utf8.
//
// Returns the new string handle, or nullptr when both attempts fail. The
// fallback's status is checked explicitly so a failed call can never leak a
// stale or partially written handle to the caller.
static napi_value NewLatin1StringValue(napi_env env, const char* data, size_t len) {
    napi_value value = nullptr;
    if (napi_create_string_latin1(env, data, len, &value) == napi_ok && value) {
        return value;
    }

    value = nullptr;
    if (napi_create_string_utf8(env, data, len, &value) != napi_ok) {
        return nullptr;
    }
    return value;
}
|
|
355
|
+
|
|
356
|
+
// Builds a JS string for one CSV field.
// When the caller has already established the data is ASCII-only, the Latin-1
// constructor is used directly; otherwise the field goes through
// SafeNewStringValue.
static napi_value NewCsvStringValue(
    napi_env env,
    const char* data,
    size_t len,
    bool ascii_only
) {
    if (ascii_only) {
        return NewLatin1StringValue(env, data, len);
    }
    return SafeNewStringValue(env, data, len);
}
|
|
364
|
+
|
|
365
|
+
static bool rowsAreAscii(const std::vector<std::vector<std::string>> &rows) {
|
|
366
|
+
for (const auto &row : rows) {
|
|
367
|
+
for (const auto &field : row) {
|
|
368
|
+
if (!isAllAscii(field.data(), field.size())) {
|
|
369
|
+
return false;
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
return true;
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
static bool canUseSimpleLfFastPath(const cisv_config &config) {
|
|
377
|
+
return config.escape == '\0' &&
|
|
378
|
+
config.comment == '\0' &&
|
|
379
|
+
!config.trim &&
|
|
380
|
+
!config.skip_empty_lines &&
|
|
381
|
+
!config.relaxed &&
|
|
382
|
+
!config.skip_lines_with_error &&
|
|
383
|
+
config.max_row_size == 0 &&
|
|
384
|
+
config.from_line <= 1 &&
|
|
385
|
+
config.to_line == 0;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
// Fast path that turns a "simple" CSV payload directly into a JS array of
// row arrays, bypassing the general parser.
//
// The payload qualifies only when canUseSimpleLfFastPath(config) holds and the
// data has no UTF-8 BOM, no quote character, no '\r', and no byte with the high
// bit set (i.e. it is pure ASCII with LF line endings).
//
// @param env    N-API environment used to create the JS values.
// @param data   Input bytes; a null pointer disables the fast path.
// @param len    Number of bytes at `data`.
// @param config Parser configuration (delimiter and quote are read here).
// @param out    Receives the rows array on success; set to nullptr otherwise.
// @return true when `*out` holds the complete result; false when the input does
//         not qualify or any N-API call fails, in which case the caller is
//         expected to fall back to the regular parser.
static bool tryParseSimpleLfToJsRows(
    napi_env env,
    const uint8_t *data,
    size_t len,
    const cisv_config &config,
    napi_value *out
) {
    *out = nullptr;
    if (!data || !canUseSimpleLfFastPath(config)) {
        return false;
    }
    // Inputs starting with a UTF-8 BOM are left to the general parser.
    if (len >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF) {
        return false;
    }

    const uint8_t quote = static_cast<uint8_t>(config.quote);
    const uint8_t delimiter = static_cast<uint8_t>(config.delimiter);

    // Pass 1: reject anything that is not plain ASCII/LF and count the rows.
    size_t row_count = 0;
    bool saw_data_in_row = false;
    for (size_t i = 0; i < len; i++) {
        const uint8_t c = data[i];
        if (c == quote || c == '\r' || (c & 0x80)) {
            return false;
        }
        if (c == '\n') {
            row_count++;
            saw_data_in_row = false;
        } else {
            saw_data_in_row = true;
        }
    }
    // A final line without a trailing '\n' still forms a row. saw_data_in_row is
    // true exactly when the input is non-empty and does not end in '\n'.
    if (saw_data_in_row) {
        row_count++;
    }

    // napi_set_element takes a uint32_t index; refuse inputs whose row count
    // would be silently truncated and let the general parser handle them.
    if (row_count > UINT32_MAX) {
        return false;
    }

    napi_value rows;
    if (napi_create_array_with_length(env, row_count, &rows) != napi_ok) {
        return false;
    }

    // Pass 2: materialise each line as an array of Latin-1 strings.
    size_t row_idx = 0;
    size_t line_start = 0;
    while (line_start < len && row_idx < row_count) {
        size_t line_end = line_start;
        while (line_end < len && data[line_end] != '\n') {
            line_end++;
        }

        // One field plus one per delimiter on this line.
        size_t field_count = 1;
        for (size_t i = line_start; i < line_end; i++) {
            if (data[i] == delimiter) {
                field_count++;
            }
        }
        // Same uint32_t index limit applies to fields within a row.
        if (field_count > UINT32_MAX) {
            return false;
        }

        napi_value row;
        if (napi_create_array_with_length(env, field_count, &row) != napi_ok) {
            return false;
        }

        size_t field_idx = 0;
        size_t field_start = line_start;
        // `i == line_end` acts as a virtual delimiter that closes the last field.
        for (size_t i = line_start; i <= line_end; i++) {
            if (i == line_end || data[i] == delimiter) {
                napi_value field = NewLatin1StringValue(
                    env,
                    reinterpret_cast<const char*>(data + field_start),
                    i - field_start);
                if (!field ||
                    napi_set_element(env, row, static_cast<uint32_t>(field_idx), field) != napi_ok) {
                    return false;
                }
                field_idx++;
                field_start = i + 1;
            }
        }

        if (napi_set_element(env, rows, static_cast<uint32_t>(row_idx), row) != napi_ok) {
            return false;
        }
        row_idx++;

        if (line_end == len) {
            break;
        }
        line_start = line_end + 1;
    }

    *out = rows;
    return true;
}
|
|
476
|
+
|
|
224
477
|
// Extended RowCollector that handles transforms
|
|
225
478
|
struct RowCollector {
|
|
226
479
|
std::vector<std::string> current;
|
|
@@ -427,11 +680,16 @@ static bool collectParallelRows(
|
|
|
427
680
|
|
|
428
681
|
static Napi::Array rowsToJsArray(Napi::Env env, const std::vector<std::vector<std::string>> &rows) {
|
|
429
682
|
Napi::Array out = Napi::Array::New(env, rows.size());
|
|
683
|
+
const bool ascii_only = rowsAreAscii(rows);
|
|
430
684
|
for (size_t i = 0; i < rows.size(); i++) {
|
|
431
685
|
Napi::Array row = Napi::Array::New(env, rows[i].size());
|
|
432
686
|
for (size_t j = 0; j < rows[i].size(); j++) {
|
|
433
687
|
const std::string &field = rows[i][j];
|
|
434
|
-
|
|
688
|
+
napi_set_element(
|
|
689
|
+
env,
|
|
690
|
+
row,
|
|
691
|
+
j,
|
|
692
|
+
NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
|
|
435
693
|
}
|
|
436
694
|
out[i] = row;
|
|
437
695
|
}
|
|
@@ -589,7 +847,7 @@ public:
|
|
|
589
847
|
}
|
|
590
848
|
|
|
591
849
|
CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
|
|
592
|
-
rc_ =
|
|
850
|
+
rc_ = nullptr;
|
|
593
851
|
parser_ = nullptr;
|
|
594
852
|
parse_time_ = 0;
|
|
595
853
|
total_bytes_ = 0;
|
|
@@ -597,6 +855,8 @@ public:
|
|
|
597
855
|
iterator_ = nullptr;
|
|
598
856
|
batch_result_ = nullptr;
|
|
599
857
|
stream_buffering_active_ = true;
|
|
858
|
+
pending_buffer_data_ = nullptr;
|
|
859
|
+
pending_buffer_size_ = 0;
|
|
600
860
|
|
|
601
861
|
// Initialize configuration with defaults
|
|
602
862
|
cisv_config_init(&config_);
|
|
@@ -609,6 +869,8 @@ public:
|
|
|
609
869
|
ApplyConfigFromObject(options);
|
|
610
870
|
}
|
|
611
871
|
|
|
872
|
+
rc_ = new RowCollector();
|
|
873
|
+
|
|
612
874
|
// Set callbacks
|
|
613
875
|
config_.field_cb = field_cb;
|
|
614
876
|
config_.row_cb = row_cb;
|
|
@@ -623,51 +885,9 @@ public:
|
|
|
623
885
|
// Apply configuration from JavaScript object
|
|
624
886
|
void ApplyConfigFromObject(Napi::Object options) {
    // Work on a copy so that config_ is only replaced when ApplyConfigOptions
    // returns normally; if it throws, the current configuration is unchanged.
    cisv_config staged = config_;
    ApplyConfigOptions(options.Env(), options, &staged);
    config_ = staged;
}
|
|
672
892
|
|
|
673
893
|
// Set configuration after creation
|
|
@@ -751,6 +971,9 @@ public:
|
|
|
751
971
|
rc_ = nullptr;
|
|
752
972
|
}
|
|
753
973
|
clearBatchResult();
|
|
974
|
+
clearFastRows();
|
|
975
|
+
pending_stream_.clear();
|
|
976
|
+
clearPendingBuffer();
|
|
754
977
|
is_destroyed_ = true;
|
|
755
978
|
}
|
|
756
979
|
}
|
|
@@ -817,16 +1040,28 @@ public:
|
|
|
817
1040
|
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
818
1041
|
}
|
|
819
1042
|
|
|
820
|
-
if (info.Length() != 1 || !info[0].IsString()) {
|
|
821
|
-
throw Napi::TypeError::New(env, "Expected CSV string");
|
|
1043
|
+
if (info.Length() != 1 || (!info[0].IsString() && !info[0].IsBuffer())) {
|
|
1044
|
+
throw Napi::TypeError::New(env, "Expected CSV string or Buffer");
|
|
822
1045
|
}
|
|
823
1046
|
|
|
824
|
-
|
|
1047
|
+
const char *content_data = nullptr;
|
|
1048
|
+
size_t content_len = 0;
|
|
1049
|
+
std::string content_storage;
|
|
1050
|
+
|
|
1051
|
+
if (info[0].IsBuffer()) {
|
|
1052
|
+
auto buffer = info[0].As<Napi::Buffer<char>>();
|
|
1053
|
+
content_data = buffer.Data();
|
|
1054
|
+
content_len = buffer.Length();
|
|
1055
|
+
} else {
|
|
1056
|
+
content_storage = info[0].As<Napi::String>();
|
|
1057
|
+
content_data = content_storage.data();
|
|
1058
|
+
content_len = content_storage.size();
|
|
1059
|
+
}
|
|
825
1060
|
|
|
826
1061
|
resetRowState();
|
|
827
1062
|
|
|
828
1063
|
if (!hasTransforms()) {
|
|
829
|
-
cisv_result_t *batch = cisv_parse_string_batch(
|
|
1064
|
+
cisv_result_t *batch = cisv_parse_string_batch(content_data, content_len, &config_);
|
|
830
1065
|
if (!batch) {
|
|
831
1066
|
throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
|
|
832
1067
|
}
|
|
@@ -843,14 +1078,14 @@ public:
|
|
|
843
1078
|
ensureParser(env);
|
|
844
1079
|
|
|
845
1080
|
// Write the string content as chunks
|
|
846
|
-
cisv_parser_write(parser_,
|
|
1081
|
+
cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(content_data), content_len);
|
|
847
1082
|
cisv_parser_end(parser_);
|
|
848
1083
|
|
|
849
1084
|
// Clear the environment reference after parsing
|
|
850
1085
|
rc_->env = nullptr;
|
|
851
1086
|
}
|
|
852
1087
|
|
|
853
|
-
total_bytes_ =
|
|
1088
|
+
total_bytes_ = content_len;
|
|
854
1089
|
|
|
855
1090
|
return drainRows(env);
|
|
856
1091
|
}
|
|
@@ -916,6 +1151,7 @@ public:
|
|
|
916
1151
|
|
|
917
1152
|
// Streaming writes produce row-callback data, not batch results.
|
|
918
1153
|
clearBatchResult();
|
|
1154
|
+
clearFastRows();
|
|
919
1155
|
|
|
920
1156
|
// Set environment for JS transforms
|
|
921
1157
|
rc_->env = env;
|
|
@@ -945,11 +1181,29 @@ public:
|
|
|
945
1181
|
// Buffer chunks when no transforms/iterator are active and batch-parse on end().
|
|
946
1182
|
// If buffered payload exceeds threshold, flush once to parser and continue streaming.
|
|
947
1183
|
if (!hasTransforms() && iterator_ == nullptr) {
|
|
948
|
-
if (chunk_size > SIZE_MAX - pending_stream_.size()) {
|
|
949
|
-
throw Napi::Error::New(env, "Buffered stream size would overflow");
|
|
950
|
-
}
|
|
951
|
-
|
|
952
1184
|
if (stream_buffering_active_) {
|
|
1185
|
+
if (chunk_size > 0 && pending_stream_.empty() && pending_buffer_size_ == 0 && info[0].IsBuffer()) {
|
|
1186
|
+
auto buf = info[0].As<Napi::Buffer<uint8_t>>();
|
|
1187
|
+
pending_buffer_ref_ = Napi::Persistent(buf.As<Napi::Object>());
|
|
1188
|
+
pending_buffer_data_ = buf.Data();
|
|
1189
|
+
pending_buffer_size_ = buf.Length();
|
|
1190
|
+
total_bytes_ += chunk_size;
|
|
1191
|
+
return;
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
if (pending_buffer_size_ > 0) {
|
|
1195
|
+
if (pending_buffer_size_ > SIZE_MAX - pending_stream_.size()) {
|
|
1196
|
+
throw Napi::Error::New(env, "Buffered stream size would overflow");
|
|
1197
|
+
}
|
|
1198
|
+
pending_stream_.append(
|
|
1199
|
+
reinterpret_cast<const char*>(pending_buffer_data_),
|
|
1200
|
+
pending_buffer_size_);
|
|
1201
|
+
clearPendingBuffer();
|
|
1202
|
+
}
|
|
1203
|
+
|
|
1204
|
+
if (chunk_size > SIZE_MAX - pending_stream_.size()) {
|
|
1205
|
+
throw Napi::Error::New(env, "Buffered stream size would overflow");
|
|
1206
|
+
}
|
|
953
1207
|
pending_stream_.append(reinterpret_cast<const char*>(chunk_data), chunk_size);
|
|
954
1208
|
total_bytes_ += chunk_size;
|
|
955
1209
|
|
|
@@ -959,7 +1213,7 @@ public:
|
|
|
959
1213
|
}
|
|
960
1214
|
return;
|
|
961
1215
|
}
|
|
962
|
-
} else if (!pending_stream_.empty()) {
|
|
1216
|
+
} else if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
|
|
963
1217
|
flushPendingStreamToParser();
|
|
964
1218
|
stream_buffering_active_ = false;
|
|
965
1219
|
}
|
|
@@ -977,6 +1231,21 @@ public:
|
|
|
977
1231
|
if (stream_buffering_active_ && !pending_stream_.empty() &&
|
|
978
1232
|
!hasTransforms() && iterator_ == nullptr &&
|
|
979
1233
|
rc_ && rc_->rows.empty() && rc_->current.empty()) {
|
|
1234
|
+
napi_value fast_rows = nullptr;
|
|
1235
|
+
if (tryParseSimpleLfToJsRows(
|
|
1236
|
+
info.Env(),
|
|
1237
|
+
reinterpret_cast<const uint8_t*>(pending_stream_.data()),
|
|
1238
|
+
pending_stream_.size(),
|
|
1239
|
+
config_,
|
|
1240
|
+
&fast_rows)) {
|
|
1241
|
+
clearBatchResult();
|
|
1242
|
+
clearFastRows();
|
|
1243
|
+
fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
|
|
1244
|
+
pending_stream_.clear();
|
|
1245
|
+
rc_->env = nullptr;
|
|
1246
|
+
return;
|
|
1247
|
+
}
|
|
1248
|
+
|
|
980
1249
|
cisv_result_t *batch = cisv_parse_string_batch(
|
|
981
1250
|
pending_stream_.data(), pending_stream_.size(), &config_);
|
|
982
1251
|
if (!batch) {
|
|
@@ -988,13 +1257,55 @@ public:
|
|
|
988
1257
|
throw Napi::Error::New(info.Env(), msg);
|
|
989
1258
|
}
|
|
990
1259
|
clearBatchResult();
|
|
1260
|
+
clearFastRows();
|
|
991
1261
|
batch_result_ = batch;
|
|
992
1262
|
pending_stream_.clear();
|
|
993
1263
|
rc_->env = nullptr;
|
|
994
1264
|
return;
|
|
995
1265
|
}
|
|
996
1266
|
|
|
997
|
-
if (
|
|
1267
|
+
if (stream_buffering_active_ && pending_buffer_size_ > 0 &&
|
|
1268
|
+
pending_stream_.empty() &&
|
|
1269
|
+
!hasTransforms() && iterator_ == nullptr &&
|
|
1270
|
+
rc_ && rc_->rows.empty() && rc_->current.empty()) {
|
|
1271
|
+
napi_value fast_rows = nullptr;
|
|
1272
|
+
if (tryParseSimpleLfToJsRows(
|
|
1273
|
+
info.Env(),
|
|
1274
|
+
pending_buffer_data_,
|
|
1275
|
+
pending_buffer_size_,
|
|
1276
|
+
config_,
|
|
1277
|
+
&fast_rows)) {
|
|
1278
|
+
clearBatchResult();
|
|
1279
|
+
clearFastRows();
|
|
1280
|
+
fast_rows_ref_ = Napi::Persistent(Napi::Value(info.Env(), fast_rows).As<Napi::Object>());
|
|
1281
|
+
clearPendingBuffer();
|
|
1282
|
+
rc_->env = nullptr;
|
|
1283
|
+
return;
|
|
1284
|
+
}
|
|
1285
|
+
|
|
1286
|
+
cisv_result_t *batch = cisv_parse_string_batch(
|
|
1287
|
+
reinterpret_cast<const char*>(pending_buffer_data_),
|
|
1288
|
+
pending_buffer_size_,
|
|
1289
|
+
&config_);
|
|
1290
|
+
if (!batch) {
|
|
1291
|
+
clearPendingBuffer();
|
|
1292
|
+
throw Napi::Error::New(info.Env(), "parse error: " + std::string(strerror(errno)));
|
|
1293
|
+
}
|
|
1294
|
+
if (batch->error_code != 0) {
|
|
1295
|
+
std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
|
|
1296
|
+
cisv_result_free(batch);
|
|
1297
|
+
clearPendingBuffer();
|
|
1298
|
+
throw Napi::Error::New(info.Env(), msg);
|
|
1299
|
+
}
|
|
1300
|
+
clearBatchResult();
|
|
1301
|
+
clearFastRows();
|
|
1302
|
+
batch_result_ = batch;
|
|
1303
|
+
clearPendingBuffer();
|
|
1304
|
+
rc_->env = nullptr;
|
|
1305
|
+
return;
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
if (!pending_stream_.empty() || pending_buffer_size_ > 0) {
|
|
998
1309
|
flushPendingStreamToParser();
|
|
999
1310
|
stream_buffering_active_ = false;
|
|
1000
1311
|
}
|
|
@@ -1016,18 +1327,24 @@ public:
|
|
|
1016
1327
|
flushPendingStreamToParser();
|
|
1017
1328
|
stream_buffering_active_ = false;
|
|
1018
1329
|
}
|
|
1330
|
+
if (pending_buffer_size_ > 0) {
|
|
1331
|
+
flushPendingStreamToParser();
|
|
1332
|
+
stream_buffering_active_ = false;
|
|
1333
|
+
}
|
|
1019
1334
|
return drainRows(info.Env());
|
|
1020
1335
|
}
|
|
1021
1336
|
|
|
1022
1337
|
void Clear(const Napi::CallbackInfo &info) {
|
|
1023
1338
|
if (!is_destroyed_ && rc_) {
|
|
1024
1339
|
clearBatchResult();
|
|
1340
|
+
clearFastRows();
|
|
1025
1341
|
rc_->rows.clear();
|
|
1026
1342
|
rc_->current.clear();
|
|
1027
1343
|
rc_->current_field_index = 0;
|
|
1028
1344
|
total_bytes_ = 0;
|
|
1029
1345
|
parse_time_ = 0;
|
|
1030
1346
|
pending_stream_.clear();
|
|
1347
|
+
clearPendingBuffer();
|
|
1031
1348
|
stream_buffering_active_ = true;
|
|
1032
1349
|
// Also clear the environment reference
|
|
1033
1350
|
rc_->env = nullptr;
|
|
@@ -1074,8 +1391,6 @@ public:
|
|
|
1074
1391
|
type = TRANSFORM_TO_INT;
|
|
1075
1392
|
} else if (transform_type == "to_float" || transform_type == "float") {
|
|
1076
1393
|
type = TRANSFORM_TO_FLOAT;
|
|
1077
|
-
} else if (transform_type == "hash_sha256" || transform_type == "sha256") {
|
|
1078
|
-
type = TRANSFORM_HASH_SHA256;
|
|
1079
1394
|
} else if (transform_type == "base64_encode" || transform_type == "base64") {
|
|
1080
1395
|
type = TRANSFORM_BASE64_ENCODE;
|
|
1081
1396
|
} else {
|
|
@@ -1185,8 +1500,6 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
|
|
|
1185
1500
|
type = TRANSFORM_TO_INT;
|
|
1186
1501
|
} else if (transform_type == "to_float" || transform_type == "float") {
|
|
1187
1502
|
type = TRANSFORM_TO_FLOAT;
|
|
1188
|
-
} else if (transform_type == "hash_sha256" || transform_type == "sha256") {
|
|
1189
|
-
type = TRANSFORM_HASH_SHA256;
|
|
1190
1503
|
} else if (transform_type == "base64_encode" || transform_type == "base64") {
|
|
1191
1504
|
type = TRANSFORM_BASE64_ENCODE;
|
|
1192
1505
|
} else {
|
|
@@ -1355,18 +1668,28 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1355
1668
|
|
|
1356
1669
|
std::string field_name = info[0].As<Napi::String>();
|
|
1357
1670
|
|
|
1671
|
+
int field_index = -1;
|
|
1672
|
+
|
|
1358
1673
|
// Remove from JavaScript transforms by finding the field index
|
|
1359
1674
|
if (rc_->pipeline && rc_->pipeline->header_fields) {
|
|
1360
1675
|
for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
|
|
1361
1676
|
if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
|
|
1362
|
-
|
|
1677
|
+
field_index = static_cast<int>(i);
|
|
1678
|
+
auto it = rc_->js_transforms.find(field_index);
|
|
1679
|
+
if (it != rc_->js_transforms.end()) {
|
|
1680
|
+
if (!it->second.IsEmpty()) {
|
|
1681
|
+
it->second.Reset();
|
|
1682
|
+
}
|
|
1683
|
+
rc_->js_transforms.erase(it);
|
|
1684
|
+
}
|
|
1363
1685
|
break;
|
|
1364
1686
|
}
|
|
1365
1687
|
}
|
|
1366
1688
|
}
|
|
1367
1689
|
|
|
1368
|
-
|
|
1369
|
-
|
|
1690
|
+
if (field_index >= 0 && rc_->pipeline) {
|
|
1691
|
+
cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
|
|
1692
|
+
}
|
|
1370
1693
|
|
|
1371
1694
|
return info.This();
|
|
1372
1695
|
}
|
|
@@ -1385,10 +1708,17 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1385
1708
|
int field_index = info[0].As<Napi::Number>().Int32Value();
|
|
1386
1709
|
|
|
1387
1710
|
// Remove from JavaScript transforms
|
|
1388
|
-
rc_->js_transforms.
|
|
1711
|
+
auto it = rc_->js_transforms.find(field_index);
|
|
1712
|
+
if (it != rc_->js_transforms.end()) {
|
|
1713
|
+
if (!it->second.IsEmpty()) {
|
|
1714
|
+
it->second.Reset();
|
|
1715
|
+
}
|
|
1716
|
+
rc_->js_transforms.erase(it);
|
|
1717
|
+
}
|
|
1389
1718
|
|
|
1390
|
-
|
|
1391
|
-
|
|
1719
|
+
if (rc_->pipeline) {
|
|
1720
|
+
cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
|
|
1721
|
+
}
|
|
1392
1722
|
|
|
1393
1723
|
return info.This();
|
|
1394
1724
|
}
|
|
@@ -1515,16 +1845,29 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1515
1845
|
result.Set("jsTransformCount", Napi::Number::New(env, js_transform_count));
|
|
1516
1846
|
|
|
1517
1847
|
// List field indices with transforms
|
|
1518
|
-
|
|
1519
|
-
|
|
1848
|
+
std::vector<int> field_indices;
|
|
1849
|
+
auto add_field_index = [&field_indices](int field_index) {
|
|
1850
|
+
if (std::find(field_indices.begin(), field_indices.end(), field_index) == field_indices.end()) {
|
|
1851
|
+
field_indices.push_back(field_index);
|
|
1852
|
+
}
|
|
1853
|
+
};
|
|
1520
1854
|
|
|
1521
|
-
|
|
1855
|
+
if (rc_ && rc_->pipeline) {
|
|
1856
|
+
for (size_t i = 0; i < rc_->pipeline->count; i++) {
|
|
1857
|
+
add_field_index(rc_->pipeline->transforms[i].field_index);
|
|
1858
|
+
}
|
|
1859
|
+
}
|
|
1522
1860
|
if (rc_) {
|
|
1523
1861
|
for (const auto& pair : rc_->js_transforms) {
|
|
1524
|
-
|
|
1862
|
+
add_field_index(pair.first);
|
|
1525
1863
|
}
|
|
1526
1864
|
}
|
|
1527
1865
|
|
|
1866
|
+
Napi::Array fields = Napi::Array::New(env, field_indices.size());
|
|
1867
|
+
for (size_t i = 0; i < field_indices.size(); i++) {
|
|
1868
|
+
fields[i] = Napi::Number::New(env, field_indices[i]);
|
|
1869
|
+
}
|
|
1870
|
+
|
|
1528
1871
|
result.Set("fieldIndices", fields);
|
|
1529
1872
|
|
|
1530
1873
|
return result;
|
|
@@ -1540,7 +1883,16 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1540
1883
|
Napi::Object stats = Napi::Object::New(env);
|
|
1541
1884
|
size_t row_count = 0;
|
|
1542
1885
|
size_t field_count = 0;
|
|
1543
|
-
if (
|
|
1886
|
+
if (!fast_rows_ref_.IsEmpty()) {
|
|
1887
|
+
Napi::Array rows = fast_rows_ref_.Value().As<Napi::Array>();
|
|
1888
|
+
row_count = rows.Length();
|
|
1889
|
+
if (row_count > 0) {
|
|
1890
|
+
Napi::Value first = rows.Get(static_cast<uint32_t>(0));
|
|
1891
|
+
if (first.IsArray()) {
|
|
1892
|
+
field_count = first.As<Napi::Array>().Length();
|
|
1893
|
+
}
|
|
1894
|
+
}
|
|
1895
|
+
} else if (batch_result_) {
|
|
1544
1896
|
row_count = batch_result_->row_count;
|
|
1545
1897
|
if (batch_result_->row_count > 0) {
|
|
1546
1898
|
field_count = batch_result_->rows[0].field_count;
|
|
@@ -1590,25 +1942,13 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1590
1942
|
cisv_config_init(&config);
|
|
1591
1943
|
|
|
1592
1944
|
// Apply configuration if provided
|
|
1945
|
+
if (info.Length() > 1 && !info[1].IsNull() && !info[1].IsUndefined() && !info[1].IsObject()) {
|
|
1946
|
+
throw Napi::TypeError::New(env, "Config must be an object");
|
|
1947
|
+
}
|
|
1948
|
+
|
|
1593
1949
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
1594
1950
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
1595
|
-
|
|
1596
|
-
// Apply same configuration parsing logic
|
|
1597
|
-
ValidateSingleCharOption(env, options, "delimiter", &config.delimiter);
|
|
1598
|
-
ValidateSingleCharOption(env, options, "quote", &config.quote);
|
|
1599
|
-
ValidateSingleCharOption(env, options, "comment", &config.comment, true);
|
|
1600
|
-
|
|
1601
|
-
if (options.Has("skipEmptyLines")) {
|
|
1602
|
-
config.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
|
|
1603
|
-
}
|
|
1604
|
-
|
|
1605
|
-
if (options.Has("fromLine")) {
|
|
1606
|
-
config.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
|
|
1607
|
-
}
|
|
1608
|
-
|
|
1609
|
-
if (options.Has("toLine")) {
|
|
1610
|
-
config.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
|
|
1611
|
-
}
|
|
1951
|
+
ApplyConfigOptions(env, options, &config);
|
|
1612
1952
|
}
|
|
1613
1953
|
|
|
1614
1954
|
size_t count = cisv_parser_count_rows_with_config(path.c_str(), &config);
|
|
@@ -1684,8 +2024,15 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1684
2024
|
|
|
1685
2025
|
napi_value row;
|
|
1686
2026
|
napi_create_array_with_length(env, field_count, &row);
|
|
2027
|
+
bool ascii_only = true;
|
|
1687
2028
|
for (size_t i = 0; i < field_count; i++) {
|
|
1688
|
-
|
|
2029
|
+
if (!isAllAscii(fields[i], lengths[i])) {
|
|
2030
|
+
ascii_only = false;
|
|
2031
|
+
break;
|
|
2032
|
+
}
|
|
2033
|
+
}
|
|
2034
|
+
for (size_t i = 0; i < field_count; i++) {
|
|
2035
|
+
napi_set_element(env, row, i, NewCsvStringValue(env, fields[i], lengths[i], ascii_only));
|
|
1689
2036
|
}
|
|
1690
2037
|
|
|
1691
2038
|
return Napi::Value(env, row);
|
|
@@ -1734,6 +2081,20 @@ private:
|
|
|
1734
2081
|
}
|
|
1735
2082
|
}
|
|
1736
2083
|
|
|
2084
|
+
void clearFastRows() {
|
|
2085
|
+
if (!fast_rows_ref_.IsEmpty()) {
|
|
2086
|
+
fast_rows_ref_.Reset();
|
|
2087
|
+
}
|
|
2088
|
+
}
|
|
2089
|
+
|
|
2090
|
+
void clearPendingBuffer() {
|
|
2091
|
+
if (!pending_buffer_ref_.IsEmpty()) {
|
|
2092
|
+
pending_buffer_ref_.Reset();
|
|
2093
|
+
}
|
|
2094
|
+
pending_buffer_data_ = nullptr;
|
|
2095
|
+
pending_buffer_size_ = 0;
|
|
2096
|
+
}
|
|
2097
|
+
|
|
1737
2098
|
bool hasTransforms() const {
|
|
1738
2099
|
bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;
|
|
1739
2100
|
bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
|
|
@@ -1742,7 +2103,9 @@ private:
|
|
|
1742
2103
|
|
|
1743
2104
|
void resetRowState() {
|
|
1744
2105
|
clearBatchResult();
|
|
2106
|
+
clearFastRows();
|
|
1745
2107
|
pending_stream_.clear();
|
|
2108
|
+
clearPendingBuffer();
|
|
1746
2109
|
stream_buffering_active_ = true;
|
|
1747
2110
|
if (!rc_) return;
|
|
1748
2111
|
rc_->rows.clear();
|
|
@@ -1751,6 +2114,11 @@ private:
|
|
|
1751
2114
|
}
|
|
1752
2115
|
|
|
1753
2116
|
void flushPendingStreamToParser() {
|
|
2117
|
+
if (pending_buffer_size_ > 0) {
|
|
2118
|
+
ensureParser(Env());
|
|
2119
|
+
cisv_parser_write(parser_, pending_buffer_data_, pending_buffer_size_);
|
|
2120
|
+
clearPendingBuffer();
|
|
2121
|
+
}
|
|
1754
2122
|
if (pending_stream_.empty()) {
|
|
1755
2123
|
return;
|
|
1756
2124
|
}
|
|
@@ -1779,15 +2147,27 @@ private:
|
|
|
1779
2147
|
}
|
|
1780
2148
|
|
|
1781
2149
|
Napi::Value drainRows(Napi::Env env) {
|
|
2150
|
+
if (!fast_rows_ref_.IsEmpty()) {
|
|
2151
|
+
return fast_rows_ref_.Value();
|
|
2152
|
+
}
|
|
2153
|
+
|
|
1782
2154
|
if (batch_result_) {
|
|
1783
2155
|
napi_value rows;
|
|
1784
2156
|
napi_create_array_with_length(env, batch_result_->row_count, &rows);
|
|
2157
|
+
const bool ascii_only =
|
|
2158
|
+
!batch_result_->field_data ||
|
|
2159
|
+
batch_result_->field_data_size == 0 ||
|
|
2160
|
+
isAllAscii(batch_result_->field_data, batch_result_->field_data_size);
|
|
1785
2161
|
for (size_t i = 0; i < batch_result_->row_count; ++i) {
|
|
1786
2162
|
const cisv_row_t *src_row = &batch_result_->rows[i];
|
|
1787
2163
|
napi_value row;
|
|
1788
2164
|
napi_create_array_with_length(env, src_row->field_count, &row);
|
|
1789
2165
|
for (size_t j = 0; j < src_row->field_count; ++j) {
|
|
1790
|
-
napi_set_element(
|
|
2166
|
+
napi_set_element(
|
|
2167
|
+
env,
|
|
2168
|
+
row,
|
|
2169
|
+
j,
|
|
2170
|
+
NewCsvStringValue(env, src_row->fields[j], src_row->field_lengths[j], ascii_only));
|
|
1791
2171
|
}
|
|
1792
2172
|
napi_set_element(env, rows, i, row);
|
|
1793
2173
|
}
|
|
@@ -1800,6 +2180,7 @@ private:
|
|
|
1800
2180
|
|
|
1801
2181
|
napi_value rows;
|
|
1802
2182
|
napi_create_array_with_length(env, rc_->rows.size(), &rows);
|
|
2183
|
+
const bool ascii_only = rowsAreAscii(rc_->rows);
|
|
1803
2184
|
|
|
1804
2185
|
for (size_t i = 0; i < rc_->rows.size(); ++i) {
|
|
1805
2186
|
napi_value row;
|
|
@@ -1807,7 +2188,11 @@ private:
|
|
|
1807
2188
|
for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
|
|
1808
2189
|
// SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
|
|
1809
2190
|
const std::string& field = rc_->rows[i][j];
|
|
1810
|
-
napi_set_element(
|
|
2191
|
+
napi_set_element(
|
|
2192
|
+
env,
|
|
2193
|
+
row,
|
|
2194
|
+
j,
|
|
2195
|
+
NewCsvStringValue(env, field.c_str(), field.length(), ascii_only));
|
|
1811
2196
|
}
|
|
1812
2197
|
napi_set_element(env, rows, i, row);
|
|
1813
2198
|
}
|
|
@@ -1826,7 +2211,11 @@ private:
|
|
|
1826
2211
|
bool is_destroyed_;
|
|
1827
2212
|
cisv_iterator_t *iterator_; // For row-by-row iteration
|
|
1828
2213
|
cisv_result_t *batch_result_;
|
|
2214
|
+
Napi::ObjectReference fast_rows_ref_;
|
|
1829
2215
|
std::string pending_stream_;
|
|
2216
|
+
Napi::ObjectReference pending_buffer_ref_;
|
|
2217
|
+
const uint8_t *pending_buffer_data_;
|
|
2218
|
+
size_t pending_buffer_size_;
|
|
1830
2219
|
bool stream_buffering_active_;
|
|
1831
2220
|
static constexpr size_t kStreamBufferLimitBytes = 8 * 1024 * 1024;
|
|
1832
2221
|
};
|
|
@@ -1836,7 +2225,7 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
|
|
|
1836
2225
|
CisvParser::Init(env, exports);
|
|
1837
2226
|
|
|
1838
2227
|
// Add version info
|
|
1839
|
-
exports.Set("version", Napi::String::New(env, "0.4.
|
|
2228
|
+
exports.Set("version", Napi::String::New(env, "0.4.9"));
|
|
1840
2229
|
|
|
1841
2230
|
// Add transform type constants
|
|
1842
2231
|
Napi::Object transformTypes = Napi::Object::New(env);
|
|
@@ -1845,7 +2234,6 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
|
|
|
1845
2234
|
transformTypes.Set("TRIM", Napi::String::New(env, "trim"));
|
|
1846
2235
|
transformTypes.Set("TO_INT", Napi::String::New(env, "to_int"));
|
|
1847
2236
|
transformTypes.Set("TO_FLOAT", Napi::String::New(env, "to_float"));
|
|
1848
|
-
transformTypes.Set("HASH_SHA256", Napi::String::New(env, "hash_sha256"));
|
|
1849
2237
|
transformTypes.Set("BASE64_ENCODE", Napi::String::New(env, "base64_encode"));
|
|
1850
2238
|
exports.Set("TransformType", transformTypes);
|
|
1851
2239
|
|
package/cisv/index.js
CHANGED
package/cisv/index.mjs
CHANGED
|
@@ -7,6 +7,10 @@ const require = createRequire(import.meta.url);
|
|
|
7
7
|
|
|
8
8
|
const gyp = require('node-gyp-build');
|
|
9
9
|
const addon = gyp(path.join(__dirname, '..'));
|
|
10
|
+
const { wrapAddon } = require('./wrapper.js');
|
|
11
|
+
const wrapped = wrapAddon(addon);
|
|
10
12
|
|
|
11
|
-
export const cisvParser =
|
|
12
|
-
export
|
|
13
|
+
export const cisvParser = wrapped.cisvParser;
|
|
14
|
+
export const TransformType = wrapped.TransformType;
|
|
15
|
+
export const version = wrapped.version;
|
|
16
|
+
export default wrapped;
|
package/cisv/types/cisv.d.ts
CHANGED
|
@@ -8,7 +8,6 @@ declare module 'cisv' {
|
|
|
8
8
|
TRIM = 'trim',
|
|
9
9
|
TO_INT = 'to_int',
|
|
10
10
|
TO_FLOAT = 'to_float',
|
|
11
|
-
HASH_SHA256 = 'hash_sha256',
|
|
12
11
|
BASE64_ENCODE = 'base64_encode',
|
|
13
12
|
CUSTOM = 'custom'
|
|
14
13
|
}
|
|
@@ -88,11 +87,11 @@ declare module 'cisv' {
|
|
|
88
87
|
parseParallel(path: string, numThreads?: number): Promise<string[][]>;
|
|
89
88
|
|
|
90
89
|
/**
|
|
91
|
-
* Parse CSV string content
|
|
92
|
-
* @param content CSV string
|
|
90
|
+
* Parse CSV string or Buffer content
|
|
91
|
+
* @param content CSV content as string or Buffer
|
|
93
92
|
* @returns Array of rows with string values
|
|
94
93
|
*/
|
|
95
|
-
parseString(content: string): string[][];
|
|
94
|
+
parseString(content: Buffer | string): string[][];
|
|
96
95
|
|
|
97
96
|
/**
|
|
98
97
|
* Write chunk of CSV data (for streaming)
|
package/cisv/wrapper.js
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { isAscii } = require('buffer');
|
|
4
|
+
|
|
5
|
+
function fastConfigFromOptions(options) {
|
|
6
|
+
if (options == null) {
|
|
7
|
+
return { delimiter: ',', quote: '"' };
|
|
8
|
+
}
|
|
9
|
+
if (typeof options !== 'object') {
|
|
10
|
+
return null;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
const delimiter = options.delimiter == null ? ',' : options.delimiter;
|
|
14
|
+
const quote = options.quote == null ? '"' : options.quote;
|
|
15
|
+
if (typeof delimiter !== 'string' || delimiter.length !== 1) {
|
|
16
|
+
return null;
|
|
17
|
+
}
|
|
18
|
+
if (typeof quote !== 'string' || quote.length !== 1) {
|
|
19
|
+
return null;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
if (options.escape != null && options.escape !== '') {
|
|
23
|
+
return null;
|
|
24
|
+
}
|
|
25
|
+
if (options.comment != null && options.comment !== '') {
|
|
26
|
+
return null;
|
|
27
|
+
}
|
|
28
|
+
if (options.trim || options.skipEmptyLines || options.relaxed || options.skipLinesWithError) {
|
|
29
|
+
return null;
|
|
30
|
+
}
|
|
31
|
+
if (options.maxRowSize != null && options.maxRowSize !== 0) {
|
|
32
|
+
return null;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
const fromLine = options.fromLine == null ? 1 : options.fromLine;
|
|
36
|
+
const toLine = options.toLine == null ? 0 : options.toLine;
|
|
37
|
+
if (fromLine !== 0 && fromLine !== 1) {
|
|
38
|
+
return null;
|
|
39
|
+
}
|
|
40
|
+
if (toLine !== 0) {
|
|
41
|
+
return null;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return { delimiter, quote };
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
function chunkToLatin1String(chunk) {
|
|
48
|
+
return Buffer.isBuffer(chunk) ? chunk.toString('latin1') : chunk;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
function chunksToLatin1String(chunks) {
|
|
52
|
+
if (chunks.length === 1) {
|
|
53
|
+
return chunkToLatin1String(chunks[0]);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
let out = '';
|
|
57
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
58
|
+
out += chunkToLatin1String(chunks[i]);
|
|
59
|
+
}
|
|
60
|
+
return out;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function isSimpleAsciiLf(data, quote) {
|
|
64
|
+
if (data.length >= 3 &&
|
|
65
|
+
data.charCodeAt(0) === 0xEF &&
|
|
66
|
+
data.charCodeAt(1) === 0xBB &&
|
|
67
|
+
data.charCodeAt(2) === 0xBF) {
|
|
68
|
+
return false;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const quoteCode = quote.charCodeAt(0);
|
|
72
|
+
for (let i = 0; i < data.length; i++) {
|
|
73
|
+
const code = data.charCodeAt(i);
|
|
74
|
+
if (code === quoteCode || code === 13 || code > 127) {
|
|
75
|
+
return false;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
return true;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
function chunksAreSimpleAsciiLf(chunks, quote) {
|
|
82
|
+
const quoteCode = quote.charCodeAt(0);
|
|
83
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
84
|
+
const chunk = chunks[i];
|
|
85
|
+
if (Buffer.isBuffer(chunk)) {
|
|
86
|
+
if (i === 0 &&
|
|
87
|
+
chunk.length >= 3 &&
|
|
88
|
+
chunk[0] === 0xEF &&
|
|
89
|
+
chunk[1] === 0xBB &&
|
|
90
|
+
chunk[2] === 0xBF) {
|
|
91
|
+
return false;
|
|
92
|
+
}
|
|
93
|
+
if (!isAscii(chunk) || chunk.indexOf(quoteCode) !== -1 || chunk.indexOf(13) !== -1) {
|
|
94
|
+
return false;
|
|
95
|
+
}
|
|
96
|
+
} else if (!isSimpleAsciiLf(chunk, quote)) {
|
|
97
|
+
return false;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return true;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
function analyzeSingleSimpleChunk(chunk, delimiter, quote) {
|
|
104
|
+
const delimiterCode = delimiter.charCodeAt(0);
|
|
105
|
+
const quoteCode = quote.charCodeAt(0);
|
|
106
|
+
let cols = -1;
|
|
107
|
+
let currentCols = 1;
|
|
108
|
+
let hasData = false;
|
|
109
|
+
let rows = 0;
|
|
110
|
+
|
|
111
|
+
if (Buffer.isBuffer(chunk)) {
|
|
112
|
+
if (chunk.length >= 3 && chunk[0] === 0xEF && chunk[1] === 0xBB && chunk[2] === 0xBF) {
|
|
113
|
+
return { simple: false, uniform: false, rows: 0, cols: 0 };
|
|
114
|
+
}
|
|
115
|
+
for (let i = 0; i < chunk.length; i++) {
|
|
116
|
+
const code = chunk[i];
|
|
117
|
+
if (code === quoteCode || code === 13 || code > 127) {
|
|
118
|
+
return { simple: false, uniform: false, rows: 0, cols: 0 };
|
|
119
|
+
}
|
|
120
|
+
if (code === delimiterCode) {
|
|
121
|
+
currentCols++;
|
|
122
|
+
hasData = true;
|
|
123
|
+
} else if (code === 10) {
|
|
124
|
+
if (cols === -1) {
|
|
125
|
+
cols = currentCols;
|
|
126
|
+
} else if (currentCols !== cols) {
|
|
127
|
+
return { simple: true, uniform: false, rows: 0, cols: 0 };
|
|
128
|
+
}
|
|
129
|
+
rows++;
|
|
130
|
+
currentCols = 1;
|
|
131
|
+
hasData = false;
|
|
132
|
+
} else {
|
|
133
|
+
hasData = true;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
} else {
|
|
137
|
+
if (chunk.length >= 3 &&
|
|
138
|
+
chunk.charCodeAt(0) === 0xEF &&
|
|
139
|
+
chunk.charCodeAt(1) === 0xBB &&
|
|
140
|
+
chunk.charCodeAt(2) === 0xBF) {
|
|
141
|
+
return { simple: false, uniform: false, rows: 0, cols: 0 };
|
|
142
|
+
}
|
|
143
|
+
for (let i = 0; i < chunk.length; i++) {
|
|
144
|
+
const code = chunk.charCodeAt(i);
|
|
145
|
+
if (code === quoteCode || code === 13 || code > 127) {
|
|
146
|
+
return { simple: false, uniform: false, rows: 0, cols: 0 };
|
|
147
|
+
}
|
|
148
|
+
if (code === delimiterCode) {
|
|
149
|
+
currentCols++;
|
|
150
|
+
hasData = true;
|
|
151
|
+
} else if (code === 10) {
|
|
152
|
+
if (cols === -1) {
|
|
153
|
+
cols = currentCols;
|
|
154
|
+
} else if (currentCols !== cols) {
|
|
155
|
+
return { simple: true, uniform: false, rows: 0, cols: 0 };
|
|
156
|
+
}
|
|
157
|
+
rows++;
|
|
158
|
+
currentCols = 1;
|
|
159
|
+
hasData = false;
|
|
160
|
+
} else {
|
|
161
|
+
hasData = true;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
if (hasData && cols !== -1 && currentCols !== cols) {
|
|
167
|
+
return { simple: true, uniform: false, rows: 0, cols: 0 };
|
|
168
|
+
}
|
|
169
|
+
if (hasData) {
|
|
170
|
+
rows++;
|
|
171
|
+
if (cols === -1) {
|
|
172
|
+
cols = currentCols;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
return { simple: true, uniform: true, rows, cols: Math.max(cols, 0) };
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
function parseSimpleRows(data, delimiter) {
|
|
180
|
+
if (data.length === 0) {
|
|
181
|
+
return [];
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
const last = data.charCodeAt(data.length - 1);
|
|
185
|
+
const body = last === 10 ? data.slice(0, -1) : data;
|
|
186
|
+
if (body.length === 0) {
|
|
187
|
+
return [['']];
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
const lines = body.split('\n');
|
|
191
|
+
const rows = new Array(lines.length);
|
|
192
|
+
for (let i = 0; i < lines.length; i++) {
|
|
193
|
+
rows[i] = lines[i].split(delimiter);
|
|
194
|
+
}
|
|
195
|
+
return rows;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
function parseUniformRows(data, delimiter, rowCount, cols) {
|
|
199
|
+
let end = data.length;
|
|
200
|
+
if (end === 0) {
|
|
201
|
+
return [];
|
|
202
|
+
}
|
|
203
|
+
if (data.charCodeAt(end - 1) === 10) {
|
|
204
|
+
end--;
|
|
205
|
+
}
|
|
206
|
+
if (end === 0) {
|
|
207
|
+
return [['']];
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
const usePrealloc = rowCount > 0 && cols > 0;
|
|
211
|
+
if (!usePrealloc) {
|
|
212
|
+
cols = 1;
|
|
213
|
+
for (let i = 0; i < end && data.charCodeAt(i) !== 10; i++) {
|
|
214
|
+
if (data[i] === delimiter) {
|
|
215
|
+
cols++;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
const rows = usePrealloc ? new Array(rowCount) : [];
|
|
221
|
+
let rowIdx = 0;
|
|
222
|
+
let pos = 0;
|
|
223
|
+
while (pos < end) {
|
|
224
|
+
const row = new Array(cols);
|
|
225
|
+
for (let col = 0; col < cols - 1; col++) {
|
|
226
|
+
const next = data.indexOf(delimiter, pos);
|
|
227
|
+
row[col] = data.slice(pos, next);
|
|
228
|
+
pos = next + 1;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
let lineEnd = data.indexOf('\n', pos);
|
|
232
|
+
if (lineEnd === -1 || lineEnd > end) {
|
|
233
|
+
lineEnd = end;
|
|
234
|
+
}
|
|
235
|
+
row[cols - 1] = data.slice(pos, lineEnd);
|
|
236
|
+
if (usePrealloc) {
|
|
237
|
+
rows[rowIdx++] = row;
|
|
238
|
+
} else {
|
|
239
|
+
rows.push(row);
|
|
240
|
+
}
|
|
241
|
+
pos = lineEnd + 1;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
return rows;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
function wrapAddon(addon) {
|
|
248
|
+
const NativeParser = addon.cisvParser;
|
|
249
|
+
|
|
250
|
+
class cisvParser extends NativeParser {
|
|
251
|
+
constructor(options) {
|
|
252
|
+
super(options);
|
|
253
|
+
this._cisvFastConfig = fastConfigFromOptions(options);
|
|
254
|
+
this._cisvFastChunks = [];
|
|
255
|
+
this._cisvFastRows = null;
|
|
256
|
+
this._cisvNativeStream = false;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
_flushFastChunksToNative() {
|
|
260
|
+
if (this._cisvFastChunks.length === 0) {
|
|
261
|
+
return;
|
|
262
|
+
}
|
|
263
|
+
const chunks = this._cisvFastChunks;
|
|
264
|
+
this._cisvFastChunks = [];
|
|
265
|
+
this._cisvNativeStream = true;
|
|
266
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
267
|
+
super.write(chunks[i]);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
write(chunk) {
|
|
272
|
+
this._cisvFastRows = null;
|
|
273
|
+
if (this._cisvFastConfig &&
|
|
274
|
+
!this._cisvNativeStream &&
|
|
275
|
+
(Buffer.isBuffer(chunk) || typeof chunk === 'string')) {
|
|
276
|
+
this._cisvFastChunks.push(chunk);
|
|
277
|
+
return;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
this._flushFastChunksToNative();
|
|
281
|
+
this._cisvNativeStream = true;
|
|
282
|
+
return super.write(chunk);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
end() {
|
|
286
|
+
if (this._cisvFastConfig &&
|
|
287
|
+
!this._cisvNativeStream &&
|
|
288
|
+
this._cisvFastChunks.length > 0) {
|
|
289
|
+
let uniform = false;
|
|
290
|
+
let simple = false;
|
|
291
|
+
|
|
292
|
+
if (this._cisvFastChunks.length === 1) {
|
|
293
|
+
const analysis = analyzeSingleSimpleChunk(
|
|
294
|
+
this._cisvFastChunks[0],
|
|
295
|
+
this._cisvFastConfig.delimiter,
|
|
296
|
+
this._cisvFastConfig.quote);
|
|
297
|
+
simple = analysis.simple;
|
|
298
|
+
uniform = analysis.uniform;
|
|
299
|
+
var rowCount = analysis.rows;
|
|
300
|
+
var colCount = analysis.cols;
|
|
301
|
+
} else {
|
|
302
|
+
simple = chunksAreSimpleAsciiLf(this._cisvFastChunks, this._cisvFastConfig.quote);
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
if (simple) {
|
|
306
|
+
const data = chunksToLatin1String(this._cisvFastChunks);
|
|
307
|
+
const useLargePrealloc = data.length >= 64 * 1024 * 1024;
|
|
308
|
+
this._cisvFastRows = uniform
|
|
309
|
+
? parseUniformRows(
|
|
310
|
+
data,
|
|
311
|
+
this._cisvFastConfig.delimiter,
|
|
312
|
+
useLargePrealloc ? rowCount : 0,
|
|
313
|
+
useLargePrealloc ? colCount : 0)
|
|
314
|
+
: parseSimpleRows(data, this._cisvFastConfig.delimiter);
|
|
315
|
+
this._cisvFastChunks = [];
|
|
316
|
+
return;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
this._flushFastChunksToNative();
|
|
321
|
+
this._cisvNativeStream = true;
|
|
322
|
+
return super.end();
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
getRows() {
|
|
326
|
+
if (this._cisvFastRows !== null) {
|
|
327
|
+
return this._cisvFastRows;
|
|
328
|
+
}
|
|
329
|
+
return super.getRows();
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
clear() {
|
|
333
|
+
this._cisvFastChunks = [];
|
|
334
|
+
this._cisvFastRows = null;
|
|
335
|
+
this._cisvNativeStream = false;
|
|
336
|
+
return super.clear();
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
setConfig(options) {
|
|
340
|
+
this._flushFastChunksToNative();
|
|
341
|
+
this._cisvFastRows = null;
|
|
342
|
+
this._cisvFastConfig = fastConfigFromOptions(options);
|
|
343
|
+
return super.setConfig(options);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
transform(...args) {
|
|
347
|
+
this._flushFastChunksToNative();
|
|
348
|
+
this._cisvFastConfig = null;
|
|
349
|
+
return super.transform(...args);
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
transformByName(...args) {
|
|
353
|
+
this._flushFastChunksToNative();
|
|
354
|
+
this._cisvFastConfig = null;
|
|
355
|
+
return super.transformByName(...args);
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
destroy() {
|
|
359
|
+
this._cisvFastChunks = [];
|
|
360
|
+
this._cisvFastRows = null;
|
|
361
|
+
return super.destroy();
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
return {
|
|
366
|
+
...addon,
|
|
367
|
+
cisvParser,
|
|
368
|
+
};
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
module.exports = { wrapAddon };
|