cisv 0.4.7 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/binding.gyp +0 -1
- package/build/Release/cisv.node +0 -0
- package/cisv/cisv_addon.cc +456 -152
- package/cisv/types/cisv.d.ts +19 -4
- package/package.json +1 -1
package/README.md
CHANGED
package/binding.gyp
CHANGED
package/build/Release/cisv.node
CHANGED
|
Binary file
|
package/cisv/cisv_addon.cc
CHANGED
|
@@ -5,11 +5,170 @@
|
|
|
5
5
|
#include <memory>
|
|
6
6
|
#include <string>
|
|
7
7
|
#include <unordered_map>
|
|
8
|
+
#include <algorithm>
|
|
8
9
|
#include <chrono>
|
|
9
10
|
#include <cstdint>
|
|
11
|
+
#include <climits>
|
|
12
|
+
#include <cmath>
|
|
10
13
|
|
|
11
14
|
namespace {
|
|
12
15
|
|
|
16
|
+
static bool isInvalidConfigChar(char c) {
|
|
17
|
+
return c == '\0' || c == '\n' || c == '\r';
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
static void ValidateSingleCharOption(
|
|
21
|
+
Napi::Env env,
|
|
22
|
+
const Napi::Object &options,
|
|
23
|
+
const char *option_name,
|
|
24
|
+
char *target,
|
|
25
|
+
bool allow_null = false
|
|
26
|
+
) {
|
|
27
|
+
if (!options.Has(option_name)) {
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
Napi::Value value = options.Get(option_name);
|
|
32
|
+
if (allow_null && (value.IsNull() || value.IsUndefined())) {
|
|
33
|
+
*target = 0;
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
if (!value.IsString()) {
|
|
38
|
+
throw Napi::TypeError::New(env, std::string(option_name) + " must be a string");
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
std::string raw = value.As<Napi::String>();
|
|
42
|
+
if (raw.size() != 1) {
|
|
43
|
+
throw Napi::TypeError::New(env, std::string(option_name) + " must be exactly 1 character");
|
|
44
|
+
}
|
|
45
|
+
if (isInvalidConfigChar(raw[0])) {
|
|
46
|
+
throw Napi::TypeError::New(env, std::string("Invalid ") + option_name + " character");
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
*target = raw[0];
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
static double MaxJsSafeInteger() {
|
|
53
|
+
return 9007199254740991.0;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
static bool IsWholeNumber(double value) {
|
|
57
|
+
return std::isfinite(value) && std::floor(value) == value;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
static void ApplyBooleanOption(
|
|
61
|
+
Napi::Env env,
|
|
62
|
+
const Napi::Object &options,
|
|
63
|
+
const char *option_name,
|
|
64
|
+
bool *target
|
|
65
|
+
) {
|
|
66
|
+
if (!options.Has(option_name)) {
|
|
67
|
+
return;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
Napi::Value value = options.Get(option_name);
|
|
71
|
+
if (!value.IsBoolean()) {
|
|
72
|
+
throw Napi::TypeError::New(env, std::string(option_name) + " must be a boolean");
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
*target = value.As<Napi::Boolean>();
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
static void ApplySizeOption(
|
|
79
|
+
Napi::Env env,
|
|
80
|
+
const Napi::Object &options,
|
|
81
|
+
const char *option_name,
|
|
82
|
+
size_t *target
|
|
83
|
+
) {
|
|
84
|
+
if (!options.Has(option_name)) {
|
|
85
|
+
return;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
Napi::Value value = options.Get(option_name);
|
|
89
|
+
if (value.IsNull() || value.IsUndefined()) {
|
|
90
|
+
*target = 0;
|
|
91
|
+
return;
|
|
92
|
+
}
|
|
93
|
+
if (!value.IsNumber()) {
|
|
94
|
+
throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
double raw = value.As<Napi::Number>().DoubleValue();
|
|
98
|
+
double max_value = static_cast<double>(SIZE_MAX);
|
|
99
|
+
if (!IsWholeNumber(raw) || raw < 0.0 || raw > max_value || raw > MaxJsSafeInteger()) {
|
|
100
|
+
throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
*target = static_cast<size_t>(raw);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
static void ApplyLineOption(
|
|
107
|
+
Napi::Env env,
|
|
108
|
+
const Napi::Object &options,
|
|
109
|
+
const char *option_name,
|
|
110
|
+
int *target
|
|
111
|
+
) {
|
|
112
|
+
if (!options.Has(option_name)) {
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
Napi::Value value = options.Get(option_name);
|
|
117
|
+
if (value.IsNull() || value.IsUndefined()) {
|
|
118
|
+
*target = 0;
|
|
119
|
+
return;
|
|
120
|
+
}
|
|
121
|
+
if (!value.IsNumber()) {
|
|
122
|
+
throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
double raw = value.As<Napi::Number>().DoubleValue();
|
|
126
|
+
if (!IsWholeNumber(raw) || raw < 0.0 || raw > static_cast<double>(INT_MAX)) {
|
|
127
|
+
throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
*target = static_cast<int>(raw);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
static void ValidateConfigSemantics(Napi::Env env, const cisv_config &config) {
|
|
134
|
+
if (config.delimiter == config.quote) {
|
|
135
|
+
throw Napi::TypeError::New(env, "delimiter and quote cannot be the same");
|
|
136
|
+
}
|
|
137
|
+
if (config.escape != '\0' && config.escape == config.delimiter) {
|
|
138
|
+
throw Napi::TypeError::New(env, "escape and delimiter cannot be the same");
|
|
139
|
+
}
|
|
140
|
+
if (config.escape != '\0' && config.escape == config.quote) {
|
|
141
|
+
throw Napi::TypeError::New(env, "escape and quote cannot be the same");
|
|
142
|
+
}
|
|
143
|
+
if (config.comment != '\0' &&
|
|
144
|
+
(config.comment == config.delimiter || config.comment == config.quote || config.comment == config.escape)) {
|
|
145
|
+
throw Napi::TypeError::New(env, "comment cannot conflict with delimiter, quote, or escape");
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
int effective_from = config.from_line > 0 ? config.from_line : 1;
|
|
149
|
+
if (config.to_line != 0 && config.to_line < effective_from) {
|
|
150
|
+
throw Napi::RangeError::New(env, "toLine must be >= fromLine");
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
static void ApplyConfigOptions(Napi::Env env, const Napi::Object &options, cisv_config *config) {
|
|
155
|
+
ValidateSingleCharOption(env, options, "delimiter", &config->delimiter);
|
|
156
|
+
ValidateSingleCharOption(env, options, "quote", &config->quote);
|
|
157
|
+
ValidateSingleCharOption(env, options, "escape", &config->escape, true);
|
|
158
|
+
ValidateSingleCharOption(env, options, "comment", &config->comment, true);
|
|
159
|
+
|
|
160
|
+
ApplyBooleanOption(env, options, "skipEmptyLines", &config->skip_empty_lines);
|
|
161
|
+
ApplyBooleanOption(env, options, "trim", &config->trim);
|
|
162
|
+
ApplyBooleanOption(env, options, "relaxed", &config->relaxed);
|
|
163
|
+
ApplyBooleanOption(env, options, "skipLinesWithError", &config->skip_lines_with_error);
|
|
164
|
+
|
|
165
|
+
ApplySizeOption(env, options, "maxRowSize", &config->max_row_size);
|
|
166
|
+
ApplyLineOption(env, options, "fromLine", &config->from_line);
|
|
167
|
+
ApplyLineOption(env, options, "toLine", &config->to_line);
|
|
168
|
+
|
|
169
|
+
ValidateConfigSemantics(env, *config);
|
|
170
|
+
}
|
|
171
|
+
|
|
13
172
|
// =============================================================================
|
|
14
173
|
// SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
|
|
15
174
|
// Invalid UTF-8 data can cause Napi::String::New to throw or crash
|
|
@@ -108,7 +267,7 @@ static napi_value SafeNewStringValue(napi_env env, const char* data, size_t len)
|
|
|
108
267
|
if (napi_create_string_latin1(env, data, len, &short_value) == napi_ok && short_value) {
|
|
109
268
|
return short_value;
|
|
110
269
|
}
|
|
111
|
-
} else {
|
|
270
|
+
} else if (isValidUtf8(data, len)) {
|
|
112
271
|
if (napi_create_string_utf8(env, data, len, &short_value) == napi_ok && short_value) {
|
|
113
272
|
return short_value;
|
|
114
273
|
}
|
|
@@ -339,6 +498,69 @@ static void error_cb(void *user, int line, const char *msg) {
|
|
|
339
498
|
fprintf(stderr, "CSV Parse Error at line %d: %s\n", line, msg);
|
|
340
499
|
}
|
|
341
500
|
|
|
501
|
+
static bool validateNumThreads(int num_threads, std::string &error) {
|
|
502
|
+
if (num_threads < 0) {
|
|
503
|
+
error = "numThreads must be >= 0";
|
|
504
|
+
return false;
|
|
505
|
+
}
|
|
506
|
+
return true;
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
static bool collectParallelRows(
|
|
510
|
+
cisv_result_t **results,
|
|
511
|
+
int result_count,
|
|
512
|
+
std::vector<std::vector<std::string>> &rows,
|
|
513
|
+
std::string &error
|
|
514
|
+
) {
|
|
515
|
+
size_t total_rows = 0;
|
|
516
|
+
for (int chunk = 0; chunk < result_count; chunk++) {
|
|
517
|
+
cisv_result_t *result = results[chunk];
|
|
518
|
+
if (!result) {
|
|
519
|
+
continue;
|
|
520
|
+
}
|
|
521
|
+
if (result->error_code != 0) {
|
|
522
|
+
error = result->error_message[0] ? result->error_message : "parse error";
|
|
523
|
+
return false;
|
|
524
|
+
}
|
|
525
|
+
total_rows += result->row_count;
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
rows.clear();
|
|
529
|
+
rows.reserve(total_rows);
|
|
530
|
+
|
|
531
|
+
for (int chunk = 0; chunk < result_count; chunk++) {
|
|
532
|
+
cisv_result_t *result = results[chunk];
|
|
533
|
+
if (!result) {
|
|
534
|
+
continue;
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
for (size_t i = 0; i < result->row_count; i++) {
|
|
538
|
+
cisv_row_t *row = &result->rows[i];
|
|
539
|
+
std::vector<std::string> out_row;
|
|
540
|
+
out_row.reserve(row->field_count);
|
|
541
|
+
for (size_t j = 0; j < row->field_count; j++) {
|
|
542
|
+
out_row.emplace_back(row->fields[j], row->field_lengths[j]);
|
|
543
|
+
}
|
|
544
|
+
rows.emplace_back(std::move(out_row));
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
return true;
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
static Napi::Array rowsToJsArray(Napi::Env env, const std::vector<std::vector<std::string>> &rows) {
|
|
552
|
+
Napi::Array out = Napi::Array::New(env, rows.size());
|
|
553
|
+
for (size_t i = 0; i < rows.size(); i++) {
|
|
554
|
+
Napi::Array row = Napi::Array::New(env, rows[i].size());
|
|
555
|
+
for (size_t j = 0; j < rows[i].size(); j++) {
|
|
556
|
+
const std::string &field = rows[i][j];
|
|
557
|
+
row[j] = SafeNewString(env, field.c_str(), field.length());
|
|
558
|
+
}
|
|
559
|
+
out[i] = row;
|
|
560
|
+
}
|
|
561
|
+
return out;
|
|
562
|
+
}
|
|
563
|
+
|
|
342
564
|
class ParseFileWorker final : public Napi::AsyncWorker {
|
|
343
565
|
public:
|
|
344
566
|
ParseFileWorker(
|
|
@@ -383,19 +605,57 @@ public:
|
|
|
383
605
|
}
|
|
384
606
|
|
|
385
607
|
void OnOK() override {
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
608
|
+
deferred_.Resolve(rowsToJsArray(Env(), rows_));
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
void OnError(const Napi::Error &e) override {
|
|
612
|
+
deferred_.Reject(e.Value());
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
private:
|
|
616
|
+
std::string path_;
|
|
617
|
+
cisv_config config_;
|
|
618
|
+
Napi::Promise::Deferred deferred_;
|
|
619
|
+
std::vector<std::vector<std::string>> rows_;
|
|
620
|
+
};
|
|
621
|
+
|
|
622
|
+
class ParseFileParallelWorker final : public Napi::AsyncWorker {
|
|
623
|
+
public:
|
|
624
|
+
ParseFileParallelWorker(
|
|
625
|
+
Napi::Env env,
|
|
626
|
+
std::string path,
|
|
627
|
+
cisv_config config,
|
|
628
|
+
int num_threads,
|
|
629
|
+
Napi::Promise::Deferred deferred
|
|
630
|
+
) : Napi::AsyncWorker(env),
|
|
631
|
+
path_(std::move(path)),
|
|
632
|
+
config_(config),
|
|
633
|
+
num_threads_(num_threads),
|
|
634
|
+
deferred_(deferred) {}
|
|
635
|
+
|
|
636
|
+
void Execute() override {
|
|
637
|
+
if (!validateNumThreads(num_threads_, error_)) {
|
|
638
|
+
SetError(error_);
|
|
639
|
+
return;
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
int result_count = 0;
|
|
643
|
+
cisv_result_t **results = cisv_parse_file_parallel(path_.c_str(), &config_, num_threads_, &result_count);
|
|
644
|
+
if (!results) {
|
|
645
|
+
SetError("parse error: " + std::string(strerror(errno)));
|
|
646
|
+
return;
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
bool ok = collectParallelRows(results, result_count, rows_, error_);
|
|
650
|
+
cisv_results_free(results, result_count);
|
|
651
|
+
|
|
652
|
+
if (!ok) {
|
|
653
|
+
SetError(error_);
|
|
396
654
|
}
|
|
655
|
+
}
|
|
397
656
|
|
|
398
|
-
|
|
657
|
+
void OnOK() override {
|
|
658
|
+
deferred_.Resolve(rowsToJsArray(Env(), rows_));
|
|
399
659
|
}
|
|
400
660
|
|
|
401
661
|
void OnError(const Napi::Error &e) override {
|
|
@@ -405,8 +665,10 @@ public:
|
|
|
405
665
|
private:
|
|
406
666
|
std::string path_;
|
|
407
667
|
cisv_config config_;
|
|
668
|
+
int num_threads_;
|
|
408
669
|
Napi::Promise::Deferred deferred_;
|
|
409
670
|
std::vector<std::vector<std::string>> rows_;
|
|
671
|
+
std::string error_;
|
|
410
672
|
};
|
|
411
673
|
|
|
412
674
|
} // namespace
|
|
@@ -416,7 +678,9 @@ public:
|
|
|
416
678
|
static Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
417
679
|
Napi::Function func = DefineClass(env, "cisvParser", {
|
|
418
680
|
InstanceMethod("parseSync", &CisvParser::ParseSync),
|
|
681
|
+
InstanceMethod("parseSyncParallel", &CisvParser::ParseSyncParallel),
|
|
419
682
|
InstanceMethod("parse", &CisvParser::ParseAsync),
|
|
683
|
+
InstanceMethod("parseParallel", &CisvParser::ParseParallel),
|
|
420
684
|
InstanceMethod("parseString", &CisvParser::ParseString),
|
|
421
685
|
InstanceMethod("write", &CisvParser::Write),
|
|
422
686
|
InstanceMethod("end", &CisvParser::End),
|
|
@@ -448,7 +712,8 @@ public:
|
|
|
448
712
|
}
|
|
449
713
|
|
|
450
714
|
CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
|
|
451
|
-
rc_ =
|
|
715
|
+
rc_ = nullptr;
|
|
716
|
+
parser_ = nullptr;
|
|
452
717
|
parse_time_ = 0;
|
|
453
718
|
total_bytes_ = 0;
|
|
454
719
|
is_destroyed_ = false;
|
|
@@ -467,14 +732,13 @@ public:
|
|
|
467
732
|
ApplyConfigFromObject(options);
|
|
468
733
|
}
|
|
469
734
|
|
|
735
|
+
rc_ = new RowCollector();
|
|
736
|
+
|
|
470
737
|
// Set callbacks
|
|
471
738
|
config_.field_cb = field_cb;
|
|
472
739
|
config_.row_cb = row_cb;
|
|
473
740
|
config_.error_cb = error_cb;
|
|
474
741
|
config_.user = rc_;
|
|
475
|
-
|
|
476
|
-
// Create parser with configuration
|
|
477
|
-
parser_ = cisv_parser_create_with_config(&config_);
|
|
478
742
|
}
|
|
479
743
|
|
|
480
744
|
~CisvParser() {
|
|
@@ -483,84 +747,10 @@ public:
|
|
|
483
747
|
|
|
484
748
|
// Apply configuration from JavaScript object
|
|
485
749
|
void ApplyConfigFromObject(Napi::Object options) {
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
std::string delim_str = delim.As<Napi::String>();
|
|
491
|
-
if (!delim_str.empty()) {
|
|
492
|
-
config_.delimiter = delim_str[0];
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
// Quote character
|
|
498
|
-
if (options.Has("quote")) {
|
|
499
|
-
Napi::Value quote = options.Get("quote");
|
|
500
|
-
if (quote.IsString()) {
|
|
501
|
-
std::string quote_str = quote.As<Napi::String>();
|
|
502
|
-
if (!quote_str.empty()) {
|
|
503
|
-
config_.quote = quote_str[0];
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
}
|
|
507
|
-
|
|
508
|
-
// Escape character
|
|
509
|
-
if (options.Has("escape")) {
|
|
510
|
-
Napi::Value escape = options.Get("escape");
|
|
511
|
-
if (escape.IsString()) {
|
|
512
|
-
std::string escape_str = escape.As<Napi::String>();
|
|
513
|
-
if (!escape_str.empty()) {
|
|
514
|
-
config_.escape = escape_str[0];
|
|
515
|
-
}
|
|
516
|
-
} else if (escape.IsNull() || escape.IsUndefined()) {
|
|
517
|
-
config_.escape = 0; // RFC4180 style
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
|
|
521
|
-
// Comment character
|
|
522
|
-
if (options.Has("comment")) {
|
|
523
|
-
Napi::Value comment = options.Get("comment");
|
|
524
|
-
if (comment.IsString()) {
|
|
525
|
-
std::string comment_str = comment.As<Napi::String>();
|
|
526
|
-
if (!comment_str.empty()) {
|
|
527
|
-
config_.comment = comment_str[0];
|
|
528
|
-
}
|
|
529
|
-
}
|
|
530
|
-
}
|
|
531
|
-
|
|
532
|
-
// Boolean options
|
|
533
|
-
if (options.Has("skipEmptyLines")) {
|
|
534
|
-
config_.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
|
|
535
|
-
}
|
|
536
|
-
|
|
537
|
-
if (options.Has("trim")) {
|
|
538
|
-
config_.trim = options.Get("trim").As<Napi::Boolean>();
|
|
539
|
-
}
|
|
540
|
-
|
|
541
|
-
if (options.Has("relaxed")) {
|
|
542
|
-
config_.relaxed = options.Get("relaxed").As<Napi::Boolean>();
|
|
543
|
-
}
|
|
544
|
-
|
|
545
|
-
if (options.Has("skipLinesWithError")) {
|
|
546
|
-
config_.skip_lines_with_error = options.Get("skipLinesWithError").As<Napi::Boolean>();
|
|
547
|
-
}
|
|
548
|
-
|
|
549
|
-
// Numeric options
|
|
550
|
-
if (options.Has("maxRowSize")) {
|
|
551
|
-
Napi::Value val = options.Get("maxRowSize");
|
|
552
|
-
if (!val.IsNull() && !val.IsUndefined()) {
|
|
553
|
-
config_.max_row_size = val.As<Napi::Number>().Uint32Value();
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
if (options.Has("fromLine")) {
|
|
558
|
-
config_.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
if (options.Has("toLine")) {
|
|
562
|
-
config_.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
|
|
563
|
-
}
|
|
750
|
+
Napi::Env env = options.Env();
|
|
751
|
+
cisv_config next = config_;
|
|
752
|
+
ApplyConfigOptions(env, options, &next);
|
|
753
|
+
config_ = next;
|
|
564
754
|
}
|
|
565
755
|
|
|
566
756
|
// Set configuration after creation
|
|
@@ -578,17 +768,12 @@ public:
|
|
|
578
768
|
Napi::Object options = info[0].As<Napi::Object>();
|
|
579
769
|
ApplyConfigFromObject(options);
|
|
580
770
|
|
|
581
|
-
// Recreate parser
|
|
771
|
+
// Recreate the streaming parser only if it has already been instantiated.
|
|
582
772
|
if (parser_) {
|
|
583
773
|
cisv_parser_destroy(parser_);
|
|
774
|
+
parser_ = nullptr;
|
|
775
|
+
ensureParser(env);
|
|
584
776
|
}
|
|
585
|
-
|
|
586
|
-
config_.field_cb = field_cb;
|
|
587
|
-
config_.row_cb = row_cb;
|
|
588
|
-
config_.error_cb = error_cb;
|
|
589
|
-
config_.user = rc_;
|
|
590
|
-
|
|
591
|
-
parser_ = cisv_parser_create_with_config(&config_);
|
|
592
777
|
}
|
|
593
778
|
|
|
594
779
|
// Get current configuration
|
|
@@ -692,6 +877,7 @@ public:
|
|
|
692
877
|
} else {
|
|
693
878
|
// Set environment for JS transforms
|
|
694
879
|
rc_->env = env;
|
|
880
|
+
ensureParser(env);
|
|
695
881
|
result = cisv_parser_parse_file(parser_, path.c_str());
|
|
696
882
|
// Clear the environment reference after parsing
|
|
697
883
|
rc_->env = nullptr;
|
|
@@ -714,16 +900,28 @@ public:
|
|
|
714
900
|
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
715
901
|
}
|
|
716
902
|
|
|
717
|
-
if (info.Length() != 1 || !info[0].IsString()) {
|
|
718
|
-
throw Napi::TypeError::New(env, "Expected CSV string");
|
|
903
|
+
if (info.Length() != 1 || (!info[0].IsString() && !info[0].IsBuffer())) {
|
|
904
|
+
throw Napi::TypeError::New(env, "Expected CSV string or Buffer");
|
|
719
905
|
}
|
|
720
906
|
|
|
721
|
-
|
|
907
|
+
const char *content_data = nullptr;
|
|
908
|
+
size_t content_len = 0;
|
|
909
|
+
std::string content_storage;
|
|
910
|
+
|
|
911
|
+
if (info[0].IsBuffer()) {
|
|
912
|
+
auto buffer = info[0].As<Napi::Buffer<char>>();
|
|
913
|
+
content_data = buffer.Data();
|
|
914
|
+
content_len = buffer.Length();
|
|
915
|
+
} else {
|
|
916
|
+
content_storage = info[0].As<Napi::String>();
|
|
917
|
+
content_data = content_storage.data();
|
|
918
|
+
content_len = content_storage.size();
|
|
919
|
+
}
|
|
722
920
|
|
|
723
921
|
resetRowState();
|
|
724
922
|
|
|
725
923
|
if (!hasTransforms()) {
|
|
726
|
-
cisv_result_t *batch = cisv_parse_string_batch(
|
|
924
|
+
cisv_result_t *batch = cisv_parse_string_batch(content_data, content_len, &config_);
|
|
727
925
|
if (!batch) {
|
|
728
926
|
throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
|
|
729
927
|
}
|
|
@@ -737,20 +935,68 @@ public:
|
|
|
737
935
|
} else {
|
|
738
936
|
// Set environment for JS transforms
|
|
739
937
|
rc_->env = env;
|
|
938
|
+
ensureParser(env);
|
|
740
939
|
|
|
741
940
|
// Write the string content as chunks
|
|
742
|
-
cisv_parser_write(parser_,
|
|
941
|
+
cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(content_data), content_len);
|
|
743
942
|
cisv_parser_end(parser_);
|
|
744
943
|
|
|
745
944
|
// Clear the environment reference after parsing
|
|
746
945
|
rc_->env = nullptr;
|
|
747
946
|
}
|
|
748
947
|
|
|
749
|
-
total_bytes_ =
|
|
948
|
+
total_bytes_ = content_len;
|
|
750
949
|
|
|
751
950
|
return drainRows(env);
|
|
752
951
|
}
|
|
753
952
|
|
|
953
|
+
Napi::Value ParseSyncParallel(const Napi::CallbackInfo &info) {
|
|
954
|
+
Napi::Env env = info.Env();
|
|
955
|
+
|
|
956
|
+
if (is_destroyed_) {
|
|
957
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
if (info.Length() < 1 || !info[0].IsString()) {
|
|
961
|
+
throw Napi::TypeError::New(env, "Expected file path string");
|
|
962
|
+
}
|
|
963
|
+
|
|
964
|
+
int num_threads = 0;
|
|
965
|
+
if (info.Length() > 1 && !info[1].IsUndefined() && !info[1].IsNull()) {
|
|
966
|
+
if (!info[1].IsNumber()) {
|
|
967
|
+
throw Napi::TypeError::New(env, "numThreads must be a number");
|
|
968
|
+
}
|
|
969
|
+
num_threads = info[1].As<Napi::Number>().Int32Value();
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
std::string validation_error;
|
|
973
|
+
if (!validateNumThreads(num_threads, validation_error)) {
|
|
974
|
+
throw Napi::TypeError::New(env, validation_error);
|
|
975
|
+
}
|
|
976
|
+
|
|
977
|
+
std::string path = info[0].As<Napi::String>().Utf8Value();
|
|
978
|
+
int result_count = 0;
|
|
979
|
+
cisv_result_t **results = cisv_parse_file_parallel(
|
|
980
|
+
path.c_str(),
|
|
981
|
+
&config_,
|
|
982
|
+
num_threads,
|
|
983
|
+
&result_count);
|
|
984
|
+
if (!results) {
|
|
985
|
+
throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
std::vector<std::vector<std::string>> rows;
|
|
989
|
+
std::string error;
|
|
990
|
+
bool ok = collectParallelRows(results, result_count, rows, error);
|
|
991
|
+
cisv_results_free(results, result_count);
|
|
992
|
+
|
|
993
|
+
if (!ok) {
|
|
994
|
+
throw Napi::Error::New(env, error);
|
|
995
|
+
}
|
|
996
|
+
|
|
997
|
+
return rowsToJsArray(env, rows);
|
|
998
|
+
}
|
|
999
|
+
|
|
754
1000
|
// Write chunk for streaming
|
|
755
1001
|
void Write(const Napi::CallbackInfo &info) {
|
|
756
1002
|
Napi::Env env = info.Env();
|
|
@@ -813,6 +1059,7 @@ public:
|
|
|
813
1059
|
stream_buffering_active_ = false;
|
|
814
1060
|
}
|
|
815
1061
|
|
|
1062
|
+
ensureParser(env);
|
|
816
1063
|
cisv_parser_write(parser_, chunk_data, chunk_size);
|
|
817
1064
|
total_bytes_ += chunk_size;
|
|
818
1065
|
}
|
|
@@ -847,6 +1094,7 @@ public:
|
|
|
847
1094
|
stream_buffering_active_ = false;
|
|
848
1095
|
}
|
|
849
1096
|
|
|
1097
|
+
ensureParser(info.Env());
|
|
850
1098
|
cisv_parser_end(parser_);
|
|
851
1099
|
// Clear the environment reference after ending to prevent stale references
|
|
852
1100
|
rc_->env = nullptr;
|
|
@@ -921,8 +1169,6 @@ public:
|
|
|
921
1169
|
type = TRANSFORM_TO_INT;
|
|
922
1170
|
} else if (transform_type == "to_float" || transform_type == "float") {
|
|
923
1171
|
type = TRANSFORM_TO_FLOAT;
|
|
924
|
-
} else if (transform_type == "hash_sha256" || transform_type == "sha256") {
|
|
925
|
-
type = TRANSFORM_HASH_SHA256;
|
|
926
1172
|
} else if (transform_type == "base64_encode" || transform_type == "base64") {
|
|
927
1173
|
type = TRANSFORM_BASE64_ENCODE;
|
|
928
1174
|
} else {
|
|
@@ -1032,8 +1278,6 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
|
|
|
1032
1278
|
type = TRANSFORM_TO_INT;
|
|
1033
1279
|
} else if (transform_type == "to_float" || transform_type == "float") {
|
|
1034
1280
|
type = TRANSFORM_TO_FLOAT;
|
|
1035
|
-
} else if (transform_type == "hash_sha256" || transform_type == "sha256") {
|
|
1036
|
-
type = TRANSFORM_HASH_SHA256;
|
|
1037
1281
|
} else if (transform_type == "base64_encode" || transform_type == "base64") {
|
|
1038
1282
|
type = TRANSFORM_BASE64_ENCODE;
|
|
1039
1283
|
} else {
|
|
@@ -1202,18 +1446,28 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1202
1446
|
|
|
1203
1447
|
std::string field_name = info[0].As<Napi::String>();
|
|
1204
1448
|
|
|
1449
|
+
int field_index = -1;
|
|
1450
|
+
|
|
1205
1451
|
// Remove from JavaScript transforms by finding the field index
|
|
1206
1452
|
if (rc_->pipeline && rc_->pipeline->header_fields) {
|
|
1207
1453
|
for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
|
|
1208
1454
|
if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
|
|
1209
|
-
|
|
1455
|
+
field_index = static_cast<int>(i);
|
|
1456
|
+
auto it = rc_->js_transforms.find(field_index);
|
|
1457
|
+
if (it != rc_->js_transforms.end()) {
|
|
1458
|
+
if (!it->second.IsEmpty()) {
|
|
1459
|
+
it->second.Reset();
|
|
1460
|
+
}
|
|
1461
|
+
rc_->js_transforms.erase(it);
|
|
1462
|
+
}
|
|
1210
1463
|
break;
|
|
1211
1464
|
}
|
|
1212
1465
|
}
|
|
1213
1466
|
}
|
|
1214
1467
|
|
|
1215
|
-
|
|
1216
|
-
|
|
1468
|
+
if (field_index >= 0 && rc_->pipeline) {
|
|
1469
|
+
cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
|
|
1470
|
+
}
|
|
1217
1471
|
|
|
1218
1472
|
return info.This();
|
|
1219
1473
|
}
|
|
@@ -1232,10 +1486,17 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1232
1486
|
int field_index = info[0].As<Napi::Number>().Int32Value();
|
|
1233
1487
|
|
|
1234
1488
|
// Remove from JavaScript transforms
|
|
1235
|
-
rc_->js_transforms.
|
|
1489
|
+
auto it = rc_->js_transforms.find(field_index);
|
|
1490
|
+
if (it != rc_->js_transforms.end()) {
|
|
1491
|
+
if (!it->second.IsEmpty()) {
|
|
1492
|
+
it->second.Reset();
|
|
1493
|
+
}
|
|
1494
|
+
rc_->js_transforms.erase(it);
|
|
1495
|
+
}
|
|
1236
1496
|
|
|
1237
|
-
|
|
1238
|
-
|
|
1497
|
+
if (rc_->pipeline) {
|
|
1498
|
+
cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
|
|
1499
|
+
}
|
|
1239
1500
|
|
|
1240
1501
|
return info.This();
|
|
1241
1502
|
}
|
|
@@ -1306,6 +1567,43 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1306
1567
|
return deferred.Promise();
|
|
1307
1568
|
}
|
|
1308
1569
|
|
|
1570
|
+
Napi::Value ParseParallel(const Napi::CallbackInfo &info) {
|
|
1571
|
+
Napi::Env env = info.Env();
|
|
1572
|
+
|
|
1573
|
+
if (is_destroyed_) {
|
|
1574
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
1575
|
+
}
|
|
1576
|
+
|
|
1577
|
+
if (info.Length() < 1 || !info[0].IsString()) {
|
|
1578
|
+
throw Napi::TypeError::New(env, "Expected file path string");
|
|
1579
|
+
}
|
|
1580
|
+
|
|
1581
|
+
int num_threads = 0;
|
|
1582
|
+
if (info.Length() > 1 && !info[1].IsUndefined() && !info[1].IsNull()) {
|
|
1583
|
+
if (!info[1].IsNumber()) {
|
|
1584
|
+
throw Napi::TypeError::New(env, "numThreads must be a number");
|
|
1585
|
+
}
|
|
1586
|
+
num_threads = info[1].As<Napi::Number>().Int32Value();
|
|
1587
|
+
}
|
|
1588
|
+
|
|
1589
|
+
auto deferred = Napi::Promise::Deferred::New(env);
|
|
1590
|
+
cisv_config worker_config = config_;
|
|
1591
|
+
worker_config.field_cb = nullptr;
|
|
1592
|
+
worker_config.row_cb = nullptr;
|
|
1593
|
+
worker_config.error_cb = nullptr;
|
|
1594
|
+
worker_config.user = nullptr;
|
|
1595
|
+
|
|
1596
|
+
auto *worker = new ParseFileParallelWorker(
|
|
1597
|
+
env,
|
|
1598
|
+
info[0].As<Napi::String>().Utf8Value(),
|
|
1599
|
+
worker_config,
|
|
1600
|
+
num_threads,
|
|
1601
|
+
deferred);
|
|
1602
|
+
worker->Queue();
|
|
1603
|
+
|
|
1604
|
+
return deferred.Promise();
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1309
1607
|
// Get information about registered transforms
|
|
1310
1608
|
Napi::Value GetTransformInfo(const Napi::CallbackInfo &info) {
|
|
1311
1609
|
Napi::Env env = info.Env();
|
|
@@ -1325,16 +1623,29 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1325
1623
|
result.Set("jsTransformCount", Napi::Number::New(env, js_transform_count));
|
|
1326
1624
|
|
|
1327
1625
|
// List field indices with transforms
|
|
1328
|
-
|
|
1329
|
-
|
|
1626
|
+
std::vector<int> field_indices;
|
|
1627
|
+
auto add_field_index = [&field_indices](int field_index) {
|
|
1628
|
+
if (std::find(field_indices.begin(), field_indices.end(), field_index) == field_indices.end()) {
|
|
1629
|
+
field_indices.push_back(field_index);
|
|
1630
|
+
}
|
|
1631
|
+
};
|
|
1330
1632
|
|
|
1331
|
-
|
|
1633
|
+
if (rc_ && rc_->pipeline) {
|
|
1634
|
+
for (size_t i = 0; i < rc_->pipeline->count; i++) {
|
|
1635
|
+
add_field_index(rc_->pipeline->transforms[i].field_index);
|
|
1636
|
+
}
|
|
1637
|
+
}
|
|
1332
1638
|
if (rc_) {
|
|
1333
1639
|
for (const auto& pair : rc_->js_transforms) {
|
|
1334
|
-
|
|
1640
|
+
add_field_index(pair.first);
|
|
1335
1641
|
}
|
|
1336
1642
|
}
|
|
1337
1643
|
|
|
1644
|
+
Napi::Array fields = Napi::Array::New(env, field_indices.size());
|
|
1645
|
+
for (size_t i = 0; i < field_indices.size(); i++) {
|
|
1646
|
+
fields[i] = Napi::Number::New(env, field_indices[i]);
|
|
1647
|
+
}
|
|
1648
|
+
|
|
1338
1649
|
result.Set("fieldIndices", fields);
|
|
1339
1650
|
|
|
1340
1651
|
return result;
|
|
@@ -1400,36 +1711,13 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1400
1711
|
cisv_config_init(&config);
|
|
1401
1712
|
|
|
1402
1713
|
// Apply configuration if provided
|
|
1714
|
+
if (info.Length() > 1 && !info[1].IsNull() && !info[1].IsUndefined() && !info[1].IsObject()) {
|
|
1715
|
+
throw Napi::TypeError::New(env, "Config must be an object");
|
|
1716
|
+
}
|
|
1717
|
+
|
|
1403
1718
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
1404
1719
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
1405
|
-
|
|
1406
|
-
// Apply same configuration parsing logic
|
|
1407
|
-
if (options.Has("delimiter")) {
|
|
1408
|
-
std::string delim = options.Get("delimiter").As<Napi::String>();
|
|
1409
|
-
if (!delim.empty()) config.delimiter = delim[0];
|
|
1410
|
-
}
|
|
1411
|
-
|
|
1412
|
-
if (options.Has("quote")) {
|
|
1413
|
-
std::string quote = options.Get("quote").As<Napi::String>();
|
|
1414
|
-
if (!quote.empty()) config.quote = quote[0];
|
|
1415
|
-
}
|
|
1416
|
-
|
|
1417
|
-
if (options.Has("comment")) {
|
|
1418
|
-
std::string comment = options.Get("comment").As<Napi::String>();
|
|
1419
|
-
if (!comment.empty()) config.comment = comment[0];
|
|
1420
|
-
}
|
|
1421
|
-
|
|
1422
|
-
if (options.Has("skipEmptyLines")) {
|
|
1423
|
-
config.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
|
|
1424
|
-
}
|
|
1425
|
-
|
|
1426
|
-
if (options.Has("fromLine")) {
|
|
1427
|
-
config.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
|
|
1428
|
-
}
|
|
1429
|
-
|
|
1430
|
-
if (options.Has("toLine")) {
|
|
1431
|
-
config.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
|
|
1432
|
-
}
|
|
1720
|
+
ApplyConfigOptions(env, options, &config);
|
|
1433
1721
|
}
|
|
1434
1722
|
|
|
1435
1723
|
size_t count = cisv_parser_count_rows_with_config(path.c_str(), &config);
|
|
@@ -1532,6 +1820,22 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1532
1820
|
}
|
|
1533
1821
|
|
|
1534
1822
|
private:
|
|
1823
|
+
void ensureParser(Napi::Env env) {
|
|
1824
|
+
if (parser_) {
|
|
1825
|
+
return;
|
|
1826
|
+
}
|
|
1827
|
+
|
|
1828
|
+
config_.field_cb = field_cb;
|
|
1829
|
+
config_.row_cb = row_cb;
|
|
1830
|
+
config_.error_cb = error_cb;
|
|
1831
|
+
config_.user = rc_;
|
|
1832
|
+
|
|
1833
|
+
parser_ = cisv_parser_create_with_config(&config_);
|
|
1834
|
+
if (!parser_) {
|
|
1835
|
+
throw Napi::Error::New(env, "Failed to create parser");
|
|
1836
|
+
}
|
|
1837
|
+
}
|
|
1838
|
+
|
|
1535
1839
|
void clearBatchResult() {
|
|
1536
1840
|
if (batch_result_) {
|
|
1537
1841
|
cisv_result_free(batch_result_);
|
|
@@ -1559,6 +1863,7 @@ private:
|
|
|
1559
1863
|
if (pending_stream_.empty()) {
|
|
1560
1864
|
return;
|
|
1561
1865
|
}
|
|
1866
|
+
ensureParser(Env());
|
|
1562
1867
|
cisv_parser_write(
|
|
1563
1868
|
parser_,
|
|
1564
1869
|
reinterpret_cast<const uint8_t*>(pending_stream_.data()),
|
|
@@ -1640,7 +1945,7 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
|
|
|
1640
1945
|
CisvParser::Init(env, exports);
|
|
1641
1946
|
|
|
1642
1947
|
// Add version info
|
|
1643
|
-
exports.Set("version", Napi::String::New(env, "
|
|
1948
|
+
exports.Set("version", Napi::String::New(env, "0.4.9"));
|
|
1644
1949
|
|
|
1645
1950
|
// Add transform type constants
|
|
1646
1951
|
Napi::Object transformTypes = Napi::Object::New(env);
|
|
@@ -1649,7 +1954,6 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
|
|
|
1649
1954
|
transformTypes.Set("TRIM", Napi::String::New(env, "trim"));
|
|
1650
1955
|
transformTypes.Set("TO_INT", Napi::String::New(env, "to_int"));
|
|
1651
1956
|
transformTypes.Set("TO_FLOAT", Napi::String::New(env, "to_float"));
|
|
1652
|
-
transformTypes.Set("HASH_SHA256", Napi::String::New(env, "hash_sha256"));
|
|
1653
1957
|
transformTypes.Set("BASE64_ENCODE", Napi::String::New(env, "base64_encode"));
|
|
1654
1958
|
exports.Set("TransformType", transformTypes);
|
|
1655
1959
|
|
package/cisv/types/cisv.d.ts
CHANGED
|
@@ -8,7 +8,6 @@ declare module 'cisv' {
|
|
|
8
8
|
TRIM = 'trim',
|
|
9
9
|
TO_INT = 'to_int',
|
|
10
10
|
TO_FLOAT = 'to_float',
|
|
11
|
-
HASH_SHA256 = 'hash_sha256',
|
|
12
11
|
BASE64_ENCODE = 'base64_encode',
|
|
13
12
|
CUSTOM = 'custom'
|
|
14
13
|
}
|
|
@@ -64,6 +63,14 @@ declare module 'cisv' {
|
|
|
64
63
|
*/
|
|
65
64
|
parseSync(path: string): string[][];
|
|
66
65
|
|
|
66
|
+
/**
|
|
67
|
+
* Parse CSV file synchronously using multiple worker threads.
|
|
68
|
+
* @param path Path to CSV file
|
|
69
|
+
* @param numThreads Number of threads to use (0 = auto-detect)
|
|
70
|
+
* @returns Array of rows with string values
|
|
71
|
+
*/
|
|
72
|
+
parseSyncParallel(path: string, numThreads?: number): string[][];
|
|
73
|
+
|
|
67
74
|
/**
|
|
68
75
|
* Parse CSV file asynchronously
|
|
69
76
|
* @param path Path to CSV file
|
|
@@ -72,11 +79,19 @@ declare module 'cisv' {
|
|
|
72
79
|
parse(path: string): Promise<string[][]>;
|
|
73
80
|
|
|
74
81
|
/**
|
|
75
|
-
* Parse CSV
|
|
76
|
-
* @param
|
|
82
|
+
* Parse CSV file asynchronously using multiple worker threads.
|
|
83
|
+
* @param path Path to CSV file
|
|
84
|
+
* @param numThreads Number of threads to use (0 = auto-detect)
|
|
85
|
+
* @returns Promise resolving to array of rows
|
|
86
|
+
*/
|
|
87
|
+
parseParallel(path: string, numThreads?: number): Promise<string[][]>;
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Parse CSV string or Buffer content
|
|
91
|
+
* @param content CSV content as string or Buffer
|
|
77
92
|
* @returns Array of rows with string values
|
|
78
93
|
*/
|
|
79
|
-
parseString(content: string): string[][];
|
|
94
|
+
parseString(content: Buffer | string): string[][];
|
|
80
95
|
|
|
81
96
|
/**
|
|
82
97
|
* Write chunk of CSV data (for streaming)
|