cisv 0.4.7 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -83,7 +83,6 @@ Built-in transform names:
83
83
  - `trim`
84
84
  - `to_int` (or `int`)
85
85
  - `to_float` (or `float`)
86
- - `hash_sha256` (or `sha256`)
87
86
  - `base64_encode` (or `base64`)
88
87
 
89
88
  ## Examples
package/binding.gyp CHANGED
@@ -21,7 +21,6 @@
21
21
  "cflags_cc!": [ "-fno-exceptions" ],
22
22
  "cflags_cc": ["-O3"],
23
23
  "defines": [
24
- "NAPI_DISABLE_CPP_EXCEPTIONS",
25
24
  "NAPI_VERSION=6"
26
25
  ],
27
26
  "conditions": [
Binary file
@@ -5,11 +5,170 @@
5
5
  #include <memory>
6
6
  #include <string>
7
7
  #include <unordered_map>
8
+ #include <algorithm>
8
9
  #include <chrono>
9
10
  #include <cstdint>
11
+ #include <climits>
12
+ #include <cmath>
10
13
 
11
14
  namespace {
12
15
 
16
+ static bool isInvalidConfigChar(char c) {
17
+ return c == '\0' || c == '\n' || c == '\r';
18
+ }
19
+
20
+ static void ValidateSingleCharOption(
21
+ Napi::Env env,
22
+ const Napi::Object &options,
23
+ const char *option_name,
24
+ char *target,
25
+ bool allow_null = false
26
+ ) {
27
+ if (!options.Has(option_name)) {
28
+ return;
29
+ }
30
+
31
+ Napi::Value value = options.Get(option_name);
32
+ if (allow_null && (value.IsNull() || value.IsUndefined())) {
33
+ *target = 0;
34
+ return;
35
+ }
36
+
37
+ if (!value.IsString()) {
38
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a string");
39
+ }
40
+
41
+ std::string raw = value.As<Napi::String>();
42
+ if (raw.size() != 1) {
43
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be exactly 1 character");
44
+ }
45
+ if (isInvalidConfigChar(raw[0])) {
46
+ throw Napi::TypeError::New(env, std::string("Invalid ") + option_name + " character");
47
+ }
48
+
49
+ *target = raw[0];
50
+ }
51
+
52
+ static double MaxJsSafeInteger() {
53
+ return 9007199254740991.0;
54
+ }
55
+
56
+ static bool IsWholeNumber(double value) {
57
+ return std::isfinite(value) && std::floor(value) == value;
58
+ }
59
+
60
+ static void ApplyBooleanOption(
61
+ Napi::Env env,
62
+ const Napi::Object &options,
63
+ const char *option_name,
64
+ bool *target
65
+ ) {
66
+ if (!options.Has(option_name)) {
67
+ return;
68
+ }
69
+
70
+ Napi::Value value = options.Get(option_name);
71
+ if (!value.IsBoolean()) {
72
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a boolean");
73
+ }
74
+
75
+ *target = value.As<Napi::Boolean>();
76
+ }
77
+
78
+ static void ApplySizeOption(
79
+ Napi::Env env,
80
+ const Napi::Object &options,
81
+ const char *option_name,
82
+ size_t *target
83
+ ) {
84
+ if (!options.Has(option_name)) {
85
+ return;
86
+ }
87
+
88
+ Napi::Value value = options.Get(option_name);
89
+ if (value.IsNull() || value.IsUndefined()) {
90
+ *target = 0;
91
+ return;
92
+ }
93
+ if (!value.IsNumber()) {
94
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
95
+ }
96
+
97
+ double raw = value.As<Napi::Number>().DoubleValue();
98
+ double max_value = static_cast<double>(SIZE_MAX);
99
+ if (!IsWholeNumber(raw) || raw < 0.0 || raw > max_value || raw > MaxJsSafeInteger()) {
100
+ throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
101
+ }
102
+
103
+ *target = static_cast<size_t>(raw);
104
+ }
105
+
106
+ static void ApplyLineOption(
107
+ Napi::Env env,
108
+ const Napi::Object &options,
109
+ const char *option_name,
110
+ int *target
111
+ ) {
112
+ if (!options.Has(option_name)) {
113
+ return;
114
+ }
115
+
116
+ Napi::Value value = options.Get(option_name);
117
+ if (value.IsNull() || value.IsUndefined()) {
118
+ *target = 0;
119
+ return;
120
+ }
121
+ if (!value.IsNumber()) {
122
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
123
+ }
124
+
125
+ double raw = value.As<Napi::Number>().DoubleValue();
126
+ if (!IsWholeNumber(raw) || raw < 0.0 || raw > static_cast<double>(INT_MAX)) {
127
+ throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
128
+ }
129
+
130
+ *target = static_cast<int>(raw);
131
+ }
132
+
133
+ static void ValidateConfigSemantics(Napi::Env env, const cisv_config &config) {
134
+ if (config.delimiter == config.quote) {
135
+ throw Napi::TypeError::New(env, "delimiter and quote cannot be the same");
136
+ }
137
+ if (config.escape != '\0' && config.escape == config.delimiter) {
138
+ throw Napi::TypeError::New(env, "escape and delimiter cannot be the same");
139
+ }
140
+ if (config.escape != '\0' && config.escape == config.quote) {
141
+ throw Napi::TypeError::New(env, "escape and quote cannot be the same");
142
+ }
143
+ if (config.comment != '\0' &&
144
+ (config.comment == config.delimiter || config.comment == config.quote || config.comment == config.escape)) {
145
+ throw Napi::TypeError::New(env, "comment cannot conflict with delimiter, quote, or escape");
146
+ }
147
+
148
+ int effective_from = config.from_line > 0 ? config.from_line : 1;
149
+ if (config.to_line != 0 && config.to_line < effective_from) {
150
+ throw Napi::RangeError::New(env, "toLine must be >= fromLine");
151
+ }
152
+ }
153
+
154
+ static void ApplyConfigOptions(Napi::Env env, const Napi::Object &options, cisv_config *config) {
155
+ ValidateSingleCharOption(env, options, "delimiter", &config->delimiter);
156
+ ValidateSingleCharOption(env, options, "quote", &config->quote);
157
+ ValidateSingleCharOption(env, options, "escape", &config->escape, true);
158
+ ValidateSingleCharOption(env, options, "comment", &config->comment, true);
159
+
160
+ ApplyBooleanOption(env, options, "skipEmptyLines", &config->skip_empty_lines);
161
+ ApplyBooleanOption(env, options, "trim", &config->trim);
162
+ ApplyBooleanOption(env, options, "relaxed", &config->relaxed);
163
+ ApplyBooleanOption(env, options, "skipLinesWithError", &config->skip_lines_with_error);
164
+
165
+ ApplySizeOption(env, options, "maxRowSize", &config->max_row_size);
166
+ ApplyLineOption(env, options, "fromLine", &config->from_line);
167
+ ApplyLineOption(env, options, "toLine", &config->to_line);
168
+
169
+ ValidateConfigSemantics(env, *config);
170
+ }
171
+
13
172
  // =============================================================================
14
173
  // SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
15
174
  // Invalid UTF-8 data can cause Napi::String::New to throw or crash
@@ -108,7 +267,7 @@ static napi_value SafeNewStringValue(napi_env env, const char* data, size_t len)
108
267
  if (napi_create_string_latin1(env, data, len, &short_value) == napi_ok && short_value) {
109
268
  return short_value;
110
269
  }
111
- } else {
270
+ } else if (isValidUtf8(data, len)) {
112
271
  if (napi_create_string_utf8(env, data, len, &short_value) == napi_ok && short_value) {
113
272
  return short_value;
114
273
  }
@@ -339,6 +498,69 @@ static void error_cb(void *user, int line, const char *msg) {
339
498
  fprintf(stderr, "CSV Parse Error at line %d: %s\n", line, msg);
340
499
  }
341
500
 
501
+ static bool validateNumThreads(int num_threads, std::string &error) {
502
+ if (num_threads < 0) {
503
+ error = "numThreads must be >= 0";
504
+ return false;
505
+ }
506
+ return true;
507
+ }
508
+
509
+ static bool collectParallelRows(
510
+ cisv_result_t **results,
511
+ int result_count,
512
+ std::vector<std::vector<std::string>> &rows,
513
+ std::string &error
514
+ ) {
515
+ size_t total_rows = 0;
516
+ for (int chunk = 0; chunk < result_count; chunk++) {
517
+ cisv_result_t *result = results[chunk];
518
+ if (!result) {
519
+ continue;
520
+ }
521
+ if (result->error_code != 0) {
522
+ error = result->error_message[0] ? result->error_message : "parse error";
523
+ return false;
524
+ }
525
+ total_rows += result->row_count;
526
+ }
527
+
528
+ rows.clear();
529
+ rows.reserve(total_rows);
530
+
531
+ for (int chunk = 0; chunk < result_count; chunk++) {
532
+ cisv_result_t *result = results[chunk];
533
+ if (!result) {
534
+ continue;
535
+ }
536
+
537
+ for (size_t i = 0; i < result->row_count; i++) {
538
+ cisv_row_t *row = &result->rows[i];
539
+ std::vector<std::string> out_row;
540
+ out_row.reserve(row->field_count);
541
+ for (size_t j = 0; j < row->field_count; j++) {
542
+ out_row.emplace_back(row->fields[j], row->field_lengths[j]);
543
+ }
544
+ rows.emplace_back(std::move(out_row));
545
+ }
546
+ }
547
+
548
+ return true;
549
+ }
550
+
551
+ static Napi::Array rowsToJsArray(Napi::Env env, const std::vector<std::vector<std::string>> &rows) {
552
+ Napi::Array out = Napi::Array::New(env, rows.size());
553
+ for (size_t i = 0; i < rows.size(); i++) {
554
+ Napi::Array row = Napi::Array::New(env, rows[i].size());
555
+ for (size_t j = 0; j < rows[i].size(); j++) {
556
+ const std::string &field = rows[i][j];
557
+ row[j] = SafeNewString(env, field.c_str(), field.length());
558
+ }
559
+ out[i] = row;
560
+ }
561
+ return out;
562
+ }
563
+
342
564
  class ParseFileWorker final : public Napi::AsyncWorker {
343
565
  public:
344
566
  ParseFileWorker(
@@ -383,19 +605,57 @@ public:
383
605
  }
384
606
 
385
607
  void OnOK() override {
386
- Napi::Env env = Env();
387
- Napi::Array out = Napi::Array::New(env, rows_.size());
388
-
389
- for (size_t i = 0; i < rows_.size(); i++) {
390
- Napi::Array row = Napi::Array::New(env, rows_[i].size());
391
- for (size_t j = 0; j < rows_[i].size(); j++) {
392
- const std::string &field = rows_[i][j];
393
- row[j] = SafeNewString(env, field.c_str(), field.length());
394
- }
395
- out[i] = row;
608
+ deferred_.Resolve(rowsToJsArray(Env(), rows_));
609
+ }
610
+
611
+ void OnError(const Napi::Error &e) override {
612
+ deferred_.Reject(e.Value());
613
+ }
614
+
615
+ private:
616
+ std::string path_;
617
+ cisv_config config_;
618
+ Napi::Promise::Deferred deferred_;
619
+ std::vector<std::vector<std::string>> rows_;
620
+ };
621
+
622
+ class ParseFileParallelWorker final : public Napi::AsyncWorker {
623
+ public:
624
+ ParseFileParallelWorker(
625
+ Napi::Env env,
626
+ std::string path,
627
+ cisv_config config,
628
+ int num_threads,
629
+ Napi::Promise::Deferred deferred
630
+ ) : Napi::AsyncWorker(env),
631
+ path_(std::move(path)),
632
+ config_(config),
633
+ num_threads_(num_threads),
634
+ deferred_(deferred) {}
635
+
636
+ void Execute() override {
637
+ if (!validateNumThreads(num_threads_, error_)) {
638
+ SetError(error_);
639
+ return;
640
+ }
641
+
642
+ int result_count = 0;
643
+ cisv_result_t **results = cisv_parse_file_parallel(path_.c_str(), &config_, num_threads_, &result_count);
644
+ if (!results) {
645
+ SetError("parse error: " + std::string(strerror(errno)));
646
+ return;
647
+ }
648
+
649
+ bool ok = collectParallelRows(results, result_count, rows_, error_);
650
+ cisv_results_free(results, result_count);
651
+
652
+ if (!ok) {
653
+ SetError(error_);
396
654
  }
655
+ }
397
656
 
398
- deferred_.Resolve(out);
657
+ void OnOK() override {
658
+ deferred_.Resolve(rowsToJsArray(Env(), rows_));
399
659
  }
400
660
 
401
661
  void OnError(const Napi::Error &e) override {
@@ -405,8 +665,10 @@ public:
405
665
  private:
406
666
  std::string path_;
407
667
  cisv_config config_;
668
+ int num_threads_;
408
669
  Napi::Promise::Deferred deferred_;
409
670
  std::vector<std::vector<std::string>> rows_;
671
+ std::string error_;
410
672
  };
411
673
 
412
674
  } // namespace
@@ -416,7 +678,9 @@ public:
416
678
  static Napi::Object Init(Napi::Env env, Napi::Object exports) {
417
679
  Napi::Function func = DefineClass(env, "cisvParser", {
418
680
  InstanceMethod("parseSync", &CisvParser::ParseSync),
681
+ InstanceMethod("parseSyncParallel", &CisvParser::ParseSyncParallel),
419
682
  InstanceMethod("parse", &CisvParser::ParseAsync),
683
+ InstanceMethod("parseParallel", &CisvParser::ParseParallel),
420
684
  InstanceMethod("parseString", &CisvParser::ParseString),
421
685
  InstanceMethod("write", &CisvParser::Write),
422
686
  InstanceMethod("end", &CisvParser::End),
@@ -448,7 +712,8 @@ public:
448
712
  }
449
713
 
450
714
  CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
451
- rc_ = new RowCollector();
715
+ rc_ = nullptr;
716
+ parser_ = nullptr;
452
717
  parse_time_ = 0;
453
718
  total_bytes_ = 0;
454
719
  is_destroyed_ = false;
@@ -467,14 +732,13 @@ public:
467
732
  ApplyConfigFromObject(options);
468
733
  }
469
734
 
735
+ rc_ = new RowCollector();
736
+
470
737
  // Set callbacks
471
738
  config_.field_cb = field_cb;
472
739
  config_.row_cb = row_cb;
473
740
  config_.error_cb = error_cb;
474
741
  config_.user = rc_;
475
-
476
- // Create parser with configuration
477
- parser_ = cisv_parser_create_with_config(&config_);
478
742
  }
479
743
 
480
744
  ~CisvParser() {
@@ -483,84 +747,10 @@ public:
483
747
 
484
748
  // Apply configuration from JavaScript object
485
749
  void ApplyConfigFromObject(Napi::Object options) {
486
- // Delimiter
487
- if (options.Has("delimiter")) {
488
- Napi::Value delim = options.Get("delimiter");
489
- if (delim.IsString()) {
490
- std::string delim_str = delim.As<Napi::String>();
491
- if (!delim_str.empty()) {
492
- config_.delimiter = delim_str[0];
493
- }
494
- }
495
- }
496
-
497
- // Quote character
498
- if (options.Has("quote")) {
499
- Napi::Value quote = options.Get("quote");
500
- if (quote.IsString()) {
501
- std::string quote_str = quote.As<Napi::String>();
502
- if (!quote_str.empty()) {
503
- config_.quote = quote_str[0];
504
- }
505
- }
506
- }
507
-
508
- // Escape character
509
- if (options.Has("escape")) {
510
- Napi::Value escape = options.Get("escape");
511
- if (escape.IsString()) {
512
- std::string escape_str = escape.As<Napi::String>();
513
- if (!escape_str.empty()) {
514
- config_.escape = escape_str[0];
515
- }
516
- } else if (escape.IsNull() || escape.IsUndefined()) {
517
- config_.escape = 0; // RFC4180 style
518
- }
519
- }
520
-
521
- // Comment character
522
- if (options.Has("comment")) {
523
- Napi::Value comment = options.Get("comment");
524
- if (comment.IsString()) {
525
- std::string comment_str = comment.As<Napi::String>();
526
- if (!comment_str.empty()) {
527
- config_.comment = comment_str[0];
528
- }
529
- }
530
- }
531
-
532
- // Boolean options
533
- if (options.Has("skipEmptyLines")) {
534
- config_.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
535
- }
536
-
537
- if (options.Has("trim")) {
538
- config_.trim = options.Get("trim").As<Napi::Boolean>();
539
- }
540
-
541
- if (options.Has("relaxed")) {
542
- config_.relaxed = options.Get("relaxed").As<Napi::Boolean>();
543
- }
544
-
545
- if (options.Has("skipLinesWithError")) {
546
- config_.skip_lines_with_error = options.Get("skipLinesWithError").As<Napi::Boolean>();
547
- }
548
-
549
- // Numeric options
550
- if (options.Has("maxRowSize")) {
551
- Napi::Value val = options.Get("maxRowSize");
552
- if (!val.IsNull() && !val.IsUndefined()) {
553
- config_.max_row_size = val.As<Napi::Number>().Uint32Value();
554
- }
555
- }
556
-
557
- if (options.Has("fromLine")) {
558
- config_.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
559
- }
560
-
561
- if (options.Has("toLine")) {
562
- config_.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
563
- }
750
+ Napi::Env env = options.Env();
751
+ cisv_config next = config_;
752
+ ApplyConfigOptions(env, options, &next);
753
+ config_ = next;
564
754
  }
565
755
 
566
756
  // Set configuration after creation
@@ -578,17 +768,12 @@ public:
578
768
  Napi::Object options = info[0].As<Napi::Object>();
579
769
  ApplyConfigFromObject(options);
580
770
 
581
- // Recreate parser with new configuration
771
+ // Recreate the streaming parser only if it has already been instantiated.
582
772
  if (parser_) {
583
773
  cisv_parser_destroy(parser_);
774
+ parser_ = nullptr;
775
+ ensureParser(env);
584
776
  }
585
-
586
- config_.field_cb = field_cb;
587
- config_.row_cb = row_cb;
588
- config_.error_cb = error_cb;
589
- config_.user = rc_;
590
-
591
- parser_ = cisv_parser_create_with_config(&config_);
592
777
  }
593
778
 
594
779
  // Get current configuration
@@ -692,6 +877,7 @@ public:
692
877
  } else {
693
878
  // Set environment for JS transforms
694
879
  rc_->env = env;
880
+ ensureParser(env);
695
881
  result = cisv_parser_parse_file(parser_, path.c_str());
696
882
  // Clear the environment reference after parsing
697
883
  rc_->env = nullptr;
@@ -714,16 +900,28 @@ public:
714
900
  throw Napi::Error::New(env, "Parser has been destroyed");
715
901
  }
716
902
 
717
- if (info.Length() != 1 || !info[0].IsString()) {
718
- throw Napi::TypeError::New(env, "Expected CSV string");
903
+ if (info.Length() != 1 || (!info[0].IsString() && !info[0].IsBuffer())) {
904
+ throw Napi::TypeError::New(env, "Expected CSV string or Buffer");
719
905
  }
720
906
 
721
- std::string content = info[0].As<Napi::String>();
907
+ const char *content_data = nullptr;
908
+ size_t content_len = 0;
909
+ std::string content_storage;
910
+
911
+ if (info[0].IsBuffer()) {
912
+ auto buffer = info[0].As<Napi::Buffer<char>>();
913
+ content_data = buffer.Data();
914
+ content_len = buffer.Length();
915
+ } else {
916
+ content_storage = info[0].As<Napi::String>();
917
+ content_data = content_storage.data();
918
+ content_len = content_storage.size();
919
+ }
722
920
 
723
921
  resetRowState();
724
922
 
725
923
  if (!hasTransforms()) {
726
- cisv_result_t *batch = cisv_parse_string_batch(content.c_str(), content.length(), &config_);
924
+ cisv_result_t *batch = cisv_parse_string_batch(content_data, content_len, &config_);
727
925
  if (!batch) {
728
926
  throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
729
927
  }
@@ -737,20 +935,68 @@ public:
737
935
  } else {
738
936
  // Set environment for JS transforms
739
937
  rc_->env = env;
938
+ ensureParser(env);
740
939
 
741
940
  // Write the string content as chunks
742
- cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
941
+ cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(content_data), content_len);
743
942
  cisv_parser_end(parser_);
744
943
 
745
944
  // Clear the environment reference after parsing
746
945
  rc_->env = nullptr;
747
946
  }
748
947
 
749
- total_bytes_ = content.length();
948
+ total_bytes_ = content_len;
750
949
 
751
950
  return drainRows(env);
752
951
  }
753
952
 
953
+ Napi::Value ParseSyncParallel(const Napi::CallbackInfo &info) {
954
+ Napi::Env env = info.Env();
955
+
956
+ if (is_destroyed_) {
957
+ throw Napi::Error::New(env, "Parser has been destroyed");
958
+ }
959
+
960
+ if (info.Length() < 1 || !info[0].IsString()) {
961
+ throw Napi::TypeError::New(env, "Expected file path string");
962
+ }
963
+
964
+ int num_threads = 0;
965
+ if (info.Length() > 1 && !info[1].IsUndefined() && !info[1].IsNull()) {
966
+ if (!info[1].IsNumber()) {
967
+ throw Napi::TypeError::New(env, "numThreads must be a number");
968
+ }
969
+ num_threads = info[1].As<Napi::Number>().Int32Value();
970
+ }
971
+
972
+ std::string validation_error;
973
+ if (!validateNumThreads(num_threads, validation_error)) {
974
+ throw Napi::TypeError::New(env, validation_error);
975
+ }
976
+
977
+ std::string path = info[0].As<Napi::String>().Utf8Value();
978
+ int result_count = 0;
979
+ cisv_result_t **results = cisv_parse_file_parallel(
980
+ path.c_str(),
981
+ &config_,
982
+ num_threads,
983
+ &result_count);
984
+ if (!results) {
985
+ throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
986
+ }
987
+
988
+ std::vector<std::vector<std::string>> rows;
989
+ std::string error;
990
+ bool ok = collectParallelRows(results, result_count, rows, error);
991
+ cisv_results_free(results, result_count);
992
+
993
+ if (!ok) {
994
+ throw Napi::Error::New(env, error);
995
+ }
996
+
997
+ return rowsToJsArray(env, rows);
998
+ }
999
+
754
1000
  // Write chunk for streaming
755
1001
  void Write(const Napi::CallbackInfo &info) {
756
1002
  Napi::Env env = info.Env();
@@ -813,6 +1059,7 @@ public:
813
1059
  stream_buffering_active_ = false;
814
1060
  }
815
1061
 
1062
+ ensureParser(env);
816
1063
  cisv_parser_write(parser_, chunk_data, chunk_size);
817
1064
  total_bytes_ += chunk_size;
818
1065
  }
@@ -847,6 +1094,7 @@ public:
847
1094
  stream_buffering_active_ = false;
848
1095
  }
849
1096
 
1097
+ ensureParser(info.Env());
850
1098
  cisv_parser_end(parser_);
851
1099
  // Clear the environment reference after ending to prevent stale references
852
1100
  rc_->env = nullptr;
@@ -921,8 +1169,6 @@ public:
921
1169
  type = TRANSFORM_TO_INT;
922
1170
  } else if (transform_type == "to_float" || transform_type == "float") {
923
1171
  type = TRANSFORM_TO_FLOAT;
924
- } else if (transform_type == "hash_sha256" || transform_type == "sha256") {
925
- type = TRANSFORM_HASH_SHA256;
926
1172
  } else if (transform_type == "base64_encode" || transform_type == "base64") {
927
1173
  type = TRANSFORM_BASE64_ENCODE;
928
1174
  } else {
@@ -1032,8 +1278,6 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
1032
1278
  type = TRANSFORM_TO_INT;
1033
1279
  } else if (transform_type == "to_float" || transform_type == "float") {
1034
1280
  type = TRANSFORM_TO_FLOAT;
1035
- } else if (transform_type == "hash_sha256" || transform_type == "sha256") {
1036
- type = TRANSFORM_HASH_SHA256;
1037
1281
  } else if (transform_type == "base64_encode" || transform_type == "base64") {
1038
1282
  type = TRANSFORM_BASE64_ENCODE;
1039
1283
  } else {
@@ -1202,18 +1446,28 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1202
1446
 
1203
1447
  std::string field_name = info[0].As<Napi::String>();
1204
1448
 
1449
+ int field_index = -1;
1450
+
1205
1451
  // Remove from JavaScript transforms by finding the field index
1206
1452
  if (rc_->pipeline && rc_->pipeline->header_fields) {
1207
1453
  for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
1208
1454
  if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
1209
- rc_->js_transforms.erase(i);
1455
+ field_index = static_cast<int>(i);
1456
+ auto it = rc_->js_transforms.find(field_index);
1457
+ if (it != rc_->js_transforms.end()) {
1458
+ if (!it->second.IsEmpty()) {
1459
+ it->second.Reset();
1460
+ }
1461
+ rc_->js_transforms.erase(it);
1462
+ }
1210
1463
  break;
1211
1464
  }
1212
1465
  }
1213
1466
  }
1214
1467
 
1215
- // TODO: Implement removal of C transforms by name in cisv_transformer.c
1216
- // For now, this only removes JS transforms
1468
+ if (field_index >= 0 && rc_->pipeline) {
1469
+ cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
1470
+ }
1217
1471
 
1218
1472
  return info.This();
1219
1473
  }
@@ -1232,10 +1486,17 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1232
1486
  int field_index = info[0].As<Napi::Number>().Int32Value();
1233
1487
 
1234
1488
  // Remove from JavaScript transforms
1235
- rc_->js_transforms.erase(field_index);
1489
+ auto it = rc_->js_transforms.find(field_index);
1490
+ if (it != rc_->js_transforms.end()) {
1491
+ if (!it->second.IsEmpty()) {
1492
+ it->second.Reset();
1493
+ }
1494
+ rc_->js_transforms.erase(it);
1495
+ }
1236
1496
 
1237
- // TODO: Implement removal of C transforms in cisv_transformer.c
1238
- // For now, this only removes JS transforms
1497
+ if (rc_->pipeline) {
1498
+ cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
1499
+ }
1239
1500
 
1240
1501
  return info.This();
1241
1502
  }
@@ -1306,6 +1567,43 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1306
1567
  return deferred.Promise();
1307
1568
  }
1308
1569
 
1570
+ Napi::Value ParseParallel(const Napi::CallbackInfo &info) {
1571
+ Napi::Env env = info.Env();
1572
+
1573
+ if (is_destroyed_) {
1574
+ throw Napi::Error::New(env, "Parser has been destroyed");
1575
+ }
1576
+
1577
+ if (info.Length() < 1 || !info[0].IsString()) {
1578
+ throw Napi::TypeError::New(env, "Expected file path string");
1579
+ }
1580
+
1581
+ int num_threads = 0;
1582
+ if (info.Length() > 1 && !info[1].IsUndefined() && !info[1].IsNull()) {
1583
+ if (!info[1].IsNumber()) {
1584
+ throw Napi::TypeError::New(env, "numThreads must be a number");
1585
+ }
1586
+ num_threads = info[1].As<Napi::Number>().Int32Value();
1587
+ }
1588
+
1589
+ auto deferred = Napi::Promise::Deferred::New(env);
1590
+ cisv_config worker_config = config_;
1591
+ worker_config.field_cb = nullptr;
1592
+ worker_config.row_cb = nullptr;
1593
+ worker_config.error_cb = nullptr;
1594
+ worker_config.user = nullptr;
1595
+
1596
+ auto *worker = new ParseFileParallelWorker(
1597
+ env,
1598
+ info[0].As<Napi::String>().Utf8Value(),
1599
+ worker_config,
1600
+ num_threads,
1601
+ deferred);
1602
+ worker->Queue();
1603
+
1604
+ return deferred.Promise();
1605
+ }
1606
+
1309
1607
  // Get information about registered transforms
1310
1608
  Napi::Value GetTransformInfo(const Napi::CallbackInfo &info) {
1311
1609
  Napi::Env env = info.Env();
@@ -1325,16 +1623,29 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1325
1623
  result.Set("jsTransformCount", Napi::Number::New(env, js_transform_count));
1326
1624
 
1327
1625
  // List field indices with transforms
1328
- Napi::Array fields = Napi::Array::New(env);
1329
- size_t idx = 0;
1626
+ std::vector<int> field_indices;
1627
+ auto add_field_index = [&field_indices](int field_index) {
1628
+ if (std::find(field_indices.begin(), field_indices.end(), field_index) == field_indices.end()) {
1629
+ field_indices.push_back(field_index);
1630
+ }
1631
+ };
1330
1632
 
1331
- // Add JS transform field indices
1633
+ if (rc_ && rc_->pipeline) {
1634
+ for (size_t i = 0; i < rc_->pipeline->count; i++) {
1635
+ add_field_index(rc_->pipeline->transforms[i].field_index);
1636
+ }
1637
+ }
1332
1638
  if (rc_) {
1333
1639
  for (const auto& pair : rc_->js_transforms) {
1334
- fields[idx++] = Napi::Number::New(env, pair.first);
1640
+ add_field_index(pair.first);
1335
1641
  }
1336
1642
  }
1337
1643
 
1644
+ Napi::Array fields = Napi::Array::New(env, field_indices.size());
1645
+ for (size_t i = 0; i < field_indices.size(); i++) {
1646
+ fields[i] = Napi::Number::New(env, field_indices[i]);
1647
+ }
1648
+
1338
1649
  result.Set("fieldIndices", fields);
1339
1650
 
1340
1651
  return result;
@@ -1400,36 +1711,13 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1400
1711
  cisv_config_init(&config);
1401
1712
 
1402
1713
  // Apply configuration if provided
1714
+ if (info.Length() > 1 && !info[1].IsNull() && !info[1].IsUndefined() && !info[1].IsObject()) {
1715
+ throw Napi::TypeError::New(env, "Config must be an object");
1716
+ }
1717
+
1403
1718
  if (info.Length() > 1 && info[1].IsObject()) {
1404
1719
  Napi::Object options = info[1].As<Napi::Object>();
1405
-
1406
- // Apply same configuration parsing logic
1407
- if (options.Has("delimiter")) {
1408
- std::string delim = options.Get("delimiter").As<Napi::String>();
1409
- if (!delim.empty()) config.delimiter = delim[0];
1410
- }
1411
-
1412
- if (options.Has("quote")) {
1413
- std::string quote = options.Get("quote").As<Napi::String>();
1414
- if (!quote.empty()) config.quote = quote[0];
1415
- }
1416
-
1417
- if (options.Has("comment")) {
1418
- std::string comment = options.Get("comment").As<Napi::String>();
1419
- if (!comment.empty()) config.comment = comment[0];
1420
- }
1421
-
1422
- if (options.Has("skipEmptyLines")) {
1423
- config.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
1424
- }
1425
-
1426
- if (options.Has("fromLine")) {
1427
- config.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
1428
- }
1429
-
1430
- if (options.Has("toLine")) {
1431
- config.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
1432
- }
1720
+ ApplyConfigOptions(env, options, &config);
1433
1721
  }
1434
1722
 
1435
1723
  size_t count = cisv_parser_count_rows_with_config(path.c_str(), &config);
@@ -1532,6 +1820,22 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1532
1820
  }
1533
1821
 
1534
1822
  private:
1823
+ void ensureParser(Napi::Env env) {
1824
+ if (parser_) {
1825
+ return;
1826
+ }
1827
+
1828
+ config_.field_cb = field_cb;
1829
+ config_.row_cb = row_cb;
1830
+ config_.error_cb = error_cb;
1831
+ config_.user = rc_;
1832
+
1833
+ parser_ = cisv_parser_create_with_config(&config_);
1834
+ if (!parser_) {
1835
+ throw Napi::Error::New(env, "Failed to create parser");
1836
+ }
1837
+ }
1838
+
1535
1839
  void clearBatchResult() {
1536
1840
  if (batch_result_) {
1537
1841
  cisv_result_free(batch_result_);
@@ -1559,6 +1863,7 @@ private:
1559
1863
  if (pending_stream_.empty()) {
1560
1864
  return;
1561
1865
  }
1866
+ ensureParser(Env());
1562
1867
  cisv_parser_write(
1563
1868
  parser_,
1564
1869
  reinterpret_cast<const uint8_t*>(pending_stream_.data()),
@@ -1640,7 +1945,7 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
1640
1945
  CisvParser::Init(env, exports);
1641
1946
 
1642
1947
  // Add version info
1643
- exports.Set("version", Napi::String::New(env, "1.1.0"));
1948
+ exports.Set("version", Napi::String::New(env, "0.4.9"));
1644
1949
 
1645
1950
  // Add transform type constants
1646
1951
  Napi::Object transformTypes = Napi::Object::New(env);
@@ -1649,7 +1954,6 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
1649
1954
  transformTypes.Set("TRIM", Napi::String::New(env, "trim"));
1650
1955
  transformTypes.Set("TO_INT", Napi::String::New(env, "to_int"));
1651
1956
  transformTypes.Set("TO_FLOAT", Napi::String::New(env, "to_float"));
1652
- transformTypes.Set("HASH_SHA256", Napi::String::New(env, "hash_sha256"));
1653
1957
  transformTypes.Set("BASE64_ENCODE", Napi::String::New(env, "base64_encode"));
1654
1958
  exports.Set("TransformType", transformTypes);
1655
1959
 
@@ -8,7 +8,6 @@ declare module 'cisv' {
8
8
  TRIM = 'trim',
9
9
  TO_INT = 'to_int',
10
10
  TO_FLOAT = 'to_float',
11
- HASH_SHA256 = 'hash_sha256',
12
11
  BASE64_ENCODE = 'base64_encode',
13
12
  CUSTOM = 'custom'
14
13
  }
@@ -64,6 +63,14 @@ declare module 'cisv' {
64
63
  */
65
64
  parseSync(path: string): string[][];
66
65
 
66
+ /**
67
+ * Parse CSV file synchronously using multiple worker threads.
68
+ * @param path Path to CSV file
69
+ * @param numThreads Number of threads to use (0 = auto-detect)
70
+ * @returns Array of rows with string values
71
+ */
72
+ parseSyncParallel(path: string, numThreads?: number): string[][];
73
+
67
74
  /**
68
75
  * Parse CSV file asynchronously
69
76
  * @param path Path to CSV file
@@ -72,11 +79,19 @@ declare module 'cisv' {
72
79
  parse(path: string): Promise<string[][]>;
73
80
 
74
81
  /**
75
- * Parse CSV string content
76
- * @param content CSV string content
82
+ * Parse CSV file asynchronously using multiple worker threads.
83
+ * @param path Path to CSV file
84
+ * @param numThreads Number of threads to use (0 = auto-detect)
85
+ * @returns Promise resolving to array of rows
86
+ */
87
+ parseParallel(path: string, numThreads?: number): Promise<string[][]>;
88
+
89
+ /**
90
+ * Parse CSV string or Buffer content
91
+ * @param content CSV content as string or Buffer
77
92
  * @returns Array of rows with string values
78
93
  */
79
- parseString(content: string): string[][];
94
+ parseString(content: Buffer | string): string[][];
80
95
 
81
96
  /**
82
97
  * Write chunk of CSV data (for streaming)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cisv",
3
- "version": "0.4.7",
3
+ "version": "0.4.9",
4
4
  "description": "The csv parser on steroids.",
5
5
  "author": "sanix<s4nixd@gmail.com>",
6
6
  "main": "./build/Release/cisv.node",