cisv 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ npm install cisv
11
11
  From source in this repository:
12
12
 
13
13
  ```bash
14
- cd bindings/nodejs
14
+ cd cisv
15
15
  npm ci
16
16
  npm run build
17
17
  npm test
package/binding.gyp CHANGED
@@ -4,13 +4,13 @@
4
4
  "target_name": "cisv",
5
5
  "sources": [
6
6
  "cisv/cisv_addon.cc",
7
- "../../core/src/parser.c",
8
- "../../core/src/writer.c",
9
- "../../core/src/transformer.c"
7
+ "../core/core/src/parser.c",
8
+ "../core/core/src/writer.c",
9
+ "../core/core/src/transformer.c"
10
10
  ],
11
11
  "include_dirs": [
12
12
  "<!@(node -p \"require('node-addon-api').include\")",
13
- "../../core/include/",
13
+ "../core/core/include/",
14
14
  "cisv/"
15
15
  ],
16
16
  "dependencies": [
Binary file
@@ -10,6 +10,42 @@
10
10
 
11
11
  namespace {
12
12
 
13
+ static bool isInvalidConfigChar(char c) {
14
+ return c == '\0' || c == '\n' || c == '\r';
15
+ }
16
+
17
+ static void ValidateSingleCharOption(
18
+ Napi::Env env,
19
+ const Napi::Object &options,
20
+ const char *option_name,
21
+ char *target,
22
+ bool allow_null = false
23
+ ) {
24
+ if (!options.Has(option_name)) {
25
+ return;
26
+ }
27
+
28
+ Napi::Value value = options.Get(option_name);
29
+ if (allow_null && (value.IsNull() || value.IsUndefined())) {
30
+ *target = 0;
31
+ return;
32
+ }
33
+
34
+ if (!value.IsString()) {
35
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a string");
36
+ }
37
+
38
+ std::string raw = value.As<Napi::String>();
39
+ if (raw.size() != 1) {
40
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be exactly 1 character");
41
+ }
42
+ if (isInvalidConfigChar(raw[0])) {
43
+ throw Napi::TypeError::New(env, std::string("Invalid ") + option_name + " character");
44
+ }
45
+
46
+ *target = raw[0];
47
+ }
48
+
13
49
  // =============================================================================
14
50
  // SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
15
51
  // Invalid UTF-8 data can cause Napi::String::New to throw or crash
@@ -339,6 +375,69 @@ static void error_cb(void *user, int line, const char *msg) {
339
375
  fprintf(stderr, "CSV Parse Error at line %d: %s\n", line, msg);
340
376
  }
341
377
 
378
+ static bool validateNumThreads(int num_threads, std::string &error) {
379
+ if (num_threads < 0) {
380
+ error = "numThreads must be >= 0";
381
+ return false;
382
+ }
383
+ return true;
384
+ }
385
+
386
+ static bool collectParallelRows(
387
+ cisv_result_t **results,
388
+ int result_count,
389
+ std::vector<std::vector<std::string>> &rows,
390
+ std::string &error
391
+ ) {
392
+ size_t total_rows = 0;
393
+ for (int chunk = 0; chunk < result_count; chunk++) {
394
+ cisv_result_t *result = results[chunk];
395
+ if (!result) {
396
+ continue;
397
+ }
398
+ if (result->error_code != 0) {
399
+ error = result->error_message[0] ? result->error_message : "parse error";
400
+ return false;
401
+ }
402
+ total_rows += result->row_count;
403
+ }
404
+
405
+ rows.clear();
406
+ rows.reserve(total_rows);
407
+
408
+ for (int chunk = 0; chunk < result_count; chunk++) {
409
+ cisv_result_t *result = results[chunk];
410
+ if (!result) {
411
+ continue;
412
+ }
413
+
414
+ for (size_t i = 0; i < result->row_count; i++) {
415
+ cisv_row_t *row = &result->rows[i];
416
+ std::vector<std::string> out_row;
417
+ out_row.reserve(row->field_count);
418
+ for (size_t j = 0; j < row->field_count; j++) {
419
+ out_row.emplace_back(row->fields[j], row->field_lengths[j]);
420
+ }
421
+ rows.emplace_back(std::move(out_row));
422
+ }
423
+ }
424
+
425
+ return true;
426
+ }
427
+
428
+ static Napi::Array rowsToJsArray(Napi::Env env, const std::vector<std::vector<std::string>> &rows) {
429
+ Napi::Array out = Napi::Array::New(env, rows.size());
430
+ for (size_t i = 0; i < rows.size(); i++) {
431
+ Napi::Array row = Napi::Array::New(env, rows[i].size());
432
+ for (size_t j = 0; j < rows[i].size(); j++) {
433
+ const std::string &field = rows[i][j];
434
+ row[j] = SafeNewString(env, field.c_str(), field.length());
435
+ }
436
+ out[i] = row;
437
+ }
438
+ return out;
439
+ }
440
+
342
441
  class ParseFileWorker final : public Napi::AsyncWorker {
343
442
  public:
344
443
  ParseFileWorker(
@@ -383,19 +482,57 @@ public:
383
482
  }
384
483
 
385
484
  void OnOK() override {
386
- Napi::Env env = Env();
387
- Napi::Array out = Napi::Array::New(env, rows_.size());
388
-
389
- for (size_t i = 0; i < rows_.size(); i++) {
390
- Napi::Array row = Napi::Array::New(env, rows_[i].size());
391
- for (size_t j = 0; j < rows_[i].size(); j++) {
392
- const std::string &field = rows_[i][j];
393
- row[j] = SafeNewString(env, field.c_str(), field.length());
394
- }
395
- out[i] = row;
485
+ deferred_.Resolve(rowsToJsArray(Env(), rows_));
486
+ }
487
+
488
+ void OnError(const Napi::Error &e) override {
489
+ deferred_.Reject(e.Value());
490
+ }
491
+
492
+ private:
493
+ std::string path_;
494
+ cisv_config config_;
495
+ Napi::Promise::Deferred deferred_;
496
+ std::vector<std::vector<std::string>> rows_;
497
+ };
498
+
499
+ class ParseFileParallelWorker final : public Napi::AsyncWorker {
500
+ public:
501
+ ParseFileParallelWorker(
502
+ Napi::Env env,
503
+ std::string path,
504
+ cisv_config config,
505
+ int num_threads,
506
+ Napi::Promise::Deferred deferred
507
+ ) : Napi::AsyncWorker(env),
508
+ path_(std::move(path)),
509
+ config_(config),
510
+ num_threads_(num_threads),
511
+ deferred_(deferred) {}
512
+
513
+ void Execute() override {
514
+ if (!validateNumThreads(num_threads_, error_)) {
515
+ SetError(error_);
516
+ return;
396
517
  }
397
518
 
398
- deferred_.Resolve(out);
519
+ int result_count = 0;
520
+ cisv_result_t **results = cisv_parse_file_parallel(path_.c_str(), &config_, num_threads_, &result_count);
521
+ if (!results) {
522
+ SetError("parse error: " + std::string(strerror(errno)));
523
+ return;
524
+ }
525
+
526
+ bool ok = collectParallelRows(results, result_count, rows_, error_);
527
+ cisv_results_free(results, result_count);
528
+
529
+ if (!ok) {
530
+ SetError(error_);
531
+ }
532
+ }
533
+
534
+ void OnOK() override {
535
+ deferred_.Resolve(rowsToJsArray(Env(), rows_));
399
536
  }
400
537
 
401
538
  void OnError(const Napi::Error &e) override {
@@ -405,8 +542,10 @@ public:
405
542
  private:
406
543
  std::string path_;
407
544
  cisv_config config_;
545
+ int num_threads_;
408
546
  Napi::Promise::Deferred deferred_;
409
547
  std::vector<std::vector<std::string>> rows_;
548
+ std::string error_;
410
549
  };
411
550
 
412
551
  } // namespace
@@ -416,7 +555,9 @@ public:
416
555
  static Napi::Object Init(Napi::Env env, Napi::Object exports) {
417
556
  Napi::Function func = DefineClass(env, "cisvParser", {
418
557
  InstanceMethod("parseSync", &CisvParser::ParseSync),
558
+ InstanceMethod("parseSyncParallel", &CisvParser::ParseSyncParallel),
419
559
  InstanceMethod("parse", &CisvParser::ParseAsync),
560
+ InstanceMethod("parseParallel", &CisvParser::ParseParallel),
420
561
  InstanceMethod("parseString", &CisvParser::ParseString),
421
562
  InstanceMethod("write", &CisvParser::Write),
422
563
  InstanceMethod("end", &CisvParser::End),
@@ -449,6 +590,7 @@ public:
449
590
 
450
591
  CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
451
592
  rc_ = new RowCollector();
593
+ parser_ = nullptr;
452
594
  parse_time_ = 0;
453
595
  total_bytes_ = 0;
454
596
  is_destroyed_ = false;
@@ -472,9 +614,6 @@ public:
472
614
  config_.row_cb = row_cb;
473
615
  config_.error_cb = error_cb;
474
616
  config_.user = rc_;
475
-
476
- // Create parser with configuration
477
- parser_ = cisv_parser_create_with_config(&config_);
478
617
  }
479
618
 
480
619
  ~CisvParser() {
@@ -483,51 +622,19 @@ public:
483
622
 
484
623
  // Apply configuration from JavaScript object
485
624
  void ApplyConfigFromObject(Napi::Object options) {
625
+ Napi::Env env = options.Env();
626
+
486
627
  // Delimiter
487
- if (options.Has("delimiter")) {
488
- Napi::Value delim = options.Get("delimiter");
489
- if (delim.IsString()) {
490
- std::string delim_str = delim.As<Napi::String>();
491
- if (!delim_str.empty()) {
492
- config_.delimiter = delim_str[0];
493
- }
494
- }
495
- }
628
+ ValidateSingleCharOption(env, options, "delimiter", &config_.delimiter);
496
629
 
497
630
  // Quote character
498
- if (options.Has("quote")) {
499
- Napi::Value quote = options.Get("quote");
500
- if (quote.IsString()) {
501
- std::string quote_str = quote.As<Napi::String>();
502
- if (!quote_str.empty()) {
503
- config_.quote = quote_str[0];
504
- }
505
- }
506
- }
631
+ ValidateSingleCharOption(env, options, "quote", &config_.quote);
507
632
 
508
633
  // Escape character
509
- if (options.Has("escape")) {
510
- Napi::Value escape = options.Get("escape");
511
- if (escape.IsString()) {
512
- std::string escape_str = escape.As<Napi::String>();
513
- if (!escape_str.empty()) {
514
- config_.escape = escape_str[0];
515
- }
516
- } else if (escape.IsNull() || escape.IsUndefined()) {
517
- config_.escape = 0; // RFC4180 style
518
- }
519
- }
634
+ ValidateSingleCharOption(env, options, "escape", &config_.escape, true);
520
635
 
521
636
  // Comment character
522
- if (options.Has("comment")) {
523
- Napi::Value comment = options.Get("comment");
524
- if (comment.IsString()) {
525
- std::string comment_str = comment.As<Napi::String>();
526
- if (!comment_str.empty()) {
527
- config_.comment = comment_str[0];
528
- }
529
- }
530
- }
637
+ ValidateSingleCharOption(env, options, "comment", &config_.comment, true);
531
638
 
532
639
  // Boolean options
533
640
  if (options.Has("skipEmptyLines")) {
@@ -578,17 +685,12 @@ public:
578
685
  Napi::Object options = info[0].As<Napi::Object>();
579
686
  ApplyConfigFromObject(options);
580
687
 
581
- // Recreate parser with new configuration
688
+ // Recreate the streaming parser only if it has already been instantiated.
582
689
  if (parser_) {
583
690
  cisv_parser_destroy(parser_);
691
+ parser_ = nullptr;
692
+ ensureParser(env);
584
693
  }
585
-
586
- config_.field_cb = field_cb;
587
- config_.row_cb = row_cb;
588
- config_.error_cb = error_cb;
589
- config_.user = rc_;
590
-
591
- parser_ = cisv_parser_create_with_config(&config_);
592
694
  }
593
695
 
594
696
  // Get current configuration
@@ -692,6 +794,7 @@ public:
692
794
  } else {
693
795
  // Set environment for JS transforms
694
796
  rc_->env = env;
797
+ ensureParser(env);
695
798
  result = cisv_parser_parse_file(parser_, path.c_str());
696
799
  // Clear the environment reference after parsing
697
800
  rc_->env = nullptr;
@@ -737,6 +840,7 @@ public:
737
840
  } else {
738
841
  // Set environment for JS transforms
739
842
  rc_->env = env;
843
+ ensureParser(env);
740
844
 
741
845
  // Write the string content as chunks
742
846
  cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
@@ -751,6 +855,53 @@ public:
751
855
  return drainRows(env);
752
856
  }
753
857
 
858
+ Napi::Value ParseSyncParallel(const Napi::CallbackInfo &info) {
859
+ Napi::Env env = info.Env();
860
+
861
+ if (is_destroyed_) {
862
+ throw Napi::Error::New(env, "Parser has been destroyed");
863
+ }
864
+
865
+ if (info.Length() < 1 || !info[0].IsString()) {
866
+ throw Napi::TypeError::New(env, "Expected file path string");
867
+ }
868
+
869
+ int num_threads = 0;
870
+ if (info.Length() > 1 && !info[1].IsUndefined() && !info[1].IsNull()) {
871
+ if (!info[1].IsNumber()) {
872
+ throw Napi::TypeError::New(env, "numThreads must be a number");
873
+ }
874
+ num_threads = info[1].As<Napi::Number>().Int32Value();
875
+ }
876
+
877
+ std::string validation_error;
878
+ if (!validateNumThreads(num_threads, validation_error)) {
879
+ throw Napi::TypeError::New(env, validation_error);
880
+ }
881
+
882
+ std::string path = info[0].As<Napi::String>().Utf8Value();
883
+ int result_count = 0;
884
+ cisv_result_t **results = cisv_parse_file_parallel(
885
+ path.c_str(),
886
+ &config_,
887
+ num_threads,
888
+ &result_count);
889
+ if (!results) {
890
+ throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
891
+ }
892
+
893
+ std::vector<std::vector<std::string>> rows;
894
+ std::string error;
895
+ bool ok = collectParallelRows(results, result_count, rows, error);
896
+ cisv_results_free(results, result_count);
897
+
898
+ if (!ok) {
899
+ throw Napi::Error::New(env, error);
900
+ }
901
+
902
+ return rowsToJsArray(env, rows);
903
+ }
904
+
754
905
  // Write chunk for streaming
755
906
  void Write(const Napi::CallbackInfo &info) {
756
907
  Napi::Env env = info.Env();
@@ -813,6 +964,7 @@ public:
813
964
  stream_buffering_active_ = false;
814
965
  }
815
966
 
967
+ ensureParser(env);
816
968
  cisv_parser_write(parser_, chunk_data, chunk_size);
817
969
  total_bytes_ += chunk_size;
818
970
  }
@@ -847,6 +999,7 @@ public:
847
999
  stream_buffering_active_ = false;
848
1000
  }
849
1001
 
1002
+ ensureParser(info.Env());
850
1003
  cisv_parser_end(parser_);
851
1004
  // Clear the environment reference after ending to prevent stale references
852
1005
  rc_->env = nullptr;
@@ -1306,6 +1459,43 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1306
1459
  return deferred.Promise();
1307
1460
  }
1308
1461
 
1462
+ Napi::Value ParseParallel(const Napi::CallbackInfo &info) {
1463
+ Napi::Env env = info.Env();
1464
+
1465
+ if (is_destroyed_) {
1466
+ throw Napi::Error::New(env, "Parser has been destroyed");
1467
+ }
1468
+
1469
+ if (info.Length() < 1 || !info[0].IsString()) {
1470
+ throw Napi::TypeError::New(env, "Expected file path string");
1471
+ }
1472
+
1473
+ int num_threads = 0;
1474
+ if (info.Length() > 1 && !info[1].IsUndefined() && !info[1].IsNull()) {
1475
+ if (!info[1].IsNumber()) {
1476
+ throw Napi::TypeError::New(env, "numThreads must be a number");
1477
+ }
1478
+ num_threads = info[1].As<Napi::Number>().Int32Value();
1479
+ }
1480
+
1481
+ auto deferred = Napi::Promise::Deferred::New(env);
1482
+ cisv_config worker_config = config_;
1483
+ worker_config.field_cb = nullptr;
1484
+ worker_config.row_cb = nullptr;
1485
+ worker_config.error_cb = nullptr;
1486
+ worker_config.user = nullptr;
1487
+
1488
+ auto *worker = new ParseFileParallelWorker(
1489
+ env,
1490
+ info[0].As<Napi::String>().Utf8Value(),
1491
+ worker_config,
1492
+ num_threads,
1493
+ deferred);
1494
+ worker->Queue();
1495
+
1496
+ return deferred.Promise();
1497
+ }
1498
+
1309
1499
  // Get information about registered transforms
1310
1500
  Napi::Value GetTransformInfo(const Napi::CallbackInfo &info) {
1311
1501
  Napi::Env env = info.Env();
@@ -1404,20 +1594,9 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1404
1594
  Napi::Object options = info[1].As<Napi::Object>();
1405
1595
 
1406
1596
  // Apply same configuration parsing logic
1407
- if (options.Has("delimiter")) {
1408
- std::string delim = options.Get("delimiter").As<Napi::String>();
1409
- if (!delim.empty()) config.delimiter = delim[0];
1410
- }
1411
-
1412
- if (options.Has("quote")) {
1413
- std::string quote = options.Get("quote").As<Napi::String>();
1414
- if (!quote.empty()) config.quote = quote[0];
1415
- }
1416
-
1417
- if (options.Has("comment")) {
1418
- std::string comment = options.Get("comment").As<Napi::String>();
1419
- if (!comment.empty()) config.comment = comment[0];
1420
- }
1597
+ ValidateSingleCharOption(env, options, "delimiter", &config.delimiter);
1598
+ ValidateSingleCharOption(env, options, "quote", &config.quote);
1599
+ ValidateSingleCharOption(env, options, "comment", &config.comment, true);
1421
1600
 
1422
1601
  if (options.Has("skipEmptyLines")) {
1423
1602
  config.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
@@ -1532,6 +1711,22 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1532
1711
  }
1533
1712
 
1534
1713
  private:
1714
+ void ensureParser(Napi::Env env) {
1715
+ if (parser_) {
1716
+ return;
1717
+ }
1718
+
1719
+ config_.field_cb = field_cb;
1720
+ config_.row_cb = row_cb;
1721
+ config_.error_cb = error_cb;
1722
+ config_.user = rc_;
1723
+
1724
+ parser_ = cisv_parser_create_with_config(&config_);
1725
+ if (!parser_) {
1726
+ throw Napi::Error::New(env, "Failed to create parser");
1727
+ }
1728
+ }
1729
+
1535
1730
  void clearBatchResult() {
1536
1731
  if (batch_result_) {
1537
1732
  cisv_result_free(batch_result_);
@@ -1559,6 +1754,7 @@ private:
1559
1754
  if (pending_stream_.empty()) {
1560
1755
  return;
1561
1756
  }
1757
+ ensureParser(Env());
1562
1758
  cisv_parser_write(
1563
1759
  parser_,
1564
1760
  reinterpret_cast<const uint8_t*>(pending_stream_.data()),
@@ -1640,7 +1836,7 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
1640
1836
  CisvParser::Init(env, exports);
1641
1837
 
1642
1838
  // Add version info
1643
- exports.Set("version", Napi::String::New(env, "1.1.0"));
1839
+ exports.Set("version", Napi::String::New(env, "0.4.8"));
1644
1840
 
1645
1841
  // Add transform type constants
1646
1842
  Napi::Object transformTypes = Napi::Object::New(env);
@@ -64,6 +64,14 @@ declare module 'cisv' {
64
64
  */
65
65
  parseSync(path: string): string[][];
66
66
 
67
+ /**
68
+ * Parse CSV file synchronously using multiple worker threads.
69
+ * @param path Path to CSV file
70
+ * @param numThreads Number of threads to use (0 = auto-detect)
71
+ * @returns Array of rows with string values
72
+ */
73
+ parseSyncParallel(path: string, numThreads?: number): string[][];
74
+
67
75
  /**
68
76
  * Parse CSV file asynchronously
69
77
  * @param path Path to CSV file
@@ -71,6 +79,14 @@ declare module 'cisv' {
71
79
  */
72
80
  parse(path: string): Promise<string[][]>;
73
81
 
82
+ /**
83
+ * Parse CSV file asynchronously using multiple worker threads.
84
+ * @param path Path to CSV file
85
+ * @param numThreads Number of threads to use (0 = auto-detect)
86
+ * @returns Promise resolving to array of rows
87
+ */
88
+ parseParallel(path: string, numThreads?: number): Promise<string[][]>;
89
+
74
90
  /**
75
91
  * Parse CSV string content
76
92
  * @param content CSV string content
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cisv",
3
- "version": "0.4.6",
3
+ "version": "0.4.8",
4
4
  "description": "The csv parser on steroids.",
5
5
  "author": "sanix<s4nixd@gmail.com>",
6
6
  "main": "./build/Release/cisv.node",
@@ -22,10 +22,10 @@
22
22
  "scripts": {
23
23
  "install": "node-gyp rebuild",
24
24
  "build": "node-gyp rebuild",
25
- "test": "mocha ./tests/*.test.js && bash ../../scripts/test_transform.sh",
25
+ "test": "mocha ./tests/*.test.js && node ./test_transform_leak_test.js",
26
26
  "test:build": "npm run build && npm run test",
27
- "benchmark-js": "node ./benchmark.js",
28
- "benchmark-core": "bash ../../scripts/benchmark_cli_reader.sh",
27
+ "benchmark-js": "node ./benchmarks/benchmark.js",
28
+ "benchmark-core": "node ./benchmarks/benchmark.js",
29
29
  "lint": "clang-format -i cisv/*.{cc,h}",
30
30
  "pretest:typescript": "tsc --project tsconfig.json --noEmit",
31
31
  "build:types": "tsc --project tsconfig.json --declaration --emitDeclarationOnly",