cisv 0.4.8 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -83,7 +83,6 @@ Built-in transform names:
83
83
  - `trim`
84
84
  - `to_int` (or `int`)
85
85
  - `to_float` (or `float`)
86
- - `hash_sha256` (or `sha256`)
87
86
  - `base64_encode` (or `base64`)
88
87
 
89
88
  ## Examples
package/binding.gyp CHANGED
@@ -21,7 +21,6 @@
21
21
  "cflags_cc!": [ "-fno-exceptions" ],
22
22
  "cflags_cc": ["-O3"],
23
23
  "defines": [
24
- "NAPI_DISABLE_CPP_EXCEPTIONS",
25
24
  "NAPI_VERSION=6"
26
25
  ],
27
26
  "conditions": [
Binary file
@@ -5,8 +5,11 @@
5
5
  #include <memory>
6
6
  #include <string>
7
7
  #include <unordered_map>
8
+ #include <algorithm>
8
9
  #include <chrono>
9
10
  #include <cstdint>
11
+ #include <climits>
12
+ #include <cmath>
10
13
 
11
14
  namespace {
12
15
 
@@ -46,6 +49,126 @@ static void ValidateSingleCharOption(
46
49
  *target = raw[0];
47
50
  }
48
51
 
52
+ static double MaxJsSafeInteger() {
53
+ return 9007199254740991.0;
54
+ }
55
+
56
+ static bool IsWholeNumber(double value) {
57
+ return std::isfinite(value) && std::floor(value) == value;
58
+ }
59
+
60
+ static void ApplyBooleanOption(
61
+ Napi::Env env,
62
+ const Napi::Object &options,
63
+ const char *option_name,
64
+ bool *target
65
+ ) {
66
+ if (!options.Has(option_name)) {
67
+ return;
68
+ }
69
+
70
+ Napi::Value value = options.Get(option_name);
71
+ if (!value.IsBoolean()) {
72
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a boolean");
73
+ }
74
+
75
+ *target = value.As<Napi::Boolean>();
76
+ }
77
+
78
+ static void ApplySizeOption(
79
+ Napi::Env env,
80
+ const Napi::Object &options,
81
+ const char *option_name,
82
+ size_t *target
83
+ ) {
84
+ if (!options.Has(option_name)) {
85
+ return;
86
+ }
87
+
88
+ Napi::Value value = options.Get(option_name);
89
+ if (value.IsNull() || value.IsUndefined()) {
90
+ *target = 0;
91
+ return;
92
+ }
93
+ if (!value.IsNumber()) {
94
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
95
+ }
96
+
97
+ double raw = value.As<Napi::Number>().DoubleValue();
98
+ double max_value = static_cast<double>(SIZE_MAX);
99
+ if (!IsWholeNumber(raw) || raw < 0.0 || raw > max_value || raw > MaxJsSafeInteger()) {
100
+ throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
101
+ }
102
+
103
+ *target = static_cast<size_t>(raw);
104
+ }
105
+
106
+ static void ApplyLineOption(
107
+ Napi::Env env,
108
+ const Napi::Object &options,
109
+ const char *option_name,
110
+ int *target
111
+ ) {
112
+ if (!options.Has(option_name)) {
113
+ return;
114
+ }
115
+
116
+ Napi::Value value = options.Get(option_name);
117
+ if (value.IsNull() || value.IsUndefined()) {
118
+ *target = 0;
119
+ return;
120
+ }
121
+ if (!value.IsNumber()) {
122
+ throw Napi::TypeError::New(env, std::string(option_name) + " must be a number");
123
+ }
124
+
125
+ double raw = value.As<Napi::Number>().DoubleValue();
126
+ if (!IsWholeNumber(raw) || raw < 0.0 || raw > static_cast<double>(INT_MAX)) {
127
+ throw Napi::RangeError::New(env, std::string(option_name) + " is out of range");
128
+ }
129
+
130
+ *target = static_cast<int>(raw);
131
+ }
132
+
133
+ static void ValidateConfigSemantics(Napi::Env env, const cisv_config &config) {
134
+ if (config.delimiter == config.quote) {
135
+ throw Napi::TypeError::New(env, "delimiter and quote cannot be the same");
136
+ }
137
+ if (config.escape != '\0' && config.escape == config.delimiter) {
138
+ throw Napi::TypeError::New(env, "escape and delimiter cannot be the same");
139
+ }
140
+ if (config.escape != '\0' && config.escape == config.quote) {
141
+ throw Napi::TypeError::New(env, "escape and quote cannot be the same");
142
+ }
143
+ if (config.comment != '\0' &&
144
+ (config.comment == config.delimiter || config.comment == config.quote || config.comment == config.escape)) {
145
+ throw Napi::TypeError::New(env, "comment cannot conflict with delimiter, quote, or escape");
146
+ }
147
+
148
+ int effective_from = config.from_line > 0 ? config.from_line : 1;
149
+ if (config.to_line != 0 && config.to_line < effective_from) {
150
+ throw Napi::RangeError::New(env, "toLine must be >= fromLine");
151
+ }
152
+ }
153
+
154
+ static void ApplyConfigOptions(Napi::Env env, const Napi::Object &options, cisv_config *config) {
155
+ ValidateSingleCharOption(env, options, "delimiter", &config->delimiter);
156
+ ValidateSingleCharOption(env, options, "quote", &config->quote);
157
+ ValidateSingleCharOption(env, options, "escape", &config->escape, true);
158
+ ValidateSingleCharOption(env, options, "comment", &config->comment, true);
159
+
160
+ ApplyBooleanOption(env, options, "skipEmptyLines", &config->skip_empty_lines);
161
+ ApplyBooleanOption(env, options, "trim", &config->trim);
162
+ ApplyBooleanOption(env, options, "relaxed", &config->relaxed);
163
+ ApplyBooleanOption(env, options, "skipLinesWithError", &config->skip_lines_with_error);
164
+
165
+ ApplySizeOption(env, options, "maxRowSize", &config->max_row_size);
166
+ ApplyLineOption(env, options, "fromLine", &config->from_line);
167
+ ApplyLineOption(env, options, "toLine", &config->to_line);
168
+
169
+ ValidateConfigSemantics(env, *config);
170
+ }
171
+
49
172
  // =============================================================================
50
173
  // SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
51
174
  // Invalid UTF-8 data can cause Napi::String::New to throw or crash
@@ -144,7 +267,7 @@ static napi_value SafeNewStringValue(napi_env env, const char* data, size_t len)
144
267
  if (napi_create_string_latin1(env, data, len, &short_value) == napi_ok && short_value) {
145
268
  return short_value;
146
269
  }
147
- } else {
270
+ } else if (isValidUtf8(data, len)) {
148
271
  if (napi_create_string_utf8(env, data, len, &short_value) == napi_ok && short_value) {
149
272
  return short_value;
150
273
  }
@@ -589,7 +712,7 @@ public:
589
712
  }
590
713
 
591
714
  CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
592
- rc_ = new RowCollector();
715
+ rc_ = nullptr;
593
716
  parser_ = nullptr;
594
717
  parse_time_ = 0;
595
718
  total_bytes_ = 0;
@@ -609,6 +732,8 @@ public:
609
732
  ApplyConfigFromObject(options);
610
733
  }
611
734
 
735
+ rc_ = new RowCollector();
736
+
612
737
  // Set callbacks
613
738
  config_.field_cb = field_cb;
614
739
  config_.row_cb = row_cb;
@@ -623,51 +748,9 @@ public:
623
748
  // Apply configuration from JavaScript object
624
749
  void ApplyConfigFromObject(Napi::Object options) {
625
750
  Napi::Env env = options.Env();
626
-
627
- // Delimiter
628
- ValidateSingleCharOption(env, options, "delimiter", &config_.delimiter);
629
-
630
- // Quote character
631
- ValidateSingleCharOption(env, options, "quote", &config_.quote);
632
-
633
- // Escape character
634
- ValidateSingleCharOption(env, options, "escape", &config_.escape, true);
635
-
636
- // Comment character
637
- ValidateSingleCharOption(env, options, "comment", &config_.comment, true);
638
-
639
- // Boolean options
640
- if (options.Has("skipEmptyLines")) {
641
- config_.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
642
- }
643
-
644
- if (options.Has("trim")) {
645
- config_.trim = options.Get("trim").As<Napi::Boolean>();
646
- }
647
-
648
- if (options.Has("relaxed")) {
649
- config_.relaxed = options.Get("relaxed").As<Napi::Boolean>();
650
- }
651
-
652
- if (options.Has("skipLinesWithError")) {
653
- config_.skip_lines_with_error = options.Get("skipLinesWithError").As<Napi::Boolean>();
654
- }
655
-
656
- // Numeric options
657
- if (options.Has("maxRowSize")) {
658
- Napi::Value val = options.Get("maxRowSize");
659
- if (!val.IsNull() && !val.IsUndefined()) {
660
- config_.max_row_size = val.As<Napi::Number>().Uint32Value();
661
- }
662
- }
663
-
664
- if (options.Has("fromLine")) {
665
- config_.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
666
- }
667
-
668
- if (options.Has("toLine")) {
669
- config_.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
670
- }
751
+ cisv_config next = config_;
752
+ ApplyConfigOptions(env, options, &next);
753
+ config_ = next;
671
754
  }
672
755
 
673
756
  // Set configuration after creation
@@ -817,16 +900,28 @@ public:
817
900
  throw Napi::Error::New(env, "Parser has been destroyed");
818
901
  }
819
902
 
820
- if (info.Length() != 1 || !info[0].IsString()) {
821
- throw Napi::TypeError::New(env, "Expected CSV string");
903
+ if (info.Length() != 1 || (!info[0].IsString() && !info[0].IsBuffer())) {
904
+ throw Napi::TypeError::New(env, "Expected CSV string or Buffer");
822
905
  }
823
906
 
824
- std::string content = info[0].As<Napi::String>();
907
+ const char *content_data = nullptr;
908
+ size_t content_len = 0;
909
+ std::string content_storage;
910
+
911
+ if (info[0].IsBuffer()) {
912
+ auto buffer = info[0].As<Napi::Buffer<char>>();
913
+ content_data = buffer.Data();
914
+ content_len = buffer.Length();
915
+ } else {
916
+ content_storage = info[0].As<Napi::String>();
917
+ content_data = content_storage.data();
918
+ content_len = content_storage.size();
919
+ }
825
920
 
826
921
  resetRowState();
827
922
 
828
923
  if (!hasTransforms()) {
829
- cisv_result_t *batch = cisv_parse_string_batch(content.c_str(), content.length(), &config_);
924
+ cisv_result_t *batch = cisv_parse_string_batch(content_data, content_len, &config_);
830
925
  if (!batch) {
831
926
  throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
832
927
  }
@@ -843,14 +938,14 @@ public:
843
938
  ensureParser(env);
844
939
 
845
940
  // Write the string content as chunks
846
- cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
941
+ cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(content_data), content_len);
847
942
  cisv_parser_end(parser_);
848
943
 
849
944
  // Clear the environment reference after parsing
850
945
  rc_->env = nullptr;
851
946
  }
852
947
 
853
- total_bytes_ = content.length();
948
+ total_bytes_ = content_len;
854
949
 
855
950
  return drainRows(env);
856
951
  }
@@ -1074,8 +1169,6 @@ public:
1074
1169
  type = TRANSFORM_TO_INT;
1075
1170
  } else if (transform_type == "to_float" || transform_type == "float") {
1076
1171
  type = TRANSFORM_TO_FLOAT;
1077
- } else if (transform_type == "hash_sha256" || transform_type == "sha256") {
1078
- type = TRANSFORM_HASH_SHA256;
1079
1172
  } else if (transform_type == "base64_encode" || transform_type == "base64") {
1080
1173
  type = TRANSFORM_BASE64_ENCODE;
1081
1174
  } else {
@@ -1185,8 +1278,6 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
1185
1278
  type = TRANSFORM_TO_INT;
1186
1279
  } else if (transform_type == "to_float" || transform_type == "float") {
1187
1280
  type = TRANSFORM_TO_FLOAT;
1188
- } else if (transform_type == "hash_sha256" || transform_type == "sha256") {
1189
- type = TRANSFORM_HASH_SHA256;
1190
1281
  } else if (transform_type == "base64_encode" || transform_type == "base64") {
1191
1282
  type = TRANSFORM_BASE64_ENCODE;
1192
1283
  } else {
@@ -1355,18 +1446,28 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1355
1446
 
1356
1447
  std::string field_name = info[0].As<Napi::String>();
1357
1448
 
1449
+ int field_index = -1;
1450
+
1358
1451
  // Remove from JavaScript transforms by finding the field index
1359
1452
  if (rc_->pipeline && rc_->pipeline->header_fields) {
1360
1453
  for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
1361
1454
  if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
1362
- rc_->js_transforms.erase(i);
1455
+ field_index = static_cast<int>(i);
1456
+ auto it = rc_->js_transforms.find(field_index);
1457
+ if (it != rc_->js_transforms.end()) {
1458
+ if (!it->second.IsEmpty()) {
1459
+ it->second.Reset();
1460
+ }
1461
+ rc_->js_transforms.erase(it);
1462
+ }
1363
1463
  break;
1364
1464
  }
1365
1465
  }
1366
1466
  }
1367
1467
 
1368
- // TODO: Implement removal of C transforms by name in cisv_transformer.c
1369
- // For now, this only removes JS transforms
1468
+ if (field_index >= 0 && rc_->pipeline) {
1469
+ cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
1470
+ }
1370
1471
 
1371
1472
  return info.This();
1372
1473
  }
@@ -1385,10 +1486,17 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1385
1486
  int field_index = info[0].As<Napi::Number>().Int32Value();
1386
1487
 
1387
1488
  // Remove from JavaScript transforms
1388
- rc_->js_transforms.erase(field_index);
1489
+ auto it = rc_->js_transforms.find(field_index);
1490
+ if (it != rc_->js_transforms.end()) {
1491
+ if (!it->second.IsEmpty()) {
1492
+ it->second.Reset();
1493
+ }
1494
+ rc_->js_transforms.erase(it);
1495
+ }
1389
1496
 
1390
- // TODO: Implement removal of C transforms in cisv_transformer.c
1391
- // For now, this only removes JS transforms
1497
+ if (rc_->pipeline) {
1498
+ cisv_transform_pipeline_remove_field(rc_->pipeline, field_index);
1499
+ }
1392
1500
 
1393
1501
  return info.This();
1394
1502
  }
@@ -1515,16 +1623,29 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1515
1623
  result.Set("jsTransformCount", Napi::Number::New(env, js_transform_count));
1516
1624
 
1517
1625
  // List field indices with transforms
1518
- Napi::Array fields = Napi::Array::New(env);
1519
- size_t idx = 0;
1626
+ std::vector<int> field_indices;
1627
+ auto add_field_index = [&field_indices](int field_index) {
1628
+ if (std::find(field_indices.begin(), field_indices.end(), field_index) == field_indices.end()) {
1629
+ field_indices.push_back(field_index);
1630
+ }
1631
+ };
1520
1632
 
1521
- // Add JS transform field indices
1633
+ if (rc_ && rc_->pipeline) {
1634
+ for (size_t i = 0; i < rc_->pipeline->count; i++) {
1635
+ add_field_index(rc_->pipeline->transforms[i].field_index);
1636
+ }
1637
+ }
1522
1638
  if (rc_) {
1523
1639
  for (const auto& pair : rc_->js_transforms) {
1524
- fields[idx++] = Napi::Number::New(env, pair.first);
1640
+ add_field_index(pair.first);
1525
1641
  }
1526
1642
  }
1527
1643
 
1644
+ Napi::Array fields = Napi::Array::New(env, field_indices.size());
1645
+ for (size_t i = 0; i < field_indices.size(); i++) {
1646
+ fields[i] = Napi::Number::New(env, field_indices[i]);
1647
+ }
1648
+
1528
1649
  result.Set("fieldIndices", fields);
1529
1650
 
1530
1651
  return result;
@@ -1590,25 +1711,13 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
1590
1711
  cisv_config_init(&config);
1591
1712
 
1592
1713
  // Apply configuration if provided
1714
+ if (info.Length() > 1 && !info[1].IsNull() && !info[1].IsUndefined() && !info[1].IsObject()) {
1715
+ throw Napi::TypeError::New(env, "Config must be an object");
1716
+ }
1717
+
1593
1718
  if (info.Length() > 1 && info[1].IsObject()) {
1594
1719
  Napi::Object options = info[1].As<Napi::Object>();
1595
-
1596
- // Apply same configuration parsing logic
1597
- ValidateSingleCharOption(env, options, "delimiter", &config.delimiter);
1598
- ValidateSingleCharOption(env, options, "quote", &config.quote);
1599
- ValidateSingleCharOption(env, options, "comment", &config.comment, true);
1600
-
1601
- if (options.Has("skipEmptyLines")) {
1602
- config.skip_empty_lines = options.Get("skipEmptyLines").As<Napi::Boolean>();
1603
- }
1604
-
1605
- if (options.Has("fromLine")) {
1606
- config.from_line = options.Get("fromLine").As<Napi::Number>().Int32Value();
1607
- }
1608
-
1609
- if (options.Has("toLine")) {
1610
- config.to_line = options.Get("toLine").As<Napi::Number>().Int32Value();
1611
- }
1720
+ ApplyConfigOptions(env, options, &config);
1612
1721
  }
1613
1722
 
1614
1723
  size_t count = cisv_parser_count_rows_with_config(path.c_str(), &config);
@@ -1836,7 +1945,7 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
1836
1945
  CisvParser::Init(env, exports);
1837
1946
 
1838
1947
  // Add version info
1839
- exports.Set("version", Napi::String::New(env, "0.4.8"));
1948
+ exports.Set("version", Napi::String::New(env, "0.4.9"));
1840
1949
 
1841
1950
  // Add transform type constants
1842
1951
  Napi::Object transformTypes = Napi::Object::New(env);
@@ -1845,7 +1954,6 @@ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
1845
1954
  transformTypes.Set("TRIM", Napi::String::New(env, "trim"));
1846
1955
  transformTypes.Set("TO_INT", Napi::String::New(env, "to_int"));
1847
1956
  transformTypes.Set("TO_FLOAT", Napi::String::New(env, "to_float"));
1848
- transformTypes.Set("HASH_SHA256", Napi::String::New(env, "hash_sha256"));
1849
1957
  transformTypes.Set("BASE64_ENCODE", Napi::String::New(env, "base64_encode"));
1850
1958
  exports.Set("TransformType", transformTypes);
1851
1959
 
@@ -8,7 +8,6 @@ declare module 'cisv' {
8
8
  TRIM = 'trim',
9
9
  TO_INT = 'to_int',
10
10
  TO_FLOAT = 'to_float',
11
- HASH_SHA256 = 'hash_sha256',
12
11
  BASE64_ENCODE = 'base64_encode',
13
12
  CUSTOM = 'custom'
14
13
  }
@@ -88,11 +87,11 @@ declare module 'cisv' {
88
87
  parseParallel(path: string, numThreads?: number): Promise<string[][]>;
89
88
 
90
89
  /**
91
- * Parse CSV string content
92
- * @param content CSV string content
90
+ * Parse CSV string or Buffer content
91
+ * @param content CSV content as string or Buffer
93
92
  * @returns Array of rows with string values
94
93
  */
95
- parseString(content: string): string[][];
94
+ parseString(content: Buffer | string): string[][];
96
95
 
97
96
  /**
98
97
  * Write chunk of CSV data (for streaming)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cisv",
3
- "version": "0.4.8",
3
+ "version": "0.4.9",
4
4
  "description": "The csv parser on steroids.",
5
5
  "author": "sanix<s4nixd@gmail.com>",
6
6
  "main": "./build/Release/cisv.node",