cisv 0.0.60 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,79 +5,6 @@
5
5
  ![Size](https://deno.bundlejs.com/badge?q=spring-easing)
6
6
  ![Downloads](https://badgen.net/npm/dw/cisv)
7
7
 
8
- > # DISCLAIMER
9
- >
10
- > This CSV parser does not cover all quote/comment edge cases; for now it is meant to be extremely fast and is not production-ready yet.
11
-
12
- Cisv is a csv parser on steroids... literally.
13
- It's a high-performance CSV parser/writer leveraging SIMD instructions and zero-copy memory mapping. Available as both a Node.js native addon and standalone CLI tool with extensive configuration options.
14
-
15
- I wrote about the basics in a blog post, which you can read here: https://sanixdk.xyz/blogs/how-i-accidentally-created-the-fastest-csv-parser-ever-made.
16
-
17
- ## CLI BENCHMARKS WITH DOCKER
18
-
19
- ```bash
20
- $ docker build -t cisv-benchmark .
21
- ```
22
-
23
- To run them with explicit resource limits for the container, you can:
24
-
25
- ```bash
26
- $ docker run --rm \
27
- --cpus="2.0" \
28
- --memory="4g" \
29
- --memory-swap="4g" \
30
- --cpu-shares=1024 \
31
- --security-opt \
32
- seccomp=unconfined \
33
- cisv-benchmark
34
- ```
35
-
36
- ## BENCHMARKS
37
-
38
- Benchmarks comparison with existing popular tools,
39
- cf pipeline you can check : (https://github.com/Sanix-Darker/cisv/actions/runs/18422464917/job/52498590205) at step "Publish to npm".
40
-
41
- ### SYNCHRONOUS RESULTS
42
-
43
- | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
44
- |--------------------|--------------|---------------|----------------|
45
- | cisv (sync) | 71.10 | 0.01 | 153723 |
46
- | csv-parse (sync) | 18.76 | 0.02 | 40563 |
47
- | papaparse (sync) | 27.97 | 0.02 | 60467 |
48
- | udsv (sync) | 69.81 | 0.01 | 150930 |
49
- | d3-dsv (sync) | 98.11 | 0.00 | 212117 |
50
-
51
- ### SYNCHRONOUS RESULTS (WITH DATA ACCESS)
52
-
53
- | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
54
- |--------------------|--------------|---------------|----------------|
55
- | cisv (sync) | 104.58 | 0.00 | 226108 |
56
- | csv-parse (sync) | 16.87 | 0.03 | 36482 |
57
- | papaparse (sync) | 28.13 | 0.02 | 60807 |
58
- | udsv (sync) | 69.29 | 0.01 | 149812 |
59
- | d3-dsv (sync) | 96.32 | 0.00 | 208248 |
60
-
61
- ### ASYNCHRONOUS RESULTS
62
-
63
- | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
64
- |--------------------------|--------------|---------------|----------------|
65
- | cisv (async/stream) | 98.36 | 0.00 | 212662 |
66
- | papaparse (async/stream) | 21.56 | 0.02 | 46609 |
67
- | fast-csv (async/stream) | 10.09 | 0.05 | 21817 |
68
- | neat-csv (async/promise) | 9.20 | 0.05 | 19898 |
69
- | udsv (async/stream) | 51.74 | 0.01 | 111858 |
70
-
71
- ### ASYNCHRONOUS RESULTS (WITH DATA ACCESS)
72
-
73
- | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
74
- |--------------------------|--------------|---------------|----------------|
75
- | cisv (async/stream) | 27.50 | 0.02 | 59460 |
76
- | papaparse (async/stream) | 21.98 | 0.02 | 47513 |
77
- | fast-csv (async/stream) | 10.05 | 0.05 | 21719 |
78
- | neat-csv (async/promise) | 9.58 | 0.05 | 20711 |
79
- | udsv (async/stream) | 53.26 | 0.01 | 115146 |
80
-
81
8
  ## INSTALLATION
82
9
 
83
10
  ### NODE.JS PACKAGE
@@ -85,18 +12,9 @@ cf pipeline you can check : (https://github.com/Sanix-Darker/cisv/actions/runs/1
85
12
  npm install cisv
86
13
  ```
87
14
 
88
- ### CLI TOOL (FROM SOURCE)
89
- ```bash
90
- git clone https://github.com/sanix-darker/cisv
91
- cd cisv
92
- make cli
93
- sudo make install-cli
94
- ```
95
-
96
15
  ### BUILD FROM SOURCE (NODE.JS ADDON)
97
16
  ```bash
98
17
  npm install -g node-gyp
99
- make build
100
18
  ```
101
19
 
102
20
  ## QUICK START
@@ -118,20 +36,6 @@ const tsv_parser = new cisvParser({
118
36
  const tsv_rows = tsv_parser.parseSync('./data.tsv');
119
37
  ```
120
38
 
121
- ### CLI
122
- ```bash
123
- # Basic parsing
124
- cisv_bin data.csv
125
-
126
- # Parse TSV file
127
- cisv_bin -d $'\t' data.tsv
128
-
129
- # Parse with custom quote and trim
130
- cisv_bin -q "'" -t data.csv
131
-
132
- # Skip comment lines
133
- cisv_bin -m '#' config.csv
134
- ```
135
39
 
136
40
  ## CONFIGURATION OPTIONS
137
41
 
@@ -347,116 +251,3 @@ const tsvCount = cisvParser.countRowsWithConfig('data.tsv', {
347
251
  toLine: 1000
348
252
  });
349
253
  ```
350
-
351
- ## CLI USAGE
352
-
353
- ### PARSING OPTIONS
354
-
355
- ```bash
356
- cisv_bin [OPTIONS] [FILE]
357
-
358
- General Options:
359
- -h, --help Show help message
360
- -v, --version Show version
361
- -o, --output FILE Write to FILE instead of stdout
362
- -b, --benchmark Run benchmark mode
363
-
364
- Configuration Options:
365
- -d, --delimiter DELIM Field delimiter (default: ,)
366
- -q, --quote CHAR Quote character (default: ")
367
- -e, --escape CHAR Escape character (default: RFC4180 style)
368
- -m, --comment CHAR Comment character (default: none)
369
- -t, --trim Trim whitespace from fields
370
- -r, --relaxed Use relaxed parsing rules
371
- --skip-empty Skip empty lines
372
- --skip-errors Skip lines with parse errors
373
- --max-row SIZE Maximum row size in bytes
374
- --from-line N Start from line N (1-based)
375
- --to-line N Stop at line N
376
-
377
- Processing Options:
378
- -s, --select COLS Select columns (comma-separated indices)
379
- -c, --count Show only row count
380
- --head N Show first N rows
381
- --tail N Show last N rows
382
- ```
383
-
384
- ### EXAMPLES
385
-
386
- ```bash
387
- # Parse TSV file
388
- cisv_bin -d $'\t' data.tsv
389
-
390
- # Parse CSV with semicolon delimiter and single quotes
391
- cisv_bin -d ';' -q "'" european.csv
392
-
393
- # Skip comment lines starting with #
394
- cisv_bin -m '#' config.csv
395
-
396
- # Trim whitespace and skip empty lines
397
- cisv_bin -t --skip-empty messy.csv
398
-
399
- # Parse lines 100-1000 only
400
- cisv_bin --from-line 100 --to-line 1000 large.csv
401
-
402
- # Select specific columns
403
- cisv_bin -s 0,2,5,7 data.csv
404
-
405
- # Count rows with specific configuration
406
- cisv_bin -c -d $'\t' --skip-empty data.tsv
407
-
408
- # Benchmark with custom delimiter
409
- cisv_bin -b -d ';' european.csv
410
- ```
411
-
412
- ### WRITING
413
-
414
- ```bash
415
- cisv_bin write [OPTIONS]
416
-
417
- Options:
418
- -g, --generate N Generate N rows of test data
419
- -o, --output FILE Output file
420
- -d, --delimiter DELIM Field delimiter
421
- -Q, --quote-all Quote all fields
422
- -r, --crlf Use CRLF line endings
423
- -n, --null TEXT Null representation
424
- -b, --benchmark Benchmark mode
425
- ```
426
-
427
- ## TECHNICAL ARCHITECTURE
428
-
429
- - **SIMD Processing**: AVX-512 (64-byte vectors) or AVX2 (32-byte vectors) for parallel processing
430
- - **Memory Mapping**: Direct kernel-to-userspace zero-copy with `mmap()`
431
- - **Optimized Buffering**: 1MB ring buffer sized for L3 cache efficiency
432
- - **Compiler Optimizations**: LTO and architecture-specific tuning with `-march=native`
433
- - **Configurable Parsing**: RFC 4180 compliant with extensive customization options
434
-
435
- ## FEATURES (PROS)
436
-
437
- - RFC 4180 compliant with configurable extensions
438
- - Handles quoted fields with embedded delimiters
439
- - Support for multiple CSV dialects (TSV, PSV, etc.)
440
- - Comment line support
441
- - Field trimming and empty line handling
442
- - Line range parsing for large files
443
- - Streaming API for unlimited file sizes
444
- - Safe fallback for non-x86 architectures
445
- - High-performance CSV writer with SIMD optimization
446
- - Row counting without full parsing
447
-
448
- ## LIMITATIONS
449
-
450
- - Linux/Unix support only (optimized for x86_64 CPU)
451
- - Windows support planned for future release
452
-
453
- ## LICENSE
454
-
455
- MIT © [sanix-darker](https://github.com/sanix-darker)
456
-
457
- ## ACKNOWLEDGMENTS
458
-
459
- Inspired by:
460
- - [simdjson](https://github.com/simdjson/simdjson) - Parsing gigabytes of JSON per second
461
- - [xsv](https://github.com/BurntSushi/xsv) - Fast CSV command line toolkit
462
- - [rust-csv](https://github.com/BurntSushi/rust-csv) - CSV parser for Rust
package/binding.gyp CHANGED
@@ -4,20 +4,22 @@
4
4
  "target_name": "cisv",
5
5
  "sources": [
6
6
  "cisv/cisv_addon.cc",
7
- "cisv/cisv_parser.c",
8
- "cisv/cisv_writer.c",
9
- "cisv/cisv_transformer.c"
7
+ "../../core/src/parser.c",
8
+ "../../core/src/writer.c",
9
+ "../../core/src/transformer.c"
10
10
  ],
11
11
  "include_dirs": [
12
12
  "<!@(node -p \"require('node-addon-api').include\")",
13
+ "../../core/include/",
13
14
  "cisv/"
14
15
  ],
15
16
  "dependencies": [
16
17
  "<!(node -p \"require('node-addon-api').gyp\")"
17
18
  ],
18
19
  "cflags!": [ "-fno-exceptions" ],
19
- "cflags": ["-O3", "-mavx2"],
20
+ "cflags": ["-O3"],
20
21
  "cflags_cc!": [ "-fno-exceptions" ],
22
+ "cflags_cc": ["-O3"],
21
23
  "defines": [
22
24
  "NAPI_DISABLE_CPP_EXCEPTIONS",
23
25
  "NAPI_VERSION=6"
@@ -28,27 +30,39 @@
28
30
  "-O3",
29
31
  "-march=native",
30
32
  "-mtune=native",
31
- "-ffast-math"
33
+ "-ffast-math",
34
+ "-funroll-loops",
35
+ "-fomit-frame-pointer",
36
+ "-flto"
32
37
  ],
33
38
  "cflags_cc": [
34
39
  "-O3",
35
40
  "-march=native",
36
41
  "-mtune=native",
37
- "-ffast-math"
38
- ]
42
+ "-ffast-math",
43
+ "-funroll-loops",
44
+ "-fomit-frame-pointer",
45
+ "-flto"
46
+ ],
47
+ "ldflags": ["-flto"]
39
48
  }],
40
49
  ["OS=='mac'", {
41
50
  "xcode_settings": {
42
51
  "GCC_OPTIMIZATION_LEVEL": "3",
52
+ "LLVM_LTO": "YES",
43
53
  "OTHER_CFLAGS": [
44
54
  "-march=native",
45
55
  "-mtune=native",
46
- "-ffast-math"
56
+ "-ffast-math",
57
+ "-funroll-loops",
58
+ "-fomit-frame-pointer"
47
59
  ],
48
60
  "OTHER_CPLUSPLUSFLAGS": [
49
61
  "-march=native",
50
62
  "-mtune=native",
51
- "-ffast-math"
63
+ "-ffast-math",
64
+ "-funroll-loops",
65
+ "-fomit-frame-pointer"
52
66
  ]
53
67
  }
54
68
  }]
Binary file
@@ -1,6 +1,6 @@
1
1
  #include <napi.h>
2
- #include "cisv_parser.h"
3
- #include "cisv_transformer.h"
2
+ #include "cisv/parser.h"
3
+ #include "cisv/transformer.h"
4
4
  #include <vector>
5
5
  #include <memory>
6
6
  #include <string>
@@ -9,6 +9,109 @@
9
9
 
10
10
  namespace {
11
11
 
12
+ // =============================================================================
13
+ // SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
14
+ // Invalid UTF-8 data can cause Napi::String::New to throw or crash
15
+ // =============================================================================
16
// Reports whether the `len` bytes starting at `data` are well-formed UTF-8.
//
// Rejected input: truncated multi-byte sequences, stray continuation bytes,
// lead bytes 0xF8-0xFF, overlong encodings (C0/C1, E0 80..9F, F0 80..8F),
// UTF-16 surrogates (ED A0..BF) and code points above U+10FFFF
// (F4 90.. and F5..F7). An empty range (len == 0) is reported as valid.
static bool isValidUtf8(const char* data, size_t len) {
    const unsigned char* p = reinterpret_cast<const unsigned char*>(data);
    size_t pos = 0;

    while (pos < len) {
        const unsigned char lead = p[pos];

        // Single-byte ASCII (0x00-0x7F) needs no further checks.
        if (lead < 0x80) {
            ++pos;
            continue;
        }

        // Number of continuation bytes announced by the lead byte.
        size_t trail;
        if ((lead & 0xE0) == 0xC0) {
            trail = 1;  // 0xC0-0xDF
        } else if ((lead & 0xF0) == 0xE0) {
            trail = 2;  // 0xE0-0xEF
        } else if ((lead & 0xF8) == 0xF0) {
            trail = 3;  // 0xF0-0xF7
        } else {
            return false;  // continuation byte in lead position, or 0xF8-0xFF
        }

        // The sequence must fit entirely inside the buffer.
        if (len - pos <= trail) {
            return false;
        }

        // Every trailing byte must look like 10xxxxxx.
        for (size_t k = 1; k <= trail; ++k) {
            if ((p[pos + k] & 0xC0) != 0x80) {
                return false;
            }
        }

        // Range restrictions that depend on the lead byte and first trailer.
        const unsigned char next = p[pos + 1];
        switch (lead) {
            case 0xC0:
            case 0xC1:
                return false;                       // overlong 2-byte form
            case 0xE0:
                if (next < 0xA0) return false;      // overlong 3-byte form
                break;
            case 0xED:
                if (next >= 0xA0) return false;     // surrogate U+D800-U+DFFF
                break;
            case 0xF0:
                if (next < 0x90) return false;      // overlong 4-byte form
                break;
            case 0xF4:
                if (next >= 0x90) return false;     // above U+10FFFF
                break;
            default:
                if (lead > 0xF4) return false;      // F5-F7: above U+10FFFF
                break;
        }

        pos += trail + 1;
    }

    return true;
}
62
+
63
+ // Create Napi::String with UTF-8 validation (safe version)
64
+ // Falls back to replacement character representation for invalid UTF-8
65
+ static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
66
+ if (isValidUtf8(data, len)) {
67
+ return Napi::String::New(env, data, len);
68
+ }
69
+
70
+ // Invalid UTF-8 - replace invalid bytes with replacement character
71
+ // This prevents V8 crashes while preserving data visibility
72
+ std::string safe_str;
73
+ safe_str.reserve(len);
74
+
75
+ const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data);
76
+ size_t i = 0;
77
+
78
+ while (i < len) {
79
+ unsigned char c = bytes[i];
80
+
81
+ if (c < 0x80) {
82
+ safe_str += static_cast<char>(c);
83
+ i++;
84
+ } else if ((c & 0xE0) == 0xC0 && i + 1 < len &&
85
+ (bytes[i + 1] & 0xC0) == 0x80 && c >= 0xC2) {
86
+ safe_str += static_cast<char>(c);
87
+ safe_str += static_cast<char>(bytes[i + 1]);
88
+ i += 2;
89
+ } else if ((c & 0xF0) == 0xE0 && i + 2 < len &&
90
+ (bytes[i + 1] & 0xC0) == 0x80 &&
91
+ (bytes[i + 2] & 0xC0) == 0x80) {
92
+ safe_str += static_cast<char>(c);
93
+ safe_str += static_cast<char>(bytes[i + 1]);
94
+ safe_str += static_cast<char>(bytes[i + 2]);
95
+ i += 3;
96
+ } else if ((c & 0xF8) == 0xF0 && i + 3 < len &&
97
+ (bytes[i + 1] & 0xC0) == 0x80 &&
98
+ (bytes[i + 2] & 0xC0) == 0x80 &&
99
+ (bytes[i + 3] & 0xC0) == 0x80 && c <= 0xF4) {
100
+ safe_str += static_cast<char>(c);
101
+ safe_str += static_cast<char>(bytes[i + 1]);
102
+ safe_str += static_cast<char>(bytes[i + 2]);
103
+ safe_str += static_cast<char>(bytes[i + 3]);
104
+ i += 4;
105
+ } else {
106
+ // Invalid byte - use UTF-8 replacement character U+FFFD
107
+ safe_str += "\xEF\xBF\xBD";
108
+ i++;
109
+ }
110
+ }
111
+
112
+ return Napi::String::New(env, safe_str);
113
+ }
114
+
12
115
  // Extended RowCollector that handles transforms
13
116
  struct RowCollector {
14
117
  std::vector<std::string> current;
@@ -34,6 +137,13 @@ struct RowCollector {
34
137
  cisv_transform_pipeline_destroy(pipeline);
35
138
  pipeline = nullptr;
36
139
  }
140
+ // SECURITY FIX: Properly release all persistent references to prevent memory leak
141
+ // Napi::Persistent references must be Reset() before being destroyed
142
+ for (auto& pair : js_transforms) {
143
+ if (!pair.second.IsEmpty()) {
144
+ pair.second.Reset(); // Release the persistent handle
145
+ }
146
+ }
37
147
  js_transforms.clear();
38
148
  rows.clear();
39
149
  current.clear();
@@ -76,7 +186,8 @@ struct RowCollector {
76
186
  auto it = js_transforms.find(field_index);
77
187
  if (it != js_transforms.end() && !it->second.IsEmpty()) {
78
188
  try {
79
- Napi::String input = Napi::String::New(env, result);
189
+ // SECURITY: Use safe string creation to handle invalid UTF-8
190
+ Napi::String input = SafeNewString(env, result.c_str(), result.length());
80
191
  Napi::Number field = Napi::Number::New(env, field_index);
81
192
 
82
193
  Napi::Value js_result = it->second.Call({input, field});
@@ -84,8 +195,15 @@ struct RowCollector {
84
195
  if (js_result.IsString()) {
85
196
  result = js_result.As<Napi::String>().Utf8Value();
86
197
  }
198
+ } catch (const Napi::Error& e) {
199
+ // Keep original result but log the error
200
+ fprintf(stderr, "CISV: JS transform error for field %d: %s\n",
201
+ field_index, e.Message().c_str());
202
+ } catch (const std::exception& e) {
203
+ fprintf(stderr, "CISV: C++ exception in JS transform: %s\n", e.what());
87
204
  } catch (...) {
88
- // Keep original result if JS transform fails
205
+ fprintf(stderr, "CISV: Unknown exception in JS transform for field %d\n",
206
+ field_index);
89
207
  }
90
208
  }
91
209
 
@@ -93,7 +211,8 @@ struct RowCollector {
93
211
  auto it_all = js_transforms.find(-1);
94
212
  if (it_all != js_transforms.end() && !it_all->second.IsEmpty()) {
95
213
  try {
96
- Napi::String input = Napi::String::New(env, result);
214
+ // SECURITY: Use safe string creation to handle invalid UTF-8
215
+ Napi::String input = SafeNewString(env, result.c_str(), result.length());
97
216
  Napi::Number field = Napi::Number::New(env, field_index);
98
217
 
99
218
  Napi::Value js_result = it_all->second.Call({input, field});
@@ -101,8 +220,13 @@ struct RowCollector {
101
220
  if (js_result.IsString()) {
102
221
  result = js_result.As<Napi::String>().Utf8Value();
103
222
  }
223
+ } catch (const Napi::Error& e) {
224
+ // Keep original result but log the error
225
+ fprintf(stderr, "CISV: JS transform error (all fields): %s\n", e.Message().c_str());
226
+ } catch (const std::exception& e) {
227
+ fprintf(stderr, "CISV: C++ exception in JS transform: %s\n", e.what());
104
228
  } catch (...) {
105
- // Keep original result if JS transform fails
229
+ fprintf(stderr, "CISV: Unknown exception in JS transform (all fields)\n");
106
230
  }
107
231
  }
108
232
  }
@@ -114,9 +238,19 @@ struct RowCollector {
114
238
  static void field_cb(void *user, const char *data, size_t len) {
115
239
  auto *rc = reinterpret_cast<RowCollector *>(user);
116
240
 
117
- // Apply all transforms (C and JS)
241
+ // Fast path: no transforms - avoid unnecessary string copies
242
+ bool has_c_transforms = rc->pipeline && rc->pipeline->count > 0;
243
+ bool has_js_transforms = !rc->js_transforms.empty();
244
+
245
+ if (!has_c_transforms && !has_js_transforms) {
246
+ rc->current.emplace_back(data, len);
247
+ rc->current_field_index++;
248
+ return;
249
+ }
250
+
251
+ // Slow path: apply transforms
118
252
  std::string transformed = rc->applyTransforms(data, len, rc->current_field_index);
119
- rc->current.emplace_back(transformed);
253
+ rc->current.emplace_back(std::move(transformed));
120
254
  rc->current_field_index++;
121
255
  }
122
256
 
@@ -457,15 +591,25 @@ public:
457
591
 
458
592
  if (info[0].IsBuffer()) {
459
593
  auto buf = info[0].As<Napi::Buffer<uint8_t>>();
460
- cisv_parser_write(parser_, buf.Data(), buf.Length());
461
- total_bytes_ += buf.Length();
594
+ size_t buf_len = buf.Length();
595
+ // Check for overflow before adding to total_bytes_
596
+ if (buf_len > SIZE_MAX - total_bytes_) {
597
+ throw Napi::Error::New(env, "Total bytes would overflow");
598
+ }
599
+ cisv_parser_write(parser_, buf.Data(), buf_len);
600
+ total_bytes_ += buf_len;
462
601
  return;
463
602
  }
464
603
 
465
604
  if (info[0].IsString()) {
466
605
  std::string chunk = info[0].As<Napi::String>();
467
- cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(chunk.data()), chunk.size());
468
- total_bytes_ += chunk.size();
606
+ size_t chunk_size = chunk.size();
607
+ // Check for overflow before adding to total_bytes_
608
+ if (chunk_size > SIZE_MAX - total_bytes_) {
609
+ throw Napi::Error::New(env, "Total bytes would overflow");
610
+ }
611
+ cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(chunk.data()), chunk_size);
612
+ total_bytes_ += chunk_size;
469
613
  return;
470
614
  }
471
615
 
@@ -475,9 +619,10 @@ public:
475
619
  void End(const Napi::CallbackInfo &info) {
476
620
  if (!is_destroyed_) {
477
621
  cisv_parser_end(parser_);
478
- // Clear the environment reference after ending
479
- // FIXME: the transformer may need this
480
- // rc_->env = nullptr;
622
+ // Clear the environment reference after ending to prevent stale references
623
+ rc_->env = nullptr;
624
+ // Note: JS transforms stored in rc_->js_transforms remain valid
625
+ // as they are Persistent references managed by the addon lifecycle
481
626
  }
482
627
  }
483
628
 
@@ -554,6 +699,9 @@ public:
554
699
  if (info.Length() >= 3 && info[2].IsObject()) {
555
700
  Napi::Object context_obj = info[2].As<Napi::Object>();
556
701
  ctx = (cisv_transform_context_t*)calloc(1, sizeof(cisv_transform_context_t));
702
+ if (!ctx) {
703
+ throw Napi::Error::New(env, "Memory allocation failed for transform context");
704
+ }
557
705
 
558
706
  // Extract context properties if they exist
559
707
  if (context_obj.Has("key")) {
@@ -561,6 +709,10 @@ public:
561
709
  if (key_val.IsString()) {
562
710
  std::string key = key_val.As<Napi::String>();
563
711
  ctx->key = strdup(key.c_str());
712
+ if (!ctx->key) {
713
+ free(ctx);
714
+ throw Napi::Error::New(env, "Memory allocation failed for key");
715
+ }
564
716
  ctx->key_len = key.length();
565
717
  }
566
718
  }
@@ -570,6 +722,11 @@ public:
570
722
  if (iv_val.IsString()) {
571
723
  std::string iv = iv_val.As<Napi::String>();
572
724
  ctx->iv = strdup(iv.c_str());
725
+ if (!ctx->iv) {
726
+ if (ctx->key) free((void*)ctx->key);
727
+ free(ctx);
728
+ throw Napi::Error::New(env, "Memory allocation failed for iv");
729
+ }
573
730
  ctx->iv_len = iv.length();
574
731
  }
575
732
  }
@@ -653,6 +810,9 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
653
810
  if (info.Length() >= 3 && info[2].IsObject()) {
654
811
  Napi::Object context_obj = info[2].As<Napi::Object>();
655
812
  ctx = (cisv_transform_context_t*)calloc(1, sizeof(cisv_transform_context_t));
813
+ if (!ctx) {
814
+ throw Napi::Error::New(env, "Memory allocation failed for transform context");
815
+ }
656
816
 
657
817
  // Extract context properties if they exist
658
818
  if (context_obj.Has("key")) {
@@ -660,6 +820,10 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
660
820
  if (key_val.IsString()) {
661
821
  std::string key = key_val.As<Napi::String>();
662
822
  ctx->key = strdup(key.c_str());
823
+ if (!ctx->key) {
824
+ free(ctx);
825
+ throw Napi::Error::New(env, "Memory allocation failed for key");
826
+ }
663
827
  ctx->key_len = key.length();
664
828
  }
665
829
  }
@@ -669,6 +833,11 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
669
833
  if (iv_val.IsString()) {
670
834
  std::string iv = iv_val.As<Napi::String>();
671
835
  ctx->iv = strdup(iv.c_str());
836
+ if (!ctx->iv) {
837
+ if (ctx->key) free((void*)ctx->key);
838
+ free(ctx);
839
+ throw Napi::Error::New(env, "Memory allocation failed for iv");
840
+ }
672
841
  ctx->iv_len = iv.length();
673
842
  }
674
843
  }
@@ -722,14 +891,31 @@ void SetHeaderFields(const Napi::CallbackInfo &info) {
722
891
  throw Napi::Error::New(env, "Memory allocation failed");
723
892
  }
724
893
 
894
+ // Initialize to NULL for safe cleanup on partial failure
895
+ for (size_t i = 0; i < field_count; i++) {
896
+ c_field_names[i] = nullptr;
897
+ }
898
+
725
899
  for (size_t i = 0; i < field_count; i++) {
726
900
  Napi::Value field_val = field_names[i];
727
901
  if (!field_val.IsString()) {
902
+ // Clean up all previously allocated strings
903
+ for (size_t j = 0; j < i; j++) {
904
+ if (c_field_names[j]) free((void*)c_field_names[j]);
905
+ }
728
906
  free(c_field_names);
729
907
  throw Napi::TypeError::New(env, "Field names must be strings");
730
908
  }
731
909
  std::string field_str = field_val.As<Napi::String>();
732
910
  c_field_names[i] = strdup(field_str.c_str());
911
+ if (!c_field_names[i]) {
912
+ // Clean up all previously allocated strings
913
+ for (size_t j = 0; j < i; j++) {
914
+ if (c_field_names[j]) free((void*)c_field_names[j]);
915
+ }
916
+ free(c_field_names);
917
+ throw Napi::Error::New(env, "Memory allocation failed for field name");
918
+ }
733
919
  }
734
920
 
735
921
  // Ensure pipeline exists
@@ -981,7 +1167,9 @@ private:
981
1167
  for (size_t i = 0; i < rc_->rows.size(); ++i) {
982
1168
  Napi::Array row = Napi::Array::New(env, rc_->rows[i].size());
983
1169
  for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
984
- row[j] = Napi::String::New(env, rc_->rows[i][j]);
1170
+ // SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
1171
+ const std::string& field = rc_->rows[i][j];
1172
+ row[j] = SafeNewString(env, field.c_str(), field.length());
985
1173
  }
986
1174
  rows[i] = row;
987
1175
  }