npm - cisv - Versions diffs - 0.0.60 → 0.1.2 - Mend

cisv 0.0.60 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +0 -209
package/binding.gyp +23 -9
package/build/Release/cisv.node +0 -0
package/cisv/cisv_addon.cc +204 -16
package/package.json +17 -9
package/LICENSE +0 -21
package/benchmark/benchmark.js +0 -418
package/cisv/cisv_parser.c +0 -1333
package/cisv/cisv_parser.h +0 -91
package/cisv/cisv_simd.h +0 -54
package/cisv/cisv_transformer.c +0 -624
package/cisv/cisv_transformer.h +0 -171
package/cisv/cisv_writer.c +0 -531
package/cisv/cisv_writer.h +0 -60
package/data.csv +0 -11
package/index.d.ts +0 -280
/package/{types → cisv/types}/cisv.d.ts +0 -0

package/README.md CHANGED Viewed

@@ -5,79 +5,6 @@
 ![Size](https://deno.bundlejs.com/badge?q=spring-easing)
 ![Downloads](https://badgen.net/npm/dw/cisv)
-> # DISCLAIMER
->
-> This CSV parser does not cover all quote/comment edge cases; for now it is meant only to be extremely fast, and is therefore not production-ready yet.
-Cisv is a csv parser on steroids... literally.
-It's a high-performance CSV parser/writer leveraging SIMD instructions and zero-copy memory mapping. Available as both a Node.js native addon and standalone CLI tool with extensive configuration options.
-I wrote about the basics in a blog post, which you can read here: https://sanixdk.xyz/blogs/how-i-accidentally-created-the-fastest-csv-parser-ever-made.
-## CLI BENCHMARKS WITH DOCKER
-```bash
-$ docker build -t cisv-benchmark .
-```
-To run them with specific resource limits for the container, you can use:
-```bash
-$ docker run --rm      \
-    --cpus="2.0"       \
-    --memory="4g"      \
-    --memory-swap="4g" \
-    --cpu-shares=1024  \
-    --security-opt     \
-    seccomp=unconfined \
-    cisv-benchmark
-```
-## BENCHMARKS
-Benchmarks comparison with existing popular tools,
-cf pipeline you can check : (https://github.com/Sanix-Darker/cisv/actions/runs/18422464917/job/52498590205) at step "Publish to npm".
-### SYNCHRONOUS RESULTS
-| Library            | Speed (MB/s) | Avg Time (ms) | Operations/sec |
-|--------------------|--------------|---------------|----------------|
-| cisv (sync)        | 71.10        | 0.01          | 153723         |
-| csv-parse (sync)   | 18.76        | 0.02          | 40563          |
-| papaparse (sync)   | 27.97        | 0.02          | 60467          |
-| udsv (sync)        | 69.81        | 0.01          | 150930         |
-| d3-dsv (sync)      | 98.11        | 0.00          | 212117         |
-### SYNCHRONOUS RESULTS (WITH DATA ACCESS)
-| Library            | Speed (MB/s) | Avg Time (ms) | Operations/sec |
-|--------------------|--------------|---------------|----------------|
-| cisv (sync)        | 104.58       | 0.00          | 226108         |
-| csv-parse (sync)   | 16.87        | 0.03          | 36482          |
-| papaparse (sync)   | 28.13        | 0.02          | 60807          |
-| udsv (sync)        | 69.29        | 0.01          | 149812         |
-| d3-dsv (sync)      | 96.32        | 0.00          | 208248         |
-### ASYNCHRONOUS RESULTS
-| Library                  | Speed (MB/s) | Avg Time (ms) | Operations/sec |
-|--------------------------|--------------|---------------|----------------|
-| cisv (async/stream)      | 98.36        | 0.00          | 212662         |
-| papaparse (async/stream) | 21.56        | 0.02          | 46609          |
-| fast-csv (async/stream)  | 10.09        | 0.05          | 21817          |
-| neat-csv (async/promise) | 9.20         | 0.05          | 19898          |
-| udsv (async/stream)      | 51.74        | 0.01          | 111858         |
-### ASYNCHRONOUS RESULTS (WITH DATA ACCESS)
-| Library                  | Speed (MB/s) | Avg Time (ms) | Operations/sec |
-|--------------------------|--------------|---------------|----------------|
-| cisv (async/stream)      | 27.50        | 0.02          | 59460          |
-| papaparse (async/stream) | 21.98        | 0.02          | 47513          |
-| fast-csv (async/stream)  | 10.05        | 0.05          | 21719          |
-| neat-csv (async/promise) | 9.58         | 0.05          | 20711          |
-| udsv (async/stream)      | 53.26        | 0.01          | 115146         |
 ## INSTALLATION
 ### NODE.JS PACKAGE
@@ -85,18 +12,9 @@ cf pipeline you can check : (https://github.com/Sanix-Darker/cisv/actions/runs/1
 npm install cisv
 ```
-### CLI TOOL (FROM SOURCE)
-```bash
-git clone https://github.com/sanix-darker/cisv
-cd cisv
-make cli
-sudo make install-cli
-```
 ### BUILD FROM SOURCE (NODE.JS ADDON)
 ```bash
 npm install -g node-gyp
-make build
 ```
 ## QUICK START
@@ -118,20 +36,6 @@ const tsv_parser = new cisvParser({
 const tsv_rows = tsv_parser.parseSync('./data.tsv');
 ```
-### CLI
-```bash
-# Basic parsing
-cisv_bin data.csv
-# Parse TSV file
-cisv_bin -d $'\t' data.tsv
-# Parse with custom quote and trim
-cisv_bin -q "'" -t data.csv
-# Skip comment lines
-cisv_bin -m '#' config.csv
-```
 ## CONFIGURATION OPTIONS
@@ -347,116 +251,3 @@ const tsvCount = cisvParser.countRowsWithConfig('data.tsv', {
     toLine: 1000
 });
 ```
-## CLI USAGE
-### PARSING OPTIONS
-```bash
-cisv_bin [OPTIONS] [FILE]
-General Options:
-  -h, --help              Show help message
-  -v, --version           Show version
-  -o, --output FILE       Write to FILE instead of stdout
-  -b, --benchmark         Run benchmark mode
-Configuration Options:
-  -d, --delimiter DELIM   Field delimiter (default: ,)
-  -q, --quote CHAR        Quote character (default: ")
-  -e, --escape CHAR       Escape character (default: RFC4180 style)
-  -m, --comment CHAR      Comment character (default: none)
-  -t, --trim              Trim whitespace from fields
-  -r, --relaxed           Use relaxed parsing rules
-  --skip-empty            Skip empty lines
-  --skip-errors           Skip lines with parse errors
-  --max-row SIZE          Maximum row size in bytes
-  --from-line N           Start from line N (1-based)
-  --to-line N             Stop at line N
-Processing Options:
-  -s, --select COLS       Select columns (comma-separated indices)
-  -c, --count             Show only row count
-  --head N                Show first N rows
-  --tail N                Show last N rows
-```
-### EXAMPLES
-```bash
-# Parse TSV file
-cisv_bin -d $'\t' data.tsv
-# Parse CSV with semicolon delimiter and single quotes
-cisv_bin -d ';' -q "'" european.csv
-# Skip comment lines starting with #
-cisv_bin -m '#' config.csv
-# Trim whitespace and skip empty lines
-cisv_bin -t --skip-empty messy.csv
-# Parse lines 100-1000 only
-cisv_bin --from-line 100 --to-line 1000 large.csv
-# Select specific columns
-cisv_bin -s 0,2,5,7 data.csv
-# Count rows with specific configuration
-cisv_bin -c -d $'\t' --skip-empty data.tsv
-# Benchmark with custom delimiter
-cisv_bin -b -d ';' european.csv
-```
-### WRITING
-```bash
-cisv_bin write [OPTIONS]
-Options:
-  -g, --generate N       Generate N rows of test data
-  -o, --output FILE      Output file
-  -d, --delimiter DELIM  Field delimiter
-  -Q, --quote-all        Quote all fields
-  -r, --crlf             Use CRLF line endings
-  -n, --null TEXT        Null representation
-  -b, --benchmark        Benchmark mode
-```
-## TECHNICAL ARCHITECTURE
-- **SIMD Processing**: AVX-512 (64-byte vectors) or AVX2 (32-byte vectors) for parallel processing
-- **Memory Mapping**: Direct kernel-to-userspace zero-copy with `mmap()`
-- **Optimized Buffering**: 1MB ring buffer sized for L3 cache efficiency
-- **Compiler Optimizations**: LTO and architecture-specific tuning with `-march=native`
-- **Configurable Parsing**: RFC 4180 compliant with extensive customization options
-## FEATURES (PROS)
-- RFC 4180 compliant with configurable extensions
-- Handles quoted fields with embedded delimiters
-- Support for multiple CSV dialects (TSV, PSV, etc.)
-- Comment line support
-- Field trimming and empty line handling
-- Line range parsing for large files
-- Streaming API for unlimited file sizes
-- Safe fallback for non-x86 architectures
-- High-performance CSV writer with SIMD optimization
-- Row counting without full parsing
-## LIMITATIONS
-- Linux/Unix support only (optimized for x86_64 CPU)
-- Windows support planned for future release
-## LICENSE
-MIT © [sanix-darker](https://github.com/sanix-darker)
-## ACKNOWLEDGMENTS
-Inspired by:
-- [simdjson](https://github.com/simdjson/simdjson) - Parsing gigabytes of JSON per second
-- [xsv](https://github.com/BurntSushi/xsv) - Fast CSV command line toolkit
-- [rust-csv](https://github.com/BurntSushi/rust-csv) - CSV parser for Rust

package/binding.gyp CHANGED Viewed

@@ -4,20 +4,22 @@
       "target_name": "cisv",
       "sources": [
         "cisv/cisv_addon.cc",
-        "cisv/cisv_parser.c",
-        "cisv/cisv_writer.c",
-        "cisv/cisv_transformer.c"
+        "../../core/src/parser.c",
+        "../../core/src/writer.c",
+        "../../core/src/transformer.c"
       ],
       "include_dirs": [
         "<!@(node -p \"require('node-addon-api').include\")",
+        "../../core/include/",
         "cisv/"
       ],
       "dependencies": [
         "<!(node -p \"require('node-addon-api').gyp\")"
       ],
       "cflags!": [ "-fno-exceptions" ],
-      "cflags": ["-O3", "-mavx2"],
+      "cflags": ["-O3"],
       "cflags_cc!": [ "-fno-exceptions" ],
+      "cflags_cc": ["-O3"],
       "defines": [
         "NAPI_DISABLE_CPP_EXCEPTIONS",
         "NAPI_VERSION=6"
@@ -28,27 +30,39 @@
             "-O3",
             "-march=native",
             "-mtune=native",
-            "-ffast-math"
+            "-ffast-math",
+            "-funroll-loops",
+            "-fomit-frame-pointer",
+            "-flto"
           ],
           "cflags_cc": [
             "-O3",
             "-march=native",
             "-mtune=native",
-            "-ffast-math"
-          ]
+            "-ffast-math",
+            "-funroll-loops",
+            "-fomit-frame-pointer",
+            "-flto"
+          ],
+          "ldflags": ["-flto"]
         }],
         ["OS=='mac'", {
           "xcode_settings": {
             "GCC_OPTIMIZATION_LEVEL": "3",
+            "LLVM_LTO": "YES",
             "OTHER_CFLAGS": [
               "-march=native",
               "-mtune=native",
-              "-ffast-math"
+              "-ffast-math",
+              "-funroll-loops",
+              "-fomit-frame-pointer"
             ],
             "OTHER_CPLUSPLUSFLAGS": [
               "-march=native",
               "-mtune=native",
-              "-ffast-math"
+              "-ffast-math",
+              "-funroll-loops",
+              "-fomit-frame-pointer"
             ]
           }
         }]

package/build/Release/cisv.node ADDED Viewed

Binary file

package/cisv/cisv_addon.cc CHANGED Viewed

@@ -1,6 +1,6 @@
 #include <napi.h>
-#include "cisv_parser.h"
-#include "cisv_transformer.h"
+#include "cisv/parser.h"
+#include "cisv/transformer.h"
 #include <vector>
 #include <memory>
 #include <string>
@@ -9,6 +9,109 @@
 namespace {
+// =============================================================================
+// SECURITY: UTF-8 validation to prevent V8 crashes on invalid input
+// Invalid UTF-8 data can cause Napi::String::New to throw or crash
+// =============================================================================
+static bool isValidUtf8(const char* data, size_t len) {
+    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data);
+    size_t i = 0;
+    while (i < len) {
+        unsigned char c = bytes[i];
+        if (c < 0x80) {
+            // ASCII: single byte (0x00-0x7F)
+            i++;
+        } else if ((c & 0xE0) == 0xC0) {
+            // 2-byte sequence (0xC0-0xDF)
+            if (i + 1 >= len) return false;
+            if ((bytes[i + 1] & 0xC0) != 0x80) return false;
+            // Overlong check: C0-C1 are invalid
+            if (c < 0xC2) return false;
+            i += 2;
+        } else if ((c & 0xF0) == 0xE0) {
+            // 3-byte sequence (0xE0-0xEF)
+            if (i + 2 >= len) return false;
+            if ((bytes[i + 1] & 0xC0) != 0x80) return false;
+            if ((bytes[i + 2] & 0xC0) != 0x80) return false;
+            // Overlong check for E0
+            if (c == 0xE0 && bytes[i + 1] < 0xA0) return false;
+            // Surrogate check (U+D800-U+DFFF)
+            if (c == 0xED && bytes[i + 1] >= 0xA0) return false;
+            i += 3;
+        } else if ((c & 0xF8) == 0xF0) {
+            // 4-byte sequence (0xF0-0xF7)
+            if (i + 3 >= len) return false;
+            if ((bytes[i + 1] & 0xC0) != 0x80) return false;
+            if ((bytes[i + 2] & 0xC0) != 0x80) return false;
+            if ((bytes[i + 3] & 0xC0) != 0x80) return false;
+            // Overlong check for F0
+            if (c == 0xF0 && bytes[i + 1] < 0x90) return false;
+            // Check for code points > U+10FFFF
+            if (c == 0xF4 && bytes[i + 1] >= 0x90) return false;
+            if (c > 0xF4) return false;
+            i += 4;
+        } else {
+            // Invalid leading byte
+            return false;
+        }
+    }
+    return true;
+}
+// Create Napi::String with UTF-8 validation (safe version)
+// Falls back to replacement character representation for invalid UTF-8
+static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
+    if (isValidUtf8(data, len)) {
+        return Napi::String::New(env, data, len);
+    }
+    // Invalid UTF-8 - replace invalid bytes with replacement character
+    // This prevents V8 crashes while preserving data visibility
+    std::string safe_str;
+    safe_str.reserve(len);
+    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data);
+    size_t i = 0;
+    while (i < len) {
+        unsigned char c = bytes[i];
+        if (c < 0x80) {
+            safe_str += static_cast<char>(c);
+            i++;
+        } else if ((c & 0xE0) == 0xC0 && i + 1 < len &&
+                   (bytes[i + 1] & 0xC0) == 0x80 && c >= 0xC2) {
+            safe_str += static_cast<char>(c);
+            safe_str += static_cast<char>(bytes[i + 1]);
+            i += 2;
+        } else if ((c & 0xF0) == 0xE0 && i + 2 < len &&
+                   (bytes[i + 1] & 0xC0) == 0x80 &&
+                   (bytes[i + 2] & 0xC0) == 0x80) {
+            safe_str += static_cast<char>(c);
+            safe_str += static_cast<char>(bytes[i + 1]);
+            safe_str += static_cast<char>(bytes[i + 2]);
+            i += 3;
+        } else if ((c & 0xF8) == 0xF0 && i + 3 < len &&
+                   (bytes[i + 1] & 0xC0) == 0x80 &&
+                   (bytes[i + 2] & 0xC0) == 0x80 &&
+                   (bytes[i + 3] & 0xC0) == 0x80 && c <= 0xF4) {
+            safe_str += static_cast<char>(c);
+            safe_str += static_cast<char>(bytes[i + 1]);
+            safe_str += static_cast<char>(bytes[i + 2]);
+            safe_str += static_cast<char>(bytes[i + 3]);
+            i += 4;
+        } else {
+            // Invalid byte - use UTF-8 replacement character U+FFFD
+            safe_str += "\xEF\xBF\xBD";
+            i++;
+        }
+    }
+    return Napi::String::New(env, safe_str);
+}
 // Extended RowCollector that handles transforms
 struct RowCollector {
     std::vector<std::string> current;
@@ -34,6 +137,13 @@ struct RowCollector {
             cisv_transform_pipeline_destroy(pipeline);
             pipeline = nullptr;
         }
+        // SECURITY FIX: Properly release all persistent references to prevent memory leak
+        // Napi::Persistent references must be Reset() before being destroyed
+        for (auto& pair : js_transforms) {
+            if (!pair.second.IsEmpty()) {
+                pair.second.Reset();  // Release the persistent handle
+            }
+        }
         js_transforms.clear();
         rows.clear();
         current.clear();
@@ -76,7 +186,8 @@ struct RowCollector {
             auto it = js_transforms.find(field_index);
             if (it != js_transforms.end() && !it->second.IsEmpty()) {
                 try {
-                    Napi::String input = Napi::String::New(env, result);
+                    // SECURITY: Use safe string creation to handle invalid UTF-8
+                    Napi::String input = SafeNewString(env, result.c_str(), result.length());
                     Napi::Number field = Napi::Number::New(env, field_index);
                     Napi::Value js_result = it->second.Call({input, field});
@@ -84,8 +195,15 @@ struct RowCollector {
                     if (js_result.IsString()) {
                         result = js_result.As<Napi::String>().Utf8Value();
                     }
+                } catch (const Napi::Error& e) {
+                    // Keep original result but log the error
+                    fprintf(stderr, "CISV: JS transform error for field %d: %s\n",
+                            field_index, e.Message().c_str());
+                } catch (const std::exception& e) {
+                    fprintf(stderr, "CISV: C++ exception in JS transform: %s\n", e.what());
                 } catch (...) {
-                    // Keep original result if JS transform fails
+                    fprintf(stderr, "CISV: Unknown exception in JS transform for field %d\n",
+                            field_index);
                 }
             }
@@ -93,7 +211,8 @@ struct RowCollector {
             auto it_all = js_transforms.find(-1);
             if (it_all != js_transforms.end() && !it_all->second.IsEmpty()) {
                 try {
-                    Napi::String input = Napi::String::New(env, result);
+                    // SECURITY: Use safe string creation to handle invalid UTF-8
+                    Napi::String input = SafeNewString(env, result.c_str(), result.length());
                     Napi::Number field = Napi::Number::New(env, field_index);
                     Napi::Value js_result = it_all->second.Call({input, field});
@@ -101,8 +220,13 @@ struct RowCollector {
                     if (js_result.IsString()) {
                         result = js_result.As<Napi::String>().Utf8Value();
                     }
+                } catch (const Napi::Error& e) {
+                    // Keep original result but log the error
+                    fprintf(stderr, "CISV: JS transform error (all fields): %s\n", e.Message().c_str());
+                } catch (const std::exception& e) {
+                    fprintf(stderr, "CISV: C++ exception in JS transform: %s\n", e.what());
                 } catch (...) {
-                    // Keep original result if JS transform fails
+                    fprintf(stderr, "CISV: Unknown exception in JS transform (all fields)\n");
                 }
             }
         }
@@ -114,9 +238,19 @@ struct RowCollector {
 static void field_cb(void *user, const char *data, size_t len) {
     auto *rc = reinterpret_cast<RowCollector *>(user);
-    // Apply all transforms (C and JS)
+    // Fast path: no transforms - avoid unnecessary string copies
+    bool has_c_transforms = rc->pipeline && rc->pipeline->count > 0;
+    bool has_js_transforms = !rc->js_transforms.empty();
+    if (!has_c_transforms && !has_js_transforms) {
+        rc->current.emplace_back(data, len);
+        rc->current_field_index++;
+        return;
+    }
+    // Slow path: apply transforms
     std::string transformed = rc->applyTransforms(data, len, rc->current_field_index);
-    rc->current.emplace_back(transformed);
+    rc->current.emplace_back(std::move(transformed));
     rc->current_field_index++;
 }
@@ -457,15 +591,25 @@ public:
         if (info[0].IsBuffer()) {
             auto buf = info[0].As<Napi::Buffer<uint8_t>>();
-            cisv_parser_write(parser_, buf.Data(), buf.Length());
-            total_bytes_ += buf.Length();
+            size_t buf_len = buf.Length();
+            // Check for overflow before adding to total_bytes_
+            if (buf_len > SIZE_MAX - total_bytes_) {
+                throw Napi::Error::New(env, "Total bytes would overflow");
+            }
+            cisv_parser_write(parser_, buf.Data(), buf_len);
+            total_bytes_ += buf_len;
             return;
         }
         if (info[0].IsString()) {
             std::string chunk = info[0].As<Napi::String>();
-            cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(chunk.data()), chunk.size());
-            total_bytes_ += chunk.size();
+            size_t chunk_size = chunk.size();
+            // Check for overflow before adding to total_bytes_
+            if (chunk_size > SIZE_MAX - total_bytes_) {
+                throw Napi::Error::New(env, "Total bytes would overflow");
+            }
+            cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(chunk.data()), chunk_size);
+            total_bytes_ += chunk_size;
             return;
         }
@@ -475,9 +619,10 @@ public:
     void End(const Napi::CallbackInfo &info) {
         if (!is_destroyed_) {
             cisv_parser_end(parser_);
-            // Clear the environment reference after ending
-            // FIXME: the transformer may need this
-            // rc_->env = nullptr;
+            // Clear the environment reference after ending to prevent stale references
+            rc_->env = nullptr;
+            // Note: JS transforms stored in rc_->js_transforms remain valid
+            // as they are Persistent references managed by the addon lifecycle
         }
     }
@@ -554,6 +699,9 @@ public:
             if (info.Length() >= 3 && info[2].IsObject()) {
                 Napi::Object context_obj = info[2].As<Napi::Object>();
                 ctx = (cisv_transform_context_t*)calloc(1, sizeof(cisv_transform_context_t));
+                if (!ctx) {
+                    throw Napi::Error::New(env, "Memory allocation failed for transform context");
+                }
                 // Extract context properties if they exist
                 if (context_obj.Has("key")) {
@@ -561,6 +709,10 @@ public:
                     if (key_val.IsString()) {
                         std::string key = key_val.As<Napi::String>();
                         ctx->key = strdup(key.c_str());
+                        if (!ctx->key) {
+                            free(ctx);
+                            throw Napi::Error::New(env, "Memory allocation failed for key");
+                        }
                         ctx->key_len = key.length();
                     }
                 }
@@ -570,6 +722,11 @@ public:
                     if (iv_val.IsString()) {
                         std::string iv = iv_val.As<Napi::String>();
                         ctx->iv = strdup(iv.c_str());
+                        if (!ctx->iv) {
+                            if (ctx->key) free((void*)ctx->key);
+                            free(ctx);
+                            throw Napi::Error::New(env, "Memory allocation failed for iv");
+                        }
                         ctx->iv_len = iv.length();
                     }
                 }
@@ -653,6 +810,9 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
         if (info.Length() >= 3 && info[2].IsObject()) {
             Napi::Object context_obj = info[2].As<Napi::Object>();
             ctx = (cisv_transform_context_t*)calloc(1, sizeof(cisv_transform_context_t));
+            if (!ctx) {
+                throw Napi::Error::New(env, "Memory allocation failed for transform context");
+            }
             // Extract context properties if they exist
             if (context_obj.Has("key")) {
@@ -660,6 +820,10 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
                 if (key_val.IsString()) {
                     std::string key = key_val.As<Napi::String>();
                     ctx->key = strdup(key.c_str());
+                    if (!ctx->key) {
+                        free(ctx);
+                        throw Napi::Error::New(env, "Memory allocation failed for key");
+                    }
                     ctx->key_len = key.length();
                 }
             }
@@ -669,6 +833,11 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
                 if (iv_val.IsString()) {
                     std::string iv = iv_val.As<Napi::String>();
                     ctx->iv = strdup(iv.c_str());
+                    if (!ctx->iv) {
+                        if (ctx->key) free((void*)ctx->key);
+                        free(ctx);
+                        throw Napi::Error::New(env, "Memory allocation failed for iv");
+                    }
                     ctx->iv_len = iv.length();
                 }
             }
@@ -722,14 +891,31 @@ void SetHeaderFields(const Napi::CallbackInfo &info) {
         throw Napi::Error::New(env, "Memory allocation failed");
     }
+    // Initialize to NULL for safe cleanup on partial failure
+    for (size_t i = 0; i < field_count; i++) {
+        c_field_names[i] = nullptr;
+    }
     for (size_t i = 0; i < field_count; i++) {
         Napi::Value field_val = field_names[i];
         if (!field_val.IsString()) {
+            // Clean up all previously allocated strings
+            for (size_t j = 0; j < i; j++) {
+                if (c_field_names[j]) free((void*)c_field_names[j]);
+            }
             free(c_field_names);
             throw Napi::TypeError::New(env, "Field names must be strings");
         }
         std::string field_str = field_val.As<Napi::String>();
         c_field_names[i] = strdup(field_str.c_str());
+        if (!c_field_names[i]) {
+            // Clean up all previously allocated strings
+            for (size_t j = 0; j < i; j++) {
+                if (c_field_names[j]) free((void*)c_field_names[j]);
+            }
+            free(c_field_names);
+            throw Napi::Error::New(env, "Memory allocation failed for field name");
+        }
     }
     // Ensure pipeline exists
@@ -981,7 +1167,9 @@ private:
         for (size_t i = 0; i < rc_->rows.size(); ++i) {
             Napi::Array row = Napi::Array::New(env, rc_->rows[i].size());
             for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
-                row[j] = Napi::String::New(env, rc_->rows[i][j]);
+                // SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
+                const std::string& field = rc_->rows[i][j];
+                row[j] = SafeNewString(env, field.c_str(), field.length());
             }
             rows[i] = row;
         }