npm - cisv - Versions diffs - 0.2.5 → 0.3.2 - Mend

cisv 0.2.5 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -1,298 +1,154 @@
-# CISV
+# CISV Node.js Binding
-![License](https://img.shields.io/badge/license-MIT-blue)
-![Build](https://img.shields.io/badge/build-passing-brightgreen)
-![Size](https://deno.bundlejs.com/badge?q=spring-easing)
-![Downloads](https://badgen.net/npm/dw/cisv)
+Native Node-API binding for the CISV C core.
-## INSTALLATION
+## Install
-### NODE.JS PACKAGE
 ```bash
 npm install cisv
 ```
-### BUILD FROM SOURCE (NODE.JS ADDON)
+From source in this repository:
 ```bash
-npm install -g node-gyp
+cd bindings/nodejs
+npm ci
+npm run build
+npm test
 ```
-## QUICK START
+## Quick Start
-### NODE.JS
-```javascript
+```js
 const { cisvParser } = require('cisv');
-// Basic usage
-const parser = new cisvParser();
-const rows = parser.parseSync('./data.csv');
-// With configuration (optional)
-const tsv_parser = new cisvParser({
-    delimiter: '\t',
-    quote: "'",
-    trim: true
-});
-const tsv_rows = tsv_parser.parseSync('./data.tsv');
-```
-## CONFIGURATION OPTIONS
-### Parser Configuration
-```javascript
-const parser = new cisvParser({
-    // Field delimiter character (default: ',')
-    delimiter: ',',
-    // Quote character (default: '"')
-    quote: '"',
-    // Escape character (null for RFC4180 "" style, default: null)
-    escape: null,
-    // Comment character to skip lines (default: null)
-    comment: '#',
-    // Trim whitespace from fields (default: false)
-    trim: true,
-    // Skip empty lines (default: false)
-    skipEmptyLines: true,
-    // Use relaxed parsing rules (default: false)
-    relaxed: false,
-    // Skip lines with parse errors (default: false)
-    skipLinesWithError: true,
-    // Maximum row size in bytes (0 = unlimited, default: 0)
-    maxRowSize: 1048576,
-    // Start parsing from line N (1-based, default: 1)
-    fromLine: 10,
-    // Stop parsing at line N (0 = until end, default: 0)
-    toLine: 1000
-});
-```
-### Dynamic Configuration
-```javascript
-// Set configuration after creation
-parser.setConfig({
-    delimiter: ';',
-    quote: "'",
-    trim: true
-});
-// Get current configuration
-const config = parser.getConfig();
-console.log(config);
-```
-## API REFERENCE
-### TYPESCRIPT DEFINITIONS
-```typescript
-interface CisvConfig {
-    delimiter?: string;
-    quote?: string;
-    escape?: string | null;
-    comment?: string | null;
-    trim?: boolean;
-    skipEmptyLines?: boolean;
-    relaxed?: boolean;
-    skipLinesWithError?: boolean;
-    maxRowSize?: number;
-    fromLine?: number;
-    toLine?: number;
-}
-interface ParsedRow extends Array<string> {}
-interface ParseStats {
-    rowCount: number;
-    fieldCount: number;
-    totalBytes: number;
-    parseTime: number;
-    currentLine: number;
-}
-interface TransformInfo {
-    cTransformCount: number;
-    jsTransformCount: number;
-    fieldIndices: number[];
-}
-class cisvParser {
-    constructor(config?: CisvConfig);
-    parseSync(path: string): ParsedRow[];
-    parse(path: string): Promise<ParsedRow[]>;
-    parseString(csv: string): ParsedRow[];
-    write(chunk: string | Buffer): void;
-    end(): void;
-    getRows(): ParsedRow[];
-    clear(): void;
-    setConfig(config: CisvConfig): void;
-    getConfig(): CisvConfig;
-    transform(fieldIndex: number, type: string | Function): this;
-    removeTransform(fieldIndex: number): this;
-    clearTransforms(): this;
-    getStats(): ParseStats;
-    getTransformInfo(): TransformInfo;
-    destroy(): void;
-    static countRows(path: string): number;
-    static countRowsWithConfig(path: string, config?: CisvConfig): number;
-}
-```
-### BASIC PARSING
-```javascript
-import { cisvParser } from "cisv";
-// Default configuration (standard CSV)
-const parser = new cisvParser();
+const parser = new cisvParser({ delimiter: ',', trim: true });
 const rows = parser.parseSync('data.csv');
-// Custom configuration (TSV with single quotes)
-const tsvParser = new cisvParser({
-    delimiter: '\t',
-    quote: "'"
-});
-const tsvRows = tsvParser.parseSync('data.tsv');
-// Parse specific line range
-const rangeParser = new cisvParser({
-    fromLine: 100,
-    toLine: 1000
-});
-const subset = rangeParser.parseSync('large.csv');
-// Skip comments and empty lines
-const cleanParser = new cisvParser({
-    comment: '#',
-    skipEmptyLines: true,
-    trim: true
-});
-const cleanData = cleanParser.parseSync('config.csv');
+console.log(rows.length);
+console.log(rows[0]);
 ```
-### STREAMING
-```javascript
-import { cisvParser } from "cisv";
-import fs from 'fs';
-const streamParser = new cisvParser({
-    delimiter: ',',
-    trim: true
-});
-const stream = fs.createReadStream('huge-file.csv');
-stream.on('data', chunk => streamParser.write(chunk));
-stream.on('end', () => {
-    streamParser.end();
-    const results = streamParser.getRows();
-    console.log(`Parsed ${results.length} rows`);
-});
-```
-### DATA TRANSFORMATION
-```javascript
-const parser = new cisvParser();
-// Built-in C transforms (optimized)
-parser
-    .transform(0, 'uppercase')      // Column 0 to uppercase
-    .transform(1, 'lowercase')       // Column 1 to lowercase
-    .transform(2, 'trim')           // Column 2 trim whitespace
-    .transform(3, 'to_int')         // Column 3 to integer
-    .transform(4, 'to_float')       // Column 4 to float
-    .transform(5, 'base64_encode')  // Column 5 to base64
-    .transform(6, 'hash_sha256');   // Column 6 to SHA256
-// Custom fieldname transform :
-parser
-    .transform('name', 'uppercase');
-// Custom row transform :
-parser
-    .transformRow((row, rowObj) => {console.log(row}});
-// Custom JavaScript transforms
-parser.transform(7, value => new Date(value).toISOString());
-// Apply to all fields
-parser.transform(-1, value => value.replace(/[^\w\s]/gi, ''));
+## Parser API
+### Constructor options
+- `delimiter?: string` (first character used)
+- `quote?: string` (first character used)
+- `escape?: string | null` (`null` means RFC4180 doubled quote escaping)
+- `comment?: string | null`
+- `trim?: boolean`
+- `skipEmptyLines?: boolean`
+- `relaxed?: boolean`
+- `skipLinesWithError?: boolean`
+- `maxRowSize?: number`
+- `fromLine?: number`
+- `toLine?: number`
+### Instance methods
+- `parseSync(path: string): string[][]`
+- `parse(path: string): Promise<string[][]>`
+- `parseString(csv: string): string[][]`
+- `write(chunk: Buffer | string): void`
+- `end(): void`
+- `getRows(): string[][]`
+- `clear(): void`
+- `setConfig(config): this`
+- `getConfig(): object`
+- `transform(fieldIndex: number, kindOrFn: string | Function, context?): this`
+- `transformByName(fieldName: string, kindOrFn: string | Function, context?): this`
+- `setHeaderFields(fields: string[]): void`
+- `removeTransform(fieldIndex: number): this`
+- `removeTransformByName(fieldName: string): this`
+- `clearTransforms(): this`
+- `getTransformInfo(): { cTransformCount: number, jsTransformCount: number, fieldIndices: number[] }`
+- `getStats(): { rowCount: number, fieldCount: number, totalBytes: number, parseTime: number, currentLine: number }`
+- `openIterator(path: string): this`
+- `fetchRow(): string[] | null`
+- `closeIterator(): this`
+- `destroy(): void`
+### Static methods
+- `cisvParser.countRows(path: string): number`
+- `cisvParser.countRowsWithConfig(path: string, config?): number`
+## Transform Types
+Built-in transform names:
+- `uppercase`
+- `lowercase`
+- `trim`
+- `to_int` (or `int`)
+- `to_float` (or `float`)
+- `hash_sha256` (or `sha256`)
+- `base64_encode` (or `base64`)
+## Examples
+### Async parse
+```js
+const { cisvParser } = require('cisv');
-const transformed = parser.parseSync('data.csv');
+(async () => {
+  const parser = new cisvParser();
+  const rows = await parser.parse('data.csv');
+  console.log(rows.length);
+})();
 ```
-### ROW COUNTING
+### Streaming chunks
-```javascript
-import { cisvParser } from "cisv";
+```js
+const fs = require('fs');
+const { cisvParser } = require('cisv');
-// Fast row counting without parsing
-const count = cisvParser.countRows('large.csv');
+const parser = new cisvParser();
+for (const chunk of [
+  Buffer.from('id,name\n1,'),
+  Buffer.from('john\n2,jane\n')
+]) {
+  parser.write(chunk);
+}
+parser.end();
-// Count with specific configuration
-const tsvCount = cisvParser.countRowsWithConfig('data.tsv', {
-    delimiter: '\t',
-    skipEmptyLines: true,
-    fromLine: 10,
-    toLine: 1000
-});
+console.log(parser.getRows());
 ```
-### ROW-BY-ROW ITERATION
+### Iterator mode (low memory)
-The iterator API provides fgetcsv-style streaming with minimal memory footprint and early exit support.
-```javascript
-import { cisvParser } from "cisv";
-const parser = new cisvParser({ delimiter: ',', trim: true });
+```js
+const { cisvParser } = require('cisv');
-// Open iterator for a file
-parser.openIterator('/path/to/large.csv');
+const parser = new cisvParser({ delimiter: ',' });
+parser.openIterator('large.csv');
-// Fetch rows one at a time
 let row;
 while ((row = parser.fetchRow()) !== null) {
-    console.log(row);  // string[]
-    // Early exit - no wasted work
-    if (row[0] === 'stop') {
-        break;
-    }
+  if (row[0] === 'stop') break;
 }
-// Close iterator when done
 parser.closeIterator();
-// Methods support chaining
-parser.openIterator('data.csv')
-      .closeIterator();
 ```
-**Iterator Methods:**
+### Name-based transforms
+```js
+const { cisvParser } = require('cisv');
+const parser = new cisvParser();
+parser.setHeaderFields(['id', 'name', 'email']);
+parser.transformByName('name', 'uppercase');
+const rows = parser.parseString('id,name,email\n1,john,john@test.com');
+console.log(rows[1][1]); // JOHN
+```
-| Method | Description |
-|--------|-------------|
-| `openIterator(path)` | Open a file for row-by-row iteration |
-| `fetchRow()` | Get next row as `string[]`, or `null` if at EOF |
-| `closeIterator()` | Close iterator and release resources |
+## Notes
-**Notes:**
-- The iterator uses the parser's current configuration (delimiter, quote, trim, etc.)
-- Calling `destroy()` automatically closes any open iterator
-- Only one iterator can be open at a time per parser instance
-- Breaking out of iteration and calling `closeIterator()` stops parsing immediately
+- Returned rows include the header row when the input has one.
+- `removeTransform()` and `removeTransformByName()` currently remove JavaScript transforms only; removal of C transforms by index or name is not fully implemented yet.
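+- `parse()` runs in a worker thread when no transforms are attached. When C or JavaScript transforms are attached, it parses synchronously on the calling thread (as `parseSync()` does) and returns a promise that is already settled, which preserves the existing transform behavior for compatibility.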

package/build/Release/cisv.node CHANGED Viewed

Binary file

package/cisv/cisv_addon.cc CHANGED Viewed

@@ -6,6 +6,7 @@
 #include <string>
 #include <unordered_map>
 #include <chrono>
+#include <cstdint>
 namespace {
@@ -60,11 +61,77 @@ static bool isValidUtf8(const char* data, size_t len) {
     return true;
 }
+// Fast path for common ASCII-only CSV data: returns true when none of the `len` bytes at `data` has its high bit (0x80) set (so also true for len == 0).
+static inline bool isAllAscii(const char* data, size_t len) {
+    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data);
+    size_t i = 0;
+    // Check machine-word chunks first: one masked test covers sizeof(uintptr_t) bytes at a time.
+    const size_t word_size = sizeof(uintptr_t);
+    const uintptr_t high_mask = sizeof(uintptr_t) == 8  // 0x80 repeated in every byte of the word
+        ? static_cast<uintptr_t>(0x8080808080808080ULL)
+        : static_cast<uintptr_t>(0x80808080UL);
+    while (i + word_size <= len) {
+        uintptr_t word;
+        memcpy(&word, bytes + i, word_size);  // copy into a local instead of dereferencing a cast pointer at an arbitrary offset
+        if (word & high_mask) {  // at least one byte in this chunk is >= 0x80
+            return false;
+        }
+        i += word_size;
+    }
+    while (i < len) {  // leftover tail (fewer than word_size bytes), checked one byte at a time
+        if (bytes[i] & 0x80) {
+            return false;
+        }
+        i++;
+    }
+    return true;
+}
 // Create Napi::String with UTF-8 validation (safe version)
 // Falls back to replacement character representation for invalid UTF-8
-static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
+static napi_value SafeNewStringValue(napi_env env, const char* data, size_t len) {
+    // Short fields are extremely common in CSV; avoid heavier ASCII/UTF-8 scans.
+    if (len <= 32) {
+        bool ascii = true;
+        for (size_t i = 0; i < len; i++) {
+            if (static_cast<unsigned char>(data[i]) & 0x80) {
+                ascii = false;
+                break;
+            }
+        }
+        napi_value short_value = nullptr;
+        if (ascii) {
+            if (napi_create_string_latin1(env, data, len, &short_value) == napi_ok && short_value) {
+                return short_value;
+            }
+        } else {
+            if (napi_create_string_utf8(env, data, len, &short_value) == napi_ok && short_value) {
+                return short_value;
+            }
+        }
+    }
+    // Fastest path: ASCII-only data is valid Latin-1.
+    // Using Latin-1 creation avoids UTF-8 decoding overhead.
+    if (isAllAscii(data, len)) {
+        napi_value latin1_value = nullptr;
+        if (napi_create_string_latin1(env, data, len, &latin1_value) == napi_ok && latin1_value) {
+            return latin1_value;
+        }
+        // Fallback to UTF-8 path if Latin-1 creation fails unexpectedly.
+        napi_value utf8_value = nullptr;
+        napi_create_string_utf8(env, data, len, &utf8_value);
+        return utf8_value;
+    }
     if (isValidUtf8(data, len)) {
-        return Napi::String::New(env, data, len);
+        napi_value utf8_value = nullptr;
+        napi_create_string_utf8(env, data, len, &utf8_value);
+        return utf8_value;
     }
     // Invalid UTF-8 - replace invalid bytes with replacement character
@@ -109,7 +176,13 @@ static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
         }
     }
-    return Napi::String::New(env, safe_str);
+    napi_value safe_value = nullptr;
+    napi_create_string_utf8(env, safe_str.c_str(), safe_str.length(), &safe_value);
+    return safe_value;
+}
+static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {  // Napi::String-returning wrapper: wraps the napi_value produced by SafeNewStringValue for the same (data, len)
+    return Napi::String(env, SafeNewStringValue(env, data, len));
 }
 // Extended RowCollector that handles transforms
@@ -266,6 +339,76 @@ static void error_cb(void *user, int line, const char *msg) {
     fprintf(stderr, "CSV Parse Error at line %d: %s\n", line, msg);
 }
+class ParseFileWorker final : public Napi::AsyncWorker {  // Batch-parses one file in Execute(), then settles a promise with the rows (OnOK) or with the error (OnError).
+public:
+    ParseFileWorker(
+        Napi::Env env,
+        std::string path,
+        cisv_config config,
+        Napi::Promise::Deferred deferred
+    ) : Napi::AsyncWorker(env),
+        path_(std::move(path)),
+        config_(config),  // the worker keeps its own by-value copy of the config struct
+        deferred_(deferred) {}
+    void Execute() override {  // Uses only the C batch API and std containers; no JS values are created here.
+        cisv_result_t *result = cisv_parse_file_batch(path_.c_str(), &config_);
+        if (!result) {
+            SetError("parse error: " + std::string(strerror(errno)));  // NOTE(review): assumes errno is meaningful after a null return — confirm against the C core
+            return;
+        }
+        if (result->error_code != 0) {
+            std::string msg = result->error_message[0] ? result->error_message : "parse error";  // empty message falls back to the bare prefix
+            if (msg.rfind("parse error", 0) != 0) {  // true when msg does not already start with "parse error"
+                msg = "parse error: " + msg;
+            }
+            SetError(msg);
+            cisv_result_free(result);
+            return;
+        }
+        rows_.reserve(result->row_count);
+        for (size_t i = 0; i < result->row_count; i++) {  // copy every field into rows_ so the data outlives cisv_result_free() below
+            cisv_row_t *row = &result->rows[i];
+            std::vector<std::string> out_row;
+            out_row.reserve(row->field_count);
+            for (size_t j = 0; j < row->field_count; j++) {
+                out_row.emplace_back(row->fields[j], row->field_lengths[j]);  // explicit length: a field is not read as a NUL-terminated string
+            }
+            rows_.emplace_back(std::move(out_row));
+        }
+        cisv_result_free(result);
+    }
+    void OnOK() override {  // Builds a JS array of string arrays from rows_ and resolves the promise with it.
+        Napi::Env env = Env();
+        Napi::Array out = Napi::Array::New(env, rows_.size());
+        for (size_t i = 0; i < rows_.size(); i++) {
+            Napi::Array row = Napi::Array::New(env, rows_[i].size());
+            for (size_t j = 0; j < rows_[i].size(); j++) {
+                const std::string &field = rows_[i][j];
+                row[j] = SafeNewString(env, field.c_str(), field.length());  // same string-creation helper used by the other row-producing paths in this file
+            }
+            out[i] = row;
+        }
+        deferred_.Resolve(out);
+    }
+    void OnError(const Napi::Error &e) override {  // Rejects the promise with the error object handed to this callback.
+        deferred_.Reject(e.Value());
+    }
+private:
+    std::string path_;  // file to parse
+    cisv_config config_;  // passed by address to cisv_parse_file_batch in Execute()
+    Napi::Promise::Deferred deferred_;  // settled exactly once, in OnOK() or OnError()
+    std::vector<std::vector<std::string>> rows_;  // filled in Execute(), converted to JS values in OnOK()
+};
 } // namespace
 class CisvParser : public Napi::ObjectWrap<CisvParser> {
@@ -310,6 +453,8 @@ public:
         total_bytes_ = 0;
         is_destroyed_ = false;
         iterator_ = nullptr;
+        batch_result_ = nullptr;
+        stream_buffering_active_ = true;
         // Initialize configuration with defaults
         cisv_config_init(&config_);
@@ -503,6 +648,7 @@ public:
                 delete rc_;
                 rc_ = nullptr;
             }
+            clearBatchResult();
             is_destroyed_ = true;
         }
     }
@@ -528,26 +674,35 @@ public:
         auto start = std::chrono::high_resolution_clock::now();
-        // Clear previous data
-        rc_->rows.clear();
-        rc_->current.clear();
-        rc_->current_field_index = 0;
+        resetRowState();
-        // Set environment for JS transforms
-        rc_->env = env;
-        int result = cisv_parser_parse_file(parser_, path.c_str());
+        int result = 0;
+        if (!hasTransforms()) {
+            cisv_result_t *batch = cisv_parse_file_batch(path.c_str(), &config_);
+            if (!batch) {
+                throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
+            }
+            if (batch->error_code != 0) {
+                std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
+                cisv_result_free(batch);
+                throw Napi::Error::New(env, msg);
+            }
+            clearBatchResult();
+            batch_result_ = batch;
+        } else {
+            // Set environment for JS transforms
+            rc_->env = env;
+            result = cisv_parser_parse_file(parser_, path.c_str());
+            // Clear the environment reference after parsing
+            rc_->env = nullptr;
+            if (result < 0) {
+                throw Napi::Error::New(env, "parse error: " + std::to_string(result));
+            }
+        }
         auto end = std::chrono::high_resolution_clock::now();
         parse_time_ = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
-        // Clear the environment reference after parsing
-        rc_->env = nullptr;
-        if (result < 0) {
-            throw Napi::Error::New(env, "parse error: " + std::to_string(result));
-        }
         return drainRows(env);
     }
@@ -565,22 +720,33 @@ public:
         std::string content = info[0].As<Napi::String>();
-        // Clear previous data
-        rc_->rows.clear();
-        rc_->current.clear();
-        rc_->current_field_index = 0;
+        resetRowState();
-        // Set environment for JS transforms
-        rc_->env = env;
+        if (!hasTransforms()) {
+            cisv_result_t *batch = cisv_parse_string_batch(content.c_str(), content.length(), &config_);
+            if (!batch) {
+                throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
+            }
+            if (batch->error_code != 0) {
+                std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
+                cisv_result_free(batch);
+                throw Napi::Error::New(env, msg);
+            }
+            clearBatchResult();
+            batch_result_ = batch;
+        } else {
+            // Set environment for JS transforms
+            rc_->env = env;
-        // Write the string content as chunks
-        cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
-        cisv_parser_end(parser_);
+            // Write the string content as chunks
+            cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
+            cisv_parser_end(parser_);
-        total_bytes_ = content.length();
+            // Clear the environment reference after parsing
+            rc_->env = nullptr;
+        }
-        // Clear the environment reference after parsing
-        rc_->env = nullptr;
+        total_bytes_ = content.length();
         return drainRows(env);
     }
@@ -597,44 +763,95 @@ public:
             throw Napi::TypeError::New(env, "Expected one argument");
         }
+        // Streaming writes produce row-callback data, not batch results.
+        clearBatchResult();
         // Set environment for JS transforms
         rc_->env = env;
+        const uint8_t* chunk_data = nullptr;
+        size_t chunk_size = 0;
+        std::string chunk_storage;
         if (info[0].IsBuffer()) {
             auto buf = info[0].As<Napi::Buffer<uint8_t>>();
-            size_t buf_len = buf.Length();
-            // Check for overflow before adding to total_bytes_
-            if (buf_len > SIZE_MAX - total_bytes_) {
-                throw Napi::Error::New(env, "Total bytes would overflow");
-            }
-            cisv_parser_write(parser_, buf.Data(), buf_len);
-            total_bytes_ += buf_len;
-            return;
+            chunk_data = buf.Data();
+            chunk_size = buf.Length();
+        } else if (info[0].IsString()) {
+            chunk_storage = info[0].As<Napi::String>();
+            chunk_data = reinterpret_cast<const uint8_t*>(chunk_storage.data());
+            chunk_size = chunk_storage.size();
+        } else {
+            throw Napi::TypeError::New(env, "Expected Buffer or String");
+        }
+        // Check for overflow before adding to total_bytes_
+        if (chunk_size > SIZE_MAX - total_bytes_) {
+            throw Napi::Error::New(env, "Total bytes would overflow");
         }
-        if (info[0].IsString()) {
-            std::string chunk = info[0].As<Napi::String>();
-            size_t chunk_size = chunk.size();
-            // Check for overflow before adding to total_bytes_
-            if (chunk_size > SIZE_MAX - total_bytes_) {
-                throw Napi::Error::New(env, "Total bytes would overflow");
+        // Fast streaming mode:
+        // Buffer chunks when no transforms/iterator are active and batch-parse on end().
+        // If buffered payload exceeds threshold, flush once to parser and continue streaming.
+        if (!hasTransforms() && iterator_ == nullptr) {
+            if (chunk_size > SIZE_MAX - pending_stream_.size()) {
+                throw Napi::Error::New(env, "Buffered stream size would overflow");
             }
-            cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(chunk.data()), chunk_size);
-            total_bytes_ += chunk_size;
-            return;
+            if (stream_buffering_active_) {
+                pending_stream_.append(reinterpret_cast<const char*>(chunk_data), chunk_size);
+                total_bytes_ += chunk_size;
+                if (pending_stream_.size() > kStreamBufferLimitBytes) {
+                    flushPendingStreamToParser();
+                    stream_buffering_active_ = false;
+                }
+                return;
+            }
+        } else if (!pending_stream_.empty()) {
+            flushPendingStreamToParser();
+            stream_buffering_active_ = false;
         }
-        throw Napi::TypeError::New(env, "Expected Buffer or String");
+        cisv_parser_write(parser_, chunk_data, chunk_size);
+        total_bytes_ += chunk_size;
     }
     void End(const Napi::CallbackInfo &info) {
-        if (!is_destroyed_) {
-            cisv_parser_end(parser_);
-            // Clear the environment reference after ending to prevent stale references
+        if (is_destroyed_) {
+            return;
+        }
+        if (stream_buffering_active_ && !pending_stream_.empty() &&
+            !hasTransforms() && iterator_ == nullptr &&
+            rc_ && rc_->rows.empty() && rc_->current.empty()) {
+            cisv_result_t *batch = cisv_parse_string_batch(
+                pending_stream_.data(), pending_stream_.size(), &config_);
+            if (!batch) {
+                throw Napi::Error::New(info.Env(), "parse error: " + std::string(strerror(errno)));
+            }
+            if (batch->error_code != 0) {
+                std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
+                cisv_result_free(batch);
+                throw Napi::Error::New(info.Env(), msg);
+            }
+            clearBatchResult();
+            batch_result_ = batch;
+            pending_stream_.clear();
             rc_->env = nullptr;
-            // Note: JS transforms stored in rc_->js_transforms remain valid
-            // as they are Persistent references managed by the addon lifecycle
+            return;
         }
+        if (!pending_stream_.empty()) {
+            flushPendingStreamToParser();
+            stream_buffering_active_ = false;
+        }
+        cisv_parser_end(parser_);
+        // Clear the environment reference after ending to prevent stale references
+        rc_->env = nullptr;
+        // Note: JS transforms stored in rc_->js_transforms remain valid
+        // as they are Persistent references managed by the addon lifecycle
     }
     Napi::Value GetRows(const Napi::CallbackInfo &info) {
@@ -642,16 +859,23 @@ public:
             Napi::Env env = info.Env();
             throw Napi::Error::New(env, "Parser has been destroyed");
         }
+        if (!pending_stream_.empty()) {
+            flushPendingStreamToParser();
+            stream_buffering_active_ = false;
+        }
         return drainRows(info.Env());
     }
     void Clear(const Napi::CallbackInfo &info) {
         if (!is_destroyed_ && rc_) {
+            clearBatchResult();
             rc_->rows.clear();
             rc_->current.clear();
             rc_->current_field_index = 0;
             total_bytes_ = 0;
             parse_time_ = 0;
+            pending_stream_.clear();
+            stream_buffering_active_ = true;
             // Also clear the environment reference
             rc_->env = nullptr;
         }
@@ -870,11 +1094,26 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
         // Handle JavaScript function transforms by name
         Napi::Function func = info[1].As<Napi::Function>();
-        // Add to the C transform pipeline by name
-        if (cisv_transform_pipeline_add_js_by_name(rc_->pipeline, field_name.c_str(), &func) < 0) {
-            throw Napi::Error::New(env, "Failed to add JS transform for field: " + field_name);
+        if (!rc_->pipeline || !rc_->pipeline->header_fields) {
+            throw Napi::Error::New(env,
+                "Header fields are not set. Call setHeaderFields([...]) before transformByName(..., fn).");
         }
+        int field_index = -1;
+        for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
+            if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
+                field_index = static_cast<int>(i);
+                break;
+            }
+        }
+        if (field_index < 0) {
+            throw Napi::Error::New(env, "Unknown field name: " + field_name);
+        }
+        // Store callback in the same map used by applyTransforms().
+        rc_->js_transforms[field_index] = Napi::Persistent(func);
     } else {
         throw Napi::TypeError::New(env, "Transform must be a string type or function");
     }
@@ -1008,6 +1247,11 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
         }
         // Clear JavaScript transforms
+        for (auto &pair : rc_->js_transforms) {
+            if (!pair.second.IsEmpty()) {
+                pair.second.Reset();
+            }
+        }
         rc_->js_transforms.clear();
         // Clear C transforms - destroy and DON'T recreate pipeline yet
@@ -1033,18 +1277,32 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
         std::string path = info[0].As<Napi::String>();
-        // Create a promise
         auto deferred = Napi::Promise::Deferred::New(env);
-        // For simplicity, we'll use sync parsing here
-        // FIXME: In production, this should use worker threads
-        try {
-            Napi::Value result = ParseSync(info);
-            deferred.Resolve(result);
-        } catch (const Napi::Error& e) {
-            deferred.Reject(e.Value());
+        // Preserve behavior for transform-enabled parsers (native + JS transforms)
+        // until async transform execution is implemented.
+        bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;
+        bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
+        if (has_c_transforms || has_js_transforms) {
+            try {
+                Napi::Value result = ParseSync(info);
+                deferred.Resolve(result);
+            } catch (const Napi::Error &e) {
+                deferred.Reject(e.Value());
+            }
+            return deferred.Promise();
         }
+        // Use batch parser in a worker thread to avoid blocking the event loop.
+        cisv_config worker_config = config_;
+        worker_config.field_cb = nullptr;
+        worker_config.row_cb = nullptr;
+        worker_config.error_cb = nullptr;
+        worker_config.user = nullptr;
+        auto *worker = new ParseFileWorker(env, path, worker_config, deferred);
+        worker->Queue();
         return deferred.Promise();
     }
@@ -1090,10 +1348,22 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
         }
         Napi::Object stats = Napi::Object::New(env);
+        size_t row_count = 0;
+        size_t field_count = 0;
+        if (batch_result_) {
+            row_count = batch_result_->row_count;
+            if (batch_result_->row_count > 0) {
+                field_count = batch_result_->rows[0].field_count;
+            }
+        } else if (rc_) {
+            row_count = rc_->rows.size();
+            if (!rc_->rows.empty()) {
+                field_count = rc_->rows[0].size();
+            }
+        }
-        stats.Set("rowCount", Napi::Number::New(env, rc_ ? rc_->rows.size() : 0));
-        stats.Set("fieldCount", Napi::Number::New(env,
-            (rc_ && !rc_->rows.empty()) ? rc_->rows[0].size() : 0));
+        stats.Set("rowCount", Napi::Number::New(env, row_count));
+        stats.Set("fieldCount", Napi::Number::New(env, field_count));
         stats.Set("totalBytes", Napi::Number::New(env, total_bytes_));
         stats.Set("parseTime", Napi::Number::New(env, parse_time_));
         stats.Set("currentLine", Napi::Number::New(env,
@@ -1233,14 +1503,13 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
             throw Napi::Error::New(env, "Error reading CSV row");
         }
-        // Create array of strings for the row
-        Napi::Array row = Napi::Array::New(env, field_count);
+        napi_value row;
+        napi_create_array_with_length(env, field_count, &row);
         for (size_t i = 0; i < field_count; i++) {
-            // SECURITY: Use safe string creation to handle invalid UTF-8
-            row.Set(i, SafeNewString(env, fields[i], lengths[i]));
+            napi_set_element(env, row, i, SafeNewStringValue(env, fields[i], lengths[i]));
         }
-        return row;
+        return Napi::Value(env, row);
     }
     /**
@@ -1263,27 +1532,94 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
     }
 private:
+    void clearBatchResult() {  // Release the held batch result, if any, via cisv_result_free.
+        if (batch_result_) {
+            cisv_result_free(batch_result_);
+            batch_result_ = nullptr;  // nulling the pointer makes a repeated call skip the free
+        }
+    }
+    bool hasTransforms() const {  // True when rc_ has a pipeline with a non-zero count or a non-empty js_transforms list.
+        bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;  // rc_ and pipeline are checked before each dereference
+        bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
+        return has_c_transforms || has_js_transforms;  // false whenever rc_ is null
+    }
+    void resetRowState() {  // Drop the batch result and buffered stream bytes, then clear the row state held in rc_.
+        clearBatchResult();
+        pending_stream_.clear();
+        stream_buffering_active_ = true;  // the flag is set back to true on every reset
+        if (!rc_) return;  // without rc_ there is no row state left to clear
+        rc_->rows.clear();
+        rc_->current.clear();
+        rc_->current_field_index = 0;
+    }
+    void flushPendingStreamToParser() {  // Pass every byte buffered in pending_stream_ to cisv_parser_write, then empty the buffer.
+        if (pending_stream_.empty()) {
+            return;  // nothing buffered, so the parser is not called
+        }
+        cisv_parser_write(  // NOTE(review): the call's result is discarded and parser_ is not null-checked here — confirm callers guarantee a live parser
+            parser_,
+            reinterpret_cast<const uint8_t*>(pending_stream_.data()),
+            pending_stream_.size());
+        pending_stream_.clear();  // the buffer is emptied unconditionally after the write call
+    }
+    void loadRowsFromBatch(const cisv_result_t *result) {  // Replace rc_->rows with std::string copies of every field in `result`.
+        if (!rc_ || !result) return;  // no-op when either the destination or the source is missing
+        rc_->rows.clear();  // rows already stored are discarded, not appended to
+        rc_->rows.reserve(result->row_count);
+        for (size_t i = 0; i < result->row_count; i++) {
+            const cisv_row_t *row = &result->rows[i];
+            std::vector<std::string> out_row;
+            out_row.reserve(row->field_count);
+            for (size_t j = 0; j < row->field_count; j++) {
+                out_row.emplace_back(row->fields[j], row->field_lengths[j]);  // pointer+length copy; assumes fields[j] is non-null — TODO confirm
+            }
+            rc_->rows.emplace_back(std::move(out_row));
+        }
+    }
     Napi::Value drainRows(Napi::Env env) {  // Build a JS array of row arrays from batch_result_ when set, otherwise from rc_->rows.
+        if (batch_result_) {  // a held batch result takes precedence over rc_->rows
+            napi_value rows;
+            napi_create_array_with_length(env, batch_result_->row_count, &rows);  // NOTE(review): the results of the napi_* calls in this function are discarded
+            for (size_t i = 0; i < batch_result_->row_count; ++i) {
+                const cisv_row_t *src_row = &batch_result_->rows[i];
+                napi_value row;
+                napi_create_array_with_length(env, src_row->field_count, &row);
+                for (size_t j = 0; j < src_row->field_count; ++j) {
+                    napi_set_element(env, row, j, SafeNewStringValue(env, src_row->fields[j], src_row->field_lengths[j]));
+                }
+                napi_set_element(env, rows, i, row);
+            }
+            return Napi::Value(env, rows);  // batch_result_ is not freed on this path
+        }
         if (!rc_) {
             return Napi::Array::New(env, 0);  // no row storage: return an empty array
         }
-        Napi::Array rows = Napi::Array::New(env, rc_->rows.size());
+        napi_value rows;
+        napi_create_array_with_length(env, rc_->rows.size(), &rows);
         for (size_t i = 0; i < rc_->rows.size(); ++i) {
-            Napi::Array row = Napi::Array::New(env, rc_->rows[i].size());
+            napi_value row;
+            napi_create_array_with_length(env, rc_->rows[i].size(), &row);
             for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
                 // SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
                 const std::string& field = rc_->rows[i][j];
-                row[j] = SafeNewString(env, field.c_str(), field.length());
+                napi_set_element(env, row, j, SafeNewStringValue(env, field.c_str(), field.length()));
             }
-            rows[i] = row;
+            napi_set_element(env, rows, i, row);
         }
         // Don't clear here if we want to keep data for multiple reads
         // rc_->rows.clear();
-        return rows;
+        return Napi::Value(env, rows);  // rc_->rows is left intact, so a later call returns the same data
     }
     cisv_parser *parser_;
@@ -1293,6 +1629,10 @@ private:
     double parse_time_;
     bool is_destroyed_;
     cisv_iterator_t *iterator_;  // For row-by-row iteration
+    cisv_result_t *batch_result_;  // batch result pointer; released with cisv_result_free in clearBatchResult()
+    std::string pending_stream_;  // bytes held until flushPendingStreamToParser() passes them to cisv_parser_write
+    bool stream_buffering_active_;  // set to true by resetRowState(); NOTE(review): other writers are outside this view
+    static constexpr size_t kStreamBufferLimitBytes = 8 * 1024 * 1024;  // 8 MiB; presumably the cap on pending_stream_ — usage not visible here, confirm
 };
 // Initialize all exports

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cisv",
-  "version": "0.2.5",
+  "version": "0.3.2",
   "description": "The csv parser on steroids.",
   "author": "sanix<s4nixd@gmail.com>",
   "main": "./build/Release/cisv.node",