npm - cisv - Versions diffs - 0.0.33 → 0.0.41 - Mend

cisv 0.0.33 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -19,44 +19,63 @@ I wrote about basics in a blog post, you can read here :https://sanixdk.xyz/blog
 - SIMD accelerated with AVX-512/AVX2 auto-detection
 - Dynamic lookup tables for configurable parsing
+## CLI BENCHMARKS WITH DOCKER
+```bash
+$ docker build -t cisv-benchmark .
+```
+To run them with explicit resource limits for the container (CPU, memory, swap), you can:
+```bash
+$ docker run --rm      \
+    --cpus="2.0"       \
+    --memory="4g"      \
+    --memory-swap="4g" \
+    --cpu-shares=1024  \
+    --security-opt     \
+    seccomp=unconfined \
+    cisv-benchmark
+```
 ## BENCHMARKS
 Benchmarks comparison with existing popular tools,
-cf pipeline you can check : (https://github.com/Sanix-Darker/cisv/actions/runs/17194915214/job/48775516036)
+see the "Publish to npm" step of this pipeline run: (https://github.com/Sanix-Darker/cisv/actions/runs/17697547058/job/50298916576)
 ### SYNCHRONOUS RESULTS
 | Library            | Speed (MB/s) | Avg Time (ms) | Operations/sec |
 |--------------------|--------------|---------------|----------------|
-| cisv (sync)        | 30.04        | 0.02          | 64936          |
-| csv-parse (sync)   | 13.35        | 0.03          | 28870          |
-| papaparse (sync)   | 25.16        | 0.02          | 54406          |
+| cisv (sync)        | 45.58        | 0.01          | 98543          |
+| csv-parse (sync)   | 18.11        | 0.03          | 39155          |
+| papaparse (sync)   | 28.03        | 0.02          | 60596          |
 ### SYNCHRONOUS RESULTS (WITH DATA ACCESS)
 | Library            | Speed (MB/s) | Avg Time (ms) | Operations/sec |
 |--------------------|--------------|---------------|----------------|
-| cisv (sync)        | 31.24        | 0.01          | 67543          |
-| csv-parse (sync)   | 15.42        | 0.03          | 33335          |
-| papaparse (sync)   | 25.49        | 0.02          | 55107          |
+| cisv (sync)        | 46.80        | 0.01          | 101185         |
+| csv-parse (sync)   | 18.92        | 0.02          | 40900          |
+| papaparse (sync)   | 28.38        | 0.02          | 61363          |
 ### ASYNCHRONOUS RESULTS
 | Library                  | Speed (MB/s) | Avg Time (ms) | Operations/sec |
 |--------------------------|--------------|---------------|----------------|
-| cisv (async/stream)      | 61.31        | 0.01          | 132561         |
-| papaparse (async/stream) | 19.24        | 0.02          | 41603          |
-| neat-csv (async/promise) | 9.09         | 0.05          | 19655          |
+| cisv (async/stream)      | 70.07        | 0.01          | 151485         |
+| papaparse (async/stream) | 21.58        | 0.02          | 46646          |
+| neat-csv (async/promise) | 9.77         | 0.05          | 21126          |
 ### ASYNCHRONOUS RESULTS (WITH DATA ACCESS)
 | Library                  | Speed (MB/s) | Avg Time (ms) | Operations/sec |
 |--------------------------|--------------|---------------|----------------|
-| cisv (async/stream)      | 24.59        | 0.02          | 53160          |
-| papaparse (async/stream) | 21.86        | 0.02          | 47260          |
-| neat-csv (async/promise) | 9.38         | 0.05          | 20283          |
+| cisv (async/stream)      | 25.23        | 0.02          | 54545          |
+| papaparse (async/stream) | 22.49        | 0.02          | 48622          |
+| neat-csv (async/promise) | 9.91         | 0.05          | 21428          |
 ## INSTALLATION

package/cisv/cisv_addon.cc CHANGED Viewed

@@ -174,6 +174,8 @@ public:
         // Initialize configuration with defaults
         cisv_config_init(&config_);
+        config_.max_row_size = 0;
         // Handle constructor options if provided
         if (info.Length() > 0 && info[0].IsObject()) {
             Napi::Object options = info[0].As<Napi::Object>();
@@ -261,7 +263,10 @@ public:
         // Numeric options
         if (options.Has("maxRowSize")) {
-            config_.max_row_size = options.Get("maxRowSize").As<Napi::Number>().Uint32Value();
+            Napi::Value val = options.Get("maxRowSize");
+            if (!val.IsNull() && !val.IsUndefined()) {
+                config_.max_row_size = val.As<Napi::Number>().Uint32Value();
+            }
         }
         if (options.Has("fromLine")) {

package/cisv/cisv_parser.c CHANGED Viewed

@@ -6,7 +6,6 @@
 #include <errno.h>
 #include <time.h>
 #include <stdbool.h>
-#include <ctype.h>
 // NOTE: not dealing with Windows for now, too many issues
 #include <sys/mman.h>
 #include <fcntl.h>
@@ -14,10 +13,17 @@
 #include <getopt.h>
 #include <sys/time.h>
 #include "cisv_parser.h"
-#include "cisv_simd.h"
-#define RINGBUF_SIZE (1 << 20) // 1 MiB (we may adjust according to needs)
-// #define RINGBUF_SIZE (1 << 16) // 64kb (for memory safe reasons)
+#ifdef __AVX512F__
+#include <immintrin.h>
+#endif
+#ifdef __AVX2__
+#include <immintrin.h>
+#endif
+#define RINGBUF_SIZE (256 * 1024)
+#define DIRECT_PARSE_THRESHOLD (64 * 1024)  // Parse directly if chunk > 64KB
 #define PREFETCH_DISTANCE 256
 struct cisv_parser {
@@ -95,133 +101,285 @@ void cisv_config_init(cisv_config *config) {
 static void init_tables(cisv_parser *parser) {  // Builds the state_table/action_table lookup tables (4 rows of 256 entries each) from the parser's quote, delimiter, escape and comment bytes; returns immediately if tables_initialized is already set.
     if (parser->tables_initialized) return;
-    // Allocate tables if not already allocated
+    // Allocate one 8*256-byte buffer: state_table is the first 4*256 bytes, action_table the last 4*256
     if (!parser->state_table) {
-        parser->state_table = calloc(4 * 256, sizeof(uint8_t));
-        parser->action_table = calloc(4 * 256, sizeof(uint8_t));
-        if (!parser->state_table || !parser->action_table) {
-            return;  // Handle allocation failure gracefully
-        }
+        parser->state_table = aligned_alloc(64, 8 * 256);  // 64-byte aligned; the AVX2 stores below are aligned 32-byte writes
+        if (!parser->state_table) return;  // allocation failed: tables stay uninitialized
+        parser->action_table = parser->state_table + (4 * 256);  // NOTE(review): action_table now points into state_table's allocation; any cleanup that frees action_table separately must be updated - confirm
+        memset(parser->state_table, 0, 8 * 256);
     }
-    // Get table pointers for easier access
-    uint8_t (*state_table)[256] = (uint8_t (*)[256])parser->state_table;
-    uint8_t (*action_table)[256] = (uint8_t (*)[256])parser->action_table;
+    uint8_t (*st)[256] = (uint8_t (*)[256])parser->state_table;
+    uint8_t (*at)[256] = (uint8_t (*)[256])parser->action_table;
+    // st/at above view the tables as [state][byte]; rows get a default fill first, then special bytes are patched
+    // Cache the configurable bytes (used as column indices below)
+    const uint8_t q = parser->quote;
+    const uint8_t d = parser->delimiter;
+    const uint8_t e = parser->escape;
+    const uint8_t c = parser->comment;
+    // Default transitions: the S_UNQUOTED, S_QUOTED and S_COMMENT rows map every byte back to the same state
+    #ifdef __AVX2__
+    __m256i unquoted_state = _mm256_set1_epi8(S_UNQUOTED);
+    __m256i quoted_state = _mm256_set1_epi8(S_QUOTED);
+    __m256i comment_state = _mm256_set1_epi8(S_COMMENT);
+    for (int i = 0; i < 256; i += 32) {
+        _mm256_store_si256((__m256i*)&st[S_UNQUOTED][i], unquoted_state);
+        _mm256_store_si256((__m256i*)&st[S_QUOTED][i], quoted_state);
+        _mm256_store_si256((__m256i*)&st[S_COMMENT][i], comment_state);
+    }
+    #else
+    memset(st[S_UNQUOTED], S_UNQUOTED, 256);
+    memset(st[S_QUOTED], S_QUOTED, 256);
+    memset(st[S_COMMENT], S_COMMENT, 256);
+    #endif
-    // Initialize state transitions
-    for (int c = 0; c < 256; c++) {
-        // S_UNQUOTED transitions
-        state_table[S_UNQUOTED][c] = S_UNQUOTED;
-        if (c == parser->quote) {
-            state_table[S_UNQUOTED][c] = S_QUOTED;
-        } else if (parser->comment && c == parser->comment) {
-            state_table[S_UNQUOTED][c] = S_COMMENT;
-        }
+    // Special transitions: the quote byte opens a quoted field; a non-zero comment byte starts a comment
+    st[S_UNQUOTED][q] = S_QUOTED;
+    if (c) st[S_UNQUOTED][c] = S_COMMENT;
-        // S_QUOTED transitions
-        state_table[S_QUOTED][c] = S_QUOTED;
-        if (parser->escape && c == parser->escape) {
-            state_table[S_QUOTED][c] = S_QUOTE_ESC;
-        } else if (c == parser->quote) {
-            state_table[S_QUOTED][c] = S_QUOTE_ESC;
-        }
+    if (e) {  // explicit escape byte configured
+        st[S_QUOTED][e] = S_QUOTE_ESC;  // NOTE(review): the removed loop also sent the quote byte to S_QUOTE_ESC in this mode; here st[S_QUOTED][q] stays S_QUOTED - confirm the closing quote is handled elsewhere
+        memset(st[S_QUOTE_ESC], S_QUOTED, 256);  // whatever follows the escape byte returns to S_QUOTED
+    } else {  // no escape byte: quote-doubling (the removed code called this RFC4180-style)
+        st[S_QUOTED][q] = S_QUOTE_ESC;  // a quote inside a quoted field: closing quote or first half of ""
+        memset(st[S_QUOTE_ESC], S_UNQUOTED, 256);
+        st[S_QUOTE_ESC][q] = S_QUOTED;
+    }
-        // S_QUOTE_ESC transitions
-        if (parser->escape) {
-            // With explicit escape character, always return to quoted state
-            state_table[S_QUOTE_ESC][c] = S_QUOTED;
-        } else {
-            // RFC4180-style: "" becomes a literal quote
-            if (c == parser->quote) {
-                state_table[S_QUOTE_ESC][c] = S_QUOTED;
-            } else {
-                state_table[S_QUOTE_ESC][c] = S_UNQUOTED;
-            }
-        }
+    st[S_COMMENT]['\n'] = S_UNQUOTED;  // newline ends the comment
-        // S_COMMENT transitions - stay in comment until newline
-        state_table[S_COMMENT][c] = S_COMMENT;
-        if (c == '\n') {
-            state_table[S_COMMENT][c] = S_UNQUOTED;
+    // Actions: default ACT_NONE; in S_UNQUOTED the delimiter and '\r' emit ACT_FIELD, '\n' emits ACT_FIELD | ACT_ROW
+    memset(at, ACT_NONE, 4 * 256);
+    at[S_UNQUOTED][d] = ACT_FIELD;
+    at[S_UNQUOTED]['\n'] = ACT_FIELD | ACT_ROW;
+    at[S_UNQUOTED]['\r'] = ACT_FIELD;
+    if (!e) {
+        // Quote-doubling mode: every byte except the quote itself gets ACT_REPROCESS in S_QUOTE_ESC
+        for (int i = 0; i < 256; i++) {
+            at[S_QUOTE_ESC][i] = (i != q) ? ACT_REPROCESS : ACT_NONE;
         }
     }
-    // Initialize action table
-    memset(action_table, ACT_NONE, 4 * 256);
+    // Comment state: every byte is ACT_SKIP except '\n' (set to ACT_ROW below)
+    #ifdef __AVX2__
+    __m256i skip_act = _mm256_set1_epi8(ACT_SKIP);
+    for (int i = 0; i < 256; i += 32) {
+        _mm256_store_si256((__m256i*)&at[S_COMMENT][i], skip_act);
+    }
+    #else
+    memset(at[S_COMMENT], ACT_SKIP, 256);
+    #endif
+    at[S_COMMENT]['\n'] = ACT_ROW;
-    // S_UNQUOTED actions
-    action_table[S_UNQUOTED][(uint8_t)parser->delimiter] = ACT_FIELD;
-    action_table[S_UNQUOTED]['\n'] = ACT_FIELD | ACT_ROW;
-    action_table[S_UNQUOTED]['\r'] = ACT_FIELD;  // Handle CRLF
+    parser->tables_initialized = 1;
+}
-    // S_QUOTE_ESC actions
-    if (!parser->escape) {
-        // RFC4180-style: reprocess non-quote characters
-        for (int c = 0; c < 256; c++) {
-            if (c != parser->quote) {
-                action_table[S_QUOTE_ESC][c] = ACT_REPROCESS;
+// Returns a pointer to the first byte in [start, end) whose value is > 32, or end when every byte is <= 32 (all such bytes are treated as whitespace; the removed version used isspace()).
+// Scans 64/32 bytes at a time with AVX-512/AVX2 when compiled in, then 8- and 4-byte words, then single bytes.
+static inline const uint8_t* trim_start(const uint8_t *start, const uint8_t *end) {
+    size_t len = end - start;
+#ifdef __AVX512F__  // NOTE(review): _mm512_cmple_epu8_mask is an AVX-512BW intrinsic; guarding on __AVX512F__ alone may not be sufficient - confirm
+    if (len >= 64) {
+        const __m512i max_ws = _mm512_set1_epi8(32);
+        while (len >= 64) {
+            __m512i chunk = _mm512_loadu_si512(start);
+            __mmask64 is_ws = _mm512_cmple_epu8_mask(chunk, max_ws);  // unsigned compare: bit i set when byte i <= 32
+            if (is_ws != 0xFFFFFFFFFFFFFFFFULL) {
+                return start + __builtin_ctzll(~is_ws);  // lowest clear bit = first non-whitespace byte
             }
+            start += 64;
+            len -= 64;
         }
     }
+#elif defined(__AVX2__)
+    if (len >= 32) {
+        const __m256i max_ws = _mm256_set1_epi8(32);
+        while (len >= 32) {
+            __m256i chunk = _mm256_loadu_si256((__m256i*)start);
+            __m256i cmp = _mm256_cmpgt_epi8(chunk, max_ws);  // NOTE(review): signed compare - bytes >= 0x80 (e.g. UTF-8 lead bytes) compare as negative and are skipped as whitespace here, unlike the scalar paths below
+            uint32_t mask = _mm256_movemask_epi8(cmp);
-    // S_COMMENT actions - skip everything except newline
-    for (int c = 0; c < 256; c++) {
-        action_table[S_COMMENT][c] = ACT_SKIP;
+            if (mask) {
+                return start + __builtin_ctz(mask);
+            }
+            start += 32;
+            len -= 32;
+        }
     }
-    action_table[S_COMMENT]['\n'] = ACT_ROW;
+#endif
-    parser->tables_initialized = 1;
-}
+    // 8 bytes at a time: has_non_ws is a cheap pre-filter (it can fire for a plain space); the per-byte > 32 test decides
+    while (len >= 8) {
+        uint64_t v = *(uint64_t*)start;  // NOTE(review): unaligned, type-punned load; byte i == (v >> i*8) assumes little-endian - memcpy would be the portable form
+        uint64_t has_non_ws = ((v & 0xE0E0E0E0E0E0E0E0ULL) != 0) |
+                              ((v & 0x1F1F1F1F1F1F1F1FULL) > 0x0D0D0D0D0D0D0D0DULL);
+        if (has_non_ws) {
+            for (int i = 0; i < 8; i++) {
+                if ((uint8_t)(v >> (i*8)) > 32) return start + i;
+            }
+        }
+        start += 8;
+        len -= 8;
+    }
-static inline const uint8_t* trim_start(const uint8_t *start, const uint8_t *end) {
-    while (start < end && isspace(*start)) start++;
-    return start;
-}
+    // One 4-byte word (len < 8 here), checked byte by byte
+    if (len >= 4) {
+        uint32_t v = *(uint32_t*)start;  // same unaligned/little-endian caveat as the 8-byte load
+        for (int i = 0; i < 4; i++) {
+            uint8_t c = (v >> (i*8)) & 0xFF;
+            if (c > 32) return start + i;
+        }
+        start += 4;
+        len -= 4;
+    }
+    // Remaining 0-3 bytes; each case deliberately falls through to the next
+    switch(len) {
+        case 3: if (*start > 32) return start; start++;
+                /* fallthrough */
+        case 2: if (*start > 32) return start; start++;
+                /* fallthrough */
+        case 1: if (*start > 32) return start; start++;
+    }
-static inline const uint8_t* trim_end(const uint8_t *start, const uint8_t *end) {
-    while (end > start && isspace(*(end - 1))) end--;
     return end;
 }
-static inline void yield_field(cisv_parser *parser, const uint8_t *start, const uint8_t *end) {
-    // Apply trimming if configured
-    if (parser->trim) {
-        start = trim_start(start, end);
-        end = trim_end(start, end);
+static inline const uint8_t* trim_end(const uint8_t *start, const uint8_t *end) {  // Returns one past the last byte in [start, end) whose value is > 32, or start when every byte is <= 32
+    size_t len = end - start;
+#ifdef __AVX512F__  // NOTE(review): _mm512_cmpgt_epu8_mask is an AVX-512BW intrinsic; guarding on __AVX512F__ alone may not be sufficient - confirm
+    while (len >= 64) {
+        const uint8_t *check = end - 64;
+        __m512i chunk = _mm512_loadu_si512(check);
+        const __m512i max_ws = _mm512_set1_epi8(32);
+        __mmask64 is_non_ws = _mm512_cmpgt_epu8_mask(chunk, max_ws);  // unsigned compare: bit i set when byte i > 32
+        if (is_non_ws) {
+            int last_non_ws = 63 - __builtin_clzll(is_non_ws);  // index of the highest set bit
+            return check + last_non_ws + 1;
+        }
+        end -= 64;
+        len -= 64;
+    }
+#elif defined(__AVX2__)
+    while (len >= 32) {
+        const uint8_t *check = end - 32;
+        __m256i chunk = _mm256_loadu_si256((__m256i*)check);
+        const __m256i max_ws = _mm256_set1_epi8(32);
+        __m256i cmp = _mm256_cmpgt_epi8(chunk, max_ws);  // NOTE(review): signed compare - bytes >= 0x80 compare as negative and are trimmed as whitespace here, unlike the scalar paths below
+        uint32_t mask = _mm256_movemask_epi8(cmp);
+        if (mask) {
+            int last_non_ws = 31 - __builtin_clz(mask);
+            return check + last_non_ws + 1;
+        }
+        end -= 32;
+        len -= 32;
     }
+#endif
+    // 8 bytes at a time from the back, scanning each word from its highest byte down
+    while (len >= 8) {
+        const uint8_t *check = end - 8;
+        uint64_t v = *(uint64_t*)check;  // NOTE(review): unaligned, type-punned load; byte i == (v >> i*8) assumes little-endian - memcpy would be the portable form
-    // Branchless check: multiply callback by validity flag
-    size_t valid = (parser->fcb != NULL) & (start != NULL) & (end != NULL) & (end >= start);
-    if (valid) {
-        parser->fcb(parser->user, (const char *)start, (size_t)(end - start));
+        for (int i = 7; i >= 0; i--) {
+            if ((uint8_t)(v >> (i*8)) > 32) return check + i + 1;
+        }
+        end -= 8;
+        len -= 8;
     }
-}
-static inline void yield_row(cisv_parser *parser) {
-    // Check if we should skip empty lines
-    if (parser->skip_empty_lines && parser->field_start == parser->row_start) {
-        parser->row_start = parser->field_start;
-        return;
+    // One 4-byte word (len < 8 here), scanned from its highest byte down
+    if (len >= 4) {
+        const uint8_t *check = end - 4;
+        uint32_t v = *(uint32_t*)check;  // same unaligned/little-endian caveat as the 8-byte load
+        for (int i = 3; i >= 0; i--) {
+            if ((uint8_t)(v >> (i*8)) > 32) return check + i + 1;
+        }
+        end -= 4;
+        len -= 4;
     }
-    // Check line range
-    if (parser->current_line < parser->from_line) {
-        parser->current_line++;
-        parser->row_start = parser->field_start;
-        return;
+    // Remaining 0-3 bytes, one at a time from the back
+    while (len-- > 0) {
+        if (*(--end) > 32) return end + 1;
     }
-    if (parser->to_line > 0 && parser->current_line > parser->to_line) {
-        return;
+    return start;
+}
+// Passes the field [start, end) to the field callback parser->fcb, using the trimmed bounds when parser->trim is set.
+static inline void yield_field(cisv_parser *parser, const uint8_t *start, const uint8_t *end) {
+    // Prefetch parser structure for next access
+    __builtin_prefetch(parser, 0, 3);
+    // Working copies of the field bounds
+    const uint8_t *s = start;
+    const uint8_t *e = end;
+    // NOTE(review): the trim scan always runs, even when parser->trim is 0, and before the NULL checks below - confirm this is intended
+    const uint8_t *trimmed_s = trim_start(s, e);
+    const uint8_t *trimmed_e = trim_end(trimmed_s, e);
+    // Mask select: trim == 0 keeps the original bounds, trim == 1 takes the trimmed ones; any other non-zero value would blend the two pointers
+    uintptr_t mask = -(uintptr_t)parser->trim;
+    s = (const uint8_t*)(((uintptr_t)trimmed_s & mask) | ((uintptr_t)s & ~mask));
+    e = (const uint8_t*)(((uintptr_t)trimmed_e & mask) | ((uintptr_t)e & ~mask));
+    // valid_mask stays non-zero only if fcb, s and e are all non-NULL and e >= s
+    uintptr_t fcb_addr = (uintptr_t)parser->fcb;
+    uintptr_t valid_mask = -(fcb_addr != 0);
+    valid_mask &= -(s != 0);
+    valid_mask &= -(e != 0);
+    valid_mask &= -(e >= s);
+    // Single branch for callback execution
+    if (valid_mask) {
+        // Prefetch user data for callback
+        __builtin_prefetch(parser->user, 0, 1);
+        parser->fcb(parser->user, (const char *)s, (size_t)(e - s));  // callback receives pointer + length, not a NUL-terminated string
     }
+}
-    if (parser->rcb) {
+// Finishes a row: updates current_line, row_start and current_row_size, then calls parser->rcb if it is set, the line is inside the from_line/to_line range, and it is not a skipped empty line.
+static inline void yield_row(cisv_parser *parser) {
+    // Prefetch frequently accessed memory
+    __builtin_prefetch(&parser->current_line, 1, 3);
+    __builtin_prefetch(&parser->row_start, 1, 3);
+    // Compute all conditions upfront (an "empty line" here means field_start == row_start)
+    int is_empty_line = (parser->field_start == parser->row_start);
+    int skip_empty = parser->skip_empty_lines & is_empty_line;  // bitwise AND: assumes skip_empty_lines is 0 or 1
+    int before_range = (parser->current_line < parser->from_line);
+    int after_range = (parser->to_line > 0) & (parser->current_line > parser->to_line);  // to_line == 0 disables the upper bound
+    int in_range = !before_range & !after_range;
+    // current_line advances unless after range. NOTE(review): skipped empty lines now advance it too; the removed version returned before incrementing - confirm
+    parser->current_line += !after_range;
+    // Mask select: keep the old row_start when after range, otherwise move it to field_start
+    uintptr_t new_row_start = (uintptr_t)parser->field_start;
+    uintptr_t old_row_start = (uintptr_t)parser->row_start;
+    parser->row_start = (uint8_t*)((old_row_start & -after_range) | (new_row_start & ~(-after_range)));
+    // NOTE(review): after_range is 0 or 1, so this zeroes the size in the normal case but masks it to its lowest bit (instead of preserving it) when after range - a full-width mask was probably intended; confirm
+    parser->current_row_size &= after_range;
+    // Single branch for the callback. The bookkeeping above is already updated when rcb runs (the removed version updated it after the call)
+    if ((!skip_empty) & in_range & (parser->rcb != NULL)) {
+        __builtin_prefetch(parser->user, 0, 1);
         parser->rcb(parser->user);
     }
-    parser->current_line++;
-    parser->row_start = parser->field_start;
-    parser->current_row_size = 0;
 }
 static inline void handle_error(cisv_parser *parser, const char *msg) {
@@ -334,6 +492,8 @@ static void parse_simd_chunk(cisv_parser *parser, const uint8_t *buffer, size_t
                 // Handle newline
                 if (is_newline) {
                     yield_row(parser);
+                    parser->current_row_size = 0;
+                    parser->row_start = special_pos + 1;
                 }
                 // Update state branchlessly
@@ -626,6 +786,7 @@ static void parse_simd_chunk(cisv_parser *parser, const uint8_t *buffer, size_t
         if (action & ACT_ROW) {
             yield_row(parser);
             parser->current_row_size = 0;
+            parser->row_start = cur + 1;
         }
         cur += 1 - ((action & ACT_REPROCESS) >> 2);
@@ -648,6 +809,7 @@ static int parse_memory(cisv_parser *parser, const uint8_t *buffer, size_t len)
         // Yield final row if there's content
         if (parser->field_start > parser->row_start || !parser->skip_empty_lines) {
             yield_row(parser);
+            parser->current_row_size = 0;
         }
     }
     return 0;
@@ -865,24 +1027,36 @@ int cisv_parser_parse_file(cisv_parser *parser, const char *path) {
 }
 int cisv_parser_write(cisv_parser *parser, const uint8_t *chunk, size_t len) {  // Streaming entry point: stages small chunks in parser->ring and parses large ones in place; returns -EINVAL on NULL arguments, otherwise 0 or parse_memory()'s result
-    if (!parser || !chunk || len >= RINGBUF_SIZE) return -EINVAL;
+    if (!parser || !chunk) return -EINVAL;
-    // Branchless overflow handling
-    size_t overflow = (parser->head + len > RINGBUF_SIZE);
-    if (overflow) {
-        parse_memory(parser, parser->ring, parser->head);
-        parser->head = 0;
+    // Chunks larger than DIRECT_PARSE_THRESHOLD bypass the ring buffer and are parsed in place
+    if (len > DIRECT_PARSE_THRESHOLD) {
+        // NOTE(review): bytes already staged in parser->ring (head > 0) are not flushed first, so they would be parsed after this chunk - confirm ordering
+        return parse_memory(parser, chunk, len);
+    }
+    // Chunks up to DIRECT_PARSE_THRESHOLD are staged in parser->ring
+    if (parser->head + len > RINGBUF_SIZE) {
+        // Flush the staged bytes so the new chunk fits
+        if (parser->head > 0) {
+            parse_memory(parser, parser->ring, parser->head);  // NOTE(review): return value is ignored here
+            parser->head = 0;
+        }
+        // NOTE(review): looks unreachable - len <= DIRECT_PARSE_THRESHOLD at this point; with the 64 KiB / 256 KiB values defined in this diff it can never exceed RINGBUF_SIZE
+        if (len > RINGBUF_SIZE) {
+            return parse_memory(parser, chunk, len);
+        }
     }
     memcpy(parser->ring + parser->head, chunk, len);
     parser->head += len;
-    // Check for newline or buffer threshold
-    uint8_t has_newline = (memchr(chunk, '\n', len) != NULL);
-    uint8_t threshold = (parser->head > (RINGBUF_SIZE / 2));
-    if (has_newline | threshold) {
-        parse_memory(parser, parser->ring, parser->head);
+    // Parse the staged bytes when this chunk contains a newline or the buffer is more than 3/4 full
+    if (memchr(chunk, '\n', len) || parser->head > (RINGBUF_SIZE * 3 / 4)) {
+        int result = parse_memory(parser, parser->ring, parser->head);
         parser->head = 0;
+        return result;
     }
     return 0;
 }

package/cisv/cisv_transformer.c CHANGED Viewed

@@ -5,12 +5,15 @@
 #include <ctype.h>
 #include <stdio.h>
+#ifdef __AVX512F__
+#include <immintrin.h>
+#endif
 #ifdef __AVX2__
 #include <immintrin.h>
 #endif
 #define TRANSFORM_POOL_SIZE (1 << 20)  // 1MB default pool
-// #define TRANSFORM_POOL_SIZE (1 << 16)  // 64kb (for memory safe reasons)
 #define SIMD_ALIGNMENT 64
 // Create transform pipeline

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "cisv",
-  "version": "0.0.33",
+  "version": "0.0.41",
   "description": "The fastest csv parser of the multiverse",
   "author": "sanix<s4nixd@gmail.com>",
   "main": "./build/Release/cisv.node",
@@ -9,7 +9,7 @@
     "install": "node-gyp rebuild",
     "build": "node-gyp rebuild",
     "test": "mocha ./tests/*.test.js && bash ./test_transform.sh",
-    "test:build": "npm run test",
+    "test:build": "npm run build && npm run test",
     "benchmark": "node benchmark/benchmark.js",
     "lint": "clang-format -i cisv/*.{cc,h}",
     "prepublishOnly": "npm run benchmark",