cisv 0.0.40 → 0.0.41

This diff shows the changes between two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
package/README.md CHANGED
@@ -41,41 +41,41 @@ $ docker run --rm \
41
41
  ## BENCHMARKS
42
42
 
43
43
  Benchmarks comparison with existing popular tools,
44
- cf pipeline you can check : (https://github.com/Sanix-Darker/cisv/actions/runs/17194915214/job/48775516036)
44
+ cf pipeline you can check : (https://github.com/Sanix-Darker/cisv/actions/runs/17697547058/job/50298916576) a step "Publish to npm"
45
45
 
46
46
  ### SYNCHRONOUS RESULTS
47
47
 
48
48
  | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
49
49
  |--------------------|--------------|---------------|----------------|
50
- | cisv (sync) | 30.04 | 0.02 | 64936 |
51
- | csv-parse (sync) | 13.35 | 0.03 | 28870 |
52
- | papaparse (sync) | 25.16 | 0.02 | 54406 |
50
+ | cisv (sync) | 45.58 | 0.01 | 98543 |
51
+ | csv-parse (sync) | 18.11 | 0.03 | 39155 |
52
+ | papaparse (sync) | 28.03 | 0.02 | 60596 |
53
53
 
54
54
  ### SYNCHRONOUS RESULTS (WITH DATA ACCESS)
55
55
 
56
56
  | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
57
57
  |--------------------|--------------|---------------|----------------|
58
- | cisv (sync) | 31.24 | 0.01 | 67543 |
59
- | csv-parse (sync) | 15.42 | 0.03 | 33335 |
60
- | papaparse (sync) | 25.49 | 0.02 | 55107 |
58
+ | cisv (sync) | 46.80 | 0.01 | 101185 |
59
+ | csv-parse (sync) | 18.92 | 0.02 | 40900 |
60
+ | papaparse (sync) | 28.38 | 0.02 | 61363 |
61
61
 
62
62
 
63
63
  ### ASYNCHRONOUS RESULTS
64
64
 
65
65
  | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
66
66
  |--------------------------|--------------|---------------|----------------|
67
- | cisv (async/stream) | 61.31 | 0.01 | 132561 |
68
- | papaparse (async/stream) | 19.24 | 0.02 | 41603 |
69
- | neat-csv (async/promise) | 9.09 | 0.05 | 19655 |
67
+ | cisv (async/stream) | 70.07 | 0.01 | 151485 |
68
+ | papaparse (async/stream) | 21.58 | 0.02 | 46646 |
69
+ | neat-csv (async/promise) | 9.77 | 0.05 | 21126 |
70
70
 
71
71
 
72
72
  ### ASYNCHRONOUS RESULTS (WITH DATA ACCESS)
73
73
 
74
74
  | Library | Speed (MB/s) | Avg Time (ms) | Operations/sec |
75
75
  |--------------------------|--------------|---------------|----------------|
76
- | cisv (async/stream) | 24.59 | 0.02 | 53160 |
77
- | papaparse (async/stream) | 21.86 | 0.02 | 47260 |
78
- | neat-csv (async/promise) | 9.38 | 0.05 | 20283 |
76
+ | cisv (async/stream) | 25.23 | 0.02 | 54545 |
77
+ | papaparse (async/stream) | 22.49 | 0.02 | 48622 |
78
+ | neat-csv (async/promise) | 9.91 | 0.05 | 21428 |
79
79
 
80
80
  ## INSTALLATION
81
81
 
@@ -174,6 +174,8 @@ public:
174
174
  // Initialize configuration with defaults
175
175
  cisv_config_init(&config_);
176
176
 
177
+ config_.max_row_size = 0;
178
+
177
179
  // Handle constructor options if provided
178
180
  if (info.Length() > 0 && info[0].IsObject()) {
179
181
  Napi::Object options = info[0].As<Napi::Object>();
@@ -261,7 +263,10 @@ public:
261
263
 
262
264
  // Numeric options
263
265
  if (options.Has("maxRowSize")) {
264
- config_.max_row_size = options.Get("maxRowSize").As<Napi::Number>().Uint32Value();
266
+ Napi::Value val = options.Get("maxRowSize");
267
+ if (!val.IsNull() && !val.IsUndefined()) {
268
+ config_.max_row_size = val.As<Napi::Number>().Uint32Value();
269
+ }
265
270
  }
266
271
 
267
272
  if (options.Has("fromLine")) {
@@ -6,7 +6,6 @@
6
6
  #include <errno.h>
7
7
  #include <time.h>
8
8
  #include <stdbool.h>
9
- #include <ctype.h>
10
9
  // NOTE: not dealing with windows for now, too much issues
11
10
  #include <sys/mman.h>
12
11
  #include <fcntl.h>
@@ -14,7 +13,6 @@
14
13
  #include <getopt.h>
15
14
  #include <sys/time.h>
16
15
  #include "cisv_parser.h"
17
- #include "cisv_simd.h"
18
16
 
19
17
  #ifdef __AVX512F__
20
18
  #include <immintrin.h>
@@ -24,8 +22,8 @@
24
22
  #include <immintrin.h>
25
23
  #endif
26
24
 
27
- #define RINGBUF_SIZE (1 << 20) // 1 MiB (we may adjust according to needs)
28
- // #define RINGBUF_SIZE (1 << 16) // 64kb (for memory safe reasons)
25
+ #define RINGBUF_SIZE (256 * 1024)
26
+ #define DIRECT_PARSE_THRESHOLD (64 * 1024) // Parse directly if chunk > 64KB
29
27
  #define PREFETCH_DISTANCE 256
30
28
 
31
29
  struct cisv_parser {
@@ -494,6 +492,8 @@ static void parse_simd_chunk(cisv_parser *parser, const uint8_t *buffer, size_t
494
492
  // Handle newline
495
493
  if (is_newline) {
496
494
  yield_row(parser);
495
+ parser->current_row_size = 0;
496
+ parser->row_start = special_pos + 1;
497
497
  }
498
498
 
499
499
  // Update state branchlessly
@@ -786,6 +786,7 @@ static void parse_simd_chunk(cisv_parser *parser, const uint8_t *buffer, size_t
786
786
  if (action & ACT_ROW) {
787
787
  yield_row(parser);
788
788
  parser->current_row_size = 0;
789
+ parser->row_start = cur + 1;
789
790
  }
790
791
 
791
792
  cur += 1 - ((action & ACT_REPROCESS) >> 2);
@@ -808,6 +809,7 @@ static int parse_memory(cisv_parser *parser, const uint8_t *buffer, size_t len)
808
809
  // Yield final row if there's content
809
810
  if (parser->field_start > parser->row_start || !parser->skip_empty_lines) {
810
811
  yield_row(parser);
812
+ parser->current_row_size = 0;
811
813
  }
812
814
  }
813
815
  return 0;
@@ -1025,24 +1027,36 @@ int cisv_parser_parse_file(cisv_parser *parser, const char *path) {
1025
1027
  }
1026
1028
 
1027
1029
  int cisv_parser_write(cisv_parser *parser, const uint8_t *chunk, size_t len) {
1028
- if (!parser || !chunk || len >= RINGBUF_SIZE) return -EINVAL;
1030
+ if (!parser || !chunk) return -EINVAL;
1029
1031
 
1030
- // Branchless overflow handling
1031
- size_t overflow = (parser->head + len > RINGBUF_SIZE);
1032
- if (overflow) {
1033
- parse_memory(parser, parser->ring, parser->head);
1034
- parser->head = 0;
1032
+ // For large chunks, bypass ring buffer entirely
1033
+ if (len > DIRECT_PARSE_THRESHOLD) {
1034
+ // Parse directly - this is actually FASTER for large data
1035
+ return parse_memory(parser, chunk, len);
1036
+ }
1037
+
1038
+ // Small chunks use ring buffer for efficiency
1039
+ if (parser->head + len > RINGBUF_SIZE) {
1040
+ // Flush current buffer
1041
+ if (parser->head > 0) {
1042
+ parse_memory(parser, parser->ring, parser->head);
1043
+ parser->head = 0;
1044
+ }
1045
+
1046
+ // If still too large, parse directly
1047
+ if (len > RINGBUF_SIZE) {
1048
+ return parse_memory(parser, chunk, len);
1049
+ }
1035
1050
  }
1036
1051
 
1037
1052
  memcpy(parser->ring + parser->head, chunk, len);
1038
1053
  parser->head += len;
1039
1054
 
1040
- // Check for newline or buffer threshold
1041
- uint8_t has_newline = (memchr(chunk, '\n', len) != NULL);
1042
- uint8_t threshold = (parser->head > (RINGBUF_SIZE / 2));
1043
- if (has_newline | threshold) {
1044
- parse_memory(parser, parser->ring, parser->head);
1055
+ // Process on newline or when buffer is getting full
1056
+ if (memchr(chunk, '\n', len) || parser->head > (RINGBUF_SIZE * 3 / 4)) {
1057
+ int result = parse_memory(parser, parser->ring, parser->head);
1045
1058
  parser->head = 0;
1059
+ return result;
1046
1060
  }
1047
1061
  return 0;
1048
1062
  }
@@ -5,12 +5,15 @@
5
5
  #include <ctype.h>
6
6
  #include <stdio.h>
7
7
 
8
+ #ifdef __AVX512F__
9
+ #include <immintrin.h>
10
+ #endif
11
+
8
12
  #ifdef __AVX2__
9
13
  #include <immintrin.h>
10
14
  #endif
11
15
 
12
16
  #define TRANSFORM_POOL_SIZE (1 << 20) // 1MB default pool
13
- // #define TRANSFORM_POOL_SIZE (1 << 16) // 64kb (for memory safe reasons)
14
17
  #define SIMD_ALIGNMENT 64
15
18
 
16
19
  // Create transform pipeline
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cisv",
3
- "version": "0.0.40",
3
+ "version": "0.0.41",
4
4
  "description": "The fastest csv parser of the multiverse",
5
5
  "author": "sanix<s4nixd@gmail.com>",
6
6
  "main": "./build/Release/cisv.node",
@@ -9,7 +9,7 @@
9
9
  "install": "node-gyp rebuild",
10
10
  "build": "node-gyp rebuild",
11
11
  "test": "mocha ./tests/*.test.js && bash ./test_transform.sh",
12
- "test:build": "npm run test",
12
+ "test:build": "npm run build && npm run test",
13
13
  "benchmark": "node benchmark/benchmark.js",
14
14
  "lint": "clang-format -i cisv/*.{cc,h}",
15
15
  "prepublishOnly": "npm run benchmark",