cisv 0.0.33 → 0.0.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,6 +19,25 @@ I wrote about basics in a blog post, you can read here :https://sanixdk.xyz/blog
19
19
  - SIMD accelerated with AVX-512/AVX2 auto-detection
20
20
  - Dynamic lookup tables for configurable parsing
21
21
 
22
+ ## CLI BENCHMARKS WITH DOCKER
23
+
24
+ ```bash
25
+ $ docker build -t cisv-benchmark .
26
+ ```
27
+
28
+ To run the benchmarks with explicit resource limits (CPU, memory, swap) for the container, use:
29
+
30
+ ```bash
31
+ $ docker run --rm \
32
+ --cpus="2.0" \
33
+ --memory="4g" \
34
+ --memory-swap="4g" \
35
+ --cpu-shares=1024 \
36
+ --security-opt \
37
+ seccomp=unconfined \
38
+ cisv-benchmark
39
+ ```
40
+
22
41
  ## BENCHMARKS
23
42
 
24
43
  Benchmarks comparison with existing popular tools,
@@ -16,6 +16,14 @@
16
16
  #include "cisv_parser.h"
17
17
  #include "cisv_simd.h"
18
18
 
19
+ #ifdef __AVX512F__
20
+ #include <immintrin.h>
21
+ #endif
22
+
23
+ #ifdef __AVX2__
24
+ #include <immintrin.h>
25
+ #endif
26
+
19
27
  #define RINGBUF_SIZE (1 << 20) // 1 MiB (we may adjust according to needs)
20
28
  // #define RINGBUF_SIZE (1 << 16) // 64kb (for memory safe reasons)
21
29
  #define PREFETCH_DISTANCE 256
@@ -95,133 +103,285 @@ void cisv_config_init(cisv_config *config) {
95
103
  static void init_tables(cisv_parser *parser) {
96
104
  if (parser->tables_initialized) return;
97
105
 
98
- // Allocate tables if not already allocated
106
+ // Allocate both tables in one allocation for better cache locality
99
107
  if (!parser->state_table) {
100
- parser->state_table = calloc(4 * 256, sizeof(uint8_t));
101
- parser->action_table = calloc(4 * 256, sizeof(uint8_t));
102
- if (!parser->state_table || !parser->action_table) {
103
- return; // Handle allocation failure gracefully
104
- }
108
+ parser->state_table = aligned_alloc(64, 8 * 256); // Align to cache line
109
+ if (!parser->state_table) return;
110
+ parser->action_table = parser->state_table + (4 * 256);
111
+ memset(parser->state_table, 0, 8 * 256);
105
112
  }
106
113
 
107
- // Get table pointers for easier access
108
- uint8_t (*state_table)[256] = (uint8_t (*)[256])parser->state_table;
109
- uint8_t (*action_table)[256] = (uint8_t (*)[256])parser->action_table;
114
+ uint8_t (*st)[256] = (uint8_t (*)[256])parser->state_table;
115
+ uint8_t (*at)[256] = (uint8_t (*)[256])parser->action_table;
116
+
117
+ // Unroll initialization loops for better performance
118
+ // Pre-calculate commonly used values
119
+ const uint8_t q = parser->quote;
120
+ const uint8_t d = parser->delimiter;
121
+ const uint8_t e = parser->escape;
122
+ const uint8_t c = parser->comment;
123
+
124
+ // Initialize with SIMD where possible
125
+ #ifdef __AVX2__
126
+ __m256i unquoted_state = _mm256_set1_epi8(S_UNQUOTED);
127
+ __m256i quoted_state = _mm256_set1_epi8(S_QUOTED);
128
+ __m256i comment_state = _mm256_set1_epi8(S_COMMENT);
129
+
130
+ for (int i = 0; i < 256; i += 32) {
131
+ _mm256_store_si256((__m256i*)&st[S_UNQUOTED][i], unquoted_state);
132
+ _mm256_store_si256((__m256i*)&st[S_QUOTED][i], quoted_state);
133
+ _mm256_store_si256((__m256i*)&st[S_COMMENT][i], comment_state);
134
+ }
135
+ #else
136
+ memset(st[S_UNQUOTED], S_UNQUOTED, 256);
137
+ memset(st[S_QUOTED], S_QUOTED, 256);
138
+ memset(st[S_COMMENT], S_COMMENT, 256);
139
+ #endif
110
140
 
111
- // Initialize state transitions
112
- for (int c = 0; c < 256; c++) {
113
- // S_UNQUOTED transitions
114
- state_table[S_UNQUOTED][c] = S_UNQUOTED;
115
- if (c == parser->quote) {
116
- state_table[S_UNQUOTED][c] = S_QUOTED;
117
- } else if (parser->comment && c == parser->comment) {
118
- state_table[S_UNQUOTED][c] = S_COMMENT;
119
- }
141
+ // Set special transitions
142
+ st[S_UNQUOTED][q] = S_QUOTED;
143
+ if (c) st[S_UNQUOTED][c] = S_COMMENT;
120
144
 
121
- // S_QUOTED transitions
122
- state_table[S_QUOTED][c] = S_QUOTED;
123
- if (parser->escape && c == parser->escape) {
124
- state_table[S_QUOTED][c] = S_QUOTE_ESC;
125
- } else if (c == parser->quote) {
126
- state_table[S_QUOTED][c] = S_QUOTE_ESC;
127
- }
145
+ if (e) {
146
+ st[S_QUOTED][e] = S_QUOTE_ESC;
147
+ memset(st[S_QUOTE_ESC], S_QUOTED, 256);
148
+ } else {
149
+ st[S_QUOTED][q] = S_QUOTE_ESC;
150
+ memset(st[S_QUOTE_ESC], S_UNQUOTED, 256);
151
+ st[S_QUOTE_ESC][q] = S_QUOTED;
152
+ }
128
153
 
129
- // S_QUOTE_ESC transitions
130
- if (parser->escape) {
131
- // With explicit escape character, always return to quoted state
132
- state_table[S_QUOTE_ESC][c] = S_QUOTED;
133
- } else {
134
- // RFC4180-style: "" becomes a literal quote
135
- if (c == parser->quote) {
136
- state_table[S_QUOTE_ESC][c] = S_QUOTED;
137
- } else {
138
- state_table[S_QUOTE_ESC][c] = S_UNQUOTED;
139
- }
140
- }
154
+ st[S_COMMENT]['\n'] = S_UNQUOTED;
141
155
 
142
- // S_COMMENT transitions - stay in comment until newline
143
- state_table[S_COMMENT][c] = S_COMMENT;
144
- if (c == '\n') {
145
- state_table[S_COMMENT][c] = S_UNQUOTED;
156
+ // Initialize actions with minimal branches
157
+ memset(at, ACT_NONE, 4 * 256);
158
+ at[S_UNQUOTED][d] = ACT_FIELD;
159
+ at[S_UNQUOTED]['\n'] = ACT_FIELD | ACT_ROW;
160
+ at[S_UNQUOTED]['\r'] = ACT_FIELD;
161
+
162
+ if (!e) {
163
+ // RFC4180-style (no escape character): reprocess every non-quote character
164
+ for (int i = 0; i < 256; i++) {
165
+ at[S_QUOTE_ESC][i] = (i != q) ? ACT_REPROCESS : ACT_NONE;
146
166
  }
147
167
  }
148
168
 
149
- // Initialize action table
150
- memset(action_table, ACT_NONE, 4 * 256);
169
+ // Use SIMD for comment actions
170
+ #ifdef __AVX2__
171
+ __m256i skip_act = _mm256_set1_epi8(ACT_SKIP);
172
+ for (int i = 0; i < 256; i += 32) {
173
+ _mm256_store_si256((__m256i*)&at[S_COMMENT][i], skip_act);
174
+ }
175
+ #else
176
+ memset(at[S_COMMENT], ACT_SKIP, 256);
177
+ #endif
178
+ at[S_COMMENT]['\n'] = ACT_ROW;
179
+
180
+ parser->tables_initialized = 1;
181
+ }
182
+
183
+ // Whitespace trimming helpers: no lookup table is used; bytes are compared against 32 directly
184
+ // Ultra-fast trimming with AVX512/AVX2
185
+ static inline const uint8_t* trim_start(const uint8_t *start, const uint8_t *end) {
186
+ size_t len = end - start;
187
+
188
+ #ifdef __AVX512F__
189
+ if (len >= 64) {
190
+ const __m512i max_ws = _mm512_set1_epi8(32);
151
191
 
152
- // S_UNQUOTED actions
153
- action_table[S_UNQUOTED][(uint8_t)parser->delimiter] = ACT_FIELD;
154
- action_table[S_UNQUOTED]['\n'] = ACT_FIELD | ACT_ROW;
155
- action_table[S_UNQUOTED]['\r'] = ACT_FIELD; // Handle CRLF
192
+ while (len >= 64) {
193
+ __m512i chunk = _mm512_loadu_si512(start);
194
+ __mmask64 is_ws = _mm512_cmple_epu8_mask(chunk, max_ws);
156
195
 
157
- // S_QUOTE_ESC actions
158
- if (!parser->escape) {
159
- // RFC4180-style: reprocess non-quote characters
160
- for (int c = 0; c < 256; c++) {
161
- if (c != parser->quote) {
162
- action_table[S_QUOTE_ESC][c] = ACT_REPROCESS;
196
+ if (is_ws != 0xFFFFFFFFFFFFFFFFULL) {
197
+ return start + __builtin_ctzll(~is_ws);
163
198
  }
199
+ start += 64;
200
+ len -= 64;
164
201
  }
165
202
  }
203
+ #elif defined(__AVX2__)
204
+ if (len >= 32) {
205
+ const __m256i max_ws = _mm256_set1_epi8(32);
206
+
207
+ while (len >= 32) {
208
+ __m256i chunk = _mm256_loadu_si256((__m256i*)start);
209
+ __m256i cmp = _mm256_cmpgt_epi8(chunk, max_ws);
210
+ uint32_t mask = _mm256_movemask_epi8(cmp);
166
211
 
167
- // S_COMMENT actions - skip everything except newline
168
- for (int c = 0; c < 256; c++) {
169
- action_table[S_COMMENT][c] = ACT_SKIP;
212
+ if (mask) {
213
+ return start + __builtin_ctz(mask);
214
+ }
215
+ start += 32;
216
+ len -= 32;
217
+ }
170
218
  }
171
- action_table[S_COMMENT]['\n'] = ACT_ROW;
219
+ #endif
172
220
 
173
- parser->tables_initialized = 1;
174
- }
221
+ // Unrolled 8-byte processing
222
+ while (len >= 8) {
223
+ uint64_t v = *(uint64_t*)start;
224
+ uint64_t has_non_ws = ((v & 0xE0E0E0E0E0E0E0E0ULL) != 0) |
225
+ ((v & 0x1F1F1F1F1F1F1F1FULL) > 0x0D0D0D0D0D0D0D0DULL);
226
+ if (has_non_ws) {
227
+ for (int i = 0; i < 8; i++) {
228
+ if ((uint8_t)(v >> (i*8)) > 32) return start + i;
229
+ }
230
+ }
231
+ start += 8;
232
+ len -= 8;
233
+ }
175
234
 
176
- static inline const uint8_t* trim_start(const uint8_t *start, const uint8_t *end) {
177
- while (start < end && isspace(*start)) start++;
178
- return start;
179
- }
235
+ // 4-byte processing
236
+ if (len >= 4) {
237
+ uint32_t v = *(uint32_t*)start;
238
+ for (int i = 0; i < 4; i++) {
239
+ uint8_t c = (v >> (i*8)) & 0xFF;
240
+ if (c > 32) return start + i;
241
+ }
242
+ start += 4;
243
+ len -= 4;
244
+ }
245
+
246
+ // Remainder
247
+ switch(len) {
248
+ case 3: if (*start > 32) return start; start++;
249
+ /* fallthrough */
250
+ case 2: if (*start > 32) return start; start++;
251
+ /* fallthrough */
252
+ case 1: if (*start > 32) return start; start++;
253
+ }
180
254
 
181
- static inline const uint8_t* trim_end(const uint8_t *start, const uint8_t *end) {
182
- while (end > start && isspace(*(end - 1))) end--;
183
255
  return end;
184
256
  }
185
257
 
186
- static inline void yield_field(cisv_parser *parser, const uint8_t *start, const uint8_t *end) {
187
- // Apply trimming if configured
188
- if (parser->trim) {
189
- start = trim_start(start, end);
190
- end = trim_end(start, end);
258
+ static inline const uint8_t* trim_end(const uint8_t *start, const uint8_t *end) {
259
+ size_t len = end - start;
260
+
261
+ #ifdef __AVX512F__
262
+ while (len >= 64) {
263
+ const uint8_t *check = end - 64;
264
+ __m512i chunk = _mm512_loadu_si512(check);
265
+ const __m512i max_ws = _mm512_set1_epi8(32);
266
+ __mmask64 is_non_ws = _mm512_cmpgt_epu8_mask(chunk, max_ws);
267
+
268
+ if (is_non_ws) {
269
+ int last_non_ws = 63 - __builtin_clzll(is_non_ws);
270
+ return check + last_non_ws + 1;
271
+ }
272
+ end -= 64;
273
+ len -= 64;
274
+ }
275
+ #elif defined(__AVX2__)
276
+ while (len >= 32) {
277
+ const uint8_t *check = end - 32;
278
+ __m256i chunk = _mm256_loadu_si256((__m256i*)check);
279
+ const __m256i max_ws = _mm256_set1_epi8(32);
280
+ __m256i cmp = _mm256_cmpgt_epi8(chunk, max_ws);
281
+ uint32_t mask = _mm256_movemask_epi8(cmp);
282
+
283
+ if (mask) {
284
+ int last_non_ws = 31 - __builtin_clz(mask);
285
+ return check + last_non_ws + 1;
286
+ }
287
+ end -= 32;
288
+ len -= 32;
191
289
  }
290
+ #endif
192
291
 
193
- // Branchless check: multiply callback by validity flag
194
- size_t valid = (parser->fcb != NULL) & (start != NULL) & (end != NULL) & (end >= start);
195
- if (valid) {
196
- parser->fcb(parser->user, (const char *)start, (size_t)(end - start));
292
+ // Unrolled 8-byte processing
293
+ while (len >= 8) {
294
+ const uint8_t *check = end - 8;
295
+ uint64_t v = *(uint64_t*)check;
296
+
297
+ for (int i = 7; i >= 0; i--) {
298
+ if ((uint8_t)(v >> (i*8)) > 32) return check + i + 1;
299
+ }
300
+ end -= 8;
301
+ len -= 8;
197
302
  }
198
- }
199
303
 
200
- static inline void yield_row(cisv_parser *parser) {
201
- // Check if we should skip empty lines
202
- if (parser->skip_empty_lines && parser->field_start == parser->row_start) {
203
- parser->row_start = parser->field_start;
204
- return;
304
+ // 4-byte processing
305
+ if (len >= 4) {
306
+ const uint8_t *check = end - 4;
307
+ uint32_t v = *(uint32_t*)check;
308
+ for (int i = 3; i >= 0; i--) {
309
+ if ((uint8_t)(v >> (i*8)) > 32) return check + i + 1;
310
+ }
311
+ end -= 4;
312
+ len -= 4;
205
313
  }
206
314
 
207
- // Check line range
208
- if (parser->current_line < parser->from_line) {
209
- parser->current_line++;
210
- parser->row_start = parser->field_start;
211
- return;
315
+ // Remainder
316
+ while (len-- > 0) {
317
+ if (*(--end) > 32) return end + 1;
212
318
  }
213
319
 
214
- if (parser->to_line > 0 && parser->current_line > parser->to_line) {
215
- return;
320
+ return start;
321
+ }
322
+
323
+ // yield_field with prefetching and branchless code
324
+ static inline void yield_field(cisv_parser *parser, const uint8_t *start, const uint8_t *end) {
325
+ // Prefetch parser structure for next access
326
+ __builtin_prefetch(parser, 0, 3);
327
+
328
+ // Branchless trimming using conditional move
329
+ const uint8_t *s = start;
330
+ const uint8_t *e = end;
331
+
332
+ // Trimmed bounds are always computed here; they are only selected below when parser->trim is set
333
+ const uint8_t *trimmed_s = trim_start(s, e);
334
+ const uint8_t *trimmed_e = trim_end(trimmed_s, e);
335
+
336
+ // Branchless selection: if trim is 0, use original, if 1, use trimmed
337
+ uintptr_t mask = -(uintptr_t)parser->trim;
338
+ s = (const uint8_t*)(((uintptr_t)trimmed_s & mask) | ((uintptr_t)s & ~mask));
339
+ e = (const uint8_t*)(((uintptr_t)trimmed_e & mask) | ((uintptr_t)e & ~mask));
340
+
341
+ // Combine all conditions into single branch
342
+ uintptr_t fcb_addr = (uintptr_t)parser->fcb;
343
+ uintptr_t valid_mask = -(fcb_addr != 0);
344
+ valid_mask &= -(s != 0);
345
+ valid_mask &= -(e != 0);
346
+ valid_mask &= -(e >= s);
347
+
348
+ // Single branch for callback execution
349
+ if (valid_mask) {
350
+ // Prefetch user data for callback
351
+ __builtin_prefetch(parser->user, 0, 1);
352
+ parser->fcb(parser->user, (const char *)s, (size_t)(e - s));
216
353
  }
354
+ }
217
355
 
218
- if (parser->rcb) {
356
+ // yield_row with reduced branches
357
+ static inline void yield_row(cisv_parser *parser) {
358
+ // Prefetch frequently accessed memory
359
+ __builtin_prefetch(&parser->current_line, 1, 3);
360
+ __builtin_prefetch(&parser->row_start, 1, 3);
361
+
362
+ // Compute all conditions upfront
363
+ int is_empty_line = (parser->field_start == parser->row_start);
364
+ int skip_empty = parser->skip_empty_lines & is_empty_line;
365
+ int before_range = (parser->current_line < parser->from_line);
366
+ int after_range = (parser->to_line > 0) & (parser->current_line > parser->to_line);
367
+ int in_range = !before_range & !after_range;
368
+
369
+ // Branchless increment of current_line (always happens except when after range)
370
+ parser->current_line += !after_range;
371
+
372
+ // Branchless update of row_start (happens except when after range)
373
+ uintptr_t new_row_start = (uintptr_t)parser->field_start;
374
+ uintptr_t old_row_start = (uintptr_t)parser->row_start;
375
+ parser->row_start = (uint8_t*)((old_row_start & -after_range) | (new_row_start & ~(-after_range)));
376
+
377
+ // Branchless reset of row_size: zeroed unless after range (NOTE: after range this keeps only bit 0, not the previous value)
378
+ parser->current_row_size &= after_range;
379
+
380
+ // Single branch for callback (most common case last for better prediction)
381
+ if ((!skip_empty) & in_range & (parser->rcb != NULL)) {
382
+ __builtin_prefetch(parser->user, 0, 1);
219
383
  parser->rcb(parser->user);
220
384
  }
221
-
222
- parser->current_line++;
223
- parser->row_start = parser->field_start;
224
- parser->current_row_size = 0;
225
385
  }
226
386
 
227
387
  static inline void handle_error(cisv_parser *parser, const char *msg) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cisv",
3
- "version": "0.0.33",
3
+ "version": "0.0.40",
4
4
  "description": "The fastest csv parser of the multiverse",
5
5
  "author": "sanix<s4nixd@gmail.com>",
6
6
  "main": "./build/Release/cisv.node",