cisv 0.0.33 → 0.0.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/cisv/cisv_parser.c +252 -92
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -19,6 +19,25 @@ I wrote about basics in a blog post, you can read here :https://sanixdk.xyz/blog
|
|
|
19
19
|
- SIMD accelerated with AVX-512/AVX2 auto-detection
|
|
20
20
|
- Dynamic lookup tables for configurable parsing
|
|
21
21
|
|
|
22
|
+
## CLI BENCHMARKS WITH DOCKER
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
$ docker build -t cisv-benchmark .
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
To run the benchmarks with explicit resource limits for the container (CPU, memory), use:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
$ docker run --rm \
|
|
32
|
+
--cpus="2.0" \
|
|
33
|
+
--memory="4g" \
|
|
34
|
+
--memory-swap="4g" \
|
|
35
|
+
--cpu-shares=1024 \
|
|
36
|
+
--security-opt \
|
|
37
|
+
seccomp=unconfined \
|
|
38
|
+
cisv-benchmark
|
|
39
|
+
```
|
|
40
|
+
|
|
22
41
|
## BENCHMARKS
|
|
23
42
|
|
|
24
43
|
Benchmarks comparison with existing popular tools,
|
package/cisv/cisv_parser.c
CHANGED
|
@@ -16,6 +16,14 @@
|
|
|
16
16
|
#include "cisv_parser.h"
|
|
17
17
|
#include "cisv_simd.h"
|
|
18
18
|
|
|
19
|
+
#ifdef __AVX512F__
|
|
20
|
+
#include <immintrin.h>
|
|
21
|
+
#endif
|
|
22
|
+
|
|
23
|
+
#ifdef __AVX2__
|
|
24
|
+
#include <immintrin.h>
|
|
25
|
+
#endif
|
|
26
|
+
|
|
19
27
|
#define RINGBUF_SIZE (1 << 20) // 1 MiB (we may adjust according to needs)
|
|
20
28
|
// #define RINGBUF_SIZE (1 << 16) // 64kb (for memory safe reasons)
|
|
21
29
|
#define PREFETCH_DISTANCE 256
|
|
@@ -95,133 +103,285 @@ void cisv_config_init(cisv_config *config) {
|
|
|
95
103
|
/*
 * Build the state-transition and action lookup tables for this parser.
 *
 * Both tables share one 64-byte-aligned allocation: TABLE_ROWS rows of
 * state transitions followed by TABLE_ROWS rows of actions, each row indexed
 * by input byte (table[state][byte]). Entries are derived from the parser's
 * quote, delimiter, escape and comment characters.
 *
 * The function returns immediately when tables_initialized is already set.
 * If the allocation fails it returns with tables_initialized still clear.
 */
static void init_tables(cisv_parser *parser) {
    enum { TABLE_COLS = 256, TABLE_ROWS = 4 };

    if (parser->tables_initialized) return;

    // Allocate both tables in one block so they sit next to each other.
    if (!parser->state_table) {
        parser->state_table = aligned_alloc(64, 2 * TABLE_ROWS * TABLE_COLS);
        if (!parser->state_table) return;
        parser->action_table = parser->state_table + (TABLE_ROWS * TABLE_COLS);
        memset(parser->state_table, 0, 2 * TABLE_ROWS * TABLE_COLS);
    }

    uint8_t (*st)[256] = (uint8_t (*)[256])parser->state_table;
    uint8_t (*at)[256] = (uint8_t (*)[256])parser->action_table;

    const uint8_t q = parser->quote;
    const uint8_t d = parser->delimiter;
    const uint8_t e = parser->escape;
    const uint8_t c = parser->comment;

    // Default: every byte keeps the parser in its current state.
    // memset is used instead of explicit aligned SIMD stores: it makes no
    // alignment demands on a table that may have been allocated elsewhere.
    memset(st[S_UNQUOTED], S_UNQUOTED, TABLE_COLS);
    memset(st[S_QUOTED], S_QUOTED, TABLE_COLS);
    memset(st[S_COMMENT], S_COMMENT, TABLE_COLS);

    // Special transitions out of the unquoted state. The quote entry is
    // written last so that it wins if quote and comment are the same byte.
    if (c) st[S_UNQUOTED][c] = S_COMMENT;
    st[S_UNQUOTED][q] = S_QUOTED;

    if (e) {
        // Explicit escape character: whatever follows it returns to S_QUOTED.
        // NOTE(review): in this mode st[S_QUOTED][q] stays S_QUOTED, so no
        // table entry leaves the quoted state on the quote character --
        // confirm the closing quote is handled elsewhere.
        st[S_QUOTED][e] = S_QUOTE_ESC;
        memset(st[S_QUOTE_ESC], S_QUOTED, TABLE_COLS);
    } else {
        // No escape character: a quote inside a quoted field is either the
        // first half of a doubled quote or the closing quote.
        st[S_QUOTED][q] = S_QUOTE_ESC;
        memset(st[S_QUOTE_ESC], S_UNQUOTED, TABLE_COLS);
        st[S_QUOTE_ESC][q] = S_QUOTED;
    }

    // A comment runs until the end of the line.
    st[S_COMMENT]['\n'] = S_UNQUOTED;

    // Actions: nothing by default, then the few bytes that emit something.
    memset(at, ACT_NONE, TABLE_ROWS * TABLE_COLS);
    at[S_UNQUOTED][d] = ACT_FIELD;
    at[S_UNQUOTED]['\n'] = ACT_FIELD | ACT_ROW;
    at[S_UNQUOTED]['\r'] = ACT_FIELD;

    if (!e) {
        // After a quote seen in quoted state, any byte other than a second
        // quote is reprocessed.
        memset(at[S_QUOTE_ESC], ACT_REPROCESS, TABLE_COLS);
        at[S_QUOTE_ESC][q] = ACT_NONE;
    }

    memset(at[S_COMMENT], ACT_SKIP, TABLE_COLS);
    at[S_COMMENT]['\n'] = ACT_ROW;

    parser->tables_initialized = 1;
}
|
|
182
|
+
|
|
183
|
+
/*
 * Skip leading whitespace in [start, end).
 *
 * Every byte with an unsigned value <= 32 (space and the ASCII control
 * characters) counts as whitespace; bytes >= 0x80, such as UTF-8 lead and
 * continuation bytes, do not.
 *
 * Returns a pointer to the first non-whitespace byte, or `end` when the
 * whole range is whitespace. An empty or inverted range returns `start`
 * without reading any memory.
 */
static inline const uint8_t* trim_start(const uint8_t *start, const uint8_t *end) {
#if defined(__AVX512BW__)
    {
        // Byte-granular mask compares are AVX512BW instructions, not AVX512F.
        const __m512i max_ws = _mm512_set1_epi8(32);

        while (start < end && (size_t)(end - start) >= 64) {
            const __m512i chunk = _mm512_loadu_si512((const void *)start);
            const unsigned long long is_ws = _mm512_cmple_epu8_mask(chunk, max_ws);

            if (is_ws != ~0ULL) {
                return start + __builtin_ctzll(~is_ws);
            }
            start += 64;
        }
    }
#elif defined(__AVX2__)
    {
        const __m256i max_ws = _mm256_set1_epi8(32);

        while (start < end && (size_t)(end - start) >= 32) {
            const __m256i chunk = _mm256_loadu_si256((const __m256i *)start);
            // AVX2 has no unsigned byte compare; "chunk <= 32 (unsigned)" is
            // expressed as max_epu8(chunk, 32) == 32. A signed compare would
            // classify every byte >= 0x80 as whitespace.
            const __m256i is_ws = _mm256_cmpeq_epi8(_mm256_max_epu8(chunk, max_ws), max_ws);
            const uint32_t non_ws = ~(uint32_t)_mm256_movemask_epi8(is_ws);

            if (non_ws) {
                return start + __builtin_ctz(non_ws);
            }
            start += 32;
        }
    }
#endif

    // Scalar tail (and the whole job on non-SIMD builds). A plain byte loop
    // avoids misaligned, type-punned word loads and is endian-independent.
    while (start < end && *start <= 32) {
        start++;
    }
    return start;
}
|
|
185
257
|
|
|
186
|
-
static inline
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
258
|
+
/*
 * Skip trailing whitespace in [start, end).
 *
 * Every byte with an unsigned value <= 32 (space and the ASCII control
 * characters) counts as whitespace; bytes >= 0x80, such as UTF-8 lead and
 * continuation bytes, do not.
 *
 * Returns a pointer one past the last non-whitespace byte, or `start` when
 * the whole range is whitespace. An empty or inverted range returns `end`
 * without reading any memory.
 */
static inline const uint8_t* trim_end(const uint8_t *start, const uint8_t *end) {
#if defined(__AVX512BW__)
    {
        // Byte-granular mask compares are AVX512BW instructions, not AVX512F.
        const __m512i max_ws = _mm512_set1_epi8(32);

        while (start < end && (size_t)(end - start) >= 64) {
            const uint8_t *check = end - 64;
            const __m512i chunk = _mm512_loadu_si512((const void *)check);
            const unsigned long long is_non_ws = _mm512_cmpgt_epu8_mask(chunk, max_ws);

            if (is_non_ws) {
                // Highest set bit = last non-whitespace byte in the chunk.
                return check + (64 - __builtin_clzll(is_non_ws));
            }
            end = check;
        }
    }
#elif defined(__AVX2__)
    {
        const __m256i max_ws = _mm256_set1_epi8(32);

        while (start < end && (size_t)(end - start) >= 32) {
            const uint8_t *check = end - 32;
            const __m256i chunk = _mm256_loadu_si256((const __m256i *)check);
            // AVX2 has no unsigned byte compare; "chunk <= 32 (unsigned)" is
            // expressed as max_epu8(chunk, 32) == 32. A signed compare would
            // classify every byte >= 0x80 as whitespace.
            const __m256i is_ws = _mm256_cmpeq_epi8(_mm256_max_epu8(chunk, max_ws), max_ws);
            const uint32_t non_ws = ~(uint32_t)_mm256_movemask_epi8(is_ws);

            if (non_ws) {
                // Highest set bit = last non-whitespace byte in the chunk.
                return check + (32 - __builtin_clz(non_ws));
            }
            end = check;
        }
    }
#endif

    // Scalar tail (and the whole job on non-SIMD builds). A plain byte loop
    // avoids misaligned, type-punned word loads and is endian-independent.
    while (end > start && end[-1] <= 32) {
        end--;
    }
    return end;
}
|
|
322
|
+
|
|
323
|
+
// yield_field with prefetching and branchless code
|
|
324
|
+
static inline void yield_field(cisv_parser *parser, const uint8_t *start, const uint8_t *end) {
|
|
325
|
+
// Prefetch parser structure for next access
|
|
326
|
+
__builtin_prefetch(parser, 0, 3);
|
|
327
|
+
|
|
328
|
+
// Branchless trimming using conditional move
|
|
329
|
+
const uint8_t *s = start;
|
|
330
|
+
const uint8_t *e = end;
|
|
331
|
+
|
|
332
|
+
// Use conditional assignment instead of branch
|
|
333
|
+
const uint8_t *trimmed_s = trim_start(s, e);
|
|
334
|
+
const uint8_t *trimmed_e = trim_end(trimmed_s, e);
|
|
335
|
+
|
|
336
|
+
// Branchless selection: if trim is 0, use original, if 1, use trimmed
|
|
337
|
+
uintptr_t mask = -(uintptr_t)parser->trim;
|
|
338
|
+
s = (const uint8_t*)(((uintptr_t)trimmed_s & mask) | ((uintptr_t)s & ~mask));
|
|
339
|
+
e = (const uint8_t*)(((uintptr_t)trimmed_e & mask) | ((uintptr_t)e & ~mask));
|
|
340
|
+
|
|
341
|
+
// Combine all conditions into single branch
|
|
342
|
+
uintptr_t fcb_addr = (uintptr_t)parser->fcb;
|
|
343
|
+
uintptr_t valid_mask = -(fcb_addr != 0);
|
|
344
|
+
valid_mask &= -(s != 0);
|
|
345
|
+
valid_mask &= -(e != 0);
|
|
346
|
+
valid_mask &= -(e >= s);
|
|
347
|
+
|
|
348
|
+
// Single branch for callback execution
|
|
349
|
+
if (valid_mask) {
|
|
350
|
+
// Prefetch user data for callback
|
|
351
|
+
__builtin_prefetch(parser->user, 0, 1);
|
|
352
|
+
parser->fcb(parser->user, (const char *)s, (size_t)(e - s));
|
|
216
353
|
}
|
|
354
|
+
}
|
|
217
355
|
|
|
218
|
-
|
|
356
|
+
// yield_row with reduced branches
|
|
357
|
+
static inline void yield_row(cisv_parser *parser) {
|
|
358
|
+
// Prefetch frequently accessed memory
|
|
359
|
+
__builtin_prefetch(&parser->current_line, 1, 3);
|
|
360
|
+
__builtin_prefetch(&parser->row_start, 1, 3);
|
|
361
|
+
|
|
362
|
+
// Compute all conditions upfront
|
|
363
|
+
int is_empty_line = (parser->field_start == parser->row_start);
|
|
364
|
+
int skip_empty = parser->skip_empty_lines & is_empty_line;
|
|
365
|
+
int before_range = (parser->current_line < parser->from_line);
|
|
366
|
+
int after_range = (parser->to_line > 0) & (parser->current_line > parser->to_line);
|
|
367
|
+
int in_range = !before_range & !after_range;
|
|
368
|
+
|
|
369
|
+
// Branchless increment of current_line (always happens except when after range)
|
|
370
|
+
parser->current_line += !after_range;
|
|
371
|
+
|
|
372
|
+
// Branchless update of row_start (happens except when after range)
|
|
373
|
+
uintptr_t new_row_start = (uintptr_t)parser->field_start;
|
|
374
|
+
uintptr_t old_row_start = (uintptr_t)parser->row_start;
|
|
375
|
+
parser->row_start = (uint8_t*)((old_row_start & -after_range) | (new_row_start & ~(-after_range)));
|
|
376
|
+
|
|
377
|
+
// Branchless reset of row_size
|
|
378
|
+
parser->current_row_size &= after_range;
|
|
379
|
+
|
|
380
|
+
// Single branch for callback (most common case last for better prediction)
|
|
381
|
+
if ((!skip_empty) & in_range & (parser->rcb != NULL)) {
|
|
382
|
+
__builtin_prefetch(parser->user, 0, 1);
|
|
219
383
|
parser->rcb(parser->user);
|
|
220
384
|
}
|
|
221
|
-
|
|
222
|
-
parser->current_line++;
|
|
223
|
-
parser->row_start = parser->field_start;
|
|
224
|
-
parser->current_row_size = 0;
|
|
225
385
|
}
|
|
226
386
|
|
|
227
387
|
static inline void handle_error(cisv_parser *parser, const char *msg) {
|