cisv 0.0.27 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -5
- package/README.md +11 -12
- package/cisv/cisv_addon.cc +185 -0
- package/cisv/cisv_transformer.c +84 -0
- package/cisv/cisv_transformer.h +26 -0
- package/index.d.ts +158 -278
- package/package.json +1 -1
package/LICENSE
CHANGED
|
@@ -1,7 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
Version 2, 29 Jul 2025
|
|
1
|
+
MIT License
|
|
3
2
|
|
|
4
|
-
|
|
3
|
+
Copyright (c) 2025 - cisv - sanix-darker.
|
|
5
4
|
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|

|
|
6
6
|

|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
Cisv is a CSV parser on steroids... literally.
|
|
9
|
+
It's a high-performance CSV parser/writer leveraging SIMD instructions and zero-copy memory mapping. Available as both a Node.js native addon and standalone CLI tool with extensive configuration options.
|
|
9
10
|
|
|
10
11
|
I wrote about the basics in a blog post, which you can read here: https://sanixdk.xyz/blogs/how-i-accidentally-created-the-fastest-csv-parser-ever-made.
|
|
11
12
|
|
|
@@ -292,6 +293,14 @@ parser
|
|
|
292
293
|
.transform(5, 'base64_encode') // Column 5 to base64
|
|
293
294
|
.transform(6, 'hash_sha256'); // Column 6 to SHA256
|
|
294
295
|
|
|
296
|
+
// Transform a field by name:
|
|
297
|
+
parser
|
|
298
|
+
.transform('name', 'uppercase');
|
|
299
|
+
|
|
300
|
+
// Custom row transform:
|
|
301
|
+
parser
|
|
302
|
+
.transformRow((row, rowObj) => { console.log(row); return row; });
|
|
303
|
+
|
|
295
304
|
// Custom JavaScript transforms
|
|
296
305
|
parser.transform(7, value => new Date(value).toISOString());
|
|
297
306
|
|
|
@@ -455,19 +464,9 @@ cisv -b -d ';' -q "'" --trim european.csv
|
|
|
455
464
|
- Linux/Unix support only (optimized for x86_64 CPU)
|
|
456
465
|
- Windows support planned for future release
|
|
457
466
|
|
|
458
|
-
## CONTRIBUTING
|
|
459
|
-
|
|
460
|
-
Areas of interest:
|
|
461
|
-
- [ ] ARM NEON/SVE optimization improvements (in progress)
|
|
462
|
-
- [ ] Windows native support
|
|
463
|
-
- [ ] Parallel parsing for multi-core systems
|
|
464
|
-
- [ ] Custom memory allocators
|
|
465
|
-
- [ ] Streaming compression support
|
|
466
|
-
- [ ] Additional transform functions
|
|
467
|
-
|
|
468
467
|
## LICENSE
|
|
469
468
|
|
|
470
|
-
|
|
469
|
+
MIT © [sanix-darker](https://github.com/sanix-darker)
|
|
471
470
|
|
|
472
471
|
## ACKNOWLEDGMENTS
|
|
473
472
|
|
package/cisv/cisv_addon.cc
CHANGED
|
@@ -153,6 +153,10 @@ public:
|
|
|
153
153
|
InstanceMethod("destroy", &CisvParser::Destroy),
|
|
154
154
|
InstanceMethod("setConfig", &CisvParser::SetConfig),
|
|
155
155
|
InstanceMethod("getConfig", &CisvParser::GetConfig),
|
|
156
|
+
InstanceMethod("transformByName", &CisvParser::TransformByName),
|
|
157
|
+
InstanceMethod("setHeaderFields", &CisvParser::SetHeaderFields),
|
|
158
|
+
InstanceMethod("removeTransformByName", &CisvParser::RemoveTransformByName),
|
|
159
|
+
|
|
156
160
|
StaticMethod("countRows", &CisvParser::CountRows),
|
|
157
161
|
StaticMethod("countRowsWithConfig", &CisvParser::CountRowsWithConfig)
|
|
158
162
|
});
|
|
@@ -591,6 +595,187 @@ public:
|
|
|
591
595
|
return info.This(); // Return this for chaining
|
|
592
596
|
}
|
|
593
597
|
|
|
598
|
+
Napi::Value TransformByName(const Napi::CallbackInfo &info) {
|
|
599
|
+
Napi::Env env = info.Env();
|
|
600
|
+
|
|
601
|
+
if (is_destroyed_) {
|
|
602
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
if (info.Length() < 2) {
|
|
606
|
+
throw Napi::TypeError::New(env, "Expected field name and transform type/function");
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
if (!info[0].IsString()) {
|
|
610
|
+
throw Napi::TypeError::New(env, "Field name must be a string");
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
std::string field_name = info[0].As<Napi::String>();
|
|
614
|
+
|
|
615
|
+
// Ensure pipeline exists (lazy initialization)
|
|
616
|
+
rc_->ensurePipeline();
|
|
617
|
+
|
|
618
|
+
// Store the environment
|
|
619
|
+
rc_->env = env;
|
|
620
|
+
|
|
621
|
+
// Handle string transform types - using the actual C transformer
|
|
622
|
+
if (info[1].IsString()) {
|
|
623
|
+
std::string transform_type = info[1].As<Napi::String>();
|
|
624
|
+
cisv_transform_type_t type;
|
|
625
|
+
|
|
626
|
+
// Map string to C enum
|
|
627
|
+
if (transform_type == "uppercase") {
|
|
628
|
+
type = TRANSFORM_UPPERCASE;
|
|
629
|
+
} else if (transform_type == "lowercase") {
|
|
630
|
+
type = TRANSFORM_LOWERCASE;
|
|
631
|
+
} else if (transform_type == "trim") {
|
|
632
|
+
type = TRANSFORM_TRIM;
|
|
633
|
+
} else if (transform_type == "to_int" || transform_type == "int") {
|
|
634
|
+
type = TRANSFORM_TO_INT;
|
|
635
|
+
} else if (transform_type == "to_float" || transform_type == "float") {
|
|
636
|
+
type = TRANSFORM_TO_FLOAT;
|
|
637
|
+
} else if (transform_type == "hash_sha256" || transform_type == "sha256") {
|
|
638
|
+
type = TRANSFORM_HASH_SHA256;
|
|
639
|
+
} else if (transform_type == "base64_encode" || transform_type == "base64") {
|
|
640
|
+
type = TRANSFORM_BASE64_ENCODE;
|
|
641
|
+
} else {
|
|
642
|
+
throw Napi::Error::New(env, "Unknown transform type: " + transform_type);
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
// Create context if provided
|
|
646
|
+
cisv_transform_context_t* ctx = nullptr;
|
|
647
|
+
if (info.Length() >= 3 && info[2].IsObject()) {
|
|
648
|
+
Napi::Object context_obj = info[2].As<Napi::Object>();
|
|
649
|
+
ctx = (cisv_transform_context_t*)calloc(1, sizeof(cisv_transform_context_t));
|
|
650
|
+
|
|
651
|
+
// Extract context properties if they exist
|
|
652
|
+
if (context_obj.Has("key")) {
|
|
653
|
+
Napi::Value key_val = context_obj.Get("key");
|
|
654
|
+
if (key_val.IsString()) {
|
|
655
|
+
std::string key = key_val.As<Napi::String>();
|
|
656
|
+
ctx->key = strdup(key.c_str());
|
|
657
|
+
ctx->key_len = key.length();
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
if (context_obj.Has("iv")) {
|
|
662
|
+
Napi::Value iv_val = context_obj.Get("iv");
|
|
663
|
+
if (iv_val.IsString()) {
|
|
664
|
+
std::string iv = iv_val.As<Napi::String>();
|
|
665
|
+
ctx->iv = strdup(iv.c_str());
|
|
666
|
+
ctx->iv_len = iv.length();
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
// Add to the C transform pipeline by name
|
|
672
|
+
if (cisv_transform_pipeline_add_by_name(rc_->pipeline, field_name.c_str(), type, ctx) < 0) {
|
|
673
|
+
// Clean up context if adding failed
|
|
674
|
+
if (ctx) {
|
|
675
|
+
if (ctx->key) free((void*)ctx->key);
|
|
676
|
+
if (ctx->iv) free((void*)ctx->iv);
|
|
677
|
+
if (ctx->extra) free(ctx->extra);
|
|
678
|
+
free(ctx);
|
|
679
|
+
}
|
|
680
|
+
throw Napi::Error::New(env, "Failed to add transform for field: " + field_name);
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
} else if (info[1].IsFunction()) {
|
|
684
|
+
// Handle JavaScript function transforms by name
|
|
685
|
+
Napi::Function func = info[1].As<Napi::Function>();
|
|
686
|
+
|
|
687
|
+
// Add to the C transform pipeline by name
|
|
688
|
+
if (cisv_transform_pipeline_add_js_by_name(rc_->pipeline, field_name.c_str(), &func) < 0) {
|
|
689
|
+
throw Napi::Error::New(env, "Failed to add JS transform for field: " + field_name);
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
} else {
|
|
693
|
+
throw Napi::TypeError::New(env, "Transform must be a string type or function");
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
return info.This(); // Return this for chaining
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
void SetHeaderFields(const Napi::CallbackInfo &info) {
|
|
700
|
+
Napi::Env env = info.Env();
|
|
701
|
+
|
|
702
|
+
if (is_destroyed_) {
|
|
703
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
if (info.Length() != 1 || !info[0].IsArray()) {
|
|
707
|
+
throw Napi::TypeError::New(env, "Expected array of field names");
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
Napi::Array field_names = info[0].As<Napi::Array>();
|
|
711
|
+
size_t field_count = field_names.Length();
|
|
712
|
+
|
|
713
|
+
// Convert to C array of strings
|
|
714
|
+
const char** c_field_names = (const char**)malloc(field_count * sizeof(char*));
|
|
715
|
+
if (!c_field_names) {
|
|
716
|
+
throw Napi::Error::New(env, "Memory allocation failed");
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
for (size_t i = 0; i < field_count; i++) {
|
|
720
|
+
Napi::Value field_val = field_names[i];
|
|
721
|
+
if (!field_val.IsString()) {
|
|
722
|
+
free(c_field_names);
|
|
723
|
+
throw Napi::TypeError::New(env, "Field names must be strings");
|
|
724
|
+
}
|
|
725
|
+
std::string field_str = field_val.As<Napi::String>();
|
|
726
|
+
c_field_names[i] = strdup(field_str.c_str());
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
// Ensure pipeline exists
|
|
730
|
+
rc_->ensurePipeline();
|
|
731
|
+
|
|
732
|
+
// Set header fields in the pipeline
|
|
733
|
+
if (cisv_transform_pipeline_set_header(rc_->pipeline, c_field_names, field_count) < 0) {
|
|
734
|
+
// Clean up
|
|
735
|
+
for (size_t i = 0; i < field_count; i++) {
|
|
736
|
+
free((void*)c_field_names[i]);
|
|
737
|
+
}
|
|
738
|
+
free(c_field_names);
|
|
739
|
+
throw Napi::Error::New(env, "Failed to set header fields");
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
// Clean up temporary array (the pipeline makes copies)
|
|
743
|
+
for (size_t i = 0; i < field_count; i++) {
|
|
744
|
+
free((void*)c_field_names[i]);
|
|
745
|
+
}
|
|
746
|
+
free(c_field_names);
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
// Remove transforms registered under a field name (currently only JS transforms; see TODO below)
|
|
750
|
+
Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
751
|
+
Napi::Env env = info.Env();
|
|
752
|
+
|
|
753
|
+
if (is_destroyed_) {
|
|
754
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
if (info.Length() != 1 || !info[0].IsString()) {
|
|
758
|
+
throw Napi::TypeError::New(env, "Expected field name");
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
std::string field_name = info[0].As<Napi::String>();
|
|
762
|
+
|
|
763
|
+
// Remove from JavaScript transforms by finding the field index
|
|
764
|
+
if (rc_->pipeline && rc_->pipeline->header_fields) {
|
|
765
|
+
for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
|
|
766
|
+
if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
|
|
767
|
+
rc_->js_transforms.erase(i);
|
|
768
|
+
break;
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
// TODO: Implement removal of C transforms by name in cisv_transformer.c
|
|
774
|
+
// For now, this only removes JS transforms
|
|
775
|
+
|
|
776
|
+
return info.This();
|
|
777
|
+
}
|
|
778
|
+
|
|
594
779
|
Napi::Value RemoveTransform(const Napi::CallbackInfo &info) {
|
|
595
780
|
Napi::Env env = info.Env();
|
|
596
781
|
|
package/cisv/cisv_transformer.c
CHANGED
|
@@ -173,6 +173,90 @@ int cisv_transform_pipeline_add_js(
|
|
|
173
173
|
return 0;
|
|
174
174
|
}
|
|
175
175
|
|
|
176
|
+
// Set header fields for name-based transforms
|
|
177
|
+
int cisv_transform_pipeline_set_header(
|
|
178
|
+
cisv_transform_pipeline_t *pipeline,
|
|
179
|
+
const char **field_names,
|
|
180
|
+
size_t field_count
|
|
181
|
+
) {
|
|
182
|
+
if (!pipeline || !field_names || field_count == 0) return -1;
|
|
183
|
+
|
|
184
|
+
// Free existing header fields if any
|
|
185
|
+
if (pipeline->header_fields) {
|
|
186
|
+
for (size_t i = 0; i < pipeline->header_count; i++) {
|
|
187
|
+
free(pipeline->header_fields[i]);
|
|
188
|
+
}
|
|
189
|
+
free(pipeline->header_fields);
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
// Allocate new header fields array
|
|
193
|
+
pipeline->header_fields = malloc(field_count * sizeof(char *));
|
|
194
|
+
if (!pipeline->header_fields) return -1;
|
|
195
|
+
|
|
196
|
+
// Copy each field name
|
|
197
|
+
for (size_t i = 0; i < field_count; i++) {
|
|
198
|
+
pipeline->header_fields[i] = strdup(field_names[i]);
|
|
199
|
+
if (!pipeline->header_fields[i]) {
|
|
200
|
+
// Cleanup on failure
|
|
201
|
+
for (size_t j = 0; j < i; j++) {
|
|
202
|
+
free(pipeline->header_fields[j]);
|
|
203
|
+
}
|
|
204
|
+
free(pipeline->header_fields);
|
|
205
|
+
pipeline->header_fields = NULL;
|
|
206
|
+
return -1;
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
pipeline->header_count = field_count;
|
|
211
|
+
return 0;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Add JavaScript callback transform by field name
|
|
215
|
+
int cisv_transform_pipeline_add_js_by_name(
|
|
216
|
+
cisv_transform_pipeline_t *pipeline,
|
|
217
|
+
const char *field_name,
|
|
218
|
+
void *js_callback
|
|
219
|
+
) {
|
|
220
|
+
if (!pipeline || !field_name || !js_callback || !pipeline->header_fields) return -1;
|
|
221
|
+
|
|
222
|
+
// Find field index by name
|
|
223
|
+
int field_index = -1;
|
|
224
|
+
for (size_t i = 0; i < pipeline->header_count; i++) {
|
|
225
|
+
if (strcmp(pipeline->header_fields[i], field_name) == 0) {
|
|
226
|
+
field_index = (int)i;
|
|
227
|
+
break;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
if (field_index == -1) return -1; // Field not found
|
|
232
|
+
|
|
233
|
+
return cisv_transform_pipeline_add_js(pipeline, field_index, js_callback);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// Add transform by field name
|
|
237
|
+
int cisv_transform_pipeline_add_by_name(
|
|
238
|
+
cisv_transform_pipeline_t *pipeline,
|
|
239
|
+
const char *field_name,
|
|
240
|
+
cisv_transform_type_t type,
|
|
241
|
+
cisv_transform_context_t *ctx
|
|
242
|
+
) {
|
|
243
|
+
if (!pipeline || !field_name || !pipeline->header_fields) return -1;
|
|
244
|
+
|
|
245
|
+
// Find field index by name
|
|
246
|
+
int field_index = -1;
|
|
247
|
+
for (size_t i = 0; i < pipeline->header_count; i++) {
|
|
248
|
+
if (strcmp(pipeline->header_fields[i], field_name) == 0) {
|
|
249
|
+
field_index = (int)i;
|
|
250
|
+
break;
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if (field_index == -1) return -1; // Field not found
|
|
255
|
+
|
|
256
|
+
return cisv_transform_pipeline_add(pipeline, field_index, type, ctx);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
|
|
176
260
|
// Apply transforms for a field
|
|
177
261
|
cisv_transform_result_t cisv_transform_apply(
|
|
178
262
|
cisv_transform_pipeline_t *pipeline,
|
package/cisv/cisv_transformer.h
CHANGED
|
@@ -71,6 +71,7 @@ typedef struct {
|
|
|
71
71
|
cisv_transform_fn fn;
|
|
72
72
|
cisv_transform_context_t *ctx;
|
|
73
73
|
int field_index; // -1 for all fields
|
|
74
|
+
const char *field_name; // Field name to match (alternative to index)
|
|
74
75
|
void *js_callback; // For JS callbacks (napi_ref)
|
|
75
76
|
} cisv_transform_t;
|
|
76
77
|
|
|
@@ -87,6 +88,9 @@ typedef struct {
|
|
|
87
88
|
|
|
88
89
|
// SIMD alignment
|
|
89
90
|
size_t alignment;
|
|
91
|
+
// Header field names for mapping
|
|
92
|
+
char **header_fields;
|
|
93
|
+
size_t header_count;
|
|
90
94
|
} cisv_transform_pipeline_t;
|
|
91
95
|
|
|
92
96
|
typedef struct cisv_js_callback {
|
|
@@ -99,6 +103,13 @@ typedef struct cisv_js_callback {
|
|
|
99
103
|
cisv_transform_pipeline_t *cisv_transform_pipeline_create(size_t initial_capacity);
|
|
100
104
|
void cisv_transform_pipeline_destroy(cisv_transform_pipeline_t *pipeline);
|
|
101
105
|
|
|
106
|
+
// Set header fields for name-based transforms
|
|
107
|
+
int cisv_transform_pipeline_set_header(
|
|
108
|
+
cisv_transform_pipeline_t *pipeline,
|
|
109
|
+
const char **field_names,
|
|
110
|
+
size_t field_count
|
|
111
|
+
);
|
|
112
|
+
|
|
102
113
|
// Add transforms to pipeline
|
|
103
114
|
int cisv_transform_pipeline_add(
|
|
104
115
|
cisv_transform_pipeline_t *pipeline,
|
|
@@ -113,6 +124,21 @@ int cisv_transform_pipeline_add_js(
|
|
|
113
124
|
void *js_callback
|
|
114
125
|
);
|
|
115
126
|
|
|
127
|
+
// Add JavaScript callback transform by field name
|
|
128
|
+
int cisv_transform_pipeline_add_js_by_name(
|
|
129
|
+
cisv_transform_pipeline_t *pipeline,
|
|
130
|
+
const char *field_name,
|
|
131
|
+
void *js_callback
|
|
132
|
+
);
|
|
133
|
+
|
|
134
|
+
// Add transform by field name
|
|
135
|
+
int cisv_transform_pipeline_add_by_name(
|
|
136
|
+
cisv_transform_pipeline_t *pipeline,
|
|
137
|
+
const char *field_name,
|
|
138
|
+
cisv_transform_type_t type,
|
|
139
|
+
cisv_transform_context_t *ctx
|
|
140
|
+
);
|
|
141
|
+
|
|
116
142
|
// Apply transforms
|
|
117
143
|
cisv_transform_result_t cisv_transform_apply(
|
|
118
144
|
cisv_transform_pipeline_t *pipeline,
|
package/index.d.ts
CHANGED
|
@@ -1,400 +1,280 @@
|
|
|
1
1
|
declare module 'cisv' {
|
|
2
2
|
/**
|
|
3
|
-
*
|
|
4
|
-
*/
|
|
5
|
-
export enum TransformType {
|
|
6
|
-
UPPERCASE = 'uppercase',
|
|
7
|
-
LOWERCASE = 'lowercase',
|
|
8
|
-
TRIM = 'trim',
|
|
9
|
-
TO_INT = 'to_int',
|
|
10
|
-
TO_FLOAT = 'to_float',
|
|
11
|
-
HASH_SHA256 = 'hash_sha256',
|
|
12
|
-
BASE64_ENCODE = 'base64_encode',
|
|
13
|
-
CUSTOM = 'custom'
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
/**
|
|
17
|
-
* Transform context for advanced transformations
|
|
18
|
-
*/
|
|
19
|
-
export interface TransformContext {
|
|
20
|
-
key?: string | Buffer;
|
|
21
|
-
iv?: string | Buffer;
|
|
22
|
-
extra?: any;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Extended configuration for CSV parsing
|
|
3
|
+
* Configuration options for the CSV parser
|
|
27
4
|
*/
|
|
28
5
|
export interface CisvConfig {
|
|
6
|
+
/** Field delimiter character (default: ',') */
|
|
29
7
|
delimiter?: string;
|
|
8
|
+
|
|
9
|
+
/** Quote character (default: '"') */
|
|
30
10
|
quote?: string;
|
|
11
|
+
|
|
12
|
+
/** Escape character (null for RFC4180 "" style, default: null) */
|
|
31
13
|
escape?: string | null;
|
|
14
|
+
|
|
15
|
+
/** Comment character to skip lines (default: null) */
|
|
32
16
|
comment?: string | null;
|
|
17
|
+
|
|
18
|
+
/** Trim whitespace from fields (default: false) */
|
|
33
19
|
trim?: boolean;
|
|
20
|
+
|
|
21
|
+
/** Skip empty lines (default: false) */
|
|
34
22
|
skipEmptyLines?: boolean;
|
|
23
|
+
|
|
24
|
+
/** Use relaxed parsing rules (default: false) */
|
|
35
25
|
relaxed?: boolean;
|
|
26
|
+
|
|
27
|
+
/** Skip lines with parse errors (default: false) */
|
|
36
28
|
skipLinesWithError?: boolean;
|
|
29
|
+
|
|
30
|
+
/** Maximum row size in bytes (0 = unlimited, default: 0) */
|
|
37
31
|
maxRowSize?: number;
|
|
32
|
+
|
|
33
|
+
/** Start parsing from line N (1-based, default: 1) */
|
|
38
34
|
fromLine?: number;
|
|
35
|
+
|
|
36
|
+
/** Stop parsing at line N (0 = until end, default: 0) */
|
|
39
37
|
toLine?: number;
|
|
40
38
|
}
|
|
41
39
|
|
|
42
40
|
/**
|
|
43
|
-
*
|
|
41
|
+
* Parsed row is an array of string values
|
|
44
42
|
*/
|
|
45
|
-
export
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
43
|
+
export type ParsedRow = string[];
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Statistics about the parsing operation
|
|
47
|
+
*/
|
|
48
|
+
export interface ParseStats {
|
|
49
|
+
/** Number of rows parsed */
|
|
50
|
+
rowCount: number;
|
|
51
|
+
|
|
52
|
+
/** Number of fields per row */
|
|
53
|
+
fieldCount: number;
|
|
54
|
+
|
|
55
|
+
/** Total bytes processed */
|
|
56
|
+
totalBytes: number;
|
|
57
|
+
|
|
58
|
+
/** Time taken to parse in milliseconds */
|
|
59
|
+
parseTime: number;
|
|
60
|
+
|
|
61
|
+
/** Current line number being processed */
|
|
62
|
+
currentLine: number;
|
|
52
63
|
}
|
|
53
64
|
|
|
54
65
|
/**
|
|
55
|
-
*
|
|
66
|
+
* Information about registered transforms
|
|
56
67
|
*/
|
|
57
|
-
export interface
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
68
|
+
export interface TransformInfo {
|
|
69
|
+
/** Number of C transforms registered */
|
|
70
|
+
cTransformCount: number;
|
|
71
|
+
|
|
72
|
+
/** Number of JavaScript transforms registered */
|
|
73
|
+
jsTransformCount: number;
|
|
74
|
+
|
|
75
|
+
/** Field indices that have transforms */
|
|
76
|
+
fieldIndices: number[];
|
|
64
77
|
}
|
|
65
78
|
|
|
66
79
|
/**
|
|
67
|
-
* Transform function
|
|
80
|
+
* Transform function signature for field transforms
|
|
68
81
|
*/
|
|
69
|
-
export type
|
|
70
|
-
value: string,
|
|
71
|
-
rowIndex: number,
|
|
72
|
-
fieldIndex: number
|
|
73
|
-
) => string;
|
|
82
|
+
export type FieldTransformFn = (value: string, fieldIndex: number) => string;
|
|
74
83
|
|
|
75
84
|
/**
|
|
76
|
-
*
|
|
85
|
+
* Transform function signature for row transforms
|
|
86
|
+
* @param row - Array of field values
|
|
87
|
+
* @param rowObj - Object with field names as keys (if header is known)
|
|
88
|
+
* @returns Modified row array, object, or null to skip the row
|
|
77
89
|
*/
|
|
78
|
-
export
|
|
79
|
-
|
|
80
|
-
|
|
90
|
+
export type RowTransformFn = (
|
|
91
|
+
row: string[],
|
|
92
|
+
rowObj?: Record<string, string>
|
|
93
|
+
) => string[] | Record<string, string> | null;
|
|
81
94
|
|
|
82
95
|
/**
|
|
83
|
-
*
|
|
96
|
+
* Built-in transform types
|
|
84
97
|
*/
|
|
85
|
-
export
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
98
|
+
export type TransformType =
|
|
99
|
+
| 'uppercase'
|
|
100
|
+
| 'lowercase'
|
|
101
|
+
| 'trim'
|
|
102
|
+
| 'to_int'
|
|
103
|
+
| 'int'
|
|
104
|
+
| 'to_float'
|
|
105
|
+
| 'float'
|
|
106
|
+
| 'hash_sha256'
|
|
107
|
+
| 'sha256'
|
|
108
|
+
| 'base64_encode'
|
|
109
|
+
| 'base64';
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Transform context for advanced transforms
|
|
113
|
+
*/
|
|
114
|
+
export interface TransformContext {
|
|
115
|
+
/** Encryption/hash key if needed */
|
|
116
|
+
key?: string;
|
|
117
|
+
|
|
118
|
+
/** Initialization vector */
|
|
119
|
+
iv?: string;
|
|
120
|
+
|
|
121
|
+
/** Extra context data */
|
|
122
|
+
extra?: any;
|
|
90
123
|
}
|
|
91
124
|
|
|
92
125
|
/**
|
|
93
|
-
*
|
|
126
|
+
* High-performance CSV parser with SIMD optimization
|
|
94
127
|
*/
|
|
95
128
|
export class cisvParser {
|
|
96
|
-
|
|
129
|
+
/**
|
|
130
|
+
* Create a new CSV parser instance
|
|
131
|
+
* @param config - Optional configuration options
|
|
132
|
+
*/
|
|
133
|
+
constructor(config?: CisvConfig);
|
|
97
134
|
|
|
98
135
|
/**
|
|
99
136
|
* Parse CSV file synchronously
|
|
100
|
-
* @param path Path to CSV file
|
|
101
|
-
* @returns Array of rows
|
|
137
|
+
* @param path - Path to CSV file
|
|
138
|
+
* @returns Array of parsed rows
|
|
102
139
|
*/
|
|
103
|
-
parseSync(path: string):
|
|
140
|
+
parseSync(path: string): ParsedRow[];
|
|
104
141
|
|
|
105
142
|
/**
|
|
106
143
|
* Parse CSV file asynchronously
|
|
107
|
-
* @param path Path to CSV file
|
|
108
|
-
* @returns Promise resolving to array of rows
|
|
144
|
+
* @param path - Path to CSV file
|
|
145
|
+
* @returns Promise resolving to array of parsed rows
|
|
109
146
|
*/
|
|
110
|
-
parse(path: string): Promise<
|
|
147
|
+
parse(path: string): Promise<ParsedRow[]>;
|
|
111
148
|
|
|
112
149
|
/**
|
|
113
150
|
* Parse CSV string content
|
|
114
|
-
* @param
|
|
115
|
-
* @returns Array of rows
|
|
151
|
+
* @param csv - CSV string content
|
|
152
|
+
* @returns Array of parsed rows
|
|
116
153
|
*/
|
|
117
|
-
parseString(
|
|
154
|
+
parseString(csv: string): ParsedRow[];
|
|
118
155
|
|
|
119
156
|
/**
|
|
120
|
-
* Write chunk of
|
|
121
|
-
* @param chunk Buffer
|
|
157
|
+
* Write chunk of data for streaming parsing
|
|
158
|
+
* @param chunk - Data chunk as Buffer or string
|
|
122
159
|
*/
|
|
123
|
-
write(chunk:
|
|
160
|
+
write(chunk: Buffer | string): void;
|
|
124
161
|
|
|
125
162
|
/**
|
|
126
|
-
* Signal end of
|
|
163
|
+
* Signal end of streaming data
|
|
127
164
|
*/
|
|
128
165
|
end(): void;
|
|
129
166
|
|
|
130
167
|
/**
|
|
131
|
-
* Get
|
|
168
|
+
* Get accumulated parsed rows
|
|
132
169
|
* @returns Array of parsed rows
|
|
133
170
|
*/
|
|
134
|
-
getRows():
|
|
171
|
+
getRows(): ParsedRow[];
|
|
135
172
|
|
|
136
173
|
/**
|
|
137
|
-
* Clear
|
|
174
|
+
* Clear accumulated data
|
|
138
175
|
*/
|
|
139
176
|
clear(): void;
|
|
140
177
|
|
|
141
178
|
/**
|
|
142
|
-
*
|
|
143
|
-
* @param config
|
|
179
|
+
* Set parser configuration
|
|
180
|
+
* @param config - Configuration options
|
|
144
181
|
*/
|
|
145
182
|
setConfig(config: CisvConfig): void;
|
|
146
183
|
|
|
147
184
|
/**
|
|
148
185
|
* Get current parser configuration
|
|
149
|
-
* @returns
|
|
186
|
+
* @returns Current configuration
|
|
150
187
|
*/
|
|
151
188
|
getConfig(): CisvConfig;
|
|
152
189
|
|
|
153
190
|
/**
|
|
154
|
-
* Add
|
|
155
|
-
* @param
|
|
156
|
-
* @param transform Transform type or custom function
|
|
157
|
-
* @param context Optional transform context
|
|
158
|
-
* @returns
|
|
191
|
+
* Add field transform by index or name
|
|
192
|
+
* @param field - Field index (0-based) or field name; use -1 for all fields
|
|
193
|
+
* @param transform - Transform type or custom function
|
|
194
|
+
* @param context - Optional transform context
|
|
195
|
+
* @returns Parser instance for chaining
|
|
159
196
|
*/
|
|
160
197
|
transform(
|
|
161
|
-
|
|
162
|
-
transform: TransformType |
|
|
198
|
+
field: number | string,
|
|
199
|
+
transform: TransformType | FieldTransformFn,
|
|
163
200
|
context?: TransformContext
|
|
164
201
|
): this;
|
|
165
202
|
|
|
166
203
|
/**
|
|
167
|
-
* Add
|
|
168
|
-
* @param
|
|
169
|
-
* @returns
|
|
170
|
-
*/
|
|
171
|
-
transformMany(transforms: Record<number, TransformType | TransformFunction>): this;
|
|
172
|
-
|
|
173
|
-
/**
|
|
174
|
-
* Remove transformation from a field
|
|
175
|
-
* @param fieldIndex Index of the field
|
|
176
|
-
* @returns this for chaining
|
|
204
|
+
* Add row-level transform
|
|
205
|
+
* @param transform - Row transform function
|
|
206
|
+
* @returns Parser instance for chaining
|
|
177
207
|
*/
|
|
178
|
-
|
|
208
|
+
transformRow(transform: RowTransformFn): this;
|
|
179
209
|
|
|
180
210
|
/**
|
|
181
|
-
*
|
|
182
|
-
* @
|
|
211
|
+
* Set header fields for field name mapping
|
|
212
|
+
* @param fields - Array of field names
|
|
213
|
+
* @returns Parser instance for chaining
|
|
183
214
|
*/
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
/**
|
|
187
|
-
* Apply transformations to existing data
|
|
188
|
-
* @param data Array of rows to transform
|
|
189
|
-
* @returns Transformed data
|
|
190
|
-
*/
|
|
191
|
-
applyTransforms(data: string[][]): string[][];
|
|
215
|
+
setHeader(fields: string[]): this;
|
|
192
216
|
|
|
193
217
|
/**
|
|
194
|
-
*
|
|
195
|
-
* @param
|
|
196
|
-
* @returns
|
|
218
|
+
* Remove transform for specific field
|
|
219
|
+
* @param field - Field index or name
|
|
220
|
+
* @returns Parser instance for chaining
|
|
197
221
|
*/
|
|
198
|
-
|
|
222
|
+
removeTransform(field: number | string): this;
|
|
199
223
|
|
|
200
224
|
/**
|
|
201
|
-
*
|
|
202
|
-
* @
|
|
203
|
-
* @returns this for chaining
|
|
225
|
+
* Clear all transforms
|
|
226
|
+
* @returns Parser instance for chaining
|
|
204
227
|
*/
|
|
205
|
-
|
|
228
|
+
clearTransforms(): this;
|
|
206
229
|
|
|
207
230
|
/**
|
|
208
|
-
* Get statistics
|
|
209
|
-
* @returns
|
|
231
|
+
* Get parsing statistics
|
|
232
|
+
* @returns Statistics object
|
|
210
233
|
*/
|
|
211
234
|
getStats(): ParseStats;
|
|
212
235
|
|
|
213
236
|
/**
|
|
214
|
-
* Get information about
|
|
215
|
-
* @returns
|
|
237
|
+
* Get information about registered transforms
|
|
238
|
+
* @returns Transform information
|
|
216
239
|
*/
|
|
217
240
|
getTransformInfo(): TransformInfo;
|
|
218
241
|
|
|
219
242
|
/**
|
|
220
|
-
* Destroy
|
|
243
|
+
* Destroy parser and free resources
|
|
221
244
|
*/
|
|
222
245
|
destroy(): void;
|
|
223
246
|
|
|
224
247
|
/**
|
|
225
|
-
* Count rows in
|
|
226
|
-
* @param path Path to CSV file
|
|
248
|
+
* Count rows in CSV file without parsing
|
|
249
|
+
* @param path - Path to CSV file
|
|
227
250
|
* @returns Number of rows
|
|
228
251
|
*/
|
|
229
252
|
static countRows(path: string): number;
|
|
230
253
|
|
|
231
254
|
/**
|
|
232
|
-
* Count rows with
|
|
233
|
-
* @param path Path to CSV file
|
|
234
|
-
* @param config
|
|
255
|
+
* Count rows with specific configuration
|
|
256
|
+
* @param path - Path to CSV file
|
|
257
|
+
* @param config - Configuration options
|
|
235
258
|
* @returns Number of rows
|
|
236
259
|
*/
|
|
237
260
|
static countRowsWithConfig(path: string, config?: CisvConfig): number;
|
|
238
|
-
|
|
239
|
-
/**
|
|
240
|
-
* Create a new parser instance with transforms
|
|
241
|
-
* @param options Parse options
|
|
242
|
-
* @returns New parser instance
|
|
243
|
-
*/
|
|
244
|
-
static create(options?: CisvConfig): cisvParser;
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
/**
|
|
248
|
-
* CSV Writer class for generating CSV files
|
|
249
|
-
*/
|
|
250
|
-
export class cisvWriter {
|
|
251
|
-
constructor(options?: WriteOptions);
|
|
252
|
-
|
|
253
|
-
/**
|
|
254
|
-
* Write CSV data to file
|
|
255
|
-
* @param path Output file path
|
|
256
|
-
* @param data Array of rows to write
|
|
257
|
-
*/
|
|
258
|
-
writeSync(path: string, data: any[][]): void;
|
|
259
|
-
|
|
260
|
-
/**
|
|
261
|
-
* Write CSV data to file asynchronously
|
|
262
|
-
* @param path Output file path
|
|
263
|
-
* @param data Array of rows to write
|
|
264
|
-
* @returns Promise that resolves when complete
|
|
265
|
-
*/
|
|
266
|
-
write(path: string, data: any[][]): Promise<void>;
|
|
267
|
-
|
|
268
|
-
/**
|
|
269
|
-
* Convert data to CSV string
|
|
270
|
-
* @param data Array of rows
|
|
271
|
-
* @returns CSV string
|
|
272
|
-
*/
|
|
273
|
-
stringify(data: any[][]): string;
|
|
274
|
-
|
|
275
|
-
/**
|
|
276
|
-
* Stream write rows
|
|
277
|
-
* @param row Single row to write
|
|
278
|
-
*/
|
|
279
|
-
writeRow(row: any[]): void;
|
|
280
|
-
|
|
281
|
-
/**
|
|
282
|
-
* Finish streaming write
|
|
283
|
-
*/
|
|
284
|
-
end(): void;
|
|
285
|
-
|
|
286
|
-
/**
|
|
287
|
-
* Generate test CSV data
|
|
288
|
-
* @param rows Number of rows to generate
|
|
289
|
-
* @param fields Number of fields per row
|
|
290
|
-
* @returns Generated data
|
|
291
|
-
*/
|
|
292
|
-
static generate(rows: number, fields?: number): string[][];
|
|
293
261
|
}
|
|
294
262
|
|
|
295
263
|
/**
|
|
296
|
-
*
|
|
264
|
+
* Transform type constants
|
|
297
265
|
*/
|
|
298
|
-
export
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
* Validate CSV file structure
|
|
308
|
-
* @param path Path to CSV file
|
|
309
|
-
* @returns Validation result
|
|
310
|
-
*/
|
|
311
|
-
export function validate(path: string): {
|
|
312
|
-
valid: boolean;
|
|
313
|
-
errors?: string[];
|
|
314
|
-
warnings?: string[];
|
|
315
|
-
};
|
|
316
|
-
|
|
317
|
-
/**
|
|
318
|
-
* Convert CSV to JSON
|
|
319
|
-
* @param data CSV data
|
|
320
|
-
* @param headers Use first row as headers
|
|
321
|
-
* @returns JSON representation
|
|
322
|
-
*/
|
|
323
|
-
export function toJSON(data: string[][], headers?: boolean): any[];
|
|
324
|
-
|
|
325
|
-
/**
|
|
326
|
-
* Convert JSON to CSV
|
|
327
|
-
* @param data JSON data
|
|
328
|
-
* @returns CSV representation
|
|
329
|
-
*/
|
|
330
|
-
export function fromJSON(data: any[]): string[][];
|
|
331
|
-
|
|
332
|
-
/**
|
|
333
|
-
* Merge multiple CSV files
|
|
334
|
-
* @param paths Array of file paths
|
|
335
|
-
* @param outputPath Output file path
|
|
336
|
-
* @param options Merge options
|
|
337
|
-
*/
|
|
338
|
-
export function merge(
|
|
339
|
-
paths: string[],
|
|
340
|
-
outputPath: string,
|
|
341
|
-
options?: {
|
|
342
|
-
skipHeaders?: boolean;
|
|
343
|
-
delimiter?: string;
|
|
344
|
-
}
|
|
345
|
-
): void;
|
|
346
|
-
|
|
347
|
-
/**
|
|
348
|
-
* Split CSV file into chunks
|
|
349
|
-
* @param path Input file path
|
|
350
|
-
* @param chunkSize Rows per chunk
|
|
351
|
-
* @param outputPrefix Output file prefix
|
|
352
|
-
*/
|
|
353
|
-
export function split(
|
|
354
|
-
path: string,
|
|
355
|
-
chunkSize: number,
|
|
356
|
-
outputPrefix: string
|
|
357
|
-
): string[];
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
/**
|
|
361
|
-
* Performance benchmarking utilities
|
|
362
|
-
*/
|
|
363
|
-
export namespace benchmark {
|
|
364
|
-
/**
|
|
365
|
-
* Run performance benchmark
|
|
366
|
-
* @param path CSV file path
|
|
367
|
-
* @param iterations Number of iterations
|
|
368
|
-
* @returns Benchmark results
|
|
369
|
-
*/
|
|
370
|
-
export function run(
|
|
371
|
-
path: string,
|
|
372
|
-
iterations?: number
|
|
373
|
-
): {
|
|
374
|
-
avgTime: number;
|
|
375
|
-
minTime: number;
|
|
376
|
-
maxTime: number;
|
|
377
|
-
throughput: number;
|
|
378
|
-
};
|
|
379
|
-
|
|
380
|
-
/**
|
|
381
|
-
* Compare parser performance
|
|
382
|
-
* @param paths Array of file paths
|
|
383
|
-
* @returns Comparison results
|
|
384
|
-
*/
|
|
385
|
-
export function compare(paths: string[]): Record<string, any>;
|
|
386
|
-
}
|
|
266
|
+
export const TransformType: {
|
|
267
|
+
readonly UPPERCASE: 'uppercase';
|
|
268
|
+
readonly LOWERCASE: 'lowercase';
|
|
269
|
+
readonly TRIM: 'trim';
|
|
270
|
+
readonly TO_INT: 'to_int';
|
|
271
|
+
readonly TO_FLOAT: 'to_float';
|
|
272
|
+
readonly HASH_SHA256: 'hash_sha256';
|
|
273
|
+
readonly BASE64_ENCODE: 'base64_encode';
|
|
274
|
+
};
|
|
387
275
|
|
|
388
276
|
/**
|
|
389
|
-
*
|
|
277
|
+
* Library version
|
|
390
278
|
*/
|
|
391
|
-
const
|
|
392
|
-
Parser: typeof cisvParser;
|
|
393
|
-
Writer: typeof cisvWriter;
|
|
394
|
-
utils: typeof utils;
|
|
395
|
-
benchmark: typeof benchmark;
|
|
396
|
-
TransformType: typeof TransformType;
|
|
397
|
-
};
|
|
398
|
-
|
|
399
|
-
export default cisv;
|
|
279
|
+
export const version: string;
|
|
400
280
|
}
|