cisv 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,614 @@
1
#include <napi.h>

#include "cisv_parser.h"
#include "cisv_transformer.h"

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
9
+
10
+ namespace {
11
+
12
+ // Extended RowCollector that handles transforms
13
+ struct RowCollector {
14
+ std::vector<std::string> current;
15
+ std::vector<std::vector<std::string>> rows;
16
+ cisv_transform_pipeline_t* pipeline;
17
+ int current_field_index;
18
+
19
+ // JavaScript transforms stored separately
20
+ std::unordered_map<int, Napi::FunctionReference> js_transforms;
21
+ Napi::Env env;
22
+
23
+ RowCollector() : pipeline(nullptr), current_field_index(0), env(nullptr) {
24
+ // DON'T create the pipeline here - do it lazily when needed
25
+ pipeline = nullptr;
26
+ }
27
+
28
+ ~RowCollector() {
29
+ cleanup();
30
+ }
31
+
32
+ void cleanup() {
33
+ if (pipeline) {
34
+ cisv_transform_pipeline_destroy(pipeline);
35
+ pipeline = nullptr;
36
+ }
37
+ js_transforms.clear();
38
+ rows.clear();
39
+ current.clear();
40
+ current_field_index = 0;
41
+ env = nullptr;
42
+ }
43
+
44
+ // Lazy initialization of pipeline
45
+ void ensurePipeline() {
46
+ if (!pipeline) {
47
+ pipeline = cisv_transform_pipeline_create(16);
48
+ }
49
+ }
50
+
51
+ // Apply both C and JS transforms
52
+ std::string applyTransforms(const char* data, size_t len, int field_index) {
53
+ std::string result(data, len);
54
+
55
+ // First apply C transforms
56
+ if (pipeline && pipeline->count > 0) {
57
+ cisv_transform_result_t c_result = cisv_transform_apply(
58
+ pipeline,
59
+ field_index,
60
+ result.c_str(),
61
+ result.length()
62
+ );
63
+
64
+ if (c_result.data) {
65
+ result = std::string(c_result.data, c_result.len);
66
+ // IMPORTANT: Free the result if it was allocated
67
+ if (c_result.needs_free) {
68
+ cisv_transform_result_free(&c_result);
69
+ }
70
+ }
71
+ }
72
+
73
+ // Then apply JavaScript transforms if we have an environment
74
+ if (env) {
75
+ // Apply field-specific transform
76
+ auto it = js_transforms.find(field_index);
77
+ if (it != js_transforms.end() && !it->second.IsEmpty()) {
78
+ try {
79
+ Napi::String input = Napi::String::New(env, result);
80
+ Napi::Number field = Napi::Number::New(env, field_index);
81
+
82
+ Napi::Value js_result = it->second.Call({input, field});
83
+
84
+ if (js_result.IsString()) {
85
+ result = js_result.As<Napi::String>().Utf8Value();
86
+ }
87
+ } catch (...) {
88
+ // Keep original result if JS transform fails
89
+ }
90
+ }
91
+
92
+ // Apply transforms that apply to all fields (-1 index)
93
+ auto it_all = js_transforms.find(-1);
94
+ if (it_all != js_transforms.end() && !it_all->second.IsEmpty()) {
95
+ try {
96
+ Napi::String input = Napi::String::New(env, result);
97
+ Napi::Number field = Napi::Number::New(env, field_index);
98
+
99
+ Napi::Value js_result = it_all->second.Call({input, field});
100
+
101
+ if (js_result.IsString()) {
102
+ result = js_result.As<Napi::String>().Utf8Value();
103
+ }
104
+ } catch (...) {
105
+ // Keep original result if JS transform fails
106
+ }
107
+ }
108
+ }
109
+
110
+ return result;
111
+ }
112
+ };
113
+
114
+ static void field_cb(void *user, const char *data, size_t len) {
115
+ auto *rc = reinterpret_cast<RowCollector *>(user);
116
+
117
+ // Apply all transforms (C and JS)
118
+ std::string transformed = rc->applyTransforms(data, len, rc->current_field_index);
119
+ rc->current.emplace_back(transformed);
120
+ rc->current_field_index++;
121
+ }
122
+
123
+ static void row_cb(void *user) {
124
+ auto *rc = reinterpret_cast<RowCollector *>(user);
125
+ rc->rows.emplace_back(std::move(rc->current));
126
+ rc->current.clear();
127
+ rc->current_field_index = 0; // Reset field index for next row
128
+ }
129
+
130
+ } // namespace
131
+
132
+ class CisvParser : public Napi::ObjectWrap<CisvParser> {
133
+ public:
134
+ static Napi::Object Init(Napi::Env env, Napi::Object exports) {
135
+ Napi::Function func = DefineClass(env, "cisvParser", {
136
+ InstanceMethod("parseSync", &CisvParser::ParseSync),
137
+ InstanceMethod("parse", &CisvParser::ParseAsync),
138
+ InstanceMethod("parseString", &CisvParser::ParseString),
139
+ InstanceMethod("write", &CisvParser::Write),
140
+ InstanceMethod("end", &CisvParser::End),
141
+ InstanceMethod("getRows", &CisvParser::GetRows),
142
+ InstanceMethod("clear", &CisvParser::Clear),
143
+ InstanceMethod("transform", &CisvParser::Transform),
144
+ InstanceMethod("removeTransform", &CisvParser::RemoveTransform),
145
+ InstanceMethod("clearTransforms", &CisvParser::ClearTransforms),
146
+ InstanceMethod("getStats", &CisvParser::GetStats),
147
+ InstanceMethod("getTransformInfo", &CisvParser::GetTransformInfo),
148
+ InstanceMethod("destroy", &CisvParser::Destroy), // Add explicit destroy
149
+ StaticMethod("countRows", &CisvParser::CountRows)
150
+ });
151
+
152
+ exports.Set("cisvParser", func);
153
+ return exports;
154
+ }
155
+
156
+ CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
157
+ rc_ = new RowCollector();
158
+ parser_ = cisv_parser_create(field_cb, row_cb, rc_);
159
+ parse_time_ = 0;
160
+ total_bytes_ = 0;
161
+ is_destroyed_ = false;
162
+
163
+ // Handle constructor options if provided
164
+ if (info.Length() > 0 && info[0].IsObject()) {
165
+ Napi::Object options = info[0].As<Napi::Object>();
166
+
167
+ // Handle options...
168
+ }
169
+ }
170
+
171
+ ~CisvParser() {
172
+ Cleanup();
173
+ }
174
+
175
+ // Explicit cleanup method
176
+ void Cleanup() {
177
+ if (!is_destroyed_) {
178
+ if (parser_) {
179
+ cisv_parser_destroy(parser_);
180
+ parser_ = nullptr;
181
+ }
182
+ if (rc_) {
183
+ rc_->cleanup();
184
+ delete rc_;
185
+ rc_ = nullptr;
186
+ }
187
+ is_destroyed_ = true;
188
+ }
189
+ }
190
+
191
+ // Explicit destroy method callable from JavaScript
192
+ void Destroy(const Napi::CallbackInfo &info) {
193
+ Cleanup();
194
+ }
195
+
196
+ // Synchronous file parsing
197
+ Napi::Value ParseSync(const Napi::CallbackInfo &info) {
198
+ Napi::Env env = info.Env();
199
+
200
+ if (is_destroyed_) {
201
+ throw Napi::Error::New(env, "Parser has been destroyed");
202
+ }
203
+
204
+ if (info.Length() != 1 || !info[0].IsString()) {
205
+ throw Napi::TypeError::New(env, "Expected file path string");
206
+ }
207
+
208
+ std::string path = info[0].As<Napi::String>();
209
+
210
+ auto start = std::chrono::high_resolution_clock::now();
211
+
212
+ // Clear previous data
213
+ rc_->rows.clear();
214
+ rc_->current.clear();
215
+ rc_->current_field_index = 0;
216
+
217
+ // Set environment for JS transforms
218
+ rc_->env = env;
219
+
220
+ int result = cisv_parser_parse_file(parser_, path.c_str());
221
+
222
+ auto end = std::chrono::high_resolution_clock::now();
223
+ parse_time_ = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
224
+
225
+ // Clear the environment reference after parsing
226
+ rc_->env = nullptr;
227
+
228
+ if (result < 0) {
229
+ throw Napi::Error::New(env, "parse error: " + std::to_string(result));
230
+ }
231
+
232
+ return drainRows(env);
233
+ }
234
+
235
+ // Parse string content
236
+ Napi::Value ParseString(const Napi::CallbackInfo &info) {
237
+ Napi::Env env = info.Env();
238
+
239
+ if (is_destroyed_) {
240
+ throw Napi::Error::New(env, "Parser has been destroyed");
241
+ }
242
+
243
+ if (info.Length() != 1 || !info[0].IsString()) {
244
+ throw Napi::TypeError::New(env, "Expected CSV string");
245
+ }
246
+
247
+ std::string content = info[0].As<Napi::String>();
248
+
249
+ // Clear previous data
250
+ rc_->rows.clear();
251
+ rc_->current.clear();
252
+ rc_->current_field_index = 0;
253
+
254
+ // Set environment for JS transforms
255
+ rc_->env = env;
256
+
257
+ // Write the string content as chunks
258
+ cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
259
+ cisv_parser_end(parser_);
260
+
261
+ total_bytes_ = content.length();
262
+
263
+ // Clear the environment reference after parsing
264
+ rc_->env = nullptr;
265
+
266
+ return drainRows(env);
267
+ }
268
+
269
+ // Write chunk for streaming
270
+ void Write(const Napi::CallbackInfo &info) {
271
+ Napi::Env env = info.Env();
272
+
273
+ if (is_destroyed_) {
274
+ throw Napi::Error::New(env, "Parser has been destroyed");
275
+ }
276
+
277
+ if (info.Length() != 1) {
278
+ throw Napi::TypeError::New(env, "Expected one argument");
279
+ }
280
+
281
+ // Set environment for JS transforms
282
+ rc_->env = env;
283
+
284
+ if (info[0].IsBuffer()) {
285
+ auto buf = info[0].As<Napi::Buffer<uint8_t>>();
286
+ cisv_parser_write(parser_, buf.Data(), buf.Length());
287
+ total_bytes_ += buf.Length();
288
+ return;
289
+ }
290
+
291
+ if (info[0].IsString()) {
292
+ std::string chunk = info[0].As<Napi::String>();
293
+ cisv_parser_write(parser_, reinterpret_cast<const uint8_t*>(chunk.data()), chunk.size());
294
+ total_bytes_ += chunk.size();
295
+ return;
296
+ }
297
+
298
+ throw Napi::TypeError::New(env, "Expected Buffer or String");
299
+ }
300
+
301
+ void End(const Napi::CallbackInfo &info) {
302
+ if (!is_destroyed_) {
303
+ cisv_parser_end(parser_);
304
+ // Clear the environment reference after ending
305
+ rc_->env = nullptr;
306
+ }
307
+ }
308
+
309
+ Napi::Value GetRows(const Napi::CallbackInfo &info) {
310
+ if (is_destroyed_) {
311
+ Napi::Env env = info.Env();
312
+ throw Napi::Error::New(env, "Parser has been destroyed");
313
+ }
314
+ return drainRows(info.Env());
315
+ }
316
+
317
+ void Clear(const Napi::CallbackInfo &info) {
318
+ if (!is_destroyed_ && rc_) {
319
+ rc_->rows.clear();
320
+ rc_->current.clear();
321
+ rc_->current_field_index = 0;
322
+ total_bytes_ = 0;
323
+ parse_time_ = 0;
324
+ // Also clear the environment reference
325
+ rc_->env = nullptr;
326
+ }
327
+ }
328
+
329
+ // Add transform using native C transformer or JavaScript function
330
+ Napi::Value Transform(const Napi::CallbackInfo &info) {
331
+ Napi::Env env = info.Env();
332
+
333
+ if (is_destroyed_) {
334
+ throw Napi::Error::New(env, "Parser has been destroyed");
335
+ }
336
+
337
+ if (info.Length() < 2) {
338
+ throw Napi::TypeError::New(env, "Expected field index and transform type/function");
339
+ }
340
+
341
+ if (!info[0].IsNumber()) {
342
+ throw Napi::TypeError::New(env, "Field index must be a number");
343
+ }
344
+
345
+ int field_index = info[0].As<Napi::Number>().Int32Value();
346
+
347
+ // Ensure pipeline exists (lazy initialization)
348
+ rc_->ensurePipeline();
349
+
350
+ // Store the environment
351
+ rc_->env = env;
352
+
353
+ // Handle string transform types - using the actual C transformer
354
+ if (info[1].IsString()) {
355
+ std::string transform_type = info[1].As<Napi::String>();
356
+ cisv_transform_type_t type;
357
+
358
+ // Map string to C enum
359
+ if (transform_type == "uppercase") {
360
+ type = TRANSFORM_UPPERCASE;
361
+ } else if (transform_type == "lowercase") {
362
+ type = TRANSFORM_LOWERCASE;
363
+ } else if (transform_type == "trim") {
364
+ type = TRANSFORM_TRIM;
365
+ } else if (transform_type == "to_int" || transform_type == "int") {
366
+ type = TRANSFORM_TO_INT;
367
+ } else if (transform_type == "to_float" || transform_type == "float") {
368
+ type = TRANSFORM_TO_FLOAT;
369
+ } else if (transform_type == "hash_sha256" || transform_type == "sha256") {
370
+ type = TRANSFORM_HASH_SHA256;
371
+ } else if (transform_type == "base64_encode" || transform_type == "base64") {
372
+ type = TRANSFORM_BASE64_ENCODE;
373
+ } else {
374
+ throw Napi::Error::New(env, "Unknown transform type: " + transform_type);
375
+ }
376
+
377
+ // Create context if provided
378
+ cisv_transform_context_t* ctx = nullptr;
379
+ if (info.Length() >= 3 && info[2].IsObject()) {
380
+ Napi::Object context_obj = info[2].As<Napi::Object>();
381
+ ctx = (cisv_transform_context_t*)calloc(1, sizeof(cisv_transform_context_t));
382
+
383
+ // Extract context properties if they exist
384
+ if (context_obj.Has("key")) {
385
+ Napi::Value key_val = context_obj.Get("key");
386
+ if (key_val.IsString()) {
387
+ std::string key = key_val.As<Napi::String>();
388
+ ctx->key = strdup(key.c_str());
389
+ ctx->key_len = key.length();
390
+ }
391
+ }
392
+
393
+ if (context_obj.Has("iv")) {
394
+ Napi::Value iv_val = context_obj.Get("iv");
395
+ if (iv_val.IsString()) {
396
+ std::string iv = iv_val.As<Napi::String>();
397
+ ctx->iv = strdup(iv.c_str());
398
+ ctx->iv_len = iv.length();
399
+ }
400
+ }
401
+ }
402
+
403
+ // Add to the C transform pipeline
404
+ if (cisv_transform_pipeline_add(rc_->pipeline, field_index, type, ctx) < 0) {
405
+ // Clean up context if adding failed
406
+ if (ctx) {
407
+ if (ctx->key) free(ctx->key);
408
+ if (ctx->iv) free(ctx->iv);
409
+ if (ctx->extra) free(ctx->extra);
410
+ free(ctx);
411
+ }
412
+ throw Napi::Error::New(env, "Failed to add transform");
413
+ }
414
+
415
+ } else if (info[1].IsFunction()) {
416
+ // Handle JavaScript function transforms
417
+ Napi::Function func = info[1].As<Napi::Function>();
418
+
419
+ // Store the function reference for this field
420
+ rc_->js_transforms[field_index] = Napi::Persistent(func);
421
+
422
+ } else {
423
+ throw Napi::TypeError::New(env, "Transform must be a string type or function");
424
+ }
425
+
426
+ return info.This(); // Return this for chaining
427
+ }
428
+
429
+ Napi::Value RemoveTransform(const Napi::CallbackInfo &info) {
430
+ Napi::Env env = info.Env();
431
+
432
+ if (is_destroyed_) {
433
+ throw Napi::Error::New(env, "Parser has been destroyed");
434
+ }
435
+
436
+ if (info.Length() != 1 || !info[0].IsNumber()) {
437
+ throw Napi::TypeError::New(env, "Expected field index");
438
+ }
439
+
440
+ int field_index = info[0].As<Napi::Number>().Int32Value();
441
+
442
+ // Remove from JavaScript transforms
443
+ rc_->js_transforms.erase(field_index);
444
+
445
+ // TODO: Implement removal of C transforms in cisv_transformer.c
446
+ // For now, this only removes JS transforms
447
+
448
+ return info.This();
449
+ }
450
+
451
+ Napi::Value ClearTransforms(const Napi::CallbackInfo &info) {
452
+ if (is_destroyed_) {
453
+ Napi::Env env = info.Env();
454
+ throw Napi::Error::New(env, "Parser has been destroyed");
455
+ }
456
+
457
+ // Clear JavaScript transforms
458
+ rc_->js_transforms.clear();
459
+
460
+ // Clear C transforms - destroy and DON'T recreate pipeline yet
461
+ if (rc_->pipeline) {
462
+ cisv_transform_pipeline_destroy(rc_->pipeline);
463
+ rc_->pipeline = nullptr; // Will be recreated lazily when needed
464
+ }
465
+
466
+ return info.This();
467
+ }
468
+
469
+ // Async file parsing (returns a Promise)
470
+ Napi::Value ParseAsync(const Napi::CallbackInfo &info) {
471
+ Napi::Env env = info.Env();
472
+
473
+ if (is_destroyed_) {
474
+ throw Napi::Error::New(env, "Parser has been destroyed");
475
+ }
476
+
477
+ if (info.Length() != 1 || !info[0].IsString()) {
478
+ throw Napi::TypeError::New(env, "Expected file path string");
479
+ }
480
+
481
+ std::string path = info[0].As<Napi::String>();
482
+
483
+ // Create a promise
484
+ auto deferred = Napi::Promise::Deferred::New(env);
485
+
486
+ // For simplicity, we'll use sync parsing here
487
+ // In production, this should use worker threads
488
+ try {
489
+ Napi::Value result = ParseSync(info);
490
+ deferred.Resolve(result);
491
+ } catch (const Napi::Error& e) {
492
+ deferred.Reject(e.Value());
493
+ }
494
+
495
+ return deferred.Promise();
496
+ }
497
+
498
+ // Get information about registered transforms
499
+ Napi::Value GetTransformInfo(const Napi::CallbackInfo &info) {
500
+ Napi::Env env = info.Env();
501
+
502
+ if (is_destroyed_) {
503
+ throw Napi::Error::New(env, "Parser has been destroyed");
504
+ }
505
+
506
+ Napi::Object result = Napi::Object::New(env);
507
+
508
+ // Count C transforms
509
+ size_t c_transform_count = (rc_ && rc_->pipeline) ? rc_->pipeline->count : 0;
510
+ result.Set("cTransformCount", Napi::Number::New(env, c_transform_count));
511
+
512
+ // Count JS transforms
513
+ size_t js_transform_count = rc_ ? rc_->js_transforms.size() : 0;
514
+ result.Set("jsTransformCount", Napi::Number::New(env, js_transform_count));
515
+
516
+ // List field indices with transforms
517
+ Napi::Array fields = Napi::Array::New(env);
518
+ size_t idx = 0;
519
+
520
+ // Add JS transform field indices
521
+ if (rc_) {
522
+ for (const auto& pair : rc_->js_transforms) {
523
+ fields[idx++] = Napi::Number::New(env, pair.first);
524
+ }
525
+ }
526
+
527
+ result.Set("fieldIndices", fields);
528
+
529
+ return result;
530
+ }
531
+
532
+ Napi::Value GetStats(const Napi::CallbackInfo &info) {
533
+ Napi::Env env = info.Env();
534
+
535
+ if (is_destroyed_) {
536
+ throw Napi::Error::New(env, "Parser has been destroyed");
537
+ }
538
+
539
+ Napi::Object stats = Napi::Object::New(env);
540
+
541
+ stats.Set("rowCount", Napi::Number::New(env, rc_ ? rc_->rows.size() : 0));
542
+ stats.Set("fieldCount", Napi::Number::New(env,
543
+ (rc_ && !rc_->rows.empty()) ? rc_->rows[0].size() : 0));
544
+ stats.Set("totalBytes", Napi::Number::New(env, total_bytes_));
545
+ stats.Set("parseTime", Napi::Number::New(env, parse_time_));
546
+
547
+ return stats;
548
+ }
549
+
550
+ // Static method to count rows
551
+ static Napi::Value CountRows(const Napi::CallbackInfo &info) {
552
+ Napi::Env env = info.Env();
553
+
554
+ if (info.Length() != 1 || !info[0].IsString()) {
555
+ throw Napi::TypeError::New(env, "Expected file path string");
556
+ }
557
+
558
+ std::string path = info[0].As<Napi::String>();
559
+ size_t count = cisv_parser_count_rows(path.c_str());
560
+
561
+ return Napi::Number::New(env, count);
562
+ }
563
+
564
+ private:
565
+ Napi::Value drainRows(Napi::Env env) {
566
+ if (!rc_) {
567
+ return Napi::Array::New(env, 0);
568
+ }
569
+
570
+ Napi::Array rows = Napi::Array::New(env, rc_->rows.size());
571
+
572
+ for (size_t i = 0; i < rc_->rows.size(); ++i) {
573
+ Napi::Array row = Napi::Array::New(env, rc_->rows[i].size());
574
+ for (size_t j = 0; j < rc_->rows[i].size(); ++j) {
575
+ row[j] = Napi::String::New(env, rc_->rows[i][j]);
576
+ }
577
+ rows[i] = row;
578
+ }
579
+
580
+ // Don't clear here if we want to keep data for multiple reads
581
+ // rc_->rows.clear();
582
+
583
+ return rows;
584
+ }
585
+
586
+ cisv_parser *parser_;
587
+ RowCollector *rc_;
588
+ size_t total_bytes_;
589
+ double parse_time_;
590
+ bool is_destroyed_;
591
+ };
592
+
593
+ // Initialize all exports
594
+ Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
595
+ CisvParser::Init(env, exports);
596
+
597
+ // Add version info
598
+ exports.Set("version", Napi::String::New(env, "1.0.0"));
599
+
600
+ // Add transform type constants
601
+ Napi::Object transformTypes = Napi::Object::New(env);
602
+ transformTypes.Set("UPPERCASE", Napi::String::New(env, "uppercase"));
603
+ transformTypes.Set("LOWERCASE", Napi::String::New(env, "lowercase"));
604
+ transformTypes.Set("TRIM", Napi::String::New(env, "trim"));
605
+ transformTypes.Set("TO_INT", Napi::String::New(env, "to_int"));
606
+ transformTypes.Set("TO_FLOAT", Napi::String::New(env, "to_float"));
607
+ transformTypes.Set("HASH_SHA256", Napi::String::New(env, "hash_sha256"));
608
+ transformTypes.Set("BASE64_ENCODE", Napi::String::New(env, "base64_encode"));
609
+ exports.Set("TransformType", transformTypes);
610
+
611
+ return exports;
612
+ }
613
+
614
+ NODE_API_MODULE(cisv, InitAll)