cisv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +158 -0
- package/.github/workflows/release.yml +167 -0
- package/Dockerfile +63 -0
- package/LICENSE +7 -0
- package/Makefile +160 -0
- package/README.md +249 -0
- package/SIMD_benchmarks.md +658 -0
- package/benchmark/benchmark.js +287 -0
- package/benchmark_cli_reader.sh +236 -0
- package/benchmark_cli_writer.sh +280 -0
- package/binding.gyp +57 -0
- package/debug-addon.js +64 -0
- package/examples/basic-parse.js +65 -0
- package/examples/large-file.js +35 -0
- package/examples/transform.js +152 -0
- package/examples/typescript.ts +38 -0
- package/index.d.ts +336 -0
- package/install_benchmark_deps.sh +156 -0
- package/package.json +47 -0
- package/run_benchmarks.sh +53 -0
- package/src/cisv_addon.cc +614 -0
- package/src/cisv_parser.c +988 -0
- package/src/cisv_parser.h +55 -0
- package/src/cisv_simd.h +53 -0
- package/src/cisv_transformer.c +537 -0
- package/src/cisv_transformer.h +145 -0
- package/src/cisv_writer.c +535 -0
- package/src/cisv_writer.h +60 -0
- package/src/index.ts +2 -0
- package/src/test/typescript.test.ts +43 -0
- package/src/win_getopt.h +100 -0
- package/src/win_sys_time.h +50 -0
- package/test/basic.test.js +104 -0
- package/test_select.sh +92 -0
- package/test_transform.sh +167 -0
- package/test_transform_leak_test.js +94 -0
- package/tsconfig.json +17 -0
- package/types/cisv.d.ts +8 -0
- package/valgrind-node.supp +69 -0
|
@@ -0,0 +1,614 @@
|
|
|
1
|
+
#include <napi.h>
#include "cisv_parser.h"
#include "cisv_transformer.h"
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
|
|
9
|
+
|
|
10
|
+
namespace {
|
|
11
|
+
|
|
12
|
+
// Extended RowCollector that handles transforms
|
|
13
|
+
struct RowCollector {
|
|
14
|
+
std::vector<std::string> current;
|
|
15
|
+
std::vector<std::vector<std::string>> rows;
|
|
16
|
+
cisv_transform_pipeline_t* pipeline;
|
|
17
|
+
int current_field_index;
|
|
18
|
+
|
|
19
|
+
// JavaScript transforms stored separately
|
|
20
|
+
std::unordered_map<int, Napi::FunctionReference> js_transforms;
|
|
21
|
+
Napi::Env env;
|
|
22
|
+
|
|
23
|
+
RowCollector() : pipeline(nullptr), current_field_index(0), env(nullptr) {
|
|
24
|
+
// DON'T create the pipeline here - do it lazily when needed
|
|
25
|
+
pipeline = nullptr;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
~RowCollector() {
|
|
29
|
+
cleanup();
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
void cleanup() {
|
|
33
|
+
if (pipeline) {
|
|
34
|
+
cisv_transform_pipeline_destroy(pipeline);
|
|
35
|
+
pipeline = nullptr;
|
|
36
|
+
}
|
|
37
|
+
js_transforms.clear();
|
|
38
|
+
rows.clear();
|
|
39
|
+
current.clear();
|
|
40
|
+
current_field_index = 0;
|
|
41
|
+
env = nullptr;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Lazy initialization of pipeline
|
|
45
|
+
void ensurePipeline() {
|
|
46
|
+
if (!pipeline) {
|
|
47
|
+
pipeline = cisv_transform_pipeline_create(16);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Apply both C and JS transforms
|
|
52
|
+
std::string applyTransforms(const char* data, size_t len, int field_index) {
|
|
53
|
+
std::string result(data, len);
|
|
54
|
+
|
|
55
|
+
// First apply C transforms
|
|
56
|
+
if (pipeline && pipeline->count > 0) {
|
|
57
|
+
cisv_transform_result_t c_result = cisv_transform_apply(
|
|
58
|
+
pipeline,
|
|
59
|
+
field_index,
|
|
60
|
+
result.c_str(),
|
|
61
|
+
result.length()
|
|
62
|
+
);
|
|
63
|
+
|
|
64
|
+
if (c_result.data) {
|
|
65
|
+
result = std::string(c_result.data, c_result.len);
|
|
66
|
+
// IMPORTANT: Free the result if it was allocated
|
|
67
|
+
if (c_result.needs_free) {
|
|
68
|
+
cisv_transform_result_free(&c_result);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Then apply JavaScript transforms if we have an environment
|
|
74
|
+
if (env) {
|
|
75
|
+
// Apply field-specific transform
|
|
76
|
+
auto it = js_transforms.find(field_index);
|
|
77
|
+
if (it != js_transforms.end() && !it->second.IsEmpty()) {
|
|
78
|
+
try {
|
|
79
|
+
Napi::String input = Napi::String::New(env, result);
|
|
80
|
+
Napi::Number field = Napi::Number::New(env, field_index);
|
|
81
|
+
|
|
82
|
+
Napi::Value js_result = it->second.Call({input, field});
|
|
83
|
+
|
|
84
|
+
if (js_result.IsString()) {
|
|
85
|
+
result = js_result.As<Napi::String>().Utf8Value();
|
|
86
|
+
}
|
|
87
|
+
} catch (...) {
|
|
88
|
+
// Keep original result if JS transform fails
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Apply transforms that apply to all fields (-1 index)
|
|
93
|
+
auto it_all = js_transforms.find(-1);
|
|
94
|
+
if (it_all != js_transforms.end() && !it_all->second.IsEmpty()) {
|
|
95
|
+
try {
|
|
96
|
+
Napi::String input = Napi::String::New(env, result);
|
|
97
|
+
Napi::Number field = Napi::Number::New(env, field_index);
|
|
98
|
+
|
|
99
|
+
Napi::Value js_result = it_all->second.Call({input, field});
|
|
100
|
+
|
|
101
|
+
if (js_result.IsString()) {
|
|
102
|
+
result = js_result.As<Napi::String>().Utf8Value();
|
|
103
|
+
}
|
|
104
|
+
} catch (...) {
|
|
105
|
+
// Keep original result if JS transform fails
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return result;
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
static void field_cb(void *user, const char *data, size_t len) {
|
|
115
|
+
auto *rc = reinterpret_cast<RowCollector *>(user);
|
|
116
|
+
|
|
117
|
+
// Apply all transforms (C and JS)
|
|
118
|
+
std::string transformed = rc->applyTransforms(data, len, rc->current_field_index);
|
|
119
|
+
rc->current.emplace_back(transformed);
|
|
120
|
+
rc->current_field_index++;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
static void row_cb(void *user) {
|
|
124
|
+
auto *rc = reinterpret_cast<RowCollector *>(user);
|
|
125
|
+
rc->rows.emplace_back(std::move(rc->current));
|
|
126
|
+
rc->current.clear();
|
|
127
|
+
rc->current_field_index = 0; // Reset field index for next row
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
} // namespace
|
|
131
|
+
|
|
132
|
+
// N-API wrapper that exposes the cisv CSV parser to JavaScript as the class
// `cisvParser`.
//
// Each instance owns one native parser plus the RowCollector that receives
// its field/row callbacks. After destroy() (or garbage collection) both are
// released; most methods then throw "Parser has been destroyed", while end()
// and clear() become no-ops.
class CisvParser : public Napi::ObjectWrap<CisvParser> {
public:
  // Defines the JS class and attaches it to `exports` as "cisvParser".
  static Napi::Object Init(Napi::Env env, Napi::Object exports) {
    Napi::Function func = DefineClass(env, "cisvParser", {
      InstanceMethod("parseSync", &CisvParser::ParseSync),
      InstanceMethod("parse", &CisvParser::ParseAsync),
      InstanceMethod("parseString", &CisvParser::ParseString),
      InstanceMethod("write", &CisvParser::Write),
      InstanceMethod("end", &CisvParser::End),
      InstanceMethod("getRows", &CisvParser::GetRows),
      InstanceMethod("clear", &CisvParser::Clear),
      InstanceMethod("transform", &CisvParser::Transform),
      InstanceMethod("removeTransform", &CisvParser::RemoveTransform),
      InstanceMethod("clearTransforms", &CisvParser::ClearTransforms),
      InstanceMethod("getStats", &CisvParser::GetStats),
      InstanceMethod("getTransformInfo", &CisvParser::GetTransformInfo),
      InstanceMethod("destroy", &CisvParser::Destroy),
      StaticMethod("countRows", &CisvParser::CountRows)
    });

    exports.Set("cisvParser", func);
    return exports;
  }

  // Creates the collector and the native parser that feeds it.
  // Throws Napi::Error if the native parser cannot be created.
  CisvParser(const Napi::CallbackInfo &info) : Napi::ObjectWrap<CisvParser>(info) {
    rc_ = new RowCollector();
    parser_ = cisv_parser_create(field_cb, row_cb, rc_);

    // NOTE(review): assumes cisv_parser_create() reports failure by returning
    // null. Without this check every later method would dereference it.
    if (!parser_) {
      delete rc_;
      rc_ = nullptr;
      is_destroyed_ = true;
      throw Napi::Error::New(info.Env(), "Failed to create parser");
    }

    // An options object may be passed as info[0]; no option is interpreted
    // yet.
  }

  ~CisvParser() {
    Cleanup();
  }

  // Releases the native parser and the collector. Idempotent.
  void Cleanup() {
    if (is_destroyed_) {
      return;
    }
    // Destroy the parser first: it holds a pointer to rc_.
    if (parser_) {
      cisv_parser_destroy(parser_);
      parser_ = nullptr;
    }
    if (rc_) {
      rc_->cleanup();
      delete rc_;
      rc_ = nullptr;
    }
    is_destroyed_ = true;
  }

  // JS: destroy() - explicit, early release of native resources.
  void Destroy(const Napi::CallbackInfo & /*info*/) {
    Cleanup();
  }

  // JS: parseSync(path) -> string[][]
  // Parses the file at `path`, replacing any previously collected rows, and
  // records the elapsed time in whole milliseconds.
  // Throws if destroyed, if the argument is not a single string, or if the
  // native parser returns a negative status.
  Napi::Value ParseSync(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    if (info.Length() != 1 || !info[0].IsString()) {
      throw Napi::TypeError::New(env, "Expected file path string");
    }

    const std::string path = info[0].As<Napi::String>();

    const auto start = std::chrono::high_resolution_clock::now();

    resetCollector();

    // JS transforms need an environment while callbacks are running.
    rc_->env = env;
    const int result = cisv_parser_parse_file(parser_, path.c_str());

    const auto end = std::chrono::high_resolution_clock::now();
    parse_time_ = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();

    rc_->env = nullptr;

    if (result < 0) {
      throw Napi::Error::New(env, "parse error: " + std::to_string(result));
    }

    return drainRows(env);
  }

  // JS: parseString(csv) -> string[][]
  // Parses an in-memory CSV string, replacing any previously collected rows.
  Napi::Value ParseString(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    if (info.Length() != 1 || !info[0].IsString()) {
      throw Napi::TypeError::New(env, "Expected CSV string");
    }

    const std::string content = info[0].As<Napi::String>();

    resetCollector();

    rc_->env = env;
    cisv_parser_write(parser_, reinterpret_cast<const uint8_t *>(content.data()), content.size());
    cisv_parser_end(parser_);
    rc_->env = nullptr;

    total_bytes_ = content.size();

    return drainRows(env);
  }

  // JS: write(chunk) - feeds one Buffer or string chunk to the streaming
  // parser and adds its size to the byte counter.
  void Write(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    if (info.Length() != 1) {
      throw Napi::TypeError::New(env, "Expected one argument");
    }

    // JS transforms need an environment while callbacks are running.
    rc_->env = env;

    if (info[0].IsBuffer()) {
      auto buf = info[0].As<Napi::Buffer<uint8_t>>();
      cisv_parser_write(parser_, buf.Data(), buf.Length());
      total_bytes_ += buf.Length();
      return;
    }

    if (info[0].IsString()) {
      const std::string chunk = info[0].As<Napi::String>();
      cisv_parser_write(parser_, reinterpret_cast<const uint8_t *>(chunk.data()), chunk.size());
      total_bytes_ += chunk.size();
      return;
    }

    throw Napi::TypeError::New(env, "Expected Buffer or String");
  }

  // JS: end() - signals end of streamed input. No-op once destroyed.
  void End(const Napi::CallbackInfo &info) {
    if (is_destroyed_) {
      return;
    }
    // Ending the stream may still invoke the field/row callbacks, so the
    // environment has to be valid here too. It can be null at this point,
    // e.g. after clear().
    rc_->env = info.Env();
    cisv_parser_end(parser_);
    rc_->env = nullptr;
  }

  // JS: getRows() -> string[][] - snapshot of the rows collected so far.
  Napi::Value GetRows(const Napi::CallbackInfo &info) {
    ensureAlive(info.Env());
    return drainRows(info.Env());
  }

  // JS: clear() - drops collected rows and resets statistics. Registered
  // transforms are kept. No-op once destroyed.
  void Clear(const Napi::CallbackInfo & /*info*/) {
    if (is_destroyed_ || !rc_) {
      return;
    }
    resetCollector();
    total_bytes_ = 0;
    parse_time_ = 0;
    rc_->env = nullptr;
  }

  // JS: transform(fieldIndex, type | fn, context?) -> this
  //
  // `type` is one of the native transform names (see parseTransformType) and
  // is added to the C pipeline; an optional `context` object may supply
  // string `key` and `iv` values. A function is stored as the JS transform
  // for `fieldIndex` (replacing any earlier one); index -1 applies it to
  // every field.
  Napi::Value Transform(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    if (info.Length() < 2) {
      throw Napi::TypeError::New(env, "Expected field index and transform type/function");
    }

    if (!info[0].IsNumber()) {
      throw Napi::TypeError::New(env, "Field index must be a number");
    }

    const int field_index = info[0].As<Napi::Number>().Int32Value();

    rc_->env = env;

    if (info[1].IsString()) {
      const std::string transform_type = info[1].As<Napi::String>();
      const cisv_transform_type_t type = parseTransformType(env, transform_type);

      cisv_transform_context_t *ctx = nullptr;
      if (info.Length() >= 3 && info[2].IsObject()) {
        ctx = buildContext(env, info[2].As<Napi::Object>());
      }

      // Create the native pipeline only when a native transform is actually
      // being registered.
      rc_->ensurePipeline();
      if (!rc_->pipeline) {
        freeContext(ctx);
        throw Napi::Error::New(env, "Failed to create transform pipeline");
      }

      if (cisv_transform_pipeline_add(rc_->pipeline, field_index, type, ctx) < 0) {
        // The pipeline did not accept the context, so it is still ours.
        freeContext(ctx);
        throw Napi::Error::New(env, "Failed to add transform");
      }
    } else if (info[1].IsFunction()) {
      rc_->js_transforms[field_index] = Napi::Persistent(info[1].As<Napi::Function>());
    } else {
      throw Napi::TypeError::New(env, "Transform must be a string type or function");
    }

    return info.This();  // allow chaining
  }

  // JS: removeTransform(fieldIndex) -> this
  // Removes the JS transform for `fieldIndex`. Native transforms are not
  // removed.
  Napi::Value RemoveTransform(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    if (info.Length() != 1 || !info[0].IsNumber()) {
      throw Napi::TypeError::New(env, "Expected field index");
    }

    const int field_index = info[0].As<Napi::Number>().Int32Value();
    rc_->js_transforms.erase(field_index);

    // TODO: Implement removal of C transforms in cisv_transformer.c

    return info.This();
  }

  // JS: clearTransforms() -> this - removes every JS and native transform.
  Napi::Value ClearTransforms(const Napi::CallbackInfo &info) {
    ensureAlive(info.Env());

    rc_->js_transforms.clear();

    // Drop the native pipeline entirely; it is recreated lazily by the next
    // native transform().
    if (rc_->pipeline) {
      cisv_transform_pipeline_destroy(rc_->pipeline);
      rc_->pipeline = nullptr;
    }

    return info.This();
  }

  // JS: parse(path) -> Promise<string[][]>
  // The work currently runs synchronously on the calling thread; only the
  // result is delivered through a promise. Argument/state errors are thrown
  // synchronously, parse errors reject the promise.
  Napi::Value ParseAsync(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    if (info.Length() != 1 || !info[0].IsString()) {
      throw Napi::TypeError::New(env, "Expected file path string");
    }

    auto deferred = Napi::Promise::Deferred::New(env);

    try {
      deferred.Resolve(ParseSync(info));
    } catch (const Napi::Error &e) {
      deferred.Reject(e.Value());
    }

    return deferred.Promise();
  }

  // JS: getTransformInfo() -> { cTransformCount, jsTransformCount, fieldIndices }
  // `fieldIndices` lists the field indices of JS transforms only.
  Napi::Value GetTransformInfo(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    Napi::Object result = Napi::Object::New(env);

    const size_t c_transform_count = (rc_ && rc_->pipeline) ? rc_->pipeline->count : 0;
    result.Set("cTransformCount", Napi::Number::New(env, static_cast<double>(c_transform_count)));

    const size_t js_transform_count = rc_ ? rc_->js_transforms.size() : 0;
    result.Set("jsTransformCount", Napi::Number::New(env, static_cast<double>(js_transform_count)));

    Napi::Array fields = Napi::Array::New(env);
    if (rc_) {
      uint32_t idx = 0;
      for (const auto &pair : rc_->js_transforms) {
        fields[idx++] = Napi::Number::New(env, pair.first);
      }
    }
    result.Set("fieldIndices", fields);

    return result;
  }

  // JS: getStats() -> { rowCount, fieldCount, totalBytes, parseTime }
  // `fieldCount` is the width of the first collected row (0 if none).
  Napi::Value GetStats(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();
    ensureAlive(env);

    const size_t row_count = rc_ ? rc_->rows.size() : 0;
    const size_t field_count = (rc_ && !rc_->rows.empty()) ? rc_->rows[0].size() : 0;

    Napi::Object stats = Napi::Object::New(env);
    stats.Set("rowCount", Napi::Number::New(env, static_cast<double>(row_count)));
    stats.Set("fieldCount", Napi::Number::New(env, static_cast<double>(field_count)));
    stats.Set("totalBytes", Napi::Number::New(env, static_cast<double>(total_bytes_)));
    stats.Set("parseTime", Napi::Number::New(env, parse_time_));

    return stats;
  }

  // JS (static): countRows(path) -> number
  static Napi::Value CountRows(const Napi::CallbackInfo &info) {
    Napi::Env env = info.Env();

    if (info.Length() != 1 || !info[0].IsString()) {
      throw Napi::TypeError::New(env, "Expected file path string");
    }

    const std::string path = info[0].As<Napi::String>();
    const size_t count = cisv_parser_count_rows(path.c_str());

    return Napi::Number::New(env, static_cast<double>(count));
  }

private:
  // Throws the shared "destroyed" error when the instance was released.
  void ensureAlive(Napi::Env env) const {
    if (is_destroyed_) {
      throw Napi::Error::New(env, "Parser has been destroyed");
    }
  }

  // Discards collected rows and any partially built row.
  void resetCollector() {
    rc_->rows.clear();
    rc_->current.clear();
    rc_->current_field_index = 0;
  }

  // Maps a JS transform name (or its short alias) to the native enum.
  // Throws Napi::Error for an unknown name.
  static cisv_transform_type_t parseTransformType(Napi::Env env, const std::string &name) {
    if (name == "uppercase") return TRANSFORM_UPPERCASE;
    if (name == "lowercase") return TRANSFORM_LOWERCASE;
    if (name == "trim") return TRANSFORM_TRIM;
    if (name == "to_int" || name == "int") return TRANSFORM_TO_INT;
    if (name == "to_float" || name == "float") return TRANSFORM_TO_FLOAT;
    if (name == "hash_sha256" || name == "sha256") return TRANSFORM_HASH_SHA256;
    if (name == "base64_encode" || name == "base64") return TRANSFORM_BASE64_ENCODE;
    throw Napi::Error::New(env, "Unknown transform type: " + name);
  }

  // Returns a malloc'd, NUL-terminated copy of all bytes of `s`, or null on
  // allocation failure. Unlike strdup() this keeps bytes after an embedded
  // NUL, so the copy always matches the length stored next to it.
  static char *dupBytes(const std::string &s) {
    char *copy = static_cast<char *>(std::malloc(s.size() + 1));
    if (copy) {
      std::memcpy(copy, s.data(), s.size());
      copy[s.size()] = '\0';
    }
    return copy;
  }

  // Frees a context that was not handed over to the pipeline.
  static void freeContext(cisv_transform_context_t *ctx) {
    if (!ctx) {
      return;
    }
    std::free(ctx->key);
    std::free(ctx->iv);
    std::free(ctx->extra);
    std::free(ctx);
  }

  // Builds a zero-initialised transform context from the string `key` and
  // `iv` properties of `source` (non-string values are ignored).
  // Throws on allocation failure or if reading a property throws; the
  // partially built context is released in both cases.
  static cisv_transform_context_t *buildContext(Napi::Env env, Napi::Object source) {
    auto *ctx = static_cast<cisv_transform_context_t *>(
        std::calloc(1, sizeof(cisv_transform_context_t)));
    if (!ctx) {
      throw Napi::Error::New(env, "Failed to allocate transform context");
    }

    try {
      if (source.Has("key")) {
        Napi::Value key_val = source.Get("key");
        if (key_val.IsString()) {
          const std::string key = key_val.As<Napi::String>();
          ctx->key = dupBytes(key);
          if (!ctx->key) {
            throw Napi::Error::New(env, "Failed to allocate transform context");
          }
          ctx->key_len = key.length();
        }
      }

      if (source.Has("iv")) {
        Napi::Value iv_val = source.Get("iv");
        if (iv_val.IsString()) {
          const std::string iv = iv_val.As<Napi::String>();
          ctx->iv = dupBytes(iv);
          if (!ctx->iv) {
            throw Napi::Error::New(env, "Failed to allocate transform context");
          }
          ctx->iv_len = iv.length();
        }
      }
    } catch (...) {
      freeContext(ctx);
      throw;
    }

    return ctx;
  }

  // Converts the collected rows to a JS array of string arrays. The rows are
  // kept so that they can be read again.
  Napi::Value drainRows(Napi::Env env) {
    if (!rc_) {
      return Napi::Array::New(env, 0);
    }

    Napi::Array rows = Napi::Array::New(env, rc_->rows.size());

    for (size_t i = 0; i < rc_->rows.size(); ++i) {
      const std::vector<std::string> &src = rc_->rows[i];
      Napi::Array row = Napi::Array::New(env, src.size());
      for (size_t j = 0; j < src.size(); ++j) {
        row[static_cast<uint32_t>(j)] = Napi::String::New(env, src[j]);
      }
      rows[static_cast<uint32_t>(i)] = row;
    }

    return rows;
  }

  cisv_parser *parser_ = nullptr;  // native parser; holds a pointer to rc_
  RowCollector *rc_ = nullptr;     // callback target, owned by this object
  size_t total_bytes_ = 0;         // bytes fed through write()/parseString()
  double parse_time_ = 0;          // duration of the last parseSync(), in ms
  bool is_destroyed_ = false;      // true once native resources are released
};
|
|
592
|
+
|
|
593
|
+
// Initialize all exports
|
|
594
|
+
Napi::Object InitAll(Napi::Env env, Napi::Object exports) {
|
|
595
|
+
CisvParser::Init(env, exports);
|
|
596
|
+
|
|
597
|
+
// Add version info
|
|
598
|
+
exports.Set("version", Napi::String::New(env, "1.0.0"));
|
|
599
|
+
|
|
600
|
+
// Add transform type constants
|
|
601
|
+
Napi::Object transformTypes = Napi::Object::New(env);
|
|
602
|
+
transformTypes.Set("UPPERCASE", Napi::String::New(env, "uppercase"));
|
|
603
|
+
transformTypes.Set("LOWERCASE", Napi::String::New(env, "lowercase"));
|
|
604
|
+
transformTypes.Set("TRIM", Napi::String::New(env, "trim"));
|
|
605
|
+
transformTypes.Set("TO_INT", Napi::String::New(env, "to_int"));
|
|
606
|
+
transformTypes.Set("TO_FLOAT", Napi::String::New(env, "to_float"));
|
|
607
|
+
transformTypes.Set("HASH_SHA256", Napi::String::New(env, "hash_sha256"));
|
|
608
|
+
transformTypes.Set("BASE64_ENCODE", Napi::String::New(env, "base64_encode"));
|
|
609
|
+
exports.Set("TransformType", transformTypes);
|
|
610
|
+
|
|
611
|
+
return exports;
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
// Registers the addon under the module name `cisv` with InitAll as its
// initialization function.
NODE_API_MODULE(cisv, InitAll)
|