cisv 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/build/Release/cisv.node +0 -0
- package/cisv/cisv_addon.cc +107 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -251,3 +251,48 @@ const tsvCount = cisvParser.countRowsWithConfig('data.tsv', {
|
|
|
251
251
|
toLine: 1000
|
|
252
252
|
});
|
|
253
253
|
```
|
|
254
|
+
|
|
255
|
+
### ROW-BY-ROW ITERATION
|
|
256
|
+
|
|
257
|
+
The iterator API provides row-by-row streaming (in the style of PHP's `fgetcsv`) with a minimal memory footprint and support for early exit.
|
|
258
|
+
|
|
259
|
+
```javascript
|
|
260
|
+
import { cisvParser } from "cisv";
|
|
261
|
+
|
|
262
|
+
const parser = new cisvParser({ delimiter: ',', trim: true });
|
|
263
|
+
|
|
264
|
+
// Open iterator for a file
|
|
265
|
+
parser.openIterator('/path/to/large.csv');
|
|
266
|
+
|
|
267
|
+
// Fetch rows one at a time
|
|
268
|
+
let row;
|
|
269
|
+
while ((row = parser.fetchRow()) !== null) {
|
|
270
|
+
console.log(row); // string[]
|
|
271
|
+
|
|
272
|
+
// Early exit - no wasted work
|
|
273
|
+
if (row[0] === 'stop') {
|
|
274
|
+
break;
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
// Close iterator when done
|
|
279
|
+
parser.closeIterator();
|
|
280
|
+
|
|
281
|
+
// openIterator() and closeIterator() return the parser, so they can be chained
|
|
282
|
+
parser.openIterator('data.csv')
|
|
283
|
+
.closeIterator();
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
**Iterator Methods:**
|
|
287
|
+
|
|
288
|
+
| Method | Description |
|
|
289
|
+
|--------|-------------|
|
|
290
|
+
| `openIterator(path)` | Open a file for row-by-row iteration |
|
|
291
|
+
| `fetchRow()` | Get the next row as `string[]`, or `null` at end of file; throws if no iterator is open |
|
|
292
|
+
| `closeIterator()` | Close iterator and release resources |
|
|
293
|
+
|
|
294
|
+
**Notes:**
|
|
295
|
+
- The iterator uses the parser's current configuration (delimiter, quote, trim, etc.)
|
|
296
|
+
- Calling `destroy()` automatically closes any open iterator
|
|
297
|
+
- Only one iterator can be open at a time per parser instance; calling `openIterator()` again closes the previously open iterator first
|
|
298
|
+
- Breaking out of iteration and calling `closeIterator()` stops parsing immediately
|
package/build/Release/cisv.node
CHANGED
|
Binary file
|
package/cisv/cisv_addon.cc
CHANGED
|
@@ -291,6 +291,11 @@ public:
|
|
|
291
291
|
InstanceMethod("setHeaderFields", &CisvParser::SetHeaderFields),
|
|
292
292
|
InstanceMethod("removeTransformByName", &CisvParser::RemoveTransformByName),
|
|
293
293
|
|
|
294
|
+
// Iterator API methods
|
|
295
|
+
InstanceMethod("openIterator", &CisvParser::OpenIterator),
|
|
296
|
+
InstanceMethod("fetchRow", &CisvParser::FetchRow),
|
|
297
|
+
InstanceMethod("closeIterator", &CisvParser::CloseIterator),
|
|
298
|
+
|
|
294
299
|
StaticMethod("countRows", &CisvParser::CountRows),
|
|
295
300
|
StaticMethod("countRowsWithConfig", &CisvParser::CountRowsWithConfig)
|
|
296
301
|
});
|
|
@@ -304,6 +309,7 @@ public:
|
|
|
304
309
|
parse_time_ = 0;
|
|
305
310
|
total_bytes_ = 0;
|
|
306
311
|
is_destroyed_ = false;
|
|
312
|
+
iterator_ = nullptr;
|
|
307
313
|
|
|
308
314
|
// Initialize configuration with defaults
|
|
309
315
|
cisv_config_init(&config_);
|
|
@@ -483,6 +489,11 @@ public:
|
|
|
483
489
|
// Explicit cleanup method
|
|
484
490
|
void Cleanup() {
|
|
485
491
|
if (!is_destroyed_) {
|
|
492
|
+
// Close iterator if open
|
|
493
|
+
if (iterator_) {
|
|
494
|
+
cisv_iterator_close(iterator_);
|
|
495
|
+
iterator_ = nullptr;
|
|
496
|
+
}
|
|
486
497
|
if (parser_) {
|
|
487
498
|
cisv_parser_destroy(parser_);
|
|
488
499
|
parser_ = nullptr;
|
|
@@ -1156,6 +1167,101 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1156
1167
|
return Napi::Number::New(env, count);
|
|
1157
1168
|
}
|
|
1158
1169
|
|
|
1170
|
+
// =========================================================================
|
|
1171
|
+
// Iterator API - Row-by-row streaming with early exit support
|
|
1172
|
+
// =========================================================================
|
|
1173
|
+
|
|
1174
|
+
/**
|
|
1175
|
+
* Open a file for row-by-row iteration.
|
|
1176
|
+
* Uses the current parser configuration.
|
|
1177
|
+
* @param path - Path to CSV file
|
|
1178
|
+
* @returns this for chaining
|
|
1179
|
+
*/
|
|
1180
|
+
Napi::Value OpenIterator(const Napi::CallbackInfo &info) {
|
|
1181
|
+
Napi::Env env = info.Env();
|
|
1182
|
+
|
|
1183
|
+
if (is_destroyed_) {
|
|
1184
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
1185
|
+
}
|
|
1186
|
+
|
|
1187
|
+
if (info.Length() < 1 || !info[0].IsString()) {
|
|
1188
|
+
throw Napi::TypeError::New(env, "Expected file path string");
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
// Close existing iterator if any
|
|
1192
|
+
if (iterator_) {
|
|
1193
|
+
cisv_iterator_close(iterator_);
|
|
1194
|
+
iterator_ = nullptr;
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
std::string path = info[0].As<Napi::String>();
|
|
1198
|
+
|
|
1199
|
+
// Use current parser configuration for the iterator
|
|
1200
|
+
iterator_ = cisv_iterator_open(path.c_str(), &config_);
|
|
1201
|
+
if (!iterator_) {
|
|
1202
|
+
throw Napi::Error::New(env, "Failed to open file for iteration: " + path);
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
return info.This(); // Return this for chaining
|
|
1206
|
+
}
|
|
1207
|
+
|
|
1208
|
+
/**
|
|
1209
|
+
* Fetch the next row from the iterator.
|
|
1210
|
+
* @returns Array of strings, or null if at end of file
|
|
1211
|
+
*/
|
|
1212
|
+
Napi::Value FetchRow(const Napi::CallbackInfo &info) {
|
|
1213
|
+
Napi::Env env = info.Env();
|
|
1214
|
+
|
|
1215
|
+
if (is_destroyed_) {
|
|
1216
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1219
|
+
if (!iterator_) {
|
|
1220
|
+
throw Napi::Error::New(env, "No iterator open. Call openIterator() first.");
|
|
1221
|
+
}
|
|
1222
|
+
|
|
1223
|
+
const char **fields;
|
|
1224
|
+
const size_t *lengths;
|
|
1225
|
+
size_t field_count;
|
|
1226
|
+
|
|
1227
|
+
int result = cisv_iterator_next(iterator_, &fields, &lengths, &field_count);
|
|
1228
|
+
|
|
1229
|
+
if (result == CISV_ITER_EOF) {
|
|
1230
|
+
return env.Null();
|
|
1231
|
+
}
|
|
1232
|
+
if (result == CISV_ITER_ERROR) {
|
|
1233
|
+
throw Napi::Error::New(env, "Error reading CSV row");
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
// Create array of strings for the row
|
|
1237
|
+
Napi::Array row = Napi::Array::New(env, field_count);
|
|
1238
|
+
for (size_t i = 0; i < field_count; i++) {
|
|
1239
|
+
// SECURITY: Use safe string creation to handle invalid UTF-8
|
|
1240
|
+
row.Set(i, SafeNewString(env, fields[i], lengths[i]));
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1243
|
+
return row;
|
|
1244
|
+
}
|
|
1245
|
+
|
|
1246
|
+
/**
|
|
1247
|
+
* Close the iterator and release resources.
|
|
1248
|
+
* @returns this for chaining
|
|
1249
|
+
*/
|
|
1250
|
+
Napi::Value CloseIterator(const Napi::CallbackInfo &info) {
|
|
1251
|
+
Napi::Env env = info.Env();
|
|
1252
|
+
|
|
1253
|
+
if (is_destroyed_) {
|
|
1254
|
+
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
1255
|
+
}
|
|
1256
|
+
|
|
1257
|
+
if (iterator_) {
|
|
1258
|
+
cisv_iterator_close(iterator_);
|
|
1259
|
+
iterator_ = nullptr;
|
|
1260
|
+
}
|
|
1261
|
+
|
|
1262
|
+
return info.This(); // Return this for chaining
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1159
1265
|
private:
|
|
1160
1266
|
Napi::Value drainRows(Napi::Env env) {
|
|
1161
1267
|
if (!rc_) {
|
|
@@ -1186,6 +1292,7 @@ private:
|
|
|
1186
1292
|
size_t total_bytes_;
|
|
1187
1293
|
double parse_time_;
|
|
1188
1294
|
bool is_destroyed_;
|
|
1295
|
+
cisv_iterator_t *iterator_; // For row-by-row iteration
|
|
1189
1296
|
};
|
|
1190
1297
|
|
|
1191
1298
|
// Initialize all exports
|