cisv 0.2.5 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -261
- package/build/Release/cisv.node +0 -0
- package/cisv/cisv_addon.cc +417 -77
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,298 +1,154 @@
|
|
|
1
|
-
# CISV
|
|
1
|
+
# CISV Node.js Binding
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-

|
|
5
|
-

|
|
6
|
-

|
|
3
|
+
Native Node-API binding for the CISV C core.
|
|
7
4
|
|
|
8
|
-
##
|
|
5
|
+
## Install
|
|
9
6
|
|
|
10
|
-
### NODE.JS PACKAGE
|
|
11
7
|
```bash
|
|
12
8
|
npm install cisv
|
|
13
9
|
```
|
|
14
10
|
|
|
15
|
-
|
|
11
|
+
From source in this repository:
|
|
12
|
+
|
|
16
13
|
```bash
|
|
17
|
-
|
|
14
|
+
cd bindings/nodejs
|
|
15
|
+
npm ci
|
|
16
|
+
npm run build
|
|
17
|
+
npm test
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
-
##
|
|
20
|
+
## Quick Start
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
```javascript
|
|
22
|
+
```js
|
|
24
23
|
const { cisvParser } = require('cisv');
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
const parser = new cisvParser();
|
|
28
|
-
const rows = parser.parseSync('./data.csv');
|
|
29
|
-
|
|
30
|
-
// With configuration (optional)
|
|
31
|
-
const tsv_parser = new cisvParser({
|
|
32
|
-
delimiter: '\t',
|
|
33
|
-
quote: "'",
|
|
34
|
-
trim: true
|
|
35
|
-
});
|
|
36
|
-
const tsv_rows = tsv_parser.parseSync('./data.tsv');
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
## CONFIGURATION OPTIONS
|
|
41
|
-
|
|
42
|
-
### Parser Configuration
|
|
43
|
-
|
|
44
|
-
```javascript
|
|
45
|
-
const parser = new cisvParser({
|
|
46
|
-
// Field delimiter character (default: ',')
|
|
47
|
-
delimiter: ',',
|
|
48
|
-
|
|
49
|
-
// Quote character (default: '"')
|
|
50
|
-
quote: '"',
|
|
51
|
-
|
|
52
|
-
// Escape character (null for RFC4180 "" style, default: null)
|
|
53
|
-
escape: null,
|
|
54
|
-
|
|
55
|
-
// Comment character to skip lines (default: null)
|
|
56
|
-
comment: '#',
|
|
57
|
-
|
|
58
|
-
// Trim whitespace from fields (default: false)
|
|
59
|
-
trim: true,
|
|
60
|
-
|
|
61
|
-
// Skip empty lines (default: false)
|
|
62
|
-
skipEmptyLines: true,
|
|
63
|
-
|
|
64
|
-
// Use relaxed parsing rules (default: false)
|
|
65
|
-
relaxed: false,
|
|
66
|
-
|
|
67
|
-
// Skip lines with parse errors (default: false)
|
|
68
|
-
skipLinesWithError: true,
|
|
69
|
-
|
|
70
|
-
// Maximum row size in bytes (0 = unlimited, default: 0)
|
|
71
|
-
maxRowSize: 1048576,
|
|
72
|
-
|
|
73
|
-
// Start parsing from line N (1-based, default: 1)
|
|
74
|
-
fromLine: 10,
|
|
75
|
-
|
|
76
|
-
// Stop parsing at line N (0 = until end, default: 0)
|
|
77
|
-
toLine: 1000
|
|
78
|
-
});
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
### Dynamic Configuration
|
|
82
|
-
|
|
83
|
-
```javascript
|
|
84
|
-
// Set configuration after creation
|
|
85
|
-
parser.setConfig({
|
|
86
|
-
delimiter: ';',
|
|
87
|
-
quote: "'",
|
|
88
|
-
trim: true
|
|
89
|
-
});
|
|
90
|
-
|
|
91
|
-
// Get current configuration
|
|
92
|
-
const config = parser.getConfig();
|
|
93
|
-
console.log(config);
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
## API REFERENCE
|
|
97
|
-
|
|
98
|
-
### TYPESCRIPT DEFINITIONS
|
|
99
|
-
```typescript
|
|
100
|
-
interface CisvConfig {
|
|
101
|
-
delimiter?: string;
|
|
102
|
-
quote?: string;
|
|
103
|
-
escape?: string | null;
|
|
104
|
-
comment?: string | null;
|
|
105
|
-
trim?: boolean;
|
|
106
|
-
skipEmptyLines?: boolean;
|
|
107
|
-
relaxed?: boolean;
|
|
108
|
-
skipLinesWithError?: boolean;
|
|
109
|
-
maxRowSize?: number;
|
|
110
|
-
fromLine?: number;
|
|
111
|
-
toLine?: number;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
interface ParsedRow extends Array<string> {}
|
|
115
|
-
|
|
116
|
-
interface ParseStats {
|
|
117
|
-
rowCount: number;
|
|
118
|
-
fieldCount: number;
|
|
119
|
-
totalBytes: number;
|
|
120
|
-
parseTime: number;
|
|
121
|
-
currentLine: number;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
interface TransformInfo {
|
|
125
|
-
cTransformCount: number;
|
|
126
|
-
jsTransformCount: number;
|
|
127
|
-
fieldIndices: number[];
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
class cisvParser {
|
|
131
|
-
constructor(config?: CisvConfig);
|
|
132
|
-
parseSync(path: string): ParsedRow[];
|
|
133
|
-
parse(path: string): Promise<ParsedRow[]>;
|
|
134
|
-
parseString(csv: string): ParsedRow[];
|
|
135
|
-
write(chunk: string | Buffer): void;
|
|
136
|
-
end(): void;
|
|
137
|
-
getRows(): ParsedRow[];
|
|
138
|
-
clear(): void;
|
|
139
|
-
setConfig(config: CisvConfig): void;
|
|
140
|
-
getConfig(): CisvConfig;
|
|
141
|
-
transform(fieldIndex: number, type: string | Function): this;
|
|
142
|
-
removeTransform(fieldIndex: number): this;
|
|
143
|
-
clearTransforms(): this;
|
|
144
|
-
getStats(): ParseStats;
|
|
145
|
-
getTransformInfo(): TransformInfo;
|
|
146
|
-
destroy(): void;
|
|
147
|
-
|
|
148
|
-
static countRows(path: string): number;
|
|
149
|
-
static countRowsWithConfig(path: string, config?: CisvConfig): number;
|
|
150
|
-
}
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
### BASIC PARSING
|
|
154
|
-
|
|
155
|
-
```javascript
|
|
156
|
-
import { cisvParser } from "cisv";
|
|
157
|
-
|
|
158
|
-
// Default configuration (standard CSV)
|
|
159
|
-
const parser = new cisvParser();
|
|
25
|
+
const parser = new cisvParser({ delimiter: ',', trim: true });
|
|
160
26
|
const rows = parser.parseSync('data.csv');
|
|
161
27
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
delimiter: '\t',
|
|
165
|
-
quote: "'"
|
|
166
|
-
});
|
|
167
|
-
const tsvRows = tsvParser.parseSync('data.tsv');
|
|
168
|
-
|
|
169
|
-
// Parse specific line range
|
|
170
|
-
const rangeParser = new cisvParser({
|
|
171
|
-
fromLine: 100,
|
|
172
|
-
toLine: 1000
|
|
173
|
-
});
|
|
174
|
-
const subset = rangeParser.parseSync('large.csv');
|
|
175
|
-
|
|
176
|
-
// Skip comments and empty lines
|
|
177
|
-
const cleanParser = new cisvParser({
|
|
178
|
-
comment: '#',
|
|
179
|
-
skipEmptyLines: true,
|
|
180
|
-
trim: true
|
|
181
|
-
});
|
|
182
|
-
const cleanData = cleanParser.parseSync('config.csv');
|
|
28
|
+
console.log(rows.length);
|
|
29
|
+
console.log(rows[0]);
|
|
183
30
|
```
|
|
184
31
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
32
|
+
## Parser API
|
|
33
|
+
|
|
34
|
+
### Constructor options
|
|
35
|
+
|
|
36
|
+
- `delimiter?: string` (first character used)
|
|
37
|
+
- `quote?: string` (first character used)
|
|
38
|
+
- `escape?: string | null` (`null` means RFC4180 doubled quote escaping)
|
|
39
|
+
- `comment?: string | null`
|
|
40
|
+
- `trim?: boolean`
|
|
41
|
+
- `skipEmptyLines?: boolean`
|
|
42
|
+
- `relaxed?: boolean`
|
|
43
|
+
- `skipLinesWithError?: boolean`
|
|
44
|
+
- `maxRowSize?: number`
|
|
45
|
+
- `fromLine?: number`
|
|
46
|
+
- `toLine?: number`
|
|
47
|
+
|
|
48
|
+
### Instance methods
|
|
49
|
+
|
|
50
|
+
- `parseSync(path: string): string[][]`
|
|
51
|
+
- `parse(path: string): Promise<string[][]>`
|
|
52
|
+
- `parseString(csv: string): string[][]`
|
|
53
|
+
- `write(chunk: Buffer | string): void`
|
|
54
|
+
- `end(): void`
|
|
55
|
+
- `getRows(): string[][]`
|
|
56
|
+
- `clear(): void`
|
|
57
|
+
- `setConfig(config): this`
|
|
58
|
+
- `getConfig(): object`
|
|
59
|
+
- `transform(fieldIndex: number, kindOrFn: string | Function, context?): this`
|
|
60
|
+
- `transformByName(fieldName: string, kindOrFn: string | Function, context?): this`
|
|
61
|
+
- `setHeaderFields(fields: string[]): void`
|
|
62
|
+
- `removeTransform(fieldIndex: number): this`
|
|
63
|
+
- `removeTransformByName(fieldName: string): this`
|
|
64
|
+
- `clearTransforms(): this`
|
|
65
|
+
- `getTransformInfo(): { cTransformCount: number, jsTransformCount: number, fieldIndices: number[] }`
|
|
66
|
+
- `getStats(): { rowCount: number, fieldCount: number, totalBytes: number, parseTime: number, currentLine: number }`
|
|
67
|
+
- `openIterator(path: string): this`
|
|
68
|
+
- `fetchRow(): string[] | null`
|
|
69
|
+
- `closeIterator(): this`
|
|
70
|
+
- `destroy(): void`
|
|
71
|
+
|
|
72
|
+
### Static methods
|
|
73
|
+
|
|
74
|
+
- `cisvParser.countRows(path: string): number`
|
|
75
|
+
- `cisvParser.countRowsWithConfig(path: string, config?): number`
|
|
76
|
+
|
|
77
|
+
## Transform Types
|
|
78
|
+
|
|
79
|
+
Built-in transform names:
|
|
80
|
+
|
|
81
|
+
- `uppercase`
|
|
82
|
+
- `lowercase`
|
|
83
|
+
- `trim`
|
|
84
|
+
- `to_int` (or `int`)
|
|
85
|
+
- `to_float` (or `float`)
|
|
86
|
+
- `hash_sha256` (or `sha256`)
|
|
87
|
+
- `base64_encode` (or `base64`)
|
|
88
|
+
|
|
89
|
+
## Examples
|
|
90
|
+
|
|
91
|
+
### Async parse
|
|
92
|
+
|
|
93
|
+
```js
|
|
94
|
+
const { cisvParser } = require('cisv');
|
|
234
95
|
|
|
235
|
-
|
|
96
|
+
(async () => {
|
|
97
|
+
const parser = new cisvParser();
|
|
98
|
+
const rows = await parser.parse('data.csv');
|
|
99
|
+
console.log(rows.length);
|
|
100
|
+
})();
|
|
236
101
|
```
|
|
237
102
|
|
|
238
|
-
###
|
|
103
|
+
### Streaming chunks
|
|
239
104
|
|
|
240
|
-
```
|
|
241
|
-
|
|
105
|
+
```js
|
|
106
|
+
const fs = require('fs');
|
|
107
|
+
const { cisvParser } = require('cisv');
|
|
242
108
|
|
|
243
|
-
|
|
244
|
-
const
|
|
109
|
+
const parser = new cisvParser();
|
|
110
|
+
for (const chunk of [
|
|
111
|
+
Buffer.from('id,name\n1,'),
|
|
112
|
+
Buffer.from('john\n2,jane\n')
|
|
113
|
+
]) {
|
|
114
|
+
parser.write(chunk);
|
|
115
|
+
}
|
|
116
|
+
parser.end();
|
|
245
117
|
|
|
246
|
-
|
|
247
|
-
const tsvCount = cisvParser.countRowsWithConfig('data.tsv', {
|
|
248
|
-
delimiter: '\t',
|
|
249
|
-
skipEmptyLines: true,
|
|
250
|
-
fromLine: 10,
|
|
251
|
-
toLine: 1000
|
|
252
|
-
});
|
|
118
|
+
console.log(parser.getRows());
|
|
253
119
|
```
|
|
254
120
|
|
|
255
|
-
###
|
|
121
|
+
### Iterator mode (low memory)
|
|
256
122
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
```javascript
|
|
260
|
-
import { cisvParser } from "cisv";
|
|
261
|
-
|
|
262
|
-
const parser = new cisvParser({ delimiter: ',', trim: true });
|
|
123
|
+
```js
|
|
124
|
+
const { cisvParser } = require('cisv');
|
|
263
125
|
|
|
264
|
-
|
|
265
|
-
parser.openIterator('
|
|
126
|
+
const parser = new cisvParser({ delimiter: ',' });
|
|
127
|
+
parser.openIterator('large.csv');
|
|
266
128
|
|
|
267
|
-
// Fetch rows one at a time
|
|
268
129
|
let row;
|
|
269
130
|
while ((row = parser.fetchRow()) !== null) {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
// Early exit - no wasted work
|
|
273
|
-
if (row[0] === 'stop') {
|
|
274
|
-
break;
|
|
275
|
-
}
|
|
131
|
+
if (row[0] === 'stop') break;
|
|
276
132
|
}
|
|
277
133
|
|
|
278
|
-
// Close iterator when done
|
|
279
134
|
parser.closeIterator();
|
|
280
|
-
|
|
281
|
-
// Methods support chaining
|
|
282
|
-
parser.openIterator('data.csv')
|
|
283
|
-
.closeIterator();
|
|
284
135
|
```
|
|
285
136
|
|
|
286
|
-
|
|
137
|
+
### Name-based transforms
|
|
138
|
+
|
|
139
|
+
```js
|
|
140
|
+
const { cisvParser } = require('cisv');
|
|
141
|
+
|
|
142
|
+
const parser = new cisvParser();
|
|
143
|
+
parser.setHeaderFields(['id', 'name', 'email']);
|
|
144
|
+
parser.transformByName('name', 'uppercase');
|
|
145
|
+
|
|
146
|
+
const rows = parser.parseString('id,name,email\n1,john,john@test.com');
|
|
147
|
+
console.log(rows[1][1]); // JOHN
|
|
148
|
+
```
|
|
287
149
|
|
|
288
|
-
|
|
289
|
-
|--------|-------------|
|
|
290
|
-
| `openIterator(path)` | Open a file for row-by-row iteration |
|
|
291
|
-
| `fetchRow()` | Get next row as `string[]`, or `null` if at EOF |
|
|
292
|
-
| `closeIterator()` | Close iterator and release resources |
|
|
150
|
+
## Notes
|
|
293
151
|
|
|
294
|
-
|
|
295
|
-
-
|
|
296
|
-
-
|
|
297
|
-
- Only one iterator can be open at a time per parser instance
|
|
298
|
-
- Breaking out of iteration and calling `closeIterator()` stops parsing immediately
|
|
152
|
+
- Returned rows include the header row when the input has one.
|
|
153
|
+
- `removeTransform*` currently removes JavaScript transforms; C-transform removal by index/name is not fully implemented yet.
|
|
154
|
+
- `parse()` runs in a worker thread for non-transform workloads; when transforms are attached it preserves current synchronous transform behavior for compatibility.
|
package/build/Release/cisv.node
CHANGED
|
Binary file
|
package/cisv/cisv_addon.cc
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
#include <string>
|
|
7
7
|
#include <unordered_map>
|
|
8
8
|
#include <chrono>
|
|
9
|
+
#include <cstdint>
|
|
9
10
|
|
|
10
11
|
namespace {
|
|
11
12
|
|
|
@@ -60,11 +61,77 @@ static bool isValidUtf8(const char* data, size_t len) {
|
|
|
60
61
|
return true;
|
|
61
62
|
}
|
|
62
63
|
|
|
64
|
+
// Fast path for common ASCII-only CSV data.
|
|
65
|
+
static inline bool isAllAscii(const char* data, size_t len) {
|
|
66
|
+
const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data);
|
|
67
|
+
size_t i = 0;
|
|
68
|
+
|
|
69
|
+
// Check machine-word chunks first.
|
|
70
|
+
const size_t word_size = sizeof(uintptr_t);
|
|
71
|
+
const uintptr_t high_mask = sizeof(uintptr_t) == 8
|
|
72
|
+
? static_cast<uintptr_t>(0x8080808080808080ULL)
|
|
73
|
+
: static_cast<uintptr_t>(0x80808080UL);
|
|
74
|
+
|
|
75
|
+
while (i + word_size <= len) {
|
|
76
|
+
uintptr_t word;
|
|
77
|
+
memcpy(&word, bytes + i, word_size);
|
|
78
|
+
if (word & high_mask) {
|
|
79
|
+
return false;
|
|
80
|
+
}
|
|
81
|
+
i += word_size;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
while (i < len) {
|
|
85
|
+
if (bytes[i] & 0x80) {
|
|
86
|
+
return false;
|
|
87
|
+
}
|
|
88
|
+
i++;
|
|
89
|
+
}
|
|
90
|
+
return true;
|
|
91
|
+
}
|
|
92
|
+
|
|
63
93
|
// Create Napi::String with UTF-8 validation (safe version)
|
|
64
94
|
// Falls back to replacement character representation for invalid UTF-8
|
|
65
|
-
static
|
|
95
|
+
static napi_value SafeNewStringValue(napi_env env, const char* data, size_t len) {
|
|
96
|
+
// Short fields are extremely common in CSV; avoid heavier ASCII/UTF-8 scans.
|
|
97
|
+
if (len <= 32) {
|
|
98
|
+
bool ascii = true;
|
|
99
|
+
for (size_t i = 0; i < len; i++) {
|
|
100
|
+
if (static_cast<unsigned char>(data[i]) & 0x80) {
|
|
101
|
+
ascii = false;
|
|
102
|
+
break;
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
napi_value short_value = nullptr;
|
|
107
|
+
if (ascii) {
|
|
108
|
+
if (napi_create_string_latin1(env, data, len, &short_value) == napi_ok && short_value) {
|
|
109
|
+
return short_value;
|
|
110
|
+
}
|
|
111
|
+
} else {
|
|
112
|
+
if (napi_create_string_utf8(env, data, len, &short_value) == napi_ok && short_value) {
|
|
113
|
+
return short_value;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Fastest path: ASCII-only data is valid Latin-1.
|
|
119
|
+
// Using Latin-1 creation avoids UTF-8 decoding overhead.
|
|
120
|
+
if (isAllAscii(data, len)) {
|
|
121
|
+
napi_value latin1_value = nullptr;
|
|
122
|
+
if (napi_create_string_latin1(env, data, len, &latin1_value) == napi_ok && latin1_value) {
|
|
123
|
+
return latin1_value;
|
|
124
|
+
}
|
|
125
|
+
// Fallback to UTF-8 path if Latin-1 creation fails unexpectedly.
|
|
126
|
+
napi_value utf8_value = nullptr;
|
|
127
|
+
napi_create_string_utf8(env, data, len, &utf8_value);
|
|
128
|
+
return utf8_value;
|
|
129
|
+
}
|
|
130
|
+
|
|
66
131
|
if (isValidUtf8(data, len)) {
|
|
67
|
-
|
|
132
|
+
napi_value utf8_value = nullptr;
|
|
133
|
+
napi_create_string_utf8(env, data, len, &utf8_value);
|
|
134
|
+
return utf8_value;
|
|
68
135
|
}
|
|
69
136
|
|
|
70
137
|
// Invalid UTF-8 - replace invalid bytes with replacement character
|
|
@@ -109,7 +176,13 @@ static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
|
|
|
109
176
|
}
|
|
110
177
|
}
|
|
111
178
|
|
|
112
|
-
|
|
179
|
+
napi_value safe_value = nullptr;
|
|
180
|
+
napi_create_string_utf8(env, safe_str.c_str(), safe_str.length(), &safe_value);
|
|
181
|
+
return safe_value;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
static Napi::String SafeNewString(Napi::Env env, const char* data, size_t len) {
|
|
185
|
+
return Napi::String(env, SafeNewStringValue(env, data, len));
|
|
113
186
|
}
|
|
114
187
|
|
|
115
188
|
// Extended RowCollector that handles transforms
|
|
@@ -266,6 +339,76 @@ static void error_cb(void *user, int line, const char *msg) {
|
|
|
266
339
|
fprintf(stderr, "CSV Parse Error at line %d: %s\n", line, msg);
|
|
267
340
|
}
|
|
268
341
|
|
|
342
|
+
class ParseFileWorker final : public Napi::AsyncWorker {
|
|
343
|
+
public:
|
|
344
|
+
ParseFileWorker(
|
|
345
|
+
Napi::Env env,
|
|
346
|
+
std::string path,
|
|
347
|
+
cisv_config config,
|
|
348
|
+
Napi::Promise::Deferred deferred
|
|
349
|
+
) : Napi::AsyncWorker(env),
|
|
350
|
+
path_(std::move(path)),
|
|
351
|
+
config_(config),
|
|
352
|
+
deferred_(deferred) {}
|
|
353
|
+
|
|
354
|
+
void Execute() override {
|
|
355
|
+
cisv_result_t *result = cisv_parse_file_batch(path_.c_str(), &config_);
|
|
356
|
+
if (!result) {
|
|
357
|
+
SetError("parse error: " + std::string(strerror(errno)));
|
|
358
|
+
return;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
if (result->error_code != 0) {
|
|
362
|
+
std::string msg = result->error_message[0] ? result->error_message : "parse error";
|
|
363
|
+
if (msg.rfind("parse error", 0) != 0) {
|
|
364
|
+
msg = "parse error: " + msg;
|
|
365
|
+
}
|
|
366
|
+
SetError(msg);
|
|
367
|
+
cisv_result_free(result);
|
|
368
|
+
return;
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
rows_.reserve(result->row_count);
|
|
372
|
+
for (size_t i = 0; i < result->row_count; i++) {
|
|
373
|
+
cisv_row_t *row = &result->rows[i];
|
|
374
|
+
std::vector<std::string> out_row;
|
|
375
|
+
out_row.reserve(row->field_count);
|
|
376
|
+
for (size_t j = 0; j < row->field_count; j++) {
|
|
377
|
+
out_row.emplace_back(row->fields[j], row->field_lengths[j]);
|
|
378
|
+
}
|
|
379
|
+
rows_.emplace_back(std::move(out_row));
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
cisv_result_free(result);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
void OnOK() override {
|
|
386
|
+
Napi::Env env = Env();
|
|
387
|
+
Napi::Array out = Napi::Array::New(env, rows_.size());
|
|
388
|
+
|
|
389
|
+
for (size_t i = 0; i < rows_.size(); i++) {
|
|
390
|
+
Napi::Array row = Napi::Array::New(env, rows_[i].size());
|
|
391
|
+
for (size_t j = 0; j < rows_[i].size(); j++) {
|
|
392
|
+
const std::string &field = rows_[i][j];
|
|
393
|
+
row[j] = SafeNewString(env, field.c_str(), field.length());
|
|
394
|
+
}
|
|
395
|
+
out[i] = row;
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
deferred_.Resolve(out);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
void OnError(const Napi::Error &e) override {
|
|
402
|
+
deferred_.Reject(e.Value());
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
private:
|
|
406
|
+
std::string path_;
|
|
407
|
+
cisv_config config_;
|
|
408
|
+
Napi::Promise::Deferred deferred_;
|
|
409
|
+
std::vector<std::vector<std::string>> rows_;
|
|
410
|
+
};
|
|
411
|
+
|
|
269
412
|
} // namespace
|
|
270
413
|
|
|
271
414
|
class CisvParser : public Napi::ObjectWrap<CisvParser> {
|
|
@@ -310,6 +453,8 @@ public:
|
|
|
310
453
|
total_bytes_ = 0;
|
|
311
454
|
is_destroyed_ = false;
|
|
312
455
|
iterator_ = nullptr;
|
|
456
|
+
batch_result_ = nullptr;
|
|
457
|
+
stream_buffering_active_ = true;
|
|
313
458
|
|
|
314
459
|
// Initialize configuration with defaults
|
|
315
460
|
cisv_config_init(&config_);
|
|
@@ -503,6 +648,7 @@ public:
|
|
|
503
648
|
delete rc_;
|
|
504
649
|
rc_ = nullptr;
|
|
505
650
|
}
|
|
651
|
+
clearBatchResult();
|
|
506
652
|
is_destroyed_ = true;
|
|
507
653
|
}
|
|
508
654
|
}
|
|
@@ -528,26 +674,35 @@ public:
|
|
|
528
674
|
|
|
529
675
|
auto start = std::chrono::high_resolution_clock::now();
|
|
530
676
|
|
|
531
|
-
|
|
532
|
-
rc_->rows.clear();
|
|
533
|
-
rc_->current.clear();
|
|
534
|
-
rc_->current_field_index = 0;
|
|
677
|
+
resetRowState();
|
|
535
678
|
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
679
|
+
int result = 0;
|
|
680
|
+
if (!hasTransforms()) {
|
|
681
|
+
cisv_result_t *batch = cisv_parse_file_batch(path.c_str(), &config_);
|
|
682
|
+
if (!batch) {
|
|
683
|
+
throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
|
|
684
|
+
}
|
|
685
|
+
if (batch->error_code != 0) {
|
|
686
|
+
std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
|
|
687
|
+
cisv_result_free(batch);
|
|
688
|
+
throw Napi::Error::New(env, msg);
|
|
689
|
+
}
|
|
690
|
+
clearBatchResult();
|
|
691
|
+
batch_result_ = batch;
|
|
692
|
+
} else {
|
|
693
|
+
// Set environment for JS transforms
|
|
694
|
+
rc_->env = env;
|
|
695
|
+
result = cisv_parser_parse_file(parser_, path.c_str());
|
|
696
|
+
// Clear the environment reference after parsing
|
|
697
|
+
rc_->env = nullptr;
|
|
698
|
+
if (result < 0) {
|
|
699
|
+
throw Napi::Error::New(env, "parse error: " + std::to_string(result));
|
|
700
|
+
}
|
|
701
|
+
}
|
|
540
702
|
|
|
541
703
|
auto end = std::chrono::high_resolution_clock::now();
|
|
542
704
|
parse_time_ = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
|
|
543
705
|
|
|
544
|
-
// Clear the environment reference after parsing
|
|
545
|
-
rc_->env = nullptr;
|
|
546
|
-
|
|
547
|
-
if (result < 0) {
|
|
548
|
-
throw Napi::Error::New(env, "parse error: " + std::to_string(result));
|
|
549
|
-
}
|
|
550
|
-
|
|
551
706
|
return drainRows(env);
|
|
552
707
|
}
|
|
553
708
|
|
|
@@ -565,22 +720,33 @@ public:
|
|
|
565
720
|
|
|
566
721
|
std::string content = info[0].As<Napi::String>();
|
|
567
722
|
|
|
568
|
-
|
|
569
|
-
rc_->rows.clear();
|
|
570
|
-
rc_->current.clear();
|
|
571
|
-
rc_->current_field_index = 0;
|
|
723
|
+
resetRowState();
|
|
572
724
|
|
|
573
|
-
|
|
574
|
-
|
|
725
|
+
if (!hasTransforms()) {
|
|
726
|
+
cisv_result_t *batch = cisv_parse_string_batch(content.c_str(), content.length(), &config_);
|
|
727
|
+
if (!batch) {
|
|
728
|
+
throw Napi::Error::New(env, "parse error: " + std::string(strerror(errno)));
|
|
729
|
+
}
|
|
730
|
+
if (batch->error_code != 0) {
|
|
731
|
+
std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
|
|
732
|
+
cisv_result_free(batch);
|
|
733
|
+
throw Napi::Error::New(env, msg);
|
|
734
|
+
}
|
|
735
|
+
clearBatchResult();
|
|
736
|
+
batch_result_ = batch;
|
|
737
|
+
} else {
|
|
738
|
+
// Set environment for JS transforms
|
|
739
|
+
rc_->env = env;
|
|
575
740
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
741
|
+
// Write the string content as chunks
|
|
742
|
+
cisv_parser_write(parser_, (const uint8_t*)content.c_str(), content.length());
|
|
743
|
+
cisv_parser_end(parser_);
|
|
579
744
|
|
|
580
|
-
|
|
745
|
+
// Clear the environment reference after parsing
|
|
746
|
+
rc_->env = nullptr;
|
|
747
|
+
}
|
|
581
748
|
|
|
582
|
-
|
|
583
|
-
rc_->env = nullptr;
|
|
749
|
+
total_bytes_ = content.length();
|
|
584
750
|
|
|
585
751
|
return drainRows(env);
|
|
586
752
|
}
|
|
@@ -597,44 +763,95 @@ public:
|
|
|
597
763
|
throw Napi::TypeError::New(env, "Expected one argument");
|
|
598
764
|
}
|
|
599
765
|
|
|
766
|
+
// Streaming writes produce row-callback data, not batch results.
|
|
767
|
+
clearBatchResult();
|
|
768
|
+
|
|
600
769
|
// Set environment for JS transforms
|
|
601
770
|
rc_->env = env;
|
|
602
771
|
|
|
772
|
+
const uint8_t* chunk_data = nullptr;
|
|
773
|
+
size_t chunk_size = 0;
|
|
774
|
+
std::string chunk_storage;
|
|
775
|
+
|
|
603
776
|
if (info[0].IsBuffer()) {
|
|
604
777
|
auto buf = info[0].As<Napi::Buffer<uint8_t>>();
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
778
|
+
chunk_data = buf.Data();
|
|
779
|
+
chunk_size = buf.Length();
|
|
780
|
+
} else if (info[0].IsString()) {
|
|
781
|
+
chunk_storage = info[0].As<Napi::String>();
|
|
782
|
+
chunk_data = reinterpret_cast<const uint8_t*>(chunk_storage.data());
|
|
783
|
+
chunk_size = chunk_storage.size();
|
|
784
|
+
} else {
|
|
785
|
+
throw Napi::TypeError::New(env, "Expected Buffer or String");
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
// Check for overflow before adding to total_bytes_
|
|
789
|
+
if (chunk_size > SIZE_MAX - total_bytes_) {
|
|
790
|
+
throw Napi::Error::New(env, "Total bytes would overflow");
|
|
613
791
|
}
|
|
614
792
|
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
if (chunk_size > SIZE_MAX -
|
|
620
|
-
throw Napi::Error::New(env, "
|
|
793
|
+
// Fast streaming mode:
|
|
794
|
+
// Buffer chunks when no transforms/iterator are active and batch-parse on end().
|
|
795
|
+
// If buffered payload exceeds threshold, flush once to parser and continue streaming.
|
|
796
|
+
if (!hasTransforms() && iterator_ == nullptr) {
|
|
797
|
+
if (chunk_size > SIZE_MAX - pending_stream_.size()) {
|
|
798
|
+
throw Napi::Error::New(env, "Buffered stream size would overflow");
|
|
621
799
|
}
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
800
|
+
|
|
801
|
+
if (stream_buffering_active_) {
|
|
802
|
+
pending_stream_.append(reinterpret_cast<const char*>(chunk_data), chunk_size);
|
|
803
|
+
total_bytes_ += chunk_size;
|
|
804
|
+
|
|
805
|
+
if (pending_stream_.size() > kStreamBufferLimitBytes) {
|
|
806
|
+
flushPendingStreamToParser();
|
|
807
|
+
stream_buffering_active_ = false;
|
|
808
|
+
}
|
|
809
|
+
return;
|
|
810
|
+
}
|
|
811
|
+
} else if (!pending_stream_.empty()) {
|
|
812
|
+
flushPendingStreamToParser();
|
|
813
|
+
stream_buffering_active_ = false;
|
|
625
814
|
}
|
|
626
815
|
|
|
627
|
-
|
|
816
|
+
cisv_parser_write(parser_, chunk_data, chunk_size);
|
|
817
|
+
total_bytes_ += chunk_size;
|
|
628
818
|
}
|
|
629
819
|
|
|
630
820
|
void End(const Napi::CallbackInfo &info) {
|
|
631
|
-
if (
|
|
632
|
-
|
|
633
|
-
|
|
821
|
+
if (is_destroyed_) {
|
|
822
|
+
return;
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
if (stream_buffering_active_ && !pending_stream_.empty() &&
|
|
826
|
+
!hasTransforms() && iterator_ == nullptr &&
|
|
827
|
+
rc_ && rc_->rows.empty() && rc_->current.empty()) {
|
|
828
|
+
cisv_result_t *batch = cisv_parse_string_batch(
|
|
829
|
+
pending_stream_.data(), pending_stream_.size(), &config_);
|
|
830
|
+
if (!batch) {
|
|
831
|
+
throw Napi::Error::New(info.Env(), "parse error: " + std::string(strerror(errno)));
|
|
832
|
+
}
|
|
833
|
+
if (batch->error_code != 0) {
|
|
834
|
+
std::string msg = batch->error_message[0] ? batch->error_message : "parse error";
|
|
835
|
+
cisv_result_free(batch);
|
|
836
|
+
throw Napi::Error::New(info.Env(), msg);
|
|
837
|
+
}
|
|
838
|
+
clearBatchResult();
|
|
839
|
+
batch_result_ = batch;
|
|
840
|
+
pending_stream_.clear();
|
|
634
841
|
rc_->env = nullptr;
|
|
635
|
-
|
|
636
|
-
// as they are Persistent references managed by the addon lifecycle
|
|
842
|
+
return;
|
|
637
843
|
}
|
|
844
|
+
|
|
845
|
+
if (!pending_stream_.empty()) {
|
|
846
|
+
flushPendingStreamToParser();
|
|
847
|
+
stream_buffering_active_ = false;
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
cisv_parser_end(parser_);
|
|
851
|
+
// Clear the environment reference after ending to prevent stale references
|
|
852
|
+
rc_->env = nullptr;
|
|
853
|
+
// Note: JS transforms stored in rc_->js_transforms remain valid
|
|
854
|
+
// as they are Persistent references managed by the addon lifecycle
|
|
638
855
|
}
|
|
639
856
|
|
|
640
857
|
Napi::Value GetRows(const Napi::CallbackInfo &info) {
|
|
@@ -642,16 +859,23 @@ public:
|
|
|
642
859
|
Napi::Env env = info.Env();
|
|
643
860
|
throw Napi::Error::New(env, "Parser has been destroyed");
|
|
644
861
|
}
|
|
862
|
+
if (!pending_stream_.empty()) {
|
|
863
|
+
flushPendingStreamToParser();
|
|
864
|
+
stream_buffering_active_ = false;
|
|
865
|
+
}
|
|
645
866
|
return drainRows(info.Env());
|
|
646
867
|
}
|
|
647
868
|
|
|
648
869
|
void Clear(const Napi::CallbackInfo &info) {
|
|
649
870
|
if (!is_destroyed_ && rc_) {
|
|
871
|
+
clearBatchResult();
|
|
650
872
|
rc_->rows.clear();
|
|
651
873
|
rc_->current.clear();
|
|
652
874
|
rc_->current_field_index = 0;
|
|
653
875
|
total_bytes_ = 0;
|
|
654
876
|
parse_time_ = 0;
|
|
877
|
+
pending_stream_.clear();
|
|
878
|
+
stream_buffering_active_ = true;
|
|
655
879
|
// Also clear the environment reference
|
|
656
880
|
rc_->env = nullptr;
|
|
657
881
|
}
|
|
@@ -870,11 +1094,26 @@ Napi::Value TransformByName(const Napi::CallbackInfo &info) {
|
|
|
870
1094
|
// Handle JavaScript function transforms by name
|
|
871
1095
|
Napi::Function func = info[1].As<Napi::Function>();
|
|
872
1096
|
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
1097
|
+
if (!rc_->pipeline || !rc_->pipeline->header_fields) {
|
|
1098
|
+
throw Napi::Error::New(env,
|
|
1099
|
+
"Header fields are not set. Call setHeaderFields([...]) before transformByName(..., fn).");
|
|
876
1100
|
}
|
|
877
1101
|
|
|
1102
|
+
int field_index = -1;
|
|
1103
|
+
for (size_t i = 0; i < rc_->pipeline->header_count; i++) {
|
|
1104
|
+
if (strcmp(rc_->pipeline->header_fields[i], field_name.c_str()) == 0) {
|
|
1105
|
+
field_index = static_cast<int>(i);
|
|
1106
|
+
break;
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1109
|
+
|
|
1110
|
+
if (field_index < 0) {
|
|
1111
|
+
throw Napi::Error::New(env, "Unknown field name: " + field_name);
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
// Store callback in the same map used by applyTransforms().
|
|
1115
|
+
rc_->js_transforms[field_index] = Napi::Persistent(func);
|
|
1116
|
+
|
|
878
1117
|
} else {
|
|
879
1118
|
throw Napi::TypeError::New(env, "Transform must be a string type or function");
|
|
880
1119
|
}
|
|
@@ -1008,6 +1247,11 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1008
1247
|
}
|
|
1009
1248
|
|
|
1010
1249
|
// Clear JavaScript transforms
|
|
1250
|
+
for (auto &pair : rc_->js_transforms) {
|
|
1251
|
+
if (!pair.second.IsEmpty()) {
|
|
1252
|
+
pair.second.Reset();
|
|
1253
|
+
}
|
|
1254
|
+
}
|
|
1011
1255
|
rc_->js_transforms.clear();
|
|
1012
1256
|
|
|
1013
1257
|
// Clear C transforms - destroy and DON'T recreate pipeline yet
|
|
@@ -1033,18 +1277,32 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1033
1277
|
|
|
1034
1278
|
std::string path = info[0].As<Napi::String>();
|
|
1035
1279
|
|
|
1036
|
-
// Create a promise
|
|
1037
1280
|
auto deferred = Napi::Promise::Deferred::New(env);
|
|
1038
1281
|
|
|
1039
|
-
//
|
|
1040
|
-
//
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1282
|
+
// Preserve behavior for transform-enabled parsers (native + JS transforms)
|
|
1283
|
+
// until async transform execution is implemented.
|
|
1284
|
+
bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;
|
|
1285
|
+
bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
|
|
1286
|
+
if (has_c_transforms || has_js_transforms) {
|
|
1287
|
+
try {
|
|
1288
|
+
Napi::Value result = ParseSync(info);
|
|
1289
|
+
deferred.Resolve(result);
|
|
1290
|
+
} catch (const Napi::Error &e) {
|
|
1291
|
+
deferred.Reject(e.Value());
|
|
1292
|
+
}
|
|
1293
|
+
return deferred.Promise();
|
|
1046
1294
|
}
|
|
1047
1295
|
|
|
1296
|
+
// Use batch parser in a worker thread to avoid blocking the event loop.
|
|
1297
|
+
cisv_config worker_config = config_;
|
|
1298
|
+
worker_config.field_cb = nullptr;
|
|
1299
|
+
worker_config.row_cb = nullptr;
|
|
1300
|
+
worker_config.error_cb = nullptr;
|
|
1301
|
+
worker_config.user = nullptr;
|
|
1302
|
+
|
|
1303
|
+
auto *worker = new ParseFileWorker(env, path, worker_config, deferred);
|
|
1304
|
+
worker->Queue();
|
|
1305
|
+
|
|
1048
1306
|
return deferred.Promise();
|
|
1049
1307
|
}
|
|
1050
1308
|
|
|
@@ -1090,10 +1348,22 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1090
1348
|
}
|
|
1091
1349
|
|
|
1092
1350
|
Napi::Object stats = Napi::Object::New(env);
|
|
1351
|
+
size_t row_count = 0;
|
|
1352
|
+
size_t field_count = 0;
|
|
1353
|
+
if (batch_result_) {
|
|
1354
|
+
row_count = batch_result_->row_count;
|
|
1355
|
+
if (batch_result_->row_count > 0) {
|
|
1356
|
+
field_count = batch_result_->rows[0].field_count;
|
|
1357
|
+
}
|
|
1358
|
+
} else if (rc_) {
|
|
1359
|
+
row_count = rc_->rows.size();
|
|
1360
|
+
if (!rc_->rows.empty()) {
|
|
1361
|
+
field_count = rc_->rows[0].size();
|
|
1362
|
+
}
|
|
1363
|
+
}
|
|
1093
1364
|
|
|
1094
|
-
stats.Set("rowCount", Napi::Number::New(env,
|
|
1095
|
-
stats.Set("fieldCount", Napi::Number::New(env,
|
|
1096
|
-
(rc_ && !rc_->rows.empty()) ? rc_->rows[0].size() : 0));
|
|
1365
|
+
stats.Set("rowCount", Napi::Number::New(env, row_count));
|
|
1366
|
+
stats.Set("fieldCount", Napi::Number::New(env, field_count));
|
|
1097
1367
|
stats.Set("totalBytes", Napi::Number::New(env, total_bytes_));
|
|
1098
1368
|
stats.Set("parseTime", Napi::Number::New(env, parse_time_));
|
|
1099
1369
|
stats.Set("currentLine", Napi::Number::New(env,
|
|
@@ -1233,14 +1503,13 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1233
1503
|
throw Napi::Error::New(env, "Error reading CSV row");
|
|
1234
1504
|
}
|
|
1235
1505
|
|
|
1236
|
-
|
|
1237
|
-
|
|
1506
|
+
napi_value row;
|
|
1507
|
+
napi_create_array_with_length(env, field_count, &row);
|
|
1238
1508
|
for (size_t i = 0; i < field_count; i++) {
|
|
1239
|
-
|
|
1240
|
-
row.Set(i, SafeNewString(env, fields[i], lengths[i]));
|
|
1509
|
+
napi_set_element(env, row, i, SafeNewStringValue(env, fields[i], lengths[i]));
|
|
1241
1510
|
}
|
|
1242
1511
|
|
|
1243
|
-
return row;
|
|
1512
|
+
return Napi::Value(env, row);
|
|
1244
1513
|
}
|
|
1245
1514
|
|
|
1246
1515
|
/**
|
|
@@ -1263,27 +1532,94 @@ Napi::Value RemoveTransformByName(const Napi::CallbackInfo &info) {
|
|
|
1263
1532
|
}
|
|
1264
1533
|
|
|
1265
1534
|
private:
|
|
1535
|
+
void clearBatchResult() {
|
|
1536
|
+
if (batch_result_) {
|
|
1537
|
+
cisv_result_free(batch_result_);
|
|
1538
|
+
batch_result_ = nullptr;
|
|
1539
|
+
}
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
bool hasTransforms() const {
|
|
1543
|
+
bool has_c_transforms = rc_ && rc_->pipeline && rc_->pipeline->count > 0;
|
|
1544
|
+
bool has_js_transforms = rc_ && !rc_->js_transforms.empty();
|
|
1545
|
+
return has_c_transforms || has_js_transforms;
|
|
1546
|
+
}
|
|
1547
|
+
|
|
1548
|
+
/**
 * Reset all per-parse row state.
 *
 * Drops any cached batch result, discards pending stream bytes, and sets
 * the stream-buffering flag to true. When a row collector (rc_) exists,
 * its accumulated rows, in-progress row, and current field index are
 * cleared as well.
 */
void resetRowState() {
    clearBatchResult();
    pending_stream_.clear();
    stream_buffering_active_ = true;

    if (rc_) {
        rc_->rows.clear();
        rc_->current.clear();
        rc_->current_field_index = 0;
    }
}
|
|
1557
|
+
|
|
1558
|
+
/**
 * Hand any bytes held in pending_stream_ to the parser and empty the buffer.
 *
 * Does nothing when the buffer is empty. The buffered bytes are passed to
 * cisv_parser_write() as a single contiguous chunk; its result is not
 * inspected here.
 */
void flushPendingStreamToParser() {
    if (!pending_stream_.empty()) {
        const uint8_t *chunk =
            reinterpret_cast<const uint8_t*>(pending_stream_.data());
        const size_t chunk_len = pending_stream_.size();
        cisv_parser_write(parser_, chunk, chunk_len);
        pending_stream_.clear();
    }
}
|
|
1568
|
+
|
|
1569
|
+
void loadRowsFromBatch(const cisv_result_t *result) {
|
|
1570
|
+
if (!rc_ || !result) return;
|
|
1571
|
+
rc_->rows.clear();
|
|
1572
|
+
rc_->rows.reserve(result->row_count);
|
|
1573
|
+
|
|
1574
|
+
for (size_t i = 0; i < result->row_count; i++) {
|
|
1575
|
+
const cisv_row_t *row = &result->rows[i];
|
|
1576
|
+
std::vector<std::string> out_row;
|
|
1577
|
+
out_row.reserve(row->field_count);
|
|
1578
|
+
for (size_t j = 0; j < row->field_count; j++) {
|
|
1579
|
+
out_row.emplace_back(row->fields[j], row->field_lengths[j]);
|
|
1580
|
+
}
|
|
1581
|
+
rc_->rows.emplace_back(std::move(out_row));
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1584
|
+
|
|
1266
1585
|
/**
 * Build a JavaScript array of row arrays from the parsed data.
 *
 * A cached batch result takes precedence: when batch_result_ is set, the
 * rows are read directly from it. Otherwise the rows accumulated in the
 * row collector (rc_) are used; with no collector an empty array is
 * returned. Arrays are created through the raw N-API calls and every
 * field goes through SafeNewStringValue(). The stored rows are left
 * untouched, so the call can be repeated.
 *
 * @param env Environment in which the JavaScript values are created.
 * @return The outer array, one element per row, each an array of strings.
 */
Napi::Value drainRows(Napi::Env env) {
    if (batch_result_) {
        const size_t total_rows = batch_result_->row_count;
        napi_value js_rows;
        napi_create_array_with_length(env, total_rows, &js_rows);

        for (size_t r = 0; r < total_rows; ++r) {
            const cisv_row_t &src = batch_result_->rows[r];
            napi_value js_row;
            napi_create_array_with_length(env, src.field_count, &js_row);
            for (size_t f = 0; f < src.field_count; ++f) {
                napi_set_element(
                    env, js_row, f,
                    SafeNewStringValue(env, src.fields[f], src.field_lengths[f]));
            }
            napi_set_element(env, js_rows, r, js_row);
        }
        return Napi::Value(env, js_rows);
    }

    if (!rc_) {
        return Napi::Array::New(env, 0);
    }

    const auto &stored = rc_->rows;
    napi_value js_rows;
    napi_create_array_with_length(env, stored.size(), &js_rows);

    size_t r = 0;
    for (const auto &stored_row : stored) {
        napi_value js_row;
        napi_create_array_with_length(env, stored_row.size(), &js_row);

        size_t f = 0;
        for (const std::string &text : stored_row) {
            // SECURITY: Use safe string creation to handle invalid UTF-8 in CSV data
            napi_set_element(
                env, js_row, f,
                SafeNewStringValue(env, text.c_str(), text.length()));
            ++f;
        }
        napi_set_element(env, js_rows, r, js_row);
        ++r;
    }

    // Rows are intentionally kept (not cleared) so the data can be read
    // more than once.
    return Napi::Value(env, js_rows);
}
|
|
1288
1624
|
|
|
1289
1625
|
cisv_parser *parser_;
|
|
@@ -1293,6 +1629,10 @@ private:
|
|
|
1293
1629
|
double parse_time_;
|
|
1294
1630
|
bool is_destroyed_;
|
|
1295
1631
|
cisv_iterator_t *iterator_; // For row-by-row iteration
|
|
1632
|
+
cisv_result_t *batch_result_;
|
|
1633
|
+
std::string pending_stream_;
|
|
1634
|
+
bool stream_buffering_active_;
|
|
1635
|
+
static constexpr size_t kStreamBufferLimitBytes = 8 * 1024 * 1024;
|
|
1296
1636
|
};
|
|
1297
1637
|
|
|
1298
1638
|
// Initialize all exports
|