@milaboratories/pf-driver 1.3.11 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/csv_writer.cjs +79 -0
- package/dist/csv_writer.cjs.map +1 -0
- package/dist/csv_writer.js +78 -0
- package/dist/csv_writer.js.map +1 -0
- package/dist/driver_decl.d.ts +4 -2
- package/dist/driver_decl.d.ts.map +1 -1
- package/dist/driver_double.cjs +1 -1
- package/dist/driver_double.js +1 -1
- package/dist/driver_impl.cjs +94 -17
- package/dist/driver_impl.cjs.map +1 -1
- package/dist/driver_impl.d.ts +2 -1
- package/dist/driver_impl.d.ts.map +1 -1
- package/dist/driver_impl.js +93 -18
- package/dist/driver_impl.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/package.json +8 -7
- package/src/__tests__/csv_writer.test.ts +419 -0
- package/src/__tests__/download_ptable.test.ts +617 -0
- package/src/csv_writer.ts +154 -0
- package/src/driver_decl.ts +14 -0
- package/src/driver_impl.ts +100 -3
|
@@ -0,0 +1,617 @@
|
|
|
1
|
+
import { afterEach, describe, expect, it } from "vitest";
|
|
2
|
+
import * as fs from "node:fs";
|
|
3
|
+
import * as path from "node:path";
|
|
4
|
+
import * as os from "node:os";
|
|
5
|
+
import { Readable } from "node:stream";
|
|
6
|
+
import { pipeline } from "node:stream/promises";
|
|
7
|
+
import {
|
|
8
|
+
PObjectId,
|
|
9
|
+
ValueType,
|
|
10
|
+
type PTableColumnSpec,
|
|
11
|
+
type PTableVector,
|
|
12
|
+
type TableRange,
|
|
13
|
+
} from "@milaboratories/pl-model-common";
|
|
14
|
+
import { isNil } from "@milaboratories/helpers";
|
|
15
|
+
import { createPathAtomically, type MiLogger } from "@milaboratories/ts-helpers";
|
|
16
|
+
import { parseString } from "fast-csv";
|
|
17
|
+
import { streamPTableRows, type PTableDataSource } from "../csv_writer";
|
|
18
|
+
|
|
19
|
+
// ── Tests ───────────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
describe("downloadPTable integration", () => {
|
|
22
|
+
// ── 1. Roundtrip small table ────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
it("roundtrips a small mixed-type table through CSV", async () => {
  const rowCount = 5;
  const specs = [
    makeAxisSpec("id", "ID"),
    makeColumnSpec("score", "Double", "Score"),
    makeColumnSpec("name", "String", "Name"),
    makeColumnSpec("big", "Long", "Big"),
  ];
  const intValues = makeIntVector([1, 2, 3, 4, 5]);
  const doubleValues = makeDoubleVector([1.5, 2.7, 0, -3.14, 100.001]);
  const stringValues = makeStringVector(["alice", "bob", null, "dave", "eve"]);
  // Includes values beyond Number.MAX_SAFE_INTEGER to catch lossy bigint → number conversion.
  const longValues = makeLongVector([0n, 9007199254740993n, -1n, 42n, -9007199254740993n]);

  const fullVectors = [intValues, doubleValues, stringValues, longValues];
  const pTable = makePTableDataSource(fullVectors, specs);
  const columnIndices = [0, 1, 2, 3];

  const filePath = temporaryFilePath("roundtrip.csv");
  const result = await downloadPTableFromSource(pTable, {
    path: filePath,
    format: "csv",
    columnIndices,
  });

  expect(result.rowsWritten).toBe(rowCount);
  expect(result.bytesWritten).toBeGreaterThan(0);

  const fileContent = await fs.promises.readFile(filePath, "utf-8");
  const parsed = await parseCsv(fileContent, ",");

  // Header + one line per data row; guards against truncated or duplicated output.
  expect(parsed.rows.length).toBe(rowCount + 1);

  // First row is header
  expect(parsed.rows[0]).toEqual(["ID", "Score", "Name", "Big"]);

  // Compare data rows against getData.
  // getData returns one vector per *requested* column, in request order, so the
  // result is walked positionally rather than indexed by the table column index
  // (the two only coincide when columnIndices is 0..n-1).
  const allData = await pTable.getData(columnIndices, {
    range: { offset: 0, length: rowCount },
  });
  for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
    const expectedRow = allData.map((vector) => expectedCellString(vector, rowIndex));
    expect(parsed.rows[rowIndex + 1]).toEqual(expectedRow);
  }
});
|
|
65
|
+
|
|
66
|
+
// ── 2. Range slicing ───────────────────────────────────────────
|
|
67
|
+
|
|
68
|
+
it("slices a 100-row table with range offset=25 length=50", async () => {
  const totalRows = 100;
  const requestedRange: TableRange = { offset: 25, length: 50 };
  const selectedColumns = [0, 1];

  // Column 0 holds the row index, column 1 holds index * 10.
  const indexColumn = Array.from({ length: totalRows }, (_, i) => i);
  const valueColumn = Array.from({ length: totalRows }, (_, i) => i * 10);
  const pTable = makePTableDataSource(
    [makeIntVector(indexColumn), makeIntVector(valueColumn)],
    [makeAxisSpec("idx", "Index"), makeColumnSpec("val", "Int", "Value")],
  );

  const filePath = temporaryFilePath("range.csv");
  // chunkSize (20) does not divide the range length (50), so the final chunk is partial.
  const result = await downloadPTableFromSource(pTable, {
    path: filePath,
    format: "csv",
    columnIndices: selectedColumns,
    range: requestedRange,
    chunkSize: 20,
  });

  expect(result.rowsWritten).toBe(50);

  const parsed = await parseCsv(await fs.promises.readFile(filePath, "utf-8"), ",");

  // Header + 50 data rows
  expect(parsed.rows.length).toBe(51);
  expect(parsed.rows[0]).toEqual(["Index", "Value"]);

  // Verify data matches rows 25..74
  const [indexVector, valueVector] = await pTable.getData(selectedColumns, {
    range: requestedRange,
  });
  for (let rowIndex = 0; rowIndex < 50; rowIndex++) {
    const expectedRow = [
      expectedCellString(indexVector, rowIndex),
      expectedCellString(valueVector, rowIndex),
    ];
    expect(parsed.rows[rowIndex + 1]).toEqual(expectedRow);
  }

  // Spot-check first and last
  expect(parsed.rows[1]).toEqual(["25", "250"]);
  expect(parsed.rows[50]).toEqual(["74", "740"]);
});
|
|
106
|
+
|
|
107
|
+
// ── 3. Escape edge cases ───────────────────────────────────────
|
|
108
|
+
|
|
109
|
+
it("handles escape edge cases: quotes, commas, CRLF, unicode, bigint boundary, null, NaN, Infinity", async () => {
  // Row layout (index → what the row exercises):
  //   0 quotes + max i64, 1 comma + min i64, 2 CRLF, 3 unicode,
  //   4 null string, 5 NaN, 6 +Infinity, 7 -Infinity
  const textColumn = makeStringVector([
    'has "quotes"',
    "has,comma",
    "has\r\nCRLF",
    "кириллица 日本語 🎉",
    null,
    "plain",
    "plain2",
    "plain3",
  ]);
  const numberColumn = makeDoubleVector([42.5, -0.001, 3.14, 99.9, 0, NaN, Infinity, -Infinity]);
  const bigColumn = makeLongVector([
    9223372036854775807n, // max i64
    -9223372036854775808n, // min i64
    0n,
    1n,
    -1n,
    100n,
    200n,
    300n,
  ]);

  const pTable = makePTableDataSource(
    [textColumn, numberColumn, bigColumn],
    [
      makeColumnSpec("text", "String", "Text"),
      makeColumnSpec("number", "Double", "Number"),
      makeColumnSpec("big", "Long", "Big"),
    ],
  );

  const filePath = temporaryFilePath("escape.csv");
  await downloadPTableFromSource(pTable, {
    path: filePath,
    format: "csv",
    columnIndices: [0, 1, 2],
  });

  const parsed = await parseCsv(await fs.promises.readFile(filePath, "utf-8"), ",");

  // Drop the header; everything below indexes data rows only.
  const rows = parsed.rows.slice(1);
  expect(rows.length).toBe(8);

  // Embedded double-quotes survive the roundtrip; max i64 is written without precision loss.
  expect(rows[0][0]).toBe('has "quotes"');
  expect(rows[0][1]).toBe("42.5");
  expect(rows[0][2]).toBe("9223372036854775807");

  // Embedded separator stays inside a single field; min i64 is intact.
  expect(rows[1][0]).toBe("has,comma");
  expect(rows[1][2]).toBe("-9223372036854775808");

  // Embedded CRLF does not split the record.
  expect(rows[2][0]).toBe("has\r\nCRLF");

  // Non-ASCII text and emoji are preserved.
  expect(rows[3][0]).toBe("кириллица 日本語 🎉");

  // null string → empty field
  expect(rows[4][0]).toBe("");

  // NaN, +Infinity and -Infinity are all written as empty fields.
  expect(rows[5][1]).toBe("");
  expect(rows[6][1]).toBe("");
  expect(rows[7][1]).toBe("");
});
|
|
182
|
+
|
|
183
|
+
// ── 4. Cancel mid-stream ───────────────────────────────────────
|
|
184
|
+
|
|
185
|
+
it("cleans up .part file and rejects on abort", async () => {
  const rowCount = 5000;
  const specs = [makeAxisSpec("v", "Value")];
  const fullVectors = [makeIntVector(Array.from({ length: rowCount }, (_, i) => i))];

  // Data source that takes ~5ms per getData call and honours the abort signal
  // both before and after the delay.
  const slowPTable: PTableDataSource & {
    getSpec(): PTableColumnSpec[];
    getShape(): { rows: number };
  } = {
    getData: async (
      columnIndices: number[],
      options?: { range?: TableRange; signal?: AbortSignal },
    ) => {
      options?.signal?.throwIfAborted();
      // Give the abort timer below a chance to fire while a chunk is "in flight".
      await new Promise((resolve) => setTimeout(resolve, 5));
      options?.signal?.throwIfAborted();
      const requested = options?.range;
      const range = isNil(requested) ? { offset: 0, length: rowCount } : requested;
      return columnIndices.map((columnIndex) => sliceVector(fullVectors[columnIndex], range));
    },
    getSpec: () => specs,
    getShape: () => ({ rows: rowCount }),
  };

  const filePath = temporaryFilePath("cancel.csv");
  const controller = new AbortController();

  // 50 chunks × 5ms each take far longer than 15ms, so the abort lands mid-stream.
  setTimeout(() => controller.abort(), 15);

  const download = downloadPTableFromSource(slowPTable, {
    path: filePath,
    format: "csv",
    columnIndices: [0],
    chunkSize: 100,
    signal: controller.signal,
  });
  await expect(download).rejects.toThrow();

  // Neither the intermediate ".part" file nor the final file may be left behind.
  expect(fs.existsSync(filePath + ".part")).toBe(false);
  expect(fs.existsSync(filePath)).toBe(false);
});
|
|
235
|
+
|
|
236
|
+
// ── 5. TSV format ──────────────────────────────────────────────
|
|
237
|
+
|
|
238
|
+
it("produces valid TSV with tab separator and quotes TABs in values", async () => {
  const idColumn = makeIntVector([1, 2, 3]);
  // The middle value contains the separator itself.
  const nameColumn = makeStringVector(["plain", "has\ttab", "normal"]);
  const pTable = makePTableDataSource(
    [idColumn, nameColumn],
    [makeAxisSpec("id", "ID"), makeColumnSpec("name", "String", "Name")],
  );

  const filePath = temporaryFilePath("output.tsv");
  const result = await downloadPTableFromSource(pTable, {
    path: filePath,
    format: "tsv",
    columnIndices: [0, 1],
    bom: false,
  });

  expect(result.rowsWritten).toBe(3);

  const fileContent = await fs.promises.readFile(filePath, "utf-8");
  const { rows } = await parseCsv(fileContent, "\t");

  expect(rows[0]).toEqual(["ID", "Name"]);
  expect(rows[1]).toEqual(["1", "plain"]);
  // Still two fields: the embedded TAB did not split the record.
  expect(rows[2]).toEqual(["2", "has\ttab"]);
  expect(rows[3]).toEqual(["3", "normal"]);

  // Verify raw content uses tab separators (records are split on CRLF here).
  const nonEmptyLines = fileContent.split("\r\n").filter((line) => line.length > 0);
  expect(nonEmptyLines[0]).toBe("ID\tName");
});
|
|
268
|
+
|
|
269
|
+
// ── 6. BOM flag ────────────────────────────────────────────────
|
|
270
|
+
|
|
271
|
+
it("prepends UTF-8 BOM when bom=true", async () => {
  const pTable = makePTableDataSource([makeIntVector([1])], [makeAxisSpec("v", "V")]);

  const filePath = temporaryFilePath("bom.csv");
  await downloadPTableFromSource(pTable, {
    path: filePath,
    format: "csv",
    columnIndices: [0],
    bom: true,
  });

  // Byte level: the UTF-8 encoding of U+FEFF is EF BB BF.
  const rawBytes = await fs.promises.readFile(filePath);
  const [firstByte, secondByte, thirdByte] = rawBytes;
  expect(firstByte).toBe(0xef);
  expect(secondByte).toBe(0xbb);
  expect(thirdByte).toBe(0xbf);

  // String level: the decoded text starts with U+FEFF.
  const decoded = await fs.promises.readFile(filePath, "utf-8");
  expect(decoded.charCodeAt(0)).toBe(0xfeff);
});
|
|
294
|
+
|
|
295
|
+
it("does NOT prepend BOM when bom=false", async () => {
  const pTable = makePTableDataSource([makeIntVector([1])], [makeAxisSpec("v", "V")]);

  const filePath = temporaryFilePath("nobom.csv");
  await downloadPTableFromSource(pTable, {
    path: filePath,
    format: "csv",
    columnIndices: [0],
    bom: false,
  });

  // 0xEF is the first byte of a UTF-8 BOM; it must not lead the file.
  const [firstByte] = await fs.promises.readFile(filePath);
  expect(firstByte).not.toBe(0xef);
});
|
|
312
|
+
|
|
313
|
+
// ── 7. Concurrent download + getData ───────────────────────────
|
|
314
|
+
|
|
315
|
+
it("concurrent download and getData do not block each other", async () => {
  const largeRowCount = 2000;
  const chunkSize = 100;
  const specs = [makeAxisSpec("v", "Value")];
  const values = Array.from({ length: largeRowCount }, (_, i) => i);
  const fullVectors = [makeIntVector(values)];

  // Slow data source for download: 10ms delay per chunk
  let downloadGetDataCallCount = 0;
  const slowDownloadPTable: PTableDataSource & {
    getSpec(): PTableColumnSpec[];
    getShape(): { rows: number };
  } = {
    getData: async (
      columnIndices: number[],
      options?: { range?: TableRange; signal?: AbortSignal },
    ) => {
      downloadGetDataCallCount++;
      await new Promise((resolve) => setTimeout(resolve, 10));
      const requested = options?.range;
      const range = isNil(requested) ? { offset: 0, length: largeRowCount } : requested;
      return columnIndices.map((columnIndex) => sliceVector(fullVectors[columnIndex], range));
    },
    getSpec: () => specs,
    getShape: () => ({ rows: largeRowCount }),
  };

  // Fast data source for "UI getData" — different table, no delay
  const fastPTable = makePTableDataSource(
    [makeIntVector([100, 200, 300])],
    [makeAxisSpec("x", "X")],
  );

  const filePath = temporaryFilePath("concurrent.csv");

  // Start download (slow, many chunks)
  const downloadPromise = downloadPTableFromSource(slowDownloadPTable, {
    path: filePath,
    format: "csv",
    columnIndices: [0],
    chunkSize,
  });

  // Track settlement instead of measuring wall-clock time: an ordering check is
  // deterministic, whereas a "< 50ms" threshold can fail on a loaded machine.
  // Both handlers are supplied so a rejected download never surfaces as an
  // unhandled rejection from this bookkeeping chain.
  let downloadSettled = false;
  const markSettled = (): void => {
    downloadSettled = true;
  };
  void downloadPromise.then(markSettled, markSettled);

  try {
    // Concurrently call getData on the fast table — it must resolve while the
    // download (at least 20 chunks × 10ms) is still in flight.
    const getDataResult = await fastPTable.getData([0], { range: { offset: 0, length: 3 } });
    expect(downloadSettled).toBe(false);
    expect(vectorLength(getDataResult[0])).toBe(3);
  } finally {
    // Always let the download finish, even when an assertion above failed, so
    // afterEach does not unlink files underneath a live write stream. Errors are
    // ignored here; the await below reports them.
    await downloadPromise.catch(() => {});
  }

  // Wait for download to complete (re-throws if the download failed).
  const downloadResult = await downloadPromise;
  expect(downloadResult.rowsWritten).toBe(largeRowCount);

  // Download should have made one getData call per chunk (2000 rows / 100 = 20).
  expect(downloadGetDataCallCount).toBe(largeRowCount / chunkSize);
});
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
// ── Test infrastructure ─────────────────────────────────────────────
|
|
377
|
+
|
|
378
|
+
/**
 * Paths registered by the tests in this file. After every test each path and
 * its ".part" sibling are removed and the list is emptied.
 */
const temporaryFiles: string[] = [];

afterEach(async () => {
  // Best-effort removal: a file that was never created (or is already gone)
  // must not fail the test, so unlink errors are deliberately ignored.
  const removeQuietly = (target: string): Promise<void> =>
    fs.promises.unlink(target).catch(() => {});

  for (const registeredPath of temporaryFiles) {
    await removeQuietly(registeredPath);
    await removeQuietly(registeredPath + ".part");
  }

  // Truncate in place so the same array instance keeps being used.
  temporaryFiles.length = 0;
});
|
|
388
|
+
|
|
389
|
+
/** Per-process sequence number that keeps paths unique within one millisecond. */
let temporaryFileSequence = 0;

/**
 * Build a unique path inside the OS temp directory and register it for cleanup.
 *
 * The file itself is not created. Uniqueness comes from the process id (distinct
 * across parallel workers), the current timestamp, and a per-process sequence
 * number, so two calls with the same `name` never return the same path.
 *
 * @param name - Suffix of the file name; kept at the end so the extension survives.
 * @returns Absolute path that has been appended to `temporaryFiles`.
 */
function temporaryFilePath(name: string): string {
  const sequence = temporaryFileSequence++;
  const fileName = `pf-driver-test-${process.pid}-${Date.now()}-${sequence}-${name}`;
  const filePath = path.join(os.tmpdir(), fileName);
  temporaryFiles.push(filePath);
  return filePath;
}
|
|
394
|
+
|
|
395
|
+
// ── Spec / vector builders (mirrors csv_writer.test.ts) ─────────────
|
|
396
|
+
|
|
397
|
+
/**
 * Build an axis column spec of type "Int" for tests.
 *
 * @param name - Axis name, used for both the axis id and the spec.
 * @param label - Optional value stored under the "pl7.app/label" annotation;
 *   when omitted the annotations object is empty.
 */
function makeAxisSpec(name: string, label?: string): PTableColumnSpec {
  const annotations: Record<string, string> =
    label === undefined ? {} : { "pl7.app/label": label };

  // The literal is only a partial spec, hence the assertion.
  return {
    type: "axis",
    id: { name, type: "Int" },
    spec: { name, type: "Int", annotations },
  } as PTableColumnSpec;
}
|
|
408
|
+
|
|
409
|
+
/**
 * Build a value column spec for tests.
 *
 * @param name - Column name; the column id is derived from it as `col:<name>`.
 * @param valueType - Value type string placed in the spec as-is.
 * @param label - Optional value stored under the "pl7.app/label" annotation;
 *   when omitted the annotations object is empty.
 */
function makeColumnSpec(name: string, valueType: string, label?: string): PTableColumnSpec {
  const annotations: Record<string, string> =
    label === undefined ? {} : { "pl7.app/label": label };

  // The literal is only a partial spec (no axes), hence the assertion.
  return {
    type: "column",
    id: `col:${name}` as PObjectId,
    spec: { kind: "PColumn", name, valueType, axesSpec: [], annotations },
  } as PTableColumnSpec;
}
|
|
426
|
+
|
|
427
|
+
/** Wrap plain numbers in an Int vector backed by a new Int32Array. */
function makeIntVector(values: number[]): PTableVector {
  const data = Int32Array.from(values);
  return { type: ValueType.Int, data };
}
|
|
430
|
+
|
|
431
|
+
/** Wrap plain numbers in a Double vector backed by a new Float64Array. */
function makeDoubleVector(values: number[]): PTableVector {
  const data = Float64Array.from(values);
  return { type: ValueType.Double, data };
}
|
|
434
|
+
|
|
435
|
+
/** Wrap bigints in a Long vector backed by a new BigInt64Array. */
function makeLongVector(values: bigint[]): PTableVector {
  const data = BigInt64Array.from(values);
  return { type: ValueType.Long, data };
}
|
|
438
|
+
|
|
439
|
+
/** Wrap strings (null marks a missing value) in a String vector; the array is used as-is, not copied. */
function makeStringVector(values: (null | string)[]): PTableVector {
  const vector: PTableVector = { type: ValueType.String, data: values };
  return vector;
}
|
|
442
|
+
|
|
443
|
+
/**
 * Create an in-memory PTableDataSource for tests.
 *
 * `getData` serves each request by slicing the supplied full-length vectors to
 * the requested range (the whole table when no range is given), and throws if
 * the supplied signal is already aborted. The row count is taken from the first
 * vector.
 *
 * @param fullVectors - One full-length vector per table column.
 * @param specs - Column specs returned verbatim by `getSpec`.
 */
function makePTableDataSource(
  fullVectors: PTableVector[],
  specs: PTableColumnSpec[],
): PTableDataSource & { getSpec(): PTableColumnSpec[]; getShape(): { rows: number } } {
  const totalRows = vectorLength(fullVectors[0]);

  return {
    getData: async (
      columnIndices: number[],
      options?: { range?: TableRange; signal?: AbortSignal },
    ) => {
      options?.signal?.throwIfAborted();

      const requested = options?.range;
      const range = isNil(requested) ? { offset: 0, length: totalRows } : requested;

      // Result order follows the order of columnIndices, not the table's column order.
      return columnIndices.map((columnIndex) => sliceVector(fullVectors[columnIndex], range));
    },
    getSpec: () => specs,
    getShape: () => ({ rows: totalRows }),
  };
}
|
|
465
|
+
|
|
466
|
+
// ── Download pipeline (mirrors driver_impl.ts downloadPTable logic) ─
|
|
467
|
+
|
|
468
|
+
/** Options accepted by the test-local `downloadPTableFromSource` helper. */
interface DownloadOptions {
  /** Destination file path. */
  path: string;
  /** Output flavour; "tsv" selects a tab separator, "csv" a comma. */
  format: "csv" | "tsv";
  /** Table columns to export, in output order. */
  columnIndices: number[];
  /** Row window to export; the whole table when omitted. */
  range?: TableRange;
  /** Rows requested per `getData` call; the helper defaults this to 50_000. */
  chunkSize?: number;
  /** Whether to emit a header row; the helper defaults this to true. */
  includeHeader?: boolean;
  /** Whether to emit a UTF-8 BOM; the helper defaults this to true. */
  bom?: boolean;
  /** Abort signal forwarded to both the row stream and the write pipeline. */
  signal?: AbortSignal;
}
|
|
478
|
+
|
|
479
|
+
/** Summary returned by the test-local `downloadPTableFromSource` helper. */
interface DownloadResult {
  /** Destination path the file was written to. */
  path: string;
  /** Number of data rows covered by the export. */
  rowsWritten: number;
  /** Byte count reported by the write stream. */
  bytesWritten: number;
}
|
|
484
|
+
|
|
485
|
+
/**
 * Test-local stand-in for the driver's downloadPTable file pipeline: streams
 * rows from a PTableDataSource into a file through `createPathAtomically`.
 *
 * @param pTable - Data source that also exposes its column specs and shape.
 * @param options - Destination, format and streaming options.
 * @returns The destination path, the length of the clipped range as
 *   `rowsWritten` (derived from the range, not counted from the stream), and
 *   the byte count reported by the write stream.
 */
async function downloadPTableFromSource(
  pTable: PTableDataSource & { getSpec(): PTableColumnSpec[]; getShape(): { rows: number } },
  options: DownloadOptions,
): Promise<DownloadResult> {
  // Clip the requested window to the table before anything is streamed.
  const effectiveRange = clipRange(options.range, pTable.getShape());
  const specs = pTable.getSpec();

  const rowChunks = streamPTableRows({
    pTable,
    columnIndices: options.columnIndices,
    range: effectiveRange,
    chunkSize: options.chunkSize ?? 50_000,
    separator: options.format === "tsv" ? "\t" : ",",
    signal: options.signal,
    specs,
    includeHeader: options.includeHeader ?? true,
    bom: options.bom ?? true,
  });

  // createPathAtomically requires a logger; the tests have no use for its output.
  const silentLogger: MiLogger = {
    info: () => {},
    warn: () => {},
    error: () => {},
  };

  let bytesWritten = 0;
  await createPathAtomically(silentLogger, options.path, async (tempPath) => {
    // "wx" fails if the temporary file already exists instead of overwriting it.
    const sink = fs.createWriteStream(tempPath, { flags: "wx" });
    const source = Readable.from(rowChunks, { objectMode: false });
    await pipeline(source, sink, { signal: options.signal });
    bytesWritten = sink.bytesWritten;
  });

  return { path: options.path, rowsWritten: effectiveRange.length, bytesWritten };
}
|
|
531
|
+
|
|
532
|
+
/**
 * Restrict a requested row range to the rows the table has.
 *
 * @param range - Requested window; a nil value selects the whole table.
 * @param shape - Table shape providing the total row count.
 * @returns A range whose offset is at most `shape.rows` and whose length does
 *   not run past the last row. Values are only capped from above.
 */
function clipRange(range: undefined | TableRange, shape: { rows: number }): TableRange {
  const totalRows = shape.rows;

  if (!isNil(range)) {
    const offset = Math.min(range.offset, totalRows);
    const remainingRows = totalRows - offset;
    return { offset, length: Math.min(range.length, remainingRows) };
  }

  return { offset: 0, length: totalRows };
}
|
|
540
|
+
|
|
541
|
+
// ── CSV parser (fast-csv wrapper) ──────────────────────────────────
|
|
542
|
+
|
|
543
|
+
/** Result of `parseCsv`: every parsed record in file order, each as an array of field strings. */
interface ParsedCsv {
  /** Parsed records; no row is treated as a header. */
  rows: string[][];
}
|
|
546
|
+
|
|
547
|
+
/**
 * Parse delimited text with fast-csv into raw rows.
 *
 * A leading U+FEFF (BOM) is removed first so it does not end up in the first
 * field. Headers are not interpreted: the header line is returned as an
 * ordinary row.
 *
 * @param content - Full file content.
 * @param separator - Field delimiter handed to the parser.
 * @returns Promise resolving with all rows once the parser ends; rejects on a parse error.
 */
function parseCsv(content: string, separator: string): Promise<ParsedCsv> {
  const hasBom = content.charCodeAt(0) === 0xfeff;
  const text = hasBom ? content.slice(1) : content;

  return new Promise<ParsedCsv>((resolve, reject) => {
    const collected: string[][] = [];
    const parser = parseString(text, { headers: false, delimiter: separator });

    parser.on("data", (row: string[]) => {
      collected.push(row);
    });
    parser.on("end", () => {
      resolve({ rows: collected });
    });
    parser.on("error", reject);
  });
}
|
|
559
|
+
|
|
560
|
+
// ── Vector helpers ──────────────────────────────────────────────────
|
|
561
|
+
|
|
562
|
+
/** Number of cells in a vector, i.e. the length of its backing data. */
function vectorLength(vector: PTableVector): number {
  const { data } = vector;
  return data.length;
}
|
|
565
|
+
|
|
566
|
+
/**
 * Return a copy of `vector` restricted to the rows in `range`.
 *
 * Every branch performs the same `slice(offset, offset + length)`; the switch
 * exists only to cast the data to the concrete container type for that value
 * type. The casts are erased at runtime.
 *
 * @param vector - Source vector; left unmodified.
 * @param range - Row window to extract.
 */
function sliceVector(vector: PTableVector, range: TableRange): PTableVector {
  const start = range.offset;
  const end = range.offset + range.length;

  switch (vector.type) {
    case ValueType.Int: {
      const data = (vector.data as Int32Array).slice(start, end);
      return { type: vector.type, data };
    }
    case ValueType.Double:
    case ValueType.Float: {
      const data = (vector.data as Float64Array).slice(start, end);
      return { type: vector.type, data };
    }
    case ValueType.Long: {
      const data = (vector.data as BigInt64Array).slice(start, end);
      return { type: vector.type, data };
    }
    case ValueType.String: {
      const data = (vector.data as (null | string)[]).slice(start, end);
      return { type: vector.type, data };
    }
    default:
      return { type: vector.type, data: vector.data.slice(start, end) };
  }
}
|
|
594
|
+
|
|
595
|
+
/**
 * Compute the text a cell is expected to have in the exported file.
 *
 * Rules implemented here:
 * - a nil cell becomes the empty string;
 * - String cells are returned unchanged;
 * - Float/Double cells that are NaN or ±Infinity become the empty string;
 * - everything else is converted with `String(...)`.
 *
 * @param vector - Column vector to read from.
 * @param rowIndex - Index of the cell within `vector.data`.
 */
function expectedCellString(vector: PTableVector, rowIndex: number): string {
  const cell = vector.data[rowIndex];
  if (isNil(cell)) {
    return "";
  }

  if (vector.type === ValueType.String) {
    return cell as string;
  }

  if (vector.type === ValueType.Float || vector.type === ValueType.Double) {
    const numeric = cell as number;
    // Number.isFinite is false for NaN as well as for ±Infinity.
    return Number.isFinite(numeric) ? String(numeric) : "";
  }

  // Int, Long and any remaining value types use the default string conversion.
  return String(cell);
}
|