parquetlens 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/dist/chunk-2RGMZZ7F.js +123 -0
- package/dist/chunk-2RGMZZ7F.js.map +1 -0
- package/dist/chunk-3N45GGD2.js +113 -0
- package/dist/chunk-3N45GGD2.js.map +1 -0
- package/dist/chunk-AYPIRAOL.js +112 -0
- package/dist/chunk-AYPIRAOL.js.map +1 -0
- package/dist/chunk-IMVXDI4K.js +112 -0
- package/dist/chunk-IMVXDI4K.js.map +1 -0
- package/dist/chunk-NRRDNC7S.js +485 -0
- package/dist/chunk-NRRDNC7S.js.map +1 -0
- package/dist/main.js +16 -6
- package/dist/main.js.map +1 -1
- package/dist/tui.js +87 -16
- package/dist/tui.js.map +1 -1
- package/package.json +6 -4
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
import { createRequire } from 'module';
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import { dirname } from 'path';
|
|
4
|
+
const require = createRequire(import.meta.url);
|
|
5
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
+
const __dirname = dirname(__filename);
|
|
7
|
+
|
|
8
|
+
// ../../packages/parquet-reader/dist/index.js
|
|
9
|
+
import { createWriteStream, readFileSync, rmSync } from "fs";
|
|
10
|
+
import { promises as fs } from "fs";
|
|
11
|
+
import { randomUUID } from "crypto";
|
|
12
|
+
import { spawnSync } from "child_process";
|
|
13
|
+
import { Buffer } from "buffer";
|
|
14
|
+
import { createRequire as nodeCreateRequire } from "module";
|
|
15
|
+
import { tmpdir } from "os";
|
|
16
|
+
import path from "path";
|
|
17
|
+
import { pipeline } from "stream/promises";
|
|
18
|
+
import {
|
|
19
|
+
DuckDBAccessMode,
|
|
20
|
+
DuckDBDataProtocol,
|
|
21
|
+
FileFlags,
|
|
22
|
+
NODE_RUNTIME,
|
|
23
|
+
VoidLogger,
|
|
24
|
+
createDuckDB,
|
|
25
|
+
failWith,
|
|
26
|
+
readString
|
|
27
|
+
} from "@duckdb/duckdb-wasm/blocking";
|
|
28
|
+
/**
 * Map a user-supplied location string to a fetchable URL descriptor.
 *
 * @param {string} input - Candidate location: an `hf://` URI, an http(s) URL, or anything else.
 * @returns {{url: string} | null} The URL to fetch, or `null` when `input` starts with none of
 *   `hf://`, `http://` or `https://`.
 * @throws {Error} Propagated from `resolveHuggingFaceUrl` when an `hf://` input is malformed.
 */
function resolveParquetUrl(input) {
  if (input.startsWith("hf://")) {
    return resolveHuggingFaceUrl(input);
  }
  const isPlainHttp = input.startsWith("http://") || input.startsWith("https://");
  return isPlainHttp ? { url: input } : null;
}
|
|
37
|
+
/**
 * Translate an `hf://` URI into the matching Hugging Face Hub "resolve" download URL.
 *
 * Accepted shape: `hf://(datasets|models)/<user>/<repo>[@<revision>]/<path/to/file>`.
 * The revision defaults to `main` when no `@<revision>` part is present.
 *
 * @param {string} input - The `hf://` URI to translate.
 * @returns {{url: string}} Descriptor holding the https download URL.
 * @throws {Error} When `input` does not match the accepted shape.
 */
function resolveHuggingFaceUrl(input) {
  const match = input.match(/^hf:\/\/(datasets|models)\/([^@\/]+)\/([^@\/]+)(?:@([^\/]+))?\/(.+)$/);
  if (!match) {
    throw new Error(`Invalid hf:// URL: ${input}`);
  }
  const [, type, user, repo, branch = "main", filePath] = match;
  // On the Hub only non-model repositories carry a type prefix ("datasets/"); model
  // repositories live directly under the host, so "models/" must not appear in the URL.
  const repoPrefix = type === "models" ? "" : `${type}/`;
  return {
    url: `https://huggingface.co/${repoPrefix}${user}/${repo}/resolve/${branch}/${filePath}`
  };
}
|
|
47
|
+
// Memoized promise for the shared DuckDB instance; assigned on first use by getDuckDb().
let duckDbPromise = null;
// Flipped to true by ensureHttpRuntimeSupport() so the NODE_RUNTIME overrides are installed once.
let httpRuntimePatched = false;
// fileId -> { dataPtr, size } for HTTP files whose whole body was copied into the wasm heap.
const httpBuffers = new Map();
|
|
50
|
+
/**
 * Lazily create, instantiate and open the shared DuckDB instance.
 *
 * The in-flight (or completed) initialisation promise is memoized in `duckDbPromise`, so
 * concurrent callers share a single instance. If initialisation fails, the memoized promise is
 * cleared again so that a later call can retry instead of receiving the same rejection forever.
 *
 * @returns {Promise<object>} Resolves to the opened DuckDB bindings object.
 */
async function getDuckDb() {
  if (!duckDbPromise) {
    const pending = (async () => {
      ensureHttpRuntimeSupport();
      const bundles = getDuckDbBundles();
      const db = await createDuckDB(bundles, new VoidLogger(), NODE_RUNTIME);
      await db.instantiate();
      db.open({
        accessMode: DuckDBAccessMode.READ_WRITE,
        filesystem: {
          allowFullHTTPReads: true
        }
      });
      return db;
    })();
    duckDbPromise = pending;
    // Drop the cached promise on failure. The identity check avoids clobbering a newer
    // attempt; callers still observe the rejection through the promise they were handed.
    pending.catch(() => {
      if (duckDbPromise === pending) {
        duckDbPromise = null;
      }
    });
  }
  return duckDbPromise;
}
|
|
68
|
+
/**
 * Resolve the on-disk locations of the DuckDB wasm modules and their Node worker scripts.
 *
 * Resolution is done relative to this module through a `require` created from
 * `import.meta.url`.
 *
 * @returns {{mvp: {mainModule: string, mainWorker: string}, eh: {mainModule: string, mainWorker: string}}}
 *   Paths for the "mvp" and "eh" bundles.
 */
function getDuckDbBundles() {
  const moduleRequire = nodeCreateRequire(import.meta.url);
  const locate = (specifier) => moduleRequire.resolve(specifier);
  return {
    mvp: {
      mainModule: locate("@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm"),
      mainWorker: locate("@duckdb/duckdb-wasm/dist/duckdb-node-mvp.worker.cjs")
    },
    eh: {
      mainModule: locate("@duckdb/duckdb-wasm/dist/duckdb-eh.wasm"),
      mainWorker: locate("@duckdb/duckdb-wasm/dist/duckdb-node-eh.worker.cjs")
    }
  };
}
|
|
85
|
+
/**
 * Install HTTP-aware overrides on the shared `NODE_RUNTIME` object (at most once).
 *
 * Files whose `dataProtocol` is `DuckDBDataProtocol.HTTP` are served through the curl-backed
 * `requestHttp*` helpers; every other file is delegated to the runtime's original
 * implementation, which is captured before being replaced. Later calls are no-ops.
 *
 * Overridden hooks: `openFile`, `readFile`, `checkFile`, `glob`, `closeFile`,
 * `getLastFileModificationTime`.
 */
function ensureHttpRuntimeSupport() {
  if (httpRuntimePatched) {
    return;
  }
  httpRuntimePatched = true;
  // Keep the stock implementations so non-HTTP files behave exactly as before.
  const nodeOpenFile = NODE_RUNTIME.openFile;
  const nodeReadFile = NODE_RUNTIME.readFile;
  const nodeCheckFile = NODE_RUNTIME.checkFile.bind(NODE_RUNTIME);
  const nodeGlob = NODE_RUNTIME.glob.bind(NODE_RUNTIME);
  const nodeCloseFile = NODE_RUNTIME.closeFile.bind(NODE_RUNTIME);
  const nodeGetLastModified = NODE_RUNTIME.getLastFileModificationTime.bind(NODE_RUNTIME);

  // openFile: probe for range support first; fall back to downloading the whole body into the
  // wasm heap when that is allowed. Returns a pointer produced by buildOpenResult, or 0 after
  // reporting an error through failWith.
  NODE_RUNTIME.openFile = (mod, fileId, flags) => {
    const file = NODE_RUNTIME.resolveFileInfo(mod, fileId);
    if (!file || file.dataProtocol !== DuckDBDataProtocol.HTTP) {
      return nodeOpenFile(mod, fileId, flags);
    }
    if (flags & FileFlags.FILE_FLAGS_WRITE || flags & FileFlags.FILE_FLAGS_APPEND) {
      failWith(mod, `Opening file ${file.fileName} failed: HTTP writes are not supported`);
      return 0;
    }
    if (!(flags & FileFlags.FILE_FLAGS_READ)) {
      failWith(mod, `Opening file ${file.fileName} failed: unsupported file flags: ${flags}`);
      return 0;
    }
    if (!file.dataUrl) {
      failWith(mod, `Opening file ${file.fileName} failed: missing data URL`);
      return 0;
    }
    const allowFull = file.allowFullHttpReads ?? true;
    const forceFull = file.forceFullHttpReads ?? false;
    if (!forceFull) {
      try {
        // Ask for the first byte only: a 206 answer means ranges work and carries the size.
        const probe = requestHttpRange(file.dataUrl, 0, 0);
        if (probe.status === 206) {
          const total = parseContentRangeTotal(probe.headers["content-range"]) ?? parseContentLength(probe.headers["content-length"]);
          if (total !== null) {
            return buildOpenResult(mod, total, 0);
          }
        }
        // A 200 answer means the range was ignored and the probe already holds the full body.
        if (probe.status === 200 && allowFull) {
          const dataPtr = writeResponseToHeap(mod, probe.bytes);
          httpBuffers.set(fileId, { dataPtr, size: probe.bytes.length });
          return buildOpenResult(mod, probe.bytes.length, dataPtr);
        }
      } catch (error) {
        // When full reads are allowed the probe failure is deliberately not fatal: the
        // full-download attempt below gets a chance and reports its own error.
        if (!allowFull) {
          failWith(mod, `Opening file ${file.fileName} failed: ${String(error)}`);
          return 0;
        }
      }
    }
    if (allowFull) {
      try {
        const full = requestHttp(file.dataUrl);
        if (full.status === 200) {
          const dataPtr = writeResponseToHeap(mod, full.bytes);
          httpBuffers.set(fileId, { dataPtr, size: full.bytes.length });
          return buildOpenResult(mod, full.bytes.length, dataPtr);
        }
      } catch (error) {
        failWith(mod, `Opening file ${file.fileName} failed: ${String(error)}`);
        return 0;
      }
    }
    failWith(mod, `Opening file ${file.fileName} failed: HTTP range requests unavailable`);
    return 0;
  };

  // readFile: copy up to `bytes` bytes starting at `location` into the heap at `buffer`,
  // either from the cached full body or via a range request. Returns the byte count copied.
  NODE_RUNTIME.readFile = (mod, fileId, buffer, bytes, location) => {
    if (bytes === 0) {
      return 0;
    }
    const file = NODE_RUNTIME.resolveFileInfo(mod, fileId);
    if (!file || file.dataProtocol !== DuckDBDataProtocol.HTTP) {
      return nodeReadFile(mod, fileId, buffer, bytes, location);
    }
    const cached = httpBuffers.get(fileId);
    if (cached) {
      // Clamp the requested window to the cached body.
      const sliceStart = Math.max(0, location);
      const sliceEnd = Math.min(cached.size, location + bytes);
      const length = Math.max(0, sliceEnd - sliceStart);
      if (length > 0) {
        const src = mod.HEAPU8.subarray(cached.dataPtr + sliceStart, cached.dataPtr + sliceEnd);
        mod.HEAPU8.set(src, buffer);
      }
      return length;
    }
    if (!file.dataUrl) {
      failWith(mod, `Reading file ${file.fileName} failed: missing data URL`);
      return 0;
    }
    try {
      const response = requestHttpRange(file.dataUrl, location, location + bytes - 1);
      // A 200 answer is only usable when reading from offset 0, because the body then
      // starts at the requested position.
      if (response.status === 206 || response.status === 200 && location === 0) {
        const length = Math.min(bytes, response.bytes.length);
        if (length > 0) {
          mod.HEAPU8.set(response.bytes.subarray(0, length), buffer);
        }
        return length;
      }
      failWith(mod, `Reading file ${file.fileName} failed with HTTP ${response.status}`);
      return 0;
    } catch (error) {
      failWith(mod, `Reading file ${file.fileName} failed: ${String(error)}`);
      return 0;
    }
  };

  // checkFile: an http(s) path exists when a HEAD request answers 200 or 206.
  NODE_RUNTIME.checkFile = (mod, pathPtr, pathLen) => {
    const path2 = readString(mod, pathPtr, pathLen);
    if (isHttpUrl(path2)) {
      try {
        const response = requestHttpHead(path2);
        return response.status === 200 || response.status === 206;
      } catch (error) {
        // A curl failure must not escape as an exception from this runtime hook; report it
        // the same way openFile/readFile do and answer "not found".
        failWith(mod, `Checking file ${path2} failed: ${String(error)}`);
        return false;
      }
    }
    return nodeCheckFile(mod, pathPtr, pathLen);
  };

  // glob: an http(s) path matches only itself, and only when a HEAD request succeeds.
  NODE_RUNTIME.glob = (mod, pathPtr, pathLen) => {
    const path2 = readString(mod, pathPtr, pathLen);
    if (isHttpUrl(path2)) {
      try {
        const response = requestHttpHead(path2);
        if (response.status === 200 || response.status === 206) {
          mod.ccall("duckdb_web_fs_glob_add_path", null, ["string"], [path2]);
        }
      } catch (error) {
        // Same policy as checkFile: report instead of throwing; no path is added.
        failWith(mod, `Globbing ${path2} failed: ${String(error)}`);
      }
      return;
    }
    return nodeGlob(mod, pathPtr, pathLen);
  };

  // closeFile: release the heap copy of a fully downloaded body, then defer to the runtime.
  NODE_RUNTIME.closeFile = (mod, fileId) => {
    const cached = httpBuffers.get(fileId);
    if (cached) {
      if (cached.dataPtr) {
        mod._free(cached.dataPtr);
      }
      httpBuffers.delete(fileId);
    }
    nodeCloseFile(mod, fileId);
  };

  // getLastFileModificationTime: HTTP files report the current time in seconds.
  NODE_RUNTIME.getLastFileModificationTime = (mod, fileId) => {
    const file = NODE_RUNTIME.resolveFileInfo(mod, fileId);
    if (file?.dataProtocol === DuckDBDataProtocol.HTTP) {
      return Date.now() / 1e3;
    }
    return nodeGetLastModified(mod, fileId);
  };
}
|
|
228
|
+
/**
 * Tell whether a string begins with the `http://` or `https://` scheme.
 *
 * @param {string} value - Path or URL to inspect.
 * @returns {boolean} `true` for http(s) URLs, `false` otherwise.
 */
function isHttpUrl(value) {
  const schemes = ["http://", "https://"];
  return schemes.some((scheme) => value.startsWith(scheme));
}
|
|
231
|
+
/**
 * Allocate a 16-byte record in the module heap and store `size` and `dataPtr` in it as two
 * consecutive float64 values.
 *
 * @param {object} mod - Module exposing `_malloc` and the `HEAPF64` view.
 * @param {number} size - File size; coerced to a number with unary plus.
 * @param {number} dataPtr - Heap offset of the buffered body, or 0 when nothing is buffered.
 * @returns {number} Heap offset of the allocated record.
 */
function buildOpenResult(mod, size, dataPtr) {
  const recordPtr = mod._malloc(16);
  // HEAPF64 is indexed in 8-byte elements, so the byte offset is shifted right by 3.
  const firstSlot = recordPtr >> 3;
  mod.HEAPF64[firstSlot] = +size;
  mod.HEAPF64[firstSlot + 1] = dataPtr;
  return recordPtr;
}
|
|
237
|
+
/**
 * Copy a byte array into freshly allocated module heap memory.
 *
 * @param {object} mod - Module exposing `_malloc` and the `HEAPU8` view.
 * @param {Uint8Array} bytes - Bytes to copy.
 * @returns {number} Heap offset where the copy starts.
 */
function writeResponseToHeap(mod, bytes) {
  const { byteLength } = bytes;
  const heapOffset = mod._malloc(byteLength);
  mod.HEAPU8.set(bytes, heapOffset);
  return heapOffset;
}
|
|
242
|
+
/**
 * Extract the total size from a `Content-Range` header value, i.e. the part after the `/`.
 *
 * @param {string | undefined} contentRange - Raw header value, e.g. `bytes 0-0/1234`.
 * @returns {number | null} The parsed total, or `null` when the header is missing, has no
 *   total part, or the total is not a base-10 integer (for example `*`).
 */
function parseContentRangeTotal(contentRange) {
  const totalPart = contentRange ? contentRange.split("/")[1] : undefined;
  if (!totalPart) {
    return null;
  }
  const total = Number.parseInt(totalPart, 10);
  if (!Number.isFinite(total)) {
    return null;
  }
  return total;
}
|
|
253
|
+
/**
 * Parse a `Content-Length` header value as a base-10 integer.
 *
 * @param {string | undefined} contentLength - Raw header value.
 * @returns {number | null} The parsed length, or `null` when the header is missing or does
 *   not start with digits.
 */
function parseContentLength(contentLength) {
  if (!contentLength) {
    return null;
  }
  const length = Number.parseInt(contentLength, 10);
  if (!Number.isFinite(length)) {
    return null;
  }
  return length;
}
|
|
260
|
+
/**
 * Fetch the whole resource at `url` through `requestCurl`.
 *
 * @param {string} url - Resource to download.
 * @returns {{status: number, bytes: Uint8Array, headers: object}} Whatever `requestCurl` returns.
 */
function requestHttp(url) {
  const curlArgs = [url];
  return requestCurl(curlArgs);
}
|
|
263
|
+
/**
 * Issue a headers-only request (curl `-I`) for `url` through `requestCurl`.
 *
 * @param {string} url - Resource to probe.
 * @returns {{status: number, bytes: Uint8Array, headers: object}} Whatever `requestCurl` returns.
 */
function requestHttpHead(url) {
  const curlArgs = ["-I", url];
  return requestCurl(curlArgs);
}
|
|
266
|
+
/**
 * Request the byte range `start`-`end` of `url` (curl `-r`) through `requestCurl`.
 *
 * @param {string} url - Resource to read from.
 * @param {number} start - First byte offset of the range.
 * @param {number} end - Last byte offset of the range.
 * @returns {{status: number, bytes: Uint8Array, headers: object}} Whatever `requestCurl` returns.
 */
function requestHttpRange(url, start, end) {
  const byteRange = `${start}-${end}`;
  return requestCurl(["-r", byteRange, url]);
}
|
|
269
|
+
/**
 * Run curl synchronously and return the parsed response.
 *
 * Response headers are dumped to stdout (`-D -`) and the body is written to a uniquely named
 * temporary file (`-o`), which is always removed before returning. Redirects are followed
 * (`-L`). URL globbing is disabled (`--globoff`) so that URLs containing `[`, `]`, `{` or `}`
 * are requested literally instead of being expanded or rejected by curl.
 *
 * @param {string[]} args - Extra curl arguments, ending with the URL.
 * @returns {{status: number, bytes: Uint8Array, headers: object}} Parsed by `parseCurlResponse`.
 * @throws {Error} When curl is not installed, cannot be spawned, or exits with a non-zero
 *   status (the message is curl's stderr when available).
 */
function requestCurl(args) {
  const tempPath = path.join(tmpdir(), `parquetlens-http-${randomUUID()}`);
  try {
    const result = spawnSync("curl", ["-sS", "-L", "--globoff", "-D", "-", "-o", tempPath, ...args], {
      encoding: "buffer",
      // Only headers travel over stdout; the body goes to the temp file.
      maxBuffer: 4 * 1024 * 1024
    });
    if (result.error) {
      if (result.error.code === "ENOENT") {
        throw new Error("curl not found (required for HTTP range reads)");
      }
      throw result.error;
    }
    if (result.status !== 0) {
      const stderr = result.stderr?.toString("utf8").trim();
      throw new Error(stderr || "curl failed");
    }
    const body = readFileSync(tempPath);
    return parseCurlResponse(Buffer.from(result.stdout ?? []), body);
  } finally {
    // force: true makes this a no-op when curl never created the file.
    rmSync(tempPath, { force: true });
  }
}
|
|
292
|
+
/**
 * Turn curl's dumped header text and the downloaded body into a response object.
 *
 * The header dump may contain several header blocks separated by blank lines; only the last
 * block is used.
 *
 * @param {Buffer} headersBuffer - Raw header dump, decoded as latin1.
 * @param {Uint8Array} body - Response body bytes.
 * @returns {{status: number, bytes: Uint8Array, headers: object}} `status` is 0 when no status
 *   code could be parsed; header names are lower-cased; `bytes` is a copy of `body`.
 */
function parseCurlResponse(headersBuffer, body) {
  const headerSections = headersBuffer.toString("latin1").split(/\r\n\r\n/).filter(Boolean);
  const finalSection = headerSections[headerSections.length - 1] ?? "";
  const [statusLine = "", ...headerLines] = finalSection.split(/\r\n/).filter(Boolean);
  // The status code is the second space-separated token of the status line.
  const statusCode = Number.parseInt(statusLine.split(" ")[1] ?? "", 10);
  const headers = {};
  headerLines.forEach((line) => {
    const colon = line.indexOf(":");
    if (colon !== -1) {
      const name = line.slice(0, colon).trim().toLowerCase();
      headers[name] = line.slice(colon + 1).trim();
    }
  });
  return {
    status: Number.isFinite(statusCode) ? statusCode : 0,
    bytes: new Uint8Array(body),
    headers
  };
}
|
|
316
|
+
/**
 * Open a local Parquet file as a source backed by the shared DuckDB instance.
 *
 * The file is registered under a generated name using the `NODE_FS` protocol.
 *
 * @param {string} filePath - Path of the Parquet file on disk.
 * @returns {Promise<object>} Source object created by `createParquetSource`.
 */
async function openParquetSourceFromPath(filePath) {
  const database = await getDuckDb();
  const connection = database.connect();
  const registeredName = buildDuckDbFileName(filePath);
  database.registerFileURL(registeredName, filePath, DuckDBDataProtocol.NODE_FS, true);
  return createParquetSource(database, connection, registeredName);
}
|
|
323
|
+
/**
 * Open a remote Parquet file (`hf://`, `http://` or `https://`) as a source backed by the
 * shared DuckDB instance.
 *
 * The resolved URL is registered under a generated name using the `HTTP` protocol.
 *
 * @param {string} input - Remote location accepted by `resolveParquetUrl`.
 * @returns {Promise<object>} Source object created by `createParquetSource`.
 * @throws {Error} "Not a URL" when `resolveParquetUrl` returns `null` for `input`.
 */
async function openParquetSourceFromUrl(input) {
  const resolved = resolveParquetUrl(input);
  if (resolved === null || resolved === undefined || !resolved) {
    throw new Error("Not a URL");
  }
  const { url } = resolved;
  const database = await getDuckDb();
  const connection = database.connect();
  const registeredName = buildDuckDbFileName(url);
  database.registerFileURL(registeredName, url, DuckDBDataProtocol.HTTP, true);
  return createParquetSource(database, connection, registeredName);
}
|
|
334
|
+
/**
 * Open a Parquet source from either a remote location or a local path.
 *
 * Inputs that `resolveParquetUrl` recognises are opened as URLs; everything else is treated
 * as a filesystem path.
 *
 * @param {string} input - URL, `hf://` URI or local file path.
 * @returns {Promise<object>} Source object created by `createParquetSource`.
 */
async function openParquetSource(input) {
  const open = resolveParquetUrl(input) ? openParquetSourceFromUrl : openParquetSourceFromPath;
  return open(input);
}
|
|
341
|
+
/**
 * Read a table from a local Parquet file, closing the source afterwards in every case.
 *
 * @param {string} filePath - Path of the Parquet file on disk.
 * @param {object} [options] - Read options forwarded to the source's `readTable`.
 * @returns {Promise<object>} The table returned by `readTable`.
 */
async function readParquetTableFromPath(filePath, options) {
  const source = await openParquetSourceFromPath(filePath);
  let table;
  try {
    table = await source.readTable(options);
  } finally {
    await source.close();
  }
  return table;
}
|
|
349
|
+
/**
 * Read a table from a remote Parquet file, closing the source afterwards in every case.
 *
 * @param {string} input - Remote location accepted by `openParquetSourceFromUrl`.
 * @param {object} [options] - Read options forwarded to the source's `readTable`.
 * @returns {Promise<object>} The table returned by `readTable`.
 */
async function readParquetTableFromUrl(input, options) {
  const source = await openParquetSourceFromUrl(input);
  let table;
  try {
    table = await source.readTable(options);
  } finally {
    await source.close();
  }
  return table;
}
|
|
357
|
+
/**
 * Spool everything arriving on stdin into a file inside a fresh temporary directory.
 *
 * If spooling fails, the temporary directory is removed before the error is rethrown, because
 * the caller never receives the `cleanup` handle in that case.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Suggested file name; path separators are
 *   replaced with `_` so it cannot escape the temporary directory.
 * @returns {Promise<{path: string, cleanup: () => Promise<void>}>} Location of the spooled
 *   file and a function that deletes the temporary directory.
 * @throws {Error} Whatever the stdin-to-file pipeline rejects with.
 */
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
  const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
  const cleanup = async () => {
    await fs.rm(tempDir, { recursive: true, force: true });
  };
  try {
    const safeName = filenameHint.replace(/[\\/]/g, "_");
    const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
    const writeStream = createWriteStream(filePath);
    await pipeline(process.stdin, writeStream);
    return {
      path: filePath,
      cleanup
    };
  } catch (error) {
    // Best-effort removal: a cleanup failure must not mask the original error.
    await cleanup().catch(() => {});
    throw error;
  }
}
|
|
370
|
+
/**
 * Read a table from Parquet data piped on stdin.
 *
 * Stdin is first spooled to a temporary file, which is removed again once the read has
 * finished or failed.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Name hint for the temporary file.
 * @param {object} [options] - Read options forwarded to `readParquetTableFromPath`.
 * @returns {Promise<object>} The table that was read.
 */
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
  const spooled = await bufferStdinToTempFile(filenameHint);
  let table;
  try {
    table = await readParquetTableFromPath(spooled.path, options);
  } finally {
    await spooled.cleanup();
  }
  return table;
}
|
|
378
|
+
/**
 * Build the source object for one registered Parquet file.
 *
 * @param {object} db - DuckDB bindings; `dropFile` is called on close.
 * @param {object} conn - Connection used for queries; closed on close.
 * @param {string} fileName - Name the file was registered under.
 * @returns {{readTable: Function, readMetadata: Function, close: Function}}
 *   - `readTable(options)` runs the select built by `buildSelectQuery`.
 *   - `readMetadata()` reads file metadata once and returns the same promise on later calls.
 *   - `close()` closes the connection and drops the registered file.
 */
function createParquetSource(db, conn, fileName) {
  let metadataPromise = null;
  return {
    readTable: async (options) => {
      const query = buildSelectQuery(fileName, options);
      return conn.query(query);
    },
    readMetadata: () => {
      if (!metadataPromise) {
        metadataPromise = readParquetMetadata(conn, fileName);
      }
      return metadataPromise;
    },
    close: async () => {
      try {
        conn.close();
      } finally {
        // Always unregister the file, even when closing the connection throws;
        // otherwise the registration would be leaked.
        db.dropFile(fileName);
      }
    }
  };
}
|
|
397
|
+
/**
 * Generate a unique name under which a file is registered with DuckDB.
 *
 * @param {string} input - Original path or URL; only its extension is reused.
 * @returns {string} `parquetlens-<uuid><ext>`, where `<ext>` is the extension of `input` or
 *   `.parquet` when it has none.
 */
function buildDuckDbFileName(input) {
  const extension = path.extname(input);
  const suffix = extension === "" ? ".parquet" : extension;
  return ["parquetlens-", randomUUID(), suffix].join("");
}
|
|
401
|
+
/**
 * Build the `select ... from read_parquet(...)` statement for a registered file.
 *
 * `limit` and `offset` are only emitted when they are finite numbers. They are truncated to
 * integers and clamped to be non-negative, so values such as `NaN`, `Infinity` or `2.5` can no
 * longer produce invalid SQL like `limit NaN`. An offset of 0 is omitted.
 *
 * @param {string} fileName - Registered file name, embedded as a quoted SQL literal.
 * @param {{columns?: string[], limit?: number, offset?: number}} [options] - Projection and
 *   paging options; an absent or empty `columns` selects `*`.
 * @returns {string} The SQL text.
 */
function buildSelectQuery(fileName, options) {
  // Normalize a paging value to a non-negative integer, or null when it is unusable.
  const toRowCount = (value) => {
    if (typeof value !== "number" || !Number.isFinite(value)) {
      return null;
    }
    return Math.max(0, Math.trunc(value));
  };
  const columns = options?.columns && options.columns.length > 0 ? options.columns : null;
  const selectList = columns ? columns.map((column) => quoteIdentifier(column)).join(", ") : "*";
  const limit = toRowCount(options?.limit);
  const offset = toRowCount(options?.offset);
  let query = `select ${selectList} from read_parquet(${quoteLiteral(fileName)})`;
  if (limit !== null) {
    query += ` limit ${limit}`;
  }
  if (offset !== null && offset > 0) {
    query += ` offset ${offset}`;
  }
  return query;
}
|
|
415
|
+
/**
 * Wrap a name in double quotes for use as a SQL identifier, doubling any embedded
 * double quote.
 *
 * @param {string} value - Raw identifier, e.g. a column name.
 * @returns {string} The quoted identifier.
 */
function quoteIdentifier(value) {
  const escaped = value.replace(/"/g, '""');
  return '"' + escaped + '"';
}
|
|
418
|
+
/**
 * Wrap text in single quotes for use as a SQL string literal, doubling any embedded
 * single quote.
 *
 * @param {string} value - Raw text.
 * @returns {string} The quoted literal.
 */
function quoteLiteral(value) {
  const escaped = value.replace(/'/g, "''");
  return "'" + escaped + "'";
}
|
|
421
|
+
/**
 * Read the creator string and the key/value metadata of a registered Parquet file.
 *
 * Two queries are issued (`parquet_file_metadata` and `parquet_kv_metadata`); their results are
 * passed straight to `tableToObjects` without being awaited.
 *
 * @param {object} conn - Connection exposing `query(sql)`.
 * @param {string} fileName - Registered file name.
 * @returns {Promise<{createdBy: (string | undefined), keyValueMetadata: object}>} Normalized
 *   metadata; rows whose key is not a non-empty string are skipped.
 */
async function readParquetMetadata(conn, fileName) {
  const fileRows = tableToObjects(
    conn.query(`select * from parquet_file_metadata(${quoteLiteral(fileName)})`)
  );
  const kvRows = tableToObjects(
    conn.query(`select * from parquet_kv_metadata(${quoteLiteral(fileName)})`)
  );
  const [firstRow] = fileRows;
  const createdByRaw = firstRow?.created_by ?? firstRow?.createdBy ?? null;
  const keyValueMetadata = {};
  kvRows.forEach((row) => {
    // The key column is looked up under several possible names.
    const key = row.key ?? row.key_name ?? row.name;
    if (typeof key === "string" && key.length > 0) {
      keyValueMetadata[key] = row.value ?? row.val ?? "";
    }
  });
  return {
    createdBy: normalizeMetadataValue(createdByRaw),
    keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
  };
}
|
|
442
|
+
/**
 * Convert a columnar table into an array of plain row objects keyed by field name.
 *
 * @param {object} table - Object with `schema.fields[].name` and `batches[]`, where each batch
 *   exposes `numRows` and `getChildAt(index)` returning a column with `get(rowIndex)`.
 * @returns {object[]} One object per row, in batch order; a missing column yields `undefined`.
 */
function tableToObjects(table) {
  const columnNames = table.schema.fields.map((field) => field.name);
  const rows = [];
  for (const batch of table.batches) {
    const columns = columnNames.map((_, position) => batch.getChildAt(position));
    for (let rowIndex = 0; rowIndex < batch.numRows; rowIndex += 1) {
      const record = {};
      columnNames.forEach((name, position) => {
        record[name] = columns[position]?.get(rowIndex);
      });
      rows.push(record);
    }
  }
  return rows;
}
|
|
457
|
+
/**
 * Coerce a raw metadata value to a string.
 *
 * @param {*} value - Raw value read from a metadata row.
 * @returns {string | undefined} `undefined` for `null`/`undefined`; the UTF-8 decoding for a
 *   `Uint8Array`; the value itself for a string; `String(value)` for anything else.
 */
function normalizeMetadataValue(value) {
  if (value == null) {
    return undefined;
  }
  if (typeof value === "string") {
    return value;
  }
  return value instanceof Uint8Array ? Buffer.from(value).toString("utf8") : String(value);
}
|
|
469
|
+
/**
 * Normalize every value of a metadata dictionary to a string.
 *
 * @param {object} input - Dictionary of raw metadata values.
 * @returns {object} New object with the same keys; values that normalize to `undefined`
 *   become the empty string.
 */
function normalizeMetadataValues(input) {
  const normalized = {};
  for (const key of Object.keys(input)) {
    normalized[key] = normalizeMetadataValue(input[key]) ?? "";
  }
  return normalized;
}
|
|
477
|
+
|
|
478
|
+
export {
|
|
479
|
+
resolveParquetUrl,
|
|
480
|
+
openParquetSource,
|
|
481
|
+
readParquetTableFromPath,
|
|
482
|
+
readParquetTableFromUrl,
|
|
483
|
+
readParquetTableFromStdin
|
|
484
|
+
};
|
|
485
|
+
//# sourceMappingURL=chunk-NRRDNC7S.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../packages/parquet-reader/src/index.ts","../../../packages/parquet-reader/src/urls.ts"],"sourcesContent":["import { createWriteStream, readFileSync, rmSync } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { spawnSync } from \"node:child_process\";\nimport { Buffer } from \"node:buffer\";\nimport { createRequire as nodeCreateRequire } from \"node:module\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport type { Table } from \"apache-arrow\";\nimport {\n DuckDBAccessMode,\n DuckDBBundles,\n DuckDBConnection,\n DuckDBDataProtocol,\n DuckDBBindings,\n FileFlags,\n NODE_RUNTIME,\n VoidLogger,\n createDuckDB,\n failWith,\n readString,\n} from \"@duckdb/duckdb-wasm/blocking\";\nimport type { DuckDBModule } from \"@duckdb/duckdb-wasm/blocking\";\n\nimport { resolveParquetUrl } from \"./urls.js\";\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = {\n batchSize?: number;\n columns?: string[];\n limit?: number;\n offset?: number;\n rowGroups?: number[];\n};\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetSource = {\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n close: () => Promise<void>;\n};\n\nlet duckDbPromise: Promise<DuckDBBindings> | null = null;\nlet httpRuntimePatched = false;\n\ntype HttpBuffer = {\n dataPtr: number;\n size: number;\n};\n\nconst httpBuffers = new Map<number, HttpBuffer>();\n\ntype WasmModule = DuckDBModule & {\n HEAPU8: Uint8Array;\n HEAPF64: Float64Array;\n _malloc: (size: number) => number;\n _free: (ptr: number) => void;\n};\n\nasync function getDuckDb(): Promise<DuckDBBindings> {\n if (!duckDbPromise) {\n duckDbPromise = (async () => {\n 
ensureHttpRuntimeSupport();\n const bundles = getDuckDbBundles();\n const db = await createDuckDB(bundles, new VoidLogger(), NODE_RUNTIME);\n await db.instantiate();\n db.open({\n accessMode: DuckDBAccessMode.READ_WRITE,\n filesystem: {\n allowFullHTTPReads: true,\n },\n });\n return db;\n })();\n }\n\n return duckDbPromise;\n}\n\nfunction getDuckDbBundles(): DuckDBBundles {\n const localRequire = nodeCreateRequire(import.meta.url);\n const mvpModule = localRequire.resolve(\"@duckdb/duckdb-wasm/dist/duckdb-mvp.wasm\");\n const mvpWorker = localRequire.resolve(\"@duckdb/duckdb-wasm/dist/duckdb-node-mvp.worker.cjs\");\n const ehModule = localRequire.resolve(\"@duckdb/duckdb-wasm/dist/duckdb-eh.wasm\");\n const ehWorker = localRequire.resolve(\"@duckdb/duckdb-wasm/dist/duckdb-node-eh.worker.cjs\");\n\n return {\n mvp: {\n mainModule: mvpModule,\n mainWorker: mvpWorker,\n },\n eh: {\n mainModule: ehModule,\n mainWorker: ehWorker,\n },\n };\n}\n\nfunction ensureHttpRuntimeSupport(): void {\n if (httpRuntimePatched) {\n return;\n }\n httpRuntimePatched = true;\n\n const nodeOpenFile = NODE_RUNTIME.openFile as unknown as (\n mod: WasmModule,\n fileId: number,\n flags: FileFlags,\n ) => number;\n const nodeReadFile = NODE_RUNTIME.readFile as unknown as (\n mod: WasmModule,\n fileId: number,\n buffer: number,\n bytes: number,\n location: number,\n ) => number;\n const nodeCheckFile = NODE_RUNTIME.checkFile.bind(NODE_RUNTIME);\n const nodeGlob = NODE_RUNTIME.glob.bind(NODE_RUNTIME);\n const nodeCloseFile = NODE_RUNTIME.closeFile.bind(NODE_RUNTIME);\n const nodeGetLastModified = NODE_RUNTIME.getLastFileModificationTime.bind(NODE_RUNTIME);\n\n NODE_RUNTIME.openFile = (mod: WasmModule, fileId: number, flags: FileFlags): number => {\n const file = NODE_RUNTIME.resolveFileInfo(mod, fileId);\n if (!file || file.dataProtocol !== DuckDBDataProtocol.HTTP) {\n return nodeOpenFile(mod, fileId, flags);\n }\n\n if (flags & FileFlags.FILE_FLAGS_WRITE || flags & 
FileFlags.FILE_FLAGS_APPEND) {\n failWith(mod, `Opening file ${file.fileName} failed: HTTP writes are not supported`);\n return 0;\n }\n\n if (!(flags & FileFlags.FILE_FLAGS_READ)) {\n failWith(mod, `Opening file ${file.fileName} failed: unsupported file flags: ${flags}`);\n return 0;\n }\n\n if (!file.dataUrl) {\n failWith(mod, `Opening file ${file.fileName} failed: missing data URL`);\n return 0;\n }\n\n const allowFull = file.allowFullHttpReads ?? true;\n const forceFull = file.forceFullHttpReads ?? false;\n\n if (!forceFull) {\n try {\n const probe = requestHttpRange(file.dataUrl, 0, 0);\n if (probe.status === 206) {\n const total =\n parseContentRangeTotal(probe.headers[\"content-range\"]) ??\n parseContentLength(probe.headers[\"content-length\"]);\n if (total !== null) {\n return buildOpenResult(mod, total, 0);\n }\n }\n\n if (probe.status === 200 && allowFull) {\n const dataPtr = writeResponseToHeap(mod, probe.bytes);\n httpBuffers.set(fileId, { dataPtr, size: probe.bytes.length });\n return buildOpenResult(mod, probe.bytes.length, dataPtr);\n }\n } catch (error) {\n if (!allowFull) {\n failWith(mod, `Opening file ${file.fileName} failed: ${String(error)}`);\n return 0;\n }\n }\n }\n\n if (allowFull) {\n try {\n const full = requestHttp(file.dataUrl);\n if (full.status === 200) {\n const dataPtr = writeResponseToHeap(mod, full.bytes);\n httpBuffers.set(fileId, { dataPtr, size: full.bytes.length });\n return buildOpenResult(mod, full.bytes.length, dataPtr);\n }\n } catch (error) {\n failWith(mod, `Opening file ${file.fileName} failed: ${String(error)}`);\n return 0;\n }\n }\n\n failWith(mod, `Opening file ${file.fileName} failed: HTTP range requests unavailable`);\n return 0;\n };\n\n NODE_RUNTIME.readFile = (\n mod: WasmModule,\n fileId: number,\n buffer: number,\n bytes: number,\n location: number,\n ): number => {\n if (bytes === 0) {\n return 0;\n }\n\n const file = NODE_RUNTIME.resolveFileInfo(mod, fileId);\n if (!file || file.dataProtocol !== 
DuckDBDataProtocol.HTTP) {\n return nodeReadFile(mod, fileId, buffer, bytes, location);\n }\n\n const cached = httpBuffers.get(fileId);\n if (cached) {\n const sliceStart = Math.max(0, location);\n const sliceEnd = Math.min(cached.size, location + bytes);\n const length = Math.max(0, sliceEnd - sliceStart);\n if (length > 0) {\n const src = mod.HEAPU8.subarray(cached.dataPtr + sliceStart, cached.dataPtr + sliceEnd);\n mod.HEAPU8.set(src, buffer);\n }\n return length;\n }\n\n if (!file.dataUrl) {\n failWith(mod, `Reading file ${file.fileName} failed: missing data URL`);\n return 0;\n }\n\n try {\n const response = requestHttpRange(file.dataUrl, location, location + bytes - 1);\n if (response.status === 206 || (response.status === 200 && location === 0)) {\n const length = Math.min(bytes, response.bytes.length);\n if (length > 0) {\n mod.HEAPU8.set(response.bytes.subarray(0, length), buffer);\n }\n return length;\n }\n\n failWith(mod, `Reading file ${file.fileName} failed with HTTP ${response.status}`);\n return 0;\n } catch (error) {\n failWith(mod, `Reading file ${file.fileName} failed: ${String(error)}`);\n return 0;\n }\n };\n\n NODE_RUNTIME.checkFile = (mod: DuckDBModule, pathPtr: number, pathLen: number): boolean => {\n const path = readString(mod, pathPtr, pathLen);\n if (isHttpUrl(path)) {\n const response = requestHttpHead(path);\n return response.status === 200 || response.status === 206;\n }\n return nodeCheckFile(mod, pathPtr, pathLen);\n };\n\n NODE_RUNTIME.glob = (mod: DuckDBModule, pathPtr: number, pathLen: number): void => {\n const path = readString(mod, pathPtr, pathLen);\n if (isHttpUrl(path)) {\n const response = requestHttpHead(path);\n if (response.status === 200 || response.status === 206) {\n mod.ccall(\"duckdb_web_fs_glob_add_path\", null, [\"string\"], [path]);\n }\n return;\n }\n\n return nodeGlob(mod, pathPtr, pathLen);\n };\n\n NODE_RUNTIME.closeFile = (mod: DuckDBModule, fileId: number): void => {\n const cached = 
httpBuffers.get(fileId);\n if (cached) {\n if (cached.dataPtr) {\n (mod as WasmModule)._free(cached.dataPtr);\n }\n httpBuffers.delete(fileId);\n }\n nodeCloseFile(mod, fileId);\n };\n\n NODE_RUNTIME.getLastFileModificationTime = (mod: DuckDBModule, fileId: number): number => {\n const file = NODE_RUNTIME.resolveFileInfo(mod, fileId);\n if (file?.dataProtocol === DuckDBDataProtocol.HTTP) {\n return Date.now() / 1000;\n }\n return nodeGetLastModified(mod, fileId);\n };\n}\n\nfunction isHttpUrl(value: string): boolean {\n return value.startsWith(\"http://\") || value.startsWith(\"https://\");\n}\n\nfunction buildOpenResult(mod: WasmModule, size: number, dataPtr: number): number {\n const result = mod._malloc(2 * 8);\n mod.HEAPF64[(result >> 3) + 0] = +size;\n mod.HEAPF64[(result >> 3) + 1] = dataPtr;\n return result;\n}\n\nfunction writeResponseToHeap(mod: WasmModule, bytes: Uint8Array): number {\n const dataPtr = mod._malloc(bytes.byteLength);\n mod.HEAPU8.set(bytes, dataPtr);\n return dataPtr;\n}\n\nfunction parseContentRangeTotal(contentRange: string | null): number | null {\n if (!contentRange) {\n return null;\n }\n const [, total] = contentRange.split(\"/\");\n if (!total) {\n return null;\n }\n const parsed = Number.parseInt(total, 10);\n return Number.isFinite(parsed) ? parsed : null;\n}\n\nfunction parseContentLength(contentLength: string | null): number | null {\n if (!contentLength) {\n return null;\n }\n const parsed = Number.parseInt(contentLength, 10);\n return Number.isFinite(parsed) ? 
parsed : null;\n}\n\ntype HttpResponse = {\n status: number;\n bytes: Uint8Array;\n headers: Record<string, string>;\n};\n\nfunction requestHttp(url: string): HttpResponse {\n return requestCurl([url]);\n}\n\nfunction requestHttpHead(url: string): HttpResponse {\n return requestCurl([\"-I\", url]);\n}\n\nfunction requestHttpRange(url: string, start: number, end: number): HttpResponse {\n return requestCurl([\"-r\", `${start}-${end}`, url]);\n}\n\nfunction requestCurl(args: string[]): HttpResponse {\n const tempPath = path.join(tmpdir(), `parquetlens-http-${randomUUID()}`);\n try {\n const result = spawnSync(\"curl\", [\"-sS\", \"-L\", \"-D\", \"-\", \"-o\", tempPath, ...args], {\n encoding: \"buffer\",\n maxBuffer: 4 * 1024 * 1024,\n });\n\n if (result.error) {\n if ((result.error as NodeJS.ErrnoException).code === \"ENOENT\") {\n throw new Error(\"curl not found (required for HTTP range reads)\");\n }\n throw result.error;\n }\n if (result.status !== 0) {\n const stderr = result.stderr?.toString(\"utf8\").trim();\n throw new Error(stderr || \"curl failed\");\n }\n\n const body = readFileSync(tempPath);\n return parseCurlResponse(Buffer.from(result.stdout ?? []), body);\n } finally {\n rmSync(tempPath, { force: true });\n }\n}\n\nfunction parseCurlResponse(headersBuffer: Buffer, body: Buffer): HttpResponse {\n const headerBlob = headersBuffer.toString(\"latin1\");\n const blocks = headerBlob.split(/\\r\\n\\r\\n/).filter(Boolean);\n const lastBlock = blocks[blocks.length - 1] ?? \"\";\n const lines = lastBlock.split(/\\r\\n/).filter(Boolean);\n const statusLine = lines.shift() ?? \"\";\n const statusToken = statusLine.split(\" \")[1] ?? 
\"\";\n const status = Number.parseInt(statusToken, 10);\n const headers: Record<string, string> = {};\n\n for (const line of lines) {\n const index = line.indexOf(\":\");\n if (index === -1) {\n continue;\n }\n const key = line.slice(0, index).trim().toLowerCase();\n const value = line.slice(index + 1).trim();\n headers[key] = value;\n }\n\n return {\n status: Number.isFinite(status) ? status : 0,\n bytes: new Uint8Array(body),\n headers,\n };\n}\n\nexport async function openParquetSourceFromPath(filePath: string): Promise<ParquetSource> {\n const db = await getDuckDb();\n const conn = db.connect();\n const fileName = buildDuckDbFileName(filePath);\n\n db.registerFileURL(fileName, filePath, DuckDBDataProtocol.NODE_FS, true);\n\n return createParquetSource(db, conn, fileName);\n}\n\nexport async function openParquetSourceFromUrl(input: string): Promise<ParquetSource> {\n const resolved = resolveParquetUrl(input);\n if (!resolved) {\n throw new Error(\"Not a URL\");\n }\n\n const db = await getDuckDb();\n const conn = db.connect();\n const fileName = buildDuckDbFileName(resolved.url);\n\n db.registerFileURL(fileName, resolved.url, DuckDBDataProtocol.HTTP, true);\n\n return createParquetSource(db, conn, fileName);\n}\n\nexport async function openParquetSourceFromBuffer(buffer: Uint8Array): Promise<ParquetSource> {\n const db = await getDuckDb();\n const conn = db.connect();\n const fileName = buildDuckDbFileName(\"buffer\");\n\n db.registerFileBuffer(fileName, buffer);\n\n return createParquetSource(db, conn, fileName);\n}\n\nexport async function openParquetSource(input: string): Promise<ParquetSource> {\n const resolved = resolveParquetUrl(input);\n if (resolved) {\n return openParquetSourceFromUrl(input);\n }\n\n return openParquetSourceFromPath(input);\n}\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const source = await openParquetSourceFromBuffer(buffer);\n\n try {\n return 
await source.readTable(options);\n } finally {\n await source.close();\n }\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const source = await openParquetSourceFromPath(filePath);\n\n try {\n return await source.readTable(options);\n } finally {\n await source.close();\n }\n}\n\nexport async function readParquetTableFromUrl(\n input: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const source = await openParquetSourceFromUrl(input);\n\n try {\n return await source.readTable(options);\n } finally {\n await source.close();\n }\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n const source = await openParquetSourceFromBuffer(buffer);\n\n try {\n return await source.readMetadata();\n } finally {\n await source.close();\n }\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n\nfunction createParquetSource(\n db: DuckDBBindings,\n conn: DuckDBConnection,\n fileName: string,\n): ParquetSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n readTable: async (options?: 
ParquetReadOptions) => {\n const query = buildSelectQuery(fileName, options);\n return conn.query(query);\n },\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadata(conn, fileName);\n }\n return metadataPromise;\n },\n close: async () => {\n conn.close();\n db.dropFile(fileName);\n },\n };\n}\n\nfunction buildDuckDbFileName(input: string): string {\n const suffix = path.extname(input) || \".parquet\";\n return `parquetlens-${randomUUID()}${suffix}`;\n}\n\nfunction buildSelectQuery(fileName: string, options?: ParquetReadOptions): string {\n const columns = options?.columns && options.columns.length > 0 ? options.columns : null;\n const selectList = columns ? columns.map(quoteIdentifier).join(\", \") : \"*\";\n const limit = options?.limit;\n const offset = options?.offset;\n\n let query = `select ${selectList} from read_parquet(${quoteLiteral(fileName)})`;\n\n if (typeof limit === \"number\") {\n query += ` limit ${Math.max(0, limit)}`;\n }\n\n if (typeof offset === \"number\" && offset > 0) {\n query += ` offset ${Math.max(0, offset)}`;\n }\n\n return query;\n}\n\nfunction quoteIdentifier(value: string): string {\n return `\"${value.replace(/\"/g, '\"\"')}\"`;\n}\n\nfunction quoteLiteral(value: string): string {\n return `'${value.replace(/'/g, \"''\")}'`;\n}\n\nasync function readParquetMetadata(\n conn: DuckDBConnection,\n fileName: string,\n): Promise<ParquetFileMetadata> {\n const metadataRows = tableToObjects(\n conn.query(`select * from parquet_file_metadata(${quoteLiteral(fileName)})`),\n );\n const kvRows = tableToObjects(\n conn.query(`select * from parquet_kv_metadata(${quoteLiteral(fileName)})`),\n );\n\n const createdByRaw = metadataRows[0]?.created_by ?? metadataRows[0]?.createdBy ?? null;\n const keyValueMetadata: Record<string, unknown> = {};\n\n for (const row of kvRows) {\n const key = row.key ?? row.key_name ?? 
row.name;\n if (typeof key !== \"string\" || key.length === 0) {\n continue;\n }\n keyValueMetadata[key] = row.value ?? row.val ?? \"\";\n }\n\n return {\n createdBy: normalizeMetadataValue(createdByRaw),\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction tableToObjects(table: Table): Record<string, unknown>[] {\n const fields = table.schema.fields.map((field) => field.name);\n const rows: Record<string, unknown>[] = [];\n\n for (const batch of table.batches) {\n const vectors = fields.map((_, index) => batch.getChildAt(index));\n\n for (let rowIndex = 0; rowIndex < batch.numRows; rowIndex += 1) {\n const row: Record<string, unknown> = {};\n\n for (let colIndex = 0; colIndex < fields.length; colIndex += 1) {\n row[fields[colIndex]] = vectors[colIndex]?.get(rowIndex);\n }\n\n rows.push(row);\n }\n }\n\n return rows;\n}\n\nfunction normalizeMetadataValue(value: unknown): string | undefined {\n if (value === null || value === undefined) {\n return undefined;\n }\n\n if (value instanceof Uint8Array) {\n return Buffer.from(value).toString(\"utf8\");\n }\n\n if (typeof value === \"string\") {\n return value;\n }\n\n return String(value);\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n const normalizedValue = normalizeMetadataValue(value);\n normalized[key] = normalizedValue ?? 
\"\";\n }\n\n return normalized;\n}\n\nexport { resolveParquetUrl } from \"./urls.js\";\nexport type { ResolvedParquetUrl } from \"./urls.js\";\n","export type ResolvedParquetUrl = {\n url: string;\n};\n\nexport function resolveParquetUrl(input: string): ResolvedParquetUrl | null {\n if (input.startsWith(\"hf://\")) {\n return resolveHuggingFaceUrl(input);\n }\n\n if (input.startsWith(\"http://\") || input.startsWith(\"https://\")) {\n return { url: input };\n }\n\n return null;\n}\n\nfunction resolveHuggingFaceUrl(input: string): ResolvedParquetUrl {\n const match = input.match(/^hf:\\/\\/(datasets|models)\\/([^@\\/]+)\\/([^@\\/]+)(?:@([^\\/]+))?\\/(.+)$/);\n\n if (!match) {\n throw new Error(`Invalid hf:// URL: ${input}`);\n }\n\n const [, type, user, repo, branch = \"main\", filePath] = match;\n\n return {\n url: `https://huggingface.co/${type}/${user}/${repo}/resolve/${branch}/${filePath}`,\n };\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,mBAAmB,cAAc,cAAc;AACxD,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,iBAAiB;AAC1B,SAAS,cAAc;AACvB,SAAS,iBAAiB,yBAAyB;AACnD,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAGzB;EACE;EAGA;EAEA;EACA;EACA;EACA;EACA;EACA;OACK;ACnBA,SAAS,kBAAkB,OAA0C;AAC1E,MAAI,MAAM,WAAW,OAAO,GAAG;AAC7B,WAAO,sBAAsB,KAAK;EACpC;AAEA,MAAI,MAAM,WAAW,SAAS,KAAK,MAAM,WAAW,UAAU,GAAG;AAC/D,WAAO,EAAE,KAAK,MAAM;EACtB;AAEA,SAAO;AACT;AAEA,SAAS,sBAAsB,OAAmC;AAChE,QAAM,QAAQ,MAAM,MAAM,sEAAsE;AAEhG,MAAI,CAAC,OAAO;AACV,UAAM,IAAI,MAAM,sBAAsB,KAAK,EAAE;EAC/C;AAEA,QAAM,CAAC,EAAE,MAAM,MAAM,MAAM,SAAS,QAAQ,QAAQ,IAAI;AAExD,SAAO;IACL,KAAK,0BAA0B,IAAI,IAAI,IAAI,IAAI,IAAI,YAAY,MAAM,IAAI,QAAQ;EACnF;AACF;ADwBA,IAAI,gBAAgD;AACpD,IAAI,qBAAqB;AAOzB,IAAM,cAAc,oBAAI,IAAwB;AAShD,eAAe,YAAqC;AAClD,MAAI,CAAC,eAAe;AAClB,qBAAiB,YAAY;AAC3B,+BAAyB;AACzB,YAAM,UAAU,iBAAiB;AACjC,YAAM,KAAK,MAAM,aAAa,SAAS,IAAI,WAAW,GAAG,YAAY;AACrE,YAAM,GAAG,YAAY;AACrB,SAAG,KAAK;QACN,YAAY,iBAAiB;QAC7B,YAAY;UACV,oBAAoB;QACtB;MACF,CAAC;AACD,aAAO;IACT,GAAG;EACL;AAEA,SAAO;AACT;AAEA,SAAS,mBAAkC;AACzC,QAAM,eAAe,kBAAkB,YAAY,GAAG;AACt
D,QAAM,YAAY,aAAa,QAAQ,0CAA0C;AACjF,QAAM,YAAY,aAAa,QAAQ,qDAAqD;AAC5F,QAAM,WAAW,aAAa,QAAQ,yCAAyC;AAC/E,QAAM,WAAW,aAAa,QAAQ,oDAAoD;AAE1F,SAAO;IACL,KAAK;MACH,YAAY;MACZ,YAAY;IACd;IACA,IAAI;MACF,YAAY;MACZ,YAAY;IACd;EACF;AACF;AAEA,SAAS,2BAAiC;AACxC,MAAI,oBAAoB;AACtB;EACF;AACA,uBAAqB;AAErB,QAAM,eAAe,aAAa;AAKlC,QAAM,eAAe,aAAa;AAOlC,QAAM,gBAAgB,aAAa,UAAU,KAAK,YAAY;AAC9D,QAAM,WAAW,aAAa,KAAK,KAAK,YAAY;AACpD,QAAM,gBAAgB,aAAa,UAAU,KAAK,YAAY;AAC9D,QAAM,sBAAsB,aAAa,4BAA4B,KAAK,YAAY;AAEtF,eAAa,WAAW,CAAC,KAAiB,QAAgB,UAA6B;AACrF,UAAM,OAAO,aAAa,gBAAgB,KAAK,MAAM;AACrD,QAAI,CAAC,QAAQ,KAAK,iBAAiB,mBAAmB,MAAM;AAC1D,aAAO,aAAa,KAAK,QAAQ,KAAK;IACxC;AAEA,QAAI,QAAQ,UAAU,oBAAoB,QAAQ,UAAU,mBAAmB;AAC7E,eAAS,KAAK,gBAAgB,KAAK,QAAQ,wCAAwC;AACnF,aAAO;IACT;AAEA,QAAI,EAAE,QAAQ,UAAU,kBAAkB;AACxC,eAAS,KAAK,gBAAgB,KAAK,QAAQ,oCAAoC,KAAK,EAAE;AACtF,aAAO;IACT;AAEA,QAAI,CAAC,KAAK,SAAS;AACjB,eAAS,KAAK,gBAAgB,KAAK,QAAQ,2BAA2B;AACtE,aAAO;IACT;AAEA,UAAM,YAAY,KAAK,sBAAsB;AAC7C,UAAM,YAAY,KAAK,sBAAsB;AAE7C,QAAI,CAAC,WAAW;AACd,UAAI;AACF,cAAM,QAAQ,iBAAiB,KAAK,SAAS,GAAG,CAAC;AACjD,YAAI,MAAM,WAAW,KAAK;AACxB,gBAAM,QACJ,uBAAuB,MAAM,QAAQ,eAAe,CAAC,KACrD,mBAAmB,MAAM,QAAQ,gBAAgB,CAAC;AACpD,cAAI,UAAU,MAAM;AAClB,mBAAO,gBAAgB,KAAK,OAAO,CAAC;UACtC;QACF;AAEA,YAAI,MAAM,WAAW,OAAO,WAAW;AACrC,gBAAM,UAAU,oBAAoB,KAAK,MAAM,KAAK;AACpD,sBAAY,IAAI,QAAQ,EAAE,SAAS,MAAM,MAAM,MAAM,OAAO,CAAC;AAC7D,iBAAO,gBAAgB,KAAK,MAAM,MAAM,QAAQ,OAAO;QACzD;MACF,SAAS,OAAO;AACd,YAAI,CAAC,WAAW;AACd,mBAAS,KAAK,gBAAgB,KAAK,QAAQ,YAAY,OAAO,KAAK,CAAC,EAAE;AACtE,iBAAO;QACT;MACF;IACF;AAEA,QAAI,WAAW;AACb,UAAI;AACF,cAAM,OAAO,YAAY,KAAK,OAAO;AACrC,YAAI,KAAK,WAAW,KAAK;AACvB,gBAAM,UAAU,oBAAoB,KAAK,KAAK,KAAK;AACnD,sBAAY,IAAI,QAAQ,EAAE,SAAS,MAAM,KAAK,MAAM,OAAO,CAAC;AAC5D,iBAAO,gBAAgB,KAAK,KAAK,MAAM,QAAQ,OAAO;QACxD;MACF,SAAS,OAAO;AACd,iBAAS,KAAK,gBAAgB,KAAK,QAAQ,YAAY,OAAO,KAAK,CAAC,EAAE;AACtE,eAAO;MACT;IACF;AAEA,aAAS,KAAK,gBAAgB,KAAK,QAAQ,0CAA0C;AACrF,WAAO;EACT;AAEA,eAAa,WAAW,CACtB,KACA,QACA,QACA,OACA,aACW;AACX,QAAI,UAAU,GAAG;AACf,aAAO;IACT;AAEA,UAAM,OAAO,aAAa,gBAAgB,KAAK,MAAM;AA
CrD,QAAI,CAAC,QAAQ,KAAK,iBAAiB,mBAAmB,MAAM;AAC1D,aAAO,aAAa,KAAK,QAAQ,QAAQ,OAAO,QAAQ;IAC1D;AAEA,UAAM,SAAS,YAAY,IAAI,MAAM;AACrC,QAAI,QAAQ;AACV,YAAM,aAAa,KAAK,IAAI,GAAG,QAAQ;AACvC,YAAM,WAAW,KAAK,IAAI,OAAO,MAAM,WAAW,KAAK;AACvD,YAAM,SAAS,KAAK,IAAI,GAAG,WAAW,UAAU;AAChD,UAAI,SAAS,GAAG;AACd,cAAM,MAAM,IAAI,OAAO,SAAS,OAAO,UAAU,YAAY,OAAO,UAAU,QAAQ;AACtF,YAAI,OAAO,IAAI,KAAK,MAAM;MAC5B;AACA,aAAO;IACT;AAEA,QAAI,CAAC,KAAK,SAAS;AACjB,eAAS,KAAK,gBAAgB,KAAK,QAAQ,2BAA2B;AACtE,aAAO;IACT;AAEA,QAAI;AACF,YAAM,WAAW,iBAAiB,KAAK,SAAS,UAAU,WAAW,QAAQ,CAAC;AAC9E,UAAI,SAAS,WAAW,OAAQ,SAAS,WAAW,OAAO,aAAa,GAAI;AAC1E,cAAM,SAAS,KAAK,IAAI,OAAO,SAAS,MAAM,MAAM;AACpD,YAAI,SAAS,GAAG;AACd,cAAI,OAAO,IAAI,SAAS,MAAM,SAAS,GAAG,MAAM,GAAG,MAAM;QAC3D;AACA,eAAO;MACT;AAEA,eAAS,KAAK,gBAAgB,KAAK,QAAQ,qBAAqB,SAAS,MAAM,EAAE;AACjF,aAAO;IACT,SAAS,OAAO;AACd,eAAS,KAAK,gBAAgB,KAAK,QAAQ,YAAY,OAAO,KAAK,CAAC,EAAE;AACtE,aAAO;IACT;EACF;AAEA,eAAa,YAAY,CAAC,KAAmB,SAAiB,YAA6B;AACzF,UAAMA,QAAO,WAAW,KAAK,SAAS,OAAO;AAC7C,QAAI,UAAUA,KAAI,GAAG;AACnB,YAAM,WAAW,gBAAgBA,KAAI;AACrC,aAAO,SAAS,WAAW,OAAO,SAAS,WAAW;IACxD;AACA,WAAO,cAAc,KAAK,SAAS,OAAO;EAC5C;AAEA,eAAa,OAAO,CAAC,KAAmB,SAAiB,YAA0B;AACjF,UAAMA,QAAO,WAAW,KAAK,SAAS,OAAO;AAC7C,QAAI,UAAUA,KAAI,GAAG;AACnB,YAAM,WAAW,gBAAgBA,KAAI;AACrC,UAAI,SAAS,WAAW,OAAO,SAAS,WAAW,KAAK;AACtD,YAAI,MAAM,+BAA+B,MAAM,CAAC,QAAQ,GAAG,CAACA,KAAI,CAAC;MACnE;AACA;IACF;AAEA,WAAO,SAAS,KAAK,SAAS,OAAO;EACvC;AAEA,eAAa,YAAY,CAAC,KAAmB,WAAyB;AACpE,UAAM,SAAS,YAAY,IAAI,MAAM;AACrC,QAAI,QAAQ;AACV,UAAI,OAAO,SAAS;AACjB,YAAmB,MAAM,OAAO,OAAO;MAC1C;AACA,kBAAY,OAAO,MAAM;IAC3B;AACA,kBAAc,KAAK,MAAM;EAC3B;AAEA,eAAa,8BAA8B,CAAC,KAAmB,WAA2B;AACxF,UAAM,OAAO,aAAa,gBAAgB,KAAK,MAAM;AACrD,QAAI,MAAM,iBAAiB,mBAAmB,MAAM;AAClD,aAAO,KAAK,IAAI,IAAI;IACtB;AACA,WAAO,oBAAoB,KAAK,MAAM;EACxC;AACF;AAEA,SAAS,UAAU,OAAwB;AACzC,SAAO,MAAM,WAAW,SAAS,KAAK,MAAM,WAAW,UAAU;AACnE;AAEA,SAAS,gBAAgB,KAAiB,MAAc,SAAyB;AAC/E,QAAM,SAAS,IAAI,QAAQ,IAAI,CAAC;AAChC,MAAI,SAAS,UAAU,KAAK,CAAC,IAAI,CAAC;AAClC,MAAI,SAAS,UAAU,KAAK,CAAC,IAAI;AACjC,SAAO;AACT;AAEA,SAAS,oBAAoB,KAAiB,
OAA2B;AACvE,QAAM,UAAU,IAAI,QAAQ,MAAM,UAAU;AAC5C,MAAI,OAAO,IAAI,OAAO,OAAO;AAC7B,SAAO;AACT;AAEA,SAAS,uBAAuB,cAA4C;AAC1E,MAAI,CAAC,cAAc;AACjB,WAAO;EACT;AACA,QAAM,CAAC,EAAE,KAAK,IAAI,aAAa,MAAM,GAAG;AACxC,MAAI,CAAC,OAAO;AACV,WAAO;EACT;AACA,QAAM,SAAS,OAAO,SAAS,OAAO,EAAE;AACxC,SAAO,OAAO,SAAS,MAAM,IAAI,SAAS;AAC5C;AAEA,SAAS,mBAAmB,eAA6C;AACvE,MAAI,CAAC,eAAe;AAClB,WAAO;EACT;AACA,QAAM,SAAS,OAAO,SAAS,eAAe,EAAE;AAChD,SAAO,OAAO,SAAS,MAAM,IAAI,SAAS;AAC5C;AAQA,SAAS,YAAY,KAA2B;AAC9C,SAAO,YAAY,CAAC,GAAG,CAAC;AAC1B;AAEA,SAAS,gBAAgB,KAA2B;AAClD,SAAO,YAAY,CAAC,MAAM,GAAG,CAAC;AAChC;AAEA,SAAS,iBAAiB,KAAa,OAAe,KAA2B;AAC/E,SAAO,YAAY,CAAC,MAAM,GAAG,KAAK,IAAI,GAAG,IAAI,GAAG,CAAC;AACnD;AAEA,SAAS,YAAY,MAA8B;AACjD,QAAM,WAAW,KAAK,KAAK,OAAO,GAAG,oBAAoB,WAAW,CAAC,EAAE;AACvE,MAAI;AACF,UAAM,SAAS,UAAU,QAAQ,CAAC,OAAO,MAAM,MAAM,KAAK,MAAM,UAAU,GAAG,IAAI,GAAG;MAClF,UAAU;MACV,WAAW,IAAI,OAAO;IACxB,CAAC;AAED,QAAI,OAAO,OAAO;AAChB,UAAK,OAAO,MAAgC,SAAS,UAAU;AAC7D,cAAM,IAAI,MAAM,gDAAgD;MAClE;AACA,YAAM,OAAO;IACf;AACA,QAAI,OAAO,WAAW,GAAG;AACvB,YAAM,SAAS,OAAO,QAAQ,SAAS,MAAM,EAAE,KAAK;AACpD,YAAM,IAAI,MAAM,UAAU,aAAa;IACzC;AAEA,UAAM,OAAO,aAAa,QAAQ;AAClC,WAAO,kBAAkB,OAAO,KAAK,OAAO,UAAU,CAAC,CAAC,GAAG,IAAI;EACjE,UAAA;AACE,WAAO,UAAU,EAAE,OAAO,KAAK,CAAC;EAClC;AACF;AAEA,SAAS,kBAAkB,eAAuB,MAA4B;AAC5E,QAAM,aAAa,cAAc,SAAS,QAAQ;AAClD,QAAM,SAAS,WAAW,MAAM,UAAU,EAAE,OAAO,OAAO;AAC1D,QAAM,YAAY,OAAO,OAAO,SAAS,CAAC,KAAK;AAC/C,QAAM,QAAQ,UAAU,MAAM,MAAM,EAAE,OAAO,OAAO;AACpD,QAAM,aAAa,MAAM,MAAM,KAAK;AACpC,QAAM,cAAc,WAAW,MAAM,GAAG,EAAE,CAAC,KAAK;AAChD,QAAM,SAAS,OAAO,SAAS,aAAa,EAAE;AAC9C,QAAM,UAAkC,CAAC;AAEzC,aAAW,QAAQ,OAAO;AACxB,UAAM,QAAQ,KAAK,QAAQ,GAAG;AAC9B,QAAI,UAAU,IAAI;AAChB;IACF;AACA,UAAM,MAAM,KAAK,MAAM,GAAG,KAAK,EAAE,KAAK,EAAE,YAAY;AACpD,UAAM,QAAQ,KAAK,MAAM,QAAQ,CAAC,EAAE,KAAK;AACzC,YAAQ,GAAG,IAAI;EACjB;AAEA,SAAO;IACL,QAAQ,OAAO,SAAS,MAAM,IAAI,SAAS;IAC3C,OAAO,IAAI,WAAW,IAAI;IAC1B;EACF;AACF;AAEA,eAAsB,0BAA0B,UAA0C;AACxF,QAAM,KAAK,MAAM,UAAU;AAC3B,QAAM,OAAO,GAAG,QAAQ;AACxB,QAAM,WAAW,oBAAoB,QAAQ;AAE7C,KAAG,gBAAgB,UAAU,UAAU,mBAAmB,SAAS,I
AAI;AAEvE,SAAO,oBAAoB,IAAI,MAAM,QAAQ;AAC/C;AAEA,eAAsB,yBAAyB,OAAuC;AACpF,QAAM,WAAW,kBAAkB,KAAK;AACxC,MAAI,CAAC,UAAU;AACb,UAAM,IAAI,MAAM,WAAW;EAC7B;AAEA,QAAM,KAAK,MAAM,UAAU;AAC3B,QAAM,OAAO,GAAG,QAAQ;AACxB,QAAM,WAAW,oBAAoB,SAAS,GAAG;AAEjD,KAAG,gBAAgB,UAAU,SAAS,KAAK,mBAAmB,MAAM,IAAI;AAExE,SAAO,oBAAoB,IAAI,MAAM,QAAQ;AAC/C;AAYA,eAAsB,kBAAkB,OAAuC;AAC7E,QAAM,WAAW,kBAAkB,KAAK;AACxC,MAAI,UAAU;AACZ,WAAO,yBAAyB,KAAK;EACvC;AAEA,SAAO,0BAA0B,KAAK;AACxC;AAeA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,0BAA0B,QAAQ;AAEvD,MAAI;AACF,WAAO,MAAM,OAAO,UAAU,OAAO;EACvC,UAAA;AACE,UAAM,OAAO,MAAM;EACrB;AACF;AAEA,eAAsB,wBACpB,OACA,SACgB;AAChB,QAAM,SAAS,MAAM,yBAAyB,KAAK;AAEnD,MAAI;AACF,WAAO,MAAM,OAAO,UAAU,OAAO;EACvC,UAAA;AACE,UAAM,OAAO,MAAM;EACrB;AACF;AAcA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;AAEA,SAAS,oBACP,IACA,MACA,UACe;AACf,MAAI,kBAAuD;AAE3D,SAAO;IACL,WAAW,OAAO,YAAiC;AACjD,YAAM,QAAQ,iBAAiB,UAAU,OAAO;AAChD,aAAO,KAAK,MAAM,KAAK;IACzB;IACA,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,oBAAoB,MAAM,QAAQ;MACtD;AACA,aAAO;IACT;IACA,OAAO,YAAY;AACjB,WAAK,MAAM;AACX,SAAG,SAAS,QAAQ;IACtB;EACF;AACF;AAEA,SAAS,oBAAoB,OAAuB;AAClD,QAAM,SAAS,KAAK,QAAQ,KAAK,KAAK;AACtC,SAAO,eAAe,WAAW,CAAC,GAAG,MAAM;AAC7C;AAEA,SAAS,iBAAiB,UAAkB,SAAsC;AAChF,QAAM,UAAU,SAAS,WAAW,QAAQ,QAAQ,SAAS,IAAI,QAAQ,UAAU;AACnF,QAAM,aAAa,UAAU,QAAQ,IAAI,eAAe,EAAE,KAAK,IAAI,IAAI;AACvE,QAAM,QAAQ,SAAS;AACvB,QAAM,SAAS,SAAS;AAExB,MAAI,QAAQ,UAAU,UAAU,sBAAsB,aAAa,QAAQ,CAAC;AAE5E,MAAI,OAAO,UAAU,UAAU;AAC7B,aAAS,UAAU,KAAK,IAAI,GAAG,KAAK,CAAC;EACvC;AAEA,MAAI,OAAO,WAAW,YAAY,SAAS,GAAG;AAC5C,aAAS,WA
AW,KAAK,IAAI,GAAG,MAAM,CAAC;EACzC;AAEA,SAAO;AACT;AAEA,SAAS,gBAAgB,OAAuB;AAC9C,SAAO,IAAI,MAAM,QAAQ,MAAM,IAAI,CAAC;AACtC;AAEA,SAAS,aAAa,OAAuB;AAC3C,SAAO,IAAI,MAAM,QAAQ,MAAM,IAAI,CAAC;AACtC;AAEA,eAAe,oBACb,MACA,UAC8B;AAC9B,QAAM,eAAe;IACnB,KAAK,MAAM,uCAAuC,aAAa,QAAQ,CAAC,GAAG;EAC7E;AACA,QAAM,SAAS;IACb,KAAK,MAAM,qCAAqC,aAAa,QAAQ,CAAC,GAAG;EAC3E;AAEA,QAAM,eAAe,aAAa,CAAC,GAAG,cAAc,aAAa,CAAC,GAAG,aAAa;AAClF,QAAM,mBAA4C,CAAC;AAEnD,aAAW,OAAO,QAAQ;AACxB,UAAM,MAAM,IAAI,OAAO,IAAI,YAAY,IAAI;AAC3C,QAAI,OAAO,QAAQ,YAAY,IAAI,WAAW,GAAG;AAC/C;IACF;AACA,qBAAiB,GAAG,IAAI,IAAI,SAAS,IAAI,OAAO;EAClD;AAEA,SAAO;IACL,WAAW,uBAAuB,YAAY;IAC9C,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,eAAe,OAAyC;AAC/D,QAAM,SAAS,MAAM,OAAO,OAAO,IAAI,CAAC,UAAU,MAAM,IAAI;AAC5D,QAAM,OAAkC,CAAC;AAEzC,aAAW,SAAS,MAAM,SAAS;AACjC,UAAM,UAAU,OAAO,IAAI,CAAC,GAAG,UAAU,MAAM,WAAW,KAAK,CAAC;AAEhE,aAAS,WAAW,GAAG,WAAW,MAAM,SAAS,YAAY,GAAG;AAC9D,YAAM,MAA+B,CAAC;AAEtC,eAAS,WAAW,GAAG,WAAW,OAAO,QAAQ,YAAY,GAAG;AAC9D,YAAI,OAAO,QAAQ,CAAC,IAAI,QAAQ,QAAQ,GAAG,IAAI,QAAQ;MACzD;AAEA,WAAK,KAAK,GAAG;IACf;EACF;AAEA,SAAO;AACT;AAEA,SAAS,uBAAuB,OAAoC;AAClE,MAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,WAAO;EACT;AAEA,MAAI,iBAAiB,YAAY;AAC/B,WAAO,OAAO,KAAK,KAAK,EAAE,SAAS,MAAM;EAC3C;AAEA,MAAI,OAAO,UAAU,UAAU;AAC7B,WAAO;EACT;AAEA,SAAO,OAAO,KAAK;AACrB;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,UAAM,kBAAkB,uBAAuB,KAAK;AACpD,eAAW,GAAG,IAAI,mBAAmB;EACvC;AAEA,SAAO;AACT;","names":["path"]}
|
package/dist/main.js
CHANGED
|
@@ -7,12 +7,16 @@ const __filename = fileURLToPath(import.meta.url);
|
|
|
7
7
|
const __dirname = dirname(__filename);
|
|
8
8
|
import {
|
|
9
9
|
readParquetTableFromPath,
|
|
10
|
-
readParquetTableFromStdin
|
|
11
|
-
|
|
10
|
+
readParquetTableFromStdin,
|
|
11
|
+
readParquetTableFromUrl,
|
|
12
|
+
resolveParquetUrl
|
|
13
|
+
} from "./chunk-NRRDNC7S.js";
|
|
12
14
|
|
|
13
15
|
// src/main.ts
|
|
14
16
|
import { spawnSync } from "child_process";
|
|
15
17
|
import path from "path";
|
|
18
|
+
import { fileURLToPath } from "url";
|
|
19
|
+
var __filename = typeof globalThis.__filename !== "undefined" ? globalThis.__filename : fileURLToPath(import.meta.url);
|
|
16
20
|
var DEFAULT_LIMIT = 20;
|
|
17
21
|
function parseArgs(argv) {
|
|
18
22
|
const options = {
|
|
@@ -112,7 +116,7 @@ function readOptionValue(arg, name, next) {
|
|
|
112
116
|
return null;
|
|
113
117
|
}
|
|
114
118
|
function printUsage() {
|
|
115
|
-
const helpText = `parquetlens <file|-> [options]
|
|
119
|
+
const helpText = `parquetlens <file|url|-> [options]
|
|
116
120
|
|
|
117
121
|
options:
|
|
118
122
|
--limit, --limit=<n> number of rows to show (default: ${DEFAULT_LIMIT})
|
|
@@ -127,7 +131,8 @@ options:
|
|
|
127
131
|
examples:
|
|
128
132
|
parquetlens data.parquet --limit 25
|
|
129
133
|
parquetlens data.parquet --columns=city,state
|
|
130
|
-
parquetlens
|
|
134
|
+
parquetlens hf://datasets/cfahlgren1/hub-stats/daily_papers.parquet
|
|
135
|
+
parquetlens https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/daily_papers.parquet
|
|
131
136
|
parquetlens data.parquet --plain
|
|
132
137
|
parquetlens - < input.parquet
|
|
133
138
|
`;
|
|
@@ -204,11 +209,14 @@ async function loadTable(input, readOptions) {
|
|
|
204
209
|
const stdinFallback = process.stdin.isTTY ? void 0 : "-";
|
|
205
210
|
const source = input ?? stdinFallback;
|
|
206
211
|
if (!source) {
|
|
207
|
-
throw new Error("missing input file (pass a path or pipe stdin)");
|
|
212
|
+
throw new Error("missing input file (pass a path, URL, or pipe stdin)");
|
|
208
213
|
}
|
|
209
214
|
if (source === "-") {
|
|
210
215
|
return readParquetTableFromStdin("stdin.parquet", readOptions);
|
|
211
216
|
}
|
|
217
|
+
if (resolveParquetUrl(source)) {
|
|
218
|
+
return readParquetTableFromUrl(source, readOptions);
|
|
219
|
+
}
|
|
212
220
|
return readParquetTableFromPath(source, readOptions);
|
|
213
221
|
}
|
|
214
222
|
async function main() {
|
|
@@ -227,7 +235,9 @@ async function main() {
|
|
|
227
235
|
const wantsTui = resolveTuiMode(options.tuiMode, options);
|
|
228
236
|
if (wantsTui) {
|
|
229
237
|
if (!input || input === "-") {
|
|
230
|
-
process.stderr.write(
|
|
238
|
+
process.stderr.write(
|
|
239
|
+
"parquetlens: tui mode requires a file path or URL (stdin not supported)\n"
|
|
240
|
+
);
|
|
231
241
|
process.exitCode = 1;
|
|
232
242
|
return;
|
|
233
243
|
}
|