parquetlens 0.1.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -0
- package/dist/chunk-NRRDNC7S.js +485 -0
- package/dist/chunk-NRRDNC7S.js.map +1 -0
- package/dist/main.js +16 -6
- package/dist/main.js.map +1 -1
- package/dist/tui.js +87 -16
- package/dist/tui.js.map +1 -1
- package/package.json +6 -4
- package/dist/chunk-573AA4JN.js +0 -112
- package/dist/chunk-573AA4JN.js.map +0 -1
- package/dist/chunk-E6TEBKS4.js +0 -166
- package/dist/chunk-E6TEBKS4.js.map +0 -1
- package/dist/chunk-JOHKCQYH.js +0 -99
- package/dist/chunk-JOHKCQYH.js.map +0 -1
- package/dist/chunk-LHMHT2IQ.js +0 -99
- package/dist/chunk-LHMHT2IQ.js.map +0 -1
- package/dist/chunk-UUCD5YU4.js +0 -92
- package/dist/chunk-UUCD5YU4.js.map +0 -1
- package/dist/chunk-VFBGUOAH.js +0 -92
- package/dist/chunk-VFBGUOAH.js.map +0 -1
package/dist/chunk-573AA4JN.js
DELETED
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
import { createRequire } from 'module';
|
|
2
|
-
import { fileURLToPath } from 'url';
|
|
3
|
-
import { dirname } from 'path';
|
|
4
|
-
const require = createRequire(import.meta.url);
|
|
5
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
-
const __dirname = dirname(__filename);
|
|
7
|
-
|
|
8
|
-
// ../../packages/parquet-reader/dist/index.js
|
|
9
|
-
import { Blob as NodeBlob } from "buffer";
|
|
10
|
-
import { createWriteStream } from "fs";
|
|
11
|
-
import { promises as fs } from "fs";
|
|
12
|
-
import { randomUUID } from "crypto";
|
|
13
|
-
import { tmpdir } from "os";
|
|
14
|
-
import path from "path";
|
|
15
|
-
import { pipeline } from "stream/promises";
|
|
16
|
-
import { tableFromIPC } from "apache-arrow";
|
|
17
|
-
import initWasm, { ParquetFile, readParquet } from "parquet-wasm/esm";
|
|
18
|
-
var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
|
|
19
|
-
var wasmInitialized = false;
|
|
20
|
-
var wasmInitPromise = null;
|
|
21
|
-
async function ensureWasmInitialized() {
|
|
22
|
-
if (wasmInitialized) return;
|
|
23
|
-
if (!wasmInitPromise) {
|
|
24
|
-
wasmInitPromise = initWasm().then(() => {
|
|
25
|
-
wasmInitialized = true;
|
|
26
|
-
});
|
|
27
|
-
}
|
|
28
|
-
return wasmInitPromise;
|
|
29
|
-
}
|
|
30
|
-
async function readParquetTableFromBuffer(buffer, options) {
|
|
31
|
-
await ensureWasmInitialized();
|
|
32
|
-
const wasmTable = readParquet(buffer, options ?? void 0);
|
|
33
|
-
const ipcStream = wasmTable.intoIPCStream();
|
|
34
|
-
return tableFromIPC(ipcStream);
|
|
35
|
-
}
|
|
36
|
-
function createParquetBufferSource(buffer) {
|
|
37
|
-
let metadataPromise = null;
|
|
38
|
-
return {
|
|
39
|
-
buffer,
|
|
40
|
-
byteLength: buffer.byteLength,
|
|
41
|
-
readTable: (options) => readParquetTableFromBuffer(buffer, options),
|
|
42
|
-
readMetadata: () => {
|
|
43
|
-
if (!metadataPromise) {
|
|
44
|
-
metadataPromise = readParquetMetadataFromBuffer(buffer);
|
|
45
|
-
}
|
|
46
|
-
return metadataPromise;
|
|
47
|
-
}
|
|
48
|
-
};
|
|
49
|
-
}
|
|
50
|
-
async function openParquetBufferFromPath(filePath) {
|
|
51
|
-
const buffer = await fs.readFile(filePath);
|
|
52
|
-
return createParquetBufferSource(buffer);
|
|
53
|
-
}
|
|
54
|
-
async function readParquetTableFromPath(filePath, options) {
|
|
55
|
-
const buffer = await fs.readFile(filePath);
|
|
56
|
-
return readParquetTableFromBuffer(buffer, options);
|
|
57
|
-
}
|
|
58
|
-
async function readParquetMetadataFromBuffer(buffer) {
|
|
59
|
-
await ensureWasmInitialized();
|
|
60
|
-
const blobInput = new Uint8Array(buffer).buffer;
|
|
61
|
-
const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
|
|
62
|
-
const meta = file.metadata();
|
|
63
|
-
const fileMeta = meta.fileMetadata();
|
|
64
|
-
const createdBy = fileMeta.createdBy();
|
|
65
|
-
const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
|
|
66
|
-
fileMeta.free();
|
|
67
|
-
meta.free();
|
|
68
|
-
file.free();
|
|
69
|
-
return {
|
|
70
|
-
createdBy: createdBy ?? void 0,
|
|
71
|
-
keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
|
|
72
|
-
};
|
|
73
|
-
}
|
|
74
|
-
function normalizeMetadataValues(input) {
|
|
75
|
-
const normalized = {};
|
|
76
|
-
for (const [key, value] of Object.entries(input)) {
|
|
77
|
-
if (value === null || value === void 0) {
|
|
78
|
-
normalized[key] = "";
|
|
79
|
-
continue;
|
|
80
|
-
}
|
|
81
|
-
normalized[key] = typeof value === "string" ? value : String(value);
|
|
82
|
-
}
|
|
83
|
-
return normalized;
|
|
84
|
-
}
|
|
85
|
-
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
|
|
86
|
-
const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
|
|
87
|
-
const safeName = filenameHint.replace(/[\\/]/g, "_");
|
|
88
|
-
const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
|
|
89
|
-
const writeStream = createWriteStream(filePath);
|
|
90
|
-
await pipeline(process.stdin, writeStream);
|
|
91
|
-
return {
|
|
92
|
-
path: filePath,
|
|
93
|
-
cleanup: async () => {
|
|
94
|
-
await fs.rm(tempDir, { recursive: true, force: true });
|
|
95
|
-
}
|
|
96
|
-
};
|
|
97
|
-
}
|
|
98
|
-
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
|
|
99
|
-
const temp = await bufferStdinToTempFile(filenameHint);
|
|
100
|
-
try {
|
|
101
|
-
return await readParquetTableFromPath(temp.path, options);
|
|
102
|
-
} finally {
|
|
103
|
-
await temp.cleanup();
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
export {
|
|
108
|
-
openParquetBufferFromPath,
|
|
109
|
-
readParquetTableFromPath,
|
|
110
|
-
readParquetTableFromStdin
|
|
111
|
-
};
|
|
112
|
-
//# sourceMappingURL=chunk-573AA4JN.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport initWasm, { ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\nlet wasmInitPromise: Promise<void> | null = null;\n\nasync function ensureWasmInitialized(): Promise<void> {\n if (wasmInitialized) return;\n if (!wasmInitPromise) {\n wasmInitPromise = initWasm().then(() => {\n wasmInitialized = true;\n });\n }\n return wasmInitPromise;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n await ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n await ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,OAAO,YAAY,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AACtB,IAAI,kBAAwC;AAE5C,eAAe,wBAAuC;AACpD,MAAI,gBAAiB;AACrB,MAAI,CAAC,iBAAiB;AACpB,sBAAkB,SAAS,EAAE,KAAK,MAAM;AACtC,wBAAkB;IACpB,CAAC;EACH;AACA,SAAO;AACT;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,QAAM,sBAAsB;AAC5B,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,sBAAsB;AAC5B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
|
package/dist/chunk-E6TEBKS4.js
DELETED
|
@@ -1,166 +0,0 @@
|
|
|
1
|
-
var __create = Object.create;
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
8
|
-
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
9
|
-
}) : x)(function(x) {
|
|
10
|
-
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
11
|
-
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
12
|
-
});
|
|
13
|
-
var __commonJS = (cb, mod) => function __require2() {
|
|
14
|
-
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
15
|
-
};
|
|
16
|
-
var __copyProps = (to, from, except, desc) => {
|
|
17
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
18
|
-
for (let key of __getOwnPropNames(from))
|
|
19
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
20
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
21
|
-
}
|
|
22
|
-
return to;
|
|
23
|
-
};
|
|
24
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
25
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
26
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
27
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
28
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
29
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
30
|
-
mod
|
|
31
|
-
));
|
|
32
|
-
|
|
33
|
-
// ../../packages/parquet-reader/dist/index.js
|
|
34
|
-
var require_dist = __commonJS({
|
|
35
|
-
"../../packages/parquet-reader/dist/index.js"(exports, module) {
|
|
36
|
-
"use strict";
|
|
37
|
-
var __create2 = Object.create;
|
|
38
|
-
var __defProp2 = Object.defineProperty;
|
|
39
|
-
var __getOwnPropDesc2 = Object.getOwnPropertyDescriptor;
|
|
40
|
-
var __getOwnPropNames2 = Object.getOwnPropertyNames;
|
|
41
|
-
var __getProtoOf2 = Object.getPrototypeOf;
|
|
42
|
-
var __hasOwnProp2 = Object.prototype.hasOwnProperty;
|
|
43
|
-
var __export = (target, all) => {
|
|
44
|
-
for (var name in all)
|
|
45
|
-
__defProp2(target, name, { get: all[name], enumerable: true });
|
|
46
|
-
};
|
|
47
|
-
var __copyProps2 = (to, from, except, desc) => {
|
|
48
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
49
|
-
for (let key of __getOwnPropNames2(from))
|
|
50
|
-
if (!__hasOwnProp2.call(to, key) && key !== except)
|
|
51
|
-
__defProp2(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc2(from, key)) || desc.enumerable });
|
|
52
|
-
}
|
|
53
|
-
return to;
|
|
54
|
-
};
|
|
55
|
-
var __toESM2 = (mod, isNodeMode, target) => (target = mod != null ? __create2(__getProtoOf2(mod)) : {}, __copyProps2(
|
|
56
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
57
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
58
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
59
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
60
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp2(target, "default", { value: mod, enumerable: true }) : target,
|
|
61
|
-
mod
|
|
62
|
-
));
|
|
63
|
-
var __toCommonJS = (mod) => __copyProps2(__defProp2({}, "__esModule", { value: true }), mod);
|
|
64
|
-
var index_exports = {};
|
|
65
|
-
__export(index_exports, {
|
|
66
|
-
bufferStdinToTempFile: () => bufferStdinToTempFile,
|
|
67
|
-
createParquetBufferSource: () => createParquetBufferSource,
|
|
68
|
-
openParquetBufferFromPath: () => openParquetBufferFromPath,
|
|
69
|
-
readParquetMetadataFromBuffer: () => readParquetMetadataFromBuffer,
|
|
70
|
-
readParquetTableFromBuffer: () => readParquetTableFromBuffer,
|
|
71
|
-
readParquetTableFromPath: () => readParquetTableFromPath,
|
|
72
|
-
readParquetTableFromStdin: () => readParquetTableFromStdin
|
|
73
|
-
});
|
|
74
|
-
module.exports = __toCommonJS(index_exports);
|
|
75
|
-
var import_node_buffer = __require("buffer");
|
|
76
|
-
var import_node_fs = __require("fs");
|
|
77
|
-
var import_node_fs2 = __require("fs");
|
|
78
|
-
var import_node_crypto = __require("crypto");
|
|
79
|
-
var import_node_os = __require("os");
|
|
80
|
-
var import_node_path = __toESM2(__require("path"));
|
|
81
|
-
var import_promises = __require("stream/promises");
|
|
82
|
-
var import_apache_arrow = __require("apache-arrow");
|
|
83
|
-
var import_parquet_wasm = __require("parquet-wasm");
|
|
84
|
-
var BlobCtor = typeof Blob === "undefined" ? import_node_buffer.Blob : Blob;
|
|
85
|
-
function readParquetTableFromBuffer(buffer, options) {
|
|
86
|
-
const wasmTable = (0, import_parquet_wasm.readParquet)(buffer, options ?? void 0);
|
|
87
|
-
const ipcStream = wasmTable.intoIPCStream();
|
|
88
|
-
return (0, import_apache_arrow.tableFromIPC)(ipcStream);
|
|
89
|
-
}
|
|
90
|
-
function createParquetBufferSource(buffer) {
|
|
91
|
-
let metadataPromise = null;
|
|
92
|
-
return {
|
|
93
|
-
buffer,
|
|
94
|
-
byteLength: buffer.byteLength,
|
|
95
|
-
readTable: (options) => readParquetTableFromBuffer(buffer, options),
|
|
96
|
-
readMetadata: () => {
|
|
97
|
-
if (!metadataPromise) {
|
|
98
|
-
metadataPromise = readParquetMetadataFromBuffer(buffer);
|
|
99
|
-
}
|
|
100
|
-
return metadataPromise;
|
|
101
|
-
}
|
|
102
|
-
};
|
|
103
|
-
}
|
|
104
|
-
async function openParquetBufferFromPath(filePath) {
|
|
105
|
-
const buffer = await import_node_fs2.promises.readFile(filePath);
|
|
106
|
-
return createParquetBufferSource(buffer);
|
|
107
|
-
}
|
|
108
|
-
async function readParquetTableFromPath(filePath, options) {
|
|
109
|
-
const buffer = await import_node_fs2.promises.readFile(filePath);
|
|
110
|
-
return readParquetTableFromBuffer(buffer, options);
|
|
111
|
-
}
|
|
112
|
-
async function readParquetMetadataFromBuffer(buffer) {
|
|
113
|
-
const blobInput = new Uint8Array(buffer).buffer;
|
|
114
|
-
const file = await import_parquet_wasm.ParquetFile.fromFile(new BlobCtor([blobInput]));
|
|
115
|
-
const meta = file.metadata();
|
|
116
|
-
const fileMeta = meta.fileMetadata();
|
|
117
|
-
const createdBy = fileMeta.createdBy();
|
|
118
|
-
const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
|
|
119
|
-
fileMeta.free();
|
|
120
|
-
meta.free();
|
|
121
|
-
file.free();
|
|
122
|
-
return {
|
|
123
|
-
createdBy: createdBy ?? void 0,
|
|
124
|
-
keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
|
|
125
|
-
};
|
|
126
|
-
}
|
|
127
|
-
function normalizeMetadataValues(input) {
|
|
128
|
-
const normalized = {};
|
|
129
|
-
for (const [key, value] of Object.entries(input)) {
|
|
130
|
-
if (value === null || value === void 0) {
|
|
131
|
-
normalized[key] = "";
|
|
132
|
-
continue;
|
|
133
|
-
}
|
|
134
|
-
normalized[key] = typeof value === "string" ? value : String(value);
|
|
135
|
-
}
|
|
136
|
-
return normalized;
|
|
137
|
-
}
|
|
138
|
-
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
|
|
139
|
-
const tempDir = await import_node_fs2.promises.mkdtemp(import_node_path.default.join((0, import_node_os.tmpdir)(), "parquetlens-"));
|
|
140
|
-
const safeName = filenameHint.replace(/[\\/]/g, "_");
|
|
141
|
-
const filePath = import_node_path.default.join(tempDir, `${(0, import_node_crypto.randomUUID)()}-${safeName}`);
|
|
142
|
-
const writeStream = (0, import_node_fs.createWriteStream)(filePath);
|
|
143
|
-
await (0, import_promises.pipeline)(process.stdin, writeStream);
|
|
144
|
-
return {
|
|
145
|
-
path: filePath,
|
|
146
|
-
cleanup: async () => {
|
|
147
|
-
await import_node_fs2.promises.rm(tempDir, { recursive: true, force: true });
|
|
148
|
-
}
|
|
149
|
-
};
|
|
150
|
-
}
|
|
151
|
-
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
|
|
152
|
-
const temp = await bufferStdinToTempFile(filenameHint);
|
|
153
|
-
try {
|
|
154
|
-
return await readParquetTableFromPath(temp.path, options);
|
|
155
|
-
} finally {
|
|
156
|
-
await temp.cleanup();
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
export {
|
|
163
|
-
__toESM,
|
|
164
|
-
require_dist
|
|
165
|
-
};
|
|
166
|
-
//# sourceMappingURL=chunk-E6TEBKS4.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { ParquetFile, readParquet, ReaderOptions } from \"parquet-wasm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Table;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Table {\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,QAAA,gBAAA,CAAA;AAAA,aAAA,eAAA;MAAA,uBAAA,MAAA;MAAA,2BAAA,MAAA;MAAA,2BAAA,MAAA;MAAA,+BAAA,MAAA;MAAA,4BAAA,MAAA;MAAA,0BAAA,MAAA;MAAA,2BAAA,MAAA;IAAA,CAAA;AAAA,WAAA,UAAA,aAAA,aAAA;AAAA,QAAA,qBAAiC,UAAA,QAAA;AACjC,QAAA,iBAAkC,UAAA,IAAA;AAClC,QAAAA,kBAA+B,UAAA,IAAA;AAC/B,QAAA,qBAA2B,UAAA,QAAA;AAC3B,QAAA,iBAAuB,UAAA,IAAA;AACvB,QAAA,mBAAiBC,SAAA,UAAA,MAAA,CAAA;AACjB,QAAA,kBAAyB,UAAA,iBAAA;AAEzB,QAAA,sBAAoC,UAAA,cAAA;AACpC,QAAA,sBAAwD,UAAA,cAAA;AAExD,QAAM,WACJ,OAAO,SAAS,cAAe,mBAAAC,OAAsC;AAwBhE,aAAS,2BACd,QACA,SACO;AACP,YAAM,aAAA,GAAY,oBAAA,aAAY,QAAQ,WAAW,MAAS;AAC1D,YAAM,YAAY,UAAU,cAAc;AAC1C,cAAA,GAAO,oBAAA,cAAa,SAAS;IAC/B;AAEO,aAAS,0BAA0B,QAAyC;AACjF,UAAI,kBAAuD;AAE3D,aAAO;QACL;QACA,YAAY,OAAO;QACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;QACvF,cAAc,MAAM;AAClB,cAAI,CAAC,iBAAiB;AACpB,8BAAkB,8BAA8B,MAAM;UACxD;AACA,iBAAO;QACT;MACF;IACF;AAEA,mBAAsB,0BAA0B,UAAgD;AAC9F,YAAM,SAAS,MAAM,gBAAAC,SAAG,SAAS,QAAQ;AACzC,aAAO,0BAA0B,MAAM;IACzC;AAEA,mBAAsB,yBACpB,UACA,SACgB;AAChB,YAAM,SAAS,MAAM,gBAAAA,SAAG,SAAS,QAAQ;AACzC,aAAO,2BAA2B,QAAQ,OAAO;IACnD;AAEA,mBAAsB,8BACpB,QAC8B;AAC9B,YAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,YAAM,OAAO,MAAM,oBAAA,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,YAAM,OAAO,KAAK,SAAS;AAC3B,YAAM,WAAW,KAAK,aAAa;AACnC,YAAM,YAAY,SAAS,UAAU;AACrC,YAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,eAAS,KAAK;AACd,WAAK,KAAK;AACV,WAAK,KAAK;AAEV,aAAO;QACL,WAAW,aAAa;QACxB,kBAAkB,wBAAwB,gBAAgB;MAC5D;IACF;AAEA,aAAS,wBAAwB,OAAwD;AACvF,YAAM,aAAqC,CAAC;AAE5C,iBAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,YAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,qBAAW,GAAG,IAAI;AAClB;QACF;AACA,mBAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;MACpE;AAEA,aAAO;IACT;AAEA,mBAAsB,sBACpB,eAAe,iBACW;AAC1B,YAAM,UAAU,MAAM,gBAAAA,SAAG,QAAQ,iBAAAC,QAAK,MAAA,GAAK,eAAA,QAAO,GAAG,cAAc,CAAC;AACpE,YAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,YAAM,WAAW,iBAAAA,QAAK,KAAK,SAAS,IAAA,GAAG,mBAAA,YAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,YAAM,eAAA,GAAc,eAAA,mBAAkB,QAAQ;AAE9C,aAAA,GAAM,gBAAA,UAAS,QAAQ,OAAO,WAAW;AAEzC,aAAO;QACL,MAAM;QACN,SAAS,YAAY;AACnB,gBAAM,gBAAAD,SAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;QACvD;MACF;IACF;AAEA,mBAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,YAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,UAAI;AACF,eAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;MAC1D,UAAA;AACE,cAAM,KAAK,QAAQ;MACrB;IACF;;;","names":["import_node_fs","__toESM","NodeBlob","fs","path"]}
|
package/dist/chunk-JOHKCQYH.js
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import { createRequire } from 'module';
|
|
2
|
-
import { fileURLToPath } from 'url';
|
|
3
|
-
import { dirname } from 'path';
|
|
4
|
-
const require = createRequire(import.meta.url);
|
|
5
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
-
const __dirname = dirname(__filename);
|
|
7
|
-
|
|
8
|
-
// ../../packages/parquet-reader/dist/index.js
|
|
9
|
-
import { Blob as NodeBlob } from "buffer";
|
|
10
|
-
import { createWriteStream } from "fs";
|
|
11
|
-
import { promises as fs } from "fs";
|
|
12
|
-
import { randomUUID } from "crypto";
|
|
13
|
-
import { tmpdir } from "os";
|
|
14
|
-
import path from "path";
|
|
15
|
-
import { pipeline } from "stream/promises";
|
|
16
|
-
import { tableFromIPC } from "apache-arrow";
|
|
17
|
-
import { ParquetFile, readParquet } from "parquet-wasm/esm";
|
|
18
|
-
var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
|
|
19
|
-
function readParquetTableFromBuffer(buffer, options) {
|
|
20
|
-
const wasmTable = readParquet(buffer, options ?? void 0);
|
|
21
|
-
const ipcStream = wasmTable.intoIPCStream();
|
|
22
|
-
return tableFromIPC(ipcStream);
|
|
23
|
-
}
|
|
24
|
-
function createParquetBufferSource(buffer) {
|
|
25
|
-
let metadataPromise = null;
|
|
26
|
-
return {
|
|
27
|
-
buffer,
|
|
28
|
-
byteLength: buffer.byteLength,
|
|
29
|
-
readTable: (options) => readParquetTableFromBuffer(buffer, options),
|
|
30
|
-
readMetadata: () => {
|
|
31
|
-
if (!metadataPromise) {
|
|
32
|
-
metadataPromise = readParquetMetadataFromBuffer(buffer);
|
|
33
|
-
}
|
|
34
|
-
return metadataPromise;
|
|
35
|
-
}
|
|
36
|
-
};
|
|
37
|
-
}
|
|
38
|
-
async function openParquetBufferFromPath(filePath) {
|
|
39
|
-
const buffer = await fs.readFile(filePath);
|
|
40
|
-
return createParquetBufferSource(buffer);
|
|
41
|
-
}
|
|
42
|
-
async function readParquetTableFromPath(filePath, options) {
|
|
43
|
-
const buffer = await fs.readFile(filePath);
|
|
44
|
-
return readParquetTableFromBuffer(buffer, options);
|
|
45
|
-
}
|
|
46
|
-
async function readParquetMetadataFromBuffer(buffer) {
|
|
47
|
-
const blobInput = new Uint8Array(buffer).buffer;
|
|
48
|
-
const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
|
|
49
|
-
const meta = file.metadata();
|
|
50
|
-
const fileMeta = meta.fileMetadata();
|
|
51
|
-
const createdBy = fileMeta.createdBy();
|
|
52
|
-
const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
|
|
53
|
-
fileMeta.free();
|
|
54
|
-
meta.free();
|
|
55
|
-
file.free();
|
|
56
|
-
return {
|
|
57
|
-
createdBy: createdBy ?? void 0,
|
|
58
|
-
keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
function normalizeMetadataValues(input) {
|
|
62
|
-
const normalized = {};
|
|
63
|
-
for (const [key, value] of Object.entries(input)) {
|
|
64
|
-
if (value === null || value === void 0) {
|
|
65
|
-
normalized[key] = "";
|
|
66
|
-
continue;
|
|
67
|
-
}
|
|
68
|
-
normalized[key] = typeof value === "string" ? value : String(value);
|
|
69
|
-
}
|
|
70
|
-
return normalized;
|
|
71
|
-
}
|
|
72
|
-
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
|
|
73
|
-
const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
|
|
74
|
-
const safeName = filenameHint.replace(/[\\/]/g, "_");
|
|
75
|
-
const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
|
|
76
|
-
const writeStream = createWriteStream(filePath);
|
|
77
|
-
await pipeline(process.stdin, writeStream);
|
|
78
|
-
return {
|
|
79
|
-
path: filePath,
|
|
80
|
-
cleanup: async () => {
|
|
81
|
-
await fs.rm(tempDir, { recursive: true, force: true });
|
|
82
|
-
}
|
|
83
|
-
};
|
|
84
|
-
}
|
|
85
|
-
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
|
|
86
|
-
const temp = await bufferStdinToTempFile(filenameHint);
|
|
87
|
-
try {
|
|
88
|
-
return await readParquetTableFromPath(temp.path, options);
|
|
89
|
-
} finally {
|
|
90
|
-
await temp.cleanup();
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
export {
|
|
95
|
-
openParquetBufferFromPath,
|
|
96
|
-
readParquetTableFromPath,
|
|
97
|
-
readParquetTableFromStdin
|
|
98
|
-
};
|
|
99
|
-
//# sourceMappingURL=chunk-JOHKCQYH.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Table;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Table {\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,aAAa,mBAAuC;AAE7D,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAwBhE,SAAS,2BACd,QACA,SACO;AACP,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
|
package/dist/chunk-LHMHT2IQ.js
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import { createRequire } from 'module';
|
|
2
|
-
import { fileURLToPath } from 'url';
|
|
3
|
-
import { dirname } from 'path';
|
|
4
|
-
const require = createRequire(import.meta.url);
|
|
5
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
6
|
-
const __dirname = dirname(__filename);
|
|
7
|
-
|
|
8
|
-
// ../../packages/parquet-reader/dist/index.js
|
|
9
|
-
import { Blob as NodeBlob } from "buffer";
|
|
10
|
-
import { createWriteStream } from "fs";
|
|
11
|
-
import { promises as fs } from "fs";
|
|
12
|
-
import { randomUUID } from "crypto";
|
|
13
|
-
import { tmpdir } from "os";
|
|
14
|
-
import path from "path";
|
|
15
|
-
import { pipeline } from "stream/promises";
|
|
16
|
-
import { tableFromIPC } from "apache-arrow";
|
|
17
|
-
import { ParquetFile, readParquet } from "parquet-wasm";
|
|
18
|
-
var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
|
|
19
|
-
function readParquetTableFromBuffer(buffer, options) {
|
|
20
|
-
const wasmTable = readParquet(buffer, options ?? void 0);
|
|
21
|
-
const ipcStream = wasmTable.intoIPCStream();
|
|
22
|
-
return tableFromIPC(ipcStream);
|
|
23
|
-
}
|
|
24
|
-
function createParquetBufferSource(buffer) {
|
|
25
|
-
let metadataPromise = null;
|
|
26
|
-
return {
|
|
27
|
-
buffer,
|
|
28
|
-
byteLength: buffer.byteLength,
|
|
29
|
-
readTable: (options) => readParquetTableFromBuffer(buffer, options),
|
|
30
|
-
readMetadata: () => {
|
|
31
|
-
if (!metadataPromise) {
|
|
32
|
-
metadataPromise = readParquetMetadataFromBuffer(buffer);
|
|
33
|
-
}
|
|
34
|
-
return metadataPromise;
|
|
35
|
-
}
|
|
36
|
-
};
|
|
37
|
-
}
|
|
38
|
-
async function openParquetBufferFromPath(filePath) {
|
|
39
|
-
const buffer = await fs.readFile(filePath);
|
|
40
|
-
return createParquetBufferSource(buffer);
|
|
41
|
-
}
|
|
42
|
-
async function readParquetTableFromPath(filePath, options) {
|
|
43
|
-
const buffer = await fs.readFile(filePath);
|
|
44
|
-
return readParquetTableFromBuffer(buffer, options);
|
|
45
|
-
}
|
|
46
|
-
async function readParquetMetadataFromBuffer(buffer) {
|
|
47
|
-
const blobInput = new Uint8Array(buffer).buffer;
|
|
48
|
-
const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
|
|
49
|
-
const meta = file.metadata();
|
|
50
|
-
const fileMeta = meta.fileMetadata();
|
|
51
|
-
const createdBy = fileMeta.createdBy();
|
|
52
|
-
const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
|
|
53
|
-
fileMeta.free();
|
|
54
|
-
meta.free();
|
|
55
|
-
file.free();
|
|
56
|
-
return {
|
|
57
|
-
createdBy: createdBy ?? void 0,
|
|
58
|
-
keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
function normalizeMetadataValues(input) {
|
|
62
|
-
const normalized = {};
|
|
63
|
-
for (const [key, value] of Object.entries(input)) {
|
|
64
|
-
if (value === null || value === void 0) {
|
|
65
|
-
normalized[key] = "";
|
|
66
|
-
continue;
|
|
67
|
-
}
|
|
68
|
-
normalized[key] = typeof value === "string" ? value : String(value);
|
|
69
|
-
}
|
|
70
|
-
return normalized;
|
|
71
|
-
}
|
|
72
|
-
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
|
|
73
|
-
const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
|
|
74
|
-
const safeName = filenameHint.replace(/[\\/]/g, "_");
|
|
75
|
-
const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
|
|
76
|
-
const writeStream = createWriteStream(filePath);
|
|
77
|
-
await pipeline(process.stdin, writeStream);
|
|
78
|
-
return {
|
|
79
|
-
path: filePath,
|
|
80
|
-
cleanup: async () => {
|
|
81
|
-
await fs.rm(tempDir, { recursive: true, force: true });
|
|
82
|
-
}
|
|
83
|
-
};
|
|
84
|
-
}
|
|
85
|
-
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
|
|
86
|
-
const temp = await bufferStdinToTempFile(filenameHint);
|
|
87
|
-
try {
|
|
88
|
-
return await readParquetTableFromPath(temp.path, options);
|
|
89
|
-
} finally {
|
|
90
|
-
await temp.cleanup();
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
export {
|
|
95
|
-
openParquetBufferFromPath,
|
|
96
|
-
readParquetTableFromPath,
|
|
97
|
-
readParquetTableFromStdin
|
|
98
|
-
};
|
|
99
|
-
//# sourceMappingURL=chunk-LHMHT2IQ.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { ParquetFile, readParquet, ReaderOptions } from \"parquet-wasm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Table;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Table {\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,aAAa,mBAAkC;AAExD,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAwBhE,SAAS,2BACd,QACA,SACO;AACP,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
|
package/dist/chunk-UUCD5YU4.js
DELETED
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
// ../../packages/parquet-reader/dist/index.js
|
|
2
|
-
import { Blob as NodeBlob } from "buffer";
|
|
3
|
-
import { createWriteStream } from "fs";
|
|
4
|
-
import { promises as fs } from "fs";
|
|
5
|
-
import { randomUUID } from "crypto";
|
|
6
|
-
import { tmpdir } from "os";
|
|
7
|
-
import path from "path";
|
|
8
|
-
import { pipeline } from "stream/promises";
|
|
9
|
-
import { tableFromIPC } from "apache-arrow";
|
|
10
|
-
import { ParquetFile, readParquet } from "parquet-wasm";
|
|
11
|
-
var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
|
|
12
|
-
function readParquetTableFromBuffer(buffer, options) {
|
|
13
|
-
const wasmTable = readParquet(buffer, options ?? void 0);
|
|
14
|
-
const ipcStream = wasmTable.intoIPCStream();
|
|
15
|
-
return tableFromIPC(ipcStream);
|
|
16
|
-
}
|
|
17
|
-
function createParquetBufferSource(buffer) {
|
|
18
|
-
let metadataPromise = null;
|
|
19
|
-
return {
|
|
20
|
-
buffer,
|
|
21
|
-
byteLength: buffer.byteLength,
|
|
22
|
-
readTable: (options) => readParquetTableFromBuffer(buffer, options),
|
|
23
|
-
readMetadata: () => {
|
|
24
|
-
if (!metadataPromise) {
|
|
25
|
-
metadataPromise = readParquetMetadataFromBuffer(buffer);
|
|
26
|
-
}
|
|
27
|
-
return metadataPromise;
|
|
28
|
-
}
|
|
29
|
-
};
|
|
30
|
-
}
|
|
31
|
-
async function openParquetBufferFromPath(filePath) {
|
|
32
|
-
const buffer = await fs.readFile(filePath);
|
|
33
|
-
return createParquetBufferSource(buffer);
|
|
34
|
-
}
|
|
35
|
-
async function readParquetTableFromPath(filePath, options) {
|
|
36
|
-
const buffer = await fs.readFile(filePath);
|
|
37
|
-
return readParquetTableFromBuffer(buffer, options);
|
|
38
|
-
}
|
|
39
|
-
async function readParquetMetadataFromBuffer(buffer) {
|
|
40
|
-
const blobInput = new Uint8Array(buffer).buffer;
|
|
41
|
-
const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
|
|
42
|
-
const meta = file.metadata();
|
|
43
|
-
const fileMeta = meta.fileMetadata();
|
|
44
|
-
const createdBy = fileMeta.createdBy();
|
|
45
|
-
const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
|
|
46
|
-
fileMeta.free();
|
|
47
|
-
meta.free();
|
|
48
|
-
file.free();
|
|
49
|
-
return {
|
|
50
|
-
createdBy: createdBy ?? void 0,
|
|
51
|
-
keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
function normalizeMetadataValues(input) {
|
|
55
|
-
const normalized = {};
|
|
56
|
-
for (const [key, value] of Object.entries(input)) {
|
|
57
|
-
if (value === null || value === void 0) {
|
|
58
|
-
normalized[key] = "";
|
|
59
|
-
continue;
|
|
60
|
-
}
|
|
61
|
-
normalized[key] = typeof value === "string" ? value : String(value);
|
|
62
|
-
}
|
|
63
|
-
return normalized;
|
|
64
|
-
}
|
|
65
|
-
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
|
|
66
|
-
const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
|
|
67
|
-
const safeName = filenameHint.replace(/[\\/]/g, "_");
|
|
68
|
-
const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
|
|
69
|
-
const writeStream = createWriteStream(filePath);
|
|
70
|
-
await pipeline(process.stdin, writeStream);
|
|
71
|
-
return {
|
|
72
|
-
path: filePath,
|
|
73
|
-
cleanup: async () => {
|
|
74
|
-
await fs.rm(tempDir, { recursive: true, force: true });
|
|
75
|
-
}
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
|
|
79
|
-
const temp = await bufferStdinToTempFile(filenameHint);
|
|
80
|
-
try {
|
|
81
|
-
return await readParquetTableFromPath(temp.path, options);
|
|
82
|
-
} finally {
|
|
83
|
-
await temp.cleanup();
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
export {
|
|
88
|
-
openParquetBufferFromPath,
|
|
89
|
-
readParquetTableFromPath,
|
|
90
|
-
readParquetTableFromStdin
|
|
91
|
-
};
|
|
92
|
-
//# sourceMappingURL=chunk-UUCD5YU4.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { ParquetFile, readParquet, ReaderOptions } from \"parquet-wasm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Table;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Table {\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,aAAa,mBAAkC;AAExD,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAwBhE,SAAS,2BACd,QACA,SACO;AACP,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
|