parquetlens 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,87 @@
1
+ # parquetlens
2
+
3
+ A fast, interactive TUI for viewing Parquet files — like [csvlens](https://github.com/YS-L/csvlens), but for Parquet.
4
+
5
+ ![parquetlens](https://github.com/user-attachments/assets/4ad68486-e544-4ef1-aa50-5ca855407259)
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm install -g parquetlens
11
+ ```
12
+
13
+ Or run directly with npx:
14
+
15
+ ```bash
16
+ npx parquetlens data.parquet
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```bash
22
+ parquetlens <file|url|-> [options]
23
+ ```
24
+
25
+ **Remote URL requirements:**
26
+
27
+ - HTTP(S) URLs (including the `hf://` shortcut) are read with range requests via `curl`, so `curl` must be available on your PATH.
28
+
29
+ **Options:**
30
+
31
+ - `--limit <n>` - Number of rows to show (default: 20)
32
+ - `--columns <a,b,c>` - Comma-separated column list
33
+ - `--schema` - Print schema only
34
+ - `--no-schema` - Skip schema output
35
+ - `--json` - Output rows as JSON lines
36
+ - `--tui` - Open interactive viewer (default)
37
+ - `--plain` / `--no-tui` - Disable interactive viewer
38
+
39
+ **Examples:**
40
+
41
+ ```bash
42
+ # View local file
43
+ parquetlens data.parquet
44
+
45
+ # View with column selection
46
+ parquetlens data.parquet --columns city,state
47
+
48
+ # Fetch from URL (e.g., Hugging Face datasets)
49
+ parquetlens https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/daily_papers.parquet
50
+
51
+ # Hugging Face shortcut
52
+ parquetlens hf://datasets/cfahlgren1/hub-stats/daily_papers.parquet
53
+
54
+ # Pipe from stdin
55
+ parquetlens - < data.parquet
56
+
57
+ # Plain output (no TUI)
58
+ parquetlens data.parquet --plain --limit 100
59
+ ```
60
+
61
+ ## TUI Controls
62
+
63
+ | Key | Action |
64
+ | --------------- | ----------------------- |
65
+ | `j/k` or arrows | Scroll rows |
66
+ | `h/l` | Jump columns |
67
+ | `PgUp/PgDn` | Page scroll |
68
+ | Mouse wheel | Scroll |
69
+ | Click cell | Open detail panel |
70
+ | `s` or `Enter` | Toggle detail panel |
71
+ | `e` | Show error detail |
72
+ | `y` | Copy error to clipboard |
73
+ | `x` or `Esc` | Close panel (or quit) |
74
+ | `q` | Quit |
75
+
76
+ ## Features
77
+
78
+ - **Fast**: Uses duckdb-wasm with HTTP range requests
79
+ - **Interactive TUI**: Full-screen terminal UI with mouse support
80
+ - **URL Support**: Read Parquet files from URLs (including `hf://`)
81
+ - **Column Types**: Shows Arrow schema types in headers
82
+ - **Cell Detail**: Click any cell to see full content
83
+ - **Streaming**: Reads only the rows you need
84
+
85
+ ## License
86
+
87
+ MIT
@@ -0,0 +1,123 @@
1
+ import { createRequire } from 'module';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname } from 'path';
4
+ const require = createRequire(import.meta.url);
5
+ const __filename = fileURLToPath(import.meta.url);
6
+ const __dirname = dirname(__filename);
7
+
8
+ // ../../packages/parquet-reader/dist/index.js
9
+ import { Blob as NodeBlob } from "buffer";
10
+ import { createWriteStream, readFileSync } from "fs";
11
+ import { promises as fs } from "fs";
12
+ import { randomUUID } from "crypto";
13
+ import { tmpdir } from "os";
14
+ import path from "path";
15
+ import { pipeline } from "stream/promises";
16
+ import { tableFromIPC } from "apache-arrow";
17
+ import { initSync, ParquetFile, readParquet } from "parquet-wasm/esm";
18
+ var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
19
+ var wasmInitialized = false;
20
/**
 * Locate `parquet_wasm_bg.wasm` by walking up the directory tree.
 *
 * At every level, starting at `startDir` and ending at the filesystem root
 * (inclusive), the path `<dir>/node_modules/parquet-wasm/esm/parquet_wasm_bg.wasm`
 * is probed by attempting to read it.
 *
 * @param {string} startDir - Directory to start from. Relative paths are
 *   resolved against the current working directory so the upward walk works.
 * @returns {string} Path of the first readable WASM file found.
 * @throws {Error} When no directory on the way up contains the file.
 */
function findWasmFile(startDir) {
  // Resolve first: for a relative path such as "." `path.dirname` returns the
  // same string, which would end the walk before it even begins.
  let dir = path.resolve(startDir);
  for (;;) {
    const wasmPath = path.join(dir, "node_modules", "parquet-wasm", "esm", "parquet_wasm_bg.wasm");
    try {
      readFileSync(wasmPath, { flag: "r" });
      return wasmPath;
    } catch {
      // Not readable at this level; keep climbing.
    }
    const parent = path.dirname(dir);
    // The root is its own parent. It has already been probed above, so stop.
    if (parent === dir) break;
    dir = parent;
  }
  throw new Error("Could not find parquet-wasm WASM file");
}
33
/**
 * Synchronously initialize the parquet-wasm module the first time it is needed.
 *
 * The WASM binary is looked up from this module's own directory first, so the
 * lookup does not depend on where the CLI was launched from. The current
 * working directory is kept as a fallback, which was the only location
 * searched before.
 *
 * @throws {Error} When the WASM file cannot be found from either location, or
 *   when reading/initializing it fails.
 */
function ensureWasmInitialized() {
  if (wasmInitialized) return;
  let wasmPath;
  try {
    // `__dirname` is derived from import.meta.url in this file's banner.
    wasmPath = findWasmFile(__dirname);
  } catch {
    // Deliberate fallback: a miss next to the module is expected in some
    // layouts, so retry from cwd and let that attempt surface the error.
    wasmPath = findWasmFile(process.cwd());
  }
  const wasmBytes = readFileSync(wasmPath);
  initSync({ module: wasmBytes });
  wasmInitialized = true;
}
40
/**
 * Decode Parquet bytes into an Arrow table.
 *
 * Ensures the WASM module is initialized, reads the buffer with `readParquet`,
 * converts the result to an IPC stream and hands that to `tableFromIPC`.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @param {object} [options] - Reader options forwarded to `readParquet`;
 *   `null` is treated the same as omitted.
 * @returns {Promise<object>} The table produced by `tableFromIPC`.
 */
async function readParquetTableFromBuffer(buffer, options) {
  ensureWasmInitialized();
  const readerOptions = options ?? undefined;
  return tableFromIPC(readParquet(buffer, readerOptions).intoIPCStream());
}
46
/**
 * Wrap an in-memory Parquet buffer in a small reader object.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @returns {{buffer: Uint8Array, byteLength: number, readTable: Function, readMetadata: Function}}
 *   `readTable(options)` decodes the buffer on every call; `readMetadata()`
 *   starts the metadata read on first use and returns that same promise on
 *   later calls.
 */
function createParquetBufferSource(buffer) {
  let cachedMetadata = null;

  const readTable = (options) => readParquetTableFromBuffer(buffer, options);

  const readMetadata = () => {
    if (cachedMetadata) {
      return cachedMetadata;
    }
    cachedMetadata = readParquetMetadataFromBuffer(buffer);
    return cachedMetadata;
  };

  return { buffer, byteLength: buffer.byteLength, readTable, readMetadata };
}
60
/**
 * Read a file fully into memory and wrap it as a Parquet buffer source.
 *
 * @param {string} filePath - Path of the Parquet file to read.
 * @returns {Promise<object>} The source object built by `createParquetBufferSource`.
 */
async function openParquetBufferFromPath(filePath) {
  return createParquetBufferSource(await fs.readFile(filePath));
}
64
/**
 * Read a Parquet file from disk and decode it into a table.
 *
 * @param {string} filePath - Path of the Parquet file to read.
 * @param {object} [options] - Reader options forwarded to `readParquetTableFromBuffer`.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromPath(filePath, options) {
  const fileBytes = await fs.readFile(filePath);
  const table = readParquetTableFromBuffer(fileBytes, options);
  return table;
}
68
/**
 * Read file-level metadata (writer string and key/value pairs) from Parquet bytes.
 *
 * The bytes are copied into a fresh `Uint8Array`, wrapped in a Blob and opened
 * with `ParquetFile.fromFile`. The WASM-backed handles are released in a
 * `finally` block so they are freed even when a metadata accessor throws.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @returns {Promise<{createdBy: (string|undefined), keyValueMetadata: Object<string, string>}>}
 */
async function readParquetMetadataFromBuffer(buffer) {
  ensureWasmInitialized();
  const blobInput = new Uint8Array(buffer).buffer;
  const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
  let meta;
  let fileMeta;
  try {
    meta = file.metadata();
    fileMeta = meta.fileMetadata();
    const createdBy = fileMeta.createdBy();
    const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
    return {
      createdBy: createdBy ?? void 0,
      keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
    };
  } finally {
    // Release in the same order as before: innermost handle first. Handles
    // that were never obtained (an earlier call threw) are skipped.
    if (fileMeta) fileMeta.free();
    if (meta) meta.free();
    file.free();
  }
}
84
/**
 * Convert every metadata value to a string.
 *
 * `null` and `undefined` become the empty string; everything else goes through
 * `String(value)`. The result is built with `Object.fromEntries`, which defines
 * own properties, so a key literally named `__proto__` is kept instead of being
 * swallowed by the `__proto__` setter as it was with plain assignment.
 *
 * @param {Object<string, unknown>} input - Raw key/value metadata.
 * @returns {Object<string, string>} New object with the same keys and string values.
 */
function normalizeMetadataValues(input) {
  return Object.fromEntries(
    Object.entries(input).map(([key, value]) => [
      key,
      value === null || value === undefined ? "" : String(value),
    ]),
  );
}
95
/**
 * Spool everything from `process.stdin` into a file in a fresh temp directory.
 *
 * The directory is created with `fs.mkdtemp` under the OS temp dir using the
 * `parquetlens-` prefix. If copying stdin fails, the directory is removed
 * before the error is rethrown so a failed read does not leave it behind.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Suffix for the temp file
 *   name; `/` and `\` are replaced with `_`.
 * @returns {Promise<{path: string, cleanup: () => Promise<void>}>} The temp
 *   file path and a function that recursively removes the temp directory.
 */
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
  const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
  const cleanup = async () => {
    await fs.rm(tempDir, { recursive: true, force: true });
  };
  const safeName = filenameHint.replace(/[\\/]/g, "_");
  const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
  try {
    await pipeline(process.stdin, createWriteStream(filePath));
  } catch (err) {
    // Best-effort removal: the pipeline error is the one the caller needs to
    // see, so a secondary cleanup failure is intentionally not propagated.
    await cleanup().catch(() => {});
    throw err;
  }
  return {
    path: filePath,
    cleanup
  };
}
108
/**
 * Decode a Parquet table piped in on stdin.
 *
 * Stdin is first spooled to a temp file, that file is read as a table, and the
 * temp directory is removed afterwards whether or not reading succeeded.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Name hint for the temp file.
 * @param {object} [options] - Reader options forwarded to `readParquetTableFromPath`.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
  const spooled = await bufferStdinToTempFile(filenameHint);
  try {
    const table = await readParquetTableFromPath(spooled.path, options);
    return table;
  } finally {
    await spooled.cleanup();
  }
}
116
+
117
+ export {
118
+ readParquetTableFromBuffer,
119
+ openParquetBufferFromPath,
120
+ readParquetTableFromPath,
121
+ readParquetTableFromStdin
122
+ };
123
+ //# sourceMappingURL=chunk-2RGMZZ7F.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream, readFileSync } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { initSync, ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\n\nfunction findWasmFile(startDir: string): string {\n let dir = startDir;\n while (dir !== path.dirname(dir)) {\n const wasmPath = path.join(dir, \"node_modules\", \"parquet-wasm\", \"esm\", \"parquet_wasm_bg.wasm\");\n try {\n readFileSync(wasmPath, { flag: \"r\" });\n return wasmPath;\n } catch {\n dir = path.dirname(dir);\n }\n }\n throw new Error(\"Could not find parquet-wasm WASM file\");\n}\n\nfunction ensureWasmInitialized(): void {\n if (wasmInitialized) return;\n\n // Use process.cwd() as starting point to find node_modules\n const wasmPath = findWasmFile(process.cwd());\n const wasmBytes = readFileSync(wasmPath);\n initSync({ module: wasmBytes });\n wasmInitialized = true;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n 
buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? 
undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n 
}\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,mBAAmB,oBAAoB;AAChD,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,UAAU,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AAEtB,SAAS,aAAa,UAA0B;AAC9C,MAAI,MAAM;AACV,SAAO,QAAQ,KAAK,QAAQ,GAAG,GAAG;AAChC,UAAM,WAAW,KAAK,KAAK,KAAK,gBAAgB,gBAAgB,OAAO,sBAAsB;AAC7F,QAAI;AACF,mBAAa,UAAU,EAAE,MAAM,IAAI,CAAC;AACpC,aAAO;IACT,QAAQ;AACN,YAAM,KAAK,QAAQ,GAAG;IACxB;EACF;AACA,QAAM,IAAI,MAAM,uCAAuC;AACzD;AAEA,SAAS,wBAA8B;AACrC,MAAI,gBAAiB;AAGrB,QAAM,WAAW,aAAa,QAAQ,IAAI,CAAC;AAC3C,QAAM,YAAY,aAAa,QAAQ;AACvC,WAAS,EAAE,QAAQ,UAAU,CAAC;AAC9B,oBAAkB;AACpB;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,wBAAsB;AACtB,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,wBAAsB;AACtB,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM
,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
@@ -0,0 +1,113 @@
1
+ import { createRequire } from 'module';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname } from 'path';
4
+ const require = createRequire(import.meta.url);
5
+ const __filename = fileURLToPath(import.meta.url);
6
+ const __dirname = dirname(__filename);
7
+
8
+ // ../../packages/parquet-reader/dist/index.js
9
+ import { Blob as NodeBlob } from "buffer";
10
+ import { createWriteStream } from "fs";
11
+ import { promises as fs } from "fs";
12
+ import { randomUUID } from "crypto";
13
+ import { tmpdir } from "os";
14
+ import path from "path";
15
+ import { pipeline } from "stream/promises";
16
+ import { tableFromIPC } from "apache-arrow";
17
+ import initWasm, { ParquetFile, readParquet } from "parquet-wasm/esm";
18
+ var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
19
+ var wasmInitialized = false;
20
+ var wasmInitPromise = null;
21
/**
 * Initialize the parquet-wasm module once, sharing one in-flight promise
 * between concurrent callers.
 *
 * If `initWasm()` rejects, the cached promise is cleared so a later call can
 * try again instead of receiving the same stale rejection forever.
 *
 * @returns {Promise<void>} Resolves once the module is initialized.
 */
async function ensureWasmInitialized() {
  if (wasmInitialized) return;
  if (!wasmInitPromise) {
    wasmInitPromise = initWasm().then(
      () => {
        wasmInitialized = true;
      },
      (err) => {
        // Forget the failed attempt; current waiters still see the error.
        wasmInitPromise = null;
        throw err;
      },
    );
  }
  return wasmInitPromise;
}
30
/**
 * Decode Parquet bytes into an Arrow table.
 *
 * Waits for WASM initialization, reads the buffer with `readParquet`, converts
 * the result to an IPC stream and hands that to `tableFromIPC`.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @param {object} [options] - Reader options forwarded to `readParquet`;
 *   `null` is treated the same as omitted.
 * @returns {Promise<object>} The table produced by `tableFromIPC`.
 */
async function readParquetTableFromBuffer(buffer, options) {
  await ensureWasmInitialized();
  const readerOptions = options ?? undefined;
  return tableFromIPC(readParquet(buffer, readerOptions).intoIPCStream());
}
36
/**
 * Wrap an in-memory Parquet buffer in a small reader object.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @returns {{buffer: Uint8Array, byteLength: number, readTable: Function, readMetadata: Function}}
 *   `readTable(options)` decodes the buffer on every call; `readMetadata()`
 *   starts the metadata read on first use and returns that same promise on
 *   later calls.
 */
function createParquetBufferSource(buffer) {
  let cachedMetadata = null;

  const readTable = (options) => readParquetTableFromBuffer(buffer, options);

  const readMetadata = () => {
    if (cachedMetadata) {
      return cachedMetadata;
    }
    cachedMetadata = readParquetMetadataFromBuffer(buffer);
    return cachedMetadata;
  };

  return { buffer, byteLength: buffer.byteLength, readTable, readMetadata };
}
50
/**
 * Read a file fully into memory and wrap it as a Parquet buffer source.
 *
 * @param {string} filePath - Path of the Parquet file to read.
 * @returns {Promise<object>} The source object built by `createParquetBufferSource`.
 */
async function openParquetBufferFromPath(filePath) {
  return createParquetBufferSource(await fs.readFile(filePath));
}
54
/**
 * Read a Parquet file from disk and decode it into a table.
 *
 * @param {string} filePath - Path of the Parquet file to read.
 * @param {object} [options] - Reader options forwarded to `readParquetTableFromBuffer`.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromPath(filePath, options) {
  const fileBytes = await fs.readFile(filePath);
  const table = readParquetTableFromBuffer(fileBytes, options);
  return table;
}
58
/**
 * Read file-level metadata (writer string and key/value pairs) from Parquet bytes.
 *
 * The bytes are copied into a fresh `Uint8Array`, wrapped in a Blob and opened
 * with `ParquetFile.fromFile`. The WASM-backed handles are released in a
 * `finally` block so they are freed even when a metadata accessor throws.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @returns {Promise<{createdBy: (string|undefined), keyValueMetadata: Object<string, string>}>}
 */
async function readParquetMetadataFromBuffer(buffer) {
  await ensureWasmInitialized();
  const blobInput = new Uint8Array(buffer).buffer;
  const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
  let meta;
  let fileMeta;
  try {
    meta = file.metadata();
    fileMeta = meta.fileMetadata();
    const createdBy = fileMeta.createdBy();
    const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
    return {
      createdBy: createdBy ?? void 0,
      keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
    };
  } finally {
    // Release in the same order as before: innermost handle first. Handles
    // that were never obtained (an earlier call threw) are skipped.
    if (fileMeta) fileMeta.free();
    if (meta) meta.free();
    file.free();
  }
}
74
/**
 * Convert every metadata value to a string.
 *
 * `null` and `undefined` become the empty string; everything else goes through
 * `String(value)`. The result is built with `Object.fromEntries`, which defines
 * own properties, so a key literally named `__proto__` is kept instead of being
 * swallowed by the `__proto__` setter as it was with plain assignment.
 *
 * @param {Object<string, unknown>} input - Raw key/value metadata.
 * @returns {Object<string, string>} New object with the same keys and string values.
 */
function normalizeMetadataValues(input) {
  return Object.fromEntries(
    Object.entries(input).map(([key, value]) => [
      key,
      value === null || value === undefined ? "" : String(value),
    ]),
  );
}
85
/**
 * Spool everything from `process.stdin` into a file in a fresh temp directory.
 *
 * The directory is created with `fs.mkdtemp` under the OS temp dir using the
 * `parquetlens-` prefix. If copying stdin fails, the directory is removed
 * before the error is rethrown so a failed read does not leave it behind.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Suffix for the temp file
 *   name; `/` and `\` are replaced with `_`.
 * @returns {Promise<{path: string, cleanup: () => Promise<void>}>} The temp
 *   file path and a function that recursively removes the temp directory.
 */
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
  const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
  const cleanup = async () => {
    await fs.rm(tempDir, { recursive: true, force: true });
  };
  const safeName = filenameHint.replace(/[\\/]/g, "_");
  const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
  try {
    await pipeline(process.stdin, createWriteStream(filePath));
  } catch (err) {
    // Best-effort removal: the pipeline error is the one the caller needs to
    // see, so a secondary cleanup failure is intentionally not propagated.
    await cleanup().catch(() => {});
    throw err;
  }
  return {
    path: filePath,
    cleanup
  };
}
98
/**
 * Decode a Parquet table piped in on stdin.
 *
 * Stdin is first spooled to a temp file, that file is read as a table, and the
 * temp directory is removed afterwards whether or not reading succeeded.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Name hint for the temp file.
 * @param {object} [options] - Reader options forwarded to `readParquetTableFromPath`.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
  const spooled = await bufferStdinToTempFile(filenameHint);
  try {
    const table = await readParquetTableFromPath(spooled.path, options);
    return table;
  } finally {
    await spooled.cleanup();
  }
}
106
+
107
+ export {
108
+ readParquetTableFromBuffer,
109
+ openParquetBufferFromPath,
110
+ readParquetTableFromPath,
111
+ readParquetTableFromStdin
112
+ };
113
+ //# sourceMappingURL=chunk-3N45GGD2.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport initWasm, { ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\nlet wasmInitPromise: Promise<void> | null = null;\n\nasync function ensureWasmInitialized(): Promise<void> {\n if (wasmInitialized) return;\n if (!wasmInitPromise) {\n wasmInitPromise = initWasm().then(() => {\n wasmInitialized = true;\n });\n }\n return wasmInitPromise;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n await ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? 
undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n await ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? 
value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,OAAO,YAAY,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AACtB,IAAI,kBAAwC;AAE5C,eAAe,wBAAuC;AACpD,MAAI,gBAAiB;AACrB,MAAI,CAAC,iBAAiB;AACpB,sBAAkB,SAAS,EAAE,KAAK,MAAM;AACtC,wBAAkB;IACpB,CAAC;EACH;AACA,SAAO;AACT;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,QAAM,sBAAsB;AAC5B,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,sBAAsB;AAC5B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QA
AM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
@@ -0,0 +1,112 @@
1
+ import { createRequire } from 'module';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname } from 'path';
4
+ const require = createRequire(import.meta.url);
5
+ const __filename = fileURLToPath(import.meta.url);
6
+ const __dirname = dirname(__filename);
7
+
8
+ // ../../packages/parquet-reader/dist/index.js
9
+ import { Blob as NodeBlob } from "buffer";
10
+ import { createWriteStream, readFileSync } from "fs";
11
+ import { promises as fs } from "fs";
12
+ import { randomUUID } from "crypto";
13
+ import { createRequire as nodeCreateRequire } from "module";
14
+ import { tmpdir } from "os";
15
+ import path from "path";
16
+ import { pipeline } from "stream/promises";
17
+ import { tableFromIPC } from "apache-arrow";
18
+ import { initSync, ParquetFile, readParquet } from "parquet-wasm/esm";
19
+ var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
20
+ var wasmInitialized = false;
21
/**
 * Synchronously initialize the parquet-wasm module the first time it is needed.
 *
 * The WASM binary is located with a `require.resolve` anchored at this
 * module's URL, read from disk, and passed to `initSync`. Later calls return
 * immediately.
 */
function ensureWasmInitialized() {
  if (!wasmInitialized) {
    const resolver = nodeCreateRequire(import.meta.url);
    const wasmFile = resolver.resolve("parquet-wasm/esm/parquet_wasm_bg.wasm");
    initSync({ module: readFileSync(wasmFile) });
    wasmInitialized = true;
  }
}
29
/**
 * Decode Parquet bytes into an Arrow table.
 *
 * Ensures the WASM module is initialized, reads the buffer with `readParquet`,
 * converts the result to an IPC stream and hands that to `tableFromIPC`.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @param {object} [options] - Reader options forwarded to `readParquet`;
 *   `null` is treated the same as omitted.
 * @returns {Promise<object>} The table produced by `tableFromIPC`.
 */
async function readParquetTableFromBuffer(buffer, options) {
  ensureWasmInitialized();
  const readerOptions = options ?? undefined;
  return tableFromIPC(readParquet(buffer, readerOptions).intoIPCStream());
}
35
/**
 * Wrap an in-memory Parquet buffer in a small reader object.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @returns {{buffer: Uint8Array, byteLength: number, readTable: Function, readMetadata: Function}}
 *   `readTable(options)` decodes the buffer on every call; `readMetadata()`
 *   starts the metadata read on first use and returns that same promise on
 *   later calls.
 */
function createParquetBufferSource(buffer) {
  let cachedMetadata = null;

  const readTable = (options) => readParquetTableFromBuffer(buffer, options);

  const readMetadata = () => {
    if (cachedMetadata) {
      return cachedMetadata;
    }
    cachedMetadata = readParquetMetadataFromBuffer(buffer);
    return cachedMetadata;
  };

  return { buffer, byteLength: buffer.byteLength, readTable, readMetadata };
}
49
/**
 * Read a file fully into memory and wrap it as a Parquet buffer source.
 *
 * @param {string} filePath - Path of the Parquet file to read.
 * @returns {Promise<object>} The source object built by `createParquetBufferSource`.
 */
async function openParquetBufferFromPath(filePath) {
  return createParquetBufferSource(await fs.readFile(filePath));
}
53
/**
 * Read a Parquet file from disk and decode it into a table.
 *
 * @param {string} filePath - Path of the Parquet file to read.
 * @param {object} [options] - Reader options forwarded to `readParquetTableFromBuffer`.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromPath(filePath, options) {
  const fileBytes = await fs.readFile(filePath);
  const table = readParquetTableFromBuffer(fileBytes, options);
  return table;
}
57
/**
 * Read file-level metadata (writer string and key/value pairs) from Parquet bytes.
 *
 * The bytes are copied into a fresh `Uint8Array`, wrapped in a Blob and opened
 * with `ParquetFile.fromFile`. The WASM-backed handles are released in a
 * `finally` block so they are freed even when a metadata accessor throws.
 *
 * @param {Uint8Array} buffer - Raw Parquet file contents.
 * @returns {Promise<{createdBy: (string|undefined), keyValueMetadata: Object<string, string>}>}
 */
async function readParquetMetadataFromBuffer(buffer) {
  ensureWasmInitialized();
  const blobInput = new Uint8Array(buffer).buffer;
  const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
  let meta;
  let fileMeta;
  try {
    meta = file.metadata();
    fileMeta = meta.fileMetadata();
    const createdBy = fileMeta.createdBy();
    const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
    return {
      createdBy: createdBy ?? void 0,
      keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
    };
  } finally {
    // Release in the same order as before: innermost handle first. Handles
    // that were never obtained (an earlier call threw) are skipped.
    if (fileMeta) fileMeta.free();
    if (meta) meta.free();
    file.free();
  }
}
73
/**
 * Convert every metadata value to a string.
 *
 * `null` and `undefined` become the empty string; everything else goes through
 * `String(value)`. The result is built with `Object.fromEntries`, which defines
 * own properties, so a key literally named `__proto__` is kept instead of being
 * swallowed by the `__proto__` setter as it was with plain assignment.
 *
 * @param {Object<string, unknown>} input - Raw key/value metadata.
 * @returns {Object<string, string>} New object with the same keys and string values.
 */
function normalizeMetadataValues(input) {
  return Object.fromEntries(
    Object.entries(input).map(([key, value]) => [
      key,
      value === null || value === undefined ? "" : String(value),
    ]),
  );
}
84
/**
 * Spool everything from `process.stdin` into a file in a fresh temp directory.
 *
 * The directory is created with `fs.mkdtemp` under the OS temp dir using the
 * `parquetlens-` prefix. If copying stdin fails, the directory is removed
 * before the error is rethrown so a failed read does not leave it behind.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Suffix for the temp file
 *   name; `/` and `\` are replaced with `_`.
 * @returns {Promise<{path: string, cleanup: () => Promise<void>}>} The temp
 *   file path and a function that recursively removes the temp directory.
 */
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
  const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
  const cleanup = async () => {
    await fs.rm(tempDir, { recursive: true, force: true });
  };
  const safeName = filenameHint.replace(/[\\/]/g, "_");
  const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
  try {
    await pipeline(process.stdin, createWriteStream(filePath));
  } catch (err) {
    // Best-effort removal: the pipeline error is the one the caller needs to
    // see, so a secondary cleanup failure is intentionally not propagated.
    await cleanup().catch(() => {});
    throw err;
  }
  return {
    path: filePath,
    cleanup
  };
}
97
/**
 * Decode a Parquet table piped in on stdin.
 *
 * Stdin is first spooled to a temp file, that file is read as a table, and the
 * temp directory is removed afterwards whether or not reading succeeded.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Name hint for the temp file.
 * @param {object} [options] - Reader options forwarded to `readParquetTableFromPath`.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
  const spooled = await bufferStdinToTempFile(filenameHint);
  try {
    const table = await readParquetTableFromPath(spooled.path, options);
    return table;
  } finally {
    await spooled.cleanup();
  }
}
105
+
106
+ export {
107
+ readParquetTableFromBuffer,
108
+ openParquetBufferFromPath,
109
+ readParquetTableFromPath,
110
+ readParquetTableFromStdin
111
+ };
112
+ //# sourceMappingURL=chunk-AYPIRAOL.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream, readFileSync } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { createRequire as nodeCreateRequire } from \"node:module\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { initSync, ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\n\nfunction ensureWasmInitialized(): void {\n if (wasmInitialized) return;\n\n // Use require.resolve to find the WASM file (works with pnpm)\n const localRequire = nodeCreateRequire(import.meta.url);\n const wasmPath = localRequire.resolve(\"parquet-wasm/esm/parquet_wasm_bg.wasm\");\n const wasmBytes = readFileSync(wasmPath);\n initSync({ module: wasmBytes });\n wasmInitialized = true;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? 
undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? 
value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,mBAAmB,oBAAoB;AAChD,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,iBAAiB,yBAAyB;AACnD,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,UAAU,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AAEtB,SAAS,wBAA8B;AACrC,MAAI,gBAAiB;AAGrB,QAAM,eAAe,kBAAkB,YAAY,GAAG;AACtD,QAAM,WAAW,aAAa,QAAQ,uCAAuC;AAC7E,QAAM,YAAY,aAAa,QAAQ;AACvC,WAAS,EAAE,QAAQ,UAAU,CAAC;AAC9B,oBAAkB;AACpB;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,wBAAsB;AACtB,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,wBAAsB;AACtB,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YA
AY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
@@ -0,0 +1,112 @@
1
+ import { createRequire } from 'module';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname } from 'path';
4
// CommonJS-compatibility shims for this ES module: `__filename`, `__dirname`
// and `require` are all derived from `import.meta.url`.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const require = createRequire(import.meta.url);
7
+
8
+ // ../../packages/parquet-reader/dist/index.js
9
+ import { Blob as NodeBlob } from "buffer";
10
+ import { createWriteStream, readFileSync } from "fs";
11
+ import { promises as fs } from "fs";
12
+ import { randomUUID } from "crypto";
13
+ import { createRequire as nodeCreateRequire } from "module";
14
+ import { tmpdir } from "os";
15
+ import path from "path";
16
+ import { pipeline } from "stream/promises";
17
+ import { tableFromIPC } from "apache-arrow";
18
+ import { initSync, ParquetFile, readParquet } from "parquet-wasm/esm";
19
// Blob constructor used by this module: the global `Blob` when the runtime
// defines one, otherwise the `Blob` imported from the "buffer" module.
const BlobCtor = typeof Blob !== "undefined" ? Blob : NodeBlob;
20
// Flipped to true after `initSync` has completed successfully.
let wasmInitialized = false;

/**
 * Initialise the parquet-wasm module on first use; later calls are no-ops.
 *
 * The `.wasm` file is located with `require.resolve`, read synchronously and
 * handed to `initSync`. If resolution, reading, or `initSync` throws, the flag
 * stays false so a later call tries again.
 *
 * @returns {void}
 */
function ensureWasmInitialized() {
  if (wasmInitialized) return;

  // Uses the aliased `nodeCreateRequire` import: the file banner already binds
  // `createRequire`, and importing the same name twice in one ES module is a
  // SyntaxError.
  const localRequire = nodeCreateRequire(import.meta.url);
  const wasmPath = localRequire.resolve("parquet-wasm/esm/parquet_wasm_bg.wasm");
  const wasmBytes = readFileSync(wasmPath);
  initSync({ module: wasmBytes });
  wasmInitialized = true;
}
29
/**
 * Decode in-memory Parquet bytes into a table.
 *
 * Runs the wasm initialiser, reads the bytes with `readParquet`, and passes
 * the resulting IPC stream to `tableFromIPC`.
 *
 * @param {Uint8Array} buffer - Parquet file contents.
 * @param {object} [options] - Reader options forwarded to `readParquet`;
 *   `null` is forwarded as `undefined`.
 * @returns {Promise<object>} The value returned by `tableFromIPC`.
 */
async function readParquetTableFromBuffer(buffer, options) {
  ensureWasmInitialized();
  const readerOptions = options ?? void 0;
  return tableFromIPC(readParquet(buffer, readerOptions).intoIPCStream());
}
35
/**
 * Wrap Parquet bytes in a small reader object.
 *
 * @param {Uint8Array} buffer - Parquet file contents.
 * @returns {{
 *   buffer: Uint8Array,
 *   byteLength: number,
 *   readTable: (options?: object) => Promise<object>,
 *   readMetadata: () => Promise<object>
 * }} `readTable` decodes the buffer on every call; `readMetadata` starts the
 *   metadata read on the first call and returns that same promise afterwards.
 */
function createParquetBufferSource(buffer) {
  // Promise of the metadata, created lazily by the first readMetadata() call.
  let cachedMetadata = null;

  const readTable = (options) => readParquetTableFromBuffer(buffer, options);

  const readMetadata = () => {
    if (cachedMetadata === null) {
      cachedMetadata = readParquetMetadataFromBuffer(buffer);
    }
    return cachedMetadata;
  };

  return {
    buffer,
    byteLength: buffer.byteLength,
    readTable,
    readMetadata,
  };
}
49
/**
 * Read a whole file into memory and wrap it with `createParquetBufferSource`.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<object>} The source object built from the file's bytes.
 */
async function openParquetBufferFromPath(filePath) {
  return createParquetBufferSource(await fs.readFile(filePath));
}
53
/**
 * Read a whole file into memory and decode it with
 * `readParquetTableFromBuffer`.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {object} [options] - Reader options, forwarded unchanged.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromPath(filePath, options) {
  const fileBytes = await fs.readFile(filePath);
  return readParquetTableFromBuffer(fileBytes, options);
}
57
/**
 * Read file-level metadata from in-memory Parquet bytes.
 *
 * @param {Uint8Array} buffer - Parquet file contents.
 * @returns {Promise<{createdBy: (string|undefined), keyValueMetadata: Object<string, string>}>}
 *   `createdBy` is `undefined` when the file reports none; the key/value
 *   metadata is passed through `normalizeMetadataValues`.
 * @throws Whatever `ParquetFile.fromFile` or the metadata accessors throw.
 *   Every handle obtained before the failure is still freed.
 */
async function readParquetMetadataFromBuffer(buffer) {
  ensureWasmInitialized();

  // `new Uint8Array(buffer)` copies the bytes, so `.buffer` is a fresh
  // ArrayBuffer that holds exactly this file's contents.
  const blobInput = new Uint8Array(buffer).buffer;
  const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));

  // Nested try/finally: each handle is freed (innermost first, same order as
  // before) even when a later accessor throws.
  // NOTE(review): free() presumably releases wasm-side memory — confirm against parquet-wasm.
  try {
    const meta = file.metadata();
    try {
      const fileMeta = meta.fileMetadata();
      try {
        const createdBy = fileMeta.createdBy();
        const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
        return {
          createdBy: createdBy ?? void 0,
          keyValueMetadata: normalizeMetadataValues(keyValueMetadata),
        };
      } finally {
        fileMeta.free();
      }
    } finally {
      meta.free();
    }
  } finally {
    file.free();
  }
}
73
/**
 * Convert every metadata value to a string.
 *
 * `null` and `undefined` become `""`; strings are kept as they are; anything
 * else goes through `String(value)`.
 *
 * @param {Object<string, unknown>} input - Raw key/value metadata.
 * @returns {Object<string, string>} A new object with the same own enumerable
 *   keys and string values.
 */
function normalizeMetadataValues(input) {
  // Object.fromEntries defines own data properties, so a key named
  // "__proto__" is kept. Plain assignment (`obj[key] = "..."`) would hit the
  // Object.prototype setter instead and silently drop that entry.
  return Object.fromEntries(
    Object.entries(input).map(([key, value]) => [key, value == null ? "" : String(value)]),
  );
}
84
/**
 * Copy everything from `process.stdin` into a file inside a newly created
 * temporary directory.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Suffix for the temp file
 *   name; `/` and `\` are replaced with `_` so the hint cannot add path
 *   segments.
 * @returns {Promise<{path: string, cleanup: () => Promise<void>}>} The file
 *   path plus a `cleanup` function that removes the temporary directory.
 * @throws Rethrows the error from the stdin-to-file copy. The temporary
 *   directory is removed first, so a failed copy leaves nothing behind.
 */
async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
  const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
  const removeTempDir = async () => {
    await fs.rm(tempDir, { recursive: true, force: true });
  };

  const safeName = filenameHint.replace(/[\\/]/g, "_");
  const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);

  try {
    await pipeline(process.stdin, createWriteStream(filePath));
  } catch (err) {
    // The caller never receives `cleanup` on this path, so remove the
    // directory here. Removal is best-effort so the copy error stays the one
    // that propagates.
    await removeTempDir().catch(() => {});
    throw err;
  }

  return {
    path: filePath,
    cleanup: removeTempDir,
  };
}
97
/**
 * Decode Parquet data arriving on stdin.
 *
 * Stdin is first spooled to a temporary file via `bufferStdinToTempFile`, the
 * file is decoded with `readParquetTableFromPath`, and the temporary file's
 * `cleanup` runs whether decoding succeeds or fails.
 *
 * @param {string} [filenameHint="stdin.parquet"] - Forwarded to
 *   `bufferStdinToTempFile`.
 * @param {object} [options] - Reader options, forwarded unchanged.
 * @returns {Promise<object>} The decoded table.
 */
async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
  const spooled = await bufferStdinToTempFile(filenameHint);

  try {
    const table = await readParquetTableFromPath(spooled.path, options);
    return table;
  } finally {
    await spooled.cleanup();
  }
}
105
+
106
+ export {
107
+ readParquetTableFromBuffer,
108
+ openParquetBufferFromPath,
109
+ readParquetTableFromPath,
110
+ readParquetTableFromStdin
111
+ };
112
+ //# sourceMappingURL=chunk-IMVXDI4K.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream, readFileSync } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { createRequire } from \"node:module\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { initSync, ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\n\nfunction ensureWasmInitialized(): void {\n if (wasmInitialized) return;\n\n const require = createRequire(import.meta.url);\n const wasmPath = require.resolve(\"parquet-wasm/esm/parquet_wasm_bg.wasm\");\n const wasmBytes = readFileSync(wasmPath);\n initSync({ module: wasmBytes });\n wasmInitialized = true;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? 
undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? 
value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,mBAAmB,oBAAoB;AAChD,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,qBAAqB;AAC9B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,UAAU,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AAEtB,SAAS,wBAA8B;AACrC,MAAI,gBAAiB;AAErB,QAAMA,WAAU,cAAc,YAAY,GAAG;AAC7C,QAAM,WAAWA,SAAQ,QAAQ,uCAAuC;AACxE,QAAM,YAAY,aAAa,QAAQ;AACvC,WAAS,EAAE,QAAQ,UAAU,CAAC;AAC9B,oBAAkB;AACpB;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,wBAAsB;AACtB,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,wBAAsB;AACtB,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS
,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":["require"]}