parquetlens 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "parquetlens",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "A fast, interactive TUI for viewing Parquet files",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -35,6 +35,7 @@
35
35
  "@opentui/core": "^0.1.74",
36
36
  "@opentui/react": "^0.1.74",
37
37
  "apache-arrow": "^17.0.0",
38
+ "cli-table3": "^0.6.5",
38
39
  "react": "^19.2.3"
39
40
  },
40
41
  "devDependencies": {
@@ -1,123 +0,0 @@
1
- import { createRequire } from 'module';
2
- import { fileURLToPath } from 'url';
3
- import { dirname } from 'path';
4
- const require = createRequire(import.meta.url);
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = dirname(__filename);
7
-
8
- // ../../packages/parquet-reader/dist/index.js
9
- import { Blob as NodeBlob } from "buffer";
10
- import { createWriteStream, readFileSync } from "fs";
11
- import { promises as fs } from "fs";
12
- import { randomUUID } from "crypto";
13
- import { tmpdir } from "os";
14
- import path from "path";
15
- import { pipeline } from "stream/promises";
16
- import { tableFromIPC } from "apache-arrow";
17
- import { initSync, ParquetFile, readParquet } from "parquet-wasm/esm";
18
- var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
19
- var wasmInitialized = false;
20
- function findWasmFile(startDir) {
21
- let dir = startDir;
22
- while (dir !== path.dirname(dir)) {
23
- const wasmPath = path.join(dir, "node_modules", "parquet-wasm", "esm", "parquet_wasm_bg.wasm");
24
- try {
25
- readFileSync(wasmPath, { flag: "r" });
26
- return wasmPath;
27
- } catch {
28
- dir = path.dirname(dir);
29
- }
30
- }
31
- throw new Error("Could not find parquet-wasm WASM file");
32
- }
33
- function ensureWasmInitialized() {
34
- if (wasmInitialized) return;
35
- const wasmPath = findWasmFile(process.cwd());
36
- const wasmBytes = readFileSync(wasmPath);
37
- initSync({ module: wasmBytes });
38
- wasmInitialized = true;
39
- }
40
- async function readParquetTableFromBuffer(buffer, options) {
41
- ensureWasmInitialized();
42
- const wasmTable = readParquet(buffer, options ?? void 0);
43
- const ipcStream = wasmTable.intoIPCStream();
44
- return tableFromIPC(ipcStream);
45
- }
46
- function createParquetBufferSource(buffer) {
47
- let metadataPromise = null;
48
- return {
49
- buffer,
50
- byteLength: buffer.byteLength,
51
- readTable: (options) => readParquetTableFromBuffer(buffer, options),
52
- readMetadata: () => {
53
- if (!metadataPromise) {
54
- metadataPromise = readParquetMetadataFromBuffer(buffer);
55
- }
56
- return metadataPromise;
57
- }
58
- };
59
- }
60
- async function openParquetBufferFromPath(filePath) {
61
- const buffer = await fs.readFile(filePath);
62
- return createParquetBufferSource(buffer);
63
- }
64
- async function readParquetTableFromPath(filePath, options) {
65
- const buffer = await fs.readFile(filePath);
66
- return readParquetTableFromBuffer(buffer, options);
67
- }
68
- async function readParquetMetadataFromBuffer(buffer) {
69
- ensureWasmInitialized();
70
- const blobInput = new Uint8Array(buffer).buffer;
71
- const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
72
- const meta = file.metadata();
73
- const fileMeta = meta.fileMetadata();
74
- const createdBy = fileMeta.createdBy();
75
- const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
76
- fileMeta.free();
77
- meta.free();
78
- file.free();
79
- return {
80
- createdBy: createdBy ?? void 0,
81
- keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
82
- };
83
- }
84
- function normalizeMetadataValues(input) {
85
- const normalized = {};
86
- for (const [key, value] of Object.entries(input)) {
87
- if (value === null || value === void 0) {
88
- normalized[key] = "";
89
- continue;
90
- }
91
- normalized[key] = typeof value === "string" ? value : String(value);
92
- }
93
- return normalized;
94
- }
95
- async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
96
- const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
97
- const safeName = filenameHint.replace(/[\\/]/g, "_");
98
- const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
99
- const writeStream = createWriteStream(filePath);
100
- await pipeline(process.stdin, writeStream);
101
- return {
102
- path: filePath,
103
- cleanup: async () => {
104
- await fs.rm(tempDir, { recursive: true, force: true });
105
- }
106
- };
107
- }
108
- async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
109
- const temp = await bufferStdinToTempFile(filenameHint);
110
- try {
111
- return await readParquetTableFromPath(temp.path, options);
112
- } finally {
113
- await temp.cleanup();
114
- }
115
- }
116
-
117
- export {
118
- readParquetTableFromBuffer,
119
- openParquetBufferFromPath,
120
- readParquetTableFromPath,
121
- readParquetTableFromStdin
122
- };
123
- //# sourceMappingURL=chunk-2RGMZZ7F.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream, readFileSync } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { initSync, ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\n\nfunction findWasmFile(startDir: string): string {\n let dir = startDir;\n while (dir !== path.dirname(dir)) {\n const wasmPath = path.join(dir, \"node_modules\", \"parquet-wasm\", \"esm\", \"parquet_wasm_bg.wasm\");\n try {\n readFileSync(wasmPath, { flag: \"r\" });\n return wasmPath;\n } catch {\n dir = path.dirname(dir);\n }\n }\n throw new Error(\"Could not find parquet-wasm WASM file\");\n}\n\nfunction ensureWasmInitialized(): void {\n if (wasmInitialized) return;\n\n // Use process.cwd() as starting point to find node_modules\n const wasmPath = findWasmFile(process.cwd());\n const wasmBytes = readFileSync(wasmPath);\n initSync({ module: wasmBytes });\n wasmInitialized = true;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,mBAAmB,oBAAoB;AAChD,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,UAAU,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AAEtB,SAAS,aAAa,UAA0B;AAC9C,MAAI,MAAM;AACV,SAAO,QAAQ,KAAK,QAAQ,GAAG,GAAG;AAChC,UAAM,WAAW,KAAK,KAAK,KAAK,gBAAgB,gBAAgB,OAAO,sBAAsB;AAC7F,QAAI;AACF,mBAAa,UAAU,EAAE,MAAM,IAAI,CAAC;AACpC,aAAO;IACT,QAAQ;AACN,YAAM,KAAK,QAAQ,GAAG;IACxB;EACF;AACA,QAAM,IAAI,MAAM,uCAAuC;AACzD;AAEA,SAAS,wBAA8B;AACrC,MAAI,gBAAiB;AAGrB,QAAM,WAAW,aAAa,QAAQ,IAAI,CAAC;AAC3C,QAAM,YAAY,aAAa,QAAQ;AACvC,WAAS,EAAE,QAAQ,UAAU,CAAC;AAC9B,oBAAkB;AACpB;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,wBAAsB;AACtB,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,wBAAsB;AACtB,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
@@ -1,113 +0,0 @@
1
- import { createRequire } from 'module';
2
- import { fileURLToPath } from 'url';
3
- import { dirname } from 'path';
4
- const require = createRequire(import.meta.url);
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = dirname(__filename);
7
-
8
- // ../../packages/parquet-reader/dist/index.js
9
- import { Blob as NodeBlob } from "buffer";
10
- import { createWriteStream } from "fs";
11
- import { promises as fs } from "fs";
12
- import { randomUUID } from "crypto";
13
- import { tmpdir } from "os";
14
- import path from "path";
15
- import { pipeline } from "stream/promises";
16
- import { tableFromIPC } from "apache-arrow";
17
- import initWasm, { ParquetFile, readParquet } from "parquet-wasm/esm";
18
- var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
19
- var wasmInitialized = false;
20
- var wasmInitPromise = null;
21
- async function ensureWasmInitialized() {
22
- if (wasmInitialized) return;
23
- if (!wasmInitPromise) {
24
- wasmInitPromise = initWasm().then(() => {
25
- wasmInitialized = true;
26
- });
27
- }
28
- return wasmInitPromise;
29
- }
30
- async function readParquetTableFromBuffer(buffer, options) {
31
- await ensureWasmInitialized();
32
- const wasmTable = readParquet(buffer, options ?? void 0);
33
- const ipcStream = wasmTable.intoIPCStream();
34
- return tableFromIPC(ipcStream);
35
- }
36
- function createParquetBufferSource(buffer) {
37
- let metadataPromise = null;
38
- return {
39
- buffer,
40
- byteLength: buffer.byteLength,
41
- readTable: (options) => readParquetTableFromBuffer(buffer, options),
42
- readMetadata: () => {
43
- if (!metadataPromise) {
44
- metadataPromise = readParquetMetadataFromBuffer(buffer);
45
- }
46
- return metadataPromise;
47
- }
48
- };
49
- }
50
- async function openParquetBufferFromPath(filePath) {
51
- const buffer = await fs.readFile(filePath);
52
- return createParquetBufferSource(buffer);
53
- }
54
- async function readParquetTableFromPath(filePath, options) {
55
- const buffer = await fs.readFile(filePath);
56
- return readParquetTableFromBuffer(buffer, options);
57
- }
58
- async function readParquetMetadataFromBuffer(buffer) {
59
- await ensureWasmInitialized();
60
- const blobInput = new Uint8Array(buffer).buffer;
61
- const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
62
- const meta = file.metadata();
63
- const fileMeta = meta.fileMetadata();
64
- const createdBy = fileMeta.createdBy();
65
- const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
66
- fileMeta.free();
67
- meta.free();
68
- file.free();
69
- return {
70
- createdBy: createdBy ?? void 0,
71
- keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
72
- };
73
- }
74
- function normalizeMetadataValues(input) {
75
- const normalized = {};
76
- for (const [key, value] of Object.entries(input)) {
77
- if (value === null || value === void 0) {
78
- normalized[key] = "";
79
- continue;
80
- }
81
- normalized[key] = typeof value === "string" ? value : String(value);
82
- }
83
- return normalized;
84
- }
85
- async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
86
- const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
87
- const safeName = filenameHint.replace(/[\\/]/g, "_");
88
- const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
89
- const writeStream = createWriteStream(filePath);
90
- await pipeline(process.stdin, writeStream);
91
- return {
92
- path: filePath,
93
- cleanup: async () => {
94
- await fs.rm(tempDir, { recursive: true, force: true });
95
- }
96
- };
97
- }
98
- async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
99
- const temp = await bufferStdinToTempFile(filenameHint);
100
- try {
101
- return await readParquetTableFromPath(temp.path, options);
102
- } finally {
103
- await temp.cleanup();
104
- }
105
- }
106
-
107
- export {
108
- readParquetTableFromBuffer,
109
- openParquetBufferFromPath,
110
- readParquetTableFromPath,
111
- readParquetTableFromStdin
112
- };
113
- //# sourceMappingURL=chunk-3N45GGD2.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport initWasm, { ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\nlet wasmInitPromise: Promise<void> | null = null;\n\nasync function ensureWasmInitialized(): Promise<void> {\n if (wasmInitialized) return;\n if (!wasmInitPromise) {\n wasmInitPromise = initWasm().then(() => {\n wasmInitialized = true;\n });\n }\n return wasmInitPromise;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n await ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n await ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,OAAO,YAAY,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AACtB,IAAI,kBAAwC;AAE5C,eAAe,wBAAuC;AACpD,MAAI,gBAAiB;AACrB,MAAI,CAAC,iBAAiB;AACpB,sBAAkB,SAAS,EAAE,KAAK,MAAM;AACtC,wBAAkB;IACpB,CAAC;EACH;AACA,SAAO;AACT;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,QAAM,sBAAsB;AAC5B,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,sBAAsB;AAC5B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
@@ -1,112 +0,0 @@
1
- import { createRequire } from 'module';
2
- import { fileURLToPath } from 'url';
3
- import { dirname } from 'path';
4
- const require = createRequire(import.meta.url);
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = dirname(__filename);
7
-
8
- // ../../packages/parquet-reader/dist/index.js
9
- import { Blob as NodeBlob } from "buffer";
10
- import { createWriteStream } from "fs";
11
- import { promises as fs } from "fs";
12
- import { randomUUID } from "crypto";
13
- import { tmpdir } from "os";
14
- import path from "path";
15
- import { pipeline } from "stream/promises";
16
- import { tableFromIPC } from "apache-arrow";
17
- import initWasm, { ParquetFile, readParquet } from "parquet-wasm/esm";
18
- var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
19
- var wasmInitialized = false;
20
- var wasmInitPromise = null;
21
- async function ensureWasmInitialized() {
22
- if (wasmInitialized) return;
23
- if (!wasmInitPromise) {
24
- wasmInitPromise = initWasm().then(() => {
25
- wasmInitialized = true;
26
- });
27
- }
28
- return wasmInitPromise;
29
- }
30
- async function readParquetTableFromBuffer(buffer, options) {
31
- await ensureWasmInitialized();
32
- const wasmTable = readParquet(buffer, options ?? void 0);
33
- const ipcStream = wasmTable.intoIPCStream();
34
- return tableFromIPC(ipcStream);
35
- }
36
- function createParquetBufferSource(buffer) {
37
- let metadataPromise = null;
38
- return {
39
- buffer,
40
- byteLength: buffer.byteLength,
41
- readTable: (options) => readParquetTableFromBuffer(buffer, options),
42
- readMetadata: () => {
43
- if (!metadataPromise) {
44
- metadataPromise = readParquetMetadataFromBuffer(buffer);
45
- }
46
- return metadataPromise;
47
- }
48
- };
49
- }
50
- async function openParquetBufferFromPath(filePath) {
51
- const buffer = await fs.readFile(filePath);
52
- return createParquetBufferSource(buffer);
53
- }
54
- async function readParquetTableFromPath(filePath, options) {
55
- const buffer = await fs.readFile(filePath);
56
- return readParquetTableFromBuffer(buffer, options);
57
- }
58
- async function readParquetMetadataFromBuffer(buffer) {
59
- await ensureWasmInitialized();
60
- const blobInput = new Uint8Array(buffer).buffer;
61
- const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
62
- const meta = file.metadata();
63
- const fileMeta = meta.fileMetadata();
64
- const createdBy = fileMeta.createdBy();
65
- const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
66
- fileMeta.free();
67
- meta.free();
68
- file.free();
69
- return {
70
- createdBy: createdBy ?? void 0,
71
- keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
72
- };
73
- }
74
- function normalizeMetadataValues(input) {
75
- const normalized = {};
76
- for (const [key, value] of Object.entries(input)) {
77
- if (value === null || value === void 0) {
78
- normalized[key] = "";
79
- continue;
80
- }
81
- normalized[key] = typeof value === "string" ? value : String(value);
82
- }
83
- return normalized;
84
- }
85
- async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
86
- const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
87
- const safeName = filenameHint.replace(/[\\/]/g, "_");
88
- const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
89
- const writeStream = createWriteStream(filePath);
90
- await pipeline(process.stdin, writeStream);
91
- return {
92
- path: filePath,
93
- cleanup: async () => {
94
- await fs.rm(tempDir, { recursive: true, force: true });
95
- }
96
- };
97
- }
98
- async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
99
- const temp = await bufferStdinToTempFile(filenameHint);
100
- try {
101
- return await readParquetTableFromPath(temp.path, options);
102
- } finally {
103
- await temp.cleanup();
104
- }
105
- }
106
-
107
- export {
108
- openParquetBufferFromPath,
109
- readParquetTableFromPath,
110
- readParquetTableFromStdin
111
- };
112
- //# sourceMappingURL=chunk-573AA4JN.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport initWasm, { ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\nlet wasmInitPromise: Promise<void> | null = null;\n\nasync function ensureWasmInitialized(): Promise<void> {\n if (wasmInitialized) return;\n if (!wasmInitPromise) {\n wasmInitPromise = initWasm().then(() => {\n wasmInitialized = true;\n });\n }\n return wasmInitPromise;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n await ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n await ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,OAAO,YAAY,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AACtB,IAAI,kBAAwC;AAE5C,eAAe,wBAAuC;AACpD,MAAI,gBAAiB;AACrB,MAAI,CAAC,iBAAiB;AACpB,sBAAkB,SAAS,EAAE,KAAK,MAAM;AACtC,wBAAkB;IACpB,CAAC;EACH;AACA,SAAO;AACT;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,QAAM,sBAAsB;AAC5B,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,sBAAsB;AAC5B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
@@ -1,112 +0,0 @@
1
- import { createRequire } from 'module';
2
- import { fileURLToPath } from 'url';
3
- import { dirname } from 'path';
4
- const require = createRequire(import.meta.url);
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = dirname(__filename);
7
-
8
- // ../../packages/parquet-reader/dist/index.js
9
- import { Blob as NodeBlob } from "buffer";
10
- import { createWriteStream, readFileSync } from "fs";
11
- import { promises as fs } from "fs";
12
- import { randomUUID } from "crypto";
13
- import { createRequire as nodeCreateRequire } from "module";
14
- import { tmpdir } from "os";
15
- import path from "path";
16
- import { pipeline } from "stream/promises";
17
- import { tableFromIPC } from "apache-arrow";
18
- import { initSync, ParquetFile, readParquet } from "parquet-wasm/esm";
19
- var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
20
- var wasmInitialized = false;
21
- function ensureWasmInitialized() {
22
- if (wasmInitialized) return;
23
- const localRequire = nodeCreateRequire(import.meta.url);
24
- const wasmPath = localRequire.resolve("parquet-wasm/esm/parquet_wasm_bg.wasm");
25
- const wasmBytes = readFileSync(wasmPath);
26
- initSync({ module: wasmBytes });
27
- wasmInitialized = true;
28
- }
29
- async function readParquetTableFromBuffer(buffer, options) {
30
- ensureWasmInitialized();
31
- const wasmTable = readParquet(buffer, options ?? void 0);
32
- const ipcStream = wasmTable.intoIPCStream();
33
- return tableFromIPC(ipcStream);
34
- }
35
- function createParquetBufferSource(buffer) {
36
- let metadataPromise = null;
37
- return {
38
- buffer,
39
- byteLength: buffer.byteLength,
40
- readTable: (options) => readParquetTableFromBuffer(buffer, options),
41
- readMetadata: () => {
42
- if (!metadataPromise) {
43
- metadataPromise = readParquetMetadataFromBuffer(buffer);
44
- }
45
- return metadataPromise;
46
- }
47
- };
48
- }
49
- async function openParquetBufferFromPath(filePath) {
50
- const buffer = await fs.readFile(filePath);
51
- return createParquetBufferSource(buffer);
52
- }
53
- async function readParquetTableFromPath(filePath, options) {
54
- const buffer = await fs.readFile(filePath);
55
- return readParquetTableFromBuffer(buffer, options);
56
- }
57
- async function readParquetMetadataFromBuffer(buffer) {
58
- ensureWasmInitialized();
59
- const blobInput = new Uint8Array(buffer).buffer;
60
- const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
61
- const meta = file.metadata();
62
- const fileMeta = meta.fileMetadata();
63
- const createdBy = fileMeta.createdBy();
64
- const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
65
- fileMeta.free();
66
- meta.free();
67
- file.free();
68
- return {
69
- createdBy: createdBy ?? void 0,
70
- keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
71
- };
72
- }
73
- function normalizeMetadataValues(input) {
74
- const normalized = {};
75
- for (const [key, value] of Object.entries(input)) {
76
- if (value === null || value === void 0) {
77
- normalized[key] = "";
78
- continue;
79
- }
80
- normalized[key] = typeof value === "string" ? value : String(value);
81
- }
82
- return normalized;
83
- }
84
- async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
85
- const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
86
- const safeName = filenameHint.replace(/[\\/]/g, "_");
87
- const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
88
- const writeStream = createWriteStream(filePath);
89
- await pipeline(process.stdin, writeStream);
90
- return {
91
- path: filePath,
92
- cleanup: async () => {
93
- await fs.rm(tempDir, { recursive: true, force: true });
94
- }
95
- };
96
- }
97
- async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
98
- const temp = await bufferStdinToTempFile(filenameHint);
99
- try {
100
- return await readParquetTableFromPath(temp.path, options);
101
- } finally {
102
- await temp.cleanup();
103
- }
104
- }
105
-
106
- export {
107
- readParquetTableFromBuffer,
108
- openParquetBufferFromPath,
109
- readParquetTableFromPath,
110
- readParquetTableFromStdin
111
- };
112
- //# sourceMappingURL=chunk-AYPIRAOL.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream, readFileSync } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { createRequire as nodeCreateRequire } from \"node:module\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { initSync, ParquetFile, readParquet, type ReaderOptions } from \"parquet-wasm/esm\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nlet wasmInitialized = false;\n\nfunction ensureWasmInitialized(): void {\n if (wasmInitialized) return;\n\n // Use require.resolve to find the WASM file (works with pnpm)\n const localRequire = nodeCreateRequire(import.meta.url);\n const wasmPath = localRequire.resolve(\"parquet-wasm/esm/parquet_wasm_bg.wasm\");\n const wasmBytes = readFileSync(wasmPath);\n initSync({ module: wasmBytes });\n wasmInitialized = true;\n}\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Promise<Table>;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport async function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Promise<Table> {\n ensureWasmInitialized();\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n ensureWasmInitialized();\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,mBAAmB,oBAAoB;AAChD,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,iBAAiB,yBAAyB;AACnD,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,UAAU,aAAa,mBAAuC;AAEvE,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAEvE,IAAI,kBAAkB;AAEtB,SAAS,wBAA8B;AACrC,MAAI,gBAAiB;AAGrB,QAAM,eAAe,kBAAkB,YAAY,GAAG;AACtD,QAAM,WAAW,aAAa,QAAQ,uCAAuC;AAC7E,QAAM,YAAY,aAAa,QAAQ;AACvC,WAAS,EAAE,QAAQ,UAAU,CAAC;AAC9B,oBAAkB;AACpB;AAwBA,eAAsB,2BACpB,QACA,SACgB;AAChB,wBAAsB;AACtB,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,wBAAsB;AACtB,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}