parquetlens 0.1.0 → 0.2.3

@@ -1,92 +0,0 @@
- // ../../packages/parquet-reader/dist/index.js
- import { Blob as NodeBlob } from "buffer";
- import { createWriteStream } from "fs";
- import { promises as fs } from "fs";
- import { randomUUID } from "crypto";
- import { tmpdir } from "os";
- import path from "path";
- import { pipeline } from "stream/promises";
- import { tableFromIPC } from "apache-arrow";
- import { ParquetFile, readParquet } from "parquet-wasm/node";
- var BlobCtor = typeof Blob === "undefined" ? NodeBlob : Blob;
- function readParquetTableFromBuffer(buffer, options) {
-   const wasmTable = readParquet(buffer, options ?? void 0);
-   const ipcStream = wasmTable.intoIPCStream();
-   return tableFromIPC(ipcStream);
- }
- function createParquetBufferSource(buffer) {
-   let metadataPromise = null;
-   return {
-     buffer,
-     byteLength: buffer.byteLength,
-     readTable: (options) => readParquetTableFromBuffer(buffer, options),
-     readMetadata: () => {
-       if (!metadataPromise) {
-         metadataPromise = readParquetMetadataFromBuffer(buffer);
-       }
-       return metadataPromise;
-     }
-   };
- }
- async function openParquetBufferFromPath(filePath) {
-   const buffer = await fs.readFile(filePath);
-   return createParquetBufferSource(buffer);
- }
- async function readParquetTableFromPath(filePath, options) {
-   const buffer = await fs.readFile(filePath);
-   return readParquetTableFromBuffer(buffer, options);
- }
- async function readParquetMetadataFromBuffer(buffer) {
-   const blobInput = new Uint8Array(buffer).buffer;
-   const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));
-   const meta = file.metadata();
-   const fileMeta = meta.fileMetadata();
-   const createdBy = fileMeta.createdBy();
-   const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());
-   fileMeta.free();
-   meta.free();
-   file.free();
-   return {
-     createdBy: createdBy ?? void 0,
-     keyValueMetadata: normalizeMetadataValues(keyValueMetadata)
-   };
- }
- function normalizeMetadataValues(input) {
-   const normalized = {};
-   for (const [key, value] of Object.entries(input)) {
-     if (value === null || value === void 0) {
-       normalized[key] = "";
-       continue;
-     }
-     normalized[key] = typeof value === "string" ? value : String(value);
-   }
-   return normalized;
- }
- async function bufferStdinToTempFile(filenameHint = "stdin.parquet") {
-   const tempDir = await fs.mkdtemp(path.join(tmpdir(), "parquetlens-"));
-   const safeName = filenameHint.replace(/[\\/]/g, "_");
-   const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);
-   const writeStream = createWriteStream(filePath);
-   await pipeline(process.stdin, writeStream);
-   return {
-     path: filePath,
-     cleanup: async () => {
-       await fs.rm(tempDir, { recursive: true, force: true });
-     }
-   };
- }
- async function readParquetTableFromStdin(filenameHint = "stdin.parquet", options) {
-   const temp = await bufferStdinToTempFile(filenameHint);
-   try {
-     return await readParquetTableFromPath(temp.path, options);
-   } finally {
-     await temp.cleanup();
-   }
- }
-
- export {
-   openParquetBufferFromPath,
-   readParquetTableFromPath,
-   readParquetTableFromStdin
- };
- //# sourceMappingURL=chunk-VFBGUOAH.js.map
@@ -1 +0,0 @@
- {"version":3,"sources":["../../../packages/parquet-reader/src/index.ts"],"sourcesContent":["import { Blob as NodeBlob } from \"node:buffer\";\nimport { createWriteStream } from \"node:fs\";\nimport { promises as fs } from \"node:fs\";\nimport { randomUUID } from \"node:crypto\";\nimport { tmpdir } from \"node:os\";\nimport path from \"node:path\";\nimport { pipeline } from \"node:stream/promises\";\n\nimport { tableFromIPC, Table } from \"apache-arrow\";\nimport { ParquetFile, readParquet, ReaderOptions } from \"parquet-wasm/node\";\n\nconst BlobCtor: typeof Blob =\n typeof Blob === \"undefined\" ? (NodeBlob as unknown as typeof Blob) : Blob;\n\nexport type TempParquetFile = {\n path: string;\n cleanup: () => Promise<void>;\n};\n\nexport type ParquetReadOptions = Pick<\n ReaderOptions,\n \"batchSize\" | \"columns\" | \"limit\" | \"offset\" | \"rowGroups\"\n>;\n\nexport type ParquetFileMetadata = {\n createdBy?: string;\n keyValueMetadata: Record<string, string>;\n};\n\nexport type ParquetBufferSource = {\n buffer: Uint8Array;\n byteLength: number;\n readTable: (options?: ParquetReadOptions) => Table;\n readMetadata: () => Promise<ParquetFileMetadata>;\n};\n\nexport function readParquetTableFromBuffer(\n buffer: Uint8Array,\n options?: ParquetReadOptions,\n): Table {\n const wasmTable = readParquet(buffer, options ?? undefined);\n const ipcStream = wasmTable.intoIPCStream();\n return tableFromIPC(ipcStream);\n}\n\nexport function createParquetBufferSource(buffer: Uint8Array): ParquetBufferSource {\n let metadataPromise: Promise<ParquetFileMetadata> | null = null;\n\n return {\n buffer,\n byteLength: buffer.byteLength,\n readTable: (options?: ParquetReadOptions) => readParquetTableFromBuffer(buffer, options),\n readMetadata: () => {\n if (!metadataPromise) {\n metadataPromise = readParquetMetadataFromBuffer(buffer);\n }\n return metadataPromise;\n },\n };\n}\n\nexport async function openParquetBufferFromPath(filePath: string): Promise<ParquetBufferSource> {\n const buffer = await fs.readFile(filePath);\n return createParquetBufferSource(buffer);\n}\n\nexport async function readParquetTableFromPath(\n filePath: string,\n options?: ParquetReadOptions,\n): Promise<Table> {\n const buffer = await fs.readFile(filePath);\n return readParquetTableFromBuffer(buffer, options);\n}\n\nexport async function readParquetMetadataFromBuffer(\n buffer: Uint8Array,\n): Promise<ParquetFileMetadata> {\n const blobInput = new Uint8Array(buffer).buffer as ArrayBuffer;\n const file = await ParquetFile.fromFile(new BlobCtor([blobInput]));\n const meta = file.metadata();\n const fileMeta = meta.fileMetadata();\n const createdBy = fileMeta.createdBy();\n const keyValueMetadata = Object.fromEntries(fileMeta.keyValueMetadata());\n\n fileMeta.free();\n meta.free();\n file.free();\n\n return {\n createdBy: createdBy ?? undefined,\n keyValueMetadata: normalizeMetadataValues(keyValueMetadata),\n };\n}\n\nfunction normalizeMetadataValues(input: Record<string, unknown>): Record<string, string> {\n const normalized: Record<string, string> = {};\n\n for (const [key, value] of Object.entries(input)) {\n if (value === null || value === undefined) {\n normalized[key] = \"\";\n continue;\n }\n normalized[key] = typeof value === \"string\" ? value : String(value);\n }\n\n return normalized;\n}\n\nexport async function bufferStdinToTempFile(\n filenameHint = \"stdin.parquet\",\n): Promise<TempParquetFile> {\n const tempDir = await fs.mkdtemp(path.join(tmpdir(), \"parquetlens-\"));\n const safeName = filenameHint.replace(/[\\\\/]/g, \"_\");\n const filePath = path.join(tempDir, `${randomUUID()}-${safeName}`);\n const writeStream = createWriteStream(filePath);\n\n await pipeline(process.stdin, writeStream);\n\n return {\n path: filePath,\n cleanup: async () => {\n await fs.rm(tempDir, { recursive: true, force: true });\n },\n };\n}\n\nexport async function readParquetTableFromStdin(\n filenameHint = \"stdin.parquet\",\n options?: ParquetReadOptions,\n): Promise<Table> {\n const temp = await bufferStdinToTempFile(filenameHint);\n\n try {\n return await readParquetTableFromPath(temp.path, options);\n } finally {\n await temp.cleanup();\n }\n}\n"],"mappings":";AAAA,SAAS,QAAQ,gBAAgB;AACjC,SAAS,yBAAyB;AAClC,SAAS,YAAY,UAAU;AAC/B,SAAS,kBAAkB;AAC3B,SAAS,cAAc;AACvB,OAAO,UAAU;AACjB,SAAS,gBAAgB;AAEzB,SAAS,oBAA2B;AACpC,SAAS,aAAa,mBAAkC;AAExD,IAAM,WACJ,OAAO,SAAS,cAAe,WAAsC;AAwBhE,SAAS,2BACd,QACA,SACO;AACP,QAAM,YAAY,YAAY,QAAQ,WAAW,MAAS;AAC1D,QAAM,YAAY,UAAU,cAAc;AAC1C,SAAO,aAAa,SAAS;AAC/B;AAEO,SAAS,0BAA0B,QAAyC;AACjF,MAAI,kBAAuD;AAE3D,SAAO;IACL;IACA,YAAY,OAAO;IACnB,WAAW,CAAC,YAAiC,2BAA2B,QAAQ,OAAO;IACvF,cAAc,MAAM;AAClB,UAAI,CAAC,iBAAiB;AACpB,0BAAkB,8BAA8B,MAAM;MACxD;AACA,aAAO;IACT;EACF;AACF;AAEA,eAAsB,0BAA0B,UAAgD;AAC9F,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,0BAA0B,MAAM;AACzC;AAEA,eAAsB,yBACpB,UACA,SACgB;AAChB,QAAM,SAAS,MAAM,GAAG,SAAS,QAAQ;AACzC,SAAO,2BAA2B,QAAQ,OAAO;AACnD;AAEA,eAAsB,8BACpB,QAC8B;AAC9B,QAAM,YAAY,IAAI,WAAW,MAAM,EAAE;AACzC,QAAM,OAAO,MAAM,YAAY,SAAS,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;AACjE,QAAM,OAAO,KAAK,SAAS;AAC3B,QAAM,WAAW,KAAK,aAAa;AACnC,QAAM,YAAY,SAAS,UAAU;AACrC,QAAM,mBAAmB,OAAO,YAAY,SAAS,iBAAiB,CAAC;AAEvE,WAAS,KAAK;AACd,OAAK,KAAK;AACV,OAAK,KAAK;AAEV,SAAO;IACL,WAAW,aAAa;IACxB,kBAAkB,wBAAwB,gBAAgB;EAC5D;AACF;AAEA,SAAS,wBAAwB,OAAwD;AACvF,QAAM,aAAqC,CAAC;AAE5C,aAAW,CAAC,KAAK,KAAK,KAAK,OAAO,QAAQ,KAAK,GAAG;AAChD,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,iBAAW,GAAG,IAAI;AAClB;IACF;AACA,eAAW,GAAG,IAAI,OAAO,UAAU,WAAW,QAAQ,OAAO,KAAK;EACpE;AAEA,SAAO;AACT;AAEA,eAAsB,sBACpB,eAAe,iBACW;AAC1B,QAAM,UAAU,MAAM,GAAG,QAAQ,KAAK,KAAK,OAAO,GAAG,cAAc,CAAC;AACpE,QAAM,WAAW,aAAa,QAAQ,UAAU,GAAG;AACnD,QAAM,WAAW,KAAK,KAAK,SAAS,GAAG,WAAW,CAAC,IAAI,QAAQ,EAAE;AACjE,QAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAM,SAAS,QAAQ,OAAO,WAAW;AAEzC,SAAO;IACL,MAAM;IACN,SAAS,YAAY;AACnB,YAAM,GAAG,GAAG,SAAS,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;IACvD;EACF;AACF;AAEA,eAAsB,0BACpB,eAAe,iBACf,SACgB;AAChB,QAAM,OAAO,MAAM,sBAAsB,YAAY;AAErD,MAAI;AACF,WAAO,MAAM,yBAAyB,KAAK,MAAM,OAAO;EAC1D,UAAA;AACE,UAAM,KAAK,QAAQ;EACrB;AACF;","names":[]}
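For context, a minimal sketch of how the three exports removed above might have been consumed in 0.1.0. It assumes the chunk's exports were re-exported from the parquetlens package root; the file paths, column names, and filename hint are hypothetical, while the option keys (columns, limit) come from the ParquetReadOptions type inlined in the source map.

import {
  openParquetBufferFromPath,
  readParquetTableFromPath,
  readParquetTableFromStdin,
} from "parquetlens"; // assumption: chunk exports surfaced at the package root

// Read selected columns from a Parquet file on disk (path and columns are hypothetical).
const table = await readParquetTableFromPath("./events.parquet", {
  columns: ["id", "ts"],
  limit: 100,
});
console.log(table.numRows);

// Open a buffer-backed source; readMetadata() memoizes its promise after the first call.
const source = await openParquetBufferFromPath("./events.parquet");
const meta = await source.readMetadata();
console.log(meta.createdBy, meta.keyValueMetadata);

// Read a table from bytes piped over stdin; the hint only names the temp file.
const piped = await readParquetTableFromStdin("piped.parquet", { limit: 10 });
console.log(piped.numRows);

Note that the stdin path buffers everything to a temp file first (bufferStdinToTempFile above), presumably because Parquet's footer-based layout needs the complete byte range before reading can start.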