tiny-parquet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 nktrchk
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,338 @@
1
+ ```
2
+ ╔═════════════════════════════════════════════════════════════════╗
3
+ ║ ║
4
+ ║ ████████╗██╗███╗ ██╗██╗ ██╗ ║
5
+ ║ ╚══██╔══╝██║████╗ ██║╚██╗ ██╔╝ ║
6
+ ║ ██║ ██║██╔██╗ ██║ ╚████╔╝ ║
7
+ ║ ██║ ██║██║╚██╗██║ ╚██╔╝ ║
8
+ ║ ██║ ██║██║ ╚████║ ██║ ║
9
+ ║ ╚═╝ ╚═╝╚═╝ ╚═══╝ ╚═╝ ║
10
+ ║ ║
11
+ ║ ██████╗ █████╗ ██████╗ ██████╗ ██╗ ██╗███████╗████████╗ ║
12
+ ║ ██╔══██╗██╔══██╗██╔══██╗██╔═══██╗██║ ██║██╔════╝╚══██╔══╝ ║
13
+ ║ ██████╔╝███████║██████╔╝██║ ██║██║ ██║█████╗ ██║ ║
14
+ ║ ██╔═══╝ ██╔══██║██╔══██╗██║▄▄ ██║██║ ██║██╔══╝ ██║ ║
15
+ ║ ██║ ██║ ██║██║ ██║╚██████╔╝╚██████╔╝███████╗ ██║ ║
16
+ ║ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══▀▀═╝ ╚═════╝ ╚══════╝ ╚═╝ ║
17
+ ║ ║
18
+ ║ Read & write Apache Parquet in 319KB of WASM. ║
19
+ ║ Two functions. Zero dependencies. ║
20
+ ║ ║
21
+ ╚═════════════════════════════════════════════════════════════════╝
22
+ ```
23
+
24
+ <p align="center">
25
+ <strong>The only Parquet library that fits on Cloudflare Workers free tier and Vercel Edge.</strong>
26
+ </p>
27
+
28
+ <p align="center">
29
+ <a href="https://www.npmjs.com/package/tiny-parquet"><img src="https://img.shields.io/npm/v/tiny-parquet.svg?style=flat-square&color=cb3837" alt="npm version"></a>
30
+ <a href="https://www.npmjs.com/package/tiny-parquet"><img src="https://img.shields.io/npm/dm/tiny-parquet.svg?style=flat-square&color=blue" alt="npm downloads"></a>
31
+ <a href="https://github.com/nktrchk/tiny-parquet/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green.svg?style=flat-square" alt="license"></a>
32
+ <a href="https://github.com/nktrchk/tiny-parquet"><img src="https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square" alt="PRs Welcome"></a>
33
+ </p>
34
+
35
+ ---
36
+
37
+ ## Why
38
+
39
+ You're building on the edge. You need Parquet. But:
40
+
41
+ ```
42
+ parquet-wasm 3,500 KB ❌ Too fat for Vercel Edge & CF free tier
43
+ duckdb-wasm 8,000 KB ❌ Way too fat
44
+ parquetjs 500 KB ❌ Node.js only
45
+
46
+ tiny-parquet 319 KB ✅ Runs everywhere
47
+ ```
48
+
49
+ **tiny-parquet** gives you `readParquet` and `writeParquet` — two async functions backed by Rust + WASM via the [`parquet2`](https://crates.io/crates/parquet2) crate. That's it.
50
+
51
+ ---
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ npm install tiny-parquet
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Quick Start
62
+
63
+ ```js
64
+ import { writeParquet, readParquet } from 'tiny-parquet';
65
+
66
+ // ── Write ────────────────────────────────────────────────────────
67
+ const bytes = await writeParquet(
68
+ [
69
+ { name: 'url', type: 'string' },
70
+ { name: 'ts', type: 'int64' },
71
+ { name: 'score', type: 'float64' },
72
+ { name: 'active', type: 'boolean' },
73
+ ],
74
+ {
75
+ url: ['https://example.com', 'https://test.dev'],
76
+ ts: [1708000000, 1708100000],
77
+ score: [98.5, 76.3],
78
+ active: [true, false],
79
+ },
80
+ { compression: 'snappy' }
81
+ );
82
+
83
+ // ── Read ─────────────────────────────────────────────────────────
84
+ const { schema, data, numRows } = await readParquet(bytes);
85
+
86
+ console.log(schema); // [{ name: 'url', type: 'string' }, ...]
87
+ console.log(data.url); // ['https://example.com', 'https://test.dev']
88
+ console.log(numRows); // 2
89
+ ```
90
+
91
+ ### Subpath Imports
92
+
93
+ ```js
94
+ // Import only what you need — smaller bundles
95
+ import { readParquet } from 'tiny-parquet/reader';
96
+ import { writeParquet } from 'tiny-parquet/writer';
97
+ ```
98
+
99
+ ---
100
+
101
+ ## Runs Everywhere
102
+
103
+ ```
104
+ ┌─────────────────────────────────┬────────┐
105
+ │ Runtime │ Status │
106
+ ├─────────────────────────────────┼────────┤
107
+ │ Cloudflare Workers │ ✅ │
108
+ │ Vercel Edge Functions │ ✅ │
109
+ │ Deno / Deno Deploy │ ✅ │
110
+ │ Bun │ ✅ │
111
+ │ Node.js (≥18) │ ✅ │
112
+ │ Browser (Chrome/FF/Safari) │ ✅ │
113
+ │ AWS Lambda │ ✅ │
114
+ │ Google Cloud Functions │ ✅ │
115
+ │ Fastly Compute │ ✅ │
116
+ │ Electron │ ✅ │
117
+ └─────────────────────────────────┴────────┘
118
+ ```
119
+
120
+ ---
121
+
122
+ ## Size Comparison
123
+
124
+ ```
125
+ ┌──────────────────┬─────────┬──────────────────────────────────┬───────────────────────────┐
126
+ │ Package │ Size │ CF Workers free (1MB) │ Vercel Edge (1MB soft) │
127
+ ├──────────────────┼─────────┼──────────────────────────────────┼───────────────────────────┤
128
+ │ parquet-wasm │ 3.5 MB │ ❌ No │ ❌ No │
129
+ │ duckdb-wasm │ 8.0 MB │ ❌ No │ ❌ No │
130
+ │ parquetjs │ 500 KB │ ❌ Node only │ ❌ Node only │
131
+ │ tiny-parquet │ 319 KB │ ✅ Yes │ ✅ Yes │
132
+ └──────────────────┴─────────┴──────────────────────────────────┴───────────────────────────┘
133
+ ```
134
+
135
+ ---
136
+
137
+ ## Anatomy
138
+
139
+ ```
140
+ ┌───────────────────────────────────────────────┐
141
+ │ tiny-parquet │
142
+ │ │
143
+ │ ┌─────────────┐ ┌──────────────┐ │
144
+ │ │ writer.wasm │ │ reader.wasm │ │
145
+ │ │ 179 KB │ │ 140 KB │ │
146
+ │ │ │ │ │ │
147
+ │ │ Rust + │ │ Rust + │ │
148
+ │ │ parquet2 │ │ parquet2 │ │
149
+ │ │ + snappy │ │ + snappy │ │
150
+ │ └──────┬──────┘ └──────┬───────┘ │
151
+ │ │ │ │
152
+ │ ┌──────▼──────┐ ┌──────▼───────┐ │
153
+ │ │ writer.js │ │ reader.js │ │
154
+ │ │ JS glue │ │ JS glue │ │
155
+ │ │ ~120 LOC │ │ ~100 LOC │ │
156
+ │ └─────────────┘ └──────────────┘ │
157
+ │ │
158
+ │ Total: 319 KB · 337 LOC │
159
+ └───────────────────────────────────────────────┘
160
+ ```
161
+
162
+ ---
163
+
164
+ ## API
165
+
166
+ ### `writeParquet(schema, data, config?)`
167
+
168
+ Creates a Parquet file from columnar data.
169
+
170
+ | Param | Type | Description |
171
+ |-------|------|-------------|
172
+ | `schema` | `Array<{ name: string, type: string }>` | Column definitions |
173
+ | `data` | `Record<string, any[]>` | Columnar data keyed by column name |
174
+ | `config` | `{ compression?: 'snappy' \| 'none' }` | Options (default: `snappy`) |
175
+ | **Returns** | `Promise<Uint8Array>` | Raw Parquet file bytes |
176
+
177
+ #### Supported Types
178
+
179
+ | Type | Parquet Physical | Notes |
180
+ |------|-----------------|-------|
181
+ | `string` | `BYTE_ARRAY` (UTF-8) | Default if unrecognized |
182
+ | `int32` | `INT32` | |
183
+ | `int64` | `INT64` | |
184
+ | `float32` | `FLOAT` | Alias: `float` |
185
+ | `float64` | `DOUBLE` | Alias: `double` |
186
+ | `boolean` | `BOOLEAN` | Alias: `bool` |
187
+ | `timestamp` | `INT64` (millis, UTC) | Alias: `timestamp_millis` |
188
+
189
+ ### `readParquet(bytes, maxRows?)`
190
+
191
+ Reads a Parquet file into columnar data.
192
+
193
+ | Param | Type | Description |
194
+ |-------|------|-------------|
195
+ | `bytes` | `Uint8Array` | Raw Parquet file bytes |
196
+ | `maxRows` | `number` | Max rows to decode (default: `500`); pass a larger value to read files with more rows |
197
+ | **Returns** | `Promise<{ schema, data, numRows }>` | Parsed result |
198
+
199
+ ---
200
+
201
+ ## Examples
202
+
203
+ ### Cloudflare Worker
204
+
205
+ ```js
206
+ import { writeParquet } from 'tiny-parquet/writer';
207
+
208
+ export default {
209
+ async fetch(request, env) {
210
+ const bytes = await writeParquet(
211
+ [{ name: 'path', type: 'string' }, { name: 'ts', type: 'timestamp' }],
212
+ { path: [new URL(request.url).pathname], ts: [Date.now()] },
213
+ );
214
+
215
+ await env.R2_BUCKET.put(`logs/${Date.now()}.parquet`, bytes);
216
+ return new Response('OK');
217
+ }
218
+ };
219
+ ```
220
+
221
+ ### Next.js Edge Route
222
+
223
+ ```js
224
+ import { readParquet } from 'tiny-parquet/reader';
225
+
226
+ export const runtime = 'edge';
227
+
228
+ export async function GET() {
229
+ const res = await fetch('https://data.example.com/events.parquet');
230
+ const bytes = new Uint8Array(await res.arrayBuffer());
231
+ const { data, numRows } = await readParquet(bytes);
232
+
233
+ return Response.json({ numRows, sample: data });
234
+ }
235
+ ```
236
+
237
+ ### Node.js
238
+
239
+ ```js
240
+ import { readFileSync, writeFileSync } from 'node:fs';
241
+ import { writeParquet, readParquet } from 'tiny-parquet';
242
+
243
+ // Write
244
+ const bytes = await writeParquet(
245
+ [{ name: 'city', type: 'string' }, { name: 'pop', type: 'int64' }],
246
+ { city: ['Berlin', 'London', 'Tokyo'], pop: [3748148, 8982000, 13960000] },
247
+ );
248
+ writeFileSync('cities.parquet', bytes);
249
+
250
+ // Read
251
+ const { schema, data, numRows } = await readParquet(readFileSync('cities.parquet'));
252
+ console.log(`${numRows} rows:`, data);
253
+ ```
254
+
255
+ ### Deno
256
+
257
+ ```ts
258
+ import { writeParquet, readParquet } from 'npm:tiny-parquet';
259
+
260
+ const bytes = await writeParquet(
261
+ [{ name: 'msg', type: 'string' }],
262
+ { msg: ['Hello from Deno!'] },
263
+ );
264
+
265
+ const result = await readParquet(bytes);
266
+ console.log(result.data.msg); // ['Hello from Deno!']
267
+ ```
268
+
269
+ ---
270
+
271
+ ## Rust Source
272
+
273
+ The WASM binaries are compiled from Rust using [`parquet2`](https://crates.io/crates/parquet2) — a lightweight, zero-copy Parquet implementation.
274
+
275
+ ```
276
+ rust/
277
+ ├── parquet-reader/ 140KB WASM
278
+ │ ├── Cargo.toml
279
+ │ └── src/lib.rs 180 lines
280
+ └── parquet-writer/ 179KB WASM
281
+ ├── Cargo.toml
282
+ └── src/lib.rs 258 lines
283
+ ```
284
+
285
+ ### Build from Source
286
+
287
+ ```bash
288
+ # Writer
289
+ cd parquet-writer
290
+ cargo build --target wasm32-unknown-unknown --release
291
+ wasm-bindgen target/wasm32-unknown-unknown/release/parquet_flake.wasm \
292
+ --out-dir pkg --target web
293
+ wasm-opt pkg/parquet_flake_bg.wasm -o ../wasm/writer.wasm -Oz
294
+
295
+ # Reader
296
+ cd ../parquet-reader
297
+ cargo build --target wasm32-unknown-unknown --release
298
+ wasm-bindgen target/wasm32-unknown-unknown/release/parquet_reader.wasm \
299
+ --out-dir pkg --target web
300
+ wasm-opt pkg/parquet_reader_bg.wasm -o ../wasm/reader.wasm -Oz
301
+ ```
302
+
303
+ ---
304
+
305
+ ## Roadmap
306
+
307
+ - [ ] **Dictionary encoding** — 60-80% size reduction on low-cardinality columns
308
+ - [ ] **Column pruning** — Read only the columns you need
309
+ - [ ] **Row group control** — Multiple row groups per file
310
+ - [ ] **Zstd compression** — Better ratio than Snappy
311
+
312
+ ---
313
+
314
+ ## FAQ
315
+
316
+ **Q: How does this compare to `parquet-wasm`?**
317
+ A: `parquet-wasm` is a full-featured Parquet library at 3.5MB. `tiny-parquet` is 10x smaller by supporting only flat schemas and essential types — perfect for edge runtimes where size limits apply.
318
+
319
+ **Q: Can I read files written by DuckDB / Spark / PyArrow?**
320
+ A: Yes — the reader handles standard Parquet files with flat schemas. Nested types (structs, lists, maps) are not supported yet.
321
+
322
+ **Q: Is Snappy compression supported?**
323
+ A: Yes, on both read and write. It's the default compression.
324
+
325
+ **Q: What about TypeScript?**
326
+ A: Full type declarations are included (`*.d.ts`). Just import and go.
327
+
328
+ ---
329
+
330
+ ## License
331
+
332
+ [MIT](./LICENSE) © [nktrchk](https://github.com/nktrchk)
333
+
334
+ ---
335
+
336
+ <p align="center">
337
+ <sub>Built by <a href="https://github.com/nktrchk">nktrchk</a> / <a href="https://enrich.sh">enrich.sh</a> — used in production processing millions of events.</sub>
338
+ </p>
package/package.json ADDED
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "tiny-parquet",
3
+ "version": "0.1.0",
4
+ "description": "Read and write Parquet files in 319KB of WASM. Two functions. Zero dependencies. Runs on Vercel Edge, Cloudflare Workers, Deno, Bun, Node.js, and browsers.",
5
+ "author": "nktrchk",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "git+https://github.com/nktrchk/tiny-parquet.git"
10
+ },
11
+ "homepage": "https://github.com/nktrchk/tiny-parquet#readme",
12
+ "bugs": {
13
+ "url": "https://github.com/nktrchk/tiny-parquet/issues"
14
+ },
15
+ "keywords": [
16
+ "parquet",
17
+ "wasm",
18
+ "webassembly",
19
+ "edge",
20
+ "cloudflare-workers",
21
+ "vercel-edge",
22
+ "deno",
23
+ "bun",
24
+ "serverless",
25
+ "columnar",
26
+ "data",
27
+ "apache-parquet",
28
+ "rust",
29
+ "tiny",
30
+ "parquet-reader",
31
+ "parquet-writer"
32
+ ],
33
+ "type": "module",
34
+ "main": "./src/index.js",
35
+ "module": "./src/index.js",
36
+ "types": "./src/index.d.ts",
37
+ "exports": {
38
+ ".": {
39
+ "import": "./src/index.js",
40
+ "types": "./src/index.d.ts"
41
+ },
42
+ "./reader": {
43
+ "import": "./src/reader.js",
44
+ "types": "./src/reader.d.ts"
45
+ },
46
+ "./writer": {
47
+ "import": "./src/writer.js",
48
+ "types": "./src/writer.d.ts"
49
+ }
50
+ },
51
+ "files": [
52
+ "src/",
53
+ "wasm/",
54
+ "LICENSE",
55
+ "README.md"
56
+ ],
57
+ "engines": {
58
+ "node": ">=18"
59
+ },
60
+ "sideEffects": false
61
+ }
package/src/index.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ export { readParquet } from './reader.js';
2
+ export { writeParquet } from './writer.js';
package/src/index.js ADDED
@@ -0,0 +1,8 @@
1
+ /**
2
+ * tiny-parquet — WASM Parquet reader/writer for edge runtimes & browsers
3
+ *
4
+ * @module tiny-parquet
5
+ */
6
+
7
+ export { readParquet } from './reader.js';
8
+ export { writeParquet } from './writer.js';
@@ -0,0 +1,18 @@
1
/** Describes a single column by name and type name. */
export interface ColumnSchema {
  /** Column name. */
  name: string;
  /** Column type name. */
  type: 'string' | 'int32' | 'int64' | 'float32' | 'float64' | 'boolean' | 'timestamp';
}
5
+
6
/** Value that the promise returned by `readParquet` resolves to. */
export interface ReadResult {
  /** Column definitions. */
  schema: ColumnSchema[];
  /** Column values, keyed by column name. */
  data: Record<string, any[]>;
  /** Row count reported by the reader. */
  numRows: number;
}
11
+
12
/**
 * Read a Parquet file and return columnar data.
 *
 * @param fileBytes - Raw Parquet file bytes.
 * @param maxRows - Maximum rows to decode. Default: 500.
 * @returns A promise for the schema, the column data and the row count.
 */
export function readParquet(fileBytes: Uint8Array, maxRows?: number): Promise<ReadResult>;
package/src/reader.js ADDED
@@ -0,0 +1,163 @@
1
+ /**
2
+ * tiny-parquet/reader — WASM Parquet Reader
3
+ * Supports: Node.js, Browser, Cloudflare Workers, Vercel Edge, Deno, Bun
4
+ * WASM size: ~140KB
5
+ */
6
+
7
+ let wasm;
8
+ let initPromise = null;
9
+
10
+ // ── Heap / object table ──────────────────────────────────────────────────────
11
// Object table used to hand JS values to the WASM module by index.
// Indices below 128 form a downward-growing stack for borrowed objects,
// 128-131 hold the fixed values undefined, null, true and false, and
// everything from 132 upwards is a free list of reusable slots.
const heap = new Array(128).fill(undefined);
heap.push(undefined, null, true, false);
let heap_next = heap.length;
let stack_pointer = 128;

/** Store `obj` in the next free slot and return that slot's index. */
function addHeapObject(obj) {
  // A free slot stores the index of the next free slot. When the list is
  // exhausted, append a slot that points one past itself.
  if (heap_next === heap.length) {
    heap.push(heap.length + 1);
  }
  const slot = heap_next;
  heap_next = heap[slot];
  heap[slot] = obj;
  return slot;
}

/** Push `obj` onto the borrowed-object stack and return its index. */
function addBorrowedObject(obj) {
  if (stack_pointer === 1) {
    throw new Error('out of js stack');
  }
  stack_pointer -= 1;
  heap[stack_pointer] = obj;
  return stack_pointer;
}

/** Look up the value stored at `idx`. */
function getObject(idx) {
  return heap[idx];
}

/** Return slot `idx` to the free list; indices below 132 are never recycled. */
function dropObject(idx) {
  const isReserved = idx < 132;
  if (!isReserved) {
    heap[idx] = heap_next;
    heap_next = idx;
  }
}

/** Read the value at `idx`, release the slot, and return the value. */
function takeObject(idx) {
  const value = heap[idx];
  dropObject(idx);
  return value;
}
39
+
40
+ // ── Memory helpers ───────────────────────────────────────────────────────────
41
let cachedUint8 = null;

/**
 * Return a byte view over the WASM memory, rebuilding the cached view when
 * none exists yet or when the cached one reports a byte length of zero.
 */
function getUint8() {
  const needsRefresh = cachedUint8 === null || cachedUint8.byteLength === 0;
  if (needsRefresh) {
    cachedUint8 = new Uint8Array(wasm.memory.buffer);
  }
  return cachedUint8;
}
47
let cachedDV = null;

/**
 * Return a DataView over the WASM memory. The cached view is reused unless
 * its buffer is flagged as detached or, where the `detached` property is
 * unavailable, its buffer is no longer the module's current memory buffer.
 */
function getDV() {
  if (cachedDV !== null) {
    const { detached } = cachedDV.buffer;
    const replaced = detached === undefined && cachedDV.buffer !== wasm.memory.buffer;
    if (detached !== true && !replaced) {
      return cachedDV;
    }
  }
  cachedDV = new DataView(wasm.memory.buffer);
  return cachedDV;
}
54
+
55
// Strict UTF-8 decoder: invalid byte sequences throw instead of being replaced.
const decoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
decoder.decode(); // warm up

/** Decode `len` bytes of WASM memory starting at `ptr` as a UTF-8 string. */
function getStringFromWasm(ptr, len) {
  const start = ptr >>> 0; // treat the pointer as an unsigned 32-bit offset
  const bytes = getUint8().subarray(start, start + len);
  return decoder.decode(bytes);
}
62
/** Return a view (not a copy) of `len` bytes of WASM memory starting at `ptr`. */
function getArrayU8(ptr, len) {
  const start = ptr >>> 0; // treat the pointer as an unsigned 32-bit offset
  return getUint8().subarray(start, start + len);
}
66
/** True when `x` is `undefined` or `null`. */
function isLikeNone(x) {
  return x == null;
}
67
/**
 * Call `f` with `args`. If it throws, store the thrown value in the object
 * table and pass its index to the module's `__wbindgen_export` function
 * instead of letting the exception propagate.
 */
function handleError(f, args) {
  try {
    return f.apply(this, args);
  } catch (thrown) {
    const handle = addHeapObject(thrown);
    wasm.__wbindgen_export(handle);
  }
}
71
+
72
+ // ── WASM imports ─────────────────────────────────────────────────────────────
73
/**
 * Build the import object passed to `WebAssembly.instantiate` for the reader
 * module. Each entry is a small JS shim that works on object-table indices
 * and/or (pointer, length) pairs into WASM memory.
 */
function getImports() {
  const wbg = {
    __proto__: null,

    // Raise a JS error whose message is read from WASM memory.
    __wbg___wbindgen_throw_be289d5034ed271b: (ptr, len) => {
      throw new Error(getStringFromWasm(ptr, len));
    },
    __wbg_length_32ed9a279acd054c: (idx) => getObject(idx).length,
    __wbg_new_361308b2356cecd0: () => addHeapObject({}),
    __wbg_new_3eb36ae241fe6f44: () => addHeapObject([]),
    // Copy the bytes of the object at `srcIdx` into WASM memory at (ptr, len).
    __wbg_prototypesetcall_bdcdcc5842e4d77d: (ptr, len, srcIdx) => {
      const target = getArrayU8(ptr, len);
      Uint8Array.prototype.set.call(target, getObject(srcIdx));
    },
    __wbg_push_8ffdcb2063340ba5: (arrIdx, itemIdx) => getObject(arrIdx).push(getObject(itemIdx)),
    __wbg_set_6cb8631f80447a67: (...args) => handleError(
      (targetIdx, keyIdx, valueIdx) =>
        Reflect.set(getObject(targetIdx), getObject(keyIdx), getObject(valueIdx)),
      args,
    ),
    __wbindgen_cast_0000000000000001: (value) => addHeapObject(value),
    __wbindgen_cast_0000000000000002: (ptr, len) => addHeapObject(getStringFromWasm(ptr, len)),
    __wbindgen_object_drop_ref: (idx) => takeObject(idx),
  };

  return { './parquet_reader_bg.js': wbg };
}
95
+
96
+ // ── WASM loader (universal) ──────────────────────────────────────────────────
97
/**
 * Load and instantiate `reader.wasm`, returning the instance's exports.
 *
 * When `process.versions.node` is present the binary is read from disk;
 * otherwise it is fetched. Both paths resolve `../wasm/reader.wasm` relative
 * to this module's URL.
 *
 * @returns {Promise<WebAssembly.Exports>} Exports of the instantiated module.
 * @throws {Error} If the binary cannot be read, or the HTTP response for it
 *   is not successful.
 */
async function loadWasm() {
  const imports = getImports();
  const wasmUrl = new URL('../wasm/reader.wasm', import.meta.url);

  // Node.js
  if (typeof process !== 'undefined' && process.versions?.node) {
    const [{ readFile }, { fileURLToPath }] = await Promise.all([
      import('node:fs/promises'),
      import('node:url'),
    ]);
    // Non-blocking read: this runs inside an async function.
    const bytes = await readFile(fileURLToPath(wasmUrl));
    const { instance } = await WebAssembly.instantiate(bytes, imports);
    return instance.exports;
  }

  // Edge / Browser
  const response = await fetch(wasmUrl);
  if (!response.ok) {
    // Without this check an HTML error page would reach WebAssembly.instantiate
    // and surface as an unrelated compile error.
    throw new Error(`tiny-parquet: failed to fetch ${wasmUrl} (HTTP ${response.status})`);
  }
  const bytes = await response.arrayBuffer();
  const { instance } = await WebAssembly.instantiate(bytes, imports);
  return instance.exports;
}
118
+
119
+ // ── Init (lazy singleton) ────────────────────────────────────────────────────
120
/**
 * Instantiate the WASM module once and share it between all callers.
 *
 * Concurrent callers receive the same in-flight promise. A failed load is not
 * cached: the stored promise is cleared on rejection, so the next call starts
 * a fresh attempt instead of replaying the old error forever.
 *
 * @returns {Promise<WebAssembly.Exports>} The module's exports.
 */
async function init() {
  if (initPromise) return initPromise;

  const pending = loadWasm().then(
    (exports) => {
      wasm = exports;
      // Cached views belong to whatever memory was used before; drop them.
      cachedUint8 = null;
      cachedDV = null;
      return wasm;
    },
    (err) => {
      // Only clear the slot if it still refers to this attempt.
      if (initPromise === pending) initPromise = null;
      throw err;
    },
  );
  initPromise = pending;
  return initPromise;
}
130
+
131
+ // ── Public API ───────────────────────────────────────────────────────────────
132
/**
 * Decode a Parquet file into columnar data.
 *
 * @param {Uint8Array} fileBytes - Raw Parquet file bytes.
 * @param {number} [maxRows=500] - Maximum rows to decode.
 * @returns {Promise<{schema: Array<{name: string, type: string}>, data: Record<string, any[]>, numRows: number}>}
 *
 * @example
 * const { schema, data, numRows } = await readParquet(bytes);
 * // schema:  [{ name: 'url', type: 'string' }, ...]
 * // data:    { url: ['https://example.com'], ts: [1708000000] }
 * // numRows: 1
 */
export async function readParquet(fileBytes, maxRows = 500) {
  await init();
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
  try {
    const bytesRef = addBorrowedObject(fileBytes);
    // A null maxRows is passed as 0x100000001, a value outside the 32-bit
    // unsigned range; anything else is coerced to an unsigned 32-bit integer.
    const rowLimit = isLikeNone(maxRows) ? 0x100000001 : maxRows >>> 0;
    wasm.readParquet(retptr, bytesRef, rowLimit);

    // The module writes three little-endian i32 values at retptr:
    // result handle, error handle, and a non-zero flag when the call failed.
    const resultHandle = getDV().getInt32(retptr + 0, true);
    const errorHandle = getDV().getInt32(retptr + 4, true);
    const failed = getDV().getInt32(retptr + 8, true);
    if (failed) {
      throw takeObject(errorHandle);
    }
    return takeObject(resultHandle);
  } finally {
    wasm.__wbindgen_add_to_stack_pointer(16);
    // Release the slot borrowed for fileBytes.
    heap[stack_pointer] = undefined;
    stack_pointer += 1;
  }
}
@@ -0,0 +1,22 @@
1
/** Describes a single column to write: its name and its type name. */
export interface ColumnSchema {
  /** Column name; also the key under which the column's values are passed. */
  name: string;
  /**
   * Column type name. The package README lists `float`, `double`, `bool` and
   * `timestamp_millis` as aliases of `float32`, `float64`, `boolean` and
   * `timestamp` respectively, so they are accepted here as well.
   */
  type:
    | 'string'
    | 'int32'
    | 'int64'
    | 'float32'
    | 'float'
    | 'float64'
    | 'double'
    | 'boolean'
    | 'bool'
    | 'timestamp'
    | 'timestamp_millis';
}
5
+
6
/** Optional settings accepted by `writeParquet`. */
export interface WriteConfig {
  /** Compression to apply; the package README documents `snappy` as the default. */
  compression?: 'snappy' | 'none';
}
9
+
10
/**
 * Write a Parquet file from columnar data.
 *
 * @param schema - Column definitions with name and type.
 * @param data - Columnar data keyed by column name.
 * @param config - Optional configuration (compression, etc).
 * @returns A promise for the Parquet file bytes as a Uint8Array.
 */
export function writeParquet(
  schema: ColumnSchema[],
  data: Record<string, any[]>,
  config?: WriteConfig,
): Promise<Uint8Array>;
package/src/writer.js ADDED
@@ -0,0 +1,210 @@
1
+ /**
2
+ * tiny-parquet/writer — WASM Parquet Writer
3
+ * Supports: Node.js, Browser, Cloudflare Workers, Vercel Edge, Deno, Bun
4
+ * WASM size: ~179KB
5
+ */
6
+
7
+ let wasm;
8
+ let initPromise = null;
9
+
10
+ // ── Heap / object table ──────────────────────────────────────────────────────
11
// Object table used to hand JS values to the WASM module by index.
// Indices below 128 form a downward-growing stack for borrowed objects,
// 128-131 hold the fixed values undefined, null, true and false, and
// everything from 132 upwards is a free list of reusable slots.
const heap = new Array(128).fill(undefined);
heap.push(undefined, null, true, false);
let heap_next = heap.length;
let stack_pointer = 128;

/** Store `obj` in the next free slot and return that slot's index. */
function addHeapObject(obj) {
  // A free slot stores the index of the next free slot. When the list is
  // exhausted, append a slot that points one past itself.
  if (heap_next === heap.length) {
    heap.push(heap.length + 1);
  }
  const slot = heap_next;
  heap_next = heap[slot];
  heap[slot] = obj;
  return slot;
}

/** Push `obj` onto the borrowed-object stack and return its index. */
function addBorrowedObject(obj) {
  if (stack_pointer === 1) {
    throw new Error('out of js stack');
  }
  stack_pointer -= 1;
  heap[stack_pointer] = obj;
  return stack_pointer;
}

/** Look up the value stored at `idx`. */
function getObject(idx) {
  return heap[idx];
}

/** Return slot `idx` to the free list; indices below 132 are never recycled. */
function dropObject(idx) {
  const isReserved = idx < 132;
  if (!isReserved) {
    heap[idx] = heap_next;
    heap_next = idx;
  }
}

/** Read the value at `idx`, release the slot, and return the value. */
function takeObject(idx) {
  const value = heap[idx];
  dropObject(idx);
  return value;
}
39
+
40
+ // ── Memory helpers ───────────────────────────────────────────────────────────
41
let cachedUint8 = null;

/**
 * Return a byte view over the WASM memory, rebuilding the cached view when
 * none exists yet or when the cached one reports a byte length of zero.
 */
function getUint8() {
  const needsRefresh = cachedUint8 === null || cachedUint8.byteLength === 0;
  if (needsRefresh) {
    cachedUint8 = new Uint8Array(wasm.memory.buffer);
  }
  return cachedUint8;
}
47
let cachedDV = null;

/**
 * Return a DataView over the WASM memory. The cached view is reused unless
 * its buffer is flagged as detached or, where the `detached` property is
 * unavailable, its buffer is no longer the module's current memory buffer.
 */
function getDV() {
  if (cachedDV !== null) {
    const { detached } = cachedDV.buffer;
    const replaced = detached === undefined && cachedDV.buffer !== wasm.memory.buffer;
    if (detached !== true && !replaced) {
      return cachedDV;
    }
  }
  cachedDV = new DataView(wasm.memory.buffer);
  return cachedDV;
}
54
+
55
const encoder = new TextEncoder();
// Strict UTF-8 decoder: invalid byte sequences throw instead of being replaced.
const decoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
decoder.decode(); // warm up
// Byte length of the string most recently written by passStringToWasm.
let WASM_VECTOR_LEN = 0;

/**
 * Copy `arg` into WASM memory as UTF-8 and return the destination pointer.
 * The number of bytes written is left in `WASM_VECTOR_LEN`.
 *
 * @param {string} arg - String to copy.
 * @param {Function} malloc - Allocator, called as `malloc(size, 1)`.
 * @param {Function} [realloc] - Resizer, called as
 *   `realloc(ptr, oldSize, newSize, 1)`. When omitted the string is encoded
 *   up front and copied in one step.
 * @returns {number} Pointer to the encoded bytes.
 */
function passStringToWasm(arg, malloc, realloc) {
  if (realloc === undefined) {
    const encoded = encoder.encode(arg);
    const dest = malloc(encoded.length, 1) >>> 0;
    getUint8().subarray(dest, dest + encoded.length).set(encoded);
    WASM_VECTOR_LEN = encoded.length;
    return dest;
  }

  let capacity = arg.length;
  let ptr = malloc(capacity, 1) >>> 0;
  const mem = getUint8();

  // Fast path: copy the leading run of ASCII code units one byte each.
  let written = 0;
  while (written < capacity) {
    const code = arg.charCodeAt(written);
    if (code > 0x7F) break;
    mem[ptr + written] = code;
    written += 1;
  }

  if (written !== capacity) {
    // Non-ASCII tail: reserve three bytes per remaining UTF-16 code unit,
    // encode into that space, then shrink the allocation to the bytes used.
    const rest = written === 0 ? arg : arg.slice(written);
    const previous = capacity;
    capacity = written + rest.length * 3;
    ptr = realloc(ptr, previous, capacity, 1) >>> 0;
    // Take a fresh view after realloc rather than reusing `mem`.
    const tail = getUint8().subarray(ptr + written, ptr + capacity);
    written += encoder.encodeInto(rest, tail).written;
    ptr = realloc(ptr, capacity, written, 1) >>> 0;
  }

  WASM_VECTOR_LEN = written;
  return ptr;
}
88
/** Decode `len` bytes of WASM memory starting at `ptr` as a UTF-8 string. */
function getStringFromWasm(ptr, len) {
  const start = ptr >>> 0; // treat the pointer as an unsigned 32-bit offset
  const bytes = getUint8().subarray(start, start + len);
  return decoder.decode(bytes);
}
91
/** Return a view (not a copy) of `len` bytes of WASM memory starting at `ptr`. */
function getArrayU8(ptr, len) {
  const start = ptr >>> 0; // treat the pointer as an unsigned 32-bit offset
  return getUint8().subarray(start, start + len);
}
95
/** True when `x` is `undefined` or `null`. */
function isLikeNone(x) {
  return x == null;
}
96
/**
 * Call `f` with `args`. If it throws, store the thrown value in the object
 * table and pass its index to the module's `__wbindgen_export3` function
 * instead of letting the exception propagate.
 */
function handleError(f, args) {
  try {
    return f.apply(this, args);
  } catch (thrown) {
    const handle = addHeapObject(thrown);
    wasm.__wbindgen_export3(handle);
  }
}
100
+
101
+ // ── WASM imports ─────────────────────────────────────────────────────────────
102
/**
 * Build the import object passed to `WebAssembly.instantiate` for the writer
 * module. Each entry is a small JS shim that works on object-table indices
 * and/or (pointer, length) pairs into WASM memory.
 */
function getImports() {
  const wbg = {
    __proto__: null,

    __wbg___wbindgen_is_falsy_e623e5b815413d00: (idx) => !getObject(idx),

    // Writes a present-flag (i32) at `out` and the number (f64) at `out + 8`.
    __wbg___wbindgen_number_get_8ff4255516ccad3e: (out, idx) => {
      const value = getObject(idx);
      const num = typeof value === 'number' ? value : undefined;
      getDV().setFloat64(out + 8, isLikeNone(num) ? 0 : num, true);
      getDV().setInt32(out, !isLikeNone(num), true);
    },

    // Writes the string's pointer at `out` and its byte length at `out + 4`;
    // the pointer is 0 when the value is not a string.
    __wbg___wbindgen_string_get_72fb696202c56729: (out, idx) => {
      const value = getObject(idx);
      const str = typeof value === 'string' ? value : undefined;
      let ptr = 0;
      if (!isLikeNone(str)) {
        ptr = passStringToWasm(str, wasm.__wbindgen_export, wasm.__wbindgen_export2);
      }
      const len = WASM_VECTOR_LEN;
      getDV().setInt32(out + 4, len, true);
      getDV().setInt32(out, ptr, true);
    },

    // Raise a JS error whose message is read from WASM memory.
    __wbg___wbindgen_throw_be289d5034ed271b: (ptr, len) => {
      throw new Error(getStringFromWasm(ptr, len));
    },

    __wbg_get_9b94d73e6221f75c: (idx, position) => addHeapObject(getObject(idx)[position >>> 0]),
    __wbg_get_b3ed3ad4be2bc8ac: (...args) => handleError(
      (targetIdx, keyIdx) => addHeapObject(Reflect.get(getObject(targetIdx), getObject(keyIdx))),
      args,
    ),
    __wbg_isArray_d314bb98fcf08331: (idx) => Array.isArray(getObject(idx)),
    __wbg_length_32ed9a279acd054c: (idx) => getObject(idx).length,
    __wbg_length_35a7bace40f36eac: (idx) => getObject(idx).length,
    __wbg_new_with_length_a2c39cbe88fd8ff1: (length) => addHeapObject(new Uint8Array(length >>> 0)),
    // Copy (ptr, len) bytes of WASM memory into the typed array at `idx`.
    __wbg_set_cc56eefd2dd91957: (idx, ptr, len) => getObject(idx).set(getArrayU8(ptr, len)),
    __wbindgen_cast_0000000000000001: (ptr, len) => addHeapObject(getStringFromWasm(ptr, len)),
    __wbindgen_object_drop_ref: (idx) => takeObject(idx),
  };

  return { './parquet_flake_bg.js': wbg };
}
135
+
136
+ // ── WASM loader (universal) ──────────────────────────────────────────────────
137
/**
 * Load and instantiate `writer.wasm`, returning the instance's exports.
 *
 * When `process.versions.node` is present the binary is read from disk;
 * otherwise it is fetched. Both paths resolve `../wasm/writer.wasm` relative
 * to this module's URL.
 *
 * @returns {Promise<WebAssembly.Exports>} Exports of the instantiated module.
 * @throws {Error} If the binary cannot be read, or the HTTP response for it
 *   is not successful.
 */
async function loadWasm() {
  const imports = getImports();
  const wasmUrl = new URL('../wasm/writer.wasm', import.meta.url);

  // Node.js
  if (typeof process !== 'undefined' && process.versions?.node) {
    const [{ readFile }, { fileURLToPath }] = await Promise.all([
      import('node:fs/promises'),
      import('node:url'),
    ]);
    // Non-blocking read: this runs inside an async function.
    const bytes = await readFile(fileURLToPath(wasmUrl));
    const { instance } = await WebAssembly.instantiate(bytes, imports);
    return instance.exports;
  }

  // Edge / Browser
  const response = await fetch(wasmUrl);
  if (!response.ok) {
    // Without this check an HTML error page would reach WebAssembly.instantiate
    // and surface as an unrelated compile error.
    throw new Error(`tiny-parquet: failed to fetch ${wasmUrl} (HTTP ${response.status})`);
  }
  const bytes = await response.arrayBuffer();
  const { instance } = await WebAssembly.instantiate(bytes, imports);
  return instance.exports;
}
158
+
159
+ // ── Init (lazy singleton) ────────────────────────────────────────────────────
160
/**
 * Instantiate the WASM module once and share it between all callers.
 *
 * Concurrent callers receive the same in-flight promise. A failed load is not
 * cached: the stored promise is cleared on rejection, so the next call starts
 * a fresh attempt instead of replaying the old error forever.
 *
 * @returns {Promise<WebAssembly.Exports>} The module's exports.
 */
async function init() {
  if (initPromise) return initPromise;

  const pending = loadWasm().then(
    (exports) => {
      wasm = exports;
      // Cached views belong to whatever memory was used before; drop them.
      cachedUint8 = null;
      cachedDV = null;
      return wasm;
    },
    (err) => {
      // Only clear the slot if it still refers to this attempt.
      if (initPromise === pending) initPromise = null;
      throw err;
    },
  );
  initPromise = pending;
  return initPromise;
}
170
+
171
+ // ── Public API ───────────────────────────────────────────────────────────────
172
/**
 * Encode columnar data as a Parquet file.
 *
 * @param {Array<{name: string, type: string}>} schema - Column definitions.
 *   Supported types: 'string', 'int32', 'int64', 'float32', 'float64', 'boolean', 'timestamp'
 * @param {Record<string, any[]>} data - Columnar data keyed by column name.
 * @param {Object} [config] - Optional configuration.
 * @param {string} [config.compression='snappy'] - 'snappy' | 'none'
 * @returns {Promise<Uint8Array>} The Parquet file bytes.
 *
 * @example
 * const bytes = await writeParquet(
 *   [{ name: 'url', type: 'string' }, { name: 'ts', type: 'int64' }],
 *   { url: ['https://example.com'], ts: [1708000000] },
 *   { compression: 'snappy' }
 * );
 */
export async function writeParquet(schema, data, config = {}) {
  await init();
  const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
  try {
    const schemaRef = addBorrowedObject(schema);
    const dataRef = addBorrowedObject(data);
    const configRef = addBorrowedObject(config);
    wasm.writeParquet(retptr, schemaRef, dataRef, configRef);

    // The module writes three little-endian i32 values at retptr:
    // result handle, error handle, and a non-zero flag when the call failed.
    const resultHandle = getDV().getInt32(retptr + 0, true);
    const errorHandle = getDV().getInt32(retptr + 4, true);
    const failed = getDV().getInt32(retptr + 8, true);
    if (failed) {
      throw takeObject(errorHandle);
    }
    return takeObject(resultHandle);
  } finally {
    wasm.__wbindgen_add_to_stack_pointer(16);
    // Release the three slots borrowed for schema, data and config.
    for (let released = 0; released < 3; released += 1) {
      heap[stack_pointer++] = undefined;
    }
  }
}
Binary file
Binary file