@parqui/core 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,458 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
// src/index.ts
// Public export surface of @parqui/core, wired through the generated
// __export/__toCommonJS interop helpers defined above.
var index_exports = {};
__export(index_exports, {
  buildFilterIndex: () => buildFilterIndex,
  buildGroups: () => buildGroups,
  buildSortIndex: () => buildSortIndex,
  collectUniqueValues: () => collectUniqueValues,
  createEmptyPipeline: () => createEmptyPipeline,
  fetchParquetFromUrl: () => fetchParquetFromUrl,
  fileToArrayBuffer: () => fileToArrayBuffer,
  readColumnValues: () => readColumnValues,
  readParquetData: () => readParquetData,
  readParquetMetadata: () => readParquetMetadata,
  readRowsByIndices: () => readRowsByIndices,
  sourceFromBuffer: () => sourceFromBuffer,
  sourceFromFile: () => sourceFromFile,
  sourceFromUrl: () => sourceFromUrl
});
module.exports = __toCommonJS(index_exports);
39
+
40
+ // src/reader.ts
41
+ var import_hyparquet = require("hyparquet");
42
var PARQUI_DEBUG_NS = "[parqui/core]";

/**
 * Debug logging is opt-in: it is active only when the host application has
 * set `globalThis.__PARQUI_DEBUG = true`.
 */
function coreDebugEnabled() {
  const host = globalThis;
  return host?.__PARQUI_DEBUG ?? false;
}

/** Log a namespaced debug message; no-op unless debugging is enabled. */
function coreLog(message, details) {
  if (!coreDebugEnabled()) return;
  const line = `${PARQUI_DEBUG_NS} ${message}`;
  if (details === void 0) {
    console.log(line);
  } else {
    console.log(line, details);
  }
}

/** Log a namespaced error; no-op unless debugging is enabled. */
function coreError(message, error) {
  if (!coreDebugEnabled()) return;
  console.error(`${PARQUI_DEBUG_NS} ${message}`, error);
}
59
/**
 * Wrap a Promise<ArrayBuffer> so it also exposes a `.slice(start, end)`
 * method that resolves to the sliced buffer. hyparquet may branch on
 * whether a value is a promise and call `.slice()` either way; this hybrid
 * object is awaitable AND sliceable, satisfying both code paths.
 */
function makeSliceablePromise(input) {
  const wrapped = Promise.resolve(input);
  wrapped.slice = function (start, end) {
    return wrapped.then((buffer) => buffer.slice(start, end));
  };
  return wrapped;
}
64
// Memoize the normalized wrapper per source object so repeated calls hand
// back the same identity (downstream WeakMap caches key off this object).
var normalizedSourceCache = /* @__PURE__ */ new WeakMap();

/**
 * Wrap an arbitrary ParquetSource so that its slice() always returns a
 * sliceable promise (see makeSliceablePromise). Results are memoized per
 * source so the wrapper has a stable identity.
 */
function normalizeSource(source) {
  const existing = normalizedSourceCache.get(source);
  if (existing) return existing;
  const wrapped = {
    byteLength: source.byteLength,
    slice(start, end) {
      const p = makeSliceablePromise(Promise.resolve(source.slice(start, end)));
      coreLog("normalizeSource:slice", {
        start,
        end: end ?? source.byteLength,
        hasSliceMethod: typeof p.slice === "function"
      });
      return p;
    }
  };
  normalizedSourceCache.set(source, wrapped);
  return wrapped;
}
85
/**
 * Create a ParquetSource backed by a browser File. Nothing is loaded up
 * front: byte ranges are fetched on demand via File.slice(), so files of
 * any size (including multi-GB) are supported.
 */
function sourceFromFile(file) {
  coreLog("sourceFromFile:create", { size: file.size });
  const slice = (start, end) => {
    // The blob's arrayBuffer() promise is made sliceable because hyparquet
    // may call `.slice()` on it directly instead of awaiting it.
    const p = makeSliceablePromise(file.slice(start, end).arrayBuffer());
    coreLog("sourceFromFile:slice", {
      start,
      end: end ?? file.size,
      hasSliceMethod: typeof p.slice === "function"
    });
    return p;
  };
  return { byteLength: file.size, slice };
}
101
/**
 * Create a ParquetSource from an ArrayBuffer that is already in memory.
 * Each slice() hands out a copy of the requested byte range.
 */
function sourceFromBuffer(buffer) {
  coreLog("sourceFromBuffer:create", { byteLength: buffer.byteLength });
  const slice = (start, end) => {
    const p = makeSliceablePromise(Promise.resolve(buffer.slice(start, end)));
    coreLog("sourceFromBuffer:slice", {
      start,
      end: end ?? buffer.byteLength,
      hasSliceMethod: typeof p.slice === "function"
    });
    return p;
  };
  return { byteLength: buffer.byteLength, slice };
}
118
/**
 * Create a ParquetSource for a remote URL via hyparquet's AsyncBuffer,
 * which reads byte ranges with HTTP Range requests (the file is never
 * downloaded in full).
 */
async function sourceFromUrl(url) {
  coreLog("sourceFromUrl:start", { url });
  const asyncBuf = await (0, import_hyparquet.asyncBufferFromUrl)({ url });
  const slice = (start, end) => {
    const p = makeSliceablePromise(Promise.resolve(asyncBuf.slice(start, end)));
    coreLog("sourceFromUrl:slice", {
      start,
      end: end ?? asyncBuf.byteLength,
      hasSliceMethod: typeof p.slice === "function"
    });
    return p;
  };
  const wrapped = { byteLength: asyncBuf.byteLength, slice };
  coreLog("sourceFromUrl:ready", { byteLength: wrapped.byteLength });
  return wrapped;
}
138
// NOTE(review): asyncBufferCache is retained for compatibility but is no
// longer consulted — see getAsyncBuffer below.
var asyncBufferCache = /* @__PURE__ */ new WeakMap();
var rawMetadataCache = /* @__PURE__ */ new WeakMap();

/**
 * Return the AsyncBuffer for a source. The normalized wrapper produced by
 * normalizeSource() is used directly as the AsyncBuffer: hyparquet's
 * cachedAsyncBuffer can normalize slice() into plain promises, which
 * breaks in some zone.js runtimes, so no extra wrapping is applied.
 *
 * Fix: the previous implementation looked up `safeSource` in
 * asyncBufferCache and, on a miss, stored `safeSource` under itself — an
 * identity mapping that could never change the result. That dead cache
 * round-trip has been removed; normalizeSource() already memoizes per
 * source, so identity is stable without it.
 */
function getAsyncBuffer(source) {
  return normalizeSource(source);
}
149
/**
 * Fetch (and memoize) hyparquet's raw file metadata for a source. The
 * promise itself is cached, so concurrent callers share a single footer
 * read instead of each parsing the metadata again.
 */
function getRawMetadata(source) {
  const safeSource = normalizeSource(source);
  const cached = rawMetadataCache.get(safeSource);
  if (cached) return cached;
  const pending = (0, import_hyparquet.parquetMetadataAsync)(getAsyncBuffer(safeSource));
  rawMetadataCache.set(safeSource, pending);
  return pending;
}
159
/**
 * Convert hyparquet's raw FileMetaData into the library's ParquetMetadata
 * shape: column descriptors, total row count, row-group count, and the
 * cumulative row-group start offsets.
 */
function toParquetMetadata(raw) {
  // schema[0] is the root schema element; real columns start at index 1.
  const columns = raw.schema.slice(1).map((col) => ({
    name: col.name,
    type: col.type ?? "UNKNOWN",
    nullable: col.repetition_type !== "REQUIRED"
  }));
  // Cumulative boundaries: [0, rg0_rows, rg0+rg1_rows, ..., totalRows].
  const rowGroupOffsets = raw.row_groups.reduce(
    (offsets, rg) => {
      offsets.push(offsets[offsets.length - 1] + Number(rg.num_rows));
      return offsets;
    },
    [0]
  );
  return {
    rowCount: Number(raw.num_rows),
    columns,
    rowGroups: raw.row_groups.length,
    rowGroupOffsets,
    createdBy: raw.created_by ?? void 0
  };
}
177
/**
 * Read only the parquet footer metadata from a source (cheap: a small
 * read at the end of the file) and convert it to ParquetMetadata.
 */
async function readParquetMetadata(source) {
  return toParquetMetadata(await getRawMetadata(source));
}
181
/**
 * Read a row range (optionally restricted to specific columns) from a
 * parquet source. Only the requested range is read, not the entire file.
 * Returns { metadata, rows } where each row is a name→value object.
 */
async function readParquetData(source, options = {}) {
  coreLog("readParquetData:start", {
    offset: options.offset ?? 0,
    limit: options.limit ?? null,
    columns: options.columns?.length ?? "all"
  });
  const asyncBuf = getAsyncBuffer(source);
  const rawMetadata = await getRawMetadata(source);
  const metadata = toParquetMetadata(rawMetadata);
  const rows = [];
  try {
    await (0, import_hyparquet.parquetRead)({
      file: asyncBuf,
      metadata: rawMetadata,
      columns: options.columns,
      rowStart: options.offset ?? 0,
      // rowEnd is exclusive; undefined means "read to end of file".
      rowEnd: options.limit !== void 0 ? (options.offset ?? 0) + options.limit : void 0,
      onComplete: (data) => {
        // Rows arrive as positional arrays; zip them with column names.
        // NOTE(review): assumes hyparquet yields values in the same order
        // as `options.columns` when a subset is requested — confirm
        // against the hyparquet API docs.
        const columnNames = options.columns ?? metadata.columns.map((c) => c.name);
        for (const row of data) {
          const obj = {};
          columnNames.forEach((name, i) => {
            obj[name] = row[i];
          });
          rows.push(obj);
        }
      }
    });
  } catch (error) {
    // Log (when debugging) but always propagate to the caller.
    coreError("readParquetData:error", error);
    throw error;
  }
  return { metadata, rows };
}
215
/**
 * Read ALL values of the given columns from the source, row-group by
 * row-group. Used for building sort indices, group keys, and filter
 * unique-value lists; only the requested columns are read.
 *
 * Between row groups the function yields to the event loop (setTimeout 0)
 * so the UI stays responsive, and checks `isCancelled` for early abort —
 * in that case the partial result collected so far is returned.
 *
 * Returns a Map of column name → array of values (one entry per row).
 */
async function readColumnValues(source, columnNames, isCancelled) {
  const asyncBuf = getAsyncBuffer(source);
  const rawMetadata = await getRawMetadata(source);
  const result = /* @__PURE__ */ new Map();
  for (const name of columnNames) {
    result.set(name, []);
  }
  let rowOffset = 0;
  for (const rg of rawMetadata.row_groups) {
    // Early exit if this compute has been superseded by a newer request.
    if (isCancelled?.()) return result;
    const rgRows = Number(rg.num_rows);
    await (0, import_hyparquet.parquetRead)({
      file: asyncBuf,
      metadata: rawMetadata,
      columns: columnNames,
      rowStart: rowOffset,
      rowEnd: rowOffset + rgRows,
      onComplete: (data) => {
        // Append each row's values to the per-column arrays, preserving
        // row order across row groups.
        for (let rowIdx = 0; rowIdx < data.length; rowIdx++) {
          const row = data[rowIdx];
          for (let colIdx = 0; colIdx < columnNames.length; colIdx++) {
            result.get(columnNames[colIdx]).push(row[colIdx]);
          }
        }
      }
    });
    rowOffset += rgRows;
    // Yield to the browser between row groups so scroll events can fire.
    await new Promise((resolve) => setTimeout(resolve, 0));
  }
  return result;
}
246
/**
 * Read specific rows (by absolute row index) for the given columns, and
 * return them in the caller's original order.
 *
 * Indices are sorted and coalesced into near-contiguous [start, end)
 * ranges — gaps of up to 50 rows are merged — so each hyparquet read is
 * sequential; the fetched rows are then scattered back to their original
 * output positions.
 *
 * Fix: removed the dead local `sortedPosIdx`, which was incremented on
 * every match but never read.
 */
async function readRowsByIndices(source, rowIndices, columns) {
  if (rowIndices.length === 0) return [];
  const asyncBuf = getAsyncBuffer(source);
  const rawMetadata = await getRawMetadata(source);
  const metadata = toParquetMetadata(rawMetadata);
  const columnNames = columns ?? metadata.columns.map((c) => c.name);
  // Pair each requested index with its output position, sorted by index
  // so reads proceed front-to-back through the file.
  const sorted = rowIndices.map((idx, pos) => ({ idx, pos }));
  sorted.sort((a, b) => a.idx - b.idx);
  // Coalesce into ranges, allowing small gaps (<= 50 rows) to merge.
  const ranges = [];
  let rangeStart = sorted[0].idx;
  let rangeEnd = sorted[0].idx + 1;
  let positions = [sorted[0].pos];
  for (let i = 1; i < sorted.length; i++) {
    if (sorted[i].idx <= rangeEnd + 50) {
      rangeEnd = sorted[i].idx + 1;
      positions.push(sorted[i].pos);
    } else {
      ranges.push({ start: rangeStart, end: rangeEnd, positions });
      rangeStart = sorted[i].idx;
      rangeEnd = sorted[i].idx + 1;
      positions = [sorted[i].pos];
    }
  }
  ranges.push({ start: rangeStart, end: rangeEnd, positions });
  const result = new Array(rowIndices.length);
  for (const range of ranges) {
    const rangeRows = [];
    await (0, import_hyparquet.parquetRead)({
      file: asyncBuf,
      metadata: rawMetadata,
      columns: columnNames,
      rowStart: range.start,
      rowEnd: range.end,
      onComplete: (data) => {
        for (const row of data) {
          const obj = {};
          columnNames.forEach((name, i) => {
            obj[name] = row[i];
          });
          rangeRows.push(obj);
        }
      }
    });
    // Scatter the rows of this range back to their original positions.
    // Gap rows that were read but not requested are simply skipped.
    for (const s of sorted) {
      if (s.idx >= range.start && s.idx < range.end) {
        const localIdx = s.idx - range.start;
        if (localIdx < rangeRows.length) {
          result[s.pos] = rangeRows[localIdx];
        }
      }
    }
  }
  return result;
}
302
/**
 * Load an entire File into an ArrayBuffer.
 * @deprecated Use sourceFromFile() instead — this reads the whole file
 * into memory at once.
 */
async function fileToArrayBuffer(file) {
  return await file.arrayBuffer();
}
305
/**
 * Download an entire parquet file from a URL into an ArrayBuffer.
 * Throws on any non-2xx HTTP status.
 * @deprecated Use sourceFromUrl() instead — this fetches the whole file.
 */
async function fetchParquetFromUrl(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.arrayBuffer();
  }
  throw new Error(
    `Failed to fetch parquet file: ${response.status} ${response.statusText}`
  );
}
314
+
315
+ // src/pipeline.ts
316
/** Build a fresh pipeline state: no sorts, no filters, no groups. */
function createEmptyPipeline() {
  return {
    sorts: [],
    filters: [],
    groups: []
  };
}
319
/**
 * Build a sort index: the original row indices reordered according to
 * `sorts` (applied in priority order). `values` is consulted only for its
 * length — one index per row. Ties fall back to the original index order,
 * keeping the result stable. Unknown sort columns are skipped.
 */
function buildSortIndex(values, sorts, columnValues) {
  const indices = Array.from({ length: values.length }, (_, i) => i);
  const compareRows = (a, b) => {
    for (const { column, direction } of sorts) {
      const col = columnValues.get(column);
      if (!col) continue;
      const cmp = compareValues(col[a], col[b]);
      if (cmp !== 0) return direction === "asc" ? cmp : -cmp;
    }
    // Stable tiebreak: preserve original order.
    return a - b;
  };
  indices.sort(compareRows);
  return indices;
}
334
/**
 * Apply every filter (AND semantics) and return the passing row indices.
 * Returns null when there are no filters at all — "no filtering", which
 * is distinct from an empty match. Unknown filter columns are skipped.
 */
function buildFilterIndex(totalRows, filters, columnValues) {
  if (filters.length === 0) return null;
  const rowPasses = (i) => {
    for (const filter of filters) {
      const col = columnValues.get(filter.column);
      if (!col) continue;
      if (!matchesFilter(col[i], filter)) return false;
    }
    return true;
  };
  const passing = [];
  for (let i = 0; i < totalRows; i++) {
    if (rowPasses(i)) passing.push(i);
  }
  return passing;
}
351
/**
 * Group the (already filtered/sorted) display rows by the first group
 * column. Buckets store DISPLAY indices (positions within `rowIndices`),
 * not source indices, so getRow(displayIndex) works directly. The
 * resulting groups are sorted by their key value and start collapsed.
 * Only single-level grouping (groups[0]) is applied here.
 */
function buildGroups(rowIndices, groups, columnValues) {
  if (groups.length === 0) return [];
  const col = columnValues.get(groups[0].column);
  if (!col) return [];
  const buckets = new Map();
  const keyLabels = new Map();
  rowIndices.forEach((sourceIdx, displayIdx) => {
    const value = col[sourceIdx];
    const key = formatGroupKey(value);
    let bucket = buckets.get(key);
    if (!bucket) {
      bucket = [];
      buckets.set(key, bucket);
      keyLabels.set(key, value);
    }
    bucket.push(displayIdx);
  });
  const sortedKeys = [...buckets.keys()].sort(
    (a, b) => compareValues(keyLabels.get(a), keyLabels.get(b))
  );
  return sortedKeys.map((key) => ({
    key: keyLabels.get(key),
    label: key,
    count: buckets.get(key).length,
    rowIndices: buckets.get(key),
    expanded: false
  }));
}
381
/**
 * Collect up to `limit` unique values from a column — uniqueness is keyed
 * by the group-key string form — and return them sorted.
 */
function collectUniqueValues(values, limit = 500) {
  const seen = new Set();
  const unique = [];
  for (const v of values) {
    const key = formatGroupKey(v);
    if (seen.has(key)) continue;
    seen.add(key);
    unique.push(v);
    if (unique.length >= limit) break;
  }
  return unique.sort(compareValues);
}
395
/**
 * Total ordering over mixed cell values. null/undefined sort last;
 * numbers, bigints, booleans, and Dates compare natively within their own
 * type; everything else falls back to locale-aware string comparison.
 */
function compareValues(a, b) {
  const aNull = a === null || a === void 0;
  const bNull = b === null || b === void 0;
  if (aNull) return bNull ? 0 : 1;
  if (bNull) return -1;
  if (typeof a === "number" && typeof b === "number") return a - b;
  if (typeof a === "bigint" && typeof b === "bigint") {
    if (a < b) return -1;
    return a > b ? 1 : 0;
  }
  if (typeof a === "boolean" && typeof b === "boolean") return Number(a) - Number(b);
  if (a instanceof Date && b instanceof Date) return a.getTime() - b.getTime();
  return String(a).localeCompare(String(b));
}
404
/**
 * Evaluate a single filter against one cell value. Comparisons delegate
 * to compareValues; contains/not_contains are case-insensitive substring
 * tests; "in" requires an array value (non-array never matches). Unknown
 * operators match everything (fail-open), as in the original.
 */
function matchesFilter(value, filter) {
  const op = filter.operator;
  if (op === "is_null") return value === null || value === void 0;
  if (op === "is_not_null") return value !== null && value !== void 0;
  if (op === "eq") return value === filter.value;
  if (op === "neq") return value !== filter.value;
  if (op === "gt") return compareValues(value, filter.value) > 0;
  if (op === "gte") return compareValues(value, filter.value) >= 0;
  if (op === "lt") return compareValues(value, filter.value) < 0;
  if (op === "lte") return compareValues(value, filter.value) <= 0;
  if (op === "contains" || op === "not_contains") {
    const hit = String(value).toLowerCase().includes(String(filter.value).toLowerCase());
    return op === "contains" ? hit : !hit;
  }
  if (op === "in") {
    if (!Array.isArray(filter.value)) return false;
    return new Set(filter.value.map(String)).has(String(value));
  }
  return true;
}
436
/**
 * Canonical string key for grouping/uniqueness. null and undefined share
 * the "(null)" bucket; Dates use their ISO form; everything else uses
 * String() coercion.
 */
function formatGroupKey(value) {
  if (value == null) return "(null)";
  return value instanceof Date ? value.toISOString() : String(value);
}
441
// Annotate the CommonJS export names for ESM import in node:
// The `0 && (...)` expression short-circuits and never executes at
// runtime; presumably Node's static export-name detection parses it to
// surface these names for `import { x } from` — TODO confirm against
// Node's CJS named-exports documentation.
0 && (module.exports = {
  buildFilterIndex,
  buildGroups,
  buildSortIndex,
  collectUniqueValues,
  createEmptyPipeline,
  fetchParquetFromUrl,
  fileToArrayBuffer,
  readColumnValues,
  readParquetData,
  readParquetMetadata,
  readRowsByIndices,
  sourceFromBuffer,
  sourceFromFile,
  sourceFromUrl
});
//# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts","../src/reader.ts","../src/pipeline.ts"],"sourcesContent":["export {\n readParquetMetadata,\n readParquetData,\n readColumnValues,\n readRowsByIndices,\n sourceFromFile,\n sourceFromBuffer,\n sourceFromUrl,\n // Legacy\n fileToArrayBuffer,\n fetchParquetFromUrl,\n} from \"./reader.js\";\n\nexport {\n buildSortIndex,\n buildFilterIndex,\n buildGroups,\n collectUniqueValues,\n createEmptyPipeline,\n} from \"./pipeline.js\";\n\nexport type {\n ParquetColumn,\n ParquetMetadata,\n ParquetRow,\n ParquetData,\n ReadOptions,\n ParquetSource,\n} from \"./types.js\";\n\nexport type {\n SortDef,\n SortDirection,\n FilterDef,\n FilterOperator,\n GroupDef,\n GroupNode,\n PipelineState,\n} from \"./pipeline.js\";\n","import {\n parquetMetadataAsync,\n parquetRead,\n asyncBufferFromUrl,\n} from \"hyparquet\";\nimport type { AsyncBuffer, FileMetaData } from \"hyparquet\";\nimport type {\n ParquetColumn,\n ParquetData,\n ParquetMetadata,\n ParquetRow,\n ParquetSource,\n ReadOptions,\n} from \"./types.js\";\n\nconst PARQUI_DEBUG_NS = \"[parqui/core]\";\n\nfunction coreDebugEnabled(): boolean {\n const g = globalThis as { __PARQUI_DEBUG?: boolean } | undefined;\n return g?.__PARQUI_DEBUG ?? 
false;\n}\n\nfunction coreLog(message: string, details?: unknown) {\n if (!coreDebugEnabled()) return;\n if (details !== undefined) {\n console.log(`${PARQUI_DEBUG_NS} ${message}`, details);\n } else {\n console.log(`${PARQUI_DEBUG_NS} ${message}`);\n }\n}\n\nfunction coreError(message: string, error: unknown) {\n if (!coreDebugEnabled()) return;\n console.error(`${PARQUI_DEBUG_NS} ${message}`, error);\n}\n\ntype SliceablePromise = Promise<ArrayBuffer> & {\n slice: (start: number, end?: number) => Promise<ArrayBuffer>;\n};\n\nfunction makeSliceablePromise(input: Promise<ArrayBuffer>): SliceablePromise {\n const promise = Promise.resolve(input) as SliceablePromise;\n promise.slice = (start: number, end?: number) =>\n promise.then((buffer) => buffer.slice(start, end));\n return promise;\n}\n\nconst normalizedSourceCache = new WeakMap<ParquetSource, ParquetSource>();\n\nfunction normalizeSource(source: ParquetSource): ParquetSource {\n const cached = normalizedSourceCache.get(source);\n if (cached) return cached;\n\n const wrapped: ParquetSource = {\n byteLength: source.byteLength,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const p = makeSliceablePromise(\n Promise.resolve(source.slice(start, end)),\n );\n coreLog(\"normalizeSource:slice\", {\n start,\n end: end ?? 
source.byteLength,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n\n normalizedSourceCache.set(source, wrapped);\n return wrapped;\n}\n\n// ── Source creation ──\n\n/**\n * Create a ParquetSource from a browser File object.\n * Does NOT load the file into memory — reads slices on demand.\n * Works with files of any size (including multi-GB).\n */\nexport function sourceFromFile(file: File): ParquetSource {\n coreLog(\"sourceFromFile:create\", { size: file.size });\n return {\n byteLength: file.size,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const blob = file.slice(start, end);\n // hyparquet may branch on `instanceof Promise` and then call `.slice()` on non-promises.\n // This object works in both branches: it's awaitable AND has a `.slice()` method.\n const p = makeSliceablePromise(blob.arrayBuffer());\n coreLog(\"sourceFromFile:slice\", {\n start,\n end: end ?? file.size,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n}\n\n/**\n * Create a ParquetSource from an ArrayBuffer (file already in memory).\n */\nexport function sourceFromBuffer(buffer: ArrayBuffer): ParquetSource {\n coreLog(\"sourceFromBuffer:create\", { byteLength: buffer.byteLength });\n return {\n byteLength: buffer.byteLength,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const p = makeSliceablePromise(\n Promise.resolve(buffer.slice(start, end)),\n );\n coreLog(\"sourceFromBuffer:slice\", {\n start,\n end: end ?? 
buffer.byteLength,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n}\n\n/**\n * Create a ParquetSource from a URL using HTTP Range requests.\n */\nexport async function sourceFromUrl(url: string): Promise<ParquetSource> {\n coreLog(\"sourceFromUrl:start\", { url });\n const asyncBuf = await asyncBufferFromUrl({ url });\n const wrapped: ParquetSource = {\n byteLength: asyncBuf.byteLength,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const p = makeSliceablePromise(\n Promise.resolve(asyncBuf.slice(start, end)),\n );\n coreLog(\"sourceFromUrl:slice\", {\n start,\n end: end ?? asyncBuf.byteLength,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n coreLog(\"sourceFromUrl:ready\", { byteLength: wrapped.byteLength });\n return wrapped;\n}\n\n// ── Internal caching ──\n\nconst asyncBufferCache = new WeakMap<ParquetSource, AsyncBuffer>();\nconst rawMetadataCache = new WeakMap<ParquetSource, Promise<FileMetaData>>();\n\nfunction getAsyncBuffer(source: ParquetSource): AsyncBuffer {\n const safeSource = normalizeSource(source);\n let buf = asyncBufferCache.get(safeSource);\n if (!buf) {\n // Use the direct source object. cachedAsyncBuffer can normalize slice()\n // into plain Promises, which breaks in some zone.js runtimes.\n buf = safeSource as AsyncBuffer;\n asyncBufferCache.set(safeSource, buf);\n }\n return buf;\n}\n\nfunction getRawMetadata(source: ParquetSource): Promise<FileMetaData> {\n const safeSource = normalizeSource(source);\n let promise = rawMetadataCache.get(safeSource);\n if (!promise) {\n const asyncBuf = getAsyncBuffer(safeSource);\n promise = parquetMetadataAsync(asyncBuf);\n rawMetadataCache.set(safeSource, promise);\n }\n return promise;\n}\n\nfunction toParquetMetadata(raw: FileMetaData): ParquetMetadata {\n const columns: ParquetColumn[] = raw.schema.slice(1).map((col) => ({\n name: col.name,\n type: col.type ?? 
\"UNKNOWN\",\n nullable: col.repetition_type !== \"REQUIRED\",\n }));\n\n // Compute row group boundaries: [0, rg0_rows, rg0+rg1_rows, ..., totalRows]\n const rowGroupOffsets: number[] = [0];\n for (const rg of raw.row_groups) {\n rowGroupOffsets.push(rowGroupOffsets[rowGroupOffsets.length - 1] + Number(rg.num_rows));\n }\n\n return {\n rowCount: Number(raw.num_rows),\n columns,\n rowGroups: raw.row_groups.length,\n rowGroupOffsets,\n createdBy: raw.created_by ?? undefined,\n };\n}\n\n// ── Reading ──\n\n/**\n * Read only the metadata from a parquet source.\n * Reads a small amount from the end of the file (footer).\n */\nexport async function readParquetMetadata(\n source: ParquetSource,\n): Promise<ParquetMetadata> {\n const raw = await getRawMetadata(source);\n return toParquetMetadata(raw);\n}\n\n/**\n * Read data (multiple columns, row range) from a parquet source.\n * Only reads the requested row range — does NOT load the entire file.\n */\nexport async function readParquetData(\n source: ParquetSource,\n options: ReadOptions = {},\n): Promise<ParquetData> {\n coreLog(\"readParquetData:start\", {\n offset: options.offset ?? 0,\n limit: options.limit ?? null,\n columns: options.columns?.length ?? \"all\",\n });\n const asyncBuf = getAsyncBuffer(source);\n const rawMetadata = await getRawMetadata(source);\n const metadata = toParquetMetadata(rawMetadata);\n\n const rows: ParquetRow[] = [];\n\n try {\n await parquetRead({\n file: asyncBuf,\n metadata: rawMetadata,\n columns: options.columns,\n rowStart: options.offset ?? 0,\n rowEnd:\n options.limit !== undefined\n ? (options.offset ?? 0) + options.limit\n : undefined,\n onComplete: (data: unknown[][]) => {\n const columnNames =\n options.columns ?? 
metadata.columns.map((c) => c.name);\n for (const row of data) {\n const obj: ParquetRow = {};\n columnNames.forEach((name, i) => {\n obj[name] = row[i];\n });\n rows.push(obj);\n }\n },\n });\n } catch (error) {\n coreError(\"readParquetData:error\", error);\n throw error;\n }\n\n return { metadata, rows };\n}\n\n/**\n * Read ALL values of specific columns from the parquet source.\n * Used for building sort indices, group keys, and filter unique values.\n * Only reads the requested columns — not all data.\n *\n * Reads row-group by row-group with yields between each to avoid\n * blocking the main thread. This keeps the UI responsive (scroll, etc.)\n * during long reads on large files.\n *\n * @param isCancelled — optional callback checked between row groups.\n * If it returns true, reading stops early and partial results are returned.\n * This allows quick cancellation when the user changes sort/filter mid-read.\n *\n * Returns a Map of column name → array of values (one per row).\n */\nexport async function readColumnValues(\n source: ParquetSource,\n columnNames: string[],\n isCancelled?: () => boolean,\n): Promise<Map<string, unknown[]>> {\n const asyncBuf = getAsyncBuffer(source);\n const rawMetadata = await getRawMetadata(source);\n\n const result = new Map<string, unknown[]>();\n for (const name of columnNames) {\n result.set(name, []);\n }\n\n // Read row-group by row-group to keep the main thread responsive.\n // Check isCancelled between each row group for early abort.\n let rowOffset = 0;\n for (const rg of rawMetadata.row_groups) {\n // Early exit if this compute was superseded\n if (isCancelled?.()) return result;\n\n const rgRows = Number(rg.num_rows);\n\n await parquetRead({\n file: asyncBuf,\n metadata: rawMetadata,\n columns: columnNames,\n rowStart: rowOffset,\n rowEnd: rowOffset + rgRows,\n onComplete: (data: unknown[][]) => {\n for (let rowIdx = 0; rowIdx < data.length; rowIdx++) {\n const row = data[rowIdx];\n for (let colIdx = 0; colIdx < 
columnNames.length; colIdx++) {\n result.get(columnNames[colIdx])!.push(row[colIdx]);\n }\n }\n },\n });\n\n rowOffset += rgRows;\n\n // Yield to browser between row groups so scroll events can fire\n await new Promise<void>((resolve) => setTimeout(resolve, 0));\n }\n\n return result;\n}\n\n/**\n * Read values of specific columns for specific row indices.\n * Useful for fetching display data for sorted/filtered views.\n */\nexport async function readRowsByIndices(\n source: ParquetSource,\n rowIndices: number[],\n columns?: string[],\n): Promise<ParquetRow[]> {\n if (rowIndices.length === 0) return [];\n\n const asyncBuf = getAsyncBuffer(source);\n const rawMetadata = await getRawMetadata(source);\n const metadata = toParquetMetadata(rawMetadata);\n const columnNames = columns ?? metadata.columns.map((c) => c.name);\n\n // Sort indices to read sequentially, then re-order\n const sorted = rowIndices.map((idx, pos) => ({ idx, pos }));\n sorted.sort((a, b) => a.idx - b.idx);\n\n // Find contiguous ranges to batch reads\n const ranges: { start: number; end: number; positions: number[] }[] = [];\n let rangeStart = sorted[0].idx;\n let rangeEnd = sorted[0].idx + 1;\n let positions = [sorted[0].pos];\n\n for (let i = 1; i < sorted.length; i++) {\n if (sorted[i].idx <= rangeEnd + 50) {\n // Allow small gaps to merge ranges\n rangeEnd = sorted[i].idx + 1;\n positions.push(sorted[i].pos);\n } else {\n ranges.push({ start: rangeStart, end: rangeEnd, positions });\n rangeStart = sorted[i].idx;\n rangeEnd = sorted[i].idx + 1;\n positions = [sorted[i].pos];\n }\n }\n ranges.push({ start: rangeStart, end: rangeEnd, positions });\n\n const result: ParquetRow[] = new Array(rowIndices.length);\n\n for (const range of ranges) {\n const rangeRows: ParquetRow[] = [];\n\n await parquetRead({\n file: asyncBuf,\n metadata: rawMetadata,\n columns: columnNames,\n rowStart: range.start,\n rowEnd: range.end,\n onComplete: (data: unknown[][]) => {\n for (const row of data) {\n const obj: 
ParquetRow = {};\n columnNames.forEach((name, i) => {\n obj[name] = row[i];\n });\n rangeRows.push(obj);\n }\n },\n });\n\n // Map range rows back to the correct positions\n let sortedPosIdx = 0;\n for (const s of sorted) {\n if (s.idx >= range.start && s.idx < range.end) {\n const localIdx = s.idx - range.start;\n if (localIdx < rangeRows.length) {\n result[s.pos] = rangeRows[localIdx];\n }\n sortedPosIdx++;\n }\n }\n }\n\n return result;\n}\n\n// ── Legacy helpers (kept for backwards compatibility) ──\n\n/** @deprecated Use sourceFromFile() instead */\nexport async function fileToArrayBuffer(file: File): Promise<ArrayBuffer> {\n return file.arrayBuffer();\n}\n\n/** @deprecated Use sourceFromUrl() instead */\nexport async function fetchParquetFromUrl(url: string): Promise<ArrayBuffer> {\n const response = await fetch(url);\n if (!response.ok) {\n throw new Error(\n `Failed to fetch parquet file: ${response.status} ${response.statusText}`,\n );\n }\n return response.arrayBuffer();\n}\n","/**\n * Data pipeline: sorting, filtering, grouping.\n * Operates on column-level indices — never loads all columns at once.\n */\n\n// ── Sort ──\n\nexport type SortDirection = \"asc\" | \"desc\";\n\nexport interface SortDef {\n column: string;\n direction: SortDirection;\n}\n\n// ── Filter ──\n\nexport type FilterOperator =\n | \"eq\"\n | \"neq\"\n | \"gt\"\n | \"gte\"\n | \"lt\"\n | \"lte\"\n | \"contains\"\n | \"not_contains\"\n | \"is_null\"\n | \"is_not_null\"\n | \"in\";\n\nexport interface FilterDef {\n column: string;\n operator: FilterOperator;\n /** The value(s) to compare against. For \"in\" — an array; for \"is_null\"/\"is_not_null\" — ignored. 
*/\n value?: unknown;\n}\n\n// ── Group ──\n\nexport interface GroupDef {\n column: string;\n}\n\nexport interface GroupNode {\n /** The value of the group key */\n key: unknown;\n /** Display label for this group */\n label: string;\n /** Number of rows in this group */\n count: number;\n /** Row indices belonging to this group (in the filtered/sorted order) */\n rowIndices: number[];\n /** Whether this group is expanded in the UI */\n expanded: boolean;\n}\n\n// ── Pipeline state ──\n\nexport interface PipelineState {\n sorts: SortDef[];\n filters: FilterDef[];\n groups: GroupDef[];\n}\n\nexport function createEmptyPipeline(): PipelineState {\n return { sorts: [], filters: [], groups: [] };\n}\n\n// ── Index building ──\n\n/**\n * Build a sorted index from column values.\n * Returns an array of original row indices in the sorted order.\n */\nexport function buildSortIndex(\n values: unknown[],\n sorts: SortDef[],\n columnValues: Map<string, unknown[]>,\n): number[] {\n const indices = Array.from({ length: values.length }, (_, i) => i);\n\n indices.sort((a, b) => {\n for (const sort of sorts) {\n const col = columnValues.get(sort.column);\n if (!col) continue;\n const va = col[a];\n const vb = col[b];\n const cmp = compareValues(va, vb);\n if (cmp !== 0) return sort.direction === \"asc\" ? 
cmp : -cmp;\n }\n return a - b; // stable: preserve original order for ties\n });\n\n return indices;\n}\n\n/**\n * Apply filters to produce a set of passing row indices.\n */\nexport function buildFilterIndex(\n totalRows: number,\n filters: FilterDef[],\n columnValues: Map<string, unknown[]>,\n): number[] | null {\n if (filters.length === 0) return null; // null = no filtering\n\n const passing: number[] = [];\n for (let i = 0; i < totalRows; i++) {\n let pass = true;\n for (const filter of filters) {\n const col = columnValues.get(filter.column);\n if (!col) continue;\n if (!matchesFilter(col[i], filter)) {\n pass = false;\n break;\n }\n }\n if (pass) passing.push(i);\n }\n return passing;\n}\n\n/**\n * Build group nodes from column values.\n * Supports multi-level grouping (first group column → second → etc.)\n */\nexport function buildGroups(\n rowIndices: number[],\n groups: GroupDef[],\n columnValues: Map<string, unknown[]>,\n): GroupNode[] {\n if (groups.length === 0) return [];\n\n const firstGroup = groups[0];\n const col = columnValues.get(firstGroup.column);\n if (!col) return [];\n\n // Bucket rows by group key — store DISPLAY indices (position in mapping),\n // not source indices, so getRow(displayIndex) works directly.\n const buckets = new Map<string, number[]>();\n const keyLabels = new Map<string, unknown>();\n\n for (let displayIdx = 0; displayIdx < rowIndices.length; displayIdx++) {\n const sourceIdx = rowIndices[displayIdx];\n const value = col[sourceIdx];\n const key = formatGroupKey(value);\n if (!buckets.has(key)) {\n buckets.set(key, []);\n keyLabels.set(key, value);\n }\n buckets.get(key)!.push(displayIdx);\n }\n\n // Sort group keys\n const sortedKeys = [...buckets.keys()].sort((a, b) => {\n const va = keyLabels.get(a);\n const vb = keyLabels.get(b);\n return compareValues(va, vb);\n });\n\n return sortedKeys.map((key) => ({\n key: keyLabels.get(key),\n label: key,\n count: buckets.get(key)!.length,\n rowIndices: buckets.get(key)!,\n 
expanded: false,\n }));\n}\n\n/**\n * Collect unique values from a column (up to a limit).\n * Returns sorted unique values.\n */\nexport function collectUniqueValues(\n values: unknown[],\n limit: number = 500,\n): unknown[] {\n const seen = new Set<string>();\n const unique: unknown[] = [];\n\n for (const v of values) {\n const key = formatGroupKey(v);\n if (!seen.has(key)) {\n seen.add(key);\n unique.push(v);\n if (unique.length >= limit) break;\n }\n }\n\n unique.sort((a, b) => compareValues(a, b));\n return unique;\n}\n\n// ── Helpers ──\n\nfunction compareValues(a: unknown, b: unknown): number {\n // nulls last\n if (a === null || a === undefined) return b === null || b === undefined ? 0 : 1;\n if (b === null || b === undefined) return -1;\n\n if (typeof a === \"number\" && typeof b === \"number\") return a - b;\n if (typeof a === \"bigint\" && typeof b === \"bigint\") return a < b ? -1 : a > b ? 1 : 0;\n if (typeof a === \"boolean\" && typeof b === \"boolean\") return Number(a) - Number(b);\n\n // Dates\n if (a instanceof Date && b instanceof Date) return a.getTime() - b.getTime();\n\n // Default: string comparison\n return String(a).localeCompare(String(b));\n}\n\nfunction matchesFilter(value: unknown, filter: FilterDef): boolean {\n switch (filter.operator) {\n case \"is_null\":\n return value === null || value === undefined;\n case \"is_not_null\":\n return value !== null && value !== undefined;\n case \"eq\":\n return value === filter.value;\n case \"neq\":\n return value !== filter.value;\n case \"gt\":\n return compareValues(value, filter.value) > 0;\n case \"gte\":\n return compareValues(value, filter.value) >= 0;\n case \"lt\":\n return compareValues(value, filter.value) < 0;\n case \"lte\":\n return compareValues(value, filter.value) <= 0;\n case \"contains\":\n return String(value).toLowerCase().includes(String(filter.value).toLowerCase());\n case \"not_contains\":\n return 
!String(value).toLowerCase().includes(String(filter.value).toLowerCase());\n case \"in\":\n if (Array.isArray(filter.value)) {\n const set = new Set(filter.value.map(String));\n return set.has(String(value));\n }\n return false;\n default:\n return true;\n }\n}\n\nfunction formatGroupKey(value: unknown): string {\n if (value === null || value === undefined) return \"(null)\";\n if (value instanceof Date) return value.toISOString();\n return String(value);\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,uBAIO;AAWP,IAAM,kBAAkB;AAExB,SAAS,mBAA4B;AACnC,QAAM,IAAI;AACV,SAAO,GAAG,kBAAkB;AAC9B;AAEA,SAAS,QAAQ,SAAiB,SAAmB;AACnD,MAAI,CAAC,iBAAiB,EAAG;AACzB,MAAI,YAAY,QAAW;AACzB,YAAQ,IAAI,GAAG,eAAe,IAAI,OAAO,IAAI,OAAO;AAAA,EACtD,OAAO;AACL,YAAQ,IAAI,GAAG,eAAe,IAAI,OAAO,EAAE;AAAA,EAC7C;AACF;AAEA,SAAS,UAAU,SAAiB,OAAgB;AAClD,MAAI,CAAC,iBAAiB,EAAG;AACzB,UAAQ,MAAM,GAAG,eAAe,IAAI,OAAO,IAAI,KAAK;AACtD;AAMA,SAAS,qBAAqB,OAA+C;AAC3E,QAAM,UAAU,QAAQ,QAAQ,KAAK;AACrC,UAAQ,QAAQ,CAAC,OAAe,QAC9B,QAAQ,KAAK,CAAC,WAAW,OAAO,MAAM,OAAO,GAAG,CAAC;AACnD,SAAO;AACT;AAEA,IAAM,wBAAwB,oBAAI,QAAsC;AAExE,SAAS,gBAAgB,QAAsC;AAC7D,QAAM,SAAS,sBAAsB,IAAI,MAAM;AAC/C,MAAI,OAAQ,QAAO;AAEnB,QAAM,UAAyB;AAAA,IAC7B,YAAY,OAAO;AAAA,IACnB,MAAM,OAAe,KAAoC;AACvD,YAAM,IAAI;AAAA,QACR,QAAQ,QAAQ,OAAO,MAAM,OAAO,GAAG,CAAC;AAAA,MAC1C;AACA,cAAQ,yBAAyB;AAAA,QAC/B;AAAA,QACA,KAAK,OAAO,OAAO;AAAA,QACnB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AAEA,wBAAsB,IAAI,QAAQ,OAAO;AACzC,SAAO;AACT;AASO,SAAS,eAAe,MAA2B;AACxD,UAAQ,yBAAyB,EAAE,MAAM,KAAK,KAAK,CAAC;AACpD,SAAO;AAAA,IACL,YAAY,KAAK;AAAA,IACjB,MAAM,OAAe,KAAoC;AACvD,YAAM,OAAO,KAAK,MAAM,OAAO,GAAG;AAGlC,YAAM,IAAI,qBAAqB,KAAK,YAAY,CAAC;AACjD,cAAQ,wBAAwB;AAAA,QAC9B;AAAA,QACA,KAAK,OAAO,KAAK;AAAA,QACjB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAKO,SAAS,iBAAiB,QAAoC;AACnE,UAAQ,2BAA2B,EAAE,YAAY,OAAO,WAAW,CAAC;AACpE,SAAO;AAAA,IACL,YAAY,OAAO;AAAA,IACnB,MAAM,OAA
e,KAAoC;AACvD,YAAM,IAAI;AAAA,QACR,QAAQ,QAAQ,OAAO,MAAM,OAAO,GAAG,CAAC;AAAA,MAC1C;AACA,cAAQ,0BAA0B;AAAA,QAChC;AAAA,QACA,KAAK,OAAO,OAAO;AAAA,QACnB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAKA,eAAsB,cAAc,KAAqC;AACvE,UAAQ,uBAAuB,EAAE,IAAI,CAAC;AACtC,QAAM,WAAW,UAAM,qCAAmB,EAAE,IAAI,CAAC;AACjD,QAAM,UAAyB;AAAA,IAC7B,YAAY,SAAS;AAAA,IACrB,MAAM,OAAe,KAAoC;AACvD,YAAM,IAAI;AAAA,QACR,QAAQ,QAAQ,SAAS,MAAM,OAAO,GAAG,CAAC;AAAA,MAC5C;AACA,cAAQ,uBAAuB;AAAA,QAC7B;AAAA,QACA,KAAK,OAAO,SAAS;AAAA,QACrB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACA,UAAQ,uBAAuB,EAAE,YAAY,QAAQ,WAAW,CAAC;AACjE,SAAO;AACT;AAIA,IAAM,mBAAmB,oBAAI,QAAoC;AACjE,IAAM,mBAAmB,oBAAI,QAA8C;AAE3E,SAAS,eAAe,QAAoC;AAC1D,QAAM,aAAa,gBAAgB,MAAM;AACzC,MAAI,MAAM,iBAAiB,IAAI,UAAU;AACzC,MAAI,CAAC,KAAK;AAGR,UAAM;AACN,qBAAiB,IAAI,YAAY,GAAG;AAAA,EACtC;AACA,SAAO;AACT;AAEA,SAAS,eAAe,QAA8C;AACpE,QAAM,aAAa,gBAAgB,MAAM;AACzC,MAAI,UAAU,iBAAiB,IAAI,UAAU;AAC7C,MAAI,CAAC,SAAS;AACZ,UAAM,WAAW,eAAe,UAAU;AAC1C,kBAAU,uCAAqB,QAAQ;AACvC,qBAAiB,IAAI,YAAY,OAAO;AAAA,EAC1C;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,KAAoC;AAC7D,QAAM,UAA2B,IAAI,OAAO,MAAM,CAAC,EAAE,IAAI,CAAC,SAAS;AAAA,IACjE,MAAM,IAAI;AAAA,IACV,MAAM,IAAI,QAAQ;AAAA,IAClB,UAAU,IAAI,oBAAoB;AAAA,EACpC,EAAE;AAGF,QAAM,kBAA4B,CAAC,CAAC;AACpC,aAAW,MAAM,IAAI,YAAY;AAC/B,oBAAgB,KAAK,gBAAgB,gBAAgB,SAAS,CAAC,IAAI,OAAO,GAAG,QAAQ,CAAC;AAAA,EACxF;AAEA,SAAO;AAAA,IACL,UAAU,OAAO,IAAI,QAAQ;AAAA,IAC7B;AAAA,IACA,WAAW,IAAI,WAAW;AAAA,IAC1B;AAAA,IACA,WAAW,IAAI,cAAc;AAAA,EAC/B;AACF;AAQA,eAAsB,oBACpB,QAC0B;AAC1B,QAAM,MAAM,MAAM,eAAe,MAAM;AACvC,SAAO,kBAAkB,GAAG;AAC9B;AAMA,eAAsB,gBACpB,QACA,UAAuB,CAAC,GACF;AACtB,UAAQ,yBAAyB;AAAA,IAC/B,QAAQ,QAAQ,UAAU;AAAA,IAC1B,OAAO,QAAQ,SAAS;AAAA,IACxB,SAAS,QAAQ,SAAS,UAAU;AAAA,EACtC,CAAC;AACD,QAAM,WAAW,eAAe,MAAM;AACtC,QAAM,cAAc,MAAM,eAAe,MAAM;AAC/C,QAAM,WAAW,kBAAkB,WAAW;AAE9C,QAAM,OAAqB,CAAC;AAE5B,MAAI;AACF,cAAM,8BAAY;AAAA,MAChB,MAAM;AAAA,MACN,UAAU;AAAA,MACV,SAAS,QAAQ;AAAA,MACjB,UAAU,QAAQ,UAAU;AAAA,MAC5B,QACE,QAAQ,UAAU,UACb,QAAQ,UAAU,KAAK
,QAAQ,QAChC;AAAA,MACN,YAAY,CAAC,SAAsB;AACjC,cAAM,cACJ,QAAQ,WAAW,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI;AACvD,mBAAW,OAAO,MAAM;AACtB,gBAAM,MAAkB,CAAC;AACzB,sBAAY,QAAQ,CAAC,MAAM,MAAM;AAC/B,gBAAI,IAAI,IAAI,IAAI,CAAC;AAAA,UACnB,CAAC;AACD,eAAK,KAAK,GAAG;AAAA,QACf;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH,SAAS,OAAO;AACd,cAAU,yBAAyB,KAAK;AACxC,UAAM;AAAA,EACR;AAEA,SAAO,EAAE,UAAU,KAAK;AAC1B;AAiBA,eAAsB,iBACpB,QACA,aACA,aACiC;AACjC,QAAM,WAAW,eAAe,MAAM;AACtC,QAAM,cAAc,MAAM,eAAe,MAAM;AAE/C,QAAM,SAAS,oBAAI,IAAuB;AAC1C,aAAW,QAAQ,aAAa;AAC9B,WAAO,IAAI,MAAM,CAAC,CAAC;AAAA,EACrB;AAIA,MAAI,YAAY;AAChB,aAAW,MAAM,YAAY,YAAY;AAEvC,QAAI,cAAc,EAAG,QAAO;AAE5B,UAAM,SAAS,OAAO,GAAG,QAAQ;AAEjC,cAAM,8BAAY;AAAA,MAChB,MAAM;AAAA,MACN,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,MACV,QAAQ,YAAY;AAAA,MACpB,YAAY,CAAC,SAAsB;AACjC,iBAAS,SAAS,GAAG,SAAS,KAAK,QAAQ,UAAU;AACnD,gBAAM,MAAM,KAAK,MAAM;AACvB,mBAAS,SAAS,GAAG,SAAS,YAAY,QAAQ,UAAU;AAC1D,mBAAO,IAAI,YAAY,MAAM,CAAC,EAAG,KAAK,IAAI,MAAM,CAAC;AAAA,UACnD;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAED,iBAAa;AAGb,UAAM,IAAI,QAAc,CAAC,YAAY,WAAW,SAAS,CAAC,CAAC;AAAA,EAC7D;AAEA,SAAO;AACT;AAMA,eAAsB,kBACpB,QACA,YACA,SACuB;AACvB,MAAI,WAAW,WAAW,EAAG,QAAO,CAAC;AAErC,QAAM,WAAW,eAAe,MAAM;AACtC,QAAM,cAAc,MAAM,eAAe,MAAM;AAC/C,QAAM,WAAW,kBAAkB,WAAW;AAC9C,QAAM,cAAc,WAAW,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI;AAGjE,QAAM,SAAS,WAAW,IAAI,CAAC,KAAK,SAAS,EAAE,KAAK,IAAI,EAAE;AAC1D,SAAO,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG;AAGnC,QAAM,SAAgE,CAAC;AACvE,MAAI,aAAa,OAAO,CAAC,EAAE;AAC3B,MAAI,WAAW,OAAO,CAAC,EAAE,MAAM;AAC/B,MAAI,YAAY,CAAC,OAAO,CAAC,EAAE,GAAG;AAE9B,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAI,OAAO,CAAC,EAAE,OAAO,WAAW,IAAI;AAElC,iBAAW,OAAO,CAAC,EAAE,MAAM;AAC3B,gBAAU,KAAK,OAAO,CAAC,EAAE,GAAG;AAAA,IAC9B,OAAO;AACL,aAAO,KAAK,EAAE,OAAO,YAAY,KAAK,UAAU,UAAU,CAAC;AAC3D,mBAAa,OAAO,CAAC,EAAE;AACvB,iBAAW,OAAO,CAAC,EAAE,MAAM;AAC3B,kBAAY,CAAC,OAAO,CAAC,EAAE,GAAG;AAAA,IAC5B;AAAA,EACF;AACA,SAAO,KAAK,EAAE,OAAO,YAAY,KAAK,UAAU,UAAU,CAAC;AAE3D,QAAM,SAAuB,IAAI,MAAM,WAAW,MAAM;AAExD,aAAW,SAAS,QAAQ;AAC1B,UAAM,YAA0B,CAAC;AAE
jC,cAAM,8BAAY;AAAA,MAChB,MAAM;AAAA,MACN,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU,MAAM;AAAA,MAChB,QAAQ,MAAM;AAAA,MACd,YAAY,CAAC,SAAsB;AACjC,mBAAW,OAAO,MAAM;AACtB,gBAAM,MAAkB,CAAC;AACzB,sBAAY,QAAQ,CAAC,MAAM,MAAM;AAC/B,gBAAI,IAAI,IAAI,IAAI,CAAC;AAAA,UACnB,CAAC;AACD,oBAAU,KAAK,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,IACF,CAAC;AAGD,QAAI,eAAe;AACnB,eAAW,KAAK,QAAQ;AACtB,UAAI,EAAE,OAAO,MAAM,SAAS,EAAE,MAAM,MAAM,KAAK;AAC7C,cAAM,WAAW,EAAE,MAAM,MAAM;AAC/B,YAAI,WAAW,UAAU,QAAQ;AAC/B,iBAAO,EAAE,GAAG,IAAI,UAAU,QAAQ;AAAA,QACpC;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAKA,eAAsB,kBAAkB,MAAkC;AACxE,SAAO,KAAK,YAAY;AAC1B;AAGA,eAAsB,oBAAoB,KAAmC;AAC3E,QAAM,WAAW,MAAM,MAAM,GAAG;AAChC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,IAAI;AAAA,MACR,iCAAiC,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,IACzE;AAAA,EACF;AACA,SAAO,SAAS,YAAY;AAC9B;;;AC5VO,SAAS,sBAAqC;AACnD,SAAO,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAC9C;AAQO,SAAS,eACd,QACA,OACA,cACU;AACV,QAAM,UAAU,MAAM,KAAK,EAAE,QAAQ,OAAO,OAAO,GAAG,CAAC,GAAG,MAAM,CAAC;AAEjE,UAAQ,KAAK,CAAC,GAAG,MAAM;AACrB,eAAW,QAAQ,OAAO;AACxB,YAAM,MAAM,aAAa,IAAI,KAAK,MAAM;AACxC,UAAI,CAAC,IAAK;AACV,YAAM,KAAK,IAAI,CAAC;AAChB,YAAM,KAAK,IAAI,CAAC;AAChB,YAAM,MAAM,cAAc,IAAI,EAAE;AAChC,UAAI,QAAQ,EAAG,QAAO,KAAK,cAAc,QAAQ,MAAM,CAAC;AAAA,IAC1D;AACA,WAAO,IAAI;AAAA,EACb,CAAC;AAED,SAAO;AACT;AAKO,SAAS,iBACd,WACA,SACA,cACiB;AACjB,MAAI,QAAQ,WAAW,EAAG,QAAO;AAEjC,QAAM,UAAoB,CAAC;AAC3B,WAAS,IAAI,GAAG,IAAI,WAAW,KAAK;AAClC,QAAI,OAAO;AACX,eAAW,UAAU,SAAS;AAC5B,YAAM,MAAM,aAAa,IAAI,OAAO,MAAM;AAC1C,UAAI,CAAC,IAAK;AACV,UAAI,CAAC,cAAc,IAAI,CAAC,GAAG,MAAM,GAAG;AAClC,eAAO;AACP;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAM,SAAQ,KAAK,CAAC;AAAA,EAC1B;AACA,SAAO;AACT;AAMO,SAAS,YACd,YACA,QACA,cACa;AACb,MAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AAEjC,QAAM,aAAa,OAAO,CAAC;AAC3B,QAAM,MAAM,aAAa,IAAI,WAAW,MAAM;AAC9C,MAAI,CAAC,IAAK,QAAO,CAAC;AAIlB,QAAM,UAAU,oBAAI,IAAsB;AAC1C,QAAM,YAAY,oBAAI,IAAqB;AAE3C,WAAS,aAAa,GAAG,aAAa,WAAW,QAAQ,cAAc;AACrE,UAAM,YAAY,WAAW,UAAU;AACvC,UAAM,QAAQ,IAAI,SAAS;AAC3B,UAAM,MAAM,eAAe,KAAK;AAChC,QAAI,CAAC,QAAQ,IAAI,GAAG,GAAG;AACrB,c
AAQ,IAAI,KAAK,CAAC,CAAC;AACnB,gBAAU,IAAI,KAAK,KAAK;AAAA,IAC1B;AACA,YAAQ,IAAI,GAAG,EAAG,KAAK,UAAU;AAAA,EACnC;AAGA,QAAM,aAAa,CAAC,GAAG,QAAQ,KAAK,CAAC,EAAE,KAAK,CAAC,GAAG,MAAM;AACpD,UAAM,KAAK,UAAU,IAAI,CAAC;AAC1B,UAAM,KAAK,UAAU,IAAI,CAAC;AAC1B,WAAO,cAAc,IAAI,EAAE;AAAA,EAC7B,CAAC;AAED,SAAO,WAAW,IAAI,CAAC,SAAS;AAAA,IAC9B,KAAK,UAAU,IAAI,GAAG;AAAA,IACtB,OAAO;AAAA,IACP,OAAO,QAAQ,IAAI,GAAG,EAAG;AAAA,IACzB,YAAY,QAAQ,IAAI,GAAG;AAAA,IAC3B,UAAU;AAAA,EACZ,EAAE;AACJ;AAMO,SAAS,oBACd,QACA,QAAgB,KACL;AACX,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,SAAoB,CAAC;AAE3B,aAAW,KAAK,QAAQ;AACtB,UAAM,MAAM,eAAe,CAAC;AAC5B,QAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,WAAK,IAAI,GAAG;AACZ,aAAO,KAAK,CAAC;AACb,UAAI,OAAO,UAAU,MAAO;AAAA,IAC9B;AAAA,EACF;AAEA,SAAO,KAAK,CAAC,GAAG,MAAM,cAAc,GAAG,CAAC,CAAC;AACzC,SAAO;AACT;AAIA,SAAS,cAAc,GAAY,GAAoB;AAErD,MAAI,MAAM,QAAQ,MAAM,OAAW,QAAO,MAAM,QAAQ,MAAM,SAAY,IAAI;AAC9E,MAAI,MAAM,QAAQ,MAAM,OAAW,QAAO;AAE1C,MAAI,OAAO,MAAM,YAAY,OAAO,MAAM,SAAU,QAAO,IAAI;AAC/D,MAAI,OAAO,MAAM,YAAY,OAAO,MAAM,SAAU,QAAO,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI;AACpF,MAAI,OAAO,MAAM,aAAa,OAAO,MAAM,UAAW,QAAO,OAAO,CAAC,IAAI,OAAO,CAAC;AAGjF,MAAI,aAAa,QAAQ,aAAa,KAAM,QAAO,EAAE,QAAQ,IAAI,EAAE,QAAQ;AAG3E,SAAO,OAAO,CAAC,EAAE,cAAc,OAAO,CAAC,CAAC;AAC1C;AAEA,SAAS,cAAc,OAAgB,QAA4B;AACjE,UAAQ,OAAO,UAAU;AAAA,IACvB,KAAK;AACH,aAAO,UAAU,QAAQ,UAAU;AAAA,IACrC,KAAK;AACH,aAAO,UAAU,QAAQ,UAAU;AAAA,IACrC,KAAK;AACH,aAAO,UAAU,OAAO;AAAA,IAC1B,KAAK;AACH,aAAO,UAAU,OAAO;AAAA,IAC1B,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,IAAI;AAAA,IAC9C,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,KAAK;AAAA,IAC/C,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,IAAI;AAAA,IAC9C,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,KAAK;AAAA,IAC/C,KAAK;AACH,aAAO,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,OAAO,OAAO,KAAK,EAAE,YAAY,CAAC;AAAA,IAChF,KAAK;AACH,aAAO,CAAC,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,OAAO,OAAO,KAAK,EAAE,YAAY,CAAC;AAAA,IACjF,KAAK;AACH,UAAI,MAAM,QAAQ,OAAO,KAAK,GAAG;AAC/B,cAAM,MAAM,IAAI,IAAI,OAAO,MAAM,IAAI,MAAM,CAAC;AAC5C,eAAO,IAAI,IAAI,OAAO,KAAK,CAAC;AAAA,MAC9B;AACA,aAAO;AAAA,IACT;AACE,aAAO;AAAA,EACX;AACF;AAEA,SAAS,eAAe,OA
AwB;AAC9C,MAAI,UAAU,QAAQ,UAAU,OAAW,QAAO;AAClD,MAAI,iBAAiB,KAAM,QAAO,MAAM,YAAY;AACpD,SAAO,OAAO,KAAK;AACrB;","names":[]}
@@ -0,0 +1,149 @@
1
+ interface ParquetColumn {
2
+ name: string;
3
+ type: string;
4
+ nullable: boolean;
5
+ }
6
+ interface ParquetMetadata {
7
+ rowCount: number;
8
+ columns: ParquetColumn[];
9
+ rowGroups: number;
10
+ /**
11
+ * Starting row index for each row group.
12
+ * Length = rowGroups + 1 (last element = rowCount).
13
+ * Row group i contains rows [rowGroupOffsets[i], rowGroupOffsets[i+1]).
14
+ */
15
+ rowGroupOffsets: number[];
16
+ createdBy?: string;
17
+ }
18
+ interface ParquetRow {
19
+ [key: string]: unknown;
20
+ }
21
+ interface ParquetData {
22
+ metadata: ParquetMetadata;
23
+ rows: ParquetRow[];
24
+ }
25
+ interface ReadOptions {
26
+ columns?: string[];
27
+ offset?: number;
28
+ limit?: number;
29
+ }
30
+ /**
31
+ * A handle to a parquet source that supports on-demand reading.
32
+ * For small files this wraps an ArrayBuffer; for large files / URLs
33
+ * it reads slices from the underlying File or via HTTP Range requests.
34
+ */
35
+ interface ParquetSource {
36
+ /** Total byte length of the file */
37
+ byteLength: number;
38
+ /** Read a slice of the file. May be sync (ArrayBuffer) or async. */
39
+ slice(start: number, end?: number): ArrayBuffer | Promise<ArrayBuffer>;
40
+ }
41
+
42
+ /**
43
+ * Create a ParquetSource from a browser File object.
44
+ * Does NOT load the file into memory — reads slices on demand.
45
+ * Works with files of any size (including multi-GB).
46
+ */
47
+ declare function sourceFromFile(file: File): ParquetSource;
48
+ /**
49
+ * Create a ParquetSource from an ArrayBuffer (file already in memory).
50
+ */
51
+ declare function sourceFromBuffer(buffer: ArrayBuffer): ParquetSource;
52
+ /**
53
+ * Create a ParquetSource from a URL using HTTP Range requests.
54
+ */
55
+ declare function sourceFromUrl(url: string): Promise<ParquetSource>;
56
+ /**
57
+ * Read only the metadata from a parquet source.
58
+ * Reads a small amount from the end of the file (footer).
59
+ */
60
+ declare function readParquetMetadata(source: ParquetSource): Promise<ParquetMetadata>;
61
+ /**
62
+ * Read data (multiple columns, row range) from a parquet source.
63
+ * Only reads the requested row range — does NOT load the entire file.
64
+ */
65
+ declare function readParquetData(source: ParquetSource, options?: ReadOptions): Promise<ParquetData>;
66
+ /**
67
+ * Read ALL values of specific columns from the parquet source.
68
+ * Used for building sort indices, group keys, and filter unique values.
69
+ * Only reads the requested columns — not all data.
70
+ *
71
+ * Reads row-group by row-group with yields between each to avoid
72
+ * blocking the main thread. This keeps the UI responsive (scroll, etc.)
73
+ * during long reads on large files.
74
+ *
75
+ * @param isCancelled — optional callback checked between row groups.
76
+ * If it returns true, reading stops early and partial results are returned.
77
+ * This allows quick cancellation when the user changes sort/filter mid-read.
78
+ *
79
+ * Returns a Map of column name → array of values (one per row).
80
+ */
81
+ declare function readColumnValues(source: ParquetSource, columnNames: string[], isCancelled?: () => boolean): Promise<Map<string, unknown[]>>;
82
+ /**
83
+ * Read values of specific columns for specific row indices.
84
+ * Useful for fetching display data for sorted/filtered views.
85
+ */
86
+ declare function readRowsByIndices(source: ParquetSource, rowIndices: number[], columns?: string[]): Promise<ParquetRow[]>;
87
+ /** @deprecated Use sourceFromFile() instead */
88
+ declare function fileToArrayBuffer(file: File): Promise<ArrayBuffer>;
89
+ /** @deprecated Use sourceFromUrl() instead */
90
+ declare function fetchParquetFromUrl(url: string): Promise<ArrayBuffer>;
91
+
92
+ /**
93
+ * Data pipeline: sorting, filtering, grouping.
94
+ * Operates on column-level indices — never loads all columns at once.
95
+ */
96
+ type SortDirection = "asc" | "desc";
97
+ interface SortDef {
98
+ column: string;
99
+ direction: SortDirection;
100
+ }
101
+ type FilterOperator = "eq" | "neq" | "gt" | "gte" | "lt" | "lte" | "contains" | "not_contains" | "is_null" | "is_not_null" | "in";
102
+ interface FilterDef {
103
+ column: string;
104
+ operator: FilterOperator;
105
+ /** The value(s) to compare against. For "in" — an array; for "is_null"/"is_not_null" — ignored. */
106
+ value?: unknown;
107
+ }
108
+ interface GroupDef {
109
+ column: string;
110
+ }
111
+ interface GroupNode {
112
+ /** The value of the group key */
113
+ key: unknown;
114
+ /** Display label for this group */
115
+ label: string;
116
+ /** Number of rows in this group */
117
+ count: number;
118
+ /** Row indices belonging to this group (in the filtered/sorted order) */
119
+ rowIndices: number[];
120
+ /** Whether this group is expanded in the UI */
121
+ expanded: boolean;
122
+ }
123
+ interface PipelineState {
124
+ sorts: SortDef[];
125
+ filters: FilterDef[];
126
+ groups: GroupDef[];
127
+ }
128
+ declare function createEmptyPipeline(): PipelineState;
129
+ /**
130
+ * Build a sorted index from column values.
131
+ * Returns an array of original row indices in the sorted order.
132
+ */
133
+ declare function buildSortIndex(values: unknown[], sorts: SortDef[], columnValues: Map<string, unknown[]>): number[];
134
+ /**
135
+ * Apply filters to produce a set of passing row indices.
136
+ */
137
+ declare function buildFilterIndex(totalRows: number, filters: FilterDef[], columnValues: Map<string, unknown[]>): number[] | null;
138
+ /**
139
+ * Build group nodes from column values.
140
+ * Supports multi-level grouping (first group column → second → etc.)
141
+ */
142
+ declare function buildGroups(rowIndices: number[], groups: GroupDef[], columnValues: Map<string, unknown[]>): GroupNode[];
143
+ /**
144
+ * Collect unique values from a column (up to a limit).
145
+ * Returns sorted unique values.
146
+ */
147
+ declare function collectUniqueValues(values: unknown[], limit?: number): unknown[];
148
+
149
+ export { type FilterDef, type FilterOperator, type GroupDef, type GroupNode, type ParquetColumn, type ParquetData, type ParquetMetadata, type ParquetRow, type ParquetSource, type PipelineState, type ReadOptions, type SortDef, type SortDirection, buildFilterIndex, buildGroups, buildSortIndex, collectUniqueValues, createEmptyPipeline, fetchParquetFromUrl, fileToArrayBuffer, readColumnValues, readParquetData, readParquetMetadata, readRowsByIndices, sourceFromBuffer, sourceFromFile, sourceFromUrl };
@@ -0,0 +1,149 @@
1
+ interface ParquetColumn {
2
+ name: string;
3
+ type: string;
4
+ nullable: boolean;
5
+ }
6
+ interface ParquetMetadata {
7
+ rowCount: number;
8
+ columns: ParquetColumn[];
9
+ rowGroups: number;
10
+ /**
11
+ * Starting row index for each row group.
12
+ * Length = rowGroups + 1 (last element = rowCount).
13
+ * Row group i contains rows [rowGroupOffsets[i], rowGroupOffsets[i+1]).
14
+ */
15
+ rowGroupOffsets: number[];
16
+ createdBy?: string;
17
+ }
18
+ interface ParquetRow {
19
+ [key: string]: unknown;
20
+ }
21
+ interface ParquetData {
22
+ metadata: ParquetMetadata;
23
+ rows: ParquetRow[];
24
+ }
25
+ interface ReadOptions {
26
+ columns?: string[];
27
+ offset?: number;
28
+ limit?: number;
29
+ }
30
+ /**
31
+ * A handle to a parquet source that supports on-demand reading.
32
+ * For small files this wraps an ArrayBuffer; for large files / URLs
33
+ * it reads slices from the underlying File or via HTTP Range requests.
34
+ */
35
+ interface ParquetSource {
36
+ /** Total byte length of the file */
37
+ byteLength: number;
38
+ /** Read a slice of the file. May be sync (ArrayBuffer) or async. */
39
+ slice(start: number, end?: number): ArrayBuffer | Promise<ArrayBuffer>;
40
+ }
41
+
42
+ /**
43
+ * Create a ParquetSource from a browser File object.
44
+ * Does NOT load the file into memory — reads slices on demand.
45
+ * Works with files of any size (including multi-GB).
46
+ */
47
+ declare function sourceFromFile(file: File): ParquetSource;
48
+ /**
49
+ * Create a ParquetSource from an ArrayBuffer (file already in memory).
50
+ */
51
+ declare function sourceFromBuffer(buffer: ArrayBuffer): ParquetSource;
52
+ /**
53
+ * Create a ParquetSource from a URL using HTTP Range requests.
54
+ */
55
+ declare function sourceFromUrl(url: string): Promise<ParquetSource>;
56
+ /**
57
+ * Read only the metadata from a parquet source.
58
+ * Reads a small amount from the end of the file (footer).
59
+ */
60
+ declare function readParquetMetadata(source: ParquetSource): Promise<ParquetMetadata>;
61
+ /**
62
+ * Read data (multiple columns, row range) from a parquet source.
63
+ * Only reads the requested row range — does NOT load the entire file.
64
+ */
65
+ declare function readParquetData(source: ParquetSource, options?: ReadOptions): Promise<ParquetData>;
66
+ /**
67
+ * Read ALL values of specific columns from the parquet source.
68
+ * Used for building sort indices, group keys, and filter unique values.
69
+ * Only reads the requested columns — not all data.
70
+ *
71
+ * Reads row-group by row-group with yields between each to avoid
72
+ * blocking the main thread. This keeps the UI responsive (scroll, etc.)
73
+ * during long reads on large files.
74
+ *
75
+ * @param isCancelled — optional callback checked between row groups.
76
+ * If it returns true, reading stops early and partial results are returned.
77
+ * This allows quick cancellation when the user changes sort/filter mid-read.
78
+ *
79
+ * Returns a Map of column name → array of values (one per row).
80
+ */
81
+ declare function readColumnValues(source: ParquetSource, columnNames: string[], isCancelled?: () => boolean): Promise<Map<string, unknown[]>>;
82
+ /**
83
+ * Read values of specific columns for specific row indices.
84
+ * Useful for fetching display data for sorted/filtered views.
85
+ */
86
+ declare function readRowsByIndices(source: ParquetSource, rowIndices: number[], columns?: string[]): Promise<ParquetRow[]>;
87
+ /** @deprecated Use sourceFromFile() instead */
88
+ declare function fileToArrayBuffer(file: File): Promise<ArrayBuffer>;
89
+ /** @deprecated Use sourceFromUrl() instead */
90
+ declare function fetchParquetFromUrl(url: string): Promise<ArrayBuffer>;
91
+
92
+ /**
93
+ * Data pipeline: sorting, filtering, grouping.
94
+ * Operates on column-level indices — never loads all columns at once.
95
+ */
96
+ type SortDirection = "asc" | "desc";
97
+ interface SortDef {
98
+ column: string;
99
+ direction: SortDirection;
100
+ }
101
+ type FilterOperator = "eq" | "neq" | "gt" | "gte" | "lt" | "lte" | "contains" | "not_contains" | "is_null" | "is_not_null" | "in";
102
+ interface FilterDef {
103
+ column: string;
104
+ operator: FilterOperator;
105
+ /** The value(s) to compare against. For "in" — an array; for "is_null"/"is_not_null" — ignored. */
106
+ value?: unknown;
107
+ }
108
+ interface GroupDef {
109
+ column: string;
110
+ }
111
+ interface GroupNode {
112
+ /** The value of the group key */
113
+ key: unknown;
114
+ /** Display label for this group */
115
+ label: string;
116
+ /** Number of rows in this group */
117
+ count: number;
118
+ /** Row indices belonging to this group (in the filtered/sorted order) */
119
+ rowIndices: number[];
120
+ /** Whether this group is expanded in the UI */
121
+ expanded: boolean;
122
+ }
123
+ interface PipelineState {
124
+ sorts: SortDef[];
125
+ filters: FilterDef[];
126
+ groups: GroupDef[];
127
+ }
128
+ declare function createEmptyPipeline(): PipelineState;
129
+ /**
130
+ * Build a sorted index from column values.
131
+ * Returns an array of original row indices in the sorted order.
132
+ */
133
+ declare function buildSortIndex(values: unknown[], sorts: SortDef[], columnValues: Map<string, unknown[]>): number[];
134
+ /**
135
+ * Apply filters to produce a set of passing row indices.
136
+ */
137
+ declare function buildFilterIndex(totalRows: number, filters: FilterDef[], columnValues: Map<string, unknown[]>): number[] | null;
138
+ /**
139
+ * Build group nodes from column values.
140
+ * Supports multi-level grouping (first group column → second → etc.)
141
+ */
142
+ declare function buildGroups(rowIndices: number[], groups: GroupDef[], columnValues: Map<string, unknown[]>): GroupNode[];
143
+ /**
144
+ * Collect unique values from a column (up to a limit).
145
+ * Returns sorted unique values.
146
+ */
147
+ declare function collectUniqueValues(values: unknown[], limit?: number): unknown[];
148
+
149
+ export { type FilterDef, type FilterOperator, type GroupDef, type GroupNode, type ParquetColumn, type ParquetData, type ParquetMetadata, type ParquetRow, type ParquetSource, type PipelineState, type ReadOptions, type SortDef, type SortDirection, buildFilterIndex, buildGroups, buildSortIndex, collectUniqueValues, createEmptyPipeline, fetchParquetFromUrl, fileToArrayBuffer, readColumnValues, readParquetData, readParquetMetadata, readRowsByIndices, sourceFromBuffer, sourceFromFile, sourceFromUrl };
package/dist/index.js ADDED
@@ -0,0 +1,422 @@
1
+ // src/reader.ts
2
+ import {
3
+ parquetMetadataAsync,
4
+ parquetRead,
5
+ asyncBufferFromUrl
6
+ } from "hyparquet";
7
+ var PARQUI_DEBUG_NS = "[parqui/core]";
8
/**
 * Whether parqui debug logging is switched on.
 * Reads the `__PARQUI_DEBUG` flag off the global object; a missing or
 * nullish flag means logging is disabled.
 * @returns {boolean|unknown} `false` when the flag is nullish, otherwise the flag value itself.
 */
function coreDebugEnabled() {
  const flag = globalThis.__PARQUI_DEBUG;
  return flag == null ? false : flag;
}
12
/**
 * Debug-gated console logger: a no-op unless debugging is enabled.
 * @param {string} message - Text appended after the namespace prefix.
 * @param {unknown} [details] - Optional payload logged as a second argument.
 */
function coreLog(message, details) {
  if (!coreDebugEnabled()) return;
  const line = `${PARQUI_DEBUG_NS} ${message}`;
  if (details === void 0) {
    console.log(line);
  } else {
    console.log(line, details);
  }
}
20
/**
 * Debug-gated error logger; mirrors coreLog but writes via console.error.
 * @param {string} message - Text appended after the namespace prefix.
 * @param {unknown} error - The error value to report alongside the message.
 */
function coreError(message, error) {
  if (coreDebugEnabled()) {
    console.error(`${PARQUI_DEBUG_NS} ${message}`, error);
  }
}
24
/**
 * Wrap a buffer (or a promise of one) so the returned promise also exposes
 * a `slice(start, end)` method, as downstream async-buffer consumers expect.
 * @param {ArrayBuffer | Promise<ArrayBuffer>} input
 * @returns {Promise<ArrayBuffer>} The same promise, with a `slice` method attached.
 */
function makeSliceablePromise(input) {
  const resolved = Promise.resolve(input);
  return Object.assign(resolved, {
    slice: (start, end) => resolved.then((buf) => buf.slice(start, end)),
  });
}
29
+ var normalizedSourceCache = /* @__PURE__ */ new WeakMap();
30
/**
 * Adapt an arbitrary ParquetSource into a wrapper whose slice() always
 * yields a slice-capable promise. The wrapper is memoized per source object
 * so repeated calls return the identical wrapper (keeping downstream
 * WeakMap caches keyed consistently).
 * @param {{ byteLength: number, slice(start: number, end?: number): ArrayBuffer | Promise<ArrayBuffer> }} source
 * @returns {object} The normalized wrapper.
 */
function normalizeSource(source) {
  const hit = normalizedSourceCache.get(source);
  if (hit) {
    return hit;
  }
  const wrapper = {
    byteLength: source.byteLength,
    slice(start, end) {
      const sliced = makeSliceablePromise(
        Promise.resolve(source.slice(start, end))
      );
      coreLog("normalizeSource:slice", {
        start,
        end: end ?? source.byteLength,
        hasSliceMethod: typeof sliced.slice === "function",
      });
      return sliced;
    },
  };
  normalizedSourceCache.set(source, wrapper);
  return wrapper;
}
50
/**
 * Build a ParquetSource backed by a browser File. Bytes are read lazily via
 * File#slice + Blob#arrayBuffer, so nothing is buffered up-front
 * (multi-GB friendly).
 * @param {File} file
 * @returns {{ byteLength: number, slice(start: number, end?: number): Promise<ArrayBuffer> }}
 */
function sourceFromFile(file) {
  coreLog("sourceFromFile:create", { size: file.size });
  const readSlice = (start, end) => {
    const p = makeSliceablePromise(file.slice(start, end).arrayBuffer());
    coreLog("sourceFromFile:slice", {
      start,
      end: end ?? file.size,
      hasSliceMethod: typeof p.slice === "function",
    });
    return p;
  };
  return { byteLength: file.size, slice: readSlice };
}
66
/**
 * Build a ParquetSource over an ArrayBuffer that is already in memory.
 * Slices are copied out of the buffer on demand.
 * @param {ArrayBuffer} buffer
 * @returns {{ byteLength: number, slice(start: number, end?: number): Promise<ArrayBuffer> }}
 */
function sourceFromBuffer(buffer) {
  coreLog("sourceFromBuffer:create", { byteLength: buffer.byteLength });
  return {
    byteLength: buffer.byteLength,
    slice(start, end) {
      const chunk = makeSliceablePromise(
        Promise.resolve(buffer.slice(start, end))
      );
      coreLog("sourceFromBuffer:slice", {
        start,
        end: end ?? buffer.byteLength,
        hasSliceMethod: typeof chunk.slice === "function",
      });
      return chunk;
    },
  };
}
83
/**
 * Build a ParquetSource that reads a remote file through hyparquet's
 * asyncBufferFromUrl (HTTP Range requests under the hood).
 * @param {string} url
 * @returns {Promise<object>} Resolves once the remote byte length is known.
 */
async function sourceFromUrl(url) {
  coreLog("sourceFromUrl:start", { url });
  const remote = await asyncBufferFromUrl({ url });
  const source = {
    byteLength: remote.byteLength,
    slice(start, end) {
      const part = makeSliceablePromise(
        Promise.resolve(remote.slice(start, end))
      );
      coreLog("sourceFromUrl:slice", {
        start,
        end: end ?? remote.byteLength,
        hasSliceMethod: typeof part.slice === "function",
      });
      return part;
    },
  };
  coreLog("sourceFromUrl:ready", { byteLength: source.byteLength });
  return source;
}
103
+ var asyncBufferCache = /* @__PURE__ */ new WeakMap();
104
+ var rawMetadataCache = /* @__PURE__ */ new WeakMap();
105
/**
 * Return the hyparquet-compatible async buffer for a source, cached per
 * normalized wrapper. (The normalized wrapper itself satisfies the
 * AsyncBuffer contract, so the cache stores it verbatim.)
 * @param {object} source
 * @returns {object}
 */
function getAsyncBuffer(source) {
  const normalized = normalizeSource(source);
  if (!asyncBufferCache.has(normalized)) {
    asyncBufferCache.set(normalized, normalized);
  }
  return asyncBufferCache.get(normalized);
}
114
/**
 * Lazily parse (and memoize) the parquet footer metadata for a source.
 * The promise itself is cached, so concurrent callers share a single parse.
 * @param {object} source
 * @returns {Promise<object>} hyparquet's raw FileMetaData.
 */
function getRawMetadata(source) {
  const normalized = normalizeSource(source);
  const cached = rawMetadataCache.get(normalized);
  if (cached) {
    return cached;
  }
  const pending = parquetMetadataAsync(getAsyncBuffer(normalized));
  rawMetadataCache.set(normalized, pending);
  return pending;
}
124
/**
 * Project hyparquet's raw FileMetaData into the public ParquetMetadata shape.
 * Skips schema element 0 (the root node), converts BigInt row counts to
 * Number, and derives cumulative row-group offsets (length = rowGroups + 1,
 * last element = rowCount).
 * @param {object} raw - hyparquet FileMetaData.
 * @returns {object} Public ParquetMetadata.
 */
function toParquetMetadata(raw) {
  // Schema element 0 is the root node, not a real column.
  const columns = [];
  for (const element of raw.schema.slice(1)) {
    columns.push({
      name: element.name,
      type: element.type ?? "UNKNOWN",
      nullable: element.repetition_type !== "REQUIRED",
    });
  }
  // Running total of rows: offsets[i] is the first row index of group i.
  const rowGroupOffsets = [0];
  for (const group of raw.row_groups) {
    const previous = rowGroupOffsets[rowGroupOffsets.length - 1];
    rowGroupOffsets.push(previous + Number(group.num_rows));
  }
  return {
    rowCount: Number(raw.num_rows),
    columns,
    rowGroups: raw.row_groups.length,
    rowGroupOffsets,
    createdBy: raw.created_by ?? void 0,
  };
}
142
/**
 * Read only the parquet footer of a source and return the public
 * metadata shape.
 * @param {object} source
 * @returns {Promise<object>} ParquetMetadata.
 */
async function readParquetMetadata(source) {
  return toParquetMetadata(await getRawMetadata(source));
}
146
/**
 * Read a window of rows (optionally a subset of columns) from the source.
 * Only the requested row range is materialized.
 * @param {object} source
 * @param {{ columns?: string[], offset?: number, limit?: number }} [options]
 * @returns {Promise<{ metadata: object, rows: object[] }>}
 * @throws Re-throws any hyparquet read failure after logging it.
 */
async function readParquetData(source, options = {}) {
  const offset = options.offset ?? 0;
  coreLog("readParquetData:start", {
    offset,
    limit: options.limit ?? null,
    columns: options.columns?.length ?? "all",
  });
  const asyncBuf = getAsyncBuffer(source);
  const rawMetadata = await getRawMetadata(source);
  const metadata = toParquetMetadata(rawMetadata);
  const names = options.columns ?? metadata.columns.map((c) => c.name);
  const rows = [];
  try {
    await parquetRead({
      file: asyncBuf,
      metadata: rawMetadata,
      columns: options.columns,
      rowStart: offset,
      rowEnd: options.limit === void 0 ? void 0 : offset + options.limit,
      onComplete: (data) => {
        // hyparquet hands back positional arrays; rebuild keyed row objects.
        for (const values of data) {
          const record = {};
          names.forEach((name, i) => {
            record[name] = values[i];
          });
          rows.push(record);
        }
      },
    });
  } catch (error) {
    coreError("readParquetData:error", error);
    throw error;
  }
  return { metadata, rows };
}
180
/**
 * Read every value of the named columns, one row group at a time, yielding
 * to the event loop between groups so long reads do not block other work.
 * @param {object} source
 * @param {string[]} columnNames
 * @param {() => boolean} [isCancelled] - Checked before each row group; when
 *   it returns true, the partial result gathered so far is returned.
 * @returns {Promise<Map<string, unknown[]>>} column name -> values (one per row).
 */
async function readColumnValues(source, columnNames, isCancelled) {
  const asyncBuf = getAsyncBuffer(source);
  const rawMetadata = await getRawMetadata(source);
  const valuesByColumn = new Map(columnNames.map((name) => [name, []]));
  let firstRow = 0;
  for (const group of rawMetadata.row_groups) {
    // Early exit returns whatever has been accumulated so far.
    if (isCancelled?.()) {
      return valuesByColumn;
    }
    const groupSize = Number(group.num_rows);
    await parquetRead({
      file: asyncBuf,
      metadata: rawMetadata,
      columns: columnNames,
      rowStart: firstRow,
      rowEnd: firstRow + groupSize,
      onComplete: (data) => {
        for (const row of data) {
          columnNames.forEach((name, colIdx) => {
            valuesByColumn.get(name).push(row[colIdx]);
          });
        }
      },
    });
    firstRow += groupSize;
    // Yield a macrotask between row groups to keep the main thread responsive.
    await new Promise((resolve) => setTimeout(resolve, 0));
  }
  return valuesByColumn;
}
211
/**
 * Fetch full rows for an arbitrary set of row indices, returning them in the
 * caller's order (result[i] corresponds to rowIndices[i]). Nearby indices
 * (gaps of up to 50 rows) are coalesced into one contiguous read to minimize
 * the number of range reads issued.
 *
 * Fixes over the previous revision: the dead `sortedPosIdx` counter is
 * removed, and each coalesced range now remembers its own (idx, pos) entries
 * so the fill step no longer rescans the entire sorted index list for every
 * range (was O(ranges * indices), now O(indices)).
 *
 * @param {object} source
 * @param {number[]} rowIndices - Row numbers to fetch (any order; duplicates ok).
 * @param {string[]} [columns] - Column subset; defaults to all columns.
 * @returns {Promise<object[]>} One row object per requested index.
 */
async function readRowsByIndices(source, rowIndices, columns) {
  if (rowIndices.length === 0) return [];
  const asyncBuf = getAsyncBuffer(source);
  const rawMetadata = await getRawMetadata(source);
  const metadata = toParquetMetadata(rawMetadata);
  const columnNames = columns ?? metadata.columns.map((c) => c.name);

  // Sort the requested indices, remembering each one's output position.
  const sorted = rowIndices.map((idx, pos) => ({ idx, pos }));
  sorted.sort((a, b) => a.idx - b.idx);

  // Coalesce into contiguous [start, end) ranges, bridging gaps of <= 50 rows.
  // Each range carries the entries it must fill, so the fill loop below is
  // a direct walk instead of a rescan of `sorted`.
  const ranges = [];
  let current = {
    start: sorted[0].idx,
    end: sorted[0].idx + 1,
    entries: [sorted[0]],
  };
  for (let i = 1; i < sorted.length; i++) {
    const entry = sorted[i];
    if (entry.idx <= current.end + 50) {
      current.end = entry.idx + 1;
      current.entries.push(entry);
    } else {
      ranges.push(current);
      current = { start: entry.idx, end: entry.idx + 1, entries: [entry] };
    }
  }
  ranges.push(current);

  const result = new Array(rowIndices.length);
  for (const range of ranges) {
    const rangeRows = [];
    await parquetRead({
      file: asyncBuf,
      metadata: rawMetadata,
      columns: columnNames,
      rowStart: range.start,
      rowEnd: range.end,
      onComplete: (data) => {
        for (const row of data) {
          const obj = {};
          columnNames.forEach((name, i) => {
            obj[name] = row[i];
          });
          rangeRows.push(obj);
        }
      },
    });
    // Fill output slots from this range's own entries; the bounds guard
    // protects against a short read returning fewer rows than requested.
    for (const { idx, pos } of range.entries) {
      const localIdx = idx - range.start;
      if (localIdx < rangeRows.length) {
        result[pos] = rangeRows[localIdx];
      }
    }
  }

  return result;
}
267
/**
 * Load an entire File into an ArrayBuffer.
 * @deprecated Use sourceFromFile() instead — it reads slices on demand
 * rather than pulling the whole file into memory.
 */
async function fileToArrayBuffer(file) {
  const buffer = await file.arrayBuffer();
  return buffer;
}
270
/**
 * Download an entire parquet file over HTTP into an ArrayBuffer.
 * @deprecated Use sourceFromUrl() instead — it issues Range requests
 * instead of downloading the whole file.
 * @throws Error when the response status is not ok.
 */
async function fetchParquetFromUrl(url) {
  const response = await fetch(url);
  if (response.ok) {
    return response.arrayBuffer();
  }
  throw new Error(
    `Failed to fetch parquet file: ${response.status} ${response.statusText}`
  );
}
279
+
280
+ // src/pipeline.ts
281
/**
 * Create a fresh pipeline state with no sorts, filters, or groups.
 * Each call returns brand-new arrays so callers may mutate freely.
 */
function createEmptyPipeline() {
  const sorts = [];
  const filters = [];
  const groups = [];
  return { sorts, filters, groups };
}
284
/**
 * Build a sorted row index from column values.
 *
 * @param values       any column's value array; only its length (row count) is used
 * @param sorts        ordered sort definitions (earlier entries win)
 * @param columnValues map of column name -> full value array
 * @returns original row indices arranged in sorted order
 */
function buildSortIndex(values, sorts, columnValues) {
  const rowCount = values.length;
  const order = [];
  for (let i = 0; i < rowCount; i++) order.push(i);

  const compareRows = (left, right) => {
    for (const { column, direction } of sorts) {
      const colValues = columnValues.get(column);
      if (!colValues) continue; // unknown column: skip this sort level
      const cmp = compareValues(colValues[left], colValues[right]);
      if (cmp === 0) continue;
      return direction === "asc" ? cmp : -cmp;
    }
    // Tie-break on original position to keep the sort stable.
    return left - right;
  };

  order.sort(compareRows);
  return order;
}
299
/**
 * Apply filters to produce the set of passing row indices.
 *
 * @param totalRows    number of rows to evaluate
 * @param filters      filter definitions (ANDed together)
 * @param columnValues map of column name -> full value array
 * @returns passing row indices, or null when there are no filters
 *          (null signals "no filtering" so callers can skip indexing)
 */
function buildFilterIndex(totalRows, filters, columnValues) {
  if (filters.length === 0) return null;

  const matchingRows = [];
  for (let row = 0; row < totalRows; row++) {
    // A row is rejected as soon as any filter with a known column fails.
    const rejected = filters.some((filter) => {
      const colValues = columnValues.get(filter.column);
      if (!colValues) return false; // unknown column: filter is a no-op
      return !matchesFilter(colValues[row], filter);
    });
    if (!rejected) matchingRows.push(row);
  }
  return matchingRows;
}
316
/**
 * Build group nodes from column values (first group column only).
 *
 * Buckets DISPLAY indices (positions within `rowIndices`) rather than
 * source indices, so getRow(displayIndex) works directly on the result.
 *
 * @param rowIndices   display-ordered source row indices
 * @param groups       group definitions; only the first is used
 * @param columnValues map of column name -> full value array
 * @returns group nodes sorted by their underlying key values
 */
function buildGroups(rowIndices, groups, columnValues) {
  if (groups.length === 0) return [];

  const groupColumn = columnValues.get(groups[0].column);
  if (!groupColumn) return [];

  // Bucket display indices under a string key, remembering the raw value
  // behind each key for sorting and for the node's `key` field.
  const bucketByKey = new Map();
  const valueByKey = new Map();

  rowIndices.forEach((sourceIdx, displayIdx) => {
    const rawValue = groupColumn[sourceIdx];
    const key = formatGroupKey(rawValue);
    let bucket = bucketByKey.get(key);
    if (bucket === undefined) {
      bucket = [];
      bucketByKey.set(key, bucket);
      valueByKey.set(key, rawValue);
    }
    bucket.push(displayIdx);
  });

  // Order groups by their underlying values, not their string keys.
  const orderedKeys = Array.from(bucketByKey.keys()).sort((ka, kb) =>
    compareValues(valueByKey.get(ka), valueByKey.get(kb))
  );

  return orderedKeys.map((key) => {
    const bucket = bucketByKey.get(key);
    return {
      key: valueByKey.get(key),
      label: key,
      count: bucket.length,
      rowIndices: bucket,
      expanded: false,
    };
  });
}
346
+ function collectUniqueValues(values, limit = 500) {
347
+ const seen = /* @__PURE__ */ new Set();
348
+ const unique = [];
349
+ for (const v of values) {
350
+ const key = formatGroupKey(v);
351
+ if (!seen.has(key)) {
352
+ seen.add(key);
353
+ unique.push(v);
354
+ if (unique.length >= limit) break;
355
+ }
356
+ }
357
+ unique.sort((a, b) => compareValues(a, b));
358
+ return unique;
359
+ }
360
/**
 * Total ordering over heterogeneous cell values.
 *
 * null/undefined sort last; numbers and bigints (including mixed pairs)
 * compare numerically; booleans order false < true; Dates compare by
 * timestamp; everything else falls back to locale-aware string comparison.
 *
 * @returns negative, zero, or positive — suitable for Array.prototype.sort
 */
function compareValues(a, b) {
  // nulls last
  if (a === null || a === void 0) return b === null || b === void 0 ? 0 : 1;
  if (b === null || b === void 0) return -1;

  if (typeof a === "number" && typeof b === "number") return a - b;
  if (typeof a === "bigint" && typeof b === "bigint") return a < b ? -1 : a > b ? 1 : 0;

  // Mixed number/bigint pairs previously fell through to string comparison,
  // which orders "10" before "9". Compare them numerically instead. Number()
  // may lose precision for bigints beyond 2^53, which is acceptable for
  // ordering purposes.
  if (
    (typeof a === "number" || typeof a === "bigint") &&
    (typeof b === "number" || typeof b === "bigint")
  ) {
    const x = Number(a);
    const y = Number(b);
    return x < y ? -1 : x > y ? 1 : 0;
  }

  if (typeof a === "boolean" && typeof b === "boolean") return Number(a) - Number(b);

  // Dates by timestamp
  if (a instanceof Date && b instanceof Date) return a.getTime() - b.getTime();

  // Default: string comparison
  return String(a).localeCompare(String(b));
}
369
/**
 * Test a single cell value against one filter definition.
 *
 * Semantics: eq/neq use strict equality; comparison operators delegate to
 * compareValues; contains/not_contains are case-insensitive substring
 * checks; "in" stringifies both sides; unknown operators never exclude
 * a row (return true).
 */
function matchesFilter(value, filter) {
  const op = filter.operator;

  if (op === "is_null") return value == null;
  if (op === "is_not_null") return value != null;
  if (op === "eq") return value === filter.value;
  if (op === "neq") return value !== filter.value;

  if (op === "gt" || op === "gte" || op === "lt" || op === "lte") {
    const cmp = compareValues(value, filter.value);
    if (op === "gt") return cmp > 0;
    if (op === "gte") return cmp >= 0;
    return op === "lt" ? cmp < 0 : cmp <= 0;
  }

  if (op === "contains" || op === "not_contains") {
    const haystack = String(value).toLowerCase();
    const needle = String(filter.value).toLowerCase();
    const found = haystack.includes(needle);
    return op === "contains" ? found : !found;
  }

  if (op === "in") {
    if (!Array.isArray(filter.value)) return false;
    const allowed = new Set(filter.value.map(String));
    return allowed.has(String(value));
  }

  // Unknown operators never exclude rows.
  return true;
}
401
/**
 * Derive a stable string key for grouping/dedup from an arbitrary cell
 * value. null/undefined collapse to "(null)"; Dates use ISO form;
 * everything else is stringified.
 */
function formatGroupKey(value) {
  if (value == null) return "(null)";
  return value instanceof Date ? value.toISOString() : String(value);
}
406
+ export {
407
+ buildFilterIndex,
408
+ buildGroups,
409
+ buildSortIndex,
410
+ collectUniqueValues,
411
+ createEmptyPipeline,
412
+ fetchParquetFromUrl,
413
+ fileToArrayBuffer,
414
+ readColumnValues,
415
+ readParquetData,
416
+ readParquetMetadata,
417
+ readRowsByIndices,
418
+ sourceFromBuffer,
419
+ sourceFromFile,
420
+ sourceFromUrl
421
+ };
422
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/reader.ts","../src/pipeline.ts"],"sourcesContent":["import {\n parquetMetadataAsync,\n parquetRead,\n asyncBufferFromUrl,\n} from \"hyparquet\";\nimport type { AsyncBuffer, FileMetaData } from \"hyparquet\";\nimport type {\n ParquetColumn,\n ParquetData,\n ParquetMetadata,\n ParquetRow,\n ParquetSource,\n ReadOptions,\n} from \"./types.js\";\n\nconst PARQUI_DEBUG_NS = \"[parqui/core]\";\n\nfunction coreDebugEnabled(): boolean {\n const g = globalThis as { __PARQUI_DEBUG?: boolean } | undefined;\n return g?.__PARQUI_DEBUG ?? false;\n}\n\nfunction coreLog(message: string, details?: unknown) {\n if (!coreDebugEnabled()) return;\n if (details !== undefined) {\n console.log(`${PARQUI_DEBUG_NS} ${message}`, details);\n } else {\n console.log(`${PARQUI_DEBUG_NS} ${message}`);\n }\n}\n\nfunction coreError(message: string, error: unknown) {\n if (!coreDebugEnabled()) return;\n console.error(`${PARQUI_DEBUG_NS} ${message}`, error);\n}\n\ntype SliceablePromise = Promise<ArrayBuffer> & {\n slice: (start: number, end?: number) => Promise<ArrayBuffer>;\n};\n\nfunction makeSliceablePromise(input: Promise<ArrayBuffer>): SliceablePromise {\n const promise = Promise.resolve(input) as SliceablePromise;\n promise.slice = (start: number, end?: number) =>\n promise.then((buffer) => buffer.slice(start, end));\n return promise;\n}\n\nconst normalizedSourceCache = new WeakMap<ParquetSource, ParquetSource>();\n\nfunction normalizeSource(source: ParquetSource): ParquetSource {\n const cached = normalizedSourceCache.get(source);\n if (cached) return cached;\n\n const wrapped: ParquetSource = {\n byteLength: source.byteLength,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const p = makeSliceablePromise(\n Promise.resolve(source.slice(start, end)),\n );\n coreLog(\"normalizeSource:slice\", {\n start,\n end: end ?? 
source.byteLength,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n\n normalizedSourceCache.set(source, wrapped);\n return wrapped;\n}\n\n// ── Source creation ──\n\n/**\n * Create a ParquetSource from a browser File object.\n * Does NOT load the file into memory — reads slices on demand.\n * Works with files of any size (including multi-GB).\n */\nexport function sourceFromFile(file: File): ParquetSource {\n coreLog(\"sourceFromFile:create\", { size: file.size });\n return {\n byteLength: file.size,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const blob = file.slice(start, end);\n // hyparquet may branch on `instanceof Promise` and then call `.slice()` on non-promises.\n // This object works in both branches: it's awaitable AND has a `.slice()` method.\n const p = makeSliceablePromise(blob.arrayBuffer());\n coreLog(\"sourceFromFile:slice\", {\n start,\n end: end ?? file.size,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n}\n\n/**\n * Create a ParquetSource from an ArrayBuffer (file already in memory).\n */\nexport function sourceFromBuffer(buffer: ArrayBuffer): ParquetSource {\n coreLog(\"sourceFromBuffer:create\", { byteLength: buffer.byteLength });\n return {\n byteLength: buffer.byteLength,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const p = makeSliceablePromise(\n Promise.resolve(buffer.slice(start, end)),\n );\n coreLog(\"sourceFromBuffer:slice\", {\n start,\n end: end ?? 
buffer.byteLength,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n}\n\n/**\n * Create a ParquetSource from a URL using HTTP Range requests.\n */\nexport async function sourceFromUrl(url: string): Promise<ParquetSource> {\n coreLog(\"sourceFromUrl:start\", { url });\n const asyncBuf = await asyncBufferFromUrl({ url });\n const wrapped: ParquetSource = {\n byteLength: asyncBuf.byteLength,\n slice(start: number, end?: number): Promise<ArrayBuffer> {\n const p = makeSliceablePromise(\n Promise.resolve(asyncBuf.slice(start, end)),\n );\n coreLog(\"sourceFromUrl:slice\", {\n start,\n end: end ?? asyncBuf.byteLength,\n hasSliceMethod: typeof (p as { slice?: unknown }).slice === \"function\",\n });\n return p;\n },\n };\n coreLog(\"sourceFromUrl:ready\", { byteLength: wrapped.byteLength });\n return wrapped;\n}\n\n// ── Internal caching ──\n\nconst asyncBufferCache = new WeakMap<ParquetSource, AsyncBuffer>();\nconst rawMetadataCache = new WeakMap<ParquetSource, Promise<FileMetaData>>();\n\nfunction getAsyncBuffer(source: ParquetSource): AsyncBuffer {\n const safeSource = normalizeSource(source);\n let buf = asyncBufferCache.get(safeSource);\n if (!buf) {\n // Use the direct source object. cachedAsyncBuffer can normalize slice()\n // into plain Promises, which breaks in some zone.js runtimes.\n buf = safeSource as AsyncBuffer;\n asyncBufferCache.set(safeSource, buf);\n }\n return buf;\n}\n\nfunction getRawMetadata(source: ParquetSource): Promise<FileMetaData> {\n const safeSource = normalizeSource(source);\n let promise = rawMetadataCache.get(safeSource);\n if (!promise) {\n const asyncBuf = getAsyncBuffer(safeSource);\n promise = parquetMetadataAsync(asyncBuf);\n rawMetadataCache.set(safeSource, promise);\n }\n return promise;\n}\n\nfunction toParquetMetadata(raw: FileMetaData): ParquetMetadata {\n const columns: ParquetColumn[] = raw.schema.slice(1).map((col) => ({\n name: col.name,\n type: col.type ?? 
\"UNKNOWN\",\n nullable: col.repetition_type !== \"REQUIRED\",\n }));\n\n // Compute row group boundaries: [0, rg0_rows, rg0+rg1_rows, ..., totalRows]\n const rowGroupOffsets: number[] = [0];\n for (const rg of raw.row_groups) {\n rowGroupOffsets.push(rowGroupOffsets[rowGroupOffsets.length - 1] + Number(rg.num_rows));\n }\n\n return {\n rowCount: Number(raw.num_rows),\n columns,\n rowGroups: raw.row_groups.length,\n rowGroupOffsets,\n createdBy: raw.created_by ?? undefined,\n };\n}\n\n// ── Reading ──\n\n/**\n * Read only the metadata from a parquet source.\n * Reads a small amount from the end of the file (footer).\n */\nexport async function readParquetMetadata(\n source: ParquetSource,\n): Promise<ParquetMetadata> {\n const raw = await getRawMetadata(source);\n return toParquetMetadata(raw);\n}\n\n/**\n * Read data (multiple columns, row range) from a parquet source.\n * Only reads the requested row range — does NOT load the entire file.\n */\nexport async function readParquetData(\n source: ParquetSource,\n options: ReadOptions = {},\n): Promise<ParquetData> {\n coreLog(\"readParquetData:start\", {\n offset: options.offset ?? 0,\n limit: options.limit ?? null,\n columns: options.columns?.length ?? \"all\",\n });\n const asyncBuf = getAsyncBuffer(source);\n const rawMetadata = await getRawMetadata(source);\n const metadata = toParquetMetadata(rawMetadata);\n\n const rows: ParquetRow[] = [];\n\n try {\n await parquetRead({\n file: asyncBuf,\n metadata: rawMetadata,\n columns: options.columns,\n rowStart: options.offset ?? 0,\n rowEnd:\n options.limit !== undefined\n ? (options.offset ?? 0) + options.limit\n : undefined,\n onComplete: (data: unknown[][]) => {\n const columnNames =\n options.columns ?? 
metadata.columns.map((c) => c.name);\n for (const row of data) {\n const obj: ParquetRow = {};\n columnNames.forEach((name, i) => {\n obj[name] = row[i];\n });\n rows.push(obj);\n }\n },\n });\n } catch (error) {\n coreError(\"readParquetData:error\", error);\n throw error;\n }\n\n return { metadata, rows };\n}\n\n/**\n * Read ALL values of specific columns from the parquet source.\n * Used for building sort indices, group keys, and filter unique values.\n * Only reads the requested columns — not all data.\n *\n * Reads row-group by row-group with yields between each to avoid\n * blocking the main thread. This keeps the UI responsive (scroll, etc.)\n * during long reads on large files.\n *\n * @param isCancelled — optional callback checked between row groups.\n * If it returns true, reading stops early and partial results are returned.\n * This allows quick cancellation when the user changes sort/filter mid-read.\n *\n * Returns a Map of column name → array of values (one per row).\n */\nexport async function readColumnValues(\n source: ParquetSource,\n columnNames: string[],\n isCancelled?: () => boolean,\n): Promise<Map<string, unknown[]>> {\n const asyncBuf = getAsyncBuffer(source);\n const rawMetadata = await getRawMetadata(source);\n\n const result = new Map<string, unknown[]>();\n for (const name of columnNames) {\n result.set(name, []);\n }\n\n // Read row-group by row-group to keep the main thread responsive.\n // Check isCancelled between each row group for early abort.\n let rowOffset = 0;\n for (const rg of rawMetadata.row_groups) {\n // Early exit if this compute was superseded\n if (isCancelled?.()) return result;\n\n const rgRows = Number(rg.num_rows);\n\n await parquetRead({\n file: asyncBuf,\n metadata: rawMetadata,\n columns: columnNames,\n rowStart: rowOffset,\n rowEnd: rowOffset + rgRows,\n onComplete: (data: unknown[][]) => {\n for (let rowIdx = 0; rowIdx < data.length; rowIdx++) {\n const row = data[rowIdx];\n for (let colIdx = 0; colIdx < 
columnNames.length; colIdx++) {\n result.get(columnNames[colIdx])!.push(row[colIdx]);\n }\n }\n },\n });\n\n rowOffset += rgRows;\n\n // Yield to browser between row groups so scroll events can fire\n await new Promise<void>((resolve) => setTimeout(resolve, 0));\n }\n\n return result;\n}\n\n/**\n * Read values of specific columns for specific row indices.\n * Useful for fetching display data for sorted/filtered views.\n */\nexport async function readRowsByIndices(\n source: ParquetSource,\n rowIndices: number[],\n columns?: string[],\n): Promise<ParquetRow[]> {\n if (rowIndices.length === 0) return [];\n\n const asyncBuf = getAsyncBuffer(source);\n const rawMetadata = await getRawMetadata(source);\n const metadata = toParquetMetadata(rawMetadata);\n const columnNames = columns ?? metadata.columns.map((c) => c.name);\n\n // Sort indices to read sequentially, then re-order\n const sorted = rowIndices.map((idx, pos) => ({ idx, pos }));\n sorted.sort((a, b) => a.idx - b.idx);\n\n // Find contiguous ranges to batch reads\n const ranges: { start: number; end: number; positions: number[] }[] = [];\n let rangeStart = sorted[0].idx;\n let rangeEnd = sorted[0].idx + 1;\n let positions = [sorted[0].pos];\n\n for (let i = 1; i < sorted.length; i++) {\n if (sorted[i].idx <= rangeEnd + 50) {\n // Allow small gaps to merge ranges\n rangeEnd = sorted[i].idx + 1;\n positions.push(sorted[i].pos);\n } else {\n ranges.push({ start: rangeStart, end: rangeEnd, positions });\n rangeStart = sorted[i].idx;\n rangeEnd = sorted[i].idx + 1;\n positions = [sorted[i].pos];\n }\n }\n ranges.push({ start: rangeStart, end: rangeEnd, positions });\n\n const result: ParquetRow[] = new Array(rowIndices.length);\n\n for (const range of ranges) {\n const rangeRows: ParquetRow[] = [];\n\n await parquetRead({\n file: asyncBuf,\n metadata: rawMetadata,\n columns: columnNames,\n rowStart: range.start,\n rowEnd: range.end,\n onComplete: (data: unknown[][]) => {\n for (const row of data) {\n const obj: 
ParquetRow = {};\n columnNames.forEach((name, i) => {\n obj[name] = row[i];\n });\n rangeRows.push(obj);\n }\n },\n });\n\n // Map range rows back to the correct positions\n let sortedPosIdx = 0;\n for (const s of sorted) {\n if (s.idx >= range.start && s.idx < range.end) {\n const localIdx = s.idx - range.start;\n if (localIdx < rangeRows.length) {\n result[s.pos] = rangeRows[localIdx];\n }\n sortedPosIdx++;\n }\n }\n }\n\n return result;\n}\n\n// ── Legacy helpers (kept for backwards compatibility) ──\n\n/** @deprecated Use sourceFromFile() instead */\nexport async function fileToArrayBuffer(file: File): Promise<ArrayBuffer> {\n return file.arrayBuffer();\n}\n\n/** @deprecated Use sourceFromUrl() instead */\nexport async function fetchParquetFromUrl(url: string): Promise<ArrayBuffer> {\n const response = await fetch(url);\n if (!response.ok) {\n throw new Error(\n `Failed to fetch parquet file: ${response.status} ${response.statusText}`,\n );\n }\n return response.arrayBuffer();\n}\n","/**\n * Data pipeline: sorting, filtering, grouping.\n * Operates on column-level indices — never loads all columns at once.\n */\n\n// ── Sort ──\n\nexport type SortDirection = \"asc\" | \"desc\";\n\nexport interface SortDef {\n column: string;\n direction: SortDirection;\n}\n\n// ── Filter ──\n\nexport type FilterOperator =\n | \"eq\"\n | \"neq\"\n | \"gt\"\n | \"gte\"\n | \"lt\"\n | \"lte\"\n | \"contains\"\n | \"not_contains\"\n | \"is_null\"\n | \"is_not_null\"\n | \"in\";\n\nexport interface FilterDef {\n column: string;\n operator: FilterOperator;\n /** The value(s) to compare against. For \"in\" — an array; for \"is_null\"/\"is_not_null\" — ignored. 
*/\n value?: unknown;\n}\n\n// ── Group ──\n\nexport interface GroupDef {\n column: string;\n}\n\nexport interface GroupNode {\n /** The value of the group key */\n key: unknown;\n /** Display label for this group */\n label: string;\n /** Number of rows in this group */\n count: number;\n /** Row indices belonging to this group (in the filtered/sorted order) */\n rowIndices: number[];\n /** Whether this group is expanded in the UI */\n expanded: boolean;\n}\n\n// ── Pipeline state ──\n\nexport interface PipelineState {\n sorts: SortDef[];\n filters: FilterDef[];\n groups: GroupDef[];\n}\n\nexport function createEmptyPipeline(): PipelineState {\n return { sorts: [], filters: [], groups: [] };\n}\n\n// ── Index building ──\n\n/**\n * Build a sorted index from column values.\n * Returns an array of original row indices in the sorted order.\n */\nexport function buildSortIndex(\n values: unknown[],\n sorts: SortDef[],\n columnValues: Map<string, unknown[]>,\n): number[] {\n const indices = Array.from({ length: values.length }, (_, i) => i);\n\n indices.sort((a, b) => {\n for (const sort of sorts) {\n const col = columnValues.get(sort.column);\n if (!col) continue;\n const va = col[a];\n const vb = col[b];\n const cmp = compareValues(va, vb);\n if (cmp !== 0) return sort.direction === \"asc\" ? 
cmp : -cmp;\n }\n return a - b; // stable: preserve original order for ties\n });\n\n return indices;\n}\n\n/**\n * Apply filters to produce a set of passing row indices.\n */\nexport function buildFilterIndex(\n totalRows: number,\n filters: FilterDef[],\n columnValues: Map<string, unknown[]>,\n): number[] | null {\n if (filters.length === 0) return null; // null = no filtering\n\n const passing: number[] = [];\n for (let i = 0; i < totalRows; i++) {\n let pass = true;\n for (const filter of filters) {\n const col = columnValues.get(filter.column);\n if (!col) continue;\n if (!matchesFilter(col[i], filter)) {\n pass = false;\n break;\n }\n }\n if (pass) passing.push(i);\n }\n return passing;\n}\n\n/**\n * Build group nodes from column values.\n * Supports multi-level grouping (first group column → second → etc.)\n */\nexport function buildGroups(\n rowIndices: number[],\n groups: GroupDef[],\n columnValues: Map<string, unknown[]>,\n): GroupNode[] {\n if (groups.length === 0) return [];\n\n const firstGroup = groups[0];\n const col = columnValues.get(firstGroup.column);\n if (!col) return [];\n\n // Bucket rows by group key — store DISPLAY indices (position in mapping),\n // not source indices, so getRow(displayIndex) works directly.\n const buckets = new Map<string, number[]>();\n const keyLabels = new Map<string, unknown>();\n\n for (let displayIdx = 0; displayIdx < rowIndices.length; displayIdx++) {\n const sourceIdx = rowIndices[displayIdx];\n const value = col[sourceIdx];\n const key = formatGroupKey(value);\n if (!buckets.has(key)) {\n buckets.set(key, []);\n keyLabels.set(key, value);\n }\n buckets.get(key)!.push(displayIdx);\n }\n\n // Sort group keys\n const sortedKeys = [...buckets.keys()].sort((a, b) => {\n const va = keyLabels.get(a);\n const vb = keyLabels.get(b);\n return compareValues(va, vb);\n });\n\n return sortedKeys.map((key) => ({\n key: keyLabels.get(key),\n label: key,\n count: buckets.get(key)!.length,\n rowIndices: buckets.get(key)!,\n 
expanded: false,\n }));\n}\n\n/**\n * Collect unique values from a column (up to a limit).\n * Returns sorted unique values.\n */\nexport function collectUniqueValues(\n values: unknown[],\n limit: number = 500,\n): unknown[] {\n const seen = new Set<string>();\n const unique: unknown[] = [];\n\n for (const v of values) {\n const key = formatGroupKey(v);\n if (!seen.has(key)) {\n seen.add(key);\n unique.push(v);\n if (unique.length >= limit) break;\n }\n }\n\n unique.sort((a, b) => compareValues(a, b));\n return unique;\n}\n\n// ── Helpers ──\n\nfunction compareValues(a: unknown, b: unknown): number {\n // nulls last\n if (a === null || a === undefined) return b === null || b === undefined ? 0 : 1;\n if (b === null || b === undefined) return -1;\n\n if (typeof a === \"number\" && typeof b === \"number\") return a - b;\n if (typeof a === \"bigint\" && typeof b === \"bigint\") return a < b ? -1 : a > b ? 1 : 0;\n if (typeof a === \"boolean\" && typeof b === \"boolean\") return Number(a) - Number(b);\n\n // Dates\n if (a instanceof Date && b instanceof Date) return a.getTime() - b.getTime();\n\n // Default: string comparison\n return String(a).localeCompare(String(b));\n}\n\nfunction matchesFilter(value: unknown, filter: FilterDef): boolean {\n switch (filter.operator) {\n case \"is_null\":\n return value === null || value === undefined;\n case \"is_not_null\":\n return value !== null && value !== undefined;\n case \"eq\":\n return value === filter.value;\n case \"neq\":\n return value !== filter.value;\n case \"gt\":\n return compareValues(value, filter.value) > 0;\n case \"gte\":\n return compareValues(value, filter.value) >= 0;\n case \"lt\":\n return compareValues(value, filter.value) < 0;\n case \"lte\":\n return compareValues(value, filter.value) <= 0;\n case \"contains\":\n return String(value).toLowerCase().includes(String(filter.value).toLowerCase());\n case \"not_contains\":\n return 
!String(value).toLowerCase().includes(String(filter.value).toLowerCase());\n case \"in\":\n if (Array.isArray(filter.value)) {\n const set = new Set(filter.value.map(String));\n return set.has(String(value));\n }\n return false;\n default:\n return true;\n }\n}\n\nfunction formatGroupKey(value: unknown): string {\n if (value === null || value === undefined) return \"(null)\";\n if (value instanceof Date) return value.toISOString();\n return String(value);\n}\n"],"mappings":";AAAA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAWP,IAAM,kBAAkB;AAExB,SAAS,mBAA4B;AACnC,QAAM,IAAI;AACV,SAAO,GAAG,kBAAkB;AAC9B;AAEA,SAAS,QAAQ,SAAiB,SAAmB;AACnD,MAAI,CAAC,iBAAiB,EAAG;AACzB,MAAI,YAAY,QAAW;AACzB,YAAQ,IAAI,GAAG,eAAe,IAAI,OAAO,IAAI,OAAO;AAAA,EACtD,OAAO;AACL,YAAQ,IAAI,GAAG,eAAe,IAAI,OAAO,EAAE;AAAA,EAC7C;AACF;AAEA,SAAS,UAAU,SAAiB,OAAgB;AAClD,MAAI,CAAC,iBAAiB,EAAG;AACzB,UAAQ,MAAM,GAAG,eAAe,IAAI,OAAO,IAAI,KAAK;AACtD;AAMA,SAAS,qBAAqB,OAA+C;AAC3E,QAAM,UAAU,QAAQ,QAAQ,KAAK;AACrC,UAAQ,QAAQ,CAAC,OAAe,QAC9B,QAAQ,KAAK,CAAC,WAAW,OAAO,MAAM,OAAO,GAAG,CAAC;AACnD,SAAO;AACT;AAEA,IAAM,wBAAwB,oBAAI,QAAsC;AAExE,SAAS,gBAAgB,QAAsC;AAC7D,QAAM,SAAS,sBAAsB,IAAI,MAAM;AAC/C,MAAI,OAAQ,QAAO;AAEnB,QAAM,UAAyB;AAAA,IAC7B,YAAY,OAAO;AAAA,IACnB,MAAM,OAAe,KAAoC;AACvD,YAAM,IAAI;AAAA,QACR,QAAQ,QAAQ,OAAO,MAAM,OAAO,GAAG,CAAC;AAAA,MAC1C;AACA,cAAQ,yBAAyB;AAAA,QAC/B;AAAA,QACA,KAAK,OAAO,OAAO;AAAA,QACnB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AAEA,wBAAsB,IAAI,QAAQ,OAAO;AACzC,SAAO;AACT;AASO,SAAS,eAAe,MAA2B;AACxD,UAAQ,yBAAyB,EAAE,MAAM,KAAK,KAAK,CAAC;AACpD,SAAO;AAAA,IACL,YAAY,KAAK;AAAA,IACjB,MAAM,OAAe,KAAoC;AACvD,YAAM,OAAO,KAAK,MAAM,OAAO,GAAG;AAGlC,YAAM,IAAI,qBAAqB,KAAK,YAAY,CAAC;AACjD,cAAQ,wBAAwB;AAAA,QAC9B;AAAA,QACA,KAAK,OAAO,KAAK;AAAA,QACjB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAKO,SAAS,iBAAiB,QAAoC;AACnE,UAAQ,2BAA2B,EAAE,YAAY,OAAO,WAAW,CAAC;AACpE,SAAO;AAAA,IACL,YAAY,OAAO;AAAA,IACnB,MAAM,OAAe,KAAoC;AACvD,YAAM,IAAI;AAAA,QACR,QAAQ,QAAQ,OAAO,MAAM,OAAO,GAAG,CAAC;AAAA,MAC
1C;AACA,cAAQ,0BAA0B;AAAA,QAChC;AAAA,QACA,KAAK,OAAO,OAAO;AAAA,QACnB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACF;AAKA,eAAsB,cAAc,KAAqC;AACvE,UAAQ,uBAAuB,EAAE,IAAI,CAAC;AACtC,QAAM,WAAW,MAAM,mBAAmB,EAAE,IAAI,CAAC;AACjD,QAAM,UAAyB;AAAA,IAC7B,YAAY,SAAS;AAAA,IACrB,MAAM,OAAe,KAAoC;AACvD,YAAM,IAAI;AAAA,QACR,QAAQ,QAAQ,SAAS,MAAM,OAAO,GAAG,CAAC;AAAA,MAC5C;AACA,cAAQ,uBAAuB;AAAA,QAC7B;AAAA,QACA,KAAK,OAAO,SAAS;AAAA,QACrB,gBAAgB,OAAQ,EAA0B,UAAU;AAAA,MAC9D,CAAC;AACD,aAAO;AAAA,IACT;AAAA,EACF;AACA,UAAQ,uBAAuB,EAAE,YAAY,QAAQ,WAAW,CAAC;AACjE,SAAO;AACT;AAIA,IAAM,mBAAmB,oBAAI,QAAoC;AACjE,IAAM,mBAAmB,oBAAI,QAA8C;AAE3E,SAAS,eAAe,QAAoC;AAC1D,QAAM,aAAa,gBAAgB,MAAM;AACzC,MAAI,MAAM,iBAAiB,IAAI,UAAU;AACzC,MAAI,CAAC,KAAK;AAGR,UAAM;AACN,qBAAiB,IAAI,YAAY,GAAG;AAAA,EACtC;AACA,SAAO;AACT;AAEA,SAAS,eAAe,QAA8C;AACpE,QAAM,aAAa,gBAAgB,MAAM;AACzC,MAAI,UAAU,iBAAiB,IAAI,UAAU;AAC7C,MAAI,CAAC,SAAS;AACZ,UAAM,WAAW,eAAe,UAAU;AAC1C,cAAU,qBAAqB,QAAQ;AACvC,qBAAiB,IAAI,YAAY,OAAO;AAAA,EAC1C;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,KAAoC;AAC7D,QAAM,UAA2B,IAAI,OAAO,MAAM,CAAC,EAAE,IAAI,CAAC,SAAS;AAAA,IACjE,MAAM,IAAI;AAAA,IACV,MAAM,IAAI,QAAQ;AAAA,IAClB,UAAU,IAAI,oBAAoB;AAAA,EACpC,EAAE;AAGF,QAAM,kBAA4B,CAAC,CAAC;AACpC,aAAW,MAAM,IAAI,YAAY;AAC/B,oBAAgB,KAAK,gBAAgB,gBAAgB,SAAS,CAAC,IAAI,OAAO,GAAG,QAAQ,CAAC;AAAA,EACxF;AAEA,SAAO;AAAA,IACL,UAAU,OAAO,IAAI,QAAQ;AAAA,IAC7B;AAAA,IACA,WAAW,IAAI,WAAW;AAAA,IAC1B;AAAA,IACA,WAAW,IAAI,cAAc;AAAA,EAC/B;AACF;AAQA,eAAsB,oBACpB,QAC0B;AAC1B,QAAM,MAAM,MAAM,eAAe,MAAM;AACvC,SAAO,kBAAkB,GAAG;AAC9B;AAMA,eAAsB,gBACpB,QACA,UAAuB,CAAC,GACF;AACtB,UAAQ,yBAAyB;AAAA,IAC/B,QAAQ,QAAQ,UAAU;AAAA,IAC1B,OAAO,QAAQ,SAAS;AAAA,IACxB,SAAS,QAAQ,SAAS,UAAU;AAAA,EACtC,CAAC;AACD,QAAM,WAAW,eAAe,MAAM;AACtC,QAAM,cAAc,MAAM,eAAe,MAAM;AAC/C,QAAM,WAAW,kBAAkB,WAAW;AAE9C,QAAM,OAAqB,CAAC;AAE5B,MAAI;AACF,UAAM,YAAY;AAAA,MAChB,MAAM;AAAA,MACN,UAAU;AAAA,MACV,SAAS,QAAQ;AAAA,MACjB,UAAU,QAAQ,UAAU;AAAA,MAC5B,QACE,QAAQ,UAAU,UACb,QAAQ,UAAU,KAAK,QAAQ,QAChC;AAAA,MACN,YAAY,CAAC,SAAsB;AACjC,cAAM,cACJ,QAAQ,WAAW,SAAS,QAAQ,IAAI,
CAAC,MAAM,EAAE,IAAI;AACvD,mBAAW,OAAO,MAAM;AACtB,gBAAM,MAAkB,CAAC;AACzB,sBAAY,QAAQ,CAAC,MAAM,MAAM;AAC/B,gBAAI,IAAI,IAAI,IAAI,CAAC;AAAA,UACnB,CAAC;AACD,eAAK,KAAK,GAAG;AAAA,QACf;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH,SAAS,OAAO;AACd,cAAU,yBAAyB,KAAK;AACxC,UAAM;AAAA,EACR;AAEA,SAAO,EAAE,UAAU,KAAK;AAC1B;AAiBA,eAAsB,iBACpB,QACA,aACA,aACiC;AACjC,QAAM,WAAW,eAAe,MAAM;AACtC,QAAM,cAAc,MAAM,eAAe,MAAM;AAE/C,QAAM,SAAS,oBAAI,IAAuB;AAC1C,aAAW,QAAQ,aAAa;AAC9B,WAAO,IAAI,MAAM,CAAC,CAAC;AAAA,EACrB;AAIA,MAAI,YAAY;AAChB,aAAW,MAAM,YAAY,YAAY;AAEvC,QAAI,cAAc,EAAG,QAAO;AAE5B,UAAM,SAAS,OAAO,GAAG,QAAQ;AAEjC,UAAM,YAAY;AAAA,MAChB,MAAM;AAAA,MACN,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU;AAAA,MACV,QAAQ,YAAY;AAAA,MACpB,YAAY,CAAC,SAAsB;AACjC,iBAAS,SAAS,GAAG,SAAS,KAAK,QAAQ,UAAU;AACnD,gBAAM,MAAM,KAAK,MAAM;AACvB,mBAAS,SAAS,GAAG,SAAS,YAAY,QAAQ,UAAU;AAC1D,mBAAO,IAAI,YAAY,MAAM,CAAC,EAAG,KAAK,IAAI,MAAM,CAAC;AAAA,UACnD;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAED,iBAAa;AAGb,UAAM,IAAI,QAAc,CAAC,YAAY,WAAW,SAAS,CAAC,CAAC;AAAA,EAC7D;AAEA,SAAO;AACT;AAMA,eAAsB,kBACpB,QACA,YACA,SACuB;AACvB,MAAI,WAAW,WAAW,EAAG,QAAO,CAAC;AAErC,QAAM,WAAW,eAAe,MAAM;AACtC,QAAM,cAAc,MAAM,eAAe,MAAM;AAC/C,QAAM,WAAW,kBAAkB,WAAW;AAC9C,QAAM,cAAc,WAAW,SAAS,QAAQ,IAAI,CAAC,MAAM,EAAE,IAAI;AAGjE,QAAM,SAAS,WAAW,IAAI,CAAC,KAAK,SAAS,EAAE,KAAK,IAAI,EAAE;AAC1D,SAAO,KAAK,CAAC,GAAG,MAAM,EAAE,MAAM,EAAE,GAAG;AAGnC,QAAM,SAAgE,CAAC;AACvE,MAAI,aAAa,OAAO,CAAC,EAAE;AAC3B,MAAI,WAAW,OAAO,CAAC,EAAE,MAAM;AAC/B,MAAI,YAAY,CAAC,OAAO,CAAC,EAAE,GAAG;AAE9B,WAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,QAAI,OAAO,CAAC,EAAE,OAAO,WAAW,IAAI;AAElC,iBAAW,OAAO,CAAC,EAAE,MAAM;AAC3B,gBAAU,KAAK,OAAO,CAAC,EAAE,GAAG;AAAA,IAC9B,OAAO;AACL,aAAO,KAAK,EAAE,OAAO,YAAY,KAAK,UAAU,UAAU,CAAC;AAC3D,mBAAa,OAAO,CAAC,EAAE;AACvB,iBAAW,OAAO,CAAC,EAAE,MAAM;AAC3B,kBAAY,CAAC,OAAO,CAAC,EAAE,GAAG;AAAA,IAC5B;AAAA,EACF;AACA,SAAO,KAAK,EAAE,OAAO,YAAY,KAAK,UAAU,UAAU,CAAC;AAE3D,QAAM,SAAuB,IAAI,MAAM,WAAW,MAAM;AAExD,aAAW,SAAS,QAAQ;AAC1B,UAAM,YAA0B,CAAC;AAEjC,UAAM,YAAY;AAAA,MAChB,MAAM;AAAA,MACN,UAAU;AAAA,MACV,SAAS;AAAA,MACT,UAAU,MAAM;A
AAA,MAChB,QAAQ,MAAM;AAAA,MACd,YAAY,CAAC,SAAsB;AACjC,mBAAW,OAAO,MAAM;AACtB,gBAAM,MAAkB,CAAC;AACzB,sBAAY,QAAQ,CAAC,MAAM,MAAM;AAC/B,gBAAI,IAAI,IAAI,IAAI,CAAC;AAAA,UACnB,CAAC;AACD,oBAAU,KAAK,GAAG;AAAA,QACpB;AAAA,MACF;AAAA,IACF,CAAC;AAGD,QAAI,eAAe;AACnB,eAAW,KAAK,QAAQ;AACtB,UAAI,EAAE,OAAO,MAAM,SAAS,EAAE,MAAM,MAAM,KAAK;AAC7C,cAAM,WAAW,EAAE,MAAM,MAAM;AAC/B,YAAI,WAAW,UAAU,QAAQ;AAC/B,iBAAO,EAAE,GAAG,IAAI,UAAU,QAAQ;AAAA,QACpC;AACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAKA,eAAsB,kBAAkB,MAAkC;AACxE,SAAO,KAAK,YAAY;AAC1B;AAGA,eAAsB,oBAAoB,KAAmC;AAC3E,QAAM,WAAW,MAAM,MAAM,GAAG;AAChC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,IAAI;AAAA,MACR,iCAAiC,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,IACzE;AAAA,EACF;AACA,SAAO,SAAS,YAAY;AAC9B;;;AC5VO,SAAS,sBAAqC;AACnD,SAAO,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC,GAAG,QAAQ,CAAC,EAAE;AAC9C;AAQO,SAAS,eACd,QACA,OACA,cACU;AACV,QAAM,UAAU,MAAM,KAAK,EAAE,QAAQ,OAAO,OAAO,GAAG,CAAC,GAAG,MAAM,CAAC;AAEjE,UAAQ,KAAK,CAAC,GAAG,MAAM;AACrB,eAAW,QAAQ,OAAO;AACxB,YAAM,MAAM,aAAa,IAAI,KAAK,MAAM;AACxC,UAAI,CAAC,IAAK;AACV,YAAM,KAAK,IAAI,CAAC;AAChB,YAAM,KAAK,IAAI,CAAC;AAChB,YAAM,MAAM,cAAc,IAAI,EAAE;AAChC,UAAI,QAAQ,EAAG,QAAO,KAAK,cAAc,QAAQ,MAAM,CAAC;AAAA,IAC1D;AACA,WAAO,IAAI;AAAA,EACb,CAAC;AAED,SAAO;AACT;AAKO,SAAS,iBACd,WACA,SACA,cACiB;AACjB,MAAI,QAAQ,WAAW,EAAG,QAAO;AAEjC,QAAM,UAAoB,CAAC;AAC3B,WAAS,IAAI,GAAG,IAAI,WAAW,KAAK;AAClC,QAAI,OAAO;AACX,eAAW,UAAU,SAAS;AAC5B,YAAM,MAAM,aAAa,IAAI,OAAO,MAAM;AAC1C,UAAI,CAAC,IAAK;AACV,UAAI,CAAC,cAAc,IAAI,CAAC,GAAG,MAAM,GAAG;AAClC,eAAO;AACP;AAAA,MACF;AAAA,IACF;AACA,QAAI,KAAM,SAAQ,KAAK,CAAC;AAAA,EAC1B;AACA,SAAO;AACT;AAMO,SAAS,YACd,YACA,QACA,cACa;AACb,MAAI,OAAO,WAAW,EAAG,QAAO,CAAC;AAEjC,QAAM,aAAa,OAAO,CAAC;AAC3B,QAAM,MAAM,aAAa,IAAI,WAAW,MAAM;AAC9C,MAAI,CAAC,IAAK,QAAO,CAAC;AAIlB,QAAM,UAAU,oBAAI,IAAsB;AAC1C,QAAM,YAAY,oBAAI,IAAqB;AAE3C,WAAS,aAAa,GAAG,aAAa,WAAW,QAAQ,cAAc;AACrE,UAAM,YAAY,WAAW,UAAU;AACvC,UAAM,QAAQ,IAAI,SAAS;AAC3B,UAAM,MAAM,eAAe,KAAK;AAChC,QAAI,CAAC,QAAQ,IAAI,GAAG,GAAG;AACrB,cAAQ,IAAI,KAAK,CAAC,CAAC;AACnB,gBAAU,IAAI,KAAK,KAAK;AAAA,IAC1B;AACA,YAAQ,IAAI,GAAG
,EAAG,KAAK,UAAU;AAAA,EACnC;AAGA,QAAM,aAAa,CAAC,GAAG,QAAQ,KAAK,CAAC,EAAE,KAAK,CAAC,GAAG,MAAM;AACpD,UAAM,KAAK,UAAU,IAAI,CAAC;AAC1B,UAAM,KAAK,UAAU,IAAI,CAAC;AAC1B,WAAO,cAAc,IAAI,EAAE;AAAA,EAC7B,CAAC;AAED,SAAO,WAAW,IAAI,CAAC,SAAS;AAAA,IAC9B,KAAK,UAAU,IAAI,GAAG;AAAA,IACtB,OAAO;AAAA,IACP,OAAO,QAAQ,IAAI,GAAG,EAAG;AAAA,IACzB,YAAY,QAAQ,IAAI,GAAG;AAAA,IAC3B,UAAU;AAAA,EACZ,EAAE;AACJ;AAMO,SAAS,oBACd,QACA,QAAgB,KACL;AACX,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,SAAoB,CAAC;AAE3B,aAAW,KAAK,QAAQ;AACtB,UAAM,MAAM,eAAe,CAAC;AAC5B,QAAI,CAAC,KAAK,IAAI,GAAG,GAAG;AAClB,WAAK,IAAI,GAAG;AACZ,aAAO,KAAK,CAAC;AACb,UAAI,OAAO,UAAU,MAAO;AAAA,IAC9B;AAAA,EACF;AAEA,SAAO,KAAK,CAAC,GAAG,MAAM,cAAc,GAAG,CAAC,CAAC;AACzC,SAAO;AACT;AAIA,SAAS,cAAc,GAAY,GAAoB;AAErD,MAAI,MAAM,QAAQ,MAAM,OAAW,QAAO,MAAM,QAAQ,MAAM,SAAY,IAAI;AAC9E,MAAI,MAAM,QAAQ,MAAM,OAAW,QAAO;AAE1C,MAAI,OAAO,MAAM,YAAY,OAAO,MAAM,SAAU,QAAO,IAAI;AAC/D,MAAI,OAAO,MAAM,YAAY,OAAO,MAAM,SAAU,QAAO,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI;AACpF,MAAI,OAAO,MAAM,aAAa,OAAO,MAAM,UAAW,QAAO,OAAO,CAAC,IAAI,OAAO,CAAC;AAGjF,MAAI,aAAa,QAAQ,aAAa,KAAM,QAAO,EAAE,QAAQ,IAAI,EAAE,QAAQ;AAG3E,SAAO,OAAO,CAAC,EAAE,cAAc,OAAO,CAAC,CAAC;AAC1C;AAEA,SAAS,cAAc,OAAgB,QAA4B;AACjE,UAAQ,OAAO,UAAU;AAAA,IACvB,KAAK;AACH,aAAO,UAAU,QAAQ,UAAU;AAAA,IACrC,KAAK;AACH,aAAO,UAAU,QAAQ,UAAU;AAAA,IACrC,KAAK;AACH,aAAO,UAAU,OAAO;AAAA,IAC1B,KAAK;AACH,aAAO,UAAU,OAAO;AAAA,IAC1B,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,IAAI;AAAA,IAC9C,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,KAAK;AAAA,IAC/C,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,IAAI;AAAA,IAC9C,KAAK;AACH,aAAO,cAAc,OAAO,OAAO,KAAK,KAAK;AAAA,IAC/C,KAAK;AACH,aAAO,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,OAAO,OAAO,KAAK,EAAE,YAAY,CAAC;AAAA,IAChF,KAAK;AACH,aAAO,CAAC,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,OAAO,OAAO,KAAK,EAAE,YAAY,CAAC;AAAA,IACjF,KAAK;AACH,UAAI,MAAM,QAAQ,OAAO,KAAK,GAAG;AAC/B,cAAM,MAAM,IAAI,IAAI,OAAO,MAAM,IAAI,MAAM,CAAC;AAC5C,eAAO,IAAI,IAAI,OAAO,KAAK,CAAC;AAAA,MAC9B;AACA,aAAO;AAAA,IACT;AACE,aAAO;AAAA,EACX;AACF;AAEA,SAAS,eAAe,OAAwB;AAC9C,MAAI,UAAU,QAAQ,UAAU,OAAW,QAAO;AAClD,MAAI,iBAAiB,KAAM,QAAO,MAAM,YAAY;AAC
pD,SAAO,OAAO,KAAK;AACrB;","names":[]}
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "@parqui/core",
3
+ "version": "1.1.1",
4
+ "description": "Framework-agnostic core for reading and processing Apache Parquet files",
5
+ "type": "module",
6
+ "main": "./dist/index.cjs",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "import": "./dist/index.js",
13
+ "require": "./dist/index.cjs"
14
+ }
15
+ },
16
+ "files": [
17
+ "dist"
18
+ ],
19
+ "devDependencies": {
20
+ "tsup": "^8.4.0",
21
+ "typescript": "^5.8.2"
22
+ },
23
+ "dependencies": {
24
+ "hyparquet": "^1.12.0"
25
+ },
26
+ "license": "MIT",
27
+ "scripts": {
28
+ "build": "tsup",
29
+ "dev": "tsup --watch",
30
+ "clean": "rm -rf dist",
31
+ "lint": "tsc --noEmit"
32
+ }
33
+ }