@gscdump/engine 0.17.4 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/compiler.mjs +2 -1
- package/dist/_chunks/duckdb.d.mts +1 -1
- package/dist/_chunks/engine.mjs +87 -4
- package/dist/_chunks/planner.d.mts +1 -1
- package/dist/_chunks/registry.d.mts +1 -1
- package/dist/_chunks/schema.d.mts +305 -80
- package/dist/_chunks/schema.mjs +19 -3
- package/dist/_chunks/storage.d.mts +37 -2
- package/dist/_chunks/storage.mjs +4 -1
- package/dist/adapters/filesystem.d.mts +1 -1
- package/dist/adapters/filesystem.mjs +1 -1
- package/dist/adapters/hyparquet.d.mts +1 -1
- package/dist/adapters/node.d.mts +1 -1
- package/dist/adapters/node.mjs +1 -1
- package/dist/adapters/r2-manifest.d.mts +1 -1
- package/dist/adapters/r2-manifest.mjs +1 -1
- package/dist/contracts.d.mts +2 -2
- package/dist/index.d.mts +38 -4
- package/dist/index.mjs +6 -5
- package/dist/ingest.d.mts +1 -1
- package/dist/ingest.mjs +17 -1
- package/dist/planner.d.mts +2 -2
- package/dist/resolver/index.d.mts +10 -2
- package/dist/rollups.d.mts +36 -5
- package/dist/rollups.mjs +38 -1
- package/dist/schema.d.mts +2 -2
- package/dist/schema.mjs +2 -2
- package/dist/scope.d.mts +6 -0
- package/dist/scope.mjs +5 -3
- package/dist/source/index.d.mts +1 -1
- package/package.json +3 -3
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { i as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
|
|
2
|
-
import { a as
|
|
2
|
+
import { a as inferSearchType, c as objectKey, f as weekPartition, l as quarterOfMonth, n as dayPartition, o as mondayOfWeek, s as monthPartition, u as quarterPartition } from "./storage.mjs";
|
|
3
3
|
import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
|
|
4
4
|
import { MS_PER_DAY } from "gscdump";
|
|
5
5
|
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
@@ -75,6 +75,7 @@ async function runStage(deps, ctx, stage, now) {
|
|
|
75
75
|
});
|
|
76
76
|
const buckets = /* @__PURE__ */ new Map();
|
|
77
77
|
for (const entry of candidates) {
|
|
78
|
+
if (entry.partition.startsWith("hourly/")) continue;
|
|
78
79
|
const key = stage.bucketKey(entry);
|
|
79
80
|
if (!key) continue;
|
|
80
81
|
if (stage.bucketLatestMs(key) >= cutoff) continue;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as Row, M as TableName, m as ParquetCodec, x as QueryExecutor } from "./storage.mjs";
|
|
2
2
|
interface DuckDBHandle {
|
|
3
3
|
query: (sql: string, params?: unknown[]) => Promise<Row[]>;
|
|
4
4
|
registerFileBuffer: (name: string, bytes: Uint8Array) => Promise<void>;
|
package/dist/_chunks/engine.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
|
|
2
|
-
import {
|
|
2
|
+
import { a as inferSearchType, c as objectKey, d as tenantPrefix, n as dayPartition, r as hourPartition } from "./storage.mjs";
|
|
3
3
|
import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./compiler.mjs";
|
|
4
4
|
import { sqlEscape } from "../sql-bind.mjs";
|
|
5
5
|
import { buildLogicalPlan } from "gscdump/query/plan";
|
|
@@ -168,6 +168,7 @@ function defaultForType(t) {
|
|
|
168
168
|
/**
 * Renders the column list of `SCHEMAS[table]` as a brace-wrapped,
 * comma-separated string of `'name': 'type'` pairs.
 *
 * @param table Key into SCHEMAS.
 * @returns A string of the form `{'col_a': 'TYPE_A', 'col_b': 'TYPE_B'}`.
 */
function columnsJson(table) {
	const pairs = [];
	for (const column of SCHEMAS[table].columns) {
		pairs.push(`'${column.name}': '${column.type}'`);
	}
	return `{${pairs.join(", ")}}`;
}
|
|
171
|
+
// Fallback retention window for "hourly/" partitions when the caller supplies no
// hourlyRetentionMs: 2160 hours (= 90 days), expressed in milliseconds.
const DEFAULT_HOURLY_RETENTION_MS = 2160 * 60 * 60 * 1e3;
|
|
171
172
|
// Matches a key ending in `__v<digits>.parquet` and captures the digits (group 1).
const VERSION_RE = /__v(\d+)\.parquet$/;
|
|
172
173
|
function parseLockScope(key) {
|
|
173
174
|
const match = VERSION_RE.exec(key);
|
|
@@ -193,6 +194,19 @@ async function gcOrphansImpl(deps, now, graceMs, opts = {}) {
|
|
|
193
194
|
await deps.dataSource.delete(retired.map((e) => e.objectKey));
|
|
194
195
|
await deps.manifestStore.delete(retired);
|
|
195
196
|
}
|
|
197
|
+
let hourlyDeleted = 0;
|
|
198
|
+
if (opts.userId) {
|
|
199
|
+
const hourlyCutoff = now - (opts.hourlyRetentionMs ?? DEFAULT_HOURLY_RETENTION_MS);
|
|
200
|
+
const expiredHourly = (await deps.manifestStore.listAll({
|
|
201
|
+
userId: opts.userId,
|
|
202
|
+
siteId: opts.siteId
|
|
203
|
+
})).filter((e) => e.partition.startsWith("hourly/") && e.createdAt < hourlyCutoff);
|
|
204
|
+
if (expiredHourly.length > 0) {
|
|
205
|
+
await deps.dataSource.delete(expiredHourly.map((e) => e.objectKey));
|
|
206
|
+
await deps.manifestStore.delete(expiredHourly);
|
|
207
|
+
hourlyDeleted = expiredHourly.length;
|
|
208
|
+
}
|
|
209
|
+
}
|
|
196
210
|
let sweptOrphans = 0;
|
|
197
211
|
if (opts.userId) {
|
|
198
212
|
const prefix = tenantPrefix({
|
|
@@ -242,7 +256,7 @@ async function gcOrphansImpl(deps, now, graceMs, opts = {}) {
|
|
|
242
256
|
}
|
|
243
257
|
});
|
|
244
258
|
}
|
|
245
|
-
return { deleted: retired.length + sweptOrphans };
|
|
259
|
+
return { deleted: retired.length + sweptOrphans + hourlyDeleted };
|
|
246
260
|
}
|
|
247
261
|
// NOTE(review): presumably the tables touched by a URL-level purge; the consumer of
// this list is not visible in this excerpt, so confirm against the purge code.
const URL_PURGE_TABLES = ["pages", "page_keywords"];
|
|
248
262
|
// 100 MiB (104,857,600 bytes). Reported as the "hard ceiling" in the size error
// thrown by writeHour.
const MAX_DAY_BYTES = 100 * 1024 * 1024;
|
|
@@ -314,6 +328,73 @@ function createStorageEngine(opts) {
|
|
|
314
328
|
}, date, now);
|
|
315
329
|
});
|
|
316
330
|
}
|
|
331
|
+
/**
 * Upserts hourly rows for one calendar day into the partition returned by
 * `hourPartition(ctx.date)`.
 *
 * While holding the manifest lock for (userId, siteId, table, partition) it:
 *   1. lists the live manifest entries for that partition and search type,
 *   2. reads their rows and merges the incoming rows over them, keyed on
 *      (url, hour) — an incoming row replaces an existing row with the same key,
 *   3. writes the merged rows as one new object,
 *   4. rejects (and best-effort deletes) the object if it exceeds MAX_DAY_BYTES,
 *   5. registers the new manifest entry alongside the previously live entries and
 *      bumps the table watermark for the date.
 *
 * NOTE(review): the merge key only looks at `url` and `hour`. For a table whose rows
 * carry neither column (or need more columns to be unique) rows would collapse into
 * one entry — confirm that only url/hour-keyed tables are written through this path.
 *
 * @param ctx  Write context. Reads `userId`, `siteId`, `table`, `date` (required),
 *             `searchType` (optional) and `now` (optional clock, defaults to `defaultNow`).
 * @param rows Incoming rows; each one is passed through `normalizeRow` before merging.
 * @returns The promise returned by `manifestStore.withLock`.
 * @throws Error when `ctx.date` is missing, or when the written object is larger
 *         than MAX_DAY_BYTES.
 */
async function writeHour(ctx, rows) {
	if (!ctx.date) throw new Error("writeHour requires ctx.date (the PT calendar day)");
	const date = ctx.date;
	const now = (ctx.now ?? defaultNow)();
	const partition = hourPartition(date);
	const searchType = ctx.searchType;
	// Merge identity of a row: url + hour, NUL-separated so the two parts cannot run together.
	const mergeKey = (row) => `${String(row.url ?? "")}\0${String(row.hour ?? "")}`;
	return manifestStore.withLock({
		userId: ctx.userId,
		siteId: ctx.siteId,
		table: ctx.table,
		partition
	}, async () => {
		const live = await manifestStore.listLive({
			userId: ctx.userId,
			siteId: ctx.siteId,
			table: ctx.table,
			partitions: [partition],
			searchType: inferSearchType({ searchType })
		});
		const dedup = /* @__PURE__ */ new Map();
		// Fold existing rows straight into the map. Spreading a whole file's rows into
		// push(...) can exceed the engine's argument limit on large files, and the
		// intermediate array was never needed.
		for (const entry of live) {
			const existingRows = await codec.readRows({ table: ctx.table }, entry.objectKey, dataSource);
			for (const row of existingRows) dedup.set(mergeKey(row), row);
		}
		// Incoming rows are applied last so they overwrite stored rows with the same key.
		for (const row of rows) {
			const normalized = normalizeRow(ctx.table, row);
			dedup.set(mergeKey(normalized), normalized);
		}
		const merged = [...dedup.values()];
		const key = objectKey(ctx, ctx.table, partition, now, searchType);
		const { bytes: writtenBytes, rowCount } = await codec.writeRows({ table: ctx.table }, merged, key, dataSource);
		let bytes = writtenBytes;
		// If the codec reported 0 bytes for a non-empty write, ask the data source
		// (when it implements head) for the stored size instead.
		if (bytes === 0 && rowCount > 0 && dataSource.head) {
			const probed = await dataSource.head(key);
			if (probed) bytes = probed.bytes;
		}
		// Compare against the same constant the error message reports.
		if (bytes > MAX_DAY_BYTES) {
			// Best-effort cleanup of the oversized object; a failed delete must not mask the size error.
			await dataSource.delete([key]).catch(() => {});
			throw new Error(`writeHour payload ${bytes} bytes exceeds ${MAX_DAY_BYTES} hard ceiling (table=${ctx.table}, key=${key})`);
		}
		const entry = {
			userId: ctx.userId,
			siteId: ctx.siteId,
			table: ctx.table,
			partition,
			objectKey: key,
			rowCount,
			bytes,
			createdAt: now,
			schemaVersion: currentSchemaVersion(ctx.table),
			tier: "raw",
			// Only include searchType when the caller provided one.
			...searchType !== void 0 ? { searchType } : {}
		};
		await manifestStore.registerVersion(entry, live);
		await manifestStore.bumpWatermark({
			userId: ctx.userId,
			siteId: ctx.siteId,
			table: ctx.table
		}, date, now);
	});
}
|
|
317
398
|
async function runSQL(opts) {
|
|
318
399
|
opts.signal?.throwIfAborted();
|
|
319
400
|
const entries = Object.entries(opts.fileSets);
|
|
@@ -387,7 +468,8 @@ function createStorageEngine(opts) {
|
|
|
387
468
|
manifestStore
|
|
388
469
|
}, (ctx.now ?? defaultNow)(), graceMs, {
|
|
389
470
|
userId: ctx.userId,
|
|
390
|
-
siteId: ctx.siteId
|
|
471
|
+
siteId: ctx.siteId,
|
|
472
|
+
...ctx.hourlyRetentionMs !== void 0 ? { hourlyRetentionMs: ctx.hourlyRetentionMs } : {}
|
|
391
473
|
});
|
|
392
474
|
}
|
|
393
475
|
async function purgeTenant(ctx) {
|
|
@@ -481,6 +563,7 @@ function createStorageEngine(opts) {
|
|
|
481
563
|
}
|
|
482
564
|
return {
|
|
483
565
|
writeDay,
|
|
566
|
+
writeHour,
|
|
484
567
|
query,
|
|
485
568
|
runSQL,
|
|
486
569
|
compactTiered,
|
|
@@ -495,4 +578,4 @@ function createStorageEngine(opts) {
|
|
|
495
578
|
readObject: (key) => dataSource.read(key)
|
|
496
579
|
};
|
|
497
580
|
}
|
|
498
|
-
export {
|
|
581
|
+
export { createDuckDBCodec as a, canonicalEmptyParquetSchema as i, createStorageEngine as n, createDuckDBExecutor as o, gcOrphansImpl as r, MAX_DAY_BYTES as t };
|