@gscdump/engine 0.17.5 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import { i as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
2
- import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, s as objectKey } from "./storage.mjs";
2
+ import { a as inferSearchType, c as objectKey, f as weekPartition, l as quarterOfMonth, n as dayPartition, o as mondayOfWeek, s as monthPartition, u as quarterPartition } from "./storage.mjs";
3
3
  import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
4
4
  import { MS_PER_DAY } from "gscdump";
5
5
  import { buildLogicalPlan } from "gscdump/query/plan";
@@ -75,6 +75,7 @@ async function runStage(deps, ctx, stage, now) {
75
75
  });
76
76
  const buckets = /* @__PURE__ */ new Map();
77
77
  for (const entry of candidates) {
78
+ if (entry.partition.startsWith("hourly/")) continue;
78
79
  const key = stage.bucketKey(entry);
79
80
  if (!key) continue;
80
81
  if (stage.bucketLatestMs(key) >= cutoff) continue;
@@ -1,4 +1,4 @@
1
- import { S as Row, b as QueryExecutor, j as TableName, p as ParquetCodec } from "./storage.mjs";
1
+ import { C as Row, M as TableName, m as ParquetCodec, x as QueryExecutor } from "./storage.mjs";
2
2
  interface DuckDBHandle {
3
3
  query: (sql: string, params?: unknown[]) => Promise<Row[]>;
4
4
  registerFileBuffer: (name: string, bytes: Uint8Array) => Promise<void>;
@@ -1,5 +1,5 @@
1
1
  import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
2
- import { i as inferSearchType, n as dayPartition, s as objectKey, u as tenantPrefix } from "./storage.mjs";
2
+ import { a as inferSearchType, c as objectKey, d as tenantPrefix, n as dayPartition, r as hourPartition } from "./storage.mjs";
3
3
  import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./compiler.mjs";
4
4
  import { sqlEscape } from "../sql-bind.mjs";
5
5
  import { buildLogicalPlan } from "gscdump/query/plan";
@@ -168,6 +168,7 @@ function defaultForType(t) {
168
168
  function columnsJson(table) {
169
169
  return `{${SCHEMAS[table].columns.map((c) => `'${c.name}': '${c.type}'`).join(", ")}}`;
170
170
  }
171
+ const DEFAULT_HOURLY_RETENTION_MS = 2160 * 60 * 60 * 1e3;
171
172
  const VERSION_RE = /__v(\d+)\.parquet$/;
172
173
  function parseLockScope(key) {
173
174
  const match = VERSION_RE.exec(key);
@@ -193,6 +194,19 @@ async function gcOrphansImpl(deps, now, graceMs, opts = {}) {
193
194
  await deps.dataSource.delete(retired.map((e) => e.objectKey));
194
195
  await deps.manifestStore.delete(retired);
195
196
  }
197
+ let hourlyDeleted = 0;
198
+ if (opts.userId) {
199
+ const hourlyCutoff = now - (opts.hourlyRetentionMs ?? DEFAULT_HOURLY_RETENTION_MS);
200
+ const expiredHourly = (await deps.manifestStore.listAll({
201
+ userId: opts.userId,
202
+ siteId: opts.siteId
203
+ })).filter((e) => e.partition.startsWith("hourly/") && e.createdAt < hourlyCutoff);
204
+ if (expiredHourly.length > 0) {
205
+ await deps.dataSource.delete(expiredHourly.map((e) => e.objectKey));
206
+ await deps.manifestStore.delete(expiredHourly);
207
+ hourlyDeleted = expiredHourly.length;
208
+ }
209
+ }
196
210
  let sweptOrphans = 0;
197
211
  if (opts.userId) {
198
212
  const prefix = tenantPrefix({
@@ -242,7 +256,7 @@ async function gcOrphansImpl(deps, now, graceMs, opts = {}) {
242
256
  }
243
257
  });
244
258
  }
245
- return { deleted: retired.length + sweptOrphans };
259
+ return { deleted: retired.length + sweptOrphans + hourlyDeleted };
246
260
  }
247
261
  const URL_PURGE_TABLES = ["pages", "page_keywords"];
248
262
  const MAX_DAY_BYTES = 100 * 1024 * 1024;
@@ -314,6 +328,73 @@ function createStorageEngine(opts) {
314
328
  }, date, now);
315
329
  });
316
330
  }
331
+ async function writeHour(ctx, rows) {
332
+ if (!ctx.date) throw new Error("writeHour requires ctx.date (the PT calendar day)");
333
+ const date = ctx.date;
334
+ const now = (ctx.now ?? defaultNow)();
335
+ const partition = hourPartition(date);
336
+ const searchType = ctx.searchType;
337
+ return manifestStore.withLock({
338
+ userId: ctx.userId,
339
+ siteId: ctx.siteId,
340
+ table: ctx.table,
341
+ partition
342
+ }, async () => {
343
+ const live = await manifestStore.listLive({
344
+ userId: ctx.userId,
345
+ siteId: ctx.siteId,
346
+ table: ctx.table,
347
+ partitions: [partition],
348
+ searchType: inferSearchType({ searchType })
349
+ });
350
+ const existing = [];
351
+ for (const entry of live) {
352
+ const rs = await codec.readRows({ table: ctx.table }, entry.objectKey, dataSource);
353
+ existing.push(...rs);
354
+ }
355
+ const dedup = /* @__PURE__ */ new Map();
356
+ for (const r of existing) {
357
+ const k = `${String(r.url ?? "")}\0${String(r.hour ?? "")}`;
358
+ dedup.set(k, r);
359
+ }
360
+ for (const r of rows) {
361
+ const normalized = normalizeRow(ctx.table, r);
362
+ const k = `${String(normalized.url ?? "")}\0${String(normalized.hour ?? "")}`;
363
+ dedup.set(k, normalized);
364
+ }
365
+ const merged = [...dedup.values()];
366
+ const key = objectKey(ctx, ctx.table, partition, now, searchType);
367
+ const { bytes: writtenBytes, rowCount } = await codec.writeRows({ table: ctx.table }, merged, key, dataSource);
368
+ let bytes = writtenBytes;
369
+ if (bytes === 0 && rowCount > 0 && dataSource.head) {
370
+ const probed = await dataSource.head(key);
371
+ if (probed) bytes = probed.bytes;
372
+ }
373
+ if (bytes > 104857600) {
374
+ await dataSource.delete([key]).catch(() => {});
375
+ throw new Error(`writeHour payload ${bytes} bytes exceeds ${MAX_DAY_BYTES} hard ceiling (table=${ctx.table}, key=${key})`);
376
+ }
377
+ const entry = {
378
+ userId: ctx.userId,
379
+ siteId: ctx.siteId,
380
+ table: ctx.table,
381
+ partition,
382
+ objectKey: key,
383
+ rowCount,
384
+ bytes,
385
+ createdAt: now,
386
+ schemaVersion: currentSchemaVersion(ctx.table),
387
+ tier: "raw",
388
+ ...searchType !== void 0 ? { searchType } : {}
389
+ };
390
+ await manifestStore.registerVersion(entry, live);
391
+ await manifestStore.bumpWatermark({
392
+ userId: ctx.userId,
393
+ siteId: ctx.siteId,
394
+ table: ctx.table
395
+ }, date, now);
396
+ });
397
+ }
317
398
  async function runSQL(opts) {
318
399
  opts.signal?.throwIfAborted();
319
400
  const entries = Object.entries(opts.fileSets);
@@ -387,7 +468,8 @@ function createStorageEngine(opts) {
387
468
  manifestStore
388
469
  }, (ctx.now ?? defaultNow)(), graceMs, {
389
470
  userId: ctx.userId,
390
- siteId: ctx.siteId
471
+ siteId: ctx.siteId,
472
+ ...ctx.hourlyRetentionMs !== void 0 ? { hourlyRetentionMs: ctx.hourlyRetentionMs } : {}
391
473
  });
392
474
  }
393
475
  async function purgeTenant(ctx) {
@@ -481,6 +563,7 @@ function createStorageEngine(opts) {
481
563
  }
482
564
  return {
483
565
  writeDay,
566
+ writeHour,
484
567
  query,
485
568
  runSQL,
486
569
  compactTiered,
@@ -495,4 +578,4 @@ function createStorageEngine(opts) {
495
578
  readObject: (key) => dataSource.read(key)
496
579
  };
497
580
  }
498
- export { createDuckDBExecutor as a, createDuckDBCodec as i, createStorageEngine as n, canonicalEmptyParquetSchema as r, MAX_DAY_BYTES as t };
581
+ export { createDuckDBCodec as a, canonicalEmptyParquetSchema as i, createStorageEngine as n, createDuckDBExecutor as o, gcOrphansImpl as r, MAX_DAY_BYTES as t };
@@ -1,4 +1,4 @@
1
- import { j as TableName } from "./storage.mjs";
1
+ import { M as TableName } from "./storage.mjs";
2
2
  import { LogicalQueryPlan } from "gscdump/query/plan";
3
3
  import { BuilderState } from "gscdump/query";
4
4
  interface ResolvedQuery {
@@ -1,4 +1,4 @@
1
- import { S as Row$1 } from "./storage.mjs";
1
+ import { C as Row$1 } from "./storage.mjs";
2
2
  import { t as AnalysisParams } from "./analysis-types.mjs";
3
3
  import { o as ResolverAdapter } from "./types.mjs";
4
4
  import { PlannerCapabilities } from "gscdump/query/plan";
@@ -13,7 +13,8 @@ const DIMENSION_SURFACES = {
13
13
  country: ["api", "stored"],
14
14
  device: ["api", "stored"],
15
15
  searchAppearance: ["api", "stored"],
16
- date: ["api", "stored"]
16
+ date: ["api", "stored"],
17
+ hour: ["api", "stored"]
17
18
  };
18
19
  const LOGICAL_DATASETS = {
19
20
  pages: { dimensions: {