npm - @gscdump/engine - Versions diffs - 0.17.3 → 0.17.5 - Mend

@gscdump/engine 0.17.3 → 0.17.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/_chunks/analysis-types.d.mts +3 -1
package/dist/_chunks/compiler.mjs +6 -2
package/dist/_chunks/engine.mjs +1 -1
package/dist/_chunks/resolver.mjs +91 -2
package/dist/_chunks/storage.d.mts +6 -1
package/dist/index.d.mts +106 -3
package/dist/index.mjs +209 -2
package/dist/planner.d.mts +1 -1
package/dist/planner.mjs +1 -1
package/dist/resolver/index.d.mts +59 -1
package/dist/resolver/index.mjs +2 -2
package/dist/rollups.d.mts +22 -1
package/dist/rollups.mjs +28 -1
package/dist/source/index.mjs +1 -1
package/package.json +3 -3

package/dist/_chunks/analysis-types.d.mts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { BuilderState } from "gscdump/query";
+import { BuilderState, SearchType } from "gscdump/query";
 type AnalysisTool = 'striking-distance' | 'opportunity' | 'movers' | 'decay' | 'zero-click' | 'brand' | 'cannibalization' | 'clustering' | 'concentration' | 'seasonality' | 'trends' | 'ctr-anomaly' | 'position-volatility' | 'long-tail' | 'intent-atlas' | 'query-migration' | 'bayesian-ctr' | 'stl-decompose' | 'change-point' | 'bipartite-pagerank' | 'survival' | 'position-distribution' | 'ctr-curve' | 'dark-traffic' | 'content-velocity' | 'keyword-breadth' | 'device-gap' | 'data-query' | 'data-detail';
 interface AnalysisParams {
   type: AnalysisTool;
@@ -37,6 +37,8 @@ interface AnalysisParams {
   qc?: BuilderState;
   /** data-query comparison filter applied to joined current/previous rows. */
   comparisonFilter?: 'new' | 'lost' | 'improving' | 'declining';
+  /** GSC slice the analysis is scoped to. Undefined = analyzer runs cross-type (today's behaviour for web-only sites). */
+  searchType?: SearchType;
 }
 interface AnalysisResult {
   results: Record<string, unknown>[];

package/dist/_chunks/compiler.mjs CHANGED Viewed

@@ -1,8 +1,8 @@
 import { i as dimensionToColumn, r as currentSchemaVersion } from "./schema.mjs";
 import { a as mondayOfWeek, c as quarterOfMonth, d as weekPartition, i as inferSearchType, l as quarterPartition, n as dayPartition, o as monthPartition, s as objectKey } from "./storage.mjs";
 import { METRIC_EXPR, escapeLike, topLevelPagePredicateSql } from "../sql-fragments.mjs";
-import { buildLogicalPlan } from "gscdump/query/plan";
 import { MS_PER_DAY } from "gscdump";
+import { buildLogicalPlan } from "gscdump/query/plan";
 const DAILY_PARTITION_RE = /^daily\/(\d{4}-\d{2}-\d{2})$/;
 const WEEKLY_PARTITION_RE = /^weekly\/(\d{4}-\d{2}-\d{2})$/;
 const MONTHLY_PARTITION_RE = /^monthly\/(\d{4}-\d{2})$/;
@@ -11,6 +11,10 @@ const DEFAULT_THRESHOLDS = {
 	d7: 30,
 	d30: 90
 };
+const RAW_DAILY_COMPACT_THRESHOLD = 7;
+function countRawDailies(entries) {
+	return entries.filter((e) => e.tier === "raw" || e.tier == null && e.partition.startsWith("daily/")).length;
+}
 const PENDING_WINDOW_DAYS = 4;
 const STAGES = [
 	{
@@ -285,4 +289,4 @@ function substituteNamedFiles(sql, sets) {
 	for (const [name, keys] of Object.entries(sets)) out = out.replace(new RegExp(`\\{\\{${name}\\}\\}`, "g"), fileList(keys));
 	return out;
 }
-export { compactTieredImpl as a, substituteNamedFiles as i, compileLogicalQueryPlan as n, enumeratePartitions as o, resolveToSQL as r, FILES_PLACEHOLDER as t };
+export { RAW_DAILY_COMPACT_THRESHOLD as a, enumeratePartitions as c, substituteNamedFiles as i, compileLogicalQueryPlan as n, compactTieredImpl as o, resolveToSQL as r, countRawDailies as s, FILES_PLACEHOLDER as t };

package/dist/_chunks/engine.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import { r as currentSchemaVersion, t as SCHEMAS } from "./schema.mjs";
 import { i as inferSearchType, n as dayPartition, s as objectKey, u as tenantPrefix } from "./storage.mjs";
-import { a as compactTieredImpl, i as substituteNamedFiles, n as compileLogicalQueryPlan } from "./compiler.mjs";
+import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as compactTieredImpl } from "./compiler.mjs";
 import { sqlEscape } from "../sql-bind.mjs";
 import { buildLogicalPlan } from "gscdump/query/plan";
 import { normalizeUrl } from "gscdump/normalize";

package/dist/_chunks/resolver.mjs CHANGED Viewed

@@ -1,7 +1,9 @@
 import { t as SCHEMAS, u as drizzleSchema } from "./schema.mjs";
+import { c as enumeratePartitions } from "./compiler.mjs";
 import { escapeLike } from "../sql-fragments.mjs";
-import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
+import "../planner.mjs";
 import { PgDialect } from "drizzle-orm/pg-core";
+import { buildLogicalComparisonPlan, buildLogicalPlan } from "gscdump/query/plan";
 import { normalizeUrl } from "gscdump/normalize";
 import { sql } from "drizzle-orm";
 const DIMENSION_SURFACES = {
@@ -756,6 +758,93 @@ function createParquetResolverAdapter() {
 		tableRef: (tk) => sql.raw(`read_parquet({{FILES}}, union_by_name = true) AS "${tk}"`)
 	});
 }
+function runArgs(ctx, partitions) {
+	return {
+		ctx: {
+			userId: ctx.userId,
+			siteId: ctx.siteId
+		},
+		table: ctx.table,
+		fileSets: { FILES: {
+			table: ctx.table,
+			partitions
+		} },
+		...ctx.searchType !== void 0 ? { searchType: ctx.searchType } : {}
+	};
+}
+async function runOptimizedQuery(runSQL, ctx, state, dateRange) {
+	const adapter = createParquetResolverAdapter();
+	const base = runArgs(ctx, enumeratePartitions(dateRange.startDate, dateRange.endDate));
+	const optimized = resolveToSQLOptimized(state, {
+		adapter,
+		siteId: void 0
+	});
+	const extras = buildExtrasQueries(state, {
+		adapter,
+		siteId: void 0
+	});
+	const [optRes, ...extrasRows] = await Promise.all([runSQL({
+		...base,
+		sql: optimized.sql,
+		params: optimized.params
+	}), ...extras.map((e) => runSQL({
+		...base,
+		sql: e.sql,
+		params: e.params
+	}))]);
+	const firstRow = optRes.rows[0];
+	const totalCount = Number(firstRow?.totalCount ?? 0);
+	const totals = {
+		clicks: Number(firstRow?.totalClicks ?? 0),
+		impressions: Number(firstRow?.totalImpressions ?? 0),
+		ctr: Number(firstRow?.totalCtr ?? 0),
+		position: Number(firstRow?.totalPosition ?? 0)
+	};
+	return {
+		rows: optRes.rows.map((r) => {
+			const { totalCount: _tc, totalClicks: _tcl, totalImpressions: _ti, totalCtr: _tr, totalPosition: _tp, ...rest } = r;
+			return rest;
+		}),
+		totalCount,
+		totals,
+		extras: extras.map((e, i) => ({
+			key: e.key,
+			rows: extrasRows[i].rows
+		}))
+	};
+}
+async function runComparisonQuery(runSQL, ctx, current, previous, windows, filter) {
+	const adapter = createParquetResolverAdapter();
+	const comparison = resolveComparisonSQL(current, previous, {
+		adapter,
+		siteId: void 0
+	}, filter);
+	const totals = buildTotalsSql(current, {
+		adapter,
+		siteId: void 0
+	});
+	const base = runArgs(ctx, enumeratePartitions(windows.current.startDate < windows.previous.startDate ? windows.current.startDate : windows.previous.startDate, windows.current.endDate > windows.previous.endDate ? windows.current.endDate : windows.previous.endDate));
+	const main = await runSQL({
+		...base,
+		sql: comparison.sql,
+		params: comparison.params
+	});
+	const count = await runSQL({
+		...base,
+		sql: comparison.countSql,
+		params: comparison.countParams
+	});
+	const totalsRow = await runSQL({
+		...base,
+		sql: totals.sql,
+		params: totals.params
+	});
+	return {
+		rows: main.rows,
+		totalCount: Number(count.rows[0]?.total ?? 0),
+		totals: totalsRow.rows[0] ?? {}
+	};
+}
 function assertSchemaInSync(options) {
 	const { label, schema, tableKeyToName, mode } = options;
 	for (const [key, table] of Object.entries(schema)) {
@@ -766,4 +855,4 @@ function assertSchemaInSync(options) {
 		if (missing.length > 0 || extra.length > 0) throw new Error(`${label} drizzle schema for '${key}' drifted from SCHEMAS. Missing: [${missing.join(", ")}]. Extra: [${extra.join(", ")}].`);
 	}
 }
-export { dimensionColumn as C, assertDimensionsSupported as S, supportsDimensionOnSurface as T, resolveToSQLOptimized as _, getDimensionFilters as a, DIMENSION_SURFACES as b, matchesDimensionFilter as c, metricValue as d, buildExtrasQueries as f, resolveToSQL as g, resolveComparisonSQL as h, dimensionValue as i, matchesMetricFilter as l, mergeExtras as m, createParquetResolverAdapter as n, getFilterDimensions as o, buildTotalsSql as p, pgResolverAdapter as r, getInternalFilters as s, assertSchemaInSync as t, matchesTopLevelPage as u, createResolverAdapter as v, inferLogicalDataset as w, LOGICAL_DATASETS as x, createSqlFragments as y };
+export { LOGICAL_DATASETS as C, supportsDimensionOnSurface as D, inferLogicalDataset as E, DIMENSION_SURFACES as S, dimensionColumn as T, resolveComparisonSQL as _, pgResolverAdapter as a, createResolverAdapter as b, getFilterDimensions as c, matchesMetricFilter as d, matchesTopLevelPage as f, mergeExtras as g, buildTotalsSql as h, createParquetResolverAdapter as i, getInternalFilters as l, buildExtrasQueries as m, runComparisonQuery as n, dimensionValue as o, metricValue as p, runOptimizedQuery as r, getDimensionFilters as s, assertSchemaInSync as t, matchesDimensionFilter as u, resolveToSQL as v, assertDimensionsSupported as w, createSqlFragments as x, resolveToSQLOptimized as y };

package/dist/_chunks/storage.d.mts CHANGED Viewed

@@ -11,6 +11,11 @@ interface CompactionThresholds {
   d7?: number;
   d30?: number;
 }
+declare const RAW_DAILY_COMPACT_THRESHOLD = 7;
+declare function countRawDailies(entries: ReadonlyArray<{
+  tier?: string | null;
+  partition: string;
+}>): number;
 declare function enumeratePartitions(startDate: string, endDate: string): string[];
 /**
  * Default `searchType` for entries written before the field landed and for
@@ -468,4 +473,4 @@ interface EngineOptions {
 }
 declare function dayPartition(date: string): string;
 declare function objectKey(ctx: TenantCtx, table: TableName, partition: string, version: number, searchType?: SearchType): string;
-export { SyncStateScope as A, inferSearchType as B, RunSQLOptions as C, SyncStateDetail as D, SyncState as E, WatermarkScope as F, CompactionThresholds as H, WriteCtx as I, WriteResult as L, TenantCtx$1 as M, Watermark as N, SyncStateFilter as O, WatermarkFilter as P, dayPartition as R, Row$1 as S, StorageEngine as T, enumeratePartitions as U, objectKey as V, QueryCtx as _, EngineOptions as a, QueryExecutor as b, ListLiveFilter as c, ManifestPurgeResult as d, ManifestStore as f, PurgeUrlsResult as g, PurgeResult as h, DataSource as i, TableName$1 as j, SyncStateKind as k, LockScope as l, PurgeFilter as m, CompactionTier as n, FileSetRef as o, ParquetCodec as p, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, ManifestEntry as u, QueryExecuteOptions as v, SearchType$1 as w, QueryResult as x, QueryExecuteResult as y, inferLegacyTier as z };
+export { SyncStateScope as A, inferSearchType as B, RunSQLOptions as C, SyncStateDetail as D, SyncState as E, WatermarkScope as F, enumeratePartitions as G, CompactionThresholds as H, WriteCtx as I, WriteResult as L, TenantCtx$1 as M, Watermark as N, SyncStateFilter as O, WatermarkFilter as P, dayPartition as R, Row$1 as S, StorageEngine as T, RAW_DAILY_COMPACT_THRESHOLD as U, objectKey as V, countRawDailies as W, QueryCtx as _, EngineOptions as a, QueryExecutor as b, ListLiveFilter as c, ManifestPurgeResult as d, ManifestStore as f, PurgeUrlsResult as g, PurgeResult as h, DataSource as i, TableName$1 as j, SyncStateKind as k, LockScope as l, PurgeFilter as m, CompactionTier as n, FileSetRef as o, ParquetCodec as p, DEFAULT_SEARCH_TYPE as r, GcCtx as s, CodecCtx as t, ManifestEntry as u, QueryExecuteOptions as v, SearchType$1 as w, QueryResult as x, QueryExecuteResult as y, inferLegacyTier as z };

package/dist/index.d.mts CHANGED Viewed

@@ -1,13 +1,116 @@
-import { A as SyncStateScope, B as inferSearchType, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, H as CompactionThresholds, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, R as dayPartition, S as Row, T as StorageEngine, U as enumeratePartitions, V as objectKey, _ as QueryCtx, a as EngineOptions, b as QueryExecutor, c as ListLiveFilter, d as ManifestPurgeResult, f as ManifestStore, g as PurgeUrlsResult, h as PurgeResult, i as DataSource, j as TableName, k as SyncStateKind, l as LockScope, m as PurgeFilter, n as CompactionTier, o as FileSetRef, p as ParquetCodec, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as ManifestEntry, v as QueryExecuteOptions, w as SearchType, x as QueryResult, y as QueryExecuteResult, z as inferLegacyTier } from "./_chunks/storage.mjs";
+import { A as SyncStateScope, B as inferSearchType, C as RunSQLOptions, D as SyncStateDetail, E as SyncState, F as WatermarkScope, G as enumeratePartitions, H as CompactionThresholds, I as WriteCtx, L as WriteResult, M as TenantCtx, N as Watermark, O as SyncStateFilter, P as WatermarkFilter, R as dayPartition, S as Row, T as StorageEngine, U as RAW_DAILY_COMPACT_THRESHOLD, V as objectKey, W as countRawDailies, _ as QueryCtx, a as EngineOptions, b as QueryExecutor, c as ListLiveFilter, d as ManifestPurgeResult, f as ManifestStore, g as PurgeUrlsResult, h as PurgeResult, i as DataSource, j as TableName, k as SyncStateKind, l as LockScope, m as PurgeFilter, n as CompactionTier, o as FileSetRef, p as ParquetCodec, r as DEFAULT_SEARCH_TYPE, s as GcCtx, t as CodecCtx, u as ManifestEntry, v as QueryExecuteOptions, w as SearchType, x as QueryResult, y as QueryExecuteResult, z as inferLegacyTier } from "./_chunks/storage.mjs";
 import { a as createDuckDBExecutor, i as createDuckDBCodec, n as DuckDBHandle, r as canonicalEmptyParquetSchema, t as DuckDBFactory } from "./_chunks/duckdb.mjs";
 import { _ as pages, a as allTables, c as inferTable, d as TABLE_METADATA, f as countries, g as page_keywords, h as keywords, i as TableSchema, m as drizzleSchema, n as ColumnType, o as currentSchemaVersion, p as devices, r as SCHEMAS, s as dimensionToColumn, t as ColumnDef, u as DrizzleSchema } from "./_chunks/schema.mjs";
 import { InspectionVerdict, SchedulePolicy, ScheduleState, fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
 import { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
 import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
 import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
-import { Row as Row$1 } from "@gscdump/contracts";
+import { Row as Row$1, TableName as TableName$1 } from "@gscdump/contracts";
 declare function coerceRow(row: Row$1): Row$1;
 declare function coerceRows(rows: readonly Row$1[]): Row$1[];
 declare const MAX_DAY_BYTES: number;
 declare function createStorageEngine(opts: EngineOptions): StorageEngine;
-export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, DEFAULT_SEARCH_TYPE, type DataSource, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type GcCtx, type GscApiRow, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, TABLE_METADATA, type TableName, type TableSchema, type TenantCtx, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow };
+interface IngestAccumulatorEngine {
+  writeDay: (scope: TenantCtx & {
+    table: TableName$1;
+    date: string;
+    searchType?: SearchType;
+  }, rows: Row$1[]) => Promise<void>;
+  setSyncState: (scope: TenantCtx & {
+    table: TableName$1;
+    date: string;
+    searchType?: SearchType;
+  }, state: 'done' | 'failed', info?: {
+    error?: string;
+  }) => Promise<void>;
+}
+interface IngestAccumulatorCtx {
+  userId: string | number;
+  siteId: string;
+  searchType?: SearchType;
+}
+interface IngestAccumulatorHooks {
+  /**
+   * Called once per (table, date) when the job must abandon in-memory rows
+   *  (overflow or `hasMore` continuation). Host queues a forced re-sync from
+   *  the source. Return true iff a recovery job was actually queued.
+   */
+  onRecover: (table: TableName$1, date: string) => Promise<boolean>;
+  /**
+   * Called when an engine.writeDay fails or recovery itself errors. Host
+   *  logs to its error sink (e.g. `r2_write_errors` D1 table).
+   */
+  onWriteError: (info: {
+    table: TableName$1 | null;
+    date: string | null;
+    error: unknown;
+  }) => Promise<void>;
+  /**
+   * Called after a successful writeDay for a (table, date). Host typically
+   *  busts the manifest cache here so the next read sees the new parquet.
+   */
+  onWritten?: (info: {
+    table: TableName$1;
+    date: string;
+    rowCount: number;
+  }) => void | Promise<void>;
+  /**
+   * Called once at end of `finalize`, only when at least one (table, date)
+   *  actually landed. Host queues rollup rebuild + compaction.
+   */
+  onJobComplete?: (info: {
+    flushed: number;
+    rowsWritten: number;
+  }) => Promise<void>;
+}
+interface FinalizeOptions {
+  /**
+   * The GSC `hasMore` flag for the whole job. When true, in-memory buckets
+   *  only reflect this job's slice; we re-queue forced single-day re-syncs
+   *  via `onRecover` so R2 stays authoritative.
+   */
+  hasMore: boolean;
+}
+interface FinalizeResult {
+  flushed: number;
+  recovered: number;
+  failed: number;
+  rowsWritten: number;
+}
+interface IngestAccumulator {
+  push: (table: TableName$1, rows: readonly GscApiRow[]) => boolean;
+  finalize: (opts: FinalizeOptions) => Promise<FinalizeResult>;
+}
+interface CreateIngestAccumulatorOptions extends RowAccumulatorOptions {
+  engine: IngestAccumulatorEngine;
+  ctx: IngestAccumulatorCtx;
+  hooks: IngestAccumulatorHooks;
+}
+declare function createNoopIngestAccumulator(): IngestAccumulator;
+declare function createIngestAccumulator(opts: CreateIngestAccumulatorOptions): IngestAccumulator;
+type SyncTableName = Extract<TableName$1, 'pages' | 'keywords' | 'countries' | 'devices' | 'page_keywords'>;
+declare const TABLES_BY_SEARCH_TYPE: Record<SearchType, readonly SyncTableName[]>;
+declare function parseEnabledSearchTypes(raw: string | null | undefined): SearchType[];
+declare function validateEnabledSearchTypes(value: unknown): SearchType[];
+declare const TABLE_TIERS: {
+  readonly pages: "critical";
+  readonly keywords: "critical";
+  readonly countries: "standard";
+  readonly devices: "standard";
+  readonly page_keywords: "extended";
+};
+type TieredTableName = keyof typeof TABLE_TIERS;
+type TableTier = 'critical' | 'standard' | 'extended';
+type DateWeight = 'fresh' | 'recent' | 'historical';
+declare function getTableTier(table: string): TableTier;
+declare function getTablesForTier(tier: TableTier): TieredTableName[];
+declare function getDateWeight(date: string, now?: Date): DateWeight;
+declare const TIER_PRIORITY: Record<TableTier, number>;
+declare const WEIGHT_PRIORITY: Record<DateWeight, number>;
+declare const MAX_GSC_PAGES_R2 = 40;
+declare const ROW_LIMIT_R2 = 10000;
+declare const MIN_SYNC_IMPRESSIONS = 1;
+declare const MIN_COUNTRY_IMPRESSIONS = 10;
+declare const MAX_SITEMAP_URLS_PER_SITE = 50000;
+declare const MAX_TRACKED_URLS_PER_SITE = 200000;
+export { type CodecCtx, type ColumnDef, type ColumnType, type CompactionThresholds, type CompactionTier, type CreateIngestAccumulatorOptions, DEFAULT_SEARCH_TYPE, type DataSource, type DateWeight, type DrizzleSchema, type DuckDBFactory, type DuckDBHandle, type EngineOptions, FILES_PLACEHOLDER, type FileSetRef, type FinalizeOptions, type FinalizeResult, type GcCtx, type GscApiRow, type IngestAccumulator, type IngestAccumulatorCtx, type IngestAccumulatorEngine, type IngestAccumulatorHooks, type IngestOptions, type InspectionVerdict, type ListLiveFilter, type LockScope, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, type ManifestEntry, type ManifestPurgeResult, type ManifestStore, type ParquetCodec, type PurgeFilter, type PurgeResult, type PurgeUrlsResult, type QueryCtx, type QueryExecuteOptions, type QueryExecuteResult, type QueryExecutor, type QueryResult, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, type ResolvedQuery, type Row, type RowAccumulator, type RowAccumulatorOptions, type RunSQLOptions, SCHEMAS, type SchedulePolicy, type ScheduleState, type SearchType, type StorageEngine, type SyncState, type SyncStateDetail, type SyncStateFilter, type SyncStateKind, type SyncStateScope, type SyncTableName, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, type TableName, type TableSchema, type TableTier, type TenantCtx, type TieredTableName, WEIGHT_PRIORITY, type Watermark, type WatermarkFilter, type WatermarkScope, type WriteCtx, type WriteResult, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };

package/dist/index.mjs CHANGED Viewed

@@ -1,10 +1,217 @@
 import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
 import { a as inferTable, c as countries, d as keywords, f as page_keywords, i as dimensionToColumn, l as devices, n as allTables, p as pages, r as currentSchemaVersion, s as TABLE_METADATA, t as SCHEMAS, u as drizzleSchema } from "./_chunks/schema.mjs";
 import { i as inferSearchType, n as dayPartition, r as inferLegacyTier, s as objectKey, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
-import { i as substituteNamedFiles, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
+import { a as RAW_DAILY_COMPACT_THRESHOLD, c as enumeratePartitions, i as substituteNamedFiles, r as resolveToSQL, s as countRawDailies, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
 import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
 import { a as createDuckDBExecutor, i as createDuckDBCodec, n as createStorageEngine, r as canonicalEmptyParquetSchema, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
 import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
 import "./planner.mjs";
 import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
-export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, SCHEMAS, TABLE_METADATA, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countries, createDuckDBCodec, createDuckDBExecutor, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow };
+const NOOP_RESULT = {
+	flushed: 0,
+	recovered: 0,
+	failed: 0,
+	rowsWritten: 0
+};
+function scopeOf(ctx, table, date) {
+	return {
+		userId: String(ctx.userId),
+		siteId: ctx.siteId,
+		table,
+		date,
+		...ctx.searchType !== void 0 ? { searchType: ctx.searchType } : {}
+	};
+}
+function createNoopIngestAccumulator() {
+	return {
+		push() {
+			return false;
+		},
+		async finalize() {
+			return NOOP_RESULT;
+		}
+	};
+}
+function createIngestAccumulator(opts) {
+	const { engine, ctx, hooks, ...accOpts } = opts;
+	const acc = createRowAccumulator(accOpts);
+	async function writeOne(table, date, rows) {
+		const scope = scopeOf(ctx, table, date);
+		return engine.writeDay(scope, rows).then(() => engine.setSyncState(scope, "done")).then(async () => {
+			await hooks.onWritten?.({
+				table,
+				date,
+				rowCount: rows.length
+			});
+			return {
+				ok: true,
+				rows: rows.length
+			};
+		}).catch(async (err) => {
+			await hooks.onWriteError({
+				table,
+				date,
+				error: err
+			}).catch(() => {});
+			return { ok: false };
+		});
+	}
+	async function recover(table, date) {
+		const scope = scopeOf(ctx, table, date);
+		await engine.setSyncState(scope, "failed", { error: "mid-continuation-skip" }).catch(() => {});
+		return hooks.onRecover(table, date).catch(async (err) => {
+			await hooks.onWriteError({
+				table,
+				date,
+				error: err
+			}).catch(() => {});
+			return false;
+		});
+	}
+	return {
+		push(table, rows) {
+			return acc.push(table, rows);
+		},
+		async finalize({ hasMore }) {
+			const overflowed = acc.overflowed;
+			const totalRows = acc.totalRows;
+			const buckets = acc.drain();
+			if (overflowed || hasMore) {
+				const tasks = [];
+				for (const [table, byDate] of buckets) for (const date of byDate.keys()) tasks.push(recover(table, date));
+				const results = await Promise.all(tasks).catch(async (err) => {
+					await hooks.onWriteError({
+						table: null,
+						date: null,
+						error: err
+					}).catch(() => {});
+					return [];
+				});
+				if (overflowed) await hooks.onWriteError({
+					table: null,
+					date: null,
+					error: /* @__PURE__ */ new Error(`ingest accumulator overflow at ${totalRows} rows; recovering via forced re-sync`)
+				}).catch(() => {});
+				return {
+					flushed: 0,
+					recovered: results.filter(Boolean).length,
+					failed: 0,
+					rowsWritten: 0
+				};
+			}
+			const writes = [];
+			for (const [table, byDate] of buckets) for (const [date, rows] of byDate) writes.push(writeOne(table, date, rows));
+			const outcomes = await Promise.all(writes);
+			let flushed = 0;
+			let failed = 0;
+			let rowsWritten = 0;
+			for (const o of outcomes) if (o.ok) {
+				flushed++;
+				rowsWritten += o.rows;
+			} else failed++;
+			if (flushed > 0) await hooks.onJobComplete?.({
+				flushed,
+				rowsWritten
+			}).catch(() => {});
+			return {
+				flushed,
+				recovered: 0,
+				failed,
+				rowsWritten
+			};
+		}
+	};
+}
+const TABLES_BY_SEARCH_TYPE = {
+	web: [
+		"pages",
+		"keywords",
+		"countries",
+		"devices",
+		"page_keywords"
+	],
+	discover: [
+		"pages",
+		"countries",
+		"devices"
+	],
+	news: [
+		"pages",
+		"countries",
+		"devices"
+	],
+	googleNews: [
+		"pages",
+		"countries",
+		"devices"
+	],
+	image: [
+		"pages",
+		"countries",
+		"devices"
+	],
+	video: [
+		"pages",
+		"countries",
+		"devices"
+	]
+};
+function parseEnabledSearchTypes(raw) {
+	if (!raw) return ["web"];
+	const parsed = JSON.parse(raw);
+	if (!Array.isArray(parsed) || parsed.length === 0) return ["web"];
+	const valid = parsed.filter((v) => typeof v === "string" && v in TABLES_BY_SEARCH_TYPE);
+	if (valid.length === 0) return ["web"];
+	if (!valid.includes("web")) valid.unshift("web");
+	return valid;
+}
+function validateEnabledSearchTypes(value) {
+	if (!Array.isArray(value) || value.length === 0) throw new Error("enabledSearchTypes must be a non-empty array");
+	const seen = /* @__PURE__ */ new Set();
+	const out = [];
+	for (const v of value) {
+		if (typeof v !== "string" || !(v in TABLES_BY_SEARCH_TYPE)) throw new Error(`enabledSearchTypes: unknown searchType ${String(v)}`);
+		if (seen.has(v)) continue;
+		seen.add(v);
+		out.push(v);
+	}
+	if (!out.includes("web")) throw new Error("enabledSearchTypes must include \"web\"");
+	return out;
+}
+const TABLE_TIERS = {
+	pages: "critical",
+	keywords: "critical",
+	countries: "standard",
+	devices: "standard",
+	page_keywords: "extended"
+};
+function getTableTier(table) {
+	return TABLE_TIERS[table] || "extended";
+}
+function getTablesForTier(tier) {
+	return Object.entries(TABLE_TIERS).filter(([_, t]) => t === tier).map(([name]) => name);
+}
+function getDateWeight(date, now = /* @__PURE__ */ new Date()) {
+	const target = new Date(date);
+	const daysAgo = Math.floor((now.getTime() - target.getTime()) / (1e3 * 60 * 60 * 24));
+	if (daysAgo <= 3) return "fresh";
+	if (daysAgo <= 60) return "recent";
+	return "historical";
+}
+const TIER_PRIORITY = {
+	critical: 0,
+	standard: 1,
+	extended: 2
+};
+const WEIGHT_PRIORITY = {
+	fresh: 0,
+	recent: 1,
+	historical: 2
+};
+const MAX_GSC_PAGES_R2 = 40;
+const ROW_LIMIT_R2 = 1e4;
+const MIN_SYNC_IMPRESSIONS = 1;
+const MIN_COUNTRY_IMPRESSIONS = 10;
+const MAX_SITEMAP_URLS_PER_SITE = 5e4;
+const MAX_TRACKED_URLS_PER_SITE = 2e5;
+export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, getDateWeight, getTableTier, getTablesForTier, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, resolveToSQL, sitemapPolicy, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };

package/dist/planner.d.mts CHANGED Viewed

@@ -1,3 +1,3 @@
-import { U as enumeratePartitions } from "./_chunks/storage.mjs";
+import { G as enumeratePartitions } from "./_chunks/storage.mjs";
 import { a as substituteNamedFiles, i as resolveToSQL, n as ResolvedQuery, r as compileLogicalQueryPlan, t as FILES_PLACEHOLDER } from "./_chunks/planner.mjs";
 export { FILES_PLACEHOLDER, ResolvedQuery, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };

package/dist/planner.mjs CHANGED Viewed

@@ -1,2 +1,2 @@
-import { i as substituteNamedFiles, n as compileLogicalQueryPlan, o as enumeratePartitions, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
+import { c as enumeratePartitions, i as substituteNamedFiles, n as compileLogicalQueryPlan, r as resolveToSQL, t as FILES_PLACEHOLDER } from "./_chunks/compiler.mjs";
 export { FILES_PLACEHOLDER, compileLogicalQueryPlan, enumeratePartitions, resolveToSQL, substituteNamedFiles };

package/dist/resolver/index.d.mts CHANGED Viewed

@@ -1,3 +1,4 @@
+import { j as TableName$1, w as SearchType$1 } from "../_chunks/storage.mjs";
 import { a as ResolvedSQLOptimized, i as ResolvedSQL, n as ExtraQuery, o as ResolverAdapter, r as ResolvedComparisonSQL, s as ResolverOptions, t as ComparisonFilter } from "../_chunks/types.mjs";
 import { LogicalDataset, LogicalDataset as LogicalDataset$1, PlannerCapabilities } from "gscdump/query/plan";
 import { SQL } from "drizzle-orm";
@@ -96,6 +97,63 @@ declare const pgResolverAdapter: ResolverAdapter<PgTableKey>;
  * accidental adapter caching that would lock in a stale `{{FILES}}` set.
  */
 declare function createParquetResolverAdapter(): ResolverAdapter<PgTableKey>;
+interface RunQueryCtx {
+  userId: string;
+  siteId: string;
+  table: TableName$1;
+  searchType?: SearchType$1;
+}
+interface RunSQLFn {
+  (opts: {
+    ctx: {
+      userId: string;
+      siteId: string;
+    };
+    table: TableName$1;
+    fileSets: Record<string, {
+      table: TableName$1;
+      partitions: string[];
+    }>;
+    sql: string;
+    params: unknown[];
+    searchType?: SearchType$1;
+  }): Promise<{
+    rows: Array<Record<string, unknown>>;
+  }>;
+}
+interface OptimizedQueryResult {
+  rows: Array<Record<string, unknown>>;
+  totalCount: number;
+  totals: {
+    clicks: number;
+    impressions: number;
+    ctr: number;
+    position: number;
+  };
+  extras: Array<{
+    key: string;
+    rows: Array<Record<string, unknown>>;
+  }>;
+}
+interface ComparisonQueryResult {
+  rows: Array<Record<string, unknown>>;
+  totalCount: number;
+  totals: Record<string, unknown>;
+}
+declare function runOptimizedQuery(runSQL: RunSQLFn, ctx: RunQueryCtx, state: BuilderState, dateRange: {
+  startDate: string;
+  endDate: string;
+}): Promise<OptimizedQueryResult>;
+declare function runComparisonQuery(runSQL: RunSQLFn, ctx: RunQueryCtx, current: BuilderState, previous: BuilderState, windows: {
+  current: {
+    startDate: string;
+    endDate: string;
+  };
+  previous: {
+    startDate: string;
+    endDate: string;
+  };
+}, filter?: ComparisonFilter): Promise<ComparisonQueryResult>;
 interface AssertSchemaInSyncOptions {
   /** Label used in the thrown error (e.g. 'browser', 'sqlite'). */
   label: string;
@@ -109,4 +167,4 @@ interface AssertSchemaInSyncOptions {
   mode: 'exact' | 'superset';
 }
 declare function assertSchemaInSync(options: AssertSchemaInSyncOptions): void;
-export { type AssertSchemaInSyncOptions, type ComparisonFilter, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
+export { type AssertSchemaInSyncOptions, type ComparisonFilter, type ComparisonQueryResult, type CreateResolverAdapterConfig, DIMENSION_SURFACES, type DimensionBinding, type DimensionSurface, type ExtraQuery, LOGICAL_DATASETS, type LogicalDataset, type LogicalDatasetDefinition, type OptimizedQueryResult, type PgTableKey, type ResolvedComparisonSQL, type ResolvedSQL, type ResolvedSQLOptimized, type ResolverAdapter, type ResolverOptions, type RunQueryCtx, type RunSQLFn, type SqlFragments, type SqlFragmentsConfig, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };

package/dist/resolver/index.mjs CHANGED Viewed

@@ -1,2 +1,2 @@
-import { C as dimensionColumn, S as assertDimensionsSupported, T as supportsDimensionOnSurface, _ as resolveToSQLOptimized, a as getDimensionFilters, b as DIMENSION_SURFACES, c as matchesDimensionFilter, d as metricValue, f as buildExtrasQueries, g as resolveToSQL, h as resolveComparisonSQL, i as dimensionValue, l as matchesMetricFilter, m as mergeExtras, n as createParquetResolverAdapter, o as getFilterDimensions, p as buildTotalsSql, r as pgResolverAdapter, s as getInternalFilters, t as assertSchemaInSync, u as matchesTopLevelPage, v as createResolverAdapter, w as inferLogicalDataset, x as LOGICAL_DATASETS, y as createSqlFragments } from "../_chunks/resolver.mjs";
-export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
+import { C as LOGICAL_DATASETS, D as supportsDimensionOnSurface, E as inferLogicalDataset, S as DIMENSION_SURFACES, T as dimensionColumn, _ as resolveComparisonSQL, a as pgResolverAdapter, b as createResolverAdapter, c as getFilterDimensions, d as matchesMetricFilter, f as matchesTopLevelPage, g as mergeExtras, h as buildTotalsSql, i as createParquetResolverAdapter, l as getInternalFilters, m as buildExtrasQueries, n as runComparisonQuery, o as dimensionValue, p as metricValue, r as runOptimizedQuery, s as getDimensionFilters, t as assertSchemaInSync, u as matchesDimensionFilter, v as resolveToSQL, w as assertDimensionsSupported, x as createSqlFragments, y as resolveToSQLOptimized } from "../_chunks/resolver.mjs";
+export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, runComparisonQuery, runOptimizedQuery, supportsDimensionOnSurface };

package/dist/rollups.d.mts CHANGED Viewed

@@ -58,6 +58,14 @@ interface RollupDef {
   parquetColumns?: readonly ColumnDef[];
   /** Sort-key column names for parquet row-group stats. Optional. */
   parquetSortKey?: readonly string[];
+  /**
+   * When true, this rollup's payload is independent of GSC slice (e.g. entity
+   * rollups sourced from sitemap / indexing snapshots, not slice-partitioned
+   * fact tables). The runner rejects calls that pass `searchType` alongside
+   * a slice-orthogonal def so the output never lands under a per-slice prefix
+   * that the read path won't look at.
+   */
+  sliceOrthogonal?: boolean;
   build: (deps: {
     engine: RollupEngine;
     ctx: TenantCtx;
@@ -102,6 +110,19 @@ interface ParquetRollupPointer {
 }
 declare function rollupKey(ctx: TenantCtx, id: string, builtAt: number, searchType?: SearchType): string;
 declare function rollupParquetKey(ctx: TenantCtx, id: string, builtAt: number, searchType?: SearchType): string;
+interface RollupBucket {
+  list: (opts: {
+    prefix: string;
+  }) => Promise<{
+    objects: Array<{
+      key: string;
+    }>;
+  }>;
+  get: (key: string) => Promise<{
+    text: () => Promise<string>;
+  } | null>;
+}
+declare function readLatestRollup<T = unknown>(bucket: RollupBucket, ctx: TenantCtx, id: string, searchType?: SearchType): Promise<RollupEnvelope<T> | null>;
 interface RebuildRollupsOptions {
   engine: RollupEngine;
   dataSource: DataSource;
@@ -215,4 +236,4 @@ declare const sitemapHealthRollup: RollupDef;
  */
 declare const sitemapChanges28dRollup: RollupDef;
 declare const DEFAULT_ROLLUPS: readonly RollupDef[];
-export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
+export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupBucket, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };

package/dist/rollups.mjs CHANGED Viewed

@@ -11,10 +11,33 @@ function rollupKey(ctx, id, builtAt, searchType) {
 function rollupParquetKey(ctx, id, builtAt, searchType) {
 	return `${rollupPrefix(ctx, searchType)}/${id}__v${builtAt}.parquet`;
 }
+const ROLLUP_FILE_RE = /^(?<id>[a-z0-9_]+)__v(?<ts>\d+)\.json$/;
+async function readLatestRollup(bucket, ctx, id, searchType) {
+	const prefix = `${rollupPrefix(ctx, searchType)}/`;
+	const listing = await bucket.list({ prefix }).catch(() => null);
+	if (!listing) return null;
+	let newest = null;
+	for (const obj of listing.objects) {
+		const m = ROLLUP_FILE_RE.exec(obj.key.slice(prefix.length));
+		if (!m?.groups || m.groups.id !== id) continue;
+		const ts = Number(m.groups.ts);
+		if (!newest || ts > newest.ts) newest = {
+			ts,
+			key: obj.key
+		};
+	}
+	if (!newest) return null;
+	const obj = await bucket.get(newest.key).catch(() => null);
+	if (!obj) return null;
+	return JSON.parse(await obj.text());
+}
 async function rebuildRollups(opts) {
 	const now = opts.now ?? (() => Date.now());
 	const results = [];
 	const searchType = opts.searchType;
+	if (searchType !== void 0) {
+		for (const def of opts.defs) if (def.sliceOrthogonal === true) throw new Error(`rollup def '${def.id}' is slice-orthogonal; do not pass searchType`);
+	}
 	for (const def of opts.defs) {
 		const builtAt = now();
 		const payload = await def.build({
@@ -348,6 +371,7 @@ const indexingMetadataRollup = {
 const indexingHealthRollup = {
 	id: "indexing_health",
 	windowDays: 90,
+	sliceOrthogonal: true,
 	async build({ engine, ctx, dataSource, builtAt }) {
 		const key = inspectionParquetKey(ctx);
 		if (!await dataSource.head?.(key)) return { days: [] };
@@ -391,6 +415,7 @@ const indexingHealthRollup = {
 const indexPercentRollup = {
 	id: "index_percent",
 	windowDays: 90,
+	sliceOrthogonal: true,
 	async build({ engine, ctx, dataSource, builtAt, searchType }) {
 		const urlsKey = sitemapUrlsIndexKey(ctx);
 		if (!await dataSource.head?.(urlsKey)) return {
@@ -452,6 +477,7 @@ const indexPercentRollup = {
 const sitemapHealthRollup = {
 	id: "sitemap_health",
 	windowDays: 90,
+	sliceOrthogonal: true,
 	async build({ dataSource, ctx, builtAt }) {
 		const index = await createSitemapStore({ dataSource }).loadIndex(ctx);
 		const records = Object.values(index.records);
@@ -495,6 +521,7 @@ const sitemapHealthRollup = {
 const sitemapChanges28dRollup = {
 	id: "sitemap_changes_28d",
 	windowDays: 28,
+	sliceOrthogonal: true,
 	async build({ dataSource, ctx, builtAt }) {
 		const store = createSitemapStore({ dataSource });
 		const from = utcDateMinusDays(builtAt, 28);
@@ -562,4 +589,4 @@ const DEFAULT_ROLLUPS = [
 	sitemapHealthRollup,
 	sitemapChanges28dRollup
 ];
-export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
+export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexPercentRollup, indexingHealthRollup, indexingMetadataRollup, readLatestRollup, rebuildRollups, rollupKey, rollupParquetKey, sitemapChanges28dRollup, sitemapHealthRollup, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };

package/dist/source/index.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 import { n as coerceRows } from "../_chunks/coerce.mjs";
-import { S as assertDimensionsSupported, g as resolveToSQL, o as getFilterDimensions, r as pgResolverAdapter } from "../_chunks/resolver.mjs";
+import { a as pgResolverAdapter, c as getFilterDimensions, v as resolveToSQL, w as assertDimensionsSupported } from "../_chunks/resolver.mjs";
 import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
 var AttachedTableMissingError = class extends Error {
 	missing;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@gscdump/engine",
   "type": "module",
-  "version": "0.17.3",
+  "version": "0.17.5",
   "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
   "author": {
     "name": "Harlan Wilton",
@@ -169,8 +169,8 @@
   "dependencies": {
     "drizzle-orm": "^0.45.2",
     "proper-lockfile": "^4.1.2",
-    "@gscdump/contracts": "0.17.3",
-    "gscdump": "0.17.3"
+    "@gscdump/contracts": "0.17.5",
+    "gscdump": "0.17.5"
   },
   "devDependencies": {
     "@duckdb/duckdb-wasm": "^1.32.0",