npm - @gscdump/engine - Versions diffs - 0.20.2 → 0.21.0 - Mend

@gscdump/engine 0.20.2 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/dist/_chunks/engine.mjs +1 -1
package/dist/_chunks/iceberg-schema.mjs +67 -0
package/dist/_chunks/registry.d.mts +1 -1
package/dist/_chunks/resolver.mjs +15 -21
package/dist/_chunks/schema.d.mts +452 -133
package/dist/_chunks/schema.mjs +50 -24
package/dist/_chunks/sink.d.mts +329 -0
package/dist/_chunks/storage.d.mts +4 -4
package/dist/adapters/duckdb-node.mjs +2 -2
package/dist/adapters/hyparquet.mjs +5 -5
package/dist/index.d.mts +39 -7
package/dist/index.mjs +272 -14
package/dist/ingest.d.mts +23 -3
package/dist/ingest.mjs +43 -18
package/dist/rollups.d.mts +16 -6
package/dist/rollups.mjs +42 -35
package/dist/schema.d.mts +2 -2
package/dist/schema.mjs +2 -2
package/dist/sink-node.d.mts +31 -0
package/dist/sink-node.mjs +76 -0
package/dist/vendor/hysnappy-purejs.d.mts +29 -0
package/dist/vendor/hysnappy-purejs.mjs +13 -0
package/package.json +14 -3

package/dist/index.mjs CHANGED Viewed

@@ -1,13 +1,132 @@
 import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
-import { a as dimensionToColumn, d as devices, f as drizzleSchema, g as pages, h as page_keywords, l as TABLE_METADATA, m as keywords, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
+import { a as dimensionToColumn, d as dates, f as drizzleSchema, g as queries, h as pages, l as TABLE_METADATA, m as page_queries, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
 import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
 import { a as RAW_DAILY_COMPACT_THRESHOLD, c as dedupeOverlappingTiers, i as substituteNamedFiles, l as enumeratePartitions, r as resolveParquetSQL, s as countRawDailies, t as FILES_PLACEHOLDER, u as splitOverlappingTiers } from "./_chunks/parquet-plan.mjs";
 import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
 import { a as createDuckDBCodec, i as canonicalEmptyParquetSchema, n as createStorageEngine, o as createDuckDBExecutor, r as gcOrphansImpl, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
-import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
+import { a as ICEBERG_TABLES, i as ICEBERG_SCHEMAS, n as ICEBERG_PARTITION_COLUMNS, o as icebergTableSpec, r as ICEBERG_PARTITION_SPEC, t as ICEBERG_FIELD_ID_BASE } from "./_chunks/iceberg-schema.mjs";
+import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
 import "./planner.mjs";
 import { rebuildDailyFromHourly } from "./rollups.mjs";
 import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
+import { icebergAppend, icebergCreateTable, icebergDropTable, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, s3SignedResolver } from "icebird";
+const ICEBERG_TYPE_MAP = { // Column type name (the col.type values read by icebergSchemaFor) -> type string written into each schema field.
+	STRING: "string",
+	INT: "int",
+	LONG: "long",
+	DOUBLE: "double",
+	DATE: "date"
+};
+function icebergSchemaFor(table) { // Builds a struct schema object for the given table from ICEBERG_SCHEMAS[table].columns.
+	return {
+		"type": "struct",
+		"schema-id": 0, // the schema id is always 0 here
+		"fields": ICEBERG_SCHEMAS[table].columns.map((col) => ({ // one field per column, in column order
+			id: col.fieldId,
+			name: col.name,
+			required: col.required,
+			type: ICEBERG_TYPE_MAP[col.type] // undefined when col.type is not a key of ICEBERG_TYPE_MAP
+		}))
+	};
+}
+function icebergPartitionSpecFor(table) { // Builds the partition-spec object for the given table from ICEBERG_PARTITION_SPEC.
+	const fields = ICEBERG_SCHEMAS[table].columns;
+	const fieldId = (name) => { // resolves a column name to its fieldId; throws when the table has no such column
+		const col = fields.find((c) => c.name === name);
+		if (!col) throw new Error(`iceberg-catalog: table '${table}' has no '${name}' column`);
+		return col.fieldId;
+	};
+	return {
+		"spec-id": 0, // the spec id is always 0 here
+		"fields": ICEBERG_PARTITION_SPEC.map((p, i) => ({
+			"source-id": fieldId(p.sourceColumn), // id of the schema column the partition field is derived from
+			"field-id": 1e3 + i, // partition field ids are numbered from 1000 in spec order
+			"name": p.name,
+			"transform": p.transform
+		}))
+	};
+}
+async function connectIcebergCatalog(config) { // Resolves to { catalog, resolver, namespace } built from config.
+	return {
+		catalog: await restCatalogConnect({ // awaited, so a failed connect rejects the returned promise
+			url: config.catalogUri,
+			warehouse: config.warehouse,
+			requestInit: { headers: { Authorization: `Bearer ${config.catalogToken}` } } // catalog token is sent as a Bearer Authorization header
+		}),
+		resolver: s3SignedResolver({
+			accessKeyId: config.s3.accessKeyId,
+			secretAccessKey: config.s3.secretAccessKey,
+			region: config.s3.region ?? "auto", // region falls back to "auto" when it is null or undefined
+			endpoint: config.s3.endpoint,
+			pathStyle: true
+		}),
+		namespace: config.namespace // passed through unchanged for later catalog calls
+	};
+}
+function isCommitRateLimited(err) { // True when err looks like a rate-limit failure: status 429 or a matching message.
+	if (err && typeof err === "object" && err.status === 429) return true;
+	const msg = (err instanceof Error ? err.message : String(err)).toLowerCase(); // case-insensitive match; non-Error values are stringified
+	return msg.includes("429") || msg.includes("too many commits") || msg.includes("rate limit"); // NOTE(review): the bare 429 substring also matches unrelated messages that happen to contain those digits
+}
+function defaultCommitSleep(ms) { // Promise that resolves after ms milliseconds; the default sleep used by icebergAppendRetrying.
+	return new Promise((resolve) => setTimeout(resolve, ms));
+}
+async function icebergAppendRetrying(args, options = {}) { // Calls icebergAppend(args), retrying rate-limited failures with randomized exponential backoff.
+	const maxAttempts = options.maxAttempts ?? 6; // total attempts, including the first
+	const baseDelayMs = options.baseDelayMs ?? 1e3;
+	const maxDelayMs = options.maxDelayMs ?? 2e4; // cap on the per-attempt delay ceiling
+	const sleep = options.sleep ?? defaultCommitSleep; // injectable delay function
+	const random = options.random ?? Math.random; // injectable randomness source
+	for (let attempt = 0; attempt < maxAttempts; attempt++) { // NOTE(review): with maxAttempts <= 0 the loop never runs and the function returns without appending
+		const err = await icebergAppend(args).then(() => void 0, (e) => e); // capture a rejection as a value instead of throwing
+		if (err === void 0) return; // success (a rejection whose reason is undefined would also land here)
+		if (!isCommitRateLimited(err) || attempt === maxAttempts - 1) throw err; // rethrow non-rate-limit errors and the failure of the final attempt
+		const ceiling = Math.min(maxDelayMs, baseDelayMs * 2 ** attempt); // ceiling doubles each attempt, capped at maxDelayMs
+		await sleep(Math.floor(random() * ceiling)); // random delay below the ceiling, given random() returns a value in [0, 1)
+	}
+}
+async function ensureIcebergNamespace(conn) { // Attempts to create conn.namespace in the catalog; never rejects.
+	await restCatalogCreateNamespace(conn.catalog, { namespace: conn.namespace }).catch(() => {}); // every failure is swallowed, presumably so an existing namespace is not an error; note that other failures are hidden as well
+}
+async function createIcebergTables(conn, tables = ICEBERG_TABLES) { // Creates each table in turn; resolves to one { table, ok, error? } result per table.
+	const results = [];
+	for (const table of tables) await icebergCreateTable({ // awaited inside the loop, so tables are created one at a time
+		catalog: conn.catalog,
+		namespace: conn.namespace,
+		table,
+		schema: icebergSchemaFor(table),
+		partitionSpec: icebergPartitionSpecFor(table)
+	}).then(() => results.push({
+		table,
+		ok: true
+	}), (e) => results.push({ // a failed create is recorded and the loop continues with the next table
+		table,
+		ok: false,
+		error: String(e) // the error is stringified, not rethrown
+	}));
+	return results;
+}
+async function listIcebergTables(conn) { // Resolves to the sorted table names in conn.namespace; a failed listing resolves to [] instead of rejecting.
+	return restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name).sort(), () => []);
+}
+async function dropIcebergTables(conn, tables) { // Drops the given tables, or every table listed in conn.namespace when tables is null or undefined; resolves to per-table results.
+	const targets = tables ?? await restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name), () => []); // a failed listing yields no targets
+	const results = [];
+	for (const table of targets) await icebergDropTable({ // awaited inside the loop, so tables are dropped one at a time
+		catalog: conn.catalog,
+		namespace: conn.namespace,
+		table,
+		purgeRequested: true
+	}).then(() => results.push({
+		table,
+		ok: true
+	}), (e) => results.push({ // a failed drop is recorded and the loop continues with the next table
+		table,
+		ok: false,
+		error: String(e)
+	}));
+	return results;
+}
 const NOOP_RESULT = {
 	flushed: 0,
 	recovered: 0,
@@ -123,38 +242,177 @@ function createIngestAccumulator(opts) {
 		}
 	};
 }
+const DAY_MILLIS = 864e5; // 86,400,000 ms, i.e. one day
+function toIcebergDate(value) { // Converts a date value to a whole number of days since the Unix epoch.
+	if (typeof value === "string") return Math.floor(Date.parse(`${value}T00:00:00Z`) / DAY_MILLIS); // parsed as midnight UTC; assumes a bare YYYY-MM-DD string, anything Date.parse rejects yields NaN
+	if (value instanceof Date) return Math.floor(value.getTime() / DAY_MILLIS);
+	return value; // any other value is passed through unchanged
+}
+function toRecords(slice, rows) { // Copies each row, converting its date and stamping site_id and search_type from the slice.
+	const siteId = slice.ctx.siteId ?? ""; // a missing site id becomes an empty string
+	return rows.map((row) => ({
+		...row,
+		date: toIcebergDate(row.date), // replaces the date copied by the spread above
+		site_id: siteId,
+		search_type: slice.searchType
+	}));
+}
+function createIcebergAppendSink(options) { // Sink that buffers emitted rows per table in memory and appends them to the catalog on close().
+	let connection; // cached promise from connectIcebergCatalog, created lazily
+	const buffers = /* @__PURE__ */ new Map(); // table name -> buffered records
+	function connect() {
+		connection ??= connectIcebergCatalog(options.catalog); // reuse the same connection promise until it is reset
+		return connection;
+	}
+	return {
+		capabilities: { appendOnly: true },
+		async emit(slice, rows) { // buffers only; nothing is sent to the catalog until close()
+			if (rows.length === 0) return { rowCount: 0 };
+			const records = toRecords(slice, rows);
+			const buffer = buffers.get(slice.table);
+			if (buffer) for (let i = 0; i < records.length; i++) buffer.push(records[i]); // element-by-element push rather than a spread push
+			else buffers.set(slice.table, records);
+			return { rowCount: records.length };
+		},
+		async close() { // flushes one append per table; connect and append failures are reported in failed rather than thrown
+			const flushed = [];
+			const failed = [];
+			if (buffers.size === 0) return {
+				flushed,
+				failed
+			};
+			const conn = await connect().then((c) => c, (err) => {
+				connection = void 0; // drop the cached rejected promise so a later close() tries to connect again
+				return { error: String(err) };
+			});
+			if ("error" in conn) { // connect failed: report every non-empty table as failed
+				for (const [table, records] of buffers) if (records.length > 0) failed.push({
+					table,
+					error: conn.error
+				});
+				buffers.clear(); // buffered rows are discarded even though nothing was written
+				return {
+					flushed,
+					failed
+				};
+			}
+			for (const [table, records] of buffers) { // tables are appended one at a time
+				if (records.length === 0) continue;
+				await icebergAppendRetrying({
+					catalog: conn.catalog,
+					namespace: conn.namespace,
+					table,
+					resolver: conn.resolver,
+					records
+				}, options.commitRetry).then(() => {
+					flushed.push(table);
+				}, (err) => {
+					failed.push({ // a failed append is recorded and the loop continues with the next table
+						table,
+						error: String(err)
+					});
+				});
+			}
+			buffers.clear(); // cleared whether or not individual appends failed
+			return {
+				flushed,
+				failed
+			};
+		}
+	};
+}
+const KEY_SEP = "\0"; // NUL character used to join the parts of a partition key
+function partitionKey(slice) { // Composite key for a slice: table, site id (or empty string), search type, date.
+	return [
+		slice.table,
+		slice.ctx.siteId ?? "",
+		slice.searchType,
+		slice.date
+	].join(KEY_SEP);
+}
+function tableOfKey(key) { // Extracts the table name from a partition key: the text before the first KEY_SEP.
+	return key.slice(0, key.indexOf(KEY_SEP));
+}
+function withIdentity(slice, rows) { // Copies each row and stamps site_id (empty string when absent) and search_type from the slice; the date is left as-is.
+	return rows.map((r) => ({
+		...r,
+		site_id: slice.ctx.siteId ?? "",
+		search_type: slice.searchType
+	}));
+}
+function createInMemorySink() { // In-memory sink that keeps emitted rows per partition key and exposes them for inspection.
+	const partitions = /* @__PURE__ */ new Map(); // partitionKey(slice) -> stored rows
+	let closed = false;
+	function allRows() {
+		return [...partitions.values()].flat(); // new array, partitions in insertion order
+	}
+	return {
+		capabilities: { appendOnly: true },
+		async emit(slice, rows) { // NOTE: the closed flag is not checked here, so rows are still accepted after close()
+			const key = partitionKey(slice);
+			const stored = withIdentity(slice, rows);
+			const existing = partitions.get(key);
+			if (existing) existing.push(...stored); // NOTE(review): a spread push can exceed the engine argument limit for very large batches; createIcebergAppendSink pushes in a loop instead
+			else partitions.set(key, stored);
+			return { rowCount: stored.length };
+		},
+		async close() { // marks the sink closed and reports the distinct tables that have a partition entry (including emits with zero rows)
+			closed = true;
+			return {
+				flushed: [...new Set([...partitions.keys()].map((k) => tableOfKey(k)))],
+				failed: [] // always empty for this sink
+			};
+		},
+		get rows() {
+			return allRows();
+		},
+		get closed() {
+			return closed;
+		},
+		rowsFor(table) {
+			return [...partitions.entries()].filter(([k]) => tableOfKey(k) === table).flatMap(([, v]) => v); // rows of every partition belonging to the table
+		},
+		rowsForSlice(slice) {
+			return [...partitions.get(partitionKey(slice)) ?? []]; // shallow copy; empty array when the slice has no partition
+		},
+		reset() { // drops all stored rows and clears the closed flag
+			partitions.clear();
+			closed = false;
+		}
+	};
+}
 const TABLES_BY_SEARCH_TYPE = { // search type -> names of the tables listed for it
 	web: [ // the only search type that lists the query-grained tables
 		"pages",
-		"keywords",
+		"queries",
 		"countries",
-		"devices",
-		"page_keywords"
+		"page_queries",
+		"dates"
 	],
 	discover: [
 		"pages",
 		"countries",
-		"devices"
+		"dates"
 	],
 	news: [
 		"pages",
 		"countries",
-		"devices"
+		"dates"
 	],
 	googleNews: [
 		"pages",
 		"countries",
-		"devices"
+		"dates"
 	],
 	image: [
 		"pages",
 		"countries",
-		"devices"
+		"dates"
 	],
 	video: [
 		"pages",
 		"countries",
-		"devices"
+		"dates"
 	]
 };
 function parseEnabledSearchTypes(raw) {
@@ -181,10 +439,10 @@ function validateEnabledSearchTypes(value) {
 }
 const TABLE_TIERS = { // table name -> tier label (critical, standard or extended)
 	pages: "critical",
-	keywords: "critical",
+	queries: "critical",
 	countries: "standard",
-	devices: "standard",
-	page_keywords: "extended"
+	dates: "standard",
+	page_queries: "extended"
 };
 function getTableTier(table) { // Tier label for a table; tables not listed in TABLE_TIERS fall back to "extended".
 	return TABLE_TIERS[table] || "extended";
 }
 const MIN_COUNTRY_IMPRESSIONS = 10;
 const MAX_SITEMAP_URLS_PER_SITE = 5e4;
 const MAX_TRACKED_URLS_PER_SITE = 2e5;
-export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, dedupeOverlappingTiers, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
+export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };

package/dist/ingest.d.mts CHANGED Viewed

@@ -19,8 +19,8 @@ interface IngestOptions {
   /**
    * Canonical form of a query string, stored alongside `query` as
    * `query_canonical`. Site-specific (e.g. synonym groups, stemming); if
-   * omitted, `query_canonical` is null. Applied to `keywords` +
-   * `page_keywords` tables only.
+   * omitted, `query_canonical` is null. Applied to `queries` +
+   * `page_queries` tables only.
    */
   normalizeQuery?: (query: string) => string | null | undefined;
 }
@@ -44,6 +44,26 @@ declare function transformGscRow(table: TableName, apiRow: GscApiRow, options?:
   date: string;
   row: Row;
 } | null;
+/**
+ * Assemble one `dates` row for a single `date` from the two GSC queries that
+ * back the table:
+ *
+ * - `totalsRow` — the GSC `['date']` query result: the TRUE site totals
+ *   (clicks/impressions/position), including anonymized impressions.
+ * - `deviceRows` — the GSC `['date','device']` query results for that date:
+ *   one row per device, pivoted into the 9 `*_{device}` columns.
+ * - `queryGrainedImpressions` — total impressions summed from the
+ *   `['query','date']` (or `['page','query','date']`) query for the same date,
+ *   used to derive `anonymized_impressions_pct`.
+ *
+ * `anonymized_impressions_pct = 1 - query_grained_impressions /
+ * page_grained_impressions`, where the page/date totals come from `totalsRow`.
+ * Mirrors the legacy `dailyTotalsRollup` formula. Clamped to `[0, 1]`.
+ */
+declare function assembleDatesRow(date: string, totalsRow: GscApiRow, deviceRows: readonly GscApiRow[], queryGrainedImpressions: number): {
+  date: string;
+  row: Row;
+};
 interface RowAccumulator {
   /**
    * Push a batch of GSC API rows into the accumulator. Returns `false` if
@@ -93,4 +113,4 @@ interface RowAccumulatorOptions extends IngestOptions {
   trackDateBoundary?: boolean;
 }
 declare function createRowAccumulator(options?: RowAccumulatorOptions): RowAccumulator;
-export { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, TABLE_DIMS, createRowAccumulator, toPath, toSumPosition, transformGscRow };
+export { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, TABLE_DIMS, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow };

package/dist/ingest.mjs CHANGED Viewed

@@ -1,9 +1,9 @@
 const TABLE_DIMS = {
 	pages: ["page", "date"],
-	keywords: ["query", "date"],
+	queries: ["query", "date"],
 	countries: ["country", "date"],
-	devices: ["device", "date"],
-	page_keywords: [
+	dates: ["date"],
+	page_queries: [
 		"page",
 		"query",
 		"date"
@@ -40,7 +40,7 @@ function transformGscRow(table, apiRow, options = {}) {
 			}
 		};
 	}
-	if (table === "keywords") {
+	if (table === "queries") {
 		const query = String(keys[0] ?? "");
 		const date = String(keys[1] ?? "");
 		return {
@@ -68,19 +68,6 @@ function transformGscRow(table, apiRow, options = {}) {
 			}
 		};
 	}
-	if (table === "devices") {
-		const date = String(keys[1] ?? "");
-		return {
-			date,
-			row: {
-				device: String(keys[0] ?? ""),
-				date,
-				clicks,
-				impressions,
-				sum_position
-			}
-		};
-	}
 	if (table === "hourly_pages") {
 		const hour = String(keys[0] ?? "");
 		const date = hour.slice(0, 10);
@@ -109,6 +96,7 @@ function transformGscRow(table, apiRow, options = {}) {
 			}
 		};
 	}
+	if (table === "dates") throw new Error("`dates` rows must be built via assembleDatesRow, not transformGscRow");
 	const query = String(keys[1] ?? "");
 	const date = String(keys[2] ?? "");
 	const query_canonical = options.normalizeQuery?.(query) ?? null;
@@ -125,6 +113,43 @@ function transformGscRow(table, apiRow, options = {}) {
 		}
 	};
 }
+const DEVICE_SUFFIX = { // upper-cased device key -> column-name suffix used by assembleDatesRow
+	DESKTOP: "desktop",
+	MOBILE: "mobile",
+	TABLET: "tablet"
+};
+function assembleDatesRow(date, totalsRow, deviceRows, queryGrainedImpressions) { // Builds one dates row: totals from totalsRow plus per-device columns pivoted from deviceRows.
+	const clicks = totalsRow.clicks || 0; // missing or falsy metrics count as 0
+	const impressions = totalsRow.impressions || 0;
+	const row = {
+		date,
+		clicks,
+		impressions,
+		sum_position: toSumPosition(totalsRow.position || 0, impressions),
+		anonymized_impressions_pct: impressions > 0 ? Math.min(1, Math.max(0, 1 - queryGrainedImpressions / impressions)) : 0, // 1 minus query-grained over total impressions, clamped to [0, 1]; 0 when there are no impressions
+		clicks_desktop: 0, // every device column starts at 0 and is filled in by the loop below
+		clicks_mobile: 0,
+		clicks_tablet: 0,
+		impressions_desktop: 0,
+		impressions_mobile: 0,
+		impressions_tablet: 0,
+		sum_position_desktop: 0,
+		sum_position_mobile: 0,
+		sum_position_tablet: 0
+	};
+	for (const dr of deviceRows) {
+		const suffix = DEVICE_SUFFIX[String(dr.keys?.[1] ?? dr.keys?.[0] ?? "").toUpperCase()]; // device is read from keys[1], falling back to keys[0]; matched case-insensitively
+		if (!suffix) continue; // rows for unrecognized devices are ignored
+		const dImpr = dr.impressions || 0;
+		row[`clicks_${suffix}`] = dr.clicks || 0; // assignment, not accumulation: a repeated device overwrites the earlier row
+		row[`impressions_${suffix}`] = dImpr;
+		row[`sum_position_${suffix}`] = toSumPosition(dr.position || 0, dImpr);
+	}
+	return {
+		date,
+		row
+	};
+}
 const DEFAULT_MAX_ROWS = 5e5;
 function createRowAccumulator(options = {}) {
 	const maxRows = options.maxRows ?? DEFAULT_MAX_ROWS;
@@ -200,4 +225,4 @@ function createRowAccumulator(options = {}) {
 		}
 	};
 }
-export { TABLE_DIMS, createRowAccumulator, toPath, toSumPosition, transformGscRow };
+export { TABLE_DIMS, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow };

package/dist/rollups.d.mts CHANGED Viewed

@@ -90,13 +90,14 @@ interface RollupDef {
      */
     dataSource: DataSource;
     /**
-     * Wall-clock millis when the runner started this rollup. Use for
-     * derived window cutoffs (e.g. trailing-28d boundary) so the SQL can
-     * inline a date literal and stay portable across DuckDB builds that
-     * don't bundle the ICU extension (Workers DuckDB, for one — CURRENT_DATE
-     * lives in ICU).
+     * UTC millis the trailing window anchors to — its inclusive END. Equals
+     * the newest synced/finalized data date when the runner is given
+     * `dataEndDate`, otherwise wall-clock build time. Builders derive window
+     * cutoffs from this (e.g. the trailing-28d boundary) and inline a date
+     * literal so the SQL stays portable across DuckDB builds without the ICU
+     * extension (Workers DuckDB — `CURRENT_DATE` lives in ICU).
      */
-    builtAt: number;
+    windowAnchorMs: number;
     /**
      * GSC search-type slice the runner was invoked for. Builders forward
      * this to every `engine.runSQL` call so the aggregated facts come
@@ -156,6 +157,15 @@ interface RebuildRollupsOptions {
    * only tenants and explicit cross-type admin views.
    */
   searchType?: SearchType;
+  /**
+   * ISO date (`YYYY-MM-DD`) of the newest synced/finalized day. Trailing-
+   * window rollups (28d/90d) anchor their window END here instead of
+   * wall-clock build time, so a "last 28 days" rollup covers the 28 days of
+   * data that actually exist — not 28 days back from whenever the job ran,
+   * which would include GSC's 2-3 day empty tail. Omit for the legacy
+   * wall-clock behaviour.
+   */
+  dataEndDate?: string;
 }
 interface RebuildRollupResult {
   id: string;