@gscdump/engine 0.20.2 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,13 +1,132 @@
1
1
  import { n as coerceRows, t as coerceRow } from "./_chunks/coerce.mjs";
2
- import { a as dimensionToColumn, d as devices, f as drizzleSchema, g as pages, h as page_keywords, l as TABLE_METADATA, m as keywords, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
2
+ import { a as dimensionToColumn, d as dates, f as drizzleSchema, g as queries, h as pages, l as TABLE_METADATA, m as page_queries, n as allTables, o as inferTable, p as hourly_pages, r as currentSchemaVersion, t as SCHEMAS, u as countries } from "./_chunks/schema.mjs";
3
3
  import { a as inferSearchType, c as objectKey, i as inferLegacyTier, n as dayPartition, r as hourPartition, t as DEFAULT_SEARCH_TYPE } from "./_chunks/storage.mjs";
4
4
  import { a as RAW_DAILY_COMPACT_THRESHOLD, c as dedupeOverlappingTiers, i as substituteNamedFiles, l as enumeratePartitions, r as resolveParquetSQL, s as countRawDailies, t as FILES_PLACEHOLDER, u as splitOverlappingTiers } from "./_chunks/parquet-plan.mjs";
5
5
  import { bindLiterals, formatLiteral } from "./sql-bind.mjs";
6
6
  import { a as createDuckDBCodec, i as canonicalEmptyParquetSchema, n as createStorageEngine, o as createDuckDBExecutor, r as gcOrphansImpl, t as MAX_DAY_BYTES } from "./_chunks/engine.mjs";
7
- import { createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
7
+ import { a as ICEBERG_TABLES, i as ICEBERG_SCHEMAS, n as ICEBERG_PARTITION_COLUMNS, o as icebergTableSpec, r as ICEBERG_PARTITION_SPEC, t as ICEBERG_FIELD_ID_BASE } from "./_chunks/iceberg-schema.mjs";
8
+ import { assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow } from "./ingest.mjs";
8
9
  import "./planner.mjs";
9
10
  import { rebuildDailyFromHourly } from "./rollups.mjs";
10
11
  import { fixedPolicy, inspectionPolicy, sitemapPolicy } from "./schedule.mjs";
12
+ import { icebergAppend, icebergCreateTable, icebergDropTable, restCatalogConnect, restCatalogCreateNamespace, restCatalogListTables, s3SignedResolver } from "icebird";
13
+ const ICEBERG_TYPE_MAP = {
14
+ STRING: "string",
15
+ INT: "int",
16
+ LONG: "long",
17
+ DOUBLE: "double",
18
+ DATE: "date"
19
+ };
20
+ function icebergSchemaFor(table) {
21
+ return {
22
+ "type": "struct",
23
+ "schema-id": 0,
24
+ "fields": ICEBERG_SCHEMAS[table].columns.map((col) => ({
25
+ id: col.fieldId,
26
+ name: col.name,
27
+ required: col.required,
28
+ type: ICEBERG_TYPE_MAP[col.type]
29
+ }))
30
+ };
31
+ }
32
+ function icebergPartitionSpecFor(table) {
33
+ const fields = ICEBERG_SCHEMAS[table].columns;
34
+ const fieldId = (name) => {
35
+ const col = fields.find((c) => c.name === name);
36
+ if (!col) throw new Error(`iceberg-catalog: table '${table}' has no '${name}' column`);
37
+ return col.fieldId;
38
+ };
39
+ return {
40
+ "spec-id": 0,
41
+ "fields": ICEBERG_PARTITION_SPEC.map((p, i) => ({
42
+ "source-id": fieldId(p.sourceColumn),
43
+ "field-id": 1e3 + i,
44
+ "name": p.name,
45
+ "transform": p.transform
46
+ }))
47
+ };
48
+ }
49
+ async function connectIcebergCatalog(config) {
50
+ return {
51
+ catalog: await restCatalogConnect({
52
+ url: config.catalogUri,
53
+ warehouse: config.warehouse,
54
+ requestInit: { headers: { Authorization: `Bearer ${config.catalogToken}` } }
55
+ }),
56
+ resolver: s3SignedResolver({
57
+ accessKeyId: config.s3.accessKeyId,
58
+ secretAccessKey: config.s3.secretAccessKey,
59
+ region: config.s3.region ?? "auto",
60
+ endpoint: config.s3.endpoint,
61
+ pathStyle: true
62
+ }),
63
+ namespace: config.namespace
64
+ };
65
+ }
66
+ function isCommitRateLimited(err) {
67
+ if (err && typeof err === "object" && err.status === 429) return true;
68
+ const msg = (err instanceof Error ? err.message : String(err)).toLowerCase();
69
+ return msg.includes("429") || msg.includes("too many commits") || msg.includes("rate limit");
70
+ }
71
+ function defaultCommitSleep(ms) {
72
+ return new Promise((resolve) => setTimeout(resolve, ms));
73
+ }
74
+ async function icebergAppendRetrying(args, options = {}) {
75
+ const maxAttempts = options.maxAttempts ?? 6;
76
+ const baseDelayMs = options.baseDelayMs ?? 1e3;
77
+ const maxDelayMs = options.maxDelayMs ?? 2e4;
78
+ const sleep = options.sleep ?? defaultCommitSleep;
79
+ const random = options.random ?? Math.random;
80
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
81
+ const err = await icebergAppend(args).then(() => void 0, (e) => e);
82
+ if (err === void 0) return;
83
+ if (!isCommitRateLimited(err) || attempt === maxAttempts - 1) throw err;
84
+ const ceiling = Math.min(maxDelayMs, baseDelayMs * 2 ** attempt);
85
+ await sleep(Math.floor(random() * ceiling));
86
+ }
87
+ }
88
+ async function ensureIcebergNamespace(conn) {
89
+ await restCatalogCreateNamespace(conn.catalog, { namespace: conn.namespace }).catch(() => {});
90
+ }
91
+ async function createIcebergTables(conn, tables = ICEBERG_TABLES) {
92
+ const results = [];
93
+ for (const table of tables) await icebergCreateTable({
94
+ catalog: conn.catalog,
95
+ namespace: conn.namespace,
96
+ table,
97
+ schema: icebergSchemaFor(table),
98
+ partitionSpec: icebergPartitionSpecFor(table)
99
+ }).then(() => results.push({
100
+ table,
101
+ ok: true
102
+ }), (e) => results.push({
103
+ table,
104
+ ok: false,
105
+ error: String(e)
106
+ }));
107
+ return results;
108
+ }
109
+ async function listIcebergTables(conn) {
110
+ return restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name).sort(), () => []);
111
+ }
112
+ async function dropIcebergTables(conn, tables) {
113
+ const targets = tables ?? await restCatalogListTables(conn.catalog, { namespace: conn.namespace }).then((list) => list.map((t) => t.name), () => []);
114
+ const results = [];
115
+ for (const table of targets) await icebergDropTable({
116
+ catalog: conn.catalog,
117
+ namespace: conn.namespace,
118
+ table,
119
+ purgeRequested: true
120
+ }).then(() => results.push({
121
+ table,
122
+ ok: true
123
+ }), (e) => results.push({
124
+ table,
125
+ ok: false,
126
+ error: String(e)
127
+ }));
128
+ return results;
129
+ }
11
130
  const NOOP_RESULT = {
12
131
  flushed: 0,
13
132
  recovered: 0,
@@ -123,38 +242,177 @@ function createIngestAccumulator(opts) {
123
242
  }
124
243
  };
125
244
  }
245
+ const DAY_MILLIS = 864e5;
246
+ function toIcebergDate(value) {
247
+ if (typeof value === "string") return Math.floor(Date.parse(`${value}T00:00:00Z`) / DAY_MILLIS);
248
+ if (value instanceof Date) return Math.floor(value.getTime() / DAY_MILLIS);
249
+ return value;
250
+ }
251
+ function toRecords(slice, rows) {
252
+ const siteId = slice.ctx.siteId ?? "";
253
+ return rows.map((row) => ({
254
+ ...row,
255
+ date: toIcebergDate(row.date),
256
+ site_id: siteId,
257
+ search_type: slice.searchType
258
+ }));
259
+ }
260
+ function createIcebergAppendSink(options) {
261
+ let connection;
262
+ const buffers = /* @__PURE__ */ new Map();
263
+ function connect() {
264
+ connection ??= connectIcebergCatalog(options.catalog);
265
+ return connection;
266
+ }
267
+ return {
268
+ capabilities: { appendOnly: true },
269
+ async emit(slice, rows) {
270
+ if (rows.length === 0) return { rowCount: 0 };
271
+ const records = toRecords(slice, rows);
272
+ const buffer = buffers.get(slice.table);
273
+ if (buffer) for (let i = 0; i < records.length; i++) buffer.push(records[i]);
274
+ else buffers.set(slice.table, records);
275
+ return { rowCount: records.length };
276
+ },
277
+ async close() {
278
+ const flushed = [];
279
+ const failed = [];
280
+ if (buffers.size === 0) return {
281
+ flushed,
282
+ failed
283
+ };
284
+ const conn = await connect().then((c) => c, (err) => {
285
+ connection = void 0;
286
+ return { error: String(err) };
287
+ });
288
+ if ("error" in conn) {
289
+ for (const [table, records] of buffers) if (records.length > 0) failed.push({
290
+ table,
291
+ error: conn.error
292
+ });
293
+ buffers.clear();
294
+ return {
295
+ flushed,
296
+ failed
297
+ };
298
+ }
299
+ for (const [table, records] of buffers) {
300
+ if (records.length === 0) continue;
301
+ await icebergAppendRetrying({
302
+ catalog: conn.catalog,
303
+ namespace: conn.namespace,
304
+ table,
305
+ resolver: conn.resolver,
306
+ records
307
+ }, options.commitRetry).then(() => {
308
+ flushed.push(table);
309
+ }, (err) => {
310
+ failed.push({
311
+ table,
312
+ error: String(err)
313
+ });
314
+ });
315
+ }
316
+ buffers.clear();
317
+ return {
318
+ flushed,
319
+ failed
320
+ };
321
+ }
322
+ };
323
+ }
324
+ const KEY_SEP = "\0";
325
+ function partitionKey(slice) {
326
+ return [
327
+ slice.table,
328
+ slice.ctx.siteId ?? "",
329
+ slice.searchType,
330
+ slice.date
331
+ ].join(KEY_SEP);
332
+ }
333
+ function tableOfKey(key) {
334
+ return key.slice(0, key.indexOf(KEY_SEP));
335
+ }
336
+ function withIdentity(slice, rows) {
337
+ return rows.map((r) => ({
338
+ ...r,
339
+ site_id: slice.ctx.siteId ?? "",
340
+ search_type: slice.searchType
341
+ }));
342
+ }
343
+ function createInMemorySink() {
344
+ const partitions = /* @__PURE__ */ new Map();
345
+ let closed = false;
346
+ function allRows() {
347
+ return [...partitions.values()].flat();
348
+ }
349
+ return {
350
+ capabilities: { appendOnly: true },
351
+ async emit(slice, rows) {
352
+ const key = partitionKey(slice);
353
+ const stored = withIdentity(slice, rows);
354
+ const existing = partitions.get(key);
355
+ if (existing) existing.push(...stored);
356
+ else partitions.set(key, stored);
357
+ return { rowCount: stored.length };
358
+ },
359
+ async close() {
360
+ closed = true;
361
+ return {
362
+ flushed: [...new Set([...partitions.keys()].map((k) => tableOfKey(k)))],
363
+ failed: []
364
+ };
365
+ },
366
+ get rows() {
367
+ return allRows();
368
+ },
369
+ get closed() {
370
+ return closed;
371
+ },
372
+ rowsFor(table) {
373
+ return [...partitions.entries()].filter(([k]) => tableOfKey(k) === table).flatMap(([, v]) => v);
374
+ },
375
+ rowsForSlice(slice) {
376
+ return [...partitions.get(partitionKey(slice)) ?? []];
377
+ },
378
+ reset() {
379
+ partitions.clear();
380
+ closed = false;
381
+ }
382
+ };
383
+ }
126
384
  const TABLES_BY_SEARCH_TYPE = {
127
385
  web: [
128
386
  "pages",
129
- "keywords",
387
+ "queries",
130
388
  "countries",
131
- "devices",
132
- "page_keywords"
389
+ "page_queries",
390
+ "dates"
133
391
  ],
134
392
  discover: [
135
393
  "pages",
136
394
  "countries",
137
- "devices"
395
+ "dates"
138
396
  ],
139
397
  news: [
140
398
  "pages",
141
399
  "countries",
142
- "devices"
400
+ "dates"
143
401
  ],
144
402
  googleNews: [
145
403
  "pages",
146
404
  "countries",
147
- "devices"
405
+ "dates"
148
406
  ],
149
407
  image: [
150
408
  "pages",
151
409
  "countries",
152
- "devices"
410
+ "dates"
153
411
  ],
154
412
  video: [
155
413
  "pages",
156
414
  "countries",
157
- "devices"
415
+ "dates"
158
416
  ]
159
417
  };
160
418
  function parseEnabledSearchTypes(raw) {
@@ -181,10 +439,10 @@ function validateEnabledSearchTypes(value) {
181
439
  }
182
440
  const TABLE_TIERS = {
183
441
  pages: "critical",
184
- keywords: "critical",
442
+ queries: "critical",
185
443
  countries: "standard",
186
- devices: "standard",
187
- page_keywords: "extended"
444
+ dates: "standard",
445
+ page_queries: "extended"
188
446
  };
189
447
  function getTableTier(table) {
190
448
  return TABLE_TIERS[table] || "extended";
@@ -215,4 +473,4 @@ const MIN_SYNC_IMPRESSIONS = 1;
215
473
  const MIN_COUNTRY_IMPRESSIONS = 10;
216
474
  const MAX_SITEMAP_URLS_PER_SITE = 5e4;
217
475
  const MAX_TRACKED_URLS_PER_SITE = 2e5;
218
- export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dayPartition, dedupeOverlappingTiers, devices, dimensionToColumn, drizzleSchema, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, keywords, objectKey, page_keywords, pages, parseEnabledSearchTypes, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
476
+ export { DEFAULT_SEARCH_TYPE, FILES_PLACEHOLDER, ICEBERG_FIELD_ID_BASE, ICEBERG_PARTITION_COLUMNS, ICEBERG_PARTITION_SPEC, ICEBERG_SCHEMAS, ICEBERG_TABLES, MAX_DAY_BYTES, MAX_GSC_PAGES_R2, MAX_SITEMAP_URLS_PER_SITE, MAX_TRACKED_URLS_PER_SITE, MIN_COUNTRY_IMPRESSIONS, MIN_SYNC_IMPRESSIONS, RAW_DAILY_COMPACT_THRESHOLD, ROW_LIMIT_R2, SCHEMAS, TABLES_BY_SEARCH_TYPE, TABLE_METADATA, TABLE_TIERS, TIER_PRIORITY, WEIGHT_PRIORITY, allTables, assembleDatesRow, bindLiterals, canonicalEmptyParquetSchema, coerceRow, coerceRows, connectIcebergCatalog, countRawDailies, countries, createDuckDBCodec, createDuckDBExecutor, createIcebergAppendSink, createIcebergTables, createInMemorySink, createIngestAccumulator, createNoopIngestAccumulator, createRowAccumulator, createStorageEngine, currentSchemaVersion, dates, dayPartition, dedupeOverlappingTiers, dimensionToColumn, drizzleSchema, dropIcebergTables, ensureIcebergNamespace, enumeratePartitions, fixedPolicy, formatLiteral, gcOrphansImpl, getDateWeight, getTableTier, getTablesForTier, hourPartition, hourly_pages, icebergAppendRetrying, icebergPartitionSpecFor, icebergSchemaFor, icebergTableSpec, inferLegacyTier, inferSearchType, inferTable, inspectionPolicy, isCommitRateLimited, listIcebergTables, objectKey, page_queries, pages, parseEnabledSearchTypes, queries, rebuildDailyFromHourly, resolveParquetSQL, sitemapPolicy, splitOverlappingTiers, substituteNamedFiles, toPath, toSumPosition, transformGscRow, validateEnabledSearchTypes };
package/dist/ingest.d.mts CHANGED
@@ -19,8 +19,8 @@ interface IngestOptions {
19
19
  /**
20
20
  * Canonical form of a query string, stored alongside `query` as
21
21
  * `query_canonical`. Site-specific (e.g. synonym groups, stemming); if
22
- * omitted, `query_canonical` is null. Applied to `keywords` +
23
- * `page_keywords` tables only.
22
+ * omitted, `query_canonical` is null. Applied to `queries` +
23
+ * `page_queries` tables only.
24
24
  */
25
25
  normalizeQuery?: (query: string) => string | null | undefined;
26
26
  }
@@ -44,6 +44,26 @@ declare function transformGscRow(table: TableName, apiRow: GscApiRow, options?:
44
44
  date: string;
45
45
  row: Row;
46
46
  } | null;
47
+ /**
48
+ * Assemble one `dates` row for a single `date` from the two GSC queries that
49
+ * back the table:
50
+ *
51
+ * - `totalsRow` — the GSC `['date']` query result: the TRUE site totals
52
+ * (clicks/impressions/position), including anonymized impressions.
53
+ * - `deviceRows` — the GSC `['date','device']` query results for that date:
54
+ * one row per device, pivoted into the 9 `*_{device}` columns.
55
+ * - `queryGrainedImpressions` — total impressions summed from the
56
+ * `['query','date']` (or `['page','query','date']`) query for the same date,
57
+ * used to derive `anonymized_impressions_pct`.
58
+ *
59
+ * `anonymized_impressions_pct = 1 - query_grained_impressions /
60
+ * page_grained_impressions`, where the page/date totals come from `totalsRow`.
61
+ * Mirrors the legacy `dailyTotalsRollup` formula. Clamped to `[0, 1]`.
62
+ */
63
+ declare function assembleDatesRow(date: string, totalsRow: GscApiRow, deviceRows: readonly GscApiRow[], queryGrainedImpressions: number): {
64
+ date: string;
65
+ row: Row;
66
+ };
47
67
  interface RowAccumulator {
48
68
  /**
49
69
  * Push a batch of GSC API rows into the accumulator. Returns `false` if
@@ -93,4 +113,4 @@ interface RowAccumulatorOptions extends IngestOptions {
93
113
  trackDateBoundary?: boolean;
94
114
  }
95
115
  declare function createRowAccumulator(options?: RowAccumulatorOptions): RowAccumulator;
96
- export { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, TABLE_DIMS, createRowAccumulator, toPath, toSumPosition, transformGscRow };
116
+ export { GscApiRow, IngestOptions, RowAccumulator, RowAccumulatorOptions, TABLE_DIMS, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow };
package/dist/ingest.mjs CHANGED
@@ -1,9 +1,9 @@
1
1
  const TABLE_DIMS = {
2
2
  pages: ["page", "date"],
3
- keywords: ["query", "date"],
3
+ queries: ["query", "date"],
4
4
  countries: ["country", "date"],
5
- devices: ["device", "date"],
6
- page_keywords: [
5
+ dates: ["date"],
6
+ page_queries: [
7
7
  "page",
8
8
  "query",
9
9
  "date"
@@ -40,7 +40,7 @@ function transformGscRow(table, apiRow, options = {}) {
40
40
  }
41
41
  };
42
42
  }
43
- if (table === "keywords") {
43
+ if (table === "queries") {
44
44
  const query = String(keys[0] ?? "");
45
45
  const date = String(keys[1] ?? "");
46
46
  return {
@@ -68,19 +68,6 @@ function transformGscRow(table, apiRow, options = {}) {
68
68
  }
69
69
  };
70
70
  }
71
- if (table === "devices") {
72
- const date = String(keys[1] ?? "");
73
- return {
74
- date,
75
- row: {
76
- device: String(keys[0] ?? ""),
77
- date,
78
- clicks,
79
- impressions,
80
- sum_position
81
- }
82
- };
83
- }
84
71
  if (table === "hourly_pages") {
85
72
  const hour = String(keys[0] ?? "");
86
73
  const date = hour.slice(0, 10);
@@ -109,6 +96,7 @@ function transformGscRow(table, apiRow, options = {}) {
109
96
  }
110
97
  };
111
98
  }
99
+ if (table === "dates") throw new Error("`dates` rows must be built via assembleDatesRow, not transformGscRow");
112
100
  const query = String(keys[1] ?? "");
113
101
  const date = String(keys[2] ?? "");
114
102
  const query_canonical = options.normalizeQuery?.(query) ?? null;
@@ -125,6 +113,43 @@ function transformGscRow(table, apiRow, options = {}) {
125
113
  }
126
114
  };
127
115
  }
116
+ const DEVICE_SUFFIX = {
117
+ DESKTOP: "desktop",
118
+ MOBILE: "mobile",
119
+ TABLET: "tablet"
120
+ };
121
+ function assembleDatesRow(date, totalsRow, deviceRows, queryGrainedImpressions) {
122
+ const clicks = totalsRow.clicks || 0;
123
+ const impressions = totalsRow.impressions || 0;
124
+ const row = {
125
+ date,
126
+ clicks,
127
+ impressions,
128
+ sum_position: toSumPosition(totalsRow.position || 0, impressions),
129
+ anonymized_impressions_pct: impressions > 0 ? Math.min(1, Math.max(0, 1 - queryGrainedImpressions / impressions)) : 0,
130
+ clicks_desktop: 0,
131
+ clicks_mobile: 0,
132
+ clicks_tablet: 0,
133
+ impressions_desktop: 0,
134
+ impressions_mobile: 0,
135
+ impressions_tablet: 0,
136
+ sum_position_desktop: 0,
137
+ sum_position_mobile: 0,
138
+ sum_position_tablet: 0
139
+ };
140
+ for (const dr of deviceRows) {
141
+ const suffix = DEVICE_SUFFIX[String(dr.keys?.[1] ?? dr.keys?.[0] ?? "").toUpperCase()];
142
+ if (!suffix) continue;
143
+ const dImpr = dr.impressions || 0;
144
+ row[`clicks_${suffix}`] = dr.clicks || 0;
145
+ row[`impressions_${suffix}`] = dImpr;
146
+ row[`sum_position_${suffix}`] = toSumPosition(dr.position || 0, dImpr);
147
+ }
148
+ return {
149
+ date,
150
+ row
151
+ };
152
+ }
128
153
  const DEFAULT_MAX_ROWS = 5e5;
129
154
  function createRowAccumulator(options = {}) {
130
155
  const maxRows = options.maxRows ?? DEFAULT_MAX_ROWS;
@@ -200,4 +225,4 @@ function createRowAccumulator(options = {}) {
200
225
  }
201
226
  };
202
227
  }
203
- export { TABLE_DIMS, createRowAccumulator, toPath, toSumPosition, transformGscRow };
228
+ export { TABLE_DIMS, assembleDatesRow, createRowAccumulator, toPath, toSumPosition, transformGscRow };
@@ -90,13 +90,14 @@ interface RollupDef {
90
90
  */
91
91
  dataSource: DataSource;
92
92
  /**
93
- * Wall-clock millis when the runner started this rollup. Use for
94
- * derived window cutoffs (e.g. trailing-28d boundary) so the SQL can
95
- * inline a date literal and stay portable across DuckDB builds that
96
- * don't bundle the ICU extension (Workers DuckDB, for one CURRENT_DATE
97
- * lives in ICU).
93
+ * UTC millis the trailing window anchors to — its inclusive END. Equals
94
+ * the newest synced/finalized data date when the runner is given
95
+ * `dataEndDate`, otherwise wall-clock build time. Builders derive window
96
+ * cutoffs from this (e.g. the trailing-28d boundary) and inline a date
97
+ * literal so the SQL stays portable across DuckDB builds without the ICU
98
+ * extension (Workers DuckDB — `CURRENT_DATE` lives in ICU).
98
99
  */
99
- builtAt: number;
100
+ windowAnchorMs: number;
100
101
  /**
101
102
  * GSC search-type slice the runner was invoked for. Builders forward
102
103
  * this to every `engine.runSQL` call so the aggregated facts come
@@ -156,6 +157,15 @@ interface RebuildRollupsOptions {
156
157
  * only tenants and explicit cross-type admin views.
157
158
  */
158
159
  searchType?: SearchType;
160
+ /**
161
+ * ISO date (`YYYY-MM-DD`) of the newest synced/finalized day. Trailing-
162
+ * window rollups (28d/90d) anchor their window END here instead of
163
+ * wall-clock build time, so a "last 28 days" rollup covers the 28 days of
164
+ * data that actually exist — not 28 days back from whenever the job ran,
165
+ * which would include GSC's 2-3 day empty tail. Omit for the legacy
166
+ * wall-clock behaviour.
167
+ */
168
+ dataEndDate?: string;
159
169
  }
160
170
  interface RebuildRollupResult {
161
171
  id: string;