@gscdump/engine 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
- import { _ as resolveToSQL, a as createResolverAdapter, c as LOGICAL_DATASETS, d as inferLogicalDataset, f as supportsDimensionOnSurface, g as resolveComparisonSQL, h as mergeExtras, i as compileSqlite, l as assertDimensionsSupported, m as buildTotalsSql, n as pgResolverAdapter, o as createSqlFragments, p as buildExtrasQueries, r as compilePg, s as DIMENSION_SURFACES, t as createParquetResolverAdapter, u as dimensionColumn, v as resolveToSQLOptimized } from "../_chunks/pg-adapter.mjs";
2
- import { a as getFilterDimensions, c as matchesMetricFilter, d as createSqlQuerySource, i as getDimensionFilters, l as matchesTopLevelPage, n as assertSchemaInSync, o as getInternalFilters, r as dimensionValue, s as matchesDimensionFilter, t as isSqlQuerySource, u as metricValue } from "../_chunks/resolver.mjs";
3
- export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, compilePg, compileSqlite, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, createSqlQuerySource, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, isSqlQuerySource, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
1
+ import { a as DIMENSION_SURFACES, c as dimensionColumn, d as buildExtrasQueries, f as buildTotalsSql, g as resolveToSQLOptimized, h as resolveToSQL, i as createSqlFragments, l as inferLogicalDataset, m as resolveComparisonSQL, n as pgResolverAdapter, o as LOGICAL_DATASETS, p as mergeExtras, r as createResolverAdapter, s as assertDimensionsSupported, t as createParquetResolverAdapter, u as supportsDimensionOnSurface } from "../_chunks/pg-adapter.mjs";
2
+ import { a as getInternalFilters, c as matchesTopLevelPage, i as getFilterDimensions, l as metricValue, n as dimensionValue, o as matchesDimensionFilter, r as getDimensionFilters, s as matchesMetricFilter, t as assertSchemaInSync } from "../_chunks/resolver.mjs";
3
+ export { DIMENSION_SURFACES, LOGICAL_DATASETS, assertDimensionsSupported, assertSchemaInSync, buildExtrasQueries, buildTotalsSql, createParquetResolverAdapter, createResolverAdapter, createSqlFragments, dimensionColumn, dimensionValue, getDimensionFilters, getFilterDimensions, getInternalFilters, inferLogicalDataset, matchesDimensionFilter, matchesMetricFilter, matchesTopLevelPage, mergeExtras, metricValue, pgResolverAdapter, resolveComparisonSQL, resolveToSQL, resolveToSQLOptimized, supportsDimensionOnSurface };
@@ -0,0 +1,163 @@
1
+ import { a as DataSource } from "./_chunks/storage.mjs";
2
+ import { t as ColumnDef } from "./_chunks/schema.mjs";
3
+ import { TenantCtx } from "gscdump/contracts";
4
+ import * as _$_gscdump_engine_contracts0 from "@gscdump/engine/contracts";
5
+ interface RollupCtx extends TenantCtx {
6
+ /** When the rollup was built. Stamped into payload + filename. */
7
+ builtAt: number;
8
+ }
9
+ /**
10
+ * Tenant-scoped engine surface a rollup builder needs. Subset of
11
+ * `StorageEngine.runSQL` so rollups stay testable without a full engine.
12
+ */
13
+ interface RollupEngine {
14
+ runSQL: (opts: {
15
+ ctx: TenantCtx;
16
+ fileSets: Record<string, {
17
+ table: _$_gscdump_engine_contracts0.TableName;
18
+ partitions?: string[];
19
+ }>;
20
+ table?: _$_gscdump_engine_contracts0.TableName;
21
+ sql: string;
22
+ params?: unknown[];
23
+ }) => Promise<{
24
+ rows: _$_gscdump_engine_contracts0.Row[];
25
+ }>;
26
+ }
27
+ /**
28
+ * One rollup definition. Build runs SQL over the tenant's facts and/or reads
29
+ * from entity stores via `dataSource`, returning a JSON-serializable payload
30
+ * that the runner timestamps + writes.
31
+ */
32
+ interface RollupDef {
33
+ id: string;
34
+ /**
35
+ * Window in days the rollup covers. `null` means full history. Used by
36
+ * the runner to populate `windowDays` in the payload metadata so readers
37
+ * can validate freshness.
38
+ */
39
+ windowDays: number | null;
40
+ /**
41
+ * Storage format. `'json'` (default) wraps the build payload in a
42
+ * `RollupEnvelope` and writes as a JSON blob. `'parquet'` expects `build`
43
+ * to return rows matching `parquetColumns` and writes a parquet file plus
44
+ * a tiny JSON sidecar envelope that points at it, so metadata
45
+ * (`builtAt` / `windowDays`) stays readable without decoding parquet.
46
+ */
47
+ format?: 'json' | 'parquet';
48
+ /**
49
+ * Column schema for parquet output. Required when `format === 'parquet'`.
50
+ * Types map the same way as the fact-table encoder: VARCHAR / DATE go
51
+ * through BYTE_ARRAY/UTF8; BIGINT → INT64; INTEGER → INT32; DOUBLE → DOUBLE.
52
+ */
53
+ parquetColumns?: readonly ColumnDef[];
54
+ /** Sort-key column names for parquet row-group stats. Optional. */
55
+ parquetSortKey?: readonly string[];
56
+ build: (deps: {
57
+ engine: RollupEngine;
58
+ ctx: TenantCtx;
59
+ /**
60
+ * Tenant-scoped object store. Rollups that aggregate over entity
61
+ * snapshots (e.g. indexing metadata) read JSON docs through this.
62
+ * Pure-SQL rollups can ignore it.
63
+ */
64
+ dataSource: DataSource;
65
+ /**
66
+ * Wall-clock millis when the runner started this rollup. Use for
67
+ * derived window cutoffs (e.g. trailing-28d boundary) so the SQL can
68
+ * inline a date literal and stay portable across DuckDB builds that
69
+ * don't bundle the ICU extension (Workers DuckDB, for one — CURRENT_DATE
70
+ * lives in ICU).
71
+ */
72
+ builtAt: number;
73
+ }) => Promise<unknown>;
74
+ }
75
+ /**
76
+ * Wire shape persisted to R2/disk. Readers can rely on the `version` + `builtAt`.
77
+ * Parquet rollups write this envelope as a sidecar whose `payload` points at
78
+ * the co-located `.parquet` object via `{ parquetKey, rowCount }`.
79
+ */
80
+ interface RollupEnvelope<T = unknown> {
81
+ version: 1;
82
+ id: string;
83
+ builtAt: number;
84
+ windowDays: number | null;
85
+ payload: T;
86
+ }
87
+ /** Payload stored in the JSON sidecar envelope of a parquet-format rollup. */ interface ParquetRollupPointer {
88
+ /** Object key of the parquet file the sidecar points at (see `rollupParquetKey`). */ parquetKey: string;
89
+ /** Number of rows that were encoded into that parquet file. */ rowCount: number;
90
+ }
91
+ /** Object key of a rollup's JSON envelope: `<tenant rollups prefix>/<id>__v<builtAt>.json`. */ declare function rollupKey(ctx: TenantCtx, id: string, builtAt: number): string;
92
+ /** Object key of a rollup's parquet payload: `<tenant rollups prefix>/<id>__v<builtAt>.parquet`. */ declare function rollupParquetKey(ctx: TenantCtx, id: string, builtAt: number): string;
93
+ /** Inputs to `rebuildRollups`. */ interface RebuildRollupsOptions {
94
+ /** SQL surface handed unchanged to every rollup's `build`. */ engine: RollupEngine;
95
+ /** Object store handed to every `build` and used to write the envelope / parquet outputs. */ dataSource: DataSource;
96
+ /** Tenant the rollups are built for; also determines the output key prefix. */ ctx: TenantCtx;
97
+ /** Rollups to build; they are processed sequentially in array order. */ defs: readonly RollupDef[];
98
+ /** Clock used to stamp `builtAt`, called once per def. Defaults to `Date.now`. */ now?: () => number;
99
+ }
100
+ interface RebuildRollupResult {
101
+ id: string;
102
+ /** JSON envelope key. For parquet rollups this is the sidecar pointer. */
103
+ objectKey: string;
104
+ /** Parquet payload key. Present only when `format === 'parquet'`. */
105
+ parquetKey?: string;
106
+ /** Envelope byte size; for parquet rollups does NOT include parquet bytes. */
107
+ bytes: number;
108
+ /** Parquet payload byte size when `format === 'parquet'`. */
109
+ parquetBytes?: number;
110
+ builtAt: number;
111
+ }
112
+ declare function rebuildRollups(opts: RebuildRollupsOptions): Promise<RebuildRollupResult[]>;
113
+ /**
114
+ * Daily totals across the full history. One row per date, aggregated from
115
+ * the `pages` table, with `clicks`, `impressions` and `sum_position`. Powers sparklines and headline totals.
116
+ *
117
+ * Includes `anonymizedImpressionsPct` per day computed as
118
+ * 1 - sum(impressions in the `keywords` table) / sum(impressions in the `pages` table)
119
+ * — surfaces GSC's anonymous-query gap so the dashboard can warn users not
120
+ * to trust query-grained breakdowns as comprehensive. Despite the `Pct` suffix the value is a fraction clamped to [0, 1]; it is 0 on days with no page impressions.
121
+ */
122
+ declare const dailyTotalsRollup: RollupDef;
123
+ /** Weekly totals, ISO week aligned. Cheap and stable for trend widgets. */
124
+ declare const weeklyTotalsRollup: RollupDef;
125
+ /**
126
+ * Top 1000 pages by clicks over the trailing 28-day window. JSON for v1;
127
+ * promote to parquet (`top_pages_28d.parquet`) when the dashboard needs
128
+ * server-side WHERE filtering on this rollup.
129
+ */
130
+ declare const topPages28dRollup: RollupDef;
131
+ /**
132
+ * Top 250 countries by clicks over the trailing 28-day window. Countries
133
+ * cardinality is bounded (~250 ISO codes), so the list fits in a tiny JSON
134
+ * payload regardless of traffic shape. Powers a geo-overview widget without
135
+ * spinning up DuckDB-WASM.
136
+ */
137
+ declare const topCountries28dRollup: RollupDef;
138
+ /** Top 1000 keywords by clicks over the trailing 28-day window. */
139
+ declare const topKeywords28dRollup: RollupDef;
140
+ /**
141
+ * Parquet-format companion to `topKeywords28dRollup`. Same shape, but persists
142
+ * as a parquet object plus JSON sidecar pointer so widgets that need
143
+ * server-side WHERE (filter by prefix, by clicks threshold, paginate) can scan
144
+ * it directly with DuckDB-WASM instead of loading all 1000 rows into JS.
145
+ *
146
+ * Opt-in: include in the caller's rollup def list alongside (or instead of)
147
+ * the JSON variant; the runner treats the two as independent ids so they can
148
+ * coexist during a migration.
149
+ */
150
+ declare const topKeywords28dParquetRollup: RollupDef;
151
+ /**
152
+ * Aggregates the per-URL Indexing API metadata entity store (populated by
153
+ * `gscdump entities indexing snapshot`) into daily counts of `URL_UPDATED`
154
+ * and `URL_REMOVED` notifications. Covers the third entity-snapshot shape
155
+ * without needing its own parquet family — publish events are sparse and
156
+ * aggregate cleanly into a small JSON rollup.
157
+ * Each URL contributes at most one update and one remove, bucketed by the first 10 characters (the date part) of its `latestUpdateAt` / `latestRemoveAt`, so counts reflect the latest event per URL rather than a full notification history.
158
+ * Safe no-op when the entity store is empty: returns `{ totals: {...}, days: [] }`
159
+ * so downstream readers don't have to special-case first-run sites.
160
+ */
161
+ declare const indexingMetadataRollup: RollupDef;
162
+ declare const DEFAULT_ROLLUPS: readonly RollupDef[];
163
+ export { DEFAULT_ROLLUPS, ParquetRollupPointer, RebuildRollupResult, RebuildRollupsOptions, RollupCtx, RollupDef, RollupEngine, RollupEnvelope, dailyTotalsRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
@@ -0,0 +1,346 @@
1
+ import { createIndexingMetadataStore } from "./entities.mjs";
2
+ import { encodeRowsToParquetFlex } from "./adapters/hyparquet.mjs";
3
+ import { MS_PER_DAY } from "gscdump";
4
/**
 * Builds the object-store directory that holds a tenant's rollups.
 *
 * Contexts with a truthy `siteId` nest under `u_<userId>/<siteId>/rollups`;
 * all others fall back to the user-level `u_<userId>/rollups`.
 */
function rollupPrefix(ctx) {
  if (ctx.siteId) {
    return `u_${ctx.userId}/${ctx.siteId}/rollups`;
  }
  return `u_${ctx.userId}/rollups`;
}
7
+ function rollupKey(ctx, id, builtAt) {
8
+ return `${rollupPrefix(ctx)}/${id}__v${builtAt}.json`;
9
+ }
10
+ function rollupParquetKey(ctx, id, builtAt) {
11
+ return `${rollupPrefix(ctx)}/${id}__v${builtAt}.parquet`;
12
+ }
13
+ async function rebuildRollups(opts) {
14
+ const now = opts.now ?? (() => Date.now());
15
+ const results = [];
16
+ for (const def of opts.defs) {
17
+ const builtAt = now();
18
+ const payload = await def.build({
19
+ engine: opts.engine,
20
+ ctx: opts.ctx,
21
+ dataSource: opts.dataSource,
22
+ builtAt
23
+ });
24
+ if (def.format === "parquet") {
25
+ if (!def.parquetColumns || def.parquetColumns.length === 0) throw new Error(`rollup '${def.id}' declared format='parquet' without parquetColumns`);
26
+ const rows = payload ?? [];
27
+ const parquetBytes = encodeRowsToParquetFlex(rows, {
28
+ columns: def.parquetColumns,
29
+ sortKey: def.parquetSortKey
30
+ });
31
+ const parquetKey = rollupParquetKey(opts.ctx, def.id, builtAt);
32
+ await opts.dataSource.write(parquetKey, parquetBytes);
33
+ const pointer = {
34
+ parquetKey,
35
+ rowCount: rows.length
36
+ };
37
+ const envelope = {
38
+ version: 1,
39
+ id: def.id,
40
+ builtAt,
41
+ windowDays: def.windowDays,
42
+ payload: pointer
43
+ };
44
+ const envelopeBytes = new TextEncoder().encode(JSON.stringify(envelope));
45
+ const key = rollupKey(opts.ctx, def.id, builtAt);
46
+ await opts.dataSource.write(key, envelopeBytes);
47
+ results.push({
48
+ id: def.id,
49
+ objectKey: key,
50
+ parquetKey,
51
+ bytes: envelopeBytes.byteLength,
52
+ parquetBytes: parquetBytes.byteLength,
53
+ builtAt
54
+ });
55
+ continue;
56
+ }
57
+ const envelope = {
58
+ version: 1,
59
+ id: def.id,
60
+ builtAt,
61
+ windowDays: def.windowDays,
62
+ payload
63
+ };
64
+ const json = JSON.stringify(envelope);
65
+ const bytes = new TextEncoder().encode(json);
66
+ const key = rollupKey(opts.ctx, def.id, builtAt);
67
+ await opts.dataSource.write(key, bytes);
68
+ results.push({
69
+ id: def.id,
70
+ objectKey: key,
71
+ bytes: bytes.byteLength,
72
+ builtAt
73
+ });
74
+ }
75
+ return results;
76
+ }
77
/**
 * Formats the UTC calendar date that lies `days` whole days before the
 * epoch-millisecond instant `at`, as `YYYY-MM-DD`.
 *
 * Month and day are zero-padded to two digits; the year is emitted as-is.
 */
function utcDateMinusDays(at, days) {
  const shifted = new Date(at - days * MS_PER_DAY);
  const twoDigits = (value) => String(value).padStart(2, "0");
  const year = shifted.getUTCFullYear();
  const month = twoDigits(shifted.getUTCMonth() + 1);
  const day = twoDigits(shifted.getUTCDate());
  return `${year}-${month}-${day}`;
}
81
+ const dailyTotalsRollup = {
82
+ id: "daily_totals",
83
+ windowDays: null,
84
+ async build({ engine, ctx }) {
85
+ const pages = await engine.runSQL({
86
+ ctx,
87
+ table: "pages",
88
+ fileSets: { FILES: { table: "pages" } },
89
+ sql: `
90
+ SELECT
91
+ date,
92
+ SUM(clicks)::BIGINT AS clicks,
93
+ SUM(impressions)::BIGINT AS impressions,
94
+ SUM(sum_position)::DOUBLE AS sum_position
95
+ FROM read_parquet({{FILES}}, union_by_name = true)
96
+ GROUP BY date
97
+ ORDER BY date
98
+ `
99
+ });
100
+ const keywords = await engine.runSQL({
101
+ ctx,
102
+ table: "keywords",
103
+ fileSets: { FILES: { table: "keywords" } },
104
+ sql: `
105
+ SELECT
106
+ date,
107
+ SUM(impressions)::BIGINT AS impressions
108
+ FROM read_parquet({{FILES}}, union_by_name = true)
109
+ GROUP BY date
110
+ `
111
+ });
112
+ const keywordImpressionsByDate = /* @__PURE__ */ new Map();
113
+ for (const r of keywords.rows) keywordImpressionsByDate.set(String(r.date), BigInt(r.impressions));
114
+ return pages.rows.map((r) => {
115
+ const totalImpressions = BigInt(r.impressions);
116
+ const queryImpressions = keywordImpressionsByDate.get(String(r.date)) ?? BigInt(0);
117
+ const anonymized = totalImpressions === BigInt(0) ? 0 : 1 - Number(queryImpressions) / Number(totalImpressions);
118
+ return {
119
+ date: r.date,
120
+ clicks: Number(r.clicks),
121
+ impressions: Number(r.impressions),
122
+ sum_position: Number(r.sum_position),
123
+ anonymizedImpressionsPct: Math.max(0, Math.min(1, anonymized))
124
+ };
125
+ });
126
+ }
127
+ };
128
+ const weeklyTotalsRollup = {
129
+ id: "weekly_totals",
130
+ windowDays: null,
131
+ async build({ engine, ctx }) {
132
+ return (await engine.runSQL({
133
+ ctx,
134
+ table: "pages",
135
+ fileSets: { FILES: { table: "pages" } },
136
+ sql: `
137
+ SELECT
138
+ strftime(date_trunc('week', date::DATE), '%Y-%m-%d') AS week,
139
+ SUM(clicks)::BIGINT AS clicks,
140
+ SUM(impressions)::BIGINT AS impressions,
141
+ SUM(sum_position)::DOUBLE AS sum_position
142
+ FROM read_parquet({{FILES}}, union_by_name = true)
143
+ GROUP BY 1
144
+ ORDER BY 1
145
+ `
146
+ })).rows.map((r) => ({
147
+ week: r.week,
148
+ clicks: Number(r.clicks),
149
+ impressions: Number(r.impressions),
150
+ sum_position: Number(r.sum_position)
151
+ }));
152
+ }
153
+ };
154
+ const topPages28dRollup = {
155
+ id: "top_pages_28d",
156
+ windowDays: 28,
157
+ async build({ engine, ctx, builtAt }) {
158
+ const cutoff = utcDateMinusDays(builtAt, 28);
159
+ return (await engine.runSQL({
160
+ ctx,
161
+ table: "pages",
162
+ fileSets: { FILES: { table: "pages" } },
163
+ sql: `
164
+ SELECT
165
+ url,
166
+ SUM(clicks)::BIGINT AS clicks,
167
+ SUM(impressions)::BIGINT AS impressions,
168
+ SUM(sum_position)::DOUBLE AS sum_position
169
+ FROM read_parquet({{FILES}}, union_by_name = true)
170
+ WHERE date >= '${cutoff}'
171
+ GROUP BY url
172
+ ORDER BY clicks DESC
173
+ LIMIT 1000
174
+ `
175
+ })).rows.map((r) => ({
176
+ url: r.url,
177
+ clicks: Number(r.clicks),
178
+ impressions: Number(r.impressions),
179
+ sum_position: Number(r.sum_position)
180
+ }));
181
+ }
182
+ };
183
+ const topCountries28dRollup = {
184
+ id: "top_countries_28d",
185
+ windowDays: 28,
186
+ async build({ engine, ctx, builtAt }) {
187
+ const cutoff = utcDateMinusDays(builtAt, 28);
188
+ return (await engine.runSQL({
189
+ ctx,
190
+ table: "countries",
191
+ fileSets: { FILES: { table: "countries" } },
192
+ sql: `
193
+ SELECT
194
+ country,
195
+ SUM(clicks)::BIGINT AS clicks,
196
+ SUM(impressions)::BIGINT AS impressions,
197
+ SUM(sum_position)::DOUBLE AS sum_position
198
+ FROM read_parquet({{FILES}}, union_by_name = true)
199
+ WHERE date >= '${cutoff}'
200
+ GROUP BY country
201
+ ORDER BY clicks DESC
202
+ LIMIT 250
203
+ `
204
+ })).rows.map((r) => ({
205
+ country: r.country,
206
+ clicks: Number(r.clicks),
207
+ impressions: Number(r.impressions),
208
+ sum_position: Number(r.sum_position)
209
+ }));
210
+ }
211
+ };
212
+ const topKeywords28dRollup = {
213
+ id: "top_keywords_28d",
214
+ windowDays: 28,
215
+ async build({ engine, ctx, builtAt }) {
216
+ const cutoff = utcDateMinusDays(builtAt, 28);
217
+ return (await engine.runSQL({
218
+ ctx,
219
+ table: "keywords",
220
+ fileSets: { FILES: { table: "keywords" } },
221
+ sql: `
222
+ SELECT
223
+ query,
224
+ SUM(clicks)::BIGINT AS clicks,
225
+ SUM(impressions)::BIGINT AS impressions,
226
+ SUM(sum_position)::DOUBLE AS sum_position
227
+ FROM read_parquet({{FILES}}, union_by_name = true)
228
+ WHERE date >= '${cutoff}'
229
+ GROUP BY query
230
+ ORDER BY clicks DESC
231
+ LIMIT 1000
232
+ `
233
+ })).rows.map((r) => ({
234
+ query: r.query,
235
+ clicks: Number(r.clicks),
236
+ impressions: Number(r.impressions),
237
+ sum_position: Number(r.sum_position)
238
+ }));
239
+ }
240
+ };
241
+ const topKeywords28dParquetRollup = {
242
+ id: "top_keywords_28d_parquet",
243
+ windowDays: 28,
244
+ format: "parquet",
245
+ parquetColumns: [
246
+ {
247
+ name: "query",
248
+ type: "VARCHAR",
249
+ nullable: false
250
+ },
251
+ {
252
+ name: "clicks",
253
+ type: "BIGINT",
254
+ nullable: false
255
+ },
256
+ {
257
+ name: "impressions",
258
+ type: "BIGINT",
259
+ nullable: false
260
+ },
261
+ {
262
+ name: "sum_position",
263
+ type: "DOUBLE",
264
+ nullable: false
265
+ }
266
+ ],
267
+ parquetSortKey: ["clicks"],
268
+ async build({ engine, ctx, builtAt }) {
269
+ const cutoff = utcDateMinusDays(builtAt, 28);
270
+ return (await engine.runSQL({
271
+ ctx,
272
+ table: "keywords",
273
+ fileSets: { FILES: { table: "keywords" } },
274
+ sql: `
275
+ SELECT
276
+ query,
277
+ SUM(clicks)::BIGINT AS clicks,
278
+ SUM(impressions)::BIGINT AS impressions,
279
+ SUM(sum_position)::DOUBLE AS sum_position
280
+ FROM read_parquet({{FILES}}, union_by_name = true)
281
+ WHERE date >= '${cutoff}'
282
+ GROUP BY query
283
+ ORDER BY clicks DESC
284
+ LIMIT 1000
285
+ `
286
+ })).rows.map((r) => ({
287
+ query: String(r.query),
288
+ clicks: BigInt(r.clicks),
289
+ impressions: BigInt(r.impressions),
290
+ sum_position: Number(r.sum_position)
291
+ }));
292
+ }
293
+ };
294
+ const indexingMetadataRollup = {
295
+ id: "indexing_metadata",
296
+ windowDays: null,
297
+ async build({ dataSource, ctx }) {
298
+ const index = await createIndexingMetadataStore({ dataSource }).loadIndex(ctx);
299
+ const records = Object.values(index.records);
300
+ const updatesByDay = /* @__PURE__ */ new Map();
301
+ const removesByDay = /* @__PURE__ */ new Map();
302
+ let totalUpdates = 0;
303
+ let totalRemoves = 0;
304
+ let latestUpdate;
305
+ let latestRemove;
306
+ for (const r of records) {
307
+ if (r.latestUpdateAt) {
308
+ totalUpdates++;
309
+ const day = r.latestUpdateAt.slice(0, 10);
310
+ updatesByDay.set(day, (updatesByDay.get(day) ?? 0) + 1);
311
+ if (!latestUpdate || r.latestUpdateAt > latestUpdate) latestUpdate = r.latestUpdateAt;
312
+ }
313
+ if (r.latestRemoveAt) {
314
+ totalRemoves++;
315
+ const day = r.latestRemoveAt.slice(0, 10);
316
+ removesByDay.set(day, (removesByDay.get(day) ?? 0) + 1);
317
+ if (!latestRemove || r.latestRemoveAt > latestRemove) latestRemove = r.latestRemoveAt;
318
+ }
319
+ }
320
+ const days = new Set([...updatesByDay.keys(), ...removesByDay.keys()]);
321
+ const perDay = Array.from(days).sort().map((day) => ({
322
+ day,
323
+ updates: updatesByDay.get(day) ?? 0,
324
+ removes: removesByDay.get(day) ?? 0
325
+ }));
326
+ return {
327
+ totals: {
328
+ urls: records.length,
329
+ updates: totalUpdates,
330
+ removes: totalRemoves,
331
+ latestUpdateAt: latestUpdate ?? null,
332
+ latestRemoveAt: latestRemove ?? null
333
+ },
334
+ days: perDay
335
+ };
336
+ }
337
+ };
338
+ /** Default rollup definition list. `topKeywords28dParquetRollup` is not part of it; callers that want the parquet variant must add it to their own def list. */ const DEFAULT_ROLLUPS = [
339
+ dailyTotalsRollup,
340
+ weeklyTotalsRollup,
341
+ topPages28dRollup,
342
+ topKeywords28dRollup,
343
+ topCountries28dRollup,
344
+ indexingMetadataRollup
345
+ ];
346
+ export { DEFAULT_ROLLUPS, dailyTotalsRollup, indexingMetadataRollup, rebuildRollups, rollupKey, rollupParquetKey, topCountries28dRollup, topKeywords28dParquetRollup, topKeywords28dRollup, topPages28dRollup, weeklyTotalsRollup };
@@ -1,14 +1,2 @@
1
- /**
2
- * Describes a hot/cold snapshot set. Produced by the snapshot builder,
3
- * consumed by `attachSnapshotIndex`. Filenames are derived from `cold`
4
- * via `cold-${yearMonth}.duckdb`; hot is always `hot.duckdb` when
5
- * `hot: true`.
6
- */
7
- interface SnapshotIndex {
8
- version: 1;
9
- builtAt: string;
10
- cold: string[];
11
- hot: boolean;
12
- hotDays: number;
13
- }
1
+ import { t as SnapshotIndex } from "./_chunks/snapshot.mjs";
14
2
  export { SnapshotIndex };
@@ -1,7 +1,7 @@
1
1
  import { F as TenantCtx, O as StorageEngine, T as Row } from "../_chunks/storage.mjs";
2
+ import { o as ResolverAdapter } from "../_chunks/types.mjs";
2
3
  import { n as AnalysisResult, t as AnalysisParams } from "../_chunks/analysis-types.mjs";
3
- import { i as QueryRow, r as FileSet, s as SqlQuerySource, t as AnalysisQuerySource } from "../_chunks/source-types.mjs";
4
- import { t as AnalyzerRegistry } from "../_chunks/registry.mjs";
4
+ import { C as ExecuteSqlOptions, E as SourceCapabilities, S as AnalysisSourceKind, T as QueryRow, t as AnalyzerRegistry, w as FileSet, x as AnalysisQuerySource } from "../_chunks/registry.mjs";
5
5
  import { PlannerCapabilities } from "gscdump/query/plan";
6
6
  import { BuilderState } from "gscdump/query";
7
7
  interface AttachedTableRunner {
@@ -28,6 +28,13 @@ interface AttachedTableSourceOptions {
28
28
  * paying the SQL execution cost. Omit to disable the check.
29
29
  */
30
30
  attachedTables?: readonly string[];
31
+ /**
32
+ * Dialect adapter surfaced on the source for analyzers that compose SQL
33
+ * from a `BuilderState` at plan-build time (e.g. `data-query`,
34
+ * `data-detail`). Attached-table sources execute pg-flavored DuckDB SQL,
35
+ * so callers should pass `pgResolverAdapter` here.
36
+ */
37
+ adapter?: ResolverAdapter<any>;
31
38
  }
32
39
  declare class AttachedTableMissingError extends Error {
33
40
  readonly missing: readonly string[];
@@ -39,6 +46,21 @@ declare class AttachedTableMissingError extends Error {
39
46
  */
40
47
  declare function rewriteForTableSource(sql: string, schema: string, fileSets: Record<string, FileSet>): string;
41
48
  declare function createAttachedTableSource(runner: AttachedTableRunner, options: AttachedTableSourceOptions): AnalysisQuerySource;
49
+ interface CreateSqlQuerySourceOptions<TKey extends string> {
50
+ /** Debug-only identifier surfaced on the source for error messages. */
51
+ name: string;
52
+ /** Telemetry tag stamped onto analyzer result meta. */
53
+ kind?: AnalysisSourceKind;
54
+ /** Dialect-specific adapter; compiles `BuilderState` → `{ sql, params }`. */
55
+ adapter: ResolverAdapter<TKey>;
56
+ /** Drives the underlying DB. Called for both typed queries and raw SQL. */
57
+ execute: (sql: string, params: unknown[]) => Promise<QueryRow[]>;
58
+ /** Tenant id for multi-tenant dialects; forwarded to `resolveToSQL`. */
59
+ siteId?: string | number;
60
+ /** Additional capability flags merged on top of `adapter.capabilities`. */
61
+ extraCapabilities?: Partial<SourceCapabilities>;
62
+ }
63
+ declare function createSqlQuerySource<TKey extends string>(options: CreateSqlQuerySourceOptions<TKey>): AnalysisQuerySource;
42
64
  /**
43
65
  * Capabilities the engine query path honors. Matches what the DuckDB compiler
44
66
  * passes to `buildLogicalPlan`: regex pushes down; comparison joins and
@@ -51,12 +73,12 @@ interface EngineQuerySourceOptions {
51
73
  ctx: TenantCtx;
52
74
  }
53
75
  /**
54
- * Wraps a storage engine as a `SqlQuerySource`. `queryRows` runs typed
55
- * builder-state queries; `executeSql` delegates to `engine.runSQL` and
56
- * requires `opts.fileSets` (with a `FILES` entry so the target table can be
57
- * resolved for partition lookup).
76
+ * Wraps a storage engine as an `AnalysisQuerySource` with SQL execution.
77
+ * `queryRows` runs typed builder-state queries; `executeSql` delegates to
78
+ * `engine.runSQL` and requires `opts.fileSets` (with a `FILES` entry so the
79
+ * target table can be resolved for partition lookup).
58
80
  */
59
- declare function createEngineQuerySource(options: EngineQuerySourceOptions): SqlQuerySource;
81
+ declare function createEngineQuerySource(options: EngineQuerySourceOptions): AnalysisQuerySource;
60
82
  /**
61
83
  * Convenience: wrap a storage engine + tenant ctx in a source and dispatch.
62
84
  * Equivalent to
@@ -75,4 +97,4 @@ declare function queryComparisonRows<TRow = QueryRow>(source: AnalysisQuerySourc
75
97
  current: TRow[];
76
98
  previous: TRow[];
77
99
  }>;
78
- export { AttachedTableMissingError, type AttachedTableRunner, type AttachedTableSourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, TypedQuery, createAttachedTableSource, createEngineQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };
100
+ export { type AnalysisQuerySource, type AnalysisSourceKind, AttachedTableMissingError, type AttachedTableRunner, type AttachedTableSourceOptions, type CreateSqlQuerySourceOptions, ENGINE_QUERY_CAPABILITIES, EngineQuerySourceOptions, type ExecuteSqlOptions, type FileSet, type QueryRow, type SourceCapabilities, TypedQuery, createAttachedTableSource, createEngineQuerySource, createSqlQuerySource, queryComparisonRows, queryRows, rewriteForTableSource, runAnalyzerWithEngine, typedQuery };