@ainyc/canonry 4.57.0 → 4.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/assets/agent-workspace/skills/aero/references/regression-playbook.md +13 -0
  2. package/assets/agent-workspace/skills/canonry/SKILL.md +7 -0
  3. package/assets/agent-workspace/skills/canonry/references/canonry-cli.md +45 -0
  4. package/assets/agent-workspace/skills/canonry/references/google-business-profile.md +257 -0
  5. package/assets/assets/BacklinksPage-CEL_F-c1.js +1 -0
  6. package/assets/assets/{ChartPrimitives-D7C1Cp8w.js → ChartPrimitives-B8Z6fjCY.js} +1 -1
  7. package/assets/assets/ProjectPage-ClmLZ1TC.js +6 -0
  8. package/assets/assets/{RunRow-BntNdrgM.js → RunRow-oSEdTjB1.js} +1 -1
  9. package/assets/assets/{RunsPage-Btp6qn10.js → RunsPage-k-TX3mbf.js} +1 -1
  10. package/assets/assets/{SettingsPage-DkyNiU2i.js → SettingsPage-VCOwDDIL.js} +1 -1
  11. package/assets/assets/{TrafficPage-CBl4Mwdc.js → TrafficPage-GgkjI5RL.js} +1 -1
  12. package/assets/assets/{TrafficSourceDetailPage-BZzuWCn-.js → TrafficSourceDetailPage-DhMF4157.js} +1 -1
  13. package/assets/assets/{extract-error-message-De8_qAzs.js → extract-error-message-CYGeEBx9.js} +1 -1
  14. package/assets/assets/{index-XUKhruAg.js → index-BdGCBd0G.js} +90 -90
  15. package/assets/assets/{index-9NRlymgj.css → index-bQ6xgP8c.css} +1 -1
  16. package/assets/assets/server-traffic-If2bUKTD.js +1 -0
  17. package/assets/assets/{trash-2-B5clF2rU.js → trash-2-NV5lw_nX.js} +1 -1
  18. package/assets/index.html +2 -2
  19. package/dist/{chunk-HL6JZUEW.js → chunk-3G3GAT3E.js} +30326 -33261
  20. package/dist/{chunk-6X5TF73A.js → chunk-CKWHFAVB.js} +414 -1
  21. package/dist/{chunk-WFVUZVJD.js → chunk-DXWUBWBD.js} +1427 -1191
  22. package/dist/chunk-X4ZZFZQZ.js +10210 -0
  23. package/dist/cli.js +618 -466
  24. package/dist/index.d.ts +2 -1
  25. package/dist/index.js +4 -4
  26. package/dist/{intelligence-service-NY3MAVPB.js → intelligence-service-UYVVKQ2K.js} +2 -2
  27. package/dist/mcp.js +10 -8
  28. package/package.json +8 -7
  29. package/assets/assets/BacklinksPage-CmeFZ8UJ.js +0 -1
  30. package/assets/assets/ProjectPage-Y6uCyjGb.js +0 -6
  31. package/assets/assets/server-traffic-bn9LSZN9.js +0 -1
  32. package/dist/chunk-4KWPOVIT.js +0 -4842
@@ -1,4842 +0,0 @@
1
- import {
2
- CitationStates,
3
- ContentActions,
4
- RunKinds,
5
- RunTriggers,
6
- __export,
7
- brandKeyFromText,
8
- brandLabelFromDomain,
9
- categorizeSourceWithCompetitors,
10
- categoryLabel,
11
- determineAnswerMentioned,
12
- effectiveDomains,
13
- normalizeProjectDomain,
14
- registrableDomain
15
- } from "./chunk-WFVUZVJD.js";
16
-
17
- // src/intelligence-service.ts
18
- import { eq, desc, asc, and, ne, or, inArray } from "drizzle-orm";
19
-
20
- // ../db/src/client.ts
21
- import { mkdirSync } from "fs";
22
- import { dirname } from "path";
23
- import Database from "better-sqlite3";
24
- import { drizzle } from "drizzle-orm/better-sqlite3";
25
-
26
- // ../db/src/schema.ts
27
- var schema_exports = {};
28
- __export(schema_exports, {
29
- agentMemory: () => agentMemory,
30
- agentSessions: () => agentSessions,
31
- aiReferralEventsHourly: () => aiReferralEventsHourly,
32
- aiUserFetchEventsHourly: () => aiUserFetchEventsHourly,
33
- apiKeys: () => apiKeys,
34
- auditLog: () => auditLog,
35
- backlinkDomains: () => backlinkDomains,
36
- backlinkSummaries: () => backlinkSummaries,
37
- bingConnections: () => bingConnections,
38
- bingCoverageSnapshots: () => bingCoverageSnapshots,
39
- bingKeywordStats: () => bingKeywordStats,
40
- bingUrlInspections: () => bingUrlInspections,
41
- ccReleaseSyncs: () => ccReleaseSyncs,
42
- competitors: () => competitors,
43
- contentTargetDismissals: () => contentTargetDismissals,
44
- crawlerEventsHourly: () => crawlerEventsHourly,
45
- discoveryProbes: () => discoveryProbes,
46
- discoverySessions: () => discoverySessions,
47
- gaAiReferrals: () => gaAiReferrals,
48
- gaConnections: () => gaConnections,
49
- gaSocialReferrals: () => gaSocialReferrals,
50
- gaTrafficSnapshots: () => gaTrafficSnapshots,
51
- gaTrafficSummaries: () => gaTrafficSummaries,
52
- gaTrafficWindowSummaries: () => gaTrafficWindowSummaries,
53
- googleConnections: () => googleConnections,
54
- gscCoverageSnapshots: () => gscCoverageSnapshots,
55
- gscSearchData: () => gscSearchData,
56
- gscUrlInspections: () => gscUrlInspections,
57
- healthSnapshots: () => healthSnapshots,
58
- insights: () => insights,
59
- migrationsTable: () => migrationsTable,
60
- notifications: () => notifications,
61
- projects: () => projects,
62
- queries: () => queries,
63
- querySnapshots: () => querySnapshots,
64
- rawEventSamples: () => rawEventSamples,
65
- recommendationExplanations: () => recommendationExplanations,
66
- runs: () => runs,
67
- schedules: () => schedules,
68
- trafficSources: () => trafficSources,
69
- usageCounters: () => usageCounters
70
- });
71
- import { index, integer, primaryKey, real, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
72
- var projects = sqliteTable("projects", {
73
- id: text("id").primaryKey(),
74
- name: text("name").notNull().unique(),
75
- displayName: text("display_name").notNull(),
76
- canonicalDomain: text("canonical_domain").notNull(),
77
- ownedDomains: text("owned_domains", { mode: "json" }).$type().notNull().default([]),
78
- aliases: text("aliases", { mode: "json" }).$type().notNull().default([]),
79
- country: text("country").notNull(),
80
- language: text("language").notNull(),
81
- tags: text("tags", { mode: "json" }).$type().notNull().default([]),
82
- labels: text("labels", { mode: "json" }).$type().notNull().default({}),
83
- providers: text("providers", { mode: "json" }).$type().notNull().default([]),
84
- locations: text("locations", { mode: "json" }).$type().notNull().default([]),
85
- defaultLocation: text("default_location"),
86
- autoExtractBacklinks: integer("auto_extract_backlinks", { mode: "boolean" }).notNull().default(false),
87
- configSource: text("config_source").notNull().default("cli"),
88
- configRevision: integer("config_revision").notNull().default(1),
89
- icpDescription: text("icp_description"),
90
- createdAt: text("created_at").notNull(),
91
- updatedAt: text("updated_at").notNull()
92
- });
93
- var queries = sqliteTable("queries", {
94
- id: text("id").primaryKey(),
95
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
96
- query: text("query").notNull(),
97
- provenance: text("provenance"),
98
- createdAt: text("created_at").notNull()
99
- }, (table) => [
100
- index("idx_queries_project").on(table.projectId),
101
- uniqueIndex("idx_queries_project_query").on(table.projectId, table.query)
102
- ]);
103
- var competitors = sqliteTable("competitors", {
104
- id: text("id").primaryKey(),
105
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
106
- domain: text("domain").notNull(),
107
- provenance: text("provenance"),
108
- createdAt: text("created_at").notNull()
109
- }, (table) => [
110
- index("idx_competitors_project").on(table.projectId),
111
- uniqueIndex("idx_competitors_project_domain").on(table.projectId, table.domain)
112
- ]);
113
- var runs = sqliteTable("runs", {
114
- id: text("id").primaryKey(),
115
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
116
- kind: text("kind").notNull().default("answer-visibility"),
117
- status: text("status").notNull().default("queued"),
118
- trigger: text("trigger").notNull().default("manual"),
119
- location: text("location"),
120
- queries: text("queries", { mode: "json" }).$type(),
121
- sourceId: text("source_id"),
122
- startedAt: text("started_at"),
123
- finishedAt: text("finished_at"),
124
- error: text("error"),
125
- createdAt: text("created_at").notNull()
126
- }, (table) => [
127
- index("idx_runs_project").on(table.projectId),
128
- index("idx_runs_status").on(table.status),
129
- index("idx_runs_source").on(table.sourceId)
130
- ]);
131
- var querySnapshots = sqliteTable("query_snapshots", {
132
- id: text("id").primaryKey(),
133
- runId: text("run_id").notNull().references(() => runs.id, { onDelete: "cascade" }),
134
- // `query_id` is nullable + `ON DELETE SET NULL` so historical snapshots
135
- // outlive their queries row. Pre-v58 this FK cascaded — deleting a tracked
136
- // query (PUT /queries replace, individual delete, `canonry apply` dropping
137
- // one) silently wiped the entire citation history for that query. With SET
138
- // NULL the snapshot survives; `queryText` keeps it self-describing when
139
- // the queries row is gone.
140
- queryId: text("query_id").references(() => queries.id, { onDelete: "set null" }),
141
- queryText: text("query_text"),
142
- provider: text("provider").notNull().default("gemini"),
143
- model: text("model"),
144
- citationState: text("citation_state").notNull(),
145
- answerMentioned: integer("answer_mentioned", { mode: "boolean" }),
146
- answerText: text("answer_text"),
147
- citedDomains: text("cited_domains", { mode: "json" }).$type().notNull().default([]),
148
- competitorOverlap: text("competitor_overlap", { mode: "json" }).$type().notNull().default([]),
149
- recommendedCompetitors: text("recommended_competitors", { mode: "json" }).$type().notNull().default([]),
150
- location: text("location"),
151
- screenshotPath: text("screenshot_path"),
152
- rawResponse: text("raw_response"),
153
- createdAt: text("created_at").notNull()
154
- }, (table) => [
155
- index("idx_snapshots_run").on(table.runId),
156
- index("idx_snapshots_query").on(table.queryId),
157
- index("idx_snapshots_citation_state").on(table.citationState),
158
- index("idx_snapshots_provider_model").on(table.provider, table.model),
159
- index("idx_snapshots_location").on(table.location),
160
- index("idx_snapshots_created_at").on(table.createdAt)
161
- ]);
162
- var auditLog = sqliteTable("audit_log", {
163
- id: text("id").primaryKey(),
164
- // SET NULL (not CASCADE) so deleting a project preserves its audit trail.
165
- // The DELETE /projects route writes a "project.deleted" row immediately
166
- // before the delete — a CASCADE here would wipe that record before any
167
- // reader could see it (the deletion would erase the only evidence it
168
- // happened). Detached rows surface in audit queries with project_id=NULL.
169
- projectId: text("project_id").references(() => projects.id, { onDelete: "set null" }),
170
- // High-level identity of the caller: 'api' for HTTP requests, 'scheduler'
171
- // for cron-triggered work, 'cli' / 'agent' / 'mcp' for direct DB writes
172
- // (where applicable). Coarse on purpose — narrower attribution lives in
173
- // `userAgent` and `actorSession`.
174
- actor: text("actor").notNull(),
175
- action: text("action").notNull(),
176
- entityType: text("entity_type").notNull(),
177
- entityId: text("entity_id"),
178
- diff: text("diff"),
179
- // User-Agent header from the originating HTTP request, when available.
180
- // The narrowest reliable signal for "which client did this" — distinguishes
181
- // CLI (`canonry-cli/X.Y.Z`), dashboard (browser UA), MCP adapter, and
182
- // external scripts. NULL for non-HTTP writes (scheduler, run-coordinator,
183
- // direct CLI commands that bypass the API).
184
- userAgent: text("user_agent"),
185
- // Optional caller-supplied trace key for cross-request correlation —
186
- // a session ID, prompt ID, batch ID, etc. The Aero agent populates this
187
- // with its session id so post-mortems can group a related sequence of
188
- // mutations. NULL when the caller didn't provide one.
189
- actorSession: text("actor_session"),
190
- createdAt: text("created_at").notNull()
191
- }, (table) => [
192
- index("idx_audit_log_project").on(table.projectId),
193
- index("idx_audit_log_created").on(table.createdAt)
194
- ]);
195
- var apiKeys = sqliteTable("api_keys", {
196
- id: text("id").primaryKey(),
197
- name: text("name").notNull(),
198
- keyHash: text("key_hash").notNull().unique(),
199
- keyPrefix: text("key_prefix").notNull(),
200
- scopes: text("scopes", { mode: "json" }).$type().notNull().default(["*"]),
201
- createdAt: text("created_at").notNull(),
202
- lastUsedAt: text("last_used_at"),
203
- revokedAt: text("revoked_at")
204
- }, (table) => [
205
- index("idx_api_keys_prefix").on(table.keyPrefix)
206
- ]);
207
- var schedules = sqliteTable("schedules", {
208
- id: text("id").primaryKey(),
209
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
210
- // Run kind dispatched by this schedule. Must be a value of `RunKinds` —
211
- // currently 'answer-visibility' and 'traffic-sync' are user-facing schedulable kinds.
212
- // Defaults to 'answer-visibility' for backward compatibility with rows
213
- // created before migration 53.
214
- kind: text("kind").notNull().default("answer-visibility"),
215
- cronExpr: text("cron_expr").notNull(),
216
- preset: text("preset"),
217
- timezone: text("timezone").notNull().default("UTC"),
218
- enabled: integer("enabled", { mode: "boolean" }).notNull().default(true),
219
- providers: text("providers", { mode: "json" }).$type().notNull().default([]),
220
- /** Optional traffic-source UUID for traffic-sync schedules. Null for other kinds. */
221
- sourceId: text("source_id"),
222
- lastRunAt: text("last_run_at"),
223
- nextRunAt: text("next_run_at"),
224
- createdAt: text("created_at").notNull(),
225
- updatedAt: text("updated_at").notNull()
226
- }, (table) => [
227
- uniqueIndex("idx_schedules_project_kind").on(table.projectId, table.kind)
228
- ]);
229
- var notifications = sqliteTable("notifications", {
230
- id: text("id").primaryKey(),
231
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
232
- channel: text("channel").notNull(),
233
- config: text("config", { mode: "json" }).$type().notNull(),
234
- webhookSecret: text("webhook_secret"),
235
- enabled: integer("enabled", { mode: "boolean" }).notNull().default(true),
236
- createdAt: text("created_at").notNull(),
237
- updatedAt: text("updated_at").notNull()
238
- }, (table) => [
239
- index("idx_notifications_project").on(table.projectId)
240
- ]);
241
- var googleConnections = sqliteTable("google_connections", {
242
- id: text("id").primaryKey(),
243
- domain: text("domain").notNull(),
244
- connectionType: text("connection_type").notNull(),
245
- propertyId: text("property_id"),
246
- sitemapUrl: text("sitemap_url"),
247
- scopes: text("scopes", { mode: "json" }).$type().notNull().default([]),
248
- // The project that established this connection. Used by the OAuth callback
249
- // and the DELETE route to refuse cross-project takeover (a malicious caller
250
- // who points another project at the same `canonicalDomain` cannot overwrite
251
- // or remove an existing connection owned by the original project). Nullable
252
- // for legacy rows written before the column existed — those are treated as
253
- // unowned and the first connect call to claim them succeeds. See root
254
- // AGENTS.md "Deployment Posture" for the broader multi-tenancy posture.
255
- createdByProjectId: text("created_by_project_id").references(() => projects.id, { onDelete: "set null" }),
256
- createdAt: text("created_at").notNull(),
257
- updatedAt: text("updated_at").notNull()
258
- }, (table) => [
259
- uniqueIndex("idx_google_conn_domain_type").on(table.domain, table.connectionType),
260
- index("idx_google_conn_project").on(table.createdByProjectId)
261
- ]);
262
- var gscSearchData = sqliteTable("gsc_search_data", {
263
- id: text("id").primaryKey(),
264
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
265
- syncRunId: text("sync_run_id").notNull().references(() => runs.id, { onDelete: "cascade" }),
266
- date: text("date").notNull(),
267
- query: text("query").notNull(),
268
- page: text("page").notNull(),
269
- country: text("country"),
270
- device: text("device"),
271
- clicks: integer("clicks").notNull().default(0),
272
- impressions: integer("impressions").notNull().default(0),
273
- ctr: text("ctr").notNull().default("0"),
274
- position: text("position").notNull().default("0"),
275
- createdAt: text("created_at").notNull()
276
- }, (table) => [
277
- index("idx_gsc_search_project_date").on(table.projectId, table.date),
278
- index("idx_gsc_search_query").on(table.query),
279
- index("idx_gsc_search_run").on(table.syncRunId)
280
- ]);
281
- var gscUrlInspections = sqliteTable("gsc_url_inspections", {
282
- id: text("id").primaryKey(),
283
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
284
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" }),
285
- url: text("url").notNull(),
286
- indexingState: text("indexing_state"),
287
- verdict: text("verdict"),
288
- coverageState: text("coverage_state"),
289
- pageFetchState: text("page_fetch_state"),
290
- robotsTxtState: text("robots_txt_state"),
291
- crawlTime: text("crawl_time"),
292
- lastCrawlResult: text("last_crawl_result"),
293
- isMobileFriendly: integer("is_mobile_friendly", { mode: "boolean" }),
294
- richResults: text("rich_results", { mode: "json" }).$type().notNull().default([]),
295
- referringUrls: text("referring_urls", { mode: "json" }).$type().notNull().default([]),
296
- inspectedAt: text("inspected_at").notNull(),
297
- createdAt: text("created_at").notNull()
298
- }, (table) => [
299
- index("idx_gsc_inspect_project_url").on(table.projectId, table.url),
300
- index("idx_gsc_inspect_run").on(table.syncRunId),
301
- index("idx_gsc_inspect_url_time").on(table.url, table.inspectedAt)
302
- ]);
303
- var gscCoverageSnapshots = sqliteTable("gsc_coverage_snapshots", {
304
- id: text("id").primaryKey(),
305
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
306
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" }),
307
- date: text("date").notNull(),
308
- indexed: integer("indexed").notNull().default(0),
309
- notIndexed: integer("not_indexed").notNull().default(0),
310
- reasonBreakdown: text("reason_breakdown", { mode: "json" }).$type().notNull().default({}),
311
- createdAt: text("created_at").notNull()
312
- }, (table) => [
313
- index("idx_gsc_coverage_snap_project_date").on(table.projectId, table.date),
314
- index("idx_gsc_coverage_snap_run").on(table.syncRunId)
315
- ]);
316
- var bingCoverageSnapshots = sqliteTable("bing_coverage_snapshots", {
317
- id: text("id").primaryKey(),
318
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
319
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" }),
320
- date: text("date").notNull(),
321
- indexed: integer("indexed").notNull().default(0),
322
- notIndexed: integer("not_indexed").notNull().default(0),
323
- unknown: integer("unknown").notNull().default(0),
324
- createdAt: text("created_at").notNull()
325
- }, (table) => [
326
- uniqueIndex("idx_bing_coverage_snap_project_date_unique").on(table.projectId, table.date),
327
- index("idx_bing_coverage_snap_run").on(table.syncRunId)
328
- ]);
329
- var bingConnections = sqliteTable("bing_connections", {
330
- id: text("id").primaryKey(),
331
- domain: text("domain").notNull(),
332
- siteUrl: text("site_url"),
333
- // Same takeover-prevention column as `google_connections.createdByProjectId`.
334
- // The Bing connect / disconnect routes refuse cross-project writes when an
335
- // existing row's owner doesn't match. Null for legacy rows (treated as
336
- // unowned).
337
- createdByProjectId: text("created_by_project_id").references(() => projects.id, { onDelete: "set null" }),
338
- createdAt: text("created_at").notNull(),
339
- updatedAt: text("updated_at").notNull()
340
- }, (table) => [
341
- uniqueIndex("idx_bing_conn_domain").on(table.domain),
342
- index("idx_bing_conn_project").on(table.createdByProjectId)
343
- ]);
344
- var bingUrlInspections = sqliteTable("bing_url_inspections", {
345
- id: text("id").primaryKey(),
346
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
347
- url: text("url").notNull(),
348
- httpCode: integer("http_code"),
349
- inIndex: integer("in_index", { mode: "boolean" }),
350
- lastCrawledDate: text("last_crawled_date"),
351
- inIndexDate: text("in_index_date"),
352
- inspectedAt: text("inspected_at").notNull(),
353
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" }),
354
- createdAt: text("created_at").notNull(),
355
- documentSize: integer("document_size"),
356
- anchorCount: integer("anchor_count"),
357
- discoveryDate: text("discovery_date")
358
- }, (table) => [
359
- index("idx_bing_inspect_project_url").on(table.projectId, table.url),
360
- index("idx_bing_inspect_url_time").on(table.url, table.inspectedAt),
361
- index("idx_bing_inspect_run").on(table.syncRunId)
362
- ]);
363
- var bingKeywordStats = sqliteTable("bing_keyword_stats", {
364
- id: text("id").primaryKey(),
365
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
366
- query: text("query").notNull(),
367
- impressions: integer("impressions").notNull().default(0),
368
- clicks: integer("clicks").notNull().default(0),
369
- ctr: text("ctr").notNull().default("0"),
370
- averagePosition: text("average_position").notNull().default("0"),
371
- syncedAt: text("synced_at").notNull(),
372
- createdAt: text("created_at").notNull()
373
- }, (table) => [
374
- index("idx_bing_keyword_project").on(table.projectId),
375
- index("idx_bing_keyword_query").on(table.query)
376
- ]);
377
- var gaConnections = sqliteTable("ga_connections", {
378
- id: text("id").primaryKey(),
379
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
380
- propertyId: text("property_id").notNull(),
381
- clientEmail: text("client_email").notNull(),
382
- createdAt: text("created_at").notNull(),
383
- updatedAt: text("updated_at").notNull()
384
- }, (table) => [
385
- uniqueIndex("idx_ga_conn_project").on(table.projectId)
386
- ]);
387
- var gaTrafficSnapshots = sqliteTable("ga_traffic_snapshots", {
388
- id: text("id").primaryKey(),
389
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
390
- date: text("date").notNull(),
391
- landingPage: text("landing_page").notNull(),
392
- /**
393
- * Canonicalized form of `landingPage` produced by `normalizeUrlPath()` in
394
- * `@ainyc/canonry-contracts`. Nullable so existing rows survive migration;
395
- * new GA4 sync writes populate it. Per-page aggregations should
396
- * `GROUP BY COALESCE(landing_page_normalized, landing_page)` so
397
- * partially-backfilled state still aggregates correctly.
398
- */
399
- landingPageNormalized: text("landing_page_normalized"),
400
- sessions: integer("sessions").notNull().default(0),
401
- organicSessions: integer("organic_sessions").notNull().default(0),
402
- /**
403
- * Per-page Direct channel sessions. Nullable so existing rows survive
404
- * the migration; new GA4 sync writes populate it. Distinct from
405
- * `sessions - organicSessions` because that residual lumps Direct
406
- * together with social, referral, paid, and email.
407
- */
408
- directSessions: integer("direct_sessions"),
409
- users: integer("users").notNull().default(0),
410
- syncedAt: text("synced_at").notNull(),
411
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" })
412
- }, (table) => [
413
- index("idx_ga_traffic_project_date").on(table.projectId, table.date),
414
- index("idx_ga_traffic_page").on(table.landingPage),
415
- index("idx_ga_traffic_page_normalized").on(table.projectId, table.date, table.landingPageNormalized),
416
- index("idx_ga_traffic_run").on(table.syncRunId)
417
- ]);
418
- var gaAiReferrals = sqliteTable("ga_ai_referrals", {
419
- id: text("id").primaryKey(),
420
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
421
- date: text("date").notNull(),
422
- source: text("source").notNull(),
423
- medium: text("medium").notNull(),
424
- /** Which GA4 dimension produced this row: 'session' | 'first_user' | 'manual_utm' */
425
- sourceDimension: text("source_dimension").notNull().default("session"),
426
- /** GA4 default channel group for the session (e.g. 'Referral', 'Organic Social'). */
427
- channelGroup: text("channel_group").notNull().default("(not set)"),
428
- landingPage: text("landing_page").notNull().default("(not set)"),
429
- landingPageNormalized: text("landing_page_normalized"),
430
- sessions: integer("sessions").notNull().default(0),
431
- users: integer("users").notNull().default(0),
432
- syncedAt: text("synced_at").notNull(),
433
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" })
434
- }, (table) => [
435
- index("idx_ga_ai_ref_project_date").on(table.projectId, table.date),
436
- index("idx_ga_ai_ref_source").on(table.source),
437
- index("idx_ga_ai_ref_landing_page").on(table.projectId, table.date, table.landingPageNormalized),
438
- uniqueIndex("idx_ga_ai_ref_unique_v4").on(table.projectId, table.date, table.source, table.medium, table.sourceDimension, table.channelGroup, table.landingPage),
439
- index("idx_ga_ai_ref_run").on(table.syncRunId)
440
- ]);
441
- var gaSocialReferrals = sqliteTable("ga_social_referrals", {
442
- id: text("id").primaryKey(),
443
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
444
- date: text("date").notNull(),
445
- source: text("source").notNull(),
446
- medium: text("medium").notNull(),
447
- /** GA4 default channel group (e.g. 'Organic Social', 'Paid Social') */
448
- channelGroup: text("channel_group").notNull().default("Organic Social"),
449
- sessions: integer("sessions").notNull().default(0),
450
- users: integer("users").notNull().default(0),
451
- syncedAt: text("synced_at").notNull(),
452
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" })
453
- }, (table) => [
454
- index("idx_ga_social_ref_project_date").on(table.projectId, table.date),
455
- index("idx_ga_social_ref_source").on(table.source),
456
- uniqueIndex("idx_ga_social_ref_unique").on(table.projectId, table.date, table.source, table.medium, table.channelGroup),
457
- index("idx_ga_social_ref_run").on(table.syncRunId)
458
- ]);
459
- var gaTrafficSummaries = sqliteTable("ga_traffic_summaries", {
460
- id: text("id").primaryKey(),
461
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
462
- periodStart: text("period_start").notNull(),
463
- periodEnd: text("period_end").notNull(),
464
- totalSessions: integer("total_sessions").notNull().default(0),
465
- totalOrganicSessions: integer("total_organic_sessions").notNull().default(0),
466
- totalUsers: integer("total_users").notNull().default(0),
467
- syncedAt: text("synced_at").notNull(),
468
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" })
469
- }, (table) => [
470
- index("idx_ga_summary_project").on(table.projectId),
471
- index("idx_ga_summary_run").on(table.syncRunId)
472
- ]);
473
- var gaTrafficWindowSummaries = sqliteTable("ga_traffic_window_summaries", {
474
- id: text("id").primaryKey(),
475
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
476
- /** '7d' | '30d' | '90d' */
477
- windowKey: text("window_key").notNull(),
478
- periodStart: text("period_start").notNull(),
479
- periodEnd: text("period_end").notNull(),
480
- totalSessions: integer("total_sessions").notNull().default(0),
481
- totalOrganicSessions: integer("total_organic_sessions").notNull().default(0),
482
- totalDirectSessions: integer("total_direct_sessions").notNull().default(0),
483
- totalUsers: integer("total_users").notNull().default(0),
484
- syncedAt: text("synced_at").notNull(),
485
- syncRunId: text("sync_run_id").references(() => runs.id, { onDelete: "cascade" })
486
- }, (table) => [
487
- uniqueIndex("idx_ga_window_summary_unique").on(table.projectId, table.windowKey),
488
- index("idx_ga_window_summary_run").on(table.syncRunId)
489
- ]);
490
- var usageCounters = sqliteTable("usage_counters", {
491
- id: text("id").primaryKey(),
492
- scope: text("scope").notNull(),
493
- period: text("period").notNull(),
494
- metric: text("metric").notNull(),
495
- count: integer("count").notNull().default(0),
496
- updatedAt: text("updated_at").notNull()
497
- }, (table) => [
498
- uniqueIndex("idx_usage_scope_period_metric").on(table.scope, table.period, table.metric),
499
- index("idx_usage_scope_period").on(table.scope, table.period)
500
- ]);
501
- var insights = sqliteTable("insights", {
502
- id: text("id").primaryKey(),
503
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
504
- runId: text("run_id").references(() => runs.id, { onDelete: "cascade" }),
505
- type: text("type").notNull(),
506
- severity: text("severity").notNull(),
507
- title: text("title").notNull(),
508
- query: text("query").notNull(),
509
- provider: text("provider").notNull(),
510
- recommendation: text("recommendation", { mode: "json" }).$type(),
511
- cause: text("cause", { mode: "json" }).$type(),
512
- dismissed: integer("dismissed", { mode: "boolean" }).notNull().default(false),
513
- createdAt: text("created_at").notNull()
514
- }, (table) => [
515
- index("idx_insights_project").on(table.projectId),
516
- index("idx_insights_run").on(table.runId),
517
- index("idx_insights_created").on(table.createdAt),
518
- index("idx_insights_query_provider").on(table.query, table.provider)
519
- ]);
520
- var healthSnapshots = sqliteTable("health_snapshots", {
521
- id: text("id").primaryKey(),
522
- projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
523
- runId: text("run_id").references(() => runs.id, { onDelete: "cascade" }),
524
- overallCitedRate: text("overall_cited_rate").notNull(),
525
- totalPairs: integer("total_pairs").notNull(),
526
- citedPairs: integer("cited_pairs").notNull(),
527
- providerBreakdown: text("provider_breakdown", { mode: "json" }).$type().notNull().default({}),
528
- createdAt: text("created_at").notNull()
529
- }, (table) => [
530
- index("idx_health_snapshots_project").on(table.projectId),
531
- index("idx_health_snapshots_run").on(table.runId),
532
- index("idx_health_snapshots_created").on(table.createdAt)
533
- ]);
534
- var agentSessions = sqliteTable("agent_sessions", {
535
- id: text("id").primaryKey(),
536
- projectId: text("project_id").notNull().unique().references(() => projects.id, { onDelete: "cascade" }),
537
- systemPrompt: text("system_prompt").notNull(),
538
- modelProvider: text("model_provider").notNull(),
539
- modelId: text("model_id").notNull(),
540
- messages: text("messages").notNull().default("[]"),
541
- followUpQueue: text("follow_up_queue").notNull().default("[]"),
542
- createdAt: text("created_at").notNull(),
543
- updatedAt: text("updated_at").notNull()
544
- }, (table) => [
545
- index("idx_agent_sessions_project").on(table.projectId),
546
- index("idx_agent_sessions_updated").on(table.updatedAt)
547
- ]);
548
// Schema for the `cc_release_syncs` table: one row per `release` (unique),
// with nullable path/hash/size, counter, timing and error columns alongside
// the required `status`.
var ccReleaseSyncs = sqliteTable(
  "cc_release_syncs",
  {
    id: text("id").primaryKey(),
    release: text("release").notNull().unique(),
    status: text("status").notNull(),
    phaseDetail: text("phase_detail"),

    // Vertex / edges file bookkeeping.
    vertexPath: text("vertex_path"),
    edgesPath: text("edges_path"),
    vertexSha256: text("vertex_sha256"),
    edgesSha256: text("edges_sha256"),
    vertexBytes: integer("vertex_bytes"),
    edgesBytes: integer("edges_bytes"),

    // Counters.
    projectsProcessed: integer("projects_processed"),
    domainsDiscovered: integer("domains_discovered"),

    // Download and query phase timestamps.
    downloadStartedAt: text("download_started_at"),
    downloadFinishedAt: text("download_finished_at"),
    queryStartedAt: text("query_started_at"),
    queryFinishedAt: text("query_finished_at"),

    error: text("error"),
    createdAt: text("created_at").notNull(),
    updatedAt: text("updated_at").notNull()
  },
  (t) => [index("idx_cc_release_syncs_status").on(t.status)]
);
571
// Schema for the `backlink_domains` table. Rows reference both a project and a
// release sync and are deleted with either. The unique index allows one row
// per (project, release, linking domain).
var backlinkDomains = sqliteTable(
  "backlink_domains",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    releaseSyncId: text("release_sync_id")
      .notNull()
      .references(() => ccReleaseSyncs.id, { onDelete: "cascade" }),
    release: text("release").notNull(),
    targetDomain: text("target_domain").notNull(),
    linkingDomain: text("linking_domain").notNull(),
    numHosts: integer("num_hosts").notNull(),
    createdAt: text("created_at").notNull()
  },
  (t) => [
    index("idx_backlink_domains_project").on(t.projectId),
    index("idx_backlink_domains_release_sync").on(t.releaseSyncId),
    index("idx_backlink_domains_project_release").on(t.projectId, t.release),
    index("idx_backlink_domains_hosts").on(t.numHosts),
    uniqueIndex("idx_backlink_domains_unique").on(
      t.projectId,
      t.release,
      t.linkingDomain
    )
  ]
);
587
// Schema for the `backlink_summaries` table. The unique index allows one row
// per (project, release); rows are deleted with their project or release sync.
var backlinkSummaries = sqliteTable(
  "backlink_summaries",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    releaseSyncId: text("release_sync_id")
      .notNull()
      .references(() => ccReleaseSyncs.id, { onDelete: "cascade" }),
    release: text("release").notNull(),
    targetDomain: text("target_domain").notNull(),
    totalLinkingDomains: integer("total_linking_domains").notNull(),
    totalHosts: integer("total_hosts").notNull(),
    // Kept as text rather than a numeric column.
    top10HostsShare: text("top_10_hosts_share").notNull(),
    queriedAt: text("queried_at").notNull(),
    createdAt: text("created_at").notNull()
  },
  (t) => [
    uniqueIndex("idx_backlink_summaries_project_release").on(t.projectId, t.release),
    index("idx_backlink_summaries_project").on(t.projectId)
  ]
);
602
// Schema for the `agent_memory` table: key/value rows scoped to a project,
// with `key` unique within each project. Rows are deleted with their project.
var agentMemory = sqliteTable(
  "agent_memory",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    key: text("key").notNull(),
    value: text("value").notNull(),
    source: text("source").notNull(),
    createdAt: text("created_at").notNull(),
    updatedAt: text("updated_at").notNull()
  },
  (t) => [
    uniqueIndex("uniq_agent_memory_project_key").on(t.projectId, t.key),
    index("idx_agent_memory_project_updated").on(t.projectId, t.updatedAt)
  ]
);
614
// Schema for the `traffic_sources` table. Rows are deleted with their project.
var trafficSources = sqliteTable(
  "traffic_sources",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    sourceType: text("source_type").notNull(),
    displayName: text("display_name").notNull(),
    status: text("status").notNull(),
    lastSyncedAt: text("last_synced_at"),
    lastCursor: text("last_cursor"),
    lastError: text("last_error"),
    // JSON array of the normalized event IDs (for example
    // `cloud-run:<ts>:<insertId>`) seen during the latest successful sync.
    // It acts as a bounded ring buffer for de-duplicating events across sync
    // runs at the boundary timestamp, where clamping to lastSyncedAt alone
    // still leaves a small overlap window.
    lastEventIds: text("last_event_ids", { mode: "json" }).$type(),
    archivedAt: text("archived_at"),
    configJson: text("config_json", { mode: "json" })
      .$type()
      .notNull()
      .default({}),
    createdAt: text("created_at").notNull(),
    updatedAt: text("updated_at").notNull()
  },
  (t) => [
    index("idx_traffic_sources_project").on(t.projectId),
    index("idx_traffic_sources_project_status").on(t.projectId, t.status)
  ]
);
636
// Schema for the `crawler_events_hourly` table. There is no surrogate id: the
// primary key is the composite of the seven dimension columns listed below,
// and `hits` (default 0) is the stored count for that combination.
var crawlerEventsHourly = sqliteTable(
  "crawler_events_hourly",
  {
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    sourceId: text("source_id")
      .notNull()
      .references(() => trafficSources.id, { onDelete: "cascade" }),
    tsHour: text("ts_hour").notNull(),
    botId: text("bot_id").notNull(),
    operator: text("operator").notNull(),
    verificationStatus: text("verification_status").notNull(),
    pathNormalized: text("path_normalized").notNull(),
    status: integer("status").notNull(),
    hits: integer("hits").notNull().default(0),
    sampledUserAgent: text("sampled_user_agent"),
    createdAt: text("created_at").notNull(),
    updatedAt: text("updated_at").notNull()
  },
  (t) => {
    const compositeKey = primaryKey({
      columns: [
        t.projectId,
        t.sourceId,
        t.tsHour,
        t.botId,
        t.verificationStatus,
        t.pathNormalized,
        t.status
      ]
    });
    return [
      compositeKey,
      index("idx_crawler_hourly_project_ts").on(t.projectId, t.tsHour),
      index("idx_crawler_hourly_path").on(t.projectId, t.pathNormalized)
    ];
  }
);
664
// Schema for the `ai_user_fetch_events_hourly` table. Its columns and
// composite primary key have the same shape as `crawler_events_hourly`; only
// the table and index names differ.
var aiUserFetchEventsHourly = sqliteTable(
  "ai_user_fetch_events_hourly",
  {
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    sourceId: text("source_id")
      .notNull()
      .references(() => trafficSources.id, { onDelete: "cascade" }),
    tsHour: text("ts_hour").notNull(),
    botId: text("bot_id").notNull(),
    operator: text("operator").notNull(),
    verificationStatus: text("verification_status").notNull(),
    pathNormalized: text("path_normalized").notNull(),
    status: integer("status").notNull(),
    hits: integer("hits").notNull().default(0),
    sampledUserAgent: text("sampled_user_agent"),
    createdAt: text("created_at").notNull(),
    updatedAt: text("updated_at").notNull()
  },
  (t) => {
    const compositeKey = primaryKey({
      columns: [
        t.projectId,
        t.sourceId,
        t.tsHour,
        t.botId,
        t.verificationStatus,
        t.pathNormalized,
        t.status
      ]
    });
    return [
      compositeKey,
      index("idx_ai_user_fetch_hourly_project_ts").on(t.projectId, t.tsHour),
      index("idx_ai_user_fetch_hourly_path").on(t.projectId, t.pathNormalized)
    ];
  }
);
692
// Schema for the `ai_referral_events_hourly` table. The primary key is the
// composite of eight dimension columns; `operator` is stored but is not part
// of that key.
var aiReferralEventsHourly = sqliteTable(
  "ai_referral_events_hourly",
  {
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    sourceId: text("source_id")
      .notNull()
      .references(() => trafficSources.id, { onDelete: "cascade" }),
    tsHour: text("ts_hour").notNull(),
    product: text("product").notNull(),
    operator: text("operator").notNull(),
    sourceDomain: text("source_domain").notNull(),
    evidenceType: text("evidence_type").notNull(),
    landingPathNormalized: text("landing_path_normalized").notNull(),
    status: integer("status").notNull(),
    sessionsOrHits: integer("sessions_or_hits").notNull().default(0),
    usersEstimated: integer("users_estimated"),
    createdAt: text("created_at").notNull(),
    updatedAt: text("updated_at").notNull()
  },
  (t) => {
    const compositeKey = primaryKey({
      columns: [
        t.projectId,
        t.sourceId,
        t.tsHour,
        t.product,
        t.sourceDomain,
        t.evidenceType,
        t.landingPathNormalized,
        t.status
      ]
    });
    return [
      compositeKey,
      index("idx_ai_referral_hourly_project_ts").on(t.projectId, t.tsHour),
      index("idx_ai_referral_hourly_landing").on(t.projectId, t.landingPathNormalized)
    ];
  }
);
722
// Schema for the `raw_event_samples` table. Rows are deleted with their
// project or traffic source; request-level fields other than the path are
// nullable.
var rawEventSamples = sqliteTable(
  "raw_event_samples",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    sourceId: text("source_id")
      .notNull()
      .references(() => trafficSources.id, { onDelete: "cascade" }),
    ts: text("ts").notNull(),
    eventType: text("event_type").notNull(),
    ipHash: text("ip_hash"),
    userAgent: text("user_agent"),
    pathNormalized: text("path_normalized").notNull(),
    status: integer("status"),
    refererHost: text("referer_host"),
    classifierDetailsJson: text("classifier_details_json", { mode: "json" })
      .$type()
      .notNull()
      .default({}),
    createdAt: text("created_at").notNull()
  },
  (t) => [
    index("idx_raw_event_samples_project_ts").on(t.projectId, t.ts),
    index("idx_raw_event_samples_source_ts").on(t.sourceId, t.ts),
    index("idx_raw_event_samples_event_type").on(t.eventType)
  ]
);
740
// Schema for the `discovery_sessions` table. `status` defaults to "queued".
// `run_id` is a plain text column with no foreign-key reference declared here.
var discoverySessions = sqliteTable(
  "discovery_sessions",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    runId: text("run_id"),
    status: text("status").notNull().default("queued"),
    icpDescription: text("icp_description"),

    // Seed settings and counts.
    seedProvider: text("seed_provider"),
    seedCountRaw: integer("seed_count_raw"),
    seedCount: integer("seed_count"),
    dedupThreshold: real("dedup_threshold"),

    // Probe outcome counts.
    probeCount: integer("probe_count"),
    citedCount: integer("cited_count"),
    aspirationalCount: integer("aspirational_count"),
    wastedCount: integer("wasted_count"),

    // Stored as JSON text; defaults to an empty array.
    competitorMap: text("competitor_map", { mode: "json" })
      .$type()
      .notNull()
      .default([]),
    error: text("error"),
    startedAt: text("started_at"),
    finishedAt: text("finished_at"),
    createdAt: text("created_at").notNull()
  },
  (t) => [
    index("idx_discovery_sessions_project_created").on(t.projectId, t.createdAt),
    index("idx_discovery_sessions_run").on(t.runId)
  ]
);
763
// Schema for the `discovery_probes` table. Each row references a discovery
// session and a project and is deleted with either.
var discoveryProbes = sqliteTable(
  "discovery_probes",
  {
    id: text("id").primaryKey(),
    sessionId: text("session_id")
      .notNull()
      .references(() => discoverySessions.id, { onDelete: "cascade" }),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    query: text("query").notNull(),
    bucket: text("bucket"),
    citationState: text("citation_state").notNull(),
    // Stored as JSON text; defaults to an empty array.
    citedDomains: text("cited_domains", { mode: "json" })
      .$type()
      .notNull()
      .default([]),
    rawResponse: text("raw_response"),
    createdAt: text("created_at").notNull()
  },
  (t) => [
    index("idx_discovery_probes_session").on(t.sessionId),
    index("idx_discovery_probes_project").on(t.projectId)
  ]
);
777
// Schema for the `content_target_dismissals` table. The unique index allows
// one row per (project, target_ref); rows are deleted with their project.
var contentTargetDismissals = sqliteTable(
  "content_target_dismissals",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    targetRef: text("target_ref").notNull(),
    addressedUrl: text("addressed_url"),
    note: text("note"),
    dismissedAt: text("dismissed_at").notNull()
  },
  (t) => [
    uniqueIndex("idx_content_target_dismissals_project_ref").on(t.projectId, t.targetRef),
    index("idx_content_target_dismissals_project").on(t.projectId)
  ]
);
788
// Schema for the `recommendation_explanations` table. The unique index allows
// one row per (project, target_ref, prompt_version); rows are deleted with
// their project.
var recommendationExplanations = sqliteTable(
  "recommendation_explanations",
  {
    id: text("id").primaryKey(),
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    targetRef: text("target_ref").notNull(),
    promptVersion: text("prompt_version").notNull(),
    provider: text("provider").notNull(),
    model: text("model").notNull(),
    responseText: text("response_text").notNull(),
    /**
     * Estimated cost, recorded for auditing; 0 when the cost is unknown.
     * The original note defines the unit as millicents = 1/100 of a cent.
     * NOTE(review): "milli" normally means 1/1000 — confirm the intended unit.
     */
    costMillicents: integer("cost_millicents").notNull().default(0),
    generatedAt: text("generated_at").notNull()
  },
  (t) => [
    uniqueIndex("idx_recommendation_explanations_unique").on(
      t.projectId,
      t.targetRef,
      t.promptVersion
    ),
    index("idx_recommendation_explanations_project").on(t.projectId)
  ]
);
807
// Schema for the `_migrations` bookkeeping table, keyed by integer `version`.
var migrationsTable = sqliteTable(
  "_migrations",
  {
    version: integer("version").primaryKey(),
    name: text("name").notNull(),
    appliedAt: text("applied_at").notNull()
  }
);
812
-
813
- // ../db/src/client.ts
814
/**
 * Build the database client for the SQLite file at `databasePath`.
 *
 * Ensures the parent directory exists, opens a `Database` on the path, applies
 * the journal-mode, foreign-key and busy-timeout pragmas in that order, and
 * wraps the connection with `drizzle` using `schema_exports` as the schema.
 *
 * @param databasePath Filesystem path of the database file.
 * @returns The value produced by `drizzle(...)` for the opened connection.
 */
function createClient(databasePath) {
  const parentDir = dirname(databasePath);
  mkdirSync(parentDir, { recursive: true });

  const connection = new Database(databasePath);
  const pragmas = ["journal_mode = WAL", "foreign_keys = ON", "busy_timeout = 5000"];
  for (const pragma of pragmas) {
    connection.pragma(pragma);
  }

  return drizzle(connection, { schema: schema_exports });
}
822
-
823
- // ../db/src/json.ts
824
/**
 * Decode a JSON-encoded column value, falling back when it is absent or
 * malformed.
 *
 * @param value Raw column value. `null`, `undefined` and `""` yield `fallback`.
 *   A non-string value is treated as already decoded and returned unchanged.
 * @param fallback Returned when there is nothing to parse or parsing throws.
 * @returns The parsed value, the untouched non-string input, or `fallback`.
 */
function parseJsonColumn(value, fallback) {
  if (value == null || value === "") return fallback;
  // JSON.parse converts a non-string argument to a string before parsing, so
  // an already-decoded object would turn into "[object Object]" and be thrown
  // away for the fallback, while [1] would silently become 1. Return such
  // values as they are instead.
  if (typeof value !== "string") return value;
  try {
    return JSON.parse(value);
  } catch {
    // Malformed JSON is deliberately tolerated: callers get their fallback.
    return fallback;
  }
}
832
-
833
- // ../db/src/migrate.ts
834
- import { sql } from "drizzle-orm";
835
// Baseline schema script: creates the core tables (projects, queries,
// competitors, runs, query_snapshots, audit_log, api_keys, usage_counters,
// schedules, notifications), their indexes, and the `_migrations` tracking
// table. Every CREATE statement is guarded with IF NOT EXISTS. The NOTE
// embedded in the SQL states that this script re-runs on every boot, so it
// must not reintroduce objects that a later numbered migration replaced.
var MIGRATION_SQL = `
CREATE TABLE IF NOT EXISTS projects (
id TEXT PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
display_name TEXT NOT NULL,
canonical_domain TEXT NOT NULL,
owned_domains TEXT NOT NULL DEFAULT '[]',
country TEXT NOT NULL,
language TEXT NOT NULL,
tags TEXT NOT NULL DEFAULT '[]',
labels TEXT NOT NULL DEFAULT '{}',
providers TEXT NOT NULL DEFAULT '[]',
config_source TEXT NOT NULL DEFAULT 'cli',
config_revision INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS queries (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
query TEXT NOT NULL,
created_at TEXT NOT NULL,
UNIQUE(project_id, query)
);

CREATE TABLE IF NOT EXISTS competitors (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
domain TEXT NOT NULL,
created_at TEXT NOT NULL,
UNIQUE(project_id, domain)
);

CREATE TABLE IF NOT EXISTS runs (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
kind TEXT NOT NULL DEFAULT 'answer-visibility',
status TEXT NOT NULL DEFAULT 'queued',
trigger TEXT NOT NULL DEFAULT 'manual',
started_at TEXT,
finished_at TEXT,
error TEXT,
created_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS query_snapshots (
id TEXT PRIMARY KEY,
run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
query_id TEXT NOT NULL REFERENCES queries(id) ON DELETE CASCADE,
provider TEXT NOT NULL DEFAULT 'gemini',
citation_state TEXT NOT NULL,
answer_text TEXT,
cited_domains TEXT NOT NULL DEFAULT '[]',
competitor_overlap TEXT NOT NULL DEFAULT '[]',
raw_response TEXT,
created_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS audit_log (
id TEXT PRIMARY KEY,
project_id TEXT REFERENCES projects(id) ON DELETE CASCADE,
actor TEXT NOT NULL,
action TEXT NOT NULL,
entity_type TEXT NOT NULL,
entity_id TEXT,
diff TEXT,
created_at TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS api_keys (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
key_hash TEXT NOT NULL UNIQUE,
key_prefix TEXT NOT NULL,
scopes TEXT NOT NULL DEFAULT '["*"]',
created_at TEXT NOT NULL,
last_used_at TEXT,
revoked_at TEXT
);

CREATE TABLE IF NOT EXISTS usage_counters (
id TEXT PRIMARY KEY,
scope TEXT NOT NULL,
period TEXT NOT NULL,
metric TEXT NOT NULL,
count INTEGER NOT NULL DEFAULT 0,
updated_at TEXT NOT NULL,
UNIQUE(scope, period, metric)
);

CREATE INDEX IF NOT EXISTS idx_queries_project ON queries(project_id);
CREATE INDEX IF NOT EXISTS idx_competitors_project ON competitors(project_id);
CREATE INDEX IF NOT EXISTS idx_runs_project ON runs(project_id);
CREATE INDEX IF NOT EXISTS idx_runs_status ON runs(status);
CREATE INDEX IF NOT EXISTS idx_snapshots_run ON query_snapshots(run_id);
CREATE INDEX IF NOT EXISTS idx_snapshots_query ON query_snapshots(query_id);
CREATE INDEX IF NOT EXISTS idx_audit_log_project ON audit_log(project_id);
CREATE INDEX IF NOT EXISTS idx_audit_log_created ON audit_log(created_at);
CREATE TABLE IF NOT EXISTS schedules (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
cron_expr TEXT NOT NULL,
preset TEXT,
timezone TEXT NOT NULL DEFAULT 'UTC',
enabled INTEGER NOT NULL DEFAULT 1,
providers TEXT NOT NULL DEFAULT '[]',
last_run_at TEXT,
next_run_at TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
UNIQUE(project_id)
);

CREATE TABLE IF NOT EXISTS notifications (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
channel TEXT NOT NULL,
config TEXT NOT NULL,
enabled INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_api_keys_prefix ON api_keys(key_prefix);
CREATE INDEX IF NOT EXISTS idx_usage_scope_period ON usage_counters(scope, period);
-- NOTE: the (project_id) UNIQUE INDEX that used to live here was replaced by
-- v53's (project_id, kind) index. MIGRATION_SQL re-runs on every boot, so we
-- must NOT recreate the single-column index \u2014 it would conflict with v53 and
-- break traffic-sync schedule creation.
CREATE INDEX IF NOT EXISTS idx_notifications_project ON notifications(project_id);

-- Migration tracking: records which version has been applied.
-- On boot only versions > max applied version are run.
CREATE TABLE IF NOT EXISTS _migrations (
version INTEGER PRIMARY KEY,
name TEXT NOT NULL,
applied_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
`;
975
- var MIGRATION_VERSIONS = [
976
- {
977
- version: 2,
978
- name: "add-providers-column",
979
- statements: [
980
- `ALTER TABLE projects ADD COLUMN providers TEXT NOT NULL DEFAULT '[]'`
981
- ]
982
- },
983
- {
984
- version: 3,
985
- name: "add-webhook-secret",
986
- statements: [
987
- `ALTER TABLE notifications ADD COLUMN webhook_secret TEXT`
988
- ]
989
- },
990
- {
991
- version: 4,
992
- name: "add-owned-domains",
993
- statements: [
994
- `ALTER TABLE projects ADD COLUMN owned_domains TEXT NOT NULL DEFAULT '[]'`
995
- ]
996
- },
997
- {
998
- version: 5,
999
- name: "add-snapshot-model",
1000
- statements: [
1001
- `ALTER TABLE query_snapshots ADD COLUMN model TEXT`,
1002
- `UPDATE query_snapshots SET model = json_extract(raw_response, '$.model') WHERE model IS NULL AND raw_response IS NOT NULL AND json_extract(raw_response, '$.model') IS NOT NULL`
1003
- ]
1004
- },
1005
- {
1006
- version: 6,
1007
- name: "gsc-integration",
1008
- statements: [
1009
- // google_connections (domain-scoped)
1010
- // WARNING: access_token, refresh_token are authentication material; consider storing in config.yaml per CLAUDE.md
1011
- `CREATE TABLE IF NOT EXISTS google_connections (
1012
- id TEXT PRIMARY KEY,
1013
- domain TEXT NOT NULL,
1014
- connection_type TEXT NOT NULL,
1015
- property_id TEXT,
1016
- access_token TEXT,
1017
- refresh_token TEXT,
1018
- token_expires_at TEXT,
1019
- scopes TEXT NOT NULL DEFAULT '[]',
1020
- created_at TEXT NOT NULL,
1021
- updated_at TEXT NOT NULL
1022
- )`,
1023
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_google_conn_domain_type ON google_connections(domain, connection_type)`,
1024
- // gsc_search_data
1025
- `CREATE TABLE IF NOT EXISTS gsc_search_data (
1026
- id TEXT PRIMARY KEY,
1027
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1028
- sync_run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
1029
- date TEXT NOT NULL,
1030
- query TEXT NOT NULL,
1031
- page TEXT NOT NULL,
1032
- country TEXT,
1033
- device TEXT,
1034
- clicks INTEGER NOT NULL DEFAULT 0,
1035
- impressions INTEGER NOT NULL DEFAULT 0,
1036
- ctr TEXT NOT NULL DEFAULT '0',
1037
- position TEXT NOT NULL DEFAULT '0',
1038
- created_at TEXT NOT NULL
1039
- )`,
1040
- `CREATE INDEX IF NOT EXISTS idx_gsc_search_project_date ON gsc_search_data(project_id, date)`,
1041
- `CREATE INDEX IF NOT EXISTS idx_gsc_search_query ON gsc_search_data(query)`,
1042
- `CREATE INDEX IF NOT EXISTS idx_gsc_search_run ON gsc_search_data(sync_run_id)`,
1043
- // gsc_url_inspections
1044
- `CREATE TABLE IF NOT EXISTS gsc_url_inspections (
1045
- id TEXT PRIMARY KEY,
1046
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1047
- sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE,
1048
- url TEXT NOT NULL,
1049
- indexing_state TEXT,
1050
- verdict TEXT,
1051
- coverage_state TEXT,
1052
- page_fetch_state TEXT,
1053
- robots_txt_state TEXT,
1054
- crawl_time TEXT,
1055
- last_crawl_result TEXT,
1056
- is_mobile_friendly INTEGER,
1057
- rich_results TEXT NOT NULL DEFAULT '[]',
1058
- referring_urls TEXT NOT NULL DEFAULT '[]',
1059
- inspected_at TEXT NOT NULL,
1060
- created_at TEXT NOT NULL
1061
- )`,
1062
- `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_project_url ON gsc_url_inspections(project_id, url)`,
1063
- `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_run ON gsc_url_inspections(sync_run_id)`,
1064
- `CREATE INDEX IF NOT EXISTS idx_gsc_inspect_url_time ON gsc_url_inspections(url, inspected_at)`
1065
- ]
1066
- },
1067
- {
1068
- version: 7,
1069
- name: "gsc-coverage-snapshots",
1070
- statements: [
1071
- `CREATE TABLE IF NOT EXISTS gsc_coverage_snapshots (
1072
- id TEXT PRIMARY KEY,
1073
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1074
- sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE,
1075
- date TEXT NOT NULL,
1076
- indexed INTEGER NOT NULL DEFAULT 0,
1077
- not_indexed INTEGER NOT NULL DEFAULT 0,
1078
- reason_breakdown TEXT NOT NULL DEFAULT '{}',
1079
- created_at TEXT NOT NULL
1080
- )`,
1081
- `CREATE INDEX IF NOT EXISTS idx_gsc_coverage_snap_project_date ON gsc_coverage_snapshots(project_id, date)`,
1082
- `CREATE INDEX IF NOT EXISTS idx_gsc_coverage_snap_run ON gsc_coverage_snapshots(sync_run_id)`
1083
- ]
1084
- },
1085
- {
1086
- version: 8,
1087
- name: "location-aware-sweeps",
1088
- statements: [
1089
- `ALTER TABLE projects ADD COLUMN locations TEXT NOT NULL DEFAULT '[]'`,
1090
- `ALTER TABLE projects ADD COLUMN default_location TEXT`,
1091
- `ALTER TABLE query_snapshots ADD COLUMN location TEXT`
1092
- ]
1093
- },
1094
- {
1095
- version: 9,
1096
- name: "add-run-location",
1097
- statements: [
1098
- `ALTER TABLE runs ADD COLUMN location TEXT`
1099
- ]
1100
- },
1101
- {
1102
- version: 10,
1103
- name: "add-sitemap-url",
1104
- statements: [
1105
- `ALTER TABLE google_connections ADD COLUMN sitemap_url TEXT`
1106
- ]
1107
- },
1108
- {
1109
- version: 11,
1110
- name: "add-screenshot-path",
1111
- statements: [
1112
- `ALTER TABLE query_snapshots ADD COLUMN screenshot_path TEXT`
1113
- ]
1114
- },
1115
- {
1116
- version: 12,
1117
- name: "bing-wmt-integration",
1118
- statements: [
1119
- // bing_connections
1120
- `CREATE TABLE IF NOT EXISTS bing_connections (
1121
- id TEXT PRIMARY KEY,
1122
- domain TEXT NOT NULL,
1123
- site_url TEXT,
1124
- created_at TEXT NOT NULL,
1125
- updated_at TEXT NOT NULL
1126
- )`,
1127
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_conn_domain ON bing_connections(domain)`,
1128
- // bing_url_inspections
1129
- `CREATE TABLE IF NOT EXISTS bing_url_inspections (
1130
- id TEXT PRIMARY KEY,
1131
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1132
- url TEXT NOT NULL,
1133
- http_code INTEGER,
1134
- in_index INTEGER,
1135
- last_crawled_date TEXT,
1136
- in_index_date TEXT,
1137
- inspected_at TEXT NOT NULL,
1138
- created_at TEXT NOT NULL
1139
- )`,
1140
- `CREATE INDEX IF NOT EXISTS idx_bing_inspect_project_url ON bing_url_inspections(project_id, url)`,
1141
- `CREATE INDEX IF NOT EXISTS idx_bing_inspect_url_time ON bing_url_inspections(url, inspected_at)`,
1142
- // bing_keyword_stats
1143
- `CREATE TABLE IF NOT EXISTS bing_keyword_stats (
1144
- id TEXT PRIMARY KEY,
1145
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1146
- query TEXT NOT NULL,
1147
- impressions INTEGER NOT NULL DEFAULT 0,
1148
- clicks INTEGER NOT NULL DEFAULT 0,
1149
- ctr TEXT NOT NULL DEFAULT '0',
1150
- average_position TEXT NOT NULL DEFAULT '0',
1151
- synced_at TEXT NOT NULL,
1152
- created_at TEXT NOT NULL
1153
- )`,
1154
- `CREATE INDEX IF NOT EXISTS idx_bing_keyword_project ON bing_keyword_stats(project_id)`,
1155
- `CREATE INDEX IF NOT EXISTS idx_bing_keyword_query ON bing_keyword_stats(query)`
1156
- ]
1157
- },
1158
- {
1159
- version: 13,
1160
- name: "ga4-integration",
1161
- statements: [
1162
- // ga_connections
1163
- // WARNING: private_key is authentication material; consider storing in config.yaml per CLAUDE.md
1164
- `CREATE TABLE IF NOT EXISTS ga_connections (
1165
- id TEXT PRIMARY KEY,
1166
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1167
- property_id TEXT NOT NULL,
1168
- client_email TEXT NOT NULL,
1169
- private_key TEXT NOT NULL,
1170
- created_at TEXT NOT NULL,
1171
- updated_at TEXT NOT NULL
1172
- )`,
1173
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_conn_project ON ga_connections(project_id)`,
1174
- // ga_traffic_snapshots
1175
- `CREATE TABLE IF NOT EXISTS ga_traffic_snapshots (
1176
- id TEXT PRIMARY KEY,
1177
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1178
- date TEXT NOT NULL,
1179
- landing_page TEXT NOT NULL,
1180
- sessions INTEGER NOT NULL DEFAULT 0,
1181
- organic_sessions INTEGER NOT NULL DEFAULT 0,
1182
- users INTEGER NOT NULL DEFAULT 0,
1183
- synced_at TEXT NOT NULL
1184
- )`,
1185
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_project_date ON ga_traffic_snapshots(project_id, date)`,
1186
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page ON ga_traffic_snapshots(landing_page)`
1187
- ]
1188
- },
1189
- {
1190
- version: 14,
1191
- name: "ga4-traffic-summaries",
1192
- statements: [
1193
- `CREATE TABLE IF NOT EXISTS ga_traffic_summaries (
1194
- id TEXT PRIMARY KEY,
1195
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1196
- period_start TEXT NOT NULL,
1197
- period_end TEXT NOT NULL,
1198
- total_sessions INTEGER NOT NULL DEFAULT 0,
1199
- total_organic_sessions INTEGER NOT NULL DEFAULT 0,
1200
- total_users INTEGER NOT NULL DEFAULT 0,
1201
- synced_at TEXT NOT NULL
1202
- )`,
1203
- `CREATE INDEX IF NOT EXISTS idx_ga_summary_project ON ga_traffic_summaries(project_id)`
1204
- ]
1205
- },
1206
- {
1207
- version: 15,
1208
- name: "bing-inspect-columns",
1209
- statements: [
1210
- `ALTER TABLE bing_url_inspections ADD COLUMN document_size INTEGER`,
1211
- `ALTER TABLE bing_url_inspections ADD COLUMN anchor_count INTEGER`,
1212
- `ALTER TABLE bing_url_inspections ADD COLUMN discovery_date TEXT`
1213
- ]
1214
- },
1215
- {
1216
- version: 16,
1217
- name: "recommended-competitors",
1218
- statements: [
1219
- `ALTER TABLE query_snapshots ADD COLUMN recommended_competitors TEXT NOT NULL DEFAULT '[]'`
1220
- ]
1221
- },
1222
- {
1223
- version: 17,
1224
- name: "ga4-ai-referrals",
1225
- statements: [
1226
- `CREATE TABLE IF NOT EXISTS ga_ai_referrals (
1227
- id TEXT PRIMARY KEY,
1228
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1229
- date TEXT NOT NULL,
1230
- source TEXT NOT NULL,
1231
- medium TEXT NOT NULL,
1232
- sessions INTEGER NOT NULL DEFAULT 0,
1233
- users INTEGER NOT NULL DEFAULT 0,
1234
- synced_at TEXT NOT NULL
1235
- )`,
1236
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_project_date ON ga_ai_referrals(project_id, date)`,
1237
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_source ON ga_ai_referrals(source)`
1238
- ]
1239
- },
1240
- {
1241
- version: 18,
1242
- name: "answer-mentioned",
1243
- statements: [
1244
- `ALTER TABLE query_snapshots ADD COLUMN answer_mentioned INTEGER`
1245
- ]
1246
- },
1247
- {
1248
- version: 19,
1249
- name: "named-unique-indexes",
1250
- statements: [
1251
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_queries_project_query ON queries(project_id, query)`,
1252
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_competitors_project_domain ON competitors(project_id, domain)`,
1253
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_schedules_project ON schedules(project_id)`,
1254
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_usage_scope_period_metric ON usage_counters(scope, period, metric)`,
1255
- `ALTER TABLE projects ADD COLUMN config_source TEXT NOT NULL DEFAULT 'cli'`,
1256
- `ALTER TABLE projects ADD COLUMN config_revision INTEGER NOT NULL DEFAULT 1`
1257
- ]
1258
- },
1259
- {
1260
- version: 20,
1261
- name: "ga4-source-dimension",
1262
- statements: [
1263
- // Values: 'session' (sessionSource), 'first_user' (firstUserSource), 'manual_utm' (manualSource/utm_source)
1264
- `ALTER TABLE ga_ai_referrals ADD COLUMN source_dimension TEXT NOT NULL DEFAULT 'session'`,
1265
- // Adopt the widened unique key (now including source_dimension). This
1266
- // version intentionally does NOT drop the prior narrow index
1267
- // idx_ga_ai_ref_unique — the original v17 + v20 pair did, but replaying
1268
- // that pair on a DB where data has since accumulated duplicates on the
1269
- // narrow key would crash (the bug this PR fixes). Any DB that ran the
1270
- // historical v20 once already has the narrow index gone; brand-new DBs
1271
- // never create it because v17 was rewritten to omit it. Anything else
1272
- // is repaired by v46, which drops idx_ga_ai_ref_unique_v2 and lands on
1273
- // the final (…, source_dimension, landing_page) index.
1274
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique_v2 ON ga_ai_referrals(project_id, date, source, medium, source_dimension)`
1275
- ]
1276
- },
1277
- {
1278
- version: 21,
1279
- name: "snapshot-filtering-indexes",
1280
- statements: [
1281
- `CREATE INDEX IF NOT EXISTS idx_snapshots_citation_state ON query_snapshots(citation_state)`,
1282
- `CREATE INDEX IF NOT EXISTS idx_snapshots_provider_model ON query_snapshots(provider, model)`,
1283
- `CREATE INDEX IF NOT EXISTS idx_snapshots_location ON query_snapshots(location)`
1284
- ]
1285
- },
1286
- {
1287
- version: 22,
1288
- name: "insights-table",
1289
- statements: [
1290
- `CREATE TABLE IF NOT EXISTS insights (
1291
- id TEXT PRIMARY KEY,
1292
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1293
- type TEXT NOT NULL,
1294
- severity TEXT NOT NULL,
1295
- title TEXT NOT NULL,
1296
- query TEXT NOT NULL,
1297
- provider TEXT NOT NULL,
1298
- recommendation TEXT,
1299
- cause TEXT,
1300
- dismissed INTEGER NOT NULL DEFAULT 0,
1301
- created_at TEXT NOT NULL
1302
- )`,
1303
- `CREATE INDEX IF NOT EXISTS idx_insights_project ON insights(project_id)`,
1304
- `CREATE INDEX IF NOT EXISTS idx_insights_created ON insights(created_at)`,
1305
- `CREATE INDEX IF NOT EXISTS idx_insights_query_provider ON insights(query, provider)`
1306
- ]
1307
- },
1308
- {
1309
- version: 23,
1310
- name: "health-snapshots-table",
1311
- statements: [
1312
- `CREATE TABLE IF NOT EXISTS health_snapshots (
1313
- id TEXT PRIMARY KEY,
1314
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1315
- overall_cited_rate TEXT NOT NULL,
1316
- total_pairs INTEGER NOT NULL,
1317
- cited_pairs INTEGER NOT NULL,
1318
- provider_breakdown TEXT NOT NULL DEFAULT '{}',
1319
- created_at TEXT NOT NULL
1320
- )`,
1321
- `CREATE INDEX IF NOT EXISTS idx_health_snapshots_project ON health_snapshots(project_id)`,
1322
- `CREATE INDEX IF NOT EXISTS idx_health_snapshots_created ON health_snapshots(created_at)`
1323
- ]
1324
- },
1325
- {
1326
- version: 24,
1327
- name: "intelligence-run-id",
1328
- statements: [
1329
- `ALTER TABLE insights ADD COLUMN run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1330
- `CREATE INDEX IF NOT EXISTS idx_insights_run ON insights(run_id)`,
1331
- `ALTER TABLE health_snapshots ADD COLUMN run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1332
- `CREATE INDEX IF NOT EXISTS idx_health_snapshots_run ON health_snapshots(run_id)`
1333
- ]
1334
- },
1335
- {
1336
- version: 25,
1337
- name: "ga4-social-referrals",
1338
- statements: [
1339
- // Uses GA4's native sessionDefaultChannelGroup for social classification
1340
- `CREATE TABLE IF NOT EXISTS ga_social_referrals (
1341
- id TEXT PRIMARY KEY,
1342
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1343
- date TEXT NOT NULL,
1344
- source TEXT NOT NULL,
1345
- medium TEXT NOT NULL,
1346
- channel_group TEXT NOT NULL DEFAULT 'Organic Social',
1347
- sessions INTEGER NOT NULL DEFAULT 0,
1348
- users INTEGER NOT NULL DEFAULT 0,
1349
- synced_at TEXT NOT NULL
1350
- )`,
1351
- `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_project_date ON ga_social_referrals(project_id, date)`,
1352
- `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_source ON ga_social_referrals(source)`,
1353
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_social_ref_unique ON ga_social_referrals(project_id, date, source, medium, channel_group)`
1354
- ]
1355
- },
1356
- {
1357
- version: 26,
1358
- name: "bing-coverage-snapshots",
1359
- statements: [
1360
- `CREATE TABLE IF NOT EXISTS bing_coverage_snapshots (
1361
- id TEXT PRIMARY KEY,
1362
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1363
- date TEXT NOT NULL,
1364
- indexed INTEGER NOT NULL DEFAULT 0,
1365
- not_indexed INTEGER NOT NULL DEFAULT 0,
1366
- unknown INTEGER NOT NULL DEFAULT 0,
1367
- created_at TEXT NOT NULL
1368
- )`,
1369
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_coverage_snap_project_date ON bing_coverage_snapshots(project_id, date)`
1370
- ]
1371
- },
1372
- {
1373
- version: 27,
1374
- name: "credential-columns-removed-from-schema",
1375
- statements: [
1376
- // Credential columns removed from Drizzle schema — credentials now live in config.yaml.
1377
- // Physical columns intentionally retained for one-time migration by server.ts.
1378
- // No DDL statements needed.
1379
- ]
1380
- },
1381
- {
1382
- version: 28,
1383
- name: "sync-run-id-bing-inspect",
1384
- statements: [
1385
- `ALTER TABLE bing_url_inspections ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1386
- `CREATE INDEX IF NOT EXISTS idx_bing_inspect_run ON bing_url_inspections(sync_run_id)`
1387
- ]
1388
- },
1389
- {
1390
- version: 29,
1391
- name: "sync-run-id-ga-traffic",
1392
- statements: [
1393
- `ALTER TABLE ga_traffic_snapshots ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1394
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_run ON ga_traffic_snapshots(sync_run_id)`
1395
- ]
1396
- },
1397
- {
1398
- version: 30,
1399
- name: "sync-run-id-ga-ai-ref",
1400
- statements: [
1401
- `ALTER TABLE ga_ai_referrals ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1402
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_run ON ga_ai_referrals(sync_run_id)`
1403
- ]
1404
- },
1405
- {
1406
- version: 31,
1407
- name: "sync-run-id-ga-social-ref",
1408
- statements: [
1409
- `ALTER TABLE ga_social_referrals ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1410
- `CREATE INDEX IF NOT EXISTS idx_ga_social_ref_run ON ga_social_referrals(sync_run_id)`
1411
- ]
1412
- },
1413
- {
1414
- version: 32,
1415
- name: "sync-run-id-ga-summary",
1416
- statements: [
1417
- `ALTER TABLE ga_traffic_summaries ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1418
- `CREATE INDEX IF NOT EXISTS idx_ga_summary_run ON ga_traffic_summaries(sync_run_id)`
1419
- ]
1420
- },
1421
- {
1422
- version: 33,
1423
- name: "sync-run-id-bing-coverage",
1424
- statements: [
1425
- `ALTER TABLE bing_coverage_snapshots ADD COLUMN sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE`,
1426
- `CREATE INDEX IF NOT EXISTS idx_bing_coverage_snap_run ON bing_coverage_snapshots(sync_run_id)`
1427
- ]
1428
- },
1429
- {
1430
- version: 34,
1431
- name: "bing-coverage-index-rename",
1432
- statements: [
1433
- `DROP INDEX IF EXISTS idx_bing_coverage_snap_project_date`,
1434
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_bing_coverage_snap_project_date_unique ON bing_coverage_snapshots(project_id, date)`
1435
- ]
1436
- },
1437
- {
1438
- version: 35,
1439
- name: "snapshot-created-at-index",
1440
- statements: [
1441
- `CREATE INDEX IF NOT EXISTS idx_snapshots_created_at ON query_snapshots(created_at)`
1442
- ]
1443
- },
1444
- {
1445
- version: 36,
1446
- name: "sql-injection-review",
1447
- statements: [
1448
- // Transaction handling and SQL injection review: verified all strings
1449
- // use SQLite ? binding via Drizzle. No parameterization changes needed.
1450
- ]
1451
- },
1452
- {
1453
- version: 37,
1454
- name: "legacy-credential-cleanup",
1455
- statements: [
1456
- // The legacy credential columns (private_key on ga_connections; access_token,
1457
- // refresh_token, token_expires_at on google_connections) are removed by the
1458
- // extractLegacyCredentials / dropLegacyCredentialColumns pair.
1459
- // Callers read the rows, persist them to config.yaml, and only then drop
1460
- // the columns so a failed config write doesn't permanently lose credentials.
1461
- // No DDL statements here — columns are dropped via exported functions below.
1462
- ]
1463
- },
1464
- {
1465
- version: 38,
1466
- name: "agent-sessions",
1467
- statements: [
1468
- `CREATE TABLE IF NOT EXISTS agent_sessions (
1469
- id TEXT PRIMARY KEY,
1470
- project_id TEXT NOT NULL UNIQUE REFERENCES projects(id) ON DELETE CASCADE,
1471
- system_prompt TEXT NOT NULL,
1472
- model_provider TEXT NOT NULL,
1473
- model_id TEXT NOT NULL,
1474
- messages TEXT NOT NULL DEFAULT '[]',
1475
- follow_up_queue TEXT NOT NULL DEFAULT '[]',
1476
- created_at TEXT NOT NULL,
1477
- updated_at TEXT NOT NULL
1478
- )`,
1479
- `CREATE INDEX IF NOT EXISTS idx_agent_sessions_project ON agent_sessions(project_id)`,
1480
- `CREATE INDEX IF NOT EXISTS idx_agent_sessions_updated ON agent_sessions(updated_at)`
1481
- ]
1482
- },
1483
- {
1484
- version: 39,
1485
- name: "aero-provider-rename",
1486
- statements: [
1487
- // Align Aero provider IDs with sweep naming — anthropic→claude, google→gemini.
1488
- // Idempotent: the UPDATE is a no-op once the rename has been applied.
1489
- `UPDATE agent_sessions SET model_provider = 'claude' WHERE model_provider = 'anthropic'`,
1490
- `UPDATE agent_sessions SET model_provider = 'gemini' WHERE model_provider = 'google'`
1491
- ]
1492
- },
1493
- {
1494
- version: 40,
1495
- name: "agent-memory",
1496
- statements: [
1497
- `CREATE TABLE IF NOT EXISTS agent_memory (
1498
- id TEXT PRIMARY KEY,
1499
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1500
- key TEXT NOT NULL,
1501
- value TEXT NOT NULL,
1502
- source TEXT NOT NULL,
1503
- created_at TEXT NOT NULL,
1504
- updated_at TEXT NOT NULL
1505
- )`,
1506
- `CREATE UNIQUE INDEX IF NOT EXISTS uniq_agent_memory_project_key
1507
- ON agent_memory(project_id, key)`,
1508
- `CREATE INDEX IF NOT EXISTS idx_agent_memory_project_updated
1509
- ON agent_memory(project_id, updated_at)`
1510
- ]
1511
- },
1512
- {
1513
- version: 41,
1514
- name: "common-crawl-backlinks",
1515
- statements: [
1516
- // cc_release_syncs
1517
- `CREATE TABLE IF NOT EXISTS cc_release_syncs (
1518
- id TEXT PRIMARY KEY,
1519
- release TEXT NOT NULL UNIQUE,
1520
- status TEXT NOT NULL,
1521
- phase_detail TEXT,
1522
- vertex_path TEXT,
1523
- edges_path TEXT,
1524
- vertex_sha256 TEXT,
1525
- edges_sha256 TEXT,
1526
- vertex_bytes INTEGER,
1527
- edges_bytes INTEGER,
1528
- projects_processed INTEGER,
1529
- domains_discovered INTEGER,
1530
- download_started_at TEXT,
1531
- download_finished_at TEXT,
1532
- query_started_at TEXT,
1533
- query_finished_at TEXT,
1534
- error TEXT,
1535
- created_at TEXT NOT NULL,
1536
- updated_at TEXT NOT NULL
1537
- )`,
1538
- `CREATE INDEX IF NOT EXISTS idx_cc_release_syncs_status ON cc_release_syncs(status)`,
1539
- // backlink_domains
1540
- `CREATE TABLE IF NOT EXISTS backlink_domains (
1541
- id TEXT PRIMARY KEY,
1542
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1543
- release_sync_id TEXT NOT NULL REFERENCES cc_release_syncs(id) ON DELETE CASCADE,
1544
- release TEXT NOT NULL,
1545
- target_domain TEXT NOT NULL,
1546
- linking_domain TEXT NOT NULL,
1547
- num_hosts INTEGER NOT NULL,
1548
- created_at TEXT NOT NULL
1549
- )`,
1550
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_project ON backlink_domains(project_id)`,
1551
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_release_sync ON backlink_domains(release_sync_id)`,
1552
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_project_release ON backlink_domains(project_id, release)`,
1553
- `CREATE INDEX IF NOT EXISTS idx_backlink_domains_hosts ON backlink_domains(num_hosts)`,
1554
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_backlink_domains_unique ON backlink_domains(project_id, release, linking_domain)`,
1555
- // backlink_summaries
1556
- `CREATE TABLE IF NOT EXISTS backlink_summaries (
1557
- id TEXT PRIMARY KEY,
1558
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1559
- release_sync_id TEXT NOT NULL REFERENCES cc_release_syncs(id) ON DELETE CASCADE,
1560
- release TEXT NOT NULL,
1561
- target_domain TEXT NOT NULL,
1562
- total_linking_domains INTEGER NOT NULL,
1563
- total_hosts INTEGER NOT NULL,
1564
- top_10_hosts_share TEXT NOT NULL,
1565
- queried_at TEXT NOT NULL,
1566
- created_at TEXT NOT NULL
1567
- )`,
1568
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_backlink_summaries_project_release ON backlink_summaries(project_id, release)`,
1569
- `CREATE INDEX IF NOT EXISTS idx_backlink_summaries_project ON backlink_summaries(project_id)`
1570
- ]
1571
- },
1572
- {
1573
- version: 42,
1574
- name: "auto-extract-backlinks",
1575
- statements: [
1576
- `ALTER TABLE projects ADD COLUMN auto_extract_backlinks INTEGER NOT NULL DEFAULT 0`
1577
- ]
1578
- },
1579
- {
1580
- version: 43,
1581
- name: "backfill-bing-in-index",
1582
- statements: [
1583
- // Backfill bing_url_inspections.in_index using the new crawl-signal
1584
- // decision tree. Uses a created_at cutoff so rows written by the new
1585
- // code (which applies a live GetCrawlIssues demotion that can't be
1586
- // replayed offline) are preserved.
1587
- `UPDATE bing_url_inspections
1588
- SET in_index = CASE
1589
- WHEN document_size IS NOT NULL AND document_size > 0 THEN 1
1590
- WHEN last_crawled_date IS NOT NULL AND http_code IS NOT NULL AND http_code >= 400 THEN 0
1591
- WHEN last_crawled_date IS NOT NULL THEN 1
1592
- WHEN discovery_date IS NOT NULL THEN 0
1593
- ELSE NULL
1594
- END
1595
- WHERE created_at < '2026-04-22T00:00:00Z'`
1596
- ]
1597
- },
1598
- {
1599
- version: 44,
1600
- name: "ga-traffic-landing-normalized",
1601
- statements: [
1602
- `ALTER TABLE ga_traffic_snapshots ADD COLUMN landing_page_normalized TEXT`,
1603
- `CREATE INDEX IF NOT EXISTS idx_ga_traffic_page_normalized
1604
- ON ga_traffic_snapshots(project_id, date, landing_page_normalized)`
1605
- ]
1606
- },
1607
- {
1608
- version: 45,
1609
- name: "ga-traffic-direct-sessions",
1610
- statements: [
1611
- `ALTER TABLE ga_traffic_snapshots ADD COLUMN direct_sessions INTEGER`
1612
- ]
1613
- },
1614
- {
1615
- version: 46,
1616
- name: "ga-ai-landing-page",
1617
- statements: [
1618
- `ALTER TABLE ga_ai_referrals ADD COLUMN landing_page TEXT NOT NULL DEFAULT '(not set)'`,
1619
- `ALTER TABLE ga_ai_referrals ADD COLUMN landing_page_normalized TEXT`,
1620
- `DROP INDEX IF EXISTS idx_ga_ai_ref_unique_v2`,
1621
- `CREATE INDEX IF NOT EXISTS idx_ga_ai_ref_landing_page
1622
- ON ga_ai_referrals(project_id, date, landing_page_normalized)`,
1623
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique_v3
1624
- ON ga_ai_referrals(project_id, date, source, medium, source_dimension, landing_page)`
1625
- ]
1626
- },
1627
- {
1628
- version: 47,
1629
- name: "ga-traffic-window-summaries",
1630
- statements: [
1631
- `CREATE TABLE IF NOT EXISTS ga_traffic_window_summaries (
1632
- id TEXT PRIMARY KEY,
1633
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1634
- window_key TEXT NOT NULL,
1635
- period_start TEXT NOT NULL,
1636
- period_end TEXT NOT NULL,
1637
- total_sessions INTEGER NOT NULL DEFAULT 0,
1638
- total_organic_sessions INTEGER NOT NULL DEFAULT 0,
1639
- total_direct_sessions INTEGER NOT NULL DEFAULT 0,
1640
- total_users INTEGER NOT NULL DEFAULT 0,
1641
- synced_at TEXT NOT NULL,
1642
- sync_run_id TEXT REFERENCES runs(id) ON DELETE CASCADE
1643
- )`,
1644
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_window_summary_unique
1645
- ON ga_traffic_window_summaries(project_id, window_key)`,
1646
- `CREATE INDEX IF NOT EXISTS idx_ga_window_summary_run
1647
- ON ga_traffic_window_summaries(sync_run_id)`
1648
- ]
1649
- },
1650
- {
1651
- version: 48,
1652
- name: "rename-keywords-to-queries",
1653
- // The actual legacy rename runs before bootstrap SQL so existing DBs never
1654
- // see new-name indexes before their old columns have been renamed. This
1655
- // version records the schema cutover and lands the final index names.
1656
- statements: [
1657
- `DROP INDEX IF EXISTS idx_keywords_project`,
1658
- `DROP INDEX IF EXISTS idx_keywords_project_keyword`,
1659
- `DROP INDEX IF EXISTS idx_snapshots_keyword`,
1660
- `DROP INDEX IF EXISTS idx_insights_keyword_provider`,
1661
- `CREATE INDEX IF NOT EXISTS idx_queries_project ON queries(project_id)`,
1662
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_queries_project_query ON queries(project_id, query)`,
1663
- `CREATE INDEX IF NOT EXISTS idx_snapshots_query ON query_snapshots(query_id)`,
1664
- `CREATE INDEX IF NOT EXISTS idx_insights_query_provider ON insights(query, provider)`
1665
- ],
1666
- run: (tx) => {
1667
- normalizeLegacyQuerySchema(tx);
1668
- }
1669
- },
1670
- {
1671
- version: 49,
1672
- name: "server-side-traffic-tables",
1673
- statements: [
1674
- `CREATE TABLE IF NOT EXISTS traffic_sources (
1675
- id TEXT PRIMARY KEY,
1676
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1677
- source_type TEXT NOT NULL,
1678
- display_name TEXT NOT NULL,
1679
- status TEXT NOT NULL,
1680
- last_synced_at TEXT,
1681
- last_cursor TEXT,
1682
- last_error TEXT,
1683
- archived_at TEXT,
1684
- config_json TEXT NOT NULL DEFAULT '{}',
1685
- created_at TEXT NOT NULL,
1686
- updated_at TEXT NOT NULL
1687
- )`,
1688
- `CREATE INDEX IF NOT EXISTS idx_traffic_sources_project ON traffic_sources(project_id)`,
1689
- `CREATE INDEX IF NOT EXISTS idx_traffic_sources_project_status ON traffic_sources(project_id, status)`,
1690
- `CREATE TABLE IF NOT EXISTS crawler_events_hourly (
1691
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1692
- source_id TEXT NOT NULL REFERENCES traffic_sources(id) ON DELETE CASCADE,
1693
- ts_hour TEXT NOT NULL,
1694
- bot_id TEXT NOT NULL,
1695
- operator TEXT NOT NULL,
1696
- verification_status TEXT NOT NULL,
1697
- path_normalized TEXT NOT NULL,
1698
- status INTEGER NOT NULL,
1699
- hits INTEGER NOT NULL DEFAULT 0,
1700
- sampled_user_agent TEXT,
1701
- created_at TEXT NOT NULL,
1702
- updated_at TEXT NOT NULL,
1703
- PRIMARY KEY (project_id, source_id, ts_hour, bot_id, verification_status, path_normalized, status)
1704
- )`,
1705
- `CREATE INDEX IF NOT EXISTS idx_crawler_hourly_project_ts ON crawler_events_hourly(project_id, ts_hour)`,
1706
- `CREATE INDEX IF NOT EXISTS idx_crawler_hourly_path ON crawler_events_hourly(project_id, path_normalized)`,
1707
- `CREATE TABLE IF NOT EXISTS ai_referral_events_hourly (
1708
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1709
- source_id TEXT NOT NULL REFERENCES traffic_sources(id) ON DELETE CASCADE,
1710
- ts_hour TEXT NOT NULL,
1711
- product TEXT NOT NULL,
1712
- operator TEXT NOT NULL,
1713
- source_domain TEXT NOT NULL,
1714
- evidence_type TEXT NOT NULL,
1715
- landing_path_normalized TEXT NOT NULL,
1716
- status INTEGER NOT NULL,
1717
- sessions_or_hits INTEGER NOT NULL DEFAULT 0,
1718
- users_estimated INTEGER,
1719
- created_at TEXT NOT NULL,
1720
- updated_at TEXT NOT NULL,
1721
- PRIMARY KEY (project_id, source_id, ts_hour, product, source_domain, evidence_type, landing_path_normalized, status)
1722
- )`,
1723
- `CREATE INDEX IF NOT EXISTS idx_ai_referral_hourly_project_ts ON ai_referral_events_hourly(project_id, ts_hour)`,
1724
- `CREATE INDEX IF NOT EXISTS idx_ai_referral_hourly_landing ON ai_referral_events_hourly(project_id, landing_path_normalized)`,
1725
- `CREATE TABLE IF NOT EXISTS raw_event_samples (
1726
- id TEXT PRIMARY KEY,
1727
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1728
- source_id TEXT NOT NULL REFERENCES traffic_sources(id) ON DELETE CASCADE,
1729
- ts TEXT NOT NULL,
1730
- event_type TEXT NOT NULL,
1731
- ip_hash TEXT,
1732
- user_agent TEXT,
1733
- path_normalized TEXT NOT NULL,
1734
- status INTEGER,
1735
- referer_host TEXT,
1736
- classifier_details_json TEXT NOT NULL DEFAULT '{}',
1737
- created_at TEXT NOT NULL
1738
- )`,
1739
- `CREATE INDEX IF NOT EXISTS idx_raw_event_samples_project_ts ON raw_event_samples(project_id, ts)`,
1740
- `CREATE INDEX IF NOT EXISTS idx_raw_event_samples_source_ts ON raw_event_samples(source_id, ts)`,
1741
- `CREATE INDEX IF NOT EXISTS idx_raw_event_samples_event_type ON raw_event_samples(event_type)`
1742
- ]
1743
- },
1744
- {
1745
- version: 50,
1746
- name: "ga-ai-referral-channel-group",
1747
- statements: [],
1748
- run: (tx) => {
1749
- if (!tableExists(tx, "ga_ai_referrals")) return;
1750
- if (!columnExists(tx, "ga_ai_referrals", "channel_group")) {
1751
- tx.run(sql.raw(`ALTER TABLE ga_ai_referrals ADD COLUMN channel_group TEXT NOT NULL DEFAULT '(not set)'`));
1752
- }
1753
- tx.run(sql.raw(`DROP INDEX IF EXISTS idx_ga_ai_ref_unique_v3`));
1754
- tx.run(sql.raw(`CREATE UNIQUE INDEX IF NOT EXISTS idx_ga_ai_ref_unique_v4
1755
- ON ga_ai_referrals(project_id, date, source, medium, source_dimension, channel_group, landing_page)`));
1756
- }
1757
- },
1758
- {
1759
- version: 51,
1760
- name: "runs-source-id",
1761
- statements: [
1762
- `ALTER TABLE runs ADD COLUMN source_id TEXT`,
1763
- `CREATE INDEX IF NOT EXISTS idx_runs_source ON runs(source_id)`
1764
- ]
1765
- },
1766
- {
1767
- version: 52,
1768
- name: "traffic-sources-last-event-ids",
1769
- statements: [
1770
- // JSON-encoded array of normalized event IDs from the previous sync,
1771
- // used for cross-sync boundary-window dedupe so a longer default
1772
- // sync window (or any overlapping re-sync) cannot double-count.
1773
- `ALTER TABLE traffic_sources ADD COLUMN last_event_ids TEXT`
1774
- ]
1775
- },
1776
- {
1777
- version: 53,
1778
- name: "schedules-kind-and-source",
1779
- // The legacy schedules table carries an inline `UNIQUE(project_id)`
1780
- // constraint (see MIGRATION_SQL). SQLite doesn't support dropping inline
1781
- // table constraints, so we use the canonical table-rebuild pattern:
1782
- // create a new table with the desired schema, copy the data, drop the
1783
- // old, rename. All statements below run inside the migration runner's
1784
- // single transaction so a partial failure rolls everything back.
1785
- statements: [
1786
- // (project_id, kind) uniqueness is enforced by the explicit
1787
- // `CREATE UNIQUE INDEX idx_schedules_project_kind` below — that's the
1788
- // canonical drizzle-side index name (see schema.ts), so don't duplicate
1789
- // it as an inline UNIQUE() in CREATE TABLE.
1790
- `CREATE TABLE IF NOT EXISTS schedules_v53 (
1791
- id TEXT PRIMARY KEY,
1792
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1793
- kind TEXT NOT NULL DEFAULT 'answer-visibility',
1794
- cron_expr TEXT NOT NULL,
1795
- preset TEXT,
1796
- timezone TEXT NOT NULL DEFAULT 'UTC',
1797
- enabled INTEGER NOT NULL DEFAULT 1,
1798
- providers TEXT NOT NULL DEFAULT '[]',
1799
- source_id TEXT,
1800
- last_run_at TEXT,
1801
- next_run_at TEXT,
1802
- created_at TEXT NOT NULL,
1803
- updated_at TEXT NOT NULL
1804
- )`,
1805
- `INSERT INTO schedules_v53 (
1806
- id, project_id, kind, cron_expr, preset, timezone, enabled,
1807
- providers, source_id, last_run_at, next_run_at, created_at, updated_at
1808
- )
1809
- SELECT id, project_id, 'answer-visibility', cron_expr, preset, timezone, enabled,
1810
- providers, NULL, last_run_at, next_run_at, created_at, updated_at
1811
- FROM schedules`,
1812
- `DROP TABLE schedules`,
1813
- `ALTER TABLE schedules_v53 RENAME TO schedules`,
1814
- // The legacy single-column unique index doesn't survive the table
1815
- // rebuild (DROP TABLE removes it), but we still DROP IF EXISTS explicitly to keep the migration
1816
- // idempotent across edge-case re-runs.
1817
- `DROP INDEX IF EXISTS idx_schedules_project`,
1818
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_schedules_project_kind ON schedules(project_id, kind)`
1819
- ]
1820
- },
1821
- {
1822
- version: 54,
1823
- name: "drop-resurrected-schedules-project-index",
1824
- // v53 dropped `idx_schedules_project`, but `MIGRATION_SQL` (which runs on
1825
- // every boot, before versioned migrations) was still creating it. On any
1826
- // boot AFTER the one that applied v53, Phase 1 re-created the legacy
1827
- // single-column UNIQUE index, which then collided with the new
1828
- // (project_id, kind) semantics and broke traffic-sync schedule creation
1829
- // (`UNIQUE constraint failed: schedules.project_id`). MIGRATION_SQL no
1830
- // longer creates that index; this migration removes it from any DB that
1831
- // already booted past v53 with the resurrected index.
1832
- statements: [
1833
- `DROP INDEX IF EXISTS idx_schedules_project`
1834
- ]
1835
- },
1836
- {
1837
- version: 55,
1838
- name: "discovery-foundation",
1839
- // Adds the three-ring discovery foundation: per-project ICP, query/competitor
1840
- // provenance (so we can trace adopted basket entries back to a discovery
1841
- // session), and the two tables that hold a discovery session's research
1842
- // output. No UNIQUE(session_id, query) on discovery_probes — v2 will probe
1843
- // the same query across multiple providers in the same session.
1844
- //
1845
- // `competitor_map` defaults to '[]' (JSON array) — see DTO
1846
- // `discoveryCompetitorMapEntrySchema` for the entry shape `{domain, hits}`.
1847
- // Backfill of `provenance='cli'` runs once: existing pre-v55 rows are
1848
- // attributed to manual CLI entry so a future NULL distinctly means
1849
- // "post-v55 row missing provenance" (a bug to catch in review).
1850
- statements: [
1851
- `ALTER TABLE projects ADD COLUMN icp_description TEXT`,
1852
- `ALTER TABLE queries ADD COLUMN provenance TEXT`,
1853
- `ALTER TABLE competitors ADD COLUMN provenance TEXT`,
1854
- `UPDATE queries SET provenance = 'cli' WHERE provenance IS NULL`,
1855
- `UPDATE competitors SET provenance = 'cli' WHERE provenance IS NULL`,
1856
- `CREATE TABLE IF NOT EXISTS discovery_sessions (
1857
- id TEXT PRIMARY KEY,
1858
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1859
- status TEXT NOT NULL DEFAULT 'queued',
1860
- icp_description TEXT,
1861
- seed_provider TEXT,
1862
- seed_count_raw INTEGER,
1863
- seed_count INTEGER,
1864
- dedup_threshold REAL,
1865
- probe_count INTEGER,
1866
- cited_count INTEGER,
1867
- aspirational_count INTEGER,
1868
- wasted_count INTEGER,
1869
- competitor_map TEXT NOT NULL DEFAULT '[]',
1870
- error TEXT,
1871
- started_at TEXT,
1872
- finished_at TEXT,
1873
- created_at TEXT NOT NULL
1874
- )`,
1875
- // "Latest session per project" is the access pattern; SQLite walks the
1876
- // composite index backwards for ORDER BY created_at DESC.
1877
- `CREATE INDEX IF NOT EXISTS idx_discovery_sessions_project_created ON discovery_sessions(project_id, created_at)`,
1878
- `CREATE TABLE IF NOT EXISTS discovery_probes (
1879
- id TEXT PRIMARY KEY,
1880
- session_id TEXT NOT NULL REFERENCES discovery_sessions(id) ON DELETE CASCADE,
1881
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1882
- query TEXT NOT NULL,
1883
- bucket TEXT,
1884
- citation_state TEXT NOT NULL,
1885
- cited_domains TEXT NOT NULL DEFAULT '[]',
1886
- raw_response TEXT,
1887
- created_at TEXT NOT NULL
1888
- )`,
1889
- `CREATE INDEX IF NOT EXISTS idx_discovery_probes_session ON discovery_probes(session_id)`,
1890
- `CREATE INDEX IF NOT EXISTS idx_discovery_probes_project ON discovery_probes(project_id)`
1891
- ]
1892
- },
1893
- {
1894
- version: 56,
1895
- name: "discovery-sessions-run-id",
1896
- // Links a discovery_sessions row back to the runs row that drove it. Without
1897
- // this column the run-coordinator can't tell two concurrent discovery
1898
- // sessions apart for the same project — it would fall back to "latest
1899
- // non-queued session" and surface the wrong bucket counts to Aero.
1900
- statements: [
1901
- `ALTER TABLE discovery_sessions ADD COLUMN run_id TEXT`,
1902
- `CREATE INDEX IF NOT EXISTS idx_discovery_sessions_run ON discovery_sessions(run_id)`
1903
- ]
1904
- },
1905
- {
1906
- version: 57,
1907
- name: "runs-scoped-queries",
1908
- // Persists an optional subset of tracked queries to sweep on a per-run
1909
- // basis. NULL = full sweep (the default and only behavior pre-v57); a JSON
1910
- // array of query strings = scope. The job runner reads this to filter the
1911
- // query fetch via `inArray`.
1912
- statements: [
1913
- `ALTER TABLE runs ADD COLUMN queries TEXT`
1914
- ]
1915
- },
1916
- {
1917
- version: 58,
1918
- name: "snapshots-preserve-on-query-delete",
1919
- // The legacy `query_snapshots.query_id` FK was `ON DELETE CASCADE`, so a
1920
- // routine basket edit (PUT /queries replace, individual delete, `canonry
1921
- // apply` dropping a query) silently destroyed every historical citation
1922
- // snapshot for the removed queries — the regression history, transitions,
1923
- // and competitor-overlap evidence that are canonry's whole value.
1924
- //
1925
- // Fix: rebuild `query_snapshots` with `query_id` nullable + `ON DELETE
1926
- // SET NULL`, and add a denormalized `query_text` column populated from
1927
- // `queries.query` via the join. SQLite can't change FK or NOT NULL in
1928
- // place — same canonical table-rebuild pattern v53 used. All statements
1929
- // run inside the migration runner's single transaction.
1930
- //
1931
- // `run_id` keeps `ON DELETE CASCADE` — deleting a run legitimately
1932
- // removes its snapshots. Indexes are recreated on the renamed table.
1933
- statements: [
1934
- `CREATE TABLE IF NOT EXISTS query_snapshots_v58 (
1935
- id TEXT PRIMARY KEY,
1936
- run_id TEXT NOT NULL REFERENCES runs(id) ON DELETE CASCADE,
1937
- query_id TEXT REFERENCES queries(id) ON DELETE SET NULL,
1938
- query_text TEXT,
1939
- provider TEXT NOT NULL DEFAULT 'gemini',
1940
- model TEXT,
1941
- citation_state TEXT NOT NULL,
1942
- answer_mentioned INTEGER,
1943
- answer_text TEXT,
1944
- cited_domains TEXT NOT NULL DEFAULT '[]',
1945
- competitor_overlap TEXT NOT NULL DEFAULT '[]',
1946
- recommended_competitors TEXT NOT NULL DEFAULT '[]',
1947
- location TEXT,
1948
- screenshot_path TEXT,
1949
- raw_response TEXT,
1950
- created_at TEXT NOT NULL
1951
- )`,
1952
- // Backfill `query_text` from joined queries.query so existing snapshots
1953
- // stay readable even if their query is later deleted.
1954
- //
1955
- // IMPORTANT: we use `q.id` (the JOINED queries.id), not `qs.query_id`.
1956
- // Production DBs may already contain snapshots whose `qs.query_id`
1957
- // dangles — a queries row was hard-deleted at some point without
1958
- // cascading (PRAGMA foreign_keys was OFF, or pre-FK schema). Copying
1959
- // `qs.query_id` directly would re-introduce those dangling refs into
1960
- // the new table, which now validates them at INSERT (the new FK still
1961
- // requires query_id values to match queries.id when non-null). Reading
1962
- // through the LEFT JOIN forces every value to be either a valid `q.id`
1963
- // or NULL — pre-existing orphans land with NULL `query_id` / NULL
1964
- // `query_text`, preserving the snapshot row instead of failing the
1965
- // migration. The May 2026 azcoatings DB had 459 such pre-existing
1966
- // orphans; without this guard, migrate() throws SQLITE_CONSTRAINT_FOREIGNKEY.
1967
- `INSERT INTO query_snapshots_v58 (
1968
- id, run_id, query_id, query_text, provider, model, citation_state,
1969
- answer_mentioned, answer_text, cited_domains, competitor_overlap,
1970
- recommended_competitors, location, screenshot_path, raw_response,
1971
- created_at
1972
- )
1973
- SELECT qs.id, qs.run_id, q.id, q.query, qs.provider, qs.model,
1974
- qs.citation_state, qs.answer_mentioned, qs.answer_text,
1975
- qs.cited_domains, qs.competitor_overlap, qs.recommended_competitors,
1976
- qs.location, qs.screenshot_path, qs.raw_response, qs.created_at
1977
- FROM query_snapshots qs
1978
- LEFT JOIN queries q ON q.id = qs.query_id`,
1979
- `DROP TABLE query_snapshots`,
1980
- `ALTER TABLE query_snapshots_v58 RENAME TO query_snapshots`,
1981
- // Recreate the indexes that were dropped along with the old table.
1982
- `CREATE INDEX IF NOT EXISTS idx_snapshots_run ON query_snapshots(run_id)`,
1983
- `CREATE INDEX IF NOT EXISTS idx_snapshots_query ON query_snapshots(query_id)`,
1984
- `CREATE INDEX IF NOT EXISTS idx_snapshots_citation_state ON query_snapshots(citation_state)`,
1985
- `CREATE INDEX IF NOT EXISTS idx_snapshots_provider_model ON query_snapshots(provider, model)`,
1986
- `CREATE INDEX IF NOT EXISTS idx_snapshots_location ON query_snapshots(location)`,
1987
- `CREATE INDEX IF NOT EXISTS idx_snapshots_created_at ON query_snapshots(created_at)`
1988
- ]
1989
- },
1990
- {
1991
- version: 59,
1992
- name: "projects-aliases",
1993
- statements: [
1994
- `ALTER TABLE projects ADD COLUMN aliases TEXT NOT NULL DEFAULT '[]'`
1995
- ]
1996
- },
1997
- {
1998
- version: 60,
1999
- name: "audit-log-preserve-on-project-delete",
2000
- // The legacy `audit_log.project_id` FK was `ON DELETE CASCADE`, so any
2001
- // `DELETE /projects/:name` call cascade-wiped every audit row for that
2002
- // project — including the `project.deleted` row the route handler had
2003
- // just written in the same path. The deletion erased the only record
2004
- // that the deletion happened, defeating the entire purpose of the
2005
- // audit log.
2006
- //
2007
- // Fix: rebuild `audit_log` with `project_id` as `ON DELETE SET NULL`.
2008
- // Existing rows survive verbatim; future deletions detach audit rows
2009
- // from the project (project_id=NULL) instead of erasing them. SQLite
2010
- // can't change FK behavior in place — same canonical table-rebuild
2011
- // pattern v58 used for `query_snapshots`.
2012
- statements: [
2013
- `CREATE TABLE IF NOT EXISTS audit_log_v60 (
2014
- id TEXT PRIMARY KEY,
2015
- project_id TEXT REFERENCES projects(id) ON DELETE SET NULL,
2016
- actor TEXT NOT NULL,
2017
- action TEXT NOT NULL,
2018
- entity_type TEXT NOT NULL,
2019
- entity_id TEXT,
2020
- diff TEXT,
2021
- created_at TEXT NOT NULL
2022
- )`,
2023
- // LEFT JOIN guard mirrors v58: if a pre-existing row carries a
2024
- // dangling project_id (from a pre-FK era or a write with
2025
- // PRAGMA foreign_keys=OFF), the join nulls it out rather than
2026
- // failing the migration on the new FK validation.
2027
- `INSERT INTO audit_log_v60 (
2028
- id, project_id, actor, action, entity_type, entity_id, diff, created_at
2029
- )
2030
- SELECT a.id, p.id, a.actor, a.action, a.entity_type, a.entity_id, a.diff, a.created_at
2031
- FROM audit_log a
2032
- LEFT JOIN projects p ON p.id = a.project_id`,
2033
- `DROP TABLE audit_log`,
2034
- `ALTER TABLE audit_log_v60 RENAME TO audit_log`,
2035
- `CREATE INDEX IF NOT EXISTS idx_audit_log_project ON audit_log(project_id)`,
2036
- `CREATE INDEX IF NOT EXISTS idx_audit_log_created ON audit_log(created_at)`
2037
- ]
2038
- },
2039
- {
2040
- version: 61,
2041
- name: "content-target-dismissals",
2042
- // Persistent per-recommendation dismissal so users can mark a content
2043
- // opportunity "addressed" after they ship the page. The orchestrator
2044
- // recomputes opportunities on every report load from live GSC / GA
2045
- // inventory; without persistent dismissal, a recommendation lingers
2046
- // until the next sync surfaces the new page (days–weeks of lag).
2047
- //
2048
- // Keyed by `(project_id, target_ref)` where `target_ref` is the stable
2049
- // hash that `computeTargetRef()` already produces — same value the
2050
- // ContentTargetRowDto exposes, so the client passes back the ref it
2051
- // sees.
2052
- statements: [
2053
- `CREATE TABLE IF NOT EXISTS content_target_dismissals (
2054
- id TEXT PRIMARY KEY,
2055
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
2056
- target_ref TEXT NOT NULL,
2057
- addressed_url TEXT,
2058
- note TEXT,
2059
- dismissed_at TEXT NOT NULL
2060
- )`,
2061
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_content_target_dismissals_project_ref ON content_target_dismissals(project_id, target_ref)`,
2062
- `CREATE INDEX IF NOT EXISTS idx_content_target_dismissals_project ON content_target_dismissals(project_id)`
2063
- ]
2064
- },
2065
- {
2066
- version: 62,
2067
- name: "recommendation-explanations",
2068
- // LLM-generated rationale for content recommendations. Cached per
2069
- // (project, target_ref, prompt_version) so repeat clicks are free.
2070
- // Bumping the prompt version invalidates the cache forward without
2071
- // touching the table.
2072
- statements: [
2073
- `CREATE TABLE IF NOT EXISTS recommendation_explanations (
2074
- id TEXT PRIMARY KEY,
2075
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
2076
- target_ref TEXT NOT NULL,
2077
- prompt_version TEXT NOT NULL,
2078
- provider TEXT NOT NULL,
2079
- model TEXT NOT NULL,
2080
- response_text TEXT NOT NULL,
2081
- cost_millicents INTEGER NOT NULL DEFAULT 0,
2082
- generated_at TEXT NOT NULL
2083
- )`,
2084
- `CREATE UNIQUE INDEX IF NOT EXISTS idx_recommendation_explanations_unique ON recommendation_explanations(project_id, target_ref, prompt_version)`,
2085
- `CREATE INDEX IF NOT EXISTS idx_recommendation_explanations_project ON recommendation_explanations(project_id)`
2086
- ]
2087
- },
2088
- {
2089
- version: 63,
2090
- name: "audit-log-attribution-columns",
2091
- // Adds `user_agent` and `actor_session` to `audit_log` so post-mortems
2092
- // can attribute destructive events (like the 2026-05-15 azcoatings
2093
- // queries.replaced incident — see PR #593) to a specific caller.
2094
- // Without these columns, every mutation rides as `actor='api'` with no
2095
- // narrower identity, so it's impossible to tell whether a destructive
2096
- // event came from CLI, dashboard, MCP, an agent, or an external script.
2097
- //
2098
- // Both columns nullable — the audit log accepts writes from sources
2099
- // that don't have an HTTP request context (scheduler, run-coordinator,
2100
- // direct DB writes from CLI commands).
2101
- statements: [
2102
- `ALTER TABLE audit_log ADD COLUMN user_agent TEXT`,
2103
- `ALTER TABLE audit_log ADD COLUMN actor_session TEXT`
2104
- ]
2105
- },
2106
- {
2107
- version: 64,
2108
- name: "ai-user-fetch-events-hourly",
2109
- // Splits per-user fetches (ChatGPT-User, Perplexity-User) out of
2110
- // crawler_events_hourly so the dashboard / API can distinguish bulk
2111
- // machine crawl from human-in-the-loop fetch. Bot IDs are pinned to the
2112
- // two `purpose: 'user-agent'` rules that existed before this change —
2113
- // future user-fetch UAs land in the new table directly via the
2114
- // refactored classifier and never need a backfill.
2115
- //
2116
- // Statements are idempotent: CREATE/INDEX are IF NOT EXISTS; the
2117
- // INSERT … SELECT uses ON CONFLICT DO NOTHING (composite PK rows
2118
- // already moved skip silently); the DELETE keys on `bot_id`, so a
2119
- // second run is a no-op after the first DELETE drains the source.
2120
- statements: [
2121
- `CREATE TABLE IF NOT EXISTS ai_user_fetch_events_hourly (
2122
- project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
2123
- source_id TEXT NOT NULL REFERENCES traffic_sources(id) ON DELETE CASCADE,
2124
- ts_hour TEXT NOT NULL,
2125
- bot_id TEXT NOT NULL,
2126
- operator TEXT NOT NULL,
2127
- verification_status TEXT NOT NULL,
2128
- path_normalized TEXT NOT NULL,
2129
- status INTEGER NOT NULL,
2130
- hits INTEGER NOT NULL DEFAULT 0,
2131
- sampled_user_agent TEXT,
2132
- created_at TEXT NOT NULL,
2133
- updated_at TEXT NOT NULL,
2134
- PRIMARY KEY (project_id, source_id, ts_hour, bot_id, verification_status, path_normalized, status)
2135
- )`,
2136
- `CREATE INDEX IF NOT EXISTS idx_ai_user_fetch_hourly_project_ts ON ai_user_fetch_events_hourly(project_id, ts_hour)`,
2137
- `CREATE INDEX IF NOT EXISTS idx_ai_user_fetch_hourly_path ON ai_user_fetch_events_hourly(project_id, path_normalized)`,
2138
- `INSERT INTO ai_user_fetch_events_hourly
2139
- (project_id, source_id, ts_hour, bot_id, operator, verification_status, path_normalized, status, hits, sampled_user_agent, created_at, updated_at)
2140
- SELECT project_id, source_id, ts_hour, bot_id, operator, verification_status, path_normalized, status, hits, sampled_user_agent, created_at, updated_at
2141
- FROM crawler_events_hourly
2142
- WHERE bot_id IN ('openai-chatgpt-user', 'perplexity-user')
2143
- ON CONFLICT DO NOTHING`,
2144
- `DELETE FROM crawler_events_hourly WHERE bot_id IN ('openai-chatgpt-user', 'perplexity-user')`
2145
- ]
2146
- },
2147
- {
2148
- version: 65,
2149
- name: "split-mistral-ai-rule",
2150
- // The pre-existing `mistral-ai` rule matched both `MistralAI-User/*`
2151
- // (per-user fetch) and `MistralBot/*` (bulk crawl) under one id, so
2152
- // every historical row landed in crawler_events_hourly with
2153
- // bot_id='mistral-ai'. The rule is now split into `mistral-ai-user`
2154
- // (purpose: 'user-agent') and `mistral-bot` (purpose: 'crawl'); this
2155
- // migration best-effort routes the legacy rows using the bucket's
2156
- // representative sampled_user_agent.
2157
- //
2158
- // Mixed-UA buckets (where a single (project, source, hour, path,
2159
- // status) accumulated both UAs under the old shared id) are routed
2160
- // by whichever UA happened to be sampled — the bucket-key granularity
2161
- // doesn't preserve per-event UAs, so any heuristic has the same
2162
- // limitation. Going forward the split rules write to disjoint tables.
2163
- //
2164
- // Idempotent: the INSERT…SELECT uses ON CONFLICT DO NOTHING; the
2165
- // UPDATE and DELETE both filter on bot_id='mistral-ai', so a second
2166
- // run finds no rows after the first apply.
2167
- statements: [
2168
- `INSERT INTO ai_user_fetch_events_hourly
2169
- (project_id, source_id, ts_hour, bot_id, operator, verification_status, path_normalized, status, hits, sampled_user_agent, created_at, updated_at)
2170
- SELECT project_id, source_id, ts_hour, 'mistral-ai-user', operator, verification_status, path_normalized, status, hits, sampled_user_agent, created_at, updated_at
2171
- FROM crawler_events_hourly
2172
- WHERE bot_id = 'mistral-ai' AND sampled_user_agent LIKE '%MistralAI-User%'
2173
- ON CONFLICT DO NOTHING`,
2174
- `DELETE FROM crawler_events_hourly WHERE bot_id = 'mistral-ai' AND sampled_user_agent LIKE '%MistralAI-User%'`,
2175
- `UPDATE crawler_events_hourly SET bot_id = 'mistral-bot' WHERE bot_id = 'mistral-ai'`
2176
- ]
2177
- },
2178
- {
2179
- version: 66,
2180
- name: "oauth-connections-track-owning-project",
2181
- // Cross-project OAuth takeover defense. Before this column, the OAuth
2182
- // callback for Google and the connect route for Bing keyed everything on
2183
- // `domain` alone — an attacker who created a project pointed at a victim's
2184
- // canonical domain could complete OAuth from their own Google/Bing account
2185
- // and silently overwrite the legitimate refresh token under that domain
2186
- // key. The new `created_by_project_id` column records the project that
2187
- // first established each connection; the callback and DELETE routes refuse
2188
- // cross-project writes when it doesn't match.
2189
- //
2190
- // Backfill: for each existing connection row, set the owner to the project
2191
- // whose `canonical_domain` matches AND whose `created_at` is oldest (the
2192
- // most likely original owner in a 1:N domain-shared install). Rows with no
2193
- // matching project stay NULL — treated as "unowned" so a future legitimate
2194
- // connect from any project can claim them.
2195
- //
2196
- // Uses the `run` hook so the schema-edit + backfill only fire when the
2197
- // target tables exist. The legacy-keyword test scenario seeds a DB at v46
2198
- // without google_connections / bing_connections (they're created in v6 but
2199
- // the test bypasses the bootstrap) — without the guard, this version's
2200
- // ALTER fails with "no such table".
2201
- //
2202
- // Idempotent: column-existence guard means re-running this version is a
2203
- // no-op; the backfill UPDATE only writes rows where the column is NULL.
2204
- statements: [],
2205
- run: (db) => {
2206
- if (tableExists(db, "google_connections") && !columnExists(db, "google_connections", "created_by_project_id")) {
2207
- db.run(sql.raw(
2208
- `ALTER TABLE google_connections ADD COLUMN created_by_project_id TEXT REFERENCES projects(id) ON DELETE SET NULL`
2209
- ));
2210
- db.run(sql.raw(`CREATE INDEX IF NOT EXISTS idx_google_conn_project ON google_connections(created_by_project_id)`));
2211
- db.run(sql.raw(
2212
- `UPDATE google_connections
2213
- SET created_by_project_id = (
2214
- SELECT p.id FROM projects p
2215
- WHERE LOWER(p.canonical_domain) = LOWER(google_connections.domain)
2216
- ORDER BY p.created_at ASC
2217
- LIMIT 1
2218
- )
2219
- WHERE created_by_project_id IS NULL`
2220
- ));
2221
- }
2222
- if (tableExists(db, "bing_connections") && !columnExists(db, "bing_connections", "created_by_project_id")) {
2223
- db.run(sql.raw(
2224
- `ALTER TABLE bing_connections ADD COLUMN created_by_project_id TEXT REFERENCES projects(id) ON DELETE SET NULL`
2225
- ));
2226
- db.run(sql.raw(`CREATE INDEX IF NOT EXISTS idx_bing_conn_project ON bing_connections(created_by_project_id)`));
2227
- db.run(sql.raw(
2228
- `UPDATE bing_connections
2229
- SET created_by_project_id = (
2230
- SELECT p.id FROM projects p
2231
- WHERE LOWER(p.canonical_domain) = LOWER(bing_connections.domain)
2232
- ORDER BY p.created_at ASC
2233
- LIMIT 1
2234
- )
2235
- WHERE created_by_project_id IS NULL`
2236
- ));
2237
- }
2238
- }
2239
- }
2240
- ];
2241
/**
 * Reports whether `err`, or the Error stored in its `cause`, carries a
 * message containing "duplicate column name".
 *
 * @param err Any thrown value; non-Error values always yield `false`.
 * @returns `true` when the duplicate-column marker is found.
 */
function isDuplicateColumnError(err) {
  const marker = "duplicate column name";
  const mentionsMarker = (candidate) => candidate instanceof Error && candidate.message.includes(marker);
  if (!(err instanceof Error)) {
    return false;
  }
  return mentionsMarker(err) || mentionsMarker(err.cause);
}
2247
/**
 * Checks whether `table` has a column named `column` by counting matching
 * rows from SQLite's `pragma_table_info`.
 *
 * Both names are spliced into the SQL text verbatim.
 *
 * @returns `true` when the count is greater than zero.
 */
function columnExists(db, table, column) {
  const lookup = sql.raw(
    `SELECT COUNT(*) as c FROM pragma_table_info('${table}') WHERE name = '${column}'`
  );
  const result = db.all(lookup);
  const matches = result[0]?.c ?? 0;
  return matches > 0;
}
2253
/**
 * Checks whether `sqlite_master` lists an entry of type `table` whose name
 * equals `table`.
 *
 * The name is spliced into the SQL text verbatim.
 *
 * @returns `true` when at least one matching entry is counted.
 */
function tableExists(db, table) {
  const lookup = sql.raw(
    `SELECT COUNT(*) as c FROM sqlite_master WHERE type = 'table' AND name = '${table}'`
  );
  const result = db.all(lookup);
  const matches = result[0]?.c ?? 0;
  return matches > 0;
}
2259
/**
 * Reports whether `table` currently holds zero rows.
 *
 * A missing first result row or a missing count is treated as zero.
 */
function tableIsEmpty(db, table) {
  const result = db.all(sql.raw(`SELECT COUNT(*) as c FROM ${table}`));
  const rowCount = result[0]?.c ?? 0;
  return rowCount === 0;
}
2263
/**
 * Detects any remnant of the old "keyword" naming: a `keywords` table, a
 * `query_snapshots.keyword_id` column, or an `insights.keyword` column.
 * Checks run in that order and stop at the first hit.
 */
function hasLegacyQuerySchema(db) {
  if (tableExists(db, "keywords")) {
    return true;
  }
  if (columnExists(db, "query_snapshots", "keyword_id")) {
    return true;
  }
  return columnExists(db, "insights", "keyword");
}
2266
/**
 * Renames the legacy `keywords` table and `keyword*` columns to their
 * `queries` / `query*` equivalents. Does nothing when no legacy artefact is
 * present.
 *
 * @throws Error when both `keywords` and `queries` exist and `queries`
 *   already contains rows.
 */
function normalizeLegacyQuerySchema(db) {
  if (!hasLegacyQuerySchema(db)) return;
  const exec = (statement) => db.run(sql.raw(statement));

  // An empty `queries` table is dropped so `keywords` can take over its name.
  if (tableExists(db, "keywords") && tableExists(db, "queries")) {
    if (!tableIsEmpty(db, "queries")) {
      throw new Error("Cannot migrate keywords to queries because both tables contain data");
    }
    exec(`DROP TABLE queries`);
  }

  // Indexes named after the legacy schema are removed before any rename.
  const legacyIndexDrops = [
    `DROP INDEX IF EXISTS idx_keywords_project`,
    `DROP INDEX IF EXISTS idx_keywords_project_keyword`,
    `DROP INDEX IF EXISTS idx_snapshots_keyword`,
    `DROP INDEX IF EXISTS idx_insights_keyword_provider`
  ];
  for (const dropStatement of legacyIndexDrops) {
    exec(dropStatement);
  }

  if (tableExists(db, "keywords")) {
    exec(`ALTER TABLE keywords RENAME TO queries`);
  }

  // Each rename is attempted only when its legacy column is still present.
  const columnRenames = [
    ["queries", "keyword", `ALTER TABLE queries RENAME COLUMN keyword TO query`],
    ["query_snapshots", "keyword_id", `ALTER TABLE query_snapshots RENAME COLUMN keyword_id TO query_id`],
    ["insights", "keyword", `ALTER TABLE insights RENAME COLUMN keyword TO query`]
  ];
  for (const [table, legacyColumn, renameStatement] of columnRenames) {
    if (columnExists(db, table, legacyColumn)) {
      exec(renameStatement);
    }
  }
}
2291
/**
 * Issues `ALTER TABLE … DROP COLUMN …` and swallows the failure only when the
 * error (or its `cause`) says the column does not exist.
 *
 * @throws Re-throws non-Error values and any Error whose message does not
 *   contain the "no such column" marker for `column`.
 */
function dropColumnIfExists(db, table, column) {
  try {
    db.run(sql.raw(`ALTER TABLE ${table} DROP COLUMN ${column}`));
  } catch (err) {
    if (!(err instanceof Error)) throw err;
    // The marker is matched both with and without quotes around the column name.
    const markers = [`no such column: "${column}"`, `no such column: ${column}`];
    const messages = [err.message, err.cause instanceof Error ? err.cause.message : ""];
    const columnAlreadyGone = messages.some((text) => markers.some((marker) => text.includes(marker)));
    if (!columnAlreadyGone) {
      throw err;
    }
  }
}
2305
/**
 * Reads credentials still stored in legacy database columns.
 *
 * Google rows are read only while `google_connections.access_token` exists,
 * and GA4 rows only while `ga_connections.private_key` exists. Rows with a
 * NULL or empty secret are filtered out by the queries themselves.
 *
 * @returns `{ google: [...], ga4: [...] }` with camelCased fields; either
 *   list is empty when its legacy column is absent.
 */
function extractLegacyCredentials(db) {
  const out = { google: [], ga4: [] };

  if (columnExists(db, "google_connections", "access_token")) {
    const googleRows = db.all(sql.raw(
      `SELECT domain, connection_type, property_id, sitemap_url, access_token, refresh_token, token_expires_at, scopes, created_at, updated_at
FROM google_connections
WHERE refresh_token IS NOT NULL AND refresh_token != ''`
    ));
    for (const record of googleRows) {
      const connection = {
        domain: record.domain,
        connectionType: record.connection_type,
        propertyId: record.property_id,
        sitemapUrl: record.sitemap_url,
        accessToken: record.access_token,
        refreshToken: record.refresh_token,
        tokenExpiresAt: record.token_expires_at,
        scopes: parseJsonColumn(record.scopes, []),
        createdAt: record.created_at,
        updatedAt: record.updated_at
      };
      out.google.push(connection);
    }
  }

  if (columnExists(db, "ga_connections", "private_key")) {
    // The inner join to `projects` supplies the project name for each row.
    const gaRows = db.all(sql.raw(
      `SELECT p.name AS project_name, ga.property_id, ga.client_email, ga.private_key, ga.created_at, ga.updated_at
FROM ga_connections ga
INNER JOIN projects p ON p.id = ga.project_id
WHERE ga.private_key IS NOT NULL AND ga.private_key != ''`
    ));
    for (const record of gaRows) {
      const connection = {
        projectName: record.project_name,
        propertyId: record.property_id,
        clientEmail: record.client_email,
        privateKey: record.private_key,
        createdAt: record.created_at,
        updatedAt: record.updated_at
      };
      out.ga4.push(connection);
    }
  }

  return out;
}
2348
/**
 * Drops the legacy secret-bearing columns from `google_connections` and
 * `ga_connections`, skipping any column that is already absent.
 */
function dropLegacyCredentialColumns(db) {
  const legacyColumns = [
    ["google_connections", "access_token"],
    ["google_connections", "refresh_token"],
    ["google_connections", "token_expires_at"],
    ["ga_connections", "private_key"]
  ];
  for (const [table, column] of legacyColumns) {
    if (!columnExists(db, table, column)) continue;
    dropColumnIfExists(db, table, column);
  }
}
2362
/**
 * Returns the highest version recorded in `_migrations`, or 0 when the query
 * yields no row or a NULL maximum.
 */
function getAppliedVersion(db) {
  const result = db.all(sql`SELECT MAX(version) as max_version FROM _migrations`);
  const newest = result[0];
  return newest?.max_version ?? 0;
}
2366
/**
 * Records a migration as applied. `INSERT OR IGNORE` is used, so a row that
 * conflicts with an existing one is skipped rather than raising.
 */
function recordMigration(db, version, name) {
  const insertRow = sql`INSERT OR IGNORE INTO _migrations (version, name) VALUES (${version}, ${name})`;
  db.run(insertRow);
}
2369
/**
 * Brings the database schema up to date in three phases:
 *
 * 1. Normalises legacy keyword naming inside its own transaction.
 * 2. Runs every non-empty `;`-separated statement of `MIGRATION_SQL`.
 * 3. Applies each entry of `MIGRATION_VERSIONS` newer than the recorded
 *    version, one transaction per entry: its statements, then its optional
 *    `run` hook, then the `_migrations` bookkeeping row.
 *
 * Duplicate-column errors from versioned statements are skipped; every other
 * error propagates.
 */
function migrate(db) {
  db.transaction((tx) => {
    normalizeLegacyQuerySchema(tx);
  });

  const baseline = MIGRATION_SQL.split(";")
    .map((fragment) => fragment.trim())
    .filter((fragment) => fragment.length > 0);
  baseline.forEach((fragment) => {
    db.run(sql.raw(fragment));
  });

  // Read once up front; versions applied below do not change the cut-off.
  const appliedVersion = getAppliedVersion(db);

  const applyVersion = (tx, mv) => {
    for (const statement of mv.statements) {
      try {
        tx.run(sql.raw(statement));
      } catch (err) {
        if (!isDuplicateColumnError(err)) throw err;
      }
    }
    mv.run?.(tx);
    recordMigration(tx, mv.version, mv.name);
  };

  for (const mv of MIGRATION_VERSIONS) {
    const alreadyApplied = mv.version <= appliedVersion;
    if (!alreadyApplied) {
      db.transaction((tx) => {
        applyVersion(tx, mv);
      });
    }
  }
}
2394
-
2395
- // ../db/src/run-helpers.ts
2396
/**
 * Splits `rows` into consecutive groups that share the same `createdAt`.
 *
 * Only adjacent rows are grouped; a value that reappears after a different
 * one starts a new group. Input order is preserved.
 *
 * @returns An array of non-empty row arrays.
 */
function groupRunsByCreatedAt(rows) {
  const batches = [];
  let batch = [];
  let batchStamp = null;
  for (const row of rows) {
    if (row.createdAt === batchStamp) {
      batch.push(row);
      continue;
    }
    // The timestamp changed: close the open batch and start a new one.
    if (batch.length > 0) {
      batches.push(batch);
    }
    batchStamp = row.createdAt;
    batch = [row];
  }
  if (batch.length > 0) {
    batches.push(batch);
  }
  return batches;
}
2412
/**
 * Picks the member of `group` with the greatest `id` under the `>` operator.
 * On ties the earliest member wins.
 *
 * @returns The chosen member, or `null` for an empty group.
 */
function pickGroupRepresentative(group) {
  if (group.length === 0) {
    return null;
  }
  return group.reduce((leader, contender) => (contender.id > leader.id ? contender : leader));
}
2421
-
2422
- // ../db/src/snapshot-helpers.ts
2423
/**
 * Keeps only the rows whose `queryId` is not strictly `null`.
 * Rows with an `undefined` `queryId` are kept.
 */
function filterTrackedSnapshots(rows) {
  const hasQueryId = (row) => row.queryId !== null;
  return rows.filter(hasQueryId);
}
2426
-
2427
- // ../intelligence/src/regressions.ts
2428
/**
 * Builds the identity of a snapshot as a JSON-encoded
 * `[query, provider, location]` triple. A null/undefined location becomes
 * the placeholder "__none__".
 */
function snapshotKey(snap) {
  const { query, provider, location } = snap;
  return JSON.stringify([query, provider, location ?? "__none__"]);
}
/**
 * Finds query/provider/location triples that were cited in `previousRun` but
 * are uncited in `currentRun`.
 *
 * Runs whose `location` differs are not comparable and yield `[]`
 * (null and undefined count as the same location).
 *
 * @returns One entry per uncited current snapshot whose key was cited before,
 *   carrying the previous citation URL/position and both run ids.
 */
function detectRegressions(currentRun, previousRun) {
  const sameLocation = (currentRun.location ?? null) === (previousRun.location ?? null);
  if (!sameLocation) {
    return [];
  }

  // Later snapshots with the same key overwrite earlier ones.
  const citedBefore = new Map();
  for (const earlier of previousRun.snapshots) {
    if (!earlier.cited) continue;
    citedBefore.set(snapshotKey(earlier), {
      citationUrl: earlier.citationUrl,
      position: earlier.position
    });
  }

  const regressions = [];
  for (const snap of currentRun.snapshots) {
    const lost = citedBefore.get(snapshotKey(snap));
    if (snap.cited || lost === undefined) continue;
    regressions.push({
      query: snap.query,
      provider: snap.provider,
      previousCitationUrl: lost.citationUrl,
      previousPosition: lost.position,
      currentRunId: currentRun.runId,
      previousRunId: previousRun.runId
    });
  }
  return regressions;
}
2462
-
2463
- // ../intelligence/src/gains.ts
2464
/**
 * Builds the identity of a snapshot as a JSON-encoded
 * `[query, provider, location]` triple. A null/undefined location becomes
 * the placeholder "__none__".
 */
function snapshotKey2(snap) {
  const { query, provider, location } = snap;
  return JSON.stringify([query, provider, location ?? "__none__"]);
}
/**
 * Finds snapshots cited in `currentRun` whose query/provider/location triple
 * was not cited in `previousRun`.
 *
 * Runs whose `location` differs are not comparable and yield `[]`
 * (null and undefined count as the same location).
 *
 * @returns One entry per newly cited current snapshot, in snapshot order.
 */
function detectGains(currentRun, previousRun) {
  const sameLocation = (currentRun.location ?? null) === (previousRun.location ?? null);
  if (!sameLocation) {
    return [];
  }

  const citedBefore = new Set();
  for (const earlier of previousRun.snapshots) {
    if (earlier.cited) {
      citedBefore.add(snapshotKey2(earlier));
    }
  }

  const gains = [];
  for (const snap of currentRun.snapshots) {
    const wasCited = citedBefore.has(snapshotKey2(snap));
    if (!snap.cited || wasCited) continue;
    gains.push({
      query: snap.query,
      provider: snap.provider,
      citationUrl: snap.citationUrl,
      position: snap.position,
      snippet: snap.snippet,
      runId: currentRun.runId
    });
  }
  return gains;
}
2494
-
2495
- // ../intelligence/src/health.ts
2496
/**
 * Summarises how many snapshots of `run` are cited, overall and per provider.
 *
 * @returns `{ overallCitedRate, totalPairs, citedPairs, providerBreakdown }`,
 *   where each breakdown entry is `{ citedRate, cited, total }`. Rates are 0
 *   when there is nothing to divide by.
 */
function computeHealth(run) {
  const tallies = new Map();
  let totalPairs = 0;
  let citedPairs = 0;
  for (const snap of run.snapshots) {
    let tally = tallies.get(snap.provider);
    if (tally === undefined) {
      tally = { cited: 0, total: 0 };
      tallies.set(snap.provider, tally);
    }
    totalPairs += 1;
    tally.total += 1;
    if (snap.cited) {
      citedPairs += 1;
      tally.cited += 1;
    }
  }

  const providerBreakdown = {};
  tallies.forEach(({ cited, total }, provider) => {
    providerBreakdown[provider] = {
      citedRate: total > 0 ? cited / total : 0,
      cited,
      total
    };
  });

  const overallCitedRate = totalPairs > 0 ? citedPairs / totalPairs : 0;
  return { overallCitedRate, totalPairs, citedPairs, providerBreakdown };
}
/**
 * Compares the overall cited rate of the last run in `runs2` with the run
 * before it.
 *
 * @returns `{ current, previous, delta }`. With no runs all three are 0;
 *   with a single run `previous` is 0 and `delta` equals `current`.
 */
function computeHealthTrend(runs2) {
  const count = runs2.length;
  if (count === 0) {
    return { current: 0, previous: 0, delta: 0 };
  }
  const current = computeHealth(runs2[count - 1]).overallCitedRate;
  const previous = count === 1 ? 0 : computeHealth(runs2[count - 2]).overallCitedRate;
  return { current, previous, delta: current - previous };
}
2538
-
2539
- // ../intelligence/src/causes.ts
2540
/**
 * Guesses why a regression happened from the current run's snapshots.
 *
 * Only uncited snapshots with the regression's query and provider are
 * considered. The first one listing competitor domains yields
 * "competitor_gain" (naming its first competitor). Otherwise the first one
 * listing cited domains yields "third_party_displacement" (naming up to
 * three). Otherwise the cause is "unknown".
 *
 * @returns `{ cause, details }`, plus `competitorDomain` for competitor gains.
 */
function analyzeCause(regression, currentSnapshots) {
  const { query, provider } = regression;
  const lostSnapshots = currentSnapshots.filter(
    (snap) => snap.query === query && snap.provider === provider && !snap.cited
  );

  const rivalSnapshot = lostSnapshots.find((snap) => snap.competitorDomains?.length);
  if (rivalSnapshot) {
    const competitorDomain = rivalSnapshot.competitorDomains[0];
    return {
      cause: "competitor_gain",
      competitorDomain,
      details: `Competitor ${competitorDomain} now cited for "${regression.query}" on ${regression.provider}`
    };
  }

  const groundedSnapshot = lostSnapshots.find((snap) => snap.citedDomains?.length);
  if (groundedSnapshot) {
    const leading = groundedSnapshot.citedDomains.slice(0, 3);
    return {
      cause: "third_party_displacement",
      details: `${regression.provider} now grounds on ${leading.join(", ")} for "${regression.query}" \u2014 none are tracked competitors.`
    };
  }

  return {
    cause: "unknown",
    details: `No specific cause identified for loss of "${regression.query}" on ${regression.provider}`
  };
}
2566
-
2567
- // ../intelligence/src/insights.ts
2568
- import { randomUUID } from "crypto";
2569
// Provider value used on insights that describe a query as a whole.
var QUERY_LEVEL_PROVIDER = "all";
/**
 * Turns the analyzer's findings into a flat list of insight records.
 *
 * Insights are emitted in a fixed order: regressions, gains, first
 * citations, provider pickups, persistent gaps, competitor gains, competitor
 * losses. Every record gets an `ins_`-prefixed id built from the first eight
 * characters of a random UUID, and all records share one `createdAt`
 * timestamp taken at the start of the call.
 *
 * @param input Object holding the seven finding lists plus `causes`, a Map
 *   keyed by `"<query>:<provider>"` that is consulted for regressions.
 * @returns The insight records.
 */
function generateInsights(input) {
  const insights2 = [];
  const createdAt = new Date().toISOString();
  // `id` goes first and `createdAt` last so the key order of each record is stable.
  const emit = (fields) => {
    insights2.push({ id: `ins_${randomUUID().slice(0, 8)}`, ...fields, createdAt });
  };

  for (const reg of input.regressions) {
    emit({
      type: "regression",
      severity: "high",
      title: `Lost ${reg.provider} citation for "${reg.query}"`,
      query: reg.query,
      provider: reg.provider,
      recommendation: {
        action: "audit",
        target: reg.previousCitationUrl,
        reason: `Page was previously cited at position ${reg.previousPosition ?? "unknown"}. Run aeo-audit to check for content or schema issues.`
      },
      cause: input.causes.get(`${reg.query}:${reg.provider}`)
    });
  }

  for (const gain of input.gains) {
    emit({
      type: "gain",
      severity: "low",
      title: `New ${gain.provider} citation for "${gain.query}"`,
      query: gain.query,
      provider: gain.provider,
      recommendation: {
        action: "monitor",
        target: gain.citationUrl,
        reason: `New citation appeared at position ${gain.position ?? "unknown"}. Monitor to confirm it persists.`
      }
    });
  }

  for (const fc of input.firstCitations) {
    emit({
      type: "first-citation",
      severity: "medium",
      title: `First citation for "${fc.query}" on ${fc.provider}`,
      query: fc.query,
      provider: fc.provider,
      recommendation: {
        action: "monitor",
        target: fc.citationUrl,
        reason: `"${fc.query}" had not been cited by any provider before this run. Monitor to confirm the citation persists.`
      }
    });
  }

  for (const pp of input.providerPickups) {
    emit({
      type: "provider-pickup",
      severity: "low",
      title: `${pp.provider} picked up "${pp.query}"`,
      query: pp.query,
      provider: pp.provider,
      recommendation: {
        action: "monitor",
        target: pp.citationUrl,
        reason: `${pp.provider} started citing "${pp.query}" alongside other providers. Monitor to confirm the citation persists.`
      }
    });
  }

  for (const gap of input.persistentGaps) {
    emit({
      type: "persistent-gap",
      severity: "medium",
      title: `"${gap.query}" uncited for ${gap.streak} runs`,
      query: gap.query,
      provider: QUERY_LEVEL_PROVIDER,
      recommendation: {
        action: "audit",
        reason: `No provider has cited "${gap.query}" for ${gap.streak} consecutive runs. Audit content and schema for this topic.`
      }
    });
  }

  for (const cg of input.competitorGains) {
    emit({
      type: "competitor-gained",
      severity: "medium",
      title: `${cg.competitorDomain} appeared on "${cg.query}"`,
      query: cg.query,
      provider: QUERY_LEVEL_PROVIDER,
      cause: {
        cause: "competitor_gain",
        competitorDomain: cg.competitorDomain,
        details: `Tracked competitor ${cg.competitorDomain} just got cited on "${cg.query}".`
      },
      recommendation: {
        action: "audit",
        reason: `Investigate ${cg.competitorDomain}'s content for "${cg.query}" \u2014 they just earned a citation here.`
      }
    });
  }

  for (const cl of input.competitorLosses) {
    emit({
      type: "competitor-lost",
      severity: "low",
      title: `${cl.competitorDomain} dropped from "${cl.query}"`,
      query: cl.query,
      provider: QUERY_LEVEL_PROVIDER,
      cause: {
        cause: "competitor_loss",
        competitorDomain: cl.competitorDomain,
        details: `Tracked competitor ${cl.competitorDomain} lost their citation on "${cl.query}".`
      },
      recommendation: {
        action: "monitor",
        reason: `Opportunity: ${cl.competitorDomain} just lost "${cl.query}". Tighten your own coverage to fill the gap.`
      }
    });
  }

  return insights2;
}
2697
-
2698
- // ../intelligence/src/first-citations.ts
2699
/**
 * Finds cited snapshots in `currentRun` whose query had no cited snapshot at
 * all in `previousRun`, from any provider.
 *
 * @returns At most one entry per `query:provider` pair (the first one seen),
 *   in snapshot order.
 */
function detectFirstCitations(currentRun, previousRun) {
  const queriesCitedBefore = new Set(
    previousRun.snapshots.filter((snap) => snap.cited).map((snap) => snap.query)
  );
  const reportedPairs = new Set();
  const firsts = [];
  for (const snap of currentRun.snapshots) {
    if (!snap.cited || queriesCitedBefore.has(snap.query)) continue;
    const pairKey = `${snap.query}:${snap.provider}`;
    if (reportedPairs.has(pairKey)) continue;
    reportedPairs.add(pairKey);
    const { query, provider, citationUrl, position } = snap;
    firsts.push({ query, provider, citationUrl, position, runId: currentRun.runId });
  }
  return firsts;
}
2722
-
2723
- // ../intelligence/src/provider-pickups.ts
2724
/**
 * Finds cited snapshots in `currentRun` where the query was already cited by
 * some provider in `previousRun`, but not by this snapshot's provider.
 *
 * @returns At most one entry per `query:provider` pair (the first one seen),
 *   in snapshot order.
 */
function detectProviderPickups(currentRun, previousRun) {
  const queriesCitedBefore = new Set();
  const pairsCitedBefore = new Set();
  for (const earlier of previousRun.snapshots) {
    if (earlier.cited) {
      queriesCitedBefore.add(earlier.query);
      pairsCitedBefore.add(`${earlier.query}:${earlier.provider}`);
    }
  }

  const reportedPairs = new Set();
  const pickups = [];
  for (const snap of currentRun.snapshots) {
    if (!snap.cited || !queriesCitedBefore.has(snap.query)) continue;
    const pairKey = `${snap.query}:${snap.provider}`;
    if (pairsCitedBefore.has(pairKey) || reportedPairs.has(pairKey)) continue;
    reportedPairs.add(pairKey);
    const { query, provider, citationUrl, position } = snap;
    pickups.push({ query, provider, citationUrl, position, runId: currentRun.runId });
  }
  return pickups;
}
2751
-
2752
- // ../intelligence/src/persistent-gaps.ts
2753
// Default number of consecutive uncited runs that counts as a persistent gap.
var PERSISTENT_GAP_THRESHOLD = 3;
/**
 * Finds queries that have gone uncited for at least `threshold` consecutive
 * runs, counting backwards from the last entry of `runs2`.
 *
 * A query's streak ends at the first run (walking backwards) that either has
 * no snapshot for the query or has at least one cited snapshot for it.
 * Snapshots with a falsy `query` are ignored.
 *
 * Each run is indexed once into a query -> "any cited" map, so the streak
 * walk is a map lookup per run instead of a rescan of every snapshot for
 * every query. Results and their order are unchanged.
 *
 * @param runs2 Runs in the order the streak should be walked (last = newest).
 * @param threshold Minimum streak length to report.
 * @returns `{ query, streak, threshold }` entries in first-seen query order;
 *   `[]` when there are fewer runs than `threshold`.
 */
function detectPersistentGaps(runs2, threshold = PERSISTENT_GAP_THRESHOLD) {
  if (runs2.length < threshold) return [];

  const queries2 = new Set();
  // One map per run: query -> true when any snapshot for it was cited.
  const citedByRun = runs2.map((run) => {
    const citedByQuery = new Map();
    for (const snap of run.snapshots) {
      if (!snap.query) continue;
      queries2.add(snap.query);
      const alreadyCited = citedByQuery.get(snap.query) === true;
      citedByQuery.set(snap.query, alreadyCited || Boolean(snap.cited));
    }
    return citedByQuery;
  });

  const result = [];
  for (const query of queries2) {
    let streak = 0;
    for (let i = citedByRun.length - 1; i >= 0; i--) {
      // `undefined` means the run never asked this query; `true` means it was cited.
      if (citedByRun[i].get(query) !== false) break;
      streak++;
    }
    if (streak >= threshold) {
      result.push({ query, streak, threshold });
    }
  }
  return result;
}
2779
-
2780
- // ../intelligence/src/competitor-changes.ts
2781
/**
 * Indexes a run by tracked competitor: for each domain in `tracked` that
 * appears in a snapshot's `competitorDomains`, collects the queries it
 * appeared on. Snapshots with a falsy `query` or no competitor domains are
 * skipped.
 *
 * @returns Map of competitor domain -> Set of queries.
 */
function buildCompetitorQueryMap(run, tracked) {
  const queriesByDomain = new Map();
  for (const snap of run.snapshots) {
    const domains = snap.competitorDomains;
    if (!snap.query || !domains || domains.length === 0) continue;
    for (const domain of domains) {
      if (!tracked.has(domain)) continue;
      let queries = queriesByDomain.get(domain);
      if (queries === undefined) {
        queries = new Set();
        queriesByDomain.set(domain, queries);
      }
      queries.add(snap.query);
    }
  }
  return queriesByDomain;
}
/**
 * Lists `{ query, competitorDomain }` pairs where a tracked competitor
 * appears on a query in `currentRun` but did not in `previousRun`.
 *
 * @param opts `{ trackedCompetitors }`; an empty list yields `[]`.
 */
function detectCompetitorGains(currentRun, previousRun, opts) {
  const tracked = new Set(opts.trackedCompetitors);
  if (tracked.size === 0) return [];
  const nowByDomain = buildCompetitorQueryMap(currentRun, tracked);
  const beforeByDomain = buildCompetitorQueryMap(previousRun, tracked);
  return [...tracked].flatMap((competitorDomain) => {
    const before = beforeByDomain.get(competitorDomain) ?? new Set();
    const now = nowByDomain.get(competitorDomain) ?? new Set();
    return [...now]
      .filter((query) => !before.has(query))
      .map((query) => ({ query, competitorDomain }));
  });
}
/**
 * Lists `{ query, competitorDomain }` pairs where a tracked competitor
 * appeared on a query in `previousRun` but no longer does in `currentRun`.
 *
 * @param opts `{ trackedCompetitors }`; an empty list yields `[]`.
 */
function detectCompetitorLosses(currentRun, previousRun, opts) {
  const tracked = new Set(opts.trackedCompetitors);
  if (tracked.size === 0) return [];
  const nowByDomain = buildCompetitorQueryMap(currentRun, tracked);
  const beforeByDomain = buildCompetitorQueryMap(previousRun, tracked);
  return [...tracked].flatMap((competitorDomain) => {
    const before = beforeByDomain.get(competitorDomain) ?? new Set();
    const now = nowByDomain.get(competitorDomain) ?? new Set();
    return [...before]
      .filter((query) => !now.has(query))
      .map((query) => ({ query, competitorDomain }));
  });
}
2826
-
2827
- // ../intelligence/src/analyzer.ts
2828
- function analyzeRuns(currentRun, previousRun, opts = {}) {
2829
- const trackedCompetitors = opts.trackedCompetitors ?? [];
2830
- const history = opts.history ?? [];
2831
- const persistentGapThreshold = opts.persistentGapThreshold ?? PERSISTENT_GAP_THRESHOLD;
2832
- const regressions = detectRegressions(currentRun, previousRun);
2833
- const gains = detectGains(currentRun, previousRun);
2834
- const firstCitations = detectFirstCitations(currentRun, previousRun);
2835
- const providerPickups = detectProviderPickups(currentRun, previousRun);
2836
- const competitorGains = detectCompetitorGains(currentRun, previousRun, { trackedCompetitors });
2837
- const competitorLosses = detectCompetitorLosses(currentRun, previousRun, { trackedCompetitors });
2838
- const persistentGaps = history.length >= persistentGapThreshold ? detectPersistentGaps(history, persistentGapThreshold) : [];
2839
- const health = computeHealth(currentRun);
2840
- const trend = history.length > 0 ? computeHealthTrend(history) : void 0;
2841
- const causes = /* @__PURE__ */ new Map();
2842
- for (const reg of regressions) {
2843
- const cause = analyzeCause(reg, currentRun.snapshots);
2844
- causes.set(`${reg.query}:${reg.provider}`, cause);
2845
- }
2846
- const insights2 = generateInsights({
2847
- regressions,
2848
- gains,
2849
- firstCitations,
2850
- providerPickups,
2851
- persistentGaps,
2852
- competitorGains,
2853
- competitorLosses,
2854
- health,
2855
- causes
2856
- });
2857
- return {
2858
- regressions,
2859
- gains,
2860
- firstCitations,
2861
- providerPickups,
2862
- persistentGaps,
2863
- competitorGains,
2864
- competitorLosses,
2865
- health,
2866
- trend,
2867
- insights: insights2
2868
- };
2869
- }
2870
-
2871
- // ../intelligence/src/query-shape.ts
2872
- var TRANSACTIONAL = /\b(?:buy|price|pricing|cost|cheap|discount|coupon|deal|sale|trial|plan)\b/i;
2873
- var NAVIGATIONAL = /\b(?:login|sign[- ]?in|contact|support|help|download|app|homepage)\b|\.(?:com|io|net|org|app|ai)\b/i;
2874
- function isBlogShapedQuery(query) {
2875
- const trimmed = query.trim();
2876
- if (!trimmed) return false;
2877
- if (TRANSACTIONAL.test(trimmed)) return false;
2878
- if (NAVIGATIONAL.test(trimmed)) return false;
2879
- return true;
2880
- }
2881
-
2882
- // ../intelligence/src/site-inventory.ts
2883
- var BLOG_SHAPED_PATH_PREFIXES = [
2884
- "/blog/",
2885
- "/posts/",
2886
- "/articles/",
2887
- "/guides/",
2888
- "/learn/",
2889
- "/resources/",
2890
- "/glossary/"
2891
- ];
2892
- function buildInventory(input) {
2893
- const map = /* @__PURE__ */ new Map();
2894
- const addPage = (rawUrl, source) => {
2895
- const path = extractPath(rawUrl);
2896
- if (!path) return;
2897
- if (!isBlogShaped(path)) return;
2898
- let sources = map.get(path);
2899
- if (!sources) {
2900
- sources = /* @__PURE__ */ new Set();
2901
- map.set(path, sources);
2902
- }
2903
- sources.add(source);
2904
- };
2905
- for (const url of input.gscPages) addPage(url, "gsc");
2906
- for (const url of input.ga4LandingPages) addPage(url, "ga4");
2907
- for (const url of input.sitemapUrls) addPage(url, "sitemap");
2908
- for (const url of input.wpPosts) addPage(url, "wp");
2909
- return Array.from(map.entries()).map(([url, sources]) => ({
2910
- url,
2911
- sources: Array.from(sources)
2912
- }));
2913
- }
2914
- function extractPath(url) {
2915
- const trimmed = url.trim();
2916
- if (!trimmed) return "";
2917
- let path;
2918
- try {
2919
- path = new URL(trimmed).pathname;
2920
- } catch {
2921
- path = trimmed;
2922
- }
2923
- const stripped = path.replace(/\/+$/, "");
2924
- return stripped || "/";
2925
- }
2926
- function isBlogShaped(path) {
2927
- return BLOG_SHAPED_PATH_PREFIXES.some((prefix) => path.startsWith(prefix));
2928
- }
2929
-
2930
- // ../intelligence/src/content-classifier.ts
2931
- var SEO_STRONG_THRESHOLD = 10;
2932
- var SEO_WEAK_THRESHOLD = 30;
2933
- function isHomepageOnly(url) {
2934
- if (url === "/" || url === "") return true;
2935
- const stripped = url.split("?")[0].replace(/\/+$/, "");
2936
- return stripped === "" || stripped === "/";
2937
- }
2938
- function classifyContentAction(input) {
2939
- const { ourPage, ourPageInGroundingSources, ourPageHasSchema } = input;
2940
- if (!ourPage) return "create";
2941
- if (isHomepageOnly(ourPage.url)) return "create";
2942
- if (ourPageInGroundingSources) {
2943
- if (ourPageHasSchema === false) return "add-schema";
2944
- return null;
2945
- }
2946
- if (ourPage.position <= SEO_STRONG_THRESHOLD) return "refresh";
2947
- if (ourPage.position <= SEO_WEAK_THRESHOLD) return "expand";
2948
- return "create";
2949
- }
2950
-
2951
- // ../intelligence/src/content-scorer.ts
2952
- var SEVERITY_BY_ACTION = {
2953
- create: 1,
2954
- "add-schema": 0.7,
2955
- expand: 0.6,
2956
- refresh: 0.4
2957
- };
2958
- function scoreContentTarget(input) {
2959
- const demand = computeDemandComponent(input.gscImpressions, input.aiReferralFactor);
2960
- const competitor = computeCompetitorComponent(
2961
- input.competitorCount,
2962
- input.recentMissRate,
2963
- input.citationCount
2964
- );
2965
- const absence = clamp01(1 - input.ourCitedRate);
2966
- const gapSeverity = input.action ? SEVERITY_BY_ACTION[input.action] : 0;
2967
- const score = (demand + competitor) * absence * gapSeverity;
2968
- return {
2969
- score,
2970
- scoreBreakdown: { demand, competitor, absence, gapSeverity },
2971
- drivers: buildDrivers(input),
2972
- demandSource: classifyDemandSource(input.gscImpressions, input.competitorCount)
2973
- };
2974
- }
2975
- function computeDemandComponent(gscImpressions, aiReferralFactor) {
2976
- const logImpressions = Math.log(Math.max(gscImpressions, 0) + 1);
2977
- const aiBoost = 1 + Math.max(aiReferralFactor, 0);
2978
- return logImpressions * aiBoost;
2979
- }
2980
- function computeCompetitorComponent(competitorCount, recentMissRate, citationCount) {
2981
- if (competitorCount <= 0) return 0;
2982
- const logCompetitors = Math.log(competitorCount + 1);
2983
- return logCompetitors * clamp01(recentMissRate) * Math.max(citationCount, 0);
2984
- }
2985
- function classifyDemandSource(gscImpressions, competitorCount) {
2986
- const hasGsc = gscImpressions > 0;
2987
- const hasCompetitor = competitorCount > 0;
2988
- if (hasGsc && hasCompetitor) return "both";
2989
- if (hasCompetitor) return "competitor-evidence";
2990
- return "gsc";
2991
- }
2992
- function clamp01(value) {
2993
- if (value < 0) return 0;
2994
- if (value > 1) return 1;
2995
- return value;
2996
- }
2997
- function buildDrivers(input) {
2998
- const drivers = [];
2999
- if (input.competitorCount > 0) {
3000
- const noun = input.competitorCount === 1 ? "competitor" : "competitors";
3001
- drivers.push(`${input.competitorCount} ${noun} cited`);
3002
- }
3003
- if (input.gscImpressions > 0) {
3004
- drivers.push(`${formatImpressions(input.gscImpressions)} GSC impressions`);
3005
- }
3006
- if (input.recentMissRate >= 0.5 && input.competitorCount > 0) {
3007
- const pct = Math.round(clamp01(input.recentMissRate) * 100);
3008
- drivers.push(`missed in ${pct}% of recent runs`);
3009
- }
3010
- if (input.action === "create" && input.position === null) {
3011
- drivers.push("no existing page");
3012
- }
3013
- if (input.position !== null) {
3014
- const positionDisplay = Math.round(input.position);
3015
- if (input.position > 30) {
3016
- drivers.push(`page ranks #${positionDisplay} (effectively invisible)`);
3017
- } else if (input.position > 10) {
3018
- drivers.push(`page ranks #${positionDisplay}`);
3019
- }
3020
- }
3021
- if (input.action === "add-schema") {
3022
- drivers.push("cited by LLMs but lacks structured data");
3023
- }
3024
- return drivers;
3025
- }
3026
- function formatImpressions(impressions) {
3027
- if (impressions >= 1e3) {
3028
- return `${Math.round(impressions / 100) / 10}k`;
3029
- }
3030
- return String(impressions);
3031
- }
3032
-
3033
- // ../intelligence/src/content-confidence.ts
3034
- var GSC_DENSE_IMPRESSIONS_THRESHOLD = 100;
3035
- var RUN_HISTORY_HIGH_CONFIDENCE_THRESHOLD = 3;
3036
- function calculateActionConfidence(input) {
3037
- const gscDense = input.hasGsc && input.gscImpressions >= GSC_DENSE_IMPRESSIONS_THRESHOLD;
3038
- const historyDeep = input.runsOfHistory >= RUN_HISTORY_HIGH_CONFIDENCE_THRESHOLD;
3039
- if (gscDense && historyDeep) return "high";
3040
- if (!input.hasGsc && !input.hasInventoryMatch) {
3041
- return "low";
3042
- }
3043
- return "medium";
3044
- }
3045
-
3046
- // ../intelligence/src/content-targets.ts
3047
- function buildContentTargetRows(input) {
3048
- const rows = [];
3049
- for (const cq of input.candidateQueries) {
3050
- const ourPage = resolveOurPage(cq, input.inventory);
3051
- const ourPageInGroundingSources = cq.ourCitedInLatestRun;
3052
- const ourPageHasSchema = ourPage ? input.wpSchemaAudit.get(ourPage.url) ?? null : null;
3053
- const action = classifyContentAction({
3054
- ourPage,
3055
- ourPageInGroundingSources,
3056
- ourPageHasSchema
3057
- });
3058
- if (!action) continue;
3059
- const hasGsc = cq.gscImpressions > 0;
3060
- const hasCompetitor = cq.competitorDomains.length > 0;
3061
- if (!hasGsc && !hasCompetitor && !cq.ourCitedInLatestRun) continue;
3062
- const aiReferralFactor = computeAiReferralFactor(
3063
- input.totalAiReferralSessions,
3064
- cq.competitorCitationCount
3065
- );
3066
- const scoring = scoreContentTarget({
3067
- gscImpressions: cq.gscImpressions,
3068
- aiReferralFactor,
3069
- competitorCount: cq.competitorDomains.length,
3070
- recentMissRate: cq.recentMissRate,
3071
- citationCount: cq.competitorCitationCount,
3072
- ourCitedRate: cq.ourCitedRate,
3073
- action,
3074
- position: ourPage?.position ?? null
3075
- });
3076
- const actionConfidence = calculateActionConfidence({
3077
- hasGsc: cq.gscPage !== null,
3078
- gscImpressions: cq.gscImpressions,
3079
- runsOfHistory: cq.runsOfHistory,
3080
- hasCompetitorEvidence: cq.competitorDomains.length > 0,
3081
- hasInventoryMatch: ourPage?.source === "inventory"
3082
- });
3083
- const targetRef = computeTargetRef({
3084
- projectId: input.projectId,
3085
- query: cq.query,
3086
- action
3087
- });
3088
- const winningCompetitor = pickTopCompetitor(cq.competitorGroundingUrls);
3089
- const ourBestPage = ourPage ? {
3090
- url: ourPage.url,
3091
- gscImpressions: cq.gscImpressions,
3092
- gscClicks: cq.gscClicks,
3093
- gscAvgPosition: cq.gscPosition,
3094
- organicSessions: input.gaTrafficByPage.get(ourPage.url) ?? 0
3095
- } : null;
3096
- rows.push({
3097
- targetRef,
3098
- query: cq.query,
3099
- action,
3100
- ourBestPage,
3101
- winningCompetitor,
3102
- score: scoring.score,
3103
- scoreBreakdown: scoring.scoreBreakdown,
3104
- drivers: scoring.drivers,
3105
- demandSource: scoring.demandSource,
3106
- actionConfidence,
3107
- existingAction: input.inProgressActions.get(targetRef) ?? null
3108
- });
3109
- }
3110
- return dedupeByIntent(
3111
- rows.sort((a, b) => b.score - a.score),
3112
- input.queryIntentModifiers ?? []
3113
- );
3114
- }
3115
- function buildContentSourceRows(input) {
3116
- return input.candidateQueries.map((cq) => ({
3117
- query: cq.query,
3118
- groundingSources: [
3119
- ...cq.ourGroundingUrls.map((g) => ({
3120
- uri: g.uri,
3121
- title: g.title,
3122
- domain: g.domain,
3123
- isOurDomain: true,
3124
- isCompetitor: false,
3125
- citationCount: g.citationCount,
3126
- providers: g.providers
3127
- })),
3128
- ...cq.competitorGroundingUrls.map((g) => ({
3129
- uri: g.uri,
3130
- title: g.title,
3131
- domain: g.domain,
3132
- isOurDomain: false,
3133
- isCompetitor: true,
3134
- citationCount: g.citationCount,
3135
- providers: g.providers
3136
- }))
3137
- ]
3138
- }));
3139
- }
3140
- function buildContentGapRows(input) {
3141
- const gaps = [];
3142
- for (const cq of input.candidateQueries) {
3143
- if (cq.competitorDomains.length === 0) continue;
3144
- if (cq.ourCitedRate >= 1) continue;
3145
- gaps.push({
3146
- query: cq.query,
3147
- competitorDomains: cq.competitorDomains,
3148
- competitorCount: cq.competitorDomains.length,
3149
- missRate: clamp012(cq.recentMissRate),
3150
- lastSeenInRunId: input.latestRunId
3151
- });
3152
- }
3153
- return gaps.sort((a, b) => {
3154
- if (b.missRate !== a.missRate) return b.missRate - a.missRate;
3155
- return b.competitorCount - a.competitorCount;
3156
- });
3157
- }
3158
- function resolveOurPage(cq, inventory) {
3159
- if (cq.gscPage && cq.gscPosition !== null) {
3160
- return { url: cq.gscPage, position: cq.gscPosition, source: "gsc" };
3161
- }
3162
- for (const page of inventory) {
3163
- if (slugMatchesQuery(page.url, cq.query)) {
3164
- return { url: page.url, position: 100, source: "inventory" };
3165
- }
3166
- }
3167
- return null;
3168
- }
3169
- function slugMatchesQuery(url, query) {
3170
- const slug = url.toLowerCase();
3171
- const queryAsSlug = query.toLowerCase().trim().replace(/\s+/g, "-");
3172
- if (slug.includes(queryAsSlug)) return true;
3173
- const queryTokens = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
3174
- const slugTokens = new Set(slug.split(/[/\s\-_.]+/));
3175
- const overlap = queryTokens.filter((t) => slugTokens.has(t)).length;
3176
- return overlap >= 2;
3177
- }
3178
- function computeAiReferralFactor(totalAiReferralSessions, competitorCount) {
3179
- if (totalAiReferralSessions <= 0) return 0;
3180
- const baseline = Math.min(totalAiReferralSessions / 1e3, 0.5);
3181
- const competitorBoost = competitorCount > 0 ? 0.1 : 0;
3182
- return Math.min(baseline + competitorBoost, 1);
3183
- }
3184
- var QUERY_INTENT_STOPWORDS = /* @__PURE__ */ new Set([
3185
- "a",
3186
- "an",
3187
- "and",
3188
- "at",
3189
- "by",
3190
- "for",
3191
- "from",
3192
- "in",
3193
- "near",
3194
- "of",
3195
- "on",
3196
- "or",
3197
- "the",
3198
- "to"
3199
- ]);
3200
- function dedupeByIntent(rows, modifiers) {
3201
- if (rows.length <= 1 || modifiers.length === 0) return rows;
3202
- const seen = /* @__PURE__ */ new Set();
3203
- const result = [];
3204
- const modifierTokens = new Set(
3205
- modifiers.flatMap(tokenizeQuery).map(normalizeToken).filter(Boolean)
3206
- );
3207
- for (const row of rows) {
3208
- const key = intentKey(row.query, modifierTokens);
3209
- if (!key || seen.has(key)) continue;
3210
- seen.add(key);
3211
- result.push(row);
3212
- }
3213
- return result;
3214
- }
3215
- function intentKey(query, modifierTokens) {
3216
- const tokens = tokenizeQuery(query).map(normalizeToken).filter(Boolean).filter((token) => !QUERY_INTENT_STOPWORDS.has(token)).filter((token) => !modifierTokens.has(token));
3217
- return [...new Set(tokens)].sort().join(" ");
3218
- }
3219
- function tokenizeQuery(value) {
3220
- return value.toLowerCase().match(/[a-z0-9]+/g) ?? [];
3221
- }
3222
- function normalizeToken(token) {
3223
- if (token.length > 4 && token.endsWith("ies")) return `${token.slice(0, -3)}y`;
3224
- if (token.length > 4 && token.endsWith("s") && !token.endsWith("ss")) return token.slice(0, -1);
3225
- return token;
3226
- }
3227
- function pickTopCompetitor(competitors2) {
3228
- if (competitors2.length === 0) return null;
3229
- const top = [...competitors2].sort((a, b) => b.citationCount - a.citationCount)[0];
3230
- return {
3231
- domain: top.domain,
3232
- url: top.uri,
3233
- title: top.title,
3234
- citationCount: top.citationCount
3235
- };
3236
- }
3237
- function computeTargetRef(input) {
3238
- const key = [input.projectId, input.query, input.action].join("|");
3239
- let hash = 0;
3240
- for (let i = 0; i < key.length; i++) {
3241
- hash = (hash << 5) - hash + key.charCodeAt(i) | 0;
3242
- }
3243
- return `tgt_${(hash >>> 0).toString(36)}`;
3244
- }
3245
- function clamp012(value) {
3246
- if (value < 0) return 0;
3247
- if (value > 1) return 1;
3248
- return value;
3249
- }
3250
-
3251
- // ../intelligence/src/next-steps.ts
3252
- var TOP_N = 5;
3253
- var IMMEDIATE_HORIZON = 3;
3254
- var ACTION_TITLE = {
3255
- [ContentActions.create]: (q) => `Create a page targeting "${q}"`,
3256
- [ContentActions.refresh]: (q) => `Refresh the page targeting "${q}"`,
3257
- [ContentActions.expand]: (q) => `Expand coverage of "${q}"`,
3258
- [ContentActions["add-schema"]]: (q) => `Add structured data to the page targeting "${q}"`
3259
- };
3260
- function mapOpportunitiesToNextSteps(opportunities, existing) {
3261
- if (existing.length > 0) return existing;
3262
- if (opportunities.length === 0) return [];
3263
- return opportunities.slice(0, TOP_N).map((opp, idx) => ({
3264
- horizon: idx < IMMEDIATE_HORIZON ? "immediate" : "short-term",
3265
- title: ACTION_TITLE[opp.action](opp.query),
3266
- rationale: `Score ${Math.round(opp.score)} \xB7 demand ${opp.demandSource} \xB7 ${opp.actionConfidence} confidence.`
3267
- }));
3268
- }
3269
-
3270
- // ../intelligence/src/insight-severity.ts
3271
- var SEVERITY_THRESHOLDS = {
3272
- highTrafficImpressions: 100,
3273
- recurrenceCount: 2,
3274
- mediumTrafficImpressions: 10
3275
- };
3276
- function classifyRegressionSeverity(signals) {
3277
- const { gscImpressions, recurrenceCount } = signals;
3278
- if (gscImpressions === void 0 && recurrenceCount === void 0) return "high";
3279
- const isHighTraffic = gscImpressions !== void 0 && gscImpressions >= SEVERITY_THRESHOLDS.highTrafficImpressions;
3280
- const isRecurring = recurrenceCount !== void 0 && recurrenceCount >= SEVERITY_THRESHOLDS.recurrenceCount;
3281
- const isModerateTraffic = gscImpressions !== void 0 && gscImpressions >= SEVERITY_THRESHOLDS.mediumTrafficImpressions;
3282
- if (isHighTraffic && isRecurring) return "critical";
3283
- if (isHighTraffic || isRecurring) return "high";
3284
- if (isModerateTraffic) return "medium";
3285
- return "low";
3286
- }
3287
-
3288
- // ../intelligence/src/insight-grouping.ts
3289
- function groupInsights(insights2, keyFn = (i) => JSON.stringify([i.query, i.provider, i.type])) {
3290
- const order = [];
3291
- const buckets = /* @__PURE__ */ new Map();
3292
- for (const i of insights2) {
3293
- const key = keyFn(i);
3294
- const bucket = buckets.get(key);
3295
- if (bucket) {
3296
- bucket.push(i);
3297
- } else {
3298
- buckets.set(key, [i]);
3299
- order.push(key);
3300
- }
3301
- }
3302
- return order.map((key) => {
3303
- const sorted = [...buckets.get(key)].sort((a, b) => a.createdAt.localeCompare(b.createdAt));
3304
- const representative = sorted[sorted.length - 1];
3305
- return {
3306
- representative,
3307
- count: sorted.length,
3308
- instances: sorted,
3309
- latest: representative.createdAt
3310
- };
3311
- });
3312
- }
3313
-
3314
- // ../intelligence/src/query-categorize.ts
3315
- var TRANSACTIONAL_RE = /\b(?:buy|price|pricing|cost|hire|near me|services?|agency|consultant|company)\b/i;
3316
- var INFORMATIONAL_RE = /\b(?:what|how|why|when|guide|tutorial|vs|versus|alternatives?|examples?|definition)\b/i;
3317
- var MIN_BRAND_TOKEN_LENGTH = 3;
3318
- function compact(value) {
3319
- return value.toLowerCase().replace(/[^a-z0-9]/g, "");
3320
- }
3321
- function buildBrandTokens(canonicalDomain, brandNames = []) {
3322
- const seen = /* @__PURE__ */ new Set();
3323
- const stem = canonicalDomain.toLowerCase().replace(/\.[a-z]{2,}$/, "");
3324
- const stemCompact = compact(stem);
3325
- if (stemCompact.length >= MIN_BRAND_TOKEN_LENGTH) seen.add(stemCompact);
3326
- for (const name of brandNames) {
3327
- if (!name) continue;
3328
- const nameCompact = compact(name);
3329
- if (nameCompact.length >= MIN_BRAND_TOKEN_LENGTH) seen.add(nameCompact);
3330
- }
3331
- return [...seen];
3332
- }
3333
- function categorizeQueryByIntent(query, brandTokens) {
3334
- const compactQuery = compact(query);
3335
- if (brandTokens.length > 0 && brandTokens.some((t) => compactQuery.includes(t))) {
3336
- return "brand";
3337
- }
3338
- if (TRANSACTIONAL_RE.test(query)) return "lead-gen";
3339
- if (INFORMATIONAL_RE.test(query)) return "industry";
3340
- return "other";
3341
- }
3342
-
3343
- // ../intelligence/src/trend-stability.ts
3344
- var MIN_TREND_POINTS = 4;
3345
- function isTrendBaseline(points) {
3346
- return points.length < MIN_TREND_POINTS;
3347
- }
3348
-
3349
- // ../intelligence/src/citation-scorecard.ts
3350
- function buildCitationScorecard(snapshots, queryLookup) {
3351
- if (snapshots.length === 0) {
3352
- return { queries: [], providers: [], matrix: [], providerRates: [] };
3353
- }
3354
- const querySet = /* @__PURE__ */ new Set();
3355
- const providerSet = /* @__PURE__ */ new Set();
3356
- for (const snap of snapshots) {
3357
- const q = queryLookup.byId.get(snap.queryId);
3358
- if (!q) continue;
3359
- querySet.add(q);
3360
- providerSet.add(snap.provider);
3361
- }
3362
- const queryList = [...querySet].sort();
3363
- const providerList = [...providerSet].sort();
3364
- const matrix = queryList.map(
3365
- () => providerList.map(() => null)
3366
- );
3367
- const providerCounts = /* @__PURE__ */ new Map();
3368
- for (const snap of snapshots) {
3369
- const q = queryLookup.byId.get(snap.queryId);
3370
- if (!q) continue;
3371
- const qi = queryList.indexOf(q);
3372
- const pi = providerList.indexOf(snap.provider);
3373
- if (qi < 0 || pi < 0) continue;
3374
- matrix[qi][pi] = {
3375
- citationState: snap.citationState === CitationStates.cited ? "cited" : "not-cited",
3376
- answerMentioned: snap.answerMentioned ?? null,
3377
- model: snap.model
3378
- };
3379
- const counts = providerCounts.get(snap.provider) ?? { cited: 0, total: 0 };
3380
- counts.total++;
3381
- if (snap.citationState === CitationStates.cited) counts.cited++;
3382
- providerCounts.set(snap.provider, counts);
3383
- }
3384
- const providerRates = providerList.map((provider) => {
3385
- const counts = providerCounts.get(provider) ?? { cited: 0, total: 0 };
3386
- const citationRate = counts.total > 0 ? Math.round(counts.cited / counts.total * 100) : 0;
3387
- return {
3388
- provider,
3389
- citedCount: counts.cited,
3390
- totalCount: counts.total,
3391
- citationRate
3392
- };
3393
- });
3394
- return { queries: queryList, providers: providerList, matrix, providerRates };
3395
- }
3396
-
3397
- // ../intelligence/src/domain-matching.ts
3398
- function citedDomainBelongsToProject(citedDomain, projectDomains) {
3399
- const candidate = normalizeProjectDomain(citedDomain);
3400
- for (const domain of projectDomains) {
3401
- const normalized = normalizeProjectDomain(domain);
3402
- if (candidate === normalized || candidate.endsWith(`.${normalized}`)) return true;
3403
- }
3404
- return false;
3405
- }
3406
-
3407
- // ../intelligence/src/competitor-landscape.ts
3408
- function buildCompetitorLandscape(snapshots, competitorDomains, projectDomains, queryLookup) {
3409
- let projectCitationCount = 0;
3410
- const competitorMap = /* @__PURE__ */ new Map();
3411
- for (const c of competitorDomains) {
3412
- competitorMap.set(c, { count: 0, queries: /* @__PURE__ */ new Set(), pages: /* @__PURE__ */ new Map() });
3413
- }
3414
- for (const snap of snapshots) {
3415
- const q = queryLookup.byId.get(snap.queryId);
3416
- const allDomains = [...snap.citedDomains, ...snap.competitorOverlap];
3417
- if (allDomains.some((d) => citedDomainBelongsToProject(d, projectDomains))) {
3418
- projectCitationCount++;
3419
- }
3420
- for (const competitor of competitorDomains) {
3421
- if (allDomains.some((d) => citedDomainBelongsToProject(d, [competitor]))) {
3422
- const entry = competitorMap.get(competitor);
3423
- entry.count++;
3424
- if (q) entry.queries.add(q);
3425
- }
3426
- const competitorNorm = normalizeUrlDomain(competitor);
3427
- for (const gs of snap.groundingSources) {
3428
- const host = normalizeUrlDomain(extractHostFromUri(gs.uri));
3429
- if (!host) continue;
3430
- if (host === competitorNorm || host.endsWith(`.${competitorNorm}`)) {
3431
- const entry = competitorMap.get(competitor);
3432
- const pageQueries = entry.pages.get(gs.uri) ?? /* @__PURE__ */ new Set();
3433
- if (q) pageQueries.add(q);
3434
- entry.pages.set(gs.uri, pageQueries);
3435
- }
3436
- }
3437
- }
3438
- }
3439
- const totalCitedSlots = projectCitationCount + [...competitorMap.values()].reduce((sum, v) => sum + v.count, 0);
3440
- const competitorRows = [...competitorMap.entries()].map(([domain, data]) => {
3441
- const total = snapshots.length;
3442
- const ratio = total > 0 ? data.count / total : 0;
3443
- let pressureLabel = "None";
3444
- if (data.count > 0) {
3445
- if (ratio >= 0.5) pressureLabel = "High";
3446
- else if (ratio >= 0.2) pressureLabel = "Moderate";
3447
- else pressureLabel = "Low";
3448
- }
3449
- const sharePct = totalCitedSlots > 0 ? Math.round(data.count / totalCitedSlots * 100) : 0;
3450
- const theirCitedPages = [...data.pages.entries()].map(([url, qs]) => ({ url, citedFor: [...qs].sort() })).sort((a, b) => b.citedFor.length - a.citedFor.length);
3451
- return {
3452
- domain,
3453
- citationCount: data.count,
3454
- totalCount: total,
3455
- pressureLabel,
3456
- citedQueries: [...data.queries].sort(),
3457
- sharePct,
3458
- theirCitedPages
3459
- };
3460
- });
3461
- competitorRows.sort((a, b) => b.citationCount - a.citationCount);
3462
- return { projectCitationCount, competitors: competitorRows };
3463
- }
3464
- function normalizeUrlDomain(domain) {
3465
- return domain.toLowerCase().replace(/^https?:\/\//, "").replace(/^www\./, "").replace(/\/$/, "");
3466
- }
3467
- function extractHostFromUri(uri) {
3468
- try {
3469
- return new URL(uri).hostname;
3470
- } catch {
3471
- return "";
3472
- }
3473
- }
3474
-
3475
- // ../intelligence/src/mention-landscape.ts
3476
- function buildMentionLandscape(snapshots, competitorDomains, projectBrandNames, projectDomains, queryLookup) {
3477
- let projectMentionCount = 0;
3478
- let totalAnswerSnapshots = 0;
3479
- const competitorMap = /* @__PURE__ */ new Map();
3480
- for (const c of competitorDomains) {
3481
- competitorMap.set(c, { count: 0, queries: /* @__PURE__ */ new Set() });
3482
- }
3483
- for (const snap of snapshots) {
3484
- const text2 = snap.answerText;
3485
- if (!text2) continue;
3486
- totalAnswerSnapshots++;
3487
- const q = queryLookup.byId.get(snap.queryId);
3488
- const projectMentioned = snap.answerMentioned ?? determineAnswerMentioned(
3489
- text2,
3490
- [...projectBrandNames],
3491
- [...projectDomains]
3492
- );
3493
- if (projectMentioned) projectMentionCount++;
3494
- for (const competitor of competitorDomains) {
3495
- const brand = brandLabelFromDomain(competitor);
3496
- const mentioned = determineAnswerMentioned(text2, brand ? [brand] : [], [competitor]);
3497
- if (mentioned) {
3498
- const entry = competitorMap.get(competitor);
3499
- entry.count++;
3500
- if (q) entry.queries.add(q);
3501
- }
3502
- }
3503
- }
3504
- const totalMentionedSlots = projectMentionCount + [...competitorMap.values()].reduce((sum, v) => sum + v.count, 0);
3505
- const competitorRows = [...competitorMap.entries()].map(([domain, data]) => {
3506
- const ratio = totalAnswerSnapshots > 0 ? data.count / totalAnswerSnapshots : 0;
3507
- let pressureLabel = "None";
3508
- if (data.count > 0) {
3509
- if (ratio >= 0.5) pressureLabel = "High";
3510
- else if (ratio >= 0.2) pressureLabel = "Moderate";
3511
- else pressureLabel = "Low";
3512
- }
3513
- const sharePct = totalMentionedSlots > 0 ? Math.round(data.count / totalMentionedSlots * 100) : 0;
3514
- return {
3515
- domain,
3516
- mentionCount: data.count,
3517
- totalCount: totalAnswerSnapshots,
3518
- pressureLabel,
3519
- mentionedQueries: [...data.queries].sort(),
3520
- sharePct
3521
- };
3522
- });
3523
- competitorRows.sort((a, b) => b.mentionCount - a.mentionCount);
3524
- return { projectMentionCount, totalAnswerSnapshots, competitors: competitorRows };
3525
- }
3526
-
3527
- // ../intelligence/src/ai-source-origin.ts
3528
- var DEFAULT_TOP_SOURCE_DOMAINS_LIMIT = 20;
3529
- function buildAiSourceOrigin(snapshots, projectDomains, competitorDomains, topDomainsLimit = DEFAULT_TOP_SOURCE_DOMAINS_LIMIT) {
3530
- const categoryCounts = /* @__PURE__ */ new Map();
3531
- const domainCounts = /* @__PURE__ */ new Map();
3532
- let totalCitations = 0;
3533
- for (const snap of snapshots) {
3534
- for (const raw of snap.citedDomains) {
3535
- if (citedDomainBelongsToProject(raw, projectDomains)) continue;
3536
- const { category, domain } = categorizeSourceWithCompetitors(
3537
- raw,
3538
- competitorDomains,
3539
- citedDomainBelongsToProject
3540
- );
3541
- const bucketLabel = categoryLabel(category);
3542
- const cat = categoryCounts.get(category) ?? { label: bucketLabel, count: 0 };
3543
- cat.count++;
3544
- categoryCounts.set(category, cat);
3545
- domainCounts.set(domain, (domainCounts.get(domain) ?? 0) + 1);
3546
- totalCitations++;
3547
- }
3548
- }
3549
- const categories = [...categoryCounts.entries()].map(([category, { label, count }]) => ({
3550
- category,
3551
- label,
3552
- count,
3553
- sharePct: totalCitations > 0 ? Math.round(count / totalCitations * 100) : 0
3554
- })).sort((a, b) => b.count - a.count);
3555
- const topDomains = [...domainCounts.entries()].map(([domain, count]) => ({
3556
- domain,
3557
- count,
3558
- isCompetitor: citedDomainBelongsToProject(domain, competitorDomains)
3559
- })).sort((a, b) => b.count - a.count).slice(0, topDomainsLimit);
3560
- return { categories, topDomains };
3561
- }
3562
-
3563
- // ../intelligence/src/movement-summary.ts
3564
- function buildMovementSummary(currentSnapshots, previousSnapshots, options = {}) {
3565
- if (previousSnapshots.length === 0) {
3566
- const citedIds = collectCitedQueryIds(currentSnapshots);
3567
- const citedCount = citedIds.size;
3568
- const tone2 = citedCount > 0 ? "positive" : "neutral";
3569
- return withQueryLists(
3570
- { gained: citedCount, lost: 0, tone: tone2, hasPreviousRun: false },
3571
- citedIds,
3572
- /* @__PURE__ */ new Set(),
3573
- options.queryLookup
3574
- );
3575
- }
3576
- const latestCited = collectCitedQueryIds(currentSnapshots);
3577
- const previousCited = collectCitedQueryIds(previousSnapshots);
3578
- const gainedIds = /* @__PURE__ */ new Set();
3579
- const lostIds = /* @__PURE__ */ new Set();
3580
- for (const id of latestCited) {
3581
- if (!previousCited.has(id)) gainedIds.add(id);
3582
- }
3583
- for (const id of previousCited) {
3584
- if (!latestCited.has(id)) lostIds.add(id);
3585
- }
3586
- const tone = lostIds.size > gainedIds.size ? "negative" : gainedIds.size > lostIds.size ? "positive" : "neutral";
3587
- return withQueryLists(
3588
- { gained: gainedIds.size, lost: lostIds.size, tone, hasPreviousRun: true },
3589
- gainedIds,
3590
- lostIds,
3591
- options.queryLookup
3592
- );
3593
- }
3594
- function withQueryLists(base, gainedIds, lostIds, lookup) {
3595
- if (!lookup) return base;
3596
- return {
3597
- ...base,
3598
- gainedQueries: resolveQueryTexts(gainedIds, lookup),
3599
- lostQueries: resolveQueryTexts(lostIds, lookup)
3600
- };
3601
- }
3602
- function resolveQueryTexts(ids, lookup) {
3603
- const out = [];
3604
- for (const id of ids) {
3605
- const text2 = lookup.get(id);
3606
- if (text2) out.push(text2);
3607
- }
3608
- return out.sort();
3609
- }
3610
- function collectCitedQueryIds(snapshots) {
3611
- const cited = /* @__PURE__ */ new Set();
3612
- for (const s of snapshots) {
3613
- if (s.citationState === CitationStates.cited && s.queryId) cited.add(s.queryId);
3614
- }
3615
- return cited;
3616
- }
3617
-
3618
- // ../intelligence/src/score-tones.ts
3619
/**
 * Map a 0-100 score to a display tone: 70 and above is "positive",
 * 40 and above is "caution", anything else is "negative".
 */
function scoreTone(score) {
  return score >= 70 ? "positive" : score >= 40 ? "caution" : "negative";
}
3624
/**
 * Map a competitor-pressure label to a display tone.
 * "High" is "negative", "Moderate" is "caution", every other label is "neutral".
 */
function pressureTone(label) {
  switch (label) {
    case "High":
      return "negative";
    case "Moderate":
      return "caution";
    default:
      return "neutral";
  }
}
3629
/**
 * Choose a tone for a gap count out of a total.
 *
 * Zero gaps is "positive". Otherwise the tone is "negative" once gaps make up
 * at least 30% of a non-zero total, and "caution" in every remaining case
 * (including a total of zero).
 */
function gapTone(gapCount, totalCount) {
  if (gapCount === 0) return "positive";
  if (totalCount > 0 && gapCount / totalCount >= 0.3) return "negative";
  return "caution";
}
3635
-
3636
- // ../intelligence/src/visibility-score.ts
3637
/**
 * Build the "Citation Coverage" metric card.
 *
 * The score is the rounded percentage of distinct `queryId`s that have at
 * least one snapshot in the `CitationStates.cited` state. When more than one
 * API provider is configured and not all of them appear in the snapshots, the
 * tone is forced to "caution" and `providerCoverage` reports the shortfall.
 *
 * @param snapshots Snapshot records of the run being summarised.
 * @param options   Must expose `configuredApiProviders` (array of provider names);
 *                  only read when `snapshots` is non-empty.
 * @returns Metric card object.
 */
function buildVisibilityScore(snapshots, options) {
  const label = "Citation Coverage";
  const tooltip = "An LLM used a page on your domain as a source for its answer.";
  if (snapshots.length === 0) {
    return {
      label,
      value: "No data",
      delta: "Run a sweep first",
      tone: "neutral",
      description: "No citation data yet. Trigger a run to start tracking.",
      tooltip,
      trend: []
    };
  }

  // Collapse snapshots to one boolean per query: cited by any snapshot?
  const citedByQuery = new Map();
  for (const snap of snapshots) {
    const alreadyCited = citedByQuery.get(snap.queryId) === true;
    citedByQuery.set(snap.queryId, alreadyCited || snap.citationState === CitationStates.cited);
  }
  const totalCount = citedByQuery.size;
  let citedCount = 0;
  for (const wasCited of citedByQuery.values()) {
    if (wasCited) citedCount++;
  }
  const score = totalCount > 0 ? Math.round(citedCount / totalCount * 100) : 0;

  // Detect runs that only exercised a subset of the configured API providers.
  const seenProviders = new Set();
  for (const snap of snapshots) seenProviders.add(snap.provider);
  const configured = options.configuredApiProviders;
  const runApiProviderCount = configured.filter((name) => seenProviders.has(name)).length;
  const isPartialProviderRun = configured.length > 1 && runApiProviderCount < configured.length;

  return {
    label,
    value: `${score}`,
    delta: `${citedCount} of ${totalCount} queries cited`,
    tone: isPartialProviderRun ? "caution" : scoreTone(score),
    description: `${citedCount} of ${totalCount} tracked queries found your domain in at least one AI answer engine.`,
    tooltip,
    trend: [],
    progress: score,
    providerCoverage: isPartialProviderRun ? `${runApiProviderCount} of ${configured.length} providers` : undefined
  };
}
3673
-
3674
- // ../intelligence/src/mention-coverage.ts
3675
/**
 * Build the "Mention Coverage" metric card.
 *
 * The score is the rounded percentage of distinct `queryId`s that have at
 * least one snapshot with `answerMentioned === true`. When more than one API
 * provider is configured and not all of them appear in the snapshots, the tone
 * is forced to "caution" and `providerCoverage` reports the shortfall.
 *
 * @param snapshots Snapshot records of the run being summarised.
 * @param options   Must expose `configuredApiProviders` (array of provider names);
 *                  only read when `snapshots` is non-empty.
 * @returns Metric card object.
 */
function buildMentionCoverage(snapshots, options) {
  const label = "Mention Coverage";
  const tooltip = "Your domain or company name was in the answer returned by the LLM.";
  if (snapshots.length === 0) {
    return {
      label,
      value: "No data",
      delta: "Run a sweep first",
      tone: "neutral",
      description: "No mention data yet. Trigger a run to start tracking.",
      tooltip,
      trend: []
    };
  }

  // Collapse snapshots to one boolean per query: mentioned by any snapshot?
  const mentionedByQuery = new Map();
  for (const snap of snapshots) {
    const alreadyMentioned = mentionedByQuery.get(snap.queryId) === true;
    mentionedByQuery.set(snap.queryId, alreadyMentioned || snap.answerMentioned === true);
  }
  const totalCount = mentionedByQuery.size;
  let mentionedCount = 0;
  for (const wasMentioned of mentionedByQuery.values()) {
    if (wasMentioned) mentionedCount++;
  }
  const score = totalCount > 0 ? Math.round(mentionedCount / totalCount * 100) : 0;

  // Detect runs that only exercised a subset of the configured API providers.
  const seenProviders = new Set();
  for (const snap of snapshots) seenProviders.add(snap.provider);
  const configured = options.configuredApiProviders;
  const runApiProviderCount = configured.filter((name) => seenProviders.has(name)).length;
  const isPartialProviderRun = configured.length > 1 && runApiProviderCount < configured.length;

  return {
    label,
    value: `${score}`,
    delta: `${mentionedCount} of ${totalCount} queries mentioned`,
    tone: isPartialProviderRun ? "caution" : scoreTone(score),
    description: `${mentionedCount} of ${totalCount} tracked queries had your brand or domain in the AI answer text.`,
    tooltip,
    trend: [],
    progress: score,
    providerCoverage: isPartialProviderRun ? `${runApiProviderCount} of ${configured.length} providers` : undefined
  };
}
3711
-
3712
- // ../intelligence/src/gap-query-score.ts
3713
/**
 * Build the "Citation Gaps" metric card.
 *
 * A query counts as a gap when none of its snapshots is in the
 * `CitationStates.cited` state while at least one snapshot lists a competitor
 * in `competitorOverlap`.
 *
 * The same label is used for the empty and the populated state so the card
 * title does not change once data arrives (the empty state previously read
 * "Gap Queries").
 *
 * @param snapshots Snapshot records of the latest run.
 * @returns Metric card object; `progress` is the gap share as a rounded percentage.
 */
function buildGapQueryScore(snapshots) {
  const label = "Citation Gaps";
  const tooltip = "Tracked queries where a competitor is cited in the latest run but your domain is not.";
  if (snapshots.length === 0) {
    return {
      label,
      value: "No data",
      delta: "Run a sweep first",
      tone: "neutral",
      description: "Run a visibility sweep to identify queries where competitors are cited and your domain is not.",
      tooltip,
      trend: []
    };
  }
  // Roll snapshots up per query: was the project cited, and which competitors showed up?
  const byQuery = new Map();
  for (const snap of snapshots) {
    const key = snap.queryId;
    const current = byQuery.get(key) ?? { cited: false, competitorOverlap: new Set() };
    if (snap.citationState === CitationStates.cited) current.cited = true;
    for (const domain of snap.competitorOverlap) current.competitorOverlap.add(domain);
    byQuery.set(key, current);
  }
  const totalCount = byQuery.size;
  const gapCount = [...byQuery.values()].filter(
    (entry) => !entry.cited && entry.competitorOverlap.size > 0
  ).length;
  const gapQueryLabel = gapCount === 1 ? "query" : "queries";
  return {
    label,
    value: `${gapCount}`,
    delta: `${gapCount} of ${totalCount} queries at risk`,
    tone: gapTone(gapCount, totalCount),
    description: gapCount > 0 ? `${gapCount} tracked ${gapQueryLabel} currently cite competitors without citing your domain.` : "No competitive citation gaps detected in the latest visibility run.",
    tooltip,
    trend: [],
    progress: totalCount > 0 ? Math.round(gapCount / totalCount * 100) : 0
  };
}
3750
/**
 * Build the "Mention Gaps" metric card.
 *
 * A query counts as a gap when none of its snapshots has
 * `answerMentioned === true` while at least one snapshot lists a competitor
 * in `competitorOverlap`.
 *
 * @param snapshots Snapshot records of the latest run.
 * @returns Metric card object; `progress` is the gap share as a rounded percentage.
 */
function buildMentionGapScore(snapshots) {
  const label = "Mention Gaps";
  const tooltip = "Tracked queries where a competitor surfaces in the latest run but your brand / domain is not mentioned in the answer text.";
  if (snapshots.length === 0) {
    return {
      label,
      value: "No data",
      delta: "Run a sweep first",
      tone: "neutral",
      description: "Run a visibility sweep to identify queries where competitors are mentioned and your brand is not.",
      tooltip,
      trend: []
    };
  }

  // Per-query rollup of "brand mentioned" and the competitors that surfaced.
  const rollups = new Map();
  for (const snap of snapshots) {
    let rollup = rollups.get(snap.queryId);
    if (rollup === undefined || rollup === null) {
      rollup = { mentioned: false, competitorOverlap: new Set() };
    }
    if (snap.answerMentioned === true) rollup.mentioned = true;
    for (const domain of snap.competitorOverlap) rollup.competitorOverlap.add(domain);
    rollups.set(snap.queryId, rollup);
  }

  const totalCount = rollups.size;
  let gapCount = 0;
  for (const rollup of rollups.values()) {
    if (!rollup.mentioned && rollup.competitorOverlap.size > 0) gapCount++;
  }
  const gapQueryLabel = gapCount === 1 ? "query" : "queries";
  const description = gapCount > 0
    ? `${gapCount} tracked ${gapQueryLabel} mention competitors but never your brand.`
    : "No competitive mention gaps detected in the latest visibility run.";
  const progress = totalCount > 0 ? Math.round(gapCount / totalCount * 100) : 0;

  return {
    label,
    value: `${gapCount}`,
    delta: `${gapCount} of ${totalCount} queries at risk`,
    tone: gapTone(gapCount, totalCount),
    description,
    tooltip,
    trend: [],
    progress
  };
}
3787
-
3788
- // ../intelligence/src/competitor-pressure-score.ts
3789
/**
 * Build the "Competitor Pressure" metric card.
 *
 * Pressure is derived from the share of snapshots whose `competitorOverlap`
 * contains at least one of `competitorDomains`: 50% or more is "High", 20% or
 * more is "Moderate", any overlap at all is "Low", otherwise "None".
 *
 * @param snapshots               Snapshot records of the run.
 * @param competitorDomains       Domains to look for in each snapshot's overlap list.
 * @param totalTrackedCompetitors Count used only for the description text.
 * @returns Metric card object.
 */
function buildCompetitorPressureScore(snapshots, competitorDomains, totalTrackedCompetitors) {
  const tooltip = "How often competitor domains appear alongside yours in AI answers. High pressure means competitors are frequently cited for the same queries.";
  const description = totalTrackedCompetitors > 0 ? `${totalTrackedCompetitors} competitor${totalTrackedCompetitors > 1 ? "s" : ""} tracked.` : "No competitors configured.";

  // Shared card shape; the tone always follows the pressure label.
  const card = (value, delta) => ({
    label: "Competitor Pressure",
    value,
    delta,
    tone: pressureTone(value),
    description,
    tooltip,
    trend: []
  });

  if (snapshots.length === 0 || competitorDomains.length === 0) {
    return card("None", "No overlap detected");
  }

  const tracked = new Set(competitorDomains);
  let overlapCount = 0;
  for (const snap of snapshots) {
    const hasTrackedCompetitor = snap.competitorOverlap.some((domain) => tracked.has(domain));
    if (hasTrackedCompetitor) overlapCount += 1;
  }

  const share = overlapCount / snapshots.length;
  let pressure = "None";
  if (share >= 0.5) pressure = "High";
  else if (share >= 0.2) pressure = "Moderate";
  else if (overlapCount > 0) pressure = "Low";

  return card(
    pressure,
    overlapCount > 0 ? `${overlapCount} overlapping citations` : "No overlap detected"
  );
}
3822
/**
 * Summarise, per competitor, how many distinct queries surfaced that
 * competitor's domain (via `competitorOverlap` or `citedDomains`).
 *
 * @param snapshots    Snapshot records; snapshots without a truthy `queryId` are not counted.
 * @param competitors2 Competitor records with `domain` and optional `id`.
 * @param queryLookup  Optional object whose `byId` map turns query ids into display text;
 *                     ids without an entry are shown as-is.
 * @returns One row per competitor with counts, a pressure label and the sorted query list.
 */
function buildOverviewCompetitors(snapshots, competitors2, queryLookup) {
  const allQueryIds = new Set(snapshots.map((snap) => snap.queryId).filter(Boolean));
  const totalQueries = allQueryIds.size;
  const toDisplayText = (queryId) => queryLookup?.byId.get(queryId) ?? queryId;

  return competitors2.map((competitor, position) => {
    const { domain } = competitor;
    const hitQueryIds = new Set();
    snapshots.forEach((snap) => {
      const surfaced = snap.competitorOverlap.includes(domain) || snap.citedDomains.includes(domain);
      if (surfaced && snap.queryId) hitQueryIds.add(snap.queryId);
    });

    const citationCount = hitQueryIds.size;
    const share = totalQueries > 0 ? citationCount / totalQueries : 0;
    let pressureLabel = "None";
    if (share >= 0.5) pressureLabel = "High";
    else if (share >= 0.2) pressureLabel = "Moderate";
    else if (citationCount > 0) pressureLabel = "Low";

    const citedQueries = Array.from(hitQueryIds, (queryId) => toDisplayText(queryId));
    citedQueries.sort();

    return {
      id: competitor.id || `comp_${position}`,
      domain,
      citationCount,
      totalQueries,
      pressureLabel,
      citedQueries
    };
  });
}
3848
-
3849
- // ../intelligence/src/provider-scores.ts
3850
/**
 * Compute a per provider/model citation score.
 *
 * Snapshots are grouped by `provider` and `model` (a missing model is stored as
 * `null`); each group's score is the rounded percentage of its snapshots in the
 * `CitationStates.cited` state. Rows are ordered by provider, then model.
 *
 * @param snapshots Snapshot records.
 * @returns Array of `{ provider, model, score, cited, total }`.
 */
function buildProviderScores(snapshots) {
  const tallies = new Map();
  for (const snap of snapshots) {
    const { provider } = snap;
    const model = snap.model ?? null;
    const groupKey = `${provider}::${model ?? "unknown"}`;
    let tally = tallies.get(groupKey);
    if (tally === undefined || tally === null) {
      tally = { provider, model, cited: 0, total: 0 };
      tallies.set(groupKey, tally);
    }
    tally.total += 1;
    if (snap.citationState === CitationStates.cited) tally.cited += 1;
  }

  const ordered = Array.from(tallies.values());
  ordered.sort((left, right) => {
    const byProvider = left.provider.localeCompare(right.provider);
    return byProvider || (left.model ?? "").localeCompare(right.model ?? "");
  });

  return ordered.map((tally) => ({
    provider: tally.provider,
    model: tally.model,
    score: tally.total > 0 ? Math.round(tally.cited / tally.total * 100) : 0,
    cited: tally.cited,
    total: tally.total
  }));
}
3871
-
3872
- // ../intelligence/src/run-history.ts
3873
// Number of most recent runs included in the history when no limit is passed.
var DEFAULT_RUN_HISTORY_LIMIT = 12;

/**
 * Build a chronological citation-rate history for the most recent runs.
 *
 * The `limit` newest runs (by `createdAt` string comparison) are kept and
 * returned oldest-first. For each run, the rate is the rounded percentage of
 * distinct `queryId`s with at least one snapshot in the `CitationStates.cited`
 * state; runs without snapshots get zero counts.
 *
 * @param runs2            Run records with `id`, `createdAt` and `status`.
 * @param snapshotsByRunId Map from run id to that run's snapshots.
 * @param limit            Maximum number of runs to include.
 * @returns Array of `{ runId, createdAt, citedCount, totalCount, citationRate, status }`.
 */
function buildRunHistory(runs2, snapshotsByRunId, limit = DEFAULT_RUN_HISTORY_LIMIT) {
  const newestFirst = [...runs2].sort((a, b) => b.createdAt.localeCompare(a.createdAt));
  const windowRuns = newestFirst.slice(0, limit);
  windowRuns.sort((a, b) => a.createdAt.localeCompare(b.createdAt));

  return windowRuns.map((run) => {
    const citedByQuery = new Map();
    for (const snap of snapshotsByRunId.get(run.id) ?? []) {
      const alreadyCited = citedByQuery.get(snap.queryId) === true;
      citedByQuery.set(snap.queryId, alreadyCited || snap.citationState === CitationStates.cited);
    }
    const totalCount = citedByQuery.size;
    let citedCount = 0;
    for (const wasCited of citedByQuery.values()) {
      if (wasCited) citedCount++;
    }
    return {
      runId: run.id,
      createdAt: run.createdAt,
      citedCount,
      totalCount,
      citationRate: totalCount > 0 ? Math.round(citedCount / totalCount * 100) : 0,
      status: run.status
    };
  });
}
3896
-
3897
- // ../intelligence/src/provider-trends.ts
3898
/**
 * Build a per provider/model citation-rate series over the most recent runs.
 *
 * Series keys come from every snapshot in `snapshotsByRunId` (not only the
 * runs inside the window). Each windowed run contributes one point to every
 * series; a key with no snapshots in that run gets a rate of 0.
 *
 * @param runs2            Run records with `id` and `createdAt`.
 * @param snapshotsByRunId Map from run id to that run's snapshots.
 * @param limit            Maximum number of most recent runs to include.
 * @returns Map from provider key to an oldest-first array of `{ rate, createdAt }`.
 */
function buildProviderTrends(runs2, snapshotsByRunId, limit = 12) {
  const newestFirst = (a, b) => b.createdAt.localeCompare(a.createdAt);
  const oldestFirst = (a, b) => a.createdAt.localeCompare(b.createdAt);
  const windowRuns = [...runs2].sort(newestFirst).slice(0, limit).sort(oldestFirst);

  const seriesKeys = collectProviderKeys(snapshotsByRunId.values());
  const trends = new Map();
  for (const seriesKey of seriesKeys) trends.set(seriesKey, []);

  for (const run of windowRuns) {
    // provider key -> (queryId -> cited by any snapshot in this run)
    const citedByKey = new Map();
    for (const snap of snapshotsByRunId.get(run.id) ?? []) {
      const seriesKey = providerKey(snap.provider, snap.model);
      let citedByQuery = citedByKey.get(seriesKey);
      if (citedByQuery === undefined || citedByQuery === null) {
        citedByQuery = new Map();
      }
      const alreadyCited = citedByQuery.get(snap.queryId) === true;
      citedByQuery.set(snap.queryId, alreadyCited || snap.citationState === CitationStates.cited);
      citedByKey.set(seriesKey, citedByQuery);
    }

    for (const seriesKey of seriesKeys) {
      const citedByQuery = citedByKey.get(seriesKey);
      let rate = 0;
      if (citedByQuery && citedByQuery.size > 0) {
        let citedCount = 0;
        for (const wasCited of citedByQuery.values()) {
          if (wasCited) citedCount++;
        }
        rate = Math.round(citedCount / citedByQuery.size * 100);
      }
      trends.get(seriesKey).push({ rate, createdAt: run.createdAt });
    }
  }
  return trends;
}
3921
/**
 * Compose the series key for a provider/model pair.
 * A `null` or `undefined` model is rendered as "unknown".
 */
function providerKey(provider, model) {
  const modelPart = model ?? "unknown";
  return `${provider}::${modelPart}`;
}
3924
/**
 * Collect the distinct provider keys found across several snapshot lists.
 *
 * @param perRun Iterable of snapshot iterables (one per run).
 * @returns Set of provider keys in first-seen order.
 */
function collectProviderKeys(perRun) {
  const distinctKeys = new Set();
  for (const runSnapshots of perRun) {
    for (const { provider, model } of runSnapshots) {
      distinctKeys.add(providerKey(provider, model));
    }
  }
  return distinctKeys;
}
3933
-
3934
- // ../intelligence/src/mention-share.ts
3935
/**
 * Build the "Mention Share" metric card: the project's share of brand
 * mentions relative to its tracked competitors.
 *
 * Only snapshots with non-empty `answerText` are scanned. A snapshot counts
 * for the project when `projectMentioned` is truthy, and once for each
 * competitor whose `brandTokens` match the answer (see `competitorMentioned`).
 * The score is project mentions divided by project plus competitor mentions,
 * as a rounded percentage.
 *
 * @param snapshots Snapshot records of the run.
 * @param options   Must expose `competitors` (array of `{ domain, brandTokens }`);
 *                  only read when `snapshots` is non-empty.
 * @returns Metric card object including a `breakdown` of the raw counts.
 */
function buildMentionShare(snapshots, options) {
  const label = "Mention Share";
  const tooltip = 'When AI answers your tracked queries and names a brand, the % of brand-name-drops that are you vs your tracked competitors. Cleaner than Citation Coverage for "am I winning the conversation".';

  // Card used when there is nothing to compare; all counters are zero.
  const placeholder = (value, delta, description) => ({
    label,
    value,
    delta,
    tone: "neutral",
    description,
    tooltip,
    trend: [],
    breakdown: {
      projectMentionSnapshots: 0,
      competitorMentionSnapshots: 0,
      perCompetitor: [],
      snapshotsWithAnswerText: 0,
      snapshotsTotal: snapshots.length
    }
  });

  if (snapshots.length === 0) {
    return placeholder(
      "No data",
      "Run a sweep first",
      "No mention share data yet. Trigger a run to start tracking."
    );
  }
  if (options.competitors.length === 0) {
    return placeholder(
      "Add competitors",
      "No competitors configured",
      "Mention Share is a head-to-head competitive metric \u2014 add tracked competitors to compare brand mention rates."
    );
  }

  const mentionsByDomain = new Map();
  for (const competitor of options.competitors) mentionsByDomain.set(competitor.domain, 0);

  let projectMentionSnapshots = 0;
  let snapshotsWithAnswerText = 0;
  for (const snap of snapshots) {
    const answer = snap.answerText ?? "";
    if (answer.length === 0) continue;
    snapshotsWithAnswerText += 1;
    if (snap.projectMentioned) projectMentionSnapshots += 1;
    const answerBrandKey = brandKeyFromText(answer);
    for (const competitor of options.competitors) {
      if (!competitorMentioned(answer, answerBrandKey, competitor.brandTokens)) continue;
      const soFar = mentionsByDomain.get(competitor.domain) ?? 0;
      mentionsByDomain.set(competitor.domain, soFar + 1);
    }
  }

  let competitorMentionSnapshots = 0;
  for (const count of mentionsByDomain.values()) competitorMentionSnapshots += count;

  const denom = projectMentionSnapshots + competitorMentionSnapshots;
  const score = denom > 0 ? Math.round(projectMentionSnapshots / denom * 100) : 0;

  // Per-competitor rows: only competitors that were mentioned, most mentioned first.
  const perCompetitor = options.competitors
    .map((competitor) => {
      const mentionSnapshots = mentionsByDomain.get(competitor.domain) ?? 0;
      const shareOfCompetitiveTotal = competitorMentionSnapshots > 0
        ? Math.round(mentionSnapshots / competitorMentionSnapshots * 1e3) / 10
        : 0;
      return { domain: competitor.domain, mentionSnapshots, shareOfCompetitiveTotal };
    })
    .filter((row) => row.mentionSnapshots > 0)
    .sort((a, b) => b.mentionSnapshots - a.mentionSnapshots);

  const breakdown = {
    projectMentionSnapshots,
    competitorMentionSnapshots,
    perCompetitor,
    snapshotsWithAnswerText,
    snapshotsTotal: snapshots.length
  };
  const description = describe({
    score,
    projectMentionSnapshots,
    competitorMentionSnapshots,
    perCompetitor
  });
  const hasMentions = denom > 0;

  return {
    label,
    value: hasMentions ? `${score}` : "0",
    delta: hasMentions ? `${projectMentionSnapshots} of ${denom} brand mentions` : "No brand mentions in this run",
    tone: hasMentions ? mentionShareTone(score) : "neutral",
    description,
    tooltip,
    trend: [],
    progress: hasMentions ? score : 0,
    breakdown
  };
}
4017
/**
 * Map a mention-share percentage to a tone: 50 and above is "positive",
 * 25 and above is "caution", anything else is "negative".
 */
function mentionShareTone(score) {
  return score >= 50 ? "positive" : score >= 25 ? "caution" : "negative";
}
4022
/**
 * Decide whether an answer mentions a competitor.
 *
 * Tokens shorter than three characters are skipped. A token matches when it
 * appears in `text2` as a case-insensitive whole word, or when its brand key
 * (via `brandKeyFromText`, at least three characters) is a substring of
 * `answerBrandKey`.
 *
 * @param text2          Raw answer text.
 * @param answerBrandKey Brand key of the answer text, computed by the caller.
 * @param brandTokens    Iterable of competitor brand tokens.
 * @returns `true` on the first matching token, otherwise `false`.
 */
function competitorMentioned(text2, answerBrandKey, brandTokens) {
  for (const token of brandTokens) {
    if (token.length >= 3) {
      const literal = token.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      const wholeWord = new RegExp(`\\b${literal}\\b`, "i");
      if (wholeWord.test(text2)) return true;
      const compactKey = brandKeyFromText(token);
      if (compactKey.length >= 3 && answerBrandKey.includes(compactKey)) return true;
    }
  }
  return false;
}
4032
/**
 * Produce the one-line description for the Mention Share card.
 *
 * @param parts `{ score, projectMentionSnapshots, competitorMentionSnapshots, perCompetitor }`;
 *              the first entry of `perCompetitor` is reported as the top competitor.
 * @returns Human-readable summary sentence.
 */
function describe(parts) {
  const mine = parts.projectMentionSnapshots;
  const theirs = parts.competitorMentionSnapshots;
  if (theirs === 0) {
    return mine === 0
      ? "No brand mentions detected for you or your tracked competitors in this run."
      : `${mine} brand mentions of you, zero competitor mentions \u2014 you own the conversation.`;
  }
  const leader = parts.perCompetitor[0];
  if (leader) {
    return `${parts.score}% of brand mentions are you. Top competitor: ${leader.domain} (${leader.mentionSnapshots} mentions).`;
  }
  return `${parts.score}% of brand mentions are you (${mine} of ${mine + theirs}).`;
}
4046
-
4047
- // ../intelligence/src/suggested-queries.ts
4048
// Rows below this impression count are ignored unless the caller overrides it.
var DEFAULT_MIN_IMPRESSIONS = 10;
// Maximum number of suggestions returned unless the caller overrides it.
var DEFAULT_LIMIT = 10;

/**
 * Suggest search queries worth tracking, based on Search Console rows.
 *
 * Rows are dropped when their impressions are below the minimum, when the
 * normalized query is empty, or when the normalized query is already tracked.
 * The remaining candidates are ordered by impressions (highest first) and
 * truncated to `limit`.
 *
 * @param gscRows Rows with `query`, `impressions`, `clicks`, `avgPosition`.
 * @param options `{ trackedQueries, minImpressions?, limit? }`.
 * @returns `{ rows, totalCandidates, skippedAlreadyTracked }`, where
 *          `totalCandidates` counts candidates before truncation.
 */
function buildSuggestedQueries(gscRows, options) {
  const minImpressions = options.minImpressions ?? DEFAULT_MIN_IMPRESSIONS;
  const limit = options.limit ?? DEFAULT_LIMIT;
  const alreadyTracked = new Set(options.trackedQueries.map(normalizeQuery));

  const candidates = [];
  let skippedAlreadyTracked = 0;
  for (const row of gscRows) {
    if (row.impressions >= minImpressions) {
      const normalized = normalizeQuery(row.query);
      if (normalized.length === 0) continue;
      if (alreadyTracked.has(normalized)) {
        skippedAlreadyTracked += 1;
      } else {
        const { query, impressions, clicks, avgPosition } = row;
        candidates.push({ query, impressions, clicks, avgPosition, reason: buildReason(row) });
      }
    }
  }

  candidates.sort((left, right) => right.impressions - left.impressions);
  return {
    rows: candidates.slice(0, limit),
    totalCandidates: candidates.length,
    skippedAlreadyTracked
  };
}
4080
/**
 * Canonicalise a query for comparison: surrounding whitespace removed, lower-cased.
 */
function normalizeQuery(value) {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
4083
/**
 * Explain why a Search Console row is suggested: its impression volume plus
 * its rounded average position, with extra wording for positions up to 10
 * and up to 20.
 *
 * @param row Row with `impressions` and `avgPosition`.
 * @returns Short reason string.
 */
function buildReason(row) {
  const impressionsLabel = formatImpressions2(row.impressions);
  const rank = Math.round(row.avgPosition);
  if (row.avgPosition > 20) {
    return `${impressionsLabel} impressions \xB7 ranks #${rank}`;
  }
  if (row.avgPosition > 10) {
    return `${impressionsLabel} impressions \xB7 ranks #${rank} \u2014 close to top 10`;
  }
  if (row.avgPosition <= 10) {
    return `${impressionsLabel} impressions \xB7 ranks #${rank} on Google`;
  }
  // Reached only when avgPosition is not comparable (e.g. NaN).
  return `${impressionsLabel} impressions \xB7 ranks #${rank}`;
}
4093
/**
 * Format an impression count compactly: "1.2M", "45K", "3.4K" or the plain number.
 *
 * The unit thresholds are placed where the rounded display value rolls over,
 * not at the raw power of ten. Otherwise 999,500-999,999 would render as
 * "1000K" instead of "1.0M", and 9,950-9,999 could render as "10.0K" next to
 * "10K" for 10,000.
 *
 * @param value Impression count (non-negative number).
 * @returns Compact label.
 */
function formatImpressions2(value) {
  // From 999,500 upward the thousands rounding would reach 1000K, so switch to millions.
  if (value >= 999500) return `${(value / 1e6).toFixed(1)}M`;
  // From 9,950 upward the one-decimal form would round to 10.0K, so use whole thousands.
  if (value >= 9950) return `${Math.round(value / 1e3)}K`;
  if (value >= 1e3) return `${(value / 1e3).toFixed(1)}K`;
  return value.toString();
}
4099
-
4100
- // ../intelligence/src/smoothed-delta.ts
4101
- var SMOOTHED_RUN_DELTA_MAX_WINDOW = 3;
4102
- function smoothedRunDelta(points, valueFn, maxWindow = SMOOTHED_RUN_DELTA_MAX_WINDOW) {
4103
- if (points.length < 2) return null;
4104
- const window = Math.min(maxWindow, Math.floor(points.length / 2));
4105
- const tail = points.slice(-window);
4106
- const prior = points.slice(-window * 2, -window);
4107
- const sum = (arr) => arr.reduce((s, p) => s + valueFn(p), 0);
4108
- const currentAvg = sum(tail) / tail.length;
4109
- const priorAvg = sum(prior) / prior.length;
4110
- return {
4111
- current: roundTo1Decimal(currentAvg),
4112
- prior: roundTo1Decimal(priorAvg),
4113
- deltaAbs: currentAvg - priorAvg,
4114
- window
4115
- };
4116
- }
4117
- function roundTo1Decimal(value) {
4118
- return Math.round(value * 10) / 10;
4119
- }
4120
-
4121
- // src/intelligence-service.ts
4122
- import crypto from "crypto";
4123
-
4124
- // src/logger.ts
4125
// True only when stdout is attached to a terminal; selects the human-readable log format.
var IS_TTY = process.stdout.isTTY === true;

/**
 * Render a log entry as a single colourised line for terminals:
 * `HH:MM:SS TAG [module] action [msg] [key=value ...]`.
 *
 * The time is taken from characters 11-18 of the `ts` string. Context values
 * that are `null` or `undefined` are omitted; non-string values are JSON-encoded.
 *
 * @param entry Log entry with `ts`, `level`, `module`, `action`, optional `msg`
 *              and any number of extra context fields.
 * @returns Formatted line without a trailing newline.
 */
function formatTTY(entry) {
  const { ts, level, module, action, msg, ...ctx } = entry;

  let levelTag;
  switch (level) {
    case "error":
      levelTag = "\x1B[31mERR\x1B[0m";
      break;
    case "warn":
      levelTag = "\x1B[33mWRN\x1B[0m";
      break;
    default:
      levelTag = "\x1B[36mINF\x1B[0m";
  }

  const pairs = [];
  for (const [key, value] of Object.entries(ctx)) {
    if (value === undefined || value === null) continue;
    pairs.push(`${key}=${typeof value === "string" ? value : JSON.stringify(value)}`);
  }
  const ctxParts = pairs.join(" ");

  let line = `${ts.slice(11, 19)} ${levelTag} [${module}] ${action}`;
  if (msg) line += ` ${msg}`;
  if (ctxParts) line += ` ${ctxParts}`;
  return line;
}
4135
/**
 * Write one log entry followed by a newline.
 *
 * Entries with level "error" go to stderr, everything else to stdout. The
 * line is the terminal format when `IS_TTY` is set and JSON otherwise.
 *
 * @param entry Log entry object.
 */
function emit(entry) {
  const stream = entry.level === "error" ? process.stderr : process.stdout;
  const line = IS_TTY ? formatTTY(entry) : JSON.stringify(entry);
  stream.write(line + "\n");
}
4143
/**
 * Create a logger bound to a module name.
 *
 * Each method emits an entry of the shape
 * `{ ts, level, module, action, ...ctx }`, where `ts` is the current time as an
 * ISO string. Because `ctx` is spread last, its fields can override the others.
 *
 * @param module Name recorded on every entry.
 * @returns `{ info, warn, error }`, each taking `(action, ctx)`.
 */
function createLogger(module) {
  const logAt = (level) => (action, ctx) => {
    emit({
      ts: new Date().toISOString(),
      level,
      module,
      action,
      ...ctx
    });
  };
  return {
    info: logAt("info"),
    warn: logAt("warn"),
    error: logAt("error")
  };
}
4160
-
4161
- // src/citation-utils.ts
4162
/**
 * Check whether `domain` is `canonicalDomain` or one of its subdomains.
 * Both sides are passed through `normalizeProjectDomain` before comparison.
 */
function domainMatches(domain, canonicalDomain) {
  const target = normalizeProjectDomain(canonicalDomain);
  const candidate = normalizeProjectDomain(domain);
  if (candidate === target) return true;
  return candidate.endsWith(`.${target}`);
}
4167
/**
 * Return the first cited domain that matches any of the project's domains
 * (exact or subdomain match, see `domainMatches`).
 *
 * @param citedDomains   Iterable of domains cited in an answer.
 * @param projectDomains Array of the project's own domains.
 * @returns The matching cited domain as given, or `undefined` when none matches.
 */
function pickProjectCitedDomain(citedDomains, projectDomains) {
  const belongsToProject = (cited) => projectDomains.some((own) => domainMatches(cited, own));
  return Array.from(citedDomains).find(belongsToProject);
}
4173
/**
 * Decide whether a normalized provider result cites any of the given domains.
 *
 * For each domain, in order, the result counts as "cited" when:
 *  - one of `normalized.citedDomains` matches it (exact or subdomain), or
 *  - a grounding source URI contains it as a substring (only for domains that
 *    contain a dot), or
 *  - a grounding source title, lower-cased and stripped of a leading "www.",
 *    equals it or ends with "." plus it.
 *
 * @param normalized Result with `citedDomains` and `groundingSources` (`{ uri, title }`).
 * @param domains    Iterable of project domains.
 * @returns "cited" or "not-cited".
 */
function determineCitationState(normalized, domains) {
  for (const canonicalDomain of domains) {
    const bareDomain = normalizeProjectDomain(canonicalDomain);
    const directlyCited = normalized.citedDomains.some((cited) => domainMatches(cited, bareDomain));
    if (directlyCited) return "cited";

    const needle = bareDomain.toLowerCase();
    for (const source of normalized.groundingSources) {
      let uriHit = false;
      try {
        // A source whose uri cannot be lower-cased is skipped for the URI check only.
        const loweredUri = source.uri.toLowerCase();
        uriHit = needle.includes(".") && loweredUri.includes(needle);
      } catch {
      }
      if (uriHit) return "cited";

      if (source.title) {
        const bareTitle = source.title.toLowerCase().replace(/^www\./, "");
        const titleHit = bareTitle === needle || bareTitle.endsWith(`.${needle}`);
        if (titleHit) return "cited";
      }
    }
  }
  return "not-cited";
}
4198
/**
 * List the competitor domains that surface in a normalized provider result.
 *
 * A competitor is included when any of the following holds:
 *  - one of `normalized.citedDomains` matches it (exact or subdomain),
 *  - a grounding source URI contains the domain (case-insensitive substring),
 *  - the answer text contains the domain (case-insensitive substring), or
 *  - the answer text contains the domain's brand label (at least four
 *    characters) as a whole word.
 *
 * Grounding sources without a string `uri` are skipped. `determineCitationState`
 * already tolerates such sources, so this function must not throw on the same
 * input.
 *
 * @param normalized        Result with `citedDomains`, `groundingSources` and optional `answerText`.
 * @param competitorDomains Tracked competitor domains.
 * @returns Matched competitor domains (as given), de-duplicated, in first-match order.
 */
function computeCompetitorOverlap(normalized, competitorDomains) {
  const overlapSet = new Set();
  for (const d of normalized.citedDomains) {
    for (const cd of competitorDomains) {
      if (domainMatches(d, cd)) {
        overlapSet.add(cd);
      }
    }
  }
  for (const source of normalized.groundingSources) {
    // Malformed sources (missing or non-string uri) cannot match anything.
    if (!source || typeof source.uri !== "string") continue;
    const uri = source.uri.toLowerCase();
    for (const cd of competitorDomains) {
      if (uri.includes(cd.toLowerCase())) {
        overlapSet.add(cd);
      }
    }
  }
  if (normalized.answerText) {
    const lowerAnswer = normalized.answerText.toLowerCase();
    for (const cd of competitorDomains) {
      if (lowerAnswer.includes(cd.toLowerCase())) {
        overlapSet.add(cd);
      }
      const brand = brandLabelFromDomain(cd);
      if (brand.length >= 4 && new RegExp(`\\b${escapeRegExp(brand)}\\b`, "i").test(lowerAnswer)) {
        overlapSet.add(cd);
      }
    }
  }
  return [...overlapSet];
}
4229
/**
 * Escape regular-expression metacharacters so `value` can be embedded in a
 * pattern and matched literally.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (character) => `\\${character}`);
}
4232
/**
 * Pull competitor names that an answer presents as recommendations.
 *
 * Candidate names are taken from list items, bold spans, headings and
 * markdown links in the answer. A candidate is kept only when its brand key
 * is not a generic heading word, has at most six words, does not match one of
 * the project's own brand keys, and does match a brand key derived from the
 * cited or tracked competitor domains.
 *
 * @param answerText        Raw answer text; answers shorter than 20 characters yield nothing.
 * @param ownDomains        The project's own domains.
 * @param citedDomains      Domains cited in the answer.
 * @param competitorDomains Tracked competitor domains.
 * @param ownBrandNames     Extra brand names of the project (keys of four or more characters are used).
 * @returns Up to ten distinct candidate names, in first-seen order.
 */
function extractRecommendedCompetitors(answerText, ownDomains, citedDomains, competitorDomains, ownBrandNames = []) {
  if (!answerText || answerText.length < 20) return [];

  // Brand keys that identify the project itself.
  const ownBrandKeys = new Set();
  for (const domain of ownDomains) {
    for (const key of collectBrandKeysFromDomain(domain)) ownBrandKeys.add(key);
  }
  for (const name of ownBrandNames) {
    const nameKey = brandKeyFromText(name);
    if (nameKey.length >= 4) ownBrandKeys.add(nameKey);
  }

  // Brand keys of everything else that was cited or is tracked.
  const knownCompetitorKeys = new Set();
  for (const domain of [...citedDomains, ...competitorDomains]) {
    for (const key of collectBrandKeysFromDomain(domain)) {
      if (!ownBrandKeys.has(key)) knownCompetitorKeys.add(key);
    }
  }
  if (knownCompetitorKeys.size === 0) return [];

  const candidatePatterns = [
    /^\s*(?:[-*]|\d+\.)\s+(?:\*\*)?([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50}?)(?:\*\*)?\s*[:\u2014\u2013-]/gm,
    /\*\*([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50})\*\*/g,
    /^#{1,4}\s+(?:\d+\.\s+)?(?:\*\*)?([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50}?)(?:\*\*)?$/gm,
    /\[([A-Z0-9][A-Za-z0-9][\w\s.&',/()-]{1,50})\]\(https?:\/\/[^\s)]+\)/g
  ];
  // Heading-like words that are never brand names.
  const genericKeys = new Set([
    "additional",
    "best",
    "benefits",
    "bottomline",
    "comparison",
    "conclusion",
    "directorylisting",
    "example",
    "expertise",
    "features",
    "finalthoughts",
    "howitworks",
    "important",
    "keybenefits",
    "keyfeatures",
    "major",
    "note",
    "notable",
    "option",
    "other",
    "overview",
    "pricing",
    "pros",
    "reviews",
    "step",
    "summary",
    "top",
    "verdict",
    "whattolookfor",
    "whyitmatters",
    "whyitstandsout",
    "whywechoseit"
  ]);

  // brand key -> first display name seen for it
  const firstSeenByKey = new Map();
  for (const pattern of candidatePatterns) {
    for (const match of answerText.matchAll(pattern)) {
      const candidate = cleanCandidateName(match[1] ?? "");
      const candidateKey = brandKeyFromText(candidate);
      const rejected =
        !candidateKey ||
        genericKeys.has(candidateKey) ||
        candidate.split(/\s+/).length > 6 ||
        matchesBrandKey(candidateKey, ownBrandKeys) ||
        !matchesBrandKey(candidateKey, knownCompetitorKeys);
      if (rejected) continue;
      if (!firstSeenByKey.has(candidateKey)) firstSeenByKey.set(candidateKey, candidate);
    }
  }
  return Array.from(firstSeenByKey.values()).slice(0, 10);
}
4301
/**
 * Tidy a captured candidate name: strip leading whitespace/quotes, strip
 * trailing whitespace/quotes/punctuation, and collapse inner whitespace runs
 * to single spaces.
 */
function cleanCandidateName(candidate) {
  const withoutEdges = candidate.replace(/^[\s"'`]+|[\s"'`.,:;!?]+$/g, "");
  const singleSpaced = withoutEdges.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
4304
/**
 * Derive comparison keys for a domain.
 *
 * When `registrableDomain` resolves the domain, the keys are the registrable
 * domain and its brand label, each reduced to lower-case alphanumerics. When
 * it does not, the normalized hostname (text before the first "/") is used
 * instead. Keys shorter than four characters are discarded.
 *
 * @param domain Domain or URL-like string.
 * @returns Array of distinct keys (possibly empty).
 */
function collectBrandKeysFromDomain(domain) {
  const compact = (text) => text.replace(/[^a-z0-9]/gi, "").toLowerCase();
  const isLongEnough = (key) => key.length >= 4;

  const reg = registrableDomain(domain);
  if (!reg) {
    const hostname = normalizeProjectDomain(domain).split("/")[0] ?? "";
    return [compact(hostname)].filter(isLongEnough);
  }

  const fullKey = compact(reg);
  const brandKey = compact(brandLabelFromDomain(reg));
  return Array.from(new Set([fullKey, brandKey].filter(isLongEnough)));
}
4318
/**
 * Test a candidate key against a collection of brand keys.
 *
 * A brand key matches when the two keys are equal, or when either one starts
 * or ends with the other.
 *
 * @param candidateKey Key derived from a candidate name.
 * @param brandKeys    Iterable of known brand keys.
 * @returns `true` if any brand key matches.
 */
function matchesBrandKey(candidateKey, brandKeys) {
  const hasAffix = (whole, part) => whole.startsWith(part) || whole.endsWith(part);
  return Array.from(brandKeys).some(
    (brandKey) =>
      candidateKey === brandKey ||
      hasAffix(candidateKey, brandKey) ||
      hasAffix(brandKey, candidateKey)
  );
}
4326
-
4327
- // src/intelligence-service.ts
4328
- var RECURRENCE_LOOKBACK_RUNS = 5;
4329
- var HISTORY_WINDOW_RUNS = Math.max(PERSISTENT_GAP_THRESHOLD, 5);
4330
- var log = createLogger("IntelligenceService");
4331
- var IntelligenceService = class {
4332
- constructor(db) {
4333
- this.db = db;
4334
- }
4335
- /**
4336
- * Analyze a completed run and persist insights + health snapshot.
4337
- * Idempotent: deletes prior results for the same runId before inserting.
4338
- * Returns the analysis result for the coordinator to inspect (e.g. for webhook dispatch).
4339
- */
4340
- analyzeAndPersist(runId, projectId) {
4341
- const recentRuns = this.db.select().from(runs).where(
4342
- and(
4343
- eq(runs.projectId, projectId),
4344
- or(eq(runs.status, "completed"), eq(runs.status, "partial")),
4345
- // Defensive: RunCoordinator already skips probes before this is
4346
- // called, but if a future call site invokes analyzeAndPersist
4347
- // directly for a probe, probes still must not pollute the
4348
- // intelligence window.
4349
- ne(runs.trigger, RunTriggers.probe)
4350
- )
4351
- ).orderBy(desc(runs.finishedAt), desc(runs.createdAt)).limit(HISTORY_WINDOW_RUNS).all();
4352
- if (recentRuns.length === 0) {
4353
- log.info("intelligence.skip", { runId, reason: "no completed runs" });
4354
- return null;
4355
- }
4356
- const currentRunRecord = recentRuns.find((r) => r.id === runId);
4357
- if (!currentRunRecord) {
4358
- log.info("intelligence.skip", { runId, reason: "run not in recent completed list" });
4359
- return null;
4360
- }
4361
- const currentRun = this.buildRunData(
4362
- runId,
4363
- projectId,
4364
- currentRunRecord.finishedAt ?? currentRunRecord.createdAt,
4365
- currentRunRecord.location ?? null
4366
- );
4367
- if (currentRun.snapshots.length === 0) {
4368
- log.info("intelligence.skip", { runId, reason: "no snapshots" });
4369
- return null;
4370
- }
4371
- const orderedRecent = [...recentRuns].reverse();
4372
- const currentLocation = currentRunRecord.location ?? null;
4373
- const sameLocationOrdered = orderedRecent.filter((r) => (r.location ?? null) === currentLocation);
4374
- const currentLocIdx = sameLocationOrdered.findIndex((r) => r.id === runId);
4375
- const previousRunRecord = currentLocIdx > 0 ? sameLocationOrdered[currentLocIdx - 1] : null;
4376
- const previousRun = previousRunRecord ? this.buildRunData(
4377
- previousRunRecord.id,
4378
- projectId,
4379
- previousRunRecord.finishedAt ?? previousRunRecord.createdAt,
4380
- previousRunRecord.location ?? null
4381
- ) : null;
4382
- const trackedCompetitors = this.loadTrackedCompetitors(projectId);
4383
- const history = sameLocationOrdered.slice(0, currentLocIdx + 1).map((r) => r.id === runId ? currentRun : this.buildRunData(r.id, projectId, r.finishedAt ?? r.createdAt, r.location ?? null));
4384
- if (!previousRun) {
4385
- const result2 = analyzeRuns(currentRun, currentRun, { trackedCompetitors, history });
4386
- log.info("intelligence.analyzed", {
4387
- runId,
4388
- regressions: 0,
4389
- gains: 0,
4390
- citedRate: result2.health.overallCitedRate,
4391
- insights: 0
4392
- });
4393
- this.persistResult(this.emptyAnalysisResult(result2), runId, projectId);
4394
- return result2;
4395
- }
4396
- const result = analyzeRuns(currentRun, previousRun, { trackedCompetitors, history });
4397
- log.info("intelligence.analyzed", {
4398
- runId,
4399
- regressions: result.regressions.length,
4400
- gains: result.gains.length,
4401
- firstCitations: result.firstCitations.length,
4402
- providerPickups: result.providerPickups.length,
4403
- persistentGaps: result.persistentGaps.length,
4404
- competitorGains: result.competitorGains.length,
4405
- competitorLosses: result.competitorLosses.length,
4406
- citedRate: result.health.overallCitedRate,
4407
- insights: result.insights.length
4408
- });
4409
- const tieredResult = this.tierResult(result, runId, projectId);
4410
- this.persistResult(tieredResult, runId, projectId);
4411
- return tieredResult;
4412
- }
4413
- /**
4414
- * Analyze a single run given an explicit previous run (or null for first run).
4415
- * Used by backfill where we control the run ordering.
4416
- *
4417
- * `dryRun: true` skips the DB write — `persistResult` is not called and
4418
- * dismissed flags / health rows are untouched. Callers receive the same
4419
- * AnalysisResult they would have, suitable for previewing what a write
4420
- * would have produced.
4421
- */
4422
- analyzeRunWithPrevious(runRecord, previousRunRecord, historyRecords, opts) {
4423
- const currentRun = this.buildRunData(
4424
- runRecord.id,
4425
- runRecord.projectId,
4426
- runRecord.finishedAt ?? runRecord.createdAt,
4427
- runRecord.location ?? null
4428
- );
4429
- if (currentRun.snapshots.length === 0) {
4430
- return null;
4431
- }
4432
- const previousRun = previousRunRecord ? this.buildRunData(
4433
- previousRunRecord.id,
4434
- previousRunRecord.projectId,
4435
- previousRunRecord.finishedAt ?? previousRunRecord.createdAt,
4436
- previousRunRecord.location ?? null
4437
- ) : null;
4438
- const trackedCompetitors = this.loadTrackedCompetitors(runRecord.projectId);
4439
- const history = (historyRecords ?? []).map((r) => r.id === runRecord.id ? currentRun : this.buildRunData(r.id, r.projectId, r.finishedAt ?? r.createdAt, r.location ?? null));
4440
- if (!previousRun) {
4441
- const result2 = analyzeRuns(currentRun, currentRun, { trackedCompetitors, history });
4442
- const emptyResult = this.emptyAnalysisResult(result2);
4443
- if (!opts?.dryRun) this.persistResult(emptyResult, runRecord.id, runRecord.projectId);
4444
- return result2;
4445
- }
4446
- const result = analyzeRuns(currentRun, previousRun, { trackedCompetitors, history });
4447
- const tieredResult = this.tierResult(result, runRecord.id, runRecord.projectId);
4448
- if (!opts?.dryRun) this.persistResult(tieredResult, runRecord.id, runRecord.projectId);
4449
- return tieredResult;
4450
- }
4451
- /**
4452
- * Backfill intelligence for all completed/partial runs of a project.
4453
- * Processes runs in chronological order so each run compares against its predecessor.
4454
- *
4455
- * Scoping options:
4456
- * - `fromRunId` / `toRunId`: bound the target range by exact run ID.
4457
- * - `since`: bound the target range by `finishedAt >= <date>`. Accepts
4458
- * any string that `Date.parse` understands (ISO 8601, `YYYY-MM-DD`,
4459
- * etc.). Runs before the cutoff are *not* re-processed but stay
4460
- * available for predecessor lookup, so transition detection at the
4461
- * boundary stays correct. Composes with `fromRunId` / `toRunId` —
4462
- * all three filters intersect.
4463
- * - `dryRun`: compute the analysis without writing. The return value
4464
- * includes a `delta` describing what would change (rows to delete vs
4465
- * create per run + aggregate). DB is left untouched.
4466
- */
4467
- backfill(projectName, opts, onProgress) {
4468
- const project = this.db.select().from(projects).where(eq(projects.name, projectName)).get();
4469
- if (!project) {
4470
- throw new Error(`Project "${projectName}" not found`);
4471
- }
4472
- let sinceTimestamp = null;
4473
- if (opts?.since !== void 0) {
4474
- const parsed = Date.parse(opts.since);
4475
- if (Number.isNaN(parsed)) {
4476
- throw new Error(`Invalid --since value "${opts.since}": expected a parseable date (ISO 8601 or YYYY-MM-DD)`);
4477
- }
4478
- sinceTimestamp = parsed;
4479
- }
4480
- const allRuns = this.db.select().from(runs).where(
4481
- and(
4482
- eq(runs.projectId, project.id),
4483
- or(eq(runs.status, "completed"), eq(runs.status, "partial")),
4484
- // Backfill must not replay probe runs as if they were real sweeps.
4485
- ne(runs.trigger, RunTriggers.probe)
4486
- )
4487
- ).orderBy(asc(runs.finishedAt)).all();
4488
- let startIdx = 0;
4489
- let endIdx = allRuns.length;
4490
- if (opts?.fromRunId) {
4491
- const idx = allRuns.findIndex((r) => r.id === opts.fromRunId);
4492
- if (idx === -1) throw new Error(`Run "${opts.fromRunId}" not found in project`);
4493
- startIdx = idx;
4494
- }
4495
- if (opts?.toRunId) {
4496
- const idx = allRuns.findIndex((r) => r.id === opts.toRunId);
4497
- if (idx === -1) throw new Error(`Run "${opts.toRunId}" not found in project`);
4498
- endIdx = idx + 1;
4499
- }
4500
- let targetRuns = allRuns.slice(startIdx, endIdx);
4501
- if (sinceTimestamp !== null) {
4502
- targetRuns = targetRuns.filter((r) => {
4503
- const ts = r.finishedAt ?? r.createdAt;
4504
- const t = Date.parse(ts);
4505
- return !Number.isNaN(t) && t >= sinceTimestamp;
4506
- });
4507
- }
4508
- let processed = 0;
4509
- let skipped = 0;
4510
- let totalInsights = 0;
4511
- const isDryRun = opts?.dryRun === true;
4512
- const perRunDelta = [];
4513
- let wouldDeleteTotal = 0;
4514
- const existingByRunId = /* @__PURE__ */ new Map();
4515
- if (isDryRun && targetRuns.length > 0) {
4516
- const rows = this.db.select({ runId: insights.runId }).from(insights).where(inArray(insights.runId, targetRuns.map((r) => r.id))).all();
4517
- for (const r of rows) {
4518
- if (r.runId == null) continue;
4519
- existingByRunId.set(r.runId, (existingByRunId.get(r.runId) ?? 0) + 1);
4520
- }
4521
- }
4522
- for (let i = 0; i < targetRuns.length; i++) {
4523
- const run = targetRuns[i];
4524
- const runLocation = run.location ?? null;
4525
- const sameLocationRuns = allRuns.filter((r) => (r.location ?? null) === runLocation);
4526
- const sameLocIdx = sameLocationRuns.indexOf(run);
4527
- const previousRun = sameLocIdx > 0 ? sameLocationRuns[sameLocIdx - 1] : null;
4528
- const historyStart = Math.max(0, sameLocIdx - (HISTORY_WINDOW_RUNS - 1));
4529
- const historyRecords = sameLocationRuns.slice(historyStart, sameLocIdx + 1);
4530
- const result = this.analyzeRunWithPrevious(run, previousRun, historyRecords, { dryRun: isDryRun });
4531
- if (result) {
4532
- processed++;
4533
- totalInsights += result.insights.length;
4534
- if (isDryRun) {
4535
- const existing = existingByRunId.get(run.id) ?? 0;
4536
- wouldDeleteTotal += existing;
4537
- perRunDelta.push({ runId: run.id, existingInsights: existing, newInsights: result.insights.length });
4538
- }
4539
- onProgress?.({ runId: run.id, index: i + 1, total: targetRuns.length, insights: result.insights.length });
4540
- } else {
4541
- skipped++;
4542
- onProgress?.({ runId: run.id, index: i + 1, total: targetRuns.length, insights: 0 });
4543
- }
4544
- }
4545
- if (isDryRun) {
4546
- return {
4547
- processed,
4548
- skipped,
4549
- totalInsights,
4550
- dryRun: true,
4551
- delta: {
4552
- wouldDelete: wouldDeleteTotal,
4553
- wouldCreate: totalInsights,
4554
- netChange: totalInsights - wouldDeleteTotal,
4555
- perRun: perRunDelta
4556
- }
4557
- };
4558
- }
4559
- return { processed, skipped, totalInsights };
4560
- }
4561
- loadTrackedCompetitors(projectId) {
4562
- return this.db.select({ domain: competitors.domain }).from(competitors).where(eq(competitors.projectId, projectId)).all().map((r) => r.domain);
4563
- }
4564
- /**
4565
- * Wipe transition signals from an analysis result while keeping health.
4566
- * Used when there's no baseline (first run) to avoid emitting false transitions.
4567
- */
4568
- emptyAnalysisResult(result) {
4569
- return {
4570
- ...result,
4571
- insights: [],
4572
- regressions: [],
4573
- gains: [],
4574
- firstCitations: [],
4575
- providerPickups: [],
4576
- persistentGaps: [],
4577
- competitorGains: [],
4578
- competitorLosses: []
4579
- };
4580
- }
4581
- persistResult(result, runId, projectId) {
4582
- const previouslyDismissed = /* @__PURE__ */ new Set();
4583
- const existingInsights = this.db.select({ query: insights.query, provider: insights.provider, type: insights.type, dismissed: insights.dismissed }).from(insights).where(eq(insights.runId, runId)).all();
4584
- for (const row of existingInsights) {
4585
- if (row.dismissed) {
4586
- previouslyDismissed.add(`${row.query}:${row.provider}:${row.type}`);
4587
- }
4588
- }
4589
- this.db.transaction((tx) => {
4590
- tx.delete(insights).where(eq(insights.runId, runId)).run();
4591
- tx.delete(healthSnapshots).where(eq(healthSnapshots.runId, runId)).run();
4592
- const now = (/* @__PURE__ */ new Date()).toISOString();
4593
- for (const insight of result.insights) {
4594
- const wasDismissed = previouslyDismissed.has(`${insight.query}:${insight.provider}:${insight.type}`);
4595
- tx.insert(insights).values({
4596
- id: insight.id,
4597
- projectId,
4598
- runId,
4599
- type: insight.type,
4600
- severity: insight.severity,
4601
- title: insight.title,
4602
- query: insight.query,
4603
- provider: insight.provider,
4604
- recommendation: insight.recommendation ?? null,
4605
- cause: insight.cause ?? null,
4606
- dismissed: wasDismissed,
4607
- createdAt: insight.createdAt
4608
- }).run();
4609
- }
4610
- tx.insert(healthSnapshots).values({
4611
- id: crypto.randomUUID(),
4612
- projectId,
4613
- runId,
4614
- overallCitedRate: String(result.health.overallCitedRate),
4615
- totalPairs: result.health.totalPairs,
4616
- citedPairs: result.health.citedPairs,
4617
- providerBreakdown: result.health.providerBreakdown,
4618
- createdAt: now
4619
- }).run();
4620
- });
4621
- log.info("intelligence.persisted", { runId, insights: result.insights.length });
4622
- }
4623
- /**
4624
- * Apply severity tiering to the insights of an AnalysisResult and return a
4625
- * new result. Wraps `applySeverityTiering` so callers (analyzeAndPersist,
4626
- * analyzeRunWithPrevious) can pass the same tiered shape both into the DB
4627
- * write and back to the RunCoordinator / webhook dispatcher.
4628
- */
4629
- tierResult(result, runId, projectId) {
4630
- if (result.insights.length === 0) return result;
4631
- return { ...result, insights: this.applySeverityTiering(result.insights, runId, projectId) };
4632
- }
4633
- /**
4634
- * Re-classify each regression insight's severity using GSC traffic +
4635
- * recurrence signals via the pure `classifyRegressionSeverity` primitive
4636
- * in @ainyc/canonry-intelligence. Non-regression insights are returned
4637
- * untouched.
4638
- */
4639
- applySeverityTiering(rawInsights, excludeRunId, projectId) {
4640
- const regressions = rawInsights.filter((i) => i.type === "regression");
4641
- if (regressions.length === 0) return rawInsights;
4642
- const gscRows = this.db.select({ query: gscSearchData.query, impressions: gscSearchData.impressions }).from(gscSearchData).where(eq(gscSearchData.projectId, projectId)).all();
4643
- const gscConnected = gscRows.length > 0;
4644
- const gscImpressionsByQuery = /* @__PURE__ */ new Map();
4645
- for (const row of gscRows) {
4646
- const key = row.query.toLowerCase();
4647
- gscImpressionsByQuery.set(key, (gscImpressionsByQuery.get(key) ?? 0) + row.impressions);
4648
- }
4649
- const projectRow = this.db.select({ locations: projects.locations }).from(projects).where(eq(projects.id, projectId)).get();
4650
- const locationCount = Math.max(
4651
- 1,
4652
- (projectRow?.locations ?? []).length
4653
- );
4654
- const ROWS_PER_GROUP_BUDGET = Math.max(2, locationCount);
4655
- const recentRunRows = this.db.select({ id: runs.id, createdAt: runs.createdAt }).from(runs).where(
4656
- and(
4657
- eq(runs.projectId, projectId),
4658
- eq(runs.kind, RunKinds["answer-visibility"]),
4659
- or(eq(runs.status, "completed"), eq(runs.status, "partial")),
4660
- // Defensive — see top of file.
4661
- ne(runs.trigger, RunTriggers.probe)
4662
- )
4663
- ).orderBy(desc(runs.createdAt), desc(runs.id)).limit((RECURRENCE_LOOKBACK_RUNS + 1) * ROWS_PER_GROUP_BUDGET).all();
4664
- const recentGroups = groupRunsByCreatedAt(recentRunRows);
4665
- const recentRunIds = [];
4666
- const recentRunIdToCreatedAt = /* @__PURE__ */ new Map();
4667
- let consumedGroups = 0;
4668
- for (const group of recentGroups) {
4669
- const groupIds = group.map((r) => r.id);
4670
- if (groupIds.includes(excludeRunId)) continue;
4671
- for (const r of group) recentRunIdToCreatedAt.set(r.id, r.createdAt);
4672
- recentRunIds.push(...groupIds);
4673
- consumedGroups++;
4674
- if (consumedGroups >= RECURRENCE_LOOKBACK_RUNS) break;
4675
- }
4676
- const haveHistory = recentRunIds.length > 0;
4677
- const priorRegressionsByPair = /* @__PURE__ */ new Map();
4678
- if (haveHistory) {
4679
- const priorRows = this.db.select({ query: insights.query, provider: insights.provider, runId: insights.runId }).from(insights).where(and(eq(insights.type, "regression"), inArray(insights.runId, recentRunIds))).all();
4680
- const regressionGroups = /* @__PURE__ */ new Map();
4681
- for (const row of priorRows) {
4682
- if (!row.runId) continue;
4683
- const key = `${row.query}:${row.provider}`;
4684
- const groupKey = recentRunIdToCreatedAt.get(row.runId) ?? row.runId;
4685
- let groups = regressionGroups.get(key);
4686
- if (!groups) {
4687
- groups = /* @__PURE__ */ new Set();
4688
- regressionGroups.set(key, groups);
4689
- }
4690
- groups.add(groupKey);
4691
- }
4692
- for (const [key, groups] of regressionGroups) {
4693
- priorRegressionsByPair.set(key, groups.size);
4694
- }
4695
- }
4696
- return rawInsights.map((insight) => {
4697
- if (insight.type !== "regression") return insight;
4698
- const gscImpressions = gscConnected ? gscImpressionsByQuery.get(insight.query.toLowerCase()) ?? 0 : void 0;
4699
- const recurrenceCount = haveHistory ? priorRegressionsByPair.get(`${insight.query}:${insight.provider}`) ?? 0 : void 0;
4700
- const severity = classifyRegressionSeverity({
4701
- gscImpressions,
4702
- recurrenceCount
4703
- });
4704
- return { ...insight, severity };
4705
- });
4706
- }
4707
- buildRunData(runId, projectId, completedAt, location = null) {
4708
- const projectDomainRow = this.db.select({ canonicalDomain: projects.canonicalDomain, ownedDomains: projects.ownedDomains }).from(projects).where(eq(projects.id, projectId)).get();
4709
- const projectDomains = projectDomainRow ? effectiveDomains({
4710
- canonicalDomain: projectDomainRow.canonicalDomain,
4711
- ownedDomains: projectDomainRow.ownedDomains
4712
- }) : [];
4713
- const rows = this.db.select({
4714
- query: queries.query,
4715
- // Denormalized query text persisted by v58 — the fallback when the
4716
- // joined queries.query has been hard-deleted (or the query_id was
4717
- // nulled by the v58 dangling-FK cleanup).
4718
- queryText: querySnapshots.queryText,
4719
- provider: querySnapshots.provider,
4720
- citationState: querySnapshots.citationState,
4721
- citedDomains: querySnapshots.citedDomains,
4722
- competitorOverlap: querySnapshots.competitorOverlap,
4723
- snapshotLocation: querySnapshots.location
4724
- }).from(querySnapshots).leftJoin(queries, eq(querySnapshots.queryId, queries.id)).where(eq(querySnapshots.runId, runId)).all();
4725
- const snapshots = [];
4726
- let orphanCount = 0;
4727
- for (const r of rows) {
4728
- const resolvedQuery = r.query ?? r.queryText ?? null;
4729
- if (!resolvedQuery) {
4730
- orphanCount++;
4731
- continue;
4732
- }
4733
- const domains = r.citedDomains;
4734
- const competitors2 = r.competitorOverlap;
4735
- snapshots.push({
4736
- query: resolvedQuery,
4737
- provider: r.provider,
4738
- cited: r.citationState === CitationStates.cited,
4739
- // The project's OWN cited domain — never a co-cited competitor that
4740
- // happens to sort first in the full citedDomains set.
4741
- citationUrl: pickProjectCitedDomain(domains, projectDomains),
4742
- // Snapshots carry their own location for downstream detectors. In
4743
- // practice every snapshot in a single runId shares the run's
4744
- // location; the per-row column is the same value duplicated, but
4745
- // we read it from the snapshot row so a stale runs.location can't
4746
- // mask snapshot truth.
4747
- location: r.snapshotLocation ?? location ?? null,
4748
- competitorDomains: competitors2,
4749
- // citedDomains is the FULL set (tracked competitors + third-party
4750
- // sources). Cause analysis uses it to name the displacing source
4751
- // when no tracked competitor appears in the response.
4752
- citedDomains: domains
4753
- });
4754
- }
4755
- if (orphanCount > 0) {
4756
- log.warn("snapshot.orphan-skip", { runId, projectId, orphanCount });
4757
- }
4758
- return { runId, projectId, completedAt, location, snapshots };
4759
- }
4760
- };
4761
-
4762
- export {
4763
- projects,
4764
- queries,
4765
- competitors,
4766
- runs,
4767
- querySnapshots,
4768
- auditLog,
4769
- apiKeys,
4770
- schedules,
4771
- notifications,
4772
- gscSearchData,
4773
- gscUrlInspections,
4774
- gscCoverageSnapshots,
4775
- bingCoverageSnapshots,
4776
- bingUrlInspections,
4777
- gaTrafficSnapshots,
4778
- gaAiReferrals,
4779
- gaSocialReferrals,
4780
- gaTrafficSummaries,
4781
- gaTrafficWindowSummaries,
4782
- usageCounters,
4783
- insights,
4784
- healthSnapshots,
4785
- agentSessions,
4786
- ccReleaseSyncs,
4787
- backlinkDomains,
4788
- backlinkSummaries,
4789
- agentMemory,
4790
- trafficSources,
4791
- crawlerEventsHourly,
4792
- aiUserFetchEventsHourly,
4793
- aiReferralEventsHourly,
4794
- rawEventSamples,
4795
- discoverySessions,
4796
- discoveryProbes,
4797
- contentTargetDismissals,
4798
- recommendationExplanations,
4799
- createClient,
4800
- parseJsonColumn,
4801
- extractLegacyCredentials,
4802
- dropLegacyCredentialColumns,
4803
- migrate,
4804
- groupRunsByCreatedAt,
4805
- pickGroupRepresentative,
4806
- filterTrackedSnapshots,
4807
- determineCitationState,
4808
- computeCompetitorOverlap,
4809
- extractRecommendedCompetitors,
4810
- isBlogShapedQuery,
4811
- buildInventory,
4812
- buildContentTargetRows,
4813
- buildContentSourceRows,
4814
- buildContentGapRows,
4815
- mapOpportunitiesToNextSteps,
4816
- groupInsights,
4817
- buildBrandTokens,
4818
- categorizeQueryByIntent,
4819
- MIN_TREND_POINTS,
4820
- isTrendBaseline,
4821
- buildCitationScorecard,
4822
- buildCompetitorLandscape,
4823
- buildMentionLandscape,
4824
- buildAiSourceOrigin,
4825
- buildMovementSummary,
4826
- buildVisibilityScore,
4827
- buildMentionCoverage,
4828
- buildGapQueryScore,
4829
- buildMentionGapScore,
4830
- buildCompetitorPressureScore,
4831
- buildOverviewCompetitors,
4832
- buildProviderScores,
4833
- DEFAULT_RUN_HISTORY_LIMIT,
4834
- buildRunHistory,
4835
- buildProviderTrends,
4836
- providerKey,
4837
- buildMentionShare,
4838
- buildSuggestedQueries,
4839
- smoothedRunDelta,
4840
- createLogger,
4841
- IntelligenceService
4842
- };