@ainyc/canonry 4.24.1 → 4.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/agent-workspace/skills/aero/references/aeo-discovery.md +89 -0
- package/assets/agent-workspace/skills/canonry-setup/references/canonry-cli.md +14 -0
- package/assets/assets/{index-BzD9HUxc.js → index-C4scWriC.js} +81 -81
- package/assets/index.html +1 -1
- package/dist/{chunk-E5PZ23OS.js → chunk-6J6WQOGH.js} +905 -195
- package/dist/{chunk-6EJ54OX7.js → chunk-A7HQ6X43.js} +91 -1
- package/dist/{chunk-EUGCQSFC.js → chunk-CRQMGNPH.js} +133 -1
- package/dist/{chunk-OYYFXKRK.js → chunk-IS65IYNZ.js} +115 -2
- package/dist/cli.js +424 -123
- package/dist/index.js +4 -4
- package/dist/{intelligence-service-NVN2PAR7.js → intelligence-service-XLUYTE57.js} +2 -2
- package/dist/mcp.js +9 -3
- package/package.json +9 -9
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
AGENT_MEMORY_KEY_MAX_LENGTH,
|
|
3
3
|
AGENT_MEMORY_VALUE_MAX_BYTES,
|
|
4
|
+
DISCOVERY_MAX_PROBES_CAP,
|
|
4
5
|
competitorBatchRequestSchema,
|
|
6
|
+
discoveryRunRequestSchema,
|
|
5
7
|
keywordBatchRequestSchema,
|
|
6
8
|
keywordGenerateRequestSchema,
|
|
7
9
|
notificationCreateRequestSchema,
|
|
@@ -16,7 +18,7 @@ import {
|
|
|
16
18
|
trafficConnectCloudRunRequestSchema,
|
|
17
19
|
trafficConnectWordpressRequestSchema,
|
|
18
20
|
trafficEventKindSchema
|
|
19
|
-
} from "./chunk-EUGCQSFC.js";
|
|
21
|
+
} from "./chunk-CRQMGNPH.js";
|
|
20
22
|
|
|
21
23
|
// src/config.ts
|
|
22
24
|
import fs from "fs";
|
|
@@ -819,6 +821,32 @@ var ApiClient = class {
|
|
|
819
821
|
`/projects/${encodeURIComponent(project)}/traffic/events${qs}`
|
|
820
822
|
);
|
|
821
823
|
}
|
|
824
|
+
async triggerDiscoveryRun(project, body) {
|
|
825
|
+
return this.request(
|
|
826
|
+
"POST",
|
|
827
|
+
`/projects/${encodeURIComponent(project)}/discover/run`,
|
|
828
|
+
body ?? {}
|
|
829
|
+
);
|
|
830
|
+
}
|
|
831
|
+
async listDiscoverySessions(project, opts) {
|
|
832
|
+
const qs = opts?.limit ? `?limit=${encodeURIComponent(String(opts.limit))}` : "";
|
|
833
|
+
return this.request(
|
|
834
|
+
"GET",
|
|
835
|
+
`/projects/${encodeURIComponent(project)}/discover/sessions${qs}`
|
|
836
|
+
);
|
|
837
|
+
}
|
|
838
|
+
async getDiscoverySession(project, sessionId) {
|
|
839
|
+
return this.request(
|
|
840
|
+
"GET",
|
|
841
|
+
`/projects/${encodeURIComponent(project)}/discover/sessions/${encodeURIComponent(sessionId)}`
|
|
842
|
+
);
|
|
843
|
+
}
|
|
844
|
+
async previewDiscoveryPromote(project, sessionId) {
|
|
845
|
+
return this.request(
|
|
846
|
+
"GET",
|
|
847
|
+
`/projects/${encodeURIComponent(project)}/discover/sessions/${encodeURIComponent(sessionId)}/promote`
|
|
848
|
+
);
|
|
849
|
+
}
|
|
822
850
|
async wordpressConnect(project, body) {
|
|
823
851
|
return this.request("POST", `/projects/${encodeURIComponent(project)}/wordpress/connect`, body);
|
|
824
852
|
}
|
|
@@ -1226,6 +1254,24 @@ var trafficSourceIdInputSchema = z2.object({
|
|
|
1226
1254
|
project: projectNameSchema,
|
|
1227
1255
|
sourceId: z2.string().min(1).describe("Traffic source ID.")
|
|
1228
1256
|
});
|
|
1257
|
+
var discoveryRunInputSchema = z2.object({
|
|
1258
|
+
project: projectNameSchema,
|
|
1259
|
+
request: discoveryRunRequestSchema.extend({
|
|
1260
|
+
// Stronger descriptions for the LLM. The base Zod schema enforces the
|
|
1261
|
+
// upper bound; this just clarifies the meaning of each knob.
|
|
1262
|
+
icpDescription: z2.string().min(1).optional().describe("Free-text ICP description. If omitted, the project must already have spec.icpDescription stored."),
|
|
1263
|
+
dedupThreshold: z2.number().min(0).max(1).optional().describe("Cosine similarity threshold for clustering seed candidates. Defaults to 0.85. Lower values dedupe more aggressively."),
|
|
1264
|
+
maxProbes: z2.number().int().positive().max(DISCOVERY_MAX_PROBES_CAP).optional().describe(`Max canonical queries to probe in this session. Default 100, hard cap ${DISCOVERY_MAX_PROBES_CAP}.`)
|
|
1265
|
+
}).optional()
|
|
1266
|
+
});
|
|
1267
|
+
var discoverySessionsListInputSchema = z2.object({
|
|
1268
|
+
project: projectNameSchema,
|
|
1269
|
+
limit: z2.number().int().positive().max(200).optional().describe("Max sessions returned. Default 50.")
|
|
1270
|
+
});
|
|
1271
|
+
var discoverySessionIdInputSchema = z2.object({
|
|
1272
|
+
project: projectNameSchema,
|
|
1273
|
+
sessionId: z2.string().min(1).describe("Discovery session ID returned by canonry_discover_run_start.")
|
|
1274
|
+
});
|
|
1229
1275
|
var AGENT_WEBHOOK_EVENTS = [
|
|
1230
1276
|
notificationEventSchema.enum["run.completed"],
|
|
1231
1277
|
notificationEventSchema.enum["insight.critical"],
|
|
@@ -2130,6 +2176,50 @@ var canonryMcpTools = [
|
|
|
2130
2176
|
await client.deleteNotification(input.project, agentNotification.id);
|
|
2131
2177
|
return { status: "detached", project: input.project };
|
|
2132
2178
|
}
|
|
2179
|
+
}),
|
|
2180
|
+
defineTool({
|
|
2181
|
+
name: "canonry_discover_run_start",
|
|
2182
|
+
title: "Start discovery run",
|
|
2183
|
+
description: 'Kick off a discovery session for a project: ICP \u2192 seed (Gemini grounded prompt) \u2192 embed + cluster + pick representative \u2192 probe each canonical \u2192 classify into cited / aspirational / wasted-surface \u2192 aggregate competitor map. Returns {runId, sessionId, status:"running"} immediately; the work runs in the background. Poll canonry_discover_session_get with the returned sessionId until status is "completed" or "failed". Costs roughly $1 / session at default budget; budget capped at 500 probes / session.',
|
|
2184
|
+
access: "write",
|
|
2185
|
+
tier: "discovery",
|
|
2186
|
+
inputSchema: discoveryRunInputSchema,
|
|
2187
|
+
annotations: writeAnnotations({ idempotentHint: false, openWorldHint: true }),
|
|
2188
|
+
openApiOperations: ["POST /api/v1/projects/{name}/discover/run"],
|
|
2189
|
+
handler: (client, input) => client.triggerDiscoveryRun(input.project, input.request)
|
|
2190
|
+
}),
|
|
2191
|
+
defineTool({
|
|
2192
|
+
name: "canonry_discover_sessions_list",
|
|
2193
|
+
title: "List discovery sessions",
|
|
2194
|
+
description: "List recent discovery sessions for a project, newest first. Returns the session-level summary (status, seed counts, bucket counts, competitor map). Use canonry_discover_session_get to drill into per-query probe rows.",
|
|
2195
|
+
access: "read",
|
|
2196
|
+
tier: "discovery",
|
|
2197
|
+
inputSchema: discoverySessionsListInputSchema,
|
|
2198
|
+
annotations: readAnnotations(),
|
|
2199
|
+
openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions"],
|
|
2200
|
+
handler: (client, input) => client.listDiscoverySessions(input.project, input.limit !== void 0 ? { limit: input.limit } : void 0)
|
|
2201
|
+
}),
|
|
2202
|
+
defineTool({
|
|
2203
|
+
name: "canonry_discover_session_get",
|
|
2204
|
+
title: "Get discovery session",
|
|
2205
|
+
description: 'Get one discovery session with the full probe list (per-query bucket + cited domains). Use after canonry_discover_run_start to inspect what the discovery pipeline produced; this is the canonical read for "what did discovery find" before PR 2 lands `canonry discover promote`.',
|
|
2206
|
+
access: "read",
|
|
2207
|
+
tier: "discovery",
|
|
2208
|
+
inputSchema: discoverySessionIdInputSchema,
|
|
2209
|
+
annotations: readAnnotations(),
|
|
2210
|
+
openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions/{id}"],
|
|
2211
|
+
handler: (client, input) => client.getDiscoverySession(input.project, input.sessionId)
|
|
2212
|
+
}),
|
|
2213
|
+
defineTool({
|
|
2214
|
+
name: "canonry_discover_promote_preview",
|
|
2215
|
+
title: "Preview discovery promotion",
|
|
2216
|
+
description: "Read-only preview of what `canonry discover promote` (PR 2) would persist for a session: bucketed query lists and suggested new competitor domains (those not already in the project's tracked competitor list). v1 returns the preview only; use it to confirm a basket before PR 2 ships the merge step.",
|
|
2217
|
+
access: "read",
|
|
2218
|
+
tier: "discovery",
|
|
2219
|
+
inputSchema: discoverySessionIdInputSchema,
|
|
2220
|
+
annotations: readAnnotations(),
|
|
2221
|
+
openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions/{id}/promote"],
|
|
2222
|
+
handler: (client, input) => client.previewDiscoveryPromote(input.project, input.sessionId)
|
|
2133
2223
|
})
|
|
2134
2224
|
];
|
|
2135
2225
|
var CANONRY_MCP_TOOL_COUNT = canonryMcpTools.length;
|
|
@@ -88,7 +88,9 @@ var runKindSchema = z2.enum([
|
|
|
88
88
|
"bing-inspect",
|
|
89
89
|
"bing-inspect-sitemap",
|
|
90
90
|
"backlink-extract",
|
|
91
|
-
"traffic-sync"
|
|
91
|
+
"traffic-sync",
|
|
92
|
+
"aeo-discover-seed",
|
|
93
|
+
"aeo-discover-probe"
|
|
92
94
|
]);
|
|
93
95
|
var RunKinds = runKindSchema.enum;
|
|
94
96
|
var runTriggerSchema = z2.enum(["manual", "scheduled", "config-apply", "backfill"]);
|
|
@@ -2348,6 +2350,128 @@ var trafficEventsResponseSchema = z20.object({
|
|
|
2348
2350
|
events: z20.array(trafficEventEntrySchema)
|
|
2349
2351
|
});
|
|
2350
2352
|
|
|
2353
|
+
// ../contracts/src/discovery.ts
|
|
2354
|
+
import { z as z21 } from "zod";
|
|
2355
|
+
var discoveryBucketSchema = z21.enum(["cited", "aspirational", "wasted-surface"]);
|
|
2356
|
+
var DiscoveryBuckets = discoveryBucketSchema.enum;
|
|
2357
|
+
var discoverySessionStatusSchema = z21.enum(["queued", "seeding", "probing", "completed", "failed"]);
|
|
2358
|
+
var DiscoverySessionStatuses = discoverySessionStatusSchema.enum;
|
|
2359
|
+
var discoveryCompetitorMapEntrySchema = z21.object({
|
|
2360
|
+
domain: z21.string().min(1),
|
|
2361
|
+
hits: z21.number().int().positive()
|
|
2362
|
+
});
|
|
2363
|
+
var discoveryProbeDtoSchema = z21.object({
|
|
2364
|
+
id: z21.string(),
|
|
2365
|
+
sessionId: z21.string(),
|
|
2366
|
+
projectId: z21.string(),
|
|
2367
|
+
query: z21.string(),
|
|
2368
|
+
bucket: discoveryBucketSchema.nullable().default(null),
|
|
2369
|
+
citationState: citationStateSchema,
|
|
2370
|
+
citedDomains: z21.array(z21.string()).default([]),
|
|
2371
|
+
createdAt: z21.string()
|
|
2372
|
+
});
|
|
2373
|
+
var discoverySessionDtoSchema = z21.object({
|
|
2374
|
+
id: z21.string(),
|
|
2375
|
+
projectId: z21.string(),
|
|
2376
|
+
status: discoverySessionStatusSchema,
|
|
2377
|
+
icpDescription: z21.string().nullable().optional(),
|
|
2378
|
+
seedProvider: z21.string().nullable().optional(),
|
|
2379
|
+
seedCountRaw: z21.number().int().nullable().optional(),
|
|
2380
|
+
seedCount: z21.number().int().nullable().optional(),
|
|
2381
|
+
dedupThreshold: z21.number().nullable().optional(),
|
|
2382
|
+
probeCount: z21.number().int().nullable().optional(),
|
|
2383
|
+
citedCount: z21.number().int().nullable().default(null),
|
|
2384
|
+
aspirationalCount: z21.number().int().nullable().default(null),
|
|
2385
|
+
wastedCount: z21.number().int().nullable().default(null),
|
|
2386
|
+
competitorMap: z21.array(discoveryCompetitorMapEntrySchema).default([]),
|
|
2387
|
+
error: z21.string().nullable().optional(),
|
|
2388
|
+
startedAt: z21.string().nullable().optional(),
|
|
2389
|
+
finishedAt: z21.string().nullable().optional(),
|
|
2390
|
+
createdAt: z21.string()
|
|
2391
|
+
});
|
|
2392
|
+
var discoverySessionDetailDtoSchema = discoverySessionDtoSchema.extend({
|
|
2393
|
+
probes: z21.array(discoveryProbeDtoSchema).default([])
|
|
2394
|
+
});
|
|
2395
|
+
var DISCOVERY_MAX_PROBES_CAP = 500;
|
|
2396
|
+
var discoveryRunRequestSchema = z21.object({
|
|
2397
|
+
icpDescription: z21.string().min(1).optional(),
|
|
2398
|
+
dedupThreshold: z21.number().min(0).max(1).optional(),
|
|
2399
|
+
maxProbes: z21.number().int().positive().max(DISCOVERY_MAX_PROBES_CAP).optional()
|
|
2400
|
+
});
|
|
2401
|
+
var queryProvenanceSchema = z21.union([
|
|
2402
|
+
z21.literal("cli"),
|
|
2403
|
+
z21.string().regex(/^discovery:.+$/)
|
|
2404
|
+
]);
|
|
2405
|
+
|
|
2406
|
+
// ../contracts/src/embeddings.ts
|
|
2407
|
+
function cosineSimilarity(a, b) {
|
|
2408
|
+
if (a.length === 0 || b.length === 0) {
|
|
2409
|
+
throw new Error("cosineSimilarity: vectors must be non-empty");
|
|
2410
|
+
}
|
|
2411
|
+
if (a.length !== b.length) {
|
|
2412
|
+
throw new Error(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
|
|
2413
|
+
}
|
|
2414
|
+
let dot = 0;
|
|
2415
|
+
let magA = 0;
|
|
2416
|
+
let magB = 0;
|
|
2417
|
+
for (let i = 0; i < a.length; i++) {
|
|
2418
|
+
dot += a[i] * b[i];
|
|
2419
|
+
magA += a[i] * a[i];
|
|
2420
|
+
magB += b[i] * b[i];
|
|
2421
|
+
}
|
|
2422
|
+
if (magA === 0 || magB === 0) return 0;
|
|
2423
|
+
return dot / (Math.sqrt(magA) * Math.sqrt(magB));
|
|
2424
|
+
}
|
|
2425
|
+
function clusterByCosine(items, vectors, threshold) {
|
|
2426
|
+
if (threshold < 0 || threshold > 1) {
|
|
2427
|
+
throw new Error(`clusterByCosine: threshold must be in [0, 1], got ${threshold}`);
|
|
2428
|
+
}
|
|
2429
|
+
if (items.length !== vectors.length) {
|
|
2430
|
+
throw new Error(`clusterByCosine: items/vectors length mismatch (${items.length} vs ${vectors.length})`);
|
|
2431
|
+
}
|
|
2432
|
+
if (items.length === 0) return [];
|
|
2433
|
+
const parent = items.map((_, i) => i);
|
|
2434
|
+
const find = (x) => {
|
|
2435
|
+
let root = x;
|
|
2436
|
+
while (parent[root] !== root) root = parent[root];
|
|
2437
|
+
let cur = x;
|
|
2438
|
+
while (parent[cur] !== root) {
|
|
2439
|
+
const next = parent[cur];
|
|
2440
|
+
parent[cur] = root;
|
|
2441
|
+
cur = next;
|
|
2442
|
+
}
|
|
2443
|
+
return root;
|
|
2444
|
+
};
|
|
2445
|
+
const union = (a, b) => {
|
|
2446
|
+
const ra = find(a);
|
|
2447
|
+
const rb = find(b);
|
|
2448
|
+
if (ra !== rb) parent[ra] = rb;
|
|
2449
|
+
};
|
|
2450
|
+
for (let i = 0; i < items.length; i++) {
|
|
2451
|
+
for (let j = i + 1; j < items.length; j++) {
|
|
2452
|
+
if (cosineSimilarity(vectors[i], vectors[j]) >= threshold) {
|
|
2453
|
+
union(i, j);
|
|
2454
|
+
}
|
|
2455
|
+
}
|
|
2456
|
+
}
|
|
2457
|
+
const byRoot = /* @__PURE__ */ new Map();
|
|
2458
|
+
for (let i = 0; i < items.length; i++) {
|
|
2459
|
+
const root = find(i);
|
|
2460
|
+
const existing = byRoot.get(root);
|
|
2461
|
+
if (existing) existing.push(i);
|
|
2462
|
+
else byRoot.set(root, [i]);
|
|
2463
|
+
}
|
|
2464
|
+
return Array.from(byRoot.values()).map((indices) => indices.map((idx) => items[idx]));
|
|
2465
|
+
}
|
|
2466
|
+
function pickClusterRepresentative(cluster) {
|
|
2467
|
+
if (cluster.length === 0) throw new Error("pickClusterRepresentative: cluster is empty");
|
|
2468
|
+
let best = cluster[0];
|
|
2469
|
+
for (let i = 1; i < cluster.length; i++) {
|
|
2470
|
+
if (cluster[i].length < best.length) best = cluster[i];
|
|
2471
|
+
}
|
|
2472
|
+
return best;
|
|
2473
|
+
}
|
|
2474
|
+
|
|
2351
2475
|
// ../contracts/src/formatting.ts
|
|
2352
2476
|
function formatRatio(value) {
|
|
2353
2477
|
if (!Number.isFinite(value) || value === 0) return "0%";
|
|
@@ -2451,6 +2575,7 @@ export {
|
|
|
2451
2575
|
RunStatuses,
|
|
2452
2576
|
RunKinds,
|
|
2453
2577
|
RunTriggers,
|
|
2578
|
+
citationStateSchema,
|
|
2454
2579
|
CitationStates,
|
|
2455
2580
|
runTriggerRequestSchema,
|
|
2456
2581
|
parseRunError,
|
|
@@ -2508,6 +2633,13 @@ export {
|
|
|
2508
2633
|
trafficConnectWordpressRequestSchema,
|
|
2509
2634
|
trafficEventKindSchema,
|
|
2510
2635
|
TrafficEventKinds,
|
|
2636
|
+
discoveryBucketSchema,
|
|
2637
|
+
DiscoveryBuckets,
|
|
2638
|
+
DiscoverySessionStatuses,
|
|
2639
|
+
DISCOVERY_MAX_PROBES_CAP,
|
|
2640
|
+
discoveryRunRequestSchema,
|
|
2641
|
+
clusterByCosine,
|
|
2642
|
+
pickClusterRepresentative,
|
|
2511
2643
|
formatRatio,
|
|
2512
2644
|
formatNumber,
|
|
2513
2645
|
formatDate,
|
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
categoryLabel,
|
|
9
9
|
determineAnswerMentioned,
|
|
10
10
|
normalizeProjectDomain
|
|
11
|
-
} from "./chunk-EUGCQSFC.js";
|
|
11
|
+
} from "./chunk-CRQMGNPH.js";
|
|
12
12
|
|
|
13
13
|
// src/intelligence-service.ts
|
|
14
14
|
import { eq, desc, asc, and, or, inArray } from "drizzle-orm";
|
|
@@ -36,6 +36,8 @@ __export(schema_exports, {
|
|
|
36
36
|
ccReleaseSyncs: () => ccReleaseSyncs,
|
|
37
37
|
competitors: () => competitors,
|
|
38
38
|
crawlerEventsHourly: () => crawlerEventsHourly,
|
|
39
|
+
discoveryProbes: () => discoveryProbes,
|
|
40
|
+
discoverySessions: () => discoverySessions,
|
|
39
41
|
gaAiReferrals: () => gaAiReferrals,
|
|
40
42
|
gaConnections: () => gaConnections,
|
|
41
43
|
gaSocialReferrals: () => gaSocialReferrals,
|
|
@@ -59,7 +61,7 @@ __export(schema_exports, {
|
|
|
59
61
|
trafficSources: () => trafficSources,
|
|
60
62
|
usageCounters: () => usageCounters
|
|
61
63
|
});
|
|
62
|
-
import { index, integer, primaryKey, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
|
|
64
|
+
import { index, integer, primaryKey, real, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
|
|
63
65
|
var projects = sqliteTable("projects", {
|
|
64
66
|
id: text("id").primaryKey(),
|
|
65
67
|
name: text("name").notNull().unique(),
|
|
@@ -76,6 +78,7 @@ var projects = sqliteTable("projects", {
|
|
|
76
78
|
autoExtractBacklinks: integer("auto_extract_backlinks").notNull().default(0),
|
|
77
79
|
configSource: text("config_source").notNull().default("cli"),
|
|
78
80
|
configRevision: integer("config_revision").notNull().default(1),
|
|
81
|
+
icpDescription: text("icp_description"),
|
|
79
82
|
createdAt: text("created_at").notNull(),
|
|
80
83
|
updatedAt: text("updated_at").notNull()
|
|
81
84
|
});
|
|
@@ -83,6 +86,7 @@ var queries = sqliteTable("queries", {
|
|
|
83
86
|
id: text("id").primaryKey(),
|
|
84
87
|
projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
|
|
85
88
|
query: text("query").notNull(),
|
|
89
|
+
provenance: text("provenance"),
|
|
86
90
|
createdAt: text("created_at").notNull()
|
|
87
91
|
}, (table) => [
|
|
88
92
|
index("idx_queries_project").on(table.projectId),
|
|
@@ -92,6 +96,7 @@ var competitors = sqliteTable("competitors", {
|
|
|
92
96
|
id: text("id").primaryKey(),
|
|
93
97
|
projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
|
|
94
98
|
domain: text("domain").notNull(),
|
|
99
|
+
provenance: text("provenance"),
|
|
95
100
|
createdAt: text("created_at").notNull()
|
|
96
101
|
}, (table) => [
|
|
97
102
|
index("idx_competitors_project").on(table.projectId),
|
|
@@ -653,6 +658,43 @@ var rawEventSamples = sqliteTable("raw_event_samples", {
|
|
|
653
658
|
index("idx_raw_event_samples_source_ts").on(table.sourceId, table.ts),
|
|
654
659
|
index("idx_raw_event_samples_event_type").on(table.eventType)
|
|
655
660
|
]);
|
|
661
|
+
var discoverySessions = sqliteTable("discovery_sessions", {
|
|
662
|
+
id: text("id").primaryKey(),
|
|
663
|
+
projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
|
|
664
|
+
runId: text("run_id"),
|
|
665
|
+
status: text("status").notNull().default("queued"),
|
|
666
|
+
icpDescription: text("icp_description"),
|
|
667
|
+
seedProvider: text("seed_provider"),
|
|
668
|
+
seedCountRaw: integer("seed_count_raw"),
|
|
669
|
+
seedCount: integer("seed_count"),
|
|
670
|
+
dedupThreshold: real("dedup_threshold"),
|
|
671
|
+
probeCount: integer("probe_count"),
|
|
672
|
+
citedCount: integer("cited_count"),
|
|
673
|
+
aspirationalCount: integer("aspirational_count"),
|
|
674
|
+
wastedCount: integer("wasted_count"),
|
|
675
|
+
competitorMap: text("competitor_map").notNull().default("[]"),
|
|
676
|
+
error: text("error"),
|
|
677
|
+
startedAt: text("started_at"),
|
|
678
|
+
finishedAt: text("finished_at"),
|
|
679
|
+
createdAt: text("created_at").notNull()
|
|
680
|
+
}, (table) => [
|
|
681
|
+
index("idx_discovery_sessions_project_created").on(table.projectId, table.createdAt),
|
|
682
|
+
index("idx_discovery_sessions_run").on(table.runId)
|
|
683
|
+
]);
|
|
684
|
+
var discoveryProbes = sqliteTable("discovery_probes", {
|
|
685
|
+
id: text("id").primaryKey(),
|
|
686
|
+
sessionId: text("session_id").notNull().references(() => discoverySessions.id, { onDelete: "cascade" }),
|
|
687
|
+
projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
|
|
688
|
+
query: text("query").notNull(),
|
|
689
|
+
bucket: text("bucket"),
|
|
690
|
+
citationState: text("citation_state").notNull(),
|
|
691
|
+
citedDomains: text("cited_domains").notNull().default("[]"),
|
|
692
|
+
rawResponse: text("raw_response"),
|
|
693
|
+
createdAt: text("created_at").notNull()
|
|
694
|
+
}, (table) => [
|
|
695
|
+
index("idx_discovery_probes_session").on(table.sessionId),
|
|
696
|
+
index("idx_discovery_probes_project").on(table.projectId)
|
|
697
|
+
]);
|
|
656
698
|
var migrationsTable = sqliteTable("_migrations", {
|
|
657
699
|
version: integer("version").primaryKey(),
|
|
658
700
|
name: text("name").notNull(),
|
|
@@ -1681,6 +1723,75 @@ var MIGRATION_VERSIONS = [
|
|
|
1681
1723
|
statements: [
|
|
1682
1724
|
`DROP INDEX IF EXISTS idx_schedules_project`
|
|
1683
1725
|
]
|
|
1726
|
+
},
|
|
1727
|
+
{
|
|
1728
|
+
version: 55,
|
|
1729
|
+
name: "discovery-foundation",
|
|
1730
|
+
// Adds the three-ring discovery foundation: per-project ICP, query/competitor
|
|
1731
|
+
// provenance (so we can trace adopted basket entries back to a discovery
|
|
1732
|
+
// session), and the two tables that hold a discovery session's research
|
|
1733
|
+
// output. No UNIQUE(session_id, query) on discovery_probes — v2 will probe
|
|
1734
|
+
// the same query across multiple providers in the same session.
|
|
1735
|
+
//
|
|
1736
|
+
// `competitor_map` defaults to '[]' (JSON array) — see DTO
|
|
1737
|
+
// `discoveryCompetitorMapEntrySchema` for the entry shape `{domain, hits}`.
|
|
1738
|
+
// Backfill of `provenance='cli'` runs once: existing pre-v55 rows are
|
|
1739
|
+
// attributed to manual CLI entry so a future NULL distinctly means
|
|
1740
|
+
// "post-v55 row missing provenance" (a bug to catch in review).
|
|
1741
|
+
statements: [
|
|
1742
|
+
`ALTER TABLE projects ADD COLUMN icp_description TEXT`,
|
|
1743
|
+
`ALTER TABLE queries ADD COLUMN provenance TEXT`,
|
|
1744
|
+
`ALTER TABLE competitors ADD COLUMN provenance TEXT`,
|
|
1745
|
+
`UPDATE queries SET provenance = 'cli' WHERE provenance IS NULL`,
|
|
1746
|
+
`UPDATE competitors SET provenance = 'cli' WHERE provenance IS NULL`,
|
|
1747
|
+
`CREATE TABLE IF NOT EXISTS discovery_sessions (
|
|
1748
|
+
id TEXT PRIMARY KEY,
|
|
1749
|
+
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
|
1750
|
+
status TEXT NOT NULL DEFAULT 'queued',
|
|
1751
|
+
icp_description TEXT,
|
|
1752
|
+
seed_provider TEXT,
|
|
1753
|
+
seed_count_raw INTEGER,
|
|
1754
|
+
seed_count INTEGER,
|
|
1755
|
+
dedup_threshold REAL,
|
|
1756
|
+
probe_count INTEGER,
|
|
1757
|
+
cited_count INTEGER,
|
|
1758
|
+
aspirational_count INTEGER,
|
|
1759
|
+
wasted_count INTEGER,
|
|
1760
|
+
competitor_map TEXT NOT NULL DEFAULT '[]',
|
|
1761
|
+
error TEXT,
|
|
1762
|
+
started_at TEXT,
|
|
1763
|
+
finished_at TEXT,
|
|
1764
|
+
created_at TEXT NOT NULL
|
|
1765
|
+
)`,
|
|
1766
|
+
// "Latest session per project" is the access pattern; SQLite walks the
|
|
1767
|
+
// composite index backwards for ORDER BY created_at DESC.
|
|
1768
|
+
`CREATE INDEX IF NOT EXISTS idx_discovery_sessions_project_created ON discovery_sessions(project_id, created_at)`,
|
|
1769
|
+
`CREATE TABLE IF NOT EXISTS discovery_probes (
|
|
1770
|
+
id TEXT PRIMARY KEY,
|
|
1771
|
+
session_id TEXT NOT NULL REFERENCES discovery_sessions(id) ON DELETE CASCADE,
|
|
1772
|
+
project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
|
1773
|
+
query TEXT NOT NULL,
|
|
1774
|
+
bucket TEXT,
|
|
1775
|
+
citation_state TEXT NOT NULL,
|
|
1776
|
+
cited_domains TEXT NOT NULL DEFAULT '[]',
|
|
1777
|
+
raw_response TEXT,
|
|
1778
|
+
created_at TEXT NOT NULL
|
|
1779
|
+
)`,
|
|
1780
|
+
`CREATE INDEX IF NOT EXISTS idx_discovery_probes_session ON discovery_probes(session_id)`,
|
|
1781
|
+
`CREATE INDEX IF NOT EXISTS idx_discovery_probes_project ON discovery_probes(project_id)`
|
|
1782
|
+
]
|
|
1783
|
+
},
|
|
1784
|
+
{
|
|
1785
|
+
version: 56,
|
|
1786
|
+
name: "discovery-sessions-run-id",
|
|
1787
|
+
// Links a discovery_sessions row back to the runs row that drove it. Without
|
|
1788
|
+
// this column the run-coordinator can't tell two concurrent discovery
|
|
1789
|
+
// sessions apart for the same project — it would fall back to "latest
|
|
1790
|
+
// non-queued session" and surface the wrong bucket counts to Aero.
|
|
1791
|
+
statements: [
|
|
1792
|
+
`ALTER TABLE discovery_sessions ADD COLUMN run_id TEXT`,
|
|
1793
|
+
`CREATE INDEX IF NOT EXISTS idx_discovery_sessions_run ON discovery_sessions(run_id)`
|
|
1794
|
+
]
|
|
1684
1795
|
}
|
|
1685
1796
|
];
|
|
1686
1797
|
function isDuplicateColumnError(err) {
|
|
@@ -3519,6 +3630,8 @@ export {
|
|
|
3519
3630
|
crawlerEventsHourly,
|
|
3520
3631
|
aiReferralEventsHourly,
|
|
3521
3632
|
rawEventSamples,
|
|
3633
|
+
discoverySessions,
|
|
3634
|
+
discoveryProbes,
|
|
3522
3635
|
createClient,
|
|
3523
3636
|
parseJsonColumn,
|
|
3524
3637
|
extractLegacyCredentials,
|