@ainyc/canonry 4.24.1 → 4.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -88,7 +88,9 @@ var runKindSchema = z2.enum([
88
88
  "bing-inspect",
89
89
  "bing-inspect-sitemap",
90
90
  "backlink-extract",
91
- "traffic-sync"
91
+ "traffic-sync",
92
+ "aeo-discover-seed",
93
+ "aeo-discover-probe"
92
94
  ]);
93
95
  var RunKinds = runKindSchema.enum;
94
96
  var runTriggerSchema = z2.enum(["manual", "scheduled", "config-apply", "backfill"]);
@@ -103,6 +105,7 @@ var runTriggerRequestSchema = z2.object({
103
105
  kind: z2.literal(RunKinds["answer-visibility"]).optional(),
104
106
  trigger: z2.literal(RunTriggers.manual).optional(),
105
107
  providers: z2.array(providerNameSchema).optional(),
108
+ queries: z2.array(z2.string().min(1)).min(1).optional(),
106
109
  location: z2.string().min(1).optional(),
107
110
  allLocations: z2.boolean().optional(),
108
111
  noLocation: z2.boolean().optional()
@@ -129,6 +132,7 @@ var runDtoSchema = z2.object({
129
132
  status: runStatusSchema,
130
133
  trigger: runTriggerSchema.default("manual"),
131
134
  location: z2.string().nullable().optional(),
135
+ queries: z2.array(z2.string()).nullable().optional(),
132
136
  startedAt: z2.string().nullable().optional(),
133
137
  finishedAt: z2.string().nullable().optional(),
134
138
  error: runErrorSchema.nullable().optional(),
@@ -2348,6 +2352,128 @@ var trafficEventsResponseSchema = z20.object({
2348
2352
  events: z20.array(trafficEventEntrySchema)
2349
2353
  });
2350
2354
 
2355
+ // ../contracts/src/discovery.ts
2356
+ import { z as z21 } from "zod";
2357
// Allowed values for a discovery probe's bucket.
var discoveryBucketSchema = z21.enum([
  "cited",
  "aspirational",
  "wasted-surface"
]);
// Value lookup object derived from the bucket enum schema.
var DiscoveryBuckets = discoveryBucketSchema.enum;

// Allowed values for a discovery session's status.
var discoverySessionStatusSchema = z21.enum([
  "queued",
  "seeding",
  "probing",
  "completed",
  "failed"
]);
// Value lookup object derived from the session-status enum schema.
var DiscoverySessionStatuses = discoverySessionStatusSchema.enum;
2361
// One competitor-map entry: a non-empty domain string and a strictly
// positive integer hit count.
var discoveryCompetitorMapEntrySchema = z21.object({
  domain: z21.string().min(1),
  hits: z21.number().int().positive()
});
2365
// DTO for a single discovery probe row.
var discoveryProbeDtoSchema = z21.object({
  id: z21.string(),
  sessionId: z21.string(),
  projectId: z21.string(),
  query: z21.string(),
  // Nullable; a missing value is normalized to null.
  bucket: discoveryBucketSchema.nullable().default(null),
  citationState: citationStateSchema,
  // A missing value is normalized to an empty list.
  citedDomains: z21.array(z21.string()).default([]),
  createdAt: z21.string()
});
2375
// DTO for a discovery session (without its probes).
var discoverySessionDtoSchema = z21.object({
  id: z21.string(),
  projectId: z21.string(),
  status: discoverySessionStatusSchema,

  // Optional, nullable session inputs and seed counters.
  icpDescription: z21.string().nullable().optional(),
  seedProvider: z21.string().nullable().optional(),
  seedCountRaw: z21.number().int().nullable().optional(),
  seedCount: z21.number().int().nullable().optional(),
  dedupThreshold: z21.number().nullable().optional(),
  probeCount: z21.number().int().nullable().optional(),

  // Per-bucket counters: unlike the fields above, a missing value is
  // normalized to null rather than left undefined.
  citedCount: z21.number().int().nullable().default(null),
  aspirationalCount: z21.number().int().nullable().default(null),
  wastedCount: z21.number().int().nullable().default(null),

  // A missing value is normalized to an empty list.
  competitorMap: z21.array(discoveryCompetitorMapEntrySchema).default([]),

  error: z21.string().nullable().optional(),
  startedAt: z21.string().nullable().optional(),
  finishedAt: z21.string().nullable().optional(),
  createdAt: z21.string()
});
2394
// Session DTO extended with its probes; a missing `probes` value is
// normalized to an empty list.
var discoverySessionDetailDtoSchema = discoverySessionDtoSchema.extend({
  probes: z21.array(discoveryProbeDtoSchema).default([])
});
2397
// Upper bound accepted for `maxProbes` in a discovery run request.
var DISCOVERY_MAX_PROBES_CAP = 500;

// Request body for starting a discovery run. Every field is optional.
var discoveryRunRequestSchema = z21.object({
  // Non-empty string when provided.
  icpDescription: z21.string().min(1).optional(),
  // Must lie in [0, 1] when provided.
  dedupThreshold: z21.number().min(0).max(1).optional(),
  // Positive integer no greater than DISCOVERY_MAX_PROBES_CAP when provided.
  maxProbes: z21.number().int().positive().max(DISCOVERY_MAX_PROBES_CAP).optional()
});
2403
// Provenance tag: either the literal "cli" or a string of the form
// "discovery:<non-empty suffix>".
var queryProvenanceSchema = z21.union([
  z21.literal("cli"),
  z21.string().regex(/^discovery:.+$/)
]);
2407
+
2408
+ // ../contracts/src/embeddings.ts
2409
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param a First vector (array-like of numbers).
 * @param b Second vector; must have the same length as `a`.
 * @returns A value in [-1, 1]; 0 when either vector has zero magnitude.
 *   NaN inputs still propagate to a NaN result.
 * @throws {Error} If either vector is empty or the lengths differ.
 */
function cosineSimilarity(a, b) {
  if (a.length === 0 || b.length === 0) {
    throw new Error("cosineSimilarity: vectors must be non-empty");
  }
  if (a.length !== b.length) {
    throw new Error(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
  }
  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }
  // A zero-magnitude vector has no direction; treat it as dissimilar.
  if (magA === 0 || magB === 0) return 0;
  // Take one square root of the product instead of multiplying two rounded
  // square roots: sqrt(2) * sqrt(2) is 2.0000000000000004, which made
  // identical vectors score 0.9999999999999998 and miss a threshold of 1.
  // Fall back to the two-root form when the product overflows or underflows.
  const magProduct = magA * magB;
  const denominator = Number.isFinite(magProduct) && magProduct > 0
    ? Math.sqrt(magProduct)
    : Math.sqrt(magA) * Math.sqrt(magB);
  const similarity = dot / denominator;
  // Clamp residual rounding error so callers can rely on the [-1, 1] range.
  if (similarity > 1) return 1;
  if (similarity < -1) return -1;
  return similarity;
}
2427
+ function clusterByCosine(items, vectors, threshold) {
2428
+ if (threshold < 0 || threshold > 1) {
2429
+ throw new Error(`clusterByCosine: threshold must be in [0, 1], got ${threshold}`);
2430
+ }
2431
+ if (items.length !== vectors.length) {
2432
+ throw new Error(`clusterByCosine: items/vectors length mismatch (${items.length} vs ${vectors.length})`);
2433
+ }
2434
+ if (items.length === 0) return [];
2435
+ const parent = items.map((_, i) => i);
2436
+ const find = (x) => {
2437
+ let root = x;
2438
+ while (parent[root] !== root) root = parent[root];
2439
+ let cur = x;
2440
+ while (parent[cur] !== root) {
2441
+ const next = parent[cur];
2442
+ parent[cur] = root;
2443
+ cur = next;
2444
+ }
2445
+ return root;
2446
+ };
2447
+ const union = (a, b) => {
2448
+ const ra = find(a);
2449
+ const rb = find(b);
2450
+ if (ra !== rb) parent[ra] = rb;
2451
+ };
2452
+ for (let i = 0; i < items.length; i++) {
2453
+ for (let j = i + 1; j < items.length; j++) {
2454
+ if (cosineSimilarity(vectors[i], vectors[j]) >= threshold) {
2455
+ union(i, j);
2456
+ }
2457
+ }
2458
+ }
2459
+ const byRoot = /* @__PURE__ */ new Map();
2460
+ for (let i = 0; i < items.length; i++) {
2461
+ const root = find(i);
2462
+ const existing = byRoot.get(root);
2463
+ if (existing) existing.push(i);
2464
+ else byRoot.set(root, [i]);
2465
+ }
2466
+ return Array.from(byRoot.values()).map((indices) => indices.map((idx) => items[idx]));
2467
+ }
2468
/**
 * Picks the member of a cluster with the smallest `length`; on ties the
 * earliest member wins.
 *
 * @param cluster Non-empty array of members that each expose `length`.
 * @returns The shortest member.
 * @throws {Error} If the cluster is empty.
 */
function pickClusterRepresentative(cluster) {
  if (cluster.length === 0) throw new Error("pickClusterRepresentative: cluster is empty");
  const [first, ...rest] = cluster;
  let shortest = first;
  for (const candidate of rest) {
    // Strict comparison keeps the earliest member when lengths are equal.
    if (candidate.length < shortest.length) shortest = candidate;
  }
  return shortest;
}
2476
+
2351
2477
  // ../contracts/src/formatting.ts
2352
2478
  function formatRatio(value) {
2353
2479
  if (!Number.isFinite(value) || value === 0) return "0%";
@@ -2451,6 +2577,7 @@ export {
2451
2577
  RunStatuses,
2452
2578
  RunKinds,
2453
2579
  RunTriggers,
2580
+ citationStateSchema,
2454
2581
  CitationStates,
2455
2582
  runTriggerRequestSchema,
2456
2583
  parseRunError,
@@ -2508,6 +2635,13 @@ export {
2508
2635
  trafficConnectWordpressRequestSchema,
2509
2636
  trafficEventKindSchema,
2510
2637
  TrafficEventKinds,
2638
+ discoveryBucketSchema,
2639
+ DiscoveryBuckets,
2640
+ DiscoverySessionStatuses,
2641
+ DISCOVERY_MAX_PROBES_CAP,
2642
+ discoveryRunRequestSchema,
2643
+ clusterByCosine,
2644
+ pickClusterRepresentative,
2511
2645
  formatRatio,
2512
2646
  formatNumber,
2513
2647
  formatDate,
@@ -8,7 +8,7 @@ import {
8
8
  categoryLabel,
9
9
  determineAnswerMentioned,
10
10
  normalizeProjectDomain
11
- } from "./chunk-EUGCQSFC.js";
11
+ } from "./chunk-HVW665A4.js";
12
12
 
13
13
  // src/intelligence-service.ts
14
14
  import { eq, desc, asc, and, or, inArray } from "drizzle-orm";
@@ -36,6 +36,8 @@ __export(schema_exports, {
36
36
  ccReleaseSyncs: () => ccReleaseSyncs,
37
37
  competitors: () => competitors,
38
38
  crawlerEventsHourly: () => crawlerEventsHourly,
39
+ discoveryProbes: () => discoveryProbes,
40
+ discoverySessions: () => discoverySessions,
39
41
  gaAiReferrals: () => gaAiReferrals,
40
42
  gaConnections: () => gaConnections,
41
43
  gaSocialReferrals: () => gaSocialReferrals,
@@ -59,7 +61,7 @@ __export(schema_exports, {
59
61
  trafficSources: () => trafficSources,
60
62
  usageCounters: () => usageCounters
61
63
  });
62
- import { index, integer, primaryKey, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
64
+ import { index, integer, primaryKey, real, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
63
65
  var projects = sqliteTable("projects", {
64
66
  id: text("id").primaryKey(),
65
67
  name: text("name").notNull().unique(),
@@ -76,6 +78,7 @@ var projects = sqliteTable("projects", {
76
78
  autoExtractBacklinks: integer("auto_extract_backlinks").notNull().default(0),
77
79
  configSource: text("config_source").notNull().default("cli"),
78
80
  configRevision: integer("config_revision").notNull().default(1),
81
+ icpDescription: text("icp_description"),
79
82
  createdAt: text("created_at").notNull(),
80
83
  updatedAt: text("updated_at").notNull()
81
84
  });
@@ -83,6 +86,7 @@ var queries = sqliteTable("queries", {
83
86
  id: text("id").primaryKey(),
84
87
  projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
85
88
  query: text("query").notNull(),
89
+ provenance: text("provenance"),
86
90
  createdAt: text("created_at").notNull()
87
91
  }, (table) => [
88
92
  index("idx_queries_project").on(table.projectId),
@@ -92,6 +96,7 @@ var competitors = sqliteTable("competitors", {
92
96
  id: text("id").primaryKey(),
93
97
  projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
94
98
  domain: text("domain").notNull(),
99
+ provenance: text("provenance"),
95
100
  createdAt: text("created_at").notNull()
96
101
  }, (table) => [
97
102
  index("idx_competitors_project").on(table.projectId),
@@ -104,6 +109,7 @@ var runs = sqliteTable("runs", {
104
109
  status: text("status").notNull().default("queued"),
105
110
  trigger: text("trigger").notNull().default("manual"),
106
111
  location: text("location"),
112
+ queries: text("queries"),
107
113
  sourceId: text("source_id"),
108
114
  startedAt: text("started_at"),
109
115
  finishedAt: text("finished_at"),
@@ -653,6 +659,43 @@ var rawEventSamples = sqliteTable("raw_event_samples", {
653
659
  index("idx_raw_event_samples_source_ts").on(table.sourceId, table.ts),
654
660
  index("idx_raw_event_samples_event_type").on(table.eventType)
655
661
  ]);
662
// Drizzle definition of the `discovery_sessions` table.
var discoverySessions = sqliteTable(
  "discovery_sessions",
  {
    id: text("id").primaryKey(),
    // Rows are removed when the owning project is deleted.
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    // Plain nullable text column; no foreign key is declared here.
    runId: text("run_id"),
    status: text("status").notNull().default("queued"),
    icpDescription: text("icp_description"),
    seedProvider: text("seed_provider"),
    seedCountRaw: integer("seed_count_raw"),
    seedCount: integer("seed_count"),
    dedupThreshold: real("dedup_threshold"),
    probeCount: integer("probe_count"),
    citedCount: integer("cited_count"),
    aspirationalCount: integer("aspirational_count"),
    wastedCount: integer("wasted_count"),
    // Text column whose default is the string "[]".
    competitorMap: text("competitor_map").notNull().default("[]"),
    error: text("error"),
    startedAt: text("started_at"),
    finishedAt: text("finished_at"),
    createdAt: text("created_at").notNull()
  },
  (table) => [
    index("idx_discovery_sessions_project_created").on(table.projectId, table.createdAt),
    index("idx_discovery_sessions_run").on(table.runId)
  ]
);
685
// Drizzle definition of the `discovery_probes` table.
var discoveryProbes = sqliteTable(
  "discovery_probes",
  {
    id: text("id").primaryKey(),
    // Rows are removed when the owning session is deleted.
    sessionId: text("session_id")
      .notNull()
      .references(() => discoverySessions.id, { onDelete: "cascade" }),
    // Rows are removed when the owning project is deleted.
    projectId: text("project_id")
      .notNull()
      .references(() => projects.id, { onDelete: "cascade" }),
    query: text("query").notNull(),
    bucket: text("bucket"),
    citationState: text("citation_state").notNull(),
    // Text column whose default is the string "[]".
    citedDomains: text("cited_domains").notNull().default("[]"),
    rawResponse: text("raw_response"),
    createdAt: text("created_at").notNull()
  },
  (table) => [
    index("idx_discovery_probes_session").on(table.sessionId),
    index("idx_discovery_probes_project").on(table.projectId)
  ]
);
656
699
  var migrationsTable = sqliteTable("_migrations", {
657
700
  version: integer("version").primaryKey(),
658
701
  name: text("name").notNull(),
@@ -1681,6 +1724,86 @@ var MIGRATION_VERSIONS = [
1681
1724
  statements: [
1682
1725
  `DROP INDEX IF EXISTS idx_schedules_project`
1683
1726
  ]
1727
+ },
1728
+ {
1729
+ version: 55,
1730
+ name: "discovery-foundation",
1731
+ // Adds the three-ring discovery foundation: per-project ICP, query/competitor
1732
+ // provenance (so we can trace adopted basket entries back to a discovery
1733
+ // session), and the two tables that hold a discovery session's research
1734
+ // output. No UNIQUE(session_id, query) on discovery_probes — v2 will probe
1735
+ // the same query across multiple providers in the same session.
1736
+ //
1737
+ // `competitor_map` defaults to '[]' (JSON array) — see DTO
1738
+ // `discoveryCompetitorMapEntrySchema` for the entry shape `{domain, hits}`.
1739
+ // Backfill of `provenance='cli'` runs once: existing pre-v55 rows are
1740
+ // attributed to manual CLI entry so a future NULL distinctly means
1741
+ // "post-v55 row missing provenance" (a bug to catch in review).
1742
+ statements: [
1743
+ `ALTER TABLE projects ADD COLUMN icp_description TEXT`,
1744
+ `ALTER TABLE queries ADD COLUMN provenance TEXT`,
1745
+ `ALTER TABLE competitors ADD COLUMN provenance TEXT`,
1746
+ `UPDATE queries SET provenance = 'cli' WHERE provenance IS NULL`,
1747
+ `UPDATE competitors SET provenance = 'cli' WHERE provenance IS NULL`,
1748
+ `CREATE TABLE IF NOT EXISTS discovery_sessions (
1749
+ id TEXT PRIMARY KEY,
1750
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1751
+ status TEXT NOT NULL DEFAULT 'queued',
1752
+ icp_description TEXT,
1753
+ seed_provider TEXT,
1754
+ seed_count_raw INTEGER,
1755
+ seed_count INTEGER,
1756
+ dedup_threshold REAL,
1757
+ probe_count INTEGER,
1758
+ cited_count INTEGER,
1759
+ aspirational_count INTEGER,
1760
+ wasted_count INTEGER,
1761
+ competitor_map TEXT NOT NULL DEFAULT '[]',
1762
+ error TEXT,
1763
+ started_at TEXT,
1764
+ finished_at TEXT,
1765
+ created_at TEXT NOT NULL
1766
+ )`,
1767
+ // "Latest session per project" is the access pattern; SQLite walks the
1768
+ // composite index backwards for ORDER BY created_at DESC.
1769
+ `CREATE INDEX IF NOT EXISTS idx_discovery_sessions_project_created ON discovery_sessions(project_id, created_at)`,
1770
+ `CREATE TABLE IF NOT EXISTS discovery_probes (
1771
+ id TEXT PRIMARY KEY,
1772
+ session_id TEXT NOT NULL REFERENCES discovery_sessions(id) ON DELETE CASCADE,
1773
+ project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
1774
+ query TEXT NOT NULL,
1775
+ bucket TEXT,
1776
+ citation_state TEXT NOT NULL,
1777
+ cited_domains TEXT NOT NULL DEFAULT '[]',
1778
+ raw_response TEXT,
1779
+ created_at TEXT NOT NULL
1780
+ )`,
1781
+ `CREATE INDEX IF NOT EXISTS idx_discovery_probes_session ON discovery_probes(session_id)`,
1782
+ `CREATE INDEX IF NOT EXISTS idx_discovery_probes_project ON discovery_probes(project_id)`
1783
+ ]
1784
+ },
1785
+ {
1786
+ version: 56,
1787
+ name: "discovery-sessions-run-id",
1788
+ // Links a discovery_sessions row back to the runs row that drove it. Without
1789
+ // this column the run-coordinator can't tell two concurrent discovery
1790
+ // sessions apart for the same project — it would fall back to "latest
1791
+ // non-queued session" and surface the wrong bucket counts to Aero.
1792
+ statements: [
1793
+ `ALTER TABLE discovery_sessions ADD COLUMN run_id TEXT`,
1794
+ `CREATE INDEX IF NOT EXISTS idx_discovery_sessions_run ON discovery_sessions(run_id)`
1795
+ ]
1796
+ },
1797
+ {
1798
+ version: 57,
1799
+ name: "runs-scoped-queries",
1800
+ // Persists an optional subset of tracked queries to sweep on a per-run
1801
+ // basis. NULL = full sweep (the default and only behavior pre-v57); a JSON
1802
+ // array of query strings = scope. The job runner reads this to filter the
1803
+ // query fetch via `inArray`.
1804
+ statements: [
1805
+ `ALTER TABLE runs ADD COLUMN queries TEXT`
1806
+ ]
1684
1807
  }
1685
1808
  ];
1686
1809
  function isDuplicateColumnError(err) {
@@ -3519,6 +3642,8 @@ export {
3519
3642
  crawlerEventsHourly,
3520
3643
  aiReferralEventsHourly,
3521
3644
  rawEventSamples,
3645
+ discoverySessions,
3646
+ discoveryProbes,
3522
3647
  createClient,
3523
3648
  parseJsonColumn,
3524
3649
  extractLegacyCredentials,