npm - @ainyc/canonry - Versions diffs - 4.24.1 → 4.25.0 - Mend

@ainyc/canonry 4.24.1 → 4.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/assets/agent-workspace/skills/aero/references/aeo-discovery.md +89 -0
package/assets/agent-workspace/skills/canonry-setup/references/canonry-cli.md +14 -0
package/assets/assets/{index-BzD9HUxc.js → index-C4scWriC.js} +81 -81
package/assets/index.html +1 -1
package/dist/{chunk-E5PZ23OS.js → chunk-6J6WQOGH.js} +905 -195
package/dist/{chunk-6EJ54OX7.js → chunk-A7HQ6X43.js} +91 -1
package/dist/{chunk-EUGCQSFC.js → chunk-CRQMGNPH.js} +133 -1
package/dist/{chunk-OYYFXKRK.js → chunk-IS65IYNZ.js} +115 -2
package/dist/cli.js +424 -123
package/dist/index.js +4 -4
package/dist/{intelligence-service-NVN2PAR7.js → intelligence-service-XLUYTE57.js} +2 -2
package/dist/mcp.js +9 -3
package/package.json +9 -9

package/dist/{chunk-6EJ54OX7.js → chunk-A7HQ6X43.js} RENAMED Viewed

@@ -1,7 +1,9 @@
 import {
   AGENT_MEMORY_KEY_MAX_LENGTH,
   AGENT_MEMORY_VALUE_MAX_BYTES,
+  DISCOVERY_MAX_PROBES_CAP,
   competitorBatchRequestSchema,
+  discoveryRunRequestSchema,
   keywordBatchRequestSchema,
   keywordGenerateRequestSchema,
   notificationCreateRequestSchema,
@@ -16,7 +18,7 @@ import {
   trafficConnectCloudRunRequestSchema,
   trafficConnectWordpressRequestSchema,
   trafficEventKindSchema
-} from "./chunk-EUGCQSFC.js";
+} from "./chunk-CRQMGNPH.js";
 // src/config.ts
 import fs from "fs";
@@ -819,6 +821,32 @@ var ApiClient = class {
       `/projects/${encodeURIComponent(project)}/traffic/events${qs}`
     );
   }
+  /**
+   * Start a discovery run for a project.
+   *
+   * Issues `POST /projects/{project}/discover/run`; the project name is
+   * URL-encoded before it is placed in the path.
+   *
+   * @param project Project name used as the path segment.
+   * @param body Optional request payload. An empty object is sent when it is
+   *   null or undefined.
+   * @returns Whatever `this.request` returns for the call.
+   */
+  async triggerDiscoveryRun(project, body) {
+    return this.request(
+      "POST",
+      `/projects/${encodeURIComponent(project)}/discover/run`,
+      body ?? {}
+    );
+  }
+  /**
+   * List discovery sessions for a project.
+   *
+   * Issues `GET /projects/{project}/discover/sessions`, optionally with a
+   * `limit` query parameter.
+   *
+   * @param project Project name used as the path segment (URL-encoded here).
+   * @param opts Optional options object; only `opts.limit` is read.
+   * @returns Whatever `this.request` returns for the call.
+   */
+  async listDiscoverySessions(project, opts) {
+    // The query string is built only when `opts.limit` is truthy, so both a
+    // missing limit and a limit of 0 result in no `limit` parameter being sent.
+    const qs = opts?.limit ? `?limit=${encodeURIComponent(String(opts.limit))}` : "";
+    return this.request(
+      "GET",
+      `/projects/${encodeURIComponent(project)}/discover/sessions${qs}`
+    );
+  }
+  /**
+   * Fetch a single discovery session.
+   *
+   * Issues `GET /projects/{project}/discover/sessions/{sessionId}`; both path
+   * segments are URL-encoded.
+   *
+   * @param project Project name.
+   * @param sessionId Discovery session identifier.
+   * @returns Whatever `this.request` returns for the call.
+   */
+  async getDiscoverySession(project, sessionId) {
+    return this.request(
+      "GET",
+      `/projects/${encodeURIComponent(project)}/discover/sessions/${encodeURIComponent(sessionId)}`
+    );
+  }
+  /**
+   * Fetch the promotion preview for a discovery session.
+   *
+   * Issues `GET /projects/{project}/discover/sessions/{sessionId}/promote`;
+   * both path segments are URL-encoded. The request uses GET, so this client
+   * method only reads the preview.
+   *
+   * @param project Project name.
+   * @param sessionId Discovery session identifier.
+   * @returns Whatever `this.request` returns for the call.
+   */
+  async previewDiscoveryPromote(project, sessionId) {
+    return this.request(
+      "GET",
+      `/projects/${encodeURIComponent(project)}/discover/sessions/${encodeURIComponent(sessionId)}/promote`
+    );
+  }
   async wordpressConnect(project, body) {
     return this.request("POST", `/projects/${encodeURIComponent(project)}/wordpress/connect`, body);
   }
@@ -1226,6 +1254,24 @@ var trafficSourceIdInputSchema = z2.object({
   project: projectNameSchema,
   sourceId: z2.string().min(1).describe("Traffic source ID.")
 });
+// Input schema for the "start discovery run" MCP tool: the target project plus
+// an optional `request` object carrying the run options.
+var discoveryRunInputSchema = z2.object({
+  project: projectNameSchema,
+  request: discoveryRunRequestSchema.extend({
+    // Stronger descriptions for the LLM. Each override below repeats the
+    // validators the base discoveryRunRequestSchema declares for the same
+    // field (including the maxProbes upper bound) and only adds `.describe()`
+    // text that clarifies the meaning of each knob.
+    icpDescription: z2.string().min(1).optional().describe("Free-text ICP description. If omitted, the project must already have spec.icpDescription stored."),
+    dedupThreshold: z2.number().min(0).max(1).optional().describe("Cosine similarity threshold for clustering seed candidates. Defaults to 0.85. Lower values dedupe more aggressively."),
+    maxProbes: z2.number().int().positive().max(DISCOVERY_MAX_PROBES_CAP).optional().describe(`Max canonical queries to probe in this session. Default 100, hard cap ${DISCOVERY_MAX_PROBES_CAP}.`)
+  }).optional()
+});
+// Input schema for listing discovery sessions: the project plus an optional
+// positive integer `limit` of at most 200.
+var discoverySessionsListInputSchema = z2.object({
+  project: projectNameSchema,
+  limit: z2.number().int().positive().max(200).optional().describe("Max sessions returned. Default 50.")
+});
+// Input schema shared by the tools that address one discovery session: the
+// project plus a non-empty `sessionId` string.
+var discoverySessionIdInputSchema = z2.object({
+  project: projectNameSchema,
+  sessionId: z2.string().min(1).describe("Discovery session ID returned by canonry_discover_run_start.")
+});
 var AGENT_WEBHOOK_EVENTS = [
   notificationEventSchema.enum["run.completed"],
   notificationEventSchema.enum["insight.critical"],
@@ -2130,6 +2176,50 @@ var canonryMcpTools = [
       await client.deleteNotification(input.project, agentNotification.id);
       return { status: "detached", project: input.project };
     }
+  }),
+  defineTool({
+    name: "canonry_discover_run_start",
+    title: "Start discovery run",
+    description: 'Kick off a discovery session for a project: ICP \u2192 seed (Gemini grounded prompt) \u2192 embed + cluster + pick representative \u2192 probe each canonical \u2192 classify into cited / aspirational / wasted-surface \u2192 aggregate competitor map. Returns {runId, sessionId, status:"running"} immediately; the work runs in the background. Poll canonry_discover_session_get with the returned sessionId until status is "completed" or "failed". Costs roughly $1 / session at default budget; budget capped at 500 probes / session.',
+    access: "write",
+    tier: "discovery",
+    inputSchema: discoveryRunInputSchema,
+    annotations: writeAnnotations({ idempotentHint: false, openWorldHint: true }),
+    openApiOperations: ["POST /api/v1/projects/{name}/discover/run"],
+    handler: (client, input) => client.triggerDiscoveryRun(input.project, input.request)
+  }),
+  defineTool({
+    name: "canonry_discover_sessions_list",
+    title: "List discovery sessions",
+    description: "List recent discovery sessions for a project, newest first. Returns the session-level summary (status, seed counts, bucket counts, competitor map). Use canonry_discover_session_get to drill into per-query probe rows.",
+    access: "read",
+    tier: "discovery",
+    inputSchema: discoverySessionsListInputSchema,
+    annotations: readAnnotations(),
+    openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions"],
+    handler: (client, input) => client.listDiscoverySessions(input.project, input.limit !== void 0 ? { limit: input.limit } : void 0)
+  }),
+  defineTool({
+    name: "canonry_discover_session_get",
+    title: "Get discovery session",
+    description: 'Get one discovery session with the full probe list (per-query bucket + cited domains). Use after canonry_discover_run_start to inspect what the discovery pipeline produced; this is the canonical read for "what did discovery find" before PR 2 lands `canonry discover promote`.',
+    access: "read",
+    tier: "discovery",
+    inputSchema: discoverySessionIdInputSchema,
+    annotations: readAnnotations(),
+    openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions/{id}"],
+    handler: (client, input) => client.getDiscoverySession(input.project, input.sessionId)
+  }),
+  defineTool({
+    name: "canonry_discover_promote_preview",
+    title: "Preview discovery promotion",
+    description: "Read-only preview of what `canonry discover promote` (PR 2) would persist for a session: bucketed query lists and suggested new competitor domains (those not already in the project's tracked competitor list). v1 returns the preview only; use it to confirm a basket before PR 2 ships the merge step.",
+    access: "read",
+    tier: "discovery",
+    inputSchema: discoverySessionIdInputSchema,
+    annotations: readAnnotations(),
+    openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions/{id}/promote"],
+    handler: (client, input) => client.previewDiscoveryPromote(input.project, input.sessionId)
   })
 ];
 var CANONRY_MCP_TOOL_COUNT = canonryMcpTools.length;

package/dist/{chunk-EUGCQSFC.js → chunk-CRQMGNPH.js} RENAMED Viewed

@@ -88,7 +88,9 @@ var runKindSchema = z2.enum([
   "bing-inspect",
   "bing-inspect-sitemap",
   "backlink-extract",
-  "traffic-sync"
+  "traffic-sync",
+  "aeo-discover-seed",
+  "aeo-discover-probe"
 ]);
 var RunKinds = runKindSchema.enum;
 var runTriggerSchema = z2.enum(["manual", "scheduled", "config-apply", "backfill"]);
@@ -2348,6 +2350,128 @@ var trafficEventsResponseSchema = z20.object({
   events: z20.array(trafficEventEntrySchema)
 });
+// ../contracts/src/discovery.ts
+import { z as z21 } from "zod";
+// The three bucket labels a discovery probe can be classified into.
+var discoveryBucketSchema = z21.enum(["cited", "aspirational", "wasted-surface"]);
+// Value map of the bucket enum, for referring to the labels by name.
+var DiscoveryBuckets = discoveryBucketSchema.enum;
+// The status values a discovery session can carry.
+var discoverySessionStatusSchema = z21.enum(["queued", "seeding", "probing", "completed", "failed"]);
+// Value map of the session-status enum.
+var DiscoverySessionStatuses = discoverySessionStatusSchema.enum;
+// One entry of a session's competitor map: a non-empty domain and a positive
+// integer hit count.
+var discoveryCompetitorMapEntrySchema = z21.object({
+  domain: z21.string().min(1),
+  hits: z21.number().int().positive()
+});
+// DTO for a single probe row. `bucket` defaults to null and `citedDomains`
+// defaults to an empty array when absent from the parsed input.
+var discoveryProbeDtoSchema = z21.object({
+  id: z21.string(),
+  sessionId: z21.string(),
+  projectId: z21.string(),
+  query: z21.string(),
+  bucket: discoveryBucketSchema.nullable().default(null),
+  citationState: citationStateSchema,
+  citedDomains: z21.array(z21.string()).default([]),
+  createdAt: z21.string()
+});
+// DTO for a session summary. Only `id`, `projectId`, `status` and `createdAt`
+// are required; the three bucket counts default to null and `competitorMap`
+// defaults to an empty array.
+var discoverySessionDtoSchema = z21.object({
+  id: z21.string(),
+  projectId: z21.string(),
+  status: discoverySessionStatusSchema,
+  icpDescription: z21.string().nullable().optional(),
+  seedProvider: z21.string().nullable().optional(),
+  seedCountRaw: z21.number().int().nullable().optional(),
+  seedCount: z21.number().int().nullable().optional(),
+  dedupThreshold: z21.number().nullable().optional(),
+  probeCount: z21.number().int().nullable().optional(),
+  citedCount: z21.number().int().nullable().default(null),
+  aspirationalCount: z21.number().int().nullable().default(null),
+  wastedCount: z21.number().int().nullable().default(null),
+  competitorMap: z21.array(discoveryCompetitorMapEntrySchema).default([]),
+  error: z21.string().nullable().optional(),
+  startedAt: z21.string().nullable().optional(),
+  finishedAt: z21.string().nullable().optional(),
+  createdAt: z21.string()
+});
+// Session summary plus its probe rows (`probes` defaults to an empty array).
+var discoverySessionDetailDtoSchema = discoverySessionDtoSchema.extend({
+  probes: z21.array(discoveryProbeDtoSchema).default([])
+});
+// Upper bound accepted for `maxProbes` in a run request.
+var DISCOVERY_MAX_PROBES_CAP = 500;
+// Body of a "start discovery run" request; every field is optional.
+var discoveryRunRequestSchema = z21.object({
+  icpDescription: z21.string().min(1).optional(),
+  dedupThreshold: z21.number().min(0).max(1).optional(),
+  maxProbes: z21.number().int().positive().max(DISCOVERY_MAX_PROBES_CAP).optional()
+});
+// Provenance tag: either the literal "cli" or "discovery:" followed by at
+// least one character.
+// NOTE(review): presumably the suffix is a discovery session id — confirm
+// against the code that writes this value.
+var queryProvenanceSchema = z21.union([
+  z21.literal("cli"),
+  z21.string().regex(/^discovery:.+$/)
+]);
+// ../contracts/src/embeddings.ts
+/**
+ * Cosine similarity of two equal-length numeric vectors.
+ *
+ * @param a First vector (indexable, with a `length`).
+ * @param b Second vector, same length as `a`.
+ * @returns dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
+ * @throws {Error} When either vector is empty or the lengths differ.
+ */
+function cosineSimilarity(a, b) {
+  if (a.length === 0 || b.length === 0) {
+    throw new Error("cosineSimilarity: vectors must be non-empty");
+  }
+  if (a.length !== b.length) {
+    throw new Error(`cosineSimilarity: vector length mismatch (${a.length} vs ${b.length})`);
+  }
+  // Accumulate the dot product and both squared magnitudes in a single pass.
+  let dot = 0;
+  let magA = 0;
+  let magB = 0;
+  for (let i = 0; i < a.length; i++) {
+    dot += a[i] * b[i];
+    magA += a[i] * a[i];
+    magB += b[i] * b[i];
+  }
+  // A zero vector has no direction; return 0 instead of dividing by zero.
+  if (magA === 0 || magB === 0) return 0;
+  return dot / (Math.sqrt(magA) * Math.sqrt(magB));
+}
+/**
+ * Group items whose vectors are at least `threshold` cosine-similar.
+ *
+ * Every pair of vectors is compared, and any pair meeting the threshold is
+ * merged with a union-find structure. Merging is transitive: two items can end
+ * up in the same cluster through a chain of similar neighbours even if their
+ * own similarity is below the threshold.
+ *
+ * @param items Items to cluster; `items[i]` is described by `vectors[i]`.
+ * @param vectors One vector per item, in the same order as `items`.
+ * @param threshold Minimum cosine similarity (inclusive) for merging, in [0, 1].
+ * @returns Array of clusters. Items inside a cluster keep their input order,
+ *   and clusters are ordered by the index of their first member.
+ * @throws {Error} When `threshold` is below 0 or above 1, or when `items` and
+ *   `vectors` differ in length. Errors from `cosineSimilarity` propagate.
+ */
+function clusterByCosine(items, vectors, threshold) {
+  // NOTE(review): a NaN threshold passes this check (both comparisons are
+  // false); afterwards no pair satisfies `>= threshold`, so nothing merges.
+  if (threshold < 0 || threshold > 1) {
+    throw new Error(`clusterByCosine: threshold must be in [0, 1], got ${threshold}`);
+  }
+  if (items.length !== vectors.length) {
+    throw new Error(`clusterByCosine: items/vectors length mismatch (${items.length} vs ${vectors.length})`);
+  }
+  if (items.length === 0) return [];
+  // Union-find forest: parent[i] === i marks i as the root of its set.
+  const parent = items.map((_, i) => i);
+  const find = (x) => {
+    // First pass: walk up to the root.
+    let root = x;
+    while (parent[root] !== root) root = parent[root];
+    // Second pass: point every node on the walked path directly at the root.
+    let cur = x;
+    while (parent[cur] !== root) {
+      const next = parent[cur];
+      parent[cur] = root;
+      cur = next;
+    }
+    return root;
+  };
+  const union = (a, b) => {
+    const ra = find(a);
+    const rb = find(b);
+    if (ra !== rb) parent[ra] = rb;
+  };
+  // Compare each unordered pair once.
+  for (let i = 0; i < items.length; i++) {
+    for (let j = i + 1; j < items.length; j++) {
+      if (cosineSimilarity(vectors[i], vectors[j]) >= threshold) {
+        union(i, j);
+      }
+    }
+  }
+  // Bucket indices by root. Map insertion order fixes the cluster order.
+  const byRoot = /* @__PURE__ */ new Map();
+  for (let i = 0; i < items.length; i++) {
+    const root = find(i);
+    const existing = byRoot.get(root);
+    if (existing) existing.push(i);
+    else byRoot.set(root, [i]);
+  }
+  return Array.from(byRoot.values()).map((indices) => indices.map((idx) => items[idx]));
+}
+/**
+ * Pick the cluster member with the smallest `length`.
+ *
+ * Ties go to the earliest member, because a later member replaces the current
+ * best only when it is strictly shorter.
+ *
+ * @param cluster Non-empty array of members exposing `length` (presumably
+ *   query strings — confirm against callers).
+ * @returns The shortest member.
+ * @throws {Error} When `cluster` is empty.
+ */
+function pickClusterRepresentative(cluster) {
+  if (cluster.length === 0) throw new Error("pickClusterRepresentative: cluster is empty");
+  let best = cluster[0];
+  for (let i = 1; i < cluster.length; i++) {
+    if (cluster[i].length < best.length) best = cluster[i];
+  }
+  return best;
+}
 // ../contracts/src/formatting.ts
 function formatRatio(value) {
   if (!Number.isFinite(value) || value === 0) return "0%";
@@ -2451,6 +2575,7 @@ export {
   RunStatuses,
   RunKinds,
   RunTriggers,
+  citationStateSchema,
   CitationStates,
   runTriggerRequestSchema,
   parseRunError,
@@ -2508,6 +2633,13 @@ export {
   trafficConnectWordpressRequestSchema,
   trafficEventKindSchema,
   TrafficEventKinds,
+  discoveryBucketSchema,
+  DiscoveryBuckets,
+  DiscoverySessionStatuses,
+  DISCOVERY_MAX_PROBES_CAP,
+  discoveryRunRequestSchema,
+  clusterByCosine,
+  pickClusterRepresentative,
   formatRatio,
   formatNumber,
   formatDate,

package/dist/{chunk-OYYFXKRK.js → chunk-IS65IYNZ.js} RENAMED Viewed

@@ -8,7 +8,7 @@ import {
   categoryLabel,
   determineAnswerMentioned,
   normalizeProjectDomain
-} from "./chunk-EUGCQSFC.js";
+} from "./chunk-CRQMGNPH.js";
 // src/intelligence-service.ts
 import { eq, desc, asc, and, or, inArray } from "drizzle-orm";
@@ -36,6 +36,8 @@ __export(schema_exports, {
   ccReleaseSyncs: () => ccReleaseSyncs,
   competitors: () => competitors,
   crawlerEventsHourly: () => crawlerEventsHourly,
+  discoveryProbes: () => discoveryProbes,
+  discoverySessions: () => discoverySessions,
   gaAiReferrals: () => gaAiReferrals,
   gaConnections: () => gaConnections,
   gaSocialReferrals: () => gaSocialReferrals,
@@ -59,7 +61,7 @@ __export(schema_exports, {
   trafficSources: () => trafficSources,
   usageCounters: () => usageCounters
 });
-import { index, integer, primaryKey, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
+import { index, integer, primaryKey, real, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
 var projects = sqliteTable("projects", {
   id: text("id").primaryKey(),
   name: text("name").notNull().unique(),
@@ -76,6 +78,7 @@ var projects = sqliteTable("projects", {
   autoExtractBacklinks: integer("auto_extract_backlinks").notNull().default(0),
   configSource: text("config_source").notNull().default("cli"),
   configRevision: integer("config_revision").notNull().default(1),
+  icpDescription: text("icp_description"),
   createdAt: text("created_at").notNull(),
   updatedAt: text("updated_at").notNull()
 });
@@ -83,6 +86,7 @@ var queries = sqliteTable("queries", {
   id: text("id").primaryKey(),
   projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
   query: text("query").notNull(),
+  provenance: text("provenance"),
   createdAt: text("created_at").notNull()
 }, (table) => [
   index("idx_queries_project").on(table.projectId),
@@ -92,6 +96,7 @@ var competitors = sqliteTable("competitors", {
   id: text("id").primaryKey(),
   projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
   domain: text("domain").notNull(),
+  provenance: text("provenance"),
   createdAt: text("created_at").notNull()
 }, (table) => [
   index("idx_competitors_project").on(table.projectId),
@@ -653,6 +658,43 @@ var rawEventSamples = sqliteTable("raw_event_samples", {
   index("idx_raw_event_samples_source_ts").on(table.sourceId, table.ts),
   index("idx_raw_event_samples_event_type").on(table.eventType)
 ]);
+// Drizzle table definition for `discovery_sessions`: one row per discovery
+// session. Rows are deleted with their project (cascade on project_id).
+var discoverySessions = sqliteTable("discovery_sessions", {
+  id: text("id").primaryKey(),
+  projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
+  // Plain nullable text column; no foreign-key reference is declared here.
+  runId: text("run_id"),
+  status: text("status").notNull().default("queued"),
+  icpDescription: text("icp_description"),
+  seedProvider: text("seed_provider"),
+  seedCountRaw: integer("seed_count_raw"),
+  seedCount: integer("seed_count"),
+  dedupThreshold: real("dedup_threshold"),
+  probeCount: integer("probe_count"),
+  citedCount: integer("cited_count"),
+  aspirationalCount: integer("aspirational_count"),
+  wastedCount: integer("wasted_count"),
+  // Stored as text; the default is the string "[]".
+  competitorMap: text("competitor_map").notNull().default("[]"),
+  error: text("error"),
+  startedAt: text("started_at"),
+  finishedAt: text("finished_at"),
+  createdAt: text("created_at").notNull()
+}, (table) => [
+  // Composite index on (project_id, created_at) and a single-column index on run_id.
+  index("idx_discovery_sessions_project_created").on(table.projectId, table.createdAt),
+  index("idx_discovery_sessions_run").on(table.runId)
+]);
+// Drizzle table definition for `discovery_probes`: one row per probe within a
+// discovery session. Rows are deleted with their session and with their
+// project (both foreign keys cascade).
+var discoveryProbes = sqliteTable("discovery_probes", {
+  id: text("id").primaryKey(),
+  sessionId: text("session_id").notNull().references(() => discoverySessions.id, { onDelete: "cascade" }),
+  projectId: text("project_id").notNull().references(() => projects.id, { onDelete: "cascade" }),
+  query: text("query").notNull(),
+  // Nullable: a probe row may exist without a bucket.
+  bucket: text("bucket"),
+  citationState: text("citation_state").notNull(),
+  // Stored as text; the default is the string "[]".
+  citedDomains: text("cited_domains").notNull().default("[]"),
+  rawResponse: text("raw_response"),
+  createdAt: text("created_at").notNull()
+}, (table) => [
+  // Separate single-column indexes for lookups by session and by project.
+  index("idx_discovery_probes_session").on(table.sessionId),
+  index("idx_discovery_probes_project").on(table.projectId)
+]);
 var migrationsTable = sqliteTable("_migrations", {
   version: integer("version").primaryKey(),
   name: text("name").notNull(),
@@ -1681,6 +1723,75 @@ var MIGRATION_VERSIONS = [
     statements: [
       `DROP INDEX IF EXISTS idx_schedules_project`
     ]
+  },
+  {
+    version: 55,
+    name: "discovery-foundation",
+    // Adds the three-ring discovery foundation: per-project ICP, query/competitor
+    // provenance (so we can trace adopted basket entries back to a discovery
+    // session), and the two tables that hold a discovery session's research
+    // output. No UNIQUE(session_id, query) on discovery_probes — v2 will probe
+    // the same query across multiple providers in the same session.
+    //
+    // `competitor_map` defaults to '[]' (JSON array) — see DTO
+    // `discoveryCompetitorMapEntrySchema` for the entry shape `{domain, hits}`.
+    // Backfill of `provenance='cli'` runs once: existing pre-v55 rows are
+    // attributed to manual CLI entry so a future NULL distinctly means
+    // "post-v55 row missing provenance" (a bug to catch in review).
+    statements: [
+      `ALTER TABLE projects ADD COLUMN icp_description TEXT`,
+      `ALTER TABLE queries ADD COLUMN provenance TEXT`,
+      `ALTER TABLE competitors ADD COLUMN provenance TEXT`,
+      `UPDATE queries SET provenance = 'cli' WHERE provenance IS NULL`,
+      `UPDATE competitors SET provenance = 'cli' WHERE provenance IS NULL`,
+      `CREATE TABLE IF NOT EXISTS discovery_sessions (
+         id                  TEXT PRIMARY KEY,
+         project_id          TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+         status              TEXT NOT NULL DEFAULT 'queued',
+         icp_description     TEXT,
+         seed_provider       TEXT,
+         seed_count_raw      INTEGER,
+         seed_count          INTEGER,
+         dedup_threshold     REAL,
+         probe_count         INTEGER,
+         cited_count         INTEGER,
+         aspirational_count  INTEGER,
+         wasted_count        INTEGER,
+         competitor_map      TEXT NOT NULL DEFAULT '[]',
+         error               TEXT,
+         started_at          TEXT,
+         finished_at         TEXT,
+         created_at          TEXT NOT NULL
+       )`,
+      // "Latest session per project" is the access pattern; SQLite walks the
+      // composite index backwards for ORDER BY created_at DESC.
+      `CREATE INDEX IF NOT EXISTS idx_discovery_sessions_project_created ON discovery_sessions(project_id, created_at)`,
+      `CREATE TABLE IF NOT EXISTS discovery_probes (
+         id              TEXT PRIMARY KEY,
+         session_id      TEXT NOT NULL REFERENCES discovery_sessions(id) ON DELETE CASCADE,
+         project_id      TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
+         query           TEXT NOT NULL,
+         bucket          TEXT,
+         citation_state  TEXT NOT NULL,
+         cited_domains   TEXT NOT NULL DEFAULT '[]',
+         raw_response    TEXT,
+         created_at      TEXT NOT NULL
+       )`,
+      `CREATE INDEX IF NOT EXISTS idx_discovery_probes_session ON discovery_probes(session_id)`,
+      `CREATE INDEX IF NOT EXISTS idx_discovery_probes_project ON discovery_probes(project_id)`
+    ]
+  },
+  {
+    version: 56,
+    name: "discovery-sessions-run-id",
+    // Links a discovery_sessions row back to the runs row that drove it. Without
+    // this column the run-coordinator can't tell two concurrent discovery
+    // sessions apart for the same project — it would fall back to "latest
+    // non-queued session" and surface the wrong bucket counts to Aero.
+    statements: [
+      `ALTER TABLE discovery_sessions ADD COLUMN run_id TEXT`,
+      `CREATE INDEX IF NOT EXISTS idx_discovery_sessions_run ON discovery_sessions(run_id)`
+    ]
   }
 ];
 function isDuplicateColumnError(err) {
@@ -3519,6 +3630,8 @@ export {
   crawlerEventsHourly,
   aiReferralEventsHourly,
   rawEventSamples,
+  discoverySessions,
+  discoveryProbes,
   createClient,
   parseJsonColumn,
   extractLegacyCredentials,