npm - @ainyc/canonry - Versions diffs - 4.85.0 → 4.87.0 - Mend

@ainyc/canonry 4.85.0 → 4.87.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/dist/{chunk-3K3QRSYE.js → chunk-5LW7CJAO.js} RENAMED

@@ -46,8 +46,10 @@ import {
   adsSummaryDtoSchema,
   adsSyncResponseSchema,
   agentProvidersResponseDtoSchema,
+  aggregateHarvestedQueries,
   apiKeyDtoSchema,
   apiKeyListDtoSchema,
+  applyHarvestSemanticNovelty,
   auditLogEntrySchema,
   authInvalid,
   authRequired,
@@ -70,6 +72,7 @@ import {
   brandKeyFromText,
   brandLabelFromDomain,
   brandMetricsDtoSchema,
+  buildHarvestAnchorTerms,
   categorizeSource,
   categorizeSourceWithCompetitors,
   categoryLabel,
@@ -104,6 +107,7 @@ import {
   deriveWinnabilityClass,
   determineAnswerMentioned,
   discoveryBucketSchema,
+  discoveryHarvestDtoSchema,
   discoveryPromotePreviewSchema,
   discoveryPromoteRequestSchema,
   discoveryPromoteResultSchema,
@@ -130,6 +134,7 @@ import {
   ga4SocialReferralHistoryEntrySchema,
   ga4StatusDtoSchema,
   ga4SyncResponseDtoSchema,
+  gateHarvestedSearchQueries,
   gbpAccountListResponseSchema,
   gbpDailyMetricListResponseSchema,
   gbpDiscoverRequestSchema,
@@ -165,6 +170,7 @@ import {
   missingDependency,
   normalizeProjectAliases,
   normalizeProjectDomain,
+  normalizeQueryText,
   normalizeUrlPath,
   notFound,
   notImplemented,
@@ -175,6 +181,7 @@ import {
   pickClusterRepresentative,
   projectConfigSchema,
   projectDtoSchema,
+  projectOverviewDtoSchema,
   projectReportDtoSchema,
   projectUpsertRequestSchema,
   providerError,
@@ -248,7 +255,7 @@ import {
   wordpressSchemaDeployResultDtoSchema,
   wordpressSchemaStatusResultDtoSchema,
   wordpressStatusDtoSchema
-} from "./chunk-I2BJC3DT.js";
+} from "./chunk-MDRDX5R2.js";
 // src/intelligence-service.ts
 import { eq as eq37, desc as desc18, asc as asc5, and as and27, ne as ne5, or as or5, inArray as inArray14, gte as gte7, lte as lte4 } from "drizzle-orm";
@@ -4527,35 +4534,101 @@ function buildAiSourceOrigin(snapshots, projectDomains, competitorDomains, topDo
 // ../intelligence/src/movement-summary.ts
 function buildMovementSummary(currentSnapshots, previousSnapshots, options = {}) {
+  return buildSignalMovementSummary(
+    currentSnapshots,
+    previousSnapshots,
+    (snapshot) => snapshot.citationState === CitationStates.cited,
+    options
+  );
+}
+function buildCitationMovementSummary(currentSnapshots, previousSnapshots, options = {}) {
+  return buildMovementSummary(currentSnapshots, previousSnapshots, options);
+}
+function buildMentionMovementSummary(currentSnapshots, previousSnapshots, options = {}) {
+  return buildSignalMovementSummary(
+    currentSnapshots,
+    previousSnapshots,
+    (snapshot) => snapshot.answerMentioned === true,
+    options
+  );
+}
+function buildMovementComparison(currentSnapshots, previousSnapshots, options = {}) {
+  const currentIds = collectQueryIds(currentSnapshots);
+  const previousIds = collectQueryIds(previousSnapshots);
+  const hasPreviousRun = previousSnapshots.length > 0;
+  if (!hasPreviousRun) {
+    return {
+      hasPreviousRun: false,
+      comparable: false,
+      querySetChanged: false,
+      previousRunAt: null,
+      currentQueryCount: currentIds.size,
+      previousQueryCount: 0,
+      comparableQueryCount: 0,
+      addedQueryCount: 0,
+      removedQueryCount: 0,
+      addedQueries: [],
+      removedQueries: []
+    };
+  }
+  const comparableIds = intersection(currentIds, previousIds);
+  const addedIds = difference(currentIds, previousIds);
+  const removedIds = difference(previousIds, currentIds);
+  const querySetChanged = addedIds.size > 0 || removedIds.size > 0;
+  return {
+    hasPreviousRun: true,
+    comparable: !querySetChanged && currentIds.size > 0,
+    querySetChanged,
+    previousRunAt: options.previousRunAt ?? null,
+    currentQueryCount: currentIds.size,
+    previousQueryCount: previousIds.size,
+    comparableQueryCount: comparableIds.size,
+    addedQueryCount: addedIds.size,
+    removedQueryCount: removedIds.size,
+    addedQueries: resolveQueryTexts(addedIds, options.queryLookup),
+    removedQueries: resolveQueryTexts(removedIds, options.queryLookup)
+  };
+}
+function buildSignalMovementSummary(currentSnapshots, previousSnapshots, isActive, options) {
   if (previousSnapshots.length === 0) {
-    const citedIds = collectCitedQueryIds(currentSnapshots);
-    const citedCount = citedIds.size;
-    const tone2 = citedCount > 0 ? "positive" : "neutral";
+    const activeIds = collectActiveQueryIds(currentSnapshots, isActive);
     return withQueryLists(
-      { gained: citedCount, lost: 0, tone: tone2, hasPreviousRun: false },
-      citedIds,
+      {
+        gained: activeIds.size,
+        lost: 0,
+        tone: activeIds.size > 0 ? "positive" : "neutral",
+        hasPreviousRun: false
+      },
+      activeIds,
       /* @__PURE__ */ new Set(),
       options.queryLookup
     );
   }
-  const latestCited = collectCitedQueryIds(currentSnapshots);
-  const previousCited = collectCitedQueryIds(previousSnapshots);
-  const gainedIds = /* @__PURE__ */ new Set();
-  const lostIds = /* @__PURE__ */ new Set();
-  for (const id of latestCited) {
-    if (!previousCited.has(id)) gainedIds.add(id);
-  }
-  for (const id of previousCited) {
-    if (!latestCited.has(id)) lostIds.add(id);
-  }
-  const tone = lostIds.size > gainedIds.size ? "negative" : gainedIds.size > lostIds.size ? "positive" : "neutral";
+  const comparableIds = intersection(
+    collectQueryIds(currentSnapshots),
+    collectQueryIds(previousSnapshots)
+  );
+  const currentActive = intersection(collectActiveQueryIds(currentSnapshots, isActive), comparableIds);
+  const previousActive = intersection(collectActiveQueryIds(previousSnapshots, isActive), comparableIds);
+  const gainedIds = difference(currentActive, previousActive);
+  const lostIds = difference(previousActive, currentActive);
   return withQueryLists(
-    { gained: gainedIds.size, lost: lostIds.size, tone, hasPreviousRun: true },
+    {
+      gained: gainedIds.size,
+      lost: lostIds.size,
+      tone: movementTone(gainedIds.size, lostIds.size),
+      hasPreviousRun: true
+    },
     gainedIds,
     lostIds,
     options.queryLookup
   );
 }
+function movementTone(gained, lost) {
+  if (lost > gained) return "negative";
+  if (gained > lost) return "positive";
+  return "neutral";
+}
 function withQueryLists(base, gainedIds, lostIds, lookup) {
   if (!lookup) return base;
   return {
@@ -4565,6 +4638,7 @@ function withQueryLists(base, gainedIds, lostIds, lookup) {
   };
 }
 function resolveQueryTexts(ids, lookup) {
+  if (!lookup) return [];
   const out = [];
   for (const id of ids) {
     const text2 = lookup.get(id);
@@ -4572,12 +4646,33 @@ function resolveQueryTexts(ids, lookup) {
   }
   return out.sort();
 }
-function collectCitedQueryIds(snapshots) {
-  const cited = /* @__PURE__ */ new Set();
-  for (const s of snapshots) {
-    if (s.citationState === CitationStates.cited && s.queryId) cited.add(s.queryId);
+function collectQueryIds(snapshots) {
+  const ids = /* @__PURE__ */ new Set();
+  for (const snapshot of snapshots) {
+    if (snapshot.queryId) ids.add(snapshot.queryId);
   }
-  return cited;
+  return ids;
+}
+function collectActiveQueryIds(snapshots, isActive) {
+  const active = /* @__PURE__ */ new Set();
+  for (const snapshot of snapshots) {
+    if (snapshot.queryId && isActive(snapshot)) active.add(snapshot.queryId);
+  }
+  return active;
+}
+function intersection(left, right) {
+  const out = /* @__PURE__ */ new Set();
+  for (const value of left) {
+    if (right.has(value)) out.add(value);
+  }
+  return out;
+}
+function difference(left, right) {
+  const out = /* @__PURE__ */ new Set();
+  for (const value of left) {
+    if (!right.has(value)) out.add(value);
+  }
+  return out;
 }
 // ../intelligence/src/score-tones.ts
@@ -5022,12 +5117,12 @@ var DEFAULT_LIMIT = 10;
 function buildSuggestedQueries(gscRows, options) {
   const minImpressions = options.minImpressions ?? DEFAULT_MIN_IMPRESSIONS;
   const limit = options.limit ?? DEFAULT_LIMIT;
-  const trackedSet = new Set(options.trackedQueries.map(normalizeQuery));
+  const trackedSet = new Set(options.trackedQueries.map(normalizeQueryText));
   let skippedAlreadyTracked = 0;
   const candidates = [];
   for (const row of gscRows) {
     if (row.impressions < minImpressions) continue;
-    const normalized = normalizeQuery(row.query);
+    const normalized = normalizeQueryText(row.query);
     if (normalized.length === 0) continue;
     if (trackedSet.has(normalized)) {
       skippedAlreadyTracked++;
@@ -5049,9 +5144,6 @@ function buildSuggestedQueries(gscRows, options) {
     skippedAlreadyTracked
   };
 }
-function normalizeQuery(value) {
-  return value.trim().toLowerCase();
-}
 function buildReason(row) {
   const impressionsLabel = formatImpressions2(row.impressions);
   if (row.avgPosition <= 10) {
@@ -12919,18 +13011,31 @@ async function compositeRoutes(app) {
     const snapshotRunIds = new Set(sparklineRunIds);
     for (const run of latestVisRunGroup) snapshotRunIds.add(run.id);
     for (const run of previousVisRunGroup) snapshotRunIds.add(run.id);
-    const snapshotsByRun = loadSnapshotsByRunIds(app, [...snapshotRunIds]);
+    const projectQueries = app.db.select({ id: queries.id, query: queries.query }).from(queries).where(eq17(queries.projectId, project.id)).all();
+    const queryIdByText = new Map(projectQueries.map((q) => [normalizeQueryText(q.query), q.id]));
+    const snapshotsByRun = loadSnapshotsByRunIds(app, [...snapshotRunIds], queryIdByText);
     const latestSnapshots = latestVisRunGroup.flatMap((r) => snapshotsByRun.get(r.id) ?? []);
     const previousSnapshots = previousVisRunGroup.flatMap((r) => snapshotsByRun.get(r.id) ?? []);
-    const { queryCounts, providers } = summarizeFromSnapshots(latestSnapshots);
+    const trackedLatest = latestSnapshots.filter((s) => !s.archived);
+    const trackedPrevious = previousSnapshots.filter((s) => !s.archived);
+    const trackedSnapshotsByRun = new Map(
+      [...snapshotsByRun].map(([runId, snaps]) => [runId, snaps.filter((s) => !s.archived)])
+    );
+    const { queryCounts, providers } = summarizeFromSnapshots(trackedLatest);
     const transitions = summarizeTransitionsFromSnapshots(
-      latestSnapshots,
-      previousSnapshots,
+      trackedLatest,
+      trackedPrevious,
       previousVisibilityRun?.createdAt ?? null
     );
     const competitorRows = app.db.select().from(competitors).where(eq17(competitors.projectId, project.id)).all();
-    const projectQueries = app.db.select({ id: queries.id, query: queries.query }).from(queries).where(eq17(queries.projectId, project.id)).all();
     const queryLookup = { byId: new Map(projectQueries.map((q) => [q.id, q.query])) };
+    for (const snapshots of snapshotsByRun.values()) {
+      for (const snapshot of snapshots) {
+        if (snapshot.queryText && !queryLookup.byId.has(snapshot.queryId)) {
+          queryLookup.byId.set(snapshot.queryId, snapshot.queryText);
+        }
+      }
+    }
     const configuredApiProviders = project.providers.filter((p) => !p.startsWith("cdp:"));
     const mentionShareCompetitors = competitorRows.map((c) => ({
       domain: c.domain,
@@ -12940,32 +13045,39 @@ async function compositeRoutes(app) {
       brandTokens: [brandLabelFromDomain(c.domain)].filter((t) => t.length >= 3)
     }));
     const scores = {
-      mention: buildMentionCoverage(latestSnapshots, { configuredApiProviders }),
-      visibility: buildVisibilityScore(latestSnapshots, { configuredApiProviders }),
+      mention: buildMentionCoverage(trackedLatest, { configuredApiProviders }),
+      visibility: buildVisibilityScore(trackedLatest, { configuredApiProviders }),
       mentionShare: buildMentionShare(
-        latestSnapshots.map((s) => ({
+        trackedLatest.map((s) => ({
           projectMentioned: s.answerMentioned === true,
           answerText: s.answerText
         })),
         { competitors: mentionShareCompetitors }
       ),
-      gapQueries: buildGapQueryScore(latestSnapshots),
-      mentionGaps: buildMentionGapScore(latestSnapshots),
+      gapQueries: buildGapQueryScore(trackedLatest),
+      mentionGaps: buildMentionGapScore(trackedLatest),
       indexCoverage: buildIndexCoverageScore(app, project.id),
       competitorPressure: buildCompetitorPressureScore(
-        latestSnapshots,
+        trackedLatest,
         competitorRows.map((c) => c.domain),
         competitorRows.length
       ),
       runStatus: buildRunStatusScore(allRuns)
     };
-    const movementSummary = buildMovementSummary(latestSnapshots, previousSnapshots, {
+    const citationMovement = buildCitationMovementSummary(latestSnapshots, previousSnapshots, {
       queryLookup: queryLookup.byId
     });
-    const providerScoresBase = buildProviderScores(latestSnapshots);
+    const mentionMovement = buildMentionMovementSummary(latestSnapshots, previousSnapshots, {
+      queryLookup: queryLookup.byId
+    });
+    const movementComparison = buildMovementComparison(latestSnapshots, previousSnapshots, {
+      queryLookup: queryLookup.byId,
+      previousRunAt: previousVisibilityRun?.createdAt ?? null
+    });
+    const providerScoresBase = buildProviderScores(trackedLatest);
     const providerTrends = buildProviderTrends(
       visibilityRuns.slice(0, DEFAULT_RUN_HISTORY_LIMIT).map((r) => ({ id: r.id, createdAt: r.createdAt })),
-      snapshotsByRun,
+      trackedSnapshotsByRun,
       DEFAULT_RUN_HISTORY_LIMIT
     );
     const providerScores = providerScoresBase.map((score) => {
@@ -12973,13 +13085,13 @@ async function compositeRoutes(app) {
       return trend.length > 1 ? { ...score, trend: trend.map((p) => p.rate) } : score;
     });
     const overviewCompetitors = buildOverviewCompetitors(
-      latestSnapshots,
+      trackedLatest,
       competitorRows.map((c) => ({ id: c.id, domain: c.domain })),
       queryLookup
     );
     const attentionItems = buildAttentionItems(insightRows, allRuns);
     const sparklineRuns = visibilityRuns.slice(0, DEFAULT_RUN_HISTORY_LIMIT).map((r) => ({ id: r.id, createdAt: r.createdAt, status: r.status }));
-    const runHistory = buildRunHistory(sparklineRuns, snapshotsByRun);
+    const runHistory = buildRunHistory(sparklineRuns, trackedSnapshotsByRun);
     scores.mention.trend = runHistory.map((p) => p.mentionRate);
     scores.visibility.trend = runHistory.map((p) => p.citationRate);
     const suggestedQueries = buildSuggestedQueriesFromGsc(
@@ -12996,7 +13108,12 @@ async function compositeRoutes(app) {
       providers,
       transitions,
       scores,
-      movementSummary,
+      // Keep the legacy citation-only field for API compatibility. New
+      // consumers read the explicitly named siblings below.
+      movementSummary: citationMovement,
+      citationMovement,
+      mentionMovement,
+      movementComparison,
       competitors: overviewCompetitors,
       providerScores,
       attentionItems,
@@ -13106,12 +13223,13 @@ function summarizeRun(run) {
     createdAt: run.createdAt
   };
 }
-function loadSnapshotsByRunIds(app, runIds) {
+function loadSnapshotsByRunIds(app, runIds, queryIdByText) {
   const result = /* @__PURE__ */ new Map();
   if (runIds.length === 0) return result;
-  const rows = filterTrackedSnapshots(app.db.select({
+  const rows = app.db.select({
     runId: querySnapshots.runId,
     queryId: querySnapshots.queryId,
+    queryText: querySnapshots.queryText,
     provider: querySnapshots.provider,
     model: querySnapshots.model,
     citationState: querySnapshots.citationState,
@@ -13119,11 +13237,30 @@ function loadSnapshotsByRunIds(app, runIds) {
     answerText: querySnapshots.answerText,
     competitorOverlap: querySnapshots.competitorOverlap,
     citedDomains: querySnapshots.citedDomains
-  }).from(querySnapshots).where(inArray9(querySnapshots.runId, [...runIds])).all());
+  }).from(querySnapshots).where(inArray9(querySnapshots.runId, [...runIds])).all();
   for (const row of rows) {
+    const queryText = row.queryText?.trim() || null;
+    let queryId;
+    let archived = false;
+    if (row.queryId) {
+      queryId = row.queryId;
+    } else if (queryText) {
+      const tracked = queryIdByText.get(normalizeQueryText(queryText));
+      if (tracked) {
+        queryId = tracked;
+      } else {
+        queryId = `archived:${normalizeQueryText(queryText)}`;
+        archived = true;
+      }
+    } else {
+      queryId = null;
+    }
+    if (!queryId) continue;
     const list = result.get(row.runId) ?? [];
     list.push({
-      queryId: row.queryId,
+      queryId,
+      queryText,
+      archived,
       provider: row.provider,
       model: row.model,
       citationState: row.citationState,
@@ -13577,6 +13714,7 @@ var SCHEMA_TABLE = {
   DomainClassificationsResponseDto: domainClassificationsResponseDtoSchema,
   RecommendationBriefDto: recommendationBriefDtoSchema,
   RecommendationExplanationDto: recommendationExplanationDtoSchema,
+  DiscoveryHarvestDto: discoveryHarvestDtoSchema,
   DiscoveryPromotePreview: discoveryPromotePreviewSchema,
   DiscoveryPromoteResult: discoveryPromoteResultSchema,
   DiscoverySessionDetailDto: discoverySessionDetailDtoSchema,
@@ -13612,6 +13750,7 @@ var SCHEMA_TABLE = {
   LocationContext: locationContextSchema,
   NotificationDto: notificationDtoSchema,
   ProjectDto: projectDtoSchema,
+  ProjectOverviewDto: projectOverviewDtoSchema,
   ProjectReportDto: projectReportDtoSchema,
   QueryDto: queryDtoSchema,
   RunDetailDto: runDetailDtoSchema,
@@ -16845,12 +16984,11 @@ var routeCatalog = [
     method: "get",
     path: "/api/v1/projects/{name}/overview",
     summary: "Get a composite overview of project health",
-    description: 'Bundles project info, latest run, top undismissed insights, the latest health snapshot, query cited rate, per-provider breakdown, and transitions vs. the previous run. Designed for the "how is project X doing?" question so agents can answer in one call.',
+    description: 'Bundles project info, latest run, top undismissed insights, health, independent mention and citation coverage, query-basket comparability, and separate mention/citation movement over the shared query cohort. Designed for the "how is project X doing?" question so agents can answer in one call.',
     tags: ["intelligence"],
     parameters: [nameParameter],
     responses: {
-      // TODO: Add `ProjectOverviewDto` Zod schema in contracts.
-      200: rawJsonResponse("Overview returned.", looseObjectSchema),
+      200: jsonResponse("Overview returned.", "ProjectOverviewDto"),
       404: errorResponse("Project not found.")
     }
   },
@@ -17432,6 +17570,23 @@ var routeCatalog = [
       404: errorResponse("Project or session not found.")
     }
   },
+  {
+    method: "get",
+    path: "/api/v1/projects/{name}/discover/sessions/{id}/harvest",
+    summary: "Harvest issued search queries (grounding fan-out) from a session",
+    description: "Reads the search queries the answer engine actually issued to answer each probe (Gemini's `groundingMetadata.webSearchQueries` fan-out) back out of the session's stored probe payloads, then runs a mandatory quality gate and returns the survivors as candidate seeds, ranked by how many distinct probes issued each one. The gate drops navigational/phone lookups, over-specific outliers, off-subject acronym collisions, exact already-tracked matches, and \u2014 via an embedding cosine pass over the project's tracked queries \u2014 semantic duplicates (paraphrases/synonyms an exact match can't see). `semanticNoveltyApplied` reports whether that embedding pass ran (it falls back to exact-match when embeddings are unavailable). These are a THIRD signal \u2014 *issued retrieval queries* \u2014 distinct from `mention` (answer text) and `cited` (source list); they carry no demand of their own. Read-only and derived: nothing is probed, tracked, or promoted. `minProbeHits` raises the recurrence floor; `anchor=false` disables the subject anchor for new-subject discovery on a well-scoped project. `stats` carries the raw count and a per-reason rejection tally. Issue #713.",
+    tags: ["discovery"],
+    parameters: [
+      nameParameter,
+      { name: "id", in: "path", required: true, description: "Discovery session ID.", schema: stringSchema },
+      { name: "minProbeHits", in: "query", required: false, description: "Minimum number of distinct probes a candidate must appear in to be admitted (recurrence floor). Default 1.", schema: stringSchema },
+      { name: "anchor", in: "query", required: false, description: 'Set to "false" to disable the subject-anchor filter. Default applies it (when the subject corpus is rich enough).', schema: stringSchema }
+    ],
+    responses: {
+      200: jsonResponse("Harvested candidate seeds + gate stats returned.", "DiscoveryHarvestDto"),
+      404: errorResponse("Project or session not found.")
+    }
+  },
   {
     method: "get",
     path: "/api/v1/projects/{name}/discover/sessions/{id}/promote",
@@ -32896,6 +33051,72 @@ async function discoveryRoutes(app, opts) {
       return reply.send(detail);
     }
   );
+  app.get(
+    "/projects/:name/discover/sessions/:id/harvest",
+    async (request, reply) => {
+      const project = resolveProject(app.db, request.params.name);
+      const session = app.db.select().from(discoverySessions).where(eq34(discoverySessions.id, request.params.id)).get();
+      if (!session || session.projectId !== project.id) {
+        throw notFound("Discovery session", request.params.id);
+      }
+      const parsedFloor = parseInt(request.query.minProbeHits ?? "", 10);
+      const minProbeHits = Number.isNaN(parsedFloor) || parsedFloor < 1 ? 1 : parsedFloor;
+      const applyAnchor = request.query.anchor !== "false";
+      const provider = session.seedProvider ?? "gemini";
+      const probeRows = app.db.select().from(discoveryProbes).where(eq34(discoveryProbes.sessionId, session.id)).all();
+      const extract = opts.harvestSearchQueries;
+      const probesWithQueries = probeRows.map((row) => {
+        if (!extract || !row.rawResponse) return { searchQueries: [] };
+        try {
+          const raw = JSON.parse(row.rawResponse);
+          return { searchQueries: extract({ provider, rawResponse: raw }) };
+        } catch {
+          return { searchQueries: [] };
+        }
+      });
+      const trackedQueries = app.db.select({ query: queries.query }).from(queries).where(eq34(queries.projectId, project.id)).all().map((r) => r.query);
+      const anchorTerms = buildHarvestAnchorTerms(
+        [session.icpDescription ?? "", ...trackedQueries],
+        effectiveDomains(project)
+      );
+      const aggregated = aggregateHarvestedQueries(probesWithQueries);
+      let result = gateHarvestedSearchQueries({
+        candidates: aggregated,
+        trackedQueries,
+        anchorTerms,
+        minProbeHits,
+        applyAnchor
+      });
+      let semanticNoveltyApplied = false;
+      if (opts.embedQueries && result.admitted.length > 0 && trackedQueries.length > 0) {
+        try {
+          const candidateTexts = result.admitted.map((c) => c.query);
+          const vectors = await opts.embedQueries([...candidateTexts, ...trackedQueries]);
+          if (vectors.length === candidateTexts.length + trackedQueries.length) {
+            result = applyHarvestSemanticNovelty({
+              result,
+              candidateVectors: vectors.slice(0, candidateTexts.length),
+              trackedVectors: vectors.slice(candidateTexts.length)
+            });
+            semanticNoveltyApplied = true;
+          }
+        } catch {
+        }
+      }
+      const harvest = {
+        sessionId: session.id,
+        projectId: project.id,
+        provider,
+        status: session.status,
+        minProbeHits,
+        anchorApplied: result.anchorApplied,
+        semanticNoveltyApplied,
+        candidates: result.admitted,
+        stats: result.stats
+      };
+      return reply.send(harvest);
+    }
+  );
   app.get(
     "/projects/:name/discover/sessions/:id/promote",
     async (request, reply) => {
@@ -33581,7 +33802,9 @@ async function apiRoutes(app, opts) {
       discoverLatestRelease: opts.discoverLatestRelease
     });
     await api.register(discoveryRoutes, {
-      onDiscoveryRunRequested: opts.onDiscoveryRunRequested
+      onDiscoveryRunRequested: opts.onDiscoveryRunRequested,
+      harvestSearchQueries: opts.harvestSearchQueries,
+      embedQueries: opts.embedQueries
     });
     await api.register(technicalAeoRoutes, {
       onSiteAuditRequested: opts.onSiteAuditRequested

package/dist/{chunk-62YB3ML7.js → chunk-6XMXBAEW.js} RENAMED

@@ -23,7 +23,7 @@ import {
   trafficConnectVercelRequestSchema,
   trafficConnectWordpressRequestSchema,
   trafficEventKindSchema
-} from "./chunk-I2BJC3DT.js";
+} from "./chunk-MDRDX5R2.js";
 // src/config.ts
 import fs from "fs";
@@ -3454,6 +3454,18 @@ var getApiV1ProjectsByNameDiscoverSessionsById = (options) => {
     ...options
   });
 };
+var getApiV1ProjectsByNameDiscoverSessionsByIdHarvest = (options) => {
+  return (options.client ?? client).get({
+    security: [
+      {
+        scheme: "bearer",
+        type: "http"
+      }
+    ],
+    url: "/api/v1/projects/{name}/discover/sessions/{id}/harvest",
+    ...options
+  });
+};
 var getApiV1ProjectsByNameDiscoverSessionsByIdPromote = (options) => {
   return (options.client ?? client).get({
     security: [
@@ -4750,6 +4762,19 @@ var ApiClient = class {
       })
     );
   }
+  async getDiscoveryHarvest(project, sessionId, opts) {
+    return this.invoke(
+      () => getApiV1ProjectsByNameDiscoverSessionsByIdHarvest({
+        client: this.heyClient,
+        path: { name: project, id: sessionId },
+        query: {
+          minProbeHits: opts?.minProbeHits !== void 0 ? String(opts.minProbeHits) : void 0,
+          // The server treats anchor=false as "disable"; omit otherwise.
+          anchor: opts?.anchor === false ? "false" : void 0
+        }
+      })
+    );
+  }
   async previewDiscoveryPromote(project, sessionId) {
     return this.invoke(
       () => getApiV1ProjectsByNameDiscoverSessionsByIdPromote({
@@ -5494,6 +5519,12 @@ var discoverySessionIdInputSchema = z2.object({
   project: projectNameSchema,
   sessionId: z2.string().min(1).describe("Discovery session ID returned by canonry_discover_run_start.")
 });
+var discoveryHarvestInputSchema = z2.object({
+  project: projectNameSchema,
+  sessionId: z2.string().min(1).describe("Discovery session ID returned by canonry_discover_run_start."),
+  minProbeHits: z2.number().int().positive().optional().describe("Recurrence floor \u2014 a candidate must have appeared in at least this many distinct probes to be admitted. Default 1."),
+  anchor: z2.boolean().optional().describe("Apply the subject-anchor filter that drops off-topic acronym collisions. Default true; pass false for new-subject discovery on a well-scoped project.")
+});
 var discoveryPromoteInputSchema = z2.object({
   project: projectNameSchema,
   sessionId: z2.string().min(1).describe("Discovery session ID returned by canonry_discover_run_start."),
@@ -5566,7 +5597,7 @@ var canonryMcpTools = [
   defineTool({
     name: "canonry_project_overview",
     title: "Get project overview (composite)",
-    description: 'One-call summary for "how is project X doing?" \u2014 bundles project info, latest run, top undismissed insights, latest health snapshot, query cited rate, per-provider breakdown, gained/lost/emerging vs the previous run, the five score gauges (visibility, gap queries, index coverage, competitor pressure, run status), per-(provider, model) scores, configured competitors with pressure labels, an attention queue of critical/high insights, and a recent-runs sparkline. Filterable by location and time window. Prefer this over fanning out to separate tools.',
+    description: 'One-call summary for "how is project X doing?". Returns independent mention and citation coverage, separate query-level movement for each signal, query-basket comparability with added/removed counts, latest run and health, insights, provider/model breakdowns, competitors, attention items, and recent history. Movement excludes queries not shared by both sweeps. Filterable by location and time window. Prefer this over fanning out to separate tools.',
     access: "read",
     tier: "core",
     inputSchema: z2.object({
@@ -6749,6 +6780,20 @@ var canonryMcpTools = [
     openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions/{id}"],
     handler: (client2, input) => client2.getDiscoverySession(input.project, input.sessionId)
   }),
+  defineTool({
+    name: "canonry_discover_harvest",
+    title: "Harvest discovery search queries",
+    description: `Read the search queries the answer engine actually issued (Gemini's grounding fan-out) back out of a session's stored probes, gate them for buyer-intent + novelty, and return the survivors as candidate seeds ranked by how many distinct probes issued each one. These are a THIRD signal \u2014 issued retrieval queries \u2014 distinct from mention (answer text) and cited (source list); they carry no demand of their own. Read-only and derived: nothing is probed, tracked, or promoted. Use it to surface "queries the model searched for that you aren't tracking yet"; the operator/agent then decides what to add via canonry_query_add. minProbeHits raises the recurrence floor; anchor=false disables the subject filter. stats carries the raw count and per-reason rejection tally.`,
+    access: "read",
+    tier: "discovery",
+    inputSchema: discoveryHarvestInputSchema,
+    annotations: readAnnotations(),
+    openApiOperations: ["GET /api/v1/projects/{name}/discover/sessions/{id}/harvest"],
+    handler: (client2, input) => client2.getDiscoveryHarvest(input.project, input.sessionId, {
+      minProbeHits: input.minProbeHits,
+      anchor: input.anchor
+    })
+  }),
   defineTool({
     name: "canonry_discover_promote_preview",
     title: "Preview discovery promotion",