npm - @ema.co/mcp-toolkit - Versions diffs - 2026.3.25-3 → 2026.3.25-4 - Mend

@ema.co/mcp-toolkit 2026.3.25-3 → 2026.3.25-4

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (19)

package/dist/auth/login.js +26 -26
package/dist/knowledge/extractors/agent-catalog.js +14 -8
package/dist/knowledge/pipeline/confidence.js +137 -25
package/dist/knowledge/search-client.js +16 -3
package/dist/mcp/domain/workflow-def-schema.js +1 -1
package/dist/mcp/domain/workflow-def-validator.js +2 -2
package/dist/mcp/guidance/classify.js +5 -4
package/dist/mcp/guidance/defaults.js +2 -1
package/dist/mcp/guidance.js +2 -1
package/dist/mcp/handlers/knowledge/confidence-loop.js +80 -11
package/dist/mcp/handlers/persona/create.js +119 -7
package/dist/mcp/handlers/utils.js +5 -1
package/dist/mcp/handlers/workflow/adapter.js +2 -0
package/dist/mcp/handlers/workflow/index.js +23 -0
package/dist/mcp/handlers/workflow/validation.js +29 -5
package/dist/mcp/knowledge-guidance-topics.js +8 -3
package/dist/mcp/resources-dynamic.js +41 -11
package/dist/mcp/tools.js +5 -0
package/package.json +1 -1

package/dist/auth/login.js CHANGED Viewed

@@ -145,14 +145,14 @@ async function loginWithPasteToken(appUrl) {
     await new Promise((resolve) => {
         exec(cmd, () => resolve());
     });
-    console.log(`\nBrowser opened to: ${appUrl}`);
-    console.log("Log in normally, then grab your bearer token:\n");
-    console.log("  1. Open DevTools (F12 or Cmd+Opt+I)");
-    console.log("  2. Go to Network tab");
-    console.log("  3. Filter for 'generate_token_from_code'");
-    console.log("  4. Click the request → Response tab");
-    console.log("  5. Copy the access_token value (starts with eyJ...)\n");
-    const rl = createInterface({ input: process.stdin, output: process.stdout });
+    console.error(`\nBrowser opened to: ${appUrl}`);
+    console.error("Log in normally, then grab your bearer token:\n");
+    console.error("  1. Open DevTools (F12 or Cmd+Opt+I)");
+    console.error("  2. Go to Network tab");
+    console.error("  3. Filter for 'generate_token_from_code'");
+    console.error("  4. Click the request → Response tab");
+    console.error("  5. Copy the access_token value (starts with eyJ...)\n");
+    const rl = createInterface({ input: process.stdin, output: process.stderr });
     const token = await new Promise((resolve) => {
         rl.question("Paste bearer token here: ", (answer) => {
             rl.close();
@@ -205,8 +205,8 @@ export async function loginGoogleOAuth(opts = {}) {
             });
         });
         // Navigate to Cloud Console — triggers Google auth
-        console.log("Opening browser for Google authentication...");
-        console.log("Sign in with your Google account — the token will be captured automatically.\n");
+        console.error("Opening browser for Google authentication...");
+        console.error("Sign in with your Google account — the token will be captured automatically.\n");
         await page.goto("https://console.cloud.google.com/");
         const token = await tokenPromise;
         return token;
@@ -232,15 +232,15 @@ async function isPlaywrightAvailable() {
 // ─────────────────────────────────────────────────────────────────────────────
 async function promptTenantSelection(tenants) {
     const { createInterface } = await import("node:readline");
-    console.log(`\nMultiple tenants available:`);
+    console.error(`\nMultiple tenants available:`);
     for (let i = 0; i < tenants.length; i++) {
         const t = tenants[i];
         const marker = t.is_current ? " ← current" : "";
-        console.log(`  [${i + 1}] ${t.company_name} (${t.domain})${marker}`);
+        console.error(`  [${i + 1}] ${t.company_name} (${t.domain})${marker}`);
     }
     const defaultIdx = tenants.findIndex((t) => t.is_current);
     const defaultNum = defaultIdx >= 0 ? defaultIdx + 1 : 1;
-    const rl = createInterface({ input: process.stdin, output: process.stdout });
+    const rl = createInterface({ input: process.stdin, output: process.stderr });
     const answer = await new Promise((resolve) => {
         rl.question(`\nSelect tenant [${defaultNum}]: `, (ans) => {
             rl.close();
@@ -250,11 +250,11 @@ async function promptTenantSelection(tenants) {
     const choice = answer === "" ? defaultNum : parseInt(answer, 10);
     if (choice >= 1 && choice <= tenants.length) {
         const selected = tenants[choice - 1];
-        console.log(`Selected: ${selected.company_name}`);
+        console.error(`Selected: ${selected.company_name}`);
         return selected.tenant_id;
     }
     // Invalid input — use default
-    console.log(`Invalid choice, using default: ${tenants[defaultNum - 1].company_name}`);
+    console.error(`Invalid choice, using default: ${tenants[defaultNum - 1].company_name}`);
     return tenants[defaultNum - 1].tenant_id;
 }
 // ─────────────────────────────────────────────────────────────────────────────
@@ -281,22 +281,22 @@ export async function login(opts = {}) {
     else if (await isPlaywrightAvailable()) {
         // Primary: Playwright-controlled browser, with paste-token fallback on failure
         try {
-            console.log("Opening browser for login...");
-            console.log("Authenticate normally — the token will be captured automatically.\n");
+            console.error("Opening browser for login...");
+            console.error("Authenticate normally — the token will be captured automatically.\n");
             tokenResponse = await loginWithPlaywright(appUrl, apiUrl, timeoutMs);
         }
         catch (err) {
             const msg = err instanceof Error ? err.message : String(err);
-            console.log(`\nBrowser login failed: ${msg}`);
-            console.log("Falling back to manual token entry...\n");
+            console.error(`\nBrowser login failed: ${msg}`);
+            console.error("Falling back to manual token entry...\n");
             tokenResponse = await loginWithPasteToken(appUrl);
         }
     }
     else {
         // Fallback: system browser + paste URL
-        console.log("(Playwright not installed — using paste-URL fallback)\n");
-        console.log("For automatic login, install playwright-chromium:");
-        console.log("  npm install -g playwright-chromium\n");
+        console.error("(Playwright not installed — using paste-URL fallback)\n");
+        console.error("For automatic login, install playwright-chromium:");
+        console.error("  npm install -g playwright-chromium\n");
         tokenResponse = await loginWithPasteToken(appUrl);
     }
     const token = tokenResponse.access_token;
@@ -318,10 +318,10 @@ export async function login(opts = {}) {
             const match = accessibleTenants.find((t) => t.tenant_id === opts.tenantId);
             if (match) {
                 selectedTenantId = match.tenant_id;
-                console.log(`\nUsing tenant: ${match.company_name} (${match.domain})`);
+                console.error(`\nUsing tenant: ${match.company_name} (${match.domain})`);
             }
             else {
-                console.log(`\nWarning: tenant ${opts.tenantId} not in accessible list. Using default.`);
+                console.error(`\nWarning: tenant ${opts.tenantId} not in accessible list. Using default.`);
             }
         }
         else if (process.stdin.isTTY) {
@@ -330,10 +330,10 @@ export async function login(opts = {}) {
         }
         else {
             // Non-interactive: log what's available
-            console.log(`\nAccessible tenants (${accessibleTenants.length}):`);
+            console.error(`\nAccessible tenants (${accessibleTenants.length}):`);
             for (const t of accessibleTenants) {
                 const marker = t.is_current ? " ← active" : "";
-                console.log(`  ${t.company_name} (${t.domain})${marker}`);
+                console.error(`  ${t.company_name} (${t.domain})${marker}`);
             }
         }
     }

package/dist/knowledge/extractors/agent-catalog.js CHANGED Viewed

@@ -1,15 +1,21 @@
 export async function extractAgentCatalog(config) {
     const { AGENT_CATALOG } = await import("../../sdk/generated/agent-catalog.js");
     const documents = AGENT_CATALOG.map((action) => {
-        const inputs = (action.inputs ?? []).map((i) => `${i.name}: ${i.type}`).join(", ");
-        const outputs = (action.outputs ?? []).map((o) => `${o.name}: ${o.type}`).join(", ");
+        const inputs = (action.inputs ?? []).map((i) => `${i.name} (${i.type})${i.description ? `: ${i.description}` : ""}`).join("; ");
+        const outputs = (action.outputs ?? []).map((o) => `${o.name} (${o.type})${o.description ? `: ${o.description}` : ""}`).join("; ");
+        const criticalRules = (action.criticalRules ?? []).map((r) => `- ${r}`).join("\n");
+        // Build prose content optimized for semantic search.
+        // Repeat the actionName in natural language so DE embeddings match queries like "call_llm".
         const content = [
-            action.displayName,
-            action.description,
-            action.whenToUse,
-            inputs ? `Inputs: ${inputs}` : "",
-            outputs ? `Outputs: ${outputs}` : "",
-            action.aliases?.length ? `Aliases: ${action.aliases.join(", ")}` : "",
+            `# ${action.actionName} — ${action.displayName}`,
+            "",
+            `The ${action.actionName} action (also known as "${action.displayName}") ${action.description?.toLowerCase() ?? ""}`,
+            action.whenToUse ? `\nUse ${action.actionName} when: ${action.whenToUse}` : "",
+            action.whenNotToUse ? `\nDo NOT use ${action.actionName} when: ${action.whenNotToUse}` : "",
+            inputs ? `\n## Inputs for ${action.actionName}\n${inputs}` : "",
+            outputs ? `\n## Outputs from ${action.actionName}\n${outputs}` : "",
+            criticalRules ? `\n## Critical rules for ${action.actionName}\n${criticalRules}` : "",
+            action.aliases?.length ? `\nAlso known as: ${action.aliases.join(", ")}` : "",
         ].filter(Boolean).join("\n");
         return {
             id: `entity:${action.actionName}`,

package/dist/knowledge/pipeline/confidence.js CHANGED Viewed

@@ -53,7 +53,7 @@ export function computeConfidenceScore(provenance, feedbackDelta, boost) {
     const floor = Math.max(0, base - MAX_NEGATIVE_DRIFT);
     return Math.max(floor, Math.min(1.0, adjusted));
 }
-/** Per-event feedback deltas (used by runtime confidence-loop) */
+/** Per-event feedback deltas (used by runtime confidence-loop for backward compat) */
 export const FEEDBACK_DELTA_NEGATIVE = -0.08;
 export const FEEDBACK_DELTA_POSITIVE = 0.04;
 /** Maximum boost above provenance base from positive feedback */
@@ -62,6 +62,12 @@ export const MAX_POSITIVE_BOOST = 0.15;
 export const MAX_NEGATIVE_DRIFT = 0.30;
 /** Minimum score delta to trigger a DE update (avoids churn) */
 export const MIN_SCORE_DELTA = 0.05;
+/** Minimum unique clients required before score changes take effect */
+export const MIN_CORROBORATION_CLIENTS = 2;
+/** Evidence-based feedback gets stronger delta multiplier */
+export const EVIDENCE_MULTIPLIER = 1.5; // deploy failure = hard evidence
+/** Soft feedback gets weaker delta multiplier */
+export const SOFT_MULTIPLIER = 0.5; // "this seems wrong" without evidence
 /** Score thresholds for label assignment — single source of truth */
 export const LABEL_THRESHOLDS = {
     verified: 0.80, // >= 0.80
@@ -79,19 +85,134 @@ export function scoreToLabel(score) {
         return "inferred";
     return "low-confidence";
 }
+/**
+ * Compute confidence adjustment based on the ratio of negative to total feedback.
+ *
+ * Key principle: a doc with 500 positives and 5 negatives (1% negative) should NOT
+ * be downgraded — the 5 are likely confused agents, not a real problem.
+ *
+ * The ratio determines the direction. The total count determines the strength.
+ * Both matter: low ratio + high count = strong confidence. High ratio + low count = weak signal.
+ *
+ * @param negativeCount - Total negative feedback events
+ * @param positiveCount - Total positive feedback events
+ * @param uniqueClients - Number of distinct clients (for corroboration)
+ * @returns Confidence delta to apply (negative = downgrade, positive = upgrade)
+ */
+export function computeFeedbackDelta(negativeCount, positiveCount, uniqueClients = 1) {
+    const total = negativeCount + positiveCount;
+    if (total === 0)
+        return 0;
+    const negativeRatio = negativeCount / total;
+    // Confidence bands based on negative ratio
+    // High negative ratio = downgrade, low ratio = upgrade, middle = neutral
+    let baseDelta;
+    if (negativeRatio >= 0.80) {
+        // Overwhelmingly negative — strong downgrade
+        baseDelta = -0.15;
+    }
+    else if (negativeRatio >= 0.60) {
+        // Mostly negative — moderate downgrade
+        baseDelta = -0.10;
+    }
+    else if (negativeRatio >= 0.40) {
+        // Mixed signals — slight downgrade (benefit of doubt to negative)
+        baseDelta = -0.05;
+    }
+    else if (negativeRatio >= 0.20) {
+        // Mostly positive with some complaints — neutral/slight upgrade
+        baseDelta = 0.02;
+    }
+    else {
+        // Overwhelmingly positive — upgrade
+        baseDelta = 0.05;
+    }
+    // Volume amplifier — more total feedback = more confidence in the signal
+    // But with diminishing returns (log scale)
+    const volumeMultiplier = Math.min(2.0, 1.0 + Math.log2(Math.max(1, total)) * 0.15);
+    // Corroboration amplifier — multiple independent clients agreeing is stronger
+    const corroborationMultiplier = Math.min(1.5, 1.0 + Math.max(0, uniqueClients - 1) * 0.1);
+    return baseDelta * volumeMultiplier * corroborationMultiplier;
+}
+/**
+ * Compute effective floor — allows breakthrough under sustained, high-ratio negative feedback.
+ *
+ * The base floor (provenance - MAX_NEGATIVE_DRIFT) protects against noise.
+ * The floor only gives way when: high negative ratio AND sufficient volume.
+ * This prevents a single bad feedback from breaking the floor.
+ */
+export function effectiveFloor(provenanceBase, negativeRatio, totalCount) {
+    const baseFloor = Math.max(0, provenanceBase - MAX_NEGATIVE_DRIFT);
+    // Floor only gives way when: high negative ratio AND sufficient volume
+    if (negativeRatio < 0.70 || totalCount < 5)
+        return baseFloor;
+    // Beyond threshold: floor lowers proportionally to how negative the ratio is
+    const floorReduction = (negativeRatio - 0.70) * totalCount * 0.01;
+    return Math.max(0.10, baseFloor - floorReduction);
+}
+// ── Legacy graduated functions (kept for backward compat imports) ────────────
+/** @deprecated Use computeFeedbackDelta instead */
+export function graduatedNegativeDelta(negativeCount) {
+    if (negativeCount >= 8)
+        return -0.14;
+    if (negativeCount >= 5)
+        return -0.12;
+    if (negativeCount >= 3)
+        return -0.08;
+    if (negativeCount >= 2)
+        return -0.06;
+    return -0.04;
+}
+/** @deprecated Use computeFeedbackDelta instead */
+export function graduatedPositiveDelta(positiveCount) {
+    if (positiveCount >= 5)
+        return 0.06;
+    if (positiveCount >= 3)
+        return 0.04;
+    return 0.03;
+}
 /**
  * Compute a per-event feedback delta and apply to current score.
  * Used by the runtime confidence loop when a single feedback event arrives.
  *
- * @returns { newScore, label } — clamped to [0, provenanceBase + MAX_POSITIVE_BOOST]
+ * When feedbackHistory is provided, uses graduated deltas that accelerate
+ * with corroboration. Without feedbackHistory, uses flat deltas for
+ * backward compatibility.
+ *
+ * @returns { newScore, label } — clamped to [floor, provenanceBase + MAX_POSITIVE_BOOST]
  */
-export function applyFeedbackDelta(currentScore, provenance, isNegative) {
-    const delta = isNegative ? FEEDBACK_DELTA_NEGATIVE : FEEDBACK_DELTA_POSITIVE;
+export function applyFeedbackDelta(currentScore, provenance, isNegative, feedbackHistory) {
     const provenanceBase = PROVENANCE_BASE_SCORES[provenance] ?? PROVENANCE_BASE_SCORES["inferred"];
     const maxScore = provenanceBase + MAX_POSITIVE_BOOST;
-    const minScore = Math.max(0, provenanceBase - MAX_NEGATIVE_DRIFT);
-    const newScore = Math.max(minScore, Math.min(maxScore, currentScore + delta));
-    return { newScore, label: scoreToLabel(newScore) };
+    let delta;
+    let minScore;
+    if (feedbackHistory) {
+        // Ratio-based model — direction from ratio, strength from volume
+        const { negativeCount, positiveCount, uniqueClients } = feedbackHistory;
+        // Add the current event to history for calculation
+        const adjNeg = isNegative ? negativeCount + 1 : negativeCount;
+        const adjPos = isNegative ? positiveCount : positiveCount + 1;
+        const total = adjNeg + adjPos;
+        const negRatio = total > 0 ? adjNeg / total : 0;
+        delta = computeFeedbackDelta(adjNeg, adjPos, uniqueClients);
+        // Apply delta relative to provenance base, not current score
+        const targetScore = provenanceBase + delta;
+        // Move current score toward target (don't jump, converge)
+        const moveRate = 0.3; // converge 30% toward target per event
+        const newScore = currentScore + (targetScore - currentScore) * moveRate;
+        minScore = effectiveFloor(provenanceBase, negRatio, total);
+        return {
+            newScore: Math.max(minScore, Math.min(maxScore, newScore)),
+            label: scoreToLabel(Math.max(minScore, Math.min(maxScore, newScore))),
+        };
+    }
+    else {
+        // Legacy flat model for backward compatibility
+        delta = isNegative ? FEEDBACK_DELTA_NEGATIVE : FEEDBACK_DELTA_POSITIVE;
+        minScore = Math.max(0, provenanceBase - MAX_NEGATIVE_DRIFT);
+        const newScore = Math.max(minScore, Math.min(maxScore, currentScore + delta));
+        return { newScore, label: scoreToLabel(newScore) };
+    }
 }
 /**
  * Feedback signal classification — single source of truth.
@@ -281,25 +402,16 @@ function buildReport(source, totalEntries, correlated, signalMap) {
     const signals = [];
     const lowConfidence = [];
     for (const [docId, signal] of signalMap) {
-        const netNegative = signal.negative - signal.positive;
-        let delta;
-        // Stepped delta tiers based on aggregate feedback count
-        if (netNegative >= 5) {
-            delta = -0.40;
-        }
-        else if (netNegative >= 3) {
-            delta = -0.25;
-        }
-        else if (netNegative >= 1) {
-            delta = -0.10;
-        }
-        else {
-            delta = 0;
-        }
-        // Estimate label from score using "curated" as a conservative baseline.
+        // Ratio-based delta — direction from ratio, strength from volume
+        const total = signal.negative + signal.positive;
+        const negRatio = total > 0 ? signal.negative / total : 0;
+        const delta = computeFeedbackDelta(signal.negative, signal.positive);
+        // Estimate label using "curated" baseline + adaptive floor.
         // This is advisory — applyConfidenceSignals() recomputes with actual provenance.
-        // For low-provenance docs (raw-document, inferred), the real label may differ.
-        const estimatedScore = computeConfidenceScore("curated", delta);
+        const provenanceBase = PROVENANCE_BASE_SCORES["curated"];
+        const maxScore = provenanceBase + MAX_POSITIVE_BOOST;
+        const minScore = effectiveFloor(provenanceBase, negRatio, total);
+        const estimatedScore = Math.max(minScore, Math.min(maxScore, provenanceBase + delta));
         const label = scoreToLabel(estimatedScore);
         if (label === "low-confidence") {
             lowConfidence.push(docId);

package/dist/knowledge/search-client.js CHANGED Viewed

@@ -478,9 +478,22 @@ async function searchDirect(query, options) {
     // Dynamic domain boost — if query signals a specific platform, boost its domain
     // and demote the other. DE serves both platforms; this keeps results focused.
     const queryBoost = buildQueryBoostSpec(query, filters);
-    if (queryBoost) {
-        body.boostSpec = queryBoost;
-    }
+    // Confidence boost — always applied. Verified docs rank higher, low-confidence lower.
+    // This makes the feedback loop visible at search time: downgraded docs get demoted
+    // regardless of relevance. DE boost values are additive to relevance score.
+    // Values calibrated against signal viewer: semantic relevance spreads 0.07-0.99,
+    // so boosts must be large enough to move docs across that range.
+    const confidenceBoosts = [
+        { condition: 'confidence: ANY("verified")', boost: 0.5 },
+        { condition: 'confidence: ANY("inferred")', boost: -0.2 },
+        { condition: 'confidence: ANY("low-confidence")', boost: -0.8 },
+    ];
+    const querySpecs = (queryBoost?.conditionBoostSpecs ?? []);
+    const allBoosts = [
+        ...querySpecs,
+        ...confidenceBoosts,
+    ];
+    body.boostSpec = { conditionBoostSpecs: allBoosts };
     // Always request snippets — works with chunked datastores.
     // (Extractive answers do NOT work with chunking, only snippets.)
     // For answer mode, also request summary with citations.

package/dist/mcp/domain/workflow-def-schema.js CHANGED Viewed

@@ -35,7 +35,7 @@ export const WORKFLOW_DEF_SCHEMA = {
                         namespaces: {
                             type: "array",
                             items: { type: "string" },
-                            description: "Namespace path (e.g., ['ema', 'personas', '<id>'])",
+                            description: "Namespace path — MUST be copied exactly from workflow(mode='get') response. Do NOT construct manually.",
                         },
                         name: {
                             type: "string",

package/dist/mcp/domain/workflow-def-validator.js CHANGED Viewed

@@ -149,8 +149,8 @@ function validateEnumTypes(wf, issues) {
             }
             enumNames.add(name.name);
         }
-        // Validate options array
-        const options = et.options;
+        // Validate options/values array — proto uses "options", compiled proto uses "values"
+        const options = (et.options ?? et.values);
         if (!Array.isArray(options) || options.length === 0) {
             issues.push({
                 path: `${prefix}.options`,

package/dist/mcp/guidance/classify.js CHANGED Viewed

@@ -40,16 +40,17 @@ export function classifyResult(result, unfilteredCount) {
             return "error_500";
         return "error";
     }
-    // Success shapes
+    // Success shapes — order matters: check deploy before created,
+    // because deploy results also carry persona_id but aren't "created".
+    if (result.deployed === true || result.workflow_deployed === true || (result.mode === "deploy" && status === "deployed")) {
+        return "deployed";
+    }
     if (result.success === true || result.persona_id) {
         // Created entity
         if (result.persona_id && !result.workflow_def) {
             return "created";
         }
     }
-    if (result.deployed === true || (result.mode === "deploy" && !error)) {
-        return "deployed";
-    }
     // List shapes — check count
     const count = typeof result.count === "number" ? result.count : undefined;
     if (count !== undefined) {

package/dist/mcp/guidance/defaults.js CHANGED Viewed

@@ -30,7 +30,8 @@ export function getDefaultGuidance(shape, ctx) {
             };
         case "deployed":
             return {
-                _next_step: "Verify: workflow(mode='get', persona_id='{persona_id}') — confirm workflow is active.",
+                _next_step: "Test your deployed workflow: conversation(method='create', persona_id='{persona_id}') for chat, or upload documents via persona(id='{persona_id}', data={method:'upload', path:'/path/to/doc.pdf'}) for dashboard.",
+                _tip: "Deployed successfully. The workflow is now active.",
             };
         case "deploy_failed":
             return {

package/dist/mcp/guidance.js CHANGED Viewed

@@ -119,6 +119,7 @@ function generateDecisionFlow(tools) {
 2. \`knowledge("workflow patterns for <your use case>")\` → learn the correct workflow pattern
 3. \`${createPersona}\` → creates persona
 4. \`${getWorkflow}\` → get starter workflow + generation schema (FULL input/output specs from API) + fingerprint
+   Use \`compact=true\` for a smaller response (workflowName + fingerprint + workflow_def only, no schema).
 5. Build a complete workflow_def using the generation schema — it shows ALL required inputs per action
 6. Upload data sources if needed — \`persona(id="<new_id>", data={method:"upload", path:"/path/to/doc.pdf"})\`
 7. \`workflow(mode="validate", persona_id="...", workflow_def={...})\` → catch errors BEFORE deploying
@@ -131,7 +132,7 @@ function generateDecisionFlow(tools) {
         const get = opExample("workflow", "Get");
         const deploy = opExample("workflow", "Deploy");
         sections.push(`**Modifying an existing AI Employee's workflow?**
-1. \`${get}\` → get current workflow_def + schema + fingerprint
+1. \`${get}\` → get current workflow_def + schema + fingerprint (use \`compact=true\` for smaller response)
 2. LLM modifies the workflow_def JSON (use the returned workflow_def as format reference)
 3. \`workflow(mode="validate", persona_id="...", workflow_def={...})\` → catch errors before deploying
 4. \`${deploy}\``);

package/dist/mcp/handlers/knowledge/confidence-loop.js CHANGED Viewed

@@ -16,7 +16,7 @@
  */
 import { getDocument, upsertDocument } from "../../../knowledge/search-client.js";
 import { sanitizeId } from "../../../knowledge/pipeline/document.js";
-import { CATEGORY_SIGNAL, FeedbackSignal, MIN_SCORE_DELTA, applyFeedbackDelta, } from "../../../knowledge/pipeline/confidence.js";
+import { PROVENANCE_BASE_SCORES, CATEGORY_SIGNAL, FeedbackSignal, MIN_SCORE_DELTA, MIN_CORROBORATION_CLIENTS, EVIDENCE_MULTIPLIER, MAX_POSITIVE_BOOST, applyFeedbackDelta, effectiveFloor, scoreToLabel, } from "../../../knowledge/pipeline/confidence.js";
 import { getOrCreateClientId } from "../feedback/client-id.js";
 // ─────────────────────────────────────────────────────────────────────────────
 // Guard constants
@@ -38,18 +38,36 @@ export const SESSION_UPDATE_CAP = 50;
 const cooldownMap = new Map();
 /** Session-wide update counter */
 let sessionUpdateCount = 0;
+/** Per-document feedback history for graduated scoring */
+const feedbackHistoryMap = new Map();
+/** Classify feedback strength based on context */
+export function classifyEvidence(category, context) {
+    // Deploy failures are hard evidence
+    if (context?.includes("deploy_failure"))
+        return "hard";
+    if (category === "correction" && context?.includes("deploy"))
+        return "hard";
+    // Explicit corrections with knowledge_ref are medium-hard
+    if (category === "correction")
+        return "hard";
+    // Everything else is soft signal
+    return "soft";
+}
 /** Reset all guard state (for test isolation) */
 export function _resetGuardState() {
     cooldownMap.clear();
     sessionUpdateCount = 0;
+    feedbackHistoryMap.clear();
 }
 /**
  * Process a feedback entry and update DE document confidence if applicable.
  * Returns the update details if a document was modified, undefined otherwise.
  *
  * Best-effort: never throws. Failures are logged but don't block feedback submission.
+ *
+ * @param context - Optional context string for evidence classification (e.g., "deploy_failure")
  */
-export async function processConfidenceFeedback(category, knowledgeRef, qualityData) {
+export async function processConfidenceFeedback(category, knowledgeRef, qualityData, context) {
     // Classify using the semantic signal map (single source of truth)
     const signal = CATEGORY_SIGNAL[category];
     if (!signal || signal === FeedbackSignal.NEUTRAL)
@@ -75,9 +93,10 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
     }
     // ── Guard: per-document cooldown ────────────────────────────────────────
     const docId = sanitizeId(knowledgeRef);
+    let clientId = "unknown";
     if (getCooldownWindow() > 0) {
         try {
-            const clientId = await getOrCreateClientId();
+            clientId = await getOrCreateClientId();
             const cooldownKey = `${clientId}:${docId}`;
             const lastUpdate = cooldownMap.get(cooldownKey);
             const now = Date.now();
@@ -89,6 +108,41 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
             // Best-effort — if client ID fails, skip cooldown check
         }
     }
+    else {
+        try {
+            clientId = await getOrCreateClientId();
+        }
+        catch {
+            // Best-effort
+        }
+    }
+    // ── Track feedback history ──────────────────────────────────────────────
+    let accumulator = feedbackHistoryMap.get(docId);
+    if (!accumulator) {
+        accumulator = { negativeCount: 0, positiveCount: 0, uniqueClients: new Set(), lastUpdated: Date.now() };
+        feedbackHistoryMap.set(docId, accumulator);
+    }
+    // Snapshot BEFORE incrementing — applyFeedbackDelta adds the current event internally
+    const feedbackHistorySnapshot = {
+        negativeCount: accumulator.negativeCount,
+        positiveCount: accumulator.positiveCount,
+        uniqueClients: accumulator.uniqueClients.size + (accumulator.uniqueClients.has(clientId) ? 0 : 1),
+    };
+    if (isNegative)
+        accumulator.negativeCount++;
+    if (isPositive)
+        accumulator.positiveCount++;
+    accumulator.uniqueClients.add(clientId);
+    accumulator.lastUpdated = Date.now();
+    // ── Evidence classification ─────────────────────────────────────────────
+    const evidence = classifyEvidence(category, context);
+    const isHardEvidence = evidence === "hard";
+    // ── Corroboration check ─────────────────────────────────────────────────
+    // Require multiple independent clients unless hard evidence (deploy failure)
+    if (!isHardEvidence && accumulator.uniqueClients.size < MIN_CORROBORATION_CLIENTS) {
+        // Accumulate count but defer score change — single source could be noise
+        return undefined;
+    }
     try {
         // Look up the document — sanitize ID to match DE storage format (colons → underscores)
         const docResult = await getDocument(docId);
@@ -97,11 +151,27 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
         const structData = (docResult.document.structData ?? {});
         const currentScore = structData.confidence_score ?? 0.5;
         const provenance = structData.provenance ?? "inferred";
-        // Compute new score using shared model (same constants + label function as batch path)
-        const { newScore, label: newConfidence } = applyFeedbackDelta(currentScore, provenance, isNegative);
+        // Compute new score using ratio-based model
+        // feedbackHistorySnapshot has counts BEFORE this event; applyFeedbackDelta adds the current event
+        const { newScore: rawNewScore } = applyFeedbackDelta(currentScore, provenance, isNegative, feedbackHistorySnapshot);
+        // Apply evidence multiplier — hard evidence moves score faster
+        let newScore = rawNewScore;
+        if (isHardEvidence && rawNewScore !== currentScore) {
+            const rawDelta = rawNewScore - currentScore;
+            const amplifiedDelta = rawDelta * EVIDENCE_MULTIPLIER;
+            // Re-clamp after amplification
+            const provenanceBase = PROVENANCE_BASE_SCORES[provenance] ?? PROVENANCE_BASE_SCORES["inferred"];
+            const total = accumulator.negativeCount + accumulator.positiveCount;
+            const negRatio = total > 0 ? accumulator.negativeCount / total : 0;
+            const minScore = effectiveFloor(provenanceBase, negRatio, total);
+            const maxScore = provenanceBase + MAX_POSITIVE_BOOST;
+            newScore = Math.max(minScore, Math.min(maxScore, currentScore + amplifiedDelta));
+        }
         // Skip if change is too small
         if (Math.abs(newScore - currentScore) < MIN_SCORE_DELTA)
             return undefined;
+        // Compute final label
+        const finalLabel = scoreToLabel(newScore);
         // Update the document in DE with new confidence
         // Note: DE only supports top-level field masks, not sub-field paths within structData
         const updatedDoc = {
@@ -109,11 +179,11 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
             structData: {
                 ...structData,
                 confidence_score: newScore,
-                confidence: newConfidence,
+                confidence: finalLabel,
                 confidence_updated_at: new Date().toISOString(),
                 confidence_reason: isNegative
-                    ? `Downgraded: ${category} feedback`
-                    : `Upgraded: ${category} feedback`,
+                    ? `Downgraded: ${category} feedback (${evidence} evidence, ${accumulator.uniqueClients.size} clients)`
+                    : `Upgraded: ${category} feedback (${accumulator.uniqueClients.size} clients)`,
             },
         };
         const result = await upsertDocument(updatedDoc, {
@@ -127,7 +197,6 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
         sessionUpdateCount++;
         if (getCooldownWindow() > 0) {
             try {
-                const clientId = await getOrCreateClientId();
                 cooldownMap.set(`${clientId}:${docId}`, Date.now());
             }
             catch {
@@ -138,8 +207,8 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
             documentId: knowledgeRef,
             previousScore: Math.round(currentScore * 1000) / 1000,
             newScore: Math.round(newScore * 1000) / 1000,
-            newConfidence,
-            reason: `${isNegative ? "Downgraded" : "Upgraded"} by ${category} feedback`,
+            newConfidence: finalLabel,
+            reason: `${isNegative ? "Downgraded" : "Upgraded"} by ${category} feedback (${evidence} evidence, ${accumulator.uniqueClients.size} clients)`,
         };
     }
     catch (err) {

package/dist/mcp/handlers/persona/create.js CHANGED Viewed

@@ -18,7 +18,7 @@
  * )
  * ```
  */
-import { resolvePersona, getTemplates, getPersonaTypeFromTemplate, sanitizePersonaById, } from "../utils.js";
+import { resolvePersona, getTemplates, getPersonaTypeFromTemplate, normalizeTriggerType, sanitizePersonaById, } from "../utils.js";
 import { sanitizeWidgets } from "../../../sdk/proto-config.js";
 import { validateWorkflowOutputs } from "../workflow/validate-outputs.js";
 import { SanitizationSession, detectWithPatterns, } from "../../domain/sanitizer.js";
@@ -62,6 +62,7 @@ export async function handleCreate(args, client, getTemplateId) {
     const includeData = args.include_data ?? args.clone_data;
     // Resolve source - could be template or persona
     let templateId;
+    let templateTriggerType; // normalized trigger_type from template
     let sourcePersonaId;
     let sourcePersona = null;
     let sourcePersonaType;
@@ -123,6 +124,7 @@ export async function handleCreate(args, client, getTemplateId) {
             }
             if (template) {
                 templateId = template.id;
+                templateTriggerType = normalizeTriggerType(template.trigger_type);
                 fromType = "template";
             }
         }
@@ -149,6 +151,14 @@ export async function handleCreate(args, client, getTemplateId) {
             _tip: "Use from='<template_id>' with an ID from the list above, or type='voice|chat|dashboard' for a standard template.",
         };
     }
+    // Resolve trigger_type from template if not already set (deprecated template_id or args.type paths)
+    if (fromType === "template" && templateId && !templateTriggerType) {
+        const templates = await getTemplates(client);
+        const resolvedTemplate = templates.find(t => t.id === templateId);
+        if (resolvedTemplate) {
+            templateTriggerType = normalizeTriggerType(resolvedTemplate.trigger_type);
+        }
+    }
     // For persona cloning, default include_data to true
     const effectiveIncludeData = sourcePersonaId ? (includeData ?? true) : false;
     // API requires EITHER template_id OR source_persona_id, NOT both
@@ -206,6 +216,35 @@ export async function handleCreate(args, client, getTemplateId) {
     // The workflow may not be immediately visible via getPersonaById due to async processing,
     // but it IS copied. Don't emit false "workflow not copied" warnings.
     const workflowClonedByApi = !!sourcePersonaId;
+    // Extract workflowName so agents can deploy without calling workflow(mode="get")
+    // (which returns 176K-301K chars). Best-effort: never fail the create over this.
+    // The persona GET may not include workflow_def immediately after creation (async copy),
+    // so we try but also provide a lightweight fallback hint.
+    let workflowName;
+    let hasVoiceWidgets = false;
+    try {
+        // Small delay to allow workflow copy to propagate
+        await new Promise(r => setTimeout(r, 500));
+        const newPersonaForWf = await client.getPersonaById(newPersonaId);
+        const wfDef = newPersonaForWf?.workflow_def;
+        if (wfDef && typeof wfDef === "object" && "workflowName" in wfDef) {
+            workflowName = wfDef.workflowName;
+        }
+        // Detect voice: explicit type arg or voiceSettings widget presence
+        // Voice personas use chat trigger_type (1) but have voiceSettings widgets
+        if (args.type?.toLowerCase() === "voice") {
+            hasVoiceWidgets = true;
+        }
+        else {
+            const widgets = newPersonaForWf?.proto_config?.widgets;
+            if (Array.isArray(widgets)) {
+                hasVoiceWidgets = widgets.some((w) => w?.name === "voiceSettings");
+            }
+        }
+    }
+    catch {
+        // Best-effort — don't fail create if we can't fetch workflowName
+    }
     // Apply workflow_def if provided (the create API doesn't accept workflow directly)
     const workflowDef = args.workflow_def;
     let workflowApplied = false;
@@ -260,12 +299,15 @@ export async function handleCreate(args, client, getTemplateId) {
                 workflowApplied,
                 workflowDef,
                 workflowError,
+                workflowName,
                 sourcePersonaType,
                 dashboardCloneResult,
                 actionsError: validation.errors.join("; "),
                 createdFromTemplate: fromType === "template",
                 clonedFromPersona: fromType === "persona",
                 workflowOutputWarnings,
+                templateTriggerType,
+                hasVoiceWidgets,
             });
         }
         // Build execution context
@@ -287,11 +329,14 @@ export async function handleCreate(args, client, getTemplateId) {
             workflowDef,
             workflowError,
             workflowOutputWarnings,
+            workflowName,
             sourcePersonaType,
             // Don't include dashboardCloneResult - actions handle data operations
             actionsResult,
             createdFromTemplate: fromType === "template",
             clonedFromPersona: fromType === "persona",
+            templateTriggerType,
+            hasVoiceWidgets,
         });
     }
     // ═══════════════════════════════════════════════════════════════════════════
@@ -313,10 +358,13 @@ export async function handleCreate(args, client, getTemplateId) {
             workflowDef,
             workflowError,
             workflowOutputWarnings,
+            workflowName,
             sourcePersonaType,
             dashboardCloneResult,
             createdFromTemplate: fromType === "template",
             clonedFromPersona: fromType === "persona",
+            templateTriggerType,
+            hasVoiceWidgets,
         });
     }
     return buildCreateResult({
@@ -327,12 +375,74 @@ export async function handleCreate(args, client, getTemplateId) {
         workflowDef,
         workflowError,
         workflowOutputWarnings,
+        workflowName,
         sourcePersonaType,
         dashboardCloneResult,
         createdFromTemplate: fromType === "template",
         clonedFromPersona: fromType === "persona",
+        templateTriggerType,
+        hasVoiceWidgets,
     });
 }
+/**
+ * Return type-specific workflow next-steps so agents get relevant guidance.
+ *
+ * Without this, every persona type gets chat-oriented instructions
+ * ("add intent categorization, search nodes") which confuses agents
+ * working on dashboards, doc-gen, or agent-QA personas.
+ *
+ * The function is pure: it only builds and returns a fresh array of
+ * human-readable instruction strings; it performs no I/O.
+ *
+ * @param {string|undefined} triggerType - Trigger-type label to branch on.
+ *   Recognized values are "chat", "chatbot", "dashboard", "thread",
+ *   "document_generation" and "agent_qa". Any other value (including
+ *   undefined) yields the generic three-step fallback.
+ * @param {string} personaId - ID of the newly created persona; interpolated
+ *   verbatim into the workflow(mode='get'|'deploy', persona_id='...') hints.
+ * @param {boolean} hasVoiceWidgets - When truthy, an extra NOTE about
+ *   chat_trigger vs. voice_trigger is appended. Only consulted for the
+ *   "chat"/"chatbot" types; ignored for every other type.
+ * @returns {string[]} Ordered next-step instructions for the given type.
+ */
+function getTypeSpecificNextSteps(triggerType, personaId, hasVoiceWidgets) {
+    switch (triggerType) {
+        case "chat": // intentional fall-through: "chat" and "chatbot" share the same steps
+        case "chatbot": {
+            const steps = [
+                "1. BUILD WORKFLOW: Add intent categorization, search nodes, response handling",
+                `2. If uploading docs: Workflow MUST have search/v2 node or documents will NOT be used`,
+                `3. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
+                `4. Deploy complete workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
+            ];
+            if (hasVoiceWidgets) { // voice note is appended after the four numbered steps
+                steps.push("NOTE: Voice personas use chat_trigger (NOT voice_trigger). The voice_trigger is for a different workflow pattern.");
+            }
+            return steps;
+        }
+        case "dashboard":
+            return [
+                "1. BUILD WORKFLOW: Add entity_extraction_with_documents for document processing",
+                "2. Configure extraction_columns for the data you want to extract",
+                `3. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
+                `4. Deploy workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
+            ];
+        case "thread":
+            return [
+                "1. BUILD WORKFLOW: Add thread_categorizer, search nodes, response handling",
+                "2. This is a thread-based persona — workflows trigger on support tickets, not chat",
+                `3. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
+                `4. Deploy workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
+            ];
+        case "document_generation":
+            return [
+                "1. CHECK EXISTING WORKFLOW: The template may already include a functional document_synthesis workflow",
+                `2. Get current workflow: workflow(mode='get', persona_id='${personaId}') — check if it already has document_synthesis`,
+                "3. If functional: redeploy as-is. If not: add document_synthesis node with workflowInputs",
+                `4. Deploy workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
+            ];
+        case "agent_qa":
+            return [
+                "1. IMPORTANT: Agent QA workflows require UI configuration in the Ema platform",
+                "2. MCP-based workflow building is NOT yet fully supported for Agent QA",
+                "3. Configure QA parameters (contact reasons, scoring rubric) in the Ema UI",
+                `4. Use workflow(mode='get', persona_id='${personaId}') to inspect the current workflow after UI setup`,
+            ];
+        default:
+            // Fallback: generic guidance for unknown or new trigger types
+            return [
+                "1. BUILD WORKFLOW: Add the appropriate nodes for this persona type",
+                `2. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
+                `3. Deploy complete workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
+            ];
+    }
+}
 /**
  * Build the create/clone result object
  */
@@ -341,6 +451,11 @@ function buildCreateResult(opts) {
         success: true,
         persona_id: opts.newPersonaId,
         name: opts.name,
+        // Include workflowName so agents can deploy without calling workflow(mode="get")
+        // which returns 176K-301K chars and is unusable in most contexts.
+        ...(opts.workflowName ? { workflowName: opts.workflowName } : {
+            _workflowName_hint: `workflowName not yet available (async copy in progress). Get it with: persona(id="${opts.newPersonaId}", include_workflow=true) — much smaller than workflow(mode="get").`,
+        }),
     };
     if (opts.sanitization) {
         result.sanitization = opts.sanitization;
@@ -377,14 +492,11 @@ function buildCreateResult(opts) {
     }
     // ── CRITICAL GUIDANCE: Template workflows are minimal starters ──
     // This is where LLMs often go wrong - they create from template and think they're done
+    // Provide type-specific next steps so agents get relevant guidance (not chat-oriented
+    // instructions for dashboard/docgen/agent-qa personas).
     if (opts.createdFromTemplate && !opts.workflowApplied) {
         result._warning = "PERSONA CREATED BUT WORKFLOW IS INCOMPLETE. Template workflows are minimal starters (just trigger→respond).";
-        result._required_next_steps = [
-            "1. BUILD WORKFLOW: Add intent categorization, search nodes, response handling",
-            `2. If uploading docs: Workflow MUST have search/v2 node or documents will NOT be used`,
-            `3. Get current workflow: workflow(mode='get', persona_id='${opts.newPersonaId}')`,
-            `4. Deploy complete workflow: workflow(mode='deploy', persona_id='${opts.newPersonaId}', workflow_def={...})`,
-        ];
+        result._required_next_steps = getTypeSpecificNextSteps(opts.templateTriggerType, opts.newPersonaId, opts.hasVoiceWidgets);
         result._common_mistake = "Creating from template, uploading docs, and declaring 'done' WITHOUT building the workflow. The deploy will now BLOCK this pattern.";
     }
     if (opts.dashboardCloneResult) {

package/dist/mcp/handlers/utils.js CHANGED Viewed

@@ -15,8 +15,12 @@ import { PersonaTriggerTypeEnumLabels } from "../../sdk/generated/api-types.js";
 export function normalizeTriggerType(triggerType) {
     if (triggerType === undefined || triggerType === null)
         return undefined;
-    // If already a string, normalize to lowercase
+    // If already a string, try parsing as number first (API sometimes returns "2" instead of 2)
     if (typeof triggerType === "string") {
+        const asNumber = Number(triggerType);
+        if (!isNaN(asNumber) && PersonaTriggerTypeEnumLabels[asNumber]) {
+            return PersonaTriggerTypeEnumLabels[asNumber];
+        }
         return triggerType.toLowerCase();
     }
     // Convert numeric trigger_type to label using generated mapping from OpenAPI

package/dist/mcp/handlers/workflow/adapter.js CHANGED Viewed

@@ -65,6 +65,8 @@ export async function handleWorkflowAdapter(args, createClient, getDefaultEnvNam
             return handleWorkflow({
                 mode: "get",
                 persona_id: personaId,
+                compact: normalizedArgs.compact,
+                slim: normalizedArgs.slim,
                 env: normalizedArgs.env,
             }, client, () => undefined, cache);
         }

package/dist/mcp/handlers/workflow/index.js CHANGED Viewed

@@ -192,6 +192,29 @@ async function handleWorkflowGet(args, client, cache) {
         name: w.name,
         type: w.type,
     }));
+    // ── Compact mode: skip generation_schema, return only what agents need ──
+    const compact = args.compact === true;
+    if (compact) {
+        // Slim the workflow_def in compact mode (always — agents don't need displaySettings)
+        const compactWorkflowDef = workflowDef ? slimWorkflowDef(workflowDef) : null;
+        return {
+            persona_id: persona.id,
+            persona_name: persona.name,
+            persona_type: persona.type,
+            fingerprint: fingerprintPersona(persona),
+            workflow_def: compactWorkflowDef,
+            available_widgets: availableWidgets,
+            _compact: true,
+            _next_steps: [
+                "You have the current workflow_def and fingerprint.",
+                "Modify the workflow_def as needed.",
+                "Use knowledge('<action_name>') to look up any unfamiliar action's inputs/outputs.",
+                "Deploy with: workflow(mode='deploy', persona_id='...', base_fingerprint='<fingerprint>', workflow_def={...})",
+            ],
+            _tip: "Use compact=false (or omit compact) for the full generation_schema with all action I/O specs, constraints, and widget bindings. Use knowledge('<action_name>') for individual action specs.",
+        };
+    }
+    // ── Full mode (default): includes generation_schema + all guidance ──
     // Get generation schema for LLM — API-first + DE-first for structural invariants
     const schema = await generateSchema(client, cache);
     // Get deprecated actions (API-first, with fallback)

package/dist/mcp/handlers/workflow/validation.js CHANGED Viewed

@@ -863,7 +863,35 @@ export function validateCategorizersFallback(workflowDef) {
                 const catName = (cat.name ?? "").toLowerCase();
                 return catName === "fallback" || catName === "other";
             });
-            // Method 2: Check enumTypes for this categorizer (API format)
+            // Method 2a: Check typeArguments.categories.enumType reference (proto format)
+            // This is the STRUCTURAL link — the categorizer's typeArguments points to
+            // the exact enumType that defines its categories.
+            if (!hasFallback) {
+                const typeArgs = action.typeArguments;
+                const catArgs = typeArgs?.categories;
+                const enumTypeRef = catArgs?.enumType;
+                if (enumTypeRef) {
+                    // Extract the enum name from the reference (can be nested)
+                    let refEnumName = "";
+                    const refName = enumTypeRef.name;
+                    if (typeof refName === "string") {
+                        refEnumName = refName;
+                    }
+                    else if (typeof refName === "object" && refName !== null) {
+                        const nameObj = refName;
+                        if (typeof nameObj.name === "string") {
+                            refEnumName = nameObj.name;
+                        }
+                    }
+                    if (refEnumName) {
+                        const enumCategories = enumTypeCategoryMap.get(refEnumName.toLowerCase());
+                        if (enumCategories && (enumCategories.has("fallback") || enumCategories.has("other"))) {
+                            hasFallback = true;
+                        }
+                    }
+                }
+            }
+            // Method 2b: Heuristic name matching (API format fallback)
             // The enumType name often contains the categorizer node name
             // SECURITY: Only associate enumTypes that match this specific categorizer
             if (!hasFallback) {
@@ -879,10 +907,6 @@ export function validateCategorizersFallback(workflowDef) {
                     }
                 }
             }
-            // NOTE: Method 3 REMOVED - was too permissive
-            // Previous implementation checked if ANY enumType has Fallback, which incorrectly
-            // marked ALL categorizers as having Fallback if ANY one did.
-            // This caused false negatives (missed warnings) for categorizers without Fallback.
             if (!hasFallback) {
                 categorizersWithoutFallback.push(nodeName);
             }

package/dist/mcp/knowledge-guidance-topics.js CHANGED Viewed

@@ -1437,16 +1437,21 @@ result types, categories, and tags. Zero-result responses suggest common entry p
 ## Feedback → Confidence Loop
 When feedback with \`knowledge_ref\` is submitted, the referenced document's confidence_score
-is updated in DE in real-time. Negative feedback decreases score, positive increases it.
-DE native boost controls (boost-verified, demote-inferred) then handle ranking automatically.
+is updated in DE using a ratio-based model. The score reflects the ratio of negative to total
+feedback — a doc with 500 positives and 5 negatives stays healthy (1% negative = noise).
+Hard evidence (deploy failures, corrections) bypasses the corroboration gate; soft evidence
+(confusion, gaps) requires 2+ independent clients before applying. DE search boosts verified
+docs (+0.5) and demotes low-confidence docs (-0.8) at query time.
 Use \`feedback(method="submit", category="gap", knowledge_ref="doc-id", message="...")\`
-to trigger the confidence loop.`,
+to trigger the confidence loop. For deploy-related feedback, include \`context="deploy_failure"\`
+for stronger signal weighting.`,
         status: "verified",
         criticalRules: [
             "_actions is machine-readable — agents can execute actions directly without parsing text",
             "Confidence loop is best-effort — feedback submission never fails due to confidence update errors",
             "Related queries are deterministic — computed from result metadata, no LLM call",
+            "Hard evidence (deploy failures) applies immediately; soft evidence needs 2+ client corroboration",
         ],
     },
 };

package/dist/mcp/resources-dynamic.js CHANGED Viewed

@@ -923,6 +923,22 @@ See also: \`ema://rules/json-output-patterns\` for custom_agent/output_fields (s
         generate: async () => {
             return `# named_inputs Format (API Shape)
+> **WARNING**: The \`named_inputs_<Name>\` suffix pattern is a UI LABEL convention only.
+> The API wire format uses a SINGLE \`named_inputs\` key with all bindings inside one multiBinding.
+> Using separate \`named_inputs_Search_Results\`, \`named_inputs_Conversation\` keys causes HTTP 500.
+>
+> Correct wire format:
+> \`\`\`json
+> "named_inputs": {
+>   "multiBinding": {
+>     "elements": [
+>       { "namedBinding": { "name": "Search_Results", "value": { "actionOutput": { ... } } } },
+>       { "namedBinding": { "name": "Conversation", "value": { "actionOutput": { ... } } } }
+>     ]
+>   }
+> }
+> \`\`\`
 ## Two Levels — Both Required
 \`named_inputs\` has TWO aspects that agents must get right:
@@ -1681,22 +1697,33 @@ A doc can have **high relevance but low confidence** (score=0.95, confidence_sco
 ---
-## Scoring Model
+## Scoring Model (Ratio-Based)
+Confidence is determined by the **ratio** of negative to total feedback, not absolute counts.
+A doc with 500 positives and 5 negatives (1% negative) stays healthy — the 5 are noise.
 \`\`\`
-confidence_score = clamp(floor, max_score,
-  current_score + delta
-)
-where:
-  initial score = provenance_base (0.65–0.90)
-  delta = -0.08 per negative feedback, +0.04 per positive
-  max_score = provenance_base + 0.15 (caps positive inflation)
-  floor = provenance_base - ${MAX_NEGATIVE_DRIFT.toFixed(2)} (prevents catastrophic drops)
+negative_ratio = negative_count / (negative_count + positive_count)
+base_delta = f(ratio)   # -0.15 at >=80% neg, +0.05 at <20% neg
+volume_amp = log2(total) * 0.15   # more feedback = stronger signal (capped 2x)
+corroboration_amp = (unique_clients - 1) * 0.1   # multiple clients = stronger (capped 1.5x)
+delta = base_delta * volume_amp * corroboration_amp
+target = provenance_base + delta
+score converges 30% toward target per event (no jumps)
 \`\`\`
-Deltas accumulate from the current score — the 3rd negative hits harder because it starts from an already-reduced score.
+**Adaptive floor**: Standard floor is provenance_base - ${MAX_NEGATIVE_DRIFT.toFixed(2)}.
+But when ratio >= 70% negative AND total >= 5, floor lowers — allowing truly bad docs
+to reach low-confidence. Absolute minimum is 0.10.
+**Evidence classification**: Deploy failures (\`context="deploy_failure"\`) and corrections
+are "hard evidence" — they bypass the corroboration gate and get 1.5x delta multiplier.
+Confusion and gap reports are "soft evidence" — need 2+ independent clients before applying.
-**Abuse prevention**: Per-document cooldown (max 1 update per doc per client per hour) and per-session cap (max 50 updates per server lifetime) prevent runaway feedback from destroying document confidence. The provenance floor ensures high-trust docs (code-derived, curated) can never drop into low-confidence territory from feedback alone.
+**Abuse prevention**: Per-document cooldown (max 1 update per doc per client per hour),
+per-session cap (max 50 updates), and corroboration requirement for soft evidence prevent
+single-agent spam from affecting scores.
 ---
@@ -2552,6 +2579,9 @@ This is a **deployable** raw workflow_def — copy and adapt.
 }
 \`\`\`
+> **NOTE**: For new deployments, use \`namedResults\` with \`namedResultsEnabled: true\` instead of legacy \`results\`.
+> The legacy \`results\` format may return "Workflow has no outputs" for newly created personas.
 ---
 ## Deployment Flow

package/dist/mcp/tools.js CHANGED Viewed

@@ -578,6 +578,7 @@ Sync a persona between environments (dev, staging, prod). Always preview first.
 ## Get (return data for LLM to work with)
 - \`workflow(mode="get", persona_id="abc")\` - returns workflow_def, schema, patterns, deprecation warnings
+- \`workflow(mode="get", persona_id="abc", compact=true)\` - compact response: workflowName + fingerprint + workflow_def only (no generation_schema). Use for modifications to existing workflows where you already know the actions.
 - LLM analyzes, compares, and generates workflows using this data
 ## Validate (static validation with path enumeration)
@@ -654,6 +655,10 @@ Note: \`workflow_def_path\` also works with mode=validate and mode=optimize.
                         type: "number",
                         description: "Timeout in milliseconds (for mode=validate). Default: 100",
                     },
+                    compact: {
+                        type: "boolean",
+                        description: "For mode=get: return ONLY workflowName, fingerprint, workflow_def, persona_type, available_widgets — no generation_schema, no guidance. Reduces response from ~176K to <20K chars. Use for modifications to existing workflows. Default: false",
+                    },
                     slim: {
                         type: "boolean",
                         description: "For mode=get: return slimmed workflow_def (strips displaySettings, truncates long inline values). Reduces ~60-70% for large workflows. Default: false",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ema.co/mcp-toolkit",
-  "version": "2026.3.25-3",
+  "version": "2026.3.25-4",
   "description": "Ema AI Employee toolkit - MCP server, CLI, and SDK for managing AI Employees across environments",
   "type": "module",
   "main": "dist/index.js",