@ema.co/mcp-toolkit 2026.3.25-3 → 2026.3.25-4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -145,14 +145,14 @@ async function loginWithPasteToken(appUrl) {
145
145
  await new Promise((resolve) => {
146
146
  exec(cmd, () => resolve());
147
147
  });
148
- console.log(`\nBrowser opened to: ${appUrl}`);
149
- console.log("Log in normally, then grab your bearer token:\n");
150
- console.log(" 1. Open DevTools (F12 or Cmd+Opt+I)");
151
- console.log(" 2. Go to Network tab");
152
- console.log(" 3. Filter for 'generate_token_from_code'");
153
- console.log(" 4. Click the request → Response tab");
154
- console.log(" 5. Copy the access_token value (starts with eyJ...)\n");
155
- const rl = createInterface({ input: process.stdin, output: process.stdout });
148
+ console.error(`\nBrowser opened to: ${appUrl}`);
149
+ console.error("Log in normally, then grab your bearer token:\n");
150
+ console.error(" 1. Open DevTools (F12 or Cmd+Opt+I)");
151
+ console.error(" 2. Go to Network tab");
152
+ console.error(" 3. Filter for 'generate_token_from_code'");
153
+ console.error(" 4. Click the request → Response tab");
154
+ console.error(" 5. Copy the access_token value (starts with eyJ...)\n");
155
+ const rl = createInterface({ input: process.stdin, output: process.stderr });
156
156
  const token = await new Promise((resolve) => {
157
157
  rl.question("Paste bearer token here: ", (answer) => {
158
158
  rl.close();
@@ -205,8 +205,8 @@ export async function loginGoogleOAuth(opts = {}) {
205
205
  });
206
206
  });
207
207
  // Navigate to Cloud Console — triggers Google auth
208
- console.log("Opening browser for Google authentication...");
209
- console.log("Sign in with your Google account — the token will be captured automatically.\n");
208
+ console.error("Opening browser for Google authentication...");
209
+ console.error("Sign in with your Google account — the token will be captured automatically.\n");
210
210
  await page.goto("https://console.cloud.google.com/");
211
211
  const token = await tokenPromise;
212
212
  return token;
@@ -232,15 +232,15 @@ async function isPlaywrightAvailable() {
232
232
  // ─────────────────────────────────────────────────────────────────────────────
233
233
  async function promptTenantSelection(tenants) {
234
234
  const { createInterface } = await import("node:readline");
235
- console.log(`\nMultiple tenants available:`);
235
+ console.error(`\nMultiple tenants available:`);
236
236
  for (let i = 0; i < tenants.length; i++) {
237
237
  const t = tenants[i];
238
238
  const marker = t.is_current ? " ← current" : "";
239
- console.log(` [${i + 1}] ${t.company_name} (${t.domain})${marker}`);
239
+ console.error(` [${i + 1}] ${t.company_name} (${t.domain})${marker}`);
240
240
  }
241
241
  const defaultIdx = tenants.findIndex((t) => t.is_current);
242
242
  const defaultNum = defaultIdx >= 0 ? defaultIdx + 1 : 1;
243
- const rl = createInterface({ input: process.stdin, output: process.stdout });
243
+ const rl = createInterface({ input: process.stdin, output: process.stderr });
244
244
  const answer = await new Promise((resolve) => {
245
245
  rl.question(`\nSelect tenant [${defaultNum}]: `, (ans) => {
246
246
  rl.close();
@@ -250,11 +250,11 @@ async function promptTenantSelection(tenants) {
250
250
  const choice = answer === "" ? defaultNum : parseInt(answer, 10);
251
251
  if (choice >= 1 && choice <= tenants.length) {
252
252
  const selected = tenants[choice - 1];
253
- console.log(`Selected: ${selected.company_name}`);
253
+ console.error(`Selected: ${selected.company_name}`);
254
254
  return selected.tenant_id;
255
255
  }
256
256
  // Invalid input — use default
257
- console.log(`Invalid choice, using default: ${tenants[defaultNum - 1].company_name}`);
257
+ console.error(`Invalid choice, using default: ${tenants[defaultNum - 1].company_name}`);
258
258
  return tenants[defaultNum - 1].tenant_id;
259
259
  }
260
260
  // ─────────────────────────────────────────────────────────────────────────────
@@ -281,22 +281,22 @@ export async function login(opts = {}) {
281
281
  else if (await isPlaywrightAvailable()) {
282
282
  // Primary: Playwright-controlled browser, with paste-token fallback on failure
283
283
  try {
284
- console.log("Opening browser for login...");
285
- console.log("Authenticate normally — the token will be captured automatically.\n");
284
+ console.error("Opening browser for login...");
285
+ console.error("Authenticate normally — the token will be captured automatically.\n");
286
286
  tokenResponse = await loginWithPlaywright(appUrl, apiUrl, timeoutMs);
287
287
  }
288
288
  catch (err) {
289
289
  const msg = err instanceof Error ? err.message : String(err);
290
- console.log(`\nBrowser login failed: ${msg}`);
291
- console.log("Falling back to manual token entry...\n");
290
+ console.error(`\nBrowser login failed: ${msg}`);
291
+ console.error("Falling back to manual token entry...\n");
292
292
  tokenResponse = await loginWithPasteToken(appUrl);
293
293
  }
294
294
  }
295
295
  else {
296
296
  // Fallback: system browser + paste URL
297
- console.log("(Playwright not installed — using paste-URL fallback)\n");
298
- console.log("For automatic login, install playwright-chromium:");
299
- console.log(" npm install -g playwright-chromium\n");
297
+ console.error("(Playwright not installed — using paste-URL fallback)\n");
298
+ console.error("For automatic login, install playwright-chromium:");
299
+ console.error(" npm install -g playwright-chromium\n");
300
300
  tokenResponse = await loginWithPasteToken(appUrl);
301
301
  }
302
302
  const token = tokenResponse.access_token;
@@ -318,10 +318,10 @@ export async function login(opts = {}) {
318
318
  const match = accessibleTenants.find((t) => t.tenant_id === opts.tenantId);
319
319
  if (match) {
320
320
  selectedTenantId = match.tenant_id;
321
- console.log(`\nUsing tenant: ${match.company_name} (${match.domain})`);
321
+ console.error(`\nUsing tenant: ${match.company_name} (${match.domain})`);
322
322
  }
323
323
  else {
324
- console.log(`\nWarning: tenant ${opts.tenantId} not in accessible list. Using default.`);
324
+ console.error(`\nWarning: tenant ${opts.tenantId} not in accessible list. Using default.`);
325
325
  }
326
326
  }
327
327
  else if (process.stdin.isTTY) {
@@ -330,10 +330,10 @@ export async function login(opts = {}) {
330
330
  }
331
331
  else {
332
332
  // Non-interactive: log what's available
333
- console.log(`\nAccessible tenants (${accessibleTenants.length}):`);
333
+ console.error(`\nAccessible tenants (${accessibleTenants.length}):`);
334
334
  for (const t of accessibleTenants) {
335
335
  const marker = t.is_current ? " ← active" : "";
336
- console.log(` ${t.company_name} (${t.domain})${marker}`);
336
+ console.error(` ${t.company_name} (${t.domain})${marker}`);
337
337
  }
338
338
  }
339
339
  }
@@ -1,15 +1,21 @@
1
1
  export async function extractAgentCatalog(config) {
2
2
  const { AGENT_CATALOG } = await import("../../sdk/generated/agent-catalog.js");
3
3
  const documents = AGENT_CATALOG.map((action) => {
4
- const inputs = (action.inputs ?? []).map((i) => `${i.name}: ${i.type}`).join(", ");
5
- const outputs = (action.outputs ?? []).map((o) => `${o.name}: ${o.type}`).join(", ");
4
+ const inputs = (action.inputs ?? []).map((i) => `${i.name} (${i.type})${i.description ? `: ${i.description}` : ""}`).join("; ");
5
+ const outputs = (action.outputs ?? []).map((o) => `${o.name} (${o.type})${o.description ? `: ${o.description}` : ""}`).join("; ");
6
+ const criticalRules = (action.criticalRules ?? []).map((r) => `- ${r}`).join("\n");
7
+ // Build prose content optimized for semantic search.
8
+ // Repeat the actionName in natural language so DE embeddings match queries like "call_llm".
6
9
  const content = [
7
- action.displayName,
8
- action.description,
9
- action.whenToUse,
10
- inputs ? `Inputs: ${inputs}` : "",
11
- outputs ? `Outputs: ${outputs}` : "",
12
- action.aliases?.length ? `Aliases: ${action.aliases.join(", ")}` : "",
10
+ `# ${action.actionName} — ${action.displayName}`,
11
+ "",
12
+ `The ${action.actionName} action (also known as "${action.displayName}") ${action.description?.toLowerCase() ?? ""}`,
13
+ action.whenToUse ? `\nUse ${action.actionName} when: ${action.whenToUse}` : "",
14
+ action.whenNotToUse ? `\nDo NOT use ${action.actionName} when: ${action.whenNotToUse}` : "",
15
+ inputs ? `\n## Inputs for ${action.actionName}\n${inputs}` : "",
16
+ outputs ? `\n## Outputs from ${action.actionName}\n${outputs}` : "",
17
+ criticalRules ? `\n## Critical rules for ${action.actionName}\n${criticalRules}` : "",
18
+ action.aliases?.length ? `\nAlso known as: ${action.aliases.join(", ")}` : "",
13
19
  ].filter(Boolean).join("\n");
14
20
  return {
15
21
  id: `entity:${action.actionName}`,
@@ -53,7 +53,7 @@ export function computeConfidenceScore(provenance, feedbackDelta, boost) {
53
53
  const floor = Math.max(0, base - MAX_NEGATIVE_DRIFT);
54
54
  return Math.max(floor, Math.min(1.0, adjusted));
55
55
  }
56
- /** Per-event feedback deltas (used by runtime confidence-loop) */
56
+ /** Per-event feedback deltas (used by runtime confidence-loop for backward compat) */
57
57
  export const FEEDBACK_DELTA_NEGATIVE = -0.08;
58
58
  export const FEEDBACK_DELTA_POSITIVE = 0.04;
59
59
  /** Maximum boost above provenance base from positive feedback */
@@ -62,6 +62,12 @@ export const MAX_POSITIVE_BOOST = 0.15;
62
62
  export const MAX_NEGATIVE_DRIFT = 0.30;
63
63
  /** Minimum score delta to trigger a DE update (avoids churn) */
64
64
  export const MIN_SCORE_DELTA = 0.05;
65
+ /** Minimum unique clients required before score changes take effect */
66
+ export const MIN_CORROBORATION_CLIENTS = 2;
67
+ /** Evidence-based feedback gets stronger delta multiplier */
68
+ export const EVIDENCE_MULTIPLIER = 1.5; // deploy failure = hard evidence
69
+ /** Soft feedback gets weaker delta multiplier */
70
+ export const SOFT_MULTIPLIER = 0.5; // "this seems wrong" without evidence
65
71
  /** Score thresholds for label assignment — single source of truth */
66
72
  export const LABEL_THRESHOLDS = {
67
73
  verified: 0.80, // >= 0.80
@@ -79,19 +85,134 @@ export function scoreToLabel(score) {
79
85
  return "inferred";
80
86
  return "low-confidence";
81
87
  }
88
+ /**
89
+ * Compute confidence adjustment based on the ratio of negative to total feedback.
90
+ *
91
+ * Key principle: a doc with 500 positives and 5 negatives (1% negative) should NOT
92
+ * be downgraded — the 5 are likely confused agents, not a real problem.
93
+ *
94
+ * The ratio determines the direction. The total count determines the strength.
95
+ * Both matter: low ratio + high count = strong confidence. High ratio + low count = weak signal.
96
+ *
97
+ * @param negativeCount - Total negative feedback events
98
+ * @param positiveCount - Total positive feedback events
99
+ * @param uniqueClients - Number of distinct clients (for corroboration)
100
+ * @returns Confidence delta to apply (negative = downgrade, positive = upgrade)
101
+ */
102
+ export function computeFeedbackDelta(negativeCount, positiveCount, uniqueClients = 1) {
103
+ const total = negativeCount + positiveCount;
104
+ if (total === 0)
105
+ return 0;
106
+ const negativeRatio = negativeCount / total;
107
+ // Confidence bands based on negative ratio
108
+ // High negative ratio = downgrade, low ratio = upgrade, middle = neutral
109
+ let baseDelta;
110
+ if (negativeRatio >= 0.80) {
111
+ // Overwhelmingly negative — strong downgrade
112
+ baseDelta = -0.15;
113
+ }
114
+ else if (negativeRatio >= 0.60) {
115
+ // Mostly negative — moderate downgrade
116
+ baseDelta = -0.10;
117
+ }
118
+ else if (negativeRatio >= 0.40) {
119
+ // Mixed signals — slight downgrade (benefit of doubt to negative)
120
+ baseDelta = -0.05;
121
+ }
122
+ else if (negativeRatio >= 0.20) {
123
+ // Mostly positive with some complaints — neutral/slight upgrade
124
+ baseDelta = 0.02;
125
+ }
126
+ else {
127
+ // Overwhelmingly positive — upgrade
128
+ baseDelta = 0.05;
129
+ }
130
+ // Volume amplifier — more total feedback = more confidence in the signal
131
+ // But with diminishing returns (log scale)
132
+ const volumeMultiplier = Math.min(2.0, 1.0 + Math.log2(Math.max(1, total)) * 0.15);
133
+ // Corroboration amplifier — multiple independent clients agreeing is stronger
134
+ const corroborationMultiplier = Math.min(1.5, 1.0 + Math.max(0, uniqueClients - 1) * 0.1);
135
+ return baseDelta * volumeMultiplier * corroborationMultiplier;
136
+ }
137
+ /**
138
+ * Compute effective floor — allows breakthrough under sustained, high-ratio negative feedback.
139
+ *
140
+ * The base floor (provenance - MAX_NEGATIVE_DRIFT) protects against noise.
141
+ * The floor only gives way when: high negative ratio AND sufficient volume.
142
+ * This prevents a single bad feedback from breaking the floor.
143
+ */
144
+ export function effectiveFloor(provenanceBase, negativeRatio, totalCount) {
145
+ const baseFloor = Math.max(0, provenanceBase - MAX_NEGATIVE_DRIFT);
146
+ // Floor only gives way when: high negative ratio AND sufficient volume
147
+ if (negativeRatio < 0.70 || totalCount < 5)
148
+ return baseFloor;
149
+ // Beyond threshold: floor lowers proportionally to how negative the ratio is
150
+ const floorReduction = (negativeRatio - 0.70) * totalCount * 0.01;
151
+ return Math.max(0.10, baseFloor - floorReduction);
152
+ }
153
+ // ── Legacy graduated functions (kept for backward compat imports) ────────────
154
+ /** @deprecated Use computeFeedbackDelta instead */
155
+ export function graduatedNegativeDelta(negativeCount) {
156
+ if (negativeCount >= 8)
157
+ return -0.14;
158
+ if (negativeCount >= 5)
159
+ return -0.12;
160
+ if (negativeCount >= 3)
161
+ return -0.08;
162
+ if (negativeCount >= 2)
163
+ return -0.06;
164
+ return -0.04;
165
+ }
166
+ /** @deprecated Use computeFeedbackDelta instead */
167
+ export function graduatedPositiveDelta(positiveCount) {
168
+ if (positiveCount >= 5)
169
+ return 0.06;
170
+ if (positiveCount >= 3)
171
+ return 0.04;
172
+ return 0.03;
173
+ }
82
174
  /**
83
175
  * Compute a per-event feedback delta and apply to current score.
84
176
  * Used by the runtime confidence loop when a single feedback event arrives.
85
177
  *
86
- * @returns { newScore, label } clamped to [0, provenanceBase + MAX_POSITIVE_BOOST]
178
+ * When feedbackHistory is provided, uses the ratio-based model (computeFeedbackDelta)
179
+ * and moves the current score 30% of the way toward the target per event. Without
180
+ * feedbackHistory, uses flat deltas for backward compatibility.
181
+ *
182
+ * @returns { newScore, label } — clamped to [floor, provenanceBase + MAX_POSITIVE_BOOST]
87
183
  */
88
- export function applyFeedbackDelta(currentScore, provenance, isNegative) {
89
- const delta = isNegative ? FEEDBACK_DELTA_NEGATIVE : FEEDBACK_DELTA_POSITIVE;
184
+ export function applyFeedbackDelta(currentScore, provenance, isNegative, feedbackHistory) {
90
185
  const provenanceBase = PROVENANCE_BASE_SCORES[provenance] ?? PROVENANCE_BASE_SCORES["inferred"];
91
186
  const maxScore = provenanceBase + MAX_POSITIVE_BOOST;
92
- const minScore = Math.max(0, provenanceBase - MAX_NEGATIVE_DRIFT);
93
- const newScore = Math.max(minScore, Math.min(maxScore, currentScore + delta));
94
- return { newScore, label: scoreToLabel(newScore) };
187
+ let delta;
188
+ let minScore;
189
+ if (feedbackHistory) {
190
+ // Ratio-based model — direction from ratio, strength from volume
191
+ const { negativeCount, positiveCount, uniqueClients } = feedbackHistory;
192
+ // Add the current event to history for calculation
193
+ const adjNeg = isNegative ? negativeCount + 1 : negativeCount;
194
+ const adjPos = isNegative ? positiveCount : positiveCount + 1;
195
+ const total = adjNeg + adjPos;
196
+ const negRatio = total > 0 ? adjNeg / total : 0;
197
+ delta = computeFeedbackDelta(adjNeg, adjPos, uniqueClients);
198
+ // Apply delta relative to provenance base, not current score
199
+ const targetScore = provenanceBase + delta;
200
+ // Move current score toward target (don't jump, converge)
201
+ const moveRate = 0.3; // converge 30% toward target per event
202
+ const newScore = currentScore + (targetScore - currentScore) * moveRate;
203
+ minScore = effectiveFloor(provenanceBase, negRatio, total);
204
+ return {
205
+ newScore: Math.max(minScore, Math.min(maxScore, newScore)),
206
+ label: scoreToLabel(Math.max(minScore, Math.min(maxScore, newScore))),
207
+ };
208
+ }
209
+ else {
210
+ // Legacy flat model for backward compatibility
211
+ delta = isNegative ? FEEDBACK_DELTA_NEGATIVE : FEEDBACK_DELTA_POSITIVE;
212
+ minScore = Math.max(0, provenanceBase - MAX_NEGATIVE_DRIFT);
213
+ const newScore = Math.max(minScore, Math.min(maxScore, currentScore + delta));
214
+ return { newScore, label: scoreToLabel(newScore) };
215
+ }
95
216
  }
96
217
  /**
97
218
  * Feedback signal classification — single source of truth.
@@ -281,25 +402,16 @@ function buildReport(source, totalEntries, correlated, signalMap) {
281
402
  const signals = [];
282
403
  const lowConfidence = [];
283
404
  for (const [docId, signal] of signalMap) {
284
- const netNegative = signal.negative - signal.positive;
285
- let delta;
286
- // Stepped delta tiers based on aggregate feedback count
287
- if (netNegative >= 5) {
288
- delta = -0.40;
289
- }
290
- else if (netNegative >= 3) {
291
- delta = -0.25;
292
- }
293
- else if (netNegative >= 1) {
294
- delta = -0.10;
295
- }
296
- else {
297
- delta = 0;
298
- }
299
- // Estimate label from score using "curated" as a conservative baseline.
405
+ // Ratio-based delta — direction from ratio, strength from volume
406
+ const total = signal.negative + signal.positive;
407
+ const negRatio = total > 0 ? signal.negative / total : 0;
408
+ const delta = computeFeedbackDelta(signal.negative, signal.positive);
409
+ // Estimate label using "curated" baseline + adaptive floor.
300
410
  // This is advisory — applyConfidenceSignals() recomputes with actual provenance.
301
- // For low-provenance docs (raw-document, inferred), the real label may differ.
302
- const estimatedScore = computeConfidenceScore("curated", delta);
411
+ const provenanceBase = PROVENANCE_BASE_SCORES["curated"];
412
+ const maxScore = provenanceBase + MAX_POSITIVE_BOOST;
413
+ const minScore = effectiveFloor(provenanceBase, negRatio, total);
414
+ const estimatedScore = Math.max(minScore, Math.min(maxScore, provenanceBase + delta));
303
415
  const label = scoreToLabel(estimatedScore);
304
416
  if (label === "low-confidence") {
305
417
  lowConfidence.push(docId);
@@ -478,9 +478,22 @@ async function searchDirect(query, options) {
478
478
  // Dynamic domain boost — if query signals a specific platform, boost its domain
479
479
  // and demote the other. DE serves both platforms; this keeps results focused.
480
480
  const queryBoost = buildQueryBoostSpec(query, filters);
481
- if (queryBoost) {
482
- body.boostSpec = queryBoost;
483
- }
481
+ // Confidence boost — always applied. Verified docs rank higher, low-confidence lower.
482
+ // This makes the feedback loop visible at search time: downgraded docs get demoted
483
+ // regardless of relevance. DE boost values are additive to relevance score.
484
+ // Values calibrated against signal viewer: semantic relevance spreads 0.07-0.99,
485
+ // so boosts must be large enough to move docs across that range.
486
+ const confidenceBoosts = [
487
+ { condition: 'confidence: ANY("verified")', boost: 0.5 },
488
+ { condition: 'confidence: ANY("inferred")', boost: -0.2 },
489
+ { condition: 'confidence: ANY("low-confidence")', boost: -0.8 },
490
+ ];
491
+ const querySpecs = (queryBoost?.conditionBoostSpecs ?? []);
492
+ const allBoosts = [
493
+ ...querySpecs,
494
+ ...confidenceBoosts,
495
+ ];
496
+ body.boostSpec = { conditionBoostSpecs: allBoosts };
484
497
  // Always request snippets — works with chunked datastores.
485
498
  // (Extractive answers do NOT work with chunking, only snippets.)
486
499
  // For answer mode, also request summary with citations.
@@ -35,7 +35,7 @@ export const WORKFLOW_DEF_SCHEMA = {
35
35
  namespaces: {
36
36
  type: "array",
37
37
  items: { type: "string" },
38
- description: "Namespace path (e.g., ['ema', 'personas', '<id>'])",
38
+ description: "Namespace path MUST be copied exactly from workflow(mode='get') response. Do NOT construct manually.",
39
39
  },
40
40
  name: {
41
41
  type: "string",
@@ -149,8 +149,8 @@ function validateEnumTypes(wf, issues) {
149
149
  }
150
150
  enumNames.add(name.name);
151
151
  }
152
- // Validate options array
153
- const options = et.options;
152
+ // Validate options/values array — proto uses "options", compiled proto uses "values"
153
+ const options = (et.options ?? et.values);
154
154
  if (!Array.isArray(options) || options.length === 0) {
155
155
  issues.push({
156
156
  path: `${prefix}.options`,
@@ -40,16 +40,17 @@ export function classifyResult(result, unfilteredCount) {
40
40
  return "error_500";
41
41
  return "error";
42
42
  }
43
- // Success shapes
43
+ // Success shapes — order matters: check deploy before created,
44
+ // because deploy results also carry persona_id but aren't "created".
45
+ if (result.deployed === true || result.workflow_deployed === true || (result.mode === "deploy" && status === "deployed")) {
46
+ return "deployed";
47
+ }
44
48
  if (result.success === true || result.persona_id) {
45
49
  // Created entity
46
50
  if (result.persona_id && !result.workflow_def) {
47
51
  return "created";
48
52
  }
49
53
  }
50
- if (result.deployed === true || (result.mode === "deploy" && !error)) {
51
- return "deployed";
52
- }
53
54
  // List shapes — check count
54
55
  const count = typeof result.count === "number" ? result.count : undefined;
55
56
  if (count !== undefined) {
@@ -30,7 +30,8 @@ export function getDefaultGuidance(shape, ctx) {
30
30
  };
31
31
  case "deployed":
32
32
  return {
33
- _next_step: "Verify: workflow(mode='get', persona_id='{persona_id}') confirm workflow is active.",
33
+ _next_step: "Test your deployed workflow: conversation(method='create', persona_id='{persona_id}') for chat, or upload documents via persona(id='{persona_id}', data={method:'upload', path:'/path/to/doc.pdf'}) for dashboard.",
34
+ _tip: "Deployed successfully. The workflow is now active.",
34
35
  };
35
36
  case "deploy_failed":
36
37
  return {
@@ -119,6 +119,7 @@ function generateDecisionFlow(tools) {
119
119
  2. \`knowledge("workflow patterns for <your use case>")\` → learn the correct workflow pattern
120
120
  3. \`${createPersona}\` → creates persona
121
121
  4. \`${getWorkflow}\` → get starter workflow + generation schema (FULL input/output specs from API) + fingerprint
122
+ Use \`compact=true\` for a smaller response (workflowName + fingerprint + workflow_def only, no schema).
122
123
  5. Build a complete workflow_def using the generation schema — it shows ALL required inputs per action
123
124
  6. Upload data sources if needed — \`persona(id="<new_id>", data={method:"upload", path:"/path/to/doc.pdf"})\`
124
125
  7. \`workflow(mode="validate", persona_id="...", workflow_def={...})\` → catch errors BEFORE deploying
@@ -131,7 +132,7 @@ function generateDecisionFlow(tools) {
131
132
  const get = opExample("workflow", "Get");
132
133
  const deploy = opExample("workflow", "Deploy");
133
134
  sections.push(`**Modifying an existing AI Employee's workflow?**
134
- 1. \`${get}\` → get current workflow_def + schema + fingerprint
135
+ 1. \`${get}\` → get current workflow_def + schema + fingerprint (use \`compact=true\` for smaller response)
135
136
  2. LLM modifies the workflow_def JSON (use the returned workflow_def as format reference)
136
137
  3. \`workflow(mode="validate", persona_id="...", workflow_def={...})\` → catch errors before deploying
137
138
  4. \`${deploy}\``);
@@ -16,7 +16,7 @@
16
16
  */
17
17
  import { getDocument, upsertDocument } from "../../../knowledge/search-client.js";
18
18
  import { sanitizeId } from "../../../knowledge/pipeline/document.js";
19
- import { CATEGORY_SIGNAL, FeedbackSignal, MIN_SCORE_DELTA, applyFeedbackDelta, } from "../../../knowledge/pipeline/confidence.js";
19
+ import { PROVENANCE_BASE_SCORES, CATEGORY_SIGNAL, FeedbackSignal, MIN_SCORE_DELTA, MIN_CORROBORATION_CLIENTS, EVIDENCE_MULTIPLIER, MAX_POSITIVE_BOOST, applyFeedbackDelta, effectiveFloor, scoreToLabel, } from "../../../knowledge/pipeline/confidence.js";
20
20
  import { getOrCreateClientId } from "../feedback/client-id.js";
21
21
  // ─────────────────────────────────────────────────────────────────────────────
22
22
  // Guard constants
@@ -38,18 +38,36 @@ export const SESSION_UPDATE_CAP = 50;
38
38
  const cooldownMap = new Map();
39
39
  /** Session-wide update counter */
40
40
  let sessionUpdateCount = 0;
41
+ /** Per-document feedback history for graduated scoring */
42
+ const feedbackHistoryMap = new Map();
43
+ /** Classify feedback strength based on context */
44
+ export function classifyEvidence(category, context) {
45
+ // Deploy failures are hard evidence
46
+ if (context?.includes("deploy_failure"))
47
+ return "hard";
48
+ if (category === "correction" && context?.includes("deploy"))
49
+ return "hard";
50
+ // Any remaining correction is also classified as hard evidence (there is no separate medium tier)
51
+ if (category === "correction")
52
+ return "hard";
53
+ // Everything else is soft signal
54
+ return "soft";
55
+ }
41
56
  /** Reset all guard state (for test isolation) */
42
57
  export function _resetGuardState() {
43
58
  cooldownMap.clear();
44
59
  sessionUpdateCount = 0;
60
+ feedbackHistoryMap.clear();
45
61
  }
46
62
  /**
47
63
  * Process a feedback entry and update DE document confidence if applicable.
48
64
  * Returns the update details if a document was modified, undefined otherwise.
49
65
  *
50
66
  * Best-effort: never throws. Failures are logged but don't block feedback submission.
67
+ *
68
+ * @param context - Optional context string for evidence classification (e.g., "deploy_failure")
51
69
  */
52
- export async function processConfidenceFeedback(category, knowledgeRef, qualityData) {
70
+ export async function processConfidenceFeedback(category, knowledgeRef, qualityData, context) {
53
71
  // Classify using the semantic signal map (single source of truth)
54
72
  const signal = CATEGORY_SIGNAL[category];
55
73
  if (!signal || signal === FeedbackSignal.NEUTRAL)
@@ -75,9 +93,10 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
75
93
  }
76
94
  // ── Guard: per-document cooldown ────────────────────────────────────────
77
95
  const docId = sanitizeId(knowledgeRef);
96
+ let clientId = "unknown";
78
97
  if (getCooldownWindow() > 0) {
79
98
  try {
80
- const clientId = await getOrCreateClientId();
99
+ clientId = await getOrCreateClientId();
81
100
  const cooldownKey = `${clientId}:${docId}`;
82
101
  const lastUpdate = cooldownMap.get(cooldownKey);
83
102
  const now = Date.now();
@@ -89,6 +108,41 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
89
108
  // Best-effort — if client ID fails, skip cooldown check
90
109
  }
91
110
  }
111
+ else {
112
+ try {
113
+ clientId = await getOrCreateClientId();
114
+ }
115
+ catch {
116
+ // Best-effort
117
+ }
118
+ }
119
+ // ── Track feedback history ──────────────────────────────────────────────
120
+ let accumulator = feedbackHistoryMap.get(docId);
121
+ if (!accumulator) {
122
+ accumulator = { negativeCount: 0, positiveCount: 0, uniqueClients: new Set(), lastUpdated: Date.now() };
123
+ feedbackHistoryMap.set(docId, accumulator);
124
+ }
125
+ // Snapshot BEFORE incrementing — applyFeedbackDelta adds the current event internally
126
+ const feedbackHistorySnapshot = {
127
+ negativeCount: accumulator.negativeCount,
128
+ positiveCount: accumulator.positiveCount,
129
+ uniqueClients: accumulator.uniqueClients.size + (accumulator.uniqueClients.has(clientId) ? 0 : 1),
130
+ };
131
+ if (isNegative)
132
+ accumulator.negativeCount++;
133
+ if (isPositive)
134
+ accumulator.positiveCount++;
135
+ accumulator.uniqueClients.add(clientId);
136
+ accumulator.lastUpdated = Date.now();
137
+ // ── Evidence classification ─────────────────────────────────────────────
138
+ const evidence = classifyEvidence(category, context);
139
+ const isHardEvidence = evidence === "hard";
140
+ // ── Corroboration check ─────────────────────────────────────────────────
141
+ // Require multiple independent clients unless hard evidence (deploy failure)
142
+ if (!isHardEvidence && accumulator.uniqueClients.size < MIN_CORROBORATION_CLIENTS) {
143
+ // Accumulate count but defer score change — single source could be noise
144
+ return undefined;
145
+ }
92
146
  try {
93
147
  // Look up the document — sanitize ID to match DE storage format (colons → underscores)
94
148
  const docResult = await getDocument(docId);
@@ -97,11 +151,27 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
97
151
  const structData = (docResult.document.structData ?? {});
98
152
  const currentScore = structData.confidence_score ?? 0.5;
99
153
  const provenance = structData.provenance ?? "inferred";
100
- // Compute new score using shared model (same constants + label function as batch path)
101
- const { newScore, label: newConfidence } = applyFeedbackDelta(currentScore, provenance, isNegative);
154
+ // Compute new score using ratio-based model
155
+ // feedbackHistorySnapshot has counts BEFORE this event; applyFeedbackDelta adds the current event
156
+ const { newScore: rawNewScore } = applyFeedbackDelta(currentScore, provenance, isNegative, feedbackHistorySnapshot);
157
+ // Apply evidence multiplier — hard evidence moves score faster
158
+ let newScore = rawNewScore;
159
+ if (isHardEvidence && rawNewScore !== currentScore) {
160
+ const rawDelta = rawNewScore - currentScore;
161
+ const amplifiedDelta = rawDelta * EVIDENCE_MULTIPLIER;
162
+ // Re-clamp after amplification
163
+ const provenanceBase = PROVENANCE_BASE_SCORES[provenance] ?? PROVENANCE_BASE_SCORES["inferred"];
164
+ const total = accumulator.negativeCount + accumulator.positiveCount;
165
+ const negRatio = total > 0 ? accumulator.negativeCount / total : 0;
166
+ const minScore = effectiveFloor(provenanceBase, negRatio, total);
167
+ const maxScore = provenanceBase + MAX_POSITIVE_BOOST;
168
+ newScore = Math.max(minScore, Math.min(maxScore, currentScore + amplifiedDelta));
169
+ }
102
170
  // Skip if change is too small
103
171
  if (Math.abs(newScore - currentScore) < MIN_SCORE_DELTA)
104
172
  return undefined;
173
+ // Compute final label
174
+ const finalLabel = scoreToLabel(newScore);
105
175
  // Update the document in DE with new confidence
106
176
  // Note: DE only supports top-level field masks, not sub-field paths within structData
107
177
  const updatedDoc = {
@@ -109,11 +179,11 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
109
179
  structData: {
110
180
  ...structData,
111
181
  confidence_score: newScore,
112
- confidence: newConfidence,
182
+ confidence: finalLabel,
113
183
  confidence_updated_at: new Date().toISOString(),
114
184
  confidence_reason: isNegative
115
- ? `Downgraded: ${category} feedback`
116
- : `Upgraded: ${category} feedback`,
185
+ ? `Downgraded: ${category} feedback (${evidence} evidence, ${accumulator.uniqueClients.size} clients)`
186
+ : `Upgraded: ${category} feedback (${accumulator.uniqueClients.size} clients)`,
117
187
  },
118
188
  };
119
189
  const result = await upsertDocument(updatedDoc, {
@@ -127,7 +197,6 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
127
197
  sessionUpdateCount++;
128
198
  if (getCooldownWindow() > 0) {
129
199
  try {
130
- const clientId = await getOrCreateClientId();
131
200
  cooldownMap.set(`${clientId}:${docId}`, Date.now());
132
201
  }
133
202
  catch {
@@ -138,8 +207,8 @@ export async function processConfidenceFeedback(category, knowledgeRef, qualityD
138
207
  documentId: knowledgeRef,
139
208
  previousScore: Math.round(currentScore * 1000) / 1000,
140
209
  newScore: Math.round(newScore * 1000) / 1000,
141
- newConfidence,
142
- reason: `${isNegative ? "Downgraded" : "Upgraded"} by ${category} feedback`,
210
+ newConfidence: finalLabel,
211
+ reason: `${isNegative ? "Downgraded" : "Upgraded"} by ${category} feedback (${evidence} evidence, ${accumulator.uniqueClients.size} clients)`,
143
212
  };
144
213
  }
145
214
  catch (err) {
@@ -18,7 +18,7 @@
18
18
  * )
19
19
  * ```
20
20
  */
21
- import { resolvePersona, getTemplates, getPersonaTypeFromTemplate, sanitizePersonaById, } from "../utils.js";
21
+ import { resolvePersona, getTemplates, getPersonaTypeFromTemplate, normalizeTriggerType, sanitizePersonaById, } from "../utils.js";
22
22
  import { sanitizeWidgets } from "../../../sdk/proto-config.js";
23
23
  import { validateWorkflowOutputs } from "../workflow/validate-outputs.js";
24
24
  import { SanitizationSession, detectWithPatterns, } from "../../domain/sanitizer.js";
@@ -62,6 +62,7 @@ export async function handleCreate(args, client, getTemplateId) {
62
62
  const includeData = args.include_data ?? args.clone_data;
63
63
  // Resolve source - could be template or persona
64
64
  let templateId;
65
+ let templateTriggerType; // normalized trigger_type from template
65
66
  let sourcePersonaId;
66
67
  let sourcePersona = null;
67
68
  let sourcePersonaType;
@@ -123,6 +124,7 @@ export async function handleCreate(args, client, getTemplateId) {
123
124
  }
124
125
  if (template) {
125
126
  templateId = template.id;
127
+ templateTriggerType = normalizeTriggerType(template.trigger_type);
126
128
  fromType = "template";
127
129
  }
128
130
  }
@@ -149,6 +151,14 @@ export async function handleCreate(args, client, getTemplateId) {
149
151
  _tip: "Use from='<template_id>' with an ID from the list above, or type='voice|chat|dashboard' for a standard template.",
150
152
  };
151
153
  }
154
+ // Resolve trigger_type from template if not already set (deprecated template_id or args.type paths)
155
+ if (fromType === "template" && templateId && !templateTriggerType) {
156
+ const templates = await getTemplates(client);
157
+ const resolvedTemplate = templates.find(t => t.id === templateId);
158
+ if (resolvedTemplate) {
159
+ templateTriggerType = normalizeTriggerType(resolvedTemplate.trigger_type);
160
+ }
161
+ }
152
162
  // For persona cloning, default include_data to true
153
163
  const effectiveIncludeData = sourcePersonaId ? (includeData ?? true) : false;
154
164
  // API requires EITHER template_id OR source_persona_id, NOT both
@@ -206,6 +216,35 @@ export async function handleCreate(args, client, getTemplateId) {
206
216
  // The workflow may not be immediately visible via getPersonaById due to async processing,
207
217
  // but it IS copied. Don't emit false "workflow not copied" warnings.
208
218
  const workflowClonedByApi = !!sourcePersonaId;
219
+ // Extract workflowName so agents can deploy without calling workflow(mode="get")
220
+ // (which returns 176K-301K chars). Best-effort: never fail the create over this.
221
+ // The persona GET may not include workflow_def immediately after creation (async copy),
222
+ // so we try but also provide a lightweight fallback hint.
223
+ let workflowName;
224
+ let hasVoiceWidgets = false;
225
+ try {
226
+ // Small delay to allow workflow copy to propagate
227
+ await new Promise(r => setTimeout(r, 500));
228
+ const newPersonaForWf = await client.getPersonaById(newPersonaId);
229
+ const wfDef = newPersonaForWf?.workflow_def;
230
+ if (wfDef && typeof wfDef === "object" && "workflowName" in wfDef) {
231
+ workflowName = wfDef.workflowName;
232
+ }
233
+ // Detect voice: explicit type arg or voiceSettings widget presence
234
+ // Voice personas use chat trigger_type (1) but have voiceSettings widgets
235
+ if (args.type?.toLowerCase() === "voice") {
236
+ hasVoiceWidgets = true;
237
+ }
238
+ else {
239
+ const widgets = newPersonaForWf?.proto_config?.widgets;
240
+ if (Array.isArray(widgets)) {
241
+ hasVoiceWidgets = widgets.some((w) => w?.name === "voiceSettings");
242
+ }
243
+ }
244
+ }
245
+ catch {
246
+ // Best-effort — don't fail create if we can't fetch workflowName
247
+ }
209
248
  // Apply workflow_def if provided (the create API doesn't accept workflow directly)
210
249
  const workflowDef = args.workflow_def;
211
250
  let workflowApplied = false;
@@ -260,12 +299,15 @@ export async function handleCreate(args, client, getTemplateId) {
260
299
  workflowApplied,
261
300
  workflowDef,
262
301
  workflowError,
302
+ workflowName,
263
303
  sourcePersonaType,
264
304
  dashboardCloneResult,
265
305
  actionsError: validation.errors.join("; "),
266
306
  createdFromTemplate: fromType === "template",
267
307
  clonedFromPersona: fromType === "persona",
268
308
  workflowOutputWarnings,
309
+ templateTriggerType,
310
+ hasVoiceWidgets,
269
311
  });
270
312
  }
271
313
  // Build execution context
@@ -287,11 +329,14 @@ export async function handleCreate(args, client, getTemplateId) {
287
329
  workflowDef,
288
330
  workflowError,
289
331
  workflowOutputWarnings,
332
+ workflowName,
290
333
  sourcePersonaType,
291
334
  // Don't include dashboardCloneResult - actions handle data operations
292
335
  actionsResult,
293
336
  createdFromTemplate: fromType === "template",
294
337
  clonedFromPersona: fromType === "persona",
338
+ templateTriggerType,
339
+ hasVoiceWidgets,
295
340
  });
296
341
  }
297
342
  // ═══════════════════════════════════════════════════════════════════════════
@@ -313,10 +358,13 @@ export async function handleCreate(args, client, getTemplateId) {
313
358
  workflowDef,
314
359
  workflowError,
315
360
  workflowOutputWarnings,
361
+ workflowName,
316
362
  sourcePersonaType,
317
363
  dashboardCloneResult,
318
364
  createdFromTemplate: fromType === "template",
319
365
  clonedFromPersona: fromType === "persona",
366
+ templateTriggerType,
367
+ hasVoiceWidgets,
320
368
  });
321
369
  }
322
370
  return buildCreateResult({
@@ -327,12 +375,74 @@ export async function handleCreate(args, client, getTemplateId) {
327
375
  workflowDef,
328
376
  workflowError,
329
377
  workflowOutputWarnings,
378
+ workflowName,
330
379
  sourcePersonaType,
331
380
  dashboardCloneResult,
332
381
  createdFromTemplate: fromType === "template",
333
382
  clonedFromPersona: fromType === "persona",
383
+ templateTriggerType,
384
+ hasVoiceWidgets,
334
385
  });
335
386
  }
387
/**
 * Return type-specific workflow next-steps so agents get relevant guidance.
 *
 * Without this, every persona type gets chat-oriented instructions
 * ("add intent categorization, search nodes") which confuses agents
 * working on dashboards, doc-gen, or agent-QA personas.
 *
 * @param {string|undefined|null} triggerType - Trigger type label such as "chat",
 *   "chatbot", "dashboard", "thread", "document_generation" or "agent_qa". Matched
 *   case-insensitively and ignoring surrounding whitespace; any other value
 *   (including undefined, null, or a non-string) yields the generic fallback steps.
 * @param {string} personaId - Persona ID interpolated into the workflow(...) hints.
 * @param {boolean} [hasVoiceWidgets] - When truthy, the chat/chatbot steps get an
 *   extra trailing NOTE about voice personas using chat_trigger.
 * @returns {string[]} Ordered, human-readable next steps (a fresh array per call).
 */
function getTypeSpecificNextSteps(triggerType, personaId, hasVoiceWidgets) {
    // Match on a canonical form: a label that arrives with different casing or
    // stray whitespace should still get its type-specific steps instead of
    // silently degrading to the generic fallback.
    const normalizedType = typeof triggerType === "string"
        ? triggerType.trim().toLowerCase()
        : undefined;
    switch (normalizedType) {
        case "chat":
        case "chatbot": {
            const steps = [
                "1. BUILD WORKFLOW: Add intent categorization, search nodes, response handling",
                `2. If uploading docs: Workflow MUST have search/v2 node or documents will NOT be used`,
                `3. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
                `4. Deploy complete workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
            ];
            // The voice note is only emitted here, alongside the chat steps.
            if (hasVoiceWidgets) {
                steps.push("NOTE: Voice personas use chat_trigger (NOT voice_trigger). The voice_trigger is for a different workflow pattern.");
            }
            return steps;
        }
        case "dashboard":
            return [
                "1. BUILD WORKFLOW: Add entity_extraction_with_documents for document processing",
                "2. Configure extraction_columns for the data you want to extract",
                `3. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
                `4. Deploy workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
            ];
        case "thread":
            return [
                "1. BUILD WORKFLOW: Add thread_categorizer, search nodes, response handling",
                "2. This is a thread-based persona — workflows trigger on support tickets, not chat",
                `3. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
                `4. Deploy workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
            ];
        case "document_generation":
            return [
                "1. CHECK EXISTING WORKFLOW: The template may already include a functional document_synthesis workflow",
                `2. Get current workflow: workflow(mode='get', persona_id='${personaId}') — check if it already has document_synthesis`,
                "3. If functional: redeploy as-is. If not: add document_synthesis node with workflowInputs",
                `4. Deploy workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
            ];
        case "agent_qa":
            return [
                "1. IMPORTANT: Agent QA workflows require UI configuration in the Ema platform",
                "2. MCP-based workflow building is NOT yet fully supported for Agent QA",
                "3. Configure QA parameters (contact reasons, scoring rubric) in the Ema UI",
                `4. Use workflow(mode='get', persona_id='${personaId}') to inspect the current workflow after UI setup`,
            ];
        default:
            // Fallback: generic guidance for unknown or new trigger types
            return [
                "1. BUILD WORKFLOW: Add the appropriate nodes for this persona type",
                `2. Get current workflow: workflow(mode='get', persona_id='${personaId}')`,
                `3. Deploy complete workflow: workflow(mode='deploy', persona_id='${personaId}', workflow_def={...})`,
            ];
    }
}
336
446
  /**
337
447
  * Build the create/clone result object
338
448
  */
@@ -341,6 +451,11 @@ function buildCreateResult(opts) {
341
451
  success: true,
342
452
  persona_id: opts.newPersonaId,
343
453
  name: opts.name,
454
+ // Include workflowName so agents can deploy without calling workflow(mode="get")
455
+ // which returns 176K-301K chars and is unusable in most contexts.
456
+ ...(opts.workflowName ? { workflowName: opts.workflowName } : {
457
+ _workflowName_hint: `workflowName not yet available (async copy in progress). Get it with: persona(id="${opts.newPersonaId}", include_workflow=true) — much smaller than workflow(mode="get").`,
458
+ }),
344
459
  };
345
460
  if (opts.sanitization) {
346
461
  result.sanitization = opts.sanitization;
@@ -377,14 +492,11 @@ function buildCreateResult(opts) {
377
492
  }
378
493
  // ── CRITICAL GUIDANCE: Template workflows are minimal starters ──
379
494
  // This is where LLMs often go wrong - they create from template and think they're done
495
+ // Provide type-specific next steps so agents get relevant guidance (not chat-oriented
496
+ // instructions for dashboard/docgen/agent-qa personas).
380
497
  if (opts.createdFromTemplate && !opts.workflowApplied) {
381
498
  result._warning = "PERSONA CREATED BUT WORKFLOW IS INCOMPLETE. Template workflows are minimal starters (just trigger→respond).";
382
- result._required_next_steps = [
383
- "1. BUILD WORKFLOW: Add intent categorization, search nodes, response handling",
384
- `2. If uploading docs: Workflow MUST have search/v2 node or documents will NOT be used`,
385
- `3. Get current workflow: workflow(mode='get', persona_id='${opts.newPersonaId}')`,
386
- `4. Deploy complete workflow: workflow(mode='deploy', persona_id='${opts.newPersonaId}', workflow_def={...})`,
387
- ];
499
+ result._required_next_steps = getTypeSpecificNextSteps(opts.templateTriggerType, opts.newPersonaId, opts.hasVoiceWidgets);
388
500
  result._common_mistake = "Creating from template, uploading docs, and declaring 'done' WITHOUT building the workflow. The deploy will now BLOCK this pattern.";
389
501
  }
390
502
  if (opts.dashboardCloneResult) {
@@ -15,8 +15,12 @@ import { PersonaTriggerTypeEnumLabels } from "../../sdk/generated/api-types.js";
15
15
  export function normalizeTriggerType(triggerType) {
16
16
  if (triggerType === undefined || triggerType === null)
17
17
  return undefined;
18
- // If already a string, normalize to lowercase
18
+ // If already a string, try parsing as number first (API sometimes returns "2" instead of 2)
19
19
  if (typeof triggerType === "string") {
20
+ const asNumber = Number(triggerType);
21
+ if (!isNaN(asNumber) && PersonaTriggerTypeEnumLabels[asNumber]) {
22
+ return PersonaTriggerTypeEnumLabels[asNumber];
23
+ }
20
24
  return triggerType.toLowerCase();
21
25
  }
22
26
  // Convert numeric trigger_type to label using generated mapping from OpenAPI
@@ -65,6 +65,8 @@ export async function handleWorkflowAdapter(args, createClient, getDefaultEnvNam
65
65
  return handleWorkflow({
66
66
  mode: "get",
67
67
  persona_id: personaId,
68
+ compact: normalizedArgs.compact,
69
+ slim: normalizedArgs.slim,
68
70
  env: normalizedArgs.env,
69
71
  }, client, () => undefined, cache);
70
72
  }
@@ -192,6 +192,29 @@ async function handleWorkflowGet(args, client, cache) {
192
192
  name: w.name,
193
193
  type: w.type,
194
194
  }));
195
+ // ── Compact mode: skip generation_schema, return only what agents need ──
196
+ const compact = args.compact === true;
197
+ if (compact) {
198
+ // Slim the workflow_def in compact mode (always — agents don't need displaySettings)
199
+ const compactWorkflowDef = workflowDef ? slimWorkflowDef(workflowDef) : null;
200
+ return {
201
+ persona_id: persona.id,
202
+ persona_name: persona.name,
203
+ persona_type: persona.type,
204
+ fingerprint: fingerprintPersona(persona),
205
+ workflow_def: compactWorkflowDef,
206
+ available_widgets: availableWidgets,
207
+ _compact: true,
208
+ _next_steps: [
209
+ "You have the current workflow_def and fingerprint.",
210
+ "Modify the workflow_def as needed.",
211
+ "Use knowledge('<action_name>') to look up any unfamiliar action's inputs/outputs.",
212
+ "Deploy with: workflow(mode='deploy', persona_id='...', base_fingerprint='<fingerprint>', workflow_def={...})",
213
+ ],
214
+ _tip: "Use compact=false (or omit compact) for the full generation_schema with all action I/O specs, constraints, and widget bindings. Use knowledge('<action_name>') for individual action specs.",
215
+ };
216
+ }
217
+ // ── Full mode (default): includes generation_schema + all guidance ──
195
218
  // Get generation schema for LLM — API-first + DE-first for structural invariants
196
219
  const schema = await generateSchema(client, cache);
197
220
  // Get deprecated actions (API-first, with fallback)
@@ -863,7 +863,35 @@ export function validateCategorizersFallback(workflowDef) {
863
863
  const catName = (cat.name ?? "").toLowerCase();
864
864
  return catName === "fallback" || catName === "other";
865
865
  });
866
- // Method 2: Check enumTypes for this categorizer (API format)
866
+ // Method 2a: Check typeArguments.categories.enumType reference (proto format)
867
+ // This is the STRUCTURAL link — the categorizer's typeArguments points to
868
+ // the exact enumType that defines its categories.
869
+ if (!hasFallback) {
870
+ const typeArgs = action.typeArguments;
871
+ const catArgs = typeArgs?.categories;
872
+ const enumTypeRef = catArgs?.enumType;
873
+ if (enumTypeRef) {
874
+ // Extract the enum name from the reference (can be nested)
875
+ let refEnumName = "";
876
+ const refName = enumTypeRef.name;
877
+ if (typeof refName === "string") {
878
+ refEnumName = refName;
879
+ }
880
+ else if (typeof refName === "object" && refName !== null) {
881
+ const nameObj = refName;
882
+ if (typeof nameObj.name === "string") {
883
+ refEnumName = nameObj.name;
884
+ }
885
+ }
886
+ if (refEnumName) {
887
+ const enumCategories = enumTypeCategoryMap.get(refEnumName.toLowerCase());
888
+ if (enumCategories && (enumCategories.has("fallback") || enumCategories.has("other"))) {
889
+ hasFallback = true;
890
+ }
891
+ }
892
+ }
893
+ }
894
+ // Method 2b: Heuristic name matching (API format fallback)
867
895
  // The enumType name often contains the categorizer node name
868
896
  // SECURITY: Only associate enumTypes that match this specific categorizer
869
897
  if (!hasFallback) {
@@ -879,10 +907,6 @@ export function validateCategorizersFallback(workflowDef) {
879
907
  }
880
908
  }
881
909
  }
882
- // NOTE: Method 3 REMOVED - was too permissive
883
- // Previous implementation checked if ANY enumType has Fallback, which incorrectly
884
- // marked ALL categorizers as having Fallback if ANY one did.
885
- // This caused false negatives (missed warnings) for categorizers without Fallback.
886
910
  if (!hasFallback) {
887
911
  categorizersWithoutFallback.push(nodeName);
888
912
  }
@@ -1437,16 +1437,21 @@ result types, categories, and tags. Zero-result responses suggest common entry p
1437
1437
 
1438
1438
  ## Feedback → Confidence Loop
1439
1439
  When feedback with \`knowledge_ref\` is submitted, the referenced document's confidence_score
1440
- is updated in DE in real-time. Negative feedback decreases score, positive increases it.
1441
- DE native boost controls (boost-verified, demote-inferred) then handle ranking automatically.
1440
+ is updated in DE using a ratio-based model. The score reflects the ratio of negative to total
1441
+ feedback — a doc with 500 positives and 5 negatives stays healthy (1% negative = noise).
1442
+ Hard evidence (deploy failures, corrections) bypasses the corroboration gate; soft evidence
1443
+ (confusion, gaps) requires 2+ independent clients before applying. DE search boosts verified
1444
+ docs (+0.5) and demotes low-confidence docs (-0.8) at query time.
1442
1445
 
1443
1446
  Use \`feedback(method="submit", category="gap", knowledge_ref="doc-id", message="...")\`
1444
- to trigger the confidence loop.`,
1447
+ to trigger the confidence loop. For deploy-related feedback, include \`context="deploy_failure"\`
1448
+ for stronger signal weighting.`,
1445
1449
  status: "verified",
1446
1450
  criticalRules: [
1447
1451
  "_actions is machine-readable — agents can execute actions directly without parsing text",
1448
1452
  "Confidence loop is best-effort — feedback submission never fails due to confidence update errors",
1449
1453
  "Related queries are deterministic — computed from result metadata, no LLM call",
1454
+ "Hard evidence (deploy failures) applies immediately; soft evidence needs 2+ client corroboration",
1450
1455
  ],
1451
1456
  },
1452
1457
  };
@@ -923,6 +923,22 @@ See also: \`ema://rules/json-output-patterns\` for custom_agent/output_fields (s
923
923
  generate: async () => {
924
924
  return `# named_inputs Format (API Shape)
925
925
 
926
+ > **WARNING**: The \`named_inputs_<Name>\` suffix pattern is a UI LABEL convention only.
927
+ > The API wire format uses a SINGLE \`named_inputs\` key with all bindings inside one multiBinding.
928
+ > Using separate \`named_inputs_Search_Results\`, \`named_inputs_Conversation\` keys causes HTTP 500.
929
+ >
930
+ > Correct wire format:
931
+ > \`\`\`json
932
+ > "named_inputs": {
933
+ > "multiBinding": {
934
+ > "elements": [
935
+ > { "namedBinding": { "name": "Search_Results", "value": { "actionOutput": { ... } } } },
936
+ > { "namedBinding": { "name": "Conversation", "value": { "actionOutput": { ... } } } }
937
+ > ]
938
+ > }
939
+ > }
940
+ > \`\`\`
941
+
926
942
  ## Two Levels — Both Required
927
943
 
928
944
  \`named_inputs\` has TWO aspects that agents must get right:
@@ -1681,22 +1697,33 @@ A doc can have **high relevance but low confidence** (score=0.95, confidence_sco
1681
1697
 
1682
1698
  ---
1683
1699
 
1684
- ## Scoring Model
1700
+ ## Scoring Model (Ratio-Based)
1701
+
1702
+ Confidence is determined by the **ratio** of negative to total feedback, not absolute counts.
1703
+ A doc with 500 positives and 5 negatives (1% negative) stays healthy — the 5 are noise.
1685
1704
 
1686
1705
  \`\`\`
1687
- confidence_score = clamp(floor, max_score,
1688
- current_score + delta
1689
- )
1690
- where:
1691
- initial score = provenance_base (0.65–0.90)
1692
- delta = -0.08 per negative feedback, +0.04 per positive
1693
- max_score = provenance_base + 0.15 (caps positive inflation)
1694
- floor = provenance_base - ${MAX_NEGATIVE_DRIFT.toFixed(2)} (prevents catastrophic drops)
1706
+ negative_ratio = negative_count / (negative_count + positive_count)
1707
+ base_delta = f(ratio) # -0.15 at >=80% neg, +0.05 at <20% neg
1708
+ volume_amp = log2(total) * 0.15 # more feedback = stronger signal (capped 2x)
1709
+ corroboration_amp = (unique_clients - 1) * 0.1 # multiple clients = stronger (capped 1.5x)
1710
+ delta = base_delta * volume_amp * corroboration_amp
1711
+
1712
+ target = provenance_base + delta
1713
+ score converges 30% toward target per event (no jumps)
1695
1714
  \`\`\`
1696
1715
 
1697
- Deltas accumulate from the current score — the 3rd negative hits harder because it starts from an already-reduced score.
1716
+ **Adaptive floor**: Standard floor is provenance_base - ${MAX_NEGATIVE_DRIFT.toFixed(2)}.
1717
+ But when ratio >= 70% negative AND total >= 5, floor lowers — allowing truly bad docs
1718
+ to reach low-confidence. Absolute minimum is 0.10.
1719
+
1720
+ **Evidence classification**: Deploy failures (\`context="deploy_failure"\`) and corrections
1721
+ are "hard evidence" — they bypass the corroboration gate and get 1.5x delta multiplier.
1722
+ Confusion and gap reports are "soft evidence" — need 2+ independent clients before applying.
1698
1723
 
1699
- **Abuse prevention**: Per-document cooldown (max 1 update per doc per client per hour) and per-session cap (max 50 updates per server lifetime) prevent runaway feedback from destroying document confidence. The provenance floor ensures high-trust docs (code-derived, curated) can never drop into low-confidence territory from feedback alone.
1724
+ **Abuse prevention**: Per-document cooldown (max 1 update per doc per client per hour),
1725
+ per-session cap (max 50 updates), and corroboration requirement for soft evidence prevent
1726
+ single-agent spam from affecting scores.
1700
1727
 
1701
1728
  ---
1702
1729
 
@@ -2552,6 +2579,9 @@ This is a **deployable** raw workflow_def — copy and adapt.
2552
2579
  }
2553
2580
  \`\`\`
2554
2581
 
2582
+ > **NOTE**: For new deployments, use \`namedResults\` with \`namedResultsEnabled: true\` instead of legacy \`results\`.
2583
+ > The legacy \`results\` format may return "Workflow has no outputs" for newly created personas.
2584
+
2555
2585
  ---
2556
2586
 
2557
2587
  ## Deployment Flow
package/dist/mcp/tools.js CHANGED
@@ -578,6 +578,7 @@ Sync a persona between environments (dev, staging, prod). Always preview first.
578
578
 
579
579
  ## Get (return data for LLM to work with)
580
580
  - \`workflow(mode="get", persona_id="abc")\` - returns workflow_def, schema, patterns, deprecation warnings
581
+ - \`workflow(mode="get", persona_id="abc", compact=true)\` - compact response: workflowName + fingerprint + workflow_def only (no generation_schema). Use for modifications to existing workflows where you already know the actions.
581
582
  - LLM analyzes, compares, and generates workflows using this data
582
583
 
583
584
  ## Validate (static validation with path enumeration)
@@ -654,6 +655,10 @@ Note: \`workflow_def_path\` also works with mode=validate and mode=optimize.
654
655
  type: "number",
655
656
  description: "Timeout in milliseconds (for mode=validate). Default: 100",
656
657
  },
658
+ compact: {
659
+ type: "boolean",
660
+ description: "For mode=get: return ONLY workflowName, fingerprint, workflow_def, persona_type, available_widgets — no generation_schema, no guidance. Reduces response from ~176K to <20K chars. Use for modifications to existing workflows. Default: false",
661
+ },
657
662
  slim: {
658
663
  type: "boolean",
659
664
  description: "For mode=get: return slimmed workflow_def (strips displaySettings, truncates long inline values). Reduces ~60-70% for large workflows. Default: false",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ema.co/mcp-toolkit",
3
- "version": "2026.3.25-3",
3
+ "version": "2026.3.25-4",
4
4
  "description": "Ema AI Employee toolkit - MCP server, CLI, and SDK for managing AI Employees across environments",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",