@ema.co/mcp-toolkit 2026.3.25-4 → 2026.3.29-1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/tool-guidance.js +2 -2
- package/dist/knowledge/search-client.js +13 -2
- package/dist/knowledge/search-config.js +2 -1
- package/dist/mcp/handlers/feedback/index.js +32 -0
- package/dist/mcp/handlers/feedback/store.js +4 -0
- package/dist/mcp/handlers/knowledge/confidence-loop.js +10 -5
- package/dist/mcp/handlers/knowledge/index.js +23 -6
- package/dist/mcp/handlers/knowledge/outcome-feedback.js +205 -0
- package/dist/mcp/handlers/knowledge/session-state.js +110 -0
- package/dist/mcp/handlers/workflow/deploy.js +33 -0
- package/package.json +1 -1
|
@@ -176,13 +176,13 @@ export const TOOL_GUIDANCE = {
|
|
|
176
176
|
operations: [
|
|
177
177
|
{ name: "Get", description: "Fetch current workflow_def + generation schema", example: 'workflow(mode="get", persona_id="...")' },
|
|
178
178
|
{ name: "Get (slim)", description: "Fetch slimmed workflow_def for large workflows (strips displaySettings, truncates long values, ~60-70% smaller)", example: 'workflow(mode="get", persona_id="...", slim=true)' },
|
|
179
|
-
{ name: "Deploy", description: "Deploy LLM-generated workflow_def", example: 'workflow(mode="deploy", persona_id="...", workflow_def={...})' },
|
|
179
|
+
{ name: "Deploy", description: "Deploy LLM-generated workflow_def. Deploy outcomes automatically feed knowledge quality — failures demote consulted docs. Test with conversation() after deploy to validate intent alignment.", example: 'workflow(mode="deploy", persona_id="...", workflow_def={...})' },
|
|
180
180
|
{ name: "Validate", description: "Static validation with path enumeration", example: 'workflow(mode="validate", persona_id="...")' },
|
|
181
181
|
{ name: "Optimize", description: "Structural graph optimization", example: 'workflow(mode="optimize", persona_id="...")' },
|
|
182
182
|
],
|
|
183
183
|
nextSteps: {
|
|
184
184
|
get: "Build a workflow_def based on the generation_schema and deploy it.",
|
|
185
|
-
deploy: "
|
|
185
|
+
deploy: "Test with conversation() to validate intent alignment. Deploy success only means the API accepted it — conversation testing validates the persona actually works.",
|
|
186
186
|
validate: "Fix any reported issues, then deploy.",
|
|
187
187
|
optimize: "Review optimized workflow_def, then deploy if acceptable.",
|
|
188
188
|
},
|
|
@@ -653,6 +653,8 @@ export async function browseDocuments(options = {}) {
|
|
|
653
653
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
654
654
|
// User Event Tracking
|
|
655
655
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
656
|
+
/** Counters for UserEvent pipeline health — exposed via feedback(method="analyze"). */
|
|
657
|
+
export const userEventCounters = { sent: 0, failed: 0 };
|
|
656
658
|
export async function writeUserEvent(event) {
|
|
657
659
|
if (!isVertexEventsEnabled())
|
|
658
660
|
return;
|
|
@@ -661,15 +663,24 @@ export async function writeUserEvent(event) {
|
|
|
661
663
|
if (!headers)
|
|
662
664
|
return;
|
|
663
665
|
try {
|
|
664
|
-
await fetch(`${de.baseUrl}/${de.datastorePath}/userEvents:write`, {
|
|
666
|
+
const resp = await fetch(`${de.baseUrl}/${de.datastorePath}/userEvents:write`, {
|
|
665
667
|
method: "POST",
|
|
666
668
|
headers,
|
|
667
669
|
body: JSON.stringify(event),
|
|
668
670
|
signal: AbortSignal.timeout(5_000),
|
|
669
671
|
});
|
|
672
|
+
if (!resp.ok) {
|
|
673
|
+
userEventCounters.failed++;
|
|
674
|
+
const detail = await resp.text().catch(() => "");
|
|
675
|
+
console.error(`[SEARCH-CLIENT] UserEvent write failed: ${resp.status} — ${detail.slice(0, 200)}`);
|
|
676
|
+
}
|
|
677
|
+
else {
|
|
678
|
+
userEventCounters.sent++;
|
|
679
|
+
}
|
|
670
680
|
}
|
|
671
681
|
catch {
|
|
672
|
-
|
|
682
|
+
userEventCounters.failed++;
|
|
683
|
+
// Fire-and-forget — timeout or network error
|
|
673
684
|
}
|
|
674
685
|
}
|
|
675
686
|
/**
|
|
@@ -38,8 +38,9 @@ export function getSearchBackend() {
|
|
|
38
38
|
export function isDiscoveryEngineEnabled() {
|
|
39
39
|
return getSearchBackend() === "discovery-engine";
|
|
40
40
|
}
|
|
41
|
+
/** UserEvent tracking is ON by default. Set EMA_VERTEX_EVENTS=false to opt out. */
|
|
41
42
|
export function isVertexEventsEnabled() {
|
|
42
|
-
return process.env.EMA_VERTEX_EVENTS?.trim().toLowerCase()
|
|
43
|
+
return process.env.EMA_VERTEX_EVENTS?.trim().toLowerCase() !== "false";
|
|
43
44
|
}
|
|
44
45
|
export function getDeConfig() {
|
|
45
46
|
const project = process.env.EMA_GCP_PROJECT?.trim() || DEFAULT_PROJECT;
|
|
@@ -16,6 +16,9 @@ import { submitFeedback, listFeedback, listTelemetry, analyzeFeedback, rotateLog
|
|
|
16
16
|
import { markProbeResponded } from "./probes.js";
|
|
17
17
|
import { appendToOutbox, flushOutbox, getOutboxStats, readLocalMessages } from "./outbox.js";
|
|
18
18
|
import { isRemoteEnabled } from "./remote-store.js";
|
|
19
|
+
import { writeUserEvent } from "../../../knowledge/search-client.js";
|
|
20
|
+
import { getOrCreateClientId } from "./client-id.js";
|
|
21
|
+
import { getAttributionToken } from "../knowledge/session-state.js";
|
|
19
22
|
import { analyzeGlobal } from "./global-analysis.js";
|
|
20
23
|
import { TOOLKIT_VERSION } from "../env/config.js";
|
|
21
24
|
const VALID_CATEGORIES = ALL_CATEGORIES;
|
|
@@ -141,6 +144,35 @@ async function handleSubmit(args) {
|
|
|
141
144
|
// Best-effort — don't block feedback submission
|
|
142
145
|
}
|
|
143
146
|
}
|
|
147
|
+
// UserEvent emission: fire DE conversion/view-item for positive feedback with knowledge_ref.
|
|
148
|
+
// Independent of confidence loop — no guards, no cooldown. Fire-and-forget.
|
|
149
|
+
if (knowledgeRef) {
|
|
150
|
+
const isSuccess = category === "success";
|
|
151
|
+
const isHighQuality = category === "quality"
|
|
152
|
+
&& (qualityData?.accuracy ?? 0) >= 4
|
|
153
|
+
&& (qualityData?.usefulness ?? 0) >= 4;
|
|
154
|
+
const isInteraction = category === "interaction";
|
|
155
|
+
if (isSuccess || isHighQuality || isInteraction) {
|
|
156
|
+
const conversionType = isSuccess ? "knowledge-success"
|
|
157
|
+
: isHighQuality ? "knowledge-quality-high"
|
|
158
|
+
: undefined; // interaction → view-item, no conversionType
|
|
159
|
+
getOrCreateClientId()
|
|
160
|
+
.then((clientId) => {
|
|
161
|
+
const token = getAttributionToken(knowledgeRef);
|
|
162
|
+
writeUserEvent({
|
|
163
|
+
eventType: conversionType ? "conversion" : "view-item",
|
|
164
|
+
userPseudoId: clientId,
|
|
165
|
+
...(token ? { attributionToken: token } : {}),
|
|
166
|
+
documents: [{
|
|
167
|
+
id: knowledgeRef,
|
|
168
|
+
...(conversionType ? { conversionValue: isSuccess ? 1.0 : 0.8 } : {}),
|
|
169
|
+
}],
|
|
170
|
+
...(conversionType ? { conversionType } : {}),
|
|
171
|
+
}).catch(() => { });
|
|
172
|
+
})
|
|
173
|
+
.catch(() => { });
|
|
174
|
+
}
|
|
175
|
+
}
|
|
144
176
|
return {
|
|
145
177
|
success: true,
|
|
146
178
|
feedback_id: entry.id,
|
|
@@ -12,6 +12,7 @@ import { promises as fs } from "node:fs";
|
|
|
12
12
|
import { join } from "node:path";
|
|
13
13
|
import { randomUUID } from "node:crypto";
|
|
14
14
|
import { getToolkitRoot } from "../../../sdk/paths.js";
|
|
15
|
+
import { userEventCounters } from "../../../knowledge/search-client.js";
|
|
15
16
|
import { appendToOutbox } from "./outbox.js";
|
|
16
17
|
import { isRemoteEnabled } from "./remote-store.js";
|
|
17
18
|
import { SESSION_ID } from "./session.js";
|
|
@@ -384,6 +385,8 @@ export async function analyzeFeedback(rootOverride) {
|
|
|
384
385
|
qualityEntries.reduce((sum, e) => sum + (e.quality_data.accuracy ?? 0), 0) /
|
|
385
386
|
qualityEntries.length;
|
|
386
387
|
}
|
|
388
|
+
// UserEvent pipeline counters (in-memory, this session only)
|
|
389
|
+
const hasEventActivity = userEventCounters.sent > 0 || userEventCounters.failed > 0;
|
|
387
390
|
return {
|
|
388
391
|
summary: {
|
|
389
392
|
total_feedback: feedback.length,
|
|
@@ -394,6 +397,7 @@ export async function analyzeFeedback(rootOverride) {
|
|
|
394
397
|
telemetry_period: telemetry.length > 0
|
|
395
398
|
? { from: telemetry[0].ts, to: telemetry[telemetry.length - 1].ts }
|
|
396
399
|
: null,
|
|
400
|
+
...(hasEventActivity ? { user_events: { ...userEventCounters } } : {}),
|
|
397
401
|
},
|
|
398
402
|
category_breakdown: categoryBreakdown,
|
|
399
403
|
hot_spots: {
|
|
@@ -40,13 +40,18 @@ const cooldownMap = new Map();
|
|
|
40
40
|
let sessionUpdateCount = 0;
|
|
41
41
|
/** Per-document feedback history for graduated scoring */
|
|
42
42
|
const feedbackHistoryMap = new Map();
|
|
43
|
+
/** Recognized outcome suffixes from the outcome-feedback module */
|
|
44
|
+
const OUTCOME_SUFFIXES = ["_success", "_failure", "_partial", "_misaligned", "_accepted"];
|
|
43
45
|
/** Classify feedback strength based on context */
|
|
44
46
|
export function classifyEvidence(category, context) {
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
// Any tool outcome with a structured context suffix is hard evidence.
|
|
48
|
+
// Context format from outcome-feedback: "{tool}_{operation}_{quality}"
|
|
49
|
+
if (context) {
|
|
50
|
+
for (const suffix of OUTCOME_SUFFIXES) {
|
|
51
|
+
if (context.endsWith(suffix))
|
|
52
|
+
return "hard";
|
|
53
|
+
}
|
|
54
|
+
}
|
|
50
55
|
// Explicit corrections with knowledge_ref are medium-hard
|
|
51
56
|
if (category === "correction")
|
|
52
57
|
return "hard";
|
|
@@ -15,6 +15,8 @@ import { inferSourceType } from "../../../knowledge/pipeline/types.js";
|
|
|
15
15
|
import { computeConfidenceScore } from "../../../knowledge/pipeline/confidence.js";
|
|
16
16
|
import { actionsForSearchResults, actionsForNoResults, actionsForPublish } from "../response-actions.js";
|
|
17
17
|
import { generateRelatedQueries } from "./related-queries.js";
|
|
18
|
+
import { getOrCreateClientId } from "../feedback/client-id.js";
|
|
19
|
+
import { recordSearchResults } from "./session-state.js";
|
|
18
20
|
const GCS_BUCKET = "em1-knowledge";
|
|
19
21
|
async function checkSupersedeGuard(supersedes) {
|
|
20
22
|
if (!supersedes || supersedes.length === 0)
|
|
@@ -599,6 +601,8 @@ async function handleQuery(args) {
|
|
|
599
601
|
if (related.length > 0) {
|
|
600
602
|
result._related_queries = related;
|
|
601
603
|
}
|
|
604
|
+
// Record results in session state for attribution cache + consultedDocs tracking
|
|
605
|
+
recordSearchResults(results.map((r) => ({ id: r.id })), response.attributionToken);
|
|
602
606
|
fireSearchEvent(query, response.attributionToken);
|
|
603
607
|
return result;
|
|
604
608
|
}
|
|
@@ -649,6 +653,8 @@ function contextualNextStep(results) {
|
|
|
649
653
|
return "Follow the guide steps relevant to your task";
|
|
650
654
|
return "Review results and refine search if needed";
|
|
651
655
|
}
|
|
656
|
+
/** Cached client ID for UserEvent pseudoId — resolved once, then sync. */
|
|
657
|
+
let _cachedPseudoId;
|
|
652
658
|
function fireSearchEvent(query, attributionToken) {
|
|
653
659
|
recordTelemetry({
|
|
654
660
|
type: "search_event",
|
|
@@ -657,10 +663,21 @@ function fireSearchEvent(query, attributionToken) {
|
|
|
657
663
|
ok: true,
|
|
658
664
|
resource_uri: attributionToken,
|
|
659
665
|
}).catch(() => { });
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
+
// Resolve pseudoId: use cached value if available, otherwise resolve async (first call only)
|
|
667
|
+
const emit = (pseudoId) => {
|
|
668
|
+
writeUserEvent({
|
|
669
|
+
eventType: "search",
|
|
670
|
+
userPseudoId: pseudoId,
|
|
671
|
+
attributionToken,
|
|
672
|
+
searchInfo: { searchQuery: query },
|
|
673
|
+
}).catch(() => { });
|
|
674
|
+
};
|
|
675
|
+
if (_cachedPseudoId) {
|
|
676
|
+
emit(_cachedPseudoId);
|
|
677
|
+
}
|
|
678
|
+
else {
|
|
679
|
+
getOrCreateClientId()
|
|
680
|
+
.then((id) => { _cachedPseudoId = id; emit(id); })
|
|
681
|
+
.catch(() => { emit("mcp-agent"); }); // fallback if client-id resolution fails
|
|
682
|
+
}
|
|
666
683
|
}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outcome Feedback — Layered signal emitter for knowledge quality feedback.
|
|
3
|
+
*
|
|
4
|
+
* Captures the full lifecycle of an agent interaction:
|
|
5
|
+
*
|
|
6
|
+
* Intent stated → work done → outcome assessed at multiple layers
|
|
7
|
+
*
|
|
8
|
+
* Signal layers (each carries different weight):
|
|
9
|
+
* 1. API acceptance (0.1) — system accepted the input structurally
|
|
10
|
+
* 2. Agent assessment (0.3) — agent's self-evaluation of alignment
|
|
11
|
+
* 3. Agent validation (0.5) — functional testing (conversation, debug)
|
|
12
|
+
* 4. End-user signal (1.0) — real user responded, kept using it, or abandoned
|
|
13
|
+
*
|
|
14
|
+
* Intent tracking:
|
|
15
|
+
* - Original intent is recorded at the start of the interaction
|
|
16
|
+
* - If intent pivots mid-journey, that's knowledge (not failure):
|
|
17
|
+
* "Users with intent X often pivot to Y" → proactive suggestion for next agent
|
|
18
|
+
* - Outcome is assessed against the FINAL intent, not the original
|
|
19
|
+
* - But the pivot itself is published as a pattern signal
|
|
20
|
+
*/
|
|
21
|
+
import { getConsultedDocs } from "./session-state.js";
|
|
22
|
+
import { processConfidenceFeedback } from "./confidence-loop.js";
|
|
23
|
+
import { writeUserEvent } from "../../../knowledge/search-client.js";
|
|
24
|
+
import { getOrCreateClientId } from "../feedback/client-id.js";
|
|
25
|
+
/** Weight multiplier per signal layer */
|
|
26
|
+
const LAYER_WEIGHTS = {
|
|
27
|
+
system: 0.1, // API accepted it — weakest signal
|
|
28
|
+
agent: 0.3, // Agent self-assessment — has context but may be biased
|
|
29
|
+
validation: 0.5, // Functional testing — objective but synthetic
|
|
30
|
+
user: 1.0, // End-user response — ground truth
|
|
31
|
+
};
|
|
32
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
33
|
+
// Outcome Resolution
|
|
34
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
35
|
+
/**
 * Outcome descriptors keyed by the normalized string form of `success`.
 *  - quality:    outcome label used for conversion-value selection
 *  - isPositive: whether the outcome counts as a positive signal
 *  - suffix:     appended to event-type / context strings
 *  - category:   feedback category handed to the confidence loop
 */
const OUTCOME_DESCRIPTORS = new Map([
    ["success", { quality: "success", isPositive: true, suffix: "success", category: "success" }],
    ["partial", { quality: "partial", isPositive: true, suffix: "partial", category: "success" }],
    ["accepted", { quality: "accepted", isPositive: true, suffix: "accepted", category: "interaction" }],
    ["misaligned", { quality: "misaligned", isPositive: false, suffix: "misaligned", category: "correction" }],
    ["failure", { quality: "failure", isPositive: false, suffix: "failure", category: "correction" }],
]);
/**
 * Translate the caller-supplied `success` value into an outcome descriptor.
 *
 * Booleans map to "success" / "failure". Strings are matched after trimming
 * and lower-casing. Anything else (undefined, unknown strings, other types)
 * falls back to the neutral "accepted" descriptor, which emitOutcomeFeedback
 * treats as "no confidence update" — so a malformed event degrades to a no-op
 * instead of returning undefined and crashing the caller on `outcome.suffix`.
 *
 * @param {boolean|string} success - true/false or one of
 *   "success" | "partial" | "accepted" | "misaligned" | "failure".
 * @returns {{quality: string, isPositive: boolean, suffix: string, category: string}}
 *   A fresh descriptor object (safe for the caller to mutate).
 */
function resolveOutcome(success) {
    let key;
    if (typeof success === "boolean") {
        key = success ? "success" : "failure";
    }
    else if (typeof success === "string") {
        key = success.trim().toLowerCase();
    }
    const descriptor = OUTCOME_DESCRIPTORS.get(key) ?? OUTCOME_DESCRIPTORS.get("accepted");
    return { ...descriptor };
}
|
|
54
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
55
|
+
// Intent Pivot Tracking
|
|
56
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
57
|
+
/** Upper bound on retained pivots; emitOutcomeFeedback evicts the oldest beyond this. */
const MAX_PIVOTS = 100;
/** In-memory pivot accumulator — aggregated by the journey/feedback adapter */
const intentPivots = [];
/**
 * Get accumulated intent pivots (for journey reporting).
 *
 * Returns a snapshot (new array of copied entries) rather than the live
 * accumulator, so a reporting caller cannot accidentally mutate internal
 * state or defeat the MAX_PIVOTS bound.
 *
 * @returns {Array<object>} Copies of the recorded pivots, oldest first.
 */
export function getIntentPivots() {
    return intentPivots.map((pivot) => ({ ...pivot }));
}
/** Reset pivot state (for test isolation) */
export function _resetIntentPivots() {
    // Truncate in place so existing references to the accumulator stay valid.
    intentPivots.length = 0;
}
|
|
68
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
69
|
+
// Main Entry Point
|
|
70
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
71
|
+
/**
 * Emit outcome feedback for all consulted knowledge docs in the current session.
 *
 * Best-effort: per-document and per-topic failures are swallowed so they never
 * block the calling handler, and the UserEvent write is fire-and-forget.
 *
 * Flow:
 *   1. Record an intent pivot when `intent.final` differs from `intent.original`.
 *   2. Return early when no docs were consulted, or when the outcome is
 *      "accepted" (structural acceptance only — no confidence updates).
 *   3. Feed one confidence signal per consulted doc (agentAssessment.helpful /
 *      .misleading override the blanket category) plus one "gap" signal per
 *      agentAssessment.missing topic.
 *   4. Fire a single UserEvent covering all consulted docs.
 *
 * @example
 * // Layer 1: Deploy accepted (system signal, low weight)
 * await emitOutcomeFeedback({
 *   tool: "workflow", operation: "deploy", success: "accepted", layer: "system",
 * });
 *
 * // Layer 2: Agent thinks it looks right
 * await emitOutcomeFeedback({
 *   tool: "workflow", operation: "deploy", success: "partial", layer: "agent",
 *   intent: "route billing questions to billing team",
 * });
 *
 * // Layer 3: Conversation test confirms behavior
 * await emitOutcomeFeedback({
 *   tool: "conversation", operation: "test", success: "success", layer: "validation",
 *   intent: "route billing questions to billing team",
 * });
 *
 * // Layer 4: End-user kept using it (highest weight)
 * await emitOutcomeFeedback({
 *   tool: "conversation", operation: "usage", success: true, layer: "user",
 * });
 *
 * // Intent pivot (knowledge, not failure):
 * await emitOutcomeFeedback({
 *   tool: "workflow", operation: "deploy", success: "success", layer: "validation",
 *   intent: {
 *     original: "route billing questions to billing team",
 *     final: "route billing questions and auto-generate invoice summaries",
 *     pivotReason: "user realized they also need invoice summaries during testing",
 *   },
 * });
 *
 * @param {object} event
 * @param {string} event.tool - Tool name, used in event-type / context strings.
 * @param {string} event.operation - Operation name, used in event-type / context strings.
 * @param {boolean|string} event.success - Outcome, resolved via resolveOutcome().
 * @param {string} [event.layer="system"] - Key of LAYER_WEIGHTS; unknown values
 *   are treated as "system" (the weakest layer) so the conversion value is never NaN.
 * @param {string|{original: string, final: string, pivotReason?: string}} [event.intent]
 * @param {{helpful?: string[], misleading?: string[], missing?: string[]}} [event.agentAssessment]
 * @returns {Promise<{docs_processed: number, confidence_updates: number, event_type: string}>}
 */
export async function emitOutcomeFeedback(event) {
    // Snapshot the consulted set: the loop below awaits per document, and a
    // concurrent search adding to the live Set mid-iteration would otherwise
    // have its documents credited (or blamed) for this outcome.
    const docs = new Set(getConsultedDocs());
    const outcome = resolveOutcome(event.success);
    // Unknown layers fall back to "system" so `weight` is always a number.
    const requestedLayer = event.layer ?? "system";
    const layer = Object.hasOwn(LAYER_WEIGHTS, requestedLayer) ? requestedLayer : "system";
    const weight = LAYER_WEIGHTS[layer];
    const eventType = `${event.tool}-${event.operation}-${outcome?.suffix ?? "unknown"}`;
    // Track intent pivots — these are knowledge signals, not failures
    const intent = event.intent;
    if (intent && typeof intent === "object" && intent.final && intent.final !== intent.original) {
        intentPivots.push({
            original: intent.original,
            final: intent.final,
            reason: intent.pivotReason,
            tool: event.tool,
            timestamp: new Date().toISOString(),
        });
        // Evict oldest entries to prevent unbounded growth
        while (intentPivots.length > MAX_PIVOTS) {
            intentPivots.shift();
        }
        const reasonNote = intent.pivotReason ? ` (${intent.pivotReason})` : "";
        console.error(`[INTENT-PIVOT] "${intent.original}" → "${intent.final}"${reasonNote}`);
    }
    // An unresolvable outcome carries no usable signal; report a no-op
    // instead of throwing on `outcome.suffix` below.
    if (!outcome || docs.size === 0) {
        return { docs_processed: 0, confidence_updates: 0, event_type: eventType };
    }
    // "accepted" = system accepted it, no intent validation → skip confidence updates.
    // This applies at ALL layers: "accepted" means structural acceptance, not functional
    // success. Use "partial" or "success" to indicate functional validation.
    if (outcome.quality === "accepted") {
        return { docs_processed: docs.size, confidence_updates: 0, event_type: eventType };
    }
    // Context encodes tool, operation, and quality for evidence classification.
    // The layer is NOT part of the context; it only influences the category
    // downgrade below and the conversion value of the UserEvent.
    const context = `${event.tool}_${event.operation}_${outcome.suffix}`;
    // System-layer positive is neutral — API acceptance isn't validation — so
    // "success" is downgraded to "interaction" there. Other layers keep the
    // outcome's own category.
    const effectiveDefaultCategory = (layer === "system" && outcome.category === "success")
        ? "interaction"
        : outcome.category;
    const assessment = event.agentAssessment;
    const helpfulSet = new Set(assessment?.helpful ?? []);
    const misleadingSet = new Set(assessment?.misleading ?? []);
    let updates = 0;
    // Sequential on purpose: keeps the order of confidence-loop calls identical
    // to the order documents were consulted.
    for (const docId of docs) {
        // Agent assessment overrides the blanket signal per doc
        let category = effectiveDefaultCategory;
        if (helpfulSet.has(docId)) {
            category = "success";
        }
        else if (misleadingSet.has(docId)) {
            category = "correction";
        }
        try {
            const result = await processConfidenceFeedback(category, docId, undefined, context);
            if (result) {
                updates++;
            }
        }
        catch {
            // Best-effort
        }
    }
    // Emit gap signals for missing knowledge
    for (const topic of assessment?.missing ?? []) {
        try {
            await processConfidenceFeedback("gap", `missing:${topic}`, undefined, context);
        }
        catch {
            // Best-effort
        }
    }
    // Fire UserEvents (fire-and-forget)
    getOrCreateClientId()
        .then((clientId) => {
            // Conversion value = outcome quality × layer weight
            let baseValue = 0;
            if (outcome.quality === "success") {
                baseValue = 1.0;
            }
            else if (outcome.quality === "partial") {
                baseValue = 0.5;
            }
            const conversionValue = baseValue * weight;
            const isConversion = outcome.isPositive && outcome.quality !== "accepted";
            const documents = [...docs].map((docId) => ({
                id: docId,
                ...(isConversion ? { conversionValue } : {}),
            }));
            writeUserEvent({
                eventType: isConversion ? "conversion" : "view-item",
                userPseudoId: clientId,
                ...(isConversion ? { conversionType: eventType } : {}),
                documents,
            }).catch(() => { });
        })
        .catch(() => { });
    console.error(`[OUTCOME-FEEDBACK] ${eventType} (layer=${layer}, weight=${weight}): ` +
        `${docs.size} consulted docs, ${updates} confidence updates`);
    return { docs_processed: docs.size, confidence_updates: updates, event_type: eventType };
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
 * Session State — Shared tracking for knowledge search sessions.
 *
 * Tracks which documents were served (consultedDocs), their attribution tokens,
 * and DE resource names. Used by:
 *   - Task 2.2: consultedDocs for deploy outcome feedback
 *   - Task 2.9: attribution token cache for UserEvent correlation
 *   - Task 2.10: conversion events from positive feedback
 *   - Task 2.12: auto-citation tracking + dedup for answer mode
 */
const ATTRIBUTION_TTL_MS = 30 * 60 * 1000; // 30 minutes
const MAX_CACHE_SIZE = 500;
const SWEEP_INTERVAL_MS = 5 * 60 * 1000; // 5 minutes
/**
 * Documents recorded by recordSearchResults() since the last
 * resetConsultedDocs() / _resetSessionState() call. Nothing in this module
 * clears it automatically — callers decide where a cycle ends.
 */
const consultedDocs = new Set();
/**
 * Attribution token cache: artifactId → {token, ts, deResourceName}.
 * Entries are re-inserted on every refresh, so Map iteration order runs from
 * least- to most-recently recorded and eviction can simply drop from the front.
 */
const attributionCache = new Map();
/** Per-session dedup for L1 auto-citation view-item events. */
const citationDedupeSet = new Set();
/** Deploy attempt counter for retry-aware feedback. */
let deployAttempts = 0;
// ─── Periodic sweep to prevent memory leak ───────────────────────────────────
let sweepTimer;
/** True when a cache entry is older than the attribution TTL. */
function isExpired(entry, now) {
    return now - entry.ts > ATTRIBUTION_TTL_MS;
}
/** Drop least-recently recorded entries until the cache fits MAX_CACHE_SIZE. */
function trimAttributionCache() {
    for (const id of attributionCache.keys()) {
        if (attributionCache.size <= MAX_CACHE_SIZE) {
            break;
        }
        attributionCache.delete(id);
    }
}
/** Remove expired entries, then enforce the size bound. */
function sweepAttributionCache() {
    const now = Date.now();
    for (const [id, entry] of attributionCache) {
        if (isExpired(entry, now)) {
            attributionCache.delete(id);
        }
    }
    trimAttributionCache();
}
/** Lazily start the periodic sweep; subsequent calls are no-ops. */
function startSweepTimer() {
    if (sweepTimer) {
        return;
    }
    sweepTimer = setInterval(sweepAttributionCache, SWEEP_INTERVAL_MS);
    // Don't prevent process exit (timer handles outside Node have no unref()).
    sweepTimer?.unref?.();
}
// ─── Public API ──────────────────────────────────────────────────────────────
/**
 * Record that documents were served in a search response.
 * Call after each knowledge() search returns results.
 *
 * Every result with a truthy `id` is added to consultedDocs and cached with
 * the response's attribution token. A call WITHOUT a token does not overwrite
 * an unexpired entry that already holds one, so a token-less search cannot
 * erase attribution that is still usable. The cache size bound is enforced
 * immediately rather than waiting for the next sweep.
 *
 * @param {Array<{id?: string, deResourceName?: string}>|null|undefined} results
 *   Served results; null/undefined and null entries are ignored.
 * @param {string} [attributionToken] - Token returned with the search response.
 */
export function recordSearchResults(results, attributionToken) {
    startSweepTimer();
    const now = Date.now();
    for (const r of results ?? []) {
        if (!r?.id) {
            continue;
        }
        consultedDocs.add(r.id);
        const previous = attributionCache.get(r.id);
        const keepPrevious = attributionToken == null
            && previous?.token != null
            && !isExpired(previous, now);
        if (keepPrevious) {
            // Preserve the usable token; only backfill the resource name.
            if (r.deResourceName !== undefined) {
                previous.deResourceName = r.deResourceName;
            }
            continue;
        }
        // delete + set moves the entry to the most-recent end of the Map.
        attributionCache.delete(r.id);
        attributionCache.set(r.id, {
            token: attributionToken,
            ts: now,
            deResourceName: r.deResourceName,
        });
    }
    trimAttributionCache();
}
/**
 * Get the attribution token for a document (if cached and not expired).
 * An expired entry is removed as a side effect.
 *
 * @param {string} docId
 * @returns {string|undefined}
 */
export function getAttributionToken(docId) {
    const entry = attributionCache.get(docId);
    if (!entry) {
        return undefined;
    }
    if (isExpired(entry, Date.now())) {
        attributionCache.delete(docId);
        return undefined;
    }
    return entry.token;
}
/**
 * Get all documents consulted since the last reset.
 * Returns the live Set — callers that iterate across awaits should copy it.
 */
export function getConsultedDocs() {
    return consultedDocs;
}
/** Clear consultedDocs to start a new cycle. */
export function resetConsultedDocs() {
    consultedDocs.clear();
}
/** Increment deploy attempts counter. Call at start of each deploy. */
export function incrementDeployAttempts() {
    deployAttempts++;
}
/** Get current deploy attempt count. */
export function getDeployAttempts() {
    return deployAttempts;
}
/** Check if a citation has already been tracked this session (for L1 dedup). */
export function hasEmittedCitation(docId) {
    return citationDedupeSet.has(docId);
}
/** Mark a citation as emitted this session. */
export function markCitationEmitted(docId) {
    citationDedupeSet.add(docId);
}
/** Reset all state (for test isolation). Also stops the sweep timer. */
export function _resetSessionState() {
    consultedDocs.clear();
    attributionCache.clear();
    citationDedupeSet.clear();
    deployAttempts = 0;
    if (sweepTimer) {
        clearInterval(sweepTimer);
        sweepTimer = undefined;
    }
}
|
|
@@ -26,8 +26,11 @@ import { validateWorkflowSchema, validateSearchDataSourceConsistency, validateNo
|
|
|
26
26
|
hasKnowledgeSearchNodes, extractActionTypeName, } from "./validation.js";
|
|
27
27
|
import { validateWorkflowOutputs } from "./validate-outputs.js";
|
|
28
28
|
import { validateWorkflowDefStructure } from "../../domain/workflow-def-validator.js";
|
|
29
|
+
import { incrementDeployAttempts } from "../knowledge/session-state.js";
|
|
30
|
+
import { emitOutcomeFeedback } from "../knowledge/outcome-feedback.js";
|
|
29
31
|
// Re-export for backwards compatibility with existing imports
|
|
30
32
|
export { hasKnowledgeSearchNodes, extractActionTypeName };
|
|
33
|
+
// Deploy outcome feedback is emitted by the deploy handler through emitOutcomeFeedback() from the generalized outcome-feedback utility.
|
|
31
34
|
/**
|
|
32
35
|
* Handle workflow deploy mode
|
|
33
36
|
*/
|
|
@@ -52,6 +55,8 @@ export async function handleWorkflowDeploy(args, client) {
|
|
|
52
55
|
if (!workflowDef) {
|
|
53
56
|
return { error: "workflow_def required for deploy mode" };
|
|
54
57
|
}
|
|
58
|
+
// Track deploy attempts for retry-aware feedback
|
|
59
|
+
incrementDeployAttempts();
|
|
55
60
|
const persona = await client.getPersonaById(personaId);
|
|
56
61
|
if (!persona) {
|
|
57
62
|
return { error: `Persona not found: ${personaId}` };
|
|
@@ -319,6 +324,11 @@ export async function handleWorkflowDeploy(args, client) {
|
|
|
319
324
|
// ═══════════════════════════════════════════════════════════════════════════
|
|
320
325
|
const response = updateResponse;
|
|
321
326
|
if (response?.proto_config_updated === false) {
|
|
327
|
+
// Emit deploy failure feedback before returning
|
|
328
|
+
try {
|
|
329
|
+
await emitOutcomeFeedback({ tool: "workflow", operation: "deploy", success: false });
|
|
330
|
+
}
|
|
331
|
+
catch { /* best-effort */ }
|
|
322
332
|
return {
|
|
323
333
|
mode: "deploy",
|
|
324
334
|
status: "failed",
|
|
@@ -403,10 +413,33 @@ export async function handleWorkflowDeploy(args, client) {
|
|
|
403
413
|
catch {
|
|
404
414
|
// Post-deploy check is best-effort
|
|
405
415
|
}
|
|
416
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
417
|
+
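// DEPLOY-OUTCOME FEEDBACK: Report API acceptance for consulted knowledge docs
|
|
418
|
+
// "accepted" is a structural signal only — it reports how many docs were consulted but applies no confidence updates; conversation() testing supplies the stronger signal.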
|
|
419
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
420
|
+
try {
|
|
421
|
+
const feedback = await emitOutcomeFeedback({ tool: "workflow", operation: "deploy", success: "accepted" });
|
|
422
|
+
if (feedback.docs_processed > 0) {
|
|
423
|
+
result._knowledge_feedback = {
|
|
424
|
+
outcome: "accepted",
|
|
425
|
+
docs_consulted: feedback.docs_processed,
|
|
426
|
+
confidence_updates: feedback.confidence_updates,
|
|
427
|
+
_tip: "Deploy accepted by API. Test with conversation() to validate intent alignment — that produces stronger confidence signals.",
|
|
428
|
+
};
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
catch {
|
|
432
|
+
// Best-effort — never block deploy response
|
|
433
|
+
}
|
|
406
434
|
return result;
|
|
407
435
|
}
|
|
408
436
|
catch (err) {
|
|
409
437
|
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
438
|
+
// Emit deploy failure feedback (best-effort, before building response)
|
|
439
|
+
try {
|
|
440
|
+
await emitOutcomeFeedback({ tool: "workflow", operation: "deploy", success: false });
|
|
441
|
+
}
|
|
442
|
+
catch { /* best-effort */ }
|
|
410
443
|
// Preserve structured error information if available
|
|
411
444
|
const errorResult = {
|
|
412
445
|
mode: "deploy",
|
package/package.json
CHANGED