thumbgate 1.15.0 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +59 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +210 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +157 -8
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +55 -48
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +6 -6
- package/public/compare.html +29 -23
- package/public/dashboard.html +82 -10
- package/public/guide.html +28 -28
- package/public/index.html +216 -98
- package/public/learn.html +50 -22
- package/public/lessons.html +1 -1
- package/public/numbers.html +17 -17
- package/public/pro.html +82 -18
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-schema.js +18 -2
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +92 -4
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +16 -4
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +232 -55
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +63 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/src/api/server.js +381 -120
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
|
@@ -1,896 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
const crypto = require('crypto');
|
|
5
|
-
const fs = require('fs');
|
|
6
|
-
const path = require('path');
|
|
7
|
-
const { ensureDir } = require('./fs-utils');
|
|
8
|
-
const {
|
|
9
|
-
loadSubagentProfiles,
|
|
10
|
-
getAllowedTools,
|
|
11
|
-
} = require('./mcp-policy');
|
|
12
|
-
const {
|
|
13
|
-
loadModel,
|
|
14
|
-
saveModel,
|
|
15
|
-
updateModel,
|
|
16
|
-
getReliability,
|
|
17
|
-
} = require('./thompson-sampling');
|
|
18
|
-
|
|
19
|
-
// Values accepted by normalizeDelegationMode ('off' is used when no mode is given).
const DELEGATION_MODES = [
  'off',
  'auto',
  'sequential',
];

// Outcome labels for a handoff (presumably validated when a handoff completes — confirm at the use site).
const HANDOFF_OUTCOMES = [
  'accepted',
  'rejected',
  'aborted',
];

// Look-back window for hasRecentSimilarFailure: 30 days, in milliseconds.
const RECENT_FAILURE_WINDOW_MS = 1000 * 60 * 60 * 24 * 30;

// Reason codes that promoteDelegationFailure turns into captured feedback.
const PROMOTABLE_REASON_CODES = new Set(['single_phase_task', 'missing_required_evidence', 'unresolved_handoff_exists']);
|
|
27
|
-
|
|
28
|
-
/**
 * Resolve the './feedback-loop' module at call time instead of at module load.
 * NOTE(review): the deferred require presumably avoids a circular import — confirm.
 *
 * @returns {object} The exports of './feedback-loop'.
 */
function getFeedbackLoopModule() {
  const feedbackLoop = require('./feedback-loop');
  return feedbackLoop;
}
|
|
31
|
-
|
|
32
|
-
/**
 * Resolve the './verification-loop' module at call time instead of at module load.
 * NOTE(review): the deferred require presumably avoids a circular import — confirm.
 *
 * @returns {object} The exports of './verification-loop'.
 */
function getVerificationLoopModule() {
  const verificationLoop = require('./verification-loop');
  return verificationLoop;
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
/**
 * Read a JSON Lines file into an array of parsed records.
 *
 * Best-effort reader: a missing file or an empty file yields `[]`, lines that
 * fail to parse are skipped, and falsy parsed values (e.g. `null`, `0`) are
 * dropped.
 *
 * @param {string} filePath - Path of the .jsonl file to read.
 * @returns {Array<*>} Parsed records in file order.
 * @throws {Error} Any read error other than the file not existing.
 */
function readJSONL(filePath) {
  if (!fs.existsSync(filePath)) return [];

  let raw;
  try {
    raw = fs.readFileSync(filePath, 'utf-8').trim();
  } catch (err) {
    // The file can be removed between the existence check and the read;
    // treat that the same as "no file" instead of crashing the caller.
    if (err && err.code === 'ENOENT') return [];
    throw err;
  }
  if (!raw) return [];

  const records = [];
  for (const line of raw.split('\n')) {
    try {
      const parsed = JSON.parse(line);
      if (parsed) records.push(parsed);
    } catch {
      // Deliberately skip malformed lines so one bad record does not hide the rest.
    }
  }
  return records;
}
|
|
52
|
-
|
|
53
|
-
/**
 * Append one record to a JSON Lines file as a single serialized line.
 * The parent directory is passed to `ensureDir` before the write.
 *
 * @param {string} filePath - Destination .jsonl file.
 * @param {*} record - Value serialized with JSON.stringify.
 */
function appendJSONL(filePath, record) {
  const parentDir = path.dirname(filePath);
  ensureDir(parentDir);
  const serializedLine = `${JSON.stringify(record)}\n`;
  fs.appendFileSync(filePath, serializedLine);
}
|
|
57
|
-
|
|
58
|
-
/**
 * Build an Error carrying an HTTP-style status code and optional details.
 *
 * @param {string} message - Error message.
 * @param {number} statusCode - Value stored on `err.statusCode`.
 * @param {*} [details=null] - Stored on `err.details` only when truthy.
 * @returns {Error} The decorated error (not thrown here).
 */
function createDelegationError(message, statusCode, details = null) {
  const error = new Error(message);
  error.statusCode = statusCode;
  if (!details) {
    return error;
  }
  error.details = details;
  return error;
}
|
|
66
|
-
|
|
67
|
-
/**
 * Normalize a delegation mode to one of DELEGATION_MODES.
 *
 * @param {*} mode - Requested mode; undefined, null and '' mean 'off'.
 * @returns {string} The trimmed, lower-cased mode.
 * @throws {Error} When the normalized value is not a supported mode.
 */
function normalizeDelegationMode(mode) {
  const isUnset = mode === undefined || mode === null || mode === '';
  if (isUnset) {
    return 'off';
  }
  const normalized = String(mode).trim().toLowerCase();
  if (DELEGATION_MODES.includes(normalized)) {
    return normalized;
  }
  throw new Error(`Unsupported delegationMode '${mode}'`);
}
|
|
77
|
-
|
|
78
|
-
/**
 * Convert a value to a trimmed string with every whitespace run collapsed
 * to a single space.
 *
 * @param {*} value - Any value; undefined and null become ''.
 * @returns {string} The normalized text.
 */
function normalizeText(value) {
  const isMissing = value === undefined || value === null;
  if (isMissing) {
    return '';
  }
  // After trim() there is no edge whitespace, so split/join only collapses inner runs.
  return String(value).trim().split(/\s+/).join(' ');
}
|
|
82
|
-
|
|
83
|
-
/**
 * Drop falsy entries and duplicates, keeping first-seen order.
 *
 * @param {Array<*>} [values=[]] - Input list.
 * @returns {Array<*>} Distinct truthy values.
 */
function unique(values = []) {
  const seen = new Set();
  for (const value of values.filter(Boolean)) {
    seen.add(value);
  }
  return [...seen];
}
|
|
86
|
-
|
|
87
|
-
/**
 * Resolve the files used by the delegation runtime inside the feedback
 * directory reported by the feedback-loop module.
 *
 * @returns {{FEEDBACK_DIR: string, DELEGATION_LOG_PATH: string, DELEGATION_MODEL_PATH: string}}
 */
function getDelegationPaths() {
  const feedbackPaths = getFeedbackLoopModule().getFeedbackPaths();
  const feedbackDir = feedbackPaths.FEEDBACK_DIR;
  const inFeedbackDir = (fileName) => path.join(feedbackDir, fileName);
  return {
    FEEDBACK_DIR: feedbackDir,
    DELEGATION_LOG_PATH: inFeedbackDir('delegation-log.jsonl'),
    DELEGATION_MODEL_PATH: inFeedbackDir('delegation-model.json'),
  };
}
|
|
95
|
-
|
|
96
|
-
/**
 * Read delegation events from a JSONL log.
 *
 * @param {string} [filePath] - Log to read; falls back to the default
 *   delegation log path when falsy.
 * @returns {Array<object>} Parsed events as returned by readJSONL.
 */
function readDelegationEvents(filePath) {
  // The default path is resolved up front, exactly as before, even when unused.
  const defaultLogPath = getDelegationPaths().DELEGATION_LOG_PATH;
  const targetPath = filePath ? filePath : defaultLogPath;
  return readJSONL(targetPath);
}
|
|
100
|
-
|
|
101
|
-
/**
 * Classify an action by its tool name.
 *
 * @param {{name?: string}} [action={}] - Planned action.
 * @returns {'evidence'|'verification'|'mutation'|'general'} Semantic kind;
 *   'general' for any name not listed below.
 */
function getSemanticKind(action = {}) {
  const toolName = String(action.name || '').trim();
  switch (toolName) {
    case 'construct_context_pack':
    case 'context_provenance':
    case 'feedback_summary':
    case 'recall':
    case 'commerce_recall':
    case 'diagnose_failure':
      return 'evidence';
    case 'evaluate_context_pack':
    case 'feedback_stats':
    case 'gate_stats':
    case 'dashboard':
      return 'verification';
    case 'capture_feedback':
    case 'prevention_rules':
    case 'export_dpo_pairs':
    case 'export_databricks_bundle':
    case 'generate_skill':
    case 'satisfy_gate':
      return 'mutation';
    default:
      return 'general';
  }
}
|
|
133
|
-
|
|
134
|
-
/**
 * Group consecutive actions that share a semantic kind into phases.
 *
 * @param {Array<object>} [actions=[]] - Ordered plan actions.
 * @returns {Array<{phaseIndex: number, kind: string, parallel: boolean, actions: Array<object>}>}
 *   One entry per run of same-kind actions; `parallel` is true when the run
 *   holds more than one action. Empty for a non-array or empty input.
 */
function deriveSemanticPhases(actions = []) {
  if (!Array.isArray(actions) || actions.length === 0) {
    return [];
  }

  const runs = [];
  for (const action of actions) {
    const kind = getSemanticKind(action);
    const lastRun = runs[runs.length - 1];
    if (lastRun && lastRun.kind === kind) {
      lastRun.actions.push(action);
    } else {
      runs.push({ kind, actions: [action] });
    }
  }

  return runs.map((run, phaseIndex) => ({
    phaseIndex,
    kind: run.kind,
    parallel: run.actions.length > 1,
    actions: run.actions,
  }));
}
|
|
160
|
-
|
|
161
|
-
/**
 * Derive a stable task key: the SHA-1 hex digest of the trimmed intent id,
 * the trimmed repo path and the normalized context, joined by newlines.
 *
 * @param {{intentId: *, repoPath: *, context: *}} parts - Key components.
 * @returns {string} Hex digest identifying the task.
 */
function buildTaskKey({ intentId, repoPath, context }) {
  const segments = [
    String(intentId || '').trim(),
    String(repoPath || '').trim(),
    normalizeText(context),
  ];
  // Hashing the joined string is equivalent to feeding the pieces one by one.
  return crypto
    .createHash('sha1')
    .update(segments.join('\n'))
    .digest('hex');
}
|
|
170
|
-
|
|
171
|
-
/**
 * Replay delegation events to find handoffs that started but never completed.
 *
 * A 'started' event registers the handoff under its task key and handoff id.
 * A 'completed' event always clears the handoff id, and clears the task key
 * only when that task's current entry belongs to the same handoff.
 *
 * @param {Array<object>} [events=[]] - Events in log order.
 * @returns {{byTaskKey: Map<*, object>, byHandoffId: Map<*, object>}}
 */
function deriveActiveHandoffs(events = []) {
  const byTaskKey = new Map();
  const byHandoffId = new Map();

  for (const event of events) {
    const isRecord = Boolean(event) && typeof event === 'object';
    if (!isRecord) continue;

    switch (event.eventType) {
      case 'started':
        byTaskKey.set(event.taskKey, event);
        byHandoffId.set(event.handoffId, event);
        break;
      case 'completed': {
        byHandoffId.delete(event.handoffId);
        const current = byTaskKey.get(event.taskKey);
        if (current && current.handoffId === event.handoffId) {
          byTaskKey.delete(event.taskKey);
        }
        break;
      }
      default:
        break;
    }
  }

  return { byTaskKey, byHandoffId };
}
|
|
196
|
-
|
|
197
|
-
/**
 * Serialize the parts of a plan that make up its delegation context: the
 * normalized context text, the action names and the partner strategy's
 * recommended checks.
 *
 * @param {object} [plan={}] - Plan being evaluated.
 * @param {*} context - Context override; falls back to `plan.context`.
 * @returns {string} JSON string of `{ context, actions, checks }`.
 */
function buildContextDigest(plan = {}, context) {
  const actionNames = Array.isArray(plan.actions)
    ? plan.actions.map(({ name }) => name)
    : [];
  const strategy = plan.partnerStrategy;
  const hasChecks = Boolean(strategy) && Array.isArray(strategy.recommendedChecks);
  const digest = {
    context: normalizeText(context || plan.context || ''),
    actions: actionNames,
    checks: hasChecks ? strategy.recommendedChecks : [],
  };
  return JSON.stringify(digest);
}
|
|
206
|
-
|
|
207
|
-
/**
 * List delegate profile names to try, in preference order, for an MCP profile.
 *
 * @param {string} mcpProfile - Caller's MCP profile.
 * @param {boolean} hasMutation - Whether the plan contains a mutation phase;
 *   changes the ordering for 'readonly' and for unlisted profiles.
 * @returns {string[]} Candidate delegate profile names.
 */
function getProfileCandidates(mcpProfile, hasMutation) {
  switch (mcpProfile) {
    case 'readonly':
      if (hasMutation) {
        return ['secure_runtime', 'review_workflow'];
      }
      return ['review_workflow', 'secure_runtime'];
    case 'dispatch':
      return ['review_workflow'];
    case 'locked':
      return ['secure_runtime'];
    default:
      if (hasMutation) {
        return ['pr_workflow', 'review_workflow', 'secure_runtime'];
      }
      return ['review_workflow', 'pr_workflow', 'secure_runtime'];
  }
}
|
|
221
|
-
|
|
222
|
-
/**
 * Pick the delegate profile that should run a plan.
 *
 * Candidates are tried in the order given by getProfileCandidates. A candidate
 * qualifies only when every plan action is in the tool set allowed for its MCP
 * profile. The first qualifying candidate whose context budget fits wins;
 * otherwise the first qualifying candidate is returned with
 * `contextFits: false`.
 *
 * @param {{mcpProfile: string, plan: object, contextChars: number}} input
 * @returns {{delegateProfile: ?string, profileConfig: ?object, actionsFit: boolean, contextFits: boolean, maxChars: number}}
 *   All-null/false/zero when no candidate can execute the actions.
 */
function selectDelegateProfile({ mcpProfile, plan, contextChars }) {
  const { profiles } = loadSubagentProfiles();
  const involvesMutation = deriveSemanticPhases(plan.actions)
    .some(({ kind }) => kind === 'mutation');
  const candidateNames = getProfileCandidates(mcpProfile, involvesMutation);
  let overBudgetMatch = null;

  for (const delegateProfile of candidateNames) {
    const profileConfig = profiles[delegateProfile];
    if (!profileConfig || !profileConfig.mcpProfile) continue;

    const permitted = new Set(getAllowedTools(profileConfig.mcpProfile));
    const actionsFit = Array.isArray(plan.actions)
      && plan.actions.every(({ name }) => permitted.has(name));
    if (!actionsFit) continue;

    const maxChars = Number(profileConfig.context && profileConfig.context.maxChars) || 0;
    // A missing or non-positive budget never counts as fitting.
    const contextFits = maxChars > 0 && contextChars <= maxChars;
    const match = {
      delegateProfile,
      profileConfig,
      actionsFit,
      contextFits,
      maxChars,
    };
    if (contextFits) {
      return match;
    }
    if (overBudgetMatch === null) {
      overBudgetMatch = match;
    }
  }

  if (overBudgetMatch) {
    return overBudgetMatch;
  }
  return {
    delegateProfile: null,
    profileConfig: null,
    actionsFit: false,
    contextFits: false,
    maxChars: 0,
  };
}
|
|
262
|
-
|
|
263
|
-
/**
 * Load the delegation reliability model by delegating to `loadModel`
 * from './thompson-sampling'.
 *
 * @param {string} modelPath - Path of the persisted model file.
 * @returns {*} Whatever `loadModel` returns for that path.
 */
function loadDelegationModel(modelPath) {
  const delegationModel = loadModel(modelPath);
  return delegationModel;
}
|
|
266
|
-
|
|
267
|
-
/**
 * Turn the model's reliability for a set of categories into a score bias.
 *
 * The `reliability` values of the categories present in the model are
 * averaged; the bias is half the distance of that average from 0.5, clamped
 * to [-0.25, 0.25].
 *
 * @param {*} model - Model passed to `getReliability`.
 * @param {string[]} [categories=[]] - Category keys to look up.
 * @returns {number} The bias, or 0 when no category is present.
 */
function getReliabilityBias(model, categories = []) {
  const byCategory = getReliability(model);
  const known = [];
  for (const category of categories) {
    const entry = byCategory[category];
    if (entry) {
      known.push(entry);
    }
  }
  if (known.length === 0) {
    return 0;
  }

  let total = 0;
  for (const entry of known) {
    total += entry.reliability;
  }
  const mean = total / known.length;
  const bias = (mean - 0.5) * 0.5;
  return Math.max(-0.25, Math.min(0.25, bias));
}
|
|
280
|
-
|
|
281
|
-
/**
 * Report whether the log holds a recent failure for the same intent (and, when
 * both sides name one, the same delegate profile).
 *
 * A failure is a 'rejected_start' event, or a 'completed' event whose outcome
 * is not 'accepted' or whose `verificationAccepted` is exactly false. Events
 * with a missing or unparseable timestamp are never treated as recent.
 *
 * @param {Array<object>} [events=[]] - Delegation events.
 * @param {*} intentId - Intent id to match against `event.intentId`.
 * @param {?string} delegateProfile - Profile filter; skipped when falsy.
 * @param {number} [windowMs=RECENT_FAILURE_WINDOW_MS] - Look-back window in milliseconds.
 * @returns {boolean} True when a matching failure lies inside the window.
 */
function hasRecentSimilarFailure(events = [], intentId, delegateProfile, windowMs = RECENT_FAILURE_WINDOW_MS) {
  const cutoff = Date.now() - windowMs;
  return events.some((event) => {
    if (!event || typeof event !== 'object') return false;

    const timestamp = event.timestamp ? new Date(event.timestamp).getTime() : 0;
    // An invalid date parses to NaN, and `NaN < cutoff` is false, so without
    // this guard a malformed timestamp would count as recent forever.
    if (Number.isNaN(timestamp) || timestamp < cutoff) return false;
    if (event.intentId !== intentId) return false;
    if (delegateProfile && event.delegateProfile && event.delegateProfile !== delegateProfile) return false;

    if (event.eventType === 'rejected_start') {
      return true;
    }
    if (event.eventType !== 'completed') {
      return false;
    }
    return event.outcome !== 'accepted' || event.verificationAccepted === false;
  });
}
|
|
301
|
-
|
|
302
|
-
/**
 * Assemble the contract handed to a delegate for a plan.
 *
 * @param {object} input
 * @param {object} input.plan - Plan providing intent, context, actions and
 *   partner strategy; `plan.intent.description` is read unconditionally.
 * @param {string} input.delegateProfile - Profile whose context budget is copied.
 * @param {string[]} [input.plannedChecks=[]] - Caller-supplied checks, merged
 *   ahead of the partner strategy's recommended checks and de-duplicated.
 * @returns {object} Contract with objective, scopeIn/scopeOut, requiredEvidence,
 *   requiredChecks, contextBudget (null when the profile has no context
 *   settings) and completionDefinition.
 */
function buildHandoffContract({ plan, delegateProfile, plannedChecks = [] }) {
  const { profiles } = loadSubagentProfiles();
  const profileConfig = profiles[delegateProfile];

  const callerChecks = Array.isArray(plannedChecks) ? plannedChecks : [];
  const strategy = plan.partnerStrategy;
  const strategyChecks = strategy && Array.isArray(strategy.recommendedChecks)
    ? strategy.recommendedChecks
    : [];
  const requiredChecks = unique([...callerChecks, ...strategyChecks]);

  const { description } = plan.intent;
  const objective = plan.context ? `${description}: ${plan.context}` : description;

  let contextBudget = null;
  if (profileConfig && profileConfig.context) {
    contextBudget = {
      maxItems: profileConfig.context.maxItems,
      maxChars: profileConfig.context.maxChars,
    };
  }

  return {
    objective,
    scopeIn: Array.isArray(plan.actions) ? plan.actions.map(({ name }) => name) : [],
    scopeOut: ['parallel fan-out', 'nested handoffs', 'unapproved scope expansion'],
    requiredEvidence: ['summary', 'result_context'],
    requiredChecks,
    contextBudget,
    completionDefinition: 'Return a concise summary, include result context, and report attempts, violations, and executed checks.',
  };
}
|
|
333
|
-
|
|
334
|
-
/**
 * Decide whether a plan should be handed to a delegate profile.
 *
 * The score starts at 0 and is adjusted for phase count, an evidence+mutation
 * mix, a compatible delegate, codegraph verification hints, context budget
 * fit, the model's reliability bias and recent similar failures, then clamped
 * to [0, 1] and rounded to three decimals. Unless the mode is 'off', the first
 * matching blocker in a fixed order supplies the reason; delegation is
 * selected only when no blocker matches.
 *
 * @param {object} [params={}]
 * @param {*} [params.delegationMode] - Normalized via normalizeDelegationMode.
 * @param {object} [params.plan] - Plan under evaluation.
 * @param {string} [params.mcpProfile] - Falls back to `plan.mcpProfile`, then 'default'.
 * @param {string} [params.context] - Falls back to `plan.context`.
 * @param {string} [params.repoPath]
 * @param {string[]} [params.plannedChecks] - Forwarded to the handoff contract.
 * @returns {object} Decision: mode, executionMode, eligibility, scores,
 *   reason, selected profile, contract, task key, active handoff id, phases
 *   and context size.
 * @throws {Error} When `params.delegationMode` is unsupported.
 */
function evaluateDelegation(params = {}) {
  const delegationMode = normalizeDelegationMode(params.delegationMode);
  const plan = params.plan || {};
  const intentId = plan.intent && plan.intent.id;
  const mcpProfile = String(params.mcpProfile || plan.mcpProfile || 'default').trim();
  const context = String(params.context || plan.context || '');
  const repoPath = params.repoPath || null;
  const taskKey = buildTaskKey({ intentId, repoPath, context });

  const semanticPhases = deriveSemanticPhases(plan.actions);
  const phaseKinds = semanticPhases.map((phase) => phase.kind);
  const hasEvidence = phaseKinds.includes('evidence');
  const hasMutation = phaseKinds.includes('mutation');
  const isMultiPhase = semanticPhases.length >= 2;

  const contextChars = buildContextDigest(plan, context).length;
  const selection = selectDelegateProfile({ mcpProfile, plan, contextChars });
  const model = loadDelegationModel(getDelegationPaths().DELEGATION_MODEL_PATH);
  const events = readDelegationEvents();
  const activeHandoff = deriveActiveHandoffs(events).byTaskKey.get(taskKey) || null;

  let reliabilityBias = 0;
  let recentFailure = false;
  if (selection.delegateProfile) {
    reliabilityBias = getReliabilityBias(model, [
      'delegation_global',
      `intent_${intentId}`,
      `profile_${selection.delegateProfile}`,
    ]);
    recentFailure = hasRecentSimilarFailure(events, intentId, selection.delegateProfile);
  }

  const impact = plan.codegraphImpact;
  const hasVerificationHints = Boolean(
    impact
    && impact.enabled
    && Array.isArray(impact.verificationHints)
    && impact.verificationHints.length > 0
  );

  // Adjustments are applied in this fixed order so the floating-point sum is stable.
  const adjustments = [isMultiPhase ? 0.25 : -0.30];
  if (hasEvidence && hasMutation) adjustments.push(0.15);
  if (selection.delegateProfile) adjustments.push(0.15);
  if (hasVerificationHints) adjustments.push(0.10);
  if (selection.contextFits) {
    adjustments.push(0.10);
  } else if (selection.delegateProfile) {
    adjustments.push(-0.20);
  }
  adjustments.push(reliabilityBias);
  if (recentFailure) adjustments.push(-0.25);

  const unclamped = adjustments.reduce((sum, delta) => sum + delta, 0);
  const score = Number(Math.max(0, Math.min(1, unclamped)).toFixed(3));

  // Ordered blockers: the first one whose condition holds wins.
  const blockers = [
    [plan.status !== 'ready', 'checkpoint_required', 'Delegation is blocked until the required approval checkpoint is cleared.'],
    [mcpProfile === 'dispatch', 'dispatch_profile', 'Dispatch MCP profile may inspect plans and metrics, but remote handoffs stay disabled.'],
    [mcpProfile === 'locked', 'locked_profile', 'Locked MCP profile may inspect the plan but cannot start a handoff.'],
    [semanticPhases.length < 2, 'single_phase_task', 'Delegation was skipped because the task collapses into a single semantic phase.'],
    [(plan.actions || []).length < 3, 'insufficient_actions', 'Delegation was skipped because the task does not have enough action surface to justify a handoff.'],
    [!selection.delegateProfile, 'no_compatible_delegate', 'Delegation was skipped because no existing delegate profile can execute the required actions within policy.'],
    [!selection.contextFits, 'budget_exceeded', 'Delegation was skipped because the context exceeds the selected delegate profile budget.'],
    [Boolean(activeHandoff), 'unresolved_handoff_exists', 'Delegation is blocked because an unresolved handoff already exists for this task.'],
    [Boolean(recentFailure), 'recent_failure', 'Delegation confidence was reduced by a recent similar failure, so the planner kept the task single-agent.'],
    [score < 0.6, 'low_score', 'Delegation was considered but the reliability score did not clear the handoff threshold.'],
  ];

  let reasonCode = 'delegation_disabled';
  let delegationReason = 'Delegation is disabled for this plan.';
  let delegationEligible = false;
  let executionMode = 'single_agent';
  let delegateProfile = null;
  let handoffContract = null;

  if (delegationMode !== 'off') {
    const firstBlocker = blockers.find(([isBlocked]) => isBlocked);
    if (firstBlocker) {
      [, reasonCode, delegationReason] = firstBlocker;
    } else {
      reasonCode = 'delegation_selected';
      delegationReason = 'Delegation cleared the structural, budget, and reliability checks.';
      delegationEligible = true;
      executionMode = 'sequential_delegate';
      delegateProfile = selection.delegateProfile;
      handoffContract = buildHandoffContract({
        plan,
        delegateProfile,
        plannedChecks: params.plannedChecks,
      });
    }
  }

  return {
    delegationMode,
    executionMode,
    delegationEligible,
    // The public score is reported only when delegation was actually selected.
    delegationScore: executionMode === 'sequential_delegate' ? score : 0,
    delegationReason,
    delegateProfile,
    handoffContract,
    reasonCode,
    rawDelegationScore: score,
    taskKey,
    activeHandoffId: activeHandoff ? activeHandoff.handoffId : null,
    semanticPhases,
    contextChars,
  };
}
|
|
451
|
-
|
|
452
|
-
/**
 * Build the 'rejected_start' log event for a handoff that was not started.
 *
 * @param {object} [params={}] - Source fields; `delegateProfile`,
 *   `partnerProfile` and `repoPath` default to null, `context` is normalized.
 * @returns {object} Event with a null `handoffId` and the current ISO timestamp.
 */
function buildRejectedStartEvent(params = {}) {
  const {
    taskKey,
    intentId,
    delegateProfile,
    mcpProfile,
    partnerProfile,
    reasonCode,
    reason,
    context,
    repoPath,
  } = params;

  return {
    eventType: 'rejected_start',
    handoffId: null,
    taskKey,
    intentId,
    delegateProfile: delegateProfile || null,
    mcpProfile,
    partnerProfile: partnerProfile || null,
    reasonCode,
    reason,
    context: normalizeText(context),
    repoPath: repoPath || null,
    timestamp: new Date().toISOString(),
  };
}
|
|
468
|
-
|
|
469
|
-
/**
 * Produce the feedback text for a promotable delegation failure.
 *
 * @param {string} reasonCode - Failure reason code.
 * @param {{intentId?: *}} [params={}] - Supplies the intent id quoted in the context.
 * @returns {?{context: string, whatWentWrong: string, whatToChange: string}}
 *   The feedback payload, or null for a reason code with no template.
 */
function buildDelegationFeedback(reasonCode, params = {}) {
  switch (reasonCode) {
    case 'single_phase_task':
      return {
        context: `Delegation was attempted for single-phase intent '${params.intentId}'.`,
        whatWentWrong: 'The task did not justify a handoff.',
        whatToChange: 'Keep single-phase work single-agent unless a second semantic phase appears.',
      };
    case 'missing_required_evidence':
      return {
        context: `Delegation completed without required evidence for intent '${params.intentId}'.`,
        whatWentWrong: 'The handoff returned without a usable summary or result context.',
        whatToChange: 'Require summary and result context before accepting delegated work.',
      };
    case 'unresolved_handoff_exists':
      return {
        context: `Delegation was re-attempted while a handoff was still unresolved for intent '${params.intentId}'.`,
        whatWentWrong: 'A second handoff started before the first one completed.',
        whatToChange: 'Finish or abort the active handoff before starting another one on the same task.',
      };
    default:
      return null;
  }
}
|
|
493
|
-
|
|
494
|
-
/**
 * Record a promotable delegation failure as negative feedback.
 *
 * @param {string} reasonCode - Failure reason; ignored unless it is in
 *   PROMOTABLE_REASON_CODES and has a feedback template.
 * @param {{intentId?: *, delegateProfile?: *}} [params={}] - Used for the
 *   feedback context and the intent/delegate tags.
 * @returns {*} The result of `captureFeedback`, or null when nothing was recorded.
 */
function promoteDelegationFailure(reasonCode, params = {}) {
  const feedback = PROMOTABLE_REASON_CODES.has(reasonCode)
    ? buildDelegationFeedback(reasonCode, params)
    : null;
  if (!feedback) {
    return null;
  }

  const intentTag = params.intentId ? `intent:${params.intentId}` : null;
  const delegateTag = params.delegateProfile ? `delegate:${params.delegateProfile}` : null;
  const feedbackLoop = getFeedbackLoopModule();

  return feedbackLoop.captureFeedback({
    signal: 'down',
    context: feedback.context,
    whatWentWrong: feedback.whatWentWrong,
    whatToChange: feedback.whatToChange,
    tags: unique(['delegation', reasonCode, intentTag, delegateTag]),
    skill: 'delegation_runtime',
  });
}
|
|
517
|
-
|
|
518
|
-
/**
 * Log a rejected handoff start and promote it to feedback when its reason
 * code is promotable.
 *
 * @param {object} [params={}] - Fields for buildRejectedStartEvent;
 *   `params.reasonCode` also drives the promotion.
 * @returns {object} The event appended to the delegation log.
 */
function persistRejectedStart(params = {}) {
  const logPath = getDelegationPaths().DELEGATION_LOG_PATH;
  const rejection = buildRejectedStartEvent(params);
  appendJSONL(logPath, rejection);
  promoteDelegationFailure(params.reasonCode, params);
  return rejection;
}
|
|
525
|
-
|
|
526
|
-
/**
 * Start a sequential handoff for a plan, or record why it was rejected.
 *
 * The plan is evaluated in 'sequential' mode first. Every rejection is
 * appended to the delegation log before the error is thrown.
 *
 * @param {object} [params={}]
 * @param {object} params.plan - Plan to delegate.
 * @param {string} [params.mcpProfile] - Caller's MCP profile.
 * @param {string} [params.context]
 * @param {string} [params.repoPath]
 * @param {string[]} [params.plannedChecks]
 * @param {string} [params.partnerProfile]
 * @param {string} [params.delegateProfile] - When given, must equal the
 *   planner-selected profile.
 * @returns {{handoffId: string, taskKey: string, status: string, executionMode: string, delegateProfile: string, handoffContract: object}}
 * @throws {Error} statusCode 403 for dispatch/locked profiles, 409 when an
 *   unresolved handoff exists, 422 for other ineligible plans, 400 on a
 *   delegate profile mismatch.
 */
function startHandoff(params = {}) {
  const evaluation = evaluateDelegation({
    delegationMode: 'sequential',
    plan: params.plan,
    mcpProfile: params.mcpProfile,
    context: params.context,
    repoPath: params.repoPath,
    plannedChecks: params.plannedChecks,
  });

  const intentId = params.plan && params.plan.intent ? params.plan.intent.id : null;
  const requestedMcpProfile = String(params.mcpProfile || '').trim();

  // Every rejection logs the same envelope; only reason and profile vary.
  const recordRejection = ({ reasonCode, reason, delegateProfile }) => persistRejectedStart({
    taskKey: evaluation.taskKey,
    intentId,
    delegateProfile,
    mcpProfile: params.mcpProfile,
    partnerProfile: params.partnerProfile,
    reasonCode,
    reason,
    context: params.context,
    repoPath: params.repoPath,
  });

  if (requestedMcpProfile === 'dispatch') {
    const reason = 'Dispatch MCP profile may not start handoffs.';
    recordRejection({ reasonCode: 'dispatch_profile', reason, delegateProfile: null });
    throw createDelegationError(reason, 403);
  }

  if (requestedMcpProfile === 'locked') {
    const reason = 'Locked MCP profile may not start handoffs.';
    recordRejection({ reasonCode: 'locked_profile', reason, delegateProfile: null });
    throw createDelegationError(reason, 403);
  }

  if (!evaluation.delegationEligible || evaluation.executionMode !== 'sequential_delegate') {
    recordRejection({
      reasonCode: evaluation.reasonCode,
      reason: evaluation.delegationReason,
      delegateProfile: evaluation.delegateProfile,
    });
    const statusCode = evaluation.reasonCode === 'unresolved_handoff_exists' ? 409 : 422;
    throw createDelegationError(evaluation.delegationReason, statusCode, {
      reasonCode: evaluation.reasonCode,
    });
  }

  if (params.delegateProfile && params.delegateProfile !== evaluation.delegateProfile) {
    const reason = 'Requested delegateProfile does not match the planner-selected profile.';
    recordRejection({
      reasonCode: 'delegate_profile_mismatch',
      reason,
      delegateProfile: params.delegateProfile,
    });
    throw createDelegationError(reason, 400);
  }

  const { DELEGATION_LOG_PATH } = getDelegationPaths();
  // Three random bytes give a fixed six-character hex suffix; the previous
  // Math.random-based suffix was weaker and could be shorter than six characters.
  const handoffId = `handoff_${Date.now()}_${crypto.randomBytes(3).toString('hex')}`;
  const contract = evaluation.handoffContract;
  const event = {
    eventType: 'started',
    handoffId,
    taskKey: evaluation.taskKey,
    intentId,
    delegateProfile: evaluation.delegateProfile,
    mcpProfile: params.mcpProfile,
    partnerProfile: params.partnerProfile || null,
    context: normalizeText(params.context || params.plan.context || ''),
    repoPath: params.repoPath || null,
    plannedChecks: unique([
      ...(Array.isArray(params.plannedChecks) ? params.plannedChecks : []),
      ...(contract && Array.isArray(contract.requiredChecks) ? contract.requiredChecks : []),
    ]),
    contract,
    delegationScore: evaluation.delegationScore,
    timestamp: new Date().toISOString(),
  };

  appendJSONL(DELEGATION_LOG_PATH, event);

  return {
    handoffId,
    taskKey: event.taskKey,
    status: 'started',
    executionMode: 'sequential_delegate',
    delegateProfile: event.delegateProfile,
    handoffContract: event.contract,
  };
}
|
|
632
|
-
|
|
633
|
-
/**
 * Build the diagnosis object attached to a handoff that ended negatively.
 *
 * Precedence, first match wins:
 *   1. `params.reasonCode === 'missing_required_evidence'`
 *   2. `params.outcome === 'aborted'`
 *   3. `params.outcome === 'rejected'`
 *   4. a diagnosis already produced by a failed verification run
 *   5. a generic delegation failure
 *
 * @param {object} activeHandoff - Accepted for call-site symmetry; not read here.
 * @param {object} [params]
 * @param {string|null} [params.reasonCode]
 * @param {string} [params.outcome]
 * @param {object|null} [params.verification] - Verification result; only
 *   `accepted` and `finalVerification.diagnosis` are inspected.
 * @returns {object} A diagnosis with `rootCauseCategory`, `criticalFailureStep`,
 *   `violations` and `evidence`, or the verifier's own diagnosis object as-is.
 */
function buildDelegationDiagnosis(activeHandoff, params = {}) {
  // Every locally built diagnosis shares the same step and an empty evidence list.
  const describe = (rootCauseCategory, constraintId, message) => ({
    rootCauseCategory,
    criticalFailureStep: 'handoff_completion',
    violations: [{ constraintId, message }],
    evidence: [],
  });

  if (params.reasonCode === 'missing_required_evidence') {
    return describe(
      'delegation_evidence_gap',
      'delegation:missing_required_evidence',
      'Delegated work was completed without required evidence.',
    );
  }

  switch (params.outcome) {
    case 'aborted':
      return describe(
        'delegation_aborted',
        'delegation:aborted',
        'Delegated work was aborted before completion.',
      );
    case 'rejected':
      return describe(
        'delegation_rejected',
        'delegation:rejected',
        'Delegated work was rejected after review.',
      );
    default:
      break;
  }

  // Reuse the verifier's diagnosis only when verification explicitly failed.
  const verification = params.verification;
  const verifierDiagnosis = verification && verification.accepted === false && verification.finalVerification
    ? verification.finalVerification.diagnosis
    : null;
  if (verifierDiagnosis) {
    return verifierDiagnosis;
  }

  return describe(
    'delegation_failure',
    'delegation:failed',
    'Delegated work failed to complete cleanly.',
  );
}
|
|
684
|
-
|
|
685
|
-
/**
 * Record one delegation signal in the persisted delegation model.
 *
 * Loads the model from `DELEGATION_MODEL_PATH`, applies the signal to the
 * global category plus whichever of intent / delegate profile / partner
 * profile categories are present, saves the model back to the same path and
 * returns the reliability view computed from the updated model.
 *
 * @param {object} [params]
 * @param {string} [params.signal] - Passed through to `updateModel` unchanged.
 * @param {string} [params.timestamp] - Defaults to the current time (ISO 8601).
 * @param {string} [params.intentId]
 * @param {string} [params.delegateProfile]
 * @param {string} [params.partnerProfile]
 * @returns {*} Whatever `getReliability` reports for the updated model.
 */
function updateDelegationModel(params = {}) {
  const modelPath = getDelegationPaths().DELEGATION_MODEL_PATH;
  const model = loadDelegationModel(modelPath);

  const timestamp = params.timestamp || new Date().toISOString();
  // Absent identifiers contribute `null`; the list is handed to `unique` as-is.
  const categories = unique([
    'delegation_global',
    params.intentId ? `intent_${params.intentId}` : null,
    params.delegateProfile ? `profile_${params.delegateProfile}` : null,
    params.partnerProfile ? `partner_${params.partnerProfile}` : null,
  ]);

  updateModel(model, { signal: params.signal, timestamp, categories });
  saveModel(model, modelPath);

  return getReliability(model);
}
|
|
701
|
-
|
|
702
|
-
/**
 * Coerce an optional numeric handoff metric.
 *
 * Only real numbers and non-blank numeric strings are accepted. `null`, `''`,
 * booleans, arrays and other objects fall back instead of being silently
 * turned into `0` (or `1`) by `Number()`.
 *
 * @param {*} value - Caller-supplied metric.
 * @param {number|null} fallback - Value to use when `value` is not usable.
 * @returns {number|null}
 */
function coerceHandoffMetric(value, fallback) {
  if (typeof value !== 'number' && typeof value !== 'string') {
    return fallback;
  }
  if (typeof value === 'string' && value.trim() === '') {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * Close an active handoff and record the result.
 *
 * Steps, in order:
 *   1. Normalise and validate `params.outcome` against `HANDOFF_OUTCOMES`.
 *   2. Look up the active handoff by `params.handoffId`.
 *   3. For non-aborted outcomes with a result context, run one verification
 *      pass (`maxRetries: 0`).
 *   4. If the outcome is negative (not accepted, verification failed, or no
 *      summary/result context supplied), build a diagnosis and append a
 *      diagnostic record unless the verification run already persisted one.
 *   5. Append a `completed` event to the delegation log and feed a
 *      positive/negative signal into the delegation model.
 *
 * @param {object} [params]
 * @param {string} params.handoffId
 * @param {string} params.outcome - Case-insensitive; surrounding whitespace is ignored.
 * @param {string} [params.summary]
 * @param {string} [params.resultContext]
 * @param {number|string} [params.attempts] - Defaults to 1.
 * @param {number|string} [params.violationCount] - Defaults to 0.
 * @param {number|string} [params.tokenEstimate] - Defaults to null.
 * @param {number|string} [params.latencyMs] - Defaults to null.
 * @returns {{handoffId: *, status: string, outcome: string,
 *   verificationAccepted: (boolean|null), diagnosis: (object|null), reliability: *}}
 * @throws Error from `createDelegationError` with 400 for an unsupported
 *   outcome and 404 when no active handoff matches `handoffId`.
 */
function completeHandoff(params = {}) {
  const outcome = String(params.outcome || '').trim().toLowerCase();
  if (!HANDOFF_OUTCOMES.includes(outcome)) {
    throw createDelegationError(`Unsupported handoff outcome '${params.outcome}'`, 400);
  }

  const events = readDelegationEvents();
  const active = deriveActiveHandoffs(events).byHandoffId.get(params.handoffId);
  if (!active) {
    throw createDelegationError(`No active handoff found for '${params.handoffId}'`, 404);
  }

  const summary = normalizeText(params.summary);
  const resultContext = normalizeText(params.resultContext);
  // Anything other than an abort must come with a summary or a result context.
  const missingRequiredEvidence = outcome !== 'aborted' && !summary && !resultContext;
  let verification = null;

  if (outcome !== 'aborted' && resultContext) {
    verification = getVerificationLoopModule().runVerificationLoop({
      context: resultContext,
      tags: unique([
        'delegation',
        active.intentId ? `intent:${active.intentId}` : null,
        active.delegateProfile ? `delegate:${active.delegateProfile}` : null,
      ]),
      partnerProfile: active.partnerProfile || null,
      maxRetries: 0,
    });
  }

  // `null` means "verification was not run", as opposed to `false` (it failed).
  const verificationAccepted = verification ? verification.accepted : null;
  const negativeOutcome = outcome !== 'accepted' || verificationAccepted === false || missingRequiredEvidence;
  const diagnosis = negativeOutcome
    ? buildDelegationDiagnosis(active, {
      outcome,
      reasonCode: missingRequiredEvidence ? 'missing_required_evidence' : null,
      verification,
    })
    : null;

  // Skip the diagnostic record when the verification run already persisted one.
  if (diagnosis && (!verification || !verification.persistedDiagnosis)) {
    getFeedbackLoopModule().appendDiagnosticRecord({
      source: 'delegation_runtime',
      step: diagnosis.criticalFailureStep || 'handoff_completion',
      context: resultContext || summary || active.context || '',
      diagnosis,
      metadata: {
        handoffId: active.handoffId,
        intentId: active.intentId,
        delegateProfile: active.delegateProfile,
        outcome,
      },
    });
  }

  if (missingRequiredEvidence) {
    promoteDelegationFailure('missing_required_evidence', {
      intentId: active.intentId,
      delegateProfile: active.delegateProfile,
    });
  }

  const { DELEGATION_LOG_PATH } = getDelegationPaths();
  const event = {
    eventType: 'completed',
    handoffId: active.handoffId,
    taskKey: active.taskKey,
    intentId: active.intentId,
    delegateProfile: active.delegateProfile,
    mcpProfile: active.mcpProfile,
    partnerProfile: active.partnerProfile,
    outcome,
    summary: summary || null,
    resultContext: resultContext || null,
    // Metrics are optional; null/blank inputs must fall back rather than become 0.
    attempts: coerceHandoffMetric(params.attempts, 1),
    violationCount: coerceHandoffMetric(params.violationCount, 0),
    tokenEstimate: coerceHandoffMetric(params.tokenEstimate, null),
    latencyMs: coerceHandoffMetric(params.latencyMs, null),
    verificationAccepted,
    verification: verification
      ? {
        accepted: verification.accepted,
        attempts: verification.attempts,
        maxRetries: verification.maxRetries,
        finalVerification: verification.finalVerification,
        persistedDiagnosis: verification.persistedDiagnosis,
      }
      : null,
    diagnosis,
    timestamp: new Date().toISOString(),
  };
  appendJSONL(DELEGATION_LOG_PATH, event);

  // The model update shares the event timestamp so log and model stay aligned.
  const reliability = updateDelegationModel({
    signal: negativeOutcome ? 'negative' : 'positive',
    intentId: active.intentId,
    delegateProfile: active.delegateProfile,
    partnerProfile: active.partnerProfile,
    timestamp: event.timestamp,
  });

  return {
    handoffId: active.handoffId,
    status: 'completed',
    outcome,
    verificationAccepted,
    diagnosis,
    reliability,
  };
}
|
|
812
|
-
|
|
813
|
-
/**
 * Summarise the delegation log and model into dashboard-style counters.
 *
 * A completion counts as a failure for the "top failing" rankings when it was
 * rejected, aborted, failed verification, or was recorded with a diagnosis.
 * The last case covers accepted handoffs that supplied no evidence: they are
 * logged with a diagnosis and fed to the model as a negative signal, so they
 * are ranked as failures here too.
 *
 * @param {string|null} [feedbackDir] - Directory holding `delegation-log.jsonl`
 *   and `delegation-model.json`; when falsy the paths come from
 *   `getDelegationPaths()`.
 * @returns {object} Counts (`attemptCount`, `acceptedCount`, `rejectedCount`,
 *   `abortedCount`, `activeCount`, `avoidedDelegationCount`), rates/averages
 *   (`verificationFailureRate`, `averageAttemptsPerTask`,
 *   `averageTokenEstimate`), the top five failing profiles and intents as
 *   `{ key, count }` pairs, and `reliability.global`.
 */
function summarizeDelegation(feedbackDir = null) {
  const paths = feedbackDir
    ? {
      DELEGATION_LOG_PATH: path.join(feedbackDir, 'delegation-log.jsonl'),
      DELEGATION_MODEL_PATH: path.join(feedbackDir, 'delegation-model.json'),
    }
    : getDelegationPaths();
  const events = readDelegationEvents(paths.DELEGATION_LOG_PATH);
  const active = deriveActiveHandoffs(events);

  const eventsOfType = (eventType) => events.filter((event) => event.eventType === eventType);
  const completions = eventsOfType('completed');
  const starts = eventsOfType('started');
  const rejectedStarts = eventsOfType('rejected_start');

  const countOutcome = (outcome) => completions.filter((event) => event.outcome === outcome).length;
  const finiteField = (field) => completions
    .map((event) => event[field])
    .filter((value) => Number.isFinite(value));
  // Mean rounded to `digits` decimals; an empty sample reports 0.
  const average = (values, digits) => (values.length > 0
    ? Number((values.reduce((sum, value) => sum + value, 0) / values.length).toFixed(digits))
    : 0);
  const topFive = (counts) => Object.entries(counts)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([key, count]) => ({ key, count }));

  // Only completions where verification actually ran carry a boolean here.
  const verificationSamples = completions.filter((event) => typeof event.verificationAccepted === 'boolean');
  const verificationFailures = verificationSamples.filter((event) => event.verificationAccepted === false);

  const failingProfiles = {};
  const failingIntents = {};

  for (const event of [...rejectedStarts, ...completions]) {
    const failed = event.eventType === 'rejected_start'
      || event.outcome === 'rejected'
      || event.outcome === 'aborted'
      || event.verificationAccepted === false
      // Accepted-but-unevidenced completions are logged with a diagnosis.
      || Boolean(event.diagnosis);
    if (!failed) continue;
    if (event.delegateProfile) {
      failingProfiles[event.delegateProfile] = (failingProfiles[event.delegateProfile] || 0) + 1;
    }
    if (event.intentId) {
      failingIntents[event.intentId] = (failingIntents[event.intentId] || 0) + 1;
    }
  }

  const model = loadDelegationModel(paths.DELEGATION_MODEL_PATH);
  const reliability = getReliability(model);

  return {
    attemptCount: starts.length,
    acceptedCount: countOutcome('accepted'),
    rejectedCount: countOutcome('rejected'),
    abortedCount: countOutcome('aborted'),
    activeCount: active.byHandoffId.size,
    avoidedDelegationCount: rejectedStarts.length,
    verificationFailureRate: verificationSamples.length > 0
      ? Number((verificationFailures.length / verificationSamples.length).toFixed(3))
      : 0,
    averageAttemptsPerTask: average(finiteField('attempts'), 2),
    averageTokenEstimate: average(finiteField('tokenEstimate'), 2),
    topFailingProfiles: topFive(failingProfiles),
    topFailingIntents: topFive(failingIntents),
    reliability: {
      global: reliability.delegation_global || null,
    },
  };
}
|
|
882
|
-
|
|
883
|
-
module.exports = {
|
|
884
|
-
DELEGATION_MODES,
|
|
885
|
-
HANDOFF_OUTCOMES,
|
|
886
|
-
normalizeDelegationMode,
|
|
887
|
-
deriveSemanticPhases,
|
|
888
|
-
buildTaskKey,
|
|
889
|
-
getDelegationPaths,
|
|
890
|
-
readDelegationEvents,
|
|
891
|
-
deriveActiveHandoffs,
|
|
892
|
-
evaluateDelegation,
|
|
893
|
-
startHandoff,
|
|
894
|
-
completeHandoff,
|
|
895
|
-
summarizeDelegation,
|
|
896
|
-
};
|