jiva-core 0.3.2 → 0.3.3-dev.bd250bd
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish-dev.yml +39 -0
- package/.github/workflows/npm-publish.yml +31 -0
- package/Dockerfile +12 -7
- package/README.md +10 -0
- package/cloud-run.yaml +1 -1
- package/cloud-run.yaml.template +1 -1
- package/dist/core/agent-spawner.d.ts.map +1 -1
- package/dist/core/agent-spawner.js +3 -0
- package/dist/core/agent-spawner.js.map +1 -1
- package/dist/core/client-agent.d.ts +46 -19
- package/dist/core/client-agent.d.ts.map +1 -1
- package/dist/core/client-agent.js +332 -219
- package/dist/core/client-agent.js.map +1 -1
- package/dist/core/config.d.ts +73 -17
- package/dist/core/config.d.ts.map +1 -1
- package/dist/core/config.js +20 -6
- package/dist/core/config.js.map +1 -1
- package/dist/core/dual-agent.d.ts +20 -0
- package/dist/core/dual-agent.d.ts.map +1 -1
- package/dist/core/dual-agent.js +217 -49
- package/dist/core/dual-agent.js.map +1 -1
- package/dist/core/manager-agent.d.ts +9 -2
- package/dist/core/manager-agent.d.ts.map +1 -1
- package/dist/core/manager-agent.js +43 -14
- package/dist/core/manager-agent.js.map +1 -1
- package/dist/core/types/agent-context.d.ts +30 -0
- package/dist/core/types/agent-context.d.ts.map +1 -0
- package/dist/core/types/agent-context.js +8 -0
- package/dist/core/types/agent-context.js.map +1 -0
- package/dist/core/types/completion-signal.d.ts +17 -0
- package/dist/core/types/completion-signal.d.ts.map +1 -0
- package/dist/core/types/completion-signal.js +8 -0
- package/dist/core/types/completion-signal.js.map +1 -0
- package/dist/core/utils/serialize-agent-context.d.ts +23 -0
- package/dist/core/utils/serialize-agent-context.d.ts.map +1 -0
- package/dist/core/utils/serialize-agent-context.js +73 -0
- package/dist/core/utils/serialize-agent-context.js.map +1 -0
- package/dist/core/worker-agent.d.ts +9 -1
- package/dist/core/worker-agent.d.ts.map +1 -1
- package/dist/core/worker-agent.js +235 -39
- package/dist/core/worker-agent.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/interfaces/cli/index.js +91 -14
- package/dist/interfaces/cli/index.js.map +1 -1
- package/dist/interfaces/cli/setup-wizard.d.ts.map +1 -1
- package/dist/interfaces/cli/setup-wizard.js +93 -1
- package/dist/interfaces/cli/setup-wizard.js.map +1 -1
- package/dist/interfaces/http/session-manager.d.ts.map +1 -1
- package/dist/interfaces/http/session-manager.js +34 -7
- package/dist/interfaces/http/session-manager.js.map +1 -1
- package/dist/models/krutrim.d.ts +1 -1
- package/dist/models/krutrim.d.ts.map +1 -1
- package/dist/models/krutrim.js +4 -3
- package/dist/models/krutrim.js.map +1 -1
- package/dist/models/orchestrator.d.ts +24 -0
- package/dist/models/orchestrator.d.ts.map +1 -1
- package/dist/models/orchestrator.js +40 -6
- package/dist/models/orchestrator.js.map +1 -1
- package/dist/storage/gcp-bucket-provider.d.ts.map +1 -1
- package/dist/storage/gcp-bucket-provider.js +1 -11
- package/dist/storage/gcp-bucket-provider.js.map +1 -1
- package/dist/utils/platform.d.ts +13 -0
- package/dist/utils/platform.d.ts.map +1 -0
- package/dist/utils/platform.js +23 -0
- package/dist/utils/platform.js.map +1 -0
- package/package.json +7 -8
- package/.claude/settings.local.json +0 -18
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
* - STANDARD: Creation requests → file existence + basic validation
|
|
10
10
|
* - THOROUGH: Complex/testing requests OR failures → full E2E validation with tools
|
|
11
11
|
*/
|
|
12
|
+
import { serializeAgentContext } from './utils/serialize-agent-context.js';
|
|
12
13
|
import { logger } from '../utils/logger.js';
|
|
13
14
|
import { orchestrationLogger } from '../utils/orchestration-logger.js';
|
|
14
15
|
export var InvolvementLevel;
|
|
@@ -17,36 +18,97 @@ export var InvolvementLevel;
|
|
|
17
18
|
InvolvementLevel["STANDARD"] = "standard";
|
|
18
19
|
InvolvementLevel["THOROUGH"] = "thorough";
|
|
19
20
|
})(InvolvementLevel || (InvolvementLevel = {}));
|
|
21
|
+
/**
 * Normalize the LLM-returned mustUseTools value to string[] | undefined.
 *
 * The LLM may return null, a single string, or an array, and array entries
 * are not guaranteed to be usable tool names. Only non-empty strings are kept
 * (surrounding whitespace is trimmed); anything else is dropped so downstream
 * substring matching never sees null, numbers, or blank patterns.
 *
 * @param {unknown} value - Raw `mustUseTools` field from the parsed LLM JSON.
 * @returns {string[] | undefined} A fresh array of cleaned tool names, or
 *   undefined when no usable name remains (including an empty array input).
 */
function normalizeMustUseTools(value) {
    // Treat a lone value as a one-element list so both shapes share one path.
    const candidates = Array.isArray(value) ? value : [value];
    const names = candidates
        .filter((entry) => typeof entry === 'string')
        .map((entry) => entry.trim())
        .filter((entry) => entry.length > 0);
    // An empty list carries no requirement; report it the same way as "not set".
    return names.length > 0 ? names : undefined;
}
|
|
20
32
|
export class ClientAgent {
|
|
21
33
|
orchestrator;
|
|
22
34
|
mcpManager;
|
|
23
35
|
mcpClient;
|
|
24
36
|
failureCount = 0;
|
|
25
|
-
//
|
|
26
|
-
|
|
27
|
-
'filesystem__read_text_file',
|
|
28
|
-
'filesystem__list_directory',
|
|
29
|
-
'filesystem__directory_tree',
|
|
30
|
-
'filesystem__search_files',
|
|
31
|
-
'playwright__browser_navigate',
|
|
32
|
-
'playwright__browser_console_messages',
|
|
33
|
-
'playwright__browser_take_screenshot',
|
|
34
|
-
'playwright__browser_evaluate',
|
|
35
|
-
];
|
|
37
|
+
// Lazily cached list of all available tool names (populated on first use)
|
|
38
|
+
_availableTools = null;
|
|
36
39
|
constructor(orchestrator, mcpManager) {
|
|
37
40
|
this.orchestrator = orchestrator;
|
|
38
41
|
this.mcpManager = mcpManager;
|
|
39
42
|
this.mcpClient = mcpManager.getClient();
|
|
40
43
|
}
|
|
44
|
+
// ─── Tool Discovery ───────────────────────────────────────────────────────
|
|
45
|
+
/**
|
|
46
|
+
* Returns all tool names currently available from connected MCP servers.
|
|
47
|
+
* Result is cached after the first call; call resetToolCache() if servers change.
|
|
48
|
+
*/
|
|
49
|
+
getAvailableTools() {
|
|
50
|
+
if (this._availableTools === null) {
|
|
51
|
+
this._availableTools = this.mcpClient.getAllTools().map(t => t.name);
|
|
52
|
+
logger.debug(`[Client] Discovered ${this._availableTools.length} available tools: ${this._availableTools.join(', ')}`);
|
|
53
|
+
}
|
|
54
|
+
return this._availableTools;
|
|
55
|
+
}
|
|
56
|
+
/** Reset the tool cache (e.g. after MCP server reconnects). */
|
|
57
|
+
resetToolCache() {
|
|
58
|
+
this._availableTools = null;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Find the first available tool whose name contains any of the given substrings.
|
|
62
|
+
* Returns null if no match is found.
|
|
63
|
+
*/
|
|
64
|
+
findTool(...patterns) {
|
|
65
|
+
const tools = this.getAvailableTools();
|
|
66
|
+
for (const pattern of patterns) {
|
|
67
|
+
const found = tools.find(t => t.includes(pattern));
|
|
68
|
+
if (found)
|
|
69
|
+
return found;
|
|
70
|
+
}
|
|
71
|
+
return null;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Build a human-readable summary of available tool categories for LLM prompts.
|
|
75
|
+
*/
|
|
76
|
+
buildToolContextForPrompt() {
|
|
77
|
+
const tools = this.getAvailableTools();
|
|
78
|
+
if (tools.length === 0) {
|
|
79
|
+
return 'No MCP tools are currently available.';
|
|
80
|
+
}
|
|
81
|
+
// Group by server prefix (everything before __)
|
|
82
|
+
const byServer = {};
|
|
83
|
+
for (const tool of tools) {
|
|
84
|
+
const [server] = tool.split('__');
|
|
85
|
+
if (!byServer[server])
|
|
86
|
+
byServer[server] = [];
|
|
87
|
+
byServer[server].push(tool);
|
|
88
|
+
}
|
|
89
|
+
return Object.entries(byServer)
|
|
90
|
+
.map(([server, serverTools]) => `- ${server}: ${serverTools.join(', ')}`)
|
|
91
|
+
.join('\n');
|
|
92
|
+
}
|
|
93
|
+
// ─── Task Analysis ────────────────────────────────────────────────────────
|
|
41
94
|
/**
|
|
42
95
|
* Use LLM to analyze the task and determine involvement level + requirements.
|
|
43
96
|
* Replaces keyword-based determineInvolvementLevel() and parseRequirements()
|
|
44
97
|
* with semantic understanding that avoids false positives.
|
|
98
|
+
*
|
|
99
|
+
* NOTE: failureCount escalation has been removed. Involvement level is now
|
|
100
|
+
* determined purely by task type. Per-subtask correction is handled by
|
|
101
|
+
* CompletionSignal + DualAgent retry budget instead.
|
|
45
102
|
*/
|
|
46
|
-
async analyzeTaskRequirements(userMessage, subtasks, workerResult) {
|
|
103
|
+
async analyzeTaskRequirements(userMessage, subtasks, workerResult, agentContext) {
|
|
47
104
|
const workerContext = workerResult
|
|
48
105
|
? `\nWorker Result (first 500 chars): ${workerResult.result.substring(0, 500)}\nWorker Success: ${workerResult.success}\nTools Used: ${workerResult.toolsUsed.join(', ') || 'none'} (${workerResult.toolsUsed.length} total)`
|
|
49
106
|
: '';
|
|
107
|
+
const availableToolsContext = this.buildToolContextForPrompt();
|
|
108
|
+
// Include serialized agent context for richer analysis
|
|
109
|
+
const contextBlock = agentContext
|
|
110
|
+
? `\nAGENT CONTEXT:\n${serializeAgentContext(agentContext, 'client')}`
|
|
111
|
+
: '';
|
|
50
112
|
const analysisPrompt = `You are a task analyst for a software agent system. Analyze the user's request to determine:
|
|
51
113
|
1. How deeply to validate the Worker's output (involvement level)
|
|
52
114
|
2. What specific requirements the task implies
|
|
@@ -55,8 +117,10 @@ USER MESSAGE: ${userMessage}
|
|
|
55
117
|
|
|
56
118
|
SUBTASKS: ${JSON.stringify(subtasks)}
|
|
57
119
|
${workerContext}
|
|
120
|
+
${contextBlock}
|
|
58
121
|
|
|
59
|
-
|
|
122
|
+
AVAILABLE TOOLS (what the Worker and Client can actually use):
|
|
123
|
+
${availableToolsContext}
|
|
60
124
|
|
|
61
125
|
Respond ONLY with valid JSON in this exact format (no other text):
|
|
62
126
|
{
|
|
@@ -73,19 +137,14 @@ Respond ONLY with valid JSON in this exact format (no other text):
|
|
|
73
137
|
}
|
|
74
138
|
|
|
75
139
|
CRITICAL RULES for involvementLevel:
|
|
76
|
-
- THOROUGH: ONLY when the user EXPLICITLY asks to test or verify something
|
|
140
|
+
- THOROUGH: ONLY when the user EXPLICITLY asks to test or verify something, OR for complex multi-file operations (>3 subtasks)
|
|
77
141
|
- MINIMAL: Information-only requests (listing files, explaining code, describing something, answering questions) where no files are created or modified
|
|
78
142
|
- STANDARD: Default for creation, modification, or action tasks
|
|
79
143
|
|
|
80
144
|
CRITICAL RULES for requirements:
|
|
81
|
-
-
|
|
82
|
-
-
|
|
83
|
-
-
|
|
84
|
-
- "check how much space my caches use" = type "information", mustUseTools null
|
|
85
|
-
- "find the biggest files in Downloads" = type "information", mustUseTools null
|
|
86
|
-
- "test the login page in the browser" = type "testing", mustUseTools ["playwright__"]
|
|
87
|
-
- "create index.html and verify it works" = type "file_creation" + type "testing" with playwright
|
|
88
|
-
- "make sure the server is running" = type "verification", mustUseTools null
|
|
145
|
+
- Only set mustUseTools to tool names listed in AVAILABLE TOOLS above — do NOT reference tools that are not available
|
|
146
|
+
- If the required tool is not in AVAILABLE TOOLS, set mustUseTools to null
|
|
147
|
+
- "testing" type should only be set when the user explicitly wants something executed and verified
|
|
89
148
|
- If no specific tools are required, set mustUseTools to null
|
|
90
149
|
- Always include at least one requirement entry`;
|
|
91
150
|
try {
|
|
@@ -111,16 +170,13 @@ CRITICAL RULES for requirements:
|
|
|
111
170
|
default:
|
|
112
171
|
level = InvolvementLevel.STANDARD;
|
|
113
172
|
}
|
|
114
|
-
//
|
|
115
|
-
|
|
116
|
-
logger.debug(`[Client] Escalating to THOROUGH due to ${this.failureCount} previous failures`);
|
|
117
|
-
level = InvolvementLevel.THOROUGH;
|
|
118
|
-
}
|
|
173
|
+
// NOTE: failureCount escalation removed — involvement level is determined
|
|
174
|
+
// purely by task type. Per-subtask correction is handled by CompletionSignal.
|
|
119
175
|
const requirements = (analysis.requirements || []).map((req) => ({
|
|
120
176
|
type: req.type || 'other',
|
|
121
177
|
description: req.description || 'General task completion',
|
|
122
178
|
filePath: req.filePath || undefined,
|
|
123
|
-
mustUseTools: req.mustUseTools
|
|
179
|
+
mustUseTools: normalizeMustUseTools(req.mustUseTools),
|
|
124
180
|
}));
|
|
125
181
|
// Ensure at least one requirement
|
|
126
182
|
if (requirements.length === 0) {
|
|
@@ -134,118 +190,23 @@ CRITICAL RULES for requirements:
|
|
|
134
190
|
catch (error) {
|
|
135
191
|
logger.warn(`[Client] LLM task analysis failed: ${error}, falling back to STANDARD`);
|
|
136
192
|
}
|
|
137
|
-
// Fallback: STANDARD with generic requirement
|
|
193
|
+
// Fallback: STANDARD with generic requirement (no failureCount escalation)
|
|
138
194
|
return {
|
|
139
|
-
level:
|
|
195
|
+
level: InvolvementLevel.STANDARD,
|
|
140
196
|
requirements: [{ type: 'other', description: 'General task completion' }],
|
|
141
197
|
};
|
|
142
198
|
}
|
|
143
|
-
|
|
144
|
-
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
145
|
-
* Determine involvement level based on user request complexity
|
|
146
|
-
*/
|
|
147
|
-
determineInvolvementLevel(userMessage, subtasks) {
|
|
148
|
-
const messageLower = userMessage.toLowerCase();
|
|
149
|
-
const subtasksLower = subtasks.join(' ').toLowerCase();
|
|
150
|
-
// THOROUGH: User explicitly requests testing/verification
|
|
151
|
-
const testKeywords = ['test', 'verify', 'check', 'make sure', 'ensure', 'validate'];
|
|
152
|
-
if (testKeywords.some(kw => messageLower.includes(kw))) {
|
|
153
|
-
logger.debug('[Client] THOROUGH mode: Testing/verification requested');
|
|
154
|
-
return InvolvementLevel.THOROUGH;
|
|
155
|
-
}
|
|
156
|
-
// THOROUGH: After failures (user frustrated)
|
|
157
|
-
if (this.failureCount > 0) {
|
|
158
|
-
logger.debug(`[Client] THOROUGH mode: ${this.failureCount} previous failures detected`);
|
|
159
|
-
return InvolvementLevel.THOROUGH;
|
|
160
|
-
}
|
|
161
|
-
// THOROUGH: Complex multi-file operations
|
|
162
|
-
if (subtasks.length > 3 || (messageLower.includes('component') && messageLower.includes('index.html'))) {
|
|
163
|
-
logger.debug('[Client] THOROUGH mode: Complex multi-file operation');
|
|
164
|
-
return InvolvementLevel.THOROUGH;
|
|
165
|
-
}
|
|
166
|
-
// MINIMAL: Information-only requests
|
|
167
|
-
const infoKeywords = ['what', 'list', 'show', 'explain', 'describe', 'how', 'tell me'];
|
|
168
|
-
const creationKeywords = ['create', 'build', 'write', 'generate', 'make', 'add'];
|
|
169
|
-
const hasInfoKeyword = infoKeywords.some(kw => messageLower.includes(kw));
|
|
170
|
-
const hasCreationKeyword = creationKeywords.some(kw => messageLower.includes(kw));
|
|
171
|
-
if (hasInfoKeyword && !hasCreationKeyword) {
|
|
172
|
-
logger.debug('[Client] MINIMAL mode: Information request');
|
|
173
|
-
return InvolvementLevel.MINIMAL;
|
|
174
|
-
}
|
|
175
|
-
// STANDARD: Default for creation/modification tasks
|
|
176
|
-
logger.debug('[Client] STANDARD mode: Regular creation task');
|
|
177
|
-
return InvolvementLevel.STANDARD;
|
|
178
|
-
}
|
|
179
|
-
/**
|
|
180
|
-
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
181
|
-
* Parse requirements from user message
|
|
182
|
-
*/
|
|
183
|
-
parseRequirements(userMessage, subtasks) {
|
|
184
|
-
const requirements = [];
|
|
185
|
-
const messageLower = userMessage.toLowerCase();
|
|
186
|
-
const combined = (messageLower + ' ' + subtasks.join(' ').toLowerCase());
|
|
187
|
-
// Detect file creation requirements
|
|
188
|
-
const fileMatches = userMessage.match(/(?:create|build|generate|write|save as)\s+([a-zA-Z0-9._/-]+\.(html|js|css|md|json|txt|py|ts|tsx|jsx))/gi);
|
|
189
|
-
if (fileMatches) {
|
|
190
|
-
fileMatches.forEach(match => {
|
|
191
|
-
const filename = match.split(/\s+/).pop();
|
|
192
|
-
if (filename) {
|
|
193
|
-
requirements.push({
|
|
194
|
-
type: 'file_creation',
|
|
195
|
-
description: `Create file: ${filename}`,
|
|
196
|
-
filePath: filename,
|
|
197
|
-
});
|
|
198
|
-
}
|
|
199
|
-
});
|
|
200
|
-
}
|
|
201
|
-
// Detect testing requirements (explicit verification requests)
|
|
202
|
-
const testKeywords = ['test', 'verify', 'check', 'make sure', 'ensure'];
|
|
203
|
-
if (testKeywords.some(kw => combined.includes(kw))) {
|
|
204
|
-
requirements.push({
|
|
205
|
-
type: 'testing',
|
|
206
|
-
description: 'Verify functionality through testing',
|
|
207
|
-
mustUseTools: ['playwright__'],
|
|
208
|
-
});
|
|
209
|
-
}
|
|
210
|
-
// Detect browser verification requirements - ONLY for file verification, not general browsing
|
|
211
|
-
// This should trigger for "open index.html in browser" but NOT for "open linkedin.com"
|
|
212
|
-
const isLocalFileOpen = (combined.includes('open') && combined.includes('.html')) ||
|
|
213
|
-
(combined.includes('browser') && combined.includes('.html'));
|
|
214
|
-
const isExternalUrl = combined.match(/open\s+(?:https?:\/\/)?(?:www\.)?[a-z0-9-]+\.[a-z]{2,}/i);
|
|
215
|
-
if (isLocalFileOpen && !isExternalUrl) {
|
|
216
|
-
requirements.push({
|
|
217
|
-
type: 'verification',
|
|
218
|
-
description: 'Browser testing required for local HTML file',
|
|
219
|
-
mustUseTools: ['playwright__browser_navigate', 'playwright__browser_console_messages'],
|
|
220
|
-
});
|
|
221
|
-
}
|
|
222
|
-
// For external URLs, don't require specific tools - just opening the page is enough
|
|
223
|
-
if (isExternalUrl) {
|
|
224
|
-
requirements.push({
|
|
225
|
-
type: 'verification',
|
|
226
|
-
description: 'Open external URL',
|
|
227
|
-
// No mustUseTools - Worker just needs to navigate, Client shouldn't demand specific validation tools
|
|
228
|
-
});
|
|
229
|
-
}
|
|
230
|
-
// Default: at least verify Worker did some work
|
|
231
|
-
if (requirements.length === 0) {
|
|
232
|
-
requirements.push({
|
|
233
|
-
type: 'other',
|
|
234
|
-
description: 'General task completion',
|
|
235
|
-
});
|
|
236
|
-
}
|
|
237
|
-
return requirements;
|
|
238
|
-
}
|
|
199
|
+
// ─── Main Validation Entry Point ──────────────────────────────────────────
|
|
239
200
|
/**
|
|
240
201
|
* Validate Worker's work at appropriate involvement level
|
|
241
202
|
*/
|
|
242
|
-
async validate(userMessage, subtasks, workerResult, involvementLevel) {
|
|
203
|
+
async validate(userMessage, subtasks, workerResult, involvementLevel, agentContext) {
|
|
243
204
|
// Use LLM-based analysis instead of keyword matching
|
|
244
|
-
const { level: analyzedLevel, requirements } = await this.analyzeTaskRequirements(userMessage, subtasks, workerResult);
|
|
205
|
+
const { level: analyzedLevel, requirements } = await this.analyzeTaskRequirements(userMessage, subtasks, workerResult, agentContext);
|
|
245
206
|
let level = involvementLevel || analyzedLevel;
|
|
246
207
|
// CRITICAL: Use LLM to check for unjustified failure claims BEFORE other validation
|
|
247
208
|
// Even in MINIMAL mode, we must catch agents giving up without trying
|
|
248
|
-
const failureAnalysis = await this.analyzeForUnjustifiedFailure(userMessage, workerResult);
|
|
209
|
+
const failureAnalysis = await this.analyzeForUnjustifiedFailure(userMessage, workerResult, agentContext);
|
|
249
210
|
if (failureAnalysis.claimsFailure && !failureAnalysis.hasEvidence) {
|
|
250
211
|
logger.info(`[Client] Detected unjustified failure claim - escalating from ${level} to STANDARD`);
|
|
251
212
|
logger.info(`[Client] LLM reasoning: ${failureAnalysis.reasoning}`);
|
|
@@ -272,7 +233,7 @@ CRITICAL RULES for requirements:
|
|
|
272
233
|
}
|
|
273
234
|
// Layer 0.5: Result-vs-Evidence Coherence Check (always done, catches hallucinated accomplishments)
|
|
274
235
|
// This detects when the Worker claims to have done things its tool usage doesn't support
|
|
275
|
-
const coherenceAnalysis = await this.analyzeResultCoherence(userMessage, workerResult);
|
|
236
|
+
const coherenceAnalysis = await this.analyzeResultCoherence(userMessage, workerResult, agentContext);
|
|
276
237
|
orchestrationLogger.logClientCoherenceCheck(coherenceAnalysis.isCoherent, coherenceAnalysis.unsupportedClaims, coherenceAnalysis.reasoning);
|
|
277
238
|
if (!coherenceAnalysis.isCoherent) {
|
|
278
239
|
logger.info(`[Client] Detected incoherent result — Worker claims not supported by tool usage`);
|
|
@@ -305,12 +266,14 @@ CRITICAL RULES for requirements:
|
|
|
305
266
|
result.approved = result.requirementsMet;
|
|
306
267
|
if (!result.approved && result.issues.length > 0) {
|
|
307
268
|
// Generate an actionable correction instruction via LLM instead of echoing raw validation issues
|
|
308
|
-
result.nextAction = await this.generateCorrectionInstruction(userMessage, subtasks.join('; '), result.issues, workerResult);
|
|
309
|
-
this.failureCount++;
|
|
269
|
+
result.nextAction = await this.generateCorrectionInstruction(userMessage, subtasks.join('; '), result.issues, workerResult, agentContext);
|
|
270
|
+
this.failureCount++; // Telemetry only — no longer drives escalation
|
|
310
271
|
}
|
|
311
272
|
else {
|
|
312
|
-
this.failureCount = 0;
|
|
273
|
+
this.failureCount = 0;
|
|
313
274
|
}
|
|
275
|
+
// Analyze CompletionSignal (LLM-based per-subtask assessment)
|
|
276
|
+
result.completionSignal = await this.analyzeCompletionSignal(userMessage, workerResult, result.issues, agentContext);
|
|
314
277
|
// Log the validation outcome
|
|
315
278
|
orchestrationLogger.logClientValidation(result.approved, result.issues, result.nextAction);
|
|
316
279
|
return result;
|
|
@@ -319,12 +282,14 @@ CRITICAL RULES for requirements:
|
|
|
319
282
|
* Use LLM to analyze worker result for unjustified failure claims
|
|
320
283
|
* This is language-agnostic and captures semantic meaning
|
|
321
284
|
*/
|
|
322
|
-
async analyzeForUnjustifiedFailure(userMessage, workerResult) {
|
|
285
|
+
async analyzeForUnjustifiedFailure(userMessage, workerResult, agentContext) {
|
|
323
286
|
const toolCount = workerResult.toolsUsed.length;
|
|
324
287
|
const toolList = workerResult.toolsUsed.join(', ') || 'none';
|
|
288
|
+
const contextBlock = agentContext ? `\n${serializeAgentContext(agentContext, 'client')}` : '';
|
|
325
289
|
const analysisPrompt = `You are a quality control agent. Analyze the following Worker response to determine if it's claiming failure and whether that failure is justified.
|
|
326
290
|
|
|
327
291
|
USER REQUEST: ${userMessage}
|
|
292
|
+
${contextBlock}
|
|
328
293
|
|
|
329
294
|
WORKER RESPONSE:
|
|
330
295
|
${workerResult.result}
|
|
@@ -380,7 +345,7 @@ Respond ONLY with the JSON, no other text.`;
|
|
|
380
345
|
* and found no bugs" but only used list_directory and never read a single file.
|
|
381
346
|
* This runs at ALL involvement levels including MINIMAL.
|
|
382
347
|
*/
|
|
383
|
-
async analyzeResultCoherence(userMessage, workerResult) {
|
|
348
|
+
async analyzeResultCoherence(userMessage, workerResult, agentContext) {
|
|
384
349
|
// Skip coherence check if Worker used no tools (caught by zero-tools guard)
|
|
385
350
|
// or if Worker explicitly failed (caught by failure analysis)
|
|
386
351
|
if (workerResult.toolsUsed.length === 0 || !workerResult.success) {
|
|
@@ -388,9 +353,12 @@ Respond ONLY with the JSON, no other text.`;
|
|
|
388
353
|
}
|
|
389
354
|
const toolList = workerResult.toolsUsed.join(', ');
|
|
390
355
|
const uniqueTools = [...new Set(workerResult.toolsUsed)].join(', ');
|
|
356
|
+
const availableToolsContext = this.buildToolContextForPrompt();
|
|
357
|
+
const contextBlock = agentContext ? `\n${serializeAgentContext(agentContext, 'client')}` : '';
|
|
391
358
|
const coherencePrompt = `You are a strict quality auditor. Your job is to determine whether a Worker agent's result is SUPPORTED by the tools it actually used, or whether it fabricated/hallucinated claims.
|
|
392
359
|
|
|
393
360
|
USER REQUEST: ${userMessage}
|
|
361
|
+
${contextBlock}
|
|
394
362
|
|
|
395
363
|
WORKER RESULT:
|
|
396
364
|
${workerResult.result.substring(0, 1000)}
|
|
@@ -399,13 +367,16 @@ TOOLS ACTUALLY USED (in order): ${toolList}
|
|
|
399
367
|
UNIQUE TOOLS USED: ${uniqueTools}
|
|
400
368
|
TOTAL TOOL CALLS: ${workerResult.toolsUsed.length}
|
|
401
369
|
|
|
370
|
+
AVAILABLE TOOLS IN THIS SYSTEM:
|
|
371
|
+
${availableToolsContext}
|
|
372
|
+
|
|
402
373
|
CRITICAL: Analyze whether the claims in the Worker's result are supported by the tools it used.
|
|
403
374
|
|
|
404
|
-
Key tool semantics:
|
|
405
|
-
-
|
|
406
|
-
-
|
|
407
|
-
-
|
|
408
|
-
-
|
|
375
|
+
Key tool semantics to apply:
|
|
376
|
+
- Tools with names like "list_directory", "directory_tree", "search_files" show file/folder NAMES only — they do NOT read file contents
|
|
377
|
+
- Tools with names like "read_text_file", "read_file", "get_file_content" actually read file content
|
|
378
|
+
- Tools with names like "shell_exec", "run_command", "bash", "execute" run shell commands — infer what was run from the worker's result
|
|
379
|
+
- For any other tool, infer its semantics from its name
|
|
409
380
|
|
|
410
381
|
Common hallucination patterns to detect:
|
|
411
382
|
1. Worker claims to have "inspected", "reviewed", "analyzed", or "scanned" source code but never used read_text_file — it only listed directories
|
|
@@ -452,7 +423,10 @@ Respond ONLY with valid JSON:
|
|
|
452
423
|
const issues = [];
|
|
453
424
|
// Zero-tools guard: if Worker used no tools at all and this is not a purely
|
|
454
425
|
// informational/conversational task, reject immediately
|
|
455
|
-
if
|
|
426
|
+
// Exception: if the Worker has structured failedTools, it DID try — the
|
|
427
|
+
// tools simply errored. Raising a "no tools used" issue on top of real
|
|
428
|
+
// failures creates a misleading retry loop rather than an honest exit.
|
|
429
|
+
if (workerResult.toolsUsed.length === 0 && workerResult.failedTools.length === 0 && involvementLevel !== InvolvementLevel.MINIMAL) {
|
|
456
430
|
const isConversational = requirements.every(r => r.type === 'information' || r.type === 'other');
|
|
457
431
|
if (!isConversational) {
|
|
458
432
|
issues.push('Worker completed the task without using any tools. ' +
|
|
@@ -471,8 +445,12 @@ Respond ONLY with valid JSON:
|
|
|
471
445
|
}
|
|
472
446
|
}
|
|
473
447
|
}
|
|
474
|
-
// Check if Worker succeeded
|
|
475
|
-
|
|
448
|
+
// Check if Worker succeeded.
|
|
449
|
+
// If failedTools is non-empty the Worker already proved it tried — the failure
|
|
450
|
+
// is evidence-backed. Raising a generic "retry with appropriate tool usage"
|
|
451
|
+
// issue would just queue another pointless attempt. Let it through; synthesis
|
|
452
|
+
// will report the specific failures to the user.
|
|
453
|
+
if (!workerResult.success && workerResult.failedTools.length === 0) {
|
|
476
454
|
issues.push('Worker did not complete the task successfully. The task needs to be retried with appropriate tool usage.');
|
|
477
455
|
}
|
|
478
456
|
return { issues };
|
|
@@ -503,46 +481,52 @@ Respond ONLY with valid JSON:
|
|
|
503
481
|
}
|
|
504
482
|
}
|
|
505
483
|
}
|
|
506
|
-
//
|
|
484
|
+
// Shell-based deep verification (THOROUGH only)
|
|
507
485
|
if (level === InvolvementLevel.THOROUGH) {
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
workerResult.result.includes('.html')) {
|
|
511
|
-
// Extract HTML filename from result
|
|
512
|
-
const htmlMatch = workerResult.result.match(/([a-zA-Z0-9._-]+\.html)/);
|
|
513
|
-
if (htmlMatch) {
|
|
514
|
-
const htmlFile = htmlMatch[1];
|
|
515
|
-
const browserValidation = await this.validateInBrowser(htmlFile);
|
|
516
|
-
if (!browserValidation.valid) {
|
|
517
|
-
issues.push(browserValidation.issue);
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
}
|
|
521
|
-
}
|
|
486
|
+
const shellIssues = await this.validateWithShell(requirements, workerResult);
|
|
487
|
+
issues.push(...shellIssues.issues);
|
|
522
488
|
}
|
|
523
489
|
return { issues };
|
|
524
490
|
}
|
|
491
|
+
// ─── Tool-Based Verification ────────────────────────────────────────────────────
|
|
525
492
|
/**
|
|
526
|
-
* Check
|
|
493
|
+
* Check whether a file exists, using whichever MCP tool is available.
|
|
494
|
+
* Tries filesystem read tools first, then falls back to shell.
|
|
527
495
|
*/
|
|
528
496
|
async fileExists(filePath) {
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
head: 1
|
|
533
|
-
|
|
534
|
-
|
|
497
|
+
const readTool = this.findTool('read_text_file', 'read_file', 'get_file_content');
|
|
498
|
+
if (readTool) {
|
|
499
|
+
try {
|
|
500
|
+
await this.mcpClient.executeTool(readTool, { path: filePath, head: 1 });
|
|
501
|
+
return true;
|
|
502
|
+
}
|
|
503
|
+
catch {
|
|
504
|
+
return false;
|
|
505
|
+
}
|
|
535
506
|
}
|
|
536
|
-
|
|
537
|
-
|
|
507
|
+
const shellTool = this.findTool('shell_exec', 'run_command', 'bash', 'execute');
|
|
508
|
+
if (shellTool) {
|
|
509
|
+
try {
|
|
510
|
+
const result = await this.mcpClient.executeTool(shellTool, {
|
|
511
|
+
command: `test -f "${filePath}" && echo "exists" || echo "not_found"`,
|
|
512
|
+
});
|
|
513
|
+
return String(result).includes('exists');
|
|
514
|
+
}
|
|
515
|
+
catch {
|
|
516
|
+
return false;
|
|
517
|
+
}
|
|
538
518
|
}
|
|
519
|
+
logger.debug('[Client] No tool available to verify file existence');
|
|
520
|
+
return false;
|
|
539
521
|
}
|
|
540
522
|
/**
|
|
541
523
|
* Use LLM to generate an actionable correction instruction from raw validation issues.
|
|
542
|
-
*
|
|
543
|
-
*
|
|
524
|
+
* Translates internal validation failures into concrete, tool-specific directions
|
|
525
|
+
* for the Worker, referencing only the tools actually available in the system.
|
|
544
526
|
*/
|
|
545
|
-
async generateCorrectionInstruction(userMessage, subtask, issues, workerResult) {
|
|
527
|
+
async generateCorrectionInstruction(userMessage, subtask, issues, workerResult, agentContext) {
|
|
528
|
+
const availableToolsContext = this.buildToolContextForPrompt();
|
|
529
|
+
const contextBlock = agentContext ? `\nAGENT CONTEXT:\n${serializeAgentContext(agentContext, 'client')}` : '';
|
|
546
530
|
const correctionPrompt = `You are generating a correction instruction for a Worker agent that failed to complete a task properly.
|
|
547
531
|
|
|
548
532
|
ORIGINAL USER REQUEST: ${userMessage}
|
|
@@ -555,10 +539,13 @@ ${issues.map((issue, i) => `${i + 1}. ${issue}`).join('\n')}
|
|
|
555
539
|
WORKER'S RESULT (first 300 chars): ${workerResult.result.substring(0, 300)}
|
|
556
540
|
TOOLS WORKER USED: ${workerResult.toolsUsed.join(', ') || 'none'}
|
|
557
541
|
|
|
542
|
+
AVAILABLE TOOLS THE WORKER CAN USE:
|
|
543
|
+
${availableToolsContext}
|
|
544
|
+
${contextBlock}
|
|
545
|
+
|
|
558
546
|
Generate a CLEAR, ACTIONABLE instruction that tells the Worker exactly what to do to fix the issues.
|
|
559
547
|
The instruction should:
|
|
560
|
-
- Be a direct command
|
|
561
|
-
- Reference specific tools or actions the Worker should take
|
|
548
|
+
- Be a direct command referencing specific available tools by name
|
|
562
549
|
- Be concise (1-2 sentences)
|
|
563
550
|
- NOT include validation jargon like "mustUseTools", "requirements", or "involvement level"
|
|
564
551
|
- NOT be a generic statement like "retry the task" — be specific about WHAT to do
|
|
@@ -584,76 +571,202 @@ Respond ONLY with the correction instruction text, nothing else.`;
|
|
|
584
571
|
return `Fix the following issue and retry: ${issues[0]}`;
|
|
585
572
|
}
|
|
586
573
|
/**
|
|
587
|
-
* Validate file contents for common issues
|
|
574
|
+
* Validate file contents for common issues, using whichever MCP tool is available.
|
|
588
575
|
*/
|
|
589
576
|
async validateFileContents(filePath) {
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
577
|
+
const readTool = this.findTool('read_text_file', 'read_file', 'get_file_content');
|
|
578
|
+
const shellTool = this.findTool('shell_exec', 'run_command', 'bash', 'execute');
|
|
579
|
+
let contentStr = null;
|
|
580
|
+
if (readTool) {
|
|
581
|
+
try {
|
|
582
|
+
const content = await this.mcpClient.executeTool(readTool, { path: filePath, head: 200 });
|
|
583
|
+
contentStr = typeof content === 'string' ? content : JSON.stringify(content);
|
|
584
|
+
}
|
|
585
|
+
catch (error) {
|
|
586
|
+
return { valid: false, issue: `Could not read file: ${error}` };
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
else if (shellTool) {
|
|
590
|
+
try {
|
|
591
|
+
const content = await this.mcpClient.executeTool(shellTool, {
|
|
592
|
+
command: `head -200 "${filePath}" 2>&1`,
|
|
593
|
+
});
|
|
594
|
+
contentStr = String(content);
|
|
595
|
+
}
|
|
596
|
+
catch (error) {
|
|
597
|
+
return { valid: false, issue: `Could not read file via shell: ${error}` };
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
if (contentStr === null) {
|
|
601
|
+
return { valid: true }; // No read tool available; skip content check
|
|
602
|
+
}
|
|
603
|
+
// Check path reference integrity in HTML files
|
|
604
|
+
if (filePath.endsWith('.html')) {
|
|
605
|
+
const hrefMatches = contentStr.match(/href="([^"]+)"/g) || [];
|
|
606
|
+
const srcMatches = contentStr.match(/src="([^"]+)"/g) || [];
|
|
607
|
+
for (const match of [...hrefMatches, ...srcMatches]) {
|
|
608
|
+
const pathMatch = match.match(/(?:href|src)="([^"]+)"/);
|
|
609
|
+
if (pathMatch) {
|
|
610
|
+
const referencedPath = pathMatch[1];
|
|
611
|
+
if (!referencedPath.startsWith('http') && !referencedPath.startsWith('data:')) {
|
|
612
|
+
const exists = await this.fileExists(referencedPath);
|
|
613
|
+
if (!exists) {
|
|
614
|
+
return {
|
|
615
|
+
valid: false,
|
|
616
|
+
issue: `HTML references non-existent file: ${referencedPath}. Fix file paths or create missing files.`,
|
|
617
|
+
};
|
|
612
618
|
}
|
|
613
619
|
}
|
|
614
620
|
}
|
|
615
621
|
}
|
|
616
|
-
return { valid: true };
|
|
617
622
|
}
|
|
618
|
-
|
|
619
|
-
|
|
623
|
+
return { valid: true };
|
|
624
|
+
}
|
|
625
|
+
/**
|
|
626
|
+
* THOROUGH-level shell-based verification: runs lightweight, read-only shell
|
|
627
|
+
* commands to confirm work was actually done (file sizes, test output presence, etc.).
|
|
628
|
+
* Skips gracefully when no shell tool is available.
|
|
629
|
+
*/
|
|
630
|
+
async validateWithShell(requirements, workerResult) {
|
|
631
|
+
const issues = [];
|
|
632
|
+
const shellTool = this.findTool('shell_exec', 'run_command', 'bash', 'execute');
|
|
633
|
+
if (!shellTool) {
|
|
634
|
+
logger.debug('[Client] No shell tool available for THOROUGH shell validation — skipping');
|
|
635
|
+
return { issues };
|
|
636
|
+
}
|
|
637
|
+
for (const req of requirements) {
|
|
638
|
+
if ((req.type === 'file_creation' || req.type === 'file_modification') && req.filePath) {
|
|
639
|
+
try {
|
|
640
|
+
const result = await this.mcpClient.executeTool(shellTool, {
|
|
641
|
+
command: `wc -c "${req.filePath}" 2>&1`,
|
|
642
|
+
});
|
|
643
|
+
const resultStr = String(result);
|
|
644
|
+
if (resultStr.includes('No such file') || resultStr.includes('cannot access')) {
|
|
645
|
+
issues.push(`Shell verification failed: ${req.filePath} does not exist on disk.`);
|
|
646
|
+
}
|
|
647
|
+
else {
|
|
648
|
+
const sizeMatch = resultStr.match(/^\s*(\d+)/);
|
|
649
|
+
if (sizeMatch && parseInt(sizeMatch[1], 10) === 0) {
|
|
650
|
+
issues.push(`File ${req.filePath} was created but is empty.`);
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
catch (error) {
|
|
655
|
+
logger.debug(`[Client] Shell validation error for ${req.filePath}: ${error}`);
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
if (req.type === 'testing') {
|
|
659
|
+
const hasTestOutput = workerResult.result.match(/passed|failed|error|PASS|FAIL|✓|✗|tests run|test suite/i);
|
|
660
|
+
if (!hasTestOutput) {
|
|
661
|
+
logger.debug('[Client] THOROUGH: testing requirement but no test output detected in worker result');
|
|
662
|
+
}
|
|
663
|
+
}
|
|
620
664
|
}
|
|
665
|
+
return { issues };
|
|
621
666
|
}
|
|
667
|
+
// ─── Completion Signal Analysis ──────────────────────────────────────────────
|
|
622
668
|
/**
|
|
623
|
-
*
|
|
669
|
+
* LLM-based per-subtask assessment of completion confidence, blocker type,
|
|
670
|
+
* and suggested corrective strategy. Called at the end of validate().
|
|
624
671
|
*/
|
|
625
|
-
async
|
|
672
|
+
async analyzeCompletionSignal(userMessage, workerResult, issues, agentContext) {
|
|
673
|
+
// If approved with no issues, high confidence
|
|
674
|
+
if (issues.length === 0) {
|
|
675
|
+
return { confidence: 'high', progressMade: true };
|
|
676
|
+
}
|
|
677
|
+
// Data-driven short-circuit: if the Worker reported structured tool failures,
|
|
678
|
+
// we know exactly what happened — no LLM inference needed.
|
|
679
|
+
// suggestedStrategy is 'escalate' because rephrasing the instruction cannot
|
|
680
|
+
// fix a tool that is fundamentally erroring (wrong path, permissions, etc.).
|
|
681
|
+
if (workerResult.failedTools.length > 0) {
|
|
682
|
+
const failureSummary = workerResult.failedTools
|
|
683
|
+
.map(f => `${f.toolName} (${f.attempts} attempt${f.attempts !== 1 ? 's' : ''}) — ${f.lastError}`)
|
|
684
|
+
.join('; ');
|
|
685
|
+
logger.info(`[Client] Data-driven CompletionSignal: tool_failure — ${failureSummary}`);
|
|
686
|
+
return {
|
|
687
|
+
confidence: 'none',
|
|
688
|
+
progressMade: false,
|
|
689
|
+
blockerType: 'tool_failure',
|
|
690
|
+
suggestedStrategy: 'escalate',
|
|
691
|
+
};
|
|
692
|
+
}
|
|
693
|
+
const contextBlock = agentContext ? `\n${serializeAgentContext(agentContext, 'client')}` : '';
|
|
694
|
+
const signalPrompt = `You are analyzing a subtask execution to produce a CompletionSignal.
|
|
695
|
+
|
|
696
|
+
USER REQUEST: ${userMessage}
|
|
697
|
+
${contextBlock}
|
|
698
|
+
|
|
699
|
+
WORKER RESULT (first 500 chars): ${workerResult.result.substring(0, 500)}
|
|
700
|
+
WORKER SUCCESS: ${workerResult.success}
|
|
701
|
+
TOOLS USED: ${workerResult.toolsUsed.join(', ') || 'none'} (${workerResult.toolsUsed.length} total)
|
|
702
|
+
|
|
703
|
+
VALIDATION ISSUES:
|
|
704
|
+
${issues.map((issue, i) => `${i + 1}. ${issue}`).join('\n')}
|
|
705
|
+
|
|
706
|
+
Analyze and respond ONLY with valid JSON:
|
|
707
|
+
{
|
|
708
|
+
"confidence": "<high | medium | low | none>",
|
|
709
|
+
"progressMade": <true if Worker made any measurable forward progress, false otherwise>,
|
|
710
|
+
"blockerType": "<tool_failure | hallucination | scope_drift | partial | loop | capability_gap | null>",
|
|
711
|
+
"suggestedStrategy": "<retry | rephrase | decompose | skip | escalate>"
|
|
712
|
+
}
|
|
713
|
+
|
|
714
|
+
RULES:
|
|
715
|
+
- confidence "high": all issues are minor or cosmetic
|
|
716
|
+
- confidence "medium": some progress but incomplete
|
|
717
|
+
- confidence "low": significant issues, little useful work
|
|
718
|
+
- confidence "none": no useful work done at all
|
|
719
|
+
- blockerType "tool_failure": Worker tried but tool errored
|
|
720
|
+
- blockerType "hallucination": Worker claimed work it didn't do
|
|
721
|
+
- blockerType "scope_drift": Worker did something unrelated
|
|
722
|
+
- blockerType "partial": Worker made progress but didn't finish
|
|
723
|
+
- blockerType "loop": Worker is repeating the same action
|
|
724
|
+
- blockerType "capability_gap": Task requires tools/capabilities not available
|
|
725
|
+
- suggestedStrategy: recommend the best recovery approach`;
|
|
626
726
|
try {
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
// Check for console errors
|
|
634
|
-
const errors = await this.mcpClient.executeTool('playwright__browser_console_messages', {
|
|
635
|
-
level: 'error',
|
|
727
|
+
const response = await this.orchestrator.chat({
|
|
728
|
+
messages: [
|
|
729
|
+
{ role: 'system', content: 'You are a strict completion analyst. Respond only with valid JSON.' },
|
|
730
|
+
{ role: 'user', content: signalPrompt },
|
|
731
|
+
],
|
|
732
|
+
temperature: 0.1,
|
|
636
733
|
});
|
|
637
|
-
const
|
|
638
|
-
if (
|
|
734
|
+
const jsonMatch = response.content.match(/\{[\s\S]*\}/);
|
|
735
|
+
if (jsonMatch) {
|
|
736
|
+
const parsed = JSON.parse(jsonMatch[0]);
|
|
639
737
|
return {
|
|
640
|
-
|
|
641
|
-
|
|
738
|
+
confidence: parsed.confidence || 'low',
|
|
739
|
+
progressMade: parsed.progressMade ?? false,
|
|
740
|
+
blockerType: parsed.blockerType === 'null' ? undefined : parsed.blockerType,
|
|
741
|
+
suggestedStrategy: parsed.suggestedStrategy,
|
|
642
742
|
};
|
|
643
743
|
}
|
|
644
|
-
return { valid: true };
|
|
645
744
|
}
|
|
646
745
|
catch (error) {
|
|
647
|
-
logger.debug(`[Client]
|
|
648
|
-
// Don't fail validation if browser test fails - might not have browser available
|
|
649
|
-
return { valid: true };
|
|
746
|
+
logger.debug(`[Client] Failed to analyze completion signal: ${error}`);
|
|
650
747
|
}
|
|
748
|
+
// Fallback: conservative signal
|
|
749
|
+
return {
|
|
750
|
+
confidence: 'low',
|
|
751
|
+
progressMade: workerResult.toolsUsed.length > 0,
|
|
752
|
+
blockerType: 'partial',
|
|
753
|
+
suggestedStrategy: 'retry',
|
|
754
|
+
};
|
|
651
755
|
}
|
|
756
|
+
// ─── Session Management ─────────────────────────────────────────────────────
|
|
652
757
|
/**
|
|
653
|
-
* Reset
|
|
758
|
+
* Reset session state (call at the start of each new conversation/session).
|
|
759
|
+
* Renamed from resetFailureTracking() for clarity — failureCount is now
|
|
760
|
+
* telemetry-only and does not drive involvement escalation.
|
|
654
761
|
*/
|
|
655
|
-
|
|
762
|
+
resetSessionState() {
|
|
656
763
|
this.failureCount = 0;
|
|
657
764
|
}
|
|
765
|
+
/**
|
|
766
|
+
* @deprecated Use resetSessionState() instead.
|
|
767
|
+
*/
|
|
768
|
+
resetFailureTracking() {
|
|
769
|
+
this.resetSessionState();
|
|
770
|
+
}
|
|
658
771
|
}
|
|
659
772
|
//# sourceMappingURL=client-agent.js.map
|