jiva-core 0.3.2 → 0.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +12 -7
- package/cloud-run.yaml +1 -1
- package/cloud-run.yaml.template +1 -1
- package/dist/core/client-agent.d.ts +27 -20
- package/dist/core/client-agent.d.ts.map +1 -1
- package/dist/core/client-agent.js +187 -204
- package/dist/core/client-agent.js.map +1 -1
- package/dist/core/config.d.ts.map +1 -1
- package/dist/core/config.js +8 -3
- package/dist/core/config.js.map +1 -1
- package/dist/interfaces/cli/index.js +18 -0
- package/dist/interfaces/cli/index.js.map +1 -1
- package/dist/interfaces/cli/setup-wizard.js +1 -1
- package/dist/interfaces/cli/setup-wizard.js.map +1 -1
- package/dist/interfaces/http/routes/ui.d.ts +4 -0
- package/dist/interfaces/http/routes/ui.d.ts.map +1 -0
- package/dist/interfaces/http/routes/ui.js +213 -0
- package/dist/interfaces/http/routes/ui.js.map +1 -0
- package/dist/interfaces/http/session-manager.d.ts.map +1 -1
- package/dist/interfaces/http/session-manager.js +15 -7
- package/dist/interfaces/http/session-manager.js.map +1 -1
- package/dist/public/app.js +361 -0
- package/dist/public/index.html +122 -0
- package/dist/public/styles.css +319 -0
- package/dist/storage/gcp-bucket-provider.d.ts.map +1 -1
- package/dist/storage/gcp-bucket-provider.js +1 -11
- package/dist/storage/gcp-bucket-provider.js.map +1 -1
- package/package.json +3 -7
package/Dockerfile
CHANGED
|
@@ -8,8 +8,8 @@ WORKDIR /app
|
|
|
8
8
|
COPY package*.json ./
|
|
9
9
|
COPY tsconfig.json ./
|
|
10
10
|
|
|
11
|
-
# Install dependencies (
|
|
12
|
-
RUN npm ci
|
|
11
|
+
# Install dependencies (skip postinstall/playwright in CI environment)
|
|
12
|
+
RUN npm ci --ignore-scripts
|
|
13
13
|
|
|
14
14
|
# Copy source code
|
|
15
15
|
COPY src ./src
|
|
@@ -28,16 +28,21 @@ RUN apk add --no-cache dumb-init
|
|
|
28
28
|
# Copy package files
|
|
29
29
|
COPY package*.json ./
|
|
30
30
|
|
|
31
|
-
# Install production dependencies only
|
|
32
|
-
RUN npm ci --omit=dev && npm cache clean --force
|
|
31
|
+
# Install production dependencies only (skip postinstall/playwright)
|
|
32
|
+
RUN npm ci --omit=dev --ignore-scripts && npm cache clean --force
|
|
33
|
+
|
|
34
|
+
# Pre-install MCP servers globally so they don't need to be downloaded per-session
|
|
35
|
+
RUN npm install -g @modelcontextprotocol/server-filesystem
|
|
33
36
|
|
|
34
37
|
# Copy built application from builder
|
|
35
38
|
COPY --from=builder /app/dist ./dist
|
|
36
39
|
|
|
37
|
-
# Create
|
|
38
|
-
RUN
|
|
40
|
+
# Create workspace directory and set permissions
|
|
41
|
+
RUN mkdir -p /workspace && \
|
|
42
|
+
addgroup -g 1001 -S nodejs && \
|
|
39
43
|
adduser -S nodejs -u 1001 && \
|
|
40
|
-
chown -R nodejs:nodejs /app
|
|
44
|
+
chown -R nodejs:nodejs /app && \
|
|
45
|
+
chown nodejs:nodejs /workspace
|
|
41
46
|
|
|
42
47
|
USER nodejs
|
|
43
48
|
|
package/cloud-run.yaml
CHANGED
package/cloud-run.yaml.template
CHANGED
|
@@ -35,24 +35,30 @@ export declare class ClientAgent {
|
|
|
35
35
|
private mcpManager;
|
|
36
36
|
private mcpClient;
|
|
37
37
|
private failureCount;
|
|
38
|
-
private
|
|
38
|
+
private _availableTools;
|
|
39
39
|
constructor(orchestrator: ModelOrchestrator, mcpManager: MCPServerManager);
|
|
40
40
|
/**
|
|
41
|
-
*
|
|
42
|
-
*
|
|
43
|
-
* with semantic understanding that avoids false positives.
|
|
41
|
+
* Returns all tool names currently available from connected MCP servers.
|
|
42
|
+
* Result is cached after the first call; call resetToolCache() if servers change.
|
|
44
43
|
*/
|
|
45
|
-
private
|
|
44
|
+
private getAvailableTools;
|
|
45
|
+
/** Reset the tool cache (e.g. after MCP server reconnects). */
|
|
46
|
+
resetToolCache(): void;
|
|
47
|
+
/**
|
|
48
|
+
* Find the first available tool whose name contains any of the given substrings.
|
|
49
|
+
* Returns null if no match is found.
|
|
50
|
+
*/
|
|
51
|
+
private findTool;
|
|
46
52
|
/**
|
|
47
|
-
*
|
|
48
|
-
* Determine involvement level based on user request complexity
|
|
53
|
+
* Build a human-readable summary of available tool categories for LLM prompts.
|
|
49
54
|
*/
|
|
50
|
-
|
|
55
|
+
private buildToolContextForPrompt;
|
|
51
56
|
/**
|
|
52
|
-
*
|
|
53
|
-
*
|
|
57
|
+
* Use LLM to analyze the task and determine involvement level + requirements.
|
|
58
|
+
* Replaces keyword-based determineInvolvementLevel() and parseRequirements()
|
|
59
|
+
* with semantic understanding that avoids false positives.
|
|
54
60
|
*/
|
|
55
|
-
|
|
61
|
+
private analyzeTaskRequirements;
|
|
56
62
|
/**
|
|
57
63
|
* Validate Worker's work at appropriate involvement level
|
|
58
64
|
*/
|
|
@@ -78,26 +84,27 @@ export declare class ClientAgent {
|
|
|
78
84
|
*/
|
|
79
85
|
private validateOutcome;
|
|
80
86
|
/**
|
|
81
|
-
* Check
|
|
87
|
+
* Check whether a file exists, using whichever MCP tool is available.
|
|
88
|
+
* Tries filesystem read tools first, then falls back to shell.
|
|
82
89
|
*/
|
|
83
90
|
private fileExists;
|
|
84
91
|
/**
|
|
85
92
|
* Use LLM to generate an actionable correction instruction from raw validation issues.
|
|
86
|
-
*
|
|
87
|
-
*
|
|
93
|
+
* Translates internal validation failures into concrete, tool-specific directions
|
|
94
|
+
* for the Worker, referencing only the tools actually available in the system.
|
|
88
95
|
*/
|
|
89
96
|
private generateCorrectionInstruction;
|
|
90
97
|
/**
|
|
91
|
-
* Validate file contents for common issues
|
|
98
|
+
* Validate file contents for common issues, using whichever MCP tool is available.
|
|
92
99
|
*/
|
|
93
100
|
private validateFileContents;
|
|
94
101
|
/**
|
|
95
|
-
*
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
/**
|
|
99
|
-
* Reset failure tracking (for new conversation/session)
|
|
102
|
+
* THOROUGH-level shell-based verification: runs lightweight, read-only shell
|
|
103
|
+
* commands to confirm work was actually done (file sizes, test output presence, etc.).
|
|
104
|
+
* Skips gracefully when no shell tool is available.
|
|
100
105
|
*/
|
|
106
|
+
private validateWithShell;
|
|
107
|
+
/** Reset failure tracking (call at the start of each new conversation/session). */
|
|
101
108
|
resetFailureTracking(): void;
|
|
102
109
|
}
|
|
103
110
|
//# sourceMappingURL=client-agent.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client-agent.d.ts","sourceRoot":"","sources":["../../src/core/client-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAI5D,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAgBjD,oBAAY,gBAAgB;IAC1B,OAAO,YAAY,CAAM,0BAA0B;IACnD,QAAQ,aAAa,CAAI,mCAAmC;IAC5D,QAAQ,aAAa;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,GAAG,mBAAmB,GAAG,SAAS,GAAG,cAAc,GAAG,aAAa,GAAG,OAAO,CAAC;IACnG,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,gBAAgB,CAAC;CACpC;AAED,qBAAa,WAAW;IACtB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,UAAU,CAAmB;IACrC,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,YAAY,CAAa;IAGjC,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"client-agent.d.ts","sourceRoot":"","sources":["../../src/core/client-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAI5D,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAgBjD,oBAAY,gBAAgB;IAC1B,OAAO,YAAY,CAAM,0BAA0B;IACnD,QAAQ,aAAa,CAAI,mCAAmC;IAC5D,QAAQ,aAAa;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,GAAG,mBAAmB,GAAG,SAAS,GAAG,cAAc,GAAG,aAAa,GAAG,OAAO,CAAC;IACnG,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,gBAAgB,CAAC;CACpC;AAED,qBAAa,WAAW;IACtB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,UAAU,CAAmB;IACrC,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,YAAY,CAAa;IAGjC,OAAO,CAAC,eAAe,CAAyB;gBAEpC,YAAY,EAAE,iBAAiB,EAAE,UAAU,EAAE,gBAAgB;IAQzE;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAQzB,+DAA+D;IAC/D,cAAc,IAAI,IAAI;IAItB;;;OAGG;IACH,OAAO,CAAC,QAAQ;IAShB;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAmBjC;;;;OAIG;YACW,uBAAuB;IAgHrC;;OAEG;IACG,QAAQ,CACZ,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,MAAM,EAAE,EAClB,YAAY,EAAE,YAAY,EAC1B,gBAAgB,CAAC,EAAE,gBAAgB,GAClC,OAAO,CAAC,gBAAgB,CAAC;IAyG5B;;;OAGG;YACW,4BAA4B;IA8D1C;;;;;OAKG;YACW,sBAAsB;IA6EpC;;OAEG;IACH,OAAO,CAAC,eAAe;IA+CvB;;OAEG;YACW,eAAe;IAsC7B;;;OAGG;YACW,UAAU;IA2BxB;;;;OAIG;YACW,6BAA6B;IAoD3C;;OAEG;YACW,oBAAoB;IAoDlC;;;;OAIG;YACW,iBAAiB;IA+C/B,mFAAmF;IACnF,oBAAoB,IAAI,IAAI;CAG7B"}
|
|
@@ -22,22 +22,63 @@ export class ClientAgent {
|
|
|
22
22
|
mcpManager;
|
|
23
23
|
mcpClient;
|
|
24
24
|
failureCount = 0;
|
|
25
|
-
//
|
|
26
|
-
|
|
27
|
-
'filesystem__read_text_file',
|
|
28
|
-
'filesystem__list_directory',
|
|
29
|
-
'filesystem__directory_tree',
|
|
30
|
-
'filesystem__search_files',
|
|
31
|
-
'playwright__browser_navigate',
|
|
32
|
-
'playwright__browser_console_messages',
|
|
33
|
-
'playwright__browser_take_screenshot',
|
|
34
|
-
'playwright__browser_evaluate',
|
|
35
|
-
];
|
|
25
|
+
// Lazily cached list of all available tool names (populated on first use)
|
|
26
|
+
_availableTools = null;
|
|
36
27
|
constructor(orchestrator, mcpManager) {
|
|
37
28
|
this.orchestrator = orchestrator;
|
|
38
29
|
this.mcpManager = mcpManager;
|
|
39
30
|
this.mcpClient = mcpManager.getClient();
|
|
40
31
|
}
|
|
32
|
+
// ─── Tool Discovery ───────────────────────────────────────────────────────
|
|
33
|
+
/**
|
|
34
|
+
* Returns all tool names currently available from connected MCP servers.
|
|
35
|
+
* Result is cached after the first call; call resetToolCache() if servers change.
|
|
36
|
+
*/
|
|
37
|
+
getAvailableTools() {
|
|
38
|
+
if (this._availableTools === null) {
|
|
39
|
+
this._availableTools = this.mcpClient.getAllTools().map(t => t.name);
|
|
40
|
+
logger.debug(`[Client] Discovered ${this._availableTools.length} available tools: ${this._availableTools.join(', ')}`);
|
|
41
|
+
}
|
|
42
|
+
return this._availableTools;
|
|
43
|
+
}
|
|
44
|
+
/** Reset the tool cache (e.g. after MCP server reconnects). */
|
|
45
|
+
resetToolCache() {
|
|
46
|
+
this._availableTools = null;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Find the first available tool whose name contains any of the given substrings.
|
|
50
|
+
* Returns null if no match is found.
|
|
51
|
+
*/
|
|
52
|
+
findTool(...patterns) {
|
|
53
|
+
const tools = this.getAvailableTools();
|
|
54
|
+
for (const pattern of patterns) {
|
|
55
|
+
const found = tools.find(t => t.includes(pattern));
|
|
56
|
+
if (found)
|
|
57
|
+
return found;
|
|
58
|
+
}
|
|
59
|
+
return null;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Build a human-readable summary of available tool categories for LLM prompts.
|
|
63
|
+
*/
|
|
64
|
+
buildToolContextForPrompt() {
|
|
65
|
+
const tools = this.getAvailableTools();
|
|
66
|
+
if (tools.length === 0) {
|
|
67
|
+
return 'No MCP tools are currently available.';
|
|
68
|
+
}
|
|
69
|
+
// Group by server prefix (everything before __)
|
|
70
|
+
const byServer = {};
|
|
71
|
+
for (const tool of tools) {
|
|
72
|
+
const [server] = tool.split('__');
|
|
73
|
+
if (!byServer[server])
|
|
74
|
+
byServer[server] = [];
|
|
75
|
+
byServer[server].push(tool);
|
|
76
|
+
}
|
|
77
|
+
return Object.entries(byServer)
|
|
78
|
+
.map(([server, serverTools]) => `- ${server}: ${serverTools.join(', ')}`)
|
|
79
|
+
.join('\n');
|
|
80
|
+
}
|
|
81
|
+
// ─── Task Analysis ────────────────────────────────────────────────────────
|
|
41
82
|
/**
|
|
42
83
|
* Use LLM to analyze the task and determine involvement level + requirements.
|
|
43
84
|
* Replaces keyword-based determineInvolvementLevel() and parseRequirements()
|
|
@@ -47,6 +88,7 @@ export class ClientAgent {
|
|
|
47
88
|
const workerContext = workerResult
|
|
48
89
|
? `\nWorker Result (first 500 chars): ${workerResult.result.substring(0, 500)}\nWorker Success: ${workerResult.success}\nTools Used: ${workerResult.toolsUsed.join(', ') || 'none'} (${workerResult.toolsUsed.length} total)`
|
|
49
90
|
: '';
|
|
91
|
+
const availableToolsContext = this.buildToolContextForPrompt();
|
|
50
92
|
const analysisPrompt = `You are a task analyst for a software agent system. Analyze the user's request to determine:
|
|
51
93
|
1. How deeply to validate the Worker's output (involvement level)
|
|
52
94
|
2. What specific requirements the task implies
|
|
@@ -58,6 +100,9 @@ ${workerContext}
|
|
|
58
100
|
|
|
59
101
|
PREVIOUS FAILURE COUNT: ${this.failureCount}
|
|
60
102
|
|
|
103
|
+
AVAILABLE TOOLS (what the Worker and Client can actually use):
|
|
104
|
+
${availableToolsContext}
|
|
105
|
+
|
|
61
106
|
Respond ONLY with valid JSON in this exact format (no other text):
|
|
62
107
|
{
|
|
63
108
|
"involvementLevel": "<MINIMAL | STANDARD | THOROUGH>",
|
|
@@ -73,19 +118,14 @@ Respond ONLY with valid JSON in this exact format (no other text):
|
|
|
73
118
|
}
|
|
74
119
|
|
|
75
120
|
CRITICAL RULES for involvementLevel:
|
|
76
|
-
- THOROUGH: ONLY when the user EXPLICITLY asks to test or verify something
|
|
121
|
+
- THOROUGH: ONLY when the user EXPLICITLY asks to test or verify something, OR after previous failures (failureCount > 0), OR for complex multi-file operations (>3 subtasks)
|
|
77
122
|
- MINIMAL: Information-only requests (listing files, explaining code, describing something, answering questions) where no files are created or modified
|
|
78
123
|
- STANDARD: Default for creation, modification, or action tasks
|
|
79
124
|
|
|
80
125
|
CRITICAL RULES for requirements:
|
|
81
|
-
-
|
|
82
|
-
-
|
|
83
|
-
-
|
|
84
|
-
- "check how much space my caches use" = type "information", mustUseTools null
|
|
85
|
-
- "find the biggest files in Downloads" = type "information", mustUseTools null
|
|
86
|
-
- "test the login page in the browser" = type "testing", mustUseTools ["playwright__"]
|
|
87
|
-
- "create index.html and verify it works" = type "file_creation" + type "testing" with playwright
|
|
88
|
-
- "make sure the server is running" = type "verification", mustUseTools null
|
|
126
|
+
- Only set mustUseTools to tool names listed in AVAILABLE TOOLS above — do NOT reference tools that are not available
|
|
127
|
+
- If the required tool is not in AVAILABLE TOOLS, set mustUseTools to null
|
|
128
|
+
- "testing" type should only be set when the user explicitly wants something executed and verified
|
|
89
129
|
- If no specific tools are required, set mustUseTools to null
|
|
90
130
|
- Always include at least one requirement entry`;
|
|
91
131
|
try {
|
|
@@ -140,102 +180,7 @@ CRITICAL RULES for requirements:
|
|
|
140
180
|
requirements: [{ type: 'other', description: 'General task completion' }],
|
|
141
181
|
};
|
|
142
182
|
}
|
|
143
|
-
|
|
144
|
-
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
145
|
-
* Determine involvement level based on user request complexity
|
|
146
|
-
*/
|
|
147
|
-
determineInvolvementLevel(userMessage, subtasks) {
|
|
148
|
-
const messageLower = userMessage.toLowerCase();
|
|
149
|
-
const subtasksLower = subtasks.join(' ').toLowerCase();
|
|
150
|
-
// THOROUGH: User explicitly requests testing/verification
|
|
151
|
-
const testKeywords = ['test', 'verify', 'check', 'make sure', 'ensure', 'validate'];
|
|
152
|
-
if (testKeywords.some(kw => messageLower.includes(kw))) {
|
|
153
|
-
logger.debug('[Client] THOROUGH mode: Testing/verification requested');
|
|
154
|
-
return InvolvementLevel.THOROUGH;
|
|
155
|
-
}
|
|
156
|
-
// THOROUGH: After failures (user frustrated)
|
|
157
|
-
if (this.failureCount > 0) {
|
|
158
|
-
logger.debug(`[Client] THOROUGH mode: ${this.failureCount} previous failures detected`);
|
|
159
|
-
return InvolvementLevel.THOROUGH;
|
|
160
|
-
}
|
|
161
|
-
// THOROUGH: Complex multi-file operations
|
|
162
|
-
if (subtasks.length > 3 || (messageLower.includes('component') && messageLower.includes('index.html'))) {
|
|
163
|
-
logger.debug('[Client] THOROUGH mode: Complex multi-file operation');
|
|
164
|
-
return InvolvementLevel.THOROUGH;
|
|
165
|
-
}
|
|
166
|
-
// MINIMAL: Information-only requests
|
|
167
|
-
const infoKeywords = ['what', 'list', 'show', 'explain', 'describe', 'how', 'tell me'];
|
|
168
|
-
const creationKeywords = ['create', 'build', 'write', 'generate', 'make', 'add'];
|
|
169
|
-
const hasInfoKeyword = infoKeywords.some(kw => messageLower.includes(kw));
|
|
170
|
-
const hasCreationKeyword = creationKeywords.some(kw => messageLower.includes(kw));
|
|
171
|
-
if (hasInfoKeyword && !hasCreationKeyword) {
|
|
172
|
-
logger.debug('[Client] MINIMAL mode: Information request');
|
|
173
|
-
return InvolvementLevel.MINIMAL;
|
|
174
|
-
}
|
|
175
|
-
// STANDARD: Default for creation/modification tasks
|
|
176
|
-
logger.debug('[Client] STANDARD mode: Regular creation task');
|
|
177
|
-
return InvolvementLevel.STANDARD;
|
|
178
|
-
}
|
|
179
|
-
/**
|
|
180
|
-
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
181
|
-
* Parse requirements from user message
|
|
182
|
-
*/
|
|
183
|
-
parseRequirements(userMessage, subtasks) {
|
|
184
|
-
const requirements = [];
|
|
185
|
-
const messageLower = userMessage.toLowerCase();
|
|
186
|
-
const combined = (messageLower + ' ' + subtasks.join(' ').toLowerCase());
|
|
187
|
-
// Detect file creation requirements
|
|
188
|
-
const fileMatches = userMessage.match(/(?:create|build|generate|write|save as)\s+([a-zA-Z0-9._/-]+\.(html|js|css|md|json|txt|py|ts|tsx|jsx))/gi);
|
|
189
|
-
if (fileMatches) {
|
|
190
|
-
fileMatches.forEach(match => {
|
|
191
|
-
const filename = match.split(/\s+/).pop();
|
|
192
|
-
if (filename) {
|
|
193
|
-
requirements.push({
|
|
194
|
-
type: 'file_creation',
|
|
195
|
-
description: `Create file: ${filename}`,
|
|
196
|
-
filePath: filename,
|
|
197
|
-
});
|
|
198
|
-
}
|
|
199
|
-
});
|
|
200
|
-
}
|
|
201
|
-
// Detect testing requirements (explicit verification requests)
|
|
202
|
-
const testKeywords = ['test', 'verify', 'check', 'make sure', 'ensure'];
|
|
203
|
-
if (testKeywords.some(kw => combined.includes(kw))) {
|
|
204
|
-
requirements.push({
|
|
205
|
-
type: 'testing',
|
|
206
|
-
description: 'Verify functionality through testing',
|
|
207
|
-
mustUseTools: ['playwright__'],
|
|
208
|
-
});
|
|
209
|
-
}
|
|
210
|
-
// Detect browser verification requirements - ONLY for file verification, not general browsing
|
|
211
|
-
// This should trigger for "open index.html in browser" but NOT for "open linkedin.com"
|
|
212
|
-
const isLocalFileOpen = (combined.includes('open') && combined.includes('.html')) ||
|
|
213
|
-
(combined.includes('browser') && combined.includes('.html'));
|
|
214
|
-
const isExternalUrl = combined.match(/open\s+(?:https?:\/\/)?(?:www\.)?[a-z0-9-]+\.[a-z]{2,}/i);
|
|
215
|
-
if (isLocalFileOpen && !isExternalUrl) {
|
|
216
|
-
requirements.push({
|
|
217
|
-
type: 'verification',
|
|
218
|
-
description: 'Browser testing required for local HTML file',
|
|
219
|
-
mustUseTools: ['playwright__browser_navigate', 'playwright__browser_console_messages'],
|
|
220
|
-
});
|
|
221
|
-
}
|
|
222
|
-
// For external URLs, don't require specific tools - just opening the page is enough
|
|
223
|
-
if (isExternalUrl) {
|
|
224
|
-
requirements.push({
|
|
225
|
-
type: 'verification',
|
|
226
|
-
description: 'Open external URL',
|
|
227
|
-
// No mustUseTools - Worker just needs to navigate, Client shouldn't demand specific validation tools
|
|
228
|
-
});
|
|
229
|
-
}
|
|
230
|
-
// Default: at least verify Worker did some work
|
|
231
|
-
if (requirements.length === 0) {
|
|
232
|
-
requirements.push({
|
|
233
|
-
type: 'other',
|
|
234
|
-
description: 'General task completion',
|
|
235
|
-
});
|
|
236
|
-
}
|
|
237
|
-
return requirements;
|
|
238
|
-
}
|
|
183
|
+
// ─── Main Validation Entry Point ──────────────────────────────────────────
|
|
239
184
|
/**
|
|
240
185
|
* Validate Worker's work at appropriate involvement level
|
|
241
186
|
*/
|
|
@@ -388,6 +333,7 @@ Respond ONLY with the JSON, no other text.`;
|
|
|
388
333
|
}
|
|
389
334
|
const toolList = workerResult.toolsUsed.join(', ');
|
|
390
335
|
const uniqueTools = [...new Set(workerResult.toolsUsed)].join(', ');
|
|
336
|
+
const availableToolsContext = this.buildToolContextForPrompt();
|
|
391
337
|
const coherencePrompt = `You are a strict quality auditor. Your job is to determine whether a Worker agent's result is SUPPORTED by the tools it actually used, or whether it fabricated/hallucinated claims.
|
|
392
338
|
|
|
393
339
|
USER REQUEST: ${userMessage}
|
|
@@ -399,13 +345,16 @@ TOOLS ACTUALLY USED (in order): ${toolList}
|
|
|
399
345
|
UNIQUE TOOLS USED: ${uniqueTools}
|
|
400
346
|
TOTAL TOOL CALLS: ${workerResult.toolsUsed.length}
|
|
401
347
|
|
|
348
|
+
AVAILABLE TOOLS IN THIS SYSTEM:
|
|
349
|
+
${availableToolsContext}
|
|
350
|
+
|
|
402
351
|
CRITICAL: Analyze whether the claims in the Worker's result are supported by the tools it used.
|
|
403
352
|
|
|
404
|
-
Key tool semantics:
|
|
405
|
-
-
|
|
406
|
-
-
|
|
407
|
-
-
|
|
408
|
-
-
|
|
353
|
+
Key tool semantics to apply:
|
|
354
|
+
- Tools with names like "list_directory", "directory_tree", "search_files" show file/folder NAMES only — they do NOT read file contents
|
|
355
|
+
- Tools with names like "read_text_file", "read_file", "get_file_content" actually read file content
|
|
356
|
+
- Tools with names like "shell_exec", "run_command", "bash", "execute" run shell commands — infer what was run from the worker's result
|
|
357
|
+
- For any other tool, infer its semantics from its name
|
|
409
358
|
|
|
410
359
|
Common hallucination patterns to detect:
|
|
411
360
|
1. Worker claims to have "inspected", "reviewed", "analyzed", or "scanned" source code but never used read_text_file — it only listed directories
|
|
@@ -503,46 +452,51 @@ Respond ONLY with valid JSON:
|
|
|
503
452
|
}
|
|
504
453
|
}
|
|
505
454
|
}
|
|
506
|
-
//
|
|
455
|
+
// Shell-based deep verification (THOROUGH only)
|
|
507
456
|
if (level === InvolvementLevel.THOROUGH) {
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
workerResult.result.includes('.html')) {
|
|
511
|
-
// Extract HTML filename from result
|
|
512
|
-
const htmlMatch = workerResult.result.match(/([a-zA-Z0-9._-]+\.html)/);
|
|
513
|
-
if (htmlMatch) {
|
|
514
|
-
const htmlFile = htmlMatch[1];
|
|
515
|
-
const browserValidation = await this.validateInBrowser(htmlFile);
|
|
516
|
-
if (!browserValidation.valid) {
|
|
517
|
-
issues.push(browserValidation.issue);
|
|
518
|
-
}
|
|
519
|
-
}
|
|
520
|
-
}
|
|
521
|
-
}
|
|
457
|
+
const shellIssues = await this.validateWithShell(requirements, workerResult);
|
|
458
|
+
issues.push(...shellIssues.issues);
|
|
522
459
|
}
|
|
523
460
|
return { issues };
|
|
524
461
|
}
|
|
462
|
+
// ─── Tool-Based Verification ────────────────────────────────────────────────────
|
|
525
463
|
/**
|
|
526
|
-
* Check
|
|
464
|
+
* Check whether a file exists, using whichever MCP tool is available.
|
|
465
|
+
* Tries filesystem read tools first, then falls back to shell.
|
|
527
466
|
*/
|
|
528
467
|
async fileExists(filePath) {
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
head: 1
|
|
533
|
-
|
|
534
|
-
|
|
468
|
+
const readTool = this.findTool('read_text_file', 'read_file', 'get_file_content');
|
|
469
|
+
if (readTool) {
|
|
470
|
+
try {
|
|
471
|
+
await this.mcpClient.executeTool(readTool, { path: filePath, head: 1 });
|
|
472
|
+
return true;
|
|
473
|
+
}
|
|
474
|
+
catch {
|
|
475
|
+
return false;
|
|
476
|
+
}
|
|
535
477
|
}
|
|
536
|
-
|
|
537
|
-
|
|
478
|
+
const shellTool = this.findTool('shell_exec', 'run_command', 'bash', 'execute');
|
|
479
|
+
if (shellTool) {
|
|
480
|
+
try {
|
|
481
|
+
const result = await this.mcpClient.executeTool(shellTool, {
|
|
482
|
+
command: `test -f "${filePath}" && echo "exists" || echo "not_found"`,
|
|
483
|
+
});
|
|
484
|
+
return String(result).includes('exists');
|
|
485
|
+
}
|
|
486
|
+
catch {
|
|
487
|
+
return false;
|
|
488
|
+
}
|
|
538
489
|
}
|
|
490
|
+
logger.debug('[Client] No tool available to verify file existence');
|
|
491
|
+
return false;
|
|
539
492
|
}
|
|
540
493
|
/**
|
|
541
494
|
* Use LLM to generate an actionable correction instruction from raw validation issues.
|
|
542
|
-
*
|
|
543
|
-
*
|
|
495
|
+
* Translates internal validation failures into concrete, tool-specific directions
|
|
496
|
+
* for the Worker, referencing only the tools actually available in the system.
|
|
544
497
|
*/
|
|
545
498
|
async generateCorrectionInstruction(userMessage, subtask, issues, workerResult) {
|
|
499
|
+
const availableToolsContext = this.buildToolContextForPrompt();
|
|
546
500
|
const correctionPrompt = `You are generating a correction instruction for a Worker agent that failed to complete a task properly.
|
|
547
501
|
|
|
548
502
|
ORIGINAL USER REQUEST: ${userMessage}
|
|
@@ -555,10 +509,12 @@ ${issues.map((issue, i) => `${i + 1}. ${issue}`).join('\n')}
|
|
|
555
509
|
WORKER'S RESULT (first 300 chars): ${workerResult.result.substring(0, 300)}
|
|
556
510
|
TOOLS WORKER USED: ${workerResult.toolsUsed.join(', ') || 'none'}
|
|
557
511
|
|
|
512
|
+
AVAILABLE TOOLS THE WORKER CAN USE:
|
|
513
|
+
${availableToolsContext}
|
|
514
|
+
|
|
558
515
|
Generate a CLEAR, ACTIONABLE instruction that tells the Worker exactly what to do to fix the issues.
|
|
559
516
|
The instruction should:
|
|
560
|
-
- Be a direct command
|
|
561
|
-
- Reference specific tools or actions the Worker should take
|
|
517
|
+
- Be a direct command referencing specific available tools by name
|
|
562
518
|
- Be concise (1-2 sentences)
|
|
563
519
|
- NOT include validation jargon like "mustUseTools", "requirements", or "involvement level"
|
|
564
520
|
- NOT be a generic statement like "retry the task" — be specific about WHAT to do
|
|
@@ -584,74 +540,101 @@ Respond ONLY with the correction instruction text, nothing else.`;
|
|
|
584
540
|
return `Fix the following issue and retry: ${issues[0]}`;
|
|
585
541
|
}
|
|
586
542
|
/**
|
|
587
|
-
* Validate file contents for common issues
|
|
543
|
+
* Validate file contents for common issues, using whichever MCP tool is available.
|
|
588
544
|
*/
|
|
589
545
|
async validateFileContents(filePath) {
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
546
|
+
const readTool = this.findTool('read_text_file', 'read_file', 'get_file_content');
|
|
547
|
+
const shellTool = this.findTool('shell_exec', 'run_command', 'bash', 'execute');
|
|
548
|
+
let contentStr = null;
|
|
549
|
+
if (readTool) {
|
|
550
|
+
try {
|
|
551
|
+
const content = await this.mcpClient.executeTool(readTool, { path: filePath, head: 200 });
|
|
552
|
+
contentStr = typeof content === 'string' ? content : JSON.stringify(content);
|
|
553
|
+
}
|
|
554
|
+
catch (error) {
|
|
555
|
+
return { valid: false, issue: `Could not read file: ${error}` };
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
else if (shellTool) {
|
|
559
|
+
try {
|
|
560
|
+
const content = await this.mcpClient.executeTool(shellTool, {
|
|
561
|
+
command: `head -200 "${filePath}" 2>&1`,
|
|
562
|
+
});
|
|
563
|
+
contentStr = String(content);
|
|
564
|
+
}
|
|
565
|
+
catch (error) {
|
|
566
|
+
return { valid: false, issue: `Could not read file via shell: ${error}` };
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
if (contentStr === null) {
|
|
570
|
+
return { valid: true }; // No read tool available; skip content check
|
|
571
|
+
}
|
|
572
|
+
// Check path reference integrity in HTML files
|
|
573
|
+
if (filePath.endsWith('.html')) {
|
|
574
|
+
const hrefMatches = contentStr.match(/href="([^"]+)"/g) || [];
|
|
575
|
+
const srcMatches = contentStr.match(/src="([^"]+)"/g) || [];
|
|
576
|
+
for (const match of [...hrefMatches, ...srcMatches]) {
|
|
577
|
+
const pathMatch = match.match(/(?:href|src)="([^"]+)"/);
|
|
578
|
+
if (pathMatch) {
|
|
579
|
+
const referencedPath = pathMatch[1];
|
|
580
|
+
if (!referencedPath.startsWith('http') && !referencedPath.startsWith('data:')) {
|
|
581
|
+
const exists = await this.fileExists(referencedPath);
|
|
582
|
+
if (!exists) {
|
|
583
|
+
return {
|
|
584
|
+
valid: false,
|
|
585
|
+
issue: `HTML references non-existent file: ${referencedPath}. Fix file paths or create missing files.`,
|
|
586
|
+
};
|
|
612
587
|
}
|
|
613
588
|
}
|
|
614
589
|
}
|
|
615
590
|
}
|
|
616
|
-
return { valid: true };
|
|
617
|
-
}
|
|
618
|
-
catch (error) {
|
|
619
|
-
return { valid: false, issue: `Could not read file: ${error}` };
|
|
620
591
|
}
|
|
592
|
+
return { valid: true };
|
|
621
593
|
}
|
|
622
594
|
/**
|
|
623
|
-
*
|
|
595
|
+
* THOROUGH-level shell-based verification: runs lightweight, read-only shell
|
|
596
|
+
* commands to confirm work was actually done (file sizes, test output presence, etc.).
|
|
597
|
+
* Skips gracefully when no shell tool is available.
|
|
624
598
|
*/
|
|
625
|
-
async
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
url: fileUrl,
|
|
632
|
-
});
|
|
633
|
-
// Check for console errors
|
|
634
|
-
const errors = await this.mcpClient.executeTool('playwright__browser_console_messages', {
|
|
635
|
-
level: 'error',
|
|
636
|
-
});
|
|
637
|
-
const errorStr = typeof errors === 'string' ? errors : JSON.stringify(errors);
|
|
638
|
-
if (errorStr && errorStr.length > 0 && !errorStr.includes('[]')) {
|
|
639
|
-
return {
|
|
640
|
-
valid: false,
|
|
641
|
-
issue: `Browser errors detected in ${htmlFile}: ${errorStr}. Fix these errors before delivery.`
|
|
642
|
-
};
|
|
643
|
-
}
|
|
644
|
-
return { valid: true };
|
|
599
|
+
async validateWithShell(requirements, workerResult) {
|
|
600
|
+
const issues = [];
|
|
601
|
+
const shellTool = this.findTool('shell_exec', 'run_command', 'bash', 'execute');
|
|
602
|
+
if (!shellTool) {
|
|
603
|
+
logger.debug('[Client] No shell tool available for THOROUGH shell validation — skipping');
|
|
604
|
+
return { issues };
|
|
645
605
|
}
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
606
|
+
for (const req of requirements) {
|
|
607
|
+
if ((req.type === 'file_creation' || req.type === 'file_modification') && req.filePath) {
|
|
608
|
+
try {
|
|
609
|
+
const result = await this.mcpClient.executeTool(shellTool, {
|
|
610
|
+
command: `wc -c "${req.filePath}" 2>&1`,
|
|
611
|
+
});
|
|
612
|
+
const resultStr = String(result);
|
|
613
|
+
if (resultStr.includes('No such file') || resultStr.includes('cannot access')) {
|
|
614
|
+
issues.push(`Shell verification failed: ${req.filePath} does not exist on disk.`);
|
|
615
|
+
}
|
|
616
|
+
else {
|
|
617
|
+
const sizeMatch = resultStr.match(/^\s*(\d+)/);
|
|
618
|
+
if (sizeMatch && parseInt(sizeMatch[1], 10) === 0) {
|
|
619
|
+
issues.push(`File ${req.filePath} was created but is empty.`);
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
catch (error) {
|
|
624
|
+
logger.debug(`[Client] Shell validation error for ${req.filePath}: ${error}`);
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
if (req.type === 'testing') {
|
|
628
|
+
const hasTestOutput = workerResult.result.match(/passed|failed|error|PASS|FAIL|✓|✗|tests run|test suite/i);
|
|
629
|
+
if (!hasTestOutput) {
|
|
630
|
+
logger.debug('[Client] THOROUGH: testing requirement but no test output detected in worker result');
|
|
631
|
+
}
|
|
632
|
+
}
|
|
650
633
|
}
|
|
634
|
+
return { issues };
|
|
651
635
|
}
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
*/
|
|
636
|
+
// ─── Session Management ─────────────────────────────────────────────────────
|
|
637
|
+
/** Reset failure tracking (call at the start of each new conversation/session). */
|
|
655
638
|
resetFailureTracking() {
|
|
656
639
|
this.failureCount = 0;
|
|
657
640
|
}
|