jiva-core 0.3.42-dev.c44c725 → 0.3.43-dev.00a530a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/dist/code/agent.d.ts +2 -0
  2. package/dist/code/agent.d.ts.map +1 -1
  3. package/dist/code/agent.js +11 -4
  4. package/dist/code/agent.js.map +1 -1
  5. package/dist/core/agent-interface.d.ts +5 -0
  6. package/dist/core/agent-interface.d.ts.map +1 -1
  7. package/dist/core/agent-spawner.d.ts.map +1 -1
  8. package/dist/core/agent-spawner.js +2 -1
  9. package/dist/core/agent-spawner.js.map +1 -1
  10. package/dist/core/conversation-manager.d.ts +17 -3
  11. package/dist/core/conversation-manager.d.ts.map +1 -1
  12. package/dist/core/conversation-manager.js +21 -8
  13. package/dist/core/conversation-manager.js.map +1 -1
  14. package/dist/core/dual-agent.d.ts +16 -0
  15. package/dist/core/dual-agent.d.ts.map +1 -1
  16. package/dist/core/dual-agent.js +35 -7
  17. package/dist/core/dual-agent.js.map +1 -1
  18. package/dist/core/manager-agent.d.ts.map +1 -1
  19. package/dist/core/manager-agent.js +19 -8
  20. package/dist/core/manager-agent.js.map +1 -1
  21. package/dist/core/worker-agent.d.ts.map +1 -1
  22. package/dist/core/worker-agent.js +59 -9
  23. package/dist/core/worker-agent.js.map +1 -1
  24. package/dist/evaluator/directive-adapter.d.ts +15 -0
  25. package/dist/evaluator/directive-adapter.d.ts.map +1 -0
  26. package/dist/evaluator/directive-adapter.js +75 -0
  27. package/dist/evaluator/directive-adapter.js.map +1 -0
  28. package/dist/evaluator/evaluator-agent.d.ts +35 -0
  29. package/dist/evaluator/evaluator-agent.d.ts.map +1 -0
  30. package/dist/evaluator/evaluator-agent.js +369 -0
  31. package/dist/evaluator/evaluator-agent.js.map +1 -0
  32. package/dist/evaluator/harness.d.ts +31 -0
  33. package/dist/evaluator/harness.d.ts.map +1 -0
  34. package/dist/evaluator/harness.js +70 -0
  35. package/dist/evaluator/harness.js.map +1 -0
  36. package/dist/evaluator/index.d.ts +47 -0
  37. package/dist/evaluator/index.d.ts.map +1 -0
  38. package/dist/evaluator/index.js +79 -0
  39. package/dist/evaluator/index.js.map +1 -0
  40. package/dist/evaluator/tools/agent-tools.d.ts +23 -0
  41. package/dist/evaluator/tools/agent-tools.d.ts.map +1 -0
  42. package/dist/evaluator/tools/agent-tools.js +111 -0
  43. package/dist/evaluator/tools/agent-tools.js.map +1 -0
  44. package/dist/evaluator/types.d.ts +82 -0
  45. package/dist/evaluator/types.d.ts.map +1 -0
  46. package/dist/evaluator/types.js +9 -0
  47. package/dist/evaluator/types.js.map +1 -0
  48. package/dist/interfaces/cli/index.js +36 -13
  49. package/dist/interfaces/cli/index.js.map +1 -1
  50. package/dist/interfaces/cli/repl.d.ts +6 -0
  51. package/dist/interfaces/cli/repl.d.ts.map +1 -1
  52. package/dist/interfaces/cli/repl.js +153 -28
  53. package/dist/interfaces/cli/repl.js.map +1 -1
  54. package/dist/interfaces/http/middleware/auth.d.ts.map +1 -1
  55. package/dist/interfaces/http/middleware/auth.js +2 -1
  56. package/dist/interfaces/http/middleware/auth.js.map +1 -1
  57. package/dist/interfaces/http/routes/chat.d.ts.map +1 -1
  58. package/dist/interfaces/http/routes/chat.js +78 -0
  59. package/dist/interfaces/http/routes/chat.js.map +1 -1
  60. package/dist/interfaces/http/routes/session.d.ts.map +1 -1
  61. package/dist/interfaces/http/routes/session.js +22 -0
  62. package/dist/interfaces/http/routes/session.js.map +1 -1
  63. package/dist/interfaces/http/session-manager.d.ts.map +1 -1
  64. package/dist/interfaces/http/session-manager.js +4 -0
  65. package/dist/interfaces/http/session-manager.js.map +1 -1
  66. package/dist/mcp/client.d.ts.map +1 -1
  67. package/dist/mcp/client.js +9 -1
  68. package/dist/mcp/client.js.map +1 -1
  69. package/dist/models/harmony.d.ts.map +1 -1
  70. package/dist/models/harmony.js +31 -1
  71. package/dist/models/harmony.js.map +1 -1
  72. package/dist/models/model-client.d.ts.map +1 -1
  73. package/dist/models/model-client.js +5 -3
  74. package/dist/models/model-client.js.map +1 -1
  75. package/dist/models/orchestrator.d.ts +6 -0
  76. package/dist/models/orchestrator.d.ts.map +1 -1
  77. package/dist/models/orchestrator.js +20 -6
  78. package/dist/models/orchestrator.js.map +1 -1
  79. package/dist/models/token-tracker.d.ts +72 -0
  80. package/dist/models/token-tracker.d.ts.map +1 -0
  81. package/dist/models/token-tracker.js +110 -0
  82. package/dist/models/token-tracker.js.map +1 -0
  83. package/dist/storage/types.d.ts +6 -0
  84. package/dist/storage/types.d.ts.map +1 -1
  85. package/dist/storage/types.js.map +1 -1
  86. package/package.json +2 -1
@@ -0,0 +1,35 @@
1
+ /**
2
+ * EvaluatorAgent — autonomous supervisor for Jiva tasks.
3
+ *
4
+ * Architecture mirrors CodeAgent's single tool-calling loop, but instead of
5
+ * writing code the evaluator reads workspace files, validates completion against
6
+ * the directive, and uses interact_with_agent to nudge the main agent when gaps
7
+ * are found.
8
+ *
9
+ * The evaluator has completely isolated LLM state — its own ModelOrchestrator,
10
+ * its own MCPServerManager, and its own message history. It never shares context
11
+ * with the agent it supervises.
12
+ */
13
+ import type { EvaluatorConfig, EvaluationContext, EvaluationResult } from './types.js';
14
+ export declare class EvaluatorAgent {
15
+ private orchestrator;
16
+ private mcpManager;
17
+ private workspace;
18
+ private targetAgent;
19
+ private maxIterations;
20
+ private maxCycles;
21
+ private virtualTools;
22
+ constructor(config: EvaluatorConfig);
23
+ /** Return token usage accumulated by the evaluator's isolated orchestrator. */
24
+ getOrchestratorTokenUsage(): import("../models/token-tracker.js").TokenUsageSnapshot;
25
+ /**
26
+ * Evaluate whether the main agent has completed the goal described in ctx.userMessage.
27
+ *
28
+ * The evaluator runs its own agentic loop: it reads files, assesses completion,
29
+ * sends nudges to the main agent via interact_with_agent, and re-checks until
30
+ * satisfied or the cycle limit is reached.
31
+ */
32
+ evaluate(ctx: EvaluationContext): Promise<EvaluationResult>;
33
+ private runEvaluationCycle;
34
+ }
35
+ //# sourceMappingURL=evaluator-agent.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator-agent.d.ts","sourceRoot":"","sources":["../../src/evaluator/evaluator-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAUH,OAAO,KAAK,EACV,eAAe,EACf,iBAAiB,EACjB,gBAAgB,EAEjB,MAAM,YAAY,CAAC;AAiHpB,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,UAAU,CAAmB;IACrC,OAAO,CAAC,SAAS,CAAmB;IACpC,OAAO,CAAC,WAAW,CAA8C;IACjE,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,YAAY,CAAmB;gBAE3B,MAAM,EAAE,eAAe;IAUnC,+EAA+E;IAC/E,yBAAyB;IAIzB;;;;;;OAMG;IACG,QAAQ,CAAC,GAAG,EAAE,iBAAiB,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAuFnD,kBAAkB;CA0MjC"}
@@ -0,0 +1,369 @@
1
+ /**
2
+ * EvaluatorAgent — autonomous supervisor for Jiva tasks.
3
+ *
4
+ * Architecture mirrors CodeAgent's single tool-calling loop, but instead of
5
+ * writing code the evaluator reads workspace files, validates completion against
6
+ * the directive, and uses interact_with_agent to nudge the main agent when gaps
7
+ * are found.
8
+ *
9
+ * The evaluator has completely isolated LLM state — its own ModelOrchestrator,
10
+ * its own MCPServerManager, and its own message history. It never shares context
11
+ * with the agent it supervises.
12
+ */
13
+ import { formatToolResult } from '../models/harmony.js';
14
+ import { logger } from '../utils/logger.js';
15
+ import { deriveEvaluatorDirective } from './directive-adapter.js';
16
+ import { EVALUATOR_VIRTUAL_TOOLS } from './tools/agent-tools.js';
17
+ // ─── Constants ────────────────────────────────────────────────────────────────
18
+ const DEFAULT_MAX_ITERATIONS = 30;
19
+ const DEFAULT_MAX_CYCLES = 5;
20
+ const DOOM_LOOP_THRESHOLD = 3;
21
+ // ─── Result parsing ───────────────────────────────────────────────────────────
22
/**
 * Strip Harmony/XML tool-call markup so it doesn't pollute the summary text.
 *
 * Removes `<tool_call>…</tool_call>` blocks, `<|call|>…<|return|>` spans and
 * `<|channel|>name` markers, then trims surrounding whitespace.
 *
 * @param text Raw model output.
 * @returns The text with tool-call markup removed.
 */
function stripToolCallMarkup(text) {
    return text
        .replace(/<tool_call>[\s\S]*?<\/tool_call>/gi, '')
        .replace(/<\|call\|>[\s\S]*?<\|return\|>/g, '')
        .replace(/<\|channel\|>\w+/g, '')
        .trim();
}
/**
 * Extract a structured EvaluationResult from the evaluator's final LLM response.
 *
 * Looks for a ```json fenced block (first in the markup-stripped text, then in
 * the raw text); if none is found the whole stripped text is parsed as JSON.
 * When no JSON object can be obtained, falls back to keyword matching on the
 * free-form text, and reports an inconclusive failure when the remaining text
 * is shorter than 20 characters.
 *
 * @param text Final text produced by the evaluation cycle.
 * @param nudgesSent Number of nudges sent so far (copied into the result).
 * @param cyclesRan Number of cycles executed so far (copied into the result).
 * @param evidence Evidence entries collected so far (copied into the result).
 * @returns An object with `passed`, `gaps`, `nudgesSent`, `cyclesRan`,
 *          `evidence` and `summary`.
 */
function parseEvaluationResult(text, nudgesSent, cyclesRan, evidence) {
    // Strip any tool-call markup before attempting to parse
    const stripped = stripToolCallMarkup(text);
    try {
        // Look for ```json ... ``` block (prefer stripped, fall back to raw)
        const jsonMatch = stripped.match(/```json\s*([\s\S]+?)\s*```/i) ||
            text.match(/```json\s*([\s\S]+?)\s*```/i);
        const raw = jsonMatch ? jsonMatch[1] : stripped;
        const parsed = JSON.parse(raw.trim());
        // Valid JSON that is not an object (null, number, string, array) is not a
        // verdict; route it to the free-form handling below instead of reading
        // properties off it.
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
            throw new SyntaxError('Evaluator verdict is not a JSON object');
        }
        // Models sometimes quote booleans. Boolean("false") is true, which would
        // turn an explicit failure into a pass, so strings are compared by value.
        const passed = typeof parsed.passed === 'string'
            ? parsed.passed.trim().toLowerCase() === 'true'
            : Boolean(parsed.passed);
        return {
            passed,
            gaps: Array.isArray(parsed.gaps) ? parsed.gaps : [],
            nudgesSent,
            cyclesRan,
            evidence,
            summary: typeof parsed.summary === 'string' ? parsed.summary : stripped.substring(0, 300),
        };
    }
    catch {
        // Free-form response — determine pass/fail from keywords in the stripped text
        const lower = stripped.toLowerCase();
        const passed = lower.includes('evaluation passed') ||
            lower.includes('all tasks complete') ||
            lower.includes('work is complete') ||
            lower.includes('"passed": true');
        // If the stripped text is empty (was entirely tool calls) or too short to be
        // a real verdict, treat this as inconclusive rather than a definitive failure.
        const inconclusive = stripped.length < 20;
        return {
            passed: inconclusive ? false : passed,
            gaps: inconclusive
                ? ['Evaluation inconclusive — evaluator did not produce a final verdict']
                : [],
            nudgesSent,
            cyclesRan,
            evidence,
            summary: inconclusive
                ? 'The evaluator did not produce a parseable verdict. Review the agent logs for details.'
                : stripped.substring(0, 300),
        };
    }
}
76
+ // ─── System prompt ────────────────────────────────────────────────────────────
77
/**
 * Assemble the developer prompt handed to the evaluator model.
 *
 * The prompt states the evaluator's role, names the workspace directory,
 * lists the evaluation steps and tool-selection rules, shows the JSON shape
 * expected as the final answer, and ends with the evaluator directive.
 *
 * @param workspaceDir Workspace directory interpolated after "WORKSPACE: ".
 * @param evaluatorDirective Directive text appended as the last line(s).
 * @returns The full prompt as a single newline-separated string.
 */
function buildSystemPrompt(workspaceDir, evaluatorDirective) {
    const roleSection = [
        'You are an autonomous evaluation agent operating in evaluation mode.',
        'You DO NOT perform tasks — you VALIDATE whether tasks have been completed.',
    ];
    const workspaceSection = [
        `WORKSPACE: ${workspaceDir}`,
        'All relative paths are resolved relative to the workspace directory above.',
        'Use absolute paths for all file operations.',
    ];
    const approachSection = [
        'YOUR EVALUATION APPROACH:',
        '1. Read workspace files to understand what has been produced.',
        '2. Compare the actual output against the requirements in your directive.',
        '3. If gaps exist, send targeted instructions to the main agent using interact_with_agent.',
        '4. Re-read files after each nudge to confirm corrections were applied.',
        '5. When you are satisfied (or exhausted your options), output your final JSON assessment.',
    ];
    const toolSection = [
        'TOOL SELECTION:',
        '- To READ files → use the filesystem MCP tools (filesystem__read_file, filesystem__search_files, etc.)',
        '- To SEND instructions to the main agent → use interact_with_agent',
        '- To LIST available conversations → use list_agent_conversations',
        '- To INSPECT conversation history → use get_conversation_history',
    ];
    const outputSection = [
        'FINAL OUTPUT — when evaluation is complete, respond with ONLY this JSON (no other text):',
        '```json',
        '{',
        '"passed": true | false,',
        '"gaps": ["specific gap 1", "specific gap 2"],',
        '"summary": "1-3 sentence assessment."',
        '}',
        '```',
    ];
    // Template interpolation (not the bare value) keeps the same string
    // conversion the original template literal applied to the directive.
    const directiveSection = [`${evaluatorDirective}`];
    // Sections are separated by exactly one blank line.
    return [
        roleSection,
        workspaceSection,
        approachSection,
        toolSection,
        outputSection,
        directiveSection,
    ]
        .map((section) => section.join('\n'))
        .join('\n\n');
}
109
+ // ─── EvaluatorAgent ───────────────────────────────────────────────────────────
110
/**
 * Supervisor that validates the target agent's work and nudges it when gaps
 * are found.
 *
 * Each evaluation runs up to `maxCycles` cycles; every cycle is a fresh
 * tool-calling loop of at most `maxIterations` model turns whose final text is
 * parsed into an evaluation result.
 */
export class EvaluatorAgent {
    orchestrator;
    mcpManager;
    workspace;
    targetAgent;
    maxIterations;
    maxCycles;
    virtualTools;
    /**
     * @param config Supplies `orchestrator`, `mcpManager`, `workspace` and
     *   `targetAgent`, plus optional `maxIterationsPerCycle` and
     *   `maxEvaluationCycles` (defaulting to DEFAULT_MAX_ITERATIONS and
     *   DEFAULT_MAX_CYCLES).
     */
    constructor(config) {
        this.orchestrator = config.orchestrator;
        this.mcpManager = config.mcpManager;
        this.workspace = config.workspace;
        this.targetAgent = config.targetAgent;
        this.maxIterations = config.maxIterationsPerCycle ?? DEFAULT_MAX_ITERATIONS;
        this.maxCycles = config.maxEvaluationCycles ?? DEFAULT_MAX_CYCLES;
        this.virtualTools = EVALUATOR_VIRTUAL_TOOLS;
    }
    /** Return token usage accumulated by the evaluator's isolated orchestrator. */
    getOrchestratorTokenUsage() {
        return this.orchestrator.getTokenUsage();
    }
    /**
     * Evaluate whether the main agent has completed the goal described in
     * `ctx.userMessage`.
     *
     * The evaluator runs its own agentic loop: it reads files, assesses completion,
     * sends nudges to the main agent via interact_with_agent, and re-checks until
     * satisfied or the cycle limit is reached.
     *
     * @param ctx Evaluation context; `userMessage` is the goal and
     *   `targetConversationId`, when set, is loaded into the target agent first.
     * @returns The parsed result of the last cycle that ran, or a failure result
     *   when no cycle ran at all.
     */
    async evaluate(ctx) {
        logger.info('[Evaluator] Starting evaluation');
        // Optionally load a specific conversation into the target agent
        if (ctx.targetConversationId) {
            logger.info(`[Evaluator] Loading conversation: ${ctx.targetConversationId}`);
            await this.targetAgent.loadConversation(ctx.targetConversationId);
        }
        let nudgesSent = 0;
        let cyclesRan = 0;
        const evidence = [];
        // The directive for the evaluator — wraps the workspace directive with evaluation framing
        const rawDirective = this.workspace.getDirectivePrompt() ?? '';
        const evaluatorDirective = deriveEvaluatorDirective(rawDirective);
        const workspaceDir = this.workspace.getWorkspaceDir();
        const systemPrompt = buildSystemPrompt(workspaceDir, evaluatorDirective);
        // Build the unified tool list: MCP tools + virtual tools
        const mcpToolDefs = this.mcpManager
            .getClient()
            .getAllTools()
            .map((t) => ({
            name: t.name,
            description: t.description,
            parameters: t.inputSchema ?? { type: 'object', properties: {} },
        }));
        const virtualToolDefs = this.virtualTools.map((t) => ({
            name: t.name,
            description: t.description,
            parameters: t.parameters,
        }));
        const allToolDefs = [...mcpToolDefs, ...virtualToolDefs];
        // Evaluation loop — each cycle runs the full agentic loop once.
        // The loop stops early as soon as a cycle's parsed result has passed === true.
        let lastResult = null;
        for (let cycle = 0; cycle < this.maxCycles; cycle++) {
            cyclesRan = cycle + 1;
            logger.info(`[Evaluator] Cycle ${cyclesRan}/${this.maxCycles}`);
            const cycleResult = await this.runEvaluationCycle({
                systemPrompt,
                userMessage: ctx.userMessage,
                allToolDefs,
                workspaceDir,
                // Boxed counter so the cycle's tool callbacks can increment it.
                nudgesSentRef: { value: nudgesSent },
                evidence,
            });
            nudgesSent = cycleResult.nudgesSentAfter;
            // Snapshot the evidence so later cycles cannot mutate an earlier result.
            lastResult = parseEvaluationResult(cycleResult.finalText, nudgesSent, cyclesRan, [...evidence]);
            logger.info(`[Evaluator] Cycle ${cyclesRan} complete — passed: ${lastResult.passed}, gaps: ${lastResult.gaps.length}`);
            if (lastResult.passed) {
                break;
            }
        }
        // Only reachable when the cycle loop never ran (maxCycles <= 0).
        if (!lastResult) {
            return {
                passed: false,
                gaps: ['Evaluation could not be completed — max cycles reached without a result'],
                nudgesSent,
                cyclesRan,
                evidence,
                summary: 'Evaluation did not produce a result within the allowed cycle budget.',
            };
        }
        return lastResult;
    }
    // ─── Private: single cycle ──────────────────────────────────────────────────
    /**
     * Run one evaluation cycle: a tool-calling loop with its own message history.
     *
     * @param params `systemPrompt`, `userMessage`, `allToolDefs`, `workspaceDir`,
     *   `nudgesSentRef` (boxed counter incremented on each nudge) and `evidence`
     *   (shared list that read/search/list tool targets are appended to).
     * @returns `finalText` (the model's last tool-free reply, or a bracketed
     *   placeholder when the loop ended without one) and `nudgesSentAfter`.
     */
    async runEvaluationCycle(params) {
        const { systemPrompt, userMessage, allToolDefs, workspaceDir, nudgesSentRef, evidence } = params;
        // Fresh isolated message history for each cycle
        const messages = [
            { role: 'developer', content: systemPrompt },
            {
                role: 'user',
                content: `Evaluate whether the following goal has been fully and correctly completed:\n\n` +
                    `GOAL: ${userMessage}\n\n` +
                    `Start by reading the relevant workspace files, then assess completion against the ` +
                    `original directive. If gaps exist, use interact_with_agent to guide the main agent. ` +
                    `When done, output your JSON assessment.`,
            },
        ];
        const toolContext = {
            workspaceDir,
            targetAgent: this.targetAgent,
            onNudgeSent: () => {
                nudgesSentRef.value++;
            },
            onEvidenceFound: (path) => {
                if (!evidence.includes(path))
                    evidence.push(path);
            },
        };
        // Doom loop detection: sliding window of the most recent call signatures
        const recentCalls = [];
        let consecutiveApiErrors = 0;
        const MAX_API_ERRORS = 3;
        let finalText = '';
        let iterations = 0;
        // Explicit flags instead of scanning message text: the near-limit reminder
        // never contained the 'FINAL PHASE' marker that was searched for (so it was
        // re-sent every iteration), and tool output containing that marker could
        // suppress the real final-phase notice.
        let nearLimitNoticeSent = false;
        let finalPhaseNoticeSent = false;
        for (let i = 0; i < this.maxIterations; i++) {
            iterations = i + 1;
            const iterPct = i / this.maxIterations;
            const isFinalPhase = iterPct >= 0.90;
            // Near-limit nudge (sent once, in the 80–90% window)
            if (iterPct >= 0.80 && iterPct < 0.90 && i > 0 && !nearLimitNoticeSent) {
                nearLimitNoticeSent = true;
                messages.push({
                    role: 'user',
                    content: `You are approaching the iteration limit (${iterations}/${this.maxIterations}). ` +
                        `Finish any remaining checks and produce your final JSON assessment.`,
                });
            }
            // Final phase (sent once): tools are withheld from the request below
            if (isFinalPhase && !finalPhaseNoticeSent) {
                finalPhaseNoticeSent = true;
                messages.push({
                    role: 'user',
                    content: 'FINAL PHASE: Tools are disabled. You MUST respond with ONLY the JSON assessment block now.\n\n' +
                        '```json\n{"passed": true|false, "gaps": [], "summary": "..."}\n```',
                });
            }
            let response;
            try {
                response = await this.orchestrator.chatWithFallback({
                    messages,
                    tools: isFinalPhase ? [] : allToolDefs,
                    temperature: 0.1,
                }, false);
                consecutiveApiErrors = 0;
            }
            catch (error) {
                consecutiveApiErrors++;
                const msg = error instanceof Error ? error.message : String(error);
                logger.error(`[Evaluator] Model error (${consecutiveApiErrors}/${MAX_API_ERRORS}): ${msg}`);
                if (consecutiveApiErrors >= MAX_API_ERRORS) {
                    finalText = '[Evaluation aborted due to repeated model errors]';
                    break;
                }
                // Add placeholder so conversation structure remains valid
                const lastMsg = messages[messages.length - 1];
                if (lastMsg && lastMsg.role !== 'assistant') {
                    messages.push({ role: 'assistant', content: '' });
                }
                messages.push({
                    role: 'user',
                    content: 'The previous request failed. Please continue the evaluation and output your JSON assessment.',
                });
                continue;
            }
            const toolCalls = response.toolCalls;
            // No tool calls → evaluator is done
            if (!toolCalls || toolCalls.length === 0) {
                messages.push({ role: 'assistant', content: response.content });
                finalText = response.content || '[No evaluation response]';
                break;
            }
            // Record assistant message together with the tool calls it requested
            messages.push({
                role: 'assistant',
                content: response.content || null,
                tool_calls: toolCalls,
            });
            // Execute tool calls. Every requested call gets a tool result message,
            // even when the doom-loop guard trips, so no tool_call is left unanswered
            // in the history sent with the next request.
            let doomLoopTool = null;
            for (const toolCall of toolCalls) {
                const toolName = toolCall.function.name;
                if (doomLoopTool !== null) {
                    messages.push(formatToolResult(toolCall.id, toolName, 'Skipped: a repeated-call guard stopped this batch of tool calls before this one ran.'));
                    continue;
                }
                let toolArgs = {};
                try {
                    const parsedArgs = JSON.parse(toolCall.function.arguments);
                    // Only a plain object is usable as an argument bag; null, arrays and
                    // primitives are treated like malformed arguments.
                    if (parsedArgs !== null && typeof parsedArgs === 'object' && !Array.isArray(parsedArgs)) {
                        toolArgs = parsedArgs;
                    }
                }
                catch {
                    // malformed args — continue with empty args
                }
                // Doom loop guard
                const callSig = `${toolName}:${JSON.stringify(toolArgs)}`;
                recentCalls.push(callSig);
                if (recentCalls.length > DOOM_LOOP_THRESHOLD)
                    recentCalls.shift();
                if (recentCalls.length === DOOM_LOOP_THRESHOLD &&
                    recentCalls.every((c) => c === recentCalls[0])) {
                    logger.warn(`[Evaluator] Doom loop detected for tool: ${toolName}`);
                    doomLoopTool = toolName;
                    messages.push(formatToolResult(toolCall.id, toolName, `Skipped: ${toolName} was called repeatedly with identical arguments.`));
                    continue;
                }
                logger.info(`[Evaluator] Tool: ${toolName}`);
                let toolResult;
                // Check virtual tools first, then fall through to MCP
                const virtualTool = this.virtualTools.find((t) => t.name === toolName);
                if (virtualTool) {
                    try {
                        toolResult = await virtualTool.execute(toolArgs, toolContext);
                    }
                    catch (e) {
                        toolResult = `Error executing ${toolName}: ${e instanceof Error ? e.message : String(e)}`;
                    }
                }
                else {
                    // MCP tool
                    try {
                        const result = await this.mcpManager.getClient().executeTool(toolName, toolArgs);
                        toolResult = typeof result === 'string' ? result : JSON.stringify(result);
                        // Track filesystem reads as evidence
                        if (toolName.includes('read') ||
                            toolName.includes('search') ||
                            toolName.includes('list')) {
                            // Evidence entries are strings; ignore non-string argument values.
                            const pathArg = [toolArgs.path, toolArgs.query, toolArgs.pattern]
                                .find((candidate) => typeof candidate === 'string' && candidate.length > 0);
                            if (pathArg)
                                toolContext.onEvidenceFound(pathArg);
                        }
                    }
                    catch (e) {
                        toolResult = `Error executing ${toolName}: ${e instanceof Error ? e.message : String(e)}`;
                    }
                }
                const toolMessage = formatToolResult(toolCall.id, toolName, toolResult);
                messages.push(toolMessage);
            }
            // Sent after all tool results so it does not split a tool_calls batch
            if (doomLoopTool !== null) {
                messages.push({
                    role: 'user',
                    content: `STOP: You are calling \`${doomLoopTool}\` with the same arguments repeatedly. ` +
                        `Move on and output your final JSON assessment based on what you have found so far.`,
                });
            }
        }
        if (!finalText) {
            finalText = `[Evaluator reached max iterations (${iterations}) without producing a final assessment]`;
        }
        return { finalText, nudgesSentAfter: nudgesSentRef.value };
    }
}
369
+ //# sourceMappingURL=evaluator-agent.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator-agent.js","sourceRoot":"","sources":["../../src/evaluator/evaluator-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAExD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAClE,OAAO,EAAE,uBAAuB,EAAuB,MAAM,wBAAwB,CAAC;AAQtF,iFAAiF;AAEjF,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAClC,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B,iFAAiF;AAEjF;;;GAGG;AACH,iFAAiF;AACjF,SAAS,mBAAmB,CAAC,IAAY;IACvC,OAAO,IAAI;SACR,OAAO,CAAC,oCAAoC,EAAE,EAAE,CAAC;SACjD,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC;SAC9C,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC;SAChC,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAY,EACZ,UAAkB,EAClB,SAAiB,EACjB,QAAkB;IAElB,wDAAwD;IACxD,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAE3C,IAAI,CAAC;QACH,qEAAqE;QACrE,MAAM,SAAS,GACb,QAAQ,CAAC,KAAK,CAAC,6BAA6B,CAAC;YAC7C,IAAI,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC5C,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAChD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;QAEtC,OAAO;YACL,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC;YAC9B,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE;YACnD,UAAU;YACV,SAAS;YACT,QAAQ;YACR,OAAO,EAAE,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;SAC1F,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,8EAA8E;QAC9E,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,MAAM,GACV,KAAK,CAAC,QAAQ,CAAC,mBAAmB,CAAC;YACnC,KAAK,CAAC,QAAQ,CAAC,oBAAoB,CAAC;YACpC,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC;YAClC,KAAK,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;QAEnC,6EAA6E;QAC7E,+EAA+E;QAC/E,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,GAAG,EAAE,CAAC;QAE1C,OAAO;YACL,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM;YACrC,IAAI,EAAE,YAAY;gBAChB,CAAC,CAAC,CAAC,qEAAqE,CAAC;gBACzE,CAAC,CAAC,EAAE;YACN,UAAU;YACV,SAAS;YACT,QAAQ;YACR,OAAO,EAAE,YAAY;gBACnB,CAAC,CAAC,uFAAuF;gBACzF,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC
,CAAC,EAAE,GAAG,CAAC;SAC/B,CAAC;IACJ,CAAC;AACH,CAAC;AAED,iFAAiF;AAEjF,SAAS,iBAAiB,CAAC,YAAoB,EAAE,kBAA0B;IACzE,OAAO;;;aAGI,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;EA0BvB,kBAAkB,EAAE,CAAC;AACvB,CAAC;AAED,iFAAiF;AAEjF,MAAM,OAAO,cAAc;IACjB,YAAY,CAAoB;IAChC,UAAU,CAAmB;IAC7B,SAAS,CAAmB;IAC5B,WAAW,CAA8C;IACzD,aAAa,CAAS;IACtB,SAAS,CAAS;IAClB,YAAY,CAAmB;IAEvC,YAAY,MAAuB;QACjC,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;QACxC,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACpC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAClC,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;QACtC,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,qBAAqB,IAAI,sBAAsB,CAAC;QAC5E,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,mBAAmB,IAAI,kBAAkB,CAAC;QAClE,IAAI,CAAC,YAAY,GAAG,uBAAuB,CAAC;IAC9C,CAAC;IAED,+EAA+E;IAC/E,yBAAyB;QACvB,OAAO,IAAI,CAAC,YAAY,CAAC,aAAa,EAAE,CAAC;IAC3C,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,QAAQ,CAAC,GAAsB;QACnC,MAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QAE/C,gEAAgE;QAChE,IAAI,GAAG,CAAC,oBAAoB,EAAE,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,qCAAqC,GAAG,CAAC,oBAAoB,EAAE,CAAC,CAAC;YAC7E,MAAM,IAAI,CAAC,WAAW,CAAC,gBAAgB,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QACpE,CAAC;QAED,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,0FAA0F;QAC1F,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE,IAAI,EAAE,CAAC;QAC/D,MAAM,kBAAkB,GAAG,wBAAwB,CAAC,YAAY,CAAC,CAAC;QAClE,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,eAAe,EAAE,CAAC;QACtD,MAAM,YAAY,GAAG,iBAAiB,CAAC,YAAY,EAAE,kBAAkB,CAAC,CAAC;QAEzE,yDAAyD;QACzD,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU;aAChC,SAAS,EAAE;aACX,WAAW,EAAE;aACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACX,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,WAAW,EAAE,CAAC,CAAC,WAAW;YAC1B,UAAU,EAAG,CAAS,CAAC,WAAW,IAAI,EAAE,IAAI,EAAE,QAAiB,EAAE,UAAU,EAAE,EAAE,EAAE;SAClF,CAAC,CAAW,CAAC;QAEhB,MAAM,eAAe,GAAW,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5D,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,WAAW,EAAE,CAAC,CAAC,WAAW;YAC1B,UAAU,EAAE,CAAC,CAAC,UAAU;SACzB,CAAC,CAAC,CAAC;QAEJ,MAAM,WAAW,GAAW,CAAC,GAAG,WAAW,EAAE,GAAG,eAAe,CAAC,CAAC;QAEjE,gEAAgE;QAChE,mFAAmF;QACnF,IAAI,UAAU,G
AA4B,IAAI,CAAC;QAE/C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE,CAAC;YACpD,SAAS,GAAG,KAAK,GAAG,CAAC,CAAC;YACtB,MAAM,CAAC,IAAI,CAAC,qBAAqB,SAAS,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YAEhE,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC;gBAChD,YAAY;gBACZ,WAAW,EAAE,GAAG,CAAC,WAAW;gBAC5B,WAAW;gBACX,YAAY;gBACZ,aAAa,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE;gBACpC,QAAQ;aACT,CAAC,CAAC;YAEH,UAAU,GAAG,WAAW,CAAC,eAAe,CAAC;YACzC,UAAU,GAAG,qBAAqB,CAChC,WAAW,CAAC,SAAS,EACrB,UAAU,EACV,SAAS,EACT,CAAC,GAAG,QAAQ,CAAC,CACd,CAAC;YAEF,MAAM,CAAC,IAAI,CACT,qBAAqB,SAAS,uBAAuB,UAAU,CAAC,MAAM,WAAW,UAAU,CAAC,IAAI,CAAC,MAAM,EAAE,CAC1G,CAAC;YAEF,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;gBACtB,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,IAAI,EAAE,CAAC,yEAAyE,CAAC;gBACjF,UAAU;gBACV,SAAS;gBACT,QAAQ;gBACR,OAAO,EAAE,sEAAsE;aAChF,CAAC;QACJ,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,+EAA+E;IAEvE,KAAK,CAAC,kBAAkB,CAAC,MAOhC;QACC,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE,GACrF,MAAM,CAAC;QAET,gDAAgD;QAChD,MAAM,QAAQ,GAAc;YAC1B,EAAE,IAAI,EAAE,WAAkB,EAAE,OAAO,EAAE,YAAY,EAAE;YACnD;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EACL,iFAAiF;oBACjF,SAAS,WAAW,MAAM;oBAC1B,oFAAoF;oBACpF,sFAAsF;oBACtF,yCAAyC;aAC5C;SACF,CAAC;QAEF,MAAM,WAAW,GAAyB;YACxC,YAAY;YACZ,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,WAAW,EAAE,GAAG,EAAE;gBAChB,aAAa,CAAC,KAAK,EAAE,CAAC;YACxB,CAAC;YACD,eAAe,EAAE,CAAC,IAAY,EAAE,EAAE;gBAChC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;oBAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpD,CAAC;SACF,CAAC;QAEF,sBAAsB;QACtB,MAAM,WAAW,GAAa,EAAE,CAAC;QACjC,IAAI,oBAAoB,GAAG,CAAC,CAAC;QAC7B,MAAM,cAAc,GAAG,CAAC,CAAC;QACzB,IAAI,SAAS,GAAG,EAAE,CAAC;QACnB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,UAAU,GAAG,CAAC,GAAG,CAAC,CAAC;YACnB,MAAM,OAAO,GAAG,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC;YACvC,MAAM,YAAY,GAAG,OAAO,IAAI,IAAI,CAAC;YAErC,mBAAmB;YACnB,IAAI,OAAO,IAAI,IAAI,IAAI,OAAO,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,EAAE,CAA
C;gBAC/C,MAAM,aAAa,GAAG,QAAQ,CAAC,IAAI,CACjC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAC1E,CAAC;gBACF,IAAI,CAAC,aAAa,EAAE,CAAC;oBACnB,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI,EAAE,MAAM;wBACZ,OAAO,EACL,4CAA4C,UAAU,IAAI,IAAI,CAAC,aAAa,KAAK;4BACjF,qEAAqE;qBACxE,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,IAAI,YAAY,EAAE,CAAC;gBACjB,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,CACnC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAC1E,CAAC;gBACF,IAAI,CAAC,eAAe,EAAE,CAAC;oBACrB,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI,EAAE,MAAM;wBACZ,OAAO,EACL,gGAAgG;4BAChG,oEAAoE;qBACvE,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,IAAI,QAAQ,CAAC;YACb,IAAI,CAAC;gBACH,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,gBAAgB,CACjD;oBACE,QAAQ;oBACR,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW;oBACtC,WAAW,EAAE,GAAG;iBACjB,EACD,KAAK,CACN,CAAC;gBACF,oBAAoB,GAAG,CAAC,CAAC;YAC3B,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,oBAAoB,EAAE,CAAC;gBACvB,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACnE,MAAM,CAAC,KAAK,CAAC,4BAA4B,oBAAoB,IAAI,cAAc,MAAM,GAAG,EAAE,CAAC,CAAC;gBAE5F,IAAI,oBAAoB,IAAI,cAAc,EAAE,CAAC;oBAC3C,SAAS,GAAG,mDAAmD,CAAC;oBAChE,MAAM;gBACR,CAAC;gBAED,0DAA0D;gBAC1D,MAAM,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAC9C,IAAI,OAAO,IAAI,OAAO,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;oBAC5C,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;gBACpD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,8FAA8F;iBACxG,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,2BAA2B;YAC3B,IAAI,QAAQ,CAAC,SAAS,IAAI,QAAQ,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxD,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,WAAW;oBACjB,OAAO,EAAE,QAAQ,CAAC,OAAO,IAAI,IAAI;oBACjC,UAAU,EAAE,QAAQ,CAAC,SAAS;iBACxB,CAAC,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,QAAQ,CAAC,OAAO,EAAE,CAAC,CAAC;YAClE,CAAC;YAED,oCAAoC;YACpC,IAAI,CAAC,QAAQ,CAAC,SAAS,IAAI,QAAQ,CAAC,SAAS,CAAC
,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3D,SAAS,GAAG,QAAQ,CAAC,OAAO,IAAI,0BAA0B,CAAC;gBAC3D,MAAM;YACR,CAAC;YAED,qBAAqB;YACrB,KAAK,MAAM,QAAQ,IAAI,QAAQ,CAAC,SAAS,EAAE,CAAC;gBAC1C,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;gBACxC,IAAI,QAAQ,GAA4B,EAAE,CAAC;gBAC3C,IAAI,CAAC;oBACH,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;gBACrD,CAAC;gBAAC,MAAM,CAAC;oBACP,4CAA4C;gBAC9C,CAAC;gBAED,kBAAkB;gBAClB,MAAM,OAAO,GAAG,GAAG,QAAQ,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC1D,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAC1B,IAAI,WAAW,CAAC,MAAM,GAAG,mBAAmB;oBAAE,WAAW,CAAC,KAAK,EAAE,CAAC;gBAElE,IACE,WAAW,CAAC,MAAM,KAAK,mBAAmB;oBAC1C,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC,CAAC,EAC9C,CAAC;oBACD,MAAM,CAAC,IAAI,CAAC,4CAA4C,QAAQ,EAAE,CAAC,CAAC;oBACpE,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE,2BAA2B,QAAQ,yCAAyC;4BACnF,oFAAoF;qBACvF,CAAC,CAAC;oBACH,MAAM;gBACR,CAAC;gBAED,MAAM,CAAC,IAAI,CAAC,qBAAqB,QAAQ,EAAE,CAAC,CAAC;gBAE7C,IAAI,UAAkB,CAAC;gBAEvB,sDAAsD;gBACtD,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;gBAEvE,IAAI,WAAW,EAAE,CAAC;oBAChB,IAAI,CAAC;wBACH,UAAU,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;oBAChE,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,UAAU,GAAG,mBAAmB,QAAQ,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5F,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,WAAW;oBACX,IAAI,CAAC;wBACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,CAAC,WAAW,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;wBACjF,UAAU,GAAG,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;wBAE1E,qCAAqC;wBACrC,IACE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;4BACzB,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;4BAC3B,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EACzB,CAAC;4BACD,MAAM,OAAO,GACV,QAAQ,CAAC,IAAe;gCACxB,QAAQ,CAAC,KAAgB;gCACzB,QAAQ,CAAC,OAAkB,CAAC;4BAC/B,IAAI,OAAO;gCAAE,WAAW,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;wBACpD,CAAC;oBACH,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,UAAU,GAA
G,mBAAmB,QAAQ,KAAK,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5F,CAAC;gBACH,CAAC;gBAED,MAAM,WAAW,GAAG,gBAAgB,CAAC,QAAQ,CAAC,EAAE,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;gBACxE,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,SAAS,GAAG,sCAAsC,UAAU,yCAAyC,CAAC;QACxG,CAAC;QAED,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,aAAa,CAAC,KAAK,EAAE,CAAC;IAC7D,CAAC;CACF"}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * EvaluatorHarness — coordinates the main Jiva agent and the evaluator.
3
+ *
4
+ * Flow per user message:
5
+ * 1. Run the main agent (mainAgent.chat(userMessage))
6
+ * 2. Run the evaluator (evaluatorAgent.evaluate({ userMessage, targetConversationId }))
7
+ * — the evaluator reads workspace files, identifies gaps, and nudges the
8
+ * main agent as needed via its interact_with_agent tool
9
+ * 3. Return a HarnessResult containing both the main response and the
10
+ * evaluation outcome
11
+ */
12
+ import { EvaluatorAgent } from './evaluator-agent.js';
13
+ import type { IAgent } from '../core/agent-interface.js';
14
+ import type { EvaluationContext, HarnessOptions, HarnessResult } from './types.js';
15
+ export declare class EvaluatorHarness {
16
+ private mainAgent;
17
+ private evaluatorAgent;
18
+ private verbose;
19
+ constructor(mainAgent: IAgent, evaluatorAgent: EvaluatorAgent, options?: HarnessOptions);
20
+ /**
21
+ * Process a user message through the harness:
22
+ * 1. Main agent processes the request via `mainAgent.chat(userMessage)`.
23
+ * 2. Evaluator validates and guides completion; of `evalCtx`, only `targetConversationId` is forwarded to it.
24
+ */
25
+ run(userMessage: string, evalCtx?: Partial<EvaluationContext>): Promise<HarnessResult>;
26
+ /** Stop the main agent cooperatively. The implementation calls only `mainAgent.stop()`; nothing is invoked on the evaluator. */
27
+ stop(): void;
28
+ /** Cleanup the main agent's resources, then the evaluator's MCP manager. */
29
+ cleanup(): Promise<void>;
30
+ }
31
+ //# sourceMappingURL=harness.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"harness.d.ts","sourceRoot":"","sources":["../../src/evaluator/harness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AACzD,OAAO,KAAK,EAAE,iBAAiB,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEnF,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,OAAO,CAAU;gBAGvB,SAAS,EAAE,MAAM,EACjB,cAAc,EAAE,cAAc,EAC9B,OAAO,GAAE,cAAmB;IAO9B;;;;OAIG;IACG,GAAG,CAAC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,iBAAiB,CAAC,GAAG,OAAO,CAAC,aAAa,CAAC;IA2C5F,sCAAsC;IACtC,IAAI,IAAI,IAAI;IAIZ,sCAAsC;IAChC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAI/B"}
@@ -0,0 +1,70 @@
1
+ /**
2
+ * EvaluatorHarness — coordinates the main Jiva agent and the evaluator.
3
+ *
4
+ * Flow per user message:
5
+ * 1. Run the main agent (mainAgent.chat(userMessage))
6
+ * 2. Run the evaluator (evaluatorAgent.evaluate({ userMessage, targetConversationId }))
7
+ * — the evaluator reads workspace files, identifies gaps, and nudges the
8
+ * main agent as needed via its interact_with_agent tool
9
+ * 3. Return a HarnessResult containing both the main response and the
10
+ * evaluation outcome
11
+ */
12
+ import { logger } from '../utils/logger.js';
13
+ export class EvaluatorHarness {
14
+ mainAgent;
15
+ evaluatorAgent;
16
+ verbose;
17
+ constructor(mainAgent, evaluatorAgent, options = {}) {
18
+ this.mainAgent = mainAgent;
19
+ this.evaluatorAgent = evaluatorAgent;
20
+ this.verbose = options.verbose ?? true;
21
+ }
22
+ /**
23
+ * Process a user message through the harness:
24
+ * 1. Main agent processes the request.
25
+ * 2. Evaluator validates and guides completion.
26
+ */
27
+ async run(userMessage, evalCtx) {
28
+ // ── Step 1: Main agent ─────────────────────────────────────────────────
29
+ if (this.verbose) {
30
+ logger.info('[Harness] Main agent processing user request');
31
+ }
32
+ const mainResponse = await this.mainAgent.chat(userMessage);
33
+ if (this.verbose) {
34
+ logger.info(`[Harness] Main agent done — ${mainResponse.iterations} iterations, ` +
35
+ `${mainResponse.toolsUsed.length} tools used`);
36
+ }
37
+ // ── Step 2: Evaluator ──────────────────────────────────────────────────
38
+ if (this.verbose) {
39
+ logger.info('[Harness] Evaluator starting validation');
40
+ }
41
+ const evaluationContext = {
42
+ userMessage,
43
+ targetConversationId: evalCtx?.targetConversationId,
44
+ };
45
+ const evaluation = await this.evaluatorAgent.evaluate(evaluationContext);
46
+ if (this.verbose) {
47
+ const outcome = evaluation.passed
48
+ ? `✓ Passed (${evaluation.cyclesRan} cycles, ${evaluation.nudgesSent} nudges)`
49
+ : `✗ Failed — ${evaluation.gaps.length} gap(s) remain after ${evaluation.cyclesRan} cycles`;
50
+ logger.info(`[Harness] Evaluation complete — ${outcome}`);
51
+ }
52
+ return {
53
+ mainAgentResponse: mainResponse.content,
54
+ mainAgentIterations: mainResponse.iterations,
55
+ evaluation,
56
+ mainAgentTokenUsage: this.mainAgent.getTokenUsage(),
57
+ evaluatorTokenUsage: this.evaluatorAgent.getOrchestratorTokenUsage(),
58
+ };
59
+ }
60
+ /** Stop both agents cooperatively. */
61
+ stop() {
62
+ this.mainAgent.stop();
63
+ }
64
+ /** Cleanup both agents' resources. */
65
+ async cleanup() {
66
+ await this.mainAgent.cleanup();
67
+ await this.evaluatorAgent['mcpManager'].cleanup();
68
+ }
69
+ }
70
+ //# sourceMappingURL=harness.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"harness.js","sourceRoot":"","sources":["../../src/evaluator/harness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAK5C,MAAM,OAAO,gBAAgB;IACnB,SAAS,CAAS;IAClB,cAAc,CAAiB;IAC/B,OAAO,CAAU;IAEzB,YACE,SAAiB,EACjB,cAA8B,EAC9B,UAA0B,EAAE;QAE5B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC;IACzC,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,GAAG,CAAC,WAAmB,EAAE,OAAoC;QACjE,0EAA0E;QAC1E,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;QAC9D,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAE5D,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,CAAC,IAAI,CACT,+BAA+B,YAAY,CAAC,UAAU,eAAe;gBACrE,GAAG,YAAY,CAAC,SAAS,CAAC,MAAM,aAAa,CAC9C,CAAC;QACJ,CAAC;QAED,0EAA0E;QAC1E,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACzD,CAAC;QAED,MAAM,iBAAiB,GAAsB;YAC3C,WAAW;YACX,oBAAoB,EAAE,OAAO,EAAE,oBAAoB;SACpD,CAAC;QAEF,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAEzE,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM;gBAC/B,CAAC,CAAC,aAAa,UAAU,CAAC,SAAS,YAAY,UAAU,CAAC,UAAU,UAAU;gBAC9E,CAAC,CAAC,cAAc,UAAU,CAAC,IAAI,CAAC,MAAM,wBAAwB,UAAU,CAAC,SAAS,SAAS,CAAC;YAC9F,MAAM,CAAC,IAAI,CAAC,mCAAmC,OAAO,EAAE,CAAC,CAAC;QAC5D,CAAC;QAED,OAAO;YACL,iBAAiB,EAAE,YAAY,CAAC,OAAO;YACvC,mBAAmB,EAAE,YAAY,CAAC,UAAU;YAC5C,UAAU;YACV,mBAAmB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE;YACnD,mBAAmB,EAAE,IAAI,CAAC,cAAc,CAAC,yBAAyB,EAAE;SACrE,CAAC;IACJ,CAAC;IAED,sCAAsC;IACtC,IAAI;QACF,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;IACxB,CAAC;IAED,sCAAsC;IACtC,KAAK,CAAC,OAAO;QACX,MAAM,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;QAC/B,MAAM,IAAI,CAAC,cAAc,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE,CAAC;IACpD,CAAC;CACF"}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Evaluator module — public API.
3
+ *
4
+ * Usage (CLI):
5
+ * const harness = await createEvaluatorHarness(mainAgent, mcpServers, orchestratorConfig);
6
+ * const result = await harness.run(userMessage);
7
+ *
8
+ * Usage (HTTP):
9
+ * const harness = await createEvaluatorHarness(mainAgent, mcpServers, orchestratorConfig);
10
+ * app.post('/api/chat/harness', async (req, res) => {
11
+ * const result = await harness.run(req.body.message);
12
+ * res.json(result);
13
+ * });
14
+ */
15
+ export { EvaluatorAgent } from './evaluator-agent.js';
16
+ export { EvaluatorHarness } from './harness.js';
17
+ export { deriveEvaluatorDirective } from './directive-adapter.js';
18
+ export { InteractWithAgentTool, ListAgentConversationsTool, GetConversationHistoryTool, EVALUATOR_VIRTUAL_TOOLS, } from './tools/agent-tools.js';
19
+ export type { EvaluatorConfig, EvaluationContext, EvaluationResult, HarnessOptions, HarnessResult, EvaluatorToolContext, } from './types.js';
20
+ import { EvaluatorHarness } from './harness.js';
21
+ import type { IAgent } from '../core/agent-interface.js';
22
+ import type { MCPServerConfig } from '../core/config.js';
23
+ import type { HarnessOptions } from './types.js';
24
+ export interface EvaluatorOrchestratorConfig { // passed as `orchestratorCfg` to createEvaluatorHarness
25
+ endpoint: string; // NOTE(review): presumably the orchestrator model's API endpoint — confirm
26
+ apiKey: string;
27
+ model: string;
28
+ useHarmonyFormat?: boolean;
29
+ /** Optional tool-calling model (same env vars as main agent). All three tool-calling fields below are optional. */
30
+ toolCallingEndpoint?: string;
31
+ toolCallingApiKey?: string;
32
+ toolCallingModel?: string;
33
+ }
34
+ /**
35
+ * Create an EvaluatorHarness that pairs the given main agent with a fresh,
36
+ * isolated EvaluatorAgent.
37
+ *
38
+ * The evaluator is intended to use the same model config and MCP servers as the
39
+ * main agent (both are supplied by the caller here) but runs with completely separate LLM state.
40
+ *
41
+ * @param mainAgent The IAgent instance to supervise.
42
+ * @param mcpServers MCP server configs (same set as the main agent uses), keyed by string.
43
+ * @param orchestratorCfg Model connection details for the evaluator's orchestrator.
44
+ * @param harnessOptions Optional harness behaviour tweaks.
+ * @returns A promise that resolves to the constructed EvaluatorHarness.
45
+ */
46
+ export declare function createEvaluatorHarness(mainAgent: IAgent, mcpServers: Record<string, MCPServerConfig>, orchestratorCfg: EvaluatorOrchestratorConfig, harnessOptions?: HarnessOptions): Promise<EvaluatorHarness>;
47
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evaluator/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAClE,OAAO,EACL,qBAAqB,EACrB,0BAA0B,EAC1B,0BAA0B,EAC1B,uBAAuB,GACxB,MAAM,wBAAwB,CAAC;AAChC,YAAY,EACV,eAAe,EACf,iBAAiB,EACjB,gBAAgB,EAChB,cAAc,EACd,aAAa,EACb,oBAAoB,GACrB,MAAM,YAAY,CAAC;AAQpB,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AACzD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,WAAW,2BAA2B;IAC1C,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,iEAAiE;IACjE,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,sBAAsB,CAC1C,SAAS,EAAE,MAAM,EACjB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,EAC3C,eAAe,EAAE,2BAA2B,EAC5C,cAAc,GAAE,cAAmB,GAClC,OAAO,CAAC,gBAAgB,CAAC,CAiD3B"}