jiva-core 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/dist/core/agent-spawner.d.ts.map +1 -1
- package/dist/core/agent-spawner.js +34 -21
- package/dist/core/agent-spawner.js.map +1 -1
- package/dist/core/agent.js +1 -1
- package/dist/core/agent.js.map +1 -1
- package/dist/core/client-agent.d.ts +21 -0
- package/dist/core/client-agent.d.ts.map +1 -1
- package/dist/core/client-agent.js +263 -10
- package/dist/core/client-agent.js.map +1 -1
- package/dist/core/conversation-manager.js +3 -3
- package/dist/core/conversation-manager.js.map +1 -1
- package/dist/core/dual-agent.d.ts.map +1 -1
- package/dist/core/dual-agent.js +3 -1
- package/dist/core/dual-agent.js.map +1 -1
- package/dist/core/manager-agent.d.ts +13 -0
- package/dist/core/manager-agent.d.ts.map +1 -1
- package/dist/core/manager-agent.js +120 -43
- package/dist/core/manager-agent.js.map +1 -1
- package/dist/core/worker-agent.d.ts.map +1 -1
- package/dist/core/worker-agent.js +11 -3
- package/dist/core/worker-agent.js.map +1 -1
- package/dist/interfaces/cli/index.js +2 -2
- package/dist/interfaces/cli/index.js.map +1 -1
- package/dist/interfaces/http/session-manager.d.ts.map +1 -1
- package/dist/interfaces/http/session-manager.js +14 -3
- package/dist/interfaces/http/session-manager.js.map +1 -1
- package/dist/models/krutrim.js +1 -1
- package/dist/models/krutrim.js.map +1 -1
- package/dist/personas/persona-manager.d.ts +14 -3
- package/dist/personas/persona-manager.d.ts.map +1 -1
- package/dist/personas/persona-manager.js +74 -17
- package/dist/personas/persona-manager.js.map +1 -1
- package/dist/storage/gcp-bucket-provider.d.ts +1 -0
- package/dist/storage/gcp-bucket-provider.d.ts.map +1 -1
- package/dist/storage/gcp-bucket-provider.js +17 -0
- package/dist/storage/gcp-bucket-provider.js.map +1 -1
- package/dist/storage/local-provider.d.ts +1 -0
- package/dist/storage/local-provider.d.ts.map +1 -1
- package/dist/storage/local-provider.js +9 -0
- package/dist/storage/local-provider.js.map +1 -1
- package/dist/storage/provider.d.ts +5 -0
- package/dist/storage/provider.d.ts.map +1 -1
- package/dist/storage/provider.js.map +1 -1
- package/dist/utils/logger.d.ts +19 -0
- package/dist/utils/logger.d.ts.map +1 -1
- package/dist/utils/logger.js +49 -1
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/orchestration-logger.d.ts +27 -3
- package/dist/utils/orchestration-logger.d.ts.map +1 -1
- package/dist/utils/orchestration-logger.js +110 -6
- package/dist/utils/orchestration-logger.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -68,6 +68,28 @@ jiva persona package-skill my-skill
|
|
|
68
68
|
|
|
69
69
|
See **[Personas Guide](docs/guides/PERSONAS.md)** for complete documentation.
|
|
70
70
|
|
|
71
|
+
### v0.3.2 Bug Fixes & Quality Improvements
|
|
72
|
+
**Bug Fixes:**
|
|
73
|
+
- **Persona Isolation**: Sub-agents now use ephemeral personas that don't overwrite parent agent configuration
|
|
74
|
+
- **Context Propagation**: Workspace directory automatically injected into all sub-agent task messages
|
|
75
|
+
- **Enhanced Logging**: Persona context included in all log messages for better multi-agent debugging
|
|
76
|
+
- **Reduced Hallucination**: Temperature lowered from 0.3-0.7 to 0.1-0.2 for more deterministic, fact-based behavior
|
|
77
|
+
|
|
78
|
+
**HTTP/Cloud Compatibility:**
|
|
79
|
+
- **🚨 Per-Tenant Persona Config** (CRITICAL): Fixed cross-tenant persona configuration leakage - each tenant's settings are now isolated in GCS
|
|
80
|
+
- **Session-Scoped Logging**: Fixed persona context race conditions in concurrent HTTP sessions
|
|
81
|
+
- **Cloud-Aware Orchestration**: Orchestration logs now persist to cloud storage (GCS/S3) instead of the ephemeral filesystem
|
|
82
|
+
|
|
83
|
+
**Quality Improvements:**
|
|
84
|
+
- **LLM-Based Validation**: Client agent uses semantic analysis instead of keyword matching for involvement level detection
|
|
85
|
+
- **Coherence Checking**: Detects when Worker fabricates accomplishments not supported by actual tool usage
|
|
86
|
+
- **Robust Plan Parsing**: Manager agent plan parsing now uses a JSON format, with an LLM cleanup fallback and garbage filtering
|
|
87
|
+
- **Client Logging**: Full orchestration traceability for validation decisions and quality control
|
|
88
|
+
|
|
89
|
+
**⚠️ CRITICAL:** If you are running v0.3.1 in HTTP/Cloud mode, **upgrade to v0.3.2 immediately**:
|
|
90
|
+
- v0.3.1 has cross-tenant configuration leakage (security issue)
|
|
91
|
+
- v0.3.2 fixes multi-tenant isolation with per-tenant storage
|
|
92
|
+
|
|
71
93
|
### v0.2.1 Features
|
|
72
94
|
- **Dual-Agent System**: Separate Manager and Worker agents for better task focus and reliability
|
|
73
95
|
- **Chain-of-Thought Logging**: Transparent reasoning at INFO level with clean ASCII formatting
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-spawner.d.ts","sourceRoot":"","sources":["../../src/core/agent-spawner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,SAAS,EAAmB,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAGhE,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,SAAS,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,IAAI,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAC;IAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAwC;IACtD,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,UAAU,CAAmB;IACrC,OAAO,CAAC,SAAS,CAAmB;IACpC,OAAO,CAAC,mBAAmB,CAA6B;IACxD,OAAO,CAAC,kBAAkB,CAAiB;IAC3C,OAAO,CAAC,aAAa,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,YAAY,CAAS;gBAG3B,YAAY,EAAE,iBAAiB,EAC/B,UAAU,EAAE,gBAAgB,EAC5B,SAAS,EAAE,gBAAgB,EAC3B,mBAAmB,EAAE,mBAAmB,GAAG,IAAI,EAC/C,kBAAkB,EAAE,cAAc,EAClC,OAAO,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB;IAYH;;OAEG;IACH,YAAY,IAAI,OAAO;IAIvB;;OAEG;IACH,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,WAAW,IAAI,MAAM;IAIrB;;OAEG;IACG,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"agent-spawner.d.ts","sourceRoot":"","sources":["../../src/core/agent-spawner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,SAAS,EAAmB,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAGhE,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,SAAS,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,IAAI,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,CAAC;IAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAwC;IACtD,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,UAAU,CAAmB;IACrC,OAAO,CAAC,SAAS,CAAmB;IACpC,OAAO,CAAC,mBAAmB,CAA6B;IACxD,OAAO,CAAC,kBAAkB,CAAiB;IAC3C,OAAO,CAAC,aAAa,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,YAAY,CAAS;gBAG3B,YAAY,EAAE,iBAAiB,EAC/B,UAAU,EAAE,gBAAgB,EAC5B,SAAS,EAAE,gBAAgB,EAC3B,mBAAmB,EAAE,mBAAmB,GAAG,IAAI,EAC/C,kBAAkB,EAAE,cAAc,EAClC,OAAO,CAAC,EAAE;QACR,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB;IAYH;;OAEG;IACH,YAAY,IAAI,OAAO;IAIvB;;OAEG;IACH,eAAe,IAAI,MAAM;IAIzB;;OAEG;IACH,WAAW,IAAI,MAAM;IAIrB;;OAEG;IACG,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IA8JzE;;OAEG;IACH,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,GAAG,SAAS;IAInD;;OAEG;IACH,UAAU,IAAI,YAAY,EAAE;IAI5B;;OAEG;IACH,oBAAoB,IAAI,MAAM,EAAE;IAIhC;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
|
|
@@ -77,12 +77,13 @@ export class AgentSpawner {
|
|
|
77
77
|
logger.info(`[AgentSpawner] Task: ${request.task}`);
|
|
78
78
|
logger.info(`[AgentSpawner] Depth: ${this.currentDepth + 1}/${this.maxDepth}`);
|
|
79
79
|
try {
|
|
80
|
-
// Create
|
|
81
|
-
|
|
80
|
+
// Create an ephemeral PersonaManager for the sub-agent
|
|
81
|
+
// Ephemeral = true means it won't persist persona to global config
|
|
82
|
+
const subPersonaManager = new PersonaManager([], true);
|
|
82
83
|
await subPersonaManager.initialize();
|
|
83
|
-
// Activate the requested persona
|
|
84
|
+
// Activate the requested persona (ephemeral, won't overwrite parent's persona)
|
|
84
85
|
await subPersonaManager.activatePersona(request.persona);
|
|
85
|
-
logger.success(`[AgentSpawner] Activated persona: ${request.persona}`);
|
|
86
|
+
logger.success(`[AgentSpawner] Activated ephemeral persona: ${request.persona}`);
|
|
86
87
|
// Merge persona MCP servers
|
|
87
88
|
const personaMCPServers = subPersonaManager.getPersonaMCPServers();
|
|
88
89
|
if (Object.keys(personaMCPServers).length > 0) {
|
|
@@ -129,25 +130,37 @@ export class AgentSpawner {
|
|
|
129
130
|
};
|
|
130
131
|
this.agents.set(agentId, spawnedAgent);
|
|
131
132
|
// Prepare task message with context
|
|
132
|
-
|
|
133
|
+
// IMPORTANT: Always include workspace path for sub-agents
|
|
134
|
+
const workspacePath = this.workspace.getWorkspaceDir();
|
|
135
|
+
let contextSection = `Project root: ${workspacePath}`;
|
|
133
136
|
if (request.context) {
|
|
134
|
-
|
|
137
|
+
contextSection += `\n${request.context}`;
|
|
138
|
+
}
|
|
139
|
+
const taskMessage = `CONTEXT:\n${contextSection}\n\nTASK:\n${request.task}`;
|
|
140
|
+
// Save parent's persona context and set sub-agent's persona for logging
|
|
141
|
+
const parentPersonaContext = logger.getPersonaContext();
|
|
142
|
+
try {
|
|
143
|
+
logger.setPersonaContext(request.persona);
|
|
144
|
+
// Execute the task
|
|
145
|
+
logger.info(`[AgentSpawner] Executing task with sub-agent...`);
|
|
146
|
+
const response = await subAgent.chat(taskMessage);
|
|
147
|
+
// Update agent status
|
|
148
|
+
spawnedAgent.messageCount = 1;
|
|
149
|
+
spawnedAgent.status = 'completed';
|
|
150
|
+
spawnedAgent.result = response.content;
|
|
151
|
+
logger.success(`[AgentSpawner] Sub-agent completed task (${response.iterations} iterations)`);
|
|
152
|
+
return {
|
|
153
|
+
agentId,
|
|
154
|
+
persona: request.persona,
|
|
155
|
+
result: response.content,
|
|
156
|
+
iterations: response.iterations,
|
|
157
|
+
toolsUsed: response.toolsUsed,
|
|
158
|
+
};
|
|
159
|
+
}
|
|
160
|
+
finally {
|
|
161
|
+
// Always restore parent's persona context
|
|
162
|
+
logger.setPersonaContext(parentPersonaContext);
|
|
135
163
|
}
|
|
136
|
-
// Execute the task
|
|
137
|
-
logger.info(`[AgentSpawner] Executing task with sub-agent...`);
|
|
138
|
-
const response = await subAgent.chat(taskMessage);
|
|
139
|
-
// Update agent status
|
|
140
|
-
spawnedAgent.messageCount = 1;
|
|
141
|
-
spawnedAgent.status = 'completed';
|
|
142
|
-
spawnedAgent.result = response.content;
|
|
143
|
-
logger.success(`[AgentSpawner] Sub-agent completed task (${response.iterations} iterations)`);
|
|
144
|
-
return {
|
|
145
|
-
agentId,
|
|
146
|
-
persona: request.persona,
|
|
147
|
-
result: response.content,
|
|
148
|
-
iterations: response.iterations,
|
|
149
|
-
toolsUsed: response.toolsUsed,
|
|
150
|
-
};
|
|
151
164
|
}
|
|
152
165
|
catch (error) {
|
|
153
166
|
logger.error(`[AgentSpawner] Sub-agent failed:`, error);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent-spawner.js","sourceRoot":"","sources":["../../src/core/agent-spawner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,SAAS,EAAmB,MAAM,iBAAiB,CAAC;AAK7D,OAAO,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAChE,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AA4B5C,MAAM,OAAO,YAAY;IACf,MAAM,GAA8B,IAAI,GAAG,EAAE,CAAC;IAC9C,YAAY,CAAoB;IAChC,UAAU,CAAmB;IAC7B,SAAS,CAAmB;IAC5B,mBAAmB,CAA6B;IAChD,kBAAkB,CAAiB;IACnC,aAAa,CAAU;IACvB,QAAQ,CAAS;IACjB,YAAY,CAAS;IAE7B,YACE,YAA+B,EAC/B,UAA4B,EAC5B,SAA2B,EAC3B,mBAA+C,EAC/C,kBAAkC,EAClC,OAIC;QAED,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,mBAAmB,GAAG,mBAAmB,CAAC;QAC/C,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;QAC7C,IAAI,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,CAAC;QAC5C,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,CAAC,CAAC,CAAC,oCAAoC;QAC5E,IAAI,CAAC,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,OAA0B;QACzC,oBAAoB;QACpB,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACvC,MAAM,IAAI,KAAK,CACb,wBAAwB,IAAI,CAAC,QAAQ,0CAA0C,CAChF,CAAC;QACJ,CAAC;QAED,uCAAuC;QACvC,IAAI,IAAI,CAAC,kBAAkB,CAAC,WAAW,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvD,MAAM,CAAC,IAAI,CAAC,oEAAoE,CAAC,CAAC;YAClF,MAAM,IAAI,CAAC,kBAAkB,CAAC,UAAU,EAAE,CAAC;QAC7C,CAAC;QAED,0BAA0B;QAC1B,MAAM,iBAAiB,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,EAAE,CAAC;QAChE,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CACb,yHAAyH,CAC1H,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;QAE9F,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CACb,YAAY,OAAO,CAAC,OAAO,oCAAoC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC/H,CAAC;QACJ,CAAC
;QAED,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAEjF,MAAM,CAAC,IAAI,CAAC,mDAAmD,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAClF,MAAM,CAAC,IAAI,CAAC,wBAAwB,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,yBAAyB,IAAI,CAAC,YAAY,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QAE/E,IAAI,CAAC;YACH,
|
|
1
|
+
{"version":3,"file":"agent-spawner.js","sourceRoot":"","sources":["../../src/core/agent-spawner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,SAAS,EAAmB,MAAM,iBAAiB,CAAC;AAK7D,OAAO,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAChE,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AA4B5C,MAAM,OAAO,YAAY;IACf,MAAM,GAA8B,IAAI,GAAG,EAAE,CAAC;IAC9C,YAAY,CAAoB;IAChC,UAAU,CAAmB;IAC7B,SAAS,CAAmB;IAC5B,mBAAmB,CAA6B;IAChD,kBAAkB,CAAiB;IACnC,aAAa,CAAU;IACvB,QAAQ,CAAS;IACjB,YAAY,CAAS;IAE7B,YACE,YAA+B,EAC/B,UAA4B,EAC5B,SAA2B,EAC3B,mBAA+C,EAC/C,kBAAkC,EAClC,OAIC;QAED,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,mBAAmB,GAAG,mBAAmB,CAAC;QAC/C,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;QAC7C,IAAI,CAAC,aAAa,GAAG,OAAO,EAAE,aAAa,CAAC;QAC5C,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,CAAC,CAAC,CAAC,oCAAoC;QAC5E,IAAI,CAAC,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,OAA0B;QACzC,oBAAoB;QACpB,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACvC,MAAM,IAAI,KAAK,CACb,wBAAwB,IAAI,CAAC,QAAQ,0CAA0C,CAChF,CAAC;QACJ,CAAC;QAED,uCAAuC;QACvC,IAAI,IAAI,CAAC,kBAAkB,CAAC,WAAW,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvD,MAAM,CAAC,IAAI,CAAC,oEAAoE,CAAC,CAAC;YAClF,MAAM,IAAI,CAAC,kBAAkB,CAAC,UAAU,EAAE,CAAC;QAC7C,CAAC;QAED,0BAA0B;QAC1B,MAAM,iBAAiB,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,EAAE,CAAC;QAChE,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CACb,yHAAyH,CAC1H,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,KAAK,OAAO,CAAC,OAAO,CAAC,CAAC;QAE9F,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CACb,YAAY,OAAO,CAAC,OAAO,oCAAoC,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC/H,CAAC;QACJ,CAAC
;QAED,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;QAEjF,MAAM,CAAC,IAAI,CAAC,mDAAmD,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAClF,MAAM,CAAC,IAAI,CAAC,wBAAwB,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,yBAAyB,IAAI,CAAC,YAAY,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QAE/E,IAAI,CAAC;YACH,uDAAuD;YACvD,mEAAmE;YACnE,MAAM,iBAAiB,GAAG,IAAI,cAAc,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;YACvD,MAAM,iBAAiB,CAAC,UAAU,EAAE,CAAC;YAErC,+EAA+E;YAC/E,MAAM,iBAAiB,CAAC,eAAe,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACzD,MAAM,CAAC,OAAO,CAAC,+CAA+C,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;YAEjF,4BAA4B;YAC5B,MAAM,iBAAiB,GAAG,iBAAiB,CAAC,oBAAoB,EAAE,CAAC;YACnE,IAAI,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9C,MAAM,CAAC,IAAI,CAAC,0BAA0B,MAAM,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,MAAM,2BAA2B,CAAC,CAAC;gBAExG,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,EAAE,CAAC;oBAC/D,IAAI,CAAC;wBACH,MAAM,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,MAAa,CAAC,CAAC;oBACvD,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,oDAAoD;wBACpD,MAAM,CAAC,KAAK,CAAC,8BAA8B,IAAI,mCAAmC,CAAC,CAAC;oBACtF,CAAC;gBACH,CAAC;YACH,CAAC;YAED,mDAAmD;YACnD,MAAM,UAAU,GAAG,IAAI,YAAY,CACjC,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,SAAS,EACd,IAAI,CAAC,mBAAmB,EACxB,IAAI,CAAC,kBAAkB,EACvB;gBACE,aAAa,EAAE,OAAO;gBACtB,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,YAAY,EAAE,IAAI,CAAC,YAAY,GAAG,CAAC;aACpC,CACF,CAAC;YAEF,MAAM,cAAc,GAAoB;gBACtC,YAAY,EAAE,IAAI,CAAC,YAAY;gBAC/B,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,SAAS;gBAC1D,cAAc,EAAE,iBAAiB;gBACjC,WAAW,EAAE,EAAE;gBACf,aAAa,EAAE,OAAO,CAAC,aAAa,IAAI,EAAE;gBAC1C,aAAa,EAAE,IAAI,CAAC,QAAQ,EAAE,mCAAmC;gBACjE,QAAQ,EAAE,KAAK,EAAE,6BAA6B;aAC/C,CAAC;YAEF,MAAM,QAAQ,GAAG,IAAI,SAAS,CAAC,cAAc,CAAC,CAAC;YAE/C,gEAAgE;YAChE,QAAQ,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;YAErC,0BAA0B;YAC1B,MAAM,YAAY,GAAiB;gBACjC,EAAE,EAAE,OAAO;gBACX,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,KAAK,EAAE,QAAQ;g
BACf,QAAQ,EAAE,IAAI,CAAC,aAAa;gBAC5B,SAAS,EAAE,IAAI,IAAI,EAAE;gBACrB,YAAY,EAAE,CAAC;gBACf,MAAM,EAAE,QAAQ;aACjB,CAAC;YAEF,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;YAEvC,oCAAoC;YACpC,0DAA0D;YAC1D,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,eAAe,EAAE,CAAC;YACvD,IAAI,cAAc,GAAG,iBAAiB,aAAa,EAAE,CAAC;YAEtD,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBACpB,cAAc,IAAI,KAAK,OAAO,CAAC,OAAO,EAAE,CAAC;YAC3C,CAAC;YAED,MAAM,WAAW,GAAG,aAAa,cAAc,cAAc,OAAO,CAAC,IAAI,EAAE,CAAC;YAE5E,wEAAwE;YACxE,MAAM,oBAAoB,GAAG,MAAM,CAAC,iBAAiB,EAAE,CAAC;YAExD,IAAI,CAAC;gBACH,MAAM,CAAC,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;gBAE1C,mBAAmB;gBACnB,MAAM,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;gBAC/D,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;gBAElD,sBAAsB;gBACtB,YAAY,CAAC,YAAY,GAAG,CAAC,CAAC;gBAC9B,YAAY,CAAC,MAAM,GAAG,WAAW,CAAC;gBAClC,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC;gBAEvC,MAAM,CAAC,OAAO,CAAC,4CAA4C,QAAQ,CAAC,UAAU,cAAc,CAAC,CAAC;gBAE9F,OAAO;oBACL,OAAO;oBACP,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,MAAM,EAAE,QAAQ,CAAC,OAAO;oBACxB,UAAU,EAAE,QAAQ,CAAC,UAAU;oBAC/B,SAAS,EAAE,QAAQ,CAAC,SAAS;iBAC9B,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,0CAA0C;gBAC1C,MAAM,CAAC,iBAAiB,CAAC,oBAAoB,CAAC,CAAC;YACjD,CAAC;QAEH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YAExD,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACvC,IAAI,KAAK,EAAE,CAAC;gBACV,KAAK,CAAC,MAAM,GAAG,QAAQ,CAAC;gBACxB,KAAK,CAAC,MAAM,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACxE,CAAC;YAED,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,OAAe;QACtB,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,oBAAoB;QAClB,OAAO,IAAI,CAAC,kBAAkB,CAAC,WAAW,EAAE,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAChF,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,KAAK,MAAM,CAAC,OAAO,EAAE,YAAY,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,
CAAC;YAC5D,IAAI,CAAC;gBACH,MAAM,YAAY,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACrC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,IAAI,CAAC,0CAA0C,OAAO,GAAG,EAAE,KAAK,CAAC,CAAC;YAC3E,CAAC;QACH,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;IACtB,CAAC;CACF"}
|
package/dist/core/agent.js
CHANGED
|
@@ -24,7 +24,7 @@ export class JivaAgent {
|
|
|
24
24
|
this.workspace = config.workspace;
|
|
25
25
|
this.conversationManager = config.conversationManager || null;
|
|
26
26
|
this.maxIterations = config.maxIterations || 10;
|
|
27
|
-
this.temperature = config.temperature || 0.
|
|
27
|
+
this.temperature = config.temperature || 0.2; // Lower default to reduce hallucination
|
|
28
28
|
this.autoSave = config.autoSave !== false; // Default true
|
|
29
29
|
this.condensingThreshold = config.condensingThreshold || 30;
|
|
30
30
|
this.initializeSystemPrompt();
|
package/dist/core/agent.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent.js","sourceRoot":"","sources":["../../src/core/agent.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAOH,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAmB/C,MAAM,OAAO,SAAS;IACZ,YAAY,CAAoB;IAChC,UAAU,CAAmB;IAC7B,SAAS,CAAmB;IAC5B,mBAAmB,CAA6B;IAChD,aAAa,CAAS;IACtB,WAAW,CAAS;IACpB,mBAAmB,GAAc,EAAE,CAAC;IACpC,QAAQ,CAAU;IAClB,mBAAmB,CAAS;IAC5B,oBAAoB,GAAW,EAAE,CAAC,CAAC,iDAAiD;IAE5F,YAAY,MAAmB;QAC7B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;QACxC,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACpC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAClC,IAAI,CAAC,mBAAmB,GAAG,MAAM,CAAC,mBAAmB,IAAI,IAAI,CAAC;QAC9D,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,GAAG,CAAC;
|
|
1
|
+
{"version":3,"file":"agent.js","sourceRoot":"","sources":["../../src/core/agent.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAOH,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAmB/C,MAAM,OAAO,SAAS;IACZ,YAAY,CAAoB;IAChC,UAAU,CAAmB;IAC7B,SAAS,CAAmB;IAC5B,mBAAmB,CAA6B;IAChD,aAAa,CAAS;IACtB,WAAW,CAAS;IACpB,mBAAmB,GAAc,EAAE,CAAC;IACpC,QAAQ,CAAU;IAClB,mBAAmB,CAAS;IAC5B,oBAAoB,GAAW,EAAE,CAAC,CAAC,iDAAiD;IAE5F,YAAY,MAAmB;QAC7B,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC;QACxC,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACpC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAClC,IAAI,CAAC,mBAAmB,GAAG,MAAM,CAAC,mBAAmB,IAAI,IAAI,CAAC;QAC9D,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,GAAG,CAAC,CAAC,wCAAwC;QACtF,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC,eAAe;QAC1D,IAAI,CAAC,mBAAmB,GAAG,MAAM,CAAC,mBAAmB,IAAI,EAAE,CAAC;QAE5D,IAAI,CAAC,sBAAsB,EAAE,CAAC;IAChC,CAAC;IAED;;OAEG;IACK,sBAAsB;QAC5B,uCAAuC;QACvC,MAAM,WAAW,GAAa;YAC5B,2FAA2F;YAC3F,EAAE;YACF,gCAAgC,IAAI,CAAC,SAAS,CAAC,eAAe,EAAE,EAAE;YAClE,EAAE;YACF,mFAAmF;YACnF,8GAA8G;SAC/G,CAAC;QAEF,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC;YAC5B,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;SAChC,CAAC,CAAC;QAEH,8DAA8D;QAC9D,+DAA+D;QAC/D,MAAM,cAAc,GAAa;YAC/B,wBAAwB;YACxB,EAAE;YACF,+FAA+F;YAC/F,6EAA6E;YAC7E,4EAA4E;YAC5E,+EAA+E;YAC/E,4EAA4E;YAC5E,mGAAmG;YACnG,EAAE;YACF,aAAa;YACb,+CAA+C;YAC/C,iEAAiE;YACjE,iEAAiE;YACjE,gDAAgD;YAChD,yDAAyD;YACzD,EAAE;YACF,4BAA4B;YAC5B,qEAAqE;YACrE,oEAAoE;YACpE,mEAAmE;YACnE,sEAAsE;YACtE,wEAAwE;YACxE,EAAE;SACH,CAAC;QAEF,iDAAiD;QACjD,IAAI,CAAC,oBAAoB,GAAG,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEtD,+CAA+C;QAC/C,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC;YAC5B,IAAI,EAAE,WAAW;YACjB,OAAO,EAAE,IAAI,CAAC,oBAAoB;SACnC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACK,iBAAiB;QACvB,MAAM,aAAa,GAAG,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC;QAElD,sBAAsB;QACtB,MAAM,eAAe,GAAG,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE,CAAC;QAE5D,+CAA+C;QAC/C,I
AAI,gBAAgB,GAAG,IAAI,CAAC,oBAAoB,CAAC;QACjD,IAAI,eAAe,EAAE,CAAC;YACpB,gBAAgB,GAAG,GAAG,IAAI,CAAC,oBAAoB,KAAK,eAAe,IAAI,CAAC;YACxE,MAAM,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC1D,CAAC;QACD,8DAA8D;QAE9D,OAAO;YACL,aAAa;YACb;gBACE,IAAI,EAAE,WAAW;gBACjB,OAAO,EAAE,gBAAgB;aAC1B;SACF,CAAC;IACJ,CAAC;IAED;;;OAGG;IACK,uBAAuB,CAAC,cAAsB,EAAE;QACtD,mDAAmD;QACnD,MAAM,cAAc,GAAG,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAEhD,IAAI,IAAI,CAAC,mBAAmB,CAAC,MAAM,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;YACvD,yEAAyE;YACzE,OAAO,CAAC,GAAG,cAAc,EAAE,GAAG,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACnE,CAAC;QAED,6DAA6D;QAC7D,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC;QAEpE,OAAO,CAAC,GAAG,cAAc,EAAE,GAAG,cAAc,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,OAAe;QAC1C,MAAM,SAAS,GAAG,CAAC,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,CAAC,CAAC;QACjH,MAAM,MAAM,GAAG,CAAC,aAAa,EAAE,iBAAiB,EAAE,YAAY,EAAE,KAAK,EAAE,QAAQ,EAAE,WAAW,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAChH,MAAM,YAAY,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAElD,+DAA+D;QAC/D,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACxB,MAAM,OAAO,GAAG,CAAC,GAAG,SAAS,EAAE,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CACvD,YAAY,KAAK,OAAO;gBACxB,YAAY,CAAC,UAAU,CAAC,OAAO,GAAG,GAAG,CAAC;gBACtC,YAAY,CAAC,UAAU,CAAC,OAAO,GAAG,GAAG,CAAC;gBACtC,YAAY,CAAC,UAAU,CAAC,OAAO,GAAG,GAAG,CAAC,CACvC,CAAC;YACF,IAAI,OAAO;gBAAE,OAAO,IAAI,CAAC;QAC3B,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,WAAmB;QAC5B,MAAM,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QAE1C,8BAA8B;QAC9B,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC;YAC5B,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,WAAW;SACrB,CAAC,CAAC;QAEH,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,aAAa,GAAG,EAAE,CAAC;QAEvB,kDAAkD;QAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,oBAAoB,CAAC,WAAW,CAAC,CAAC;QACxD,MAAM,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE,CAAC;QAE3D,yDAAyD;QACzD,OAAO,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;YACvC,UAAU,EAAE,CAAC;YACb,MAAM,CAAC,KAAK,CAAC,mBAAmB,UAAU,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC
,CAAC;YAEpE,uCAAuC;YACvC,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,CAAC,WAAW,EAAE,CAAC;YAExD,oDAAoD;YACpD,MAAM,cAAc,GAAG,IAAI,CAAC,uBAAuB,CAAC,EAAE,CAAC,CAAC;YAExD,IAAI,cAAc,CAAC,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,MAAM,EAAE,CAAC;gBAC5D,MAAM,CAAC,KAAK,CAAC,yBAAyB,IAAI,CAAC,mBAAmB,CAAC,MAAM,MAAM,cAAc,CAAC,MAAM,WAAW,CAAC,CAAC;YAC/G,CAAC;YAED,6DAA6D;YAC7D,IAAI,QAAuB,CAAC;YAC5B,IAAI,CAAC;gBACH,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC;oBACtC,QAAQ,EAAE,cAAc;oBACxB,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;oBAC3C,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,yEAAyE;oBACzE,uEAAuE;iBACxE,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,mBAAmB,EAAE,KAAK,CAAC,CAAC;gBACzC,MAAM,IAAI,SAAS,CACjB,sCAAsC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAC/F,CAAC;YACJ,CAAC;YAED,oCAAoC;YACpC,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC;gBAC5B,IAAI,EAAE,WAAW;gBACjB,OAAO,EAAE,QAAQ,CAAC,OAAO;aAC1B,CAAC,CAAC;YAEH,gCAAgC;YAChC,IAAI,QAAQ,CAAC,SAAS,IAAI,QAAQ,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxD,MAAM,CAAC,IAAI,CAAC,mBAAmB,QAAQ,CAAC,SAAS,CAAC,MAAM,eAAe,CAAC,CAAC;gBAEzE,qBAAqB;gBACrB,KAAK,MAAM,QAAQ,IAAI,QAAQ,CAAC,SAAS,EAAE,CAAC;oBAC1C,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;oBACxC,MAAM,CAAC,IAAI,CAAC,mBAAmB,QAAQ,EAAE,CAAC,CAAC;oBAE3C,IAAI,CAAC;wBACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;wBACrD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,CAAC,WAAW,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;wBAE7E,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;wBAEzB,6BAA6B;wBAC7B,MAAM,WAAW,GAAG,gBAAgB,CAClC,QAAQ,CAAC,EAAE,EACX,QAAQ,EACR,MAAM,CACP,CAAC;wBAEF,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;wBAE3C,MAAM,CAAC,OAAO,CAAC,QAAQ,QAAQ,wBAAwB,CAAC,CAAC;oBAC3D,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,MAAM,CAAC,KAAK,CAAC,QAAQ,QAAQ,mBAAmB,EAAE,KAAK,CAAC,CAAC;wBAEzD,2BAA2B;wBAC3B,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC;4BAC5B,IAAI,EAAE,MAAM;4BACZ,IAAI,EAAE,QAAQ;4BACd,YAAY,EAAE,QAAQ,CAAC,EAAE;4BACzB,OAAO,EAAE,UAAU,KAAK,YAAY,KAAK,C
AAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;yBAC5E,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;gBAED,wCAAwC;gBACxC,SAAS;YACX,CAAC;YAED,8CAA8C;YAC9C,aAAa,GAAG,QAAQ,CAAC,OAAO,CAAC;YAEjC,kEAAkE;YAClE,IAAI,CAAC,QAAQ,IAAI,UAAU,GAAG,IAAI,CAAC,aAAa,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClG,MAAM,CAAC,KAAK,CAAC,6EAA6E,CAAC,CAAC;gBAE5F,gEAAgE;gBAChE,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC;oBAC5B,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,wPAAwP;iBAClQ,CAAC,CAAC;gBAEH,6CAA6C;gBAC7C,SAAS;YACX,CAAC;YAED,sDAAsD;YACtD,MAAM;QACR,CAAC;QAED,IAAI,UAAU,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrC,MAAM,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;YACtC,aAAa,GAAG,aAAa,IAAI,qDAAqD,CAAC;QACzF,CAAC;QAED,oCAAoC;QACpC,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC9C,MAAM,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CACrC,IAAI,CAAC,mBAAmB,EACxB,IAAI,CAAC,SAAS,CAAC,eAAe,EAAE,EAChC,IAAI,CAAC,YAAY,CAClB,CAAC;QACJ,CAAC;QAED,yCAAyC;QACzC,IAAI,IAAI,CAAC,mBAAmB,CAAC,MAAM,GAAG,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC3F,MAAM,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;YAC7D,IAAI,CAAC,mBAAmB,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,oBAAoB,CAC5E,IAAI,CAAC,mBAAmB,EACxB,IAAI,CAAC,YAAY,EACjB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,mBAAmB,GAAG,GAAG,CAAC,CAC3C,CAAC;QACJ,CAAC;QAED,OAAO;YACL,OAAO,EAAE,aAAa;YACtB,UAAU;YACV,SAAS;SACV,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,QAAgB;QACxC,oDAAoD;QACpD,MAAM,oBAAoB,GAAG;YAC3B,eAAe;YACf,wBAAwB;YACxB,UAAU;YACV,MAAM;YACN,SAAS;YACT,qBAAqB;SACtB,CAAC;QAEF,MAAM,aAAa,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;QAE7C,mDAAmD;QACnD,IAAI,QAAQ,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;YACzB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,6BAA6B;QAC7B,MAAM,eAAe,GAAG;YACtB,OAAO;YACP,QAAQ;YACR,WAAW;YACX,WAAW;YACX,QAAQ;SACT,CAAC;QAEF,IAAI,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;YACzE,OAAO,KAAK,CAAC,CAAC,mCAAmC;QACnD,CAAC;QAED,kCAAkC;QAClC,OAAO,oBAAoB,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;IACnF,CAAC;IAED;;OAEG;IACH,iBAAiB;QACf,IAAI,CAAC,mBAAmB,GAAG,EAAE,CAAC;QAC9B,
IAAI,CAAC,sBAAsB,EAAE,CAAC;QAC9B,MAAM,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,sBAAsB;QACpB,OAAO,CAAC,GAAG,IAAI,CAAC,mBAAmB,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,sBAAsB,CAAC,QAAmB;QACxC,IAAI,CAAC,mBAAmB,GAAG,QAAQ,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB;QACpB,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC9B,MAAM,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,gBAAgB,CACxD,IAAI,CAAC,mBAAmB,EACxB,IAAI,CAAC,SAAS,CAAC,eAAe,EAAE,EAChC,SAAS,EACT,IAAI,CAAC,YAAY,CAClB,CAAC;QAEF,MAAM,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,CAAC;QAC5C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CAAC,EAAU;QAC/B,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC9B,MAAM,IAAI,SAAS,CAAC,sCAAsC,CAAC,CAAC;QAC9D,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,gBAAgB,CAAC,EAAE,CAAC,CAAC;QACzE,IAAI,CAAC,mBAAmB,GAAG,YAAY,CAAC,QAAQ,CAAC;QAEjD,MAAM,CAAC,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,iBAAiB;QACrB,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAC9B,MAAM,IAAI,SAAS,CAAC,sCAAsC,CAAC,CAAC;QAC9D,CAAC;QAED,OAAO,MAAM,IAAI,CAAC,mBAAmB,CAAC,iBAAiB,EAAE,CAAC;IAC5D,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,aAAa;QACX,OAAO,IAAI,CAAC,UAAU,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,sBAAsB;QACpB,OAAO,IAAI,CAAC,mBAAmB,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;QAE9C,iCAAiC;QACjC,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAC,mBAAmB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrF,MAAM,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CACrC,IAAI,CAAC,mBAAmB,EACxB,IAAI,CAAC,SAAS,CAAC,eAAe,EAAE,EAChC,IAAI,CAAC,YAAY,CAClB,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;IAClC,CAAC;CACF"}
|
|
@@ -38,10 +38,18 @@ export declare class ClientAgent {
|
|
|
38
38
|
private readonly ALLOWED_TOOLS;
|
|
39
39
|
constructor(orchestrator: ModelOrchestrator, mcpManager: MCPServerManager);
|
|
40
40
|
/**
|
|
41
|
+
* Use LLM to analyze the task and determine involvement level + requirements.
|
|
42
|
+
* Replaces keyword-based determineInvolvementLevel() and parseRequirements()
|
|
43
|
+
* with semantic understanding that avoids false positives.
|
|
44
|
+
*/
|
|
45
|
+
private analyzeTaskRequirements;
|
|
46
|
+
/**
|
|
47
|
+
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
41
48
|
* Determine involvement level based on user request complexity
|
|
42
49
|
*/
|
|
43
50
|
determineInvolvementLevel(userMessage: string, subtasks: string[]): InvolvementLevel;
|
|
44
51
|
/**
|
|
52
|
+
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
45
53
|
* Parse requirements from user message
|
|
46
54
|
*/
|
|
47
55
|
parseRequirements(userMessage: string, subtasks: string[]): Requirement[];
|
|
@@ -54,6 +62,13 @@ export declare class ClientAgent {
|
|
|
54
62
|
* This is language-agnostic and captures semantic meaning
|
|
55
63
|
*/
|
|
56
64
|
private analyzeForUnjustifiedFailure;
|
|
65
|
+
/**
|
|
66
|
+
* Use LLM to cross-check the Worker's result claims against its actual tool usage.
|
|
67
|
+
* Catches hallucinated accomplishments — e.g., Worker claims "I inspected all source files
|
|
68
|
+
* and found no bugs" but only used list_directory and never read a single file.
|
|
69
|
+
* This runs at ALL involvement levels including MINIMAL.
|
|
70
|
+
*/
|
|
71
|
+
private analyzeResultCoherence;
|
|
57
72
|
/**
|
|
58
73
|
* Layer 1: Process Validation (metadata only, no tools)
|
|
59
74
|
*/
|
|
@@ -66,6 +81,12 @@ export declare class ClientAgent {
|
|
|
66
81
|
* Check if file exists using read tool
|
|
67
82
|
*/
|
|
68
83
|
private fileExists;
|
|
84
|
+
/**
|
|
85
|
+
* Use LLM to generate an actionable correction instruction from raw validation issues.
|
|
86
|
+
* Instead of echoing "requires using playwright__ but Worker did not use these tools",
|
|
87
|
+
* produces something like "Use the filesystem tools to list ~/Library/Caches and report sizes".
|
|
88
|
+
*/
|
|
89
|
+
private generateCorrectionInstruction;
|
|
69
90
|
/**
|
|
70
91
|
* Validate file contents for common issues
|
|
71
92
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client-agent.d.ts","sourceRoot":"","sources":["../../src/core/client-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"client-agent.d.ts","sourceRoot":"","sources":["../../src/core/client-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAI5D,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAgBjD,oBAAY,gBAAgB;IAC1B,OAAO,YAAY,CAAM,0BAA0B;IACnD,QAAQ,aAAa,CAAI,mCAAmC;IAC5D,QAAQ,aAAa;CACtB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,GAAG,mBAAmB,GAAG,SAAS,GAAG,cAAc,GAAG,aAAa,GAAG,OAAO,CAAC;IACnG,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gBAAgB,EAAE,gBAAgB,CAAC;CACpC;AAED,qBAAa,WAAW;IACtB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,UAAU,CAAmB;IACrC,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,YAAY,CAAa;IAGjC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAS5B;gBAEU,YAAY,EAAE,iBAAiB,EAAE,UAAU,EAAE,gBAAgB;IAMzE;;;;OAIG;YACW,uBAAuB;IA+GrC;;;OAGG;IACH,yBAAyB,CAAC,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,gBAAgB;IAuCpF;;;OAGG;IACH,iBAAiB,CAAC,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,EAAE;IAgEzE;;OAEG;IACG,QAAQ,CACZ,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,MAAM,EAAE,EAClB,YAAY,EAAE,YAAY,EAC1B,gBAAgB,CAAC,EAAE,gBAAgB,GAClC,OAAO,CAAC,gBAAgB,CAAC;IAyG5B;;;OAGG;YACW,4BAA4B;IA8D1C;;;;;OAKG;YACW,sBAAsB;IAyEpC;;OAEG;IACH,OAAO,CAAC,eAAe;IA+CvB;;OAEG;YACW,eAAe;IAgD7B;;OAEG;YACW,UAAU;IAYxB;;;;OAIG;YACW,6BAA6B;IAiD3C;;OAEG;YACW,oBAAoB;IAqClC;;OAEG;YACW,iBAAiB;IA+B/B;;OAEG;IACH,oBAAoB,IAAI,IAAI;CAG7B"}
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* - THOROUGH: Complex/testing requests OR failures → full E2E validation with tools
|
|
11
11
|
*/
|
|
12
12
|
import { logger } from '../utils/logger.js';
|
|
13
|
+
import { orchestrationLogger } from '../utils/orchestration-logger.js';
|
|
13
14
|
export var InvolvementLevel;
|
|
14
15
|
(function (InvolvementLevel) {
|
|
15
16
|
InvolvementLevel["MINIMAL"] = "minimal";
|
|
@@ -38,6 +39,109 @@ export class ClientAgent {
|
|
|
38
39
|
this.mcpClient = mcpManager.getClient();
|
|
39
40
|
}
|
|
40
41
|
/**
|
|
42
|
+
* Use LLM to analyze the task and determine involvement level + requirements.
|
|
43
|
+
* Replaces keyword-based determineInvolvementLevel() and parseRequirements()
|
|
44
|
+
* with semantic understanding that avoids false positives.
|
|
45
|
+
*/
|
|
46
|
+
async analyzeTaskRequirements(userMessage, subtasks, workerResult) {
|
|
47
|
+
const workerContext = workerResult
|
|
48
|
+
? `\nWorker Result (first 500 chars): ${workerResult.result.substring(0, 500)}\nWorker Success: ${workerResult.success}\nTools Used: ${workerResult.toolsUsed.join(', ') || 'none'} (${workerResult.toolsUsed.length} total)`
|
|
49
|
+
: '';
|
|
50
|
+
const analysisPrompt = `You are a task analyst for a software agent system. Analyze the user's request to determine:
|
|
51
|
+
1. How deeply to validate the Worker's output (involvement level)
|
|
52
|
+
2. What specific requirements the task implies
|
|
53
|
+
|
|
54
|
+
USER MESSAGE: ${userMessage}
|
|
55
|
+
|
|
56
|
+
SUBTASKS: ${JSON.stringify(subtasks)}
|
|
57
|
+
${workerContext}
|
|
58
|
+
|
|
59
|
+
PREVIOUS FAILURE COUNT: ${this.failureCount}
|
|
60
|
+
|
|
61
|
+
Respond ONLY with valid JSON in this exact format (no other text):
|
|
62
|
+
{
|
|
63
|
+
"involvementLevel": "<MINIMAL | STANDARD | THOROUGH>",
|
|
64
|
+
"involvementReasoning": "<brief explanation of why this level>",
|
|
65
|
+
"requirements": [
|
|
66
|
+
{
|
|
67
|
+
"type": "<file_creation | file_modification | testing | verification | information | other>",
|
|
68
|
+
"description": "<what this requirement entails>",
|
|
69
|
+
"filePath": null,
|
|
70
|
+
"mustUseTools": null
|
|
71
|
+
}
|
|
72
|
+
]
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
CRITICAL RULES for involvementLevel:
|
|
76
|
+
- THOROUGH: ONLY when the user EXPLICITLY asks to test or verify something in a browser/test environment, OR after previous failures (failureCount > 0), OR for complex multi-file operations (>3 subtasks)
|
|
77
|
+
- MINIMAL: Information-only requests (listing files, explaining code, describing something, answering questions) where no files are created or modified
|
|
78
|
+
- STANDARD: Default for creation, modification, or action tasks
|
|
79
|
+
|
|
80
|
+
CRITICAL RULES for requirements:
|
|
81
|
+
- "testing" type with mustUseTools ["playwright__"] should ONLY be set when the user wants browser-based testing or verification of a web page/HTML/UI
|
|
82
|
+
- Words like "check", "find", "verify" in the context of system administration (disk space, processes, configurations) are NOT browser testing — they are "information" type WITHOUT playwright tools
|
|
83
|
+
- Examples:
|
|
84
|
+
- "check how much space my caches use" = type "information", mustUseTools null
|
|
85
|
+
- "find the biggest files in Downloads" = type "information", mustUseTools null
|
|
86
|
+
- "test the login page in the browser" = type "testing", mustUseTools ["playwright__"]
|
|
87
|
+
- "create index.html and verify it works" = type "file_creation" + type "testing" with playwright
|
|
88
|
+
- "make sure the server is running" = type "verification", mustUseTools null
|
|
89
|
+
- If no specific tools are required, set mustUseTools to null
|
|
90
|
+
- Always include at least one requirement entry`;
|
|
91
|
+
try {
|
|
92
|
+
const response = await this.orchestrator.chat({
|
|
93
|
+
messages: [
|
|
94
|
+
{ role: 'system', content: 'You are a strict task analyst. Respond only with valid JSON.' },
|
|
95
|
+
{ role: 'user', content: analysisPrompt },
|
|
96
|
+
],
|
|
97
|
+
temperature: 0.1,
|
|
98
|
+
});
|
|
99
|
+
const jsonMatch = response.content.match(/\{[\s\S]*\}/);
|
|
100
|
+
if (jsonMatch) {
|
|
101
|
+
const analysis = JSON.parse(jsonMatch[0]);
|
|
102
|
+
// Map string to enum
|
|
103
|
+
let level;
|
|
104
|
+
switch (analysis.involvementLevel?.toUpperCase()) {
|
|
105
|
+
case 'THOROUGH':
|
|
106
|
+
level = InvolvementLevel.THOROUGH;
|
|
107
|
+
break;
|
|
108
|
+
case 'MINIMAL':
|
|
109
|
+
level = InvolvementLevel.MINIMAL;
|
|
110
|
+
break;
|
|
111
|
+
default:
|
|
112
|
+
level = InvolvementLevel.STANDARD;
|
|
113
|
+
}
|
|
114
|
+
// Hard override: escalate to THOROUGH after failures
|
|
115
|
+
if (this.failureCount > 0 && level !== InvolvementLevel.THOROUGH) {
|
|
116
|
+
logger.debug(`[Client] Escalating to THOROUGH due to ${this.failureCount} previous failures`);
|
|
117
|
+
level = InvolvementLevel.THOROUGH;
|
|
118
|
+
}
|
|
119
|
+
const requirements = (analysis.requirements || []).map((req) => ({
|
|
120
|
+
type: req.type || 'other',
|
|
121
|
+
description: req.description || 'General task completion',
|
|
122
|
+
filePath: req.filePath || undefined,
|
|
123
|
+
mustUseTools: req.mustUseTools || undefined,
|
|
124
|
+
}));
|
|
125
|
+
// Ensure at least one requirement
|
|
126
|
+
if (requirements.length === 0) {
|
|
127
|
+
requirements.push({ type: 'other', description: 'General task completion' });
|
|
128
|
+
}
|
|
129
|
+
logger.info(`[Client] LLM task analysis: ${level.toUpperCase()} involvement — ${analysis.involvementReasoning || 'no reasoning provided'}`);
|
|
130
|
+
logger.debug(`[Client] Requirements: ${JSON.stringify(requirements.map(r => ({ type: r.type, desc: r.description })))}`);
|
|
131
|
+
return { level, requirements };
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
catch (error) {
|
|
135
|
+
logger.warn(`[Client] LLM task analysis failed: ${error}, falling back to STANDARD`);
|
|
136
|
+
}
|
|
137
|
+
// Fallback: STANDARD with generic requirement
|
|
138
|
+
return {
|
|
139
|
+
level: this.failureCount > 0 ? InvolvementLevel.THOROUGH : InvolvementLevel.STANDARD,
|
|
140
|
+
requirements: [{ type: 'other', description: 'General task completion' }],
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
41
145
|
* Determine involvement level based on user request complexity
|
|
42
146
|
*/
|
|
43
147
|
determineInvolvementLevel(userMessage, subtasks) {
|
|
@@ -73,6 +177,7 @@ export class ClientAgent {
|
|
|
73
177
|
return InvolvementLevel.STANDARD;
|
|
74
178
|
}
|
|
75
179
|
/**
|
|
180
|
+
* @deprecated Use analyzeTaskRequirements() instead. Kept for reference.
|
|
76
181
|
* Parse requirements from user message
|
|
77
182
|
*/
|
|
78
183
|
parseRequirements(userMessage, subtasks) {
|
|
@@ -135,8 +240,9 @@ export class ClientAgent {
|
|
|
135
240
|
* Validate Worker's work at appropriate involvement level
|
|
136
241
|
*/
|
|
137
242
|
async validate(userMessage, subtasks, workerResult, involvementLevel) {
|
|
138
|
-
|
|
139
|
-
const requirements = this.
|
|
243
|
+
// Use LLM-based analysis instead of keyword matching
|
|
244
|
+
const { level: analyzedLevel, requirements } = await this.analyzeTaskRequirements(userMessage, subtasks, workerResult);
|
|
245
|
+
let level = involvementLevel || analyzedLevel;
|
|
140
246
|
// CRITICAL: Use LLM to check for unjustified failure claims BEFORE other validation
|
|
141
247
|
// Even in MINIMAL mode, we must catch agents giving up without trying
|
|
142
248
|
const failureAnalysis = await this.analyzeForUnjustifiedFailure(userMessage, workerResult);
|
|
@@ -150,6 +256,8 @@ export class ClientAgent {
|
|
|
150
256
|
}
|
|
151
257
|
}
|
|
152
258
|
logger.info(`[Client] Validating with ${level.toUpperCase()} involvement`);
|
|
259
|
+
// Log the analysis for orchestration tracing
|
|
260
|
+
orchestrationLogger.logClientAnalysis(level, requirements.length, `Requirements: ${requirements.map(r => r.type).join(', ')}`);
|
|
153
261
|
const result = {
|
|
154
262
|
approved: false,
|
|
155
263
|
requirementsMet: false,
|
|
@@ -162,8 +270,26 @@ export class ClientAgent {
|
|
|
162
270
|
`REJECTED: Worker claims failure without sufficient evidence. ${failureAnalysis.reasoning}`;
|
|
163
271
|
result.issues.push(failureIssue);
|
|
164
272
|
}
|
|
273
|
+
// Layer 0.5: Result-vs-Evidence Coherence Check (always done, catches hallucinated accomplishments)
|
|
274
|
+
// This detects when the Worker claims to have done things its tool usage doesn't support
|
|
275
|
+
const coherenceAnalysis = await this.analyzeResultCoherence(userMessage, workerResult);
|
|
276
|
+
orchestrationLogger.logClientCoherenceCheck(coherenceAnalysis.isCoherent, coherenceAnalysis.unsupportedClaims, coherenceAnalysis.reasoning);
|
|
277
|
+
if (!coherenceAnalysis.isCoherent) {
|
|
278
|
+
logger.info(`[Client] Detected incoherent result — Worker claims not supported by tool usage`);
|
|
279
|
+
logger.info(`[Client] Unsupported claims: ${coherenceAnalysis.unsupportedClaims.join('; ')}`);
|
|
280
|
+
logger.info(`[Client] Coherence reasoning: ${coherenceAnalysis.reasoning}`);
|
|
281
|
+
const coherenceIssue = coherenceAnalysis.suggestedAction ||
|
|
282
|
+
`REJECTED: Worker's result contains claims not supported by its actual tool usage. ${coherenceAnalysis.reasoning}`;
|
|
283
|
+
result.issues.push(coherenceIssue);
|
|
284
|
+
// Escalate involvement level — the Worker is hallucinating, we need stricter validation
|
|
285
|
+
if (level === InvolvementLevel.MINIMAL) {
|
|
286
|
+
level = InvolvementLevel.STANDARD;
|
|
287
|
+
result.involvementLevel = level;
|
|
288
|
+
logger.info(`[Client] Escalating to STANDARD due to incoherent result`);
|
|
289
|
+
}
|
|
290
|
+
}
|
|
165
291
|
// Layer 1: Process Validation (always done, no tools needed)
|
|
166
|
-
const processValidation = this.validateProcess(requirements, workerResult);
|
|
292
|
+
const processValidation = this.validateProcess(requirements, workerResult, level);
|
|
167
293
|
if (processValidation.issues.length > 0) {
|
|
168
294
|
result.issues.push(...processValidation.issues);
|
|
169
295
|
}
|
|
@@ -178,12 +304,15 @@ export class ClientAgent {
|
|
|
178
304
|
result.requirementsMet = result.issues.length === 0;
|
|
179
305
|
result.approved = result.requirementsMet;
|
|
180
306
|
if (!result.approved && result.issues.length > 0) {
|
|
181
|
-
|
|
307
|
+
// Generate an actionable correction instruction via LLM instead of echoing raw validation issues
|
|
308
|
+
result.nextAction = await this.generateCorrectionInstruction(userMessage, subtasks.join('; '), result.issues, workerResult);
|
|
182
309
|
this.failureCount++;
|
|
183
310
|
}
|
|
184
311
|
else {
|
|
185
312
|
this.failureCount = 0; // Reset on success
|
|
186
313
|
}
|
|
314
|
+
// Log the validation outcome
|
|
315
|
+
orchestrationLogger.logClientValidation(result.approved, result.issues, result.nextAction);
|
|
187
316
|
return result;
|
|
188
317
|
}
|
|
189
318
|
/**
|
|
@@ -245,28 +374,106 @@ Respond ONLY with the JSON, no other text.`;
|
|
|
245
374
|
reasoning: 'Analysis could not be performed',
|
|
246
375
|
};
|
|
247
376
|
}
|
|
377
|
+
/**
|
|
378
|
+
* Use LLM to cross-check the Worker's result claims against its actual tool usage.
|
|
379
|
+
* Catches hallucinated accomplishments — e.g., Worker claims "I inspected all source files
|
|
380
|
+
* and found no bugs" but only used list_directory and never read a single file.
|
|
381
|
+
* This runs at ALL involvement levels including MINIMAL.
|
|
382
|
+
*/
|
|
383
|
+
async analyzeResultCoherence(userMessage, workerResult) {
|
|
384
|
+
// Skip coherence check if Worker used no tools (caught by zero-tools guard)
|
|
385
|
+
// or if Worker explicitly failed (caught by failure analysis)
|
|
386
|
+
if (workerResult.toolsUsed.length === 0 || !workerResult.success) {
|
|
387
|
+
return { isCoherent: true, reasoning: 'Skipped — handled by other checks', unsupportedClaims: [] };
|
|
388
|
+
}
|
|
389
|
+
const toolList = workerResult.toolsUsed.join(', ');
|
|
390
|
+
const uniqueTools = [...new Set(workerResult.toolsUsed)].join(', ');
|
|
391
|
+
const coherencePrompt = `You are a strict quality auditor. Your job is to determine whether a Worker agent's result is SUPPORTED by the tools it actually used, or whether it fabricated/hallucinated claims.
|
|
392
|
+
|
|
393
|
+
USER REQUEST: ${userMessage}
|
|
394
|
+
|
|
395
|
+
WORKER RESULT:
|
|
396
|
+
${workerResult.result.substring(0, 1000)}
|
|
397
|
+
|
|
398
|
+
TOOLS ACTUALLY USED (in order): ${toolList}
|
|
399
|
+
UNIQUE TOOLS USED: ${uniqueTools}
|
|
400
|
+
TOTAL TOOL CALLS: ${workerResult.toolsUsed.length}
|
|
401
|
+
|
|
402
|
+
CRITICAL: Analyze whether the claims in the Worker's result are supported by the tools it used.
|
|
403
|
+
|
|
404
|
+
Key tool semantics:
|
|
405
|
+
- filesystem__list_directory / filesystem__directory_tree / filesystem__search_files = only shows file/folder NAMES and structure, does NOT read file contents
|
|
406
|
+
- filesystem__read_text_file / filesystem__read_file = actually reads file content
|
|
407
|
+
- mcp-shell-server__shell_exec = runs a shell command (check what command was likely run based on context)
|
|
408
|
+
- playwright__* = browser automation tools
|
|
409
|
+
|
|
410
|
+
Common hallucination patterns to detect:
|
|
411
|
+
1. Worker claims to have "inspected", "reviewed", "analyzed", or "scanned" source code but never used read_text_file — it only listed directories
|
|
412
|
+
2. Worker claims "no bugs found" or "code is correct" without reading any source files
|
|
413
|
+
3. Worker claims to have run tests or builds but no shell_exec tool was used (or the result doesn't reference actual test output)
|
|
414
|
+
4. Worker provides specific code details (line numbers, variable names, function logic) without having read the files containing them
|
|
415
|
+
5. Worker makes definitive statements about code quality, correctness, or behavior without having read the relevant code
|
|
416
|
+
|
|
417
|
+
Respond ONLY with valid JSON:
|
|
418
|
+
{
|
|
419
|
+
"isCoherent": <true if ALL claims in the result are supported by actual tool usage, false if any claims are fabricated>,
|
|
420
|
+
"reasoning": "<brief explanation of what's supported vs what's fabricated>",
|
|
421
|
+
"unsupportedClaims": ["<list each specific claim that is NOT supported by tool usage>"],
|
|
422
|
+
"suggestedAction": "<if not coherent, what the Worker should actually do — e.g., 'Read the source files using filesystem__read_text_file before claiming to have analyzed them'>"
|
|
423
|
+
}`;
|
|
424
|
+
try {
|
|
425
|
+
const response = await this.orchestrator.chat({
|
|
426
|
+
messages: [
|
|
427
|
+
{ role: 'system', content: 'You are a strict quality auditor. Respond only with valid JSON.' },
|
|
428
|
+
{ role: 'user', content: coherencePrompt },
|
|
429
|
+
],
|
|
430
|
+
temperature: 0.1,
|
|
431
|
+
});
|
|
432
|
+
const jsonMatch = response.content.match(/\{[\s\S]*\}/);
|
|
433
|
+
if (jsonMatch) {
|
|
434
|
+
const analysis = JSON.parse(jsonMatch[0]);
|
|
435
|
+
return analysis;
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
catch (error) {
|
|
439
|
+
logger.debug(`[Client] Failed to analyze result coherence: ${error}`);
|
|
440
|
+
}
|
|
441
|
+
// Default: assume coherent if analysis fails
|
|
442
|
+
return {
|
|
443
|
+
isCoherent: true,
|
|
444
|
+
reasoning: 'Coherence analysis could not be performed',
|
|
445
|
+
unsupportedClaims: [],
|
|
446
|
+
};
|
|
447
|
+
}
|
|
248
448
|
/**
|
|
249
449
|
* Layer 1: Process Validation (metadata only, no tools)
|
|
250
450
|
*/
|
|
251
|
-
validateProcess(requirements, workerResult) {
|
|
451
|
+
validateProcess(requirements, workerResult, involvementLevel) {
|
|
252
452
|
const issues = [];
|
|
453
|
+
// Zero-tools guard: if Worker used no tools at all and this is not a purely
|
|
454
|
+
// informational/conversational task, reject immediately
|
|
455
|
+
if (workerResult.toolsUsed.length === 0 && involvementLevel !== InvolvementLevel.MINIMAL) {
|
|
456
|
+
const isConversational = requirements.every(r => r.type === 'information' || r.type === 'other');
|
|
457
|
+
if (!isConversational) {
|
|
458
|
+
issues.push('Worker completed the task without using any tools. ' +
|
|
459
|
+
'The task requires actual tool usage (file operations, shell commands, browser actions, etc.) — ' +
|
|
460
|
+
'not just generating a text response. Use the available tools to actually perform the task.');
|
|
461
|
+
}
|
|
462
|
+
}
|
|
253
463
|
// Check if Worker used appropriate tools for requirements
|
|
254
464
|
for (const req of requirements) {
|
|
255
465
|
if (req.mustUseTools && req.mustUseTools.length > 0) {
|
|
256
466
|
const usedRequiredTool = req.mustUseTools.some(requiredTool => {
|
|
257
|
-
// Match if required tool is contained in actual tool name (e.g., 'playwright__browser' matches 'playwright__browser_navigate')
|
|
258
|
-
// OR if actual tool exactly matches the required tool
|
|
259
467
|
return workerResult.toolsUsed.some(actualTool => actualTool === requiredTool || actualTool.startsWith(requiredTool) || requiredTool.startsWith(actualTool.split('__')[0] + '__'));
|
|
260
468
|
});
|
|
261
469
|
if (!usedRequiredTool) {
|
|
262
|
-
issues.push(`${req.description} requires using ${req.mustUseTools.join(' or ')} but Worker did not use these tools
|
|
263
|
-
`Create a subtask specifically for ${req.description.toLowerCase()}.`);
|
|
470
|
+
issues.push(`${req.description} requires using ${req.mustUseTools.join(' or ')} but Worker did not use these tools.`);
|
|
264
471
|
}
|
|
265
472
|
}
|
|
266
473
|
}
|
|
267
474
|
// Check if Worker succeeded
|
|
268
475
|
if (!workerResult.success) {
|
|
269
|
-
issues.push(
|
|
476
|
+
issues.push('Worker did not complete the task successfully. The task needs to be retried with appropriate tool usage.');
|
|
270
477
|
}
|
|
271
478
|
return { issues };
|
|
272
479
|
}
|
|
@@ -330,6 +537,52 @@ Respond ONLY with the JSON, no other text.`;
|
|
|
330
537
|
return false;
|
|
331
538
|
}
|
|
332
539
|
}
|
|
540
|
+
/**
|
|
541
|
+
* Use LLM to generate an actionable correction instruction from raw validation issues.
|
|
542
|
+
* Instead of echoing "requires using playwright__ but Worker did not use these tools",
|
|
543
|
+
* produces something like "Use the filesystem tools to list ~/Library/Caches and report sizes".
|
|
544
|
+
*/
|
|
545
|
+
async generateCorrectionInstruction(userMessage, subtask, issues, workerResult) {
|
|
546
|
+
const correctionPrompt = `You are generating a correction instruction for a Worker agent that failed to complete a task properly.
|
|
547
|
+
|
|
548
|
+
ORIGINAL USER REQUEST: ${userMessage}
|
|
549
|
+
|
|
550
|
+
SUBTASK THAT WAS ATTEMPTED: ${subtask}
|
|
551
|
+
|
|
552
|
+
VALIDATION ISSUES FOUND:
|
|
553
|
+
${issues.map((issue, i) => `${i + 1}. ${issue}`).join('\n')}
|
|
554
|
+
|
|
555
|
+
WORKER'S RESULT (first 300 chars): ${workerResult.result.substring(0, 300)}
|
|
556
|
+
TOOLS WORKER USED: ${workerResult.toolsUsed.join(', ') || 'none'}
|
|
557
|
+
|
|
558
|
+
Generate a CLEAR, ACTIONABLE instruction that tells the Worker exactly what to do to fix the issues.
|
|
559
|
+
The instruction should:
|
|
560
|
+
- Be a direct command (e.g., "Use the run_process tool to execute 'du -sh ~/Library/Caches' and report the output")
|
|
561
|
+
- Reference specific tools or actions the Worker should take
|
|
562
|
+
- Be concise (1-2 sentences)
|
|
563
|
+
- NOT include validation jargon like "mustUseTools", "requirements", or "involvement level"
|
|
564
|
+
- NOT be a generic statement like "retry the task" — be specific about WHAT to do
|
|
565
|
+
|
|
566
|
+
Respond ONLY with the correction instruction text, nothing else.`;
|
|
567
|
+
try {
|
|
568
|
+
const response = await this.orchestrator.chat({
|
|
569
|
+
messages: [
|
|
570
|
+
{ role: 'system', content: 'You generate concise, actionable correction instructions for a Worker agent. Respond with only the instruction text.' },
|
|
571
|
+
{ role: 'user', content: correctionPrompt },
|
|
572
|
+
],
|
|
573
|
+
temperature: 0.1,
|
|
574
|
+
});
|
|
575
|
+
const instruction = response.content.trim();
|
|
576
|
+
if (instruction.length > 10 && instruction.length < 500) {
|
|
577
|
+
return instruction;
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
catch (error) {
|
|
581
|
+
logger.warn(`[Client] Failed to generate correction instruction: ${error}`);
|
|
582
|
+
}
|
|
583
|
+
// Fallback: use first issue with a prefix
|
|
584
|
+
return `Fix the following issue and retry: ${issues[0]}`;
|
|
585
|
+
}
|
|
333
586
|
/**
|
|
334
587
|
* Validate file contents for common issues
|
|
335
588
|
*/
|