joonecli 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/config.test.js +1 -0
- package/dist/__tests__/config.test.js.map +1 -1
- package/dist/__tests__/installHostDeps.test.js +45 -0
- package/dist/__tests__/installHostDeps.test.js.map +1 -0
- package/dist/__tests__/whitelistedBackend.test.js +18 -0
- package/dist/__tests__/whitelistedBackend.test.js.map +1 -0
- package/dist/cli/config.d.ts +2 -0
- package/dist/cli/config.js +1 -0
- package/dist/cli/config.js.map +1 -1
- package/dist/cli/index.js +84 -97
- package/dist/cli/index.js.map +1 -1
- package/dist/commands/builtinCommands.js +6 -6
- package/dist/commands/builtinCommands.js.map +1 -1
- package/dist/commands/commandRegistry.d.ts +3 -1
- package/dist/commands/commandRegistry.js.map +1 -1
- package/dist/core/agentLoop.d.ts +11 -28
- package/dist/core/agentLoop.js +68 -229
- package/dist/core/agentLoop.js.map +1 -1
- package/dist/core/compactor.js +2 -2
- package/dist/core/compactor.js.map +1 -1
- package/dist/core/contextGuard.d.ts +5 -0
- package/dist/core/contextGuard.js +30 -3
- package/dist/core/contextGuard.js.map +1 -1
- package/dist/core/events.d.ts +45 -0
- package/dist/core/events.js +8 -0
- package/dist/core/events.js.map +1 -0
- package/dist/core/promptBuilder.js.map +1 -1
- package/dist/core/sessionStore.js +3 -2
- package/dist/core/sessionStore.js.map +1 -1
- package/dist/core/tokenCounter.d.ts +8 -1
- package/dist/core/tokenCounter.js +28 -0
- package/dist/core/tokenCounter.js.map +1 -1
- package/dist/hitl/bridge.js +1 -27
- package/dist/hitl/bridge.js.map +1 -1
- package/dist/middleware/loopDetection.d.ts +7 -23
- package/dist/middleware/loopDetection.js +38 -42
- package/dist/middleware/loopDetection.js.map +1 -1
- package/dist/sandbox/whitelistedBackend.d.ts +5 -0
- package/dist/sandbox/whitelistedBackend.js +27 -0
- package/dist/sandbox/whitelistedBackend.js.map +1 -0
- package/dist/tools/askUser.d.ts +12 -3
- package/dist/tools/askUser.js +16 -28
- package/dist/tools/askUser.js.map +1 -1
- package/dist/tools/bashTool.d.ts +11 -0
- package/dist/tools/bashTool.js +51 -0
- package/dist/tools/bashTool.js.map +1 -0
- package/dist/tools/index.d.ts +15 -27
- package/dist/tools/index.js +9 -181
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/installHostDeps.d.ts +8 -0
- package/dist/tools/installHostDeps.js +44 -0
- package/dist/tools/installHostDeps.js.map +1 -0
- package/dist/tracing/sessionTracer.d.ts +1 -0
- package/dist/tracing/sessionTracer.js +4 -1
- package/dist/tracing/sessionTracer.js.map +1 -1
- package/dist/ui/App.js +116 -55
- package/dist/ui/App.js.map +1 -1
- package/dist/ui/components/ActionLog.d.ts +7 -0
- package/dist/ui/components/ActionLog.js +63 -0
- package/dist/ui/components/ActionLog.js.map +1 -0
- package/dist/ui/components/FileBrowser.d.ts +2 -0
- package/dist/ui/components/FileBrowser.js +41 -0
- package/dist/ui/components/FileBrowser.js.map +1 -0
- package/dist/ui/components/MessageBubble.js +1 -1
- package/dist/ui/components/MessageBubble.js.map +1 -1
- package/package.json +8 -5
- package/AGENTS.md +0 -56
- package/Handover.md +0 -115
- package/PROGRESS.md +0 -160
- package/dist/__tests__/m55.test.js +0 -160
- package/dist/__tests__/m55.test.js.map +0 -1
- package/dist/__tests__/middleware.test.js +0 -169
- package/dist/__tests__/middleware.test.js.map +0 -1
- package/dist/__tests__/optimizations.test.d.ts +0 -1
- package/dist/__tests__/optimizations.test.js +0 -136
- package/dist/__tests__/optimizations.test.js.map +0 -1
- package/dist/__tests__/security.test.d.ts +0 -1
- package/dist/__tests__/security.test.js +0 -86
- package/dist/__tests__/security.test.js.map +0 -1
- package/dist/__tests__/streaming.test.d.ts +0 -1
- package/dist/__tests__/streaming.test.js +0 -71
- package/dist/__tests__/streaming.test.js.map +0 -1
- package/dist/__tests__/toolRouter.test.d.ts +0 -1
- package/dist/__tests__/toolRouter.test.js +0 -37
- package/dist/__tests__/toolRouter.test.js.map +0 -1
- package/dist/__tests__/tools.test.d.ts +0 -1
- package/dist/__tests__/tools.test.js +0 -112
- package/dist/__tests__/tools.test.js.map +0 -1
- package/dist/core/subAgent.d.ts +0 -56
- package/dist/core/subAgent.js +0 -240
- package/dist/core/subAgent.js.map +0 -1
- package/dist/debug_google.d.ts +0 -1
- package/dist/debug_google.js +0 -23
- package/dist/debug_google.js.map +0 -1
- package/dist/middleware/commandSanitizer.d.ts +0 -18
- package/dist/middleware/commandSanitizer.js +0 -50
- package/dist/middleware/commandSanitizer.js.map +0 -1
- package/dist/middleware/permission.d.ts +0 -17
- package/dist/middleware/permission.js +0 -59
- package/dist/middleware/permission.js.map +0 -1
- package/dist/middleware/pipeline.d.ts +0 -31
- package/dist/middleware/pipeline.js +0 -62
- package/dist/middleware/pipeline.js.map +0 -1
- package/dist/middleware/preCompletion.d.ts +0 -29
- package/dist/middleware/preCompletion.js +0 -82
- package/dist/middleware/preCompletion.js.map +0 -1
- package/dist/middleware/types.d.ts +0 -40
- package/dist/middleware/types.js +0 -8
- package/dist/middleware/types.js.map +0 -1
- package/dist/skills/loader.d.ts +0 -55
- package/dist/skills/loader.js +0 -132
- package/dist/skills/loader.js.map +0 -1
- package/dist/skills/tools.d.ts +0 -5
- package/dist/skills/tools.js +0 -78
- package/dist/skills/tools.js.map +0 -1
- package/dist/test_cache.d.ts +0 -1
- package/dist/test_cache.js +0 -55
- package/dist/test_cache.js.map +0 -1
- package/dist/test_google.d.ts +0 -1
- package/dist/test_google.js +0 -36
- package/dist/test_google.js.map +0 -1
- package/dist/tools/browser.d.ts +0 -19
- package/dist/tools/browser.js +0 -111
- package/dist/tools/browser.js.map +0 -1
- package/dist/tools/registry.d.ts +0 -31
- package/dist/tools/registry.js +0 -168
- package/dist/tools/registry.js.map +0 -1
- package/dist/tools/router.d.ts +0 -34
- package/dist/tools/router.js +0 -75
- package/dist/tools/router.js.map +0 -1
- package/dist/tools/security.d.ts +0 -28
- package/dist/tools/security.js +0 -183
- package/dist/tools/security.js.map +0 -1
- package/dist/tools/spawnAgent.d.ts +0 -19
- package/dist/tools/spawnAgent.js +0 -130
- package/dist/tools/spawnAgent.js.map +0 -1
- package/dist/tools/webSearch.d.ts +0 -6
- package/dist/tools/webSearch.js +0 -120
- package/dist/tools/webSearch.js.map +0 -1
- package/docs/01_insights_and_patterns.md +0 -27
- package/docs/02_edge_cases_and_mitigations.md +0 -143
- package/docs/03_initial_implementation_plan.md +0 -66
- package/docs/04_tech_stack_proposal.md +0 -20
- package/docs/05_prd.md +0 -87
- package/docs/06_user_stories.md +0 -72
- package/docs/07_system_architecture.md +0 -138
- package/docs/08_roadmap.md +0 -200
- package/e2b/Dockerfile +0 -26
- package/src/__tests__/bootstrap.test.ts +0 -111
- package/src/__tests__/config.test.ts +0 -97
- package/src/__tests__/m55.test.ts +0 -238
- package/src/__tests__/middleware.test.ts +0 -219
- package/src/__tests__/modelFactory.test.ts +0 -63
- package/src/__tests__/optimizations.test.ts +0 -201
- package/src/__tests__/promptBuilder.test.ts +0 -141
- package/src/__tests__/sandbox.test.ts +0 -102
- package/src/__tests__/security.test.ts +0 -122
- package/src/__tests__/streaming.test.ts +0 -82
- package/src/__tests__/toolRouter.test.ts +0 -52
- package/src/__tests__/tools.test.ts +0 -146
- package/src/__tests__/tracing.test.ts +0 -196
- package/src/agents/agentRegistry.ts +0 -69
- package/src/agents/agentSpec.ts +0 -67
- package/src/agents/builtinAgents.ts +0 -142
- package/src/cli/config.ts +0 -124
- package/src/cli/index.ts +0 -742
- package/src/cli/modelFactory.ts +0 -174
- package/src/cli/postinstall.ts +0 -28
- package/src/cli/providers.ts +0 -107
- package/src/commands/builtinCommands.ts +0 -293
- package/src/commands/commandRegistry.ts +0 -194
- package/src/core/agentLoop.d.ts.map +0 -1
- package/src/core/agentLoop.ts +0 -312
- package/src/core/autoSave.ts +0 -95
- package/src/core/compactor.ts +0 -252
- package/src/core/contextGuard.ts +0 -129
- package/src/core/errors.ts +0 -202
- package/src/core/promptBuilder.d.ts.map +0 -1
- package/src/core/promptBuilder.ts +0 -139
- package/src/core/reasoningRouter.ts +0 -121
- package/src/core/retry.ts +0 -75
- package/src/core/sessionResumer.ts +0 -90
- package/src/core/sessionStore.ts +0 -216
- package/src/core/subAgent.ts +0 -339
- package/src/core/tokenCounter.ts +0 -64
- package/src/evals/dataset.ts +0 -67
- package/src/evals/evaluator.ts +0 -81
- package/src/hitl/bridge.ts +0 -160
- package/src/middleware/commandSanitizer.ts +0 -60
- package/src/middleware/loopDetection.ts +0 -63
- package/src/middleware/permission.ts +0 -72
- package/src/middleware/pipeline.ts +0 -75
- package/src/middleware/preCompletion.ts +0 -94
- package/src/middleware/types.ts +0 -45
- package/src/sandbox/bootstrap.ts +0 -121
- package/src/sandbox/manager.ts +0 -239
- package/src/sandbox/sync.ts +0 -157
- package/src/skills/loader.ts +0 -143
- package/src/skills/tools.ts +0 -99
- package/src/skills/types.ts +0 -13
- package/src/test_cache.ts +0 -72
- package/src/tools/askUser.ts +0 -47
- package/src/tools/browser.ts +0 -137
- package/src/tools/index.d.ts.map +0 -1
- package/src/tools/index.ts +0 -237
- package/src/tools/registry.ts +0 -198
- package/src/tools/router.ts +0 -78
- package/src/tools/security.ts +0 -220
- package/src/tools/spawnAgent.ts +0 -158
- package/src/tools/webSearch.ts +0 -142
- package/src/tracing/analyzer.ts +0 -265
- package/src/tracing/langsmith.ts +0 -63
- package/src/tracing/sessionTracer.ts +0 -202
- package/src/tracing/types.ts +0 -49
- package/src/types/valyu.d.ts +0 -37
- package/src/ui/App.tsx +0 -404
- package/src/ui/components/HITLPrompt.tsx +0 -119
- package/src/ui/components/Header.tsx +0 -51
- package/src/ui/components/MessageBubble.tsx +0 -46
- package/src/ui/components/StatusBar.tsx +0 -138
- package/src/ui/components/StreamingText.tsx +0 -48
- package/src/ui/components/ToolCallPanel.tsx +0 -80
- package/tests/commands/commands.test.ts +0 -356
- package/tests/core/compactor.test.ts +0 -217
- package/tests/core/retryAndErrors.test.ts +0 -164
- package/tests/core/sessionResumer.test.ts +0 -95
- package/tests/core/sessionStore.test.ts +0 -84
- package/tests/core/stability.test.ts +0 -165
- package/tests/core/subAgent.test.ts +0 -238
- package/tests/hitl/hitlBridge.test.ts +0 -115
- package/tsconfig.json +0 -16
- package/vitest.config.ts +0 -10
- package/vitest.out +0 -48
- /package/dist/__tests__/{m55.test.d.ts → installHostDeps.test.d.ts} +0 -0
- /package/dist/__tests__/{middleware.test.d.ts → whitelistedBackend.test.d.ts} +0 -0
package/src/core/subAgent.ts
DELETED
|
@@ -1,339 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Sub-Agent Manager
|
|
3
|
-
*
|
|
4
|
-
* Spawns and orchestrates isolated sub-agents for scoped tasks.
|
|
5
|
-
* Each sub-agent gets its own ExecutionHarness with a separate conversation
|
|
6
|
-
* history. Only the final SubAgentResult is returned to the main agent,
|
|
7
|
-
* discarding the sub-agent's internal conversation to save context.
|
|
8
|
-
*
|
|
9
|
-
* Supports both synchronous (blocking) and asynchronous (non-blocking) modes.
|
|
10
|
-
*
|
|
11
|
-
* Safety:
|
|
12
|
-
* - Depth limit of 1: sub-agents cannot spawn other sub-agents
|
|
13
|
-
* - maxTurns cap per agent prevents doom-loops
|
|
14
|
-
* - Concurrent async agent cap of 3 prevents resource exhaustion
|
|
15
|
-
* - Per-agent token budget tracking
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
|
-
import { AgentSpec, SubAgentResult } from "../agents/agentSpec.js";
|
|
19
|
-
import { AgentRegistry } from "../agents/agentRegistry.js";
|
|
20
|
-
import { DynamicToolInterface, ToolResult } from "../tools/index.js";
|
|
21
|
-
import { ContextState } from "../core/promptBuilder.js";
|
|
22
|
-
import { countMessageTokens } from "../core/tokenCounter.js";
|
|
23
|
-
import { BaseMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage } from "@langchain/core/messages";
|
|
24
|
-
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
|
|
25
|
-
import { Runnable } from "@langchain/core/runnables";
|
|
26
|
-
|
|
27
|
-
// ─── Constants ──────────────────────────────────────────────────────────────────
|
|
28
|
-
|
|
29
|
-
/** Turn budget used when neither the caller nor the agent spec provides one. */
const DEFAULT_MAX_TURNS = 10;

/** Maximum number of async sub-agent tasks that may be incomplete at once. */
const MAX_CONCURRENT_ASYNC = 3;

/** Age, in milliseconds, after which an async task entry is purged (5 minutes). */
const ASYNC_EXPIRY_MS = 5 * 60 * 1000;
|
|
32
|
-
|
|
33
|
-
// ─── Async Task State ───────────────────────────────────────────────────────────
|
|
34
|
-
|
|
35
|
-
/**
 * Book-keeping record for one sub-agent run started without blocking the caller.
 */
interface AsyncTask {
  /** Identifier returned to the caller; also the key under which the record is stored. */
  taskId: string;
  /** Name of the agent spec that was asked to run. */
  agentName: string;
  /** The task text handed to the sub-agent. */
  taskDescription: string;
  /** Promise for the in-flight run. */
  promise: Promise<SubAgentResult>;
  /** Final result; stays unset until the run has settled. */
  result?: SubAgentResult;
  /** `Date.now()` timestamp captured when the task was registered. */
  startedAt: number;
  /** Set to true once `result` has been recorded. */
  completed: boolean;
}
|
|
44
|
-
|
|
45
|
-
// ─── SubAgentManager ────────────────────────────────────────────────────────────
|
|
46
|
-
|
|
47
|
-
/**
 * Runs registered agent specs as isolated sub-agents.
 *
 * Every run gets its own message history; callers only ever receive the final
 * SubAgentResult. `spawn` awaits the run, while `spawnAsync` registers it under
 * a task id that can later be polled through `getResult`.
 */
export class SubAgentManager {
  /** Source of agent specs, looked up by name. */
  private registry: AgentRegistry;
  /** Tools sub-agents may use; never contains spawn_agent / check_agent. */
  private allTools: DynamicToolInterface[];
  /** Model invoked on every sub-agent turn. */
  private llm: Runnable | BaseChatModel;
  /** Async runs keyed by task id; entries are removed on retrieval or expiry. */
  private asyncTasks: Map<string, AsyncTask> = new Map();
  /** Monotonic counter that forms part of each generated task id. */
  private taskCounter = 0;

  /**
   * @param registry - Registry used to resolve agent names to specs.
   * @param tools - Candidate tools; spawn_agent and check_agent are dropped.
   * @param llm - Model (or runnable) that drives each sub-agent.
   */
  constructor(
    registry: AgentRegistry,
    tools: DynamicToolInterface[],
    llm: Runnable | BaseChatModel,
  ) {
    this.registry = registry;
    // Filter out spawn_agent and check_agent to prevent recursive nesting (depth-1 limit)
    this.allTools = tools.filter(
      (t) => t.name !== "spawn_agent" && t.name !== "check_agent"
    );
    this.llm = llm;
  }

  /**
   * Synchronous spawn — resolves only once the sub-agent has finished.
   *
   * @param agentName - Name of a registered agent spec.
   * @param task - Task text handed to the sub-agent.
   * @param maxTurnsOverride - Optional turn budget that takes precedence over the spec.
   * @returns The run's result; an unknown agent name yields a "failure" result
   *   rather than a rejection.
   */
  async spawn(
    agentName: string,
    task: string,
    maxTurnsOverride?: number
  ): Promise<SubAgentResult> {
    const spec = this.registry.get(agentName);
    if (!spec) {
      return this.makeErrorResult(
        agentName,
        task,
        `Unknown agent "${agentName}". Available: ${this.registry.getNames().join(", ")}`
      );
    }

    return this.runAgent(spec, task, maxTurnsOverride);
  }

  /**
   * Asynchronous spawn — starts the run and resolves immediately with a task id.
   * The caller can poll the outcome with `getResult(taskId)`.
   *
   * @param agentName - Name of a registered agent spec.
   * @param task - Task text handed to the sub-agent.
   * @param maxTurnsOverride - Optional turn budget that takes precedence over the spec.
   * @returns The generated task id.
   * @throws Error when MAX_CONCURRENT_ASYNC tasks are still incomplete, or when
   *   the agent name is not registered.
   */
  async spawnAsync(
    agentName: string,
    task: string,
    maxTurnsOverride?: number
  ): Promise<string> {
    // Purge stale entries first so they do not count against the cap.
    this.cleanupExpired();
    const activeCount = Array.from(this.asyncTasks.values())
      .filter((t) => !t.completed).length;

    if (activeCount >= MAX_CONCURRENT_ASYNC) {
      throw new Error(
        `Maximum concurrent async agents reached (${MAX_CONCURRENT_ASYNC}). ` +
        `Wait for existing tasks to complete or check them with check_agent.`
      );
    }

    const spec = this.registry.get(agentName);
    if (!spec) {
      throw new Error(
        `Unknown agent "${agentName}". Available: ${this.registry.getNames().join(", ")}`
      );
    }

    const taskId = `task_${++this.taskCounter}_${Date.now()}`;

    // The continuation runs asynchronously, i.e. after the entry below has been
    // stored, so the lookup inside it finds the record unless it was purged.
    const promise = this.runAgent(spec, task, maxTurnsOverride).then((result) => {
      const asyncTask = this.asyncTasks.get(taskId);
      if (asyncTask) {
        asyncTask.result = result;
        asyncTask.completed = true;
      }
      return result;
    });

    this.asyncTasks.set(taskId, {
      taskId,
      agentName,
      taskDescription: task,
      promise,
      startedAt: Date.now(),
      completed: false,
    });

    return taskId;
  }

  /**
   * Check the status, or collect the result, of an async task.
   *
   * @param taskId - Id previously returned by `spawnAsync`.
   * @returns The result when the task has completed (the entry is removed as a
   *   side effect), otherwise a human-readable status or "unknown task" string.
   */
  async getResult(taskId: string): Promise<SubAgentResult | string> {
    const asyncTask = this.asyncTasks.get(taskId);
    if (!asyncTask) {
      return `Unknown task ID: ${taskId}. No such async task exists.`;
    }

    if (asyncTask.completed && asyncTask.result) {
      // A result is handed out once; drop the record afterwards.
      this.asyncTasks.delete(taskId);
      return asyncTask.result;
    }

    const elapsed = Math.round((Date.now() - asyncTask.startedAt) / 1000);
    return `Task "${asyncTask.taskDescription}" (agent: ${asyncTask.agentName}) ` +
      `is still running (${elapsed}s elapsed).`;
  }

  /**
   * Core execution loop for a sub-agent: builds an isolated conversation and
   * alternates model invocations with tool executions until the model stops
   * requesting tools or the turn budget is used up.
   *
   * @param spec - Agent spec providing the system prompt, tool allow-list and turn cap.
   * @param task - Task text handed to the sub-agent.
   * @param maxTurnsOverride - Optional turn budget that takes precedence over the spec.
   * @returns "success" when the model finished without pending tool calls,
   *   "partial" when the turn budget ran out first, "failure" when an error escaped the loop.
   */
  private async runAgent(
    spec: AgentSpec,
    task: string,
    maxTurnsOverride?: number
  ): Promise<SubAgentResult> {
    const startTime = Date.now();
    const maxTurns = maxTurnsOverride ?? spec.maxTurns ?? DEFAULT_MAX_TURNS;

    // Resolve available tools for this agent (no allow-list means every tool).
    const allowedToolNames = spec.tools;
    const agentTools = allowedToolNames
      ? this.allTools.filter((t) => allowedToolNames.includes(t.name))
      : this.allTools;

    // The directive is sent as a human message and prepended on every turn.
    const systemPrompt = new HumanMessage(
      `<system-directive>\n${spec.systemPrompt}\n\n--- Current Task ---\n${task}\n</system-directive>`
    );

    // Isolated conversation history for this run only.
    const history: BaseMessage[] = [
      new HumanMessage(task),
    ];

    let promptTokens = 0;
    let completionTokens = 0;
    let toolCallCount = 0;
    let turnsUsed = 0;
    let lastResponse = "";
    // True once the model answered without requesting any tool.
    let finished = false;
    const filesModified: Set<string> = new Set();

    // Build LangChain tool declarations for binding
    const toolDeclarations = agentTools.map((t) => ({
      name: t.name,
      description: t.description,
      schema: t.schema,
    }));

    try {
      // Bind tools to the LLM for this sub-agent session when it supports binding.
      let boundLlm: any;
      if ("bindTools" in this.llm && typeof (this.llm as any).bindTools === "function") {
        boundLlm = (this.llm as any).bindTools(toolDeclarations);
      } else {
        boundLlm = this.llm;
      }

      for (let turn = 0; turn < maxTurns; turn++) {
        turnsUsed++;

        // Build the full message array
        const messages = [systemPrompt, ...history];
        promptTokens += countMessageTokens(messages);

        // Invoke the LLM
        const response = await boundLlm.invoke(messages);
        const aiMessage = response as AIMessage;
        completionTokens += countMessageTokens([aiMessage]);
        history.push(aiMessage);

        // Remember the most recent non-empty text answer.
        if (typeof aiMessage.content === "string" && aiMessage.content.length > 0) {
          lastResponse = aiMessage.content;
        }

        // No tool calls — agent is done
        if (!aiMessage.tool_calls || aiMessage.tool_calls.length === 0) {
          finished = true;
          break;
        }

        // Execute tool calls
        for (const call of aiMessage.tool_calls) {
          // A ToolMessage needs the originating call id, so id-less calls are skipped.
          if (!call.id) continue;

          const tool = agentTools.find((t) => t.name === call.name);
          if (!tool) {
            history.push(new ToolMessage({
              content: `Error: Tool "${call.name}" is not available to this sub-agent.`,
              tool_call_id: call.id,
            }));
            continue;
          }

          toolCallCount++;

          try {
            const result = await tool.execute(call.args);
            const output = typeof result === "string" ? result : (result as ToolResult).content;

            // Track file modifications (only reached when the tool did not throw).
            if (call.name === "write_file" && call.args?.path) {
              filesModified.add(call.args.path);
            }

            history.push(new ToolMessage({
              content: output,
              tool_call_id: call.id,
            }));
          } catch (err: unknown) {
            // Tool failures are reported back to the model instead of aborting the run.
            history.push(new ToolMessage({
              content: `Tool error: ${SubAgentManager.describeError(err)}`,
              tool_call_id: call.id,
            }));
          }
        }
      }

      // A run that ended because the model stopped calling tools is a success,
      // even when that happened on the last permitted turn.
      const outcome = finished ? "success" : "partial";

      return {
        agentName: spec.name,
        taskDescription: task,
        outcome,
        result: lastResponse || "(Sub-agent produced no text output)",
        filesModified: Array.from(filesModified),
        toolCallCount,
        tokenUsage: { prompt: promptTokens, completion: completionTokens },
        duration: Date.now() - startTime,
        turnsUsed,
      };
    } catch (error: unknown) {
      return this.makeErrorResult(
        spec.name,
        task,
        `Sub-agent error: ${SubAgentManager.describeError(error)}`,
        { promptTokens, completionTokens, toolCallCount, turnsUsed, startTime, filesModified }
      );
    }
  }

  /**
   * Extracts a printable message from any thrown value, so non-Error throws
   * (strings, null, plain objects) never surface as "undefined" or crash the handler.
   */
  private static describeError(err: unknown): string {
    if (err instanceof Error) {
      return err.message;
    }
    if (typeof err === "object" && err !== null && "message" in err) {
      return String((err as { message: unknown }).message);
    }
    return String(err);
  }

  /**
   * Creates a "failure" SubAgentResult.
   *
   * @param agentName - Agent name to report.
   * @param task - Task text to report.
   * @param errorMsg - Message stored as the result text.
   * @param partial - Counters gathered before the failure; every metric
   *   defaults to zero / empty when omitted.
   */
  private makeErrorResult(
    agentName: string,
    task: string,
    errorMsg: string,
    partial?: {
      promptTokens: number;
      completionTokens: number;
      toolCallCount: number;
      turnsUsed: number;
      startTime: number;
      filesModified: Set<string>;
    }
  ): SubAgentResult {
    return {
      agentName,
      taskDescription: task,
      outcome: "failure",
      result: errorMsg,
      filesModified: partial ? Array.from(partial.filesModified) : [],
      toolCallCount: partial?.toolCallCount ?? 0,
      tokenUsage: {
        prompt: partial?.promptTokens ?? 0,
        completion: partial?.completionTokens ?? 0,
      },
      duration: partial ? Date.now() - partial.startTime : 0,
      turnsUsed: partial?.turnsUsed ?? 0,
    };
  }

  /**
   * Removes async task entries older than ASYNC_EXPIRY_MS.
   *
   * NOTE(review): the age check ignores `completed`, so a run that is still in
   * flight after the expiry window is dropped too — it stops counting toward
   * the concurrency cap and its result can no longer be fetched. Confirm this
   * is intended.
   */
  private cleanupExpired(): void {
    const now = Date.now();
    for (const [taskId, task] of this.asyncTasks.entries()) {
      if (now - task.startedAt > ASYNC_EXPIRY_MS) {
        this.asyncTasks.delete(taskId);
      }
    }
  }
}
|
package/src/core/tokenCounter.ts
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
import { BaseMessage } from "@langchain/core/messages";
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Lightweight token counter using character-based heuristic.
|
|
5
|
-
*
|
|
6
|
-
* Approximation: ~4 characters per token for English text.
|
|
7
|
-
* This avoids a dependency on tiktoken while being accurate enough
|
|
8
|
-
* for capacity threshold decisions (~90% accuracy for English).
|
|
9
|
-
*
|
|
10
|
-
* For production accuracy, swap to tiktoken with the appropriate
|
|
11
|
-
* model-specific encoding.
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
/** Number of characters assumed to make up one token in the length-based estimate. */
const CHARS_PER_TOKEN = 4;

/**
 * Approximates the token count of a string from its length alone.
 *
 * @param text - Text to measure.
 * @returns `text.length` divided by CHARS_PER_TOKEN, rounded up.
 */
export function estimateTokens(text: string): number {
  const rawEstimate = text.length / CHARS_PER_TOKEN;
  return Math.ceil(rawEstimate);
}
|
|
22
|
-
|
|
23
|
-
/**
 * Sums the estimated token cost of every message in a list.
 *
 * String content is estimated directly. Array content contributes its string
 * parts and the `text` field of parts that carry one; other parts add nothing.
 * Each message also adds a flat 4 tokens for role/name overhead.
 *
 * @param messages - Messages to measure.
 * @returns The estimated total token count.
 */
export function countMessageTokens(messages: BaseMessage[]): number {
  return messages.reduce((runningTotal, message) => {
    const { content } = message;
    let contentTokens = 0;

    if (typeof content === "string") {
      contentTokens = estimateTokens(content);
    } else if (Array.isArray(content)) {
      // Multi-part message: only textual parts are counted.
      for (const segment of content) {
        if (typeof segment === "string") {
          contentTokens += estimateTokens(segment);
          continue;
        }
        if ("text" in segment && typeof segment.text === "string") {
          contentTokens += estimateTokens(segment.text);
        }
      }
    }

    // Flat per-message allowance for role/name overhead (~4 tokens).
    return runningTotal + contentTokens + 4;
  }, 0);
}
|
|
49
|
-
|
|
50
|
-
/**
 * Reports whether the estimated size of a conversation has reached a given
 * fraction of the context window.
 *
 * @param messages - The current conversation messages.
 * @param maxTokens - The model's context window size.
 * @param threshold - Fraction of capacity that counts as "near" (default: 0.8 = 80%).
 * @returns True when the estimated token count is at or above `maxTokens * threshold`.
 */
export function isNearCapacity(
  messages: BaseMessage[],
  maxTokens: number,
  threshold = 0.8
): boolean {
  const triggerPoint = maxTokens * threshold;
  return countMessageTokens(messages) >= triggerPoint;
}
|
package/src/evals/dataset.ts
DELETED
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
import { Client } from "langsmith";
|
|
2
|
-
|
|
3
|
-
/** Name of the LangSmith dataset that holds the baseline evaluation examples. */
const DATASET_NAME = "joone-baseline-v1";

// Shared LangSmith client, constructed without explicit options.
// NOTE(review): presumably picks up credentials from the environment — confirm.
const client = new Client();
|
|
5
|
-
|
|
6
|
-
/**
 * Baseline evaluation examples.
 *
 * Each entry pairs the instruction given to the agent (`inputs`) with the
 * artifacts expected afterwards (`outputs`).
 */
const BASELINE_EXAMPLES = [
  // 1. Compute a value and persist it to a file.
  {
    inputs: {
      instruction: "Write a python script that calculates the 10th fibonacci number and saves the result to /workspace/fib_result.txt",
    },
    outputs: {
      expected_file: "/workspace/fib_result.txt",
      expected_content: "55\n", // 0,1,1,2,3,5,8,13,21,34,55
    },
  },
  // 2. Write a module plus its test, then run the test through the bash tool.
  {
    inputs: {
      instruction: `Create a TypeScript file at /workspace/math.ts with a function 'add(a: number, b: number)' that returns their sum.
Then write a test file at /workspace/math.test.ts using the 'node:assert' module.
Finally, use the bash tool to run 'npx tsx math.test.ts' to verify it passes.`,
    },
    outputs: {
      expected_file: "/workspace/math.ts",
      expected_test_execution: true,
    },
  },
  // 3. Capture a directory listing in a file.
  {
    inputs: {
      instruction: "List all files in the current project root directory and save the output to /workspace/ls.txt",
    },
    outputs: {
      expected_file: "/workspace/ls.txt",
    },
  },
];
|
|
39
|
-
|
|
40
|
-
/**
 * Makes sure the baseline dataset exists in LangSmith, creating and seeding it
 * with BASELINE_EXAMPLES when the lookup reports that it is missing.
 *
 * @returns The dataset name (DATASET_NAME) in both the "already exists" and
 *   the "just created" case.
 * @throws Re-throws any lookup error that is not a "not found" / 404 failure,
 *   as well as errors raised while creating the dataset or its examples.
 */
export async function ensureBaselineDataset(): Promise<string> {
  try {
    const existing = await client.readDataset({ datasetName: DATASET_NAME });
    console.log(`[Eval] Dataset '${DATASET_NAME}' already exists (ID: ${existing.id}).`);
    return DATASET_NAME;
  } catch (error: any) {
    // Only a missing dataset is recoverable; everything else propagates.
    const datasetIsMissing = error?.message?.includes("not found") || error?.status === 404;
    if (!datasetIsMissing) {
      throw error;
    }

    console.log(`[Eval] Creating dataset '${DATASET_NAME}' from scratch...`);
    const created = await client.createDataset(DATASET_NAME, {
      description: "Baseline tasks to evaluate Joone's core sandbox, tool routing, and reasoning precision.",
    });

    // Examples are uploaded one at a time, in declaration order.
    for (const { inputs, outputs } of BASELINE_EXAMPLES) {
      await client.createExample(inputs, outputs, { datasetId: created.id });
    }

    console.log(`[Eval] Successfully seeded dataset '${DATASET_NAME}' with ${BASELINE_EXAMPLES.length} examples.`);
    return DATASET_NAME;
  }
}
|
package/src/evals/evaluator.ts
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
import { Run, Example } from "langsmith";
|
|
2
|
-
import { EvaluationResult } from "langsmith/evaluation";
|
|
3
|
-
|
|
4
|
-
/**
 * Custom evaluator: execution success.
 *
 * Scores a run by whether its trace carries an error.
 *
 * @param run - The run to inspect; only `run.error` is read.
 * @param example - Unused.
 * @returns Score 0 with the error as the comment when `run.error` is truthy,
 *   otherwise score 1.
 */
export async function successEvaluator(run: Run, example?: Example): Promise<EvaluationResult> {
  // A populated error field means the harness threw an unhandled exception.
  if (run.error) {
    return {
      key: "execution_success",
      score: 0,
      comment: run.error,
    };
  }

  return {
    key: "execution_success",
    score: 1,
    comment: "Agent completed execution loop cleanly.",
  };
}
|
|
18
|
-
|
|
19
|
-
/**
 * Custom evaluator: cache efficiency.
 *
 * Reports the share of prompt tokens that were served from the prompt cache,
 * computed as `cacheReadTokens / promptTokens` from `run.outputs.metrics`.
 *
 * Note: requires the harness to attach a `metrics` object (with `totalTokens`,
 * `promptTokens`, `cacheCreationTokens` and `cacheReadTokens`) to the run output.
 *
 * @param run - The run to inspect; only `run.outputs.metrics` is read.
 * @param example - Unused.
 * @returns
 *   - score `null` when metrics are absent, `totalTokens` is falsy, or
 *     `promptTokens` is missing / not a positive finite number;
 *   - score `0` when no cache creation or read tokens were recorded;
 *   - otherwise the hit rate as a fraction.
 */
export async function cacheEfficiencyEvaluator(run: Run, example?: Example): Promise<EvaluationResult> {
  const outputs = run.outputs || {};
  const metrics = outputs.metrics; // Metrics are attached to the harness output

  if (!metrics || !metrics.totalTokens) {
    return {
      key: "cache_hit_rate",
      score: null, // N/A (e.g., OpenAI or missing data)
      comment: "No token metrics found in run output.",
    };
  }

  const creationTokens = metrics.cacheCreationTokens || 0;
  const readTokens = metrics.cacheReadTokens || 0;

  if (creationTokens === 0 && readTokens === 0) {
    return {
      key: "cache_hit_rate",
      score: 0,
      comment: "Prompt caching is not active or not supported by this provider.",
    };
  }

  // The earlier guard only checked totalTokens; the divisor is promptTokens, so
  // validate it separately to avoid a NaN / Infinity score.
  const totalInputTokens = metrics.promptTokens;
  const denominator = Number(totalInputTokens);
  if (!Number.isFinite(denominator) || denominator <= 0) {
    return {
      key: "cache_hit_rate",
      score: null,
      comment: "Prompt token count is missing or zero; cache hit rate cannot be computed.",
    };
  }

  const hitRate = readTokens / totalInputTokens;

  return {
    key: "cache_hit_rate",
    score: hitRate,
    comment: `Cache Hit Rate: ${(hitRate * 100).toFixed(1)}% (${readTokens} / ${totalInputTokens} input tokens)`,
  };
}
|
|
58
|
-
|
|
59
|
-
/**
 * Custom evaluator: output artifact check.
 *
 * Compares the example's `expected_file` against the `fileManifest` reported in
 * the run output.
 *
 * @param run - The run to inspect; only `run.outputs.fileManifest` is read.
 * @param example - Dataset example; supplies `outputs.expected_file`.
 * @returns Score `null` when the example names no expected file, otherwise 1 if
 *   the manifest includes that file and 0 if it does not.
 */
export async function filePresenceEvaluator(run: Run, example?: Example): Promise<EvaluationResult> {
  const expectedFile = example?.outputs?.expected_file;
  if (!expectedFile) {
    return { key: "expected_file_created", score: null };
  }

  // A missing outputs object or manifest is treated as an empty manifest.
  const fileManifest = (run.outputs || {}).fileManifest || [];

  if (fileManifest.includes(expectedFile)) {
    return {
      key: "expected_file_created",
      score: 1,
      comment: `File ${expectedFile} created successfully.`,
    };
  }

  return {
    key: "expected_file_created",
    score: 0,
    comment: `Failed to create expected file: ${expectedFile}`,
  };
}
|