@bluecopa/harness 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/README.md +212 -117
  2. package/dist/arc/index.d.ts +796 -0
  3. package/dist/arc/index.js +2863 -0
  4. package/dist/arc/index.js.map +1 -0
  5. package/dist/observability/otel.d.ts +36 -0
  6. package/dist/observability/otel.js +73 -0
  7. package/dist/observability/otel.js.map +1 -0
  8. package/dist/shared-types-DRxnerLT.d.ts +138 -0
  9. package/dist/skills/index.d.ts +67 -0
  10. package/dist/skills/index.js +282 -0
  11. package/dist/skills/index.js.map +1 -0
  12. package/package.json +26 -2
  13. package/AGENTS.md +0 -18
  14. package/docs/guides/observability.md +0 -32
  15. package/docs/guides/providers.md +0 -51
  16. package/docs/guides/skills.md +0 -25
  17. package/docs/security/skill-sandbox-threat-model.md +0 -20
  18. package/src/agent/create-agent.ts +0 -884
  19. package/src/agent/create-tools.ts +0 -33
  20. package/src/agent/step-executor.ts +0 -15
  21. package/src/agent/types.ts +0 -57
  22. package/src/context/llm-compaction-strategy.ts +0 -37
  23. package/src/context/prepare-step.ts +0 -65
  24. package/src/context/token-tracker.ts +0 -26
  25. package/src/extracted/manifest.json +0 -10
  26. package/src/extracted/prompts/compaction.md +0 -5
  27. package/src/extracted/prompts/system.md +0 -5
  28. package/src/extracted/tools.json +0 -82
  29. package/src/hooks/hook-runner.ts +0 -22
  30. package/src/hooks/tool-wrappers.ts +0 -64
  31. package/src/interfaces/compaction-strategy.ts +0 -18
  32. package/src/interfaces/hooks.ts +0 -24
  33. package/src/interfaces/sandbox-provider.ts +0 -29
  34. package/src/interfaces/session-store.ts +0 -48
  35. package/src/interfaces/tool-provider.ts +0 -70
  36. package/src/loop/bridge.ts +0 -363
  37. package/src/loop/context-store.ts +0 -207
  38. package/src/loop/lcm-tool-loop.ts +0 -163
  39. package/src/loop/vercel-agent-loop.ts +0 -279
  40. package/src/observability/context.ts +0 -17
  41. package/src/observability/metrics.ts +0 -27
  42. package/src/observability/otel.ts +0 -105
  43. package/src/observability/tracing.ts +0 -13
  44. package/src/optimization/agent-evaluator.ts +0 -40
  45. package/src/optimization/config-serializer.ts +0 -16
  46. package/src/optimization/optimization-runner.ts +0 -39
  47. package/src/optimization/trace-collector.ts +0 -33
  48. package/src/permissions/permission-manager.ts +0 -34
  49. package/src/providers/composite-tool-provider.ts +0 -72
  50. package/src/providers/control-plane-e2b-executor.ts +0 -218
  51. package/src/providers/e2b-tool-provider.ts +0 -68
  52. package/src/providers/local-tool-provider.ts +0 -190
  53. package/src/providers/skill-sandbox-provider.ts +0 -46
  54. package/src/sessions/file-session-store.ts +0 -61
  55. package/src/sessions/in-memory-session-store.ts +0 -39
  56. package/src/sessions/session-manager.ts +0 -44
  57. package/src/skills/skill-loader.ts +0 -52
  58. package/src/skills/skill-manager.ts +0 -175
  59. package/src/skills/skill-router.ts +0 -99
  60. package/src/skills/skill-types.ts +0 -26
  61. package/src/subagents/subagent-manager.ts +0 -22
  62. package/src/subagents/task-tool.ts +0 -13
  63. package/tests/integration/agent-loop-basic.spec.ts +0 -56
  64. package/tests/integration/agent-skill-default-from-sandbox.spec.ts +0 -66
  65. package/tests/integration/concurrency-single-turn.spec.ts +0 -35
  66. package/tests/integration/otel-metrics-emission.spec.ts +0 -62
  67. package/tests/integration/otel-trace-propagation.spec.ts +0 -48
  68. package/tests/integration/parity-benchmark.spec.ts +0 -45
  69. package/tests/integration/provider-local-smoke.spec.ts +0 -63
  70. package/tests/integration/session-resume.spec.ts +0 -30
  71. package/tests/integration/skill-install-rollback.spec.ts +0 -64
  72. package/tests/integration/skill-sandbox-file-blob.spec.ts +0 -54
  73. package/tests/integration/skills-progressive-disclosure.spec.ts +0 -61
  74. package/tests/integration/streaming-compaction-boundary.spec.ts +0 -43
  75. package/tests/integration/structured-messages-agent.spec.ts +0 -265
  76. package/tests/integration/subagent-isolation.spec.ts +0 -24
  77. package/tests/security/skill-sandbox-isolation.spec.ts +0 -51
  78. package/tests/unit/create-tools-schema-parity.spec.ts +0 -22
  79. package/tests/unit/extracted-manifest.spec.ts +0 -41
  80. package/tests/unit/interfaces-contract.spec.ts +0 -101
  81. package/tests/unit/structured-messages.spec.ts +0 -176
  82. package/tests/unit/token-tracker.spec.ts +0 -22
  83. package/tsconfig.json +0 -14
  84. package/vitest.config.ts +0 -7
@@ -1,363 +0,0 @@
1
- import type { ToolProvider, ToolResult } from '../interfaces/tool-provider';
2
-
3
- // ── Request / response types ──
4
-
5
/** Payload of an `llm` request: run `prompt` against `model`. */
export interface LlmRequest {
  /** Correlation id chosen by the requester; echoed in the response file name. */
  id: string;
  model: string;
  prompt: string;
}

/** Payload of a `web_fetch` request. */
export interface WebFetchRequest {
  id: string;
  url: string;
}

/** Payload of a `web_search` request. */
export interface WebSearchRequest {
  id: string;
  query: string;
}

/** Payload of an `ask_user` request; the answer is returned to the requester. */
export interface AskUserRequest {
  id: string;
  question: string;
}

/** Payload of a `tell_user` request; no answer is expected. */
export interface TellUserRequest {
  id: string;
  message: string;
}

/**
 * One line of `requests.jsonl`, discriminated by `type`.
 *
 * Each variant is the matching request interface plus its `type` tag, so the
 * union cannot drift from the interfaces handed to the `BridgeConfig` callbacks.
 */
export type BridgeRequest =
  | ({ type: 'llm' } & LlmRequest)
  | ({ type: 'web_fetch' } & WebFetchRequest)
  | ({ type: 'web_search' } & WebSearchRequest)
  | ({ type: 'ask_user' } & AskUserRequest)
  | ({ type: 'tell_user' } & TellUserRequest);
37
-
38
- // ── Activity log entry ──
39
-
40
/**
 * One line of `activity.jsonl`, as written by the sandbox-side Python module.
 *
 * Which optional fields are present depends on the tool and the event:
 * bridged calls (`llm`, `web_fetch`, …) copy their request payload into the
 * `tool_start` entry, while local tools (`bash`, `read_file`, `write_file`)
 * log an `input` summary instead.
 */
export interface ActivityEntry {
  /** Python `time.time()` at the moment of logging (seconds, fractional). */
  ts: number;
  event: 'tool_start' | 'tool_end';
  /** Tool or request type, e.g. `bash`, `read_file`, `llm`, `web_fetch`. */
  tool: string;
  /** Truncated input summary logged by the local tools. */
  input?: string;
  /** Truncated result, present on `tool_end`. */
  output?: string;
  /** Process exit status, logged by `bash` on `tool_end`. */
  exit_code?: number;
  /** Payload fields of a bridged request, copied (truncated) into its `tool_start` entry. */
  model?: string;
  prompt?: string;
  url?: string;
  query?: string;
  question?: string;
  message?: string;
  /** Wall-clock duration in milliseconds, present on `tool_end`. */
  duration_ms?: number;
}
50
-
51
- // ── Config ──
52
-
53
/**
 * Options accepted by `SandboxBridge`.
 *
 * Every `on…Request` callback is optional. When the callback for a request
 * type is missing, the bridge answers that request with an `ERROR: …` string,
 * except for `tell_user`, which is acknowledged with `ok`.
 */
export interface BridgeConfig {
  /** Provider used to run commands and read/write files inside the sandbox. */
  toolProvider: ToolProvider;

  /** Directory inside the sandbox holding the bridge files. Defaults to `DEFAULT_BRIDGE_DIR`. */
  bridgeDir?: string;

  /** Delay between polls, in milliseconds. Defaults to `DEFAULT_POLL_INTERVAL`. */
  pollIntervalMs?: number;

  /** Fulfils an `llm` request; the resolved string is returned to the script. */
  onLlmRequest?(req: LlmRequest): Promise<string>;

  /** Fulfils a `web_fetch` request. */
  onWebFetchRequest?(req: WebFetchRequest): Promise<string>;

  /** Fulfils a `web_search` request. */
  onWebSearchRequest?(req: WebSearchRequest): Promise<string>;

  /** Fulfils an `ask_user` request with the user's answer. */
  onAskUserRequest?(req: AskUserRequest): Promise<string>;

  /** Handles a `tell_user` request; nothing is returned to the script beyond `ok`. */
  onTellUserRequest?(req: TellUserRequest): Promise<void>;

  /** Receives each new `activity.jsonl` entry as it is discovered by polling. */
  onActivity?(entry: ActivityEntry): void;
}
64
-
65
/** Bridge directory used when `BridgeConfig.bridgeDir` is not provided. */
const DEFAULT_BRIDGE_DIR = '/var/run/bridge';

/** Poll interval in milliseconds used when `BridgeConfig.pollIntervalMs` is not provided. */
const DEFAULT_POLL_INTERVAL = 200;
67
-
68
- // ── Python module template ──
69
-
70
/**
 * Render the source of the `harness_bridge` Python module that is written
 * into the sandbox.
 *
 * The module gives sandbox scripts two groups of helpers:
 * - bridged calls (`llm_query`, `web_fetch`, `web_search`, `ask_user`,
 *   `tell_user`) that append a request line to `requests.jsonl` and block
 *   until the harness writes `resp_<id>.json`;
 * - local helpers (`bash`, `read_file`, `write_file`) that run inside the
 *   sandbox.
 * Every helper logs `tool_start` / `tool_end` entries to `activity.jsonl`.
 *
 * @param bridgeDir Directory (inside the sandbox) holding the bridge files.
 *   It is embedded via `JSON.stringify`, so quotes and backslashes are escaped.
 * @returns Python source text.
 */
function bridgePythonModule(bridgeDir: string): string {
  // NOTE: backslashes are doubled because this is a JS template literal;
  // "\\n" below reaches Python as "\n".
  return `"""
harness_bridge — file-based IPC for sandbox REPL scripts.

All external I/O (LLM calls, web fetch, user interaction) is routed through
request/response files that the harness polls and fulfills.

Every operation is logged to activity.jsonl for real-time observability.
"""
import json, time, os, uuid, subprocess

BRIDGE_DIR = ${JSON.stringify(bridgeDir)}
ACTIVITY_FILE = os.path.join(BRIDGE_DIR, "activity.jsonl")
REQUESTS_FILE = os.path.join(BRIDGE_DIR, "requests.jsonl")

def _log_activity(event, detail):
    entry = {"ts": time.time(), "event": event, **detail}
    with open(ACTIVITY_FILE, "a") as f:
        f.write(json.dumps(entry) + "\\n")

def _call(req_type, payload):
    req_id = str(uuid.uuid4())[:8]
    _log_activity("tool_start", {"tool": req_type, **{k: str(v)[:200] for k, v in payload.items()}})
    start = time.time()
    with open(REQUESTS_FILE, "a") as f:
        f.write(json.dumps({"id": req_id, "type": req_type, **payload}) + "\\n")
    resp_file = os.path.join(BRIDGE_DIR, f"resp_{req_id}.json")
    while True:
        if os.path.exists(resp_file):
            try:
                with open(resp_file) as f:
                    result = json.load(f)["output"]
                break
            except (json.JSONDecodeError, KeyError):
                # The harness may still be writing the file; retry on the next tick.
                pass
        time.sleep(0.1)
    elapsed = int((time.time() - start) * 1000)
    _log_activity("tool_end", {"tool": req_type, "output": str(result)[:500], "duration_ms": elapsed})
    return result

def llm_query(prompt, model="claude-sonnet-4-5"):
    """Send a prompt to an LLM. The harness fulfills this via its API key."""
    return _call("llm", {"model": model, "prompt": prompt})

def web_fetch(url):
    """Fetch a URL. The harness fulfills this (sandbox has no network)."""
    return _call("web_fetch", {"url": url})

def web_search(query):
    """Web search. The harness fulfills this (sandbox has no network)."""
    return _call("web_search", {"query": query})

def ask_user(question):
    """Ask the user a question. The harness prompts in the terminal."""
    return _call("ask_user", {"question": question})

def tell_user(message):
    """Display a message to the user. The harness renders it."""
    _call("tell_user", {"message": message})

def bash(command):
    """Run a shell command locally in the sandbox."""
    _log_activity("tool_start", {"tool": "bash", "input": command[:200]})
    start = time.time()
    try:
        r = subprocess.run(command, shell=True, capture_output=True, text=True, timeout=300)
    except subprocess.TimeoutExpired:
        elapsed = int((time.time() - start) * 1000)
        _log_activity("tool_end", {"tool": "bash", "output": "ERROR: timed out after 300s", "duration_ms": elapsed})
        raise
    output = r.stdout if r.returncode == 0 else f"ERROR (exit {r.returncode}): {r.stderr}"
    elapsed = int((time.time() - start) * 1000)
    _log_activity("tool_end", {"tool": "bash", "exit_code": r.returncode, "output": output[:1000], "duration_ms": elapsed})
    return output

def read_file(path):
    """Read a file from the sandbox filesystem."""
    _log_activity("tool_start", {"tool": "read_file", "input": path})
    start = time.time()
    try:
        with open(path) as f:
            content = f.read()
        elapsed = int((time.time() - start) * 1000)
        _log_activity("tool_end", {"tool": "read_file", "output": f"{len(content)} chars", "duration_ms": elapsed})
        return content
    except Exception as e:
        elapsed = int((time.time() - start) * 1000)
        _log_activity("tool_end", {"tool": "read_file", "output": f"ERROR: {e}", "duration_ms": elapsed})
        return f"ERROR: {e}"

def write_file(path, content):
    """Write a file to the sandbox filesystem."""
    _log_activity("tool_start", {"tool": "write_file", "input": f"{path} ({len(content)} chars)"})
    start = time.time()
    try:
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        with open(path, "w") as f:
            f.write(content)
        elapsed = int((time.time() - start) * 1000)
        _log_activity("tool_end", {"tool": "write_file", "output": "ok", "duration_ms": elapsed})
    except Exception as e:
        elapsed = int((time.time() - start) * 1000)
        _log_activity("tool_end", {"tool": "write_file", "output": f"ERROR: {e}", "duration_ms": elapsed})
        raise
`;
}
166
-
167
- // ── Bridge implementation ──
168
-
169
/**
 * Host-side half of the file-based bridge between the harness and a Python
 * script running in the sandbox.
 *
 * The script appends JSON lines to `requests.jsonl` and `activity.jsonl`
 * inside `bridgeDir`. This class polls both files through the `ToolProvider`,
 * dispatches each request to the matching `BridgeConfig` callback, and writes
 * the answer to `resp_<id>.json`, which the script is waiting for.
 *
 * NOTE(review): `bridgeDir` and `scriptPath` are interpolated into shell
 * commands unquoted; they must not contain spaces or shell metacharacters.
 */
export class SandboxBridge {
  private readonly tp: ToolProvider;
  private readonly bridgeDir: string;
  private readonly pollInterval: number;
  private readonly config: BridgeConfig;
  /** Count of non-empty `requests.jsonl` lines already consumed. */
  private processedLineCount = 0;
  /** Count of non-empty `activity.jsonl` lines already consumed. */
  private activityLineCount = 0;

  constructor(config: BridgeConfig) {
    this.config = config;
    this.tp = config.toolProvider;
    this.bridgeDir = config.bridgeDir ?? DEFAULT_BRIDGE_DIR;
    this.pollInterval = config.pollIntervalMs ?? DEFAULT_POLL_INTERVAL;
  }

  /** Inject the bridge Python module and create the bridge directory in the sandbox. */
  async setup(): Promise<void> {
    await this.tp.bash(`mkdir -p ${this.bridgeDir}`);
    await this.tp.writeFile(
      `${this.bridgeDir}/harness_bridge.py`,
      bridgePythonModule(this.bridgeDir)
    );
    // Clear any stale files from a previous run
    await this.tp.bash(`rm -f ${this.bridgeDir}/requests.jsonl ${this.bridgeDir}/activity.jsonl ${this.bridgeDir}/resp_*.json`);
    this.processedLineCount = 0;
    this.activityLineCount = 0;
  }

  /**
   * Run the REPL script and poll for bridge requests until it completes.
   *
   * @param scriptPath Path of the Python script inside the sandbox.
   * @returns The script's combined stdout/stderr on success, otherwise a
   *   string starting with `REPL ERROR: `.
   */
  async pollUntilComplete(scriptPath: string): Promise<string> {
    // Start the script (long-running — use large timeout)
    const scriptPromise = this.tp.bash(
      `cd /workspace 2>/dev/null; PYTHONPATH=${this.bridgeDir}:$PYTHONPATH python3 ${scriptPath} 2>&1`,
      { timeout: 600_000 }
    );

    // Normalise a rejection into a failed ToolResult so `settled` never rejects.
    const settled: Promise<ToolResult> = scriptPromise.then(
      (result) => result,
      (err: unknown): ToolResult => ({
        success: false,
        output: '',
        error: err instanceof Error ? err.message : String(err),
      })
    );

    // Deliberately not awaited: the script must keep running while we poll.
    let scriptDone = false;
    void settled.then(() => {
      scriptDone = true;
    });

    while (!scriptDone) {
      await this.pollOnce();
      await sleep(this.pollInterval);
    }

    // Final poll to catch any trailing requests/activity
    await this.pollOnce();

    const scriptResult = await settled;
    return scriptResult.success
      ? scriptResult.output
      : `REPL ERROR: ${scriptResult.error ?? scriptResult.output}`;
  }

  // ── internal polling ──

  /** Poll the request and activity files once, concurrently. */
  private async pollOnce(): Promise<void> {
    await Promise.all([
      this.pollRequests(),
      this.pollActivity(),
    ]);
  }

  /**
   * Read `requests.jsonl` and fulfil every request line not seen before.
   *
   * A final line that does not parse is assumed to be mid-write and is left
   * for the next poll instead of being counted as processed. The counter
   * advances one line at a time, before the callback runs, so a failing
   * request is not retried and does not swallow the requests after it.
   */
  private async pollRequests(): Promise<void> {
    const result = await this.tp.readFile(`${this.bridgeDir}/requests.jsonl`);
    if (!result.success || !result.output.trim()) return;

    const lines = result.output.split('\n').filter(Boolean);
    // The file shrank (e.g. it was recreated): resynchronise with its length.
    if (this.processedLineCount > lines.length) {
      this.processedLineCount = lines.length;
    }

    for (let i = this.processedLineCount; i < lines.length; i++) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(lines[i] ?? '');
      } catch {
        if (i === lines.length - 1) break; // possibly half-written; retry next poll
        this.processedLineCount = i + 1; // malformed line in the middle: skip for good
        continue;
      }
      this.processedLineCount = i + 1;

      // Without a usable id there is no response file anyone could be waiting on.
      if (!SandboxBridge.hasUsableId(parsed)) continue;
      await this.fulfillRequest(parsed as BridgeRequest);
    }
  }

  /**
   * Read `activity.jsonl` and forward every new entry to `onActivity`.
   * Does nothing when no `onActivity` callback is configured.
   */
  private async pollActivity(): Promise<void> {
    if (!this.config.onActivity) return;

    const result = await this.tp.readFile(`${this.bridgeDir}/activity.jsonl`);
    if (!result.success || !result.output.trim()) return;

    const lines = result.output.split('\n').filter(Boolean);
    if (this.activityLineCount > lines.length) {
      this.activityLineCount = lines.length;
    }

    for (let i = this.activityLineCount; i < lines.length; i++) {
      let entry: ActivityEntry;
      try {
        entry = JSON.parse(lines[i] ?? '') as ActivityEntry;
      } catch {
        if (i === lines.length - 1) break; // possibly half-written; retry next poll
        this.activityLineCount = i + 1;
        continue;
      }
      this.activityLineCount = i + 1;

      try {
        this.config.onActivity(entry);
      } catch {
        // Observability is best-effort: an observer error must not stop the bridge.
      }
    }
  }

  /**
   * Dispatch one request to its callback and write `resp_<id>.json`.
   * Callback errors and unconfigured callbacks are reported to the script as
   * an `ERROR: …` output rather than thrown.
   */
  private async fulfillRequest(req: BridgeRequest): Promise<void> {
    let output: string;

    try {
      switch (req.type) {
        case 'llm':
          if (!this.config.onLlmRequest) {
            output = 'ERROR: LLM callback not configured';
            break;
          }
          output = await this.config.onLlmRequest({
            id: req.id,
            model: req.model,
            prompt: req.prompt,
          });
          break;

        case 'web_fetch':
          if (!this.config.onWebFetchRequest) {
            output = 'ERROR: WebFetch callback not configured';
            break;
          }
          output = await this.config.onWebFetchRequest({
            id: req.id,
            url: req.url,
          });
          break;

        case 'web_search':
          if (!this.config.onWebSearchRequest) {
            output = 'ERROR: WebSearch callback not configured';
            break;
          }
          output = await this.config.onWebSearchRequest({
            id: req.id,
            query: req.query,
          });
          break;

        case 'ask_user':
          if (!this.config.onAskUserRequest) {
            output = 'ERROR: AskUser callback not configured';
            break;
          }
          output = await this.config.onAskUserRequest({
            id: req.id,
            question: req.question,
          });
          break;

        case 'tell_user':
          if (this.config.onTellUserRequest) {
            await this.config.onTellUserRequest({
              id: req.id,
              message: req.message,
            });
          }
          output = 'ok';
          break;

        default: {
          // Reachable at runtime only: the request came from untyped JSON.
          const unknownType = (req as { type?: unknown }).type;
          output = `ERROR: unknown request type "${String(unknownType)}"`;
        }
      }
    } catch (err) {
      output = `ERROR: ${err instanceof Error ? err.message : String(err)}`;
    }

    // Write response file so the Python side unblocks
    await this.tp.writeFile(
      `${this.bridgeDir}/resp_${(req as { id: string }).id}.json`,
      JSON.stringify({ output })
    );
  }

  /**
   * True when `value` is an object whose `id` is a string that is safe to
   * embed in the response file name (no path separators or NUL).
   */
  private static hasUsableId(value: unknown): value is { id: string } {
    if (typeof value !== 'object' || value === null) return false;
    const id = (value as { id?: unknown }).id;
    return typeof id === 'string' && !/[\\/\0]/.test(id);
  }
}
360
-
361
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
@@ -1,207 +0,0 @@
1
- import type { AgentMessage } from '../agent/types';
2
-
3
/** Tuning knobs for `LosslessContextStore`; every field is optional. */
export interface ContextStoreConfig {
  /**
   * Token budget the trimmed view is built against.
   * Default: 150_000
   */
  maxTokenBudget?: number;

  /**
   * Share of the budget (0–1) above which the view gets trimmed.
   * Default: 0.80
   */
  trimThreshold?: number;

  /**
   * Tool outputs longer than this many characters are replaced by a stub.
   * Default: 500
   */
  stubThreshold?: number;
}
11
-
12
/** Fallback for `ContextStoreConfig.maxTokenBudget`. */
const DEFAULT_MAX_BUDGET = 150_000;

/** Fallback for `ContextStoreConfig.trimThreshold`. */
const DEFAULT_TRIM_THRESHOLD = 0.8;

/** Fallback for `ContextStoreConfig.stubThreshold` (characters). */
const DEFAULT_STUB_THRESHOLD = 500;

/** Characters assumed per token when estimating token counts. */
const CHARS_PER_TOKEN = 4;

/** Share of the token budget given to the most recent messages (the hot zone). */
const HOT_ZONE_RATIO = 0.6;
17
-
18
/**
 * Lossless context store.
 *
 * Stores every AgentMessage verbatim in `raw` and produces a trimmed `view`
 * that fits within a token budget. The newest messages (the hot zone) are
 * always passed through untouched. Older messages (the cold zone) are
 * reduced by stubbing mechanical overhead in tool results: long outputs,
 * base64 blobs, ANSI codes.
 *
 * Non-tool messages (user, assistant, system, …) are **never rewritten**.
 * They can still be left out of the view when the cold-zone budget is
 * exhausted; `getRaw()` always returns the complete history.
 */
export class LosslessContextStore {
  private raw: AgentMessage[] = [];
  private readonly maxBudget: number;
  private readonly trimThreshold: number;
  private readonly stubThreshold: number;

  constructor(config: ContextStoreConfig = {}) {
    this.maxBudget = config.maxTokenBudget ?? DEFAULT_MAX_BUDGET;
    this.trimThreshold = config.trimThreshold ?? DEFAULT_TRIM_THRESHOLD;
    this.stubThreshold = config.stubThreshold ?? DEFAULT_STUB_THRESHOLD;
  }

  /**
   * Merge a batch of messages into the store.
   *
   * When `messages` starts with everything already stored (same `role` and
   * `content` at every stored index) only the new tail is appended. Otherwise
   * the conversation is treated as reset or diverged, and the stored history
   * is replaced by a copy of `messages`. An empty batch is ignored.
   */
  ingest(messages: AgentMessage[]): void {
    if (messages.length === 0) return;

    if (messages.length >= this.raw.length && this.isPrefixMatch(messages)) {
      // Loop instead of push(...spread) so very long histories cannot exceed
      // the engine's argument-count limit.
      for (const fresh of messages.slice(this.raw.length)) {
        this.raw.push(fresh);
      }
    } else {
      // Full replacement (conversation was reset or diverged)
      this.raw = [...messages];
    }
  }

  /**
   * View of the history for the model: the raw history (shallow copy) while
   * the estimate is within `maxBudget * trimThreshold`, else a trimmed view.
   */
  getView(): AgentMessage[] {
    const estimated = this.estimateTokens();
    const threshold = this.maxBudget * this.trimThreshold;

    if (estimated <= threshold) {
      return [...this.raw];
    }

    return this.buildTrimmedView();
  }

  /** Full unmodified history (shallow copy). */
  getRaw(): AgentMessage[] {
    return [...this.raw];
  }

  /** Approximate token count of the raw store. */
  estimateTokens(): number {
    return this.estimateTokensFor(this.raw);
  }

  /** Message and estimated-token counts of the raw store and the current view, for logging. */
  stats(): { raw: number; view: number; tokensRaw: number; tokensView: number } {
    const view = this.getView();
    return {
      raw: this.raw.length,
      view: view.length,
      tokensRaw: this.estimateTokensFor(this.raw),
      tokensView: this.estimateTokensFor(view),
    };
  }

  // ── internals ──

  /**
   * True when every stored message equals (by `role` and `content`) the
   * incoming message at the same index. Callers must make sure `incoming` is
   * at least as long as the stored history.
   */
  private isPrefixMatch(incoming: AgentMessage[]): boolean {
    return this.raw.every((existing, i) => {
      const candidate = incoming[i];
      return (
        candidate !== undefined &&
        existing.role === candidate.role &&
        existing.content === candidate.content
      );
    });
  }

  /** Estimate tokens for a list: total characters (content + role + 4 per message) / CHARS_PER_TOKEN, rounded up once. */
  private estimateTokensFor(msgs: AgentMessage[]): number {
    let chars = 0;
    for (const m of msgs) {
      chars += m.content.length + m.role.length + 4; // role + separators
    }
    return Math.ceil(chars / CHARS_PER_TOKEN);
  }

  /**
   * Build the trimmed view: cold-zone messages (possibly stubbed or omitted)
   * followed by the untouched hot zone.
   */
  private buildTrimmedView(): AgentMessage[] {
    const hotBudgetTokens = Math.floor(this.maxBudget * HOT_ZONE_RATIO);
    const coldBudgetTokens = this.maxBudget - hotBudgetTokens;

    // ── 1. Determine hot zone boundary (work backward from end) ──
    let hotTokens = 0;
    let hotStart = this.raw.length;
    for (let i = this.raw.length - 1; i >= 0; i--) {
      const current = this.raw[i];
      if (current === undefined) break;
      const msgTokens = this.estimateMessageTokens(current);
      if (hotTokens + msgTokens > hotBudgetTokens) break;
      hotTokens += msgTokens;
      hotStart = i;
    }

    const hotZone = this.raw.slice(hotStart);
    const coldZone = this.raw.slice(0, hotStart);

    if (coldZone.length === 0) return [...hotZone];

    // ── 2. Collect the tool names that still appear in the hot zone ──
    // Tool results are formatted as "ToolName: output" by create-agent
    const liveToolNames = new Set<string>();
    for (const m of hotZone) {
      if (m.role !== 'tool') continue;
      const colonIdx = m.content.indexOf(':');
      if (colonIdx > 0) {
        liveToolNames.add(m.content.slice(0, colonIdx));
      }
    }

    // ── 3. Walk the cold zone oldest-first, keeping what fits the budget ──
    // A message that does not fit is skipped; later, smaller ones may still fit.
    const trimmedCold: AgentMessage[] = [];
    let coldTokens = 0;

    for (const m of coldZone) {
      // Only tool results are ever rewritten; every other role is kept verbatim.
      const candidate = m.role === 'tool' ? this.trimColdToolMessage(m, liveToolNames) : m;
      if (candidate === undefined) continue;

      // Same estimator for every role, so the view is budgeted consistently.
      const tokens = this.estimateMessageTokens(candidate);
      if (coldTokens + tokens <= coldBudgetTokens) {
        trimmedCold.push(candidate);
        coldTokens += tokens;
      }
    }

    return [...trimmedCold, ...hotZone];
  }

  /**
   * Reduce one cold-zone tool result.
   *
   * @returns `undefined` when the result should be dropped (its tool name is
   *   not present in the hot zone and its output exceeds twice the stub
   *   threshold); otherwise a copy whose content is stubbed when the output
   *   exceeds the stub threshold, with base64 data URIs and ANSI codes stripped.
   */
  private trimColdToolMessage(
    m: AgentMessage,
    liveToolNames: ReadonlySet<string>
  ): AgentMessage | undefined {
    const colonIdx = m.content.indexOf(':');
    const toolName = colonIdx > 0 ? m.content.slice(0, colonIdx) : '';
    // +2 skips the ": " separator that follows the tool name.
    const toolOutput = colonIdx > 0 ? m.content.slice(colonIdx + 2) : m.content;

    if (!liveToolNames.has(toolName) && toolOutput.length > this.stubThreshold * 2) {
      return undefined;
    }

    let content = m.content;
    if (toolOutput.length > this.stubThreshold) {
      const stub = `[output truncated: ${toolOutput.length} chars]`;
      // No "Name:" prefix on the original means none on the stub either.
      content = toolName ? `${toolName}: ${stub}` : stub;
    }

    return { ...m, content: this.stripMechanicalOverhead(content) };
  }

  /** Estimate tokens for one message (content + role + 4 separator characters). */
  private estimateMessageTokens(m: AgentMessage): number {
    return Math.ceil((m.content.length + m.role.length + 4) / CHARS_PER_TOKEN);
  }

  /** Replace long base64 data URIs with a placeholder and remove ANSI escape sequences. */
  private stripMechanicalOverhead(content: string): string {
    // Strip base64 data URIs
    let cleaned = content.replace(/data:[^;]+;base64,[A-Za-z0-9+/=]{100,}/g, '[base64 data removed]');
    // Strip ANSI escape codes
    cleaned = cleaned.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, '');
    return cleaned;
  }
}