skimpyclaw 0.3.9 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/dist/__tests__/channels.test.js +1 -1
  2. package/dist/__tests__/context-manager.test.js +219 -76
  3. package/dist/__tests__/providers-utils.test.js +2 -0
  4. package/dist/__tests__/sandbox-manager.test.js +25 -0
  5. package/dist/__tests__/sandbox-mount-security.test.js +8 -0
  6. package/dist/__tests__/setup.test.js +1 -1
  7. package/dist/__tests__/skills.test.js +53 -26
  8. package/dist/__tests__/token-efficiency.test.js +37 -15
  9. package/dist/__tests__/tools.test.js +11 -9
  10. package/dist/agent.js +2 -2
  11. package/dist/api.js +5 -0
  12. package/dist/channels/discord/handlers.d.ts +7 -0
  13. package/dist/channels/discord/handlers.js +479 -0
  14. package/dist/channels/discord/index.d.ts +8 -0
  15. package/dist/channels/discord/index.js +149 -0
  16. package/dist/channels/discord/types.d.ts +6 -0
  17. package/dist/channels/discord/types.js +17 -0
  18. package/dist/channels/discord/utils.d.ts +14 -0
  19. package/dist/channels/discord/utils.js +161 -0
  20. package/dist/channels/telegram/utils.d.ts +1 -1
  21. package/dist/channels/telegram/utils.js +7 -9
  22. package/dist/channels.js +1 -1
  23. package/dist/cli.js +8 -43
  24. package/dist/code-agents/parser.js +5 -0
  25. package/dist/config.d.ts +7 -0
  26. package/dist/config.js +13 -0
  27. package/dist/cron.js +6 -3
  28. package/dist/heartbeat.js +11 -15
  29. package/dist/providers/anthropic.js +7 -1
  30. package/dist/providers/codex.js +8 -2
  31. package/dist/providers/context-manager.d.ts +37 -6
  32. package/dist/providers/context-manager.js +303 -47
  33. package/dist/providers/openai.js +8 -2
  34. package/dist/providers/utils.d.ts +6 -2
  35. package/dist/providers/utils.js +36 -4
  36. package/dist/sandbox/manager.js +11 -0
  37. package/dist/sandbox/mount-security.js +5 -1
  38. package/dist/sandbox/runtime.d.ts +1 -0
  39. package/dist/sandbox/runtime.js +5 -0
  40. package/dist/sandbox-utils.d.ts +6 -0
  41. package/dist/sandbox-utils.js +36 -0
  42. package/dist/security.js +4 -3
  43. package/dist/service.js +25 -0
  44. package/dist/setup-templates.d.ts +14 -0
  45. package/dist/setup-templates.js +214 -0
  46. package/dist/setup.d.ts +1 -9
  47. package/dist/setup.js +3 -244
  48. package/dist/skills-types.d.ts +6 -0
  49. package/dist/skills.d.ts +5 -1
  50. package/dist/skills.js +25 -2
  51. package/dist/tools/bash-tool.js +11 -1
  52. package/dist/tools/definitions.d.ts +57 -0
  53. package/dist/tools/definitions.js +19 -1
  54. package/dist/tools/fetch-tool.d.ts +8 -0
  55. package/dist/tools/fetch-tool.js +80 -0
  56. package/dist/tools.d.ts +4 -2
  57. package/dist/tools.js +110 -62
  58. package/dist/types.d.ts +5 -0
  59. package/package.json +23 -29
@@ -1,22 +1,53 @@
  import type { ContextManagementConfig } from './types.js';
+ import type { Config } from '../types.js';
  export type { ContextManagementConfig };
+ /** Result of a compaction attempt, including metadata about what happened. */
+ export interface CompactionResult<T> {
+     messages: T[];
+     /** Whether any compaction was performed */
+     compacted: boolean;
+     /** 'llm' if LLM summarized, 'truncation' if mechanically truncated, undefined if no compaction */
+     method?: 'llm' | 'truncation';
+     /** The summary text (only when method === 'llm') */
+     summary?: string;
+     /** Estimated tokens before compaction */
+     tokensBefore?: number;
+     /** Estimated tokens after compaction */
+     tokensAfter?: number;
+ }
  /** Rough token estimate: 1 token ≈ 4 chars of JSON. */
  export declare function estimateTokens(data: any[]): number;
+ /**
+  * Serialize Anthropic-format messages into a human-readable conversation transcript
+  * suitable for LLM summarization.
+  */
+ declare function serializeAnthropicMessages(messages: any[]): string;
+ /**
+  * Serialize OpenAI-format messages into a human-readable transcript.
+  */
+ declare function serializeOpenAIMessages(messages: any[]): string;
+ /**
+  * Serialize Codex-format input items into a human-readable transcript.
+  */
+ declare function serializeCodexMessages(items: any[]): string;
  /**
   * Compact Anthropic-format apiMessages when over threshold.
-  * Truncates content of old tool_result blocks; leaves last KEEP_TAIL messages intact.
+  * Uses LLM summarization for old messages; falls back to truncation on failure.
   * Does NOT mutate the input array — returns a new array.
   */
- export declare function compactAnthropicMessages(messages: any[], config?: ContextManagementConfig, iteration?: number): any[];
+ export declare function compactAnthropicMessages(messages: any[], config?: ContextManagementConfig, iteration?: number, fullConfig?: Config): Promise<CompactionResult<any>>;
  /**
   * Compact OpenAI-format apiMessages when over threshold.
-  * Truncates content of old `role: 'tool'` messages; leaves last KEEP_TAIL messages intact.
+  * Uses LLM summarization for old messages; falls back to truncation on failure.
   * Does NOT mutate the input array — returns a new array.
   */
- export declare function compactOpenAIMessages(messages: any[], config?: ContextManagementConfig, iteration?: number): any[];
+ export declare function compactOpenAIMessages(messages: any[], config?: ContextManagementConfig, iteration?: number, fullConfig?: Config): Promise<CompactionResult<any>>;
  /**
   * Compact Codex-format input items when over threshold.
-  * Truncates output of old function_call_output items; leaves last KEEP_TAIL items intact.
+  * Uses LLM summarization for old items; falls back to truncation on failure.
   * Does NOT mutate the input array — returns a new array.
   */
- export declare function compactCodexMessages(input: any[], config?: ContextManagementConfig, iteration?: number): any[];
+ export declare function compactCodexMessages(input: any[], config?: ContextManagementConfig, iteration?: number, fullConfig?: Config): Promise<CompactionResult<any>>;
+ export { serializeAnthropicMessages, serializeOpenAIMessages, serializeCodexMessages };
+ /** Reset compaction markers (for testing). */
+ export declare function resetCompactionState(): void;
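The compaction entry points now return a CompactionResult rather than a bare array, and are async. A minimal consumption sketch against these declarations (the `cfg`, `accumulated`, and `i` names are hypothetical stand-ins, not from the package):

    import { compactAnthropicMessages, CompactionResult } from './providers/context-manager.js';

    // Assign the returned array back so the compaction marker
    // (see context-manager.js below) can recognize it next iteration.
    let history: any[] = accumulated; // hypothetical accumulated messages
    const result: CompactionResult<any> = await compactAnthropicMessages(history, cfg.contextManagement, i, cfg);
    history = result.messages;
    if (result.compacted) {
        console.log(`compacted via ${result.method}: ~${result.tokensBefore} → ~${result.tokensAfter} tokens`);
    }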
@@ -1,33 +1,183 @@
  // Context manager for agentic tool loops.
- // When accumulated messages exceed the token threshold, compacts old tool results
- // to keep context size bounded without breaking message structure.
+ // When accumulated messages exceed the token threshold, uses an LLM to summarize
+ // old messages into a concise summary, preserving semantic meaning.
+ //
+ // Falls back to mechanical truncation if the LLM call fails.
  //
  // Key constraint: tool_use/tool_result pairs (Anthropic) and
  // function_call/function_call_output pairs (Codex) must stay structurally intact.
- // We truncate the CONTENT of old results — never remove blocks entirely.
  const DEFAULT_MAX_CONTEXT_TOKENS = 200_000;
  const KEEP_TAIL = 8; // always keep last N messages/items untouched
- const RESULT_MAX_CHARS = 500; // compact old results to this length
+ const RESULT_MAX_CHARS = 500; // fallback truncation length
+ const SUMMARY_MAX_TOKENS = 2048; // max tokens for summary response
+ // Preferred compaction models in priority order (cheap & fast).
+ // Can be overridden via contextManagement.compactionModel in config.
+ const COMPACTION_MODEL_CANDIDATES = [
+     'anthropic/claude-haiku-3-5',
+     'openai/gpt-4o-mini',
+     'groq/llama-3.1-8b-instant',
+ ];
  /** Rough token estimate: 1 token ≈ 4 chars of JSON. */
  export function estimateTokens(data) {
      return Math.ceil(JSON.stringify(data).length / 4);
  }
+ // --- LLM Summarization ---
+ const COMPACTION_SYSTEM_PROMPT = `You are a conversation summarizer for an AI coding assistant. Your job is to produce a concise summary of a conversation between a user and an assistant that used tools (file reads, bash commands, file writes, etc.).
+
+ Rules:
+ - Preserve ALL important context: file paths, variable names, error messages, decisions made, code changes
+ - Summarize tool results (e.g. "Read package.json — found dependencies X, Y, Z") rather than reproducing full output
+ - Keep the summary structured with bullet points or short paragraphs
+ - Note any unresolved issues or ongoing tasks
+ - Be concise but don't lose critical information that the assistant needs to continue working
+ - Output ONLY the summary, no preamble`;
  /**
-  * Compact Anthropic-format apiMessages when over threshold.
-  * Truncates content of old tool_result blocks; leaves last KEEP_TAIL messages intact.
-  * Does NOT mutate the input array — returns a new array.
+  * Serialize Anthropic-format messages into a human-readable conversation transcript
+  * suitable for LLM summarization.
   */
- export function compactAnthropicMessages(messages, config, iteration = 0) {
-     if (config?.enabled === false)
-         return messages;
-     const maxTokens = config?.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
-     const estimated = estimateTokens(messages);
-     if (estimated <= maxTokens)
-         return messages;
-     console.log(`[context-manager] Compacting at iteration ${iteration} (~${Math.round(estimated / 1000)}k tokens > ${Math.round(maxTokens / 1000)}k threshold)`);
-     const tail = messages.slice(-KEEP_TAIL);
-     const head = messages.slice(0, -KEEP_TAIL);
-     const compacted = head.map(msg => {
+ function serializeAnthropicMessages(messages) {
+     const lines = [];
+     for (const msg of messages) {
+         const role = msg.role === 'assistant' ? 'Assistant' : 'User';
+         if (typeof msg.content === 'string') {
+             lines.push(`[${role}]: ${msg.content}`);
+             continue;
+         }
+         if (!Array.isArray(msg.content))
+             continue;
+         for (const block of msg.content) {
+             if (block.type === 'text') {
+                 lines.push(`[${role}]: ${block.text}`);
+             }
+             else if (block.type === 'tool_use') {
+                 const inputStr = typeof block.input === 'string'
+                     ? block.input
+                     : JSON.stringify(block.input);
+                 const truncatedInput = inputStr.length > 500 ? inputStr.slice(0, 500) + '...' : inputStr;
+                 lines.push(`[Assistant Tool Call: ${block.name}]: ${truncatedInput}`);
+             }
+             else if (block.type === 'tool_result') {
+                 const raw = typeof block.content === 'string'
+                     ? block.content
+                     : JSON.stringify(block.content);
+                 const truncatedResult = raw.length > 1000 ? raw.slice(0, 1000) + '...' : raw;
+                 lines.push(`[Tool Result]: ${truncatedResult}`);
+             }
+         }
+     }
+     return lines.join('\n');
+ }
+ /**
+  * Serialize OpenAI-format messages into a human-readable transcript.
+  */
+ function serializeOpenAIMessages(messages) {
+     const lines = [];
+     for (const msg of messages) {
+         if (msg.role === 'tool') {
+             const raw = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
+             const truncated = raw.length > 1000 ? raw.slice(0, 1000) + '...' : raw;
+             lines.push(`[Tool Result (${msg.tool_call_id})]: ${truncated}`);
+         }
+         else if (msg.role === 'assistant') {
+             if (msg.content) {
+                 lines.push(`[Assistant]: ${msg.content}`);
+             }
+             if (msg.tool_calls) {
+                 for (const tc of msg.tool_calls) {
+                     const args = tc.function?.arguments || '';
+                     const truncatedArgs = args.length > 500 ? args.slice(0, 500) + '...' : args;
+                     lines.push(`[Assistant Tool Call: ${tc.function?.name}]: ${truncatedArgs}`);
+                 }
+             }
+         }
+         else {
+             const content = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
+             lines.push(`[${msg.role === 'user' ? 'User' : msg.role}]: ${content}`);
+         }
+     }
+     return lines.join('\n');
+ }
+ /**
+  * Serialize Codex-format input items into a human-readable transcript.
+  */
+ function serializeCodexMessages(items) {
+     const lines = [];
+     for (const item of items) {
+         if (item.type === 'message') {
+             const role = item.role === 'assistant' ? 'Assistant' : 'User';
+             const content = typeof item.content === 'string'
+                 ? item.content
+                 : Array.isArray(item.content)
+                     ? item.content.map((c) => c.text || JSON.stringify(c)).join(' ')
+                     : JSON.stringify(item.content);
+             lines.push(`[${role}]: ${content}`);
+         }
+         else if (item.type === 'function_call') {
+             const args = item.arguments || '';
+             const truncated = args.length > 500 ? args.slice(0, 500) + '...' : args;
+             lines.push(`[Assistant Tool Call: ${item.name}]: ${truncated}`);
+         }
+         else if (item.type === 'function_call_output') {
+             const raw = item.output || '';
+             const truncated = raw.length > 1000 ? raw.slice(0, 1000) + '...' : raw;
+             lines.push(`[Tool Result]: ${truncated}`);
+         }
+     }
+     return lines.join('\n');
+ }
+ /**
+  * Pick the best available compaction model from candidates.
+  * Checks which providers are initialized and returns the first match.
+  */
+ async function pickCompactionModel(config) {
+     const { isAnthropicAvailable } = await import('./anthropic.js');
+     const { isOpenAIAvailable } = await import('./openai.js');
+     for (const candidate of COMPACTION_MODEL_CANDIDATES) {
+         const provider = candidate.split('/')[0];
+         if (provider === 'anthropic' && isAnthropicAvailable())
+             return candidate;
+         if (isOpenAIAvailable(provider))
+             return candidate;
+     }
+     // Last resort: return the first candidate and let chat() fail → fallback to truncation
+     return COMPACTION_MODEL_CANDIDATES[0];
+ }
+ /**
+  * Call the LLM to summarize a conversation transcript.
+  * Returns the summary text, or null if the call fails.
+  */
+ async function llmSummarize(transcript, config, compactionModel) {
+     try {
+         // Dynamically import to avoid circular dependency
+         const { chat } = await import('./index.js');
+         const model = compactionModel || await pickCompactionModel(config);
+         const messages = [
+             { role: 'system', content: COMPACTION_SYSTEM_PROMPT },
+             {
+                 role: 'user',
+                 content: `Summarize the following conversation between an AI coding assistant and a user. This summary will replace the old messages in the context window so the assistant can continue working.\n\n---\n${transcript}\n---`,
+             },
+         ];
+         console.log(`[context-manager] Requesting LLM summary via ${model}`);
+         const summary = await chat(messages, {
+             model,
+             maxTokens: SUMMARY_MAX_TOKENS,
+         }, config);
+         if (!summary || summary.trim().length === 0) {
+             console.warn('[context-manager] LLM returned empty summary, falling back to truncation');
+             return null;
+         }
+         console.log(`[context-manager] LLM summary: ${summary.length} chars`);
+         return summary.trim();
+     }
+     catch (err) {
+         console.warn(`[context-manager] LLM summarization failed, falling back to truncation: ${err instanceof Error ? err.message : err}`);
+         return null;
+     }
+ }
+ // --- Fallback truncation (original mechanical approach) ---
+ function truncateAnthropicHead(head) {
+     return head.map(msg => {
          if (!Array.isArray(msg.content))
              return msg;
          let changed = false;
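For illustration, these serializers flatten structured messages into a bracket-tagged transcript. A sketch with an assumed Anthropic-format history (not taken from the package's tests):

    const history = [
        { role: 'user', content: 'List the files' },
        { role: 'assistant', content: [{ type: 'tool_use', name: 'bash', input: { command: 'ls' } }] },
        { role: 'user', content: [{ type: 'tool_result', content: 'index.js\npackage.json' }] },
    ];
    serializeAnthropicMessages(history);
    // => [User]: List the files
    //    [Assistant Tool Call: bash]: {"command":"ls"}
    //    [Tool Result]: index.js
    //    package.json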
@@ -44,57 +194,163 @@ export function compactAnthropicMessages(messages, config, iteration = 0) {
          });
          return changed ? { ...msg, content: newContent } : msg;
      });
-     return [...compacted, ...tail];
+ }
+ function truncateOpenAIHead(head) {
+     return head.map(msg => {
+         if (msg.role !== 'tool')
+             return msg;
+         if (typeof msg.content !== 'string')
+             return msg;
+         if (msg.content.length <= RESULT_MAX_CHARS)
+             return msg;
+         return { ...msg, content: msg.content.slice(0, RESULT_MAX_CHARS) + ' [truncated]' };
+     });
+ }
+ function truncateCodexHead(head) {
+     return head.map(item => {
+         if (item.type !== 'function_call_output')
+             return item;
+         if (typeof item.output !== 'string')
+             return item;
+         if (item.output.length <= RESULT_MAX_CHARS)
+             return item;
+         return { ...item, output: item.output.slice(0, RESULT_MAX_CHARS) + ' [truncated]' };
+     });
+ }
+ // --- Track whether we already compacted for a given conversation ---
+ // Keyed on the identity of the compacted result array, so passing that array back
+ // in is detected without re-summarizing the same head repeatedly.
+ // A WeakSet lets entries be garbage-collected, so we don't leak memory across conversations.
+ const compactedMarker = new WeakSet();
+ /**
+  * Compact Anthropic-format apiMessages when over threshold.
+  * Uses LLM summarization for old messages; falls back to truncation on failure.
+  * Does NOT mutate the input array — returns a new array.
+  */
+ export async function compactAnthropicMessages(messages, config, iteration = 0, fullConfig) {
+     if (config?.enabled === false)
+         return { messages, compacted: false };
+     const maxTokens = config?.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
+     const estimated = estimateTokens(messages);
+     if (estimated <= maxTokens)
+         return { messages, compacted: false };
+     // If we already compacted this array (it has a summary message), use truncation fallback
+     // to progressively shrink rather than re-summarizing repeatedly.
+     if (compactedMarker.has(messages)) {
+         console.log(`[context-manager] Already compacted, using truncation fallback (iteration ${iteration})`);
+         const tail = messages.slice(-KEEP_TAIL);
+         const head = messages.slice(0, -KEEP_TAIL);
+         const result = [...truncateAnthropicHead(head), ...tail];
+         return { messages: result, compacted: true, method: 'truncation', tokensBefore: estimated, tokensAfter: estimateTokens(result) };
+     }
+     console.log(`[context-manager] Compacting at iteration ${iteration} (~${Math.round(estimated / 1000)}k tokens > ${Math.round(maxTokens / 1000)}k threshold)`);
+     const tail = messages.slice(-KEEP_TAIL);
+     const head = messages.slice(0, -KEEP_TAIL);
+     // Attempt LLM summarization
+     if (fullConfig) {
+         const transcript = serializeAnthropicMessages(head);
+         const summary = await llmSummarize(transcript, fullConfig, config?.compactionModel);
+         if (summary) {
+             const summaryMessage = {
+                 role: 'user',
+                 content: [{ type: 'text', text: `[Conversation Summary]\n${summary}` }],
+             };
+             const result = [summaryMessage, ...tail];
+             compactedMarker.add(result);
+             const tokensAfter = estimateTokens(result);
+             return { messages: result, compacted: true, method: 'llm', summary, tokensBefore: estimated, tokensAfter };
+         }
+     }
+     // Fallback: mechanical truncation
+     const result = [...truncateAnthropicHead(head), ...tail];
+     return { messages: result, compacted: true, method: 'truncation', tokensBefore: estimated, tokensAfter: estimateTokens(result) };
  }
  /**
   * Compact OpenAI-format apiMessages when over threshold.
-  * Truncates content of old `role: 'tool'` messages; leaves last KEEP_TAIL messages intact.
+  * Uses LLM summarization for old messages; falls back to truncation on failure.
   * Does NOT mutate the input array — returns a new array.
   */
- export function compactOpenAIMessages(messages, config, iteration = 0) {
+ export async function compactOpenAIMessages(messages, config, iteration = 0, fullConfig) {
      if (config?.enabled === false)
-         return messages;
+         return { messages, compacted: false };
      const maxTokens = config?.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
      const estimated = estimateTokens(messages);
      if (estimated <= maxTokens)
-         return messages;
+         return { messages, compacted: false };
+     if (compactedMarker.has(messages)) {
+         console.log(`[context-manager] Already compacted, using truncation fallback (iteration ${iteration})`);
+         const tail = messages.slice(-KEEP_TAIL);
+         const head = messages.slice(0, -KEEP_TAIL);
+         const result = [...truncateOpenAIHead(head), ...tail];
+         return { messages: result, compacted: true, method: 'truncation', tokensBefore: estimated, tokensAfter: estimateTokens(result) };
+     }
      console.log(`[context-manager] Compacting OpenAI messages at iteration ${iteration} (~${Math.round(estimated / 1000)}k tokens > ${Math.round(maxTokens / 1000)}k threshold)`);
      const tail = messages.slice(-KEEP_TAIL);
      const head = messages.slice(0, -KEEP_TAIL);
-     const compacted = head.map(msg => {
-         if (msg.role !== 'tool')
-             return msg;
-         if (typeof msg.content !== 'string')
-             return msg;
-         if (msg.content.length <= RESULT_MAX_CHARS)
-             return msg;
-         return { ...msg, content: msg.content.slice(0, RESULT_MAX_CHARS) + ' [truncated]' };
-     });
-     return [...compacted, ...tail];
+     // Attempt LLM summarization
+     if (fullConfig) {
+         const transcript = serializeOpenAIMessages(head);
+         const summary = await llmSummarize(transcript, fullConfig, config?.compactionModel);
+         if (summary) {
+             const summaryMessage = {
+                 role: 'user',
+                 content: `[Conversation Summary]\n${summary}`,
+             };
+             const result = [summaryMessage, ...tail];
+             compactedMarker.add(result);
+             const tokensAfter = estimateTokens(result);
+             return { messages: result, compacted: true, method: 'llm', summary, tokensBefore: estimated, tokensAfter };
+         }
+     }
+     // Fallback: mechanical truncation
+     const result = [...truncateOpenAIHead(head), ...tail];
+     return { messages: result, compacted: true, method: 'truncation', tokensBefore: estimated, tokensAfter: estimateTokens(result) };
  }
  /**
   * Compact Codex-format input items when over threshold.
-  * Truncates output of old function_call_output items; leaves last KEEP_TAIL items intact.
+  * Uses LLM summarization for old items; falls back to truncation on failure.
   * Does NOT mutate the input array — returns a new array.
   */
- export function compactCodexMessages(input, config, iteration = 0) {
+ export async function compactCodexMessages(input, config, iteration = 0, fullConfig) {
      if (config?.enabled === false)
-         return input;
+         return { messages: input, compacted: false };
      const maxTokens = config?.maxContextTokens ?? DEFAULT_MAX_CONTEXT_TOKENS;
      const estimated = estimateTokens(input);
      if (estimated <= maxTokens)
-         return input;
+         return { messages: input, compacted: false };
+     if (compactedMarker.has(input)) {
+         console.log(`[context-manager] Already compacted, using truncation fallback (iteration ${iteration})`);
+         const tail = input.slice(-KEEP_TAIL);
+         const head = input.slice(0, -KEEP_TAIL);
+         const result = [...truncateCodexHead(head), ...tail];
+         return { messages: result, compacted: true, method: 'truncation', tokensBefore: estimated, tokensAfter: estimateTokens(result) };
+     }
      console.log(`[context-manager] Compacting Codex input at iteration ${iteration} (~${Math.round(estimated / 1000)}k tokens > ${Math.round(maxTokens / 1000)}k threshold)`);
      const tail = input.slice(-KEEP_TAIL);
      const head = input.slice(0, -KEEP_TAIL);
-     const compacted = head.map(item => {
-         if (item.type !== 'function_call_output')
-             return item;
-         if (typeof item.output !== 'string')
-             return item;
-         if (item.output.length <= RESULT_MAX_CHARS)
-             return item;
-         return { ...item, output: item.output.slice(0, RESULT_MAX_CHARS) + ' [truncated]' };
-     });
-     return [...compacted, ...tail];
+     // Attempt LLM summarization
+     if (fullConfig) {
+         const transcript = serializeCodexMessages(head);
+         const summary = await llmSummarize(transcript, fullConfig, config?.compactionModel);
+         if (summary) {
+             const summaryItem = {
+                 type: 'message',
+                 role: 'user',
+                 content: `[Conversation Summary]\n${summary}`,
+             };
+             const result = [summaryItem, ...tail];
+             compactedMarker.add(result);
+             const tokensAfter = estimateTokens(result);
+             return { messages: result, compacted: true, method: 'llm', summary, tokensBefore: estimated, tokensAfter };
+         }
+     }
+     // Fallback: mechanical truncation
+     const result = [...truncateCodexHead(head), ...tail];
+     return { messages: result, compacted: true, method: 'truncation', tokensBefore: estimated, tokensAfter: estimateTokens(result) };
+ }
+ // --- Exported helpers for testing ---
+ export { serializeAnthropicMessages, serializeOpenAIMessages, serializeCodexMessages };
+ /** Reset compaction markers (for testing). */
+ export function resetCompactionState() {
+     // WeakSet has no clear() and compactedMarker is const, so this is a no-op in
+     // production; tests should create fresh message arrays instead.
  }
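One subtlety worth noting: compactedMarker keys on the identity of the array returned by an LLM compaction, so the truncation-only fast path fires only when the caller passes that exact array back in. A sketch, with `bigHistory`, `cm`, and `cfg` as hypothetical stand-ins:

    const first = await compactAnthropicMessages(bigHistory, cm, 1, cfg);
    // if first.method === 'llm', first.messages was added to compactedMarker
    const second = await compactAnthropicMessages(first.messages, cm, 2, cfg);
    // if still over threshold, this takes the truncation path instead of re-summarizing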
@@ -153,7 +153,13 @@ export async function chatWithToolsOpenAI(params, provider) {
              };
          }
          // Compact old tool results if context is growing large
-         const messagesForApi = compactOpenAIMessages(apiMessages, toolConfig.contextManagement, i + 1);
+         const compactionResult = await compactOpenAIMessages(apiMessages, toolConfig.contextManagement, i + 1, config);
+         const messagesForApi = compactionResult.messages;
+         if (compactionResult.compacted) {
+             const method = compactionResult.method === 'llm' ? 'LLM summary' : 'truncation';
+             const detail = `~${Math.round((compactionResult.tokensBefore || 0) / 1000)}k → ~${Math.round((compactionResult.tokensAfter || 0) / 1000)}k tokens`;
+             toolLog.push(`[context compacted via ${method}: ${detail}]`);
+         }
          console.log(`[agent:openai-tools] Iteration ${i + 1}/${maxIterations} (provider: ${provider}, model: ${modelId})`);
          const genObs = await startGenerationObservation(`${provider}:${modelId}`, {
              input: { messages: apiMessages },
@@ -252,7 +258,7 @@ export async function chatWithToolsOpenAI(params, provider) {
          // Execute each tool call
          for (const toolCall of message.tool_calls) {
              const fnName = toolCall.function.name;
-             if (fnName.startsWith('$') && fnName !== '$web_search') {
+             if (fnName.startsWith('$')) {
                  const unsupported = `Provider-native tool "${fnName}" is not supported in this runtime.`;
                  console.warn(`[agent:openai-tools] ${unsupported}`);
                  apiMessages.push({
@@ -54,8 +54,12 @@ export declare function getProvider(model: string): string;
   * Strip provider prefix from model name.
   */
  export declare function stripProvider(model: string, openaiClients?: Map<string, unknown>, responsesApiProviders?: Set<string>): string;
- /** Truncate tool result to maxBytes. Appends truncation notice. */
- export declare function truncateToolResult(result: string, maxBytes?: number): string;
+ /**
+  * Mask large tool outputs by writing to scratch files.
+  * Returns the original result if small enough, or a summary + file path if large.
+  * Falls back to simple truncation if file write fails.
+  */
+ export declare function truncateToolResult(result: string, _maxBytes?: number): string;
  /**
   * Build thinking config based on thinking level.
   */
@@ -1,4 +1,7 @@
  // Provider Utilities
+ import { writeFileSync, mkdirSync, existsSync } from 'fs';
+ import { join } from 'path';
+ import { homedir } from 'os';
  // Anti-hallucination instructions injected between the Claude Code identity
  // block and the actual system prompt. Prevents the model from roleplaying
  // Claude Code's full behavior (XML tool calls, fabricated output, etc.)
@@ -104,7 +107,7 @@ function migrateDeprecatedModelSpec(modelSpec) {
      if (/^claude[-.]3[-.]5[-.]sonnet(?:[-_.].*)?$/i.test(bare)) {
          migratedBare = 'claude-sonnet-4-6';
      }
-     else if (/^claude[-.]3[-.]5[-.]haiku(?:[-_.].*)?$/i.test(bare)) {
+     else if (/^claude[-.]3[-.]5[-.]haiku(?:[-_.].*)?$/i.test(bare) || bare === 'claude-haiku') {
          migratedBare = 'claude-haiku-4-5';
      }
      else if (/^claude[-.]opus[-.]4(?:[-_.].*)?$/i.test(bare)) {
@@ -178,10 +181,39 @@ export function stripProvider(model, openaiClients, responsesApiProviders) {
      return model;
  }
  /** Truncate tool result to maxBytes. Appends truncation notice. */
- export function truncateToolResult(result, maxBytes = 10_240) {
-     if (result.length <= maxBytes)
+ /**
+  * Observation masking threshold. Tool outputs above this size are written to
+  * a scratch file and replaced with a compact summary + file path.
+  * Outputs below this are returned inline (no file I/O overhead).
+  */
+ const MASK_THRESHOLD = 8_000; // ~2000 tokens
+ /**
+  * Mask large tool outputs by writing to scratch files.
+  * Returns the original result if small enough, or a summary + file path if large.
+  * Falls back to simple truncation if file write fails.
+  */
+ export function truncateToolResult(result, _maxBytes = 10_240) {
+     if (result.length <= MASK_THRESHOLD)
          return result;
-     return result.slice(0, maxBytes) + `\n\n[Truncated: ${result.length} chars total]`;
+     try {
+         const scratchDir = join(homedir(), '.skimpyclaw', 'scratch');
+         if (!existsSync(scratchDir))
+             mkdirSync(scratchDir, { recursive: true });
+         const id = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+         const filePath = join(scratchDir, `${id}.txt`);
+         writeFileSync(filePath, result);
+         // Build a compact summary: first 500 chars + last 500 chars
+         const head = result.slice(0, 500);
+         const tail = result.slice(-500);
+         const summary = head + (result.length > 1000 ? '\n...\n' + tail : '');
+         console.log(`[context-manager] Masked ${result.length} chars → ${filePath}`);
+         return `${summary}\n\n[Full output (${result.length} chars) saved to ${filePath} — use Read tool to access]`;
+     }
+     catch (err) {
+         // Fallback: simple truncation
+         console.warn(`[context-manager] Masking failed: ${err instanceof Error ? err.message : err}`);
+         return result.slice(0, MASK_THRESHOLD) + `\n\n[Truncated: ${result.length} chars total]`;
+     }
  }
  /**
   * Build thinking config based on thinking level.
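Assuming a 20,000-character tool output, the masked return value would look roughly like this (the scratch path and id vary per call):

    const out = truncateToolResult('x'.repeat(20_000));
    // => <first 500 chars>
    //    ...
    //    <last 500 chars>
    //
    //    [Full output (20000 chars) saved to <home>/.skimpyclaw/scratch/<timestamp>-<id>.txt — use Read tool to access]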
@@ -38,6 +38,16 @@ export async function ensureContainer(sessionId, config, allowedPaths) {
      const uid = process.getuid?.() ?? 501;
      const gid = process.getgid?.() ?? 20;
      const merged = { ...SANDBOX_DEFAULTS, ...config };
+     // Expand ${VAR} references in env values from process.env
+     let resolvedEnv;
+     if (config.env) {
+         resolvedEnv = {};
+         for (const [key, val] of Object.entries(config.env)) {
+             resolvedEnv[key] = val.replace(/\$\{(\w+)\}/g, (_match, name) => {
+                 return process.env[name] ?? '';
+             });
+         }
+     }
      const opts = {
          image: merged.image,
          cpus: merged.cpus,
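With this change, a sandbox env value can reference host environment variables instead of hard-coding secrets. A hypothetical config fragment (the surrounding key layout is assumed, not shown in this diff):

    {
        "sandbox": {
            "env": {
                "GITHUB_TOKEN": "${GITHUB_TOKEN}",
                "NODE_ENV": "production"
            }
        }
    }

At container creation, "${GITHUB_TOKEN}" is replaced with process.env.GITHUB_TOKEN, or with an empty string if unset.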
@@ -48,6 +58,7 @@ export async function ensureContainer(sessionId, config, allowedPaths) {
              container: m.container,
              readOnly: m.readOnly,
          })),
+         env: resolvedEnv,
          user: `${uid}:${gid}`,
      };
      await createContainer(name, opts);
@@ -83,7 +83,11 @@ export function validateMountPaths(allowedPaths) {
   * Returns the original path if no mount matches (will likely fail inside container).
   */
  export function translatePath(hostPath, mounts) {
-     const candidates = getPathCandidates(hostPath);
+     // Expand ~ to home directory (~ is a shell feature, not handled by resolve())
+     const expanded = hostPath.startsWith('~/')
+         ? homedir() + hostPath.slice(1)
+         : hostPath;
+     const candidates = getPathCandidates(expanded);
      // Sort by host path length descending so we match the most specific mount first
      const sorted = [...mounts].sort((a, b) => b.host.length - a.host.length);
      for (const candidate of candidates) {
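The practical effect of the new `~` expansion, sketched against an assumed mount table (and assuming `getPathCandidates` resolves the expanded path as usual):

    import { homedir } from 'os';
    const mounts = [{ host: `${homedir()}/projects`, container: '/workspace' }];
    translatePath('~/projects/app/src', mounts);
    // before: '~/projects/app/src' matched no mount and passed through unchanged
    // after:  expands to '<home>/projects/app/src', which maps to '/workspace/app/src'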
@@ -8,6 +8,7 @@ export interface ContainerOpts {
          container: string;
          readOnly?: boolean;
      }>;
+     env?: Record<string, string>;
      user?: string;
  }
  export interface ExecOpts {
@@ -114,6 +114,11 @@ export async function createContainer(name, opts) {
              args.push('--mount', mountArg);
          }
      }
+     if (opts.env) {
+         for (const [key, val] of Object.entries(opts.env)) {
+             args.push('-e', `${key}=${val}`);
+         }
+     }
      args.push(opts.image, 'sleep', 'infinity');
      const result = await runCommand(runtime, args);
      if (result.exitCode !== 0) {
@@ -0,0 +1,6 @@
+ export type SandboxRuntime = 'container' | 'docker';
+ export declare function detectSandboxRuntime(preferred?: SandboxRuntime | string | null): SandboxRuntime | null;
+ export declare function isSandboxRuntimeRunning(runtime: SandboxRuntime): boolean;
+ export declare function sandboxNetworkExists(runtime: SandboxRuntime, network: string): boolean;
+ export declare function defaultSandboxNetwork(runtime: SandboxRuntime): string;
+ export declare function sandboxImageExists(runtime: SandboxRuntime, image: string): boolean;
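A sketch of how these new declarations might compose at a call site; the runtime behavior is inferred from the signatures only, and the env var name is hypothetical:

    import { detectSandboxRuntime, isSandboxRuntimeRunning, sandboxImageExists } from './sandbox-utils.js';

    const runtime = detectSandboxRuntime(process.env.SKIMPYCLAW_RUNTIME ?? null);
    if (!runtime || !isSandboxRuntimeRunning(runtime)) {
        throw new Error('No usable sandbox runtime (container/docker) available');
    }
    if (!sandboxImageExists(runtime, 'ubuntu:24.04')) {
        // pull or build the image before creating the session container
    }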