@ebowwa/coder 0.7.64 → 0.7.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +36233 -32
- package/dist/interfaces/ui/terminal/cli/index.js +34318 -158
- package/dist/interfaces/ui/terminal/native/README.md +53 -0
- package/dist/interfaces/ui/terminal/native/claude_code_native.darwin-x64.node +0 -0
- package/dist/interfaces/ui/terminal/native/claude_code_native.dylib +0 -0
- package/dist/interfaces/ui/terminal/native/index.d.ts +0 -0
- package/dist/interfaces/ui/terminal/native/index.darwin-arm64.node +0 -0
- package/dist/interfaces/ui/terminal/native/index.js +43 -0
- package/dist/interfaces/ui/terminal/native/index.node +0 -0
- package/dist/interfaces/ui/terminal/native/package.json +34 -0
- package/dist/native/README.md +53 -0
- package/dist/native/claude_code_native.darwin-x64.node +0 -0
- package/dist/native/claude_code_native.dylib +0 -0
- package/dist/native/index.d.ts +0 -480
- package/dist/native/index.darwin-arm64.node +0 -0
- package/dist/native/index.js +43 -1625
- package/dist/native/index.node +0 -0
- package/dist/native/package.json +34 -0
- package/native/index.darwin-arm64.node +0 -0
- package/native/index.js +33 -19
- package/package.json +3 -2
- package/packages/src/core/agent-loop/__tests__/compaction.test.ts +17 -14
- package/packages/src/core/agent-loop/compaction.ts +6 -2
- package/packages/src/core/agent-loop/index.ts +2 -0
- package/packages/src/core/agent-loop/loop-state.ts +1 -1
- package/packages/src/core/agent-loop/turn-executor.ts +4 -0
- package/packages/src/core/agent-loop/types.ts +4 -0
- package/packages/src/core/api-client-impl.ts +377 -176
- package/packages/src/core/cognitive-security/hooks.ts +2 -1
- package/packages/src/core/config/todo +7 -0
- package/packages/src/core/context/__tests__/integration.test.ts +334 -0
- package/packages/src/core/context/compaction.ts +170 -0
- package/packages/src/core/context/constants.ts +58 -0
- package/packages/src/core/context/extraction.ts +85 -0
- package/packages/src/core/context/index.ts +66 -0
- package/packages/src/core/context/summarization.ts +251 -0
- package/packages/src/core/context/token-estimation.ts +98 -0
- package/packages/src/core/context/types.ts +59 -0
- package/packages/src/core/models.ts +81 -4
- package/packages/src/core/normalizers/todo +5 -1
- package/packages/src/core/providers/README.md +230 -0
- package/packages/src/core/providers/__tests__/providers.test.ts +135 -0
- package/packages/src/core/providers/index.ts +419 -0
- package/packages/src/core/providers/types.ts +132 -0
- package/packages/src/core/retry.ts +10 -0
- package/packages/src/ecosystem/tools/index.ts +174 -0
- package/packages/src/index.ts +23 -2
- package/packages/src/interfaces/ui/index.ts +17 -20
- package/packages/src/interfaces/ui/spinner.ts +2 -2
- package/packages/src/interfaces/ui/terminal/bridge/index.ts +370 -0
- package/packages/src/interfaces/ui/terminal/bridge/ipc.ts +829 -0
- package/packages/src/interfaces/ui/terminal/bridge/screen-export.ts +968 -0
- package/packages/src/interfaces/ui/terminal/bridge/types.ts +226 -0
- package/packages/src/interfaces/ui/terminal/bridge/useBridge.ts +210 -0
- package/packages/src/interfaces/ui/terminal/cli/bootstrap.ts +132 -0
- package/packages/src/interfaces/ui/terminal/cli/index.ts +200 -13
- package/packages/src/interfaces/ui/terminal/cli/interactive/index.ts +110 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/input-handler.ts +402 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/interactive-runner.ts +820 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/message-store.ts +299 -0
- package/packages/src/interfaces/ui/terminal/cli/interactive/types.ts +274 -0
- package/packages/src/interfaces/ui/terminal/shared/index.ts +13 -0
- package/packages/src/interfaces/ui/terminal/shared/query.ts +9 -3
- package/packages/src/interfaces/ui/terminal/shared/setup.ts +5 -1
- package/packages/src/interfaces/ui/terminal/shared/spinner-frames.ts +73 -0
- package/packages/src/interfaces/ui/terminal/shared/status-line.ts +10 -2
- package/packages/src/native/index.ts +404 -27
- package/packages/src/native/tui_v2_types.ts +39 -0
- package/packages/src/teammates/coordination.test.ts +279 -0
- package/packages/src/teammates/coordination.ts +646 -0
- package/packages/src/teammates/index.ts +95 -25
- package/packages/src/teammates/integration.test.ts +272 -0
- package/packages/src/teammates/runner.test.ts +235 -0
- package/packages/src/teammates/runner.ts +750 -0
- package/packages/src/teammates/schemas.ts +673 -0
- package/packages/src/types/index.ts +1 -0
- package/packages/src/core/context-compaction.ts +0 -578
- package/packages/src/interfaces/ui/Screenshot 2026-03-02 at 9.23.10/342/200/257PM.png +0 -0
- package/packages/src/interfaces/ui/Screenshot 2026-03-03 at 10.55.11/342/200/257AM.png +0 -0
- package/packages/src/interfaces/ui/terminal/tui/HelpPanel.tsx +0 -262
- package/packages/src/interfaces/ui/terminal/tui/InputContext.tsx +0 -232
- package/packages/src/interfaces/ui/terminal/tui/InputField.tsx +0 -62
- package/packages/src/interfaces/ui/terminal/tui/InteractiveTUI.tsx +0 -537
- package/packages/src/interfaces/ui/terminal/tui/MessageArea.tsx +0 -107
- package/packages/src/interfaces/ui/terminal/tui/MessageStore.tsx +0 -240
- package/packages/src/interfaces/ui/terminal/tui/StatusBar.tsx +0 -54
- package/packages/src/interfaces/ui/terminal/tui/commands.ts +0 -438
- package/packages/src/interfaces/ui/terminal/tui/components/InteractiveElements.tsx +0 -584
- package/packages/src/interfaces/ui/terminal/tui/components/MultilineInput.tsx +0 -614
- package/packages/src/interfaces/ui/terminal/tui/components/PaneManager.tsx +0 -333
- package/packages/src/interfaces/ui/terminal/tui/components/Sidebar.tsx +0 -604
- package/packages/src/interfaces/ui/terminal/tui/components/index.ts +0 -118
- package/packages/src/interfaces/ui/terminal/tui/console.ts +0 -49
- package/packages/src/interfaces/ui/terminal/tui/index.ts +0 -90
- package/packages/src/interfaces/ui/terminal/tui/run.tsx +0 -42
- package/packages/src/interfaces/ui/terminal/tui/spinner.ts +0 -69
- package/packages/src/interfaces/ui/terminal/tui/tui-app.tsx +0 -390
- package/packages/src/interfaces/ui/terminal/tui/tui-footer.ts +0 -422
- package/packages/src/interfaces/ui/terminal/tui/types.ts +0 -186
- package/packages/src/interfaces/ui/terminal/tui/useInputHandler.ts +0 -104
- package/packages/src/interfaces/ui/terminal/tui/useNativeInput.ts +0 -239
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Module - Context compaction for conversation management
|
|
3
|
+
*
|
|
4
|
+
* When the context window fills up, we need to compact messages to continue.
|
|
5
|
+
* This module provides token estimation, summarization, and compaction utilities.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```ts
|
|
9
|
+
* import { compactMessages, needsCompaction, estimateMessagesTokens } from "./context/index.js";
|
|
10
|
+
*
|
|
11
|
+
* // Check if compaction is needed
|
|
12
|
+
* if (needsCompaction(messages, maxTokens)) {
|
|
13
|
+
* const result = await compactMessages(messages, maxTokens);
|
|
14
|
+
* console.log(`Saved ${result.tokensBefore - result.tokensAfter} tokens`);
|
|
15
|
+
* }
|
|
16
|
+
* ```
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
// Types — the public type surface for compaction/summarization (see ./types.js)
export type {
  LLMSummarizationOptions,
  CompactionOptions,
  CompactionResult,
  CompactionStats,
  ToolPair,
} from "./types.js";

// Constants — tuning knobs shared by estimation, summarization and compaction
export {
  CHARS_PER_TOKEN,
  DEFAULT_KEEP_LAST,
  DEFAULT_KEEP_FIRST,
  MIN_MESSAGES_FOR_COMPACTION,
  DEFAULT_COMPACTION_THRESHOLD,
  MAX_SUMMARY_LENGTH,
  SUMMARY_MAX_TOKENS,
} from "./constants.js";

// Token Estimation — native/heuristic token counting for text, blocks, messages
export {
  estimateTokens,
  estimateBlockTokens,
  estimateMessageTokens,
  estimateMessagesTokens,
} from "./token-estimation.js";

// Extraction — pull plain text and tool_use/tool_result info out of messages
export {
  extractTextFromMessage,
  extractToolPairs,
  extractToolNames,
} from "./extraction.js";

// Summarization — simple, LLM-backed, and native-module summarizers
export {
  summarizeMessages,
  summarizeWithLLM,
  compactContentNative,
} from "./summarization.js";

// Compaction — top-level entry points for shrinking a conversation
export {
  compactMessages,
  needsCompaction,
  getCompactionStats,
} from "./compaction.js";
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Summarization - Create summaries of conversation messages
|
|
3
|
+
*
|
|
4
|
+
* Strategies:
|
|
5
|
+
* 1. Native compact_content (Rust) - Fast preprocessing
|
|
6
|
+
* 2. Simple concatenation + truncation - Fallback
|
|
7
|
+
* 3. LLM-based summarization - Best quality when API available
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { Message, ToolResultBlock } from "../../types/index.js";
|
|
11
|
+
import { SUMMARIZATION_MODEL } from "../models.js";
|
|
12
|
+
import type { LLMSummarizationOptions } from "./types.js";
|
|
13
|
+
import {
|
|
14
|
+
MAX_SUMMARY_LENGTH,
|
|
15
|
+
SUMMARY_MAX_TOKENS,
|
|
16
|
+
SUMMARIZATION_SYSTEM_PROMPT,
|
|
17
|
+
SUMMARIZATION_PROMPT,
|
|
18
|
+
} from "./constants.js";
|
|
19
|
+
import { extractTextFromMessage, extractToolNames } from "./extraction.js";
|
|
20
|
+
|
|
21
|
+
// Lazy-load native module
|
|
22
|
+
let _native: {
|
|
23
|
+
compact_content?: (
|
|
24
|
+
content: string,
|
|
25
|
+
maxTokens: number,
|
|
26
|
+
strategy?: "truncate" | "summarize" | "extract"
|
|
27
|
+
) => string;
|
|
28
|
+
} | null = null;
|
|
29
|
+
let _nativeLoadAttempted = false;
|
|
30
|
+
|
|
31
|
+
function getNative(): typeof _native {
|
|
32
|
+
if (_nativeLoadAttempted) return _native;
|
|
33
|
+
_nativeLoadAttempted = true;
|
|
34
|
+
|
|
35
|
+
try {
|
|
36
|
+
const nativePath = require.resolve("../../../native/index.js");
|
|
37
|
+
const nativeModule = require(nativePath);
|
|
38
|
+
_native = nativeModule.getNative?.() ?? null;
|
|
39
|
+
} catch {
|
|
40
|
+
_native = null;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
return _native;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Preprocess content using native compact_content if available.
|
|
48
|
+
* Used to reduce large content before LLM summarization.
|
|
49
|
+
*/
|
|
50
|
+
function preprocessContent(content: string, maxTokens: number): string {
|
|
51
|
+
const native = getNative();
|
|
52
|
+
|
|
53
|
+
if (native?.compact_content) {
|
|
54
|
+
// Use native "extract" strategy for better preservation of structure
|
|
55
|
+
return native.compact_content(content, maxTokens, "extract");
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Fallback: simple truncation
|
|
59
|
+
const maxChars = maxTokens * 4;
|
|
60
|
+
if (content.length <= maxChars) return content;
|
|
61
|
+
return content.slice(0, maxChars) + "\n\n...[content truncated]";
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Simple summarization that concatenates and truncates.
|
|
66
|
+
* Can be enhanced later with LLM-based summarization.
|
|
67
|
+
*/
|
|
68
|
+
export async function summarizeMessages(messages: Message[]): Promise<string> {
|
|
69
|
+
if (!messages || messages.length === 0) {
|
|
70
|
+
return "";
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
const summaryParts: string[] = [];
|
|
74
|
+
summaryParts.push(`[Context Summary: ${messages.length} messages compacted]\n`);
|
|
75
|
+
|
|
76
|
+
// Track tool operations for a cleaner summary
|
|
77
|
+
const toolOperations: string[] = [];
|
|
78
|
+
|
|
79
|
+
for (let i = 0; i < messages.length; i++) {
|
|
80
|
+
const message = messages[i];
|
|
81
|
+
if (!message) continue;
|
|
82
|
+
|
|
83
|
+
const role = message.role.toUpperCase();
|
|
84
|
+
const text = extractTextFromMessage(message);
|
|
85
|
+
|
|
86
|
+
// Track tool operations
|
|
87
|
+
toolOperations.push(...extractToolNames(message));
|
|
88
|
+
|
|
89
|
+
// Add truncated message content
|
|
90
|
+
const truncated = text.length > 300 ? `${text.slice(0, 300)}...` : text;
|
|
91
|
+
summaryParts.push(`${role}: ${truncated}\n`);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Add tool summary
|
|
95
|
+
if (toolOperations.length > 0) {
|
|
96
|
+
const toolCounts = toolOperations.reduce((acc, tool) => {
|
|
97
|
+
acc[tool] = (acc[tool] || 0) + 1;
|
|
98
|
+
return acc;
|
|
99
|
+
}, {} as Record<string, number>);
|
|
100
|
+
|
|
101
|
+
const toolSummary = Object.entries(toolCounts)
|
|
102
|
+
.map(([name, count]) => `${name}(${count})`)
|
|
103
|
+
.join(", ");
|
|
104
|
+
summaryParts.push(`\nTools used: ${toolSummary}\n`);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
let summary = summaryParts.join("");
|
|
108
|
+
|
|
109
|
+
// Truncate if too long
|
|
110
|
+
if (summary.length > MAX_SUMMARY_LENGTH) {
|
|
111
|
+
summary = summary.slice(0, MAX_SUMMARY_LENGTH) + "\n...[truncated]";
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return summary;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/**
 * Summarize messages using an LLM for better context preservation.
 * Falls back to simple truncation if LLM fails or no API key provided.
 *
 * Flow: build a plain-text transcript (text clipped to 2000 chars per
 * message; tool_use/tool_result rendered inline), optionally shrink it
 * with the native preprocessor when it exceeds ~8000 estimated tokens,
 * then POST it to an Anthropic-style /v1/messages endpoint.
 *
 * @param messages - Conversation messages to summarize.
 * @param options - API key / model / base URL / timeout overrides; env vars
 *   (ANTHROPIC_AUTH_TOKEN, ANTHROPIC_API_KEY, CLAUDE_API_KEY,
 *   ANTHROPIC_BASE_URL) supply defaults.
 * @returns A summary string; on any failure, the output of summarizeMessages().
 */
export async function summarizeWithLLM(
  messages: Message[],
  options: LLMSummarizationOptions = {}
): Promise<string> {
  const {
    apiKey = process.env.ANTHROPIC_AUTH_TOKEN || process.env.ANTHROPIC_API_KEY || process.env.CLAUDE_API_KEY,
    model = SUMMARIZATION_MODEL,
    baseUrl = process.env.ANTHROPIC_BASE_URL || "https://api.anthropic.com",
    timeout = 30000,
  } = options;

  // No API key - fall back to simple summarization
  if (!apiKey) {
    return summarizeMessages(messages);
  }

  try {
    // Build the conversation text for summarization
    let conversationText = messages.map((msg) => {
      const role = msg.role.toUpperCase();
      const text = extractTextFromMessage(msg);

      // Extract tool info: one bracketed line per tool_use / tool_result block.
      const tools: string[] = [];
      for (const block of msg.content) {
        if (block.type === "tool_use") {
          tools.push(`[TOOL_USE: ${block.name}]`);
        } else if (block.type === "tool_result") {
          const resultBlock = block as ToolResultBlock;
          // Only string results get a 200-char preview; structured results are elided.
          const preview = typeof resultBlock.content === "string"
            ? resultBlock.content.slice(0, 200)
            : "[complex result]";
          tools.push(`[TOOL_RESULT: ${resultBlock.is_error ? "ERROR" : "OK"}] ${preview}`);
        }
      }

      const toolsStr = tools.length > 0 ? `\n${tools.join("\n")}` : "";
      return `${role}:\n${text.slice(0, 2000)}${toolsStr}`;
    }).join("\n\n---\n\n");

    // Preprocess with native if content is very large (reduce API costs).
    // 4 chars/token is the same heuristic used by the estimation module.
    const estimatedInputTokens = Math.ceil(conversationText.length / 4);
    if (estimatedInputTokens > 8000) {
      // Use native preprocessing to reduce size before LLM
      conversationText = preprocessContent(conversationText, 6000);
    }

    // Build request body for the Anthropic Messages API.
    // NOTE(review): a non-Anthropic baseUrl still gets x-api-key +
    // anthropic-version headers — assumes the provider is
    // Anthropic-compatible; confirm for other providers.
    const requestBody = {
      model,
      max_tokens: SUMMARY_MAX_TOKENS,
      system: SUMMARIZATION_SYSTEM_PROMPT,
      messages: [{
        role: "user" as const,
        content: SUMMARIZATION_PROMPT.replace("{{MESSAGES}}", conversationText),
      }],
    };

    // Make API call with timeout enforced via AbortController.
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeout);

    try {
      const response = await fetch(`${baseUrl}/v1/messages`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-api-key": apiKey,
          "anthropic-version": "2023-06-01",
        },
        body: JSON.stringify(requestBody),
        signal: controller.signal,
      });

      // Redundant with the finally block below, but harmless.
      clearTimeout(timeoutId);

      if (!response.ok) {
        const errorText = await response.text();
        console.error(`\x1b[33m[Compaction] LLM summarization failed: ${response.status} - ${errorText}\x1b[0m`);
        return summarizeMessages(messages);
      }

      const data = await response.json() as {
        content?: Array<{ type: string; text?: string }>;
        usage?: { input_tokens: number; output_tokens: number };
      };

      // Extract text from response: join every "text" content block.
      const summaryText = data.content
        ?.filter((block) => block.type === "text")
        .map((block) => block.text || "")
        .join("\n") || "";

      if (!summaryText) {
        console.error("\x1b[33m[Compaction] LLM returned empty summary\x1b[0m");
        return summarizeMessages(messages);
      }

      // Log usage for debugging
      if (data.usage) {
        console.log(`\x1b[90m[Compaction] LLM summary: ${data.usage.input_tokens} in, ${data.usage.output_tokens} out\x1b[0m`);
      }

      return `[LLM Summary of ${messages.length} messages]\n\n${summaryText}`;

    } finally {
      // Ensures the timer is cleared on every exit path (including abort/throw).
      clearTimeout(timeoutId);
    }

  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    console.error(`\x1b[33m[Compaction] LLM summarization error: ${errorMsg}\x1b[0m`);
    // Fall back to simple summarization
    return summarizeMessages(messages);
  }
}
|
|
237
|
+
|
|
238
|
+
/**
|
|
239
|
+
* Compact content using native module (exposed for direct use)
|
|
240
|
+
*/
|
|
241
|
+
export function compactContentNative(
|
|
242
|
+
content: string,
|
|
243
|
+
maxTokens: number,
|
|
244
|
+
strategy: "truncate" | "summarize" | "extract" = "extract"
|
|
245
|
+
): string | null {
|
|
246
|
+
const native = getNative();
|
|
247
|
+
if (native?.compact_content) {
|
|
248
|
+
return native.compact_content(content, maxTokens, strategy);
|
|
249
|
+
}
|
|
250
|
+
return null;
|
|
251
|
+
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Estimation - Estimate token counts for messages
|
|
3
|
+
*
|
|
4
|
+
* Uses native Rust module for accurate, code-aware token counting.
|
|
5
|
+
* Falls back to heuristic (~4 chars/token) if native unavailable.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Message, ContentBlock } from "../../types/index.js";
|
|
9
|
+
import { CHARS_PER_TOKEN } from "./constants.js";
|
|
10
|
+
|
|
11
|
+
// Lazy-load native module to avoid circular dependencies
|
|
12
|
+
let _native: { count_tokens?: (text: string) => number } | null = null;
|
|
13
|
+
let _nativeLoadAttempted = false;
|
|
14
|
+
|
|
15
|
+
function getNative(): { count_tokens?: (text: string) => number } | null {
|
|
16
|
+
if (_nativeLoadAttempted) return _native;
|
|
17
|
+
_nativeLoadAttempted = true;
|
|
18
|
+
|
|
19
|
+
try {
|
|
20
|
+
// Dynamic import to avoid bundling issues
|
|
21
|
+
const nativePath = require.resolve("../../../native/index.js");
|
|
22
|
+
_native = require(nativePath).getNative?.() ?? null;
|
|
23
|
+
} catch {
|
|
24
|
+
// Native module not available, use fallback
|
|
25
|
+
_native = null;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
return _native;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Estimate the number of tokens in a text string.
|
|
33
|
+
* Uses native code-aware tokenizer if available, falls back to heuristic.
|
|
34
|
+
*/
|
|
35
|
+
export function estimateTokens(text: string): number {
|
|
36
|
+
if (!text || text.length === 0) return 0;
|
|
37
|
+
|
|
38
|
+
// Try native module first (more accurate, code-aware)
|
|
39
|
+
const native = getNative();
|
|
40
|
+
if (native?.count_tokens) {
|
|
41
|
+
return native.count_tokens(text);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Fallback: simple heuristic
|
|
45
|
+
return Math.ceil(text.length / CHARS_PER_TOKEN);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Estimate tokens for a single content block
|
|
50
|
+
*/
|
|
51
|
+
export function estimateBlockTokens(block: ContentBlock): number {
|
|
52
|
+
switch (block.type) {
|
|
53
|
+
case "text":
|
|
54
|
+
return estimateTokens(block.text);
|
|
55
|
+
case "image":
|
|
56
|
+
// Images are roughly 85-110 tokens for standard sizes
|
|
57
|
+
// Use 100 as an average estimate
|
|
58
|
+
return 100;
|
|
59
|
+
case "tool_use":
|
|
60
|
+
// Tool use: name + JSON input
|
|
61
|
+
const toolInput = JSON.stringify(block.input);
|
|
62
|
+
return estimateTokens(block.name) + estimateTokens(toolInput) + 10; // overhead
|
|
63
|
+
case "tool_result":
|
|
64
|
+
// Tool result: content + metadata
|
|
65
|
+
if (typeof block.content === "string") {
|
|
66
|
+
return estimateTokens(block.content) + 10;
|
|
67
|
+
}
|
|
68
|
+
return block.content.reduce((sum, b) => sum + estimateBlockTokens(b), 0) + 10;
|
|
69
|
+
case "thinking":
|
|
70
|
+
return estimateTokens(block.thinking);
|
|
71
|
+
default:
|
|
72
|
+
return 0;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Estimate the total number of tokens in a message
|
|
78
|
+
*/
|
|
79
|
+
export function estimateMessageTokens(message: Message): number {
|
|
80
|
+
// Role overhead (~4 tokens)
|
|
81
|
+
const roleOverhead = 4;
|
|
82
|
+
|
|
83
|
+
// Sum all content blocks
|
|
84
|
+
const contentTokens = message.content.reduce(
|
|
85
|
+
(sum, block) => sum + estimateBlockTokens(block),
|
|
86
|
+
0
|
|
87
|
+
);
|
|
88
|
+
|
|
89
|
+
return roleOverhead + contentTokens;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Get total estimated tokens across all messages
|
|
94
|
+
*/
|
|
95
|
+
export function estimateMessagesTokens(messages: Message[]): number {
|
|
96
|
+
if (!messages || messages.length === 0) return 0;
|
|
97
|
+
return messages.reduce((sum, msg) => sum + estimateMessageTokens(msg), 0);
|
|
98
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Types - Type definitions for context compaction
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ContentBlock, Message } from "../../types/index.js";
|
|
6
|
+
|
|
7
|
+
/** Options for LLM-based summarization */
export interface LLMSummarizationOptions {
  /** API key for the LLM; env vars are used as defaults when omitted */
  apiKey?: string;
  /** Model to use for summarization (default: haiku) */
  model?: string;
  /** Base URL for API (for non-Anthropic providers) */
  baseUrl?: string;
  /** Timeout in ms (default: 30000) */
  timeout?: number;
}

/** Options for message compaction */
export interface CompactionOptions {
  /** Number of initial messages to keep unchanged */
  keepFirst?: number;
  /** Number of recent messages to keep unchanged */
  keepLast?: number;
  /** Whether to preserve tool_use/tool_result pairs */
  preserveToolPairs?: boolean;
  /** Use LLM for summarization (default: true if API key available) */
  useLLMSummarization?: boolean;
  /** API key for LLM summarization (falls back to env) */
  apiKey?: string;
  /** Base URL for API (for non-Anthropic providers like Z.AI) */
  baseUrl?: string;
}

/** Result of a compaction operation */
export interface CompactionResult {
  /** The compacted messages */
  messages: Message[];
  /** Number of messages removed */
  messagesRemoved: number;
  /** Estimated tokens before compaction */
  tokensBefore: number;
  /** Estimated tokens after compaction */
  tokensAfter: number;
  /** Whether compaction actually occurred */
  didCompact: boolean;
}

/** Statistics from compaction */
export interface CompactionStats {
  // Presumably 0-100 percentage of tokens removed — confirm against getCompactionStats.
  reductionPercent: number;
  // Absolute number of tokens saved (tokensBefore - tokensAfter).
  tokensSaved: number;
}

/** Tool use/result pair for preservation */
export interface ToolPair {
  /** The tool_use block issued by the assistant */
  use: { type: "tool_use"; id: string; name: string; input: unknown };
  /** The matching tool_result, when one exists (paired via tool_use_id) */
  result?: { type: "tool_result"; tool_use_id: string; content: string | ContentBlock[]; is_error?: boolean };
}
|
|
@@ -28,7 +28,7 @@ export interface ModelDefinition {
|
|
|
28
28
|
/** Whether model supports extended thinking */
|
|
29
29
|
supportsThinking: boolean;
|
|
30
30
|
/** Model provider */
|
|
31
|
-
provider: "anthropic" | "zhipu" | "openai" | "other";
|
|
31
|
+
provider: "anthropic" | "zhipu" | "minimax" | "openai" | "other";
|
|
32
32
|
/** Whether model supports vision/images */
|
|
33
33
|
supportsVision: boolean;
|
|
34
34
|
}
|
|
@@ -164,18 +164,63 @@ export const MODELS: Record<string, ModelDefinition> = {
|
|
|
164
164
|
supportsVision: true,
|
|
165
165
|
},
|
|
166
166
|
|
|
167
|
-
// GLM Series (Zhipu AI)
|
|
167
|
+
// GLM Series (Zhipu AI - Coding Plan with Shared Quota)
|
|
168
|
+
// Note: GLM-5 consumes quota at 3x (peak) and 2x (off-peak)
|
|
168
169
|
"glm-5": {
|
|
169
170
|
id: "glm-5",
|
|
170
171
|
name: "GLM-5",
|
|
171
172
|
fullName: "GLM-5",
|
|
172
173
|
contextWindow: 200_000,
|
|
173
174
|
maxOutput: 128_000,
|
|
174
|
-
pricing: { input: 0.5, output: 0.5, cacheWrite: 0, cacheRead: 0 },
|
|
175
|
+
pricing: { input: 0.5, output: 0.5, cacheWrite: 0, cacheRead: 0 },
|
|
175
176
|
supportsThinking: true,
|
|
176
177
|
provider: "zhipu",
|
|
177
178
|
supportsVision: true,
|
|
178
179
|
},
|
|
180
|
+
"glm-4.7": {
|
|
181
|
+
id: "glm-4.7",
|
|
182
|
+
name: "GLM-4.7",
|
|
183
|
+
fullName: "GLM-4.7",
|
|
184
|
+
contextWindow: 128_000,
|
|
185
|
+
maxOutput: 8_192,
|
|
186
|
+
pricing: { input: 0.5, output: 0.5, cacheWrite: 0, cacheRead: 0 },
|
|
187
|
+
supportsThinking: true,
|
|
188
|
+
provider: "zhipu",
|
|
189
|
+
supportsVision: true,
|
|
190
|
+
},
|
|
191
|
+
"glm-4.6": {
|
|
192
|
+
id: "glm-4.6",
|
|
193
|
+
name: "GLM-4.6",
|
|
194
|
+
fullName: "GLM-4.6",
|
|
195
|
+
contextWindow: 128_000,
|
|
196
|
+
maxOutput: 8_192,
|
|
197
|
+
pricing: { input: 0.5, output: 0.5, cacheWrite: 0, cacheRead: 0 },
|
|
198
|
+
supportsThinking: true,
|
|
199
|
+
provider: "zhipu",
|
|
200
|
+
supportsVision: true,
|
|
201
|
+
},
|
|
202
|
+
"glm-4.5v": {
|
|
203
|
+
id: "glm-4.5v",
|
|
204
|
+
name: "GLM-4.5V",
|
|
205
|
+
fullName: "GLM-4.5 Vision",
|
|
206
|
+
contextWindow: 128_000,
|
|
207
|
+
maxOutput: 4_096,
|
|
208
|
+
pricing: { input: 0.5, output: 0.5, cacheWrite: 0, cacheRead: 0 },
|
|
209
|
+
supportsThinking: false,
|
|
210
|
+
provider: "zhipu",
|
|
211
|
+
supportsVision: true,
|
|
212
|
+
},
|
|
213
|
+
"glm-4.5": {
|
|
214
|
+
id: "glm-4.5",
|
|
215
|
+
name: "GLM-4.5",
|
|
216
|
+
fullName: "GLM-4.5",
|
|
217
|
+
contextWindow: 128_000,
|
|
218
|
+
maxOutput: 4_096,
|
|
219
|
+
pricing: { input: 0.5, output: 0.5, cacheWrite: 0, cacheRead: 0 },
|
|
220
|
+
supportsThinking: false,
|
|
221
|
+
provider: "zhipu",
|
|
222
|
+
supportsVision: false,
|
|
223
|
+
},
|
|
179
224
|
"glm-4.5-air": {
|
|
180
225
|
id: "glm-4.5-air",
|
|
181
226
|
name: "GLM-4.5 Air",
|
|
@@ -187,6 +232,19 @@ export const MODELS: Record<string, ModelDefinition> = {
|
|
|
187
232
|
provider: "zhipu",
|
|
188
233
|
supportsVision: false,
|
|
189
234
|
},
|
|
235
|
+
|
|
236
|
+
// MiniMax Series
|
|
237
|
+
"minimax-m2.5": {
|
|
238
|
+
id: "MiniMax-M2.5",
|
|
239
|
+
name: "M2.5",
|
|
240
|
+
fullName: "MiniMax M2.5",
|
|
241
|
+
contextWindow: 200_000,
|
|
242
|
+
maxOutput: 32_000,
|
|
243
|
+
pricing: { input: 0.3, output: 0.9, cacheWrite: 0, cacheRead: 0 },
|
|
244
|
+
supportsThinking: false,
|
|
245
|
+
provider: "minimax",
|
|
246
|
+
supportsVision: true,
|
|
247
|
+
},
|
|
190
248
|
};
|
|
191
249
|
|
|
192
250
|
// ============================================
|
|
@@ -195,19 +253,38 @@ export const MODELS: Record<string, ModelDefinition> = {
|
|
|
195
253
|
|
|
196
254
|
/** Models available for interactive use (shown in /models command) */
export const AVAILABLE_MODELS = [
  // Claude 4.x (Anthropic - stub)
  MODELS["claude-opus-4-6"]!,
  MODELS["claude-sonnet-4-6"]!,
  MODELS["claude-haiku-4-5"]!,
  // GLM Series (Zhipu / Z.AI - coding plan)
  // NOTE(review): glm-4.6 / glm-4.5 / glm-4.5-air exist in MODELS but are
  // not surfaced here — confirm the omission is intentional.
  MODELS["glm-5"]!,
  MODELS["glm-4.7"]!,
  MODELS["glm-4.5v"]!,
  // MiniMax Series
  MODELS["minimax-m2.5"]!,
] as const;
|
|
203
267
|
|
|
204
268
|
/** Model aliases for subagent tasks */
export const MODEL_ALIASES = {
  // Claude aliases
  haiku: "claude-haiku-4-5",
  sonnet: "claude-sonnet-4-6",
  opus: "claude-opus-4-6",
  default: "claude-sonnet-4-6",
  // GLM aliases (Zhipu / Z.AI)
  glm: "glm-5",
  glm5: "glm-5",
  glm4: "glm-4.7",
  glm47: "glm-4.7",
  glm46: "glm-4.6",
  glm45v: "glm-4.5v",
  glm45: "glm-4.5",
  glm45air: "glm-4.5-air",
  fast: "glm-4.5-air",
  // MiniMax aliases
  // Values are MODELS keys (the minimax-m2.5 entry's id field is "MiniMax-M2.5").
  minimax: "minimax-m2.5",
  m25: "minimax-m2.5",
} as const;
|
|
212
289
|
|
|
213
290
|
// ============================================
|