@llmtune/cli 0.1.3 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/agent/loop.d.ts +5 -0
- package/dist/agent/loop.js +163 -89
- package/dist/auth/client.js +9 -0
- package/dist/compact/auto-compact.d.ts +25 -0
- package/dist/compact/auto-compact.js +65 -0
- package/dist/compact/budget.d.ts +8 -0
- package/dist/compact/budget.js +36 -0
- package/dist/compact/service.d.ts +10 -2
- package/dist/compact/service.js +78 -19
- package/dist/context/agent-identity.js +2 -1
- package/dist/marketplace/client.js +18 -6
- package/dist/memory/service.d.ts +3 -0
- package/dist/memory/service.js +16 -0
- package/dist/repl/repl.js +15 -8
- package/llmtune-session-1780260929719.json +6 -0
- package/package.json +3 -2
- package/scripts/qa-full.js +440 -0
- package/scripts/smoke-test.js +142 -0
package/README.md
CHANGED
|
@@ -6,10 +6,10 @@ AI CLI Agent for your terminal, powered by [llmtune.io](https://llmtune.io).
|
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
8
|
# Install globally
|
|
9
|
-
npm install -g llmtune
|
|
9
|
+
npm install -g @llmtune/cli
|
|
10
10
|
|
|
11
11
|
# Or run directly
|
|
12
|
-
npx llmtune
|
|
12
|
+
npx @llmtune/cli
|
|
13
13
|
|
|
14
14
|
# Configure your API key
|
|
15
15
|
llmtune login
|
package/dist/agent/loop.d.ts
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import { ToolRegistry } from "../tools/registry";
|
|
3
3
|
import { Conversation } from "./conversation";
|
|
4
|
+
import { PermissionManager } from "../tools/permissions";
|
|
4
5
|
export interface AgentLoopConfig {
|
|
5
6
|
model?: string;
|
|
6
7
|
maxTurns?: number;
|
|
7
8
|
verbose?: boolean;
|
|
9
|
+
stream?: boolean;
|
|
8
10
|
cwd: string;
|
|
9
11
|
workspaceRoot: string;
|
|
12
|
+
permissions?: PermissionManager;
|
|
13
|
+
/** When true, skip adding userInput (already in conversation). */
|
|
14
|
+
skipUserInput?: boolean;
|
|
10
15
|
}
|
|
11
16
|
export interface AgentLoopResult {
|
|
12
17
|
finalText: string;
|
package/dist/agent/loop.js
CHANGED
|
@@ -5,11 +5,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.runAgentLoop = runAgentLoop;
|
|
7
7
|
const builder_1 = require("../context/builder");
|
|
8
|
+
const auto_compact_1 = require("../compact/auto-compact");
|
|
9
|
+
const service_1 = require("../memory/service");
|
|
10
|
+
const tokens_1 = require("../utils/tokens");
|
|
8
11
|
const chalk_1 = __importDefault(require("chalk"));
|
|
9
12
|
async function runAgentLoop(client, conversation, registry, userInput, config, onTextChunk) {
|
|
10
13
|
const model = config.model ?? "z-ai/GLM-5.1";
|
|
11
14
|
const maxTurns = config.maxTurns ?? 20;
|
|
12
|
-
|
|
15
|
+
const useStream = config.stream !== false;
|
|
16
|
+
if (!config.skipUserInput) {
|
|
17
|
+
conversation.addUserMessage(userInput);
|
|
18
|
+
(0, service_1.saveActiveTask)(userInput);
|
|
19
|
+
}
|
|
13
20
|
const toolSpecs = registry.listSpecs();
|
|
14
21
|
const openaiTools = toolSpecs.map((spec) => ({
|
|
15
22
|
type: "function",
|
|
@@ -22,101 +29,32 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
|
|
|
22
29
|
const contextResult = await (0, builder_1.buildContextPrompt)(config.workspaceRoot, config.cwd, {
|
|
23
30
|
model: config.model,
|
|
24
31
|
});
|
|
25
|
-
const
|
|
32
|
+
const memoryPrompt = (0, service_1.buildMemoryPrompt)();
|
|
33
|
+
const contextPrompt = memoryPrompt
|
|
34
|
+
? `${contextResult.prompt}\n\n${memoryPrompt}`
|
|
35
|
+
: contextResult.prompt;
|
|
36
|
+
const toolSchemaTokens = (0, tokens_1.estimateTokens)(JSON.stringify(toolSpecs));
|
|
37
|
+
await (0, auto_compact_1.maybeAutoCompact)({
|
|
38
|
+
client,
|
|
39
|
+
model,
|
|
40
|
+
conversation,
|
|
41
|
+
systemPrompt: contextPrompt,
|
|
42
|
+
toolSchemaTokens,
|
|
43
|
+
});
|
|
26
44
|
let totalToolCalls = 0;
|
|
27
45
|
let totalTokensIn = 0;
|
|
28
46
|
let totalTokensOut = 0;
|
|
29
47
|
let turns = 0;
|
|
30
48
|
let finalText = "";
|
|
31
49
|
for (let turn = 0; turn < maxTurns; turn++) {
|
|
32
|
-
const
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
systemMessage,
|
|
39
|
-
...apiMessages.map((msg) => {
|
|
40
|
-
if (msg.role === "system")
|
|
41
|
-
return { role: "system", content: msg.content };
|
|
42
|
-
if (msg.role === "user")
|
|
43
|
-
return { role: "user", content: msg.content };
|
|
44
|
-
if (msg.role === "assistant") {
|
|
45
|
-
const m = {
|
|
46
|
-
role: "assistant",
|
|
47
|
-
content: msg.content || null,
|
|
48
|
-
};
|
|
49
|
-
if (msg.toolCalls && msg.toolCalls.length > 0) {
|
|
50
|
-
m.tool_calls = msg.toolCalls.map((tc) => ({
|
|
51
|
-
id: tc.id,
|
|
52
|
-
type: "function",
|
|
53
|
-
function: { name: tc.function.name, arguments: tc.function.arguments },
|
|
54
|
-
}));
|
|
55
|
-
}
|
|
56
|
-
return m;
|
|
57
|
-
}
|
|
58
|
-
if (msg.role === "tool") {
|
|
59
|
-
return {
|
|
60
|
-
role: "tool",
|
|
61
|
-
tool_call_id: msg.toolCallId ?? "",
|
|
62
|
-
content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
|
|
63
|
-
};
|
|
64
|
-
}
|
|
65
|
-
return { role: "user", content: msg.content };
|
|
66
|
-
}),
|
|
67
|
-
];
|
|
68
|
-
const stream = await client.chat.completions.create({
|
|
69
|
-
model,
|
|
70
|
-
messages: allMessages,
|
|
71
|
-
tools: openaiTools.length > 0 ? openaiTools : undefined,
|
|
72
|
-
stream: true,
|
|
73
|
-
temperature: 0.7,
|
|
74
|
-
max_tokens: 16384,
|
|
75
|
-
});
|
|
76
|
-
let assistantContent = "";
|
|
77
|
-
const toolCalls = [];
|
|
78
|
-
let currentToolCall = null;
|
|
79
|
-
for await (const chunk of stream) {
|
|
80
|
-
const delta = chunk.choices[0]?.delta;
|
|
81
|
-
if (!delta)
|
|
82
|
-
continue;
|
|
83
|
-
if (delta.content) {
|
|
84
|
-
assistantContent += delta.content;
|
|
85
|
-
if (onTextChunk)
|
|
86
|
-
onTextChunk(delta.content);
|
|
87
|
-
else
|
|
88
|
-
process.stdout.write(delta.content);
|
|
89
|
-
}
|
|
90
|
-
if (delta.tool_calls) {
|
|
91
|
-
for (const tc of delta.tool_calls) {
|
|
92
|
-
if (tc.id && tc.function?.name) {
|
|
93
|
-
currentToolCall = {
|
|
94
|
-
id: tc.id,
|
|
95
|
-
name: tc.function.name,
|
|
96
|
-
arguments: tc.function.arguments ?? "",
|
|
97
|
-
};
|
|
98
|
-
toolCalls.push({
|
|
99
|
-
id: tc.id,
|
|
100
|
-
type: "function",
|
|
101
|
-
function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
|
|
102
|
-
});
|
|
103
|
-
}
|
|
104
|
-
else if (currentToolCall && tc.function?.arguments) {
|
|
105
|
-
currentToolCall.arguments += tc.function.arguments;
|
|
106
|
-
const last = toolCalls[toolCalls.length - 1];
|
|
107
|
-
if (last)
|
|
108
|
-
last.function.arguments = currentToolCall.arguments;
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
if (chunk.usage) {
|
|
113
|
-
totalTokensIn += chunk.usage.prompt_tokens ?? 0;
|
|
114
|
-
totalTokensOut += chunk.usage.completion_tokens ?? 0;
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
if (!onTextChunk)
|
|
118
|
-
console.log();
|
|
50
|
+
const allMessages = buildApiMessages(conversation, contextPrompt);
|
|
51
|
+
const turnResult = useStream
|
|
52
|
+
? await runStreamingTurn(client, model, allMessages, openaiTools, onTextChunk)
|
|
53
|
+
: await runBufferedTurn(client, model, allMessages, openaiTools);
|
|
54
|
+
totalTokensIn += turnResult.tokensIn;
|
|
55
|
+
totalTokensOut += turnResult.tokensOut;
|
|
119
56
|
turns++;
|
|
57
|
+
const { assistantContent, toolCalls } = turnResult;
|
|
120
58
|
if (toolCalls.length === 0) {
|
|
121
59
|
conversation.addAssistantMessage(assistantContent);
|
|
122
60
|
finalText = assistantContent;
|
|
@@ -134,6 +72,18 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
|
|
|
134
72
|
}
|
|
135
73
|
const summary = summarizeToolInput(tc.function.name, toolInput);
|
|
136
74
|
console.log(chalk_1.default.cyan(` ▶ ${tc.function.name}`) + chalk_1.default.dim(` ${summary}`));
|
|
75
|
+
if (config.permissions) {
|
|
76
|
+
const tool = registry.get(tc.function.name);
|
|
77
|
+
const spec = tool?.spec();
|
|
78
|
+
const isDestructive = spec?.isDestructive === true;
|
|
79
|
+
const perm = await config.permissions.check(tc.function.name, toolInput, isDestructive);
|
|
80
|
+
if (perm.behavior === "deny") {
|
|
81
|
+
const denyMsg = perm.message ?? "User denied tool execution";
|
|
82
|
+
console.log(chalk_1.default.yellow(` ⊘ ${tc.function.name}: ${denyMsg}`));
|
|
83
|
+
conversation.addToolResult(tc.id, `Denied: ${denyMsg}`);
|
|
84
|
+
continue;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
137
87
|
const toolCtx = {
|
|
138
88
|
workspaceRoot: config.workspaceRoot,
|
|
139
89
|
cwd: config.cwd,
|
|
@@ -154,6 +104,130 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
|
|
|
154
104
|
}
|
|
155
105
|
return { finalText, totalToolCalls, totalTokensIn, totalTokensOut, turns };
|
|
156
106
|
}
|
|
107
|
+
function buildApiMessages(conversation, contextPrompt) {
|
|
108
|
+
const systemMessage = {
|
|
109
|
+
role: "system",
|
|
110
|
+
content: contextPrompt,
|
|
111
|
+
};
|
|
112
|
+
return [
|
|
113
|
+
systemMessage,
|
|
114
|
+
...conversation.getApiMessages().map((msg) => {
|
|
115
|
+
if (msg.role === "system")
|
|
116
|
+
return { role: "system", content: msg.content };
|
|
117
|
+
if (msg.role === "user")
|
|
118
|
+
return { role: "user", content: msg.content };
|
|
119
|
+
if (msg.role === "assistant") {
|
|
120
|
+
const m = {
|
|
121
|
+
role: "assistant",
|
|
122
|
+
content: msg.content || null,
|
|
123
|
+
};
|
|
124
|
+
if (msg.toolCalls && msg.toolCalls.length > 0) {
|
|
125
|
+
m.tool_calls = msg.toolCalls.map((tc) => ({
|
|
126
|
+
id: tc.id,
|
|
127
|
+
type: "function",
|
|
128
|
+
function: { name: tc.function.name, arguments: tc.function.arguments },
|
|
129
|
+
}));
|
|
130
|
+
}
|
|
131
|
+
return m;
|
|
132
|
+
}
|
|
133
|
+
if (msg.role === "tool") {
|
|
134
|
+
return {
|
|
135
|
+
role: "tool",
|
|
136
|
+
tool_call_id: msg.toolCallId ?? "",
|
|
137
|
+
content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
return { role: "user", content: msg.content };
|
|
141
|
+
}),
|
|
142
|
+
];
|
|
143
|
+
}
|
|
144
|
+
async function runStreamingTurn(client, model, messages, openaiTools, onTextChunk) {
|
|
145
|
+
const stream = await client.chat.completions.create({
|
|
146
|
+
model,
|
|
147
|
+
messages,
|
|
148
|
+
tools: openaiTools.length > 0 ? openaiTools : undefined,
|
|
149
|
+
stream: true,
|
|
150
|
+
temperature: 0.7,
|
|
151
|
+
max_tokens: 16384,
|
|
152
|
+
});
|
|
153
|
+
let assistantContent = "";
|
|
154
|
+
const toolCalls = [];
|
|
155
|
+
let currentToolCall = null;
|
|
156
|
+
let tokensIn = 0;
|
|
157
|
+
let tokensOut = 0;
|
|
158
|
+
for await (const chunk of stream) {
|
|
159
|
+
const delta = chunk.choices[0]?.delta;
|
|
160
|
+
if (!delta)
|
|
161
|
+
continue;
|
|
162
|
+
if (delta.content) {
|
|
163
|
+
assistantContent += delta.content;
|
|
164
|
+
if (onTextChunk)
|
|
165
|
+
onTextChunk(delta.content);
|
|
166
|
+
else
|
|
167
|
+
process.stdout.write(delta.content);
|
|
168
|
+
}
|
|
169
|
+
if (delta.tool_calls) {
|
|
170
|
+
for (const tc of delta.tool_calls) {
|
|
171
|
+
if (tc.id && tc.function?.name) {
|
|
172
|
+
currentToolCall = {
|
|
173
|
+
id: tc.id,
|
|
174
|
+
name: tc.function.name,
|
|
175
|
+
arguments: tc.function.arguments ?? "",
|
|
176
|
+
};
|
|
177
|
+
toolCalls.push({
|
|
178
|
+
id: tc.id,
|
|
179
|
+
type: "function",
|
|
180
|
+
function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
else if (currentToolCall && tc.function?.arguments) {
|
|
184
|
+
currentToolCall.arguments += tc.function.arguments;
|
|
185
|
+
const last = toolCalls[toolCalls.length - 1];
|
|
186
|
+
if (last)
|
|
187
|
+
last.function.arguments = currentToolCall.arguments;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
if (chunk.usage) {
|
|
192
|
+
tokensIn += chunk.usage.prompt_tokens ?? 0;
|
|
193
|
+
tokensOut += chunk.usage.completion_tokens ?? 0;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
if (!onTextChunk)
|
|
197
|
+
console.log();
|
|
198
|
+
return { assistantContent, toolCalls, tokensIn, tokensOut };
|
|
199
|
+
}
|
|
200
|
+
async function runBufferedTurn(client, model, messages, openaiTools) {
|
|
201
|
+
const response = await client.chat.completions.create({
|
|
202
|
+
model,
|
|
203
|
+
messages,
|
|
204
|
+
tools: openaiTools.length > 0 ? openaiTools : undefined,
|
|
205
|
+
stream: false,
|
|
206
|
+
temperature: 0.7,
|
|
207
|
+
max_tokens: 16384,
|
|
208
|
+
});
|
|
209
|
+
const choice = response.choices[0];
|
|
210
|
+
const msg = choice?.message;
|
|
211
|
+
const assistantContent = msg?.content ?? "";
|
|
212
|
+
const toolCalls = (msg?.tool_calls ?? []).map((tc) => ({
|
|
213
|
+
id: tc.id,
|
|
214
|
+
type: "function",
|
|
215
|
+
function: {
|
|
216
|
+
name: tc.function.name,
|
|
217
|
+
arguments: tc.function.arguments ?? "",
|
|
218
|
+
},
|
|
219
|
+
}));
|
|
220
|
+
if (assistantContent) {
|
|
221
|
+
process.stdout.write(assistantContent);
|
|
222
|
+
console.log();
|
|
223
|
+
}
|
|
224
|
+
return {
|
|
225
|
+
assistantContent,
|
|
226
|
+
toolCalls,
|
|
227
|
+
tokensIn: response.usage?.prompt_tokens ?? 0,
|
|
228
|
+
tokensOut: response.usage?.completion_tokens ?? 0,
|
|
229
|
+
};
|
|
230
|
+
}
|
|
157
231
|
function summarizeToolInput(name, input) {
|
|
158
232
|
const n = name.toLowerCase();
|
|
159
233
|
if (n === "bash") {
|
package/dist/auth/client.js
CHANGED
|
@@ -7,15 +7,24 @@ exports.createClient = createClient;
|
|
|
7
7
|
exports.getDefaultModel = getDefaultModel;
|
|
8
8
|
const openai_1 = __importDefault(require("openai"));
|
|
9
9
|
const config_1 = require("./config");
|
|
10
|
+
const version_1 = require("../version");
|
|
10
11
|
/**
 * Build the OpenAI-compatible client used by the CLI.
 *
 * Reads the API key from the loaded config; when none is stored it prints a
 * login hint and terminates the process with exit code 1. Every request made
 * through the returned client carries the CLI identification headers and the
 * current working directory.
 *
 * @returns A configured OpenAI client instance.
 */
function createClient() {
    const apiKey = (0, config_1.loadConfig)().apiKey;
    if (!apiKey) {
        console.error("Not logged in. Run: llmtune login");
        process.exit(1);
    }
    // HTTP header values may only hold a restricted character set; a working
    // directory with non-ASCII or control characters would otherwise make
    // every request throw while the headers are built. Characters outside
    // printable ASCII are percent-encoded as UTF-8; plain ASCII paths are sent
    // unchanged.
    // NOTE(review): the receiving service needs to percent-decode these
    // values - confirm against the backend. Also note that the absolute local
    // path is sent with every request.
    const cwd = process.cwd().replace(/[^\x20-\x7E]/gu, (ch) => {
        try {
            return encodeURIComponent(ch);
        }
        catch {
            // Lone surrogate: substitute an encoded U+FFFD instead of failing.
            return "%EF%BF%BD";
        }
    });
    return new openai_1.default({
        apiKey,
        baseURL: (0, config_1.getApiBase)(),
        defaultHeaders: {
            "X-LLMTune-Client": "cli",
            "X-LLMTune-CLI-Version": version_1.CLI_VERSION,
            "X-LLMTune-Context-Managed": "true",
            "X-Workspace-Root": cwd,
            "X-CWD": cwd,
        },
    });
}
|
|
21
30
|
function getDefaultModel() {
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import { Conversation } from "../agent/conversation";
|
|
3
|
+
import { type CompactResult } from "./service";
|
|
4
|
+
export interface AutoCompactOptions {
|
|
5
|
+
client: OpenAI;
|
|
6
|
+
model: string;
|
|
7
|
+
conversation: Conversation;
|
|
8
|
+
systemPrompt: string;
|
|
9
|
+
toolSchemaTokens: number;
|
|
10
|
+
minMessages?: number;
|
|
11
|
+
}
|
|
12
|
+
export interface AutoCompactResult {
|
|
13
|
+
compacted: boolean;
|
|
14
|
+
result?: CompactResult;
|
|
15
|
+
microcompactTokensSaved: number;
|
|
16
|
+
estimatedTokens: number;
|
|
17
|
+
threshold: number;
|
|
18
|
+
}
|
|
19
|
+
export declare function estimateSessionTokens(conversation: Conversation, systemPrompt: string, toolSchemaTokens: number): number;
|
|
20
|
+
/**
|
|
21
|
+
* Microcompact verbose tool results, then LLM-summarize older messages if still over budget.
|
|
22
|
+
*/
|
|
23
|
+
export declare function maybeAutoCompact(options: AutoCompactOptions): Promise<AutoCompactResult>;
|
|
24
|
+
export declare function printCompactionNotice(result: CompactResult, trigger: "manual" | "auto", activeTask?: string): void;
|
|
25
|
+
//# sourceMappingURL=auto-compact.d.ts.map
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.estimateSessionTokens = estimateSessionTokens;
|
|
7
|
+
exports.maybeAutoCompact = maybeAutoCompact;
|
|
8
|
+
exports.printCompactionNotice = printCompactionNotice;
|
|
9
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
10
|
+
const service_1 = require("./service");
|
|
11
|
+
const microcompact_1 = require("./microcompact");
|
|
12
|
+
const tokens_1 = require("../utils/tokens");
|
|
13
|
+
const budget_1 = require("./budget");
|
|
14
|
+
const service_2 = require("../memory/service");
|
|
15
|
+
/**
 * Estimate the token footprint of the next request.
 *
 * Adds the estimate for the system prompt, the supplied tool-schema token
 * count, and the estimate for the conversation messages. Only each message's
 * `role` and `content` are passed to the message estimator.
 *
 * @param conversation Conversation whose `messages` are measured.
 * @param systemPrompt System prompt text.
 * @param toolSchemaTokens Token count already computed for the tool schemas.
 * @returns Estimated total token count.
 */
function estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens) {
    const slimMessages = [];
    for (const entry of conversation.messages) {
        slimMessages.push({ role: entry.role, content: entry.content });
    }
    const historyTokens = (0, tokens_1.estimateMessagesTokens)(slimMessages);
    const promptTokens = (0, tokens_1.estimateTokens)(systemPrompt);
    return promptTokens + toolSchemaTokens + historyTokens;
}
|
|
22
|
+
/**
 * Shrink the conversation when it approaches the model's context budget.
 *
 * Step 1: run microcompaction over the stored messages and, if it saved any
 * tokens, replace the conversation's messages in place with the result.
 * Step 2: if the conversation has at least `minMessages` messages (default 8)
 * and the estimated request size is still above the model's compaction
 * threshold, summarize the conversation via `compactConversation` and print a
 * notice.
 *
 * @param options Client, model, conversation, system prompt, tool-schema
 *                token count and optional `minMessages`.
 * @returns Whether summarization ran, its result when it did, the tokens saved
 *          by microcompaction, the final token estimate and the threshold.
 */
async function maybeAutoCompact(options) {
    const { client, model, conversation, systemPrompt, toolSchemaTokens } = options;
    const minMessages = options.minMessages === undefined ? 8 : options.minMessages;
    const threshold = (0, budget_1.getCompactThreshold)(model);
    const micro = (0, microcompact_1.microcompactMessages)(conversation.messages);
    let microcompactTokensSaved = 0;
    if (micro.tokensSaved > 0) {
        // Swap contents in place so existing references to the array stay valid.
        conversation.messages.length = 0;
        conversation.messages.push(...micro.compacted);
        microcompactTokensSaved = micro.tokensSaved;
    }
    const estimateBefore = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
    if (conversation.messages.length < minMessages || estimateBefore <= threshold) {
        return {
            compacted: false,
            microcompactTokensSaved,
            estimatedTokens: estimateBefore,
            threshold,
        };
    }
    const activeTask = (0, service_2.getActiveTask)();
    const compactOptions = { trigger: "auto", activeTask };
    const result = await (0, service_1.compactConversation)(client, model, conversation, undefined, compactOptions);
    // Re-measure now that the conversation has been summarized.
    const estimatedTokens = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
    printCompactionNotice(result, "auto", activeTask);
    return { compacted: true, result, microcompactTokensSaved, estimatedTokens, threshold };
}
|
|
54
|
+
/**
 * Print a notice describing a finished compaction.
 *
 * Shows the message counts before and after, the tokens saved, the preserved
 * active task (truncated to 120 characters) when one is given, and a hint
 * about `/uncompact`.
 *
 * @param result Compaction result providing message counts and tokens saved.
 * @param trigger "auto" selects the "Auto-compacted" label; anything else "Compacted".
 * @param activeTask Optional task text to echo back.
 */
function printCompactionNotice(result, trigger, activeTask) {
    let label = "Compacted";
    if (trigger === "auto") {
        label = "Auto-compacted";
    }
    console.log("");
    const counts = `⚠ ${label}: ${result.preCompactMessages} messages → ${result.postCompactMessages} `;
    const savings = `(~${result.tokensSaved.toLocaleString()} tokens saved)`;
    console.log(chalk_1.default.yellow(counts + savings));
    if (activeTask) {
        const preview = activeTask.slice(0, 120);
        const ellipsis = activeTask.length > 120 ? "..." : "";
        console.log(chalk_1.default.dim(` Active task preserved: "${preview}${ellipsis}"`));
    }
    console.log(chalk_1.default.dim(" Use /uncompact to restore full history."));
    console.log("");
}
|
|
65
|
+
//# sourceMappingURL=auto-compact.js.map
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model context windows and auto-compaction thresholds for the CLI.
|
|
3
|
+
*/
|
|
4
|
+
export declare const COMPACT_THRESHOLD_RATIO = 0.75;
|
|
5
|
+
export declare const KEEP_TAIL_MESSAGES = 6;
|
|
6
|
+
export declare function getModelContextWindow(model: string): number;
|
|
7
|
+
export declare function getCompactThreshold(model: string): number;
|
|
8
|
+
//# sourceMappingURL=budget.d.ts.map
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Model context windows and auto-compaction thresholds for the CLI.
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.KEEP_TAIL_MESSAGES = exports.COMPACT_THRESHOLD_RATIO = void 0;
|
|
7
|
+
exports.getModelContextWindow = getModelContextWindow;
|
|
8
|
+
exports.getCompactThreshold = getCompactThreshold;
|
|
9
|
+
// Context window size, in tokens, for each known model identifier.
// The table has no prototype, so looking up an arbitrary model name can never
// resolve to an inherited Object member such as "constructor" or "toString".
// It is frozen because it is shared, read-only configuration.
const MODEL_CONTEXT_WINDOWS = Object.freeze(Object.assign(Object.create(null), {
    "z-ai/GLM-5.1": 128_000,
    "z-ai/glm-5.1": 128_000,
    "gpt-4o": 128_000,
    "gpt-4o-mini": 128_000,
    "claude-sonnet-4": 200_000,
}));
// Window assumed for models missing from the table when no override is set.
const DEFAULT_CONTEXT_WINDOW = 64_000;
|
|
17
|
+
exports.COMPACT_THRESHOLD_RATIO = 0.75;
|
|
18
|
+
exports.KEEP_TAIL_MESSAGES = 6;
|
|
19
|
+
/**
 * Resolve the context window size, in tokens, for a model.
 *
 * Lookup order:
 *   1. the model name exactly as given,
 *   2. the trimmed, lower-cased model name,
 *   3. the LLMTUNE_CONTEXT_WINDOW environment variable, when it is a positive
 *      integer written with digits only,
 *   4. DEFAULT_CONTEXT_WINDOW.
 *
 * Only the table's own keys are considered, so a model called "constructor"
 * or "toString" falls through to the override/default instead of returning an
 * inherited Object member. The environment value is matched strictly because
 * a prefix parse would turn "128k" into a 128-token window.
 *
 * @param model Model identifier.
 * @returns Context window size in tokens.
 */
function getModelContextWindow(model) {
    const normalized = model.trim().toLowerCase();
    for (const key of [model, normalized]) {
        if (Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, key)) {
            return MODEL_CONTEXT_WINDOWS[key];
        }
    }
    const env = process.env.LLMTUNE_CONTEXT_WINDOW;
    if (env) {
        const digits = env.trim();
        if (/^\d+$/.test(digits)) {
            const parsed = Number(digits);
            if (Number.isSafeInteger(parsed) && parsed > 0)
                return parsed;
        }
    }
    return DEFAULT_CONTEXT_WINDOW;
}
|
|
33
|
+
/**
 * Token count above which auto-compaction is triggered for a model: the
 * model's context window scaled by COMPACT_THRESHOLD_RATIO, rounded down.
 *
 * @param model Model identifier.
 * @returns Threshold in tokens.
 */
function getCompactThreshold(model) {
    const contextWindow = getModelContextWindow(model);
    const scaled = contextWindow * exports.COMPACT_THRESHOLD_RATIO;
    return Math.floor(scaled);
}
|
|
36
|
+
//# sourceMappingURL=budget.js.map
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
|
-
import { Conversation } from "../agent/conversation";
|
|
2
|
+
import { Conversation, type Message } from "../agent/conversation";
|
|
3
3
|
export interface CompactResult {
|
|
4
4
|
tokensSaved: number;
|
|
5
5
|
preCompactTokens: number;
|
|
@@ -7,7 +7,15 @@ export interface CompactResult {
|
|
|
7
7
|
preCompactMessages: number;
|
|
8
8
|
postCompactMessages: number;
|
|
9
9
|
summary: string;
|
|
10
|
+
activeTask?: string;
|
|
11
|
+
trigger: "manual" | "auto";
|
|
10
12
|
}
|
|
11
|
-
export
|
|
13
|
+
export interface CompactOptions {
|
|
14
|
+
trigger?: "manual" | "auto";
|
|
15
|
+
activeTask?: string;
|
|
16
|
+
keepTail?: number;
|
|
17
|
+
}
|
|
18
|
+
export declare function extractActiveTask(messages: Message[]): string;
|
|
19
|
+
export declare function compactConversation(client: OpenAI, model: string, conversation: Conversation, sessionsDir?: string, options?: CompactOptions): Promise<CompactResult>;
|
|
12
20
|
export declare function uncompactConversation(conversation: Conversation, sessionsDir?: string): boolean;
|
|
13
21
|
//# sourceMappingURL=service.d.ts.map
|