@llmtune/cli 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,10 +6,10 @@ AI CLI Agent for your terminal, powered by [llmtune.io](https://llmtune.io).
6
6
 
7
7
  ```bash
8
8
  # Install globally
9
- npm install -g llmtune
9
+ npm install -g @llmtune/cli
10
10
 
11
11
  # Or run directly
12
- npx llmtune
12
+ npx @llmtune/cli
13
13
 
14
14
  # Configure your API key
15
15
  llmtune login
@@ -1,12 +1,17 @@
1
1
  import OpenAI from "openai";
2
2
  import { ToolRegistry } from "../tools/registry";
3
3
  import { Conversation } from "./conversation";
4
+ import { PermissionManager } from "../tools/permissions";
4
5
  export interface AgentLoopConfig {
5
6
  model?: string;
6
7
  maxTurns?: number;
7
8
  verbose?: boolean;
9
+ stream?: boolean;
8
10
  cwd: string;
9
11
  workspaceRoot: string;
12
+ permissions?: PermissionManager;
13
+ /** When true, skip adding userInput (already in conversation). */
14
+ skipUserInput?: boolean;
10
15
  }
11
16
  export interface AgentLoopResult {
12
17
  finalText: string;
@@ -5,11 +5,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.runAgentLoop = runAgentLoop;
7
7
  const builder_1 = require("../context/builder");
8
+ const auto_compact_1 = require("../compact/auto-compact");
9
+ const service_1 = require("../memory/service");
10
+ const tokens_1 = require("../utils/tokens");
8
11
  const chalk_1 = __importDefault(require("chalk"));
9
12
  async function runAgentLoop(client, conversation, registry, userInput, config, onTextChunk) {
10
13
  const model = config.model ?? "z-ai/GLM-5.1";
11
14
  const maxTurns = config.maxTurns ?? 20;
12
- conversation.addUserMessage(userInput);
15
+ const useStream = config.stream !== false;
16
+ if (!config.skipUserInput) {
17
+ conversation.addUserMessage(userInput);
18
+ (0, service_1.saveActiveTask)(userInput);
19
+ }
13
20
  const toolSpecs = registry.listSpecs();
14
21
  const openaiTools = toolSpecs.map((spec) => ({
15
22
  type: "function",
@@ -22,101 +29,32 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
22
29
  const contextResult = await (0, builder_1.buildContextPrompt)(config.workspaceRoot, config.cwd, {
23
30
  model: config.model,
24
31
  });
25
- const contextPrompt = contextResult.prompt;
32
+ const memoryPrompt = (0, service_1.buildMemoryPrompt)();
33
+ const contextPrompt = memoryPrompt
34
+ ? `${contextResult.prompt}\n\n${memoryPrompt}`
35
+ : contextResult.prompt;
36
+ const toolSchemaTokens = (0, tokens_1.estimateTokens)(JSON.stringify(toolSpecs));
37
+ await (0, auto_compact_1.maybeAutoCompact)({
38
+ client,
39
+ model,
40
+ conversation,
41
+ systemPrompt: contextPrompt,
42
+ toolSchemaTokens,
43
+ });
26
44
  let totalToolCalls = 0;
27
45
  let totalTokensIn = 0;
28
46
  let totalTokensOut = 0;
29
47
  let turns = 0;
30
48
  let finalText = "";
31
49
  for (let turn = 0; turn < maxTurns; turn++) {
32
- const apiMessages = conversation.getApiMessages();
33
- const systemMessage = {
34
- role: "system",
35
- content: contextPrompt,
36
- };
37
- const allMessages = [
38
- systemMessage,
39
- ...apiMessages.map((msg) => {
40
- if (msg.role === "system")
41
- return { role: "system", content: msg.content };
42
- if (msg.role === "user")
43
- return { role: "user", content: msg.content };
44
- if (msg.role === "assistant") {
45
- const m = {
46
- role: "assistant",
47
- content: msg.content || null,
48
- };
49
- if (msg.toolCalls && msg.toolCalls.length > 0) {
50
- m.tool_calls = msg.toolCalls.map((tc) => ({
51
- id: tc.id,
52
- type: "function",
53
- function: { name: tc.function.name, arguments: tc.function.arguments },
54
- }));
55
- }
56
- return m;
57
- }
58
- if (msg.role === "tool") {
59
- return {
60
- role: "tool",
61
- tool_call_id: msg.toolCallId ?? "",
62
- content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
63
- };
64
- }
65
- return { role: "user", content: msg.content };
66
- }),
67
- ];
68
- const stream = await client.chat.completions.create({
69
- model,
70
- messages: allMessages,
71
- tools: openaiTools.length > 0 ? openaiTools : undefined,
72
- stream: true,
73
- temperature: 0.7,
74
- max_tokens: 16384,
75
- });
76
- let assistantContent = "";
77
- const toolCalls = [];
78
- let currentToolCall = null;
79
- for await (const chunk of stream) {
80
- const delta = chunk.choices[0]?.delta;
81
- if (!delta)
82
- continue;
83
- if (delta.content) {
84
- assistantContent += delta.content;
85
- if (onTextChunk)
86
- onTextChunk(delta.content);
87
- else
88
- process.stdout.write(delta.content);
89
- }
90
- if (delta.tool_calls) {
91
- for (const tc of delta.tool_calls) {
92
- if (tc.id && tc.function?.name) {
93
- currentToolCall = {
94
- id: tc.id,
95
- name: tc.function.name,
96
- arguments: tc.function.arguments ?? "",
97
- };
98
- toolCalls.push({
99
- id: tc.id,
100
- type: "function",
101
- function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
102
- });
103
- }
104
- else if (currentToolCall && tc.function?.arguments) {
105
- currentToolCall.arguments += tc.function.arguments;
106
- const last = toolCalls[toolCalls.length - 1];
107
- if (last)
108
- last.function.arguments = currentToolCall.arguments;
109
- }
110
- }
111
- }
112
- if (chunk.usage) {
113
- totalTokensIn += chunk.usage.prompt_tokens ?? 0;
114
- totalTokensOut += chunk.usage.completion_tokens ?? 0;
115
- }
116
- }
117
- if (!onTextChunk)
118
- console.log();
50
+ const allMessages = buildApiMessages(conversation, contextPrompt);
51
+ const turnResult = useStream
52
+ ? await runStreamingTurn(client, model, allMessages, openaiTools, onTextChunk)
53
+ : await runBufferedTurn(client, model, allMessages, openaiTools);
54
+ totalTokensIn += turnResult.tokensIn;
55
+ totalTokensOut += turnResult.tokensOut;
119
56
  turns++;
57
+ const { assistantContent, toolCalls } = turnResult;
120
58
  if (toolCalls.length === 0) {
121
59
  conversation.addAssistantMessage(assistantContent);
122
60
  finalText = assistantContent;
@@ -134,6 +72,18 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
134
72
  }
135
73
  const summary = summarizeToolInput(tc.function.name, toolInput);
136
74
  console.log(chalk_1.default.cyan(` ▶ ${tc.function.name}`) + chalk_1.default.dim(` ${summary}`));
75
+ if (config.permissions) {
76
+ const tool = registry.get(tc.function.name);
77
+ const spec = tool?.spec();
78
+ const isDestructive = spec?.isDestructive === true;
79
+ const perm = await config.permissions.check(tc.function.name, toolInput, isDestructive);
80
+ if (perm.behavior === "deny") {
81
+ const denyMsg = perm.message ?? "User denied tool execution";
82
+ console.log(chalk_1.default.yellow(` ⊘ ${tc.function.name}: ${denyMsg}`));
83
+ conversation.addToolResult(tc.id, `Denied: ${denyMsg}`);
84
+ continue;
85
+ }
86
+ }
137
87
  const toolCtx = {
138
88
  workspaceRoot: config.workspaceRoot,
139
89
  cwd: config.cwd,
@@ -154,6 +104,130 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
154
104
  }
155
105
  return { finalText, totalToolCalls, totalTokensIn, totalTokensOut, turns };
156
106
  }
107
/**
 * Build the message array for a chat-completions request.
 *
 * The result always starts with a system message carrying `contextPrompt`,
 * followed by every message returned by `conversation.getApiMessages()`
 * converted to the wire shape:
 *   - "system" / "user": role and content are copied through.
 *   - "assistant": empty content becomes `null`; `tool_calls` is attached
 *     only when the message recorded at least one tool call.
 *   - "tool": content is always a string (non-strings are JSON-encoded).
 *   - any other role is sent as a "user" message.
 *
 * @param conversation Object exposing `getApiMessages()`.
 * @param contextPrompt Text of the leading system message.
 * @returns Array of request messages, system prompt first.
 */
function buildApiMessages(conversation, contextPrompt) {
    const toWireMessage = (msg) => {
        switch (msg.role) {
            case "system":
                return { role: "system", content: msg.content };
            case "user":
                return { role: "user", content: msg.content };
            case "assistant": {
                const content = msg.content || null;
                if (msg.toolCalls && msg.toolCalls.length > 0) {
                    return {
                        role: "assistant",
                        content,
                        tool_calls: msg.toolCalls.map((tc) => ({
                            id: tc.id,
                            type: "function",
                            function: { name: tc.function.name, arguments: tc.function.arguments },
                        })),
                    };
                }
                return { role: "assistant", content };
            }
            case "tool": {
                let content;
                if (typeof msg.content === "string") {
                    content = msg.content;
                }
                else {
                    // JSON.stringify(undefined) is undefined, which would drop the
                    // `content` key entirely once the request body is serialized.
                    const serialized = JSON.stringify(msg.content);
                    content = serialized === undefined ? "" : serialized;
                }
                return {
                    role: "tool",
                    tool_call_id: msg.toolCallId ?? "",
                    content,
                };
            }
            default:
                // Unknown roles are forwarded as user messages.
                return { role: "user", content: msg.content };
        }
    };
    return [
        { role: "system", content: contextPrompt },
        ...conversation.getApiMessages().map(toWireMessage),
    ];
}
144
/**
 * Run one model turn with `stream: true` and collect the streamed result.
 *
 * Text deltas are forwarded to `onTextChunk` when it is provided; otherwise
 * they are written to stdout and a trailing newline is printed at the end.
 *
 * @param client OpenAI-compatible client (`client.chat.completions.create`).
 * @param model Model identifier for the request.
 * @param messages Request messages.
 * @param openaiTools Tool definitions; omitted from the request when empty.
 * @param onTextChunk Optional callback receiving each text delta.
 * @returns `{ assistantContent, toolCalls, tokensIn, tokensOut }`.
 */
async function runStreamingTurn(client, model, messages, openaiTools, onTextChunk) {
    const stream = await client.chat.completions.create({
        model,
        messages,
        tools: openaiTools.length > 0 ? openaiTools : undefined,
        stream: true,
        temperature: 0.7,
        max_tokens: 16384,
    });
    let assistantContent = "";
    const toolCalls = [];
    // Tool calls keyed by the `index` the stream assigns to them, so argument
    // fragments of parallel calls land on the right call even when interleaved.
    const callsByIndex = new Map();
    let lastCall = null;
    let tokensIn = 0;
    let tokensOut = 0;
    for await (const chunk of stream) {
        // Read usage before looking at the delta: in the OpenAI streaming format
        // the usage figures arrive on a chunk whose `choices` array is empty,
        // which the delta guard below would otherwise skip.
        if (chunk.usage) {
            tokensIn += chunk.usage.prompt_tokens ?? 0;
            tokensOut += chunk.usage.completion_tokens ?? 0;
        }
        const delta = chunk.choices?.[0]?.delta;
        if (!delta)
            continue;
        if (delta.content) {
            assistantContent += delta.content;
            if (onTextChunk)
                onTextChunk(delta.content);
            else
                process.stdout.write(delta.content);
        }
        if (!delta.tool_calls)
            continue;
        for (const tc of delta.tool_calls) {
            const hasIndex = typeof tc.index === "number";
            const indexed = hasIndex ? callsByIndex.get(tc.index) : undefined;
            const announcesCall = Boolean(tc.id && tc.function?.name);
            // A fragment opens a new call when it carries id + name and no call
            // with that id is already registered under its index.
            if (announcesCall && (!indexed || indexed.id !== tc.id)) {
                const call = {
                    id: tc.id,
                    type: "function",
                    function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
                };
                toolCalls.push(call);
                if (hasIndex)
                    callsByIndex.set(tc.index, call);
                lastCall = call;
                continue;
            }
            // Continuation fragment: prefer the call registered for this index and
            // fall back to the most recently opened call when no index is given.
            const target = indexed ?? lastCall;
            if (target && tc.function?.arguments) {
                target.function.arguments += tc.function.arguments;
            }
        }
    }
    if (!onTextChunk)
        console.log();
    return { assistantContent, toolCalls, tokensIn, tokensOut };
}
200
/**
 * Run one model turn with `stream: false` and return the complete result.
 *
 * The assistant text is delivered the same way the streaming helper does it:
 * through `onTextChunk` when a callback is supplied, otherwise to stdout
 * followed by a newline. Nothing is emitted when the text is empty.
 *
 * @param client OpenAI-compatible client (`client.chat.completions.create`).
 * @param model Model identifier for the request.
 * @param messages Request messages.
 * @param openaiTools Tool definitions; omitted from the request when empty.
 * @param onTextChunk Optional callback receiving the whole text in one call.
 *   Callers that pass only four arguments keep the stdout behaviour.
 * @returns `{ assistantContent, toolCalls, tokensIn, tokensOut }`; missing
 *   choices, tool calls or usage figures are reported as "", [] and 0.
 */
async function runBufferedTurn(client, model, messages, openaiTools, onTextChunk = undefined) {
    const response = await client.chat.completions.create({
        model,
        messages,
        tools: openaiTools.length > 0 ? openaiTools : undefined,
        stream: false,
        temperature: 0.7,
        max_tokens: 16384,
    });
    const message = response.choices[0]?.message;
    const assistantContent = message?.content ?? "";
    const rawToolCalls = message?.tool_calls ?? [];
    const toolCalls = rawToolCalls.map((tc) => ({
        id: tc.id,
        type: "function",
        function: {
            name: tc.function.name,
            arguments: tc.function.arguments ?? "",
        },
    }));
    if (assistantContent) {
        if (onTextChunk) {
            onTextChunk(assistantContent);
        }
        else {
            process.stdout.write(assistantContent);
            console.log();
        }
    }
    return {
        assistantContent,
        toolCalls,
        tokensIn: response.usage?.prompt_tokens ?? 0,
        tokensOut: response.usage?.completion_tokens ?? 0,
    };
}
157
231
  function summarizeToolInput(name, input) {
158
232
  const n = name.toLowerCase();
159
233
  if (n === "bash") {
@@ -7,15 +7,24 @@ exports.createClient = createClient;
7
7
  exports.getDefaultModel = getDefaultModel;
8
8
  const openai_1 = __importDefault(require("openai"));
9
9
  const config_1 = require("./config");
10
+ const version_1 = require("../version");
10
11
  function createClient() {
11
12
  const apiKey = (0, config_1.loadConfig)().apiKey;
12
13
  if (!apiKey) {
13
14
  console.error("Not logged in. Run: llmtune login");
14
15
  process.exit(1);
15
16
  }
17
+ const cwd = process.cwd();
16
18
  return new openai_1.default({
17
19
  apiKey,
18
20
  baseURL: (0, config_1.getApiBase)(),
21
+ defaultHeaders: {
22
+ "X-LLMTune-Client": "cli",
23
+ "X-LLMTune-CLI-Version": version_1.CLI_VERSION,
24
+ "X-LLMTune-Context-Managed": "true",
25
+ "X-Workspace-Root": cwd,
26
+ "X-CWD": cwd,
27
+ },
19
28
  });
20
29
  }
21
30
  function getDefaultModel() {
@@ -0,0 +1,25 @@
1
+ import OpenAI from "openai";
2
+ import { Conversation } from "../agent/conversation";
3
+ import { type CompactResult } from "./service";
4
/**
 * Inputs for `maybeAutoCompact`.
 */
export interface AutoCompactOptions {
  /** Client handed to the summarization call when compaction is needed. */
  client: OpenAI;
  /** Model identifier; also selects the token threshold for compaction. */
  model: string;
  /** Conversation whose `messages` array is inspected and rewritten in place. */
  conversation: Conversation;
  /** System prompt text, counted towards the session token estimate. */
  systemPrompt: string;
  /** Pre-computed token estimate of the tool schemas, added to the session estimate. */
  toolSchemaTokens: number;
  /** Conversations with fewer messages than this are never summarized; defaults to 8. */
  minMessages?: number;
}
12
/**
 * Outcome reported by `maybeAutoCompact`.
 */
export interface AutoCompactResult {
  /** True only when the LLM summarization pass ran. */
  compacted: boolean;
  /** Result of the summarization pass; present only when `compacted` is true. */
  result?: CompactResult;
  /** Tokens saved by the microcompact pass, or 0 when it saved nothing. */
  microcompactTokensSaved: number;
  /** Session token estimate taken after the last pass that ran. */
  estimatedTokens: number;
  /** Token threshold for the model that the estimate was compared against. */
  threshold: number;
}
19
/**
 * Estimate the token footprint of a session: system prompt, tool schemas and
 * the role/content of every conversation message.
 */
export declare function estimateSessionTokens(conversation: Conversation, systemPrompt: string, toolSchemaTokens: number): number;
/**
 * Microcompact verbose tool results, then LLM-summarize older messages if still over budget.
 *
 * Summarization is skipped when the conversation has fewer than `minMessages`
 * messages or the estimate is at or below the model's threshold.
 */
export declare function maybeAutoCompact(options: AutoCompactOptions): Promise<AutoCompactResult>;
/**
 * Print a notice describing a finished compaction: message counts before and
 * after, tokens saved, the preserved active task (truncated to 120 characters)
 * when one is given, and a hint about `/uncompact`.
 */
export declare function printCompactionNotice(result: CompactResult, trigger: "manual" | "auto", activeTask?: string): void;
25
+ //# sourceMappingURL=auto-compact.d.ts.map
@@ -0,0 +1,65 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.estimateSessionTokens = estimateSessionTokens;
7
+ exports.maybeAutoCompact = maybeAutoCompact;
8
+ exports.printCompactionNotice = printCompactionNotice;
9
+ const chalk_1 = __importDefault(require("chalk"));
10
+ const service_1 = require("./service");
11
+ const microcompact_1 = require("./microcompact");
12
+ const tokens_1 = require("../utils/tokens");
13
+ const budget_1 = require("./budget");
14
+ const service_2 = require("../memory/service");
15
/**
 * Estimate how many tokens the session currently occupies.
 *
 * Only the `role` and `content` of each conversation message are passed to
 * the message estimator; the system prompt estimate and the caller-supplied
 * tool schema estimate are added on top.
 *
 * @param conversation Object exposing a `messages` array.
 * @param systemPrompt System prompt text.
 * @param toolSchemaTokens Token estimate of the tool schemas.
 * @returns Sum of the three estimates.
 */
function estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens) {
    const slimMessages = conversation.messages.map(({ role, content }) => ({ role, content }));
    const historyTokens = (0, tokens_1.estimateMessagesTokens)(slimMessages);
    const promptTokens = (0, tokens_1.estimateTokens)(systemPrompt);
    return promptTokens + toolSchemaTokens + historyTokens;
}
22
/**
 * Keep the conversation under the model's compaction threshold.
 *
 * Step 1: run the microcompact pass over the messages; when it reports
 * savings, the conversation's `messages` array is refilled in place with the
 * microcompacted messages.
 * Step 2: if the conversation has at least `minMessages` messages and the
 * session estimate is still above the threshold, summarize it through
 * `compactConversation` (trigger "auto", carrying the active task) and print
 * a compaction notice.
 *
 * @param options See `AutoCompactOptions`; `minMessages` defaults to 8.
 * @returns What happened, with the final token estimate and the threshold.
 */
async function maybeAutoCompact(options) {
    const { client, model, conversation, systemPrompt, toolSchemaTokens, minMessages = 8, } = options;
    const threshold = (0, budget_1.getCompactThreshold)(model);
    const microPass = (0, microcompact_1.microcompactMessages)(conversation.messages);
    let microcompactTokensSaved = 0;
    if (microPass.tokensSaved > 0) {
        // Refill the existing array so outside references to it stay valid.
        conversation.messages.length = 0;
        conversation.messages.push(...microPass.compacted);
        microcompactTokensSaved = microPass.tokensSaved;
    }
    const sizeAfterMicro = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
    const historyTooShort = conversation.messages.length < minMessages;
    if (historyTooShort || sizeAfterMicro <= threshold) {
        return {
            compacted: false,
            microcompactTokensSaved,
            estimatedTokens: sizeAfterMicro,
            threshold,
        };
    }
    const activeTask = (0, service_2.getActiveTask)();
    const summarizeOptions = { trigger: "auto", activeTask };
    const result = await (0, service_1.compactConversation)(client, model, conversation, undefined, summarizeOptions);
    const sizeAfterSummary = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
    printCompactionNotice(result, "auto", activeTask);
    return {
        compacted: true,
        result,
        microcompactTokensSaved,
        estimatedTokens: sizeAfterSummary,
        threshold,
    };
}
54
/**
 * Print a compaction notice to the console, framed by blank lines.
 *
 * @param result Compaction result; its `preCompactMessages`,
 *   `postCompactMessages` and `tokensSaved` fields are displayed.
 * @param trigger "auto" labels the notice "Auto-compacted"; anything else "Compacted".
 * @param activeTask Optional task text, shown truncated to 120 characters.
 */
function printCompactionNotice(result, trigger, activeTask) {
    let label;
    if (trigger === "auto") {
        label = "Auto-compacted";
    }
    else {
        label = "Compacted";
    }
    console.log("");
    const counts = `${result.preCompactMessages} messages → ${result.postCompactMessages} `;
    const savings = `(~${result.tokensSaved.toLocaleString()} tokens saved)`;
    console.log(chalk_1.default.yellow(`⚠ ${label}: ` + counts + savings));
    if (activeTask) {
        const preview = activeTask.slice(0, 120);
        const ellipsis = activeTask.length > 120 ? "..." : "";
        console.log(chalk_1.default.dim(` Active task preserved: "${preview}${ellipsis}"`));
    }
    console.log(chalk_1.default.dim(" Use /uncompact to restore full history."));
    console.log("");
}
65
+ //# sourceMappingURL=auto-compact.js.map
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Model context windows and auto-compaction thresholds for the CLI.
3
+ */
4
/** Fraction of a model's context window used as the compaction threshold. */
export declare const COMPACT_THRESHOLD_RATIO = 0.75;
/** NOTE(review): presumably the number of trailing messages kept verbatim during compaction — its consumer is not visible here; confirm. */
export declare const KEEP_TAIL_MESSAGES = 6;
/**
 * Context window size for `model`: a built-in table is consulted first (exact
 * name, then trimmed lower-case name), then a positive integer in the
 * `LLMTUNE_CONTEXT_WINDOW` environment variable, then a fixed default.
 */
export declare function getModelContextWindow(model: string): number;
/** Context window of `model` multiplied by `COMPACT_THRESHOLD_RATIO`, rounded down. */
export declare function getCompactThreshold(model: string): number;
8
+ //# sourceMappingURL=budget.d.ts.map
@@ -0,0 +1,36 @@
1
+ "use strict";
2
+ /**
3
+ * Model context windows and auto-compaction thresholds for the CLI.
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.KEEP_TAIL_MESSAGES = exports.COMPACT_THRESHOLD_RATIO = void 0;
7
+ exports.getModelContextWindow = getModelContextWindow;
8
+ exports.getCompactThreshold = getCompactThreshold;
9
// Context window sizes (in tokens) for models the CLI knows about.
// The GLM entry is listed in both spellings so that the exact-name lookup and
// the lower-cased lookup both find it.
const MODEL_CONTEXT_WINDOWS = {
    "z-ai/GLM-5.1": 128_000,
    "z-ai/glm-5.1": 128_000,
    "gpt-4o": 128_000,
    "gpt-4o-mini": 128_000,
    "claude-sonnet-4": 200_000,
};
// Window assumed for models missing from the table when no override is set.
const DEFAULT_CONTEXT_WINDOW = 64_000;
// Compaction threshold as a fraction of the model's context window.
exports.COMPACT_THRESHOLD_RATIO = 0.75;
// NOTE(review): consumer not visible here; presumably the count of trailing
// messages preserved by compaction — confirm.
exports.KEEP_TAIL_MESSAGES = 6;
19
/**
 * Resolve the context window size (in tokens) for a model.
 *
 * Resolution order:
 *   1. the built-in table, by exact model name;
 *   2. the built-in table, by trimmed lower-case model name;
 *   3. the `LLMTUNE_CONTEXT_WINDOW` environment variable, when it parses to a
 *      positive integer;
 *   4. the default window.
 *
 * @param model Model identifier.
 * @returns Context window size in tokens.
 */
function getModelContextWindow(model) {
    // Only own entries count. A bare `table[model]` would also resolve names
    // inherited from Object.prototype (e.g. "constructor", "toString") and
    // hand back a function instead of a number.
    const lookup = (key) => Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, key)
        ? MODEL_CONTEXT_WINDOWS[key]
        : undefined;
    const known = lookup(model) || lookup(model.trim().toLowerCase());
    if (known)
        return known;
    const env = process.env.LLMTUNE_CONTEXT_WINDOW;
    if (env) {
        const parsed = Number.parseInt(env, 10);
        if (Number.isFinite(parsed) && parsed > 0)
            return parsed;
    }
    return DEFAULT_CONTEXT_WINDOW;
}
33
/**
 * Token count above which auto-compaction is considered for `model`.
 *
 * @param model Model identifier.
 * @returns The model's context window scaled by the threshold ratio, rounded down.
 */
function getCompactThreshold(model) {
    const windowSize = getModelContextWindow(model);
    const scaled = windowSize * exports.COMPACT_THRESHOLD_RATIO;
    return Math.floor(scaled);
}
36
+ //# sourceMappingURL=budget.js.map
@@ -1,5 +1,5 @@
1
1
  import OpenAI from "openai";
2
- import { Conversation } from "../agent/conversation";
2
+ import { Conversation, type Message } from "../agent/conversation";
3
3
  export interface CompactResult {
4
4
  tokensSaved: number;
5
5
  preCompactTokens: number;
@@ -7,7 +7,15 @@ export interface CompactResult {
7
7
  preCompactMessages: number;
8
8
  postCompactMessages: number;
9
9
  summary: string;
10
+ activeTask?: string;
11
+ trigger: "manual" | "auto";
10
12
  }
11
- export declare function compactConversation(client: OpenAI, model: string, conversation: Conversation, sessionsDir?: string): Promise<CompactResult>;
13
/**
 * Optional settings for `compactConversation`.
 */
export interface CompactOptions {
  /** What initiated the compaction; auto-compaction passes "auto". */
  trigger?: "manual" | "auto";
  /** Text of the task currently being worked on; auto-compaction passes the stored active task. */
  activeTask?: string;
  /** NOTE(review): presumably how many trailing messages to keep unsummarized — the implementation is not visible here; confirm. */
  keepTail?: number;
}
18
/**
 * Derive an active-task description from a list of messages.
 * NOTE(review): the selection rule is not visible in this declaration — confirm against the implementation.
 */
export declare function extractActiveTask(messages: Message[]): string;
/**
 * Compact `conversation` using `client` and `model`, and report the outcome.
 * Auto-compaction calls this with `sessionsDir` left undefined and
 * `{ trigger: "auto", activeTask }` as options.
 */
export declare function compactConversation(client: OpenAI, model: string, conversation: Conversation, sessionsDir?: string, options?: CompactOptions): Promise<CompactResult>;
/**
 * Counterpart of `compactConversation`.
 * NOTE(review): presumably restores the pre-compaction history and returns whether it succeeded — confirm.
 */
export declare function uncompactConversation(conversation: Conversation, sessionsDir?: string): boolean;
13
21
  //# sourceMappingURL=service.d.ts.map