@llmtune/cli 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,10 +6,10 @@ AI CLI Agent for your terminal, powered by [llmtune.io](https://llmtune.io).
6
6
 
7
7
  ```bash
8
8
  # Install globally
9
- npm install -g llmtune
9
+ npm install -g @llmtune/cli
10
10
 
11
11
  # Or run directly
12
- npx llmtune
12
+ npx @llmtune/cli
13
13
 
14
14
  # Configure your API key
15
15
  llmtune login
@@ -1,12 +1,17 @@
1
1
  import OpenAI from "openai";
2
2
  import { ToolRegistry } from "../tools/registry";
3
3
  import { Conversation } from "./conversation";
4
+ import { PermissionManager } from "../tools/permissions";
4
5
  export interface AgentLoopConfig {
5
6
  model?: string;
6
7
  maxTurns?: number;
7
8
  verbose?: boolean;
9
+ stream?: boolean;
8
10
  cwd: string;
9
11
  workspaceRoot: string;
12
+ permissions?: PermissionManager;
13
+ /** When true, skip adding userInput (already in conversation). */
14
+ skipUserInput?: boolean;
10
15
  }
11
16
  export interface AgentLoopResult {
12
17
  finalText: string;
@@ -5,11 +5,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.runAgentLoop = runAgentLoop;
7
7
  const builder_1 = require("../context/builder");
8
+ const auto_compact_1 = require("../compact/auto-compact");
9
+ const service_1 = require("../memory/service");
10
+ const tokens_1 = require("../utils/tokens");
8
11
  const chalk_1 = __importDefault(require("chalk"));
9
12
  async function runAgentLoop(client, conversation, registry, userInput, config, onTextChunk) {
10
13
  const model = config.model ?? "z-ai/GLM-5.1";
11
14
  const maxTurns = config.maxTurns ?? 20;
12
- conversation.addUserMessage(userInput);
15
+ const useStream = config.stream !== false;
16
+ if (!config.skipUserInput) {
17
+ conversation.addUserMessage(userInput);
18
+ (0, service_1.saveActiveTask)(userInput);
19
+ }
13
20
  const toolSpecs = registry.listSpecs();
14
21
  const openaiTools = toolSpecs.map((spec) => ({
15
22
  type: "function",
@@ -22,101 +29,32 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
22
29
  const contextResult = await (0, builder_1.buildContextPrompt)(config.workspaceRoot, config.cwd, {
23
30
  model: config.model,
24
31
  });
25
- const contextPrompt = contextResult.prompt;
32
+ const memoryPrompt = (0, service_1.buildMemoryPrompt)();
33
+ const contextPrompt = memoryPrompt
34
+ ? `${contextResult.prompt}\n\n${memoryPrompt}`
35
+ : contextResult.prompt;
36
+ const toolSchemaTokens = (0, tokens_1.estimateTokens)(JSON.stringify(toolSpecs));
37
+ await (0, auto_compact_1.maybeAutoCompact)({
38
+ client,
39
+ model,
40
+ conversation,
41
+ systemPrompt: contextPrompt,
42
+ toolSchemaTokens,
43
+ });
26
44
  let totalToolCalls = 0;
27
45
  let totalTokensIn = 0;
28
46
  let totalTokensOut = 0;
29
47
  let turns = 0;
30
48
  let finalText = "";
31
49
  for (let turn = 0; turn < maxTurns; turn++) {
32
- const apiMessages = conversation.getApiMessages();
33
- const systemMessage = {
34
- role: "system",
35
- content: contextPrompt,
36
- };
37
- const allMessages = [
38
- systemMessage,
39
- ...apiMessages.map((msg) => {
40
- if (msg.role === "system")
41
- return { role: "system", content: msg.content };
42
- if (msg.role === "user")
43
- return { role: "user", content: msg.content };
44
- if (msg.role === "assistant") {
45
- const m = {
46
- role: "assistant",
47
- content: msg.content || null,
48
- };
49
- if (msg.toolCalls && msg.toolCalls.length > 0) {
50
- m.tool_calls = msg.toolCalls.map((tc) => ({
51
- id: tc.id,
52
- type: "function",
53
- function: { name: tc.function.name, arguments: tc.function.arguments },
54
- }));
55
- }
56
- return m;
57
- }
58
- if (msg.role === "tool") {
59
- return {
60
- role: "tool",
61
- tool_call_id: msg.toolCallId ?? "",
62
- content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
63
- };
64
- }
65
- return { role: "user", content: msg.content };
66
- }),
67
- ];
68
- const stream = await client.chat.completions.create({
69
- model,
70
- messages: allMessages,
71
- tools: openaiTools.length > 0 ? openaiTools : undefined,
72
- stream: true,
73
- temperature: 0.7,
74
- max_tokens: 16384,
75
- });
76
- let assistantContent = "";
77
- const toolCalls = [];
78
- let currentToolCall = null;
79
- for await (const chunk of stream) {
80
- const delta = chunk.choices[0]?.delta;
81
- if (!delta)
82
- continue;
83
- if (delta.content) {
84
- assistantContent += delta.content;
85
- if (onTextChunk)
86
- onTextChunk(delta.content);
87
- else
88
- process.stdout.write(delta.content);
89
- }
90
- if (delta.tool_calls) {
91
- for (const tc of delta.tool_calls) {
92
- if (tc.id && tc.function?.name) {
93
- currentToolCall = {
94
- id: tc.id,
95
- name: tc.function.name,
96
- arguments: tc.function.arguments ?? "",
97
- };
98
- toolCalls.push({
99
- id: tc.id,
100
- type: "function",
101
- function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
102
- });
103
- }
104
- else if (currentToolCall && tc.function?.arguments) {
105
- currentToolCall.arguments += tc.function.arguments;
106
- const last = toolCalls[toolCalls.length - 1];
107
- if (last)
108
- last.function.arguments = currentToolCall.arguments;
109
- }
110
- }
111
- }
112
- if (chunk.usage) {
113
- totalTokensIn += chunk.usage.prompt_tokens ?? 0;
114
- totalTokensOut += chunk.usage.completion_tokens ?? 0;
115
- }
116
- }
117
- if (!onTextChunk)
118
- console.log();
50
+ const allMessages = buildApiMessages(conversation, contextPrompt);
51
+ const turnResult = useStream
52
+ ? await runStreamingTurn(client, model, allMessages, openaiTools, onTextChunk)
53
+ : await runBufferedTurn(client, model, allMessages, openaiTools);
54
+ totalTokensIn += turnResult.tokensIn;
55
+ totalTokensOut += turnResult.tokensOut;
119
56
  turns++;
57
+ const { assistantContent, toolCalls } = turnResult;
120
58
  if (toolCalls.length === 0) {
121
59
  conversation.addAssistantMessage(assistantContent);
122
60
  finalText = assistantContent;
@@ -134,6 +72,18 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
134
72
  }
135
73
  const summary = summarizeToolInput(tc.function.name, toolInput);
136
74
  console.log(chalk_1.default.cyan(` ▶ ${tc.function.name}`) + chalk_1.default.dim(` ${summary}`));
75
+ if (config.permissions) {
76
+ const tool = registry.get(tc.function.name);
77
+ const spec = tool?.spec();
78
+ const isDestructive = spec?.isDestructive === true;
79
+ const perm = await config.permissions.check(tc.function.name, toolInput, isDestructive);
80
+ if (perm.behavior === "deny") {
81
+ const denyMsg = perm.message ?? "User denied tool execution";
82
+ console.log(chalk_1.default.yellow(` ⊘ ${tc.function.name}: ${denyMsg}`));
83
+ conversation.addToolResult(tc.id, `Denied: ${denyMsg}`);
84
+ continue;
85
+ }
86
+ }
137
87
  const toolCtx = {
138
88
  workspaceRoot: config.workspaceRoot,
139
89
  cwd: config.cwd,
@@ -154,6 +104,130 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
154
104
  }
155
105
  return { finalText, totalToolCalls, totalTokensIn, totalTokensOut, turns };
156
106
  }
107
+ function buildApiMessages(conversation, contextPrompt) {
108
+ const systemMessage = {
109
+ role: "system",
110
+ content: contextPrompt,
111
+ };
112
+ return [
113
+ systemMessage,
114
+ ...conversation.getApiMessages().map((msg) => {
115
+ if (msg.role === "system")
116
+ return { role: "system", content: msg.content };
117
+ if (msg.role === "user")
118
+ return { role: "user", content: msg.content };
119
+ if (msg.role === "assistant") {
120
+ const m = {
121
+ role: "assistant",
122
+ content: msg.content || null,
123
+ };
124
+ if (msg.toolCalls && msg.toolCalls.length > 0) {
125
+ m.tool_calls = msg.toolCalls.map((tc) => ({
126
+ id: tc.id,
127
+ type: "function",
128
+ function: { name: tc.function.name, arguments: tc.function.arguments },
129
+ }));
130
+ }
131
+ return m;
132
+ }
133
+ if (msg.role === "tool") {
134
+ return {
135
+ role: "tool",
136
+ tool_call_id: msg.toolCallId ?? "",
137
+ content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
138
+ };
139
+ }
140
+ return { role: "user", content: msg.content };
141
+ }),
142
+ ];
143
+ }
144
+ async function runStreamingTurn(client, model, messages, openaiTools, onTextChunk) {
145
+ const stream = await client.chat.completions.create({
146
+ model,
147
+ messages,
148
+ tools: openaiTools.length > 0 ? openaiTools : undefined,
149
+ stream: true,
150
+ temperature: 0.7,
151
+ max_tokens: 16384,
152
+ });
153
+ let assistantContent = "";
154
+ const toolCalls = [];
155
+ let currentToolCall = null;
156
+ let tokensIn = 0;
157
+ let tokensOut = 0;
158
+ for await (const chunk of stream) {
159
+ const delta = chunk.choices[0]?.delta;
160
+ if (!delta)
161
+ continue;
162
+ if (delta.content) {
163
+ assistantContent += delta.content;
164
+ if (onTextChunk)
165
+ onTextChunk(delta.content);
166
+ else
167
+ process.stdout.write(delta.content);
168
+ }
169
+ if (delta.tool_calls) {
170
+ for (const tc of delta.tool_calls) {
171
+ if (tc.id && tc.function?.name) {
172
+ currentToolCall = {
173
+ id: tc.id,
174
+ name: tc.function.name,
175
+ arguments: tc.function.arguments ?? "",
176
+ };
177
+ toolCalls.push({
178
+ id: tc.id,
179
+ type: "function",
180
+ function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
181
+ });
182
+ }
183
+ else if (currentToolCall && tc.function?.arguments) {
184
+ currentToolCall.arguments += tc.function.arguments;
185
+ const last = toolCalls[toolCalls.length - 1];
186
+ if (last)
187
+ last.function.arguments = currentToolCall.arguments;
188
+ }
189
+ }
190
+ }
191
+ if (chunk.usage) {
192
+ tokensIn += chunk.usage.prompt_tokens ?? 0;
193
+ tokensOut += chunk.usage.completion_tokens ?? 0;
194
+ }
195
+ }
196
+ if (!onTextChunk)
197
+ console.log();
198
+ return { assistantContent, toolCalls, tokensIn, tokensOut };
199
+ }
200
+ async function runBufferedTurn(client, model, messages, openaiTools) {
201
+ const response = await client.chat.completions.create({
202
+ model,
203
+ messages,
204
+ tools: openaiTools.length > 0 ? openaiTools : undefined,
205
+ stream: false,
206
+ temperature: 0.7,
207
+ max_tokens: 16384,
208
+ });
209
+ const choice = response.choices[0];
210
+ const msg = choice?.message;
211
+ const assistantContent = msg?.content ?? "";
212
+ const toolCalls = (msg?.tool_calls ?? []).map((tc) => ({
213
+ id: tc.id,
214
+ type: "function",
215
+ function: {
216
+ name: tc.function.name,
217
+ arguments: tc.function.arguments ?? "",
218
+ },
219
+ }));
220
+ if (assistantContent) {
221
+ process.stdout.write(assistantContent);
222
+ console.log();
223
+ }
224
+ return {
225
+ assistantContent,
226
+ toolCalls,
227
+ tokensIn: response.usage?.prompt_tokens ?? 0,
228
+ tokensOut: response.usage?.completion_tokens ?? 0,
229
+ };
230
+ }
157
231
  function summarizeToolInput(name, input) {
158
232
  const n = name.toLowerCase();
159
233
  if (n === "bash") {
@@ -7,15 +7,24 @@ exports.createClient = createClient;
7
7
  exports.getDefaultModel = getDefaultModel;
8
8
  const openai_1 = __importDefault(require("openai"));
9
9
  const config_1 = require("./config");
10
+ const version_1 = require("../version");
10
11
  function createClient() {
11
12
  const apiKey = (0, config_1.loadConfig)().apiKey;
12
13
  if (!apiKey) {
13
14
  console.error("Not logged in. Run: llmtune login");
14
15
  process.exit(1);
15
16
  }
17
+ const cwd = process.cwd();
16
18
  return new openai_1.default({
17
19
  apiKey,
18
20
  baseURL: (0, config_1.getApiBase)(),
21
+ defaultHeaders: {
22
+ "X-LLMTune-Client": "cli",
23
+ "X-LLMTune-CLI-Version": version_1.CLI_VERSION,
24
+ "X-LLMTune-Context-Managed": "true",
25
+ "X-Workspace-Root": cwd,
26
+ "X-CWD": cwd,
27
+ },
19
28
  });
20
29
  }
21
30
  function getDefaultModel() {
@@ -0,0 +1,25 @@
1
+ import OpenAI from "openai";
2
+ import { Conversation } from "../agent/conversation";
3
+ import { type CompactResult } from "./service";
4
+ export interface AutoCompactOptions {
5
+ client: OpenAI;
6
+ model: string;
7
+ conversation: Conversation;
8
+ systemPrompt: string;
9
+ toolSchemaTokens: number;
10
+ minMessages?: number;
11
+ }
12
+ export interface AutoCompactResult {
13
+ compacted: boolean;
14
+ result?: CompactResult;
15
+ microcompactTokensSaved: number;
16
+ estimatedTokens: number;
17
+ threshold: number;
18
+ }
19
+ export declare function estimateSessionTokens(conversation: Conversation, systemPrompt: string, toolSchemaTokens: number): number;
20
+ /**
21
+ * Microcompact verbose tool results, then LLM-summarize older messages if still over budget.
22
+ */
23
+ export declare function maybeAutoCompact(options: AutoCompactOptions): Promise<AutoCompactResult>;
24
+ export declare function printCompactionNotice(result: CompactResult, trigger: "manual" | "auto", activeTask?: string): void;
25
+ //# sourceMappingURL=auto-compact.d.ts.map
@@ -0,0 +1,65 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.estimateSessionTokens = estimateSessionTokens;
7
+ exports.maybeAutoCompact = maybeAutoCompact;
8
+ exports.printCompactionNotice = printCompactionNotice;
9
+ const chalk_1 = __importDefault(require("chalk"));
10
+ const service_1 = require("./service");
11
+ const microcompact_1 = require("./microcompact");
12
+ const tokens_1 = require("../utils/tokens");
13
+ const budget_1 = require("./budget");
14
+ const service_2 = require("../memory/service");
15
+ function estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens) {
16
+ const messageTokens = (0, tokens_1.estimateMessagesTokens)(conversation.messages.map((m) => ({
17
+ role: m.role,
18
+ content: m.content,
19
+ })));
20
+ return (0, tokens_1.estimateTokens)(systemPrompt) + toolSchemaTokens + messageTokens;
21
+ }
22
+ /**
23
+ * Microcompact verbose tool results, then LLM-summarize older messages if still over budget.
24
+ */
25
+ async function maybeAutoCompact(options) {
26
+ const { client, model, conversation, systemPrompt, toolSchemaTokens, minMessages = 8, } = options;
27
+ const threshold = (0, budget_1.getCompactThreshold)(model);
28
+ let microcompactTokensSaved = 0;
29
+ const { compacted: microcompacted, tokensSaved } = (0, microcompact_1.microcompactMessages)(conversation.messages);
30
+ if (tokensSaved > 0) {
31
+ conversation.messages.length = 0;
32
+ conversation.messages.push(...microcompacted);
33
+ microcompactTokensSaved = tokensSaved;
34
+ }
35
+ let estimatedTokens = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
36
+ if (conversation.messages.length < minMessages || estimatedTokens <= threshold) {
37
+ return { compacted: false, microcompactTokensSaved, estimatedTokens, threshold };
38
+ }
39
+ const activeTask = (0, service_2.getActiveTask)();
40
+ const result = await (0, service_1.compactConversation)(client, model, conversation, undefined, {
41
+ trigger: "auto",
42
+ activeTask,
43
+ });
44
+ estimatedTokens = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
45
+ printCompactionNotice(result, "auto", activeTask);
46
+ return {
47
+ compacted: true,
48
+ result,
49
+ microcompactTokensSaved,
50
+ estimatedTokens,
51
+ threshold,
52
+ };
53
+ }
54
+ function printCompactionNotice(result, trigger, activeTask) {
55
+ const label = trigger === "auto" ? "Auto-compacted" : "Compacted";
56
+ console.log("");
57
+ console.log(chalk_1.default.yellow(`⚠ ${label}: ${result.preCompactMessages} messages → ${result.postCompactMessages} ` +
58
+ `(~${result.tokensSaved.toLocaleString()} tokens saved)`));
59
+ if (activeTask) {
60
+ console.log(chalk_1.default.dim(` Active task preserved: "${activeTask.slice(0, 120)}${activeTask.length > 120 ? "..." : ""}"`));
61
+ }
62
+ console.log(chalk_1.default.dim(" Use /uncompact to restore full history."));
63
+ console.log("");
64
+ }
65
+ //# sourceMappingURL=auto-compact.js.map
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Model context windows and auto-compaction thresholds for the CLI.
3
+ */
4
+ export declare const COMPACT_THRESHOLD_RATIO = 0.75;
5
+ export declare const KEEP_TAIL_MESSAGES = 6;
6
+ export declare function getModelContextWindow(model: string): number;
7
+ export declare function getCompactThreshold(model: string): number;
8
+ //# sourceMappingURL=budget.d.ts.map
@@ -0,0 +1,36 @@
1
+ "use strict";
2
+ /**
3
+ * Model context windows and auto-compaction thresholds for the CLI.
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.KEEP_TAIL_MESSAGES = exports.COMPACT_THRESHOLD_RATIO = void 0;
7
+ exports.getModelContextWindow = getModelContextWindow;
8
+ exports.getCompactThreshold = getCompactThreshold;
9
+ const MODEL_CONTEXT_WINDOWS = {
10
+ "z-ai/GLM-5.1": 128_000,
11
+ "z-ai/glm-5.1": 128_000,
12
+ "gpt-4o": 128_000,
13
+ "gpt-4o-mini": 128_000,
14
+ "claude-sonnet-4": 200_000,
15
+ };
16
+ const DEFAULT_CONTEXT_WINDOW = 64_000;
17
+ exports.COMPACT_THRESHOLD_RATIO = 0.75;
18
+ exports.KEEP_TAIL_MESSAGES = 6;
19
+ function getModelContextWindow(model) {
20
+ const normalized = model.trim().toLowerCase();
21
+ if (MODEL_CONTEXT_WINDOWS[model])
22
+ return MODEL_CONTEXT_WINDOWS[model];
23
+ if (MODEL_CONTEXT_WINDOWS[normalized])
24
+ return MODEL_CONTEXT_WINDOWS[normalized];
25
+ const env = process.env.LLMTUNE_CONTEXT_WINDOW;
26
+ if (env) {
27
+ const parsed = Number.parseInt(env, 10);
28
+ if (Number.isFinite(parsed) && parsed > 0)
29
+ return parsed;
30
+ }
31
+ return DEFAULT_CONTEXT_WINDOW;
32
+ }
33
+ function getCompactThreshold(model) {
34
+ return Math.floor(getModelContextWindow(model) * exports.COMPACT_THRESHOLD_RATIO);
35
+ }
36
+ //# sourceMappingURL=budget.js.map
@@ -1,5 +1,5 @@
1
1
  import OpenAI from "openai";
2
- import { Conversation } from "../agent/conversation";
2
+ import { Conversation, type Message } from "../agent/conversation";
3
3
  export interface CompactResult {
4
4
  tokensSaved: number;
5
5
  preCompactTokens: number;
@@ -7,7 +7,15 @@ export interface CompactResult {
7
7
  preCompactMessages: number;
8
8
  postCompactMessages: number;
9
9
  summary: string;
10
+ activeTask?: string;
11
+ trigger: "manual" | "auto";
10
12
  }
11
- export declare function compactConversation(client: OpenAI, model: string, conversation: Conversation, sessionsDir?: string): Promise<CompactResult>;
13
+ export interface CompactOptions {
14
+ trigger?: "manual" | "auto";
15
+ activeTask?: string;
16
+ keepTail?: number;
17
+ }
18
+ export declare function extractActiveTask(messages: Message[]): string;
19
+ export declare function compactConversation(client: OpenAI, model: string, conversation: Conversation, sessionsDir?: string, options?: CompactOptions): Promise<CompactResult>;
12
20
  export declare function uncompactConversation(conversation: Conversation, sessionsDir?: string): boolean;
13
21
  //# sourceMappingURL=service.d.ts.map
@@ -33,9 +33,12 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.extractActiveTask = extractActiveTask;
36
37
  exports.compactConversation = compactConversation;
37
38
  exports.uncompactConversation = uncompactConversation;
38
39
  const tokens_1 = require("../utils/tokens");
40
+ const budget_1 = require("./budget");
41
+ const service_1 = require("../memory/service");
39
42
  const fs = __importStar(require("fs"));
40
43
  const path = __importStar(require("path"));
41
44
  const os = __importStar(require("os"));
@@ -53,22 +56,55 @@ Include:
53
56
  7. Pending Tasks: Tasks explicitly requested but not yet done
54
57
  8. Current Work: What was being worked on immediately before this summary
55
58
 
59
+ The user's ACTIVE TASK must appear verbatim under "Current Work" and "Pending Tasks" if not finished.
60
+
56
61
  Respond ONLY with plain text. No XML tags. No tool calls.`;
57
- async function compactConversation(client, model, conversation, sessionsDir) {
62
+ function extractActiveTask(messages) {
63
+ const userMsgs = messages
64
+ .filter((m) => m.role === "user")
65
+ .map((m) => (typeof m.content === "string" ? m.content.trim() : ""))
66
+ .filter(Boolean);
67
+ if (userMsgs.length === 0)
68
+ return "";
69
+ // Prefer the most recent substantive user message (skip one-word replies)
70
+ for (let i = userMsgs.length - 1; i >= 0; i--) {
71
+ const msg = userMsgs[i];
72
+ if (msg.length >= 12 && !msg.startsWith("/")) {
73
+ return msg.slice(0, 500);
74
+ }
75
+ }
76
+ return userMsgs[userMsgs.length - 1].slice(0, 500);
77
+ }
78
+ async function compactConversation(client, model, conversation, sessionsDir, options) {
79
+ const trigger = options?.trigger ?? "manual";
80
+ const keepTail = options?.keepTail ?? budget_1.KEEP_TAIL_MESSAGES;
58
81
  const messages = conversation.messages;
82
+ const activeTask = options?.activeTask ?? extractActiveTask(messages);
83
+ if (activeTask) {
84
+ (0, service_1.saveActiveTask)(activeTask);
85
+ }
59
86
  const preCompactTokens = (0, tokens_1.estimateTokens)(messages.map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content))).join(" "));
60
87
  const preCompactCount = messages.length;
61
- // Save raw history before compacting
62
88
  saveRawHistory(conversation, sessionsDir);
63
- // Build context for summary: last N messages
64
- const contextMessages = messages.slice(-20);
65
- const summaryRequestMessages = contextMessages
66
- .filter((m) => m.role !== "system")
89
+ const systemMessages = messages.filter((m) => m.role === "system");
90
+ const nonSystem = messages.filter((m) => m.role !== "system");
91
+ const tailMessages = nonSystem.slice(-keepTail);
92
+ const messagesToCompact = nonSystem.slice(0, -keepTail);
93
+ const summarySource = messagesToCompact.length > 0 ? messagesToCompact : nonSystem.slice(0, Math.max(0, nonSystem.length - 2));
94
+ const summaryRequestMessages = summarySource
95
+ .filter((m) => m.role === "user" || m.role === "assistant")
96
+ .slice(-30)
67
97
  .map((m) => ({
68
98
  role: m.role,
69
- content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
99
+ content: truncateForSummary(m),
70
100
  }));
71
- summaryRequestMessages.push({ role: "user", content: COMPACT_SYSTEM_PROMPT });
101
+ const taskBlock = activeTask
102
+ ? `\n\nCRITICAL — preserve this active task in your summary:\n"${activeTask}"`
103
+ : "";
104
+ summaryRequestMessages.push({
105
+ role: "user",
106
+ content: `${COMPACT_SYSTEM_PROMPT}${taskBlock}`,
107
+ });
72
108
  let summary = "";
73
109
  try {
74
110
  const response = await client.chat.completions.create({
@@ -80,24 +116,25 @@ async function compactConversation(client, model, conversation, sessionsDir) {
80
116
  summary = response.choices[0]?.message?.content?.trim() ?? "";
81
117
  }
82
118
  catch {
83
- summary = buildFallbackSummary(messages);
119
+ summary = buildFallbackSummary(messages, activeTask);
84
120
  }
85
121
  if (!summary) {
86
- summary = buildFallbackSummary(messages);
122
+ summary = buildFallbackSummary(messages, activeTask);
123
+ }
124
+ if (activeTask) {
125
+ summary = `## Active Task\n${activeTask}\n\n${summary}`;
87
126
  }
88
- // Replace conversation with boundary marker + summary
89
127
  const boundaryMsg = {
90
128
  role: "system",
91
- content: `[COMPACT BOUNDARY] Compacted at ${new Date().toISOString()}. ${preCompactCount} messages summarized. Raw history preserved.`,
129
+ content: `[COMPACT BOUNDARY] ${trigger === "auto" ? "Auto-compacted" : "Compacted"} at ${new Date().toISOString()}. ` +
130
+ `${preCompactCount} messages summarized (${tailMessages.length} recent messages kept). Raw history preserved.`,
92
131
  };
93
132
  const summaryMsg = {
94
133
  role: "system",
95
134
  content: `## Conversation Summary\n\n${summary}`,
96
135
  };
97
- // Keep system messages + boundary + summary
98
- const systemMessages = messages.filter((m) => m.role === "system" && m === messages[0]);
99
136
  conversation.messages.length = 0;
100
- conversation.messages.push(...systemMessages, boundaryMsg, summaryMsg);
137
+ conversation.messages.push(...systemMessages, boundaryMsg, summaryMsg, ...tailMessages);
101
138
  const postCompactTokens = (0, tokens_1.estimateTokens)(conversation.messages.map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content))).join(" "));
102
139
  const postCompactCount = conversation.messages.length;
103
140
  return {
@@ -107,6 +144,8 @@ async function compactConversation(client, model, conversation, sessionsDir) {
107
144
  preCompactMessages: preCompactCount,
108
145
  postCompactMessages: postCompactCount,
109
146
  summary,
147
+ activeTask: activeTask || undefined,
148
+ trigger,
110
149
  };
111
150
  }
112
151
  function uncompactConversation(conversation, sessionsDir) {
@@ -125,6 +164,14 @@ function uncompactConversation(conversation, sessionsDir) {
125
164
  return false;
126
165
  }
127
166
  }
167
/**
 * Renders a conversation message as plain text for the summarization request.
 *
 * String content is used as-is; other content is JSON-serialized. When the
 * message carries tool calls, their names are prefixed as `[tools: a, b] `.
 * The result is capped at 3000 characters plus a truncation marker.
 *
 * @param msg Conversation message (`content`, optional `toolCalls`).
 * @returns Text of at most 3000 characters, plus "\n... (truncated)" when cut.
 */
function truncateForSummary(msg) {
    const raw = msg.content;
    let text = "";
    if (typeof raw === "string") {
        text = raw;
    }
    else if (raw != null) {
        // JSON.stringify returns undefined for values it cannot serialize
        // (e.g. functions); fall back to "" so `.length` below never throws.
        text = JSON.stringify(raw) ?? "";
    }
    // Missing (null/undefined) content stays "" rather than the text "null".
    if (msg.toolCalls?.length) {
        const tools = msg.toolCalls.map((tc) => tc.function.name).join(", ");
        text = `[tools: ${tools}] ${text}`;
    }
    return text.length > 3000 ? text.slice(0, 3000) + "\n... (truncated)" : text;
}
128
175
  function saveRawHistory(conversation, sessionsDir) {
129
176
  const dir = sessionsDir ?? path.join(os.homedir(), ".llmtune", "sessions");
130
177
  if (!fs.existsSync(dir)) {
@@ -135,7 +182,7 @@ function saveRawHistory(conversation, sessionsDir) {
135
182
  fs.writeFileSync(rawPath, JSON.stringify({ messages: conversation.messages }, null, 2), "utf-8");
136
183
  }
137
184
  }
138
- function buildFallbackSummary(messages) {
185
+ function buildFallbackSummary(messages, activeTask) {
139
186
  const userMsgs = messages
140
187
  .filter((m) => m.role === "user")
141
188
  .map((m) => (typeof m.content === "string" ? m.content.slice(0, 200) : ""))
@@ -143,14 +190,26 @@ function buildFallbackSummary(messages) {
143
190
  const toolNames = messages
144
191
  .filter((m) => m.role === "assistant" && m.toolCalls)
145
192
  .flatMap((m) => m.toolCalls?.map((tc) => tc.function.name) ?? []);
146
- const parts = [`Conversation had ${messages.length} messages.`];
193
+ const parts = [];
194
+ if (activeTask) {
195
+ parts.push(`## Active Task\n${activeTask}`);
196
+ }
197
+ parts.push(`Conversation had ${messages.length} messages.`);
147
198
  if (toolNames.length > 0) {
148
199
  const unique = [...new Set(toolNames)];
149
200
  parts.push(`Tools used: ${unique.join(", ")}`);
150
201
  }
151
202
  if (userMsgs.length > 0) {
152
- parts.push(`Last user message: ${userMsgs[userMsgs.length - 1].slice(0, 150)}`);
203
+ parts.push(`Recent user requests:\n${userMsgs.slice(-5).map((m) => `- ${m}`).join("\n")}`);
204
+ }
205
+ const allContent = messages
206
+ .map((m) => (typeof m.content === "string" ? m.content : ""))
207
+ .join(" ");
208
+ const fileMatches = allContent.match(/[\w/.\\:-]+\.(ts|tsx|js|jsx|py|rs|go|java|json|yaml|yml|md)/g);
209
+ if (fileMatches) {
210
+ const uniqueFiles = [...new Set(fileMatches)].slice(0, 15);
211
+ parts.push(`Files mentioned: ${uniqueFiles.join(", ")}`);
153
212
  }
154
- return parts.join("\n");
213
+ return parts.join("\n\n");
155
214
  }
156
215
  //# sourceMappingURL=service.js.map
@@ -27,7 +27,8 @@ function buildAgentIdentitySection(model) {
27
27
  "## Behavior",
28
28
  "- Be concise, direct, and helpful.",
29
29
  "- Use tools to investigate and make changes — do not only describe what you would do.",
30
- "- Stay on the user's task; do not ask them to remind you unless context was explicitly compacted.",
30
+ "- Stay on the user's task; continue from the conversation summary and active task in memory if present.",
31
+ "- If you see a [COMPACT BOUNDARY] message, treat the summary above it as prior context — do not ask the user to repeat their task unless the summary is empty.",
31
32
  ].join("\n");
32
33
  }
33
34
  //# sourceMappingURL=agent-identity.js.map
@@ -3,6 +3,7 @@ declare const MEMORY_FILES: {
3
3
  readonly "project-notes": "project-notes.md";
4
4
  readonly decisions: "decisions.md";
5
5
  readonly architecture: "architecture.md";
6
+ readonly "active-task": "active-task.md";
6
7
  };
7
8
  type MemoryCategory = keyof typeof MEMORY_FILES;
8
9
  export interface MemoryEntry {
@@ -15,6 +16,8 @@ export declare function writeMemory(category: MemoryCategory, content: string):
15
16
  export declare function appendMemory(category: MemoryCategory, line: string): void;
16
17
  export declare function readAllMemory(): MemoryEntry[];
17
18
  export declare function buildMemoryPrompt(): string;
19
+ export declare function saveActiveTask(task: string): void;
20
+ export declare function getActiveTask(): string;
18
21
  export declare function clearMemory(category?: MemoryCategory): void;
19
22
  export declare function getMemoryDir(): string;
20
23
  export declare function initMemoryFiles(): void;
@@ -38,6 +38,8 @@ exports.writeMemory = writeMemory;
38
38
  exports.appendMemory = appendMemory;
39
39
  exports.readAllMemory = readAllMemory;
40
40
  exports.buildMemoryPrompt = buildMemoryPrompt;
41
+ exports.saveActiveTask = saveActiveTask;
42
+ exports.getActiveTask = getActiveTask;
41
43
  exports.clearMemory = clearMemory;
42
44
  exports.getMemoryDir = getMemoryDir;
43
45
  exports.initMemoryFiles = initMemoryFiles;
@@ -50,6 +52,7 @@ const MEMORY_FILES = {
50
52
  "project-notes": "project-notes.md",
51
53
  decisions: "decisions.md",
52
54
  architecture: "architecture.md",
55
+ "active-task": "active-task.md",
53
56
  };
54
57
  function ensureMemoryDir() {
55
58
  if (!fs.existsSync(MEMORY_DIR)) {
@@ -104,6 +107,18 @@ function buildMemoryPrompt() {
104
107
  });
105
108
  return "## User Memory\n\n" + sections.join("\n\n");
106
109
  }
110
+ function saveActiveTask(task) {
111
+ const trimmed = task.trim();
112
+ if (!trimmed)
113
+ return;
114
+ writeMemory("active-task", `# Active Task\n\n${trimmed}`);
115
+ }
116
+ function getActiveTask() {
117
+ const raw = readMemory("active-task");
118
+ if (!raw)
119
+ return "";
120
+ return raw.replace(/^#\s*Active Task\s*\n+/i, "").trim();
121
+ }
107
122
  function clearMemory(category) {
108
123
  if (category) {
109
124
  const filePath = getMemoryPath(category);
@@ -135,6 +150,7 @@ function initMemoryFiles() {
135
150
  "project-notes": "# Project Notes\n# Key facts about the current project\n# Example: Auth uses JWT + bcrypt\n# Example: Database is Neon PostgreSQL via Prisma\n",
136
151
  decisions: "# Architecture Decisions\n# Record important technical decisions\n# Example: Decided to use Prisma instead of Drizzle for ORM\n",
137
152
  architecture: "# Architecture Overview\n# Describe the project structure\n# Example: Frontend: Next.js 16, Backend: Express 5, DB: Neon\n",
153
+ "active-task": "# Active Task\n# Updated automatically from your latest requests\n",
138
154
  };
139
155
  for (const [category, defaultContent] of Object.entries(defaults)) {
140
156
  const filePath = getMemoryPath(category);
package/dist/repl/repl.js CHANGED
@@ -41,6 +41,7 @@ const chalk_1 = __importDefault(require("chalk"));
41
41
  const readline_1 = require("readline");
42
42
  const conversation_1 = require("../agent/conversation");
43
43
  const registry_1 = require("../tools/registry");
44
+ const permissions_1 = require("../tools/permissions");
44
45
  const loop_1 = require("../agent/loop");
45
46
  const read_1 = require("../tools/tools/read");
46
47
  const write_1 = require("../tools/tools/write");
@@ -51,6 +52,7 @@ const grep_1 = require("../tools/tools/grep");
51
52
  const web_fetch_1 = require("../tools/tools/web-fetch");
52
53
  const ask_user_1 = require("../tools/tools/ask-user");
53
54
  const service_1 = require("../compact/service");
55
+ const auto_compact_1 = require("../compact/auto-compact");
54
56
  const analyzer_1 = require("../context/analyzer");
55
57
  const builder_1 = require("../context/builder");
56
58
  const loader_1 = require("../skills/loader");
@@ -84,7 +86,7 @@ ${chalk_1.default.bold("LLMTune CLI - Commands:")}
84
86
  async function startRepl(options) {
85
87
  const registry = new registry_1.ToolRegistry();
86
88
  const cwd = process.cwd();
87
- const trustedTools = new Set();
89
+ const permissions = new permissions_1.PermissionManager();
88
90
  registry.register(read_1.readTool);
89
91
  registry.register(write_1.writeTool);
90
92
  registry.register(edit_1.editTool);
@@ -151,16 +153,17 @@ async function startRepl(options) {
151
153
  console.log(chalk_1.default.yellow(` Warning: tool "${toolName}" not allowed for skill trust level "${trustLevel}"`));
152
154
  }
153
155
  }
154
- // Add skill system prompt and user message
155
156
  conversation.addSystemMessage(execution.systemPrompt);
156
- conversation.addUserMessage(execution.userMessage);
157
157
  try {
158
158
  const result = await (0, loop_1.runAgentLoop)(options.client, conversation, registry, execution.userMessage, {
159
159
  model: currentModel,
160
160
  maxTurns: 50,
161
161
  verbose,
162
+ stream: streamMode,
162
163
  cwd,
163
164
  workspaceRoot: cwd,
165
+ permissions,
166
+ skipUserInput: false,
164
167
  });
165
168
  (0, logger_1.logEvent)({ event: "tool_call", tool: skillName, latency_ms: 0 });
166
169
  }
@@ -179,7 +182,7 @@ async function startRepl(options) {
179
182
  conversation,
180
183
  registry,
181
184
  skills,
182
- trustedTools,
185
+ permissions,
183
186
  cwd,
184
187
  client: options.client,
185
188
  getModel: () => currentModel,
@@ -204,8 +207,10 @@ async function startRepl(options) {
204
207
  model: currentModel,
205
208
  maxTurns: 50,
206
209
  verbose,
210
+ stream: streamMode,
207
211
  cwd,
208
212
  workspaceRoot: cwd,
213
+ permissions,
209
214
  });
210
215
  if (result.totalTokensIn > 0 || result.totalTokensOut > 0) {
211
216
  const cost = estimateCostFromUsage(result.totalTokensIn, result.totalTokensOut);
@@ -279,10 +284,12 @@ async function handleCommand(input, ctx) {
279
284
  }
280
285
  console.log(chalk_1.default.dim("Compacting conversation..."));
281
286
  try {
282
- const result = await (0, service_1.compactConversation)(ctx.client, ctx.getModel(), ctx.conversation);
283
- console.log(chalk_1.default.green(`\nCompacted: ${result.preCompactMessages} -> ${result.postCompactMessages} messages`));
287
+ const result = await (0, service_1.compactConversation)(ctx.client, ctx.getModel(), ctx.conversation, undefined, {
288
+ trigger: "manual",
289
+ });
290
+ (0, auto_compact_1.printCompactionNotice)(result, "manual", result.activeTask);
291
+ console.log(chalk_1.default.green(`Compacted: ${result.preCompactMessages} -> ${result.postCompactMessages} messages`));
284
292
  console.log(chalk_1.default.dim(` Tokens saved: ~${result.tokensSaved.toLocaleString()}`));
285
- console.log(chalk_1.default.dim(` Use /uncompact to restore full history.`));
286
293
  (0, logger_1.logEvent)({ event: "compaction", tokens_saved: result.tokensSaved, messages_before: result.preCompactMessages, messages_after: result.postCompactMessages, trigger: "manual" });
287
294
  }
288
295
  catch (err) {
@@ -320,7 +327,7 @@ async function handleCommand(input, ctx) {
320
327
  break;
321
328
  case "/trust":
322
329
  if (args[0]) {
323
- ctx.trustedTools.add(args[0].toLowerCase());
330
+ ctx.permissions.trustTool(args[0].toLowerCase());
324
331
  console.log(chalk_1.default.green(`Trusting tool: ${args[0]} (no confirmation needed)`));
325
332
  }
326
333
  else {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@llmtune/cli",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "LLMTune CLI -AI CLI Agent powered by llmtune.io",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -10,7 +10,8 @@
10
10
  "build": "tsc",
11
11
  "dev": "tsx src/index.ts",
12
12
  "start": "node dist/index.js",
13
- "lint": "tsc --noEmit"
13
+ "lint": "tsc --noEmit",
14
+ "test": "npm run build && node scripts/smoke-test.js"
14
15
  },
15
16
  "keywords": ["llmtune", "cli", "ai", "agent", "coding"],
16
17
  "license": "MIT",
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CLI smoke tests — run: npm test
4
+ * Set LLMTUNE_API_KEY or configure ~/.llmtune/config.json for live API tests.
5
+ */
6
+ const fs = require("fs")
7
+ const path = require("path")
8
+ const os = require("os")
9
+
10
// Running tallies; main() prints them in the final summary line and uses
// `failed` to choose the process exit status.
let passed = 0
let failed = 0
let skipped = 0

/**
 * Turn any thrown value into printable text.
 *
 * JavaScript allows throwing anything, so `e.message` may be missing, and
 * reading it from `null`/`undefined` would itself throw inside the catch
 * handler and abort the whole run.
 *
 * @param {*} err - Whatever was thrown.
 * @returns {string} `err.message` when it is a string, otherwise `String(err)`.
 */
function describeError(err) {
  if (err != null && typeof err.message === "string") return err.message
  return String(err)
}

/**
 * Count and print a passing check.
 * @param {string} name - Label of the check.
 */
function ok(name) {
  passed++
  console.log(` ✓ ${name}`)
}

/**
 * Count and print a failing check.
 * @param {string} name - Label of the check.
 * @param {string} err - Failure description shown after the label.
 */
function fail(name, err) {
  failed++
  console.log(` ✗ ${name}: ${err}`)
}

/**
 * Count and print a check that was not run.
 * @param {string} name - Label of the check.
 * @param {string} reason - Why it was skipped.
 */
function skip(name, reason) {
  skipped++
  console.log(` ⊘ ${name} (${reason})`)
}

/**
 * Run a synchronous check: it passes when `fn` returns, fails when it throws.
 * The thrown value never propagates to the caller.
 *
 * @param {string} name - Label of the check.
 * @param {Function} fn - Check body; called with no arguments.
 */
function test(name, fn) {
  try {
    fn()
    ok(name)
  } catch (e) {
    fail(name, describeError(e))
  }
}

/**
 * Run an asynchronous check: it passes when the awaited `fn()` settles
 * successfully, fails when it throws or rejects.
 *
 * @param {string} name - Label of the check.
 * @param {Function} fn - Check body; its result is awaited.
 * @returns {Promise<void>} Resolves once the outcome has been recorded.
 */
async function testAsync(name, fn) {
  try {
    await fn()
    ok(name)
  } catch (e) {
    fail(name, describeError(e))
  }
}
46
+
47
/**
 * Run every smoke check in order, print the summary and terminate the process.
 *
 * The offline checks load modules from ../dist and exercise them directly.
 * The live agent-loop check is attempted only when LLMTUNE_API_KEY is set or
 * ~/.llmtune/config.json exists; otherwise it is recorded as skipped.
 *
 * Exits with status 1 when at least one check failed, otherwise 0.
 */
async function main() {
  console.log("=== LLMTune CLI Smoke Tests ===\n")

  // Everything below requires files from ../dist, so check the build first.
  test("dist/index.js exists", () => {
    const entryFile = path.join(__dirname, "..", "dist", "index.js")
    const isBuilt = fs.existsSync(entryFile)
    if (!isBuilt) throw new Error("run npm run build first")
  })

  test("agent identity module", () => {
    const { buildAgentIdentitySection: describeAgent } = require("../dist/context/agent-identity")
    const identity = describeAgent("z-ai/GLM-5.1")
    if (!identity.includes("LLMTune Agent")) throw new Error("missing identity")
    const lowered = identity.toLowerCase()
    if (lowered.includes("you are claude")) throw new Error("should not identify as Claude")
  })

  // Passes as long as collecting git context for the current directory does not throw.
  test("git context (Windows-safe)", () => {
    const gitContext = require("../dist/context/git-context")
    const { collectGitContext } = gitContext
    collectGitContext(process.cwd())
  })

  test("microcompact", () => {
    const { microcompactMessages } = require("../dist/compact/microcompact")
    const oversizedToolMessage = { role: "tool", content: "x".repeat(5000) }
    const outcome = microcompactMessages([oversizedToolMessage])
    const firstMessage = outcome.compacted[0]
    if (firstMessage.content.length >= 5000) throw new Error("not compressed")
  })

  test("context analyzer", () => {
    const { analyzeContextUsage } = require("../dist/context/analyzer")
    const sample = {
      systemPrompt: "test",
      toolSpecs: [],
      messages: [{ role: "user", content: "hi" }],
      model: "z-ai/GLM-5.1",
    }
    const usage = analyzeContextUsage(sample)
    if (usage.totalTokens <= 0) throw new Error("expected tokens > 0")
  })

  test("permission manager trust", () => {
    const { PermissionManager } = require("../dist/tools/permissions")
    const manager = new PermissionManager()
    manager.trustTool("bash")
    if (!manager.isTrusted("bash")) throw new Error("trust failed")
  })

  // Passes as long as dispatching a bash echo through the registry does not reject.
  await testAsync("registry dispatchAsync", async () => {
    const { ToolRegistry } = require("../dist/tools/registry")
    const { bashTool } = require("../dist/tools/tools/bash")
    const toolRegistry = new ToolRegistry()
    toolRegistry.register(bashTool)
    const here = process.cwd()
    const toolArgs = { command: "echo SMOKE_OK" }
    await toolRegistry.dispatchAsync("bash", toolArgs, { workspaceRoot: here, cwd: here })
  })

  const apiCheckName = "API agent loop + bash tool"
  const configPath = path.join(os.homedir(), ".llmtune", "config.json")
  const keyInEnv = Boolean(process.env.LLMTUNE_API_KEY)
  const hasApi = keyInEnv || fs.existsSync(configPath)

  if (!hasApi) {
    skip(apiCheckName, "no API key")
  } else {
    await testAsync(apiCheckName, async () => {
      const { createClient } = require("../dist/auth/client")
      const { Conversation } = require("../dist/agent/conversation")
      const { ToolRegistry } = require("../dist/tools/registry")
      const { bashTool } = require("../dist/tools/tools/bash")
      const { runAgentLoop } = require("../dist/agent/loop")

      const modelId = "z-ai/GLM-5.1"
      const client = createClient()
      const registry = new ToolRegistry()
      registry.register(bashTool)
      const conversation = new Conversation(modelId)
      const cwd = process.cwd()

      const prompt = "Run bash command: echo AGENT_SMOKE_OK. Use bash tool only."
      const loopConfig = { model: modelId, maxTurns: 3, stream: false, cwd, workspaceRoot: cwd }
      const result = await runAgentLoop(client, conversation, registry, prompt, loopConfig)

      if (result.totalToolCalls === 0) throw new Error("expected at least one tool call")
    })
  }

  console.log(`\n=== Results: ${passed} passed, ${failed} failed, ${skipped} skipped ===`)
  const exitStatus = failed > 0 ? 1 : 0
  process.exit(exitStatus)
}
138
+
139
// Start the suite. A rejection that escapes main() is printed and reported
// through a non-zero exit status.
main().catch(function onFatalError(err) {
  console.error(err)
  process.exit(1)
})