npm - @llmtune/cli - Version diff - 0.1.3 → 0.1.6 - Mend

@llmtune/cli 0.1.3 → 0.1.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (19)

package/README.md +2 -2
package/dist/agent/loop.d.ts +5 -0
package/dist/agent/loop.js +163 -89
package/dist/auth/client.js +9 -0
package/dist/compact/auto-compact.d.ts +25 -0
package/dist/compact/auto-compact.js +65 -0
package/dist/compact/budget.d.ts +8 -0
package/dist/compact/budget.js +36 -0
package/dist/compact/service.d.ts +10 -2
package/dist/compact/service.js +78 -19
package/dist/context/agent-identity.js +2 -1
package/dist/marketplace/client.js +18 -6
package/dist/memory/service.d.ts +3 -0
package/dist/memory/service.js +16 -0
package/dist/repl/repl.js +15 -8
package/llmtune-session-1780260929719.json +6 -0
package/package.json +3 -2
package/scripts/qa-full.js +440 -0
package/scripts/smoke-test.js +142 -0

package/README.md CHANGED Viewed

@@ -6,10 +6,10 @@ AI CLI Agent for your terminal, powered by [llmtune.io](https://llmtune.io).
 ```bash
 # Install globally
-npm install -g llmtune
+npm install -g @llmtune/cli
 # Or run directly
-npx llmtune
+npx @llmtune/cli
 # Configure your API key
 llmtune login

package/dist/agent/loop.d.ts CHANGED Viewed

@@ -1,12 +1,17 @@
 import OpenAI from "openai";
 import { ToolRegistry } from "../tools/registry";
 import { Conversation } from "./conversation";
+import { PermissionManager } from "../tools/permissions";
 export interface AgentLoopConfig {
     model?: string;
     maxTurns?: number;
     verbose?: boolean;
+    stream?: boolean;
     cwd: string;
     workspaceRoot: string;
+    permissions?: PermissionManager;
+    /** When true, skip adding userInput (already in conversation). */
+    skipUserInput?: boolean;
 }
 export interface AgentLoopResult {
     finalText: string;

package/dist/agent/loop.js CHANGED Viewed

@@ -5,11 +5,18 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.runAgentLoop = runAgentLoop;
 const builder_1 = require("../context/builder");
+const auto_compact_1 = require("../compact/auto-compact");
+const service_1 = require("../memory/service");
+const tokens_1 = require("../utils/tokens");
 const chalk_1 = __importDefault(require("chalk"));
 async function runAgentLoop(client, conversation, registry, userInput, config, onTextChunk) {
     const model = config.model ?? "z-ai/GLM-5.1";
     const maxTurns = config.maxTurns ?? 20;
-    conversation.addUserMessage(userInput);
+    const useStream = config.stream !== false;
+    if (!config.skipUserInput) {
+        conversation.addUserMessage(userInput);
+        (0, service_1.saveActiveTask)(userInput);
+    }
     const toolSpecs = registry.listSpecs();
     const openaiTools = toolSpecs.map((spec) => ({
         type: "function",
@@ -22,101 +29,32 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
     const contextResult = await (0, builder_1.buildContextPrompt)(config.workspaceRoot, config.cwd, {
         model: config.model,
     });
-    const contextPrompt = contextResult.prompt;
+    const memoryPrompt = (0, service_1.buildMemoryPrompt)();
+    const contextPrompt = memoryPrompt
+        ? `${contextResult.prompt}\n\n${memoryPrompt}`
+        : contextResult.prompt;
+    const toolSchemaTokens = (0, tokens_1.estimateTokens)(JSON.stringify(toolSpecs));
+    await (0, auto_compact_1.maybeAutoCompact)({
+        client,
+        model,
+        conversation,
+        systemPrompt: contextPrompt,
+        toolSchemaTokens,
+    });
     let totalToolCalls = 0;
     let totalTokensIn = 0;
     let totalTokensOut = 0;
     let turns = 0;
     let finalText = "";
     for (let turn = 0; turn < maxTurns; turn++) {
-        const apiMessages = conversation.getApiMessages();
-        const systemMessage = {
-            role: "system",
-            content: contextPrompt,
-        };
-        const allMessages = [
-            systemMessage,
-            ...apiMessages.map((msg) => {
-                if (msg.role === "system")
-                    return { role: "system", content: msg.content };
-                if (msg.role === "user")
-                    return { role: "user", content: msg.content };
-                if (msg.role === "assistant") {
-                    const m = {
-                        role: "assistant",
-                        content: msg.content || null,
-                    };
-                    if (msg.toolCalls && msg.toolCalls.length > 0) {
-                        m.tool_calls = msg.toolCalls.map((tc) => ({
-                            id: tc.id,
-                            type: "function",
-                            function: { name: tc.function.name, arguments: tc.function.arguments },
-                        }));
-                    }
-                    return m;
-                }
-                if (msg.role === "tool") {
-                    return {
-                        role: "tool",
-                        tool_call_id: msg.toolCallId ?? "",
-                        content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
-                    };
-                }
-                return { role: "user", content: msg.content };
-            }),
-        ];
-        const stream = await client.chat.completions.create({
-            model,
-            messages: allMessages,
-            tools: openaiTools.length > 0 ? openaiTools : undefined,
-            stream: true,
-            temperature: 0.7,
-            max_tokens: 16384,
-        });
-        let assistantContent = "";
-        const toolCalls = [];
-        let currentToolCall = null;
-        for await (const chunk of stream) {
-            const delta = chunk.choices[0]?.delta;
-            if (!delta)
-                continue;
-            if (delta.content) {
-                assistantContent += delta.content;
-                if (onTextChunk)
-                    onTextChunk(delta.content);
-                else
-                    process.stdout.write(delta.content);
-            }
-            if (delta.tool_calls) {
-                for (const tc of delta.tool_calls) {
-                    if (tc.id && tc.function?.name) {
-                        currentToolCall = {
-                            id: tc.id,
-                            name: tc.function.name,
-                            arguments: tc.function.arguments ?? "",
-                        };
-                        toolCalls.push({
-                            id: tc.id,
-                            type: "function",
-                            function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
-                        });
-                    }
-                    else if (currentToolCall && tc.function?.arguments) {
-                        currentToolCall.arguments += tc.function.arguments;
-                        const last = toolCalls[toolCalls.length - 1];
-                        if (last)
-                            last.function.arguments = currentToolCall.arguments;
-                    }
-                }
-            }
-            if (chunk.usage) {
-                totalTokensIn += chunk.usage.prompt_tokens ?? 0;
-                totalTokensOut += chunk.usage.completion_tokens ?? 0;
-            }
-        }
-        if (!onTextChunk)
-            console.log();
+        const allMessages = buildApiMessages(conversation, contextPrompt);
+        const turnResult = useStream
+            ? await runStreamingTurn(client, model, allMessages, openaiTools, onTextChunk)
+            : await runBufferedTurn(client, model, allMessages, openaiTools);
+        totalTokensIn += turnResult.tokensIn;
+        totalTokensOut += turnResult.tokensOut;
         turns++;
+        const { assistantContent, toolCalls } = turnResult;
         if (toolCalls.length === 0) {
             conversation.addAssistantMessage(assistantContent);
             finalText = assistantContent;
@@ -134,6 +72,18 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
             }
             const summary = summarizeToolInput(tc.function.name, toolInput);
             console.log(chalk_1.default.cyan(`  ▶ ${tc.function.name}`) + chalk_1.default.dim(` ${summary}`));
+            if (config.permissions) {
+                const tool = registry.get(tc.function.name);
+                const spec = tool?.spec();
+                const isDestructive = spec?.isDestructive === true;
+                const perm = await config.permissions.check(tc.function.name, toolInput, isDestructive);
+                if (perm.behavior === "deny") {
+                    const denyMsg = perm.message ?? "User denied tool execution";
+                    console.log(chalk_1.default.yellow(`  ⊘ ${tc.function.name}: ${denyMsg}`));
+                    conversation.addToolResult(tc.id, `Denied: ${denyMsg}`);
+                    continue;
+                }
+            }
             const toolCtx = {
                 workspaceRoot: config.workspaceRoot,
                 cwd: config.cwd,
@@ -154,6 +104,130 @@ async function runAgentLoop(client, conversation, registry, userInput, config, o
     }
     return { finalText, totalToolCalls, totalTokensIn, totalTokensOut, turns };
 }
+function buildApiMessages(conversation, contextPrompt) {
+    const systemMessage = {
+        role: "system",
+        content: contextPrompt,
+    };
+    return [
+        systemMessage,
+        ...conversation.getApiMessages().map((msg) => {
+            if (msg.role === "system")
+                return { role: "system", content: msg.content };
+            if (msg.role === "user")
+                return { role: "user", content: msg.content };
+            if (msg.role === "assistant") {
+                const m = {
+                    role: "assistant",
+                    content: msg.content || null,
+                };
+                if (msg.toolCalls && msg.toolCalls.length > 0) {
+                    m.tool_calls = msg.toolCalls.map((tc) => ({
+                        id: tc.id,
+                        type: "function",
+                        function: { name: tc.function.name, arguments: tc.function.arguments },
+                    }));
+                }
+                return m;
+            }
+            if (msg.role === "tool") {
+                return {
+                    role: "tool",
+                    tool_call_id: msg.toolCallId ?? "",
+                    content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content),
+                };
+            }
+            return { role: "user", content: msg.content };
+        }),
+    ];
+}
+async function runStreamingTurn(client, model, messages, openaiTools, onTextChunk) {
+    const stream = await client.chat.completions.create({
+        model,
+        messages,
+        tools: openaiTools.length > 0 ? openaiTools : undefined,
+        stream: true,
+        temperature: 0.7,
+        max_tokens: 16384,
+    });
+    let assistantContent = "";
+    const toolCalls = [];
+    let currentToolCall = null;
+    let tokensIn = 0;
+    let tokensOut = 0;
+    for await (const chunk of stream) {
+        const delta = chunk.choices[0]?.delta;
+        if (!delta)
+            continue;
+        if (delta.content) {
+            assistantContent += delta.content;
+            if (onTextChunk)
+                onTextChunk(delta.content);
+            else
+                process.stdout.write(delta.content);
+        }
+        if (delta.tool_calls) {
+            for (const tc of delta.tool_calls) {
+                if (tc.id && tc.function?.name) {
+                    currentToolCall = {
+                        id: tc.id,
+                        name: tc.function.name,
+                        arguments: tc.function.arguments ?? "",
+                    };
+                    toolCalls.push({
+                        id: tc.id,
+                        type: "function",
+                        function: { name: tc.function.name, arguments: tc.function.arguments ?? "" },
+                    });
+                }
+                else if (currentToolCall && tc.function?.arguments) {
+                    currentToolCall.arguments += tc.function.arguments;
+                    const last = toolCalls[toolCalls.length - 1];
+                    if (last)
+                        last.function.arguments = currentToolCall.arguments;
+                }
+            }
+        }
+        if (chunk.usage) {
+            tokensIn += chunk.usage.prompt_tokens ?? 0;
+            tokensOut += chunk.usage.completion_tokens ?? 0;
+        }
+    }
+    if (!onTextChunk)
+        console.log();
+    return { assistantContent, toolCalls, tokensIn, tokensOut };
+}
+async function runBufferedTurn(client, model, messages, openaiTools) {
+    const response = await client.chat.completions.create({
+        model,
+        messages,
+        tools: openaiTools.length > 0 ? openaiTools : undefined,
+        stream: false,
+        temperature: 0.7,
+        max_tokens: 16384,
+    });
+    const choice = response.choices[0];
+    const msg = choice?.message;
+    const assistantContent = msg?.content ?? "";
+    const toolCalls = (msg?.tool_calls ?? []).map((tc) => ({
+        id: tc.id,
+        type: "function",
+        function: {
+            name: tc.function.name,
+            arguments: tc.function.arguments ?? "",
+        },
+    }));
+    if (assistantContent) {
+        process.stdout.write(assistantContent);
+        console.log();
+    }
+    return {
+        assistantContent,
+        toolCalls,
+        tokensIn: response.usage?.prompt_tokens ?? 0,
+        tokensOut: response.usage?.completion_tokens ?? 0,
+    };
+}
 function summarizeToolInput(name, input) {
     const n = name.toLowerCase();
     if (n === "bash") {

package/dist/auth/client.js CHANGED Viewed

@@ -7,15 +7,24 @@ exports.createClient = createClient;
 exports.getDefaultModel = getDefaultModel;
 const openai_1 = __importDefault(require("openai"));
 const config_1 = require("./config");
+const version_1 = require("../version");
 function createClient() {
     const apiKey = (0, config_1.loadConfig)().apiKey;
     if (!apiKey) {
         console.error("Not logged in. Run: llmtune login");
         process.exit(1);
     }
+    const cwd = process.cwd();
     return new openai_1.default({
         apiKey,
         baseURL: (0, config_1.getApiBase)(),
+        defaultHeaders: {
+            "X-LLMTune-Client": "cli",
+            "X-LLMTune-CLI-Version": version_1.CLI_VERSION,
+            "X-LLMTune-Context-Managed": "true",
+            "X-Workspace-Root": cwd,
+            "X-CWD": cwd,
+        },
     });
 }
 function getDefaultModel() {

package/dist/compact/auto-compact.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+import OpenAI from "openai";
+import { Conversation } from "../agent/conversation";
+import { type CompactResult } from "./service";
+export interface AutoCompactOptions {
+    client: OpenAI;
+    model: string;
+    conversation: Conversation;
+    systemPrompt: string;
+    toolSchemaTokens: number;
+    minMessages?: number;
+}
+export interface AutoCompactResult {
+    compacted: boolean;
+    result?: CompactResult;
+    microcompactTokensSaved: number;
+    estimatedTokens: number;
+    threshold: number;
+}
+export declare function estimateSessionTokens(conversation: Conversation, systemPrompt: string, toolSchemaTokens: number): number;
+/**
+ * Microcompact verbose tool results, then LLM-summarize older messages if still over budget.
+ */
+export declare function maybeAutoCompact(options: AutoCompactOptions): Promise<AutoCompactResult>;
+export declare function printCompactionNotice(result: CompactResult, trigger: "manual" | "auto", activeTask?: string): void;
+//# sourceMappingURL=auto-compact.d.ts.map

package/dist/compact/auto-compact.js ADDED Viewed

@@ -0,0 +1,65 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.estimateSessionTokens = estimateSessionTokens;
+exports.maybeAutoCompact = maybeAutoCompact;
+exports.printCompactionNotice = printCompactionNotice;
+const chalk_1 = __importDefault(require("chalk"));
+const service_1 = require("./service");
+const microcompact_1 = require("./microcompact");
+const tokens_1 = require("../utils/tokens");
+const budget_1 = require("./budget");
+const service_2 = require("../memory/service");
+function estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens) {
+    const messageTokens = (0, tokens_1.estimateMessagesTokens)(conversation.messages.map((m) => ({
+        role: m.role,
+        content: m.content,
+    })));
+    return (0, tokens_1.estimateTokens)(systemPrompt) + toolSchemaTokens + messageTokens;
+}
+/**
+ * Microcompact verbose tool results, then LLM-summarize older messages if still over budget.
+ */
+async function maybeAutoCompact(options) {
+    const { client, model, conversation, systemPrompt, toolSchemaTokens, minMessages = 8, } = options;
+    const threshold = (0, budget_1.getCompactThreshold)(model);
+    let microcompactTokensSaved = 0;
+    const { compacted: microcompacted, tokensSaved } = (0, microcompact_1.microcompactMessages)(conversation.messages);
+    if (tokensSaved > 0) {
+        conversation.messages.length = 0;
+        conversation.messages.push(...microcompacted);
+        microcompactTokensSaved = tokensSaved;
+    }
+    let estimatedTokens = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
+    if (conversation.messages.length < minMessages || estimatedTokens <= threshold) {
+        return { compacted: false, microcompactTokensSaved, estimatedTokens, threshold };
+    }
+    const activeTask = (0, service_2.getActiveTask)();
+    const result = await (0, service_1.compactConversation)(client, model, conversation, undefined, {
+        trigger: "auto",
+        activeTask,
+    });
+    estimatedTokens = estimateSessionTokens(conversation, systemPrompt, toolSchemaTokens);
+    printCompactionNotice(result, "auto", activeTask);
+    return {
+        compacted: true,
+        result,
+        microcompactTokensSaved,
+        estimatedTokens,
+        threshold,
+    };
+}
+function printCompactionNotice(result, trigger, activeTask) {
+    const label = trigger === "auto" ? "Auto-compacted" : "Compacted";
+    console.log("");
+    console.log(chalk_1.default.yellow(`⚠ ${label}: ${result.preCompactMessages} messages → ${result.postCompactMessages} ` +
+        `(~${result.tokensSaved.toLocaleString()} tokens saved)`));
+    if (activeTask) {
+        console.log(chalk_1.default.dim(`  Active task preserved: "${activeTask.slice(0, 120)}${activeTask.length > 120 ? "..." : ""}"`));
+    }
+    console.log(chalk_1.default.dim("  Use /uncompact to restore full history."));
+    console.log("");
+}
+//# sourceMappingURL=auto-compact.js.map

package/dist/compact/budget.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+/**
+ * Model context windows and auto-compaction thresholds for the CLI.
+ */
+export declare const COMPACT_THRESHOLD_RATIO = 0.75;
+export declare const KEEP_TAIL_MESSAGES = 6;
+export declare function getModelContextWindow(model: string): number;
+export declare function getCompactThreshold(model: string): number;
+//# sourceMappingURL=budget.d.ts.map

package/dist/compact/budget.js ADDED Viewed

@@ -0,0 +1,36 @@
+"use strict";
+/**
+ * Model context windows and auto-compaction thresholds for the CLI.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.KEEP_TAIL_MESSAGES = exports.COMPACT_THRESHOLD_RATIO = void 0;
+exports.getModelContextWindow = getModelContextWindow;
+exports.getCompactThreshold = getCompactThreshold;
+const MODEL_CONTEXT_WINDOWS = {
+    "z-ai/GLM-5.1": 128_000,
+    "z-ai/glm-5.1": 128_000,
+    "gpt-4o": 128_000,
+    "gpt-4o-mini": 128_000,
+    "claude-sonnet-4": 200_000,
+};
+const DEFAULT_CONTEXT_WINDOW = 64_000;
+exports.COMPACT_THRESHOLD_RATIO = 0.75;
+exports.KEEP_TAIL_MESSAGES = 6;
+function getModelContextWindow(model) {
+    const normalized = model.trim().toLowerCase();
+    if (MODEL_CONTEXT_WINDOWS[model])
+        return MODEL_CONTEXT_WINDOWS[model];
+    if (MODEL_CONTEXT_WINDOWS[normalized])
+        return MODEL_CONTEXT_WINDOWS[normalized];
+    const env = process.env.LLMTUNE_CONTEXT_WINDOW;
+    if (env) {
+        const parsed = Number.parseInt(env, 10);
+        if (Number.isFinite(parsed) && parsed > 0)
+            return parsed;
+    }
+    return DEFAULT_CONTEXT_WINDOW;
+}
+function getCompactThreshold(model) {
+    return Math.floor(getModelContextWindow(model) * exports.COMPACT_THRESHOLD_RATIO);
+}
+//# sourceMappingURL=budget.js.map

package/dist/compact/service.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import OpenAI from "openai";
-import { Conversation } from "../agent/conversation";
+import { Conversation, type Message } from "../agent/conversation";
 export interface CompactResult {
     tokensSaved: number;
     preCompactTokens: number;
@@ -7,7 +7,15 @@ export interface CompactResult {
     preCompactMessages: number;
     postCompactMessages: number;
     summary: string;
+    activeTask?: string;
+    trigger: "manual" | "auto";
 }
-export declare function compactConversation(client: OpenAI, model: string, conversation: Conversation, sessionsDir?: string): Promise<CompactResult>;
+export interface CompactOptions {
+    trigger?: "manual" | "auto";
+    activeTask?: string;
+    keepTail?: number;
+}
+export declare function extractActiveTask(messages: Message[]): string;
+export declare function compactConversation(client: OpenAI, model: string, conversation: Conversation, sessionsDir?: string, options?: CompactOptions): Promise<CompactResult>;
 export declare function uncompactConversation(conversation: Conversation, sessionsDir?: string): boolean;
 //# sourceMappingURL=service.d.ts.map