@kernel.chat/kbot 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,6 +36,12 @@ Most terminal AI agents lock you into one provider, one model, one way of workin
36
36
  - **Programmatic SDK** — use kbot as a library in your own apps.
37
37
  - **MCP server built in** — plug kbot into Claude Code, Cursor, VS Code, Zed, or Neovim as a tool provider.
38
38
 
39
+ ## Benchmarks
40
+
41
+ Methodology-explicit comparison vs other CLI agents → [BENCHMARKS.md](./BENCHMARKS.md). TL;DR: kbot beats Aider (4.4×) and OpenCode (5.7×) on cold start; loses to Claude Code, Codex, and jcode on raw boot but wins on cost-per-task (BYOK + Ollama fallback), vertical depth (Ableton/security/computer-use/channels), and offline availability (~70% of representative tasks).
42
+
43
+ Using jcode? Wire kbot in as an MCP backend → [templates/jcode-integration.md](./templates/jcode-integration.md).
44
+
39
45
  ## Use with Claude Code / Cursor / Zed
40
46
 
41
47
  kbot is designed to compound with your existing AI editor, not replace it. One command wires everything up — MCP server config + a Claude Code skill that pre-authorizes the integration so safety filters don't refuse legitimate kbot calls.
@@ -0,0 +1,25 @@
1
+ /** Anthropic prompt cache TTL — 5 minutes */
2
+ export declare const CACHE_TTL_MS: number;
3
+ /** Hash a system prompt to a short stable key */
4
+ export declare function hashPrompt(text: string): string;
5
+ /** Reset in-memory cache (test hook) */
6
+ export declare function _resetCacheWarmthCache(): void;
7
+ /** Record a successful API call's timestamp */
8
+ export declare function recordCacheCall(model: string, promptHash: string, now?: number): void;
9
+ export interface CacheWarmthCheck {
10
+ warm: boolean;
11
+ ageMs?: number;
12
+ estimatedExtraCostUSD?: number;
13
+ message?: string;
14
+ }
15
+ /**
16
+ * Check whether the prompt cache is still warm for (model, promptHash).
17
+ * Returns warm=true if no prior call OR within TTL. Returns warm=false
18
+ * with a chalk.yellow message when cold AND we haven't warned for this
19
+ * specific cold-event yet.
20
+ *
21
+ * @param costPerMTokInput USD per million input tokens (from auth.ts)
22
+ * @param promptTokenEstimate rough token count (e.g. text.length / 4)
23
+ */
24
+ export declare function checkCacheWarmth(model: string, promptHash: string, costPerMTokInput: number, promptTokenEstimate: number, now?: number): CacheWarmthCheck;
25
+ //# sourceMappingURL=cache-warmth.d.ts.map
@@ -0,0 +1,131 @@
1
+ // kbot Cache Warmth — Anthropic prompt cache TTL warning
2
+ //
3
+ // Anthropic's prompt cache has a 5-minute TTL. If the next API call lands
4
+ // after the cache expired, the user pays full input-token price instead
5
+ // of the cached price. This module tracks per-(model, prompt-hash) call
6
+ // timestamps and warns once per cold event.
7
+ //
8
+ // State persists at ~/.kbot/cache-warmth.json (atomic tmp+rename writes).
9
import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
import chalk from 'chalk';
14
/** TTL of Anthropic's prompt cache (5 minutes), in milliseconds. */
export const CACHE_TTL_MS = 300_000;
16
/** Resolve the on-disk state file; KBOT_CACHE_WARMTH_PATH overrides it (test hook). */
function statePath() {
    const override = process.env.KBOT_CACHE_WARMTH_PATH;
    return override ? override : join(homedir(), '.kbot', 'cache-warmth.json');
}
20
/** Derive a short, stable cache key (16 hex chars) from a system prompt. */
export function hashPrompt(text) {
    // md5 is fine here: this is a lookup key, not a security boundary.
    const digest = createHash('md5').update(text).digest('hex');
    return digest.slice(0, 16);
}
24
// Module-level memo of the parsed state file (undefined until first load).
let cached;
/** Fresh, empty persisted-state shape. */
function emptyState() {
    return {
        lastCall: {},
        warnedColdEvents: {},
    };
}
28
/**
 * Load persisted warmth state from disk, memoizing the result in `cached`.
 * Any read or parse failure degrades to an empty state — the feature is
 * strictly best-effort and must never break a call path.
 */
function loadState() {
    if (cached)
        return cached;
    try {
        const path = statePath();
        if (existsSync(path)) {
            const parsed = JSON.parse(readFileSync(path, 'utf8'));
            // Normalize: tolerate partial/old state files missing either map.
            cached = {
                lastCall: parsed.lastCall || {},
                warnedColdEvents: parsed.warnedColdEvents || {},
            };
        }
        else {
            cached = emptyState();
        }
    }
    catch {
        cached = emptyState();
    }
    return cached;
}
50
/**
 * Persist state atomically: write a pid-suffixed tmp file, then rename it
 * over the real path so readers never see a half-written JSON file.
 * All failures are swallowed — warmth tracking is advisory only.
 *
 * Fix vs. previous version: on a write/rename failure the tmp file used to
 * be leaked; we now best-effort unlink it so `.tmp` files don't accumulate.
 */
function saveState(state) {
    let tmp;
    try {
        const path = statePath();
        const dir = dirname(path);
        if (!existsSync(dir))
            mkdirSync(dir, { recursive: true });
        tmp = `${path}.${process.pid}.tmp`;
        writeFileSync(tmp, JSON.stringify(state), 'utf8');
        renameSync(tmp, path);
    }
    catch {
        // Non-fatal — state is best-effort. Clean up the stray tmp file if
        // we got far enough to create one.
        if (tmp) {
            try {
                unlinkSync(tmp);
            }
            catch {
                // ignore — tmp may never have been written
            }
        }
    }
}
64
/** Test hook: drop the in-memory memo so the next load re-reads disk. */
export function _resetCacheWarmthCache() {
    cached = undefined;
}
68
/** Composite lookup key joining model id and prompt hash. */
function key(model, promptHash) {
    return [model, promptHash].join('::');
}
72
/** Note that an API call for (model, promptHash) succeeded at `now`. */
export function recordCacheCall(model, promptHash, now = Date.now()) {
    const state = loadState();
    const k = key(model, promptHash);
    state.lastCall[k] = now;
    saveState(state);
}
78
/** Render a millisecond duration as "Nm Ss", e.g. 323000 → "5m 23s". */
function formatAge(ms) {
    const totalSec = Math.floor(ms / 1000);
    return `${Math.floor(totalSec / 60)}m ${totalSec % 60}s`;
}
85
/**
 * Check whether the prompt cache is still warm for (model, promptHash).
 *
 * Warm (returns warm=true) when there is no prior recorded call, or when the
 * prior call falls within CACHE_TTL_MS. Cold (warm=false) otherwise; the
 * chalk.yellow message and cost estimate are attached only the first time a
 * given cold event is seen, so a tool loop doesn't re-warn on every call.
 *
 * @param costPerMTokInput USD per million input tokens (from auth.ts)
 * @param promptTokenEstimate rough token count (e.g. text.length / 4)
 */
export function checkCacheWarmth(model, promptHash, costPerMTokInput, promptTokenEstimate, now = Date.now()) {
    // Global kill switch for the whole feature.
    if (process.env.KBOT_CACHE_WARMTH_WARN === 'off')
        return { warm: true };
    const state = loadState();
    const k = key(model, promptHash);
    const last = state.lastCall[k];
    // Never seen this (model, prompt) pair — no cache was expected to exist.
    if (!last)
        return { warm: true };
    const ageMs = now - last;
    if (ageMs <= CACHE_TTL_MS)
        return { warm: true, ageMs };
    // Cold. A cold event is identified by the prior call's timestamp; if we
    // already warned for this one, stay quiet.
    const warned = state.warnedColdEvents[k] || [];
    if (warned.includes(last))
        return { warm: false, ageMs };
    // Cached reads are ~10% of full input price, so the true extra is ~90% of
    // this figure; we report the full input cost as a conservative upper bound
    // that matches what the user actually pays for these tokens.
    const extraUSD = (costPerMTokInput * promptTokenEstimate) / 1_000_000;
    // Persist the warn marker so later calls in this same cold event don't
    // re-warn until a fresh warm window opens. Keep the per-key list bounded.
    warned.push(last);
    if (warned.length > 32)
        warned.splice(0, warned.length - 32);
    state.warnedColdEvents[k] = warned;
    saveState(state);
    const message = chalk.yellow(`[kbot] Anthropic prompt cache likely cold — last call was ${formatAge(ageMs)} ago (TTL is 5m). ` +
        `This call will pay full input price (~$${extraUSD.toFixed(2)} more). ` +
        `Run kbot doctor cache for tips.`);
    return { warm: false, ageMs, estimatedExtraCostUSD: extraUSD, message };
}
131
+ //# sourceMappingURL=cache-warmth.js.map
package/dist/streaming.js CHANGED
@@ -67,6 +67,24 @@ export async function streamAnthropicResponse(apiKey, apiUrl, model, system, mes
67
67
  }
68
68
  if (tools && tools.length > 0)
69
69
  body.tools = tools;
70
+ // Anthropic prompt-cache TTL warning (jcode borrow). Warn once per (model,
71
+ // prompt-hash) cold event when the cache likely expired since last call.
72
+ if (apiUrl.includes('anthropic') && system && process.env.KBOT_CACHE_WARMTH_WARN !== 'off') {
73
+ try {
74
+ const { hashPrompt, checkCacheWarmth, recordCacheCall } = await import('./cache-warmth.js');
75
+ const promptHash = hashPrompt(system);
76
+ const inputCostPerMTok = model.includes('opus') ? 15 : model.includes('haiku') ? 0.8 : 3;
77
+ const promptTokenEstimate = Math.ceil(system.length / 4);
78
+ const check = checkCacheWarmth(model, promptHash, inputCostPerMTok, promptTokenEstimate);
79
+ if (!check.warm && check.message) {
80
+ console.warn((await import('chalk')).default.yellow(check.message));
81
+ }
82
+ recordCacheCall(model, promptHash);
83
+ }
84
+ catch {
85
+ // never let warning logic break the API call
86
+ }
87
+ }
70
88
  let res;
71
89
  let lastError;
72
90
  for (let attempt = 0; attempt <= MAX_STREAM_RETRIES; attempt++) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kernel.chat/kbot",
3
- "version": "4.0.0",
3
+ "version": "4.0.1",
4
4
  "description": "Open-source terminal AI agent. 100+ specialist skills, 35 specialist agents, 20 providers. Dreams, learns, watches your system. Controls your phone. Fully local, fully sovereign. MIT. v4.0 — evidence-based curation.",
5
5
  "type": "module",
6
6
  "repository": {