@kernel.chat/kbot 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,6 +36,12 @@ Most terminal AI agents lock you into one provider, one model, one way of workin
36
36
  - **Programmatic SDK** — use kbot as a library in your own apps.
37
37
  - **MCP server built in** — plug kbot into Claude Code, Cursor, VS Code, Zed, or Neovim as a tool provider.
38
38
 
39
+ ## Benchmarks
40
+
41
+ Methodology-explicit comparison vs other CLI agents → [BENCHMARKS.md](./BENCHMARKS.md). TL;DR: kbot beats Aider (4.4×) and OpenCode (5.7×) on cold start; loses to Claude Code, Codex, and jcode on raw boot but wins on cost-per-task (BYOK + Ollama fallback), vertical depth (Ableton/security/computer-use/channels), and offline availability (~70% of representative tasks).
42
+
43
+ Using jcode? Wire kbot in as an MCP backend → [templates/jcode-integration.md](./templates/jcode-integration.md).
44
+
39
45
  ## Use with Claude Code / Cursor / Zed
40
46
 
41
47
  kbot is designed to compound with your existing AI editor, not replace it. One command wires everything up — MCP server config + a Claude Code skill that pre-authorizes the integration so safety filters don't refuse legitimate kbot calls.
@@ -0,0 +1,25 @@
1
+ /** Anthropic prompt cache TTL — 5 minutes */
2
+ export declare const CACHE_TTL_MS: number;
3
+ /** Hash a system prompt to a short stable key */
4
+ export declare function hashPrompt(text: string): string;
5
+ /** Reset in-memory cache (test hook) */
6
+ export declare function _resetCacheWarmthCache(): void;
7
+ /** Record a successful API call's timestamp */
8
+ export declare function recordCacheCall(model: string, promptHash: string, now?: number): void;
9
+ export interface CacheWarmthCheck {
10
+ warm: boolean;
11
+ ageMs?: number;
12
+ estimatedExtraCostUSD?: number;
13
+ message?: string;
14
+ }
15
+ /**
16
+ * Check whether the prompt cache is still warm for (model, promptHash).
17
+ * Returns warm=true if no prior call OR within TTL. Returns warm=false
18
+ * with a chalk.yellow message when cold AND we haven't warned for this
19
+ * specific cold-event yet.
20
+ *
21
+ * @param costPerMTokInput USD per million input tokens (from auth.ts)
22
+ * @param promptTokenEstimate rough token count (e.g. text.length / 4)
23
+ */
24
+ export declare function checkCacheWarmth(model: string, promptHash: string, costPerMTokInput: number, promptTokenEstimate: number, now?: number): CacheWarmthCheck;
25
+ //# sourceMappingURL=cache-warmth.d.ts.map
@@ -0,0 +1,131 @@
1
+ // kbot Cache Warmth — Anthropic prompt cache TTL warning
2
+ //
3
+ // Anthropic's prompt cache has a 5-minute TTL. If the next API call lands
4
+ // after the cache expired, the user pays full input-token price instead
5
+ // of the cached price. This module tracks per-(model, prompt-hash) call
6
+ // timestamps and warns once per cold event.
7
+ //
8
+ // State persists at ~/.kbot/cache-warmth.json (atomic tmp+rename writes).
9
import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
import chalk from 'chalk';
14
/** TTL of Anthropic's prompt cache (5 minutes), in milliseconds. */
export const CACHE_TTL_MS = 300_000;
16
/** Resolve the on-disk state file; KBOT_CACHE_WARMTH_PATH overrides it (test hook). */
function statePath() {
    const override = process.env.KBOT_CACHE_WARMTH_PATH;
    return override ? override : join(homedir(), '.kbot', 'cache-warmth.json');
}
20
/** Derive a short, stable cache key (16 hex chars) from a system prompt. */
export function hashPrompt(text) {
    // md5 is fine here: this is a lookup key, not a security boundary.
    const digest = createHash('md5').update(text).digest('hex');
    return digest.slice(0, 16);
}
24
// Module-level memo of the parsed state file (undefined until first load).
let cached;
/** Fresh, empty persisted-state shape. */
function emptyState() {
    return {
        lastCall: {},
        warnedColdEvents: {},
    };
}
28
/**
 * Load persisted warmth state from disk, memoizing the result in `cached`.
 * Any read or parse failure degrades to an empty state — the feature is
 * strictly best-effort and must never break a call path.
 */
function loadState() {
    if (cached)
        return cached;
    try {
        const path = statePath();
        if (existsSync(path)) {
            const parsed = JSON.parse(readFileSync(path, 'utf8'));
            // Normalize: tolerate partial/old state files missing either map.
            cached = {
                lastCall: parsed.lastCall || {},
                warnedColdEvents: parsed.warnedColdEvents || {},
            };
        }
        else {
            cached = emptyState();
        }
    }
    catch {
        cached = emptyState();
    }
    return cached;
}
50
/**
 * Persist state atomically: write a pid-suffixed tmp file, then rename it
 * over the real path so readers never see a half-written JSON file.
 * All failures are swallowed — warmth tracking is advisory only.
 *
 * Fix vs. previous version: on a write/rename failure the tmp file used to
 * be leaked; we now best-effort unlink it so `.tmp` files don't accumulate.
 */
function saveState(state) {
    let tmp;
    try {
        const path = statePath();
        const dir = dirname(path);
        if (!existsSync(dir))
            mkdirSync(dir, { recursive: true });
        tmp = `${path}.${process.pid}.tmp`;
        writeFileSync(tmp, JSON.stringify(state), 'utf8');
        renameSync(tmp, path);
    }
    catch {
        // Non-fatal — state is best-effort. Clean up the stray tmp file if
        // we got far enough to create one.
        if (tmp) {
            try {
                unlinkSync(tmp);
            }
            catch {
                // ignore — tmp may never have been written
            }
        }
    }
}
64
/** Test hook: drop the in-memory memo so the next load re-reads disk. */
export function _resetCacheWarmthCache() {
    cached = undefined;
}
68
/** Composite lookup key joining model id and prompt hash. */
function key(model, promptHash) {
    return [model, promptHash].join('::');
}
72
/** Note that an API call for (model, promptHash) succeeded at `now`. */
export function recordCacheCall(model, promptHash, now = Date.now()) {
    const state = loadState();
    const k = key(model, promptHash);
    state.lastCall[k] = now;
    saveState(state);
}
78
/** Render a millisecond duration as "Nm Ss", e.g. 323000 → "5m 23s". */
function formatAge(ms) {
    const totalSec = Math.floor(ms / 1000);
    return `${Math.floor(totalSec / 60)}m ${totalSec % 60}s`;
}
85
/**
 * Check whether the prompt cache is still warm for (model, promptHash).
 *
 * Warm (returns warm=true) when there is no prior recorded call, or when the
 * prior call falls within CACHE_TTL_MS. Cold (warm=false) otherwise; the
 * chalk.yellow message and cost estimate are attached only the first time a
 * given cold event is seen, so a tool loop doesn't re-warn on every call.
 *
 * @param costPerMTokInput USD per million input tokens (from auth.ts)
 * @param promptTokenEstimate rough token count (e.g. text.length / 4)
 */
export function checkCacheWarmth(model, promptHash, costPerMTokInput, promptTokenEstimate, now = Date.now()) {
    // Global kill switch for the whole feature.
    if (process.env.KBOT_CACHE_WARMTH_WARN === 'off')
        return { warm: true };
    const state = loadState();
    const k = key(model, promptHash);
    const last = state.lastCall[k];
    // Never seen this (model, prompt) pair — no cache was expected to exist.
    if (!last)
        return { warm: true };
    const ageMs = now - last;
    if (ageMs <= CACHE_TTL_MS)
        return { warm: true, ageMs };
    // Cold. A cold event is identified by the prior call's timestamp; if we
    // already warned for this one, stay quiet.
    const warned = state.warnedColdEvents[k] || [];
    if (warned.includes(last))
        return { warm: false, ageMs };
    // Cached reads are ~10% of full input price, so the true extra is ~90% of
    // this figure; we report the full input cost as a conservative upper bound
    // that matches what the user actually pays for these tokens.
    const extraUSD = (costPerMTokInput * promptTokenEstimate) / 1_000_000;
    // Persist the warn marker so later calls in this same cold event don't
    // re-warn until a fresh warm window opens. Keep the per-key list bounded.
    warned.push(last);
    if (warned.length > 32)
        warned.splice(0, warned.length - 32);
    state.warnedColdEvents[k] = warned;
    saveState(state);
    const message = chalk.yellow(`[kbot] Anthropic prompt cache likely cold — last call was ${formatAge(ageMs)} ago (TTL is 5m). ` +
        `This call will pay full input price (~$${extraUSD.toFixed(2)} more). ` +
        `Run kbot doctor cache for tips.`);
    return { warm: false, ageMs, estimatedExtraCostUSD: extraUSD, message };
}
131
+ //# sourceMappingURL=cache-warmth.js.map
package/dist/streaming.js CHANGED
@@ -67,6 +67,24 @@ export async function streamAnthropicResponse(apiKey, apiUrl, model, system, mes
67
67
  }
68
68
  if (tools && tools.length > 0)
69
69
  body.tools = tools;
70
+ // Anthropic prompt-cache TTL warning (jcode borrow). Warn once per (model,
71
+ // prompt-hash) cold event when the cache likely expired since last call.
72
+ if (apiUrl.includes('anthropic') && system && process.env.KBOT_CACHE_WARMTH_WARN !== 'off') {
73
+ try {
74
+ const { hashPrompt, checkCacheWarmth, recordCacheCall } = await import('./cache-warmth.js');
75
+ const promptHash = hashPrompt(system);
76
+ const inputCostPerMTok = model.includes('opus') ? 15 : model.includes('haiku') ? 0.8 : 3;
77
+ const promptTokenEstimate = Math.ceil(system.length / 4);
78
+ const check = checkCacheWarmth(model, promptHash, inputCostPerMTok, promptTokenEstimate);
79
+ if (!check.warm && check.message) {
80
+ console.warn((await import('chalk')).default.yellow(check.message));
81
+ }
82
+ recordCacheCall(model, promptHash);
83
+ }
84
+ catch {
85
+ // never let warning logic break the API call
86
+ }
87
+ }
70
88
  let res;
71
89
  let lastError;
72
90
  for (let attempt = 0; attempt <= MAX_STREAM_RETRIES; attempt++) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kernel.chat/kbot",
3
- "version": "4.0.0",
3
+ "version": "4.0.1",
4
4
  "description": "Open-source terminal AI agent. 100+ specialist skills, 35 specialist agents, 20 providers. Dreams, learns, watches your system. Controls your phone. Fully local, fully sovereign. MIT. v4.0 — evidence-based curation.",
5
5
  "type": "module",
6
6
  "repository": {