codeep 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -526,6 +526,64 @@ Then call it as `/sec-review src/api/login.ts` (or `/sec` via the alias).
526
526
  **Discovery:** `/commands` lists all available templates. Project files shadow
527
527
  global files with the same name. Aliases also work for autocomplete.
528
528
 
529
+ ### Personalities (`/personality`, new in 2.0.3)
530
+
531
+ Swap how the agent talks and what it prioritises mid-conversation:
532
+
533
+ ```
534
+ /personality # list available
535
+ /personality concise # short answers, no preamble
536
+ /personality security # treat every input as hostile
537
+ /personality senior-reviewer # push back on shortcuts, name things well
538
+ /personality ship-it # pick first reasonable approach
539
+ /personality off # back to default tone
540
+ ```
541
+
542
+ Six built-in presets: `concise`, `verbose`, `security`, `senior-reviewer`,
543
+ `junior-mentor`, `ship-it`. The active one persists across sessions
544
+ (stored in `~/.codeep/config.json` as `activePersonality`).
545
+
546
+ **Custom personalities** — drop a Markdown file in
547
+ `.codeep/personalities/<name>.md` (project) or
548
+ `~/.codeep/personalities/<name>.md` (global):
549
+
550
+ ```markdown
551
+ # Personality: PR Reviewer
552
+
553
+ You are reviewing a PR from a junior engineer:
554
+ - Cite line numbers for every concern.
555
+ - Suggest an alternative, don't just flag the problem.
556
+ - Keep tone collaborative, not pedantic.
557
+ - End with one thing the author did well.
558
+ ```
559
+
560
+ The first `# Personality:` line sets the display name; the rest of the
561
+ file is appended verbatim to the agent's system prompt while active.
562
+ Project files shadow global files, which shadow built-ins of the same name.
563
+
564
+ ### Activity Insights (`/insights`, new in 2.0.3)
565
+
566
+ Summarise what the agent has actually done for you over a window — runs,
567
+ tool actions, projects touched, most-edited files — sourced from
568
+ `~/.codeep/history/<id>.json` (one file per agent run, automatic).
569
+
570
+ ```
571
+ /insights # last 7 days (default)
572
+ /insights --days 30 # last month
573
+ /insights --days 1 # last 24 hours
574
+ ```
575
+
576
+ Surfaces (markdown rendered in chat):
577
+
578
+ - Headline tally: runs · actions · active time · active-days density · avg actions/run
579
+ - **By project** sorted by active time
580
+ - **Top tools** (read_file × 340, write_file × 80, …)
581
+ - **Most-touched files** (home directory abbreviated to `~` for readability)
582
+ - **Recent runs** — 10 most recent with project, duration, and the user prompt that started them
583
+
584
+ Cost / token usage isn't in `/insights` (it lives in `/cost` per-session
585
+ since the token tracker is in-memory). Insights is history-only.
586
+
529
587
  ### Project Intelligence (`/init`, `/scan`)
530
588
 
531
589
  Initialize a project and scan it once to cache deep analysis for faster AI responses:
@@ -600,6 +600,101 @@ Anything else the agent should know — edge cases, gotchas, things to double-ch
600
600
  }
601
601
  }
602
602
  // ─── Export ────────────────────────────────────────────────────────────────
603
+ // ─── Personalities + insights (2.0.3) ─────────────────────────────────────
604
+ case 'personality': {
605
+ const { formatPersonalityList, findPersonality } = await import('../utils/personalities.js');
606
+ const sub = args[0]?.toLowerCase();
607
+ if (!sub) {
608
+ return { handled: true, response: formatPersonalityList(session.workspaceRoot) };
609
+ }
610
+ if (sub === 'off' || sub === 'none' || sub === 'clear') {
611
+ config.set('activePersonality', null);
612
+ return { handled: true, response: 'Personality cleared — agent uses default tone.' };
613
+ }
614
+ const p = findPersonality(sub, session.workspaceRoot);
615
+ if (!p) {
616
+ return { handled: true, response: `No personality named \`${sub}\`. Run \`/personality\` to see available.` };
617
+ }
618
+ config.set('activePersonality', p.name);
619
+ return {
620
+ handled: true,
621
+ response: `Active personality: **${p.displayName}** (\`${p.name}\`, ${p.scope})\n\n_${p.description}_\n\nClear with \`/personality off\`.`,
622
+ };
623
+ }
624
+ case 'insights': {
625
+ const { formatInsights } = await import('../utils/insights.js');
626
+ let days = 7;
627
+ for (let i = 0; i < args.length; i++) {
628
+ const a = args[i];
629
+ if (a === '--days' && args[i + 1]) {
630
+ const n = parseInt(args[i + 1], 10);
631
+ if (Number.isFinite(n))
632
+ days = n;
633
+ }
634
+ else if (a.startsWith('--days=')) {
635
+ const n = parseInt(a.slice('--days='.length), 10);
636
+ if (Number.isFinite(n))
637
+ days = n;
638
+ }
639
+ }
640
+ return { handled: true, response: formatInsights({ days }) };
641
+ }
642
+ // ─── Plan mode (2.0.2) ────────────────────────────────────────────────────
643
+ case 'plan': {
644
+ // Identical contract to TUI /plan: generate a pre-execution plan,
645
+ // surface it, hold as pending so /go can execute it without re-planning.
646
+ if (!args.length) {
647
+ const { getPendingPlan } = await import('../utils/planMode.js');
648
+ const cur = getPendingPlan();
649
+ return {
650
+ handled: true,
651
+ response: cur
652
+ ? `**Pending plan for:** _${cur.task}_\n\n${cur.plan}\n\n---\nRun \`/go\` to execute, or \`/plan <revised task>\` to revise.`
653
+ : 'Usage: `/plan <task>` — generates a plan you can review, then `/go` to execute.',
654
+ };
655
+ }
656
+ const task = args.join(' ');
657
+ onChunk(`_Generating plan for: ${task.slice(0, 80)}${task.length > 80 ? '…' : ''}_\n\n`);
658
+ try {
659
+ const { generatePlan } = await import('../utils/planMode.js');
660
+ const plan = await generatePlan(task);
661
+ return {
662
+ handled: true,
663
+ response: `${plan}\n\n---\nRun \`/go\` to execute this plan, or \`/plan <revised task>\` to refine it.`,
664
+ streaming: true,
665
+ };
666
+ }
667
+ catch (err) {
668
+ return { handled: true, response: `Plan generation failed: ${err.message}`, streaming: true };
669
+ }
670
+ }
671
+ case 'go': {
672
+ const { getPendingPlan, composeExecutionPrompt, clearPendingPlan } = await import('../utils/planMode.js');
673
+ const cur = getPendingPlan();
674
+ if (!cur) {
675
+ return { handled: true, response: 'No pending plan. Run `/plan <task>` first.' };
676
+ }
677
+ const prompt = composeExecutionPrompt(cur);
678
+ clearPendingPlan();
679
+ onChunk(`_Executing approved plan…_\n\n`);
680
+ try {
681
+ const { buildProjectContext } = await import('./session.js');
682
+ const ctx = buildProjectContext(session.workspaceRoot);
683
+ const agentResult = await runAgent(prompt, ctx, {
684
+ abortSignal,
685
+ onIteration: (_i, msg) => { onChunk(msg + '\n'); },
686
+ onThinking: (text) => { onChunk(text); },
687
+ });
688
+ return {
689
+ handled: true,
690
+ response: agentResult.finalResponse || '_(plan executed; no final summary)_',
691
+ streaming: true,
692
+ };
693
+ }
694
+ catch (err) {
695
+ return { handled: true, response: `Plan execution failed: ${err.message}`, streaming: true };
696
+ }
697
+ }
603
698
  case 'export': {
604
699
  if (!session.history.length)
605
700
  return { handled: true, response: 'No messages to export.' };
@@ -52,6 +52,12 @@ const AVAILABLE_COMMANDS = [
52
52
  { name: 'mcp', description: 'Manage MCP servers, marketplace, resources, prompts', input: { hint: '[browse | install <id> | add | remove | reload | resources | read <uri> | prompts | prompt <server> <name>]' } },
53
53
  { name: 'openrouter', description: 'OpenRouter routing preferences (prefer/ignore/fallbacks/privacy/clear)', input: { hint: '[show | prefer <p,...> | ignore <p,...> | fallbacks on|off | privacy strict|allow | clear]' } },
54
54
  { name: 'export', description: 'Export conversation', input: { hint: 'json | md | txt' } },
55
+ // Plan mode (2.0.2)
56
+ { name: 'plan', description: 'Generate a numbered plan for a task — review before /go executes', input: { hint: '<task>' } },
57
+ { name: 'go', description: 'Execute the pending plan from /plan' },
58
+ // Personalities + insights (2.0.3)
59
+ { name: 'personality', description: 'List or switch agent tone preset', input: { hint: '[name | off]' } },
60
+ { name: 'insights', description: 'Activity summary over the last N days (default 7)', input: { hint: '[--days N]' } },
55
61
  // Project intelligence
56
62
  { name: 'scan', description: 'Scan project structure and generate summary' },
57
63
  { name: 'review', description: 'Run code review on project or specific files', input: { hint: '[file…]' } },
package/dist/api/index.js CHANGED
@@ -593,6 +593,13 @@ async function chatAnthropic(message, history, model, apiKey, onChunk, abortSign
593
593
  headers['x-api-key'] = apiKey;
594
594
  }
595
595
  try {
596
+ // Anthropic prompt caching: wrap system as an array with a
597
+ // `cache_control` marker so the static system prompt (typically large
598
+ // and stable across a session) is cached. Below 1024 input tokens
599
+ // Anthropic silently skips caching — no error.
600
+ const cachedSystem = useNativeSystem
601
+ ? { system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }] }
602
+ : {};
596
603
  const response = await fetch(`${baseUrl}/v1/messages`, {
597
604
  method: 'POST',
598
605
  headers,
@@ -602,7 +609,7 @@ async function chatAnthropic(message, history, model, apiKey, onChunk, abortSign
602
609
  max_tokens: maxTokens,
603
610
  temperature,
604
611
  stream,
605
- ...(useNativeSystem ? { system: systemPrompt } : {}),
612
+ ...cachedSystem,
606
613
  }),
607
614
  signal: controller.signal,
608
615
  });
@@ -649,6 +656,8 @@ async function handleAnthropicStream(body, onChunk) {
649
656
  let buffer = '';
650
657
  let inputTokens = 0;
651
658
  let outputTokens = 0;
659
+ let cacheCreationTokens = 0;
660
+ let cacheReadTokens = 0;
652
661
  let streamModel = '';
653
662
  while (true) {
654
663
  const { done, value } = await reader.read();
@@ -669,9 +678,13 @@ async function handleAnthropicStream(body, onChunk) {
669
678
  onChunk(text);
670
679
  }
671
680
  }
672
- // message_start contains input_tokens
681
+ // message_start contains input_tokens (and cache create/read
682
+ // when prompt caching is in play).
673
683
  if (parsed.type === 'message_start' && parsed.message?.usage) {
674
- inputTokens = parsed.message.usage.input_tokens || 0;
684
+ const u = parsed.message.usage;
685
+ inputTokens = u.input_tokens || 0;
686
+ cacheCreationTokens = u.cache_creation_input_tokens || 0;
687
+ cacheReadTokens = u.cache_read_input_tokens || 0;
675
688
  streamModel = parsed.message.model || '';
676
689
  }
677
690
  // message_delta contains output_tokens
@@ -686,8 +699,15 @@ async function handleAnthropicStream(body, onChunk) {
686
699
  }
687
700
  }
688
701
  // Record token usage
689
- if (inputTokens > 0 || outputTokens > 0) {
690
- recordTokenUsage({ promptTokens: inputTokens, completionTokens: outputTokens, totalTokens: inputTokens + outputTokens }, streamModel || 'unknown', config.get('provider'));
702
+ if (inputTokens > 0 || outputTokens > 0 || cacheReadTokens > 0 || cacheCreationTokens > 0) {
703
+ const totalPrompt = inputTokens + cacheCreationTokens + cacheReadTokens;
704
+ recordTokenUsage({
705
+ promptTokens: totalPrompt,
706
+ completionTokens: outputTokens,
707
+ totalTokens: totalPrompt + outputTokens,
708
+ cacheCreationTokens: cacheCreationTokens || undefined,
709
+ cacheReadTokens: cacheReadTokens || undefined,
710
+ }, streamModel || 'unknown', config.get('provider'));
691
711
  }
692
712
  // Strip <think> tags from MiniMax responses
693
713
  return stripThinkTags(chunks.join(''));
@@ -57,6 +57,13 @@ interface ConfigSchema {
57
57
  data_collection?: 'allow' | 'deny';
58
58
  require_parameters?: boolean;
59
59
  };
60
+ /**
61
+ * Active personality preset (`concise`, `senior-reviewer`, custom user
62
+ * personalities from .codeep/personalities/*.md, …). When set, the
63
+ * loader text is appended to every agent system prompt. See
64
+ * utils/personalities.ts.
65
+ */
66
+ activePersonality?: string | null;
60
67
  }
61
68
  export type { AgentMode };
62
69
  export type { LanguageCode };
@@ -91,6 +91,10 @@ const COMMAND_DESCRIPTIONS = {
91
91
  'hooks': 'List installed lifecycle hooks (.codeep/hooks/<event>.sh)',
92
92
  'mcp': 'Manage MCP servers (browse, install, add, remove, resources, prompts)',
93
93
  'openrouter': 'Tune OpenRouter routing (preferred / ignore providers, fallbacks, privacy)',
94
+ 'plan': 'Generate a numbered plan for a task — review before /go executes it',
95
+ 'go': 'Execute the pending plan from /plan',
96
+ 'personality': 'Switch agent tone: concise / verbose / security / senior-reviewer / etc',
97
+ 'insights': 'Activity summary over the last N days (default 7): runs, files, tools, projects',
94
98
  };
95
99
  import { helpCategories, keyboardShortcuts } from './components/Help.js';
96
100
  import { handleSettingsKey, SETTINGS } from './components/Settings.js';
@@ -230,6 +234,10 @@ export class App {
230
234
  // Keep in lockstep with COMMAND_DESCRIPTIONS below and helpCategories.
231
235
  'compact', 'commands', 'checkpoint', 'checkpoints', 'rewind',
232
236
  'hooks', 'mcp', 'openrouter',
237
+ // 2.0.2 — plan mode.
238
+ 'plan', 'go',
239
+ // 2.0.3 — personalities + insights.
240
+ 'personality', 'insights',
233
241
  'c', 't', 'd', 'r', 'f', 'e', 'o', 'b', 'p',
234
242
  ];
235
243
  constructor(options) {
@@ -216,6 +216,109 @@ export async function handleCommand(command, args, ctx) {
216
216
  runAgentTask(args.join(' '), true, ctx, () => null, () => { });
217
217
  break;
218
218
  }
219
+ case 'insights': {
220
+ const { formatInsights } = await import('../utils/insights.js');
221
+ // Parse `--days N` (default 7). Accept both `--days 30` and `--days=30`.
222
+ let days = 7;
223
+ for (let i = 0; i < args.length; i++) {
224
+ const a = args[i];
225
+ if (a === '--days' && args[i + 1]) {
226
+ const n = parseInt(args[i + 1], 10);
227
+ if (Number.isFinite(n))
228
+ days = n;
229
+ }
230
+ else if (a.startsWith('--days=')) {
231
+ const n = parseInt(a.slice('--days='.length), 10);
232
+ if (Number.isFinite(n))
233
+ days = n;
234
+ }
235
+ }
236
+ ctx.app.addMessage({ role: 'system', content: formatInsights({ days }) });
237
+ break;
238
+ }
239
+ case 'personality': {
240
+ const { formatPersonalityList, findPersonality } = await import('../utils/personalities.js');
241
+ const sub = args[0]?.toLowerCase();
242
+ if (!sub) {
243
+ ctx.app.addMessage({ role: 'system', content: formatPersonalityList(ctx.projectPath) });
244
+ break;
245
+ }
246
+ if (sub === 'off' || sub === 'none' || sub === 'clear') {
247
+ config.set('activePersonality', null);
248
+ ctx.app.notify('Personality cleared — agent uses default tone.');
249
+ break;
250
+ }
251
+ const personality = findPersonality(sub, ctx.projectPath);
252
+ if (!personality) {
253
+ ctx.app.notify(`No personality named "${sub}". Run /personality to see available.`);
254
+ break;
255
+ }
256
+ config.set('activePersonality', personality.name);
257
+ ctx.app.addMessage({
258
+ role: 'system',
259
+ content: `Active personality: **${personality.displayName}** (\`${personality.name}\`, ${personality.scope})\n\n_${personality.description}_\n\nClear with \`/personality off\`.`,
260
+ });
261
+ break;
262
+ }
263
+ case 'plan': {
264
+ // Plan mode: ask the model for a plan, surface it, hold as pending.
265
+ // The user runs /go to execute or /plan <revised> to revise. See
266
+ // src/utils/planMode.ts for the rationale + system prompt.
267
+ if (!args.length) {
268
+ const { getPendingPlan } = await import('../utils/planMode.js');
269
+ const cur = getPendingPlan();
270
+ if (cur) {
271
+ ctx.app.addMessage({
272
+ role: 'system',
273
+ content: `**Pending plan for:** _${cur.task}_\n\n${cur.plan}\n\n---\nRun \`/go\` to execute, or \`/plan <revised task>\` to revise.`,
274
+ });
275
+ }
276
+ else {
277
+ ctx.app.notify('Usage: /plan <task> — generates a plan you can review, then /go to execute.');
278
+ }
279
+ return;
280
+ }
281
+ if (ctx.isAgentRunning()) {
282
+ ctx.app.notify('Agent already running. Use /stop first.');
283
+ return;
284
+ }
285
+ const task = args.join(' ');
286
+ ctx.app.addMessage({ role: 'user', content: `/plan ${task}` });
287
+ ctx.app.notify('Generating plan…');
288
+ try {
289
+ const { generatePlan } = await import('../utils/planMode.js');
290
+ const plan = await generatePlan(task);
291
+ ctx.app.addMessage({
292
+ role: 'assistant',
293
+ content: `${plan}\n\n---\nRun \`/go\` to execute this plan, or \`/plan <revised task>\` to refine it.`,
294
+ });
295
+ }
296
+ catch (err) {
297
+ ctx.app.notify(`Plan generation failed: ${err.message}`);
298
+ }
299
+ break;
300
+ }
301
+ case 'go': {
302
+ // Execute the pending plan from /plan. The agent loop sees the
303
+ // task + plan as a single prompt, so MCP tools, hooks, permissions,
304
+ // and verification all apply unchanged.
305
+ const { getPendingPlan, composeExecutionPrompt, clearPendingPlan } = await import('../utils/planMode.js');
306
+ const cur = getPendingPlan();
307
+ if (!cur) {
308
+ ctx.app.notify('No pending plan. Run `/plan <task>` first.');
309
+ return;
310
+ }
311
+ if (ctx.isAgentRunning()) {
312
+ ctx.app.notify('Agent already running. Use /stop first.');
313
+ return;
314
+ }
315
+ const prompt = composeExecutionPrompt(cur);
316
+ clearPendingPlan();
317
+ ctx.app.notify(`Executing plan for: ${cur.task.slice(0, 80)}${cur.task.length > 80 ? '…' : ''}`);
318
+ const { runAgentTask } = await import('./agentExecution.js');
319
+ runAgentTask(prompt, false, ctx, () => null, () => { });
320
+ break;
321
+ }
219
322
  case 'stop': {
220
323
  if (ctx.isAgentRunning() && ctx.abortController) {
221
324
  ctx.abortController.abort();
@@ -46,6 +46,8 @@ export const helpCategories = [
46
46
  items: [
47
47
  { key: '/agent <task>', description: 'Run agent with task' },
48
48
  { key: '/agent-dry <task>', description: 'Dry run (no changes)' },
49
+ { key: '/plan <task>', description: 'Generate a plan first — review before /go executes' },
50
+ { key: '/go', description: 'Execute the pending plan from /plan' },
49
51
  { key: '/stop', description: 'Stop running agent' },
50
52
  { key: '/undo', description: 'Undo last agent action' },
51
53
  { key: '/undo-all', description: 'Undo all agent actions' },
@@ -121,6 +123,9 @@ export const helpCategories = [
121
123
  { key: '/profile save <name>', description: 'Save current provider+model as profile' },
122
124
  { key: '/profile list', description: 'List saved profiles' },
123
125
  { key: '/openrouter', description: 'OpenRouter routing prefs (prefer/ignore providers, fallbacks, privacy)' },
126
+ { key: '/personality', description: 'List or switch agent tone (concise / verbose / security / senior-reviewer / …)' },
127
+ { key: '/personality <name>', description: 'Activate a personality. /personality off to clear.' },
128
+ { key: '/insights [--days N]', description: 'Activity summary — runs, files, tools, projects over the last N days (default 7)' },
124
129
  ],
125
130
  },
126
131
  {
@@ -253,6 +253,19 @@ export async function runAgent(prompt, projectContext, options = {}) {
253
253
  if (skillCatalogBlock) {
254
254
  systemPrompt += '\n\n' + skillCatalogBlock;
255
255
  }
256
+ // Active personality goes LAST — appended after skills / project rules /
257
+ // smart context so its tone overrides earlier conventions. Set via
258
+ // `/personality <name>`; empty when no personality is active.
259
+ try {
260
+ const { getActivePersonalityPrompt } = await import('./personalities.js');
261
+ const personalityPrompt = getActivePersonalityPrompt(projectContext.root);
262
+ if (personalityPrompt) {
263
+ systemPrompt += personalityPrompt;
264
+ }
265
+ }
266
+ catch {
267
+ // Personality loading must never block an agent run.
268
+ }
256
269
  // Initial user message with optional task plan
257
270
  let initialPrompt = prompt;
258
271
  if (taskPlan) {
@@ -301,9 +301,25 @@ additionalTools) {
301
301
  }
302
302
  else {
303
303
  endpoint = `${baseUrl}/v1/messages`;
304
+ // Anthropic prompt caching. Two cache breakpoints:
305
+ // 1. `system` (largest stable block — system prompt + skills catalog)
306
+ // 2. last tool in `tools` (Anthropic caches everything up to and
307
+ // including the marker, so this caches the entire tools array)
308
+ // Cache hits cost 0.1× input. Misses ("cache creation") cost 1.25×.
309
+ // Net win after the 2nd same-shape request. Below 1024 input tokens
310
+ // Anthropic silently skips caching — no error path to handle.
311
+ const anthropicTools = getAnthropicTools(additionalTools);
312
+ const cachedTools = anthropicTools.length > 0
313
+ ? [
314
+ ...anthropicTools.slice(0, -1),
315
+ { ...anthropicTools[anthropicTools.length - 1], cache_control: { type: 'ephemeral' } },
316
+ ]
317
+ : anthropicTools;
304
318
  body = {
305
- model, system: systemPrompt, messages,
306
- tools: getAnthropicTools(additionalTools), stream: useStreaming,
319
+ model,
320
+ system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
321
+ messages,
322
+ tools: cachedTools, stream: useStreaming,
307
323
  ...tempParam, max_tokens: getEffectiveMaxTokens(providerId, Math.max(config.get('maxTokens'), 16384)),
308
324
  };
309
325
  }
@@ -435,10 +451,15 @@ export async function agentChatFallback(messages, systemPrompt, onChunk, abortSi
435
451
  }
436
452
  else {
437
453
  endpoint = `${baseUrl}/v1/messages`;
454
+ // Fallback path injects system+tools as the first user message
455
+ // (no native tool API). Cache that block — it's large and stable.
438
456
  body = {
439
457
  model,
440
458
  messages: [
441
- { role: 'user', content: fallbackPrompt },
459
+ {
460
+ role: 'user',
461
+ content: [{ type: 'text', text: fallbackPrompt, cache_control: { type: 'ephemeral' } }],
462
+ },
442
463
  { role: 'assistant', content: 'Understood. I will use the tools as specified.' },
443
464
  ...messages,
444
465
  ],
@@ -169,13 +169,30 @@ export async function handleAnthropicAgentStream(body, onChunk, model, providerI
169
169
  const data = line.slice(6);
170
170
  try {
171
171
  const parsed = JSON.parse(data);
172
- // message_start has input tokens; message_delta has output tokens — merge both
172
+ // message_start has input tokens (incl. cache create/read fields if
173
+ // prompt caching is in use); message_delta has output tokens —
174
+ // merge both so extractAnthropicUsage sees the full picture.
173
175
  if (parsed.type === 'message_start' && parsed.message?.usage) {
174
- usageData = { usage: { input_tokens: parsed.message.usage.input_tokens || 0, output_tokens: 0 } };
176
+ const u = parsed.message.usage;
177
+ usageData = {
178
+ usage: {
179
+ input_tokens: u.input_tokens || 0,
180
+ output_tokens: 0,
181
+ cache_creation_input_tokens: u.cache_creation_input_tokens || 0,
182
+ cache_read_input_tokens: u.cache_read_input_tokens || 0,
183
+ },
184
+ };
175
185
  }
176
186
  else if (parsed.type === 'message_delta' && parsed.usage) {
177
- const inputTokens = usageData?.usage?.input_tokens || 0;
178
- usageData = { usage: { input_tokens: inputTokens, output_tokens: parsed.usage.output_tokens || 0 } };
187
+ const prev = usageData?.usage ?? {};
188
+ usageData = {
189
+ usage: {
190
+ input_tokens: prev.input_tokens || 0,
191
+ output_tokens: parsed.usage.output_tokens || 0,
192
+ cache_creation_input_tokens: prev.cache_creation_input_tokens || 0,
193
+ cache_read_input_tokens: prev.cache_read_input_tokens || 0,
194
+ },
195
+ };
179
196
  }
180
197
  if (parsed.type === 'content_block_start') {
181
198
  const block = parsed.content_block;
@@ -0,0 +1,30 @@
1
+ /**
2
+ * `/insights` — agent activity summary over a configurable window.
3
+ *
4
+ * Source of truth: `~/.codeep/history/<id>.json`, one file per agent
5
+ * run, written by the agent loop. Schema (relevant fields):
6
+ * { id, startTime, endTime, prompt, projectRoot, actions: [
7
+ * { type: 'write' | 'read' | 'execute' | …, path?, command?, timestamp }
8
+ * ]
9
+ * }
10
+ *
11
+ * We deliberately don't read sessions/*.json here — sessions store
12
+ * chat history without tool-level detail, while history/ captures the
13
+ * exact actions which is what users want to see ("which file did I
14
+ * touch most this week?").
15
+ *
16
+ * Cost / token usage is per-process and lost across restarts (the
17
+ * token tracker is in-memory), so /insights reports actions and time
18
+ * but not historical dollar amounts. The current session's cost still
19
+ * shows in /cost.
20
+ */
21
+ interface InsightsOptions {
22
+ /** Days to look back. Default 7; clamped to the range 1–365. */
23
+ days?: number;
24
+ }
25
+ /**
26
+ * Format `/insights` output as Markdown. Returns a friendly empty-state
27
+ * message when there's no history in the window — we don't error.
28
+ */
29
+ export declare function formatInsights(opts?: InsightsOptions): string;
30
+ export {};
@@ -0,0 +1,166 @@
1
+ /**
2
+ * `/insights` — agent activity summary over a configurable window.
3
+ *
4
+ * Source of truth: `~/.codeep/history/<id>.json`, one file per agent
5
+ * run, written by the agent loop. Schema (relevant fields):
6
+ * { id, startTime, endTime, prompt, projectRoot, actions: [
7
+ * { type: 'write' | 'read' | 'execute' | …, path?, command?, timestamp }
8
+ * ]
9
+ * }
10
+ *
11
+ * We deliberately don't read sessions/*.json here — sessions store
12
+ * chat history without tool-level detail, while history/ captures the
13
+ * exact actions which is what users want to see ("which file did I
14
+ * touch most this week?").
15
+ *
16
+ * Cost / token usage is per-process and lost across restarts (the
17
+ * token tracker is in-memory), so /insights reports actions and time
18
+ * but not historical dollar amounts. The current session's cost still
19
+ * shows in /cost.
20
+ */
21
+ import { readFileSync, readdirSync, existsSync } from 'fs';
22
+ import { join, basename } from 'path';
23
+ import { homedir } from 'os';
24
/**
 * Read every `*.json` run record under `~/.codeep/history` and return the
 * ones that started at or after `sinceMs`, newest first.
 *
 * Best-effort by design: a missing or unreadable directory yields `[]`, and
 * any file that cannot be read or parsed, or whose `startTime` is not a
 * number, is skipped.
 *
 * @param {number} sinceMs - Epoch-millisecond lower bound on `startTime`.
 * @returns {object[]} Parsed run records sorted by descending `startTime`.
 */
function loadHistoryRuns(sinceMs) {
    const historyDir = join(homedir(), '.codeep', 'history');
    if (!existsSync(historyDir)) {
        return [];
    }
    let entryNames;
    try {
        entryNames = readdirSync(historyDir);
    }
    catch {
        return [];
    }
    const collected = entryNames
        .filter((entryName) => entryName.endsWith('.json'))
        .flatMap((entryName) => {
            try {
                const record = JSON.parse(readFileSync(join(historyDir, entryName), 'utf8'));
                const hasNumericStart = typeof record.startTime === 'number';
                // Expressed as "not older than the cutoff" so the comparison
                // behaves exactly like the original `<` early-continue.
                const keep = hasNumericStart && !(record.startTime < sinceMs);
                return keep ? [record] : [];
            }
            catch {
                // Malformed or unreadable record: skip it.
                return [];
            }
        });
    collected.sort((left, right) => right.startTime - left.startTime);
    return collected;
}
52
/**
 * Render a millisecond duration as a compact label: `45s`, `12m`, `2h`
 * or `1h 30m`.
 *
 * The value is rounded to the display unit *before* the unit is chosen, so a
 * duration that rounds up to a full minute or hour is promoted to the next
 * unit instead of printing `60s`, `60m` or `1h 60m`.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Human-readable duration; `0s` for negative or non-finite input.
 */
function fmtDuration(ms) {
    if (!Number.isFinite(ms) || ms <= 0) {
        return '0s';
    }
    const totalSeconds = Math.round(ms / 1000);
    if (totalSeconds < 60) {
        return `${totalSeconds}s`;
    }
    const totalMinutes = Math.round(ms / 60_000);
    if (totalMinutes < 60) {
        return `${totalMinutes}m`;
    }
    const hours = Math.floor(totalMinutes / 60);
    const minutes = totalMinutes % 60;
    return minutes === 0 ? `${hours}h` : `${hours}h ${minutes}m`;
}
61
/**
 * Label how long before `now` the timestamp `ts` occurred.
 *
 * Buckets are whole elapsed 24-hour periods, not calendar days: anything
 * less than 24 hours old is `today`, 24–48 hours is `yesterday`, up to a
 * week is `Nd ago`, and older timestamps fall back to the UTC date
 * (`YYYY-MM-DD`).
 *
 * @param {number} ts - Timestamp to label, in epoch milliseconds.
 * @param {number} now - Reference "current" time, in epoch milliseconds.
 * @returns {string} Relative label or ISO date.
 */
function relativeDayBucket(ts, now) {
    const dayMs = 86_400_000;
    // Clamp at zero: a timestamp slightly ahead of `now` (clock skew between
    // writer and reader) should read as "today", not "-1d ago".
    const days = Math.max(0, Math.floor((now - ts) / dayMs));
    if (days === 0) {
        return 'today';
    }
    if (days === 1) {
        return 'yesterday';
    }
    if (days < 7) {
        return `${days}d ago`;
    }
    return new Date(ts).toISOString().slice(0, 10);
}
72
/**
 * Elapsed time of one history run in milliseconds.
 *
 * A run with no `endTime` counts as zero-length; a negative or non-numeric
 * result (malformed record) is also treated as zero so one bad file cannot
 * turn the totals into `NaN`.
 */
function runDurationMs(run) {
    const elapsed = (run.endTime ?? run.startTime) - run.startTime;
    return Number.isFinite(elapsed) && elapsed > 0 ? elapsed : 0;
}
/**
 * Abbreviate a leading home-directory prefix to `~`. Only a true prefix
 * (the home directory itself or a path directly beneath it) is rewritten;
 * an occurrence of the home path elsewhere in the string is left alone.
 */
function shortenHomePath(filePath, home) {
    if (!home) {
        return filePath;
    }
    if (filePath === home) {
        return '~';
    }
    if (filePath.startsWith(`${home}/`) || filePath.startsWith(`${home}\\`)) {
        return `~${filePath.slice(home.length)}`;
    }
    return filePath;
}
/**
 * Format `/insights` output as Markdown.
 *
 * Sections, in order: headline tally, per-project table (top 8 by active
 * time), top tool/action types (top 8), most-touched files (top 8), the 10
 * most recent runs, and a pointer to `/cost`. When no runs fall inside the
 * window a friendly empty-state message is returned instead — this function
 * does not throw for "no data".
 *
 * @param {{ days?: number }} [opts] - `days` is the look-back window.
 *   Defaults to 7, is clamped to 1–365, and falls back to 7 when it is not
 *   a usable number.
 * @returns {string} Markdown report.
 */
export function formatInsights(opts = {}) {
    const requestedDays = Number(opts.days ?? 7);
    // NaN would otherwise flow through Math.min/Math.max into both the
    // header text and the window start.
    const days = Number.isNaN(requestedDays) ? 7 : Math.max(1, Math.min(365, requestedDays));
    const now = Date.now();
    const sinceMs = now - days * 86_400_000;
    const runs = loadHistoryRuns(sinceMs);
    const lines = [`## Activity — last ${days} day${days === 1 ? '' : 's'}`, ''];
    if (runs.length === 0) {
        lines.push(`_No agent runs in the last ${days} day${days === 1 ? '' : 's'}._`);
        lines.push('');
        lines.push('Run an agent task with `/agent <task>` (or just type a request when agent mode is on) — the activity here populates from `~/.codeep/history/`.');
        return lines.join('\n');
    }
    // ── Headline metrics ──────────────────────────────────────────────────────
    const totalRuns = runs.length;
    const totalActions = runs.reduce((sum, r) => sum + (r.actions?.length ?? 0), 0);
    const totalActiveMs = runs.reduce((sum, r) => sum + runDurationMs(r), 0);
    const avgActions = (totalActions / totalRuns).toFixed(1);
    // Days are bucketed by UTC date.
    const distinctDays = new Set(runs.map((r) => new Date(r.startTime).toISOString().slice(0, 10))).size;
    lines.push(`**${totalRuns}** run${totalRuns === 1 ? '' : 's'}`
        + ` · **${totalActions}** tool action${totalActions === 1 ? '' : 's'}`
        + ` · **${fmtDuration(totalActiveMs)}** active`
        + ` · **${distinctDays}**/${days} active day${distinctDays === 1 ? '' : 's'}`
        + ` · avg **${avgActions}** action${avgActions === '1.0' ? '' : 's'}/run`);
    // ── By project ────────────────────────────────────────────────────────────
    const byProject = new Map();
    for (const r of runs) {
        const proj = r.projectRoot ? basename(r.projectRoot) : '(no project)';
        const cur = byProject.get(proj) ?? { runs: 0, actions: 0, activeMs: 0 };
        cur.runs++;
        cur.actions += r.actions?.length ?? 0;
        cur.activeMs += runDurationMs(r);
        byProject.set(proj, cur);
    }
    const projects = [...byProject.entries()].sort((a, b) => b[1].activeMs - a[1].activeMs);
    if (projects.length > 0) {
        lines.push('', '### By project', '');
        lines.push('| Project | Runs | Actions | Active time |');
        lines.push('|---|---:|---:|---:|');
        for (const [name, s] of projects.slice(0, 8)) {
            lines.push(`| \`${name}\` | ${s.runs} | ${s.actions} | ${fmtDuration(s.activeMs)} |`);
        }
        if (projects.length > 8) {
            lines.push(`| _… and ${projects.length - 8} more_ | | | |`);
        }
    }
    // ── Top tool types ────────────────────────────────────────────────────────
    const byType = new Map();
    for (const r of runs) {
        for (const a of r.actions ?? []) {
            byType.set(a.type, (byType.get(a.type) ?? 0) + 1);
        }
    }
    const tools = [...byType.entries()].sort((a, b) => b[1] - a[1]);
    if (tools.length > 0) {
        lines.push('', '### Top tools', '');
        for (const [name, count] of tools.slice(0, 8)) {
            lines.push(`- \`${name}\` × **${count}**`);
        }
    }
    // ── Top files touched ─────────────────────────────────────────────────────
    const byPath = new Map();
    for (const r of runs) {
        for (const a of r.actions ?? []) {
            // Only string paths are counted; anything else cannot be displayed.
            if (typeof a.path === 'string' && a.path) {
                byPath.set(a.path, (byPath.get(a.path) ?? 0) + 1);
            }
        }
    }
    const files = [...byPath.entries()].sort((a, b) => b[1] - a[1]);
    if (files.length > 0) {
        lines.push('', '### Most-touched files', '');
        const home = homedir();
        for (const [path, count] of files.slice(0, 8)) {
            // Trim home prefix for readability
            lines.push(`- \`${shortenHomePath(path, home)}\` × **${count}**`);
        }
    }
    // ── Recent runs ───────────────────────────────────────────────────────────
    lines.push('', '### Recent runs', '');
    for (const r of runs.slice(0, 10)) {
        const when = relativeDayBucket(r.startTime, now);
        const dur = fmtDuration(runDurationMs(r));
        const proj = r.projectRoot ? basename(r.projectRoot) : '—';
        // Collapse whitespace so a multi-line prompt stays on one list item.
        const prompt = (r.prompt || '').replace(/\s+/g, ' ').trim();
        const promptShort = prompt.length > 80 ? prompt.slice(0, 77) + '…' : prompt;
        lines.push(`- _${when}_ · **${proj}** · ${dur} · ${promptShort}`);
    }
    // ── Session cost callout ──────────────────────────────────────────────────
    // We only track tokens in-memory per process, so historical cost isn't
    // available. Tell the user where to look for the current session.
    lines.push('', "_For this session's cost + cache savings, run `/cost`._");
    return lines.join('\n');
}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Personalities — pluggable system prompt addenda that shape how the
3
+ * agent communicates and what it prioritises.
4
+ *
5
+ * Storage:
6
+ * - **Built-in**: hardcoded below (concise, verbose, security,
7
+ * senior-reviewer, junior-mentor, ship-it).
8
+ * - **Project**: `<workspace>/.codeep/personalities/<name>.md`
9
+ * - **Global**: `~/.codeep/personalities/<name>.md`
10
+ *
11
+ * Project shadows global shadows built-in, by name.
12
+ *
13
+ * File format (project / global):
14
+ * ```
15
+ * # Personality: Concise Reviewer
16
+ * <free-form Markdown body — gets appended to system prompt verbatim>
17
+ * ```
18
+ * (The first H1 line is parsed as the display name; everything else is
19
+ * the prompt body.)
20
+ *
21
+ * Activation:
22
+ * - `config.activePersonality` holds the active name (or null/undefined
23
+ * for default behaviour).
24
+ * - `getActivePersonalityPrompt(workspaceRoot)` returns the prompt
25
+ * addendum to inject into the agent's system prompt, or '' when no
26
+ * personality is active.
27
+ * - Persists across sessions until cleared with `/personality off`.
28
+ */
29
+ export type PersonalityScope = 'builtin' | 'project' | 'global';
30
+ export interface Personality {
31
+ /** Slug (filename without .md, or built-in id). Lowercase, hyphens. */
32
+ name: string;
33
+ /** Human display label shown in `/personality` list. */
34
+ displayName: string;
35
+ /** One-line description for the list view. */
36
+ description: string;
37
+ /** Markdown body appended to the system prompt when active. */
38
+ prompt: string;
39
+ scope: PersonalityScope;
40
+ }
41
+ export declare function loadAllPersonalities(workspaceRoot?: string): Personality[];
42
+ export declare function findPersonality(name: string, workspaceRoot?: string): Personality | null;
43
+ /**
44
+ * Returns the prompt addendum for the currently active personality, or
45
+ * '' when none is set. Called from agent.ts after the base system prompt
46
+ * is composed — appended last so personality overrides apply even if
47
+ * project rules conflict.
48
+ */
49
+ export declare function getActivePersonalityPrompt(workspaceRoot?: string): string;
50
+ export declare function formatPersonalityList(workspaceRoot?: string): string;
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Personalities — pluggable system prompt addenda that shape how the
3
+ * agent communicates and what it prioritises.
4
+ *
5
+ * Storage:
6
+ * - **Built-in**: hardcoded below (concise, verbose, security,
7
+ * senior-reviewer, junior-mentor, ship-it).
8
+ * - **Project**: `<workspace>/.codeep/personalities/<name>.md`
9
+ * - **Global**: `~/.codeep/personalities/<name>.md`
10
+ *
11
+ * Project shadows global shadows built-in, by name.
12
+ *
13
+ * File format (project / global):
14
+ * ```
15
+ * # Personality: Concise Reviewer
16
+ * <free-form Markdown body — gets appended to system prompt verbatim>
17
+ * ```
18
+ * (The first H1 line is parsed as the display name; everything else is
19
+ * the prompt body.)
20
+ *
21
+ * Activation:
22
+ * - `config.activePersonality` holds the active name (or null/undefined
23
+ * for default behaviour).
24
+ * - `getActivePersonalityPrompt(workspaceRoot)` returns the prompt
25
+ * addendum to inject into the agent's system prompt, or '' when no
26
+ * personality is active.
27
+ * - Persists across sessions until cleared with `/personality off`.
28
+ */
29
+ import { readFileSync, readdirSync, existsSync } from 'fs';
30
+ import { join } from 'path';
31
+ import { homedir } from 'os';
32
+ import { config } from '../config/index.js';
33
+ const BUILTIN = [
34
+ {
35
+ name: 'concise',
36
+ displayName: 'Concise',
37
+ description: 'Short answers. No preamble. No filler. Get in, get out.',
38
+ scope: 'builtin',
39
+ prompt: `
40
+
41
+ ## Personality: Concise
42
+
43
+ Keep responses tight:
44
+ - Skip preamble ("Great question!", "Let me help…") — go straight to substance.
45
+ - Use bullet points over paragraphs for lists of 3+ items.
46
+ - One code block per answer when possible; no commentary around obvious code.
47
+ - Prefer "Done." over "I've successfully completed the task by…"
48
+ - No emojis unless the user explicitly uses them first.`,
49
+ },
50
+ {
51
+ name: 'verbose',
52
+ displayName: 'Verbose',
53
+ description: 'Detailed explanations with rationale, alternatives considered, and caveats.',
54
+ scope: 'builtin',
55
+ prompt: `
56
+
57
+ ## Personality: Verbose
58
+
59
+ Take time to explain:
60
+ - For every non-trivial change, lay out: what / why / alternatives I considered / why I chose this one.
61
+ - Cite line numbers and file paths so the user can audit.
62
+ - When reading code, summarise what the surrounding context does before acting — this catches misunderstandings early.
63
+ - End complex tasks with a "what to verify" checklist for the user.`,
64
+ },
65
+ {
66
+ name: 'security',
67
+ displayName: 'Security-paranoid',
68
+ description: 'Flags every input as untrusted, second-guesses every API call, prefers defensive code.',
69
+ scope: 'builtin',
70
+ prompt: `
71
+
72
+ ## Personality: Security-paranoid
73
+
74
+ Treat every input as hostile until proven otherwise:
75
+ - For any code that touches user input, env vars, file paths, or network: enumerate the attack surface in a short comment block above the code.
76
+ - Prefer allowlists over blocklists. Prefer parameterised queries / escape-on-output to ad-hoc sanitisation.
77
+ - Flag every secret/key reference and ensure it's read from env or secret manager — never inline.
78
+ - When suggesting dependencies, prefer audited ones (cite stars / last-publish date) and note known CVEs if any.
79
+ - After implementing, list 2-3 concrete attack scenarios you considered (e.g. "what if input contains '../'?") and how the code handles them.`,
80
+ },
81
+ {
82
+ name: 'senior-reviewer',
83
+ displayName: 'Senior reviewer',
84
+ description: 'Strong opinions on architecture, naming, abstraction boundaries. Pushes back on shortcuts.',
85
+ scope: 'builtin',
86
+ prompt: `
87
+
88
+ ## Personality: Senior Reviewer
89
+
90
+ Critique like a staff engineer reviewing a PR from a colleague:
91
+ - If the proposed approach has a cleaner alternative, propose it first — even if the user's framing pushed toward the messier one.
92
+ - Name things with the team in mind. Reject lazy names (handler, util, manager) and propose specific ones.
93
+ - Watch for premature abstraction (one-call helpers) and missing abstractions (3rd copy of the same 5 lines).
94
+ - Push back on "just for now" hacks unless the user explicitly says it's a throwaway.
95
+ - Mention what's NOT tested when adding new code, and suggest the test cases that'd catch likely regressions.`,
96
+ },
97
+ {
98
+ name: 'junior-mentor',
99
+ displayName: 'Junior mentor',
100
+ description: 'Explains concepts as you go, links to docs, suggests what to learn next.',
101
+ scope: 'builtin',
102
+ prompt: `
103
+
104
+ ## Personality: Junior Mentor
105
+
106
+ The user is learning — meet them where they are:
107
+ - Before introducing a new concept, give a 1-2 sentence "why this exists" context.
108
+ - Use analogies for abstract topics (closures = "a backpack the function carries"). Keep them grounded, not fancy.
109
+ - Link to canonical docs (MDN, language reference, official tutorial) rather than blog posts.
110
+ - After completing a task, suggest 1 thing to read or 1 small follow-up exercise that reinforces the concept just used.
111
+ - Resist showing off. Don't introduce ES2024 destructuring spread tricks when a plain for-loop teaches the lesson better.`,
112
+ },
113
+ {
114
+ name: 'ship-it',
115
+ displayName: 'Ship it',
116
+ description: 'Optimise for speed-to-merge. No bikeshedding. "Done is better than perfect" mode.',
117
+ scope: 'builtin',
118
+ prompt: `
119
+
120
+ ## Personality: Ship It
121
+
122
+ The user wants this merged today:
123
+ - Pick the first reasonable approach. Don't enumerate three alternatives — commit to one.
124
+ - Inline TODO comments are fine for cleanup-later items. Don't refactor adjacent code.
125
+ - Test the happy path. Edge cases can wait for follow-up unless they're security-relevant.
126
+ - Suggest minimum-viable solution, not robust-for-all-cases. The user can iterate.
127
+ - If the user asks "should we also…", default to "no, ship this first, that's a separate PR".`,
128
+ },
129
+ ];
130
/**
 * Load custom personalities from a `.codeep/personalities/` directory.
 *
 * Every `<name>.md` file whose lowercased stem matches
 * `[a-z0-9][a-z0-9-]*` becomes one entry. The first Markdown H1
 * (`# Title` or `# Personality: Title`) supplies the display name; all
 * remaining text, before and after that line, is the prompt body. The
 * first paragraph of the body, collapsed onto one line and capped at 200
 * characters, becomes the description.
 *
 * Never throws: a missing or unreadable directory yields `[]`, and files
 * that cannot be read or are longer than 64 * 1024 characters are skipped.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} scope - Scope tag stamped on every returned entry.
 * @returns {Array<{name: string, displayName: string, description: string, prompt: string, scope: string}>}
 */
function loadFromDir(dir, scope) {
    if (!existsSync(dir))
        return [];
    let entries;
    try {
        entries = readdirSync(dir);
    }
    catch {
        return [];
    }
    const out = [];
    for (const entry of entries) {
        if (!entry.endsWith('.md'))
            continue;
        const name = entry.slice(0, -3).toLowerCase();
        if (!/^[a-z0-9][a-z0-9-]*$/.test(name))
            continue; // skip weirdly-named files
        try {
            const raw = readFileSync(join(dir, entry), 'utf8');
            if (raw.length > 64 * 1024)
                continue; // cap at 64 KB
            // First H1 → displayName; everything else → prompt body.
            const h1 = raw.match(/^#\s+(?:Personality:\s+)?(.+)$/m);
            let displayName = name;
            let body = raw;
            if (h1) {
                // A heading made only of whitespace falls back to the slug.
                displayName = h1[1].trim() || name;
                // Use the match's own offset: searching for the heading text
                // again could land on an earlier substring (e.g. inside "## …").
                const headingStart = h1.index;
                const lineBreak = raw.indexOf('\n', headingStart + h1[0].length);
                const before = raw.slice(0, headingStart);
                // No newline after the heading means nothing follows it.
                const after = lineBreak === -1 ? '' : raw.slice(lineBreak + 1);
                body = (before + after).trimStart();
            }
            // First paragraph (or line) → description (cap 200 chars).
            const firstPara = body.split(/\n\s*\n/)[0].replace(/\s+/g, ' ').trim();
            const description = firstPara.length > 200 ? `${firstPara.slice(0, 197)}…` : firstPara;
            out.push({
                name,
                displayName,
                description: description || `Custom personality from ${entry}`,
                prompt: `\n\n## Personality: ${displayName}\n\n${body}`,
                scope,
            });
        }
        catch {
            // Skip broken files — never crash personality loading.
        }
    }
    return out;
}
173
/**
 * Collect every personality visible from a workspace.
 *
 * Built-in presets, global files (`~/.codeep/personalities`) and, when a
 * workspace root is given, project files
 * (`<workspaceRoot>/.codeep/personalities`) are merged by name. On a name
 * clash the project entry wins over the global one, which wins over the
 * built-in one.
 *
 * @param {string} [workspaceRoot] - Project root; omit to skip project files.
 * @returns {Array<object>} Merged personalities, sorted by `name`.
 */
export function loadAllPersonalities(workspaceRoot) {
    let projectEntries = [];
    if (workspaceRoot) {
        projectEntries = loadFromDir(join(workspaceRoot, '.codeep', 'personalities'), 'project');
    }
    const globalEntries = loadFromDir(join(homedir(), '.codeep', 'personalities'), 'global');
    // Layers are applied lowest priority first so later ones overwrite.
    const merged = new Map();
    for (const layer of [BUILTIN, globalEntries, projectEntries]) {
        for (const personality of layer) {
            merged.set(personality.name, personality);
        }
    }
    return Array.from(merged.values()).sort((left, right) => left.name.localeCompare(right.name));
}
188
/**
 * Look up a single personality by name, case-insensitively.
 *
 * The name may come from a persisted, hand-editable config value, so
 * anything that is not a non-blank string resolves to `null` instead of
 * throwing.
 *
 * @param {string} name - Personality slug (surrounding whitespace ignored).
 * @param {string} [workspaceRoot] - Project root used to find project files.
 * @returns {object | null} The matching personality, or `null` if none.
 */
export function findPersonality(name, workspaceRoot) {
    if (typeof name !== 'string')
        return null;
    const wanted = name.trim().toLowerCase();
    if (wanted === '')
        return null;
    return loadAllPersonalities(workspaceRoot).find((p) => p.name === wanted) ?? null;
}
192
/**
 * Resolve the system-prompt addendum for the active personality.
 *
 * Reads `activePersonality` from config and looks it up via
 * `findPersonality`. Yields '' when no personality is set or when the
 * configured name does not resolve to one.
 *
 * @param {string} [workspaceRoot] - Project root used to find project files.
 * @returns {string} Prompt text to append, or ''.
 */
export function getActivePersonalityPrompt(workspaceRoot) {
    const activeName = config.get('activePersonality');
    const match = activeName ? findPersonality(activeName, workspaceRoot) : null;
    return match?.prompt ?? '';
}
205
/**
 * Render the `/personality` listing as Markdown: a line naming the active
 * personality (if any), a table of all available personalities, and a
 * short hint on adding custom ones.
 *
 * @param {string} [workspaceRoot] - Project root used to find project files.
 * @returns {string} Markdown text.
 */
export function formatPersonalityList(workspaceRoot) {
    const list = loadAllPersonalities(workspaceRoot);
    const active = config.get('activePersonality');
    // Lookup lowercases the name, so the ✓ marker must match the same way.
    const activeKey = typeof active === 'string' ? active.toLowerCase() : '';
    const lines = ['## Personalities', ''];
    if (active) {
        lines.push(`**Active:** \`${active}\` — switch with \`/personality <name>\` or clear with \`/personality off\`.`);
    }
    else {
        lines.push('**Active:** _(none — agent uses default tone)_');
    }
    lines.push('');
    lines.push('| Name | Scope | Description |');
    lines.push('|---|---|---|');
    for (const p of list) {
        const tag = p.scope === 'builtin' ? 'built-in' : p.scope;
        const marker = activeKey !== '' && activeKey === p.name ? ' ✓' : '';
        // Descriptions of custom personalities come from user files; an
        // unescaped pipe would split the table row into extra columns.
        const description = p.description.replace(/\|/g, '\\|');
        lines.push(`| \`${p.name}\`${marker} | ${tag} | ${description} |`);
    }
    lines.push('');
    lines.push('Drop a `<name>.md` file into `.codeep/personalities/` (project) or `~/.codeep/personalities/` (global) to add your own — first `#` line becomes the display name, body becomes the prompt addendum.');
    return lines.join('\n');
}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Plan mode — explicit pre-execution preview.
3
+ *
4
+ * Flow:
5
+ * 1. User runs `/plan <task>` — we ask the LLM for a numbered plan
6
+ * (no tool calls, no file changes) and surface it to the user.
7
+ * 2. We hold the (task, plan) pair as the *pending* plan, scoped to
8
+ * the current process.
9
+ * 3. User runs `/go` to execute, or `/plan <revised task>` to refine.
10
+ * `/go` hands the original task + approved plan to the regular
11
+ * agent loop as a single prompt, so the existing tool execution,
12
+ * verification, and permission paths apply unchanged.
13
+ *
14
+ * Why this design (MVP):
15
+ * - Zero changes to the agent loop, MCP wiring, or ACP server.
16
+ * - Plan rendering reuses the chat markdown renderer (no new TUI
17
+ * panel to maintain).
18
+ * - Edit flow is just "/plan <revised task>" — generates a new plan
19
+ * that replaces the pending one; user pays one extra LLM call but
20
+ * gets human-readable revision history in the chat above.
21
+ * - When we ship a proper plan-mode UI later (TUI panel with
22
+ * Accept/Edit/Reject buttons + per-step progress), it can keep
23
+ * using this module as the backend.
24
+ */
25
+ export interface PendingPlan {
26
+ task: string;
27
+ plan: string;
28
+ createdAt: number;
29
+ }
30
+ /**
31
+ * Ask the model for a plan for the given task. Stores the (task, plan)
32
+ * pair as the pending plan so a subsequent `/go` can execute it.
33
+ * Throws on chat failure — caller renders the error.
34
+ */
35
+ export declare function generatePlan(task: string, onChunk?: (text: string) => void): Promise<string>;
36
+ export declare function getPendingPlan(): PendingPlan | null;
37
+ export declare function clearPendingPlan(): void;
38
+ /**
39
+ * Compose the prompt the agent loop receives when the user runs `/go`.
40
+ * The agent treats this as a normal task, so tool calls / verification /
41
+ * permissions / hooks all flow through the existing paths — we just
42
+ * front-load the plan as context so the model doesn't re-plan
43
+ * implicitly.
44
+ */
45
+ export declare function composeExecutionPrompt(plan: PendingPlan): string;
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Plan mode — explicit pre-execution preview.
3
+ *
4
+ * Flow:
5
+ * 1. User runs `/plan <task>` — we ask the LLM for a numbered plan
6
+ * (no tool calls, no file changes) and surface it to the user.
7
+ * 2. We hold the (task, plan) pair as the *pending* plan, scoped to
8
+ * the current process.
9
+ * 3. User runs `/go` to execute, or `/plan <revised task>` to refine.
10
+ * `/go` hands the original task + approved plan to the regular
11
+ * agent loop as a single prompt, so the existing tool execution,
12
+ * verification, and permission paths apply unchanged.
13
+ *
14
+ * Why this design (MVP):
15
+ * - Zero changes to the agent loop, MCP wiring, or ACP server.
16
+ * - Plan rendering reuses the chat markdown renderer (no new TUI
17
+ * panel to maintain).
18
+ * - Edit flow is just "/plan <revised task>" — generates a new plan
19
+ * that replaces the pending one; user pays one extra LLM call but
20
+ * gets human-readable revision history in the chat above.
21
+ * - When we ship a proper plan-mode UI later (TUI panel with
22
+ * Accept/Edit/Reject buttons + per-step progress), it can keep
23
+ * using this module as the backend.
24
+ */
25
+ import { chat } from '../api/index.js';
26
+ const PLAN_SYSTEM_PROMPT = `You are in PLAN MODE.
27
+
28
+ The user has given you a task. **Do not execute anything.** Do not call
29
+ tools. Do not modify files. Do not run shell commands. Instead, produce
30
+ a numbered plan of the steps you would take, so the user can review and
31
+ approve before any changes happen.
32
+
33
+ Format your response as Markdown:
34
+
35
+ ## Plan: <one-line summary of the task>
36
+
37
+ 1. **<short step name>** — what you'd do (e.g. read file, edit lines, run command).
38
+ _Why:_ rationale (1 sentence).
39
+ _Expected outcome:_ what the user should see after this step.
40
+
41
+ 2. **<step name>** — …
42
+
43
+ (continue numbering)
44
+
45
+ End with these three lines (verbatim labels, fill in values):
46
+
47
+ **Risk:** <low | medium | high> — <one-sentence reason; e.g. "schema change, requires migration">
48
+ **Files affected:** <comma-separated list of paths you'd touch, or "none">
49
+ **Commands run:** <comma-separated shell commands, or "none">
50
+
51
+ Rules:
52
+ - Be concrete. Reference real file paths from the project if you know them.
53
+ - Keep steps small enough that each maps to one or two tool calls when
54
+ later executed.
55
+ - If the task is ambiguous, list the assumption(s) you're making at the
56
+ top of the plan ("Assumes: ...") so the user can correct before /go.
57
+ - Do NOT produce code — describe what you'd change, not the change
58
+ itself. Code generation belongs in execution, not planning.
59
+ - If the task is trivial (single-file rename, single-line edit), say so
60
+ in one sentence and skip the formal plan — don't bloat tiny work.`;
61
+ let pending = null;
62
/**
 * Request a plan for `task` from the model and remember it.
 *
 * The plan-mode system prompt is sent as the only history entry. Once the
 * chat call resolves, the (task, plan) pair replaces the pending plan so a
 * later `/go` can execute it. A rejected chat call propagates to the
 * caller and leaves the previous pending plan in place.
 *
 * @param {string} task - Task description typed after `/plan`.
 * @param {(text: string) => void} [onChunk] - Callback forwarded to `chat`.
 * @returns {Promise<string>} The plan text returned by the model.
 */
export async function generatePlan(task, onChunk) {
    const systemTurn = { role: 'system', content: PLAN_SYSTEM_PROMPT };
    const plan = await chat(task, [systemTurn], onChunk);
    pending = { task, plan, createdAt: Date.now() };
    return plan;
}
75
/**
 * Return the plan currently awaiting approval.
 *
 * @returns {{task: string, plan: string, createdAt: number} | null} The
 *   pair stored by the last successful `generatePlan` call, or `null` when
 *   none has been stored or it has been cleared.
 */
export function getPendingPlan() {
    return pending;
}
78
/**
 * Discard the pending plan, so `getPendingPlan` returns `null` until a new
 * plan is generated.
 *
 * @returns {void}
 */
export function clearPendingPlan() {
    pending = null;
}
81
/**
 * Build the single prompt handed to the agent loop when the user runs
 * `/go`.
 *
 * The prompt has three sections separated by blank lines: the original
 * task, a fixed approval note telling the agent to follow the plan and to
 * stop and report rather than improvise, and the approved plan text.
 *
 * @param {{task: string, plan: string}} plan - The pending plan to execute.
 * @returns {string} Prompt text for the agent loop.
 */
export function composeExecutionPrompt(plan) {
    const approvalNote = "I've reviewed the following plan and approved it. Execute it step by step. If any step turns out to be wrong or impossible mid-execution, stop and report — don't silently improvise.";
    const sections = [`${plan.task}`, approvalNote, `${plan.plan}`];
    return sections.join('\n\n');
}
@@ -5,6 +5,13 @@ export interface TokenUsage {
5
5
  promptTokens: number;
6
6
  completionTokens: number;
7
7
  totalTokens: number;
8
+ /** Anthropic prompt caching: tokens written to the cache on this call
9
+ * (billed at ~1.25× input rate). Undefined for providers that don't
10
+ * support caching or for calls below the cache size threshold. */
11
+ cacheCreationTokens?: number;
12
+ /** Anthropic prompt caching: tokens read from cache on this call
13
+ * (billed at ~0.1× input rate — the big savings live here). */
14
+ cacheReadTokens?: number;
8
15
  }
9
16
  export interface SessionTokenStats {
10
17
  totalPromptTokens: number;
@@ -18,6 +25,9 @@ interface TokenRecord {
18
25
  promptTokens: number;
19
26
  completionTokens: number;
20
27
  totalTokens: number;
28
+ /** Anthropic prompt caching breakdown — see TokenUsage. */
29
+ cacheCreationTokens?: number;
30
+ cacheReadTokens?: number;
21
31
  model: string;
22
32
  provider: string;
23
33
  /** Authoritative per-call USD from the provider (OpenRouter), if available. */
@@ -59,6 +69,19 @@ export interface ProviderCostBreakdown {
59
69
  * Get cost breakdown grouped by provider/model
60
70
  */
61
71
  export declare function getCostBreakdown(): ProviderCostBreakdown[];
72
+ /**
73
+ * Aggregate Anthropic prompt-caching stats for the current session.
74
+ * Returns the breakdown plus an estimate of what the input billing would
75
+ * have been *without* caching, so we can surface "you saved $X" to the
76
+ * user.
77
+ */
78
+ export interface CacheStats {
79
+ cacheCreationTokens: number;
80
+ cacheReadTokens: number;
81
+ /** Net estimated USD saved by caching, summed over every record whose model has a pricing entry: cache-read discount minus cache-write surcharge, never below 0. */
82
+ estimatedSavingsUsd: number;
83
+ }
84
+ export declare function getCacheStats(): CacheStats;
62
85
  /**
63
86
  * Get session stats
64
87
  */
@@ -81,6 +81,8 @@ export function recordTokenUsage(usage, model, provider, actualCostUsd) {
81
81
  promptTokens: usage.promptTokens,
82
82
  completionTokens: usage.completionTokens,
83
83
  totalTokens: usage.totalTokens,
84
+ cacheCreationTokens: usage.cacheCreationTokens,
85
+ cacheReadTokens: usage.cacheReadTokens,
84
86
  model,
85
87
  provider,
86
88
  actualCostUsd,
@@ -104,10 +106,19 @@ export function extractOpenAIUsage(data) {
104
106
  */
105
107
  export function extractAnthropicUsage(data) {
106
108
  if (data?.usage) {
109
+ const inputTokens = data.usage.input_tokens || 0;
110
+ const outputTokens = data.usage.output_tokens || 0;
111
+ const cacheCreation = data.usage.cache_creation_input_tokens || 0;
112
+ const cacheRead = data.usage.cache_read_input_tokens || 0;
113
+ // Anthropic returns input_tokens EXCLUSIVE of cache creation and cache
114
+ // read tokens — they're reported separately. Total prompt = sum of all
115
+ // three so our context window math doesn't undercount.
107
116
  return {
108
- promptTokens: data.usage.input_tokens || 0,
109
- completionTokens: data.usage.output_tokens || 0,
110
- totalTokens: (data.usage.input_tokens || 0) + (data.usage.output_tokens || 0),
117
+ promptTokens: inputTokens + cacheCreation + cacheRead,
118
+ completionTokens: outputTokens,
119
+ totalTokens: inputTokens + cacheCreation + cacheRead + outputTokens,
120
+ cacheCreationTokens: cacheCreation || undefined,
121
+ cacheReadTokens: cacheRead || undefined,
111
122
  };
112
123
  }
113
124
  return null;
@@ -132,13 +143,42 @@ export function getCostBreakdown() {
132
143
  else {
133
144
  const pricing = MODEL_PRICING[record.model];
134
145
  if (pricing) {
135
- existing.estimatedCost += (record.promptTokens / 1_000_000) * pricing.inputPer1M + (record.completionTokens / 1_000_000) * pricing.outputPer1M;
146
+ // Anthropic prompt caching: cache_creation_input is billed at 1.25×
147
+ // the base input rate, cache_read_input at 0.1×. The remaining
148
+ // (uncached) prompt tokens bill at the standard 1.0× rate.
149
+ const cacheCreate = record.cacheCreationTokens ?? 0;
150
+ const cacheRead = record.cacheReadTokens ?? 0;
151
+ const uncachedPrompt = Math.max(0, record.promptTokens - cacheCreate - cacheRead);
152
+ existing.estimatedCost +=
153
+ (uncachedPrompt / 1_000_000) * pricing.inputPer1M
154
+ + (cacheCreate / 1_000_000) * pricing.inputPer1M * 1.25
155
+ + (cacheRead / 1_000_000) * pricing.inputPer1M * 0.1
156
+ + (record.completionTokens / 1_000_000) * pricing.outputPer1M;
136
157
  }
137
158
  }
138
159
  grouped.set(key, existing);
139
160
  }
140
161
  return Array.from(grouped.values());
141
162
  }
163
/**
 * Summarise prompt-cache usage over all recorded calls.
 *
 * Token counts are summed over every record. The savings figure only
 * includes records whose model has a pricing entry: each cache-read token
 * counts as 0.9× the input rate saved, each cache-write token as 0.25× the
 * input rate of extra cost. The net total is reported, never below 0.
 *
 * @returns {{cacheCreationTokens: number, cacheReadTokens: number, estimatedSavingsUsd: number}}
 */
export function getCacheStats() {
    let writtenTotal = 0;
    let readTotal = 0;
    let netSavings = 0;
    for (const record of records) {
        const written = record.cacheCreationTokens ?? 0;
        const read = record.cacheReadTokens ?? 0;
        writtenTotal += written;
        readTotal += read;
        const pricing = MODEL_PRICING[record.model];
        if (!pricing) {
            continue; // no pricing entry → cannot put a dollar figure on it
        }
        const readDiscount = (read / 1_000_000) * pricing.inputPer1M * 0.9;
        const writeSurcharge = (written / 1_000_000) * pricing.inputPer1M * 0.25;
        netSavings += readDiscount - writeSurcharge;
    }
    return {
        cacheCreationTokens: writtenTotal,
        cacheReadTokens: readTotal,
        estimatedSavingsUsd: Math.max(0, netSavings),
    };
}
142
182
  /**
143
183
  * Get session stats
144
184
  */
@@ -207,6 +247,18 @@ export function formatCostReport() {
207
247
  lines.push(`| \`${b.provider}\` / \`${b.model}\` | ${formatTokenCount(b.promptTokens)} | ${formatTokenCount(b.completionTokens)} | $${b.estimatedCost.toFixed(4)} |`);
208
248
  }
209
249
  }
250
+ // Prompt caching summary — only shown if at least one cached call landed.
251
+ const cache = getCacheStats();
252
+ if (cache.cacheReadTokens > 0 || cache.cacheCreationTokens > 0) {
253
+ lines.push('', '### Prompt caching');
254
+ lines.push(`**Cache reads:** ${formatTokenCount(cache.cacheReadTokens)} tokens (billed at 0.1× input rate)`);
255
+ if (cache.cacheCreationTokens > 0) {
256
+ lines.push(`**Cache writes:** ${formatTokenCount(cache.cacheCreationTokens)} tokens (billed at 1.25× input rate)`);
257
+ }
258
+ if (cache.estimatedSavingsUsd > 0) {
259
+ lines.push(`**Estimated savings vs no caching:** $${cache.estimatedSavingsUsd.toFixed(4)}`);
260
+ }
261
+ }
210
262
  // Models with no pricing entry don't contribute to cost — flag so users
211
263
  // aren't surprised the total looks low.
212
264
  const untracked = breakdown.filter(b => b.estimatedCost === 0 && (b.promptTokens + b.completionTokens) > 0);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeep",
3
- "version": "2.0.1",
3
+ "version": "2.0.3",
4
4
  "description": "AI-powered coding assistant built for the terminal. Multiple LLM providers, project-aware context, and a seamless development workflow.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",