@zhijiewang/openharness 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +199 -23
  2. package/dist/DeferredTool.d.ts +34 -0
  3. package/dist/DeferredTool.d.ts.map +1 -0
  4. package/dist/DeferredTool.js +62 -0
  5. package/dist/DeferredTool.js.map +1 -0
  6. package/dist/DeferredTool.test.d.ts +2 -0
  7. package/dist/DeferredTool.test.d.ts.map +1 -0
  8. package/dist/DeferredTool.test.js +79 -0
  9. package/dist/DeferredTool.test.js.map +1 -0
  10. package/dist/agents/roles.d.ts.map +1 -1
  11. package/dist/agents/roles.js +20 -6
  12. package/dist/agents/roles.js.map +1 -1
  13. package/dist/agents/roles.test.js +26 -2
  14. package/dist/agents/roles.test.js.map +1 -1
  15. package/dist/harness/config.d.ts +15 -0
  16. package/dist/harness/config.d.ts.map +1 -1
  17. package/dist/harness/config.js +45 -23
  18. package/dist/harness/config.js.map +1 -1
  19. package/dist/harness/memory-consolidation.test.d.ts +2 -0
  20. package/dist/harness/memory-consolidation.test.d.ts.map +1 -0
  21. package/dist/harness/memory-consolidation.test.js +152 -0
  22. package/dist/harness/memory-consolidation.test.js.map +1 -0
  23. package/dist/harness/memory.d.ts +12 -0
  24. package/dist/harness/memory.d.ts.map +1 -1
  25. package/dist/harness/memory.js +53 -2
  26. package/dist/harness/memory.js.map +1 -1
  27. package/dist/harness/memory.test.js +43 -1
  28. package/dist/harness/memory.test.js.map +1 -1
  29. package/dist/harness/session.test.js +36 -0
  30. package/dist/harness/session.test.js.map +1 -1
  31. package/dist/harness/verification.d.ts +41 -0
  32. package/dist/harness/verification.d.ts.map +1 -0
  33. package/dist/harness/verification.js +197 -0
  34. package/dist/harness/verification.js.map +1 -0
  35. package/dist/harness/verification.test.d.ts +2 -0
  36. package/dist/harness/verification.test.d.ts.map +1 -0
  37. package/dist/harness/verification.test.js +181 -0
  38. package/dist/harness/verification.test.js.map +1 -0
  39. package/dist/query/index.d.ts.map +1 -1
  40. package/dist/query/index.js +9 -1
  41. package/dist/query/index.js.map +1 -1
  42. package/dist/query/tools.d.ts.map +1 -1
  43. package/dist/query/tools.js +30 -4
  44. package/dist/query/tools.js.map +1 -1
  45. package/dist/repl.d.ts.map +1 -1
  46. package/dist/repl.js +21 -0
  47. package/dist/repl.js.map +1 -1
  48. package/dist/services/AgentDispatcher.d.ts +1 -0
  49. package/dist/services/AgentDispatcher.d.ts.map +1 -1
  50. package/dist/services/AgentDispatcher.js +10 -1
  51. package/dist/services/AgentDispatcher.js.map +1 -1
  52. package/dist/services/CronExecutor.d.ts +38 -0
  53. package/dist/services/CronExecutor.d.ts.map +1 -0
  54. package/dist/services/CronExecutor.js +130 -0
  55. package/dist/services/CronExecutor.js.map +1 -0
  56. package/dist/services/CronExecutor.test.d.ts +2 -0
  57. package/dist/services/CronExecutor.test.d.ts.map +1 -0
  58. package/dist/services/CronExecutor.test.js +128 -0
  59. package/dist/services/CronExecutor.test.js.map +1 -0
  60. package/dist/services/StreamingToolExecutor.d.ts.map +1 -1
  61. package/dist/services/StreamingToolExecutor.js +28 -0
  62. package/dist/services/StreamingToolExecutor.js.map +1 -1
  63. package/dist/services/cron.d.ts +4 -0
  64. package/dist/services/cron.d.ts.map +1 -1
  65. package/dist/services/cron.js +27 -0
  66. package/dist/services/cron.js.map +1 -1
  67. package/dist/tools/AgentTool/index.d.ts +3 -0
  68. package/dist/tools/AgentTool/index.d.ts.map +1 -1
  69. package/dist/tools/AgentTool/index.js +17 -4
  70. package/dist/tools/AgentTool/index.js.map +1 -1
  71. package/dist/tools/MemoryTool/index.d.ts +2 -2
  72. package/dist/tools/ToolSearchTool/index.d.ts.map +1 -1
  73. package/dist/tools/ToolSearchTool/index.js +7 -0
  74. package/dist/tools/ToolSearchTool/index.js.map +1 -1
  75. package/dist/tools.d.ts +8 -0
  76. package/dist/tools.d.ts.map +1 -1
  77. package/dist/tools.js +40 -41
  78. package/dist/tools.js.map +1 -1
  79. package/package.json +1 -1
package/README.md CHANGED
@@ -17,13 +17,34 @@
17
17
 
18
18
  AI coding agent in your terminal. Works with any LLM -- free local models or cloud APIs.
19
19
 
20
- [![npm version](https://img.shields.io/npm/v/@zhijiewang/openharness)](https://www.npmjs.com/package/@zhijiewang/openharness) [![npm downloads](https://img.shields.io/npm/dm/@zhijiewang/openharness)](https://www.npmjs.com/package/@zhijiewang/openharness) [![license](https://img.shields.io/npm/l/@zhijiewang/openharness)](LICENSE) ![Node.js 18+](https://img.shields.io/badge/node-18%2B-green) ![TypeScript](https://img.shields.io/badge/typescript-strict-blue) [![GitHub stars](https://img.shields.io/github/stars/zhijiewong/openharness)](https://github.com/zhijiewong/openharness) [![GitHub issues](https://img.shields.io/github/issues-raw/zhijiewong/openharness)](https://github.com/zhijiewong/openharness/issues) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen)](https://github.com/zhijiewong/openharness/pulls)
20
+ <p align="center">
21
+ <img src="assets/openharness_v0.11.1_4.gif" alt="OpenHarness demo" width="800" />
22
+ </p>
21
23
 
22
- ---
24
+ [![npm version](https://img.shields.io/npm/v/@zhijiewang/openharness)](https://www.npmjs.com/package/@zhijiewang/openharness) [![npm downloads](https://img.shields.io/npm/dm/@zhijiewang/openharness)](https://www.npmjs.com/package/@zhijiewang/openharness) [![license](https://img.shields.io/npm/l/@zhijiewang/openharness)](LICENSE) ![tests](https://img.shields.io/badge/tests-624-brightgreen) ![tools](https://img.shields.io/badge/tools-35-blue) ![Node.js 18+](https://img.shields.io/badge/node-18%2B-green) ![TypeScript](https://img.shields.io/badge/typescript-strict-blue) [![GitHub stars](https://img.shields.io/github/stars/zhijiewong/openharness)](https://github.com/zhijiewong/openharness) [![GitHub issues](https://img.shields.io/github/issues-raw/zhijiewong/openharness)](https://github.com/zhijiewong/openharness/issues) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen)](https://github.com/zhijiewong/openharness/pulls)
23
25
 
24
- <video src="https://github.com/user-attachments/assets/ed19a2cc-14d3-4db3-aa5b-3dc07c444498" controls width="100%"></video>
26
+ ---
25
27
 
26
- *OpenHarness reading files, running commands, and editing code — powered by a local Ollama model.*
28
+ ## Table of Contents
29
+
30
+ - [Quick Start](#quick-start)
31
+ - [Why OpenHarness?](#why-openharness)
32
+ - [Terminal UI](#terminal-ui)
33
+ - [Tools (35)](#tools-35)
34
+ - [Slash Commands (33)](#slash-commands-33)
35
+ - [Permission Modes](#permission-modes)
36
+ - [Hooks](#hooks)
37
+ - [Checkpoints & Rewind](#checkpoints--rewind)
38
+ - [Agent Roles](#agent-roles)
39
+ - [Headless Mode & CI/CD](#headless-mode)
40
+ - [Cybergotchi](#cybergotchi)
41
+ - [MCP Servers](#mcp-servers)
42
+ - [Providers](#providers)
43
+ - [FAQ](#faq)
44
+ - [Install](#install)
45
+ - [Development](#development)
46
+ - [Contributing](#contributing)
47
+ - [Community](#community)
27
48
 
28
49
  ---
29
50
 
@@ -42,7 +63,17 @@ oh # auto-detect local model
42
63
  oh --model ollama/qwen2.5:7b # specific model
43
64
  oh --model gpt-4o # cloud model (needs OPENAI_API_KEY)
44
65
  oh --trust # auto-approve all tool calls
45
- oh run "fix the tests" --json # headless mode for CI/CD
66
+ oh --auto # auto-approve, block dangerous bash
67
+ oh -p "fix the tests" --trust # headless mode (single prompt, exit)
68
+ oh run "review code" --json # CI/CD with JSON output
69
+ ```
70
+
71
+ **In-session commands:**
72
+ ```
73
+ /rewind # undo last AI file change (checkpoint restore)
74
+ /roles # list agent specializations
75
+ /vim # toggle vim mode
76
+ Ctrl+O # flush transcript to scrollback for review
46
77
  ```
47
78
 
48
79
  ## Why OpenHarness?
@@ -53,7 +84,7 @@ Most AI coding agents are locked to one provider or cost $20+/month. OpenHarness
53
84
  |---|---|---|---|---|
54
85
  | Any LLM | Yes (Ollama, OpenAI, Anthropic, OpenRouter, any OpenAI-compatible) | Anthropic only | Yes | Yes |
55
86
  | Free local models | Ollama native | No | Yes | Yes |
56
- | Tools | 25 with permission gates | 43+ | File-focused | 20+ |
87
+ | Tools | 35 with permission gates | 43+ | File-focused | 20+ |
57
88
  | Permission modes | 7 (ask, trust, deny, acceptEdits, plan, auto, bypass) | 7 | Basic | Basic |
58
89
  | Git integration | Auto-commit + /undo + /rewind checkpoints | Yes | Deep git | Basic |
59
90
  | Slash commands | 30+ built-in | 80+ | Some | Some |
@@ -127,33 +158,59 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
127
158
 
128
159
  Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XXXX), `{ctx}` (context usage bar). Empty sections are automatically collapsed.
129
160
 
130
- ## Tools (25)
161
+ ## Tools (35)
131
162
 
132
163
  | Tool | Risk | Description |
133
164
  |------|------|-------------|
134
- | Bash | high | Execute shell commands with live streaming output |
135
- | Read | low | Read files with line ranges |
165
+ | **Core** | | |
166
+ | Bash | high | Execute shell commands with live streaming output (AST safety analysis) |
167
+ | Read | low | Read files with line ranges, PDF support |
136
168
  | ImageRead | low | Read images/PDFs for multimodal analysis |
137
169
  | Write | medium | Create or overwrite files |
138
170
  | Edit | medium | Search-and-replace edits |
171
+ | MultiEdit | medium | Atomic multi-file edits (all succeed or none) |
139
172
  | Glob | low | Find files by pattern |
140
- | Grep | low | Regex content search |
173
+ | Grep | low | Regex content search with context lines |
141
174
  | LS | low | List directory contents with sizes |
175
+ | **Web** | | |
142
176
  | WebFetch | medium | Fetch URL content (SSRF-protected) |
143
177
  | WebSearch | medium | Search the web |
178
+ | RemoteTrigger | high | HTTP requests to webhooks/APIs |
179
+ | **Tasks** | | |
144
180
  | TaskCreate | low | Create structured tasks |
145
181
  | TaskUpdate | low | Update task status |
146
182
  | TaskList | low | List all tasks |
183
+ | TaskGet | low | Get task details |
184
+ | TaskStop | low | Stop a running task |
185
+ | TaskOutput | low | Get task output |
186
+ | **Agents** | | |
187
+ | Agent | medium | Spawn a sub-agent (with role specialization) |
188
+ | ParallelAgent | medium | Dispatch multiple agents with DAG dependencies |
189
+ | SendMessage | low | Agent-to-agent peer messaging |
147
190
  | AskUser | low | Ask user a question with options |
148
- | Skill | low | Invoke a skill from .oh/skills/ |
149
- | Agent | medium | Spawn a sub-agent for delegation |
191
+ | **Scheduling** | | |
192
+ | CronCreate | medium | Schedule recurring tasks |
193
+ | CronDelete | medium | Remove scheduled tasks |
194
+ | CronList | low | List all scheduled tasks |
195
+ | **Planning** | | |
150
196
  | EnterPlanMode | low | Enter structured planning mode |
151
197
  | ExitPlanMode | low | Exit planning mode |
198
+ | **Code Intelligence** | | |
199
+ | Diagnostics | low | LSP-based code diagnostics |
152
200
  | NotebookEdit | medium | Edit Jupyter notebooks |
201
+ | **Memory & Discovery** | | |
202
+ | Memory | low | Save/list/search persistent memories |
203
+ | Skill | low | Invoke a skill from .oh/skills/ |
204
+ | ToolSearch | low | Find tools by description |
205
+ | **Git Worktrees** | | |
206
+ | EnterWorktree | medium | Create isolated git worktree |
207
+ | ExitWorktree | medium | Remove a git worktree |
208
+ | **Process** | | |
209
+ | KillProcess | high | Stop processes by PID or name |
153
210
 
154
- Low-risk read-only tools auto-approve. Medium and high risk tools require confirmation in `ask` mode. Use `--trust` to skip all prompts.
211
+ Low-risk read-only tools auto-approve. Medium and high risk tools require confirmation in `ask` mode. Use `--trust` to skip all prompts, or `--auto` to auto-approve while still blocking dangerous bash commands.
155
212
 
156
- ## Slash Commands (30+)
213
+ ## Slash Commands (33)
157
214
 
158
215
  Type these during a chat session. Aliases: `/q` exit, `/h` help, `/c` commit, `/m` model, `/s` status.
159
216
 
@@ -334,6 +391,58 @@ Every file modification is automatically checkpointed before execution. If somet
334
391
 
335
392
  Checkpoints are stored in `.oh/checkpoints/` and cover FileWrite, FileEdit, and Bash commands that modify files.
336
393
 
394
+ ## Verification Loops
395
+
396
+ After every file edit (Edit, Write, MultiEdit), OpenHarness automatically runs language-appropriate lint/typecheck commands and feeds the results back into the agent context. This is the single highest-impact harness engineering pattern — research shows 2-3x quality improvement from automated feedback.
397
+
398
+ **Auto-detection** — if your project has `tsconfig.json`, `.eslintrc*`, `pyproject.toml`, `go.mod`, or `Cargo.toml`, verification rules are detected automatically. No configuration needed.
399
+
400
+ **Custom rules** via `.oh/config.yaml`:
401
+
402
+ ```yaml
403
+ verification:
404
+ enabled: true # default: true (auto-detect)
405
+ mode: warn # 'warn' appends to output, 'block' marks as error
406
+ rules:
407
+ - extensions: [".ts", ".tsx"]
408
+ lint: "npx tsc --noEmit 2>&1 | head -20"
409
+ timeout: 15000
410
+ - extensions: [".py"]
411
+ lint: "ruff check {file} 2>&1 | head -10"
412
+ ```
413
+
414
+ The agent sees `[Verification passed]` or `[Verification FAILED]` with the linter output after each edit, enabling self-correction.
415
+
416
+ ## Memory Consolidation
417
+
418
+ On session exit, OpenHarness automatically prunes stale memories using temporal decay:
419
+
420
+ - Memories not accessed in 30+ days lose 0.1 relevance per 30-day period
421
+ - Memories below 0.1 relevance are permanently deleted
422
+ - Updated relevance scores are persisted to memory files
423
+
424
+ This keeps the memory system lean and relevant. Configure in `.oh/config.yaml`:
425
+
426
+ ```yaml
427
+ memory:
428
+ consolidateOnExit: true # default: true
429
+ ```
430
+
431
+ ## Scheduled Tasks (Cron)
432
+
433
+ Create recurring tasks that run automatically in the background:
434
+
435
+ ```
436
+ # Via slash commands
437
+ /cron list # show all scheduled tasks
438
+ /cron create "check-tests" # create a new task (interactive)
439
+ /cron delete <id> # remove a task
440
+ ```
441
+
442
+ **Schedule syntax:** `every 5m`, `every 2h`, `every 1d`
443
+
444
+ The cron executor checks every 60 seconds for due tasks and runs them via sub-queries. Results are stored in `~/.oh/crons/history/`.
445
+
337
446
  ## Agent Roles
338
447
 
339
448
  Dispatch specialized sub-agents for focused tasks:
@@ -342,16 +451,22 @@ Dispatch specialized sub-agents for focused tasks:
342
451
  /roles # list all available roles
343
452
  ```
344
453
 
345
- | Role | Description |
346
- |------|-------------|
347
- | `code-reviewer` | Find bugs, security issues, style problems |
348
- | `test-writer` | Generate unit and integration tests |
349
- | `docs-writer` | Write documentation and comments |
350
- | `debugger` | Systematic bug investigation |
351
- | `refactorer` | Simplify code without changing behavior |
352
- | `security-auditor` | OWASP, injection, secrets, CVE scanning |
454
+ | Role | Description | Tools |
455
+ |------|-------------|-------|
456
+ | `code-reviewer` | Find bugs, security issues, style problems | Read-only |
457
+ | `test-writer` | Generate unit and integration tests | Read + Write |
458
+ | `docs-writer` | Write documentation and comments | Read + Write + Edit |
459
+ | `debugger` | Systematic bug investigation | Read-only + Bash |
460
+ | `refactorer` | Simplify code without changing behavior | All file tools + Bash |
461
+ | `security-auditor` | OWASP, injection, secrets, CVE scanning | Read-only + Bash |
462
+ | `evaluator` | Evaluate code quality and run tests (read-only) | Read-only + Bash + Diagnostics |
353
463
 
354
- The LLM can dispatch these via `Agent({ subagent_type: 'code-reviewer', prompt: '...' })`.
464
+ Each role restricts the sub-agent to only its suggested tools. You can also pass `allowed_tools` explicitly:
465
+
466
+ ```
467
+ Agent({ subagent_type: 'evaluator', prompt: 'Run all tests and report results' })
468
+ Agent({ allowed_tools: ['Read', 'Grep'], prompt: 'Search for all TODO comments' })
469
+ ```
355
470
 
356
471
  ## Headless Mode
357
472
 
@@ -446,6 +561,31 @@ oh --model llamacpp/my-model
446
561
  oh models # list available models
447
562
  ```
448
563
 
564
+ ## Configuration Hierarchy
565
+
566
+ Config is loaded in layers (later overrides earlier):
567
+
568
+ 1. **Global** `~/.oh/config.yaml` — default provider, model, theme for all projects
569
+ 2. **Project** `.oh/config.yaml` — project-specific settings
570
+ 3. **Local** `.oh/config.local.yaml` — personal overrides (gitignored)
571
+
572
+ Set your default provider once globally:
573
+
574
+ ```yaml
575
+ # ~/.oh/config.yaml
576
+ provider: ollama
577
+ model: llama3
578
+ permissionMode: ask
579
+ theme: dark
580
+ ```
581
+
582
+ Then per-project configs only need what's different:
583
+
584
+ ```yaml
585
+ # .oh/config.yaml
586
+ model: codellama # override just the model
587
+ ```
588
+
449
589
  ## Project Rules
450
590
 
451
591
  Create `.oh/RULES.md` in any repo (or run `oh init`):
@@ -458,6 +598,42 @@ Create `.oh/RULES.md` in any repo (or run `oh init`):
458
598
 
459
599
  Rules load automatically into every session.
460
600
 
601
+ ## How It Works
602
+
603
+ ```mermaid
604
+ graph LR
605
+ User[User Input] --> REPL[REPL Loop]
606
+ REPL --> Query[Query Engine]
607
+ Query --> Provider[LLM Provider]
608
+ Provider --> LLM[Ollama / OpenAI / Anthropic]
609
+ LLM --> Tools[Tool Execution]
610
+ Tools --> Permissions{Permission Check}
611
+ Permissions -->|Approved| Execute[Run Tool]
612
+ Permissions -->|Blocked| Deny[Deny & Report]
613
+ Execute --> Response[Stream Response]
614
+ Response --> REPL
615
+ ```
616
+
617
+ ## FAQ
618
+
619
+ **Does it work offline?**
620
+ Yes. Use Ollama with a local model — no internet or API key needed.
621
+
622
+ **How much does it cost?**
623
+ Free. OpenHarness is MIT licensed. You bring your own API key (BYOK) for cloud models, or use Ollama for free.
624
+
625
+ **Is it safe?**
626
+ Yes. 7 permission modes control what tools can do. Bash commands are analyzed by an AST parser that blocks destructive patterns (`rm -rf`, `curl | bash`, etc.). Every file change is checkpointed and reversible with `/rewind`.
627
+
628
+ **Can I use it in CI/CD?**
629
+ Yes. Use `oh -p "prompt" --auto` for headless execution, or the built-in GitHub Action for PR reviews.
630
+
631
+ **Does it support my language/framework?**
632
+ Yes. OpenHarness is language-agnostic — it reads, writes, and executes code in any language. Syntax highlighting covers 20+ languages.
633
+
634
+ **How does it compare to Claude Code?**
635
+ ~90% feature parity for CLI use cases. Main advantage: works with ANY LLM (not just Anthropic). See the [comparison table](#why-openharness) above.
636
+
461
637
  ## Install
462
638
 
463
639
  Requires **Node.js 18+**.
@@ -0,0 +1,34 @@
1
+ /**
2
+ * DeferredTool — lazy-loads tool schemas to reduce system prompt size.
3
+ *
4
+ * Wraps a built-in tool with a minimal prompt (name + description only).
5
+ * Full schema and prompt are loaded on first invocation or when resolved
6
+ * via ToolSearch. This mirrors the DeferredMcpTool pattern.
7
+ *
8
+ * Token savings: deferred tools contribute ~15 tokens to the system prompt
9
+ * instead of ~150, reducing context pressure by ~90% per deferred tool.
10
+ */
11
+ import { z } from "zod";
12
+ import type { Tool, ToolContext, ToolResult } from "./Tool.js";
13
+ import type { RiskLevel } from "./types/permissions.js";
14
+ export declare class DeferredTool implements Tool<z.ZodType> {
15
+ readonly name: string;
16
+ readonly description: string;
17
+ readonly inputSchema: z.ZodType;
18
+ readonly riskLevel: RiskLevel;
19
+ private inner;
20
+ private _activated;
21
+ constructor(tool: Tool);
22
+ /** True once the tool has been activated, either via call() or activate() */
23
+ get activated(): boolean;
24
+ isReadOnly(input: unknown): boolean;
25
+ isConcurrencySafe(input: unknown): boolean;
26
+ call(input: any, context: ToolContext): Promise<ToolResult>;
27
+ /** Returns a short "[deferred] name: description" line until activated, then the inner tool's full prompt */
28
+ prompt(): string;
29
+ /** Returns the wrapped inner tool instance (for ToolSearch resolution) */
30
+ getInner(): Tool;
31
+ /** Marks this tool as activated so later prompt() calls return the inner tool's full prompt */
32
+ activate(): void;
33
+ }
34
+ //# sourceMappingURL=DeferredTool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DeferredTool.d.ts","sourceRoot":"","sources":["../src/DeferredTool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAC/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAExD,qBAAa,YAAa,YAAW,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC;IAClD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,OAAO,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;IAE9B,OAAO,CAAC,KAAK,CAAO;IACpB,OAAO,CAAC,UAAU,CAAS;gBAEf,IAAI,EAAE,IAAI;IAStB,gEAAgE;IAChE,IAAI,SAAS,IAAI,OAAO,CAA4B;IAEpD,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO;IAInC,iBAAiB,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO;IAIpC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC;IAajE,+DAA+D;IAC/D,MAAM,IAAI,MAAM;IAKhB,0DAA0D;IAC1D,QAAQ,IAAI,IAAI;IAIhB,4EAA4E;IAC5E,QAAQ,IAAI,IAAI;CAGjB"}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * DeferredTool — lazy-loads tool schemas to reduce system prompt size.
3
+ *
4
+ * Wraps a built-in tool with a minimal prompt (name + description only).
5
+ * Full schema and prompt are loaded on first invocation or when resolved
6
+ * via ToolSearch. This mirrors the DeferredMcpTool pattern.
7
+ *
8
+ * Token savings: deferred tools contribute ~15 tokens to the system prompt
9
+ * instead of ~150, reducing context pressure by ~90% per deferred tool.
10
+ */
11
+ import { z } from "zod";
12
+ export class DeferredTool {
13
+ name;
14
+ description;
15
+ inputSchema;
16
+ riskLevel;
17
+ inner;
18
+ _activated = false;
19
+ constructor(tool) {
20
+ this.inner = tool;
21
+ this.name = tool.name;
22
+ this.description = tool.description;
23
+ this.riskLevel = tool.riskLevel;
24
+ // Permissive placeholder schema (accepts any object); this class never swaps it — strict validation against the inner tool's schema happens in call()
25
+ this.inputSchema = z.record(z.unknown());
26
+ }
27
+ /** True once the tool has been activated, either via call() or activate() */
28
+ get activated() { return this._activated; }
29
+ isReadOnly(input) {
30
+ return this.inner.isReadOnly(input);
31
+ }
32
+ isConcurrencySafe(input) {
33
+ return this.inner.isConcurrencySafe(input);
34
+ }
35
+ async call(input, context) {
36
+ this._activated = true;
37
+ // Validate against the inner tool's real schema; a failure is returned as an isError result rather than thrown (the tool is already marked activated above either way)
38
+ const parsed = this.inner.inputSchema.safeParse(input);
39
+ if (!parsed.success) {
40
+ return {
41
+ output: `Schema validation error: ${parsed.error.message}`,
42
+ isError: true,
43
+ };
44
+ }
45
+ return this.inner.call(parsed.data, context);
46
+ }
47
+ /** Returns a short "[deferred] name: description" line until activated, then the inner tool's full prompt */
48
+ prompt() {
49
+ if (this._activated)
50
+ return this.inner.prompt();
51
+ return `[deferred] ${this.name}: ${this.description}`;
52
+ }
53
+ /** Returns the wrapped inner tool instance (for ToolSearch resolution) */
54
+ getInner() {
55
+ return this.inner;
56
+ }
57
+ /** Marks this tool as activated so later prompt() calls return the inner tool's full prompt */
58
+ activate() {
59
+ this._activated = true;
60
+ }
61
+ }
62
+ //# sourceMappingURL=DeferredTool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DeferredTool.js","sourceRoot":"","sources":["../src/DeferredTool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,MAAM,OAAO,YAAY;IACd,IAAI,CAAS;IACb,WAAW,CAAS;IACpB,WAAW,CAAY;IACvB,SAAS,CAAY;IAEtB,KAAK,CAAO;IACZ,UAAU,GAAG,KAAK,CAAC;IAE3B,YAAY,IAAU;QACpB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACtB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC;QACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,yDAAyD;QACzD,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IAC3C,CAAC;IAED,gEAAgE;IAChE,IAAI,SAAS,KAAc,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;IAEpD,UAAU,CAAC,KAAc;QACvB,OAAO,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;IACtC,CAAC;IAED,iBAAiB,CAAC,KAAc;QAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAU,EAAE,OAAoB;QACzC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC;QACvB,gCAAgC;QAChC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACvD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,OAAO;gBACL,MAAM,EAAE,4BAA4B,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE;gBAC1D,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,+DAA+D;IAC/D,MAAM;QACJ,IAAI,IAAI,CAAC,UAAU;YAAE,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QAChD,OAAO,cAAc,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,WAAW,EAAE,CAAC;IACxD,CAAC;IAED,0DAA0D;IAC1D,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,4EAA4E;IAC5E,QAAQ;QACN,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC;IACzB,CAAC;CACF"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=DeferredTool.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DeferredTool.test.d.ts","sourceRoot":"","sources":["../src/DeferredTool.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,79 @@
1
+ import { describe, it } from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { createMockTool } from './test-helpers.js';
4
+ import { DeferredTool } from './DeferredTool.js';
5
+ describe('DeferredTool', () => {
6
+ it('wraps a tool with correct name and description', () => {
7
+ const inner = createMockTool('TestTool');
8
+ const deferred = new DeferredTool(inner);
9
+ assert.equal(deferred.name, 'TestTool');
10
+ assert.equal(deferred.description, inner.description);
11
+ assert.equal(deferred.riskLevel, inner.riskLevel);
12
+ });
13
+ it('returns deferred prompt before activation', () => {
14
+ const inner = createMockTool('TestTool');
15
+ const deferred = new DeferredTool(inner);
16
+ const prompt = deferred.prompt();
17
+ assert.ok(prompt.startsWith('[deferred]'));
18
+ assert.ok(prompt.includes('TestTool'));
19
+ });
20
+ it('returns full prompt after activation', () => {
21
+ const inner = createMockTool('TestTool');
22
+ const deferred = new DeferredTool(inner);
23
+ deferred.activate();
24
+ assert.equal(deferred.prompt(), inner.prompt());
25
+ });
26
+ it('is not activated by default', () => {
27
+ const inner = createMockTool('TestTool');
28
+ const deferred = new DeferredTool(inner);
29
+ assert.equal(deferred.activated, false);
30
+ });
31
+ it('activates on call()', async () => {
32
+ const inner = createMockTool('TestTool');
33
+ const deferred = new DeferredTool(inner);
34
+ assert.equal(deferred.activated, false);
35
+ const result = await deferred.call({ input: 'test' }, { workingDir: '/tmp' });
36
+ assert.equal(deferred.activated, true);
37
+ assert.equal(result.isError, false);
38
+ assert.ok(result.output.includes('TestTool'));
39
+ });
40
+ it('activates via activate() method', () => {
41
+ const inner = createMockTool('TestTool');
42
+ const deferred = new DeferredTool(inner);
43
+ deferred.activate();
44
+ assert.equal(deferred.activated, true);
45
+ });
46
+ it('returns full prompt after call()', async () => {
47
+ const inner = createMockTool('TestTool');
48
+ const deferred = new DeferredTool(inner);
49
+ await deferred.call({ input: 'test' }, { workingDir: '/tmp' });
50
+ assert.equal(deferred.prompt(), inner.prompt());
51
+ });
52
+ it('validates input against inner schema and returns error', async () => {
53
+ const inner = createMockTool('TestTool');
54
+ const deferred = new DeferredTool(inner);
55
+ // NOTE: despite the test name, only the happy path is exercised here.
56
+ // The mock's schema is described as z.object({ input: z.string().optional() }), which is permissive,
57
+ // so a valid object is passed and success is asserted; the validation-error branch of call() is not covered.
58
+ const result = await deferred.call({ input: 'valid' }, { workingDir: '/tmp' });
59
+ assert.equal(result.isError, false);
60
+ });
61
+ it('exposes inner tool via getInner()', () => {
62
+ const inner = createMockTool('TestTool');
63
+ const deferred = new DeferredTool(inner);
64
+ assert.equal(deferred.getInner(), inner);
65
+ });
66
+ it('delegates isReadOnly to inner tool', () => {
67
+ const readOnly = createMockTool('ReadOnly', { readOnly: true });
68
+ const writable = createMockTool('Writable', { readOnly: false });
69
+ assert.equal(new DeferredTool(readOnly).isReadOnly({}), true);
70
+ assert.equal(new DeferredTool(writable).isReadOnly({}), false);
71
+ });
72
+ it('delegates isConcurrencySafe to inner tool', () => {
73
+ const safe = createMockTool('Safe', { concurrent: true });
74
+ const unsafe = createMockTool('Unsafe', { concurrent: false });
75
+ assert.equal(new DeferredTool(safe).isConcurrencySafe({}), true);
76
+ assert.equal(new DeferredTool(unsafe).isConcurrencySafe({}), false);
77
+ });
78
+ });
79
+ //# sourceMappingURL=DeferredTool.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DeferredTool.test.js","sourceRoot":"","sources":["../src/DeferredTool.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAEjD,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QACxC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,WAAW,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9E,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QACvC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QACpC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,
CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;QACtE,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,uEAAuE;QACvE,kEAAkE;QAClE,4CAA4C;QAC5C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/E,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QACjE,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC9D,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,IAAI,GAAG,cAAc,CAAC,MAAM,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,iBAAiB,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC,iBAAiB,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;I
ACtE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"roles.d.ts","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,MAAM,MAAM,SAAS,GAAG;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;CAC3B,CAAC;AAgGF,uBAAuB;AACvB,wBAAgB,OAAO,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,CAEzD;AAED,+BAA+B;AAC/B,wBAAgB,SAAS,IAAI,SAAS,EAAE,CAEvC;AAED,mBAAmB;AACnB,wBAAgB,UAAU,IAAI,MAAM,EAAE,CAErC"}
1
+ {"version":3,"file":"roles.d.ts","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,MAAM,MAAM,SAAS,GAAG;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;CAC3B,CAAC;AA8GF,uBAAuB;AACvB,wBAAgB,OAAO,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,CAEzD;AAED,+BAA+B;AAC/B,wBAAgB,SAAS,IAAI,SAAS,EAAE,CAEvC;AAED,mBAAmB;AACnB,wBAAgB,UAAU,IAAI,MAAM,EAAE,CAErC"}
@@ -21,7 +21,7 @@ const roles = [
21
21
  - Verify that changes match the stated intent
22
22
 
23
23
  Be specific: cite file paths, line numbers, and code snippets. Prioritize issues by severity (critical > major > minor). Don't mention things that look fine — focus on problems.`,
24
- suggestedTools: ['FileRead', 'Glob', 'Grep', 'LS'],
24
+ suggestedTools: ['Read', 'Glob', 'Grep', 'LS'],
25
25
  },
26
26
  {
27
27
  id: 'test-writer',
@@ -36,7 +36,7 @@ Be specific: cite file paths, line numbers, and code snippets. Prioritize issues
36
36
  - Include both positive and negative test cases
37
37
 
38
38
  Read existing tests first to match the style, then write new tests.`,
39
- suggestedTools: ['FileRead', 'FileWrite', 'Glob', 'Grep', 'Bash'],
39
+ suggestedTools: ['Read', 'Write', 'Glob', 'Grep', 'Bash'],
40
40
  },
41
41
  {
42
42
  id: 'docs-writer',
@@ -51,7 +51,7 @@ Read existing tests first to match the style, then write new tests.`,
51
51
  - Keep documentation in sync with the actual code
52
52
 
53
53
  Write for the target audience (developers using this project). Be practical, not verbose.`,
54
- suggestedTools: ['FileRead', 'FileWrite', 'FileEdit', 'Glob', 'Grep'],
54
+ suggestedTools: ['Read', 'Write', 'Edit', 'Glob', 'Grep'],
55
55
  },
56
56
  {
57
57
  id: 'debugger',
@@ -66,7 +66,7 @@ Write for the target audience (developers using this project). Be practical, not
66
66
  - Propose a minimal fix that addresses the root cause, not the symptom
67
67
 
68
68
  Follow systematic debugging: read errors → reproduce → check changes → trace data → form hypothesis → test minimally.`,
69
- suggestedTools: ['FileRead', 'Glob', 'Grep', 'Bash', 'LS'],
69
+ suggestedTools: ['Read', 'Glob', 'Grep', 'Bash', 'LS'],
70
70
  },
71
71
  {
72
72
  id: 'refactorer',
@@ -81,7 +81,7 @@ Follow systematic debugging: read errors → reproduce → check changes → tra
81
81
  - Ensure all existing tests still pass after refactoring
82
82
 
83
83
  Do NOT add new features or change behavior. The refactored code must be functionally identical. Run tests after each change.`,
84
- suggestedTools: ['FileRead', 'FileWrite', 'FileEdit', 'Glob', 'Grep', 'Bash'],
84
+ suggestedTools: ['Read', 'Write', 'Edit', 'Glob', 'Grep', 'Bash'],
85
85
  },
86
86
  {
87
87
  id: 'security-auditor',
@@ -98,7 +98,21 @@ Do NOT add new features or change behavior. The refactored code must be function
98
98
  - Check dependency versions for known CVEs
99
99
 
100
100
  Report findings with severity (Critical/High/Medium/Low), affected file:line, and recommended fix.`,
101
- suggestedTools: ['FileRead', 'Glob', 'Grep', 'Bash'],
101
+ suggestedTools: ['Read', 'Glob', 'Grep', 'Bash'],
102
+ },
103
+ {
104
+ id: 'evaluator',
105
+ name: 'Evaluator',
106
+ description: 'Evaluates code quality, correctness, and test results (read-only)',
107
+ systemPromptSupplement: `You are an evaluator agent. Your job is to:
108
+ - Review code changes for correctness and quality
109
+ - Run existing tests and report results
110
+ - Check for regressions against the stated requirements
111
+ - Verify that changes match the stated intent
112
+ - Provide a pass/fail assessment with specific findings
113
+
114
+ You CANNOT modify files. Only read, search, and run test/lint commands to evaluate.`,
115
+ suggestedTools: ['Read', 'Glob', 'Grep', 'LS', 'Bash', 'Diagnostics'],
102
116
  },
103
117
  ];
104
118
  /** Get a role by ID */
@@ -1 +1 @@
1
- {"version":3,"file":"roles.js","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAWH,MAAM,KAAK,GAAgB;IACzB;QACE,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,eAAe;QACrB,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;kLAOsJ;QAC9K,cAAc,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;KACnD;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,aAAa;QACnB,WAAW,EAAE,2DAA2D;QACxE,sBAAsB,EAAE;;;;;;;;oEAQwC;QAChE,cAAc,EAAE,CAAC,UAAU,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAClE;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;;0FAQ8D;QACtF,cAAc,EAAE,CAAC,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC;KACtE;IACD;QACE,EAAE,EAAE,UAAU;QACd,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,uEAAuE;QACpF,sBAAsB,EAAE;;;;;;;;sHAQ0F;QAClH,cAAc,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;KAC3D;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,4DAA4D;QACzE,sBAAsB,EAAE;;;;;;;;6HAQiG;QACzH,cAAc,EAAE,CAAC,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAC9E;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,kBAAkB;QACxB,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;;;;mGAUuE;QAC/F,cAAc,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KACrD;CACF,CAAC;AAEF,uBAAuB;AACvB,MAAM,UAAU,OAAO,CAAC,EAAU;IAChC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AACtC,CAAC;AAED,+BAA+B;AAC/B,MAAM,UAAU,SAAS;IACvB,OAAO,CAAC,GAAG,KAAK,CAAC,CAAC;AACpB,CAAC;AAED,mBAAmB;AACnB,MAAM,UAAU,UAAU;IACxB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAC9B,CAAC"}
1
+ {"version":3,"file":"roles.js","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAWH,MAAM,KAAK,GAAgB;IACzB;QACE,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,eAAe;QACrB,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;kLAOsJ;QAC9K,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;KAC/C;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,aAAa;QACnB,WAAW,EAAE,2DAA2D;QACxE,sBAAsB,EAAE;;;;;;;;oEAQwC;QAChE,cAAc,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAC1D;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;;0FAQ8D;QACtF,cAAc,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAC1D;IACD;QACE,EAAE,EAAE,UAAU;QACd,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,uEAAuE;QACpF,sBAAsB,EAAE;;;;;;;;sHAQ0F;QAClH,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;KACvD;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,4DAA4D;QACzE,sBAAsB,EAAE;;;;;;;;6HAQiG;QACzH,cAAc,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAClE;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,kBAAkB;QACxB,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;;;;mGAUuE;QAC/F,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KACjD;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,mEAAmE;QAChF,sBAAsB,EAAE;;;;;;;oFAOwD;QAChF,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,aAAa,CAAC;KACtE;CACF,CAAC;AAEF,uBAAuB;AACvB,MAAM,UAAU,OAAO,CAAC,EAAU;IAChC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AACtC,CAAC;AAED,+BAA+B;AAC/B,MAAM,UAAU,SAAS;IACvB,OAAO,CAAC,GAAG,KAAK,CAAC,CAAC;AACpB,CAAC;AAED,mBAAmB;AACnB,MAAM,UAAU,UAAU;IACxB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAC9B,CAAC"}
@@ -4,18 +4,19 @@ import { getRole, listRoles, getRoleIds } from './roles.js';
4
4
  describe('agent roles', () => {
5
5
  it('lists all roles', () => {
6
6
  const roles = listRoles();
7
- assert.ok(roles.length >= 6);
7
+ assert.ok(roles.length >= 7);
8
8
  assert.ok(roles.find(r => r.id === 'code-reviewer'));
9
9
  assert.ok(roles.find(r => r.id === 'test-writer'));
10
10
  assert.ok(roles.find(r => r.id === 'debugger'));
11
11
  assert.ok(roles.find(r => r.id === 'security-auditor'));
12
+ assert.ok(roles.find(r => r.id === 'evaluator'));
12
13
  });
13
14
  it('gets role by ID', () => {
14
15
  const role = getRole('code-reviewer');
15
16
  assert.ok(role);
16
17
  assert.strictEqual(role.name, 'Code Reviewer');
17
18
  assert.ok(role.systemPromptSupplement.length > 50);
18
- assert.ok(role.suggestedTools.includes('FileRead'));
19
+ assert.ok(role.suggestedTools.includes('Read'));
19
20
  });
20
21
  it('returns undefined for unknown role', () => {
21
22
  assert.strictEqual(getRole('nonexistent'), undefined);
@@ -34,5 +35,28 @@ describe('agent roles', () => {
34
35
  assert.ok(role.systemPromptSupplement.length > 20, `role ${role.id} has short prompt`);
35
36
  }
36
37
  });
38
+ it('evaluator role has read-only suggested tools', () => {
39
+ const role = getRole('evaluator');
40
+ assert.ok(role);
41
+ assert.strictEqual(role.name, 'Evaluator');
42
+ assert.ok(role.suggestedTools.includes('Read'));
43
+ assert.ok(role.suggestedTools.includes('Glob'));
44
+ assert.ok(role.suggestedTools.includes('Grep'));
45
+ assert.ok(role.suggestedTools.includes('Bash'));
46
+ assert.ok(role.suggestedTools.includes('Diagnostics'));
47
+ // Evaluator should NOT have write tools
48
+ assert.ok(!role.suggestedTools.includes('Write'));
49
+ assert.ok(!role.suggestedTools.includes('Edit'));
50
+ });
51
+ it('all roles use actual tool names (not FileRead/FileWrite/FileEdit)', () => {
52
+ const invalidNames = ['FileRead', 'FileWrite', 'FileEdit'];
53
+ for (const role of listRoles()) {
54
+ if (!role.suggestedTools)
55
+ continue;
56
+ for (const toolName of role.suggestedTools) {
57
+ assert.ok(!invalidNames.includes(toolName), `role ${role.id} uses invalid tool name '${toolName}' — should be 'Read', 'Write', or 'Edit'`);
58
+ }
59
+ }
60
+ });
37
61
  });
38
62
  //# sourceMappingURL=roles.test.js.map