@zhijiewang/openharness 0.11.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +199 -23
- package/dist/DeferredTool.d.ts +34 -0
- package/dist/DeferredTool.d.ts.map +1 -0
- package/dist/DeferredTool.js +62 -0
- package/dist/DeferredTool.js.map +1 -0
- package/dist/DeferredTool.test.d.ts +2 -0
- package/dist/DeferredTool.test.d.ts.map +1 -0
- package/dist/DeferredTool.test.js +79 -0
- package/dist/DeferredTool.test.js.map +1 -0
- package/dist/agents/roles.d.ts.map +1 -1
- package/dist/agents/roles.js +20 -6
- package/dist/agents/roles.js.map +1 -1
- package/dist/agents/roles.test.js +26 -2
- package/dist/agents/roles.test.js.map +1 -1
- package/dist/harness/config.d.ts +15 -0
- package/dist/harness/config.d.ts.map +1 -1
- package/dist/harness/config.js +45 -23
- package/dist/harness/config.js.map +1 -1
- package/dist/harness/memory-consolidation.test.d.ts +2 -0
- package/dist/harness/memory-consolidation.test.d.ts.map +1 -0
- package/dist/harness/memory-consolidation.test.js +152 -0
- package/dist/harness/memory-consolidation.test.js.map +1 -0
- package/dist/harness/memory.d.ts +12 -0
- package/dist/harness/memory.d.ts.map +1 -1
- package/dist/harness/memory.js +53 -2
- package/dist/harness/memory.js.map +1 -1
- package/dist/harness/memory.test.js +43 -1
- package/dist/harness/memory.test.js.map +1 -1
- package/dist/harness/session.test.js +36 -0
- package/dist/harness/session.test.js.map +1 -1
- package/dist/harness/verification.d.ts +41 -0
- package/dist/harness/verification.d.ts.map +1 -0
- package/dist/harness/verification.js +197 -0
- package/dist/harness/verification.js.map +1 -0
- package/dist/harness/verification.test.d.ts +2 -0
- package/dist/harness/verification.test.d.ts.map +1 -0
- package/dist/harness/verification.test.js +181 -0
- package/dist/harness/verification.test.js.map +1 -0
- package/dist/query/index.d.ts.map +1 -1
- package/dist/query/index.js +9 -1
- package/dist/query/index.js.map +1 -1
- package/dist/query/tools.d.ts.map +1 -1
- package/dist/query/tools.js +30 -4
- package/dist/query/tools.js.map +1 -1
- package/dist/repl.d.ts.map +1 -1
- package/dist/repl.js +21 -0
- package/dist/repl.js.map +1 -1
- package/dist/services/AgentDispatcher.d.ts +1 -0
- package/dist/services/AgentDispatcher.d.ts.map +1 -1
- package/dist/services/AgentDispatcher.js +10 -1
- package/dist/services/AgentDispatcher.js.map +1 -1
- package/dist/services/CronExecutor.d.ts +38 -0
- package/dist/services/CronExecutor.d.ts.map +1 -0
- package/dist/services/CronExecutor.js +130 -0
- package/dist/services/CronExecutor.js.map +1 -0
- package/dist/services/CronExecutor.test.d.ts +2 -0
- package/dist/services/CronExecutor.test.d.ts.map +1 -0
- package/dist/services/CronExecutor.test.js +128 -0
- package/dist/services/CronExecutor.test.js.map +1 -0
- package/dist/services/StreamingToolExecutor.d.ts.map +1 -1
- package/dist/services/StreamingToolExecutor.js +28 -0
- package/dist/services/StreamingToolExecutor.js.map +1 -1
- package/dist/services/cron.d.ts +4 -0
- package/dist/services/cron.d.ts.map +1 -1
- package/dist/services/cron.js +27 -0
- package/dist/services/cron.js.map +1 -1
- package/dist/tools/AgentTool/index.d.ts +3 -0
- package/dist/tools/AgentTool/index.d.ts.map +1 -1
- package/dist/tools/AgentTool/index.js +17 -4
- package/dist/tools/AgentTool/index.js.map +1 -1
- package/dist/tools/MemoryTool/index.d.ts +2 -2
- package/dist/tools/ToolSearchTool/index.d.ts.map +1 -1
- package/dist/tools/ToolSearchTool/index.js +7 -0
- package/dist/tools/ToolSearchTool/index.js.map +1 -1
- package/dist/tools.d.ts +8 -0
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +40 -41
- package/dist/tools.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,13 +17,34 @@
|
|
|
17
17
|
|
|
18
18
|
AI coding agent in your terminal. Works with any LLM -- free local models or cloud APIs.
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
<p align="center">
|
|
21
|
+
<img src="assets/openharness_v0.11.1_4.gif" alt="OpenHarness demo" width="800" />
|
|
22
|
+
</p>
|
|
21
23
|
|
|
22
|
-
|
|
24
|
+
[](https://www.npmjs.com/package/@zhijiewang/openharness) [](https://www.npmjs.com/package/@zhijiewang/openharness) [](LICENSE)     [](https://github.com/zhijiewong/openharness) [](https://github.com/zhijiewong/openharness/issues) [](https://github.com/zhijiewong/openharness/pulls)
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
---
|
|
25
27
|
|
|
26
|
-
|
|
28
|
+
## Table of Contents
|
|
29
|
+
|
|
30
|
+
- [Quick Start](#quick-start)
|
|
31
|
+
- [Why OpenHarness?](#why-openharness)
|
|
32
|
+
- [Terminal UI](#terminal-ui)
|
|
33
|
+
- [Tools (35)](#tools-35)
|
|
34
|
+
- [Slash Commands (33)](#slash-commands-33)
|
|
35
|
+
- [Permission Modes](#permission-modes)
|
|
36
|
+
- [Hooks](#hooks)
|
|
37
|
+
- [Checkpoints & Rewind](#checkpoints--rewind)
|
|
38
|
+
- [Agent Roles](#agent-roles)
|
|
39
|
+
- [Headless Mode & CI/CD](#headless-mode)
|
|
40
|
+
- [Cybergotchi](#cybergotchi)
|
|
41
|
+
- [MCP Servers](#mcp-servers)
|
|
42
|
+
- [Providers](#providers)
|
|
43
|
+
- [FAQ](#faq)
|
|
44
|
+
- [Install](#install)
|
|
45
|
+
- [Development](#development)
|
|
46
|
+
- [Contributing](#contributing)
|
|
47
|
+
- [Community](#community)
|
|
27
48
|
|
|
28
49
|
---
|
|
29
50
|
|
|
@@ -42,7 +63,17 @@ oh # auto-detect local model
|
|
|
42
63
|
oh --model ollama/qwen2.5:7b # specific model
|
|
43
64
|
oh --model gpt-4o # cloud model (needs OPENAI_API_KEY)
|
|
44
65
|
oh --trust # auto-approve all tool calls
|
|
45
|
-
oh
|
|
66
|
+
oh --auto # auto-approve, block dangerous bash
|
|
67
|
+
oh -p "fix the tests" --trust # headless mode (single prompt, exit)
|
|
68
|
+
oh run "review code" --json # CI/CD with JSON output
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**In-session commands:**
|
|
72
|
+
```
|
|
73
|
+
/rewind # undo last AI file change (checkpoint restore)
|
|
74
|
+
/roles # list agent specializations
|
|
75
|
+
/vim # toggle vim mode
|
|
76
|
+
Ctrl+O # flush transcript to scrollback for review
|
|
46
77
|
```
|
|
47
78
|
|
|
48
79
|
## Why OpenHarness?
|
|
@@ -53,7 +84,7 @@ Most AI coding agents are locked to one provider or cost $20+/month. OpenHarness
|
|
|
53
84
|
|---|---|---|---|---|
|
|
54
85
|
| Any LLM | Yes (Ollama, OpenAI, Anthropic, OpenRouter, any OpenAI-compatible) | Anthropic only | Yes | Yes |
|
|
55
86
|
| Free local models | Ollama native | No | Yes | Yes |
|
|
56
|
-
| Tools |
|
|
87
|
+
| Tools | 35 with permission gates | 43+ | File-focused | 20+ |
|
|
57
88
|
| Permission modes | 7 (ask, trust, deny, acceptEdits, plan, auto, bypass) | 7 | Basic | Basic |
|
|
58
89
|
| Git integration | Auto-commit + /undo + /rewind checkpoints | Yes | Deep git | Basic |
|
|
59
90
|
| Slash commands | 30+ built-in | 80+ | Some | Some |
|
|
@@ -127,33 +158,59 @@ statusLineFormat: '{model} │ {tokens} │ {cost} │ {ctx}'
|
|
|
127
158
|
|
|
128
159
|
Available variables: `{model}`, `{tokens}` (input↑ output↓), `{cost}` ($X.XXXX), `{ctx}` (context usage bar). Empty sections are automatically collapsed.
|
|
129
160
|
|
|
130
|
-
## Tools (
|
|
161
|
+
## Tools (35)
|
|
131
162
|
|
|
132
163
|
| Tool | Risk | Description |
|
|
133
164
|
|------|------|-------------|
|
|
134
|
-
|
|
|
135
|
-
|
|
|
165
|
+
| **Core** | | |
|
|
166
|
+
| Bash | high | Execute shell commands with live streaming output (AST safety analysis) |
|
|
167
|
+
| Read | low | Read files with line ranges, PDF support |
|
|
136
168
|
| ImageRead | low | Read images/PDFs for multimodal analysis |
|
|
137
169
|
| Write | medium | Create or overwrite files |
|
|
138
170
|
| Edit | medium | Search-and-replace edits |
|
|
171
|
+
| MultiEdit | medium | Atomic multi-file edits (all succeed or none) |
|
|
139
172
|
| Glob | low | Find files by pattern |
|
|
140
|
-
| Grep | low | Regex content search |
|
|
173
|
+
| Grep | low | Regex content search with context lines |
|
|
141
174
|
| LS | low | List directory contents with sizes |
|
|
175
|
+
| **Web** | | |
|
|
142
176
|
| WebFetch | medium | Fetch URL content (SSRF-protected) |
|
|
143
177
|
| WebSearch | medium | Search the web |
|
|
178
|
+
| RemoteTrigger | high | HTTP requests to webhooks/APIs |
|
|
179
|
+
| **Tasks** | | |
|
|
144
180
|
| TaskCreate | low | Create structured tasks |
|
|
145
181
|
| TaskUpdate | low | Update task status |
|
|
146
182
|
| TaskList | low | List all tasks |
|
|
183
|
+
| TaskGet | low | Get task details |
|
|
184
|
+
| TaskStop | low | Stop a running task |
|
|
185
|
+
| TaskOutput | low | Get task output |
|
|
186
|
+
| **Agents** | | |
|
|
187
|
+
| Agent | medium | Spawn a sub-agent (with role specialization) |
|
|
188
|
+
| ParallelAgent | medium | Dispatch multiple agents with DAG dependencies |
|
|
189
|
+
| SendMessage | low | Agent-to-agent peer messaging |
|
|
147
190
|
| AskUser | low | Ask user a question with options |
|
|
148
|
-
|
|
|
149
|
-
|
|
|
191
|
+
| **Scheduling** | | |
|
|
192
|
+
| CronCreate | medium | Schedule recurring tasks |
|
|
193
|
+
| CronDelete | medium | Remove scheduled tasks |
|
|
194
|
+
| CronList | low | List all scheduled tasks |
|
|
195
|
+
| **Planning** | | |
|
|
150
196
|
| EnterPlanMode | low | Enter structured planning mode |
|
|
151
197
|
| ExitPlanMode | low | Exit planning mode |
|
|
198
|
+
| **Code Intelligence** | | |
|
|
199
|
+
| Diagnostics | low | LSP-based code diagnostics |
|
|
152
200
|
| NotebookEdit | medium | Edit Jupyter notebooks |
|
|
201
|
+
| **Memory & Discovery** | | |
|
|
202
|
+
| Memory | low | Save/list/search persistent memories |
|
|
203
|
+
| Skill | low | Invoke a skill from .oh/skills/ |
|
|
204
|
+
| ToolSearch | low | Find tools by description |
|
|
205
|
+
| **Git Worktrees** | | |
|
|
206
|
+
| EnterWorktree | medium | Create isolated git worktree |
|
|
207
|
+
| ExitWorktree | medium | Remove a git worktree |
|
|
208
|
+
| **Process** | | |
|
|
209
|
+
| KillProcess | high | Stop processes by PID or name |
|
|
153
210
|
|
|
154
|
-
Low-risk read-only tools auto-approve. Medium and high risk tools require confirmation in `ask` mode. Use `--trust` to skip
|
|
211
|
+
Low-risk read-only tools auto-approve. Medium and high risk tools require confirmation in `ask` mode. Use `--trust` or `--auto` to skip prompts.
|
|
155
212
|
|
|
156
|
-
## Slash Commands (
|
|
213
|
+
## Slash Commands (33)
|
|
157
214
|
|
|
158
215
|
Type these during a chat session. Aliases: `/q` exit, `/h` help, `/c` commit, `/m` model, `/s` status.
|
|
159
216
|
|
|
@@ -334,6 +391,58 @@ Every file modification is automatically checkpointed before execution. If somet
|
|
|
334
391
|
|
|
335
392
|
Checkpoints are stored in `.oh/checkpoints/` and cover FileWrite, FileEdit, and Bash commands that modify files.
|
|
336
393
|
|
|
394
|
+
## Verification Loops
|
|
395
|
+
|
|
396
|
+
After every file edit (Edit, Write, MultiEdit), OpenHarness automatically runs language-appropriate lint/typecheck commands and feeds the results back into the agent context. This is the single highest-impact harness engineering pattern — research shows 2-3x quality improvement from automated feedback.
|
|
397
|
+
|
|
398
|
+
**Auto-detection** — if your project has `tsconfig.json`, `.eslintrc*`, `pyproject.toml`, `go.mod`, or `Cargo.toml`, verification rules are detected automatically. No configuration needed.
|
|
399
|
+
|
|
400
|
+
**Custom rules** via `.oh/config.yaml`:
|
|
401
|
+
|
|
402
|
+
```yaml
|
|
403
|
+
verification:
|
|
404
|
+
enabled: true # default: true (auto-detect)
|
|
405
|
+
mode: warn # 'warn' appends to output, 'block' marks as error
|
|
406
|
+
rules:
|
|
407
|
+
- extensions: [".ts", ".tsx"]
|
|
408
|
+
lint: "npx tsc --noEmit 2>&1 | head -20"
|
|
409
|
+
timeout: 15000
|
|
410
|
+
- extensions: [".py"]
|
|
411
|
+
lint: "ruff check {file} 2>&1 | head -10"
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
The agent sees `[Verification passed]` or `[Verification FAILED]` with the linter output after each edit, enabling self-correction.
|
|
415
|
+
|
|
416
|
+
## Memory Consolidation
|
|
417
|
+
|
|
418
|
+
On session exit, OpenHarness automatically prunes stale memories using temporal decay:
|
|
419
|
+
|
|
420
|
+
- Memories not accessed in 30+ days lose 0.1 relevance per 30-day period
|
|
421
|
+
- Memories below 0.1 relevance are permanently deleted
|
|
422
|
+
- Updated relevance scores are persisted to memory files
|
|
423
|
+
|
|
424
|
+
This keeps the memory system lean and relevant. Configure in `.oh/config.yaml`:
|
|
425
|
+
|
|
426
|
+
```yaml
|
|
427
|
+
memory:
|
|
428
|
+
consolidateOnExit: true # default: true
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
## Scheduled Tasks (Cron)
|
|
432
|
+
|
|
433
|
+
Create recurring tasks that run automatically in the background:
|
|
434
|
+
|
|
435
|
+
```
|
|
436
|
+
# Via slash commands
|
|
437
|
+
/cron list # show all scheduled tasks
|
|
438
|
+
/cron create "check-tests" # create a new task (interactive)
|
|
439
|
+
/cron delete <id> # remove a task
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
**Schedule syntax:** `every 5m`, `every 2h`, `every 1d`
|
|
443
|
+
|
|
444
|
+
The cron executor checks every 60 seconds for due tasks and runs them via sub-queries. Results are stored in `~/.oh/crons/history/`.
|
|
445
|
+
|
|
337
446
|
## Agent Roles
|
|
338
447
|
|
|
339
448
|
Dispatch specialized sub-agents for focused tasks:
|
|
@@ -342,16 +451,22 @@ Dispatch specialized sub-agents for focused tasks:
|
|
|
342
451
|
/roles # list all available roles
|
|
343
452
|
```
|
|
344
453
|
|
|
345
|
-
| Role | Description |
|
|
346
|
-
|
|
347
|
-
| `code-reviewer` | Find bugs, security issues, style problems |
|
|
348
|
-
| `test-writer` | Generate unit and integration tests |
|
|
349
|
-
| `docs-writer` | Write documentation and comments |
|
|
350
|
-
| `debugger` | Systematic bug investigation |
|
|
351
|
-
| `refactorer` | Simplify code without changing behavior |
|
|
352
|
-
| `security-auditor` | OWASP, injection, secrets, CVE scanning |
|
|
454
|
+
| Role | Description | Tools |
|
|
455
|
+
|------|-------------|-------|
|
|
456
|
+
| `code-reviewer` | Find bugs, security issues, style problems | Read-only |
|
|
457
|
+
| `test-writer` | Generate unit and integration tests | Read + Write + Bash |
|
|
458
|
+
| `docs-writer` | Write documentation and comments | Read + Write + Edit |
|
|
459
|
+
| `debugger` | Systematic bug investigation | Read-only + Bash |
|
|
460
|
+
| `refactorer` | Simplify code without changing behavior | All file tools + Bash |
|
|
461
|
+
| `security-auditor` | OWASP, injection, secrets, CVE scanning | Read-only + Bash |
|
|
462
|
+
| `evaluator` | Evaluate code quality and run tests (read-only) | Read-only + Bash + Diagnostics |
|
|
353
463
|
|
|
354
|
-
|
|
464
|
+
Each role restricts the sub-agent to only its suggested tools. You can also pass `allowed_tools` explicitly:
|
|
465
|
+
|
|
466
|
+
```
|
|
467
|
+
Agent({ subagent_type: 'evaluator', prompt: 'Run all tests and report results' })
|
|
468
|
+
Agent({ allowed_tools: ['Read', 'Grep'], prompt: 'Search for all TODO comments' })
|
|
469
|
+
```
|
|
355
470
|
|
|
356
471
|
## Headless Mode
|
|
357
472
|
|
|
@@ -446,6 +561,31 @@ oh --model llamacpp/my-model
|
|
|
446
561
|
oh models # list available models
|
|
447
562
|
```
|
|
448
563
|
|
|
564
|
+
## Configuration Hierarchy
|
|
565
|
+
|
|
566
|
+
Config is loaded in layers (later overrides earlier):
|
|
567
|
+
|
|
568
|
+
1. **Global** `~/.oh/config.yaml` — default provider, model, theme for all projects
|
|
569
|
+
2. **Project** `.oh/config.yaml` — project-specific settings
|
|
570
|
+
3. **Local** `.oh/config.local.yaml` — personal overrides (gitignored)
|
|
571
|
+
|
|
572
|
+
Set your default provider once globally:
|
|
573
|
+
|
|
574
|
+
```yaml
|
|
575
|
+
# ~/.oh/config.yaml
|
|
576
|
+
provider: ollama
|
|
577
|
+
model: llama3
|
|
578
|
+
permissionMode: ask
|
|
579
|
+
theme: dark
|
|
580
|
+
```
|
|
581
|
+
|
|
582
|
+
Then per-project configs only need what's different:
|
|
583
|
+
|
|
584
|
+
```yaml
|
|
585
|
+
# .oh/config.yaml
|
|
586
|
+
model: codellama # override just the model
|
|
587
|
+
```
|
|
588
|
+
|
|
449
589
|
## Project Rules
|
|
450
590
|
|
|
451
591
|
Create `.oh/RULES.md` in any repo (or run `oh init`):
|
|
@@ -458,6 +598,42 @@ Create `.oh/RULES.md` in any repo (or run `oh init`):
|
|
|
458
598
|
|
|
459
599
|
Rules load automatically into every session.
|
|
460
600
|
|
|
601
|
+
## How It Works
|
|
602
|
+
|
|
603
|
+
```mermaid
|
|
604
|
+
graph LR
|
|
605
|
+
User[User Input] --> REPL[REPL Loop]
|
|
606
|
+
REPL --> Query[Query Engine]
|
|
607
|
+
Query --> Provider[LLM Provider]
|
|
608
|
+
Provider --> LLM[Ollama / OpenAI / Anthropic]
|
|
609
|
+
LLM --> Tools[Tool Execution]
|
|
610
|
+
Tools --> Permissions{Permission Check}
|
|
611
|
+
Permissions -->|Approved| Execute[Run Tool]
|
|
612
|
+
Permissions -->|Blocked| Deny[Deny & Report]
|
|
613
|
+
Execute --> Response[Stream Response]
|
|
614
|
+
Response --> REPL
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
## FAQ
|
|
618
|
+
|
|
619
|
+
**Does it work offline?**
|
|
620
|
+
Yes. Use Ollama with a local model — no internet or API key needed.
|
|
621
|
+
|
|
622
|
+
**How much does it cost?**
|
|
623
|
+
Free. OpenHarness is MIT licensed. You bring your own API key (BYOK) for cloud models, or use Ollama for free.
|
|
624
|
+
|
|
625
|
+
**Is it safe?**
|
|
626
|
+
Yes. 7 permission modes control what tools can do. Bash commands are analyzed by an AST parser that blocks destructive patterns (`rm -rf`, `curl | bash`, etc.). Every file change is checkpointed and reversible with `/rewind`.
|
|
627
|
+
|
|
628
|
+
**Can I use it in CI/CD?**
|
|
629
|
+
Yes. Use `oh -p "prompt" --auto` for headless execution, or the built-in GitHub Action for PR reviews.
|
|
630
|
+
|
|
631
|
+
**Does it support my language/framework?**
|
|
632
|
+
Yes. OpenHarness is language-agnostic — it reads, writes, and executes code in any language. Syntax highlighting covers 20+ languages.
|
|
633
|
+
|
|
634
|
+
**How does it compare to Claude Code?**
|
|
635
|
+
~90% feature parity for CLI use cases. Main advantage: works with ANY LLM (not just Anthropic). See the [comparison table](#why-openharness) above.
|
|
636
|
+
|
|
461
637
|
## Install
|
|
462
638
|
|
|
463
639
|
Requires **Node.js 18+**.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeferredTool — lazy-loads tool schemas to reduce system prompt size.
|
|
3
|
+
*
|
|
4
|
+
* Wraps a built-in tool with a minimal prompt (name + description only).
|
|
5
|
+
* Full schema and prompt are loaded on first invocation or when resolved
|
|
6
|
+
* via ToolSearch. This mirrors the DeferredMcpTool pattern.
|
|
7
|
+
*
|
|
8
|
+
* Token savings: deferred tools contribute ~15 tokens to the system prompt
|
|
9
|
+
* instead of ~150, reducing context pressure by ~90% per deferred tool.
|
|
10
|
+
*/
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
import type { Tool, ToolContext, ToolResult } from "./Tool.js";
|
|
13
|
+
import type { RiskLevel } from "./types/permissions.js";
|
|
14
|
+
export declare class DeferredTool implements Tool<z.ZodType> {
|
|
15
|
+
readonly name: string;
|
|
16
|
+
readonly description: string;
|
|
17
|
+
readonly inputSchema: z.ZodType;
|
|
18
|
+
readonly riskLevel: RiskLevel;
|
|
19
|
+
private inner;
|
|
20
|
+
private _activated;
|
|
21
|
+
constructor(tool: Tool);
|
|
22
|
+
/** Whether this tool has been activated (called or resolved) */
|
|
23
|
+
get activated(): boolean;
|
|
24
|
+
isReadOnly(input: unknown): boolean;
|
|
25
|
+
isConcurrencySafe(input: unknown): boolean;
|
|
26
|
+
call(input: any, context: ToolContext): Promise<ToolResult>;
|
|
27
|
+
/** Minimal prompt when deferred, full prompt when activated */
|
|
28
|
+
prompt(): string;
|
|
29
|
+
/** Get the full inner tool (for ToolSearch resolution) */
|
|
30
|
+
getInner(): Tool;
|
|
31
|
+
/** Activate this tool so it returns full prompt on next call to prompt() */
|
|
32
|
+
activate(): void;
|
|
33
|
+
}
|
|
34
|
+
//# sourceMappingURL=DeferredTool.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DeferredTool.d.ts","sourceRoot":"","sources":["../src/DeferredTool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAC/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAC;AAExD,qBAAa,YAAa,YAAW,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC;IAClD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC,OAAO,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,SAAS,CAAC;IAE9B,OAAO,CAAC,KAAK,CAAO;IACpB,OAAO,CAAC,UAAU,CAAS;gBAEf,IAAI,EAAE,IAAI;IAStB,gEAAgE;IAChE,IAAI,SAAS,IAAI,OAAO,CAA4B;IAEpD,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO;IAInC,iBAAiB,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO;IAIpC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC;IAajE,+DAA+D;IAC/D,MAAM,IAAI,MAAM;IAKhB,0DAA0D;IAC1D,QAAQ,IAAI,IAAI;IAIhB,4EAA4E;IAC5E,QAAQ,IAAI,IAAI;CAGjB"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DeferredTool — lazy-loads tool schemas to reduce system prompt size.
|
|
3
|
+
*
|
|
4
|
+
* Wraps a built-in tool with a minimal prompt (name + description only).
|
|
5
|
+
* Full schema and prompt are loaded on first invocation or when resolved
|
|
6
|
+
* via ToolSearch. This mirrors the DeferredMcpTool pattern.
|
|
7
|
+
*
|
|
8
|
+
* Token savings: deferred tools contribute ~15 tokens to the system prompt
|
|
9
|
+
* instead of ~150, reducing context pressure by ~90% per deferred tool.
|
|
10
|
+
*/
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
export class DeferredTool {
|
|
13
|
+
name;
|
|
14
|
+
description;
|
|
15
|
+
inputSchema;
|
|
16
|
+
riskLevel;
|
|
17
|
+
inner;
|
|
18
|
+
_activated = false;
|
|
19
|
+
constructor(tool) {
|
|
20
|
+
this.inner = tool;
|
|
21
|
+
this.name = tool.name;
|
|
22
|
+
this.description = tool.description;
|
|
23
|
+
this.riskLevel = tool.riskLevel;
|
|
24
|
+
// Permissive schema until activated — accepts any object
|
|
25
|
+
this.inputSchema = z.record(z.unknown());
|
|
26
|
+
}
|
|
27
|
+
/** Whether this tool has been activated (called or resolved) */
|
|
28
|
+
get activated() { return this._activated; }
|
|
29
|
+
isReadOnly(input) {
|
|
30
|
+
return this.inner.isReadOnly(input);
|
|
31
|
+
}
|
|
32
|
+
isConcurrencySafe(input) {
|
|
33
|
+
return this.inner.isConcurrencySafe(input);
|
|
34
|
+
}
|
|
35
|
+
async call(input, context) {
|
|
36
|
+
this._activated = true;
|
|
37
|
+
// Validate with the real schema
|
|
38
|
+
const parsed = this.inner.inputSchema.safeParse(input);
|
|
39
|
+
if (!parsed.success) {
|
|
40
|
+
return {
|
|
41
|
+
output: `Schema validation error: ${parsed.error.message}`,
|
|
42
|
+
isError: true,
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
return this.inner.call(parsed.data, context);
|
|
46
|
+
}
|
|
47
|
+
/** Minimal prompt when deferred, full prompt when activated */
|
|
48
|
+
prompt() {
|
|
49
|
+
if (this._activated)
|
|
50
|
+
return this.inner.prompt();
|
|
51
|
+
return `[deferred] ${this.name}: ${this.description}`;
|
|
52
|
+
}
|
|
53
|
+
/** Get the full inner tool (for ToolSearch resolution) */
|
|
54
|
+
getInner() {
|
|
55
|
+
return this.inner;
|
|
56
|
+
}
|
|
57
|
+
/** Activate this tool so it returns full prompt on next call to prompt() */
|
|
58
|
+
activate() {
|
|
59
|
+
this._activated = true;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
//# sourceMappingURL=DeferredTool.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DeferredTool.js","sourceRoot":"","sources":["../src/DeferredTool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,MAAM,OAAO,YAAY;IACd,IAAI,CAAS;IACb,WAAW,CAAS;IACpB,WAAW,CAAY;IACvB,SAAS,CAAY;IAEtB,KAAK,CAAO;IACZ,UAAU,GAAG,KAAK,CAAC;IAE3B,YAAY,IAAU;QACpB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACtB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC;QACpC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,yDAAyD;QACzD,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IAC3C,CAAC;IAED,gEAAgE;IAChE,IAAI,SAAS,KAAc,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;IAEpD,UAAU,CAAC,KAAc;QACvB,OAAO,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;IACtC,CAAC;IAED,iBAAiB,CAAC,KAAc;QAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAC7C,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAU,EAAE,OAAoB;QACzC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC;QACvB,gCAAgC;QAChC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACvD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACpB,OAAO;gBACL,MAAM,EAAE,4BAA4B,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE;gBAC1D,OAAO,EAAE,IAAI;aACd,CAAC;QACJ,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,+DAA+D;IAC/D,MAAM;QACJ,IAAI,IAAI,CAAC,UAAU;YAAE,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QAChD,OAAO,cAAc,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,WAAW,EAAE,CAAC;IACxD,CAAC;IAED,0DAA0D;IAC1D,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,4EAA4E;IAC5E,QAAQ;QACN,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC;IACzB,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DeferredTool.test.d.ts","sourceRoot":"","sources":["../src/DeferredTool.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { describe, it } from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import { createMockTool } from './test-helpers.js';
|
|
4
|
+
import { DeferredTool } from './DeferredTool.js';
|
|
5
|
+
describe('DeferredTool', () => {
|
|
6
|
+
it('wraps a tool with correct name and description', () => {
|
|
7
|
+
const inner = createMockTool('TestTool');
|
|
8
|
+
const deferred = new DeferredTool(inner);
|
|
9
|
+
assert.equal(deferred.name, 'TestTool');
|
|
10
|
+
assert.equal(deferred.description, inner.description);
|
|
11
|
+
assert.equal(deferred.riskLevel, inner.riskLevel);
|
|
12
|
+
});
|
|
13
|
+
it('returns deferred prompt before activation', () => {
|
|
14
|
+
const inner = createMockTool('TestTool');
|
|
15
|
+
const deferred = new DeferredTool(inner);
|
|
16
|
+
const prompt = deferred.prompt();
|
|
17
|
+
assert.ok(prompt.startsWith('[deferred]'));
|
|
18
|
+
assert.ok(prompt.includes('TestTool'));
|
|
19
|
+
});
|
|
20
|
+
it('returns full prompt after activation', () => {
|
|
21
|
+
const inner = createMockTool('TestTool');
|
|
22
|
+
const deferred = new DeferredTool(inner);
|
|
23
|
+
deferred.activate();
|
|
24
|
+
assert.equal(deferred.prompt(), inner.prompt());
|
|
25
|
+
});
|
|
26
|
+
it('is not activated by default', () => {
|
|
27
|
+
const inner = createMockTool('TestTool');
|
|
28
|
+
const deferred = new DeferredTool(inner);
|
|
29
|
+
assert.equal(deferred.activated, false);
|
|
30
|
+
});
|
|
31
|
+
it('activates on call()', async () => {
|
|
32
|
+
const inner = createMockTool('TestTool');
|
|
33
|
+
const deferred = new DeferredTool(inner);
|
|
34
|
+
assert.equal(deferred.activated, false);
|
|
35
|
+
const result = await deferred.call({ input: 'test' }, { workingDir: '/tmp' });
|
|
36
|
+
assert.equal(deferred.activated, true);
|
|
37
|
+
assert.equal(result.isError, false);
|
|
38
|
+
assert.ok(result.output.includes('TestTool'));
|
|
39
|
+
});
|
|
40
|
+
it('activates via activate() method', () => {
|
|
41
|
+
const inner = createMockTool('TestTool');
|
|
42
|
+
const deferred = new DeferredTool(inner);
|
|
43
|
+
deferred.activate();
|
|
44
|
+
assert.equal(deferred.activated, true);
|
|
45
|
+
});
|
|
46
|
+
it('returns full prompt after call()', async () => {
|
|
47
|
+
const inner = createMockTool('TestTool');
|
|
48
|
+
const deferred = new DeferredTool(inner);
|
|
49
|
+
await deferred.call({ input: 'test' }, { workingDir: '/tmp' });
|
|
50
|
+
assert.equal(deferred.prompt(), inner.prompt());
|
|
51
|
+
});
|
|
52
|
+
it('validates input against inner schema and returns error', async () => {
|
|
53
|
+
const inner = createMockTool('TestTool');
|
|
54
|
+
const deferred = new DeferredTool(inner);
|
|
55
|
+
// Inner schema expects { input?: string }, pass something wildly wrong
|
|
56
|
+
// Since z.object({ input: z.string().optional() }) is permissive,
|
|
57
|
+
// we pass a valid object and check it works
|
|
58
|
+
const result = await deferred.call({ input: 'valid' }, { workingDir: '/tmp' });
|
|
59
|
+
assert.equal(result.isError, false);
|
|
60
|
+
});
|
|
61
|
+
it('exposes inner tool via getInner()', () => {
|
|
62
|
+
const inner = createMockTool('TestTool');
|
|
63
|
+
const deferred = new DeferredTool(inner);
|
|
64
|
+
assert.equal(deferred.getInner(), inner);
|
|
65
|
+
});
|
|
66
|
+
it('delegates isReadOnly to inner tool', () => {
|
|
67
|
+
const readOnly = createMockTool('ReadOnly', { readOnly: true });
|
|
68
|
+
const writable = createMockTool('Writable', { readOnly: false });
|
|
69
|
+
assert.equal(new DeferredTool(readOnly).isReadOnly({}), true);
|
|
70
|
+
assert.equal(new DeferredTool(writable).isReadOnly({}), false);
|
|
71
|
+
});
|
|
72
|
+
it('delegates isConcurrencySafe to inner tool', () => {
|
|
73
|
+
const safe = createMockTool('Safe', { concurrent: true });
|
|
74
|
+
const unsafe = createMockTool('Unsafe', { concurrent: false });
|
|
75
|
+
assert.equal(new DeferredTool(safe).isConcurrencySafe({}), true);
|
|
76
|
+
assert.equal(new DeferredTool(unsafe).isConcurrencySafe({}), false);
|
|
77
|
+
});
|
|
78
|
+
});
|
|
79
|
+
//# sourceMappingURL=DeferredTool.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DeferredTool.test.js","sourceRoot":"","sources":["../src/DeferredTool.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,MAAM,MAAM,oBAAoB,CAAC;AACxC,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAEjD,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QACxC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,WAAW,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAExC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9E,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;QACvC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QACpC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CA
AC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,QAAQ,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;QACtE,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,uEAAuE;QACvE,kEAAkE;QAClE,4CAA4C;QAC5C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/E,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;QACzC,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QACjE,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QAC9D,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,QAAQ,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,IAAI,GAAG,cAAc,CAAC,MAAM,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,iBAAiB,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC,iBAAiB,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC,CAAC;IAC
tE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"roles.d.ts","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,MAAM,MAAM,SAAS,GAAG;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;CAC3B,CAAC;
|
|
1
|
+
{"version":3,"file":"roles.d.ts","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,MAAM,MAAM,SAAS,GAAG;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;IAC/B,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;CAC3B,CAAC;AA8GF,uBAAuB;AACvB,wBAAgB,OAAO,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,CAEzD;AAED,+BAA+B;AAC/B,wBAAgB,SAAS,IAAI,SAAS,EAAE,CAEvC;AAED,mBAAmB;AACnB,wBAAgB,UAAU,IAAI,MAAM,EAAE,CAErC"}
|
package/dist/agents/roles.js
CHANGED
|
@@ -21,7 +21,7 @@ const roles = [
|
|
|
21
21
|
- Verify that changes match the stated intent
|
|
22
22
|
|
|
23
23
|
Be specific: cite file paths, line numbers, and code snippets. Prioritize issues by severity (critical > major > minor). Don't mention things that look fine — focus on problems.`,
|
|
24
|
-
suggestedTools: ['
|
|
24
|
+
suggestedTools: ['Read', 'Glob', 'Grep', 'LS'],
|
|
25
25
|
},
|
|
26
26
|
{
|
|
27
27
|
id: 'test-writer',
|
|
@@ -36,7 +36,7 @@ Be specific: cite file paths, line numbers, and code snippets. Prioritize issues
|
|
|
36
36
|
- Include both positive and negative test cases
|
|
37
37
|
|
|
38
38
|
Read existing tests first to match the style, then write new tests.`,
|
|
39
|
-
suggestedTools: ['
|
|
39
|
+
suggestedTools: ['Read', 'Write', 'Glob', 'Grep', 'Bash'],
|
|
40
40
|
},
|
|
41
41
|
{
|
|
42
42
|
id: 'docs-writer',
|
|
@@ -51,7 +51,7 @@ Read existing tests first to match the style, then write new tests.`,
|
|
|
51
51
|
- Keep documentation in sync with the actual code
|
|
52
52
|
|
|
53
53
|
Write for the target audience (developers using this project). Be practical, not verbose.`,
|
|
54
|
-
suggestedTools: ['
|
|
54
|
+
suggestedTools: ['Read', 'Write', 'Edit', 'Glob', 'Grep'],
|
|
55
55
|
},
|
|
56
56
|
{
|
|
57
57
|
id: 'debugger',
|
|
@@ -66,7 +66,7 @@ Write for the target audience (developers using this project). Be practical, not
|
|
|
66
66
|
- Propose a minimal fix that addresses the root cause, not the symptom
|
|
67
67
|
|
|
68
68
|
Follow systematic debugging: read errors → reproduce → check changes → trace data → form hypothesis → test minimally.`,
|
|
69
|
-
suggestedTools: ['
|
|
69
|
+
suggestedTools: ['Read', 'Glob', 'Grep', 'Bash', 'LS'],
|
|
70
70
|
},
|
|
71
71
|
{
|
|
72
72
|
id: 'refactorer',
|
|
@@ -81,7 +81,7 @@ Follow systematic debugging: read errors → reproduce → check changes → tra
|
|
|
81
81
|
- Ensure all existing tests still pass after refactoring
|
|
82
82
|
|
|
83
83
|
Do NOT add new features or change behavior. The refactored code must be functionally identical. Run tests after each change.`,
|
|
84
|
-
suggestedTools: ['
|
|
84
|
+
suggestedTools: ['Read', 'Write', 'Edit', 'Glob', 'Grep', 'Bash'],
|
|
85
85
|
},
|
|
86
86
|
{
|
|
87
87
|
id: 'security-auditor',
|
|
@@ -98,7 +98,21 @@ Do NOT add new features or change behavior. The refactored code must be function
|
|
|
98
98
|
- Check dependency versions for known CVEs
|
|
99
99
|
|
|
100
100
|
Report findings with severity (Critical/High/Medium/Low), affected file:line, and recommended fix.`,
|
|
101
|
-
suggestedTools: ['
|
|
101
|
+
suggestedTools: ['Read', 'Glob', 'Grep', 'Bash'],
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
id: 'evaluator',
|
|
105
|
+
name: 'Evaluator',
|
|
106
|
+
description: 'Evaluates code quality, correctness, and test results (read-only)',
|
|
107
|
+
systemPromptSupplement: `You are an evaluator agent. Your job is to:
|
|
108
|
+
- Review code changes for correctness and quality
|
|
109
|
+
- Run existing tests and report results
|
|
110
|
+
- Check for regressions against the stated requirements
|
|
111
|
+
- Verify that changes match the stated intent
|
|
112
|
+
- Provide a pass/fail assessment with specific findings
|
|
113
|
+
|
|
114
|
+
You CANNOT modify files. Only read, search, and run test/lint commands to evaluate.`,
|
|
115
|
+
suggestedTools: ['Read', 'Glob', 'Grep', 'LS', 'Bash', 'Diagnostics'],
|
|
102
116
|
},
|
|
103
117
|
];
|
|
104
118
|
/** Get a role by ID */
|
package/dist/agents/roles.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"roles.js","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAWH,MAAM,KAAK,GAAgB;IACzB;QACE,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,eAAe;QACrB,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;kLAOsJ;QAC9K,cAAc,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"roles.js","sourceRoot":"","sources":["../../src/agents/roles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAWH,MAAM,KAAK,GAAgB;IACzB;QACE,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,eAAe;QACrB,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;kLAOsJ;QAC9K,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;KAC/C;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,aAAa;QACnB,WAAW,EAAE,2DAA2D;QACxE,sBAAsB,EAAE;;;;;;;;oEAQwC;QAChE,cAAc,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAC1D;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;;0FAQ8D;QACtF,cAAc,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAC1D;IACD;QACE,EAAE,EAAE,UAAU;QACd,IAAI,EAAE,UAAU;QAChB,WAAW,EAAE,uEAAuE;QACpF,sBAAsB,EAAE;;;;;;;;sHAQ0F;QAClH,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;KACvD;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,4DAA4D;QACzE,sBAAsB,EAAE;;;;;;;;6HAQiG;QACzH,cAAc,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KAClE;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,kBAAkB;QACxB,WAAW,EAAE,gEAAgE;QAC7E,sBAAsB,EAAE;;;;;;;;;;mGAUuE;QAC/F,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;KACjD;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,mEAAmE;QAChF,sBAAsB,EAAE;;;;;;;oFAOwD;QAChF,cAAc,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,aAAa,CAAC;KACtE;CACF,CAAC;AAEF,uBAAuB;AACvB,MAAM,UAAU,OAAO,CAAC,EAAU;IAChC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AACtC,CAAC;AAED,+BAA+B;AAC/B,MAAM,UAAU,SAAS;IACvB,OAAO,CAAC,GAAG,KAAK,CAAC,CAAC;AACpB,CAAC;AAED,mBAAmB;AACnB,MAAM,UAAU,UAAU;IACxB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AAC9B,CAAC"}
|
|
@@ -4,18 +4,19 @@ import { getRole, listRoles, getRoleIds } from './roles.js';
|
|
|
4
4
|
describe('agent roles', () => {
|
|
5
5
|
it('lists all roles', () => {
|
|
6
6
|
const roles = listRoles();
|
|
7
|
-
assert.ok(roles.length >=
|
|
7
|
+
assert.ok(roles.length >= 7);
|
|
8
8
|
assert.ok(roles.find(r => r.id === 'code-reviewer'));
|
|
9
9
|
assert.ok(roles.find(r => r.id === 'test-writer'));
|
|
10
10
|
assert.ok(roles.find(r => r.id === 'debugger'));
|
|
11
11
|
assert.ok(roles.find(r => r.id === 'security-auditor'));
|
|
12
|
+
assert.ok(roles.find(r => r.id === 'evaluator'));
|
|
12
13
|
});
|
|
13
14
|
it('gets role by ID', () => {
|
|
14
15
|
const role = getRole('code-reviewer');
|
|
15
16
|
assert.ok(role);
|
|
16
17
|
assert.strictEqual(role.name, 'Code Reviewer');
|
|
17
18
|
assert.ok(role.systemPromptSupplement.length > 50);
|
|
18
|
-
assert.ok(role.suggestedTools.includes('
|
|
19
|
+
assert.ok(role.suggestedTools.includes('Read'));
|
|
19
20
|
});
|
|
20
21
|
it('returns undefined for unknown role', () => {
|
|
21
22
|
assert.strictEqual(getRole('nonexistent'), undefined);
|
|
@@ -34,5 +35,28 @@ describe('agent roles', () => {
|
|
|
34
35
|
assert.ok(role.systemPromptSupplement.length > 20, `role ${role.id} has short prompt`);
|
|
35
36
|
}
|
|
36
37
|
});
|
|
38
|
+
it('evaluator role has read-only suggested tools', () => {
|
|
39
|
+
const role = getRole('evaluator');
|
|
40
|
+
assert.ok(role);
|
|
41
|
+
assert.strictEqual(role.name, 'Evaluator');
|
|
42
|
+
assert.ok(role.suggestedTools.includes('Read'));
|
|
43
|
+
assert.ok(role.suggestedTools.includes('Glob'));
|
|
44
|
+
assert.ok(role.suggestedTools.includes('Grep'));
|
|
45
|
+
assert.ok(role.suggestedTools.includes('Bash'));
|
|
46
|
+
assert.ok(role.suggestedTools.includes('Diagnostics'));
|
|
47
|
+
// Evaluator should NOT have write tools
|
|
48
|
+
assert.ok(!role.suggestedTools.includes('Write'));
|
|
49
|
+
assert.ok(!role.suggestedTools.includes('Edit'));
|
|
50
|
+
});
|
|
51
|
+
it('all roles use actual tool names (not FileRead/FileWrite/FileEdit)', () => {
|
|
52
|
+
const invalidNames = ['FileRead', 'FileWrite', 'FileEdit'];
|
|
53
|
+
for (const role of listRoles()) {
|
|
54
|
+
if (!role.suggestedTools)
|
|
55
|
+
continue;
|
|
56
|
+
for (const toolName of role.suggestedTools) {
|
|
57
|
+
assert.ok(!invalidNames.includes(toolName), `role ${role.id} uses invalid tool name '${toolName}' — should be 'Read', 'Write', or 'Edit'`);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
});
|
|
37
61
|
});
|
|
38
62
|
//# sourceMappingURL=roles.test.js.map
|