bernard-agent 0.5.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/README.md +163 -39
  2. package/dist/agent.d.ts +20 -2
  3. package/dist/agent.js +224 -9
  4. package/dist/agent.js.map +1 -1
  5. package/dist/config.d.ts +10 -2
  6. package/dist/config.js +36 -11
  7. package/dist/config.js.map +1 -1
  8. package/dist/context.d.ts +4 -2
  9. package/dist/context.js +9 -6
  10. package/dist/context.js.map +1 -1
  11. package/dist/domains.js +35 -0
  12. package/dist/domains.js.map +1 -1
  13. package/dist/index.js +1 -0
  14. package/dist/index.js.map +1 -1
  15. package/dist/output.d.ts +27 -0
  16. package/dist/output.js +97 -5
  17. package/dist/output.js.map +1 -1
  18. package/dist/paths.d.ts +2 -0
  19. package/dist/paths.js +3 -1
  20. package/dist/paths.js.map +1 -1
  21. package/dist/rag-worker.js +16 -0
  22. package/dist/rag-worker.js.map +1 -1
  23. package/dist/repl.js +378 -18
  24. package/dist/repl.js.map +1 -1
  25. package/dist/reserved-names.d.ts +5 -0
  26. package/dist/reserved-names.js +32 -0
  27. package/dist/reserved-names.js.map +1 -0
  28. package/dist/routines.js +10 -19
  29. package/dist/routines.js.map +1 -1
  30. package/dist/specialist-candidates.d.ts +45 -0
  31. package/dist/specialist-candidates.js +154 -0
  32. package/dist/specialist-candidates.js.map +1 -0
  33. package/dist/specialist-detector.d.ts +12 -0
  34. package/dist/specialist-detector.js +124 -0
  35. package/dist/specialist-detector.js.map +1 -0
  36. package/dist/specialist-matcher.d.ts +15 -0
  37. package/dist/specialist-matcher.js +192 -0
  38. package/dist/specialist-matcher.js.map +1 -0
  39. package/dist/specialists.d.ts +50 -0
  40. package/dist/specialists.js +173 -0
  41. package/dist/specialists.js.map +1 -0
  42. package/dist/tools/agent-pool.d.ts +20 -0
  43. package/dist/tools/agent-pool.js +41 -0
  44. package/dist/tools/agent-pool.js.map +1 -0
  45. package/dist/tools/index.d.ts +2 -1
  46. package/dist/tools/index.js +3 -1
  47. package/dist/tools/index.js.map +1 -1
  48. package/dist/tools/specialist-run.d.ts +39 -0
  49. package/dist/tools/specialist-run.js +123 -0
  50. package/dist/tools/specialist-run.js.map +1 -0
  51. package/dist/tools/specialist.d.ts +40 -0
  52. package/dist/tools/specialist.js +107 -0
  53. package/dist/tools/specialist.js.map +1 -0
  54. package/dist/tools/subagent.d.ts +1 -1
  55. package/dist/tools/subagent.js +8 -11
  56. package/dist/tools/subagent.js.map +1 -1
  57. package/dist/tools/task.d.ts +45 -0
  58. package/dist/tools/task.js +155 -0
  59. package/dist/tools/task.js.map +1 -0
  60. package/dist/update.d.ts +7 -0
  61. package/dist/update.js +16 -2
  62. package/dist/update.js.map +1 -1
  63. package/package.json +1 -1
package/README.md CHANGED
@@ -24,7 +24,11 @@ A local CLI AI agent that executes terminal commands, manages scheduled tasks, r
24
24
  - [Date and Time](#date-and-time)
25
25
  - [Time Range Calculations](#time-range-calculations)
26
26
  - [Sub-Agents](#sub-agents)
27
+ - [Tasks](#tasks)
27
28
  - [Routines](#routines)
29
+ - [Specialists](#specialists)
30
+ - [Specialist Suggestions](#specialist-suggestions)
31
+ - [Critic Mode](#critic-mode)
28
32
  - [Cron Jobs (Scheduled Tasks)](#cron-jobs-scheduled-tasks)
29
33
  - [Creating Jobs](#creating-jobs)
30
34
  - [Managing Jobs](#managing-jobs)
@@ -124,17 +128,19 @@ bernard providers
124
128
 
125
129
  Bernard loads `.env` from the current directory first, then falls back to `~/.bernard/.env`.
126
130
 
127
- | Variable | Description | Default |
128
- | ----------------------- | ------------------------------------------- | ------------------------- |
129
- | `BERNARD_PROVIDER` | LLM provider (`anthropic`, `openai`, `xai`) | `anthropic` |
130
- | `BERNARD_MODEL` | Model name | Provider-specific default |
131
- | `BERNARD_MAX_TOKENS` | Max response tokens | `4096` |
132
- | `BERNARD_SHELL_TIMEOUT` | Shell command timeout (ms) | `30000` |
133
- | `BERNARD_RAG_ENABLED` | Enable the RAG memory system | `true` |
134
- | `BERNARD_DEBUG` | Enable debug logging | unset |
135
- | `ANTHROPIC_API_KEY` | Anthropic API key | — |
136
- | `OPENAI_API_KEY` | OpenAI API key | — |
137
- | `XAI_API_KEY` | xAI API key | — |
131
+ | Variable | Description | Default |
132
+ | ----------------------- | ----------------------------------------------------- | ------------------------- |
133
+ | `BERNARD_PROVIDER` | LLM provider (`anthropic`, `openai`, `xai`) | `anthropic` |
134
+ | `BERNARD_MODEL` | Model name | Provider-specific default |
135
+ | `BERNARD_MAX_TOKENS` | Max response tokens | `4096` |
136
+ | `BERNARD_SHELL_TIMEOUT` | Shell command timeout (ms) | `30000` |
137
+ | `BERNARD_TOKEN_WINDOW` | Context window size for compression (0 = auto-detect) | `0` |
138
+ | `BERNARD_RAG_ENABLED` | Enable the RAG memory system | `true` |
139
+ | `BERNARD_CRITIC_MODE` | Enable critic mode for response verification | `false` |
140
+ | `BERNARD_DEBUG` | Enable debug logging | unset |
141
+ | `ANTHROPIC_API_KEY` | Anthropic API key | — |
142
+ | `OPENAI_API_KEY` | OpenAI API key | — |
143
+ | `XAI_API_KEY` | xAI API key | — |
138
144
 
139
145
  ### Providers and Models
140
146
 
@@ -150,10 +156,11 @@ You can switch providers and models at any time during a session with `/provider
150
156
 
151
157
  Options can be changed during a session with `/options` or persisted to `~/.bernard/preferences.json`:
152
158
 
153
- | Option | Default | Description |
154
- | --------------- | ------- | ------------------------------------- |
155
- | `max-tokens` | `4096` | Maximum tokens per AI response |
156
- | `shell-timeout` | `30000` | Shell command timeout in milliseconds |
159
+ | Option | Default | Description |
160
+ | --------------- | ------- | ----------------------------------------------------- |
161
+ | `max-tokens` | `4096` | Maximum tokens per AI response |
162
+ | `shell-timeout` | `30000` | Shell command timeout in milliseconds |
163
+ | `token-window` | `0` | Context window size for compression (0 = auto-detect) |
157
164
 
158
165
  From the CLI:
159
166
 
@@ -217,24 +224,30 @@ Features:
217
224
 
218
225
  ### REPL Slash Commands
219
226
 
220
- | Command | Description |
221
- | ----------------- | -------------------------------------------- |
222
- | `/help` | Show available commands |
223
- | `/clear` | Clear conversation history and scratch notes |
224
- | `/memory` | List all persistent memories |
225
- | `/scratch` | List session scratch notes |
226
- | `/mcp` | List connected MCP servers and their tools |
227
- | `/cron` | Show cron jobs and daemon status |
228
- | `/rag` | Show RAG memory stats and recent facts |
229
- | `/provider` | Switch LLM provider interactively |
230
- | `/model` | Switch model for the current provider |
231
- | `/theme` | Switch color theme |
232
- | `/routines` | List saved routines |
233
- | `/create-routine` | Create a routine with guided AI assistance |
234
- | `/options` | View and modify runtime options |
235
- | `/exit` | Quit Bernard (also: `exit`, `quit`) |
236
-
237
- Type `/{routine-id}` to invoke a saved routine directly (e.g., `/deploy-staging`).
227
+ | Command | Description |
228
+ | ----------------- | ------------------------------------------------------------------------- |
229
+ | `/help` | Show available commands |
230
+ | `/clear` | Clear conversation history and scratch notes |
231
+ | `/compact` | Compress conversation history in-place |
232
+ | `/task` | Run an isolated task (no history, structured output) |
233
+ | `/memory` | List all persistent memories |
234
+ | `/scratch` | List session scratch notes |
235
+ | `/mcp` | List connected MCP servers and their tools |
236
+ | `/cron` | Show cron jobs and daemon status |
237
+ | `/rag` | Show RAG memory stats and recent facts |
238
+ | `/provider` | Switch LLM provider interactively |
239
+ | `/model` | Switch model for the current provider |
240
+ | `/theme` | Switch color theme |
241
+ | `/routines` | List saved routines |
242
+ | `/create-routine` | Create a routine with guided AI assistance |
243
+ | `/create-task` | Create a task routine (`task-` prefixed) with guided AI assistance |
244
+ | `/specialists` | List saved specialists |
245
+ | `/candidates` | Review auto-detected specialist suggestions _(v0.6.0+)_ |
246
+ | `/critic` | Toggle critic mode for response verification (on/off) |
247
+ | `/options` | View and modify runtime options (max-tokens, shell-timeout, token-window) |
248
+ | `/exit` | Quit Bernard (also: `exit`, `quit`) |
249
+
250
+ Type `/{routine-id}` or `/{specialist-id}` to invoke a saved routine or specialist directly (e.g., `/deploy-staging`).
238
251
 
239
252
  Prefix with `\` to send a `/`-prefixed message as text instead of a command (e.g., `\/etc/hosts` sends the literal string).
240
253
 
@@ -333,7 +346,28 @@ bernard> check the disk usage on /, look up the weather in Austin, and count lin
333
346
 
334
347
  Up to 4 concurrent sub-agents. Each gets 10 max steps. Color-coded output in the terminal.
335
348
 
336
- ### Routines
349
+ ### Tasks _(v0.6.0+)_
350
+
351
+ Tasks are isolated, focused executions that return structured JSON output. Unlike sub-agents (which return free-form text), tasks always produce a `{status, output, details?}` response — making them ideal for machine-readable results, routine chaining, and conditional branching.
352
+
353
+ ```
354
+ bernard> /task List all TypeScript files in the src directory
355
+ ┌─ task — List all TypeScript files in the src directory
356
+ ▶ shell: find src -name "*.ts" -type f
357
+ └─ task success: Found 23 .ts files
358
+
359
+ Found 23 .ts files
360
+ ```
361
+
362
+ Key differences from sub-agents:
363
+
364
+ - **5-step budget** (vs. 10 for sub-agents) — tasks are meant to be quick and focused
365
+ - **Structured JSON output** — always returns `{status: "success"|"error", output: string, details?: string}`
366
+ - **No conversation history** — completely isolated from the current session
367
+ - **Available as both a tool and a command** — the agent can call `task` during routines for chaining, or users can run `/task` directly from the REPL
368
+ - **Shared concurrency pool** — tasks and sub-agents share the same 4-slot limit
369
+
370
+ ### Routines _(v0.5.0+)_
337
371
 
338
372
  Named, persistent multi-step workflows that you can teach Bernard and later invoke with a slash command. Routines capture procedures — deploy scripts, release checklists, onboarding flows — as free-form markdown.
339
373
 
@@ -374,6 +408,84 @@ Use `/routines` in the REPL for a quick list. Routine names also appear in the l
374
408
 
375
409
  Storage: one JSON file per routine in `~/.local/share/bernard/routines/`. Max 100 routines. IDs must be lowercase kebab-case (1–60 chars).
376
410
 
411
+ ### Specialists _(v0.6.0+)_
412
+
413
+ Specialists are reusable expert profiles — persistent personas with custom system prompts and behavioral guidelines that shape how a sub-agent approaches work. Unlike routines (which define _what_ steps to follow), specialists define _how_ to work.
414
+
415
+ ```
416
+ bernard> create a specialist called "code-reviewer" that reviews code for correctness, style, and security
417
+ ▶ specialist: create { id: "code-reviewer", name: "Code Reviewer", ... }
418
+
419
+ Specialist "Code Reviewer" (code-reviewer) created.
420
+ ```
421
+
422
+ Run a specialist by typing `/{specialist-id}` or using the `specialist_run` tool:
423
+
424
+ ```
425
+ bernard> /code-reviewer review the changes in src/agent.ts
426
+ ┌─ spec:1 [Code Reviewer] — review the changes in src/agent.ts
427
+ ▶ shell: git diff src/agent.ts
428
+ └─ spec:1 done
429
+ ```
430
+
431
+ Each specialist run gets its own `generateText` loop with a 10-step budget, using the specialist's system prompt and guidelines as its persona. Specialists share the concurrency pool with sub-agents and tasks (4 slots max).
432
+
433
+ Manage specialists:
434
+
435
+ ```
436
+ bernard> list my specialists
437
+ ▶ specialist: list
438
+
439
+ bernard> show the code-reviewer specialist
440
+ ▶ specialist: read { id: "code-reviewer" }
441
+
442
+ bernard> update the code-reviewer specialist to also check for accessibility
443
+ ▶ specialist: update { id: "code-reviewer", guidelines: [...] }
444
+
445
+ bernard> delete the code-reviewer specialist
446
+ ▶ specialist: delete { id: "code-reviewer" }
447
+ ```
448
+
449
+ Use `/specialists` in the REPL for a quick list. Specialist names also appear in the live hint/autocomplete system when typing `/`.
450
+
451
+ Storage: one JSON file per specialist in `~/.local/share/bernard/specialists/`. Max 50 specialists. IDs must be lowercase kebab-case (1–60 chars).
452
+
453
+ ### Specialist Suggestions _(v0.6.0+)_
454
+
455
+ Bernard automatically detects recurring delegation patterns in your conversations and suggests new specialists. Detection runs in the background when you exit a session or use `/clear --save`.
456
+
457
+ When candidates are detected, you'll see a notification at the start of your next session:
458
+
459
+ ```
460
+ 2 specialist suggestion(s) pending. Use /candidates to review.
461
+ ```
462
+
463
+ Use `/candidates` to see pending suggestions with their name, description, confidence score, and reasoning. You can then accept or reject candidates conversationally (e.g., "accept the code-review candidate"), and Bernard will create the specialist for you.
464
+
465
+ Candidates are auto-dismissed after 30 days if not reviewed. Up to 10 pending candidates are stored at a time.
466
+
467
+ Storage: one JSON file per candidate in `~/.local/share/bernard/specialist-candidates/`.
468
+
469
+ ### Critic Mode _(v0.6.0+)_
470
+
471
+ Critic mode adds planning, proactive scratch/memory usage, and post-response verification. Toggle it during a session:
472
+
473
+ ```bash
474
+ /critic on # Enable critic mode
475
+ /critic off # Disable critic mode
476
+ /critic # Show current status
477
+ ```
478
+
479
+ When enabled:
480
+
481
+ - **Planning** — Bernard writes a plan to scratch before multi-step tasks
482
+ - **Proactive scratch** — Accumulates findings in scratch during complex work
483
+ - **Verification** — After tool-using responses, a critic agent reviews the work and prints a verdict (PASS/WARN/FAIL)
484
+
485
+ The critic checks that claimed actions match actual tool calls and flags any discrepancies. It adds one extra LLM call after tool-using responses. Simple knowledge answers are not verified.
486
+
487
+ Default: off. Recommended for high-stakes work (deployments, git operations, multi-file edits).
488
+
377
489
  ---
378
490
 
379
491
  ## Cron Jobs (Scheduled Tasks)
@@ -566,16 +678,19 @@ Bernard automatically compresses conversation history when it approaches 75% of
566
678
 
567
679
  Summarization and domain-specific fact extraction run in parallel. Scratch notes survive compression, so multi-step task progress is never lost.
568
680
 
681
+ When critic mode is enabled (`/critic on`), Bernard writes plans to scratch before complex tasks and verifies outcomes after tool use. See [Critic Mode](#critic-mode).
682
+
569
683
  ### RAG Memory
570
684
 
571
685
  Bernard has a Retrieval-Augmented Generation (RAG) system that provides long-term memory beyond the current session:
572
686
 
573
- - **Domain-specific extraction** — facts are extracted into three specialized domains, each with its own LLM prompt:
687
+ - **Domain-specific extraction** — facts are extracted into four specialized domains, each with its own LLM prompt:
574
688
  - **Tool Usage Patterns** — command sequences, error resolutions, build/deploy workflows
575
689
  - **User Preferences** — communication style, workflow conventions, repeated instructions
576
690
  - **General Knowledge** — project structure, architecture decisions, environment info
577
- - **Parallel extraction** — all three domain extractors run concurrently via `Promise.allSettled`, so wall-clock latency is roughly the same as a single extraction
578
- - **Per-domain retrieval** — search returns up to 3 results per domain (9 total max), preventing any single domain from crowding out others
691
+ - **Conversation Summaries** — what was discussed, approaches taken, tools/specialists/routines used, outcomes
692
+ - **Parallel extraction** — all four domain extractors run concurrently via `Promise.allSettled`, so wall-clock latency is roughly the same as a single extraction
693
+ - **Per-domain retrieval** — search returns up to 5 results per domain (15 total max), preventing any single domain from crowding out others
579
694
  - **Domain-grouped context** — recalled facts are organized by domain with headings in the system prompt, giving the LLM clear signal about what kind of knowledge each fact represents
580
695
  - **Semantic search** — on each new user message, relevant facts are retrieved and injected into the system prompt as "Recalled Context"
581
696
  - **Local embeddings** — uses FastEmbed (`AllMiniLML6V2`, 384 dimensions) for fully local embedding computation
@@ -619,6 +734,8 @@ Bernard stores all data in `~/.bernard/`:
619
734
  ├── memory/ # Persistent memories (*.md)
620
735
  ├── models/ # Embedding model cache (fastembed)
621
736
  ├── routines/ # Saved routines (*.json)
737
+ ├── specialists/ # Saved specialist profiles (*.json)
738
+ ├── specialist-candidates/ # Auto-detected specialist suggestions (*.json)
622
739
  ├── rag/
623
740
  │ └── memories.json # RAG fact embeddings
624
741
  └── cron/
@@ -687,8 +804,11 @@ src/
687
804
  ├── rag.ts # RAG store (domain-tagged embeddings + per-domain search)
688
805
  ├── embeddings.ts # FastEmbed wrapper
689
806
  ├── routines.ts # RoutineStore (named multi-step workflows)
807
+ ├── specialists.ts # SpecialistStore (reusable expert profiles)
808
+ ├── specialist-candidates.ts # CandidateStore (auto-detected suggestions)
809
+ ├── specialist-detector.ts # LLM-based specialist pattern detection
690
810
  ├── mcp.ts # MCP server manager
691
- ├── rag-worker.ts # Background RAG fact extraction worker
811
+ ├── rag-worker.ts # Background RAG fact extraction + candidate detection
692
812
  ├── setup.ts # First-time setup wizard
693
813
  ├── history.ts # Conversation save/load
694
814
  ├── logger.ts # Debug file logger
@@ -708,7 +828,11 @@ src/
708
828
  │ ├── mcp.ts # MCP config (stdio)
709
829
  │ ├── mcp-url.ts # MCP config (URL-based)
710
830
  │ ├── routine.ts # Routine management tool
711
- │ └── subagent.ts # Parallel sub-agents
831
+ │ ├── specialist.ts # Specialist management tool
832
+ │ ├── specialist-run.ts # Specialist execution (sub-agent with custom persona)
833
+ │ ├── subagent.ts # Parallel sub-agents
834
+ │ ├── task.ts # Isolated task execution (structured JSON output)
835
+ │ └── agent-pool.ts # Shared concurrency pool for agents, tasks, and specialists
712
836
  └── cron/
713
837
  ├── cli.ts # Cron CLI subcommands
714
838
  ├── types.ts # Cron type definitions
package/dist/agent.d.ts CHANGED
@@ -5,6 +5,8 @@ import type { BernardConfig } from './config.js';
5
5
  import type { MemoryStore } from './memory.js';
6
6
  import type { RAGStore, RAGSearchResult } from './rag.js';
7
7
  import { RoutineStore, type RoutineSummary } from './routines.js';
8
+ import { SpecialistStore, type SpecialistSummary } from './specialists.js';
9
+ import { type SpecialistMatch } from './specialist-matcher.js';
8
10
  /**
9
11
  * Assembles the full system prompt including base instructions, memory context, and MCP status.
10
12
  * @internal Exported for testing only.
@@ -13,8 +15,15 @@ import { RoutineStore, type RoutineSummary } from './routines.js';
13
15
  * @param mcpServerNames - Names of currently connected MCP servers, if any
14
16
  * @param ragResults - RAG search results to include as recalled context
15
17
  * @param routineSummaries - Routine summaries to list in the prompt
18
+ * @param specialistSummaries - Specialist summaries to list in the prompt
19
+ * @param specialistMatches - Pre-computed specialist match results for the current input
16
20
  */
17
- export declare function buildSystemPrompt(config: BernardConfig, memoryStore: MemoryStore, mcpServerNames?: string[], ragResults?: RAGSearchResult[], routineSummaries?: RoutineSummary[]): string;
21
+ export declare function buildSystemPrompt(config: BernardConfig, memoryStore: MemoryStore, mcpServerNames?: string[], ragResults?: RAGSearchResult[], routineSummaries?: RoutineSummary[], specialistSummaries?: SpecialistSummary[], specialistMatches?: SpecialistMatch[]): string;
22
+ export interface CompactResult {
23
+ compacted: boolean;
24
+ tokensBefore: number;
25
+ tokensAfter: number;
26
+ }
18
27
  /**
19
28
  * Core agent that manages a multi-step conversation loop with tool calling via the Vercel AI SDK.
20
29
  *
@@ -37,7 +46,8 @@ export declare class Agent {
37
46
  private lastStepPromptTokens;
38
47
  private spinnerStats;
39
48
  private routineStore;
40
- constructor(config: BernardConfig, toolOptions: ToolOptions, memoryStore: MemoryStore, mcpTools?: Record<string, any>, mcpServerNames?: string[], alertContext?: string, initialHistory?: CoreMessage[], ragStore?: RAGStore, routineStore?: RoutineStore);
49
+ private specialistStore;
50
+ constructor(config: BernardConfig, toolOptions: ToolOptions, memoryStore: MemoryStore, mcpTools?: Record<string, any>, mcpServerNames?: string[], alertContext?: string, initialHistory?: CoreMessage[], ragStore?: RAGStore, routineStore?: RoutineStore, specialistStore?: SpecialistStore);
41
51
  /** Returns the current conversation message history. */
42
52
  getHistory(): CoreMessage[];
43
53
  /** Returns the RAG search results from the most recent `processInput` call. */
@@ -46,6 +56,8 @@ export declare class Agent {
46
56
  abort(): void;
47
57
  /** Attaches a spinner stats object that will be updated with token usage during generation. */
48
58
  setSpinnerStats(stats: SpinnerStats): void;
59
+ /** Updates the alert context injected into the system prompt (e.g., specialist candidates). */
60
+ setAlertContext(ctx: string): void;
49
61
  /**
50
62
  * Sends user input through the agent loop: RAG retrieval, context compression, LLM generation, and tool execution.
51
63
  *
@@ -55,6 +67,12 @@ export declare class Agent {
55
67
  * @throws Error wrapping the underlying API error if generation fails for non-abort, non-overflow reasons
56
68
  */
57
69
  processInput(userInput: string): Promise<void>;
70
+ /** Extracts a structured log of tool calls from generateText step results. */
71
+ private extractToolCallLog;
72
+ /** Runs the critic agent to verify the main agent's response against actual tool calls. */
73
+ private runCritic;
74
+ /** Compresses conversation history in-place, returning token usage stats. */
75
+ compactHistory(): Promise<CompactResult>;
58
76
  /** Resets conversation history, scratch notes, and RAG tracking state for a fresh session. */
59
77
  clearHistory(): void;
60
78
  }