npm - bernard-agent - Versions diffs - 0.5.2 → 0.6.1 - Mend

bernard-agent 0.5.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

package/README.md +163 -39
package/dist/agent.d.ts +20 -2
package/dist/agent.js +224 -9
package/dist/agent.js.map +1 -1
package/dist/config.d.ts +10 -2
package/dist/config.js +36 -11
package/dist/config.js.map +1 -1
package/dist/context.d.ts +4 -2
package/dist/context.js +9 -6
package/dist/context.js.map +1 -1
package/dist/domains.js +35 -0
package/dist/domains.js.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/output.d.ts +27 -0
package/dist/output.js +97 -5
package/dist/output.js.map +1 -1
package/dist/paths.d.ts +2 -0
package/dist/paths.js +3 -1
package/dist/paths.js.map +1 -1
package/dist/rag-worker.js +16 -0
package/dist/rag-worker.js.map +1 -1
package/dist/repl.js +378 -18
package/dist/repl.js.map +1 -1
package/dist/reserved-names.d.ts +5 -0
package/dist/reserved-names.js +32 -0
package/dist/reserved-names.js.map +1 -0
package/dist/routines.js +10 -19
package/dist/routines.js.map +1 -1
package/dist/specialist-candidates.d.ts +45 -0
package/dist/specialist-candidates.js +154 -0
package/dist/specialist-candidates.js.map +1 -0
package/dist/specialist-detector.d.ts +12 -0
package/dist/specialist-detector.js +124 -0
package/dist/specialist-detector.js.map +1 -0
package/dist/specialist-matcher.d.ts +15 -0
package/dist/specialist-matcher.js +192 -0
package/dist/specialist-matcher.js.map +1 -0
package/dist/specialists.d.ts +50 -0
package/dist/specialists.js +173 -0
package/dist/specialists.js.map +1 -0
package/dist/tools/agent-pool.d.ts +20 -0
package/dist/tools/agent-pool.js +41 -0
package/dist/tools/agent-pool.js.map +1 -0
package/dist/tools/index.d.ts +2 -1
package/dist/tools/index.js +3 -1
package/dist/tools/index.js.map +1 -1
package/dist/tools/specialist-run.d.ts +39 -0
package/dist/tools/specialist-run.js +123 -0
package/dist/tools/specialist-run.js.map +1 -0
package/dist/tools/specialist.d.ts +40 -0
package/dist/tools/specialist.js +107 -0
package/dist/tools/specialist.js.map +1 -0
package/dist/tools/subagent.d.ts +1 -1
package/dist/tools/subagent.js +8 -11
package/dist/tools/subagent.js.map +1 -1
package/dist/tools/task.d.ts +45 -0
package/dist/tools/task.js +155 -0
package/dist/tools/task.js.map +1 -0
package/dist/update.d.ts +7 -0
package/dist/update.js +16 -2
package/dist/update.js.map +1 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -24,7 +24,11 @@ A local CLI AI agent that executes terminal commands, manages scheduled tasks, r
   - [Date and Time](#date-and-time)
   - [Time Range Calculations](#time-range-calculations)
   - [Sub-Agents](#sub-agents)
+  - [Tasks](#tasks)
   - [Routines](#routines)
+  - [Specialists](#specialists)
+  - [Specialist Suggestions](#specialist-suggestions)
+  - [Critic Mode](#critic-mode)
 - [Cron Jobs (Scheduled Tasks)](#cron-jobs-scheduled-tasks)
   - [Creating Jobs](#creating-jobs)
   - [Managing Jobs](#managing-jobs)
@@ -124,17 +128,19 @@ bernard providers
 Bernard loads `.env` from the current directory first, then falls back to `~/.bernard/.env`.
-| Variable                | Description                                 | Default                   |
-| ----------------------- | ------------------------------------------- | ------------------------- |
-| `BERNARD_PROVIDER`      | LLM provider (`anthropic`, `openai`, `xai`) | `anthropic`               |
-| `BERNARD_MODEL`         | Model name                                  | Provider-specific default |
-| `BERNARD_MAX_TOKENS`    | Max response tokens                         | `4096`                    |
-| `BERNARD_SHELL_TIMEOUT` | Shell command timeout (ms)                  | `30000`                   |
-| `BERNARD_RAG_ENABLED`   | Enable the RAG memory system                | `true`                    |
-| `BERNARD_DEBUG`         | Enable debug logging                        | unset                     |
-| `ANTHROPIC_API_KEY`     | Anthropic API key                           | —                         |
-| `OPENAI_API_KEY`        | OpenAI API key                              | —                         |
-| `XAI_API_KEY`           | xAI API key                                 | —                         |
+| Variable                | Description                                           | Default                   |
+| ----------------------- | ----------------------------------------------------- | ------------------------- |
+| `BERNARD_PROVIDER`      | LLM provider (`anthropic`, `openai`, `xai`)           | `anthropic`               |
+| `BERNARD_MODEL`         | Model name                                            | Provider-specific default |
+| `BERNARD_MAX_TOKENS`    | Max response tokens                                   | `4096`                    |
+| `BERNARD_SHELL_TIMEOUT` | Shell command timeout (ms)                            | `30000`                   |
+| `BERNARD_TOKEN_WINDOW`  | Context window size for compression (0 = auto-detect) | `0`                       |
+| `BERNARD_RAG_ENABLED`   | Enable the RAG memory system                          | `true`                    |
+| `BERNARD_CRITIC_MODE`   | Enable critic mode for response verification          | `false`                   |
+| `BERNARD_DEBUG`         | Enable debug logging                                  | unset                     |
+| `ANTHROPIC_API_KEY`     | Anthropic API key                                     | —                         |
+| `OPENAI_API_KEY`        | OpenAI API key                                        | —                         |
+| `XAI_API_KEY`           | xAI API key                                           | —                         |
 ### Providers and Models
@@ -150,10 +156,11 @@ You can switch providers and models at any time during a session with `/provider
 Options can be changed during a session with `/options` or persisted to `~/.bernard/preferences.json`:
-| Option          | Default | Description                           |
-| --------------- | ------- | ------------------------------------- |
-| `max-tokens`    | `4096`  | Maximum tokens per AI response        |
-| `shell-timeout` | `30000` | Shell command timeout in milliseconds |
+| Option          | Default | Description                                           |
+| --------------- | ------- | ----------------------------------------------------- |
+| `max-tokens`    | `4096`  | Maximum tokens per AI response                        |
+| `shell-timeout` | `30000` | Shell command timeout in milliseconds                 |
+| `token-window`  | `0`     | Context window size for compression (0 = auto-detect) |
 From the CLI:
@@ -217,24 +224,30 @@ Features:
 ### REPL Slash Commands
-| Command           | Description                                  |
-| ----------------- | -------------------------------------------- |
-| `/help`           | Show available commands                      |
-| `/clear`          | Clear conversation history and scratch notes |
-| `/memory`         | List all persistent memories                 |
-| `/scratch`        | List session scratch notes                   |
-| `/mcp`            | List connected MCP servers and their tools   |
-| `/cron`           | Show cron jobs and daemon status             |
-| `/rag`            | Show RAG memory stats and recent facts       |
-| `/provider`       | Switch LLM provider interactively            |
-| `/model`          | Switch model for the current provider        |
-| `/theme`          | Switch color theme                           |
-| `/routines`       | List saved routines                          |
-| `/create-routine` | Create a routine with guided AI assistance   |
-| `/options`        | View and modify runtime options              |
-| `/exit`           | Quit Bernard (also: `exit`, `quit`)          |
-Type `/{routine-id}` to invoke a saved routine directly (e.g., `/deploy-staging`).
+| Command           | Description                                                               |
+| ----------------- | ------------------------------------------------------------------------- |
+| `/help`           | Show available commands                                                   |
+| `/clear`          | Clear conversation history and scratch notes                              |
+| `/compact`        | Compress conversation history in-place                                    |
+| `/task`           | Run an isolated task (no history, structured output)                      |
+| `/memory`         | List all persistent memories                                              |
+| `/scratch`        | List session scratch notes                                                |
+| `/mcp`            | List connected MCP servers and their tools                                |
+| `/cron`           | Show cron jobs and daemon status                                          |
+| `/rag`            | Show RAG memory stats and recent facts                                    |
+| `/provider`       | Switch LLM provider interactively                                         |
+| `/model`          | Switch model for the current provider                                     |
+| `/theme`          | Switch color theme                                                        |
+| `/routines`       | List saved routines                                                       |
+| `/create-routine` | Create a routine with guided AI assistance                                |
+| `/create-task`    | Create a task routine (`task-` prefixed) with guided AI assistance        |
+| `/specialists`    | List saved specialists                                                    |
+| `/candidates`     | Review auto-detected specialist suggestions _(v0.6.0+)_                   |
+| `/critic`         | Toggle critic mode for response verification (on/off)                     |
+| `/options`        | View and modify runtime options (max-tokens, shell-timeout, token-window) |
+| `/exit`           | Quit Bernard (also: `exit`, `quit`)                                       |
+Type `/{routine-id}` or `/{specialist-id}` to invoke a saved routine or specialist directly (e.g., `/deploy-staging`).
 Prefix with `\` to send a `/`-prefixed message as text instead of a command (e.g., `\/etc/hosts` sends the literal string).
@@ -333,7 +346,28 @@ bernard> check the disk usage on /, look up the weather in Austin, and count lin
 Up to 4 concurrent sub-agents. Each gets 10 max steps. Color-coded output in the terminal.
-### Routines
+### Tasks _(v0.6.0+)_
+Tasks are isolated, focused executions that return structured JSON output. Unlike sub-agents (which return free-form text), tasks always produce a `{status, output, details?}` response — making them ideal for machine-readable results, routine chaining, and conditional branching.
+```
+bernard> /task List all TypeScript files in the src directory
+┌─ task — List all TypeScript files in the src directory
+  ▶ shell: find src -name "*.ts" -type f
+└─ task success: Found 23 .ts files
+Found 23 .ts files
+```
+Key differences from sub-agents:
+- **5-step budget** (vs. 10 for sub-agents) — tasks are meant to be quick and focused
+- **Structured JSON output** — always returns `{status: "success"|"error", output: string, details?: string}`
+- **No conversation history** — completely isolated from the current session
+- **Available as both a tool and a command** — the agent can call `task` during routines for chaining, or users can run `/task` directly from the REPL
+- **Shared concurrency pool** — tasks and sub-agents share the same 4-slot limit
+### Routines _(v0.5.0+)_
 Named, persistent multi-step workflows that you can teach Bernard and later invoke with a slash command. Routines capture procedures — deploy scripts, release checklists, onboarding flows — as free-form markdown.
@@ -374,6 +408,84 @@ Use `/routines` in the REPL for a quick list. Routine names also appear in the l
 Storage: one JSON file per routine in `~/.local/share/bernard/routines/`. Max 100 routines. IDs must be lowercase kebab-case (1–60 chars).
+### Specialists _(v0.6.0+)_
+Specialists are reusable expert profiles — persistent personas with custom system prompts and behavioral guidelines that shape how a sub-agent approaches work. Unlike routines (which define _what_ steps to follow), specialists define _how_ to work.
+```
+bernard> create a specialist called "code-reviewer" that reviews code for correctness, style, and security
+  ▶ specialist: create { id: "code-reviewer", name: "Code Reviewer", ... }
+Specialist "Code Reviewer" (code-reviewer) created.
+```
+Run a specialist by typing `/{specialist-id}` or using the `specialist_run` tool:
+```
+bernard> /code-reviewer review the changes in src/agent.ts
+┌─ spec:1 [Code Reviewer] — review the changes in src/agent.ts
+  ▶ shell: git diff src/agent.ts
+└─ spec:1 done
+```
+Each specialist run gets its own `generateText` loop with a 10-step budget, using the specialist's system prompt and guidelines as its persona. Specialists share the concurrency pool with sub-agents and tasks (4 slots max).
+Manage specialists:
+```
+bernard> list my specialists
+  ▶ specialist: list
+bernard> show the code-reviewer specialist
+  ▶ specialist: read { id: "code-reviewer" }
+bernard> update the code-reviewer specialist to also check for accessibility
+  ▶ specialist: update { id: "code-reviewer", guidelines: [...] }
+bernard> delete the code-reviewer specialist
+  ▶ specialist: delete { id: "code-reviewer" }
+```
+Use `/specialists` in the REPL for a quick list. Specialist names also appear in the live hint/autocomplete system when typing `/`.
+Storage: one JSON file per specialist in `~/.local/share/bernard/specialists/`. Max 50 specialists. IDs must be lowercase kebab-case (1–60 chars).
+### Specialist Suggestions _(v0.6.0+)_
+Bernard automatically detects recurring delegation patterns in your conversations and suggests new specialists. Detection runs in the background when you exit a session or use `/clear --save`.
+When candidates are detected, you'll see a notification at the start of your next session:
+```
+  2 specialist suggestion(s) pending. Use /candidates to review.
+```
+Use `/candidates` to see pending suggestions with their name, description, confidence score, and reasoning. You can then accept or reject candidates conversationally (e.g., "accept the code-review candidate"), and Bernard will create the specialist for you.
+Candidates are auto-dismissed after 30 days if not reviewed. Up to 10 pending candidates are stored at a time.
+Storage: one JSON file per candidate in `~/.local/share/bernard/specialist-candidates/`.
+### Critic Mode _(v0.6.0+)_
+Critic mode adds planning, proactive scratch/memory usage, and post-response verification. Toggle it during a session:
+```bash
+/critic on    # Enable critic mode
+/critic off   # Disable critic mode
+/critic       # Show current status
+```
+When enabled:
+- **Planning** — Bernard writes a plan to scratch before multi-step tasks
+- **Proactive scratch** — Accumulates findings in scratch during complex work
+- **Verification** — After tool-using responses, a critic agent reviews the work and prints a verdict (PASS/WARN/FAIL)
+The critic checks that claimed actions match actual tool calls and flags any discrepancies. It adds one extra LLM call after tool-using responses. Simple knowledge answers are not verified.
+Default: off. Recommended for high-stakes work (deployments, git operations, multi-file edits).
 ---
 ## Cron Jobs (Scheduled Tasks)
@@ -566,16 +678,19 @@ Bernard automatically compresses conversation history when it approaches 75% of
 Summarization and domain-specific fact extraction run in parallel. Scratch notes survive compression, so multi-step task progress is never lost.
+When critic mode is enabled (`/critic on`), Bernard writes plans to scratch before complex tasks and verifies outcomes after tool use. See [Critic Mode](#critic-mode).
 ### RAG Memory
 Bernard has a Retrieval-Augmented Generation (RAG) system that provides long-term memory beyond the current session:
-- **Domain-specific extraction** — facts are extracted into three specialized domains, each with its own LLM prompt:
+- **Domain-specific extraction** — facts are extracted into four specialized domains, each with its own LLM prompt:
   - **Tool Usage Patterns** — command sequences, error resolutions, build/deploy workflows
   - **User Preferences** — communication style, workflow conventions, repeated instructions
   - **General Knowledge** — project structure, architecture decisions, environment info
-- **Parallel extraction** — all three domain extractors run concurrently via `Promise.allSettled`, so wall-clock latency is roughly the same as a single extraction
-- **Per-domain retrieval** — search returns up to 3 results per domain (9 total max), preventing any single domain from crowding out others
+  - **Conversation Summaries** — what was discussed, approaches taken, tools/specialists/routines used, outcomes
+- **Parallel extraction** — all four domain extractors run concurrently via `Promise.allSettled`, so wall-clock latency is roughly the same as a single extraction
+- **Per-domain retrieval** — search returns up to 5 results per domain (15 total max), preventing any single domain from crowding out others
 - **Domain-grouped context** — recalled facts are organized by domain with headings in the system prompt, giving the LLM clear signal about what kind of knowledge each fact represents
 - **Semantic search** — on each new user message, relevant facts are retrieved and injected into the system prompt as "Recalled Context"
 - **Local embeddings** — uses FastEmbed (`AllMiniLML6V2`, 384 dimensions) for fully local embedding computation
@@ -619,6 +734,8 @@ Bernard stores all data in `~/.bernard/`:
 ├── memory/                      # Persistent memories (*.md)
 ├── models/                      # Embedding model cache (fastembed)
 ├── routines/                    # Saved routines (*.json)
+├── specialists/                 # Saved specialist profiles (*.json)
+├── specialist-candidates/       # Auto-detected specialist suggestions (*.json)
 ├── rag/
 │   └── memories.json            # RAG fact embeddings
 └── cron/
@@ -687,8 +804,11 @@ src/
 ├── rag.ts                # RAG store (domain-tagged embeddings + per-domain search)
 ├── embeddings.ts         # FastEmbed wrapper
 ├── routines.ts           # RoutineStore (named multi-step workflows)
+├── specialists.ts        # SpecialistStore (reusable expert profiles)
+├── specialist-candidates.ts  # CandidateStore (auto-detected suggestions)
+├── specialist-detector.ts    # LLM-based specialist pattern detection
 ├── mcp.ts                # MCP server manager
-├── rag-worker.ts         # Background RAG fact extraction worker
+├── rag-worker.ts         # Background RAG fact extraction + candidate detection
 ├── setup.ts              # First-time setup wizard
 ├── history.ts            # Conversation save/load
 ├── logger.ts             # Debug file logger
@@ -708,7 +828,11 @@ src/
 │   ├── mcp.ts            # MCP config (stdio)
 │   ├── mcp-url.ts        # MCP config (URL-based)
 │   ├── routine.ts        # Routine management tool
-│   └── subagent.ts       # Parallel sub-agents
+│   ├── specialist.ts     # Specialist management tool
+│   ├── specialist-run.ts # Specialist execution (sub-agent with custom persona)
+│   ├── subagent.ts       # Parallel sub-agents
+│   ├── task.ts           # Isolated task execution (structured JSON output)
+│   └── agent-pool.ts     # Shared concurrency pool for agents, tasks, and specialists
 └── cron/
     ├── cli.ts            # Cron CLI subcommands
     ├── types.ts          # Cron type definitions

package/dist/agent.d.ts CHANGED

@@ -5,6 +5,8 @@ import type { BernardConfig } from './config.js';
 import type { MemoryStore } from './memory.js';
 import type { RAGStore, RAGSearchResult } from './rag.js';
 import { RoutineStore, type RoutineSummary } from './routines.js';
+import { SpecialistStore, type SpecialistSummary } from './specialists.js';
+import { type SpecialistMatch } from './specialist-matcher.js';
 /**
  * Assembles the full system prompt including base instructions, memory context, and MCP status.
  * @internal Exported for testing only.
@@ -13,8 +15,15 @@ import { RoutineStore, type RoutineSummary } from './routines.js';
  * @param mcpServerNames - Names of currently connected MCP servers, if any
  * @param ragResults - RAG search results to include as recalled context
  * @param routineSummaries - Routine summaries to list in the prompt
+ * @param specialistSummaries - Specialist summaries to list in the prompt
+ * @param specialistMatches - Pre-computed specialist match results for the current input
  */
-export declare function buildSystemPrompt(config: BernardConfig, memoryStore: MemoryStore, mcpServerNames?: string[], ragResults?: RAGSearchResult[], routineSummaries?: RoutineSummary[]): string;
+export declare function buildSystemPrompt(config: BernardConfig, memoryStore: MemoryStore, mcpServerNames?: string[], ragResults?: RAGSearchResult[], routineSummaries?: RoutineSummary[], specialistSummaries?: SpecialistSummary[], specialistMatches?: SpecialistMatch[]): string;
+export interface CompactResult {
+    compacted: boolean;
+    tokensBefore: number;
+    tokensAfter: number;
+}
 /**
  * Core agent that manages a multi-step conversation loop with tool calling via the Vercel AI SDK.
  *
@@ -37,7 +46,8 @@ export declare class Agent {
     private lastStepPromptTokens;
     private spinnerStats;
     private routineStore;
-    constructor(config: BernardConfig, toolOptions: ToolOptions, memoryStore: MemoryStore, mcpTools?: Record<string, any>, mcpServerNames?: string[], alertContext?: string, initialHistory?: CoreMessage[], ragStore?: RAGStore, routineStore?: RoutineStore);
+    private specialistStore;
+    constructor(config: BernardConfig, toolOptions: ToolOptions, memoryStore: MemoryStore, mcpTools?: Record<string, any>, mcpServerNames?: string[], alertContext?: string, initialHistory?: CoreMessage[], ragStore?: RAGStore, routineStore?: RoutineStore, specialistStore?: SpecialistStore);
     /** Returns the current conversation message history. */
     getHistory(): CoreMessage[];
     /** Returns the RAG search results from the most recent `processInput` call. */
@@ -46,6 +56,8 @@ export declare class Agent {
     abort(): void;
     /** Attaches a spinner stats object that will be updated with token usage during generation. */
     setSpinnerStats(stats: SpinnerStats): void;
+    /** Updates the alert context injected into the system prompt (e.g., specialist candidates). */
+    setAlertContext(ctx: string): void;
     /**
      * Sends user input through the agent loop: RAG retrieval, context compression, LLM generation, and tool execution.
      *
@@ -55,6 +67,12 @@ export declare class Agent {
      * @throws Error wrapping the underlying API error if generation fails for non-abort, non-overflow reasons
      */
     processInput(userInput: string): Promise<void>;
+    /** Extracts a structured log of tool calls from generateText step results. */
+    private extractToolCallLog;
+    /** Runs the critic agent to verify the main agent's response against actual tool calls. */
+    private runCritic;
+    /** Compresses conversation history in-place, returning token usage stats. */
+    compactHistory(): Promise<CompactResult>;
     /** Resets conversation history, scratch notes, and RAG tracking state for a fresh session. */
     clearHistory(): void;
 }