npm - bernard-agent - Versions diffs - 0.7.0 → 0.8.0 - Mend

bernard-agent 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/README.md +96 -50
package/dist/agent.d.ts +7 -4
package/dist/agent.js +88 -127
package/dist/agent.js.map +1 -1
package/dist/config.d.ts +13 -1
package/dist/config.js +66 -4
package/dist/config.js.map +1 -1
package/dist/critic.d.ts +32 -0
package/dist/critic.js +118 -0
package/dist/critic.js.map +1 -0
package/dist/cron/runner.js +54 -34
package/dist/cron/runner.js.map +1 -1
package/dist/output.d.ts +6 -4
package/dist/output.js +31 -10
package/dist/output.js.map +1 -1
package/dist/overlap-checker.d.ts +61 -0
package/dist/overlap-checker.js +106 -0
package/dist/overlap-checker.js.map +1 -0
package/dist/pac.d.ts +37 -0
package/dist/pac.js +69 -0
package/dist/pac.js.map +1 -0
package/dist/rag-query.js +2 -1
package/dist/rag-query.js.map +1 -1
package/dist/rag-worker.js +4 -3
package/dist/rag-worker.js.map +1 -1
package/dist/repl.js +160 -4
package/dist/repl.js.map +1 -1
package/dist/specialist-candidates.d.ts +13 -0
package/dist/specialist-candidates.js.map +1 -1
package/dist/specialist-detector.d.ts +20 -5
package/dist/specialist-detector.js +72 -9
package/dist/specialist-detector.js.map +1 -1
package/dist/specialist-matcher.d.ts +5 -0
package/dist/specialist-matcher.js +1 -0
package/dist/specialist-matcher.js.map +1 -1
package/dist/tools/datetime.d.ts +6 -0
package/dist/tools/datetime.js +38 -1
package/dist/tools/datetime.js.map +1 -1
package/dist/tools/mcp-url.js +1 -1
package/dist/tools/mcp-url.js.map +1 -1
package/dist/tools/specialist-run.js +37 -12
package/dist/tools/specialist-run.js.map +1 -1
package/dist/tools/subagent.js +37 -12
package/dist/tools/subagent.js.map +1 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -129,19 +129,22 @@ bernard providers
 Bernard loads `.env` from the current directory first, then falls back to `~/.bernard/.env`.
-| Variable                | Description                                           | Default                   |
-| ----------------------- | ----------------------------------------------------- | ------------------------- |
-| `BERNARD_PROVIDER`      | LLM provider (`anthropic`, `openai`, `xai`)           | `anthropic`               |
-| `BERNARD_MODEL`         | Model name                                            | Provider-specific default |
-| `BERNARD_MAX_TOKENS`    | Max response tokens                                   | `4096`                    |
-| `BERNARD_SHELL_TIMEOUT` | Shell command timeout (ms)                            | `30000`                   |
-| `BERNARD_TOKEN_WINDOW`  | Context window size for compression (0 = auto-detect) | `0`                       |
-| `BERNARD_RAG_ENABLED`   | Enable the RAG memory system                          | `true`                    |
-| `BERNARD_CRITIC_MODE`   | Enable critic mode for response verification          | `false`                   |
-| `BERNARD_DEBUG`         | Enable debug logging                                  | unset                     |
-| `ANTHROPIC_API_KEY`     | Anthropic API key                                     | —                         |
-| `OPENAI_API_KEY`        | OpenAI API key                                        | —                         |
-| `XAI_API_KEY`           | xAI API key                                           | —                         |
+| Variable                          | Description                                              | Default                   |
+| --------------------------------- | -------------------------------------------------------- | ------------------------- |
+| `BERNARD_PROVIDER`                | LLM provider (`anthropic`, `openai`, `xai`)              | `anthropic`               |
+| `BERNARD_MODEL`                   | Model name                                               | Provider-specific default |
+| `BERNARD_MAX_TOKENS`              | Max response tokens                                      | `4096`                    |
+| `BERNARD_SHELL_TIMEOUT`           | Shell command timeout (ms)                               | `30000`                   |
+| `BERNARD_TOKEN_WINDOW`            | Context window size for compression (0 = auto-detect)    | `0`                       |
+| `BERNARD_MAX_STEPS`               | Max agent loop iterations per request                    | `25`                      |
+| `BERNARD_RAG_ENABLED`             | Enable the RAG memory system                             | `true`                    |
+| `BERNARD_CRITIC_MODE`             | Enable critic mode for response verification             | `false`                   |
+| `BERNARD_AUTO_CREATE_SPECIALISTS` | Auto-create specialists above confidence threshold       | `false`                   |
+| `BERNARD_AUTO_CREATE_THRESHOLD`   | Confidence threshold for auto-creating specialists (0-1) | `0.8`                     |
+| `BERNARD_DEBUG`                   | Enable debug logging                                     | unset                     |
+| `ANTHROPIC_API_KEY`               | Anthropic API key                                        | —                         |
+| `OPENAI_API_KEY`                  | OpenAI API key                                           | —                         |
+| `XAI_API_KEY`                     | xAI API key                                              | —                         |
 ### Providers and Models
@@ -157,11 +160,12 @@ You can switch providers and models at any time during a session with `/provider
 Options can be changed during a session with `/options` or persisted to `~/.bernard/preferences.json`:
-| Option          | Default | Description                                           |
-| --------------- | ------- | ----------------------------------------------------- |
-| `max-tokens`    | `4096`  | Maximum tokens per AI response                        |
-| `shell-timeout` | `30000` | Shell command timeout in milliseconds                 |
-| `token-window`  | `0`     | Context window size for compression (0 = auto-detect) |
+| Option          | Default | Description                                                  |
+| --------------- | ------- | ------------------------------------------------------------ |
+| `max-tokens`    | `4096`  | Maximum tokens per AI response                               |
+| `max-steps`     | `25`    | Maximum agent loop iterations per request (tool call chains) |
+| `shell-timeout` | `30000` | Shell command timeout in milliseconds                        |
+| `token-window`  | `0`     | Context window size for compression (0 = auto-detect)        |
 From the CLI:
@@ -225,28 +229,30 @@ Features:
 ### REPL Slash Commands
-| Command           | Description                                                               |
-| ----------------- | ------------------------------------------------------------------------- |
-| `/help`           | Show available commands                                                   |
-| `/clear`          | Clear conversation history and scratch notes                              |
-| `/compact`        | Compress conversation history in-place                                    |
-| `/task`           | Run an isolated task (no history, structured output)                      |
-| `/memory`         | List all persistent memories                                              |
-| `/scratch`        | List session scratch notes                                                |
-| `/mcp`            | List connected MCP servers and their tools                                |
-| `/cron`           | Show cron jobs and daemon status                                          |
-| `/rag`            | Show RAG memory stats and recent facts                                    |
-| `/provider`       | Switch LLM provider interactively                                         |
-| `/model`          | Switch model for the current provider                                     |
-| `/theme`          | Switch color theme                                                        |
-| `/routines`       | List saved routines                                                       |
-| `/create-routine` | Create a routine with guided AI assistance                                |
-| `/create-task`    | Create a task routine (`task-` prefixed) with guided AI assistance        |
-| `/specialists`    | List saved specialists                                                    |
-| `/candidates`     | Review auto-detected specialist suggestions _(v0.6.0+)_                   |
-| `/critic`         | Toggle critic mode for response verification (on/off)                     |
-| `/options`        | View and modify runtime options (max-tokens, shell-timeout, token-window) |
-| `/exit`           | Quit Bernard (also: `exit`, `quit`)                                       |
+| Command           | Description                                                                          |
+| ----------------- | ------------------------------------------------------------------------------------ |
+| `/help`           | Show available commands                                                              |
+| `/clear`          | Clear conversation history and scratch notes                                         |
+| `/compact`        | Compress conversation history in-place                                               |
+| `/task`           | Run an isolated task (no history, structured output)                                 |
+| `/memory`         | List all persistent memories                                                         |
+| `/scratch`        | List session scratch notes                                                           |
+| `/mcp`            | List connected MCP servers and their tools                                           |
+| `/cron`           | Show cron jobs and daemon status                                                     |
+| `/rag`            | Show RAG memory stats and recent facts                                               |
+| `/provider`       | Switch LLM provider interactively                                                    |
+| `/model`          | Switch model for the current provider                                                |
+| `/theme`          | Switch color theme                                                                   |
+| `/routines`       | List saved routines                                                                  |
+| `/create-routine` | Create a routine with guided AI assistance                                           |
+| `/create-task`    | Create a task routine (`task-` prefixed) with guided AI assistance                   |
+| `/specialists`    | List saved specialists                                                               |
+| `/candidates`     | Review auto-detected specialist suggestions _(v0.6.0+)_                              |
+| `/critic`         | Toggle critic mode for response verification (on/off)                                |
+| `/agent-options`  | Configure auto-creation for specialist agents                                        |
+| `/options`        | View and modify runtime options (max-tokens, max-steps, shell-timeout, token-window) |
+| `/debug`          | Print a diagnostic report for troubleshooting (no secrets leaked)                    |
+| `/exit`           | Quit Bernard (also: `exit`, `quit`)                                                  |
 Type `/{routine-id}` or `/{specialist-id}` to invoke a saved routine or specialist directly (e.g., `/deploy-staging`).
@@ -361,7 +367,7 @@ bernard> check the disk usage on /, look up the weather in Austin, and count lin
   ...
 ```
-Up to 4 concurrent sub-agents. Each gets 10 max steps. Color-coded output in the terminal. Sub-agents accept per-invocation provider/model overrides to use a different LLM than the main session.
+Up to 4 concurrent sub-agents. Each gets 50% of the main agent's step budget (e.g. 13 steps when `max-steps` is 25). Color-coded output in the terminal. Sub-agents accept per-invocation provider/model overrides to use a different LLM than the main session.
 ### Tasks _(v0.6.0+)_
@@ -481,6 +487,19 @@ When candidates are detected, you'll see a notification at the start of your nex
 Use `/candidates` to see pending suggestions with their name, description, confidence score, and reasoning. You can then accept or reject candidates conversationally (e.g., "accept the code-review candidate"), and Bernard will create the specialist for you.
+**Overlap detection** — Before suggesting a new specialist, Bernard computes a token-based similarity score against all existing specialists and pending candidates. If the overlap exceeds 60%, the candidate is suppressed. When a candidate partially overlaps with an existing specialist, Bernard may suggest enhancing the existing specialist instead.
+**Auto-creation** — You can enable automatic specialist creation for high-confidence candidates:
+```bash
+/agent-options auto-create on       # Enable auto-creation
+/agent-options auto-create off      # Disable auto-creation
+/agent-options threshold 0.85       # Set confidence threshold (0-1)
+/agent-options                      # Show current settings
+```
+Or via environment variables: `BERNARD_AUTO_CREATE_SPECIALISTS=true` and `BERNARD_AUTO_CREATE_THRESHOLD=0.85`.
 Candidates are auto-dismissed after 30 days if not reviewed. Up to 10 pending candidates are stored at a time.
 Storage: one JSON file per candidate in `~/.local/share/bernard/specialist-candidates/`.
@@ -503,6 +522,12 @@ When enabled:
 The critic checks that claimed actions match actual tool calls and flags any discrepancies. It adds one extra LLM call after tool-using responses. Simple knowledge answers are not verified.
+**PAC System (Plan-Act-Critic)** — When critic mode is enabled, sub-agents and specialists also get critic verification via a reusable PAC loop. The PAC loop runs the critic after each sub-agent/specialist execution, and if the critic finds issues, it retries the task with feedback (up to 2 retries). This applies to:
+- Sub-agents (`agent` tool)
+- Specialist runs (`specialist_run` tool)
+- Cron job executions (daemon mode)
 Default: off. Recommended for high-stakes work (deployments, git operations, multi-file edits).
 ---
@@ -697,6 +722,8 @@ Bernard automatically compresses conversation history when it approaches 75% of
 Summarization and domain-specific fact extraction run in parallel. Scratch notes survive compression, so multi-step task progress is never lost.
+**Auto-continue on truncation:** If a response hits the `max-tokens` limit and is cut off, Bernard automatically continues where it left off (up to 3 continuations). After completing, it shows a recommended `max-tokens` value based on actual usage. If the response is still incomplete after 3 continuations, a warning is shown with instructions to increase the limit via `/options max-tokens <value>`.
 When critic mode is enabled (`/critic on`), Bernard writes plans to scratch before complex tasks and verifies outcomes after tool use. See [Critic Mode](#critic-mode).
 ### RAG Memory
@@ -741,17 +768,17 @@ Storage: `~/.bernard/conversation-history.json`
 ## File Structure
-Bernard stores all data in `~/.bernard/`:
+Bernard follows the [XDG Base Directory Specification](https://specifications.freedesktop.org/basedir/latest/), splitting files across four standard directories:
 ```
-~/.bernard/
-├── keys.json                    # API keys (mode 0600)
+~/.config/bernard/               # Config (XDG_CONFIG_HOME)
 ├── preferences.json             # Provider, model, options
+├── keys.json                    # API keys (mode 0600)
 ├── .env                         # Fallback environment config
-├── mcp.json                     # MCP server configuration
-├── conversation-history.json    # Last session (for --resume)
+└── mcp.json                     # MCP server configuration
+~/.local/share/bernard/          # Data (XDG_DATA_HOME)
 ├── memory/                      # Persistent memories (*.md)
-├── models/                      # Embedding model cache (fastembed)
 ├── routines/                    # Saved routines (*.json)
 ├── specialists/                 # Saved specialist profiles (*.json)
 ├── specialist-candidates/       # Auto-detected specialist suggestions (*.json)
@@ -759,12 +786,22 @@ Bernard stores all data in `~/.bernard/`:
 │   └── memories.json            # RAG fact embeddings
 └── cron/
     ├── jobs.json                # Scheduled jobs
-    ├── daemon.pid               # Daemon process ID
-    ├── daemon.log               # Daemon output (rotates at 1MB)
     ├── logs/                    # Per-job execution logs
     └── alerts/                  # Cron alert files
+~/.cache/bernard/                # Cache (XDG_CACHE_HOME)
+├── models/                      # Embedding model cache (fastembed)
+└── update-check.json            # Update check state
+~/.local/state/bernard/          # State (XDG_STATE_HOME)
+├── conversation-history.json    # Last session (for --resume)
+├── logs/                        # Debug log files (*.jsonl)
+├── cron-daemon.pid              # Daemon process ID
+└── cron-daemon.log              # Daemon output (rotates at 1MB)
 ```
+Override all directories with a single flat path: `BERNARD_HOME=/path`. On first run, files are auto-migrated from legacy `~/.bernard/` to XDG locations.
 ---
 ## Development
@@ -796,6 +833,10 @@ BERNARD_DEBUG=1 bernard
 Logs are written to `.logs/YYYY-MM-DD.log` in JSON format, covering agent processing, RAG operations, context compression, tool execution, and MCP operations.
+### Diagnostic Report
+Use `/debug` in the REPL to print a diagnostic report useful for troubleshooting. The report includes runtime info (Bernard version, Node.js version, OS), LLM configuration, API key status (configured/not set — keys are never shown), MCP server status, RAG/memory/cron state, conversation stats, active settings, and file paths. No secrets are included in the output.
 ### Adding a New Provider
 1. Install the AI SDK provider package (e.g., `npm install @ai-sdk/google`)
@@ -815,6 +856,7 @@ src/
 ├── repl.ts               # Interactive REPL loop
 ├── agent.ts              # Agent class (generateText loop)
 ├── config.ts             # Config loading and validation
+├── critic.ts             # Critic agent for response verification
 ├── output.ts             # Terminal formatting (Chalk)
 ├── theme.ts              # Color theme definitions and switching
 ├── memory.ts             # MemoryStore (persistent + scratch)
@@ -826,7 +868,11 @@ src/
 ├── specialists.ts        # SpecialistStore (reusable expert profiles)
 ├── specialist-candidates.ts  # CandidateStore (auto-detected suggestions)
 ├── specialist-detector.ts    # LLM-based specialist pattern detection
+├── specialist-matcher.ts    # Keyword scorer for specialist auto-dispatch
 ├── mcp.ts                # MCP server manager
+├── overlap-checker.ts    # Token-based Jaccard overlap for specialist dedup
+├── pac.ts                # Plan-Act-Critic loop wrapper
+├── paths.ts              # Centralized XDG file path resolution
 ├── rag-worker.ts         # Background RAG fact extraction + candidate detection
 ├── setup.ts              # First-time setup wizard
 ├── history.ts            # Conversation save/load
@@ -883,7 +929,7 @@ Found a bug? Please [open an issue](https://github.com/phillt/bernard/issues/new
 - Steps to reproduce the problem
 - Expected vs. actual behavior
-- Your environment (OS, Node version, Bernard version, provider/model)
+- Your environment — run `/debug` in the REPL and paste the output
 - Any relevant logs (run with `BERNARD_DEBUG=1` for verbose output)
 ## Third-Party Licenses

package/dist/agent.d.ts CHANGED

@@ -49,6 +49,8 @@ export declare class Agent {
     private routineStore;
     private specialistStore;
     private candidateStore?;
+    private stepLimitHitCount;
+    private lastStepLimitHit;
     constructor(config: BernardConfig, toolOptions: ToolOptions, memoryStore: MemoryStore, mcpTools?: Record<string, any>, mcpServerNames?: string[], alertContext?: string, initialHistory?: CoreMessage[], ragStore?: RAGStore, routineStore?: RoutineStore, specialistStore?: SpecialistStore, candidateStore?: CandidateStoreReader);
     /** Returns the current conversation message history. */
     getHistory(): CoreMessage[];
@@ -56,6 +58,11 @@ export declare class Agent {
     getLastRAGResults(): RAGSearchResult[];
     /** Cancels the in-flight LLM request, if any. Safe to call when no request is active. */
     abort(): void;
+    /** Returns step limit hit info from last processInput, or null if limit wasn't hit. */
+    getStepLimitHit(): {
+        currentLimit: number;
+        hitCount: number;
+    } | null;
     /** Attaches a spinner stats object that will be updated with token usage during generation. */
     setSpinnerStats(stats: SpinnerStats): void;
     /** Updates the alert context injected into the system prompt (e.g., specialist candidates). */
@@ -69,10 +76,6 @@ export declare class Agent {
      * @throws Error wrapping the underlying API error if generation fails for non-abort, non-overflow reasons
      */
     processInput(userInput: string): Promise<void>;
-    /** Extracts a structured log of tool calls from generateText step results. */
-    private extractToolCallLog;
-    /** Runs the critic agent to verify the main agent's response against actual tool calls. */
-    private runCritic;
     /** Compresses conversation history in-place, returning token usage stats. */
     compactHistory(): Promise<CompactResult>;
     /** Resets conversation history, scratch notes, and RAG tracking state for a fresh session. */

package/dist/agent.js CHANGED

@@ -9,6 +9,7 @@ const subagent_js_1 = require("./tools/subagent.js");
 const task_js_1 = require("./tools/task.js");
 const output_js_1 = require("./output.js");
 const logger_js_1 = require("./logger.js");
+const critic_js_1 = require("./critic.js");
 const context_js_1 = require("./context.js");
 const routines_js_1 = require("./routines.js");
 const specialists_js_1 = require("./specialists.js");
@@ -16,6 +17,7 @@ const specialist_run_js_1 = require("./tools/specialist-run.js");
 const specialist_matcher_js_1 = require("./specialist-matcher.js");
 const memory_context_js_1 = require("./memory-context.js");
 const rag_query_js_1 = require("./rag-query.js");
+const datetime_js_1 = require("./tools/datetime.js");
 const BASE_SYSTEM_PROMPT = `# Identity
 You are Bernard, a local CLI AI agent with direct shell access, persistent memory, and a suite of tools for system tasks, web reading, and scheduling.
@@ -38,6 +40,14 @@ You exist only while processing a user message. Each response is a single turn:
 - When uncertain about intent, ask a clarifying question rather than guessing.
 - If a request is ambiguous or risky, state your assumptions before acting.
+## Planning
+Before executing any task that requires more than two tool calls:
+1. Briefly outline your plan in your response text — what steps you intend to take and in what order.
+2. Execute the plan step by step. If the approach needs to change, state the revised plan before continuing.
+3. After completion, summarize what was done and the outcome.
+This makes your reasoning visible and reduces errors on multi-step tasks. For simple tasks (1-2 tool calls), skip the plan and act directly.
 ## Tool Execution Integrity
 - NEVER simulate, fabricate, or narrate tool execution. If a task requires running a command, you MUST call the shell tool — do not write prose describing what a command "would return" or pretend you already ran it.
 - Your text output can only describe results you actually received from a tool call in this conversation. If you have not called a tool, you have no results to report.
@@ -115,10 +125,10 @@ const CRITIC_MODE_PROMPT = `## Reliability Mode (Active)
 You are operating with enhanced reliability. Follow these additional rules:
-### Planning
-Before executing any task that requires more than two tool calls, file modifications, git operations, or multi-step research:
-1. Write a brief plan to scratch (key: "plan") listing the steps you intend to take and the expected outcomes.
-2. Reference this plan during execution. Update it if the approach changes.
+### Enhanced Planning (Scratch-Based)
+In addition to stating your plan in text, persist it to scratch for reliability:
+1. Write your plan to scratch (key: "plan") listing steps and expected outcomes.
+2. Reference and update the scratch plan during execution.
 3. After completion, delete the plan from scratch to keep it clean.
 ### Proactive Scratch Usage
@@ -133,32 +143,6 @@ Before executing any task that requires more than two tool calls, file modificat
 ### Verification
 - After any mutation (file write, git commit, API call), immediately verify the outcome with a read-only command.
 - Your work will be reviewed by a critic agent afterward. Only claim what you can prove with tool output.`;
-const CRITIC_TOTAL_RESULT_BUDGET = 8000;
-const CRITIC_MIN_RESULT_CHARS = 500;
-const CRITIC_MAX_RESPONSE_LENGTH = 4000;
-const CRITIC_MAX_ARGS_LENGTH = 1000;
-const CRITIC_SYSTEM_PROMPT = `You are a verification agent for Bernard, a CLI AI assistant. Your role is to review the agent's work and verify its integrity.
-You will receive:
-1. The user's original request
-2. The agent's final text response
-3. A log of actual tool calls made (tool name, arguments, results) — note that tool results, arguments, and the agent response may be truncated for context efficiency
-Your job:
-- Check if the agent's claims in its response are supported by actual tool call results.
-- Verify that tool calls were actually made for actions the agent claims to have performed.
-- Flag any claims not backed by tool evidence (e.g., "I created the file" but no shell/write tool call).
-- Flag any tool results that suggest failure but were reported as success.
-- Tool results and the agent response may be truncated for context efficiency. If a tool result appears cut off, do not treat the missing portion as evidence of failure. Only flag FAIL when there is positive evidence of failure (e.g., an error message visible in the output), not merely the absence of success confirmation in truncated output.
-- Check if the response addresses the user's original intent.
-Output format (plain text, concise):
-VERDICT: PASS | WARN | FAIL
-[1-3 sentence explanation]
-[If WARN/FAIL: specific issues found]
-Be strict but fair. Not every response needs tool calls — knowledge answers are fine. Focus on cases where the agent *claims* to have done something via tools.`;
-const CRITIC_MAX_RETRIES = 2;
 /**
  * Assembles the full system prompt including base instructions, memory context, and MCP status.
  * @internal Exported for testing only.
@@ -171,13 +155,7 @@ const CRITIC_MAX_RETRIES = 2;
  * @param specialistMatches - Pre-computed specialist match results for the current input
  */
 function buildSystemPrompt(config, memoryStore, mcpServerNames, ragResults, routineSummaries, specialistSummaries, specialistMatches) {
-    const today = new Date().toLocaleDateString('en-US', {
-        weekday: 'long',
-        year: 'numeric',
-        month: 'long',
-        day: 'numeric',
-    });
-    let prompt = BASE_SYSTEM_PROMPT + `\n\nToday's date is ${today}.`;
+    let prompt = BASE_SYSTEM_PROMPT + `\n\nCurrent date and time: ${(0, datetime_js_1.formatCurrentDateTime)()}.`;
     prompt += `\nYou are running as provider: ${config.provider}, model: ${config.model}. The user can switch with /provider and /model.`;
     if (config.criticMode) {
         prompt += '\n\n' + CRITIC_MODE_PROMPT;
@@ -268,6 +246,8 @@ class Agent {
     routineStore;
     specialistStore;
     candidateStore;
+    stepLimitHitCount = 0;
+    lastStepLimitHit = false;
     constructor(config, toolOptions, memoryStore, mcpTools, mcpServerNames, alertContext, initialHistory, ragStore, routineStore, specialistStore, candidateStore) {
         this.config = config;
         this.toolOptions = toolOptions;
@@ -296,6 +276,12 @@ class Agent {
     abort() {
         this.abortController?.abort();
     }
+    /** Returns step limit hit info from last processInput, or null if limit wasn't hit. */
+    getStepLimitHit() {
+        if (!this.lastStepLimitHit)
+            return null;
+        return { currentLimit: this.config.maxSteps, hitCount: this.stepLimitHitCount };
+    }
     /** Attaches a spinner stats object that will be updated with token usage during generation. */
     setSpinnerStats(stats) {
         this.spinnerStats = stats;
@@ -313,13 +299,15 @@ class Agent {
      * @throws Error wrapping the underlying API error if generation fails for non-abort, non-overflow reasons
      */
     async processInput(userInput) {
-        this.history.push({ role: 'user', content: userInput });
+        this.lastStepLimitHit = false;
+        const timestamped = (0, datetime_js_1.timestampUserMessage)(userInput);
+        this.history.push({ role: 'user', content: timestamped });
         this.abortController = new AbortController();
         this.lastStepPromptTokens = 0;
         this.lastRAGResults = [];
         try {
             // Check if context compression is needed
-            const newMessageEstimate = Math.ceil(userInput.length / 4);
+            const newMessageEstimate = Math.ceil(timestamped.length / 4);
             if ((0, context_js_1.shouldCompress)(this.lastPromptTokens, newMessageEstimate, this.config.model, this.config.tokenWindow)) {
                 (0, output_js_1.printInfo)('Compressing conversation context...');
                 this.history = await (0, context_js_1.compressHistory)(this.history, this.config, this.ragStore);
@@ -378,7 +366,7 @@ class Agent {
             const callGenerateText = (messages) => (0, ai_1.generateText)({
                 model: (0, index_js_1.getModel)(this.config.provider, this.config.model),
                 tools,
-                maxSteps: 20,
+                maxSteps: this.config.maxSteps,
                 maxTokens: this.config.maxTokens,
                 system: systemPrompt,
                 messages: messages ?? this.history,
@@ -429,25 +417,74 @@ class Agent {
                     throw apiErr;
                 }
             }
+            // Auto-continue when the model hit the maxTokens limit mid-response
+            const MAX_CONTINUATIONS = 3;
+            let continuations = 0;
+            let continuationTokens = 0;
+            while (result.finishReason === 'length' && continuations < MAX_CONTINUATIONS) {
+                if (this.abortController?.signal.aborted)
+                    break;
+                continuationTokens += result.usage?.completionTokens ?? 0;
+                continuations++;
+                (0, output_js_1.printWarning)(`Response truncated (hit ${this.config.maxTokens} token limit). Auto-continuing... (${continuations}/${MAX_CONTINUATIONS})`);
+                // Append partial response to history so continuation has context
+                const partialMessages = (0, context_js_1.truncateToolResults)(result.response.messages);
+                this.history.push(...partialMessages);
+                this.history.push({
+                    role: 'user',
+                    content: '[Your previous response was cut off. Please continue exactly where you left off.]',
+                });
+                // Restart spinner for the continuation call
+                if (this.spinnerStats) {
+                    (0, output_js_1.startSpinner)(() => (0, output_js_1.buildSpinnerMessage)(this.spinnerStats));
+                }
+                result = await callGenerateText();
+            }
+            if (continuations > 0) {
+                const totalCompletionTokens = continuationTokens + (result.usage?.completionTokens ?? 0);
+                const recommended = Math.ceil((totalCompletionTokens * 1.25) / 1024) * 1024;
+                if (result.finishReason === 'length') {
+                    (0, output_js_1.printWarning)(`Response still incomplete after ${MAX_CONTINUATIONS} continuations. ` +
+                        `Increase the token limit: /options max-tokens ${recommended}`);
+                }
+                else {
+                    (0, output_js_1.printInfo)(`Tip: Response needed ~${totalCompletionTokens} tokens (limit: ${this.config.maxTokens}). ` +
+                        `To avoid future truncation: /options max-tokens ${recommended}`);
+                }
+            }
+            // Detect maxSteps exhaustion
+            if (result.finishReason === 'tool-calls' && result.steps.length >= this.config.maxSteps) {
+                this.lastStepLimitHit = true;
+                this.stepLimitHitCount++;
+                const msg = this.stepLimitHitCount >= 2
+                    ? `Stopped at loop limit of ${this.config.maxSteps}. Use /options max-steps to adjust permanently.`
+                    : `Stopped at loop limit of ${this.config.maxSteps}.`;
+                (0, output_js_1.printWarning)(msg);
+            }
             // Run critic verification if enabled and tool calls were made
-            if (this.config.criticMode && !this.abortController?.signal.aborted) {
-                let toolCallLog = this.extractToolCallLog(result.steps);
-                if (toolCallLog.length > 0) {
+            if (this.config.criticMode &&
+                !this.abortController?.signal.aborted &&
+                !this.lastStepLimitHit) {
+                let toolLog = (0, critic_js_1.extractToolCallLog)(result.steps);
+                if (toolLog.length > 0) {
                     let retryCount = 0;
-                    while (retryCount <= CRITIC_MAX_RETRIES) {
+                    while (retryCount <= critic_js_1.CRITIC_MAX_RETRIES) {
                         if (this.abortController?.signal.aborted)
                             break;
-                        const criticResult = await this.runCritic(userInput, result.text, toolCallLog, retryCount > 0);
+                        const criticResult = await (0, critic_js_1.runCritic)(this.config, userInput, result.text, toolLog, {
+                            isRetry: retryCount > 0,
+                            abortSignal: this.abortController?.signal,
+                        });
                         // null (error) or PASS — stop looping
                         if (!criticResult || criticResult.verdict === 'PASS')
                             break;
                         // Exhausted retries — warn and stop
-                        if (retryCount >= CRITIC_MAX_RETRIES) {
+                        if (retryCount >= critic_js_1.CRITIC_MAX_RETRIES) {
                             (0, output_js_1.printInfo)('Critic still unsatisfied after maximum retries.');
                             break;
                         }
                         retryCount++;
-                        (0, output_js_1.printCriticRetry)(retryCount, CRITIC_MAX_RETRIES);
+                        (0, output_js_1.printCriticRetry)(retryCount, critic_js_1.CRITIC_MAX_RETRIES);
                         // Push current attempt's messages + critic feedback into history before retrying
                         try {
                             const truncatedResultMessages = (0, context_js_1.truncateToolResults)(result.response.messages);
@@ -457,9 +494,9 @@ class Agent {
                                 content: `The critic agent reviewed your work and found issues:\n\nVERDICT: ${criticResult.verdict}\n${criticResult.explanation}\n\nPlease address these issues and try again.`,
                             });
                             result = await callGenerateText();
-                            toolCallLog = this.extractToolCallLog(result.steps);
+                            toolLog = (0, critic_js_1.extractToolCallLog)(result.steps);
                             // If no tool calls in retry, nothing more to verify
-                            if (toolCallLog.length === 0)
+                            if (toolLog.length === 0)
                                 break;
                         }
                         catch (retryErr) {
@@ -488,84 +525,6 @@ class Agent {
             this.spinnerStats = null;
         }
     }
-    /** Extracts a structured log of tool calls from generateText step results. */
-    extractToolCallLog(steps) {
-        const entries = [];
-        for (const step of steps) {
-            // AI SDK guarantees toolResults[i] corresponds to toolCalls[i] within each step
-            for (let i = 0; i < step.toolCalls.length; i++) {
-                const tc = step.toolCalls[i];
-                const tr = step.toolResults[i];
-                entries.push({
-                    toolName: tc.toolName,
-                    args: tc.args,
-                    result: tr?.result,
-                });
-            }
-        }
-        return entries;
-    }
-    /** Runs the critic agent to verify the main agent's response against actual tool calls. */
-    async runCritic(userInput, responseText, toolCallLog, isRetry = false) {
-        try {
-            if (isRetry) {
-                (0, output_js_1.printCriticReVerify)();
-            }
-            else {
-                (0, output_js_1.printCriticStart)();
-            }
-            const perResultLimit = Math.max(CRITIC_MIN_RESULT_CHARS, Math.floor(CRITIC_TOTAL_RESULT_BUDGET / toolCallLog.length));
-            const truncatedLog = toolCallLog.map((entry) => {
-                const raw = typeof entry.result === 'string' ? entry.result : JSON.stringify(entry.result ?? null);
-                const truncated = raw.length > perResultLimit ? raw.slice(0, perResultLimit) + '...' : raw;
-                return {
-                    toolName: entry.toolName,
-                    args: entry.args,
-                    result: truncated,
-                };
-            });
-            const truncatedResponse = responseText.length > CRITIC_MAX_RESPONSE_LENGTH
-                ? responseText.slice(0, CRITIC_MAX_RESPONSE_LENGTH) + '\n... (truncated)'
-                : responseText;
-            const criticMessage = `## Original User Request
-${userInput}
-## Agent Response
-${truncatedResponse}
-## Tool Call Log (${truncatedLog.length} calls)
-${truncatedLog
-                .map((e, i) => {
-                const argsStr = JSON.stringify(e.args);
-                const truncatedArgs = argsStr.length > CRITIC_MAX_ARGS_LENGTH
-                    ? argsStr.slice(0, CRITIC_MAX_ARGS_LENGTH) + '...'
-                    : argsStr;
-                return `${i + 1}. ${e.toolName}(${truncatedArgs})\n   Result: ${e.result}`;
-            })
-                .join('\n\n')}`;
-            const result = await (0, ai_1.generateText)({
-                model: (0, index_js_1.getModel)(this.config.provider, this.config.model),
-                system: CRITIC_SYSTEM_PROMPT,
-                messages: [{ role: 'user', content: criticMessage }],
-                maxSteps: 1,
-                maxTokens: 1024,
-                abortSignal: this.abortController?.signal,
-            });
-            if (result.text) {
-                const parsed = (0, output_js_1.parseCriticVerdict)(result.text);
-                (0, output_js_1.printCriticVerdict)(result.text);
-                return {
-                    verdict: parsed.verdict,
-                    explanation: parsed.explanation,
-                };
-            }
-            return null;
-        }
-        catch (err) {
-            (0, logger_js_1.debugLog)('agent:critic:error', err instanceof Error ? err.message : String(err));
-            return null;
-        }
-    }
     /** Compresses conversation history in-place, returning token usage stats. */
     async compactHistory() {
         const tokensBefore = (0, context_js_1.estimateHistoryTokens)(this.history);
@@ -584,6 +543,8 @@ ${truncatedLog
         this.memoryStore.clearScratch();
         this.previousRAGFacts = new Set();
         this.lastRAGResults = [];
+        this.stepLimitHitCount = 0;
+        this.lastStepLimitHit = false;
     }
 }
 exports.Agent = Agent;