RubyGems - ollama_agent - Versions diffs - 0.1.0 → 0.3.0 - Mend

ollama_agent 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

checksums.yaml +4 -4
data/.cursor/skills/ruby-code-review-levels/SKILL.md +115 -0
data/.cursor/skills/self-improvement-sandbox-safety/SKILL.md +65 -0
data/.env.example +25 -0
data/CHANGELOG.md +40 -0
data/README.md +135 -4
data/docs/ARCHITECTURE.md +42 -0
data/docs/PERFORMANCE.md +22 -0
data/docs/SESSIONS.md +48 -0
data/docs/TOOLS.md +53 -0
data/docs/TOOL_RUNTIME.md +154 -0
data/docs/superpowers/plans/2026-03-26-production-ready-ollama-agent.md +2454 -0
data/docs/superpowers/specs/2026-03-26-production-ready-ollama-agent-design.md +400 -0
data/lib/ollama_agent/agent/agent_config.rb +53 -0
data/lib/ollama_agent/agent/client_wiring.rb +76 -0
data/lib/ollama_agent/agent/prompt_wiring.rb +55 -0
data/lib/ollama_agent/agent/session_wiring.rb +53 -0
data/lib/ollama_agent/agent.rb +148 -73
data/lib/ollama_agent/agent_prompt.rb +31 -1
data/lib/ollama_agent/chat_stream_carry.rb +88 -0
data/lib/ollama_agent/chat_stream_thinking_format.rb +29 -0
data/lib/ollama_agent/cli.rb +394 -4
data/lib/ollama_agent/console.rb +177 -5
data/lib/ollama_agent/context/manager.rb +100 -0
data/lib/ollama_agent/context/token_counter.rb +33 -0
data/lib/ollama_agent/diff_path_validator.rb +32 -10
data/lib/ollama_agent/env_config.rb +44 -0
data/lib/ollama_agent/external_agents/TODO-plan.md +1948 -0
data/lib/ollama_agent/external_agents/argv_interp.rb +21 -0
data/lib/ollama_agent/external_agents/default_agents.yml +60 -0
data/lib/ollama_agent/external_agents/delegate_logger.rb +31 -0
data/lib/ollama_agent/external_agents/delegate_timeout_status.rb +12 -0
data/lib/ollama_agent/external_agents/env_helpers.rb +38 -0
data/lib/ollama_agent/external_agents/path_validator.rb +32 -0
data/lib/ollama_agent/external_agents/probe.rb +122 -0
data/lib/ollama_agent/external_agents/registry.rb +50 -0
data/lib/ollama_agent/external_agents/runner.rb +118 -0
data/lib/ollama_agent/external_agents.rb +9 -0
data/lib/ollama_agent/global_dotenv.rb +39 -0
data/lib/ollama_agent/model_env.rb +26 -0
data/lib/ollama_agent/ollama_chat_thinking_stream.rb +41 -0
data/lib/ollama_agent/ollama_connection.rb +6 -1
data/lib/ollama_agent/patch_risk.rb +81 -0
data/lib/ollama_agent/patch_support.rb +27 -1
data/lib/ollama_agent/path_sandbox.rb +62 -0
data/lib/ollama_agent/prompt_skills/clean_ruby.md +131 -0
data/lib/ollama_agent/prompt_skills/code_review.md +112 -0
data/lib/ollama_agent/prompt_skills/design_patterns.md +56 -0
data/lib/ollama_agent/prompt_skills/manifest.yml +25 -0
data/lib/ollama_agent/prompt_skills/ollama_agent_patterns.md +132 -0
data/lib/ollama_agent/prompt_skills/rails_best_practices.md +41 -0
data/lib/ollama_agent/prompt_skills/rails_style.md +138 -0
data/lib/ollama_agent/prompt_skills/rspec.md +280 -0
data/lib/ollama_agent/prompt_skills/rubocop.md +7 -0
data/lib/ollama_agent/prompt_skills/ruby_style.md +121 -0
data/lib/ollama_agent/prompt_skills/solid.md +270 -0
data/lib/ollama_agent/prompt_skills/solid_ruby.md +223 -0
data/lib/ollama_agent/prompt_skills.rb +169 -0
data/lib/ollama_agent/repo_list.rb +4 -1
data/lib/ollama_agent/resilience/audit_logger.rb +79 -0
data/lib/ollama_agent/resilience/retry_middleware.rb +45 -0
data/lib/ollama_agent/resilience/retry_policy.rb +51 -0
data/lib/ollama_agent/ruby_index_tool_support.rb +17 -6
data/lib/ollama_agent/runner.rb +123 -0
data/lib/ollama_agent/sandboxed_tools/delegate_external.rb +62 -0
data/lib/ollama_agent/sandboxed_tools/file_read_write.rb +100 -0
data/lib/ollama_agent/sandboxed_tools/search_text.rb +60 -0
data/lib/ollama_agent/sandboxed_tools.rb +55 -116
data/lib/ollama_agent/search_backend.rb +93 -0
data/lib/ollama_agent/self_improvement/analyzer.rb +34 -0
data/lib/ollama_agent/self_improvement/improver.rb +340 -0
data/lib/ollama_agent/self_improvement/modes.rb +25 -0
data/lib/ollama_agent/self_improvement/ruby_mastery_context.rb +66 -0
data/lib/ollama_agent/self_improvement.rb +5 -0
data/lib/ollama_agent/session/session.rb +8 -0
data/lib/ollama_agent/session/store.rb +68 -0
data/lib/ollama_agent/streaming/console_streamer.rb +29 -0
data/lib/ollama_agent/streaming/hooks.rb +39 -0
data/lib/ollama_agent/tool_arguments.rb +13 -1
data/lib/ollama_agent/tool_content_parser.rb +1 -1
data/lib/ollama_agent/tool_runtime/executor.rb +34 -0
data/lib/ollama_agent/tool_runtime/json_extractor.rb +62 -0
data/lib/ollama_agent/tool_runtime/loop.rb +72 -0
data/lib/ollama_agent/tool_runtime/memory.rb +32 -0
data/lib/ollama_agent/tool_runtime/ollama_json_planner.rb +98 -0
data/lib/ollama_agent/tool_runtime/plan_extractor.rb +12 -0
data/lib/ollama_agent/tool_runtime/registry.rb +60 -0
data/lib/ollama_agent/tool_runtime/tool.rb +24 -0
data/lib/ollama_agent/tool_runtime.rb +24 -0
data/lib/ollama_agent/tools/registry.rb +55 -0
data/lib/ollama_agent/tools_schema.rb +74 -1
data/lib/ollama_agent/user_prompt.rb +35 -0
data/lib/ollama_agent/version.rb +1 -1
data/lib/ollama_agent.rb +25 -0
data/reproduce_429.rb +40 -0
data/sig/ollama_agent.rbs +111 -1
metadata +78 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 73ed8cb32bcabdd008dee785606c96aefc9bf626a3c3a1187b7bab8ef68c9627
-  data.tar.gz: 150f32efcba0bd8602cd571654bcfc8f42b99af56873a57d792202bb957b4701
+  metadata.gz: c3518f124795a2efb13b9e88abb2b7c90f935c918da3c7d30458ff2fff245f1b
+  data.tar.gz: 101d5e4bdbfbd5e6196e9744c94f1803c1ca32fea746b275cda8ad30783788d1
 SHA512:
-  metadata.gz: 18ce5ec435f2b8330e2684dd116bd33b9e399d93857a5469365902a19a3f99b61bf7d227df746b5bbe82d7b77c4f952b3e2ad8ac61b204498e4d07f469978de0
-  data.tar.gz: fd59f660457f2de80e949a098283b38eeccb94cb9daa757158d76d3f457a1f0926ef30f5f5e98ba864ef7504470842775c971ed0a373f48ba8e26b6b0ecdc129
+  metadata.gz: 1aad321bf07bb4ddea9daffeca46724e842408ac3f5d96997d77c3f81b30f10a3b1d28a4f8260bc14cde249cb63069e3fcc5aa1aecc982c0852a979733a2b1b3
+  data.tar.gz: 5d67531648baf16bf30b63e9d3726f14243d19d939f56875fb245254ad9538eded9ea4c1fe3e38cf1ded9d93b77f392f1ac4145ae0e351b654a531802696f230

data/.cursor/skills/ruby-code-review-levels/SKILL.md ADDED Viewed

@@ -0,0 +1,115 @@
+---
+name: ruby-code-review-levels
+description: >
+  Deterministic 5-level Ruby/Rails PR review checklist for production risk and architectural depth.
+---
+# Skill: Ruby Code Review Levels (Production-Oriented)
+Use this skill when the user asks for a Ruby (Rails API system) code review, refactor guidance, or PR feedback. The goal is to catch failure modes before they ship—not just fix style.
+## How to use
+For each review, walk levels in order and produce findings grouped by level. Apply these gates:
+- Reject immediately if Level 2 fails (logic/edge-case correctness).
+- Reject immediately if Level 4 violates system invariants (idempotency, determinism, state correctness).
+- Reject immediately if Level 5 lacks required safety controls (observability + external/API safety + graceful failure).
+- Otherwise, accept with Level 1/3 improvements if they are non-blocking.
+## Level 1 — Syntax & Style Review (Mechanical)
+Objective: enforce Ruby idioms, readability, and consistency.
+Checks:
+- Project RuboCop compliance using the repo’s configuration (not “default” relaxed rules).
+- Naming clarity: no generic `data`, `obj`, `thing`, `tmp`; purpose-named identifiers.
+- Ruby idioms: prefer `&&`/`||` over `and`/`or` for boolean logic, avoid `present?`/Rails-only helpers in hot paths, avoid awkward defensive `nil` checks when Ruby truthiness is sufficient.
+- No dead code / no commented-out blocks.
+- Method size: aim for small methods (roughly <= 10–15 LOC); split larger logic into intention-revealing private methods.
+W5H (required for non-trivial changes):
+- What: what does this code do?
+- Why: why is this approach needed here?
+- Where: where does this belong (controller/model/service/pattern)?
+- When: when does it run (sync vs async, request vs background)?
+- How it fails: what are the failure modes and what happens next?
+## Level 2 — Correctness & Edge Case Review (Logic)
+Objective: ensure correct behavior for realistic inputs and boundary conditions.
+Mandatory checks:
+- Nil handling is explicit and intentional (no accidental `nil` propagation).
+- Boundaries: empty collections, zero values, missing keys, unexpected formats.
+- Determinism: same inputs should produce the same outputs (especially for decision logic).
+- Numeric safety: for floats/rounding, ensure the behavior is stable and documented.
+- Time/zone correctness: use the correct time source (avoid mixing app time and system time incorrectly).
+- Idempotency guards (where relevant): avoid double-processing, double-exits, duplicate updates.
+- Validate invariants before doing work (e.g., existence checks, “state is allowed” checks).
+Questions:
+- What if upstream data is stale/incomplete?
+- What if state already changed between decision and action?
+- Are there hidden assumptions about ordering or concurrency?
+## Level 3 — Design & Abstraction Review (Structure)
+Objective: ensure boundaries are real and abstractions are earned.
+Rules:
+- SRP: each unit has one reason to change.
+- Don’t add useless service objects that just proxy model calls.
+- Put domain logic with models (or dedicated POROs) and keep controllers thin.
+- Extract shared logic across strategies instead of duplicating it with small variations.
+- Avoid “fake polymorphism”: only abstract when it reduces duplication or clarifies invariants.
+Review heuristics:
+- Is the abstraction describing a real concept (e.g., `Position`, `Policy`, `Validator`)?
+- Where should this logic live: model, service (orchestrator), or PORO (calculation/logic)?
+- Are naming and ownership clear enough that a future developer can extend it safely?
+## Level 4 — System & Architectural Review (Integrity)
+Objective: ensure correctness under load, failures, and concurrency.
+Focus:
+- Deterministic flows: event-driven or async sequences should be explicit.
+- Idempotency at system boundaries: prevent duplicate side effects (DB writes, external calls).
+- State management: single source of truth; avoid mixing “cache truth” and “DB truth” without reconciliation.
+- Invariants:
+  - Never place/trigger the same action twice for the same logical entity.
+  - Never transition state in an invalid order.
+  - Always re-check critical preconditions close to execution if the world can change.
+Failure modes:
+- What happens if external dependencies fail or time out?
+- Are retries safe (idempotent) and bounded (no infinite loops)?
+- Are partial failures handled with clear compensation or persistence rules?
+## Level 5 — Production Readiness Review (Safety)
+Objective: make the change safe to ship.
+Required checks:
+- Observability: structured logs (or consistent log lines), including correlation/request IDs when available.
+- External/API safety: timeouts, bounded retries with backoff, and verification of post-conditions.
+- Graceful degradation: clear error propagation; avoid silent failures.
+- Performance sanity: avoid accidental N+1, and keep work bounded (no unbounded loops).
+- Testing coverage for the critical behavior:
+  - at least one spec for the new behavior
+  - edge cases for nil/boundaries
+  - idempotency/duplicate prevention when side effects exist
+Exit criteria:
+- Level 2 + Level 4 must be green.
+- Level 5 requires explicit safety and observability for any side-effecting changes.
+## Output format (what to write back to the user)
+For each finding, include:
+- Level (1–5)
+- File and symbol/method name (where possible)
+- Why it fails or risks failure (one concise sentence)
+- Concrete fix suggestion (smallest change that addresses it)

data/.cursor/skills/self-improvement-sandbox-safety/SKILL.md ADDED Viewed

@@ -0,0 +1,65 @@
+---
+name: self-improvement-sandbox-safety
+description: >
+  Guardrails for `ollama_agent improve --mode automated` sandbox safety: minimal diffs, build-file restoration, patch validation, and safe merge rules.
+---
+# Skill: Self-Improvement Sandbox Safety
+Use this skill when the agent is asked to run or design improvements using the gem’s self-improvement flow, especially:
+- `ollama_agent self_review --mode automated`
+- `ollama_agent improve --mode automated --apply`
+This skill is about preventing “test succeeded in sandbox but tree broke” failures and preventing the model from taking risky actions in the live repo.
+## Non-negotiable invariants
+### 1) Keep build-critical files intact (or restore them)
+- Do not delete or corrupt `Gemfile`, `Gemfile.lock`, `*.gemspec`, `exe/`, or `.ruby-version` in the sandbox.
+- Assume the model may still break these during `edit_file`.
+- The system should restore build essentials before running tests. Still, avoid “creative” patching of those files.
+### 2) Run tests inside the sandbox
+- Always run `bundle exec rspec` with the working directory set to the sandbox root.
+- Ensure bundler points at the sandbox `Gemfile` (via `BUNDLE_GEMFILE`).
+### 3) Require valid unified diffs
+When producing an `edit_file` patch:
+- Include `--- a/<path>` and `+++ b/<path>` headers.
+- Use the correct ordering: `---` then `+++` then `@@ -x,y +x,y @@` hunk headers.
+- Ensure the hunk `@@` line counts match the changed block exactly.
+- Avoid legacy context-diff hunks like `--- N,M ----`.
+- Prefer minimal, local hunks: one logical change per hunk.
+### 4) Avoid merging ignored test artifacts
+With `--apply`:
+- Merge only actual source changes.
+- Never merge files that should be treated as artifacts, caches, or status trackers created during test runs (e.g. `.rspec_status`).
+- If the sandbox contains ignored test artifacts, skip them during merge.
+## Minimal-diff strategy (to avoid fragile mega-patches)
+Required behavior for `--mode automated`:
+- Keep each `edit_file` patch small.
+- Do not replace whole methods unless the patch is tiny and context is exact.
+- Do not replace multi-hundred-line hunks.
+- Prefer a sequence:
+  - add a helper (small)
+  - update one call site (small)
+  - add/adjust one focused spec (small)
+## “Patch checklist” before edit_file
+Before generating a patch, verify:
+- The target path in the patch (`+++ b/...`) matches the file you’re editing.
+- The patch contains at least one `@@ ... @@` hunk header.
+- The diff hunk order is correct (first `+++ ...` must appear before the first `@@`).
+- The patch contains the exact surrounding context lines expected by the dry-run validator.
+## Prompt addendum snippet (to include in FIX_PROMPT)
+If you need to extend an existing FIX_PROMPT, add something like:
+Minimal diffs only: fewest lines per edit_file, exact @@ counts—no whole-method or mega-hunks. Never delete build-critical files (Gemfile, Gemfile.lock, *.gemspec, exe/); they are restored before tests, but do not depend on that restore. With --apply, never merge ignored test artifacts (e.g. .rspec_status).

data/.env.example CHANGED Viewed

@@ -25,3 +25,28 @@ OLLAMA_AGENT_MODEL=gpt-oss:120b-cloud
 OLLAMA_AGENT_TIMEOUT=120
 # Parse tool JSON from assistant output (set to "1" to enable)
 OLLAMA_AGENT_PARSE_TOOL_JSON=0
+# Bundled Markdown prompt skills (default: on). Set to "0" to disable.
+OLLAMA_AGENT_SKILLS=1
+# Comma-separated manifest ids to load (omit = all bundled). Example: ruby_style,rubocop
+# OLLAMA_AGENT_SKILLS_INCLUDE=
+# Comma-separated ids to omit from bundled skills
+# OLLAMA_AGENT_SKILLS_EXCLUDE=
+# Extra SKILL-style .md files or directories (colon-separated on Unix)
+# OLLAMA_AGENT_SKILL_PATHS=
+# Extra paths from env (default: on). Set to "0" to disable.
+OLLAMA_AGENT_EXTERNAL_SKILLS=1
+# Set to "1" to add orchestrator tools (list_external_agents, delegate_to_agent) to `ask`
+OLLAMA_AGENT_ORCHESTRATOR=0
+# Optional alias for orchestrator mode on ask: set to "orchestrator"
+# OLLAMA_AGENT_MODE=
+# Optional YAML path overriding ~/.config/ollama_agent/agents.yml for external CLI definitions
+# OLLAMA_AGENT_EXTERNAL_AGENTS_CONFIG=
+# Max bytes of stdout+stderr returned from delegate_to_agent (default 100000)
+# OLLAMA_AGENT_DELEGATE_MAX_OUTPUT_BYTES=
+# Emit structured delegation audit logs to stderr (also enabled by OLLAMA_AGENT_DEBUG=1)
+# OLLAMA_AGENT_DELEGATE_LOG=0
+# Per-tool binary overrides (examples; see default_agents.yml)
+# OLLAMA_AGENT_CLAUDE_CLI_PATH=
+# OLLAMA_AGENT_GEMINI_CLI_PATH=
+# OLLAMA_AGENT_CODEX_CLI_PATH=
+# OLLAMA_AGENT_CURSOR_CLI_PATH=

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,45 @@
 ## [Unreleased]
+## [0.3.0] - 2026-04-06
+### Added
+- `ToolRuntime` — JSON plan loop for custom tools (`OllamaJsonPlanner`, registry, executor); see `docs/TOOL_RUNTIME.md`
+- Optional **ruby_mastery** context for `self_review` / `improve` (`OLLAMA_AGENT_RUBY_MASTERY`, `--no-ruby-mastery`)
+- `OllamaAgent::ModelEnv` — shared model name resolution from environment
+- `OllamaAgent::GlobalDotenv` — load repo-root `.env` after `ollama_client` so CLI picks up `OLLAMA_AGENT_*` without extra exports
+- Self-improvement automated mode: `--verify` (`syntax`, `rubocop`, `rspec`), `OLLAMA_AGENT_IMPROVE_VERIFY`, `--stream`, and a success message when `--apply` was not used
+- External agents / argv expansion and related orchestration refinements
+### Changed
+- `SearchBackend` finds `rg` / `grep` by scanning `PATH` (avoids relying on a `command` executable on trimmed `PATH`)
+### Fixed
+- `SelfImprovement::Improver#run` accepts `max_tokens` and `context_summarize` from the CLI (Ruby 3 keyword compatibility)
+## [0.2.0] - 2026-03-26
+### Added
+- `write_file` tool — create or overwrite files (complements `edit_file` for surgical diffs)
+- `OllamaAgent::Tools.register` — extensible tool registry for library consumers
+- `Streaming::Hooks` — event bus (`on_token`, `on_tool_call`, `on_tool_result`, `on_complete`, `on_error`, `on_retry`)
+- `--stream` / `OLLAMA_AGENT_STREAM=1` — live streaming token output
+- `Resilience::RetryMiddleware` — exponential backoff on timeout/503/429 (default 3 retries)
+- `Resilience::AuditLogger` — NDJSON audit log under `.ollama_agent/logs/` (`--audit` / `OLLAMA_AGENT_AUDIT=1`)
+- `Context::Manager` — sliding-window token trim before each chat call (`OLLAMA_AGENT_MAX_TOKENS`)
+- `Session::Store` — crash-safe NDJSON session persistence (`--session`, `--resume`)
+- `ollama_agent sessions` — list saved sessions
+- `OllamaAgent::Runner` — stable public library facade with SemVer contract from 0.2.0
+- `docs/ARCHITECTURE.md`, `docs/TOOLS.md`, `docs/SESSIONS.md`
+### Changed
+- `READ_ONLY_TOOLS` now excludes both `edit_file` and `write_file`
+- `Agent` now exposes `#hooks` (`Streaming::Hooks`) and `#session_id`
+### New environment variables
+- `OLLAMA_AGENT_STREAM`, `OLLAMA_AGENT_MAX_TOKENS`
+- `OLLAMA_AGENT_MAX_RETRIES`, `OLLAMA_AGENT_RETRY_BASE_DELAY`
+- `OLLAMA_AGENT_AUDIT`, `OLLAMA_AGENT_AUDIT_LOG_PATH`
 ## [0.1.0] - 2026-03-21
 - Initial release

data/README.md CHANGED Viewed

@@ -11,13 +11,25 @@ Ruby gem that runs a **CLI coding agent** against a local [Ollama](https://ollam
 - Tool `search_code` – search code with ripgrep or grep.
 - Tool `edit_file` – apply unified diffs safely.
 - CLI built with Thor, entry point `exe/ollama_agent`.
+- **`self_review`** – self-review / improvement with a **`--mode`**:
+  - **`analysis`** (default, alias `1`) — read-only tools; report only; no writes.
+  - **`interactive`** (alias `2`, `fix`) — full tools on `--root`; you confirm each patch (like `ask`); optional `-y` / `--semi`.
+  - **`automated`** (alias `3`, `sandbox`) — temp copy, agent edits, **`bundle exec rspec`** in the sandbox, optional **`--apply`** to merge into your checkout.
+- **`improve`** — same as **`self_review --mode automated`** (you can pass **`--mode automated`** explicitly; other modes belong on **`self_review`**).
+- **`orchestrate`** / **`OLLAMA_AGENT_ORCHESTRATOR=1`** — optional **orchestrator** tools to probe and delegate to other local CLI agents (see [Orchestrator](#orchestrator-external-cli-agents)); **`agents`** lists availability.
+- **Ruby API** — embed **`Runner`**, **`Agent`**, custom tools, hooks, sessions, and (optionally) **`ToolRuntime`**; see [Library usage (Ruby)](#library-usage-ruby).
 ## Requirements
-- Ruby ≥ 3.2
+- Ruby ≥ 3.2 (enforced in the gemspec as `required_ruby_version`)
 - **Local:** Ollama running and a capable tool-calling model, **or**
 - **Ollama Cloud:** API key and a cloud-capable model name (see below)
+### Prerequisites (external tools)
+- **`patch`** — required for `edit_file` (GNU `patch` on `PATH`). On Windows, use Git Bash, WSL, GnuWin32, or another environment that provides `patch`.
+- **`rg` (ripgrep) or `grep`** — text mode for `search_code` needs at least one of these on `PATH` (ripgrep is preferred when present).
 ## Installation
 From RubyGems (when published) or from this repository:
@@ -40,6 +52,9 @@ Apply proposed patches without interactive confirmation:
 ```bash
 bundle exec ruby exe/ollama_agent ask -y "Your task"
+# Review / audit only (no patches, writes, or delegation)—same as a report-style self_review
+bundle exec ruby exe/ollama_agent ask --read-only "Summarize risks in this repo"
 ```
 Long-running models (slow local inference):
@@ -54,6 +69,27 @@ Interactive REPL:
 bundle exec ruby exe/ollama_agent ask --interactive
 ```
+Self-review modes (default project root is the **current working directory** unless you set `--root` or `OLLAMA_AGENT_ROOT`):
+```bash
+# Mode 1 — analysis only (default)
+bundle exec ruby exe/ollama_agent self_review
+bundle exec ruby exe/ollama_agent self_review --mode analysis
+# Mode 2 — optional fixes in the working tree (confirm each patch, or -y / --semi)
+bundle exec ruby exe/ollama_agent self_review --mode interactive
+# Mode 3 — sandbox + tests + optional merge back (same as `improve`)
+# Without --apply, edits stay in a temp dir only; pass --apply to copy changed files into your checkout.
+bundle exec ruby exe/ollama_agent self_review --mode automated
+bundle exec ruby exe/ollama_agent self_review --mode automated --apply
+bundle exec ruby exe/ollama_agent improve --apply
+```
+**`ruby_mastery` (optional):** When the [`ruby_mastery`](https://github.com/shubhamtaywade82/ruby_mastery) gem is installed (this repo lists it in the `Gemfile` for development), **`self_review`** (all modes) and **`improve`** prepend a **markdown static-analysis** section to the user prompt. Add the same gem to your app’s `Gemfile` if you want that behavior outside this checkout. Disable with **`--no-ruby-mastery`** or **`OLLAMA_AGENT_RUBY_MASTERY=0`**. Limit size with **`OLLAMA_AGENT_RUBY_MASTERY_MAX_CHARS`** (default `60000`).
+For mode 3, `-y` skips all patch prompts; `--no-semi` prompts for every patch when not using `-y`.
 With a **thinking-capable** model, enable reasoning output:
 ```bash
@@ -64,7 +100,9 @@ bundle exec ruby exe/ollama_agent ask -i --think true
 The CLI uses **ANSI colors** on a TTY (banner, prompt, patch prompts). **Assistant replies** are rendered as **Markdown** (headings, lists, bold, code fences) via `tty-markdown` when stdout is a TTY and **`NO_COLOR`** is unset. Disable Markdown rendering with **`OLLAMA_AGENT_MARKDOWN=0`**. Disable all colors with **`NO_COLOR`** or **`OLLAMA_AGENT_COLOR=0`**.
-When **thinking** is enabled, internal reasoning is shown in a **framed, dim** block labeled **Thinking**; the user-facing reply is labeled **Assistant** in green when the model returns both fields. Thinking text is **plain dim** by default (so it stays visually separate from the reply). Set **`OLLAMA_AGENT_THINKING_MARKDOWN=1`** to render thinking through Markdown too (muted colors).
+When **thinking** is enabled, internal reasoning is shown under a **Thinking** label; the user-facing reply is labeled **Assistant** in green when the model returns both fields. By default (**`OLLAMA_AGENT_THINKING_STYLE=compact`**, Cursor-like), one **Thinking** header is printed per `ask` run and every later reasoning chunk in that run is appended with **blank lines only** (no repeated banner, no rule lines)—including after turns where the model printed tool JSON or other non-empty `content`. Set **`OLLAMA_AGENT_THINKING_STYLE=framed`** for the legacy boxed style (banner + long rulers on every assistant message). Thinking body text is **plain dim** by default. Set **`OLLAMA_AGENT_THINKING_MARKDOWN=1`** to render thinking through Markdown too (muted colors).
+With **`--stream`** / **`OLLAMA_AGENT_STREAM=1`**, reasoning streams in **dim** text under a single **Thinking** line, then **`Assistant`** and the reply stream in normal styling—closer to Cursor than printing everything as one token stream. (This uses a small hook on ollama-client’s chat stream; `hooks[:on_thinking]` is also emitted for custom subscribers.)
 ### Ollama Cloud
@@ -87,23 +125,116 @@ bundle exec ruby exe/ollama_agent ask "Your task"
 | `OLLAMA_BASE_URL` | Ollama API base URL (default from ollama-client: `http://localhost:11434`; use `https://ollama.com` for cloud) |
 | `OLLAMA_API_KEY` | API key for Ollama Cloud (`https://ollama.com`); optional for local HTTP |
 | `OLLAMA_AGENT_MODEL` | Model name (overrides default from ollama-client) |
-| `OLLAMA_AGENT_ROOT` | Project root (defaults to current working directory) |
+| `OLLAMA_AGENT_ROOT` | Project root for tools (`list_files`, `read_file`, etc.). Defaults to **current working directory** when unset (CLI never falls back to the gem install path). |
 | `OLLAMA_AGENT_DEBUG` | Set to `1` to print validation diagnostics on stderr |
+| `OLLAMA_AGENT_STRICT_ENV` | Set to `1` so invalid numeric env values (e.g. `OLLAMA_AGENT_MAX_TURNS`) raise `ConfigurationError` instead of falling back to defaults |
 | `OLLAMA_AGENT_MAX_TURNS` | Max chat rounds with tool calls (default: 64) |
 | `OLLAMA_AGENT_TIMEOUT` | HTTP read/open timeout in seconds for Ollama requests (default **120**; use `ask --timeout` / `-t` to override per run) |
 | `OLLAMA_AGENT_PARSE_TOOL_JSON` | Set to `1` to run tools parsed from JSON lines in assistant text (fallback when the model does not emit native tool calls) |
 | `NO_COLOR` | Set (any value) to disable ANSI colors (see [no-color.org](https://no-color.org/)) |
 | `OLLAMA_AGENT_COLOR` | Set to `0` to disable colors even on a TTY |
 | `OLLAMA_AGENT_MARKDOWN` | Set to `0` to disable Markdown formatting of assistant replies (plain text only) |
-| `OLLAMA_AGENT_THINKING_MARKDOWN` | Set to `1` to render **thinking** text with Markdown (muted); default is plain dim text inside the Thinking frame |
+| `OLLAMA_AGENT_THINKING_STYLE` | `compact` (default) = one **Thinking** label per run, blank lines between later reasoning chunks; `framed` = repeat full banner/rulers each message |
+| `OLLAMA_AGENT_THINKING_MARKDOWN` | Set to `1` to render **thinking** text with Markdown (muted); default is plain dim text |
 | `OLLAMA_AGENT_THINK` | Model **thinking** mode for compatible models: `true` / `false`, or `high` / `medium` / `low` (see ollama-client `think:`). Empty = omit (server default). |
+| `OLLAMA_AGENT_PATCH_RISK_MAX_DIFF_LINES` | Max changed-line count before a diff is treated as "large" for semi-auto patch risk (default **80**) |
 | `OLLAMA_AGENT_INDEX_REBUILD` | Set to `1` to drop the cached Prism Ruby index before the next symbol search in this process |
 | `OLLAMA_AGENT_RUBY_INDEX_MAX_FILES` | Max `.rb` files to parse per index build (default **5000**) |
 | `OLLAMA_AGENT_RUBY_INDEX_MAX_FILE_BYTES` | Skip Ruby files larger than this many bytes (default **512000**) |
 | `OLLAMA_AGENT_RUBY_INDEX_MAX_LINES` | Max result lines for `search_code` class/module/method modes (default **200**) |
 | `OLLAMA_AGENT_RUBY_INDEX_MAX_CHARS` | Max characters of index output per search (default **60000**) |
 | `OLLAMA_AGENT_MAX_READ_FILE_BYTES` | Max bytes for a **full** `read_file` (no line range); larger files return an error (default **2097152**, 2 MiB). Line-range reads stream and are not limited by this cap. |
+| `OLLAMA_AGENT_RG_PATH` | Absolute path to `rg` for `search_code` text mode (optional; otherwise first `rg` on `PATH`) |
+| `OLLAMA_AGENT_GREP_PATH` | Absolute path to `grep` fallback (optional; otherwise first `grep` on `PATH`) |
 | `OLLAMA_AGENT_INDEX_REBUILD` | The Prism index is rebuilt when this env value **changes** (e.g. unset → `1`); it is **not** rebuilt on every tool call while it stays `1`. |
+| `OLLAMA_AGENT_SKILLS` | `1`/`on`/`0`/`off` — include **bundled** prompt skills (default **on**). Same as `--no-skills` on the CLI when off. |
+| `OLLAMA_AGENT_SKILLS_INCLUDE` | Comma-separated **manifest ids** to load (omit = all bundled). Example: `ruby_style,rubocop,code_review`. |
+| `OLLAMA_AGENT_SKILLS_EXCLUDE` | Comma-separated ids to skip from the bundled set. |
+| `OLLAMA_AGENT_SKILL_PATHS` | Extra `.md` files or directories, **colon-separated** (Unix `PATH` style). Directory entries load all `*.md` in sorted order. Merged with `--skill-paths`. |
+| `OLLAMA_AGENT_EXTERNAL_SKILLS` | `1`/`0` — include content from `OLLAMA_AGENT_SKILL_PATHS` (default **on**). Set `0` to use bundled-only without unsetting paths. |
+### Prompt skills (bundled + optional paths)
+The system prompt is the **base agent instructions** (`AgentPrompt`) plus optional **Markdown** sections. Bundled files live under `lib/ollama_agent/prompt_skills/` and are listed in `manifest.yml`. Each file may use Cursor-style YAML frontmatter (`---` … `---`); the loader strips frontmatter before sending text to the model.
+**Manifest ids** (in load order): `clean_ruby`, `ruby_style`, `rubocop`, `solid`, `solid_ruby`, `design_patterns`, `rspec`, `rails_style`, `rails_best_practices`, `code_review`, `ollama_agent_patterns`.
+Bundled bodies were copied from Cursor `SKILL.md` files under `~/.cursor/skills/` (and `ollama_agent_patterns` from this repo’s `.cursor/skills/ollama-agent-patterns`). Re-copy when you update those skills upstream.
+Many full skills can be **large**; use `OLLAMA_AGENT_SKILLS_INCLUDE` to trim for small-context models.
+CLI flags (available on `ask`, `self_review`, and `improve`): `--no-skills`, `--skill-paths 'path1:path2/dir'`.
+To run **`self_review` / `ask` against the installed gem’s source** (e.g. to hack on `ollama_agent` itself), pass an explicit root, for example `--root "$(bundle show ollama_agent)"` or a path to a git clone.
+### Orchestrator (external CLI agents)
+Use the **`orchestrate`** command (or **`OLLAMA_AGENT_ORCHESTRATOR=1`** with **`ask`**) to expose tools **`list_external_agents`** and **`delegate_to_agent`**. The Ollama model should gather context with **`read_file` / `search_code`**, list installed CLIs, then delegate a **short** task + context to an external agent (Claude Code, Gemini CLI, Codex, Cursor CLI, etc.). Definitions live in `lib/ollama_agent/external_agents/default_agents.yml`; override or extend via **`~/.config/ollama_agent/agents.yml`** or **`OLLAMA_AGENT_EXTERNAL_AGENTS_CONFIG`**.
+- **`ollama_agent agents`** — print a table of configured agents and whether each binary is on `PATH`.
+- **`ollama_agent doctor`** — alias for `agents`.
+- **`delegate_to_agent`** runs a **fixed argv** (no shell) with **`cwd`** = project root; output is capped (**`OLLAMA_AGENT_DELEGATE_MAX_OUTPUT_BYTES`**, default 100000 bytes). Each run prompts for confirmation unless **`-y`** is passed.
+- Delegation audit logs: set **`OLLAMA_AGENT_DELEGATE_LOG=1`** (or `OLLAMA_AGENT_DEBUG=1`) to emit a structured stderr line with agent id, argv, env keys (names only), exit code, and duration.
+- Adjust **`argv` / `version_argv`** in YAML to match your real CLI (vendor flags differ). If a tool has no stable non-interactive mode, do not expose it in the registry.
+- Tool contract version: **`OllamaAgent::ORCHESTRATOR_TOOLS_SCHEMA_VERSION`**.
+### Library usage (Ruby)
+Most of this README is **CLI-first** (commands and environment variables above). The same capabilities exist as **Ruby APIs**—the [Features](#features) list (file tools, `self_review` / `improve`, orchestrator, skills, etc.) is implemented under `lib/ollama_agent/`. For a **layer diagram** (agent → tools → hooks → session), see [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md).
+**Coding agent — `Runner` (facade)** — Stable entry for apps: `OllamaAgent::Runner.build(root:, model:, stream:, session_id:, resume:, read_only:, orchestrator:, skills_enabled:, skill_paths:, audit:, max_tokens:, context_summarize:, stdin:, stdout:, ...)` then `#run(query)`. Optional **`stdin`** / **`stdout`** (default TTY) feed patch/write/delegate confirmations—use `StringIO` in tests or automation to avoid blocking on `$stdin.gets`. Exposes `#hooks` (`Streaming::Hooks`) for `:on_token`, `:on_thinking` (streamed reasoning when `stream: true` and the model supports it), `:on_tool_call`, `:on_tool_result`, `:on_complete`. Full keyword list: [`lib/ollama_agent/runner.rb`](lib/ollama_agent/runner.rb).
+**Coding agent — `Agent` (direct)** — `OllamaAgent::Agent.new(client:, root:, ...)` when you inject an `Ollama::Client` (or test double), tweak options the CLI does not expose, or skip `Runner`.
+**Custom tools (coding agent)** — `OllamaAgent::Tools.register("tool_name", schema: { ... }) { |args, root:, read_only:| ... }` merges extra function definitions into the chat tool list; handlers run in the same sandbox as built-in tools.
+**Resilience and observability** — Default client path uses `Resilience::RetryMiddleware`. Structured step logging: enable **`audit: true`** on `Runner.build` or **`OLLAMA_AGENT_AUDIT=1`** (see Environment table). Context trimming: **`max_tokens`** / **`context_summarize`** on `Runner.build`.
+**Sessions** — Pass **`session_id`** and optional **`resume: true`** on `Runner.build` to persist messages under `.ollama_agent/sessions/` (`Session::Store`).
+**Self-improvement (sandbox)** — CLI commands **`improve`** / **`self_review --mode automated`** wrap `OllamaAgent::SelfImprovement` (sandbox copy, tests, optional merge). Use the CLI for the full flow; the module is available for advanced integration.
+**`ToolRuntime` (alternate loop, optional)** — Not used by the CLI. It provides a small **JSON plan** loop for **non–file-edit** agents (e.g. another gem that defines its own tools): the model returns one object per step `{"tool":"name","args":{...}}`, `ToolRuntime::Registry` resolves it, `Executor` runs your `Tool` subclasses, `Memory` holds short-term history. Use a **swappable planner** (anything implementing `next_step(context:, memory:, registry:)`) such as `OllamaJsonPlanner` (`Ollama::Client#chat` + JSON extraction). **Step-by-step guide:** [docs/TOOL_RUNTIME.md](docs/TOOL_RUNTIME.md).
+- **Termination:** a tool may return `{ "status" => "done" }` to stop. Unknown tool names → `OllamaAgent::ToolRuntime::InvalidPlanError`; too many steps → `MaxStepsExceeded`. **`Loop#run`** returns the **last tool result** (same value as the final `Executor#execute` return).
+- **Runnable examples:** `spec/ollama_agent/tool_runtime/`.
+**Model and server:** `OllamaJsonPlanner` uses the same default as the coding agent: `OLLAMA_AGENT_MODEL` if set, otherwise `Ollama::Config.new.model` (from ollama-client). The model must exist on whatever host you use. **Use the same client setup as the CLI:** `OllamaAgent::OllamaConnection.apply_env_to_config` copies `OLLAMA_BASE_URL` and `OLLAMA_API_KEY` into `Ollama::Config`. If you only run `Ollama::Client.new(config: Ollama::Config.new)` in `irb`, you stay on **localhost** while `OLLAMA_AGENT_MODEL` may still name a **cloud** model from the README cloud example, which results in a **404**. To avoid this, either call `apply_env_to_config` (as in the example below), unset the cloud model, or pass `model: "llama3.2"`.
+```ruby
+require "ollama_agent"
+require "ollama_client"
+class EchoTool < OllamaAgent::ToolRuntime::Tool
+  def name = "echo"
+  def description = "Echo args"
+  def schema = { "type" => "object", "properties" => { "msg" => { "type" => "string" } } }
+  def call(args)
+    return { "status" => "done", "echo" => args["msg"] } if args["msg"] == "bye"
+    { "status" => "ok", "echo" => args["msg"] }
+  end
+end
+registry = OllamaAgent::ToolRuntime::Registry.new([EchoTool.new])
+memory = OllamaAgent::ToolRuntime::Memory.new
+config = Ollama::Config.new
+OllamaAgent::OllamaConnection.apply_env_to_config(config)
+client = Ollama::Client.new(config: config)
+planner = OllamaAgent::ToolRuntime::OllamaJsonPlanner.new(client: client)
+last = OllamaAgent::ToolRuntime::Loop.new(
+  planner: planner,
+  registry: registry,
+  executor: OllamaAgent::ToolRuntime::Executor.new,
+  memory: memory,
+  max_steps: 10
+).run(context: "Say hello then echo bye to finish.")
+# last => e.g. { "status" => "done", "echo" => "bye" }
+```
 ## Troubleshooting

data/docs/ARCHITECTURE.md ADDED Viewed

@@ -0,0 +1,42 @@
+# Architecture
+ollama_agent is a layered gem. Each layer is independently opt-in.
+## Data Flow
+```
+CLI / Runner.run(query)
+  → Session::Store.resume (if --resume)
+  → Agent#run
+      → Context::Manager.trim(messages)
+      → OllamaConnection + Resilience::RetryMiddleware
+          → Ollama::Client#chat
+              → Streaming::Hooks.emit(:on_token, ...)
+      → Tools::Registry / SandboxedTools.execute_tool(name, args)
+          → Resilience::AuditLogger (via hooks)
+      → Session::Store.save (after each turn)
+  → Streaming::Hooks.emit(:on_complete, ...)
+```
+## Layers
+| Layer | Files | Opt-in via |
+|-------|-------|-----------|
+| Core agent | `agent.rb`, `agent/*.rb`, `sandboxed_tools.rb`, `sandboxed_tools/*.rb` | Always on |
+| Path sandbox | `path_sandbox.rb` | Always on for file/search/list tools |
+| Env helpers | `env_config.rb` | Used by `Agent` and `SandboxedTools` for numeric ENV parsing |
+| User prompts | `user_prompt.rb` | Injectable stdin/stdout (default TTY); `Runner.build(stdin:, stdout:)` |
+| Tool Registry | `tools/registry.rb` | `OllamaAgent::Tools.register(...)` |
+| Streaming | `streaming/hooks.rb`, `streaming/console_streamer.rb` | `--stream` / `OLLAMA_AGENT_STREAM=1` |
+| Resilience | `resilience/retry_middleware.rb`, `resilience/audit_logger.rb` | On by default (retries); `--audit` for logging |
+| Context Manager | `context/manager.rb` | `--max-tokens N` / `OLLAMA_AGENT_MAX_TOKENS` |
+| Session | `session/store.rb` | `--session NAME` |
+| Runner API | `runner.rb` | `require "ollama_agent"; OllamaAgent::Runner.build(...)` |
+## Path sandbox (symlinks)
+Tool paths are checked with `PathSandbox.allowed?`: after expanding relative to the project root, `File.realpath` must stay under `File.realpath(project_root)`. A symlink **inside** the repo that points **outside** is rejected, so the model cannot follow `link → /etc` style escapes. Paths that do not yet exist are allowed only when every existing parent directory resolves under the real root (see `nonexistent_path_allowed_under_root?` in `path_sandbox.rb`).
+## ToolRuntime (parallel path)
+The coding agent flow above is **not** the only entry point. `OllamaAgent::ToolRuntime` implements a separate **JSON plan → tool → memory** loop for custom `Tool` classes and injectable planners. It is **not** used by `exe/ollama_agent`. See [TOOL_RUNTIME.md](TOOL_RUNTIME.md).

data/docs/PERFORMANCE.md ADDED Viewed

@@ -0,0 +1,22 @@
+# Performance notes
+This document records **known costs** and **when to optimize**. No changes here are mandatory for typical CLI use.
+## Text search (`search_code`, text mode)
+Each call spawns **`rg`** or **`grep`** as a subprocess. For very chatty agents or huge trees, that overhead can dominate. Before changing behavior:
+1. Measure wall time for your workload (local disk vs network FS matters).
+2. Consider narrowing `directory`, or using Ruby index modes (`mode: method`, etc.) which avoid ripgrep for symbol queries.
+## Patch application (`edit_file`)
+The flow runs **`patch --dry-run`** before applying the patch when validation passes, then **`patch`** again on success—two processes per confirmed edit. Caching or reusing dry-run output would save one spawn but adds complexity; only pursue this if profiling shows it matters.
+## Full-file reads
+`read_file` without line range loads the whole file (subject to `OLLAMA_AGENT_MAX_READ_FILE_BYTES`). Prefer `start_line` / `end_line` for large logs.
+## Context trimming (`Context::Manager#trim`)
+Each trim pass keeps a parallel array of per-message token estimates so the sliding-window loop does not re-scan message bodies on every `over_budget?` check. If you change trimming strategy, profile long sessions with a tight `max_tokens` budget before adding further caching.

data/docs/SESSIONS.md ADDED Viewed

@@ -0,0 +1,48 @@
+# Session Persistence
+Sessions save conversation history to `.ollama_agent/sessions/` under the project root.
+## CLI usage
+```bash
+# Start a named session
+ollama_agent ask --session my-refactor "Refactor the CLI module"
+# Resume it later (picks up exactly where it left off)
+ollama_agent ask --session my-refactor --resume "Now update the specs too"
+# Resume in interactive REPL
+ollama_agent ask -i --session my-refactor --resume
+# Resume most recent session (no name needed)
+ollama_agent ask --resume
+# List all sessions for the current project
+ollama_agent sessions
+```
+## Library API
+```ruby
+runner = OllamaAgent::Runner.build(
+  root:       "/my/project",
+  session_id: "my-refactor",
+  resume:     true
+)
+runner.run("Continue — now also add integration tests")
+```
+## File format
+Sessions are NDJSON files — one JSON object per line, human-readable and `jq`-able:
+```
+.ollama_agent/sessions/my-refactor.ndjson
+```
+```bash
+# View the last 5 messages
+tail -5 .ollama_agent/sessions/my-refactor.ndjson | jq .
+```
+Messages are appended after every agent turn — if the agent crashes mid-session, all completed turns are preserved.

data/docs/TOOLS.md ADDED Viewed

@@ -0,0 +1,53 @@
+# Custom Tool Registration
+Register a custom tool before calling `Runner.build`. The tool is automatically injected into the model's tool list.
+```ruby
+require "ollama_agent"
+OllamaAgent::Tools.register(
+  :run_tests,
+  schema: {
+    description: "Run the RSpec test suite and return the output",
+    properties: {
+      suite: { type: "string", description: "Path to spec file or directory (default: spec/)" }
+    },
+    required: []
+  }
+) do |args, root:, read_only:|
+  next "run_tests is disabled in read-only mode." if read_only
+  suite = args["suite"] || "spec/"
+  require "open3"
+  output, = Open3.capture2("bundle", "exec", "rspec", suite, chdir: root)
+  output
+end
+runner = OllamaAgent::Runner.build(root: "/my/project")
+runner.run("Fix the failing tests, then run them to confirm they pass")
+```
+## Handler signature
+```ruby
+OllamaAgent::Tools.register(:tool_name, schema: { ... }) do |args, root:, read_only:|
+  # args      — Hash of tool arguments from the model
+  # root      — String absolute path to the project root
+  # read_only — Boolean; return an error string if true and the tool writes files
+  "return value as String"
+end
+```
+## Schema format
+The `schema:` hash is the `function` body (without `name` — that comes from the first argument):
+```ruby
+schema: {
+  description: "What this tool does",
+  properties: {
+    param_name: { type: "string", description: "what it is" }
+  },
+  required: ["param_name"]
+}
+```