RubyGems - anima-core - Versions diffs - 1.3.0 → 1.4.0 - Mend

anima-core 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

checksums.yaml +4 -4
data/.reek.yml +6 -7
data/README.md +64 -16
data/app/decorators/tool_call_decorator.rb +3 -3
data/app/jobs/agent_request_job.rb +2 -2
data/app/jobs/passive_recall_job.rb +6 -11
data/app/models/concerns/message/broadcasting.rb +1 -0
data/app/models/goal.rb +2 -1
data/app/models/message.rb +0 -13
data/app/models/pending_message.rb +150 -2
data/app/models/session.rb +324 -266
data/bin/inspect-cassette +144 -0
data/bin/release +212 -0
data/bin/with-llms +20 -0
data/config/database.yml +1 -0
data/db/cable_structure.sql +9 -0
data/db/migrate/20260330120000_add_source_to_pending_messages.rb +8 -0
data/db/migrate/20260401180000_add_api_metrics_to_messages.rb +7 -0
data/db/migrate/20260401210935_remove_recalled_message_ids_from_sessions.rb +5 -0
data/db/migrate/20260403080031_add_initial_cwd_to_sessions.rb +5 -0
data/db/queue_structure.sql +61 -0
data/db/structure.sql +120 -0
data/lib/agent_loop.rb +42 -13
data/lib/analytical_brain/runner.rb +12 -2
data/lib/analytical_brain/tools/activate_skill.rb +2 -2
data/lib/analytical_brain/tools/assign_nickname.rb +1 -1
data/lib/analytical_brain/tools/deactivate_skill.rb +2 -1
data/lib/analytical_brain/tools/deactivate_workflow.rb +2 -1
data/lib/analytical_brain/tools/finish_goal.rb +3 -0
data/lib/analytical_brain/tools/goal_messaging.rb +28 -0
data/lib/analytical_brain/tools/read_workflow.rb +2 -2
data/lib/analytical_brain/tools/set_goal.rb +5 -1
data/lib/analytical_brain/tools/update_goal.rb +5 -1
data/lib/anima/cli.rb +41 -13
data/lib/anima/installer.rb +13 -0
data/lib/anima/settings.rb +13 -7
data/lib/anima/version.rb +1 -1
data/lib/events/agent_message.rb +14 -0
data/lib/events/subscribers/persister.rb +2 -1
data/lib/events/subscribers/subagent_message_router.rb +4 -7
data/lib/llm/client.rb +37 -30
data/lib/mneme/compressed_viewport.rb +8 -4
data/lib/mneme/passive_recall.rb +85 -16
data/lib/mneme/runner.rb +15 -4
data/lib/providers/anthropic.rb +112 -7
data/lib/shell_session.rb +185 -2
data/lib/tools/base.rb +0 -1
data/lib/tools/bash.rb +16 -14
data/lib/tools/mark_goal_completed.rb +4 -5
data/lib/tools/registry.rb +6 -1
data/lib/tools/response_truncator.rb +1 -1
data/lib/tools/spawn_specialist.rb +10 -8
data/lib/tools/spawn_subagent.rb +17 -13
data/lib/tools/subagent_prompts.rb +13 -15
data/lib/tui/app.rb +389 -146
data/lib/tui/cable_client.rb +9 -16
data/lib/tui/decorators/base_decorator.rb +24 -4
data/lib/tui/decorators/bash_decorator.rb +1 -1
data/lib/tui/decorators/edit_decorator.rb +4 -2
data/lib/tui/decorators/read_decorator.rb +4 -2
data/lib/tui/decorators/think_decorator.rb +2 -2
data/lib/tui/decorators/web_get_decorator.rb +1 -1
data/lib/tui/decorators/write_decorator.rb +4 -2
data/lib/tui/flash.rb +19 -14
data/lib/tui/formatting.rb +20 -9
data/lib/tui/input_buffer.rb +6 -6
data/lib/tui/message_store.rb +89 -1
data/lib/tui/performance_logger.rb +2 -3
data/lib/tui/screens/chat.rb +56 -60
data/lib/tui/settings.rb +86 -0
data/templates/config.toml +12 -9
data/templates/tui.toml +209 -0
metadata +14 -3
data/config/initializers/fts5_schema_dump.rb +0 -21
data/lib/environment_probe.rb +0 -232

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2ec0963d3bb57afc7a12414258c90d645dd200ca489bd7ab34e6ace19ea11927
-  data.tar.gz: 712d1904e5ddf7b0c244ddeffb2ed993f6265d097eb18000b139cdb08a9bfc4c
+  metadata.gz: 283cb2ad728734b96a5badc8b6fc2624c920d3dbefd17412bf5c5f4ae452d6e3
+  data.tar.gz: 0ecef454ffd58b1a4c232338e58a4d20fe4b42f4c3d2ecd6aca6dcc446b0daed
 SHA512:
-  metadata.gz: 3b70800258af5296bf243e7b0c020efd6f7736e3f1f58734352e022ca8ec7a15599ffce938fc1adc511bc37a7141472845dcead21b6512cdc09dfceae8b04b87
-  data.tar.gz: e70536705ac4d1cc468a951079799edd8677cae3c167251c60cccfbb51f79b0e2e7f896b5420f4972c8a59ee3d377ae0fd284d3603db441bd45313a1030f60ca
+  metadata.gz: 69115665f072f86b590222cdf4c6ec3ca75be2287fea4d1b50b2361525cb31b75e561b6f6e0ae0e375dfa7c5717b6d549237202f76a7dd4a64826c19a26311fe
+  data.tar.gz: b7b35426013e036bc18c5bd8906f26cadd360be8246ef213018deea41cac52e04747a321c6aaf5835dc9f04ff6c19f04bd344de97c08227591bf6563e03abab1

data/.reek.yml CHANGED Viewed

@@ -13,8 +13,8 @@ detectors:
   NilCheck:
     exclude:
       - "Anima::Settings#get"
+      - "TUI::Settings#get"
   # Rescue blocks naturally reference the error object more than self.
-  # EnvironmentProbe assembles output from local data structures — not envy.
   # Brain transcript builds from event collection — the method's entire purpose.
   # ConfigMigrator text processing methods naturally reference local line arrays.
   # ToolDecorator subclasses operate on the tool result — that's the pattern.
@@ -22,7 +22,6 @@ detectors:
   FeatureEnvy:
     exclude:
       - "AnalyticalBrainJob#perform"
-      - "EnvironmentProbe"
       - "AnalyticalBrain::Runner#build_messages"
       - "Anima::ConfigMigrator"
       - "WebGetToolDecorator"
@@ -68,13 +67,13 @@ detectors:
       - "WebGetToolDecorator#text_html"
   # Session model is the core domain object — methods grow naturally.
   # Mcp CLI accumulates subcommand helpers across add/remove/list/secrets.
-  # EnvironmentProbe probes multiple orthogonal facets (OS, Git, project files).
-  # Each facet needs its own detection + formatting methods.
+  # ShellSession probes multiple orthogonal facets (CWD, Git, project files)
+  # and manages PTY lifecycle — methods grow with responsibilities.
   TooManyMethods:
     exclude:
       - "Session"
       - "Anima::CLI::Mcp"
-      - "EnvironmentProbe"
+      - "ShellSession"
       # Runner composes system prompt from modular sections — methods grow with responsibilities.
       - "AnalyticalBrain::Runner"
   # Decorators branch on tool type across 4 render modes — inherent to the pattern.
@@ -86,11 +85,11 @@ detectors:
       # Runner checks session type to compose responsibilities — the core dispatch.
       - "AnalyticalBrain::Runner"
   # EventDecorator holds shared rendering constants (icons, markers, dispatch maps).
-  # Event model holds domain type constants (TYPES, CONTEXT_TYPES, SPAWN_TOOLS, etc.).
+  # Message holds domain type constants (TYPES, CONTEXT_TYPES, LLM_TYPES, etc.).
   TooManyConstants:
     exclude:
       - "EventDecorator"
-      - "Event"
+      - "Message"
   # encode_utf8 is descriptive — the digit triggers a false positive.
   UncommunicativeMethodName:
     exclude:

data/README.md CHANGED Viewed

@@ -14,7 +14,7 @@ Anima is different. It's built on the premise that if you want an agent — a re
 **Memory that works like memory.** Other systems bolt on memory as an afterthought — filing cabinets the agent has to consciously open mid-task. It never does; the truck is already moving. Anima's memory department ([Mneme](#semantic-memory-mneme)) runs as a third brain process on the event bus. It summarizes what's about to leave the viewport. It compresses short-term into long-term, like biological memory consolidating during sleep. It pins critical moments to active goals so exact instructions survive where summaries would lose nuance. And it recalls — automatically, passively — surfacing relevant older memories right after the soul, right before the present. The agent doesn't decide to remember. It just remembers.
-**Sub-agents that already know everything.** When Anima spawns a sub-agent, it inherits the parent's full event stream — every file read, every decision, every user message. No "let me summarize what I know." Lossless context. Zero wasted tool calls on rediscovery.
+**Sub-agents that know who they are.** When Anima spawns a sub-agent, it starts clean — identity, task, and nothing else. No inherited conversation history means the sub-agent works on its task, not the parent's trajectory. Context flows through explicit messages, not leaked assistant turns.
 **A soul the agent writes itself.** Anima's first session is birth. The agent wakes up, explores its world, meets its human, and writes its own identity. Not a personality description in a config file — a living document the agent authors and evolves. Always in context, always its own.
@@ -63,7 +63,7 @@ Anima (Ruby, Rails 8.1 headless)
 ├── Skills       — domain knowledge bundles (Markdown, user-extensible)
 ├── Workflows    — operational recipes for multi-step tasks
 ├── MCP          — external tool integration (Model Context Protocol)
-├── Sub-agents   — autonomous child sessions with lossless context inheritance
+├── Sub-agents   — autonomous child sessions with isolated context
 ├── Mneme        — memory department (summarization, compression, pinning, recall)
 │
 │ Designed:
@@ -129,7 +129,8 @@ State directory (`~/.anima/`):
 ```
 ~/.anima/
 ├── soul.md          # Agent's self-authored identity (always in context)
-├── config.toml      # Main settings (hot-reloadable)
+├── config.toml      # Brain settings (hot-reloadable)
+├── tui.toml         # TUI settings (hot-reloadable)
 ├── mcp.toml         # MCP server configuration
 ├── config/
 │   └── credentials/   # Rails encrypted credentials (includes AR encryption keys)
@@ -141,7 +142,7 @@ State directory (`~/.anima/`):
 └── tmp/
 ```
-Updates: `anima update` — upgrades the gem, merges new config settings into your existing `config.toml` without overwriting customized values, and restarts the systemd service if it's running. Use `anima update --migrate-only` to skip the gem upgrade and only add missing config keys.
+Updates: `anima update` — upgrades the gem, merges new config settings into both `config.toml` and `tui.toml` without overwriting customized values, and restarts the systemd service if it's running. Use `anima update --migrate-only` to skip the gem upgrade and only add missing config keys.
 ### Authentication Setup
@@ -178,7 +179,7 @@ Plus dynamic tools from configured MCP servers, namespaced as `server_name__tool
 ### Sub-Agents
-Sub-agents aren't processes — they're sessions on the same event bus. When a sub-agent spawns, its viewport assembles from two scopes: its own events (prioritized) and the parent's events (filling remaining budget). No context serialization, no summary prompts — the sub-agent sees the parent's raw event stream and already knows everything the parent knows. Lossless inheritance by architecture, not by prompting.
+Sub-agents aren't processes — they're sessions on the same event bus. When a sub-agent spawns, it starts with a clean context: a system prompt (identity + communication instructions), a Goal from the task description, and a single user message containing the task — auto-pinned so it survives viewport eviction. No parent conversation history. Sub-agents inherit the parent shell's working directory at spawn time and use a separate model and token budget (configurable via `subagent_model` and `subagent_token_budget`).
 Two types:
@@ -194,24 +195,25 @@ Two types:
 **Generic Sub-agents** — child sessions with custom tool grants for ad-hoc tasks. Each generic sub-agent gets a Haiku-generated nickname (e.g. `@loop-sleuth`, `@api-scout`) for @mention addressing.
-Each sub-agent is spawned with a single **Goal** pinned from its task description and a framing message that redirects attention away from inherited parent goals. When done, the sub-agent calls `mark_goal_completed` to deliver results to the parent — this is the explicit finish line that prevents runaway agents. Sub-agents also get half the main agent's thinking budget to limit scope creep.
+Each sub-agent is spawned with a single **Goal** from its task description and a pinned user message containing the task text. When done, the sub-agent calls `mark_goal_completed` to deliver results to the parent — this is the explicit finish line that prevents runaway agents. Sub-agents also get half the main agent's thinking budget to limit scope creep.
 Between spawn and completion, sub-agents communicate through natural text — their `agent_message` events route to the parent session automatically, and the parent replies via `@name` mentions. Workers become colleagues.
 ### Skills
-Domain knowledge bundles loaded from Markdown files. Skills provide specialized expertise that the analytical brain activates and deactivates based on conversation context.
+Domain knowledge bundles loaded from Markdown files. Skills provide specialized expertise that the analytical brain activates based on conversation context. Skill content enters the conversation as phantom tool_use/tool_result pairs through the `PendingMessage` promotion flow — the same mechanism used for sub-agent messages. This keeps the system prompt stable for prompt caching while skills flow through the sliding window like regular messages.
 - **Built-in skills:** ActiveRecord, Draper decorators, DragonRuby, MCP server, RatatuiRuby, RSpec, GitHub issues
 - **User skills:** Drop `.md` files into `~/.anima/skills/` to add custom knowledge
 - **Override:** User skills with the same name replace built-in ones
 - **Format:** Flat files (`skill-name.md`) or directories (`skill-name/SKILL.md` with `examples/` and `references/`)
+- **Viewport deduplication:** The brain's skill catalog excludes skills already visible in the viewport, preventing redundant activation
 Active skills are displayed in the TUI HUD panel (toggle with `C-a → h`).
 ### Workflows
-Operational recipes that describe multi-step tasks. Unlike skills (domain knowledge), workflows describe WHAT to do. The analytical brain activates a workflow when it recognizes a matching task, converts the prose into tracked goals, and deactivates it when done.
+Operational recipes that describe multi-step tasks. Unlike skills (domain knowledge), workflows describe WHAT to do. The analytical brain activates a workflow when it recognizes a matching task, converts the prose into tracked goals, and deactivates it when done. Like skills, workflow content enters the conversation as phantom tool pairs through the same `PendingMessage` flow.
 - **Built-in workflows:** `feature`, `commit`, `create_plan`, `implement_plan`, `review_pr`, `create_note`, `research_codebase`, `decompose_ticket`, and more
 - **User workflows:** Drop `.md` files into `~/.anima/workflows/` to add custom workflows
@@ -286,7 +288,9 @@ Goals form a two-level hierarchy (root goals with sub-goals) and are displayed i
 ### Configuration
-All tunable values are exposed through `~/.anima/config.toml` with hot-reload (no restart needed):
+Brain and TUI have separate config files — both hot-reloadable (no restart needed).
+**Brain settings** (`~/.anima/config.toml`):
 ```toml
 [llm]
@@ -294,15 +298,14 @@ model = "claude-opus-4-6"
 fast_model = "claude-haiku-4-5"
 max_tokens = 8192
 max_tool_rounds = 250
-token_budget = 190_000
+token_budget = 120_000
+subagent_model = "claude-sonnet-4-6"
+subagent_token_budget = 90_000
 [timeouts]
 api = 300
 command = 30
-[goals]
-completed_decay_messages = 5
 [analytical_brain]
 max_tokens = 4096
 blocking_on_user_message = true
@@ -313,6 +316,27 @@ default_view_mode = "basic"
 name_generation_interval = 30
 ```
+**TUI settings** (`~/.anima/tui.toml`):
+```toml
+[connection]
+default_host = "localhost:42134"    # Override per-launch with --host
+[chat]
+scroll_step = 1
+viewport_back_buffer = 3
+[theme]
+rate_limit_warning = 70             # Yellow at 70%
+rate_limit_critical = 90            # Red at 90%
+user_message_bg = 22                # 256-color: dark green
+assistant_message_bg = 17           # 256-color: dark navy
+scrollbar_thumb = "cyan"
+border_focused = "yellow"
+```
+The TUI is a standalone client with zero Rails dependency. Its settings cover connection tuning, scroll behavior, terminal watchdog, theme colors, and performance logging. See `~/.anima/tui.toml` for all available options.
 ## Design
 ### Three Layers (mirroring biology)
@@ -341,7 +365,7 @@ Events flow through two channels:
 1. **In-process** — Rails Structured Event Reporter (local subscribers like Persister)
 2. **Over the wire** — Action Cable WebSocket (`Event::Broadcasting` callbacks push to connected TUI clients)
-Events fire, subscribers react, state updates. The system prompt — soul, active skills, active workflow, current goals — is assembled fresh for each LLM call from live state, not from the event stream. The agent's identity (soul.md) and capabilities (skills, workflows) are always current, never stale.
+Events fire, subscribers react, state updates. The system prompt — soul and current goals — is assembled fresh for each LLM call from live state, not from the event stream. Skills and workflows flow through the message stream as phantom tool pairs, keeping the system prompt stable for prompt caching. The agent's identity (soul.md) is always current, never stale.
 ### Context as Viewport, Not Tape
@@ -351,7 +375,7 @@ The viewport is a live query, not a log. It walks events newest-first until the
 This means sessions are endless. No compaction. No lossy rewriting. The model always operates in fresh, high-quality context. The [dumb zone](https://github.com/humanlayer/advanced-context-engineering-for-coding-agents/blob/main/ace-fca.md) never arrives. Meanwhile, Mneme runs as a background department — summarizing evicted events into persistent snapshots so past context is preserved, not destroyed.
-Sub-agent viewports compose from two event scopes — their own events (prioritized) and parent events (filling remaining budget). Same mechanism, no special handling. The bus is the architecture.
+Sub-agent viewports use the same mechanism — their own events only, no parent context inheritance. The parent provides context through the task description, and the sub-agent builds its own conversation from a clean slate.
 ### Brain as Microservices on a Shared Event Bus
@@ -408,6 +432,30 @@ The right-side HUD panel shows session state at a glance: session name, goals (w
 **Braille spinner**: An animated braille character (U+2800-U+28FF) replaces the old "Thinking..." label in both the chat viewport and HUD. Each processing state has a distinct animation pattern — smooth snake rotation for LLM generation, staccato pulse for tool execution, rapid deceleration for interrupting. Sub-agents in the HUD show state-driven icons: `●` (generating, green), `◉` (tool executing, green), `●` (interrupting, red), `◌` (idle, grey).
+**Token Economy HUD**: A fixed panel at the bottom of the HUD displays API economics extracted from every Anthropic response:
+```
+╭ 📊 Token Economy ────────────────────╮
+│  5h ░░░░░░░░  1% ➞3h42m              │
+│  7d ▓▓▓▓▓▓▓▓ 98%                     │
+│  ⚡ ▓▓▓▓▓▓░░ 69%                     │
+│  💾 6.3K tokens                      │
+│     ⠛⣿⣷⣶⣿⣿⣿⣿⣷⣶⣿⣿⣿           │
+│  🟢 Verbose                          │
+╰──────────────────────────────────────╯
+```
+| Row | Description |
+|-----|-------------|
+| `5h` | 5-hour rate limit utilization with progress bar and reset countdown |
+| `7d` | 7-day rate limit utilization with progress bar |
+| `⚡` | Cache hit rate — percentage of input tokens served from cache |
+| `💾` | Cumulative tokens saved by cache hits |
+| `⠛⣿` | Braille sparkline — per-call cache hit history (2 calls per character); drops signal cache busts |
+| `🟢` | Connection status and current view mode |
+Progress bars are color-coded: green (< 70%), yellow (70-89%), red (>= 90%) for rate limits; inverted for cache hits (green >= 70%, red < 30%). All data comes from Anthropic API response headers and usage objects, broadcast as message metadata via ActionCable.
 When content exceeds the panel height, the HUD scrolls. Three input methods:
 | Input | Action |
@@ -609,7 +657,7 @@ This single example demonstrates every core principle:
 - Mneme memory department (eviction-triggered summarization, persistent snapshots, goal-scoped event pinning, associative recall)
 - 12 built-in tools + MCP integration (HTTP + stdio transports)
 - 7 built-in skills + 13 built-in workflows (user-extensible)
-- Sub-agents with lossless context inheritance (5 specialists + generic)
+- Sub-agents with isolated context (5 specialists + generic)
 - Client-server architecture with WebSocket transport + graceful reconnection
 - Collapsible HUD panel with goals, skills, workflow, and sub-agent tracking
 - Three TUI view modes (Basic / Verbose / Debug)

data/app/decorators/tool_call_decorator.rb CHANGED Viewed

@@ -105,10 +105,10 @@ class ToolCallDecorator < MessageDecorator
   # Formats write tool input with file path header and content body.
   # Content newlines are preserved so the TUI can render them as
   # separate lines, matching how read_file tool responses display file content.
-  # @param input [Hash] tool input hash with "file_path" and "content" keys
+  # @param input [Hash] tool input hash with "path" and "content" keys
   # @return [String] path + content with real newlines, or TOON-encoded hash when content is empty
   def format_write_content(input)
-    path = input.dig("file_path").to_s
+    path = input.dig("path").to_s
     content = input.dig("content").to_s
     return Toon.encode(input) if content.empty?
@@ -126,7 +126,7 @@ class ToolCallDecorator < MessageDecorator
     when "web_get"
       "GET #{input&.dig("url")}"
     when "read_file", "edit_file", "write_file"
-      input&.dig("file_path").to_s
+      input&.dig("path").to_s
     else
       truncate_lines(Toon.encode(input), max_lines: 2)
     end

data/app/jobs/agent_request_job.rb CHANGED Viewed

@@ -66,10 +66,10 @@ class AgentRequestJob < ApplicationJob
       agent_loop.run
     end
-    # Process any pending messages queued while we were busy.
+    # Process any pending messages that arrived after the last tool round.
     loop do
       promoted = session.promote_pending_messages!
-      break if promoted == 0
+      break if promoted[:texts].empty? && promoted[:pairs].empty?
       agent_loop.run
     end

data/app/jobs/passive_recall_job.rb CHANGED Viewed

@@ -1,11 +1,11 @@
 # frozen_string_literal: true
 # Runs passive recall after goal updates — searches message history for
-# context relevant to active goals and caches results on the session
-# for viewport injection.
+# context relevant to active goals and injects phantom tool_call/tool_response
+# pairs into the session's message stream.
 #
-# Idempotent: multiple enqueues for the same session safely overwrite
-# each other's results — last one wins.
+# Phantom pairs ride the conveyor belt like regular messages, getting
+# cached, evicted, and compressed by Mneme naturally.
 #
 # @example
 #   PassiveRecallJob.perform_later(session.id)
@@ -17,13 +17,8 @@ class PassiveRecallJob < ApplicationJob
   # @param session_id [Integer]
   def perform(session_id)
     session = Session.find(session_id)
-    results = Mneme::PassiveRecall.new(session).call
+    count = Mneme::PassiveRecall.new(session).call
-    if results.any?
-      session.update_column(:recalled_message_ids, results.map(&:message_id))
-      Mneme.logger.info("session=#{session_id} — passive recall found #{results.size} memories")
-    elsif session.recalled_message_ids.present?
-      session.update_column(:recalled_message_ids, [])
-    end
+    Mneme.logger.info("session=#{session_id} — passive recall injected #{count} phantom pairs") if count > 0
   end
 end

data/app/models/concerns/message/broadcasting.rb CHANGED Viewed

@@ -68,6 +68,7 @@ module Message::Broadcasting
     mode = session.view_mode
     decorator = MessageDecorator.for(self)
     broadcast_payload = payload.merge("id" => id, "action" => action)
+    broadcast_payload["api_metrics"] = api_metrics if api_metrics.present?
     if decorator
       broadcast_payload["rendered"] = {mode => decorator.render(mode)}

data/app/models/goal.rb CHANGED Viewed

@@ -31,7 +31,8 @@ class Goal < ApplicationRecord
   scope :not_evicted, -> { where(evicted_at: nil) }
   # @!method self.evictable
-  #   Completed goals pending eviction — visible to the brain for age-based review.
+  #   Completed goals not yet evicted — their phantom pairs remain in the
+  #   sliding window until Mneme compresses them during the eviction cycle.
   #   @return [ActiveRecord::Relation]
   scope :evictable, -> { completed.where(evicted_at: nil) }

data/app/models/message.rb CHANGED Viewed

@@ -28,8 +28,6 @@ class Message < ApplicationRecord
   CONTEXT_TYPES = %w[system_message user_message agent_message tool_call tool_response].freeze
   CONVERSATION_TYPES = %w[user_message agent_message system_message].freeze
   THINK_TOOL = "think"
-  SPAWN_TOOLS = %w[spawn_subagent spawn_specialist].freeze
   # Message types that require a tool_use_id to pair call with response.
   TOOL_TYPES = %w[tool_call tool_response].freeze
@@ -71,17 +69,6 @@ class Message < ApplicationRecord
   #   @return [ActiveRecord::Relation]
   scope :context_messages, -> { where(message_type: CONTEXT_TYPES) }
-  # @!method self.excluding_spawn_messages
-  #   Excludes spawn_subagent/spawn_specialist tool_call and tool_response messages.
-  #   Used when building parent context for sub-agents — spawn messages cause role
-  #   confusion because the sub-agent sees sibling spawn results and mistakes
-  #   itself for the parent.
-  #   @return [ActiveRecord::Relation]
-  scope :excluding_spawn_messages, -> {
-    where.not("message_type IN (?) AND json_extract(payload, '$.tool_name') IN (?)",
-      TOOL_TYPES, SPAWN_TOOLS)
-  }
   # Maps message_type to the Anthropic Messages API role.
   # @return [String] "user" or "assistant"
   def api_role

data/app/models/pending_message.rb CHANGED Viewed

@@ -1,27 +1,175 @@
 # frozen_string_literal: true
-# A user message waiting to enter a session's conversation history.
+# A message waiting to enter a session's conversation history.
 # Pending messages live in their own table — they are NOT part of the
 # message stream and have no database ID that could interleave with
 # tool_call/tool_response pairs.
 #
-# Created when a user sends a message while the session is processing.
+# Created when a message arrives while the session is processing.
 # Promoted to a real {Message} (delete + create in transaction) when
 # the current agent loop completes, giving the new message an ID that
 # naturally follows the tool batch.
 #
+# Each pending message knows its source (+source_type+, +source_name+)
+# and how to serialize itself for the LLM conversation via {#to_llm_messages}.
+# Non-user messages (sub-agent results, recalled skills, workflows, recall,
+# goal events) become synthetic tool_use/tool_result pairs so the LLM sees
+# "a tool I invoked returned a result" rather than "a user wrote me."
+#
 # @see Session#enqueue_user_message
 # @see Session#promote_pending_messages!
 class PendingMessage < ApplicationRecord
+  # Synthetic tool names used in tool_use/tool_result pairs injected into
+  # the parent LLM conversation when non-user messages are promoted.
+  # These tools don't exist in the agent's registry — the agent sees
+  # them as its own past actions (phantom tool calls).
+  SUBAGENT_TOOL = "subagent_message"
+  RECALL_SKILL_TOOL = "recall_skill"
+  RECALL_WORKFLOW_TOOL = "recall_workflow"
+  RECALL_MEMORY_TOOL = "recall_memory"
+  RECALL_GOAL_TOOL = "recall_goal"
+  # Source types that produce phantom tool_use/tool_result pairs on promotion.
+  # User messages produce plain text blocks instead.
+  PHANTOM_PAIR_TYPES = %w[subagent skill workflow recall goal].freeze
+  # Maps each phantom pair source type to its synthetic tool name.
+  PHANTOM_TOOL_NAMES = {
+    "subagent" => SUBAGENT_TOOL,
+    "skill" => RECALL_SKILL_TOOL,
+    "workflow" => RECALL_WORKFLOW_TOOL,
+    "recall" => RECALL_MEMORY_TOOL,
+    "goal" => RECALL_GOAL_TOOL
+  }.freeze
+  # Maps each phantom pair source type to a lambda building its tool input.
+  PHANTOM_TOOL_INPUTS = {
+    "subagent" => ->(name) { {from: name} },
+    "skill" => ->(name) { {skill: name} },
+    "workflow" => ->(name) { {workflow: name} },
+    "recall" => ->(name) { {message_id: name.to_i} },
+    "goal" => ->(name) { {goal_id: name.to_i} }
+  }.freeze
   belongs_to :session
   validates :content, presence: true
+  validates :source_type, inclusion: {in: %w[user subagent skill workflow recall goal]}
+  validates :source_name, presence: true, unless: :user?
   after_create_commit :broadcast_created
   after_destroy_commit :broadcast_removed
+  # @return [Boolean] true when this is a plain user message
+  def user?
+    source_type == "user"
+  end
+  # @return [Boolean] true when this message originated from a sub-agent
+  def subagent?
+    source_type == "subagent"
+  end
+  # @return [Boolean] true when this message carries recalled skill content
+  def skill?
+    source_type == "skill"
+  end
+  # @return [Boolean] true when this message carries recalled workflow content
+  def workflow?
+    source_type == "workflow"
+  end
+  # @return [Boolean] true when this message is an associative recall phantom pair
+  def recall?
+    source_type == "recall"
+  end
+  # @return [Boolean] true when this message carries a goal event
+  def goal?
+    source_type == "goal"
+  end
+  # @return [Boolean] true when promotion produces phantom tool_use/tool_result pairs
+  def phantom_pair?
+    source_type.in?(PHANTOM_PAIR_TYPES)
+  end
+  # Phantom tool name for DB persistence and LLM injection.
+  # Each phantom pair source type maps to a synthetic tool name.
+  #
+  # @return [String] phantom tool name
+  def phantom_tool_name
+    PHANTOM_TOOL_NAMES.fetch(source_type)
+  end
+  # Phantom tool input hash for DB persistence and LLM injection.
+  #
+  # @return [Hash] tool input hash
+  def phantom_tool_input
+    PHANTOM_TOOL_INPUTS.fetch(source_type).call(source_name)
+  end
+  # Content formatted for display and history persistence.
+  # Sub-agent messages include an attribution prefix. Skill/workflow
+  # messages include a recall label. User messages pass through unchanged.
+  #
+  # @return [String]
+  def display_content
+    case source_type
+    when "subagent"
+      format(Tools::ResponseTruncator::ATTRIBUTION_FORMAT, source_name, content)
+    when "skill"
+      "[recalled skill: #{source_name}]\n#{content}"
+    when "workflow"
+      "[recalled workflow: #{source_name}]\n#{content}"
+    when "goal"
+      "[goal #{source_name}]\n#{content}"
+    else
+      content
+    end
+  end
+  # Builds LLM message hashes for this pending message.
+  #
+  # Phantom pair types become synthetic tool_use/tool_result pairs so the
+  # LLM sees them as its own past invocations. User messages return plain
+  # content for injection as text blocks within the current tool_results turn.
+  #
+  # @return [Array<Hash>] synthetic tool pair for phantom pair types
+  # @return [String] raw content for user messages
+  def to_llm_messages
+    return content unless phantom_pair?
+    build_phantom_pair(phantom_tool_name, phantom_tool_input)
+  end
   private
+  # Builds a phantom tool_use/tool_result message pair.
+  # Follows the same format for all non-user source types — the only
+  # difference is the tool name and input hash.
+  #
+  # Phantom pairs keep the system prompt stable for prompt caching (#395).
+  # Instead of injecting skills/workflows into the system prompt (which
+  # busts the cache on every change), they flow through the sliding window
+  # as messages the LLM "recalls" via phantom tool invocations.
+  #
+  # @param tool_name [String] phantom tool name (not in the agent's registry)
+  # @param input [Hash] tool input hash
+  # @return [Array<Hash>] two-element array: assistant tool_use + user tool_result
+  def build_phantom_pair(tool_name, input)
+    tool_use_id = "#{tool_name}_#{id}"
+    [
+      {role: "assistant", content: [
+        {type: "tool_use", id: tool_use_id, name: tool_name, input: input}
+      ]},
+      {role: "user", content: [
+        {type: "tool_result", tool_use_id: tool_use_id, content: content}
+      ]}
+    ]
+  end
   # Broadcasts a pending message appearance so TUI clients render the
   # dimmed indicator immediately.
   def broadcast_created