swarm_sdk 2.7.14 → 3.0.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/lib/swarm_sdk/ruby_llm_patches/chat_callbacks_patch.rb +16 -0
  3. data/lib/swarm_sdk/ruby_llm_patches/init.rb +4 -1
  4. data/lib/swarm_sdk/v3/agent.rb +1165 -0
  5. data/lib/swarm_sdk/v3/agent_builder.rb +533 -0
  6. data/lib/swarm_sdk/v3/agent_definition.rb +330 -0
  7. data/lib/swarm_sdk/v3/configuration.rb +490 -0
  8. data/lib/swarm_sdk/v3/debug_log.rb +86 -0
  9. data/lib/swarm_sdk/v3/event_stream.rb +130 -0
  10. data/lib/swarm_sdk/v3/hooks/context.rb +112 -0
  11. data/lib/swarm_sdk/v3/hooks/result.rb +115 -0
  12. data/lib/swarm_sdk/v3/hooks/runner.rb +128 -0
  13. data/lib/swarm_sdk/v3/mcp/connector.rb +183 -0
  14. data/lib/swarm_sdk/v3/mcp/mcp_error.rb +15 -0
  15. data/lib/swarm_sdk/v3/mcp/server_definition.rb +125 -0
  16. data/lib/swarm_sdk/v3/mcp/ssl_http_transport.rb +103 -0
  17. data/lib/swarm_sdk/v3/mcp/stdio_transport.rb +135 -0
  18. data/lib/swarm_sdk/v3/mcp/tool_proxy.rb +53 -0
  19. data/lib/swarm_sdk/v3/memory/adapters/base.rb +297 -0
  20. data/lib/swarm_sdk/v3/memory/adapters/faiss_support.rb +194 -0
  21. data/lib/swarm_sdk/v3/memory/adapters/filesystem_adapter.rb +212 -0
  22. data/lib/swarm_sdk/v3/memory/adapters/sqlite_adapter.rb +507 -0
  23. data/lib/swarm_sdk/v3/memory/adapters/vector_utils.rb +88 -0
  24. data/lib/swarm_sdk/v3/memory/card.rb +206 -0
  25. data/lib/swarm_sdk/v3/memory/cluster.rb +146 -0
  26. data/lib/swarm_sdk/v3/memory/compressor.rb +496 -0
  27. data/lib/swarm_sdk/v3/memory/consolidator.rb +427 -0
  28. data/lib/swarm_sdk/v3/memory/context_builder.rb +339 -0
  29. data/lib/swarm_sdk/v3/memory/edge.rb +105 -0
  30. data/lib/swarm_sdk/v3/memory/embedder.rb +185 -0
  31. data/lib/swarm_sdk/v3/memory/exposure_tracker.rb +104 -0
  32. data/lib/swarm_sdk/v3/memory/ingestion_pipeline.rb +394 -0
  33. data/lib/swarm_sdk/v3/memory/retriever.rb +289 -0
  34. data/lib/swarm_sdk/v3/memory/store.rb +489 -0
  35. data/lib/swarm_sdk/v3/skills/loader.rb +147 -0
  36. data/lib/swarm_sdk/v3/skills/manifest.rb +45 -0
  37. data/lib/swarm_sdk/v3/sub_task_agent.rb +248 -0
  38. data/lib/swarm_sdk/v3/tools/base.rb +80 -0
  39. data/lib/swarm_sdk/v3/tools/bash.rb +174 -0
  40. data/lib/swarm_sdk/v3/tools/clock.rb +32 -0
  41. data/lib/swarm_sdk/v3/tools/document_converters/base.rb +84 -0
  42. data/lib/swarm_sdk/v3/tools/document_converters/docx_converter.rb +120 -0
  43. data/lib/swarm_sdk/v3/tools/document_converters/pdf_converter.rb +111 -0
  44. data/lib/swarm_sdk/v3/tools/document_converters/xlsx_converter.rb +128 -0
  45. data/lib/swarm_sdk/v3/tools/edit.rb +111 -0
  46. data/lib/swarm_sdk/v3/tools/glob.rb +96 -0
  47. data/lib/swarm_sdk/v3/tools/grep.rb +200 -0
  48. data/lib/swarm_sdk/v3/tools/message_teammate.rb +15 -0
  49. data/lib/swarm_sdk/v3/tools/message_user.rb +15 -0
  50. data/lib/swarm_sdk/v3/tools/read.rb +213 -0
  51. data/lib/swarm_sdk/v3/tools/read_tracker.rb +40 -0
  52. data/lib/swarm_sdk/v3/tools/registry.rb +208 -0
  53. data/lib/swarm_sdk/v3/tools/sub_task.rb +183 -0
  54. data/lib/swarm_sdk/v3/tools/think.rb +88 -0
  55. data/lib/swarm_sdk/v3/tools/write.rb +87 -0
  56. data/lib/swarm_sdk/v3.rb +145 -0
  57. metadata +88 -149
  58. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -175
  59. data/lib/swarm_sdk/agent/builder.rb +0 -705
  60. data/lib/swarm_sdk/agent/chat.rb +0 -1438
  61. data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -375
  62. data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
  63. data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
  64. data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -85
  65. data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -290
  66. data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
  67. data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
  68. data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -134
  69. data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
  70. data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -146
  71. data/lib/swarm_sdk/agent/context.rb +0 -115
  72. data/lib/swarm_sdk/agent/context_manager.rb +0 -315
  73. data/lib/swarm_sdk/agent/definition.rb +0 -588
  74. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -226
  75. data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -173
  76. data/lib/swarm_sdk/agent/tool_registry.rb +0 -189
  77. data/lib/swarm_sdk/agent_registry.rb +0 -146
  78. data/lib/swarm_sdk/builders/base_builder.rb +0 -558
  79. data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
  80. data/lib/swarm_sdk/concerns/cleanupable.rb +0 -42
  81. data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
  82. data/lib/swarm_sdk/concerns/validatable.rb +0 -55
  83. data/lib/swarm_sdk/config.rb +0 -368
  84. data/lib/swarm_sdk/configuration/parser.rb +0 -397
  85. data/lib/swarm_sdk/configuration/translator.rb +0 -285
  86. data/lib/swarm_sdk/configuration.rb +0 -165
  87. data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
  88. data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -102
  89. data/lib/swarm_sdk/context_compactor.rb +0 -335
  90. data/lib/swarm_sdk/context_management/builder.rb +0 -128
  91. data/lib/swarm_sdk/context_management/context.rb +0 -328
  92. data/lib/swarm_sdk/custom_tool_registry.rb +0 -226
  93. data/lib/swarm_sdk/defaults.rb +0 -251
  94. data/lib/swarm_sdk/events_to_messages.rb +0 -199
  95. data/lib/swarm_sdk/hooks/adapter.rb +0 -359
  96. data/lib/swarm_sdk/hooks/context.rb +0 -197
  97. data/lib/swarm_sdk/hooks/definition.rb +0 -80
  98. data/lib/swarm_sdk/hooks/error.rb +0 -29
  99. data/lib/swarm_sdk/hooks/executor.rb +0 -146
  100. data/lib/swarm_sdk/hooks/registry.rb +0 -147
  101. data/lib/swarm_sdk/hooks/result.rb +0 -150
  102. data/lib/swarm_sdk/hooks/shell_executor.rb +0 -256
  103. data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
  104. data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
  105. data/lib/swarm_sdk/log_collector.rb +0 -227
  106. data/lib/swarm_sdk/log_stream.rb +0 -127
  107. data/lib/swarm_sdk/markdown_parser.rb +0 -75
  108. data/lib/swarm_sdk/model_aliases.json +0 -8
  109. data/lib/swarm_sdk/models.json +0 -44002
  110. data/lib/swarm_sdk/models.rb +0 -161
  111. data/lib/swarm_sdk/node_context.rb +0 -245
  112. data/lib/swarm_sdk/observer/builder.rb +0 -81
  113. data/lib/swarm_sdk/observer/config.rb +0 -45
  114. data/lib/swarm_sdk/observer/manager.rb +0 -248
  115. data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
  116. data/lib/swarm_sdk/permissions/config.rb +0 -239
  117. data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
  118. data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
  119. data/lib/swarm_sdk/permissions/validator.rb +0 -173
  120. data/lib/swarm_sdk/permissions_builder.rb +0 -122
  121. data/lib/swarm_sdk/plugin.rb +0 -309
  122. data/lib/swarm_sdk/plugin_registry.rb +0 -101
  123. data/lib/swarm_sdk/proc_helpers.rb +0 -53
  124. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -119
  125. data/lib/swarm_sdk/restore_result.rb +0 -65
  126. data/lib/swarm_sdk/result.rb +0 -241
  127. data/lib/swarm_sdk/snapshot.rb +0 -156
  128. data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
  129. data/lib/swarm_sdk/state_restorer.rb +0 -476
  130. data/lib/swarm_sdk/state_snapshot.rb +0 -334
  131. data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -648
  132. data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -204
  133. data/lib/swarm_sdk/swarm/builder.rb +0 -256
  134. data/lib/swarm_sdk/swarm/executor.rb +0 -446
  135. data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -162
  136. data/lib/swarm_sdk/swarm/lazy_delegate_chat.rb +0 -372
  137. data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -361
  138. data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -290
  139. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
  140. data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -392
  141. data/lib/swarm_sdk/swarm.rb +0 -973
  142. data/lib/swarm_sdk/swarm_loader.rb +0 -145
  143. data/lib/swarm_sdk/swarm_registry.rb +0 -136
  144. data/lib/swarm_sdk/tools/base.rb +0 -63
  145. data/lib/swarm_sdk/tools/bash.rb +0 -280
  146. data/lib/swarm_sdk/tools/clock.rb +0 -46
  147. data/lib/swarm_sdk/tools/delegate.rb +0 -389
  148. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
  149. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
  150. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
  151. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
  152. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
  153. data/lib/swarm_sdk/tools/edit.rb +0 -145
  154. data/lib/swarm_sdk/tools/glob.rb +0 -166
  155. data/lib/swarm_sdk/tools/grep.rb +0 -235
  156. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
  157. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -167
  158. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
  159. data/lib/swarm_sdk/tools/mcp_tool_stub.rb +0 -198
  160. data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
  161. data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
  162. data/lib/swarm_sdk/tools/read.rb +0 -261
  163. data/lib/swarm_sdk/tools/registry.rb +0 -205
  164. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
  165. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
  166. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
  167. data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
  168. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -273
  169. data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
  170. data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
  171. data/lib/swarm_sdk/tools/think.rb +0 -100
  172. data/lib/swarm_sdk/tools/todo_write.rb +0 -237
  173. data/lib/swarm_sdk/tools/web_fetch.rb +0 -264
  174. data/lib/swarm_sdk/tools/write.rb +0 -112
  175. data/lib/swarm_sdk/transcript_builder.rb +0 -278
  176. data/lib/swarm_sdk/utils.rb +0 -68
  177. data/lib/swarm_sdk/validation_result.rb +0 -33
  178. data/lib/swarm_sdk/version.rb +0 -5
  179. data/lib/swarm_sdk/workflow/agent_config.rb +0 -95
  180. data/lib/swarm_sdk/workflow/builder.rb +0 -227
  181. data/lib/swarm_sdk/workflow/executor.rb +0 -497
  182. data/lib/swarm_sdk/workflow/node_builder.rb +0 -593
  183. data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -250
  184. data/lib/swarm_sdk/workflow.rb +0 -589
  185. data/lib/swarm_sdk.rb +0 -721
@@ -0,0 +1,1165 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ # Runtime agent with built-in memory
6
+ #
7
+ # The Agent ties together RubyLLM::Chat, the memory system, and tools.
8
+ # The LLM's context window is a staging area — older turns get consolidated
9
+ # into memory cards, and retrieval brings relevant memory back on demand.
10
+ #
11
+ # ## Interruption Safety
12
+ #
13
+ # The agent supports safe interruption via {#interrupt!}, which raises
14
+ # `Async::Stop` in the fiber running {#ask}. When adding new features
15
+ # to the agent, follow these rules:
16
+ #
17
+ # 1. **Interruptible phases** (LLM calls, tool execution, streaming):
18
+ # Code here can be interrupted at any fiber yield point. Do NOT leave
19
+ # shared state half-updated — use snapshot/restore or flags to detect
20
+ # incomplete operations in `ensure` blocks.
21
+ #
22
+ # 2. **Uninterruptible phases** (memory writes, STM capture, eviction):
23
+ # Wrap in `Async::Task.current.defer_stop { }` to defer `Async::Stop`
24
+ # until the block completes. Use this for any multi-step I/O that must
25
+ # be atomic (e.g., writing cards + saving the index).
26
+ #
27
+ # 3. **New instance state**: If a new feature adds state that is modified
28
+ # during {#ask}, ensure it is either rolled back on interruption (via
29
+ # `ensure`) or protected with `defer_stop`.
30
+ #
31
+ # 4. **Subprocesses**: Any code that spawns a subprocess (Open3, etc.)
32
+ # must terminate it in an `ensure` block. `Async::Stop` bypasses
33
+ # `rescue StandardError` — only `ensure` is guaranteed to run.
34
+ #
35
+ # @see #interrupt!
36
+ # @see #execute_turn
37
+ #
38
+ # @example Basic usage
39
+ # definition = AgentDefinition.new(
40
+ # name: :assistant,
41
+ # description: "A helpful assistant",
42
+ # model: "claude-sonnet-4",
43
+ # tools: [:Read, :Write, :Edit, :Bash, :Grep, :Glob],
44
+ # memory_directory: ".swarm/memory",
45
+ # )
46
+ #
47
+ # agent = Agent.new(definition)
48
+ # response = agent.ask("Build a login page")
49
+ #
50
+ # @example Without memory
51
+ # definition = AgentDefinition.new(
52
+ # name: :chat,
53
+ # description: "Simple chat",
54
+ # )
55
+ # agent = Agent.new(definition)
56
+ # response = agent.ask("Hello!")
57
+ #
58
+ # @example Interrupting a running agent
59
+ # Async do |parent|
60
+ # task = parent.async { agent.ask("Long running task") }
61
+ # agent.running? # => true
62
+ # agent.interrupt! # => true
63
+ # result = task.wait # => nil
64
+ # agent.running? # => false
65
+ # end
66
+ class Agent
67
# @return [AgentDefinition] Immutable agent configuration
attr_reader :definition

# @return [String] Unique instance identifier (name_<hex>)
attr_reader :id

# @return [Array<Skills::Manifest>, nil] Loaded skills; nil until the
#   first ask() runs lazy initialization (see #lazy_initialize!)
attr_reader :loaded_skills
75
+
76
# Create a new agent
#
# Construction is deliberately cheap: the RubyLLM::Chat, tools, MCP
# connections, and memory system are all built lazily on the first ask().
#
# @param definition [AgentDefinition] Agent configuration
def initialize(definition)
  @definition = definition
  @id = format("%s_%s", definition.name, SecureRandom.hex(3))

  # Lazily-built collaborators (populated by lazy_initialize!)
  @chat = nil
  @memory_store = nil
  @loaded_skills = nil
  @base_system_prompt = nil
  @mcp_connectors = []
  @initialized = false

  # Conversation state
  @stm_buffer = []
  @steering_queue = []
  @turn_counter = 0
  @total_input_tokens = 0
  @total_output_tokens = 0

  # Concurrency: one ask() at a time; @current_task enables interrupt!
  @semaphore = Async::Semaphore.new(1)
  @current_task = nil
  @pending_ingestion = nil

  @hooks = Hooks::Runner.new(definition.hooks)
end
100
+
101
+ # Send a message to the agent and get a response
102
+ #
103
+ # The ask() flow:
104
+ # 1. Lazy-initialize (create chat, memory, tools)
105
+ # 2. Retrieve relevant memory cards for the prompt
106
+ # 3. Build working context (system prompt + memory + recent turns)
107
+ # 4. Execute via RubyLLM::Chat (handles tool loop internally)
108
+ # 5. Capture turn in STM buffer
109
+ # 6. Ingest turn into memory (async)
110
+ # 7. Evict old turns from STM if buffer exceeds limit
111
+ # 8. Emit events
112
+ #
113
+ # Supports safe interruption via {#interrupt!}. When interrupted,
114
+ # returns nil and leaves the agent in a consistent state for the
115
+ # next ask() call. Check {#running?} to see if an ask is in progress.
116
+ #
117
+ # @param prompt [String] User message
118
+ # @param output_schema [Hash, Object, nil] Per-call schema override (nil = use definition default)
119
+ # @yield [event] Optional block receives ALL events (content_chunk, tool_call, etc.)
120
+ # @yieldparam event [Hash] Event hash with :type, :timestamp, and event-specific fields
121
+ # @return [RubyLLM::Message, nil] LLM response, or nil if interrupted
122
+ #
123
+ # @example Simple ask
124
+ # response = agent.ask("What is 2+2?")
125
+ # puts response.content
126
+ #
127
+ # @example Receive all events via block
128
+ # agent.ask("Tell me a story") do |event|
129
+ # case event[:type]
130
+ # when "content_chunk"
131
+ # print event[:content]
132
+ # when "tool_call"
133
+ # puts "Calling #{event[:tool]}..."
134
+ # end
135
+ # end
136
+ #
137
+ # @example With structured output
138
+ # schema = { type: "object", properties: { answer: { type: "integer" } } }
139
+ # response = agent.ask("What is 2+2?", output_schema: schema)
140
+ # response.content # => { "answer" => 4 }
141
def ask(prompt, output_schema: nil, &block)
  # Scope the event block to this call's fiber; restored on exit so
  # nested/concurrent asks don't leak emitters into each other.
  with_block_emitter(block) do
    Sync do |task|
      # One turn at a time per agent; queued asks wait here.
      @semaphore.acquire do
        # Published so interrupt! can raise Async::Stop into this fiber.
        @current_task = task
        begin
          lazy_initialize!

          before_result = @hooks.run(:before_ask, Hooks::Context.new(
            event: :before_ask, agent_name: @definition.name, prompt: prompt,
          ))

          if before_result.halt?
            # Hook vetoed the turn — return nil without touching the LLM.
            nil
          else
            # Hook may rewrite the prompt before execution.
            prompt = before_result.value if before_result.replace?
            response = execute_turn(prompt, output_schema: output_schema)

            @hooks.run(:after_ask, Hooks::Context.new(
              event: :after_ask, agent_name: @definition.name, prompt: prompt, response: response,
            ))

            @hooks.run(:on_stop, Hooks::Context.new(
              event: :on_stop, agent_name: @definition.name, response: response,
            ))

            response
          end
        rescue Async::Stop
          # interrupt! path: emit a marker event and return nil; STM/memory
          # consistency is the responsibility of execute_turn's ensure blocks.
          EventStream.emit(type: "agent_interrupted", agent: @id, turn: @turn_counter)
          nil
        ensure
          # Always clear, so running? is false and interrupt! is a no-op again.
          @current_task = nil
        end
      end
    end
  end
end
179
+
180
# Stop whatever the agent is doing
#
# Raises Async::Stop inside the fiber currently running {#ask}. Must be
# called from another Async fiber in the same reactor. Calling it while
# the agent is idle is harmless and returns nil.
#
# @return [Boolean, nil] true when a running task was stopped, nil when idle
#
# @example Interrupt from another fiber
#   Async do |parent|
#     task = parent.async { agent.ask("Long task") }
#     sleep 1
#     agent.interrupt!  # => true
#     task.wait         # => nil
#   end
def interrupt!
  task = @current_task
  return if task.nil?

  task.stop
  true
end
204
+
205
# Whether the agent is currently executing an ask() call
#
# True exactly while a turn holds the semaphore and @current_task is set,
# i.e. while {#interrupt!} would have something to stop.
#
# @return [Boolean]
def running?
  !!@current_task
end
218
+
219
# Queue a high-priority message that interrupts the current tool batch
#
# The queued message is injected as a `role: user` message after the
# currently-running tool finishes; remaining tools in that batch are
# skipped (see register_tool_callbacks).
#
# @param message [String] Message content to inject
# @return [void]
def steer(message)
  @steering_queue.push(message)
end
237
+
238
# Discard every queued steering message without delivering it
#
# @return [void]
def clear_steering_queue
  @steering_queue.clear
end
244
+
245
# Get recent messages (the short-term-memory buffer)
#
# Returns a shallow copy so callers cannot mutate the internal buffer.
#
# @return [Array<Hash>] Recent conversation turns
def messages
  @stm_buffer.dup
end
251
+
252
# Reset conversation and optionally clear memory
#
# Waits out any in-flight background ingestion first so adapter writes
# cannot race the reset, then drops short-term state while preserving
# the system prompt on the chat.
#
# NOTE(review): MCP servers are disconnected here but @mcp_connectors is
# not rebuilt until re-initialization — confirm reconnect semantics.
#
# @param clear_memory [Boolean] Also clear memory storage
# @return [void]
def clear(clear_memory: false)
  wait_for_pending_ingestion
  disconnect_mcp_servers
  @stm_buffer.clear
  @steering_queue.clear
  @turn_counter = 0
  @chat&.reset_messages!(preserve_system_prompt: true)

  return unless clear_memory && @memory_store

  # Drop the store and rebuild it immediately. Previously this only set
  # @memory_store = nil: because lazy_initialize! early-returns once
  # @initialized is true, the store was never recreated, so memory was
  # silently disabled for the rest of the agent's life.
  @memory_store = nil
  initialize_memory if @initialized && @definition.memory_enabled?
end
269
+
270
# Cumulative token usage across all turns
#
# @return [Hash{Symbol=>Integer}] :input and :output token counts
def tokens
  {
    input: @total_input_tokens,
    output: @total_output_tokens,
  }
end
276
+
277
# Agent name, delegated to the immutable definition
#
# @return [Symbol]
def name
  @definition.name
end
283
+
284
# Whether lazy initialization has completed (set by the first ask)
#
# @return [Boolean]
def initialized?
  @initialized
end
290
+
291
# Read-only access to the memory store
#
# @return [Memory::Store, nil] nil when memory is not enabled or the
#   agent has not yet been initialized
def memory
  @memory_store
end
297
+
298
# Whether memory operations are read-only
#
# The base agent always allows writes; SubTaskAgent overrides this to
# return true so context building never bumps access counters.
#
# @return [Boolean]
def memory_read_only?
  false
end
307
+
308
# Run memory defragmentation (compression, consolidation, promotion, pruning)
#
# Intended to run between sessions or on a schedule — never during a
# conversation turn. The SDK user decides when. No-op when memory is
# disabled on the definition.
#
# @yield [event] Optional block receives defrag events
# @yieldparam event [Hash] Event with :type, :timestamp, and event fields
# @return [Hash, nil] Defragmentation results, or nil if memory not enabled
#
# @example
#   agent.defrag!
#   #=> { duplicates_merged: 0, conflicts_detected: 0,
#   #     cards_compressed: 3, cards_promoted: 1, cards_pruned: 0 }
def defrag!(&block)
  return unless @definition.memory_enabled?

  with_block_emitter(block) do
    Sync do
      @semaphore.acquire do
        lazy_initialize!
        # Ingestion and defrag share the adapter — never overlap writes.
        wait_for_pending_ingestion
        @memory_store.defrag!
      end
    end
  end
end
342
+
343
+ private
344
+
345
# Scope the fiber-local block emitter to the duration of a call
#
# Saves the current emitter, installs the given block, and restores the
# saved value afterwards so nested ask() calls compose. A nil block is a
# pass-through: the caller inherits whatever emitter is already active
# (this is how SubTaskAgent.ask picks up its parent's emitter).
#
# @param block [Proc, nil] Event receiver, or nil to inherit the current one
# @yield Executes with the emitter installed
# @return [Object] Whatever the yielded block returns
def with_block_emitter(block)
  return yield unless block

  saved = EventStream.block_emitter
  begin
    EventStream.block_emitter = block
    yield
  ensure
    EventStream.block_emitter = saved
  end
end
367
+
368
# Wait for any pending background ingestion to complete
#
# Called before starting a new ingestion, before eviction writes, and
# before defrag to prevent concurrent adapter writes.
#
# The pending-task reference is cleared in an ensure block: if the
# ingestion task failed, its wait raises, and previously the stale
# reference stayed set — every later caller re-waited (and re-raised on)
# the same dead task.
#
# @return [void]
def wait_for_pending_ingestion
  return unless @pending_ingestion

  begin
    @pending_ingestion.wait
  ensure
    @pending_ingestion = nil
  end
end
380
+
381
# Lazy-initialize the agent on first ask()
#
# Creates RubyLLM::Chat, tools, MCP connections, and the memory system.
# Skills and the system prompt are resolved first because configure_chat
# reads @base_system_prompt.
#
# The output_schema capability check now runs BEFORE any external setup
# and before @initialized is set. Previously it ran after
# @initialized = true, so a misconfigured agent was permanently marked
# initialized: the very next ask() skipped validation and proceeded with
# a model that cannot honor the schema.
#
# @return [void]
# @raise [ConfigurationError] when output_schema is set but the model
#   does not support structured output
def lazy_initialize!
  return if @initialized

  @loaded_skills = load_skills
  @base_system_prompt = build_base_system_prompt

  @chat = create_chat
  configure_chat

  # Fail fast before connecting MCP servers or touching memory storage.
  if @definition.output_schema && !@chat.model.structured_output?
    raise ConfigurationError,
      "Agent #{@id} has output_schema but model #{@definition.model} does not support structured output"
  end

  initialize_memory if @definition.memory_enabled?
  connect_mcp_servers
  attach_tools

  @initialized = true

  EventStream.emit(
    type: "agent_initialized",
    agent: @id,
    model: @definition.model,
    memory_enabled: @definition.memory_enabled?,
    skills_loaded: @loaded_skills.size,
  )
end
413
+
414
# Create the RubyLLM::Chat instance
#
# A custom base_url requires an isolated RubyLLM context, because the
# endpoint is a connection-level setting rather than a request parameter.
# When a provider is forced we also skip model-registry validation via
# assume_model_exists.
#
# @return [RubyLLM::Chat]
def create_chat
  chat_options = { model: @definition.model }
  if (provider = @definition.provider)
    chat_options[:provider] = provider.to_sym
    chat_options[:assume_model_exists] = true
  end

  base_url = @definition.base_url
  return RubyLLM.chat(**chat_options) unless base_url

  create_context_with_base_url(base_url, @definition.provider).chat(**chat_options)
end
435
+
436
+ # Configure LLM chat with definition settings
437
+ #
438
+ # Uses raw parameter passthrough — no abstractions like with_thinking.
439
+ # The SDK user controls exactly what goes to the API via parameters/headers.
440
+ # Streaming is handled natively by passing a block to chat.ask(),
441
+ # not via with_params(stream: ...).
442
+ # Registers event callbacks for tool_call/tool_result emissions.
443
+ #
444
+ # Responses API must be enabled first because {RubyLLM::Chat#with_responses_api}
445
+ # swaps the provider instance — subsequent configuration must see the final provider.
446
+ #
447
+ # @return [void]
448
def configure_chat
  # ORDER MATTERS: with_responses_api swaps the provider instance, so it
  # must run before anything else configures the chat — later settings
  # have to land on the final provider.
  enable_responses_api if @definition.api_version == "v1/responses"

  # Raw passthrough: the definition's parameters/headers go to the API
  # verbatim; no abstraction layer sits in between.
  @chat.with_params(**@definition.parameters) unless @definition.parameters.empty?

  @chat.with_headers(**@definition.headers) unless @definition.headers.empty?

  # Base system prompt (with skills XML appended) marked cacheable for
  # providers that support prompt caching.
  if @base_system_prompt
    @chat.with_instructions(cacheable_instructions(@base_system_prompt))
  end

  # Optional cap on parallel tool execution within one batch.
  if @definition.max_concurrent_tools
    @chat.with_tool_concurrency(:async, max: @definition.max_concurrent_tools)
  end

  register_event_callbacks
  register_tool_callbacks
end
466
+
467
# Register RubyLLM callbacks that mirror tool activity onto EventStream
#
# Consumers subscribed to the event stream see tool_call / tool_result
# events in real time. The result preview is truncated to keep events
# small.
#
# @return [void]
def register_event_callbacks
  emitter_agent_id = @id

  @chat.on_tool_call do |call|
    EventStream.emit(
      type: "tool_call",
      agent: emitter_agent_id,
      tool: call.name,
      arguments: call.arguments,
    )
  end

  @chat.on_tool_result do |_call, result|
    preview = result.to_s[0..200]
    EventStream.emit(
      type: "tool_result",
      agent: emitter_agent_id,
      result_preview: preview,
    )
  end
end
493
+
494
+ # Register unified tool callbacks for hooks and steering
495
+ #
496
+ # Uses the RubyLLM patch ({RubyLLM::Chat::MultiSubscriberCallbacks})
497
+ # which provides `around_tool_execution` receiving (tool_call, tool_instance, execute_proc).
498
+ #
499
+ # CRITICAL: `around_tool_execution` is SINGLE-CALLBACK — each registration
500
+ # replaces the previous. This method combines user hooks and steering logic
501
+ # in one callback.
502
+ #
503
+ # Tool execution flow:
504
+ # 1. Check skip flag (set by steering queue drain)
505
+ # 2. Run before_tool hooks
506
+ # 3. Execute tool
507
+ # 4. Run after_tool hooks
508
+ # 5. Check steering queue and set skip flag for remaining tools
509
+ #
510
+ # After all tools complete, `after_tool_calls` drains the steering queue
511
+ # and injects it as a user message before the next LLM call.
512
+ #
513
+ # @return [void]
514
def register_tool_callbacks
  # Capture references for closure (following existing pattern) —
  # the callbacks run inside RubyLLM, not in this object's context.
  steering_queue = @steering_queue
  skip_flag = { active: false } # Mutable holder for closure
  hooks = @hooks
  agent_name = @definition.name
  agent_id = @id
  chat = @chat

  # SINGLE-CALLBACK: each around_tool_execution registration replaces the
  # previous one, which is why hooks + steering live in one closure here.
  @chat.around_tool_execution do |tool_call, _tool_instance, execute_proc|
    # 1. Check steering queue first (may have been populated during previous tool's execution)
    # If steering was injected, skip this and all remaining tools in the batch.
    unless steering_queue.empty?
      skip_flag[:active] = true
    end

    # 2. Skip if steering interrupted this batch. `next <value>` makes
    # that value the tool's result as seen by the LLM.
    if skip_flag[:active]
      EventStream.emit(type: "tool_skipped", agent: agent_id, tool: tool_call.name)
      next "Skipped due to queued user message."
    end

    # 3. Before hook — a halting hook short-circuits execution and its
    # value (or a default message) becomes the tool result.
    if hooks.any_tool_hooks?
      before_ctx = Hooks::Context.new(
        event: :before_tool,
        agent_name: agent_name,
        tool_name: tool_call.name,
        tool_arguments: tool_call.arguments.transform_keys(&:to_sym),
      )
      before_result = hooks.run(:before_tool, before_ctx)
      next(before_result.value || "Hook blocked execution of #{tool_call.name}") if before_result.halt?
    end

    # 4. Execute tool
    output = execute_proc.call

    # 5. After hook — may replace the tool output before the LLM sees it.
    if hooks.any_tool_hooks?
      after_ctx = Hooks::Context.new(
        event: :after_tool,
        agent_name: agent_name,
        tool_name: tool_call.name,
        tool_arguments: tool_call.arguments.transform_keys(&:to_sym),
        tool_result: output,
      )
      after_result = hooks.run(:after_tool, after_ctx)
      output = after_result.value if after_result.replace?
    end

    output
  end

  # Capture steering mode for closure
  steering_mode = @definition.steering_mode

  # Runs once after every tool batch: reset the skip flag, then deliver
  # queued steering messages as a user message before the next LLM call.
  @chat.after_tool_calls do
    skip_flag[:active] = false
    next if steering_queue.empty?

    # Drain based on mode.
    # NOTE(review): this duplicates the drain logic in
    # drain_steering_queue / drain_queue_by_mode — presumably kept inline
    # for the closure; consider unifying (verify before refactoring).
    messages = if steering_mode == :one_at_a_time
      [steering_queue.shift]
    else
      result = steering_queue.dup
      steering_queue.clear
      result
    end

    content = messages.join("\n\n")
    EventStream.emit(type: "steering_injected", agent: agent_id, message_count: messages.size)
    chat.add_message(role: :user, content: content)
  end
end
588
+
589
# Drain steering messages that were queued while the agent was idle
#
# The after_tool_calls hook only fires when a turn actually ran tools,
# so messages steered between ask() calls (or before the first one)
# would otherwise never be delivered. Honors the definition's
# steering_mode: :all drains everything at once, :one_at_a_time drains
# a single message.
#
# @return [void]
def drain_steering_queue
  return if @steering_queue.empty?

  drained = drain_queue_by_mode(@steering_queue, @definition.steering_mode)
  return if drained.empty?

  EventStream.emit(type: "steering_injected", agent: @id, message_count: drained.size)
  @chat.add_message(role: :user, content: drained.join("\n\n"))
end
610
+
611
# Enable the OpenAI Responses API on the chat instance
#
# Delegates to RubyLLM's built-in support, which swaps the provider to
# {RubyLLM::Providers::OpenAIResponses} and routes requests to
# /v1/responses instead of /v1/chat/completions. Stateful mode tracks
# response IDs across turns for automatic conversation threading.
#
# Logs a warning when a non-OpenAI endpoint is configured, since custom
# endpoints rarely implement /v1/responses.
#
# @return [void]
def enable_responses_api
  custom_endpoint = @definition.base_url
  if custom_endpoint && !custom_endpoint.include?("api.openai.com")
    DebugLog.log(
      "agent",
      "Responses API requested but using custom endpoint #{custom_endpoint}. " \
        "Custom endpoints typically don't support /v1/responses.",
    )
  end

  @chat.with_responses_api(stateful: true, store: true)
  DebugLog.log("agent", "Enabled Responses API (v1/responses)")
end
633
+
634
# Load skill manifests from configured skill directories
#
# @return [Array<Skills::Manifest>] Discovered skill manifests (empty
#   when no skill directories are configured)
def load_skills
  skill_paths = @definition.skills
  return [] if skill_paths.empty?

  Skills::Loader.scan(skill_paths)
end
642
+
643
# Build the base system prompt with skills metadata appended
#
# When skills were discovered, the XML metadata block is appended to the
# definition's system prompt; the agent later loads full skill
# instructions via the Read tool when activating a skill. Without
# skills, the definition's prompt is returned untouched (may be nil).
#
# @return [String, nil] System prompt with skills, or original prompt
def build_base_system_prompt
  base = @definition.system_prompt
  return base if @loaded_skills.empty?

  xml_block = Skills::Loader.format_xml(@loaded_skills)
  [base, xml_block].compact.join("\n\n")
end
657
+
658
# Attach tools to the chat
#
# Combines SDK-registered tools (from the registry, wired to the memory
# store) with tools discovered on connected MCP servers. Skips the chat
# call entirely when no tools are available.
#
# @return [void]
def attach_tools
  sdk_tools = Tools::Registry.create_all(@definition, memory_store: @memory_store)
  mcp_tools = @mcp_connectors.flat_map(&:to_ruby_llm_tools)

  combined = sdk_tools + mcp_tools
  return if combined.empty?

  @chat.with_tools(*combined)
end
669
+
670
# Initialize the memory system
#
# Creates a dedicated background chat for memory operations
# (compression, segmentation) so those LLM calls don't pollute the
# agent's main conversation history, then builds and loads the store.
#
# @return [void]
def initialize_memory
  adapter = resolve_memory_adapter
  embedder = Memory::Embedder.new
  background_chat = create_background_chat

  store = Memory::Store.new(
    adapter: adapter,
    embedder: embedder,
    chat: background_chat,
    retrieval_top_k: @definition.memory_retrieval_top_k,
    semantic_weight: @definition.memory_semantic_weight,
    keyword_weight: @definition.memory_keyword_weight,
    associative_memory: @definition.memory_associative,
  )
  store.load

  @memory_store = store
end
694
+
695
# Resolve the memory adapter from definition
#
# Handles three configurations:
# 1. Symbol (e.g. :sqlite, :filesystem) - builds the matching adapter
# 2. Adapter instance - used as-is
# 3. Anything else (including nil) - filesystem adapter fallback
#
# @return [Memory::Adapters::Base]
def resolve_memory_adapter
  configured = @definition.memory_adapter

  if configured.is_a?(Symbol)
    create_adapter_from_symbol(configured)
  elsif configured.is_a?(Memory::Adapters::Base)
    configured
  else
    create_filesystem_adapter
  end
end
713
+
714
# Create an adapter instance from a symbol type
#
# @param adapter_type [Symbol] Adapter type (:sqlite or :filesystem)
# @return [Memory::Adapters::Base]
# @raise [ArgumentError] If adapter_type is not a recognized type
def create_adapter_from_symbol(adapter_type)
  if adapter_type == :sqlite
    Memory::Adapters::SqliteAdapter.new(@definition.memory_directory)
  elsif adapter_type == :filesystem
    create_filesystem_adapter
  else
    raise ArgumentError, "Unknown memory adapter type: #{adapter_type.inspect}. " \
      "Valid types are :sqlite and :filesystem."
  end
end
730
+
731
# Create a filesystem adapter rooted at the definition's memory directory
#
# @return [Memory::Adapters::FilesystemAdapter]
def create_filesystem_adapter
  directory = @definition.memory_directory
  Memory::Adapters::FilesystemAdapter.new(directory)
end
737
+
738
# Create a dedicated chat for background memory operations
#
# Prefers background_model/provider/base_url from Configuration when
# set, otherwise falls back to the agent's own model, provider, and
# base_url. Keeping this chat separate from the agent's main chat
# prevents compression/segmentation LLM calls from polluting the
# conversation history.
#
# @return [RubyLLM::Chat]
def create_background_chat
  config = Configuration.instance

  opts = { model: config.background_model || @definition.model }

  provider = config.background_provider || @definition.provider
  if provider
    opts[:provider] = provider.to_sym
    opts[:assume_model_exists] = true
  end

  # No custom endpoint anywhere -> plain global chat
  base_url = config.background_base_url || @definition.base_url
  return RubyLLM.chat(**opts) unless base_url

  # Custom endpoint -> isolated context so base_url stays a
  # connection-level setting rather than a request parameter
  provider_name = provider || @definition.provider
  create_context_with_base_url(base_url, provider_name).chat(**opts)
end
768
+
769
# Create a RubyLLM context with a custom API base URL
#
# Builds an isolated configuration via RubyLLM.context in which the
# provider's api_base attribute (e.g. anthropic_api_base) points at the
# given URL — when the provider is known and the config exposes that
# setter. This keeps base_url a connection-level setting, not a request
# body parameter.
#
# @param base_url [String] Custom API endpoint URL
# @param provider_name [String, Symbol, nil] Provider name (e.g., "anthropic")
# @return [RubyLLM::Context]
def create_context_with_base_url(base_url, provider_name)
  setter = provider_name ? :"#{provider_name}_api_base=" : nil

  RubyLLM.context do |ctx_config|
    ctx_config.public_send(setter, base_url) if setter && ctx_config.respond_to?(setter)
  end
end
786
+
787
# Execute a single conversation turn
#
# Tool calls are captured via a `:tool_call` subscription during the turn,
# because RubyLLM's `complete()` loop handles tool calls internally and
# the returned response is always the final text response (never tool calls).
#
# Always streams. Content chunks are emitted via EventStream. The caller
# receives events through the block emitter set by `ask()`.
#
# Interruption safety:
# - Chat messages are snapshotted before the LLM call
# - If interrupted during the interruptible phase (LLM + tools), chat is
#   restored from snapshot and the turn counter is decremented
# - Memory operations (STM capture, ingestion, eviction) run inside
#   `defer_stop` so they complete atomically even if interrupt! was called
#
# @param prompt [String] User message
# @param output_schema [Hash, Object, nil] Per-call schema override (nil = use definition default)
# @return [RubyLLM::Message] Response
def execute_turn(prompt, output_schema: nil)
  @turn_counter += 1
  turn_id = "turn_#{@turn_counter}"

  DebugLog.log("agent", "=== Turn #{@turn_counter} start ===")

  # prompt[0..100] keeps the event payload small; full prompt goes to the LLM only.
  EventStream.emit(type: "agent_start", agent: @id, turn: @turn_counter, prompt_preview: prompt[0..100])

  # Snapshot chat messages for rollback if LLM call is interrupted.
  # chat.messages returns the live array, so we record its length.
  message_restore_point = @chat.messages.length
  llm_completed = false

  # Subscribe to tool_call events to capture all tool calls during this turn.
  # RubyLLM's complete() loop processes tool calls internally and returns
  # only the final text response, so we can't inspect response.tool_calls.
  turn_tool_calls = []
  tool_call_subscription = @chat.subscribe(:tool_call) do |tool_call|
    turn_tool_calls << { name: tool_call.name, arguments: tool_call.arguments }
  end

  begin
    # Build working context with memory retrieval
    DebugLog.time("agent", "populate_chat_context") do
      emit_timed("memory_retrieval", agent: @id) { populate_chat_context(prompt) }
    end

    # Apply output schema (per-call override takes precedence over definition default).
    # with_schema(nil) clears any previously set schema, keeping behavior correct
    # when alternating between schema and non-schema calls.
    @chat.with_schema(output_schema || @definition.output_schema)

    # Drain any steering messages queued while idle (no tool loop to trigger the hook)
    drain_steering_queue

    # === INTERRUPTIBLE PHASE ===
    # LLM HTTP call + streaming + tool execution loop.
    # Can be interrupted at any fiber yield point via Async::Stop.
    response = DebugLog.time("agent", "llm_call") do
      @chat.ask(prompt) do |chunk|
        EventStream.emit(type: "content_chunk", agent: @id, content: chunk.content) if chunk.content
      end
    end

    llm_completed = true
    tool_call_subscription.unsubscribe

    # Track tokens
    track_tokens(response)

    # === UNINTERRUPTIBLE PHASE ===
    # Memory operations protected from Async::Stop via defer_stop.
    # If interrupt! was called during this phase, Stop is deferred
    # until after these operations complete.
    Async::Task.current.defer_stop do
      # Capture turn in STM buffer (including tool calls from the subscription)
      capture_turn(prompt, response, turn_id, tool_calls: turn_tool_calls)

      # Wait for any previous background ingestion to complete
      # before starting a new one (prevents concurrent adapter writes)
      emit_timed("memory_wait_ingestion", agent: @id) { wait_for_pending_ingestion }

      # Spawn ingestion as a background task so it runs concurrently
      # with the next LLM call. The task is a child of the current
      # ask() task, so it gets cancelled on interrupt — acceptable
      # because STM already captured the turn data above.
      #
      # Capture current emitters to propagate to the child Fiber,
      # since Fiber-local storage is not inherited by child tasks.
      captured_global = EventStream.emitter
      captured_block = EventStream.block_emitter

      @pending_ingestion = Async::Task.current.async do
        # Propagate emitters to child Fiber so events reach subscribers
        EventStream.emitter = captured_global
        EventStream.block_emitter = captured_block

        DebugLog.time("agent", "ingest_into_memory") do
          ingest_into_memory(prompt, response, turn_id, tool_calls: turn_tool_calls)
        end
      end

      # Evict old turns from STM to LTM
      DebugLog.time("agent", "evict_stm") do
        emit_timed("memory_eviction", agent: @id) { evict_stm }
      end
    end

    DebugLog.log("agent", "=== Turn #{@turn_counter} complete ===")
    EventStream.emit(type: "agent_stop", agent: @id, turn: @turn_counter)

    response
  ensure
    # Safe navigation guards the case where subscribe itself raised.
    # NOTE(review): on the happy path this is a second unsubscribe of the
    # same subscription — presumably idempotent in RubyLLM; confirm.
    tool_call_subscription&.unsubscribe

    # Rollback chat state if LLM call was interrupted.
    # For memory-enabled agents, populate_chat_context rebuilds from scratch
    # on the next ask(), but restoration keeps the chat consistent regardless.
    unless llm_completed
      overflow = @chat.messages.length - message_restore_point
      @chat.messages.pop(overflow) if overflow > 0
      @turn_counter -= 1
    end
  end
end
911
+
912
# Drain messages from a queue based on mode
#
# Mutates the given queue: removes one message for :one_at_a_time, or
# empties it entirely for any other mode (including :all).
#
# @param queue [Array<String>] Queue to drain (mutated in place)
# @param mode [Symbol] :all or :one_at_a_time
# @return [Array<String>] Drained messages
def drain_queue_by_mode(queue, mode)
  return [] if queue.empty?
  return [queue.shift] if mode == :one_at_a_time

  drained = queue.dup
  queue.clear
  drained
end
928
+
929
# Populate chat messages with memory-augmented context
#
# No-op when memory is disabled. Otherwise resets the chat (keeping the
# system prompt), asks the memory store for context messages built from
# the query and the STM buffer, installs the memory-augmented system
# prompt (Anthropic cache-marked when applicable), and replays the
# remaining messages into the chat.
#
# @param prompt [String] Current user query
# @return [void]
def populate_chat_context(prompt)
  return unless @memory_store

  # Reset chat messages and rebuild from memory context
  @chat.reset_messages!(preserve_system_prompt: true)

  context_messages = @memory_store.build_context(
    query: prompt,
    recent_turns: @stm_buffer,
    system_prompt: @base_system_prompt,
    read_only: memory_read_only?,
  )

  system_messages, conversation = context_messages.partition { |msg| msg[:role] == "system" }

  # Replace system prompt with the memory-augmented version, if any
  augmented = system_messages.first
  @chat.with_instructions(cacheable_instructions(augmented[:content]), replace: true) if augmented

  # Replay the remaining (non-system) messages into the chat
  conversation.each do |msg|
    @chat.add_message(role: msg[:role], content: msg[:content])
  end
end
958
+
959
# Emit a timed event around a block
#
# Yields the block, measures its wall-clock duration on the monotonic
# clock, and emits an event carrying elapsed_ms — but only when the
# block took at least 10ms, so trivially fast operations don't flood
# the event stream.
#
# @param type [String] Event type prefix
# @param data [Hash] Additional event data
# @yield Block to time
# @return [Object] Block return value
def emit_timed(type, **data)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  value = yield
  duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round

  EventStream.emit(type: type, elapsed_ms: duration_ms, **data) if duration_ms >= 10

  value
end
979
+
980
# Track token usage from response
#
# Accumulates input/output token counts into the running totals.
# Responses that don't expose token accounting are ignored; nil counts
# are treated as zero.
#
# Fix: the guard previously checked only `input_tokens`, so a response
# responding to `input_tokens` but not `output_tokens` raised
# NoMethodError mid-update. Both accessors are now required up front.
#
# @param response [RubyLLM::Message] LLM response
# @return [void]
def track_tokens(response)
  return unless response.respond_to?(:input_tokens) && response.respond_to?(:output_tokens)

  @total_input_tokens += response.input_tokens || 0
  @total_output_tokens += response.output_tokens || 0
end
990
+
991
# Capture a turn in the STM buffer including tool calls
#
# Appends two entries to the STM buffer: the user prompt and the
# assistant response. Tool calls (recorded by execute_turn's
# `:tool_call` subscription, not read off the response) are attached to
# the assistant entry only when present.
#
# @param prompt [String] User message
# @param response [RubyLLM::Message] Assistant response
# @param turn_id [String] Turn identifier
# @param tool_calls [Array<Hash>] Tool calls captured during this turn
# @return [void]
def capture_turn(prompt, response, turn_id, tool_calls: [])
  user_entry = { role: "user", content: prompt, turn_id: turn_id }

  assistant_entry = { role: "assistant", content: response.content, turn_id: turn_id }
  assistant_entry[:tool_calls] = tool_calls unless tool_calls.empty?

  @stm_buffer.push(user_entry, assistant_entry)
end
1010
+
1011
# Ingest a turn into long-term memory
#
# Formats the turn (prompt, tool calls, response) and writes it to the
# memory store, then persists. No-op when memory is disabled. Any error
# is reported via warn + event but never raised — memory persistence
# must not break the conversation loop.
#
# @param prompt [String] User message
# @param response [RubyLLM::Message] Assistant response
# @param turn_id [String] Turn identifier
# @param tool_calls [Array<Hash>] Tool calls captured during this turn
# @return [void]
def ingest_into_memory(prompt, response, turn_id, tool_calls: [])
  store = @memory_store
  return unless store

  store.ingest_turn(
    text: format_turn_for_ingestion(prompt, response, tool_calls: tool_calls),
    turn_id: turn_id,
  )
  store.save
rescue StandardError => e
  warn("[SwarmSDK::V3::Agent] Memory ingestion error: #{e.class}: #{e.message}")
  EventStream.emit(
    type: "memory_ingestion_error",
    agent: @id,
    error: "#{e.class}: #{e.message}",
  )
end
1035
+
1036
# Format a turn for memory ingestion, including tool calls
#
# Produces "User: ..." / optional "Tool calls: ..." / "Assistant: ..."
# sections joined by blank lines. Tool calls come from execute_turn's
# subscription rather than from the response (which is always the final
# text message).
#
# @param prompt [String] User message
# @param response [RubyLLM::Message] Assistant response
# @param tool_calls [Array<Hash>] Tool calls captured during this turn
# @return [String] Formatted turn text
def format_turn_for_ingestion(prompt, response, tool_calls: [])
  rendered_calls =
    unless tool_calls.empty?
      details = tool_calls.map { |call| "Tool call: #{call[:name]}(#{call[:arguments].inspect})" }
      "Tool calls: #{details.join("; ")}"
    end

  ["User: #{prompt}", rendered_calls, "Assistant: #{response.content}"].compact.join("\n\n")
end
1056
+
1057
# Evict oldest turns from STM buffer to LTM when it exceeds the limit
#
# Each turn is two messages (user + assistant), so the buffer limit is
# stm_turns * 2. Overflowing messages are ingested into long-term
# memory before removal so nothing is lost in the transition; the
# eviction waits for any in-flight background ingestion first to avoid
# concurrent adapter writes.
#
# @return [void]
def evict_stm
  limit = @definition.memory_stm_turns * 2
  overflow = @stm_buffer.size - limit
  return if overflow <= 0

  evicted = @stm_buffer.shift(overflow)

  # Serialize with any background ingestion before writing evicted turns
  wait_for_pending_ingestion

  # Persist evicted turns into LTM before they're gone
  ingest_evicted_turns(evicted)

  EventStream.emit(
    type: "stm_eviction",
    agent: @id,
    evicted_count: evicted.size / 2,
    remaining: @stm_buffer.size / 2,
  )
end
1085
+
1086
# Ingest evicted STM turns into long-term memory
#
# Serializes each evicted message as "Role: content". Assistant entries
# that carry tool calls (recorded by the STM capture) also get a
# "Tool calls:" line — previously that data was silently dropped here,
# contradicting the eviction path's "no information is lost" intent.
#
# Failures are reported (warn + event) but never raised, since memory
# persistence must not break the conversation loop.
#
# @param evicted [Array<Hash>] Evicted messages (:role, :content, optional :tool_calls)
# @return [void]
def ingest_evicted_turns(evicted)
  return unless @memory_store && evicted.any?

  evicted_text = evicted.map { |m| format_evicted_message(m) }.join("\n\n")
  @memory_store.ingest_turn(text: evicted_text, turn_id: "evicted_#{@turn_counter}")
  @memory_store.save
rescue StandardError => e
  warn("[SwarmSDK::V3::Agent] STM eviction ingestion error: #{e.class}: #{e.message}")
  EventStream.emit(
    type: "memory_ingestion_error",
    agent: @id,
    error: "#{e.class}: #{e.message}",
  )
end

# Render a single evicted STM message, appending tool calls when present
#
# @param message [Hash] Message with :role, :content, optional :tool_calls
# @return [String]
def format_evicted_message(message)
  text = "#{message[:role].capitalize}: #{message[:content]}"
  calls = message[:tool_calls]
  return text if calls.nil? || calls.empty?

  rendered = calls.map { |tc| "#{tc[:name]}(#{tc[:arguments].inspect})" }.join("; ")
  "#{text}\nTool calls: #{rendered}"
end
1104
+
1105
# Whether the current chat uses the Anthropic provider
#
# @return [Boolean]
def anthropic_provider?
  "anthropic" == @chat.model.provider
end
1111
+
1112
# Wrap instructions text with Anthropic prompt caching if applicable
#
# For Anthropic models, wraps the text in RubyLLM's provider-specific
# Content with cache enabled ({cache_control: {type: "ephemeral"}}),
# enabling prompt caching (~90% input-token savings on cache hits); the
# cache covers everything up to and including the marked block (tools +
# system prompt). Other providers (OpenAI, Gemini) cache automatically,
# so the text is returned unchanged.
#
# @param text [String] Instructions text to potentially cache
# @return [String, RubyLLM::Content::Raw] Original text or cached content
#
# @example Anthropic provider
#   cacheable_instructions("You are helpful.")
#   #=> RubyLLM::Content::Raw with cache_control
#
# @example OpenAI provider
#   cacheable_instructions("You are helpful.")
#   #=> "You are helpful."
def cacheable_instructions(text)
  if anthropic_provider?
    RubyLLM::Providers::Anthropic::Content.new(text, cache: true)
  else
    text
  end
end
1137
+
1138
# Connect to all configured MCP servers
#
# @return [void]
def connect_mcp_servers
  @definition.mcp_servers.each { |server_def| connect_mcp_server(server_def) }
end

# Connect a single MCP server, register its connector, and announce
# its discovered tools on the event stream.
#
# @param server_def [Object] MCP server definition from the agent definition
# @return [void]
def connect_mcp_server(server_def)
  connector = V3::MCP::Connector.new(server_def)
  connector.connect!
  @mcp_connectors << connector

  EventStream.emit(
    type: "mcp_server_connected",
    agent: @id,
    server: server_def.name,
    tools: connector.available_tools.map(&:name),
  )
end
1155
+
1156
# Disconnect all MCP servers and forget their connectors
#
# @return [void]
def disconnect_mcp_servers
  @mcp_connectors.each(&:disconnect!).clear
end
1163
+ end
1164
+ end
1165
+ end