swarm_sdk 2.7.14 → 3.0.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/lib/swarm_sdk/ruby_llm_patches/chat_callbacks_patch.rb +16 -0
  3. data/lib/swarm_sdk/ruby_llm_patches/init.rb +4 -1
  4. data/lib/swarm_sdk/v3/agent.rb +1165 -0
  5. data/lib/swarm_sdk/v3/agent_builder.rb +533 -0
  6. data/lib/swarm_sdk/v3/agent_definition.rb +330 -0
  7. data/lib/swarm_sdk/v3/configuration.rb +490 -0
  8. data/lib/swarm_sdk/v3/debug_log.rb +86 -0
  9. data/lib/swarm_sdk/v3/event_stream.rb +130 -0
  10. data/lib/swarm_sdk/v3/hooks/context.rb +112 -0
  11. data/lib/swarm_sdk/v3/hooks/result.rb +115 -0
  12. data/lib/swarm_sdk/v3/hooks/runner.rb +128 -0
  13. data/lib/swarm_sdk/v3/mcp/connector.rb +183 -0
  14. data/lib/swarm_sdk/v3/mcp/mcp_error.rb +15 -0
  15. data/lib/swarm_sdk/v3/mcp/server_definition.rb +125 -0
  16. data/lib/swarm_sdk/v3/mcp/ssl_http_transport.rb +103 -0
  17. data/lib/swarm_sdk/v3/mcp/stdio_transport.rb +135 -0
  18. data/lib/swarm_sdk/v3/mcp/tool_proxy.rb +53 -0
  19. data/lib/swarm_sdk/v3/memory/adapters/base.rb +297 -0
  20. data/lib/swarm_sdk/v3/memory/adapters/faiss_support.rb +194 -0
  21. data/lib/swarm_sdk/v3/memory/adapters/filesystem_adapter.rb +212 -0
  22. data/lib/swarm_sdk/v3/memory/adapters/sqlite_adapter.rb +507 -0
  23. data/lib/swarm_sdk/v3/memory/adapters/vector_utils.rb +88 -0
  24. data/lib/swarm_sdk/v3/memory/card.rb +206 -0
  25. data/lib/swarm_sdk/v3/memory/cluster.rb +146 -0
  26. data/lib/swarm_sdk/v3/memory/compressor.rb +496 -0
  27. data/lib/swarm_sdk/v3/memory/consolidator.rb +427 -0
  28. data/lib/swarm_sdk/v3/memory/context_builder.rb +339 -0
  29. data/lib/swarm_sdk/v3/memory/edge.rb +105 -0
  30. data/lib/swarm_sdk/v3/memory/embedder.rb +185 -0
  31. data/lib/swarm_sdk/v3/memory/exposure_tracker.rb +104 -0
  32. data/lib/swarm_sdk/v3/memory/ingestion_pipeline.rb +394 -0
  33. data/lib/swarm_sdk/v3/memory/retriever.rb +289 -0
  34. data/lib/swarm_sdk/v3/memory/store.rb +489 -0
  35. data/lib/swarm_sdk/v3/skills/loader.rb +147 -0
  36. data/lib/swarm_sdk/v3/skills/manifest.rb +45 -0
  37. data/lib/swarm_sdk/v3/sub_task_agent.rb +248 -0
  38. data/lib/swarm_sdk/v3/tools/base.rb +80 -0
  39. data/lib/swarm_sdk/v3/tools/bash.rb +174 -0
  40. data/lib/swarm_sdk/v3/tools/clock.rb +32 -0
  41. data/lib/swarm_sdk/v3/tools/document_converters/base.rb +84 -0
  42. data/lib/swarm_sdk/v3/tools/document_converters/docx_converter.rb +120 -0
  43. data/lib/swarm_sdk/v3/tools/document_converters/pdf_converter.rb +111 -0
  44. data/lib/swarm_sdk/v3/tools/document_converters/xlsx_converter.rb +128 -0
  45. data/lib/swarm_sdk/v3/tools/edit.rb +111 -0
  46. data/lib/swarm_sdk/v3/tools/glob.rb +96 -0
  47. data/lib/swarm_sdk/v3/tools/grep.rb +200 -0
  48. data/lib/swarm_sdk/v3/tools/message_teammate.rb +15 -0
  49. data/lib/swarm_sdk/v3/tools/message_user.rb +15 -0
  50. data/lib/swarm_sdk/v3/tools/read.rb +213 -0
  51. data/lib/swarm_sdk/v3/tools/read_tracker.rb +40 -0
  52. data/lib/swarm_sdk/v3/tools/registry.rb +208 -0
  53. data/lib/swarm_sdk/v3/tools/sub_task.rb +183 -0
  54. data/lib/swarm_sdk/v3/tools/think.rb +88 -0
  55. data/lib/swarm_sdk/v3/tools/write.rb +87 -0
  56. data/lib/swarm_sdk/v3.rb +145 -0
  57. metadata +88 -149
  58. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -175
  59. data/lib/swarm_sdk/agent/builder.rb +0 -705
  60. data/lib/swarm_sdk/agent/chat.rb +0 -1438
  61. data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -375
  62. data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
  63. data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
  64. data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -85
  65. data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -290
  66. data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
  67. data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
  68. data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -134
  69. data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
  70. data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -146
  71. data/lib/swarm_sdk/agent/context.rb +0 -115
  72. data/lib/swarm_sdk/agent/context_manager.rb +0 -315
  73. data/lib/swarm_sdk/agent/definition.rb +0 -588
  74. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -226
  75. data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -173
  76. data/lib/swarm_sdk/agent/tool_registry.rb +0 -189
  77. data/lib/swarm_sdk/agent_registry.rb +0 -146
  78. data/lib/swarm_sdk/builders/base_builder.rb +0 -558
  79. data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
  80. data/lib/swarm_sdk/concerns/cleanupable.rb +0 -42
  81. data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
  82. data/lib/swarm_sdk/concerns/validatable.rb +0 -55
  83. data/lib/swarm_sdk/config.rb +0 -368
  84. data/lib/swarm_sdk/configuration/parser.rb +0 -397
  85. data/lib/swarm_sdk/configuration/translator.rb +0 -285
  86. data/lib/swarm_sdk/configuration.rb +0 -165
  87. data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
  88. data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -102
  89. data/lib/swarm_sdk/context_compactor.rb +0 -335
  90. data/lib/swarm_sdk/context_management/builder.rb +0 -128
  91. data/lib/swarm_sdk/context_management/context.rb +0 -328
  92. data/lib/swarm_sdk/custom_tool_registry.rb +0 -226
  93. data/lib/swarm_sdk/defaults.rb +0 -251
  94. data/lib/swarm_sdk/events_to_messages.rb +0 -199
  95. data/lib/swarm_sdk/hooks/adapter.rb +0 -359
  96. data/lib/swarm_sdk/hooks/context.rb +0 -197
  97. data/lib/swarm_sdk/hooks/definition.rb +0 -80
  98. data/lib/swarm_sdk/hooks/error.rb +0 -29
  99. data/lib/swarm_sdk/hooks/executor.rb +0 -146
  100. data/lib/swarm_sdk/hooks/registry.rb +0 -147
  101. data/lib/swarm_sdk/hooks/result.rb +0 -150
  102. data/lib/swarm_sdk/hooks/shell_executor.rb +0 -256
  103. data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
  104. data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
  105. data/lib/swarm_sdk/log_collector.rb +0 -227
  106. data/lib/swarm_sdk/log_stream.rb +0 -127
  107. data/lib/swarm_sdk/markdown_parser.rb +0 -75
  108. data/lib/swarm_sdk/model_aliases.json +0 -8
  109. data/lib/swarm_sdk/models.json +0 -44002
  110. data/lib/swarm_sdk/models.rb +0 -161
  111. data/lib/swarm_sdk/node_context.rb +0 -245
  112. data/lib/swarm_sdk/observer/builder.rb +0 -81
  113. data/lib/swarm_sdk/observer/config.rb +0 -45
  114. data/lib/swarm_sdk/observer/manager.rb +0 -248
  115. data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
  116. data/lib/swarm_sdk/permissions/config.rb +0 -239
  117. data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
  118. data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
  119. data/lib/swarm_sdk/permissions/validator.rb +0 -173
  120. data/lib/swarm_sdk/permissions_builder.rb +0 -122
  121. data/lib/swarm_sdk/plugin.rb +0 -309
  122. data/lib/swarm_sdk/plugin_registry.rb +0 -101
  123. data/lib/swarm_sdk/proc_helpers.rb +0 -53
  124. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -119
  125. data/lib/swarm_sdk/restore_result.rb +0 -65
  126. data/lib/swarm_sdk/result.rb +0 -241
  127. data/lib/swarm_sdk/snapshot.rb +0 -156
  128. data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
  129. data/lib/swarm_sdk/state_restorer.rb +0 -476
  130. data/lib/swarm_sdk/state_snapshot.rb +0 -334
  131. data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -648
  132. data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -204
  133. data/lib/swarm_sdk/swarm/builder.rb +0 -256
  134. data/lib/swarm_sdk/swarm/executor.rb +0 -446
  135. data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -162
  136. data/lib/swarm_sdk/swarm/lazy_delegate_chat.rb +0 -372
  137. data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -361
  138. data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -290
  139. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
  140. data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -392
  141. data/lib/swarm_sdk/swarm.rb +0 -973
  142. data/lib/swarm_sdk/swarm_loader.rb +0 -145
  143. data/lib/swarm_sdk/swarm_registry.rb +0 -136
  144. data/lib/swarm_sdk/tools/base.rb +0 -63
  145. data/lib/swarm_sdk/tools/bash.rb +0 -280
  146. data/lib/swarm_sdk/tools/clock.rb +0 -46
  147. data/lib/swarm_sdk/tools/delegate.rb +0 -389
  148. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
  149. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
  150. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
  151. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
  152. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
  153. data/lib/swarm_sdk/tools/edit.rb +0 -145
  154. data/lib/swarm_sdk/tools/glob.rb +0 -166
  155. data/lib/swarm_sdk/tools/grep.rb +0 -235
  156. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
  157. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -167
  158. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
  159. data/lib/swarm_sdk/tools/mcp_tool_stub.rb +0 -198
  160. data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
  161. data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
  162. data/lib/swarm_sdk/tools/read.rb +0 -261
  163. data/lib/swarm_sdk/tools/registry.rb +0 -205
  164. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
  165. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
  166. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
  167. data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
  168. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -273
  169. data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
  170. data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
  171. data/lib/swarm_sdk/tools/think.rb +0 -100
  172. data/lib/swarm_sdk/tools/todo_write.rb +0 -237
  173. data/lib/swarm_sdk/tools/web_fetch.rb +0 -264
  174. data/lib/swarm_sdk/tools/write.rb +0 -112
  175. data/lib/swarm_sdk/transcript_builder.rb +0 -278
  176. data/lib/swarm_sdk/utils.rb +0 -68
  177. data/lib/swarm_sdk/validation_result.rb +0 -33
  178. data/lib/swarm_sdk/version.rb +0 -5
  179. data/lib/swarm_sdk/workflow/agent_config.rb +0 -95
  180. data/lib/swarm_sdk/workflow/builder.rb +0 -227
  181. data/lib/swarm_sdk/workflow/executor.rb +0 -497
  182. data/lib/swarm_sdk/workflow/node_builder.rb +0 -593
  183. data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -250
  184. data/lib/swarm_sdk/workflow.rb +0 -589
  185. data/lib/swarm_sdk.rb +0 -721
@@ -0,0 +1,1165 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ # Runtime agent with built-in memory
6
+ #
7
+ # The Agent ties together RubyLLM::Chat, the memory system, and tools.
8
+ # The LLM's context window is a staging area — older turns get consolidated
9
+ # into memory cards, and retrieval brings relevant memory back on demand.
10
+ #
11
+ # ## Interruption Safety
12
+ #
13
+ # The agent supports safe interruption via {#interrupt!}, which raises
14
+ # `Async::Stop` in the fiber running {#ask}. When adding new features
15
+ # to the agent, follow these rules:
16
+ #
17
+ # 1. **Interruptible phases** (LLM calls, tool execution, streaming):
18
+ # Code here can be interrupted at any fiber yield point. Do NOT leave
19
+ # shared state half-updated — use snapshot/restore or flags to detect
20
+ # incomplete operations in `ensure` blocks.
21
+ #
22
+ # 2. **Uninterruptible phases** (memory writes, STM capture, eviction):
23
+ # Wrap in `Async::Task.current.defer_stop { }` to defer `Async::Stop`
24
+ # until the block completes. Use this for any multi-step I/O that must
25
+ # be atomic (e.g., writing cards + saving the index).
26
+ #
27
+ # 3. **New instance state**: If a new feature adds state that is modified
28
+ # during {#ask}, ensure it is either rolled back on interruption (via
29
+ # `ensure`) or protected with `defer_stop`.
30
+ #
31
+ # 4. **Subprocesses**: Any code that spawns a subprocess (Open3, etc.)
32
+ # must terminate it in an `ensure` block. `Async::Stop` bypasses
33
+ # `rescue StandardError` — only `ensure` is guaranteed to run.
34
+ #
35
+ # @see #interrupt!
36
+ # @see #execute_turn
37
+ #
38
+ # @example Basic usage
39
+ # definition = AgentDefinition.new(
40
+ # name: :assistant,
41
+ # description: "A helpful assistant",
42
+ # model: "claude-sonnet-4",
43
+ # tools: [:Read, :Write, :Edit, :Bash, :Grep, :Glob],
44
+ # memory_directory: ".swarm/memory",
45
+ # )
46
+ #
47
+ # agent = Agent.new(definition)
48
+ # response = agent.ask("Build a login page")
49
+ #
50
+ # @example Without memory
51
+ # definition = AgentDefinition.new(
52
+ # name: :chat,
53
+ # description: "Simple chat",
54
+ # )
55
+ # agent = Agent.new(definition)
56
+ # response = agent.ask("Hello!")
57
+ #
58
+ # @example Interrupting a running agent
59
+ # Async do |parent|
60
+ # task = parent.async { agent.ask("Long running task") }
61
+ # agent.running? # => true
62
+ # agent.interrupt! # => true
63
+ # result = task.wait # => nil
64
+ # agent.running? # => false
65
+ # end
66
+ class Agent
67
# @return [AgentDefinition] Immutable agent configuration
attr_reader :definition

# @return [String] Unique instance identifier (name_<hex>)
attr_reader :id

# @return [Array<Skills::Manifest>, nil] Loaded skills; nil until the
#   first ask() runs lazy initialization (see #lazy_initialize!)
attr_reader :loaded_skills
75
+
76
# Create a new agent
#
# Construction is deliberately cheap: the RubyLLM::Chat, tools, MCP
# connections, and memory system are all built lazily on the first ask().
#
# @param definition [AgentDefinition] Agent configuration
def initialize(definition)
  @definition = definition
  @id = format("%s_%s", definition.name, SecureRandom.hex(3))

  # Lazily-built collaborators (populated by lazy_initialize!)
  @chat = nil
  @memory_store = nil
  @loaded_skills = nil
  @base_system_prompt = nil
  @mcp_connectors = []
  @initialized = false

  # Conversation state
  @stm_buffer = []
  @steering_queue = []
  @turn_counter = 0
  @total_input_tokens = 0
  @total_output_tokens = 0

  # Concurrency: one ask() at a time; @current_task enables interrupt!
  @semaphore = Async::Semaphore.new(1)
  @current_task = nil
  @pending_ingestion = nil

  @hooks = Hooks::Runner.new(definition.hooks)
end
100
+
101
+ # Send a message to the agent and get a response
102
+ #
103
+ # The ask() flow:
104
+ # 1. Lazy-initialize (create chat, memory, tools)
105
+ # 2. Retrieve relevant memory cards for the prompt
106
+ # 3. Build working context (system prompt + memory + recent turns)
107
+ # 4. Execute via RubyLLM::Chat (handles tool loop internally)
108
+ # 5. Capture turn in STM buffer
109
+ # 6. Ingest turn into memory (async)
110
+ # 7. Evict old turns from STM if buffer exceeds limit
111
+ # 8. Emit events
112
+ #
113
+ # Supports safe interruption via {#interrupt!}. When interrupted,
114
+ # returns nil and leaves the agent in a consistent state for the
115
+ # next ask() call. Check {#running?} to see if an ask is in progress.
116
+ #
117
+ # @param prompt [String] User message
118
+ # @param output_schema [Hash, Object, nil] Per-call schema override (nil = use definition default)
119
+ # @yield [event] Optional block receives ALL events (content_chunk, tool_call, etc.)
120
+ # @yieldparam event [Hash] Event hash with :type, :timestamp, and event-specific fields
121
+ # @return [RubyLLM::Message, nil] LLM response, or nil if interrupted
122
+ #
123
+ # @example Simple ask
124
+ # response = agent.ask("What is 2+2?")
125
+ # puts response.content
126
+ #
127
+ # @example Receive all events via block
128
+ # agent.ask("Tell me a story") do |event|
129
+ # case event[:type]
130
+ # when "content_chunk"
131
+ # print event[:content]
132
+ # when "tool_call"
133
+ # puts "Calling #{event[:tool]}..."
134
+ # end
135
+ # end
136
+ #
137
+ # @example With structured output
138
+ # schema = { type: "object", properties: { answer: { type: "integer" } } }
139
+ # response = agent.ask("What is 2+2?", output_schema: schema)
140
+ # response.content # => { "answer" => 4 }
141
def ask(prompt, output_schema: nil, &block)
  # Scope the event block to this call's fiber; restored on exit so
  # nested/concurrent asks don't leak emitters into each other.
  with_block_emitter(block) do
    Sync do |task|
      # One turn at a time per agent; queued asks wait here.
      @semaphore.acquire do
        # Published so interrupt! can raise Async::Stop into this fiber.
        @current_task = task
        begin
          lazy_initialize!

          before_result = @hooks.run(:before_ask, Hooks::Context.new(
            event: :before_ask, agent_name: @definition.name, prompt: prompt,
          ))

          if before_result.halt?
            # Hook vetoed the turn — return nil without touching the LLM.
            nil
          else
            # Hook may rewrite the prompt before execution.
            prompt = before_result.value if before_result.replace?
            response = execute_turn(prompt, output_schema: output_schema)

            @hooks.run(:after_ask, Hooks::Context.new(
              event: :after_ask, agent_name: @definition.name, prompt: prompt, response: response,
            ))

            @hooks.run(:on_stop, Hooks::Context.new(
              event: :on_stop, agent_name: @definition.name, response: response,
            ))

            response
          end
        rescue Async::Stop
          # interrupt! path: emit a marker event and return nil; STM/memory
          # consistency is the responsibility of execute_turn's ensure blocks.
          EventStream.emit(type: "agent_interrupted", agent: @id, turn: @turn_counter)
          nil
        ensure
          # Always clear, so running? is false and interrupt! is a no-op again.
          @current_task = nil
        end
      end
    end
  end
end
179
+
180
# Stop whatever the agent is doing
#
# Raises Async::Stop inside the fiber currently running {#ask}. Must be
# called from another Async fiber in the same reactor. Calling it while
# the agent is idle is harmless and returns nil.
#
# @return [Boolean, nil] true when a running task was stopped, nil when idle
#
# @example Interrupt from another fiber
#   Async do |parent|
#     task = parent.async { agent.ask("Long task") }
#     sleep 1
#     agent.interrupt!  # => true
#     task.wait         # => nil
#   end
def interrupt!
  task = @current_task
  return if task.nil?

  task.stop
  true
end
204
+
205
# Whether the agent is currently executing an ask() call
#
# True exactly while a turn holds the semaphore and @current_task is set,
# i.e. while {#interrupt!} would have something to stop.
#
# @return [Boolean]
def running?
  !!@current_task
end
218
+
219
# Queue a high-priority message that interrupts the current tool batch
#
# The queued message is injected as a `role: user` message after the
# currently-running tool finishes; remaining tools in that batch are
# skipped (see register_tool_callbacks).
#
# @param message [String] Message content to inject
# @return [void]
def steer(message)
  @steering_queue.push(message)
end
237
+
238
# Discard every queued steering message without delivering it
#
# @return [void]
def clear_steering_queue
  @steering_queue.clear
end
244
+
245
# Get recent messages (the short-term-memory buffer)
#
# Returns a shallow copy so callers cannot mutate the internal buffer.
#
# @return [Array<Hash>] Recent conversation turns
def messages
  @stm_buffer.dup
end
251
+
252
# Reset conversation and optionally clear memory
#
# Waits out any in-flight background ingestion first so adapter writes
# cannot race the reset, then drops short-term state while preserving
# the system prompt on the chat.
#
# NOTE(review): MCP servers are disconnected here but @mcp_connectors is
# not rebuilt until re-initialization — confirm reconnect semantics.
#
# @param clear_memory [Boolean] Also clear memory storage
# @return [void]
def clear(clear_memory: false)
  wait_for_pending_ingestion
  disconnect_mcp_servers
  @stm_buffer.clear
  @steering_queue.clear
  @turn_counter = 0
  @chat&.reset_messages!(preserve_system_prompt: true)

  return unless clear_memory && @memory_store

  # Drop the store and rebuild it immediately. Previously this only set
  # @memory_store = nil: because lazy_initialize! early-returns once
  # @initialized is true, the store was never recreated, so memory was
  # silently disabled for the rest of the agent's life.
  @memory_store = nil
  initialize_memory if @initialized && @definition.memory_enabled?
end
269
+
270
# Cumulative token usage across all turns
#
# @return [Hash{Symbol=>Integer}] :input and :output token counts
def tokens
  {
    input: @total_input_tokens,
    output: @total_output_tokens,
  }
end
276
+
277
# Agent name, delegated to the immutable definition
#
# @return [Symbol]
def name
  @definition.name
end
283
+
284
# Whether lazy initialization has completed (set by the first ask)
#
# @return [Boolean]
def initialized?
  @initialized
end
290
+
291
# Read-only access to the memory store
#
# @return [Memory::Store, nil] nil when memory is not enabled or the
#   agent has not yet been initialized
def memory
  @memory_store
end
297
+
298
# Whether memory operations are read-only
#
# The base agent always allows writes; SubTaskAgent overrides this to
# return true so context building never bumps access counters.
#
# @return [Boolean]
def memory_read_only?
  false
end
307
+
308
# Run memory defragmentation (compression, consolidation, promotion, pruning)
#
# Intended to run between sessions or on a schedule — never during a
# conversation turn. The SDK user decides when. No-op when memory is
# disabled on the definition.
#
# @yield [event] Optional block receives defrag events
# @yieldparam event [Hash] Event with :type, :timestamp, and event fields
# @return [Hash, nil] Defragmentation results, or nil if memory not enabled
#
# @example
#   agent.defrag!
#   #=> { duplicates_merged: 0, conflicts_detected: 0,
#   #     cards_compressed: 3, cards_promoted: 1, cards_pruned: 0 }
def defrag!(&block)
  return unless @definition.memory_enabled?

  with_block_emitter(block) do
    Sync do
      @semaphore.acquire do
        lazy_initialize!
        # Ingestion and defrag share the adapter — never overlap writes.
        wait_for_pending_ingestion
        @memory_store.defrag!
      end
    end
  end
end
342
+
343
+ private
344
+
345
# Scope the fiber-local block emitter to the duration of a call
#
# Saves the current emitter, installs the given block, and restores the
# saved value afterwards so nested ask() calls compose. A nil block is a
# pass-through: the caller inherits whatever emitter is already active
# (this is how SubTaskAgent.ask picks up its parent's emitter).
#
# @param block [Proc, nil] Event receiver, or nil to inherit the current one
# @yield Executes with the emitter installed
# @return [Object] Whatever the yielded block returns
def with_block_emitter(block)
  return yield unless block

  saved = EventStream.block_emitter
  begin
    EventStream.block_emitter = block
    yield
  ensure
    EventStream.block_emitter = saved
  end
end
367
+
368
# Wait for any pending background ingestion to complete
#
# Called before starting a new ingestion, before eviction writes, and
# before defrag to prevent concurrent adapter writes.
#
# The pending-task reference is cleared in an ensure block: if the
# ingestion task failed, its wait raises, and previously the stale
# reference stayed set — every later caller re-waited (and re-raised on)
# the same dead task.
#
# @return [void]
def wait_for_pending_ingestion
  return unless @pending_ingestion

  begin
    @pending_ingestion.wait
  ensure
    @pending_ingestion = nil
  end
end
380
+
381
# Lazy-initialize the agent on first ask()
#
# Creates RubyLLM::Chat, tools, MCP connections, and the memory system.
# Skills and the system prompt are resolved first because configure_chat
# reads @base_system_prompt.
#
# The output_schema capability check now runs BEFORE any external setup
# and before @initialized is set. Previously it ran after
# @initialized = true, so a misconfigured agent was permanently marked
# initialized: the very next ask() skipped validation and proceeded with
# a model that cannot honor the schema.
#
# @return [void]
# @raise [ConfigurationError] when output_schema is set but the model
#   does not support structured output
def lazy_initialize!
  return if @initialized

  @loaded_skills = load_skills
  @base_system_prompt = build_base_system_prompt

  @chat = create_chat
  configure_chat

  # Fail fast before connecting MCP servers or touching memory storage.
  if @definition.output_schema && !@chat.model.structured_output?
    raise ConfigurationError,
      "Agent #{@id} has output_schema but model #{@definition.model} does not support structured output"
  end

  initialize_memory if @definition.memory_enabled?
  connect_mcp_servers
  attach_tools

  @initialized = true

  EventStream.emit(
    type: "agent_initialized",
    agent: @id,
    model: @definition.model,
    memory_enabled: @definition.memory_enabled?,
    skills_loaded: @loaded_skills.size,
  )
end
413
+
414
# Create the RubyLLM::Chat instance
#
# A custom base_url requires an isolated RubyLLM context, because the
# endpoint is a connection-level setting rather than a request parameter.
# When a provider is forced we also skip model-registry validation via
# assume_model_exists.
#
# @return [RubyLLM::Chat]
def create_chat
  chat_options = { model: @definition.model }
  if (provider = @definition.provider)
    chat_options[:provider] = provider.to_sym
    chat_options[:assume_model_exists] = true
  end

  base_url = @definition.base_url
  return RubyLLM.chat(**chat_options) unless base_url

  create_context_with_base_url(base_url, @definition.provider).chat(**chat_options)
end
435
+
436
+ # Configure LLM chat with definition settings
437
+ #
438
+ # Uses raw parameter passthrough — no abstractions like with_thinking.
439
+ # The SDK user controls exactly what goes to the API via parameters/headers.
440
+ # Streaming is handled natively by passing a block to chat.ask(),
441
+ # not via with_params(stream: ...).
442
+ # Registers event callbacks for tool_call/tool_result emissions.
443
+ #
444
+ # Responses API must be enabled first because {RubyLLM::Chat#with_responses_api}
445
+ # swaps the provider instance — subsequent configuration must see the final provider.
446
+ #
447
+ # @return [void]
448
def configure_chat
  # ORDER MATTERS: with_responses_api swaps the provider instance, so it
  # must run before anything else configures the chat — later settings
  # have to land on the final provider.
  enable_responses_api if @definition.api_version == "v1/responses"

  # Raw passthrough: the definition's parameters/headers go to the API
  # verbatim; no abstraction layer sits in between.
  @chat.with_params(**@definition.parameters) unless @definition.parameters.empty?

  @chat.with_headers(**@definition.headers) unless @definition.headers.empty?

  # Base system prompt (with skills XML appended) marked cacheable for
  # providers that support prompt caching.
  if @base_system_prompt
    @chat.with_instructions(cacheable_instructions(@base_system_prompt))
  end

  # Optional cap on parallel tool execution within one batch.
  if @definition.max_concurrent_tools
    @chat.with_tool_concurrency(:async, max: @definition.max_concurrent_tools)
  end

  register_event_callbacks
  register_tool_callbacks
end
466
+
467
# Register RubyLLM callbacks that mirror tool activity onto EventStream
#
# Consumers subscribed to the event stream see tool_call / tool_result
# events in real time. The result preview is truncated to keep events
# small.
#
# @return [void]
def register_event_callbacks
  emitter_agent_id = @id

  @chat.on_tool_call do |call|
    EventStream.emit(
      type: "tool_call",
      agent: emitter_agent_id,
      tool: call.name,
      arguments: call.arguments,
    )
  end

  @chat.on_tool_result do |_call, result|
    preview = result.to_s[0..200]
    EventStream.emit(
      type: "tool_result",
      agent: emitter_agent_id,
      result_preview: preview,
    )
  end
end
493
+
494
+ # Register unified tool callbacks for hooks and steering
495
+ #
496
+ # Uses the RubyLLM patch ({RubyLLM::Chat::MultiSubscriberCallbacks})
497
+ # which provides `around_tool_execution` receiving (tool_call, tool_instance, execute_proc).
498
+ #
499
+ # CRITICAL: `around_tool_execution` is SINGLE-CALLBACK — each registration
500
+ # replaces the previous. This method combines user hooks and steering logic
501
+ # in one callback.
502
+ #
503
+ # Tool execution flow:
504
+ # 1. Check skip flag (set by steering queue drain)
505
+ # 2. Run before_tool hooks
506
+ # 3. Execute tool
507
+ # 4. Run after_tool hooks
508
+ # 5. Check steering queue and set skip flag for remaining tools
509
+ #
510
+ # After all tools complete, `after_tool_calls` drains the steering queue
511
+ # and injects it as a user message before the next LLM call.
512
+ #
513
+ # @return [void]
514
def register_tool_callbacks
  # Capture references for closure (following existing pattern) —
  # the callbacks run inside RubyLLM, not in this object's context.
  steering_queue = @steering_queue
  skip_flag = { active: false } # Mutable holder for closure
  hooks = @hooks
  agent_name = @definition.name
  agent_id = @id
  chat = @chat

  # SINGLE-CALLBACK: each around_tool_execution registration replaces the
  # previous one, which is why hooks + steering live in one closure here.
  @chat.around_tool_execution do |tool_call, _tool_instance, execute_proc|
    # 1. Check steering queue first (may have been populated during previous tool's execution)
    # If steering was injected, skip this and all remaining tools in the batch.
    unless steering_queue.empty?
      skip_flag[:active] = true
    end

    # 2. Skip if steering interrupted this batch. `next <value>` makes
    # that value the tool's result as seen by the LLM.
    if skip_flag[:active]
      EventStream.emit(type: "tool_skipped", agent: agent_id, tool: tool_call.name)
      next "Skipped due to queued user message."
    end

    # 3. Before hook — a halting hook short-circuits execution and its
    # value (or a default message) becomes the tool result.
    if hooks.any_tool_hooks?
      before_ctx = Hooks::Context.new(
        event: :before_tool,
        agent_name: agent_name,
        tool_name: tool_call.name,
        tool_arguments: tool_call.arguments.transform_keys(&:to_sym),
      )
      before_result = hooks.run(:before_tool, before_ctx)
      next(before_result.value || "Hook blocked execution of #{tool_call.name}") if before_result.halt?
    end

    # 4. Execute tool
    output = execute_proc.call

    # 5. After hook — may replace the tool output before the LLM sees it.
    if hooks.any_tool_hooks?
      after_ctx = Hooks::Context.new(
        event: :after_tool,
        agent_name: agent_name,
        tool_name: tool_call.name,
        tool_arguments: tool_call.arguments.transform_keys(&:to_sym),
        tool_result: output,
      )
      after_result = hooks.run(:after_tool, after_ctx)
      output = after_result.value if after_result.replace?
    end

    output
  end

  # Capture steering mode for closure
  steering_mode = @definition.steering_mode

  # Runs once after every tool batch: reset the skip flag, then deliver
  # queued steering messages as a user message before the next LLM call.
  @chat.after_tool_calls do
    skip_flag[:active] = false
    next if steering_queue.empty?

    # Drain based on mode.
    # NOTE(review): this duplicates the drain logic in
    # drain_steering_queue / drain_queue_by_mode — presumably kept inline
    # for the closure; consider unifying (verify before refactoring).
    messages = if steering_mode == :one_at_a_time
      [steering_queue.shift]
    else
      result = steering_queue.dup
      steering_queue.clear
      result
    end

    content = messages.join("\n\n")
    EventStream.emit(type: "steering_injected", agent: agent_id, message_count: messages.size)
    chat.add_message(role: :user, content: content)
  end
end
588
+
589
# Drain steering messages that were queued while the agent was idle
#
# The after_tool_calls hook only fires when a turn actually ran tools,
# so messages steered between ask() calls (or before the first one)
# would otherwise never be delivered. Honors the definition's
# steering_mode: :all drains everything at once, :one_at_a_time drains
# a single message.
#
# @return [void]
def drain_steering_queue
  return if @steering_queue.empty?

  drained = drain_queue_by_mode(@steering_queue, @definition.steering_mode)
  return if drained.empty?

  EventStream.emit(type: "steering_injected", agent: @id, message_count: drained.size)
  @chat.add_message(role: :user, content: drained.join("\n\n"))
end
610
+
611
# Enable the OpenAI Responses API on the chat instance
#
# Delegates to RubyLLM's built-in support, which swaps the provider to
# {RubyLLM::Providers::OpenAIResponses} and routes requests to
# /v1/responses instead of /v1/chat/completions. Stateful mode tracks
# response IDs across turns for automatic conversation threading.
#
# Logs a warning when a non-OpenAI endpoint is configured, since custom
# endpoints rarely implement /v1/responses.
#
# @return [void]
def enable_responses_api
  custom_endpoint = @definition.base_url
  if custom_endpoint && !custom_endpoint.include?("api.openai.com")
    DebugLog.log(
      "agent",
      "Responses API requested but using custom endpoint #{custom_endpoint}. " \
        "Custom endpoints typically don't support /v1/responses.",
    )
  end

  @chat.with_responses_api(stateful: true, store: true)
  DebugLog.log("agent", "Enabled Responses API (v1/responses)")
end
633
+
634
# Load skill manifests from configured skill directories
#
# @return [Array<Skills::Manifest>] Discovered skill manifests (empty
#   when no skill directories are configured)
def load_skills
  skill_paths = @definition.skills
  return [] if skill_paths.empty?

  Skills::Loader.scan(skill_paths)
end
642
+
643
# Build the base system prompt with skills metadata appended
#
# When skills were discovered, the XML metadata block is appended to the
# definition's system prompt; the agent later loads full skill
# instructions via the Read tool when activating a skill. Without
# skills, the definition's prompt is returned untouched (may be nil).
#
# @return [String, nil] System prompt with skills, or original prompt
def build_base_system_prompt
  base = @definition.system_prompt
  return base if @loaded_skills.empty?

  xml_block = Skills::Loader.format_xml(@loaded_skills)
  [base, xml_block].compact.join("\n\n")
end
657
+
658
# Attach tools to the chat
#
# Combines SDK-registered tools (from the registry, wired to the memory
# store) with tools discovered on connected MCP servers. Skips the chat
# call entirely when no tools are available.
#
# @return [void]
def attach_tools
  sdk_tools = Tools::Registry.create_all(@definition, memory_store: @memory_store)
  mcp_tools = @mcp_connectors.flat_map(&:to_ruby_llm_tools)

  combined = sdk_tools + mcp_tools
  return if combined.empty?

  @chat.with_tools(*combined)
end
669
+
670
# Initialize the memory system
#
# Creates a dedicated background chat for memory operations
# (compression, segmentation) so those LLM calls don't pollute the
# agent's main conversation history, then builds and loads the store.
#
# @return [void]
def initialize_memory
  adapter = resolve_memory_adapter
  embedder = Memory::Embedder.new
  background_chat = create_background_chat

  store = Memory::Store.new(
    adapter: adapter,
    embedder: embedder,
    chat: background_chat,
    retrieval_top_k: @definition.memory_retrieval_top_k,
    semantic_weight: @definition.memory_semantic_weight,
    keyword_weight: @definition.memory_keyword_weight,
    associative_memory: @definition.memory_associative,
  )
  store.load

  @memory_store = store
end
694
+
695
# Resolve the memory adapter from definition
#
# Handles three configurations:
# 1. Symbol (e.g. :sqlite, :filesystem) - builds the matching adapter
# 2. Adapter instance - used as-is
# 3. Anything else (including nil) - filesystem adapter fallback
#
# @return [Memory::Adapters::Base]
def resolve_memory_adapter
  configured = @definition.memory_adapter

  if configured.is_a?(Symbol)
    create_adapter_from_symbol(configured)
  elsif configured.is_a?(Memory::Adapters::Base)
    configured
  else
    create_filesystem_adapter
  end
end
713
+
714
# Create an adapter instance from a symbol type
#
# @param adapter_type [Symbol] Adapter type (:sqlite or :filesystem)
# @return [Memory::Adapters::Base]
# @raise [ArgumentError] If adapter_type is not a recognized type
def create_adapter_from_symbol(adapter_type)
  if adapter_type == :sqlite
    Memory::Adapters::SqliteAdapter.new(@definition.memory_directory)
  elsif adapter_type == :filesystem
    create_filesystem_adapter
  else
    raise ArgumentError, "Unknown memory adapter type: #{adapter_type.inspect}. " \
      "Valid types are :sqlite and :filesystem."
  end
end
730
+
731
# Create a filesystem adapter rooted at the definition's memory directory
#
# @return [Memory::Adapters::FilesystemAdapter]
def create_filesystem_adapter
  directory = @definition.memory_directory
  Memory::Adapters::FilesystemAdapter.new(directory)
end
737
+
738
# Create a dedicated chat for background memory operations
#
# Prefers background_model/provider/base_url from Configuration when
# set, otherwise falls back to the agent's own model, provider, and
# base_url. Keeping this chat separate from the agent's main chat
# prevents compression/segmentation LLM calls from polluting the
# conversation history.
#
# @return [RubyLLM::Chat]
def create_background_chat
  config = Configuration.instance

  opts = { model: config.background_model || @definition.model }

  provider = config.background_provider || @definition.provider
  if provider
    opts[:provider] = provider.to_sym
    opts[:assume_model_exists] = true
  end

  # No custom endpoint anywhere -> plain global chat
  base_url = config.background_base_url || @definition.base_url
  return RubyLLM.chat(**opts) unless base_url

  # Custom endpoint -> isolated context so base_url stays a
  # connection-level setting rather than a request parameter
  provider_name = provider || @definition.provider
  create_context_with_base_url(base_url, provider_name).chat(**opts)
end
768
+
769
# Create a RubyLLM context with a custom API base URL
#
# Builds an isolated configuration via RubyLLM.context in which the
# provider's api_base attribute (e.g. anthropic_api_base) points at the
# given URL — when the provider is known and the config exposes that
# setter. This keeps base_url a connection-level setting, not a request
# body parameter.
#
# @param base_url [String] Custom API endpoint URL
# @param provider_name [String, Symbol, nil] Provider name (e.g., "anthropic")
# @return [RubyLLM::Context]
def create_context_with_base_url(base_url, provider_name)
  setter = provider_name ? :"#{provider_name}_api_base=" : nil

  RubyLLM.context do |ctx_config|
    ctx_config.public_send(setter, base_url) if setter && ctx_config.respond_to?(setter)
  end
end
786
+
787
# Execute a single conversation turn
#
# Tool calls are captured via a `:tool_call` subscription during the turn,
# because RubyLLM's `complete()` loop handles tool calls internally and
# the returned response is always the final text response (never tool calls).
#
# Always streams. Content chunks are emitted via EventStream. The caller
# receives events through the block emitter set by `ask()`.
#
# Interruption safety:
# - Chat messages are snapshotted before the LLM call
# - If interrupted during the interruptible phase (LLM + tools), chat is
#   restored from snapshot and the turn counter is decremented
# - Memory operations (STM capture, ingestion, eviction) run inside
#   `defer_stop` so they complete atomically even if interrupt! was called
#
# @param prompt [String] User message
# @param output_schema [Hash, Object, nil] Per-call schema override (nil = use definition default)
# @return [RubyLLM::Message] Response
def execute_turn(prompt, output_schema: nil)
  @turn_counter += 1
  turn_id = "turn_#{@turn_counter}"

  DebugLog.log("agent", "=== Turn #{@turn_counter} start ===")

  # prompt[0..100] keeps the event payload small; full prompt goes to the LLM only.
  EventStream.emit(type: "agent_start", agent: @id, turn: @turn_counter, prompt_preview: prompt[0..100])

  # Snapshot chat messages for rollback if LLM call is interrupted.
  # chat.messages returns the live array, so we record its length.
  message_restore_point = @chat.messages.length
  llm_completed = false

  # Subscribe to tool_call events to capture all tool calls during this turn.
  # RubyLLM's complete() loop processes tool calls internally and returns
  # only the final text response, so we can't inspect response.tool_calls.
  turn_tool_calls = []
  tool_call_subscription = @chat.subscribe(:tool_call) do |tool_call|
    turn_tool_calls << { name: tool_call.name, arguments: tool_call.arguments }
  end

  begin
    # Build working context with memory retrieval
    DebugLog.time("agent", "populate_chat_context") do
      emit_timed("memory_retrieval", agent: @id) { populate_chat_context(prompt) }
    end

    # Apply output schema (per-call override takes precedence over definition default).
    # with_schema(nil) clears any previously set schema, keeping behavior correct
    # when alternating between schema and non-schema calls.
    @chat.with_schema(output_schema || @definition.output_schema)

    # Drain any steering messages queued while idle (no tool loop to trigger the hook)
    drain_steering_queue

    # === INTERRUPTIBLE PHASE ===
    # LLM HTTP call + streaming + tool execution loop.
    # Can be interrupted at any fiber yield point via Async::Stop.
    response = DebugLog.time("agent", "llm_call") do
      @chat.ask(prompt) do |chunk|
        EventStream.emit(type: "content_chunk", agent: @id, content: chunk.content) if chunk.content
      end
    end

    llm_completed = true
    tool_call_subscription.unsubscribe

    # Track tokens
    track_tokens(response)

    # === UNINTERRUPTIBLE PHASE ===
    # Memory operations protected from Async::Stop via defer_stop.
    # If interrupt! was called during this phase, Stop is deferred
    # until after these operations complete.
    Async::Task.current.defer_stop do
      # Capture turn in STM buffer (including tool calls from the subscription)
      capture_turn(prompt, response, turn_id, tool_calls: turn_tool_calls)

      # Wait for any previous background ingestion to complete
      # before starting a new one (prevents concurrent adapter writes)
      emit_timed("memory_wait_ingestion", agent: @id) { wait_for_pending_ingestion }

      # Spawn ingestion as a background task so it runs concurrently
      # with the next LLM call. The task is a child of the current
      # ask() task, so it gets cancelled on interrupt — acceptable
      # because STM already captured the turn data above.
      #
      # Capture current emitters to propagate to the child Fiber,
      # since Fiber-local storage is not inherited by child tasks.
      captured_global = EventStream.emitter
      captured_block = EventStream.block_emitter

      @pending_ingestion = Async::Task.current.async do
        # Propagate emitters to child Fiber so events reach subscribers
        EventStream.emitter = captured_global
        EventStream.block_emitter = captured_block

        DebugLog.time("agent", "ingest_into_memory") do
          ingest_into_memory(prompt, response, turn_id, tool_calls: turn_tool_calls)
        end
      end

      # Evict old turns from STM to LTM
      DebugLog.time("agent", "evict_stm") do
        emit_timed("memory_eviction", agent: @id) { evict_stm }
      end
    end

    DebugLog.log("agent", "=== Turn #{@turn_counter} complete ===")
    EventStream.emit(type: "agent_stop", agent: @id, turn: @turn_counter)

    response
  ensure
    # Safe navigation guards the case where subscribe itself raised.
    # NOTE(review): on the happy path this is a second unsubscribe of the
    # same subscription — presumably idempotent in RubyLLM; confirm.
    tool_call_subscription&.unsubscribe

    # Rollback chat state if LLM call was interrupted.
    # For memory-enabled agents, populate_chat_context rebuilds from scratch
    # on the next ask(), but restoration keeps the chat consistent regardless.
    unless llm_completed
      overflow = @chat.messages.length - message_restore_point
      @chat.messages.pop(overflow) if overflow > 0
      @turn_counter -= 1
    end
  end
end
911
+
912
# Drain messages from a queue based on mode
#
# Mutates the given queue: removes one message for :one_at_a_time, or
# empties it entirely for any other mode (including :all).
#
# @param queue [Array<String>] Queue to drain (mutated in place)
# @param mode [Symbol] :all or :one_at_a_time
# @return [Array<String>] Drained messages
def drain_queue_by_mode(queue, mode)
  return [] if queue.empty?
  return [queue.shift] if mode == :one_at_a_time

  drained = queue.dup
  queue.clear
  drained
end
928
+
929
# Populate chat messages with memory-augmented context
#
# No-op when memory is disabled. Otherwise resets the chat (keeping the
# system prompt), asks the memory store for context messages built from
# the query and the STM buffer, installs the memory-augmented system
# prompt (Anthropic cache-marked when applicable), and replays the
# remaining messages into the chat.
#
# @param prompt [String] Current user query
# @return [void]
def populate_chat_context(prompt)
  return unless @memory_store

  # Reset chat messages and rebuild from memory context
  @chat.reset_messages!(preserve_system_prompt: true)

  context_messages = @memory_store.build_context(
    query: prompt,
    recent_turns: @stm_buffer,
    system_prompt: @base_system_prompt,
    read_only: memory_read_only?,
  )

  system_messages, conversation = context_messages.partition { |msg| msg[:role] == "system" }

  # Replace system prompt with the memory-augmented version, if any
  augmented = system_messages.first
  @chat.with_instructions(cacheable_instructions(augmented[:content]), replace: true) if augmented

  # Replay the remaining (non-system) messages into the chat
  conversation.each do |msg|
    @chat.add_message(role: msg[:role], content: msg[:content])
  end
end
958
+
959
# Emit a timed event around a block
#
# Yields the block, measures its wall-clock duration on the monotonic
# clock, and emits an event carrying elapsed_ms — but only when the
# block took at least 10ms, so trivially fast operations don't flood
# the event stream.
#
# @param type [String] Event type prefix
# @param data [Hash] Additional event data
# @yield Block to time
# @return [Object] Block return value
def emit_timed(type, **data)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  value = yield
  duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round

  EventStream.emit(type: type, elapsed_ms: duration_ms, **data) if duration_ms >= 10

  value
end
979
+
980
# Track token usage from response
#
# Accumulates input/output token counts into the running totals.
# Responses that don't expose token accounting are ignored; nil counts
# are treated as zero.
#
# Fix: the guard previously checked only `input_tokens`, so a response
# responding to `input_tokens` but not `output_tokens` raised
# NoMethodError mid-update. Both accessors are now required up front.
#
# @param response [RubyLLM::Message] LLM response
# @return [void]
def track_tokens(response)
  return unless response.respond_to?(:input_tokens) && response.respond_to?(:output_tokens)

  @total_input_tokens += response.input_tokens || 0
  @total_output_tokens += response.output_tokens || 0
end
990
+
991
# Capture a turn in the STM buffer including tool calls
#
# Appends two entries to the STM buffer: the user prompt and the
# assistant response. Tool calls (recorded by execute_turn's
# `:tool_call` subscription, not read off the response) are attached to
# the assistant entry only when present.
#
# @param prompt [String] User message
# @param response [RubyLLM::Message] Assistant response
# @param turn_id [String] Turn identifier
# @param tool_calls [Array<Hash>] Tool calls captured during this turn
# @return [void]
def capture_turn(prompt, response, turn_id, tool_calls: [])
  user_entry = { role: "user", content: prompt, turn_id: turn_id }

  assistant_entry = { role: "assistant", content: response.content, turn_id: turn_id }
  assistant_entry[:tool_calls] = tool_calls unless tool_calls.empty?

  @stm_buffer.push(user_entry, assistant_entry)
end
1010
+
1011
# Ingest a turn into long-term memory
#
# Formats the turn (prompt, tool calls, response) and writes it to the
# memory store, then persists. No-op when memory is disabled. Any error
# is reported via warn + event but never raised — memory persistence
# must not break the conversation loop.
#
# @param prompt [String] User message
# @param response [RubyLLM::Message] Assistant response
# @param turn_id [String] Turn identifier
# @param tool_calls [Array<Hash>] Tool calls captured during this turn
# @return [void]
def ingest_into_memory(prompt, response, turn_id, tool_calls: [])
  store = @memory_store
  return unless store

  store.ingest_turn(
    text: format_turn_for_ingestion(prompt, response, tool_calls: tool_calls),
    turn_id: turn_id,
  )
  store.save
rescue StandardError => e
  warn("[SwarmSDK::V3::Agent] Memory ingestion error: #{e.class}: #{e.message}")
  EventStream.emit(
    type: "memory_ingestion_error",
    agent: @id,
    error: "#{e.class}: #{e.message}",
  )
end
1035
+
1036
# Format a turn for memory ingestion, including tool calls
#
# Produces "User: ..." / optional "Tool calls: ..." / "Assistant: ..."
# sections joined by blank lines. Tool calls come from execute_turn's
# subscription rather than from the response (which is always the final
# text message).
#
# @param prompt [String] User message
# @param response [RubyLLM::Message] Assistant response
# @param tool_calls [Array<Hash>] Tool calls captured during this turn
# @return [String] Formatted turn text
def format_turn_for_ingestion(prompt, response, tool_calls: [])
  rendered_calls =
    unless tool_calls.empty?
      details = tool_calls.map { |call| "Tool call: #{call[:name]}(#{call[:arguments].inspect})" }
      "Tool calls: #{details.join("; ")}"
    end

  ["User: #{prompt}", rendered_calls, "Assistant: #{response.content}"].compact.join("\n\n")
end
1056
+
1057
# Evict oldest turns from STM buffer to LTM when it exceeds the limit
#
# Each turn is two messages (user + assistant), so the buffer limit is
# stm_turns * 2. Overflowing messages are ingested into long-term
# memory before removal so nothing is lost in the transition; the
# eviction waits for any in-flight background ingestion first to avoid
# concurrent adapter writes.
#
# @return [void]
def evict_stm
  limit = @definition.memory_stm_turns * 2
  overflow = @stm_buffer.size - limit
  return if overflow <= 0

  evicted = @stm_buffer.shift(overflow)

  # Serialize with any background ingestion before writing evicted turns
  wait_for_pending_ingestion

  # Persist evicted turns into LTM before they're gone
  ingest_evicted_turns(evicted)

  EventStream.emit(
    type: "stm_eviction",
    agent: @id,
    evicted_count: evicted.size / 2,
    remaining: @stm_buffer.size / 2,
  )
end
1085
+
1086
# Ingest evicted STM turns into long-term memory
#
# Serializes each evicted message as "Role: content". Assistant entries
# that carry tool calls (recorded by the STM capture) also get a
# "Tool calls:" line — previously that data was silently dropped here,
# contradicting the eviction path's "no information is lost" intent.
#
# Failures are reported (warn + event) but never raised, since memory
# persistence must not break the conversation loop.
#
# @param evicted [Array<Hash>] Evicted messages (:role, :content, optional :tool_calls)
# @return [void]
def ingest_evicted_turns(evicted)
  return unless @memory_store && evicted.any?

  evicted_text = evicted.map { |m| format_evicted_message(m) }.join("\n\n")
  @memory_store.ingest_turn(text: evicted_text, turn_id: "evicted_#{@turn_counter}")
  @memory_store.save
rescue StandardError => e
  warn("[SwarmSDK::V3::Agent] STM eviction ingestion error: #{e.class}: #{e.message}")
  EventStream.emit(
    type: "memory_ingestion_error",
    agent: @id,
    error: "#{e.class}: #{e.message}",
  )
end

# Render a single evicted STM message, appending tool calls when present
#
# @param message [Hash] Message with :role, :content, optional :tool_calls
# @return [String]
def format_evicted_message(message)
  text = "#{message[:role].capitalize}: #{message[:content]}"
  calls = message[:tool_calls]
  return text if calls.nil? || calls.empty?

  rendered = calls.map { |tc| "#{tc[:name]}(#{tc[:arguments].inspect})" }.join("; ")
  "#{text}\nTool calls: #{rendered}"
end
1104
+
1105
# Whether the current chat uses the Anthropic provider
#
# @return [Boolean]
def anthropic_provider?
  "anthropic" == @chat.model.provider
end
1111
+
1112
# Wrap instructions text with Anthropic prompt caching if applicable
#
# For Anthropic models, wraps the text in RubyLLM's provider-specific
# Content with cache enabled ({cache_control: {type: "ephemeral"}}),
# enabling prompt caching (~90% input-token savings on cache hits); the
# cache covers everything up to and including the marked block (tools +
# system prompt). Other providers (OpenAI, Gemini) cache automatically,
# so the text is returned unchanged.
#
# @param text [String] Instructions text to potentially cache
# @return [String, RubyLLM::Content::Raw] Original text or cached content
#
# @example Anthropic provider
#   cacheable_instructions("You are helpful.")
#   #=> RubyLLM::Content::Raw with cache_control
#
# @example OpenAI provider
#   cacheable_instructions("You are helpful.")
#   #=> "You are helpful."
def cacheable_instructions(text)
  if anthropic_provider?
    RubyLLM::Providers::Anthropic::Content.new(text, cache: true)
  else
    text
  end
end
1137
+
1138
# Connect to all configured MCP servers
#
# @return [void]
def connect_mcp_servers
  @definition.mcp_servers.each { |server_def| connect_mcp_server(server_def) }
end

# Connect a single MCP server, register its connector, and announce
# its discovered tools on the event stream.
#
# @param server_def [Object] MCP server definition from the agent definition
# @return [void]
def connect_mcp_server(server_def)
  connector = V3::MCP::Connector.new(server_def)
  connector.connect!
  @mcp_connectors << connector

  EventStream.emit(
    type: "mcp_server_connected",
    agent: @id,
    server: server_def.name,
    tools: connector.available_tools.map(&:name),
  )
end
1155
+
1156
# Disconnect all MCP servers and forget their connectors
#
# @return [void]
def disconnect_mcp_servers
  @mcp_connectors.each(&:disconnect!).clear
end
1163
+ end
1164
+ end
1165
+ end