swarm_sdk 2.0.0.pre.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in those registries.
- checksums.yaml +7 -0
- data/lib/swarm_sdk/agent/builder.rb +333 -0
- data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
- data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
- data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
- data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
- data/lib/swarm_sdk/agent/chat.rb +779 -0
- data/lib/swarm_sdk/agent/context.rb +108 -0
- data/lib/swarm_sdk/agent/definition.rb +335 -0
- data/lib/swarm_sdk/configuration.rb +251 -0
- data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
- data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
- data/lib/swarm_sdk/context_compactor.rb +340 -0
- data/lib/swarm_sdk/hooks/adapter.rb +359 -0
- data/lib/swarm_sdk/hooks/context.rb +163 -0
- data/lib/swarm_sdk/hooks/definition.rb +80 -0
- data/lib/swarm_sdk/hooks/error.rb +29 -0
- data/lib/swarm_sdk/hooks/executor.rb +146 -0
- data/lib/swarm_sdk/hooks/registry.rb +143 -0
- data/lib/swarm_sdk/hooks/result.rb +150 -0
- data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
- data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
- data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
- data/lib/swarm_sdk/log_collector.rb +83 -0
- data/lib/swarm_sdk/log_stream.rb +69 -0
- data/lib/swarm_sdk/markdown_parser.rb +46 -0
- data/lib/swarm_sdk/permissions/config.rb +239 -0
- data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
- data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
- data/lib/swarm_sdk/permissions/validator.rb +173 -0
- data/lib/swarm_sdk/permissions_builder.rb +122 -0
- data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
- data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
- data/lib/swarm_sdk/result.rb +97 -0
- data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
- data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
- data/lib/swarm_sdk/swarm/builder.rb +240 -0
- data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
- data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
- data/lib/swarm_sdk/swarm.rb +837 -0
- data/lib/swarm_sdk/tools/bash.rb +274 -0
- data/lib/swarm_sdk/tools/delegate.rb +152 -0
- data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
- data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
- data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
- data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
- data/lib/swarm_sdk/tools/edit.rb +150 -0
- data/lib/swarm_sdk/tools/glob.rb +158 -0
- data/lib/swarm_sdk/tools/grep.rb +231 -0
- data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
- data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
- data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
- data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
- data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
- data/lib/swarm_sdk/tools/read.rb +251 -0
- data/lib/swarm_sdk/tools/registry.rb +73 -0
- data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
- data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
- data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
- data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
- data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
- data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
- data/lib/swarm_sdk/tools/todo_write.rb +216 -0
- data/lib/swarm_sdk/tools/write.rb +117 -0
- data/lib/swarm_sdk/utils.rb +50 -0
- data/lib/swarm_sdk/version.rb +5 -0
- data/lib/swarm_sdk.rb +69 -0
- metadata +169 -0
data/lib/swarm_sdk/agent/chat.rb
@@ -0,0 +1,779 @@
+# frozen_string_literal: true
+
+module SwarmSDK
+  module Agent
+    # Chat extends RubyLLM::Chat to enable parallel agent-to-agent tool calling
+    # with two-level rate limiting to prevent API quota exhaustion
+    #
+    # ## Rate Limiting Strategy
+    #
+    # In hierarchical agent trees, unlimited parallelism can cause exponential growth:
+    # Main → 10 agents → 100 agents → 1,000 agents = API meltdown!
+    #
+    # Solution: Two-level semaphore system
+    # 1. **Global semaphore** - Total concurrent LLM calls across entire swarm
+    # 2. **Local semaphore** - Max concurrent tool calls for this specific agent
+    #
+    # ## Architecture
+    #
+    # This class is now organized with clear separation of concerns:
+    # - Core (this file): Initialization, provider setup, rate limiting, parallel execution
+    # - SystemReminderInjector: First message reminders, TodoWrite reminders
+    # - LoggingHelpers: Tool call formatting, result serialization
+    # - ContextTracker: Logging callbacks, delegation tracking
+    # - HookIntegration: Hook system integration (wraps tool execution with hooks)
+    class Chat < RubyLLM::Chat
+      # Include logging helpers for tool call formatting
+      include LoggingHelpers
+
+      # Include hook integration for user_prompt hooks and hook trigger methods
+      # This module overrides ask() to inject user_prompt hooks
+      # and provides trigger methods for pre/post tool use hooks
+      include HookIntegration
+
+      # Register custom provider for responses API support
+      # This is done once at class load time
+      unless RubyLLM::Provider.providers.key?(:openai_with_responses)
+        RubyLLM::Provider.register(:openai_with_responses, SwarmSDK::Providers::OpenAIWithResponses)
+      end
+
+      # Initialize AgentChat with rate limiting
+      #
+      # @param definition [Hash] Agent definition containing all configuration
+      # @param global_semaphore [Async::Semaphore, nil] Shared across all agents (not part of definition)
+      # @param options [Hash] Additional options to pass to RubyLLM::Chat
+      # @raise [ArgumentError] If provider doesn't support custom base_url or provider not specified with base_url
+      def initialize(definition:, global_semaphore: nil, **options)
+        # Extract configuration from definition
+        model = definition[:model]
+        provider = definition[:provider]
+        context_window = definition[:context_window]
+        max_concurrent_tools = definition[:max_concurrent_tools]
+        base_url = definition[:base_url]
+        api_version = definition[:api_version]
+        timeout = definition[:timeout] || Definition::DEFAULT_TIMEOUT
+        assume_model_exists = definition[:assume_model_exists]
+        system_prompt = definition[:system_prompt]
+        parameters = definition[:parameters]
+        headers = definition[:headers]
+
+        # Create isolated context if custom base_url or timeout specified
+        if base_url || timeout != Definition::DEFAULT_TIMEOUT
+          # Provider is required when using custom base_url
+          raise ArgumentError, "Provider must be specified when base_url is set" if base_url && !provider
+
+          # Determine actual provider to use
+          actual_provider = determine_provider(provider, base_url, api_version)
+          RubyLLM.logger.debug("SwarmSDK Agent::Chat: Using provider '#{actual_provider}' (requested='#{provider}', api_version='#{api_version}')")
+
+          context = build_custom_context(provider: provider, base_url: base_url, timeout: timeout)
+
+          # Use assume_model_exists to bypass model validation for custom endpoints
+          # Default to true when base_url is set, false otherwise (unless explicitly specified)
+          assume_model_exists = base_url ? true : false if assume_model_exists.nil?
+
+          super(model: model, provider: actual_provider, assume_model_exists: assume_model_exists, context: context, **options)
+
+          # Configure custom provider after creation (RubyLLM doesn't support custom init params)
+          if actual_provider == :openai_with_responses && api_version == "v1/responses"
+            configure_responses_api_provider
+          end
+        elsif provider
+          # No custom base_url or timeout: use RubyLLM's defaults (with optional provider override)
+          assume_model_exists = false if assume_model_exists.nil?
+          super(model: model, provider: provider, assume_model_exists: assume_model_exists, **options)
+        else
+          # No custom base_url, timeout, or provider: use RubyLLM's defaults
+          assume_model_exists = false if assume_model_exists.nil?
+          super(model: model, assume_model_exists: assume_model_exists, **options)
+        end
+
+        # Rate limiting semaphores
+        @global_semaphore = global_semaphore
+        @local_semaphore = max_concurrent_tools ? Async::Semaphore.new(max_concurrent_tools) : nil
+        @explicit_context_window = context_window
+
+        # Track TodoWrite usage for periodic reminders
+        @last_todowrite_message_index = nil
+
+        # Agent context for logging (set via setup_context)
+        @agent_context = nil
+
+        # Context tracker (created after agent_context is set)
+        @context_tracker = nil
+
+        # Try to fetch real model info for accurate context tracking
+        # This searches across ALL providers, so it works even when using proxies
+        # (e.g., Claude model through OpenAI-compatible proxy)
+        fetch_real_model_info(model)
+
+        # Configure system prompt, parameters, and headers after parent initialization
+        with_instructions(system_prompt) if system_prompt
+        configure_parameters(parameters)
+        configure_headers(headers)
+      end
+
+      # Setup agent context
+      #
+      # Sets the agent context for this chat, enabling delegation tracking.
+      # This is always called, regardless of whether logging is enabled.
+      #
+      # @param context [Agent::Context] Agent context for this chat
+      # @return [void]
+      def setup_context(context)
+        @agent_context = context
+        @context_tracker = ContextTracker.new(self, context)
+      end
+
+      # Setup logging callbacks
+      #
+      # This configures the chat to emit log events via LogStream.
+      # Should only be called when LogStream.emitter is set.
+      #
+      # @return [void]
+      def setup_logging
+        raise StateError, "Agent context not set. Call setup_context first." unless @agent_context
+
+        @context_tracker.setup_logging
+      end
+
+      # Emit model lookup warning if one occurred during initialization
+      #
+      # If a model wasn't found in the registry during initialization, this will
+      # emit a proper JSON log event through LogStream.
+      #
+      # @param agent_name [Symbol, String] The agent name for logging context
+      def emit_model_lookup_warning(agent_name)
+        return unless @model_lookup_error
+
+        LogStream.emit(
+          type: "model_lookup_warning",
+          agent: agent_name,
+          model: @model_lookup_error[:model],
+          error_message: @model_lookup_error[:error_message],
+          suggestions: @model_lookup_error[:suggestions].map { |s| { id: s.id, name: s.name, context_window: s.context_window } },
+        )
+      end
+
+      # Override ask to inject system reminders and periodic TodoWrite reminders
+      #
+      # Note: This is called BEFORE HookIntegration#ask (due to module include order),
+      # so HookIntegration will wrap this and inject user_prompt hooks.
+      #
+      # @param prompt [String] User prompt
+      # @param options [Hash] Additional options to pass to complete
+      # @return [RubyLLM::Message] LLM response
+      def ask(prompt, **options)
+        # Check if this is the first user message
+        if SystemReminderInjector.first_message?(self)
+          # Manually construct the first message sequence with system reminders
+          SystemReminderInjector.inject_first_message_reminders(self, prompt)
+
+          # Trigger user_prompt hook manually since we're bypassing the normal ask flow
+          if @hook_executor
+            hook_result = trigger_user_prompt(prompt)
+
+            # Check if hook halted execution
+            if hook_result[:halted]
+              # Return a halted message instead of calling LLM
+              return RubyLLM::Message.new(
+                role: :assistant,
+                content: hook_result[:halt_message],
+                model_id: model.id,
+              )
+            end
+
+            # NOTE: We ignore modified_prompt for first message since reminders already injected
+          end
+
+          # Call complete to get LLM response
+          complete(**options)
+        else
+          # Inject periodic TodoWrite reminder if needed
+          if SystemReminderInjector.should_inject_todowrite_reminder?(self, @last_todowrite_message_index)
+            add_message(role: :user, content: SystemReminderInjector::TODOWRITE_PERIODIC_REMINDER)
+            # Update tracking
+            @last_todowrite_message_index = SystemReminderInjector.find_last_todowrite_index(self)
+          end
+
+          # Normal ask behavior for subsequent messages
+          # This calls super which goes to HookIntegration's ask override
+          super(prompt, **options)
+        end
+      end
+
+      # Override handle_tool_calls to execute multiple tool calls in parallel with rate limiting.
+      #
+      # RubyLLM's default implementation executes tool calls one at a time. This
+      # override uses Async to execute all tool calls concurrently, with semaphores
+      # to prevent API quota exhaustion. Hooks are integrated via HookIntegration module.
+      #
+      # @param response [RubyLLM::Message] LLM response with tool calls
+      # @param block [Proc] Optional block passed through to complete
+      # @return [RubyLLM::Message] Final response when loop completes
+      def handle_tool_calls(response, &block)
+        # Single tool call: sequential execution with hooks
+        if response.tool_calls.size == 1
+          tool_call = response.tool_calls.values.first
+
+          # Handle pre_tool_use hook (skip for delegation tools)
+          unless delegation_tool_call?(tool_call)
+            # Trigger pre_tool_use hook (can block or provide custom result)
+            pre_result = trigger_pre_tool_use(tool_call)
+
+            # Handle finish_agent marker
+            if pre_result[:finish_agent]
+              message = RubyLLM::Message.new(
+                role: :assistant,
+                content: pre_result[:custom_result],
+                model_id: model.id,
+              )
+              # Set custom finish reason before triggering on_end_message
+              @context_tracker.finish_reason_override = "finish_agent" if @context_tracker
+              # Trigger on_end_message to ensure agent_stop event is emitted
+              @on[:end_message]&.call(message)
+              return message
+            end
+
+            # Handle finish_swarm marker
+            if pre_result[:finish_swarm]
+              return { __finish_swarm__: true, message: pre_result[:custom_result] }
+            end
+
+            # Handle blocked execution
+            unless pre_result[:proceed]
+              content = pre_result[:custom_result] || "Tool execution blocked by hook"
+              message = add_message(
+                role: :tool,
+                content: content,
+                tool_call_id: tool_call.id,
+              )
+              @on[:end_message]&.call(message)
+              return complete(&block)
+            end
+          end
+
+          # Execute tool
+          @on[:tool_call]&.call(tool_call)
+
+          result = execute_tool_with_error_handling(tool_call)
+
+          @on[:tool_result]&.call(result)
+
+          # Trigger post_tool_use hook (skip for delegation tools)
+          unless delegation_tool_call?(tool_call)
+            result = trigger_post_tool_use(result, tool_call: tool_call)
+          end
+
+          # Check for finish markers from hooks
+          if result.is_a?(Hash)
+            if result[:__finish_agent__]
+              # Finish this agent with the provided message
+              message = RubyLLM::Message.new(
+                role: :assistant,
+                content: result[:message],
+                model_id: model.id,
+              )
+              # Set custom finish reason before triggering on_end_message
+              @context_tracker.finish_reason_override = "finish_agent" if @context_tracker
+              # Trigger on_end_message to ensure agent_stop event is emitted
+              @on[:end_message]&.call(message)
+              return message
+            elsif result[:__finish_swarm__]
+              # Propagate finish_swarm marker up (don't add to conversation)
+              return result
+            end
+          end
+
+          # Check for halt result
+          return result if result.is_a?(RubyLLM::Tool::Halt)
+
+          # Add tool result to conversation
+          content = result.is_a?(RubyLLM::Content) ? result : result.to_s
+          message = add_message(
+            role: :tool,
+            content: content,
+            tool_call_id: tool_call.id,
+          )
+          @on[:end_message]&.call(message)
+
+          # Continue loop
+          return complete(&block)
+        end
+
+        # Multiple tool calls: execute in parallel with rate limiting and hooks
+        halt_result = nil
+
+        results = Async do
+          tasks = response.tool_calls.map do |_id, tool_call|
+            Async do
+              # Acquire semaphores (queues if limit reached)
+              acquire_semaphores do
+                @on[:tool_call]&.call(tool_call)
+
+                # Handle pre_tool_use hook (skip for delegation tools)
+                unless delegation_tool_call?(tool_call)
+                  pre_result = trigger_pre_tool_use(tool_call)
+
+                  # Handle finish markers first (early exit)
+                  # Don't call on_tool_result for finish markers - they're not tool results
+                  if pre_result[:finish_agent]
+                    result = { __finish_agent__: true, message: pre_result[:custom_result] }
+                    next { tool_call: tool_call, result: result, message: nil }
+                  end
+
+                  if pre_result[:finish_swarm]
+                    result = { __finish_swarm__: true, message: pre_result[:custom_result] }
+                    next { tool_call: tool_call, result: result, message: nil }
+                  end
+
+                  # Handle blocked execution
+                  unless pre_result[:proceed]
+                    result = pre_result[:custom_result] || "Tool execution blocked by hook"
+                    @on[:tool_result]&.call(result)
+
+                    content = result.is_a?(RubyLLM::Content) ? result : result.to_s
+                    message = add_message(
+                      role: :tool,
+                      content: content,
+                      tool_call_id: tool_call.id,
+                    )
+                    @on[:end_message]&.call(message)
+
+                    next { tool_call: tool_call, result: result, message: message }
+                  end
+                end
+
+                # Execute tool - Faraday yields during HTTP I/O
+                result = execute_tool_with_error_handling(tool_call)
+
+                @on[:tool_result]&.call(result)
+
+                # Trigger post_tool_use hook (skip for delegation tools)
+                unless delegation_tool_call?(tool_call)
+                  result = trigger_post_tool_use(result, tool_call: tool_call)
+                end
+
+                # Check if result is a finish marker (don't add to conversation)
+                if result.is_a?(Hash) && (result[:__finish_agent__] || result[:__finish_swarm__])
+                  # Finish markers will be detected after parallel execution completes
+                  { tool_call: tool_call, result: result, message: nil }
+                else
+                  # Add tool result to conversation
+                  content = result.is_a?(RubyLLM::Content) ? result : result.to_s
+                  message = add_message(
+                    role: :tool,
+                    content: content,
+                    tool_call_id: tool_call.id,
+                  )
+                  @on[:end_message]&.call(message)
+
+                  # Return result data for collection
+                  { tool_call: tool_call, result: result, message: message }
+                end
+              end
+            end
+          end
+
+          # Wait for all tasks to complete
+          tasks.map(&:wait)
+        end.wait
+
+        # Check for halt and finish results
+        results.each do |data|
+          result = data[:result]
+
+          # Check for halt result (from tool execution errors)
+          if result.is_a?(RubyLLM::Tool::Halt)
+            halt_result = result
+            # Continue checking for finish markers below
+          end
+
+          # Check for finish markers (from hooks)
+          if result.is_a?(Hash)
+            if result[:__finish_agent__]
+              message = RubyLLM::Message.new(
+                role: :assistant,
+                content: result[:message],
+                model_id: model.id,
+              )
+              # Set custom finish reason before triggering on_end_message
+              @context_tracker.finish_reason_override = "finish_agent" if @context_tracker
+              # Trigger on_end_message to ensure agent_stop event is emitted
+              @on[:end_message]&.call(message)
+              return message
+            elsif result[:__finish_swarm__]
+              # Propagate finish_swarm marker up
+              return result
+            end
+          end
+        end
+
+        # Return halt result if we found one (but no finish markers)
+        halt_result = results.find { |data| data[:result].is_a?(RubyLLM::Tool::Halt) }&.dig(:result)
+
+        # Continue automatic loop (recursive call to complete)
+        halt_result || complete(&block)
+      end
+
+      # Get the provider instance
+      #
+      # Exposes the RubyLLM provider instance for configuration.
+      # This is needed for setting agent_name and other provider-specific settings.
+      #
+      # @return [RubyLLM::Provider::Base] Provider instance
+      attr_reader :provider, :global_semaphore, :local_semaphore, :real_model_info, :context_tracker
+
+      # Get context window limit for the current model
+      #
+      # Priority order:
+      # 1. Explicit context_window parameter (user override)
+      # 2. Real model info from RubyLLM registry (searched across all providers)
+      # 3. Model info from chat (may be nil if assume_model_exists was used)
+      #
+      # @return [Integer, nil] Maximum context tokens, or nil if not available
+      def context_limit
+        # Priority 1: Explicit override
+        return @explicit_context_window if @explicit_context_window
+
+        # Priority 2: Real model info from registry (searched across all providers)
+        return @real_model_info.context_window if @real_model_info&.context_window
+
+        # Priority 3: Fall back to model from chat
+        model.context_window
+      rescue StandardError
+        nil
+      end
+
+      # Calculate cumulative input tokens for the conversation
+      #
+      # The latest assistant message's input_tokens already includes the cumulative
+      # total for the entire conversation (all previous messages, system instructions,
+      # tool definitions, etc.). We don't sum across messages as that would double-count.
+      #
+      # @return [Integer] Total input tokens used in conversation
+      def cumulative_input_tokens
+        # Find the latest assistant message with input_tokens
+        messages.reverse.find { |msg| msg.role == :assistant && msg.input_tokens }&.input_tokens || 0
+      end
+
+      # Calculate cumulative output tokens across all assistant messages
+      #
+      # Unlike input tokens, output tokens are per-response and should be summed.
+      #
+      # @return [Integer] Total output tokens used in conversation
+      def cumulative_output_tokens
+        messages.select { |msg| msg.role == :assistant }.sum { |msg| msg.output_tokens || 0 }
+      end
+
+      # Calculate total tokens used (input + output)
+      #
+      # @return [Integer] Total tokens used in conversation
+      def cumulative_total_tokens
+        cumulative_input_tokens + cumulative_output_tokens
+      end
+
+      # Calculate percentage of context window used
+      #
+      # @return [Float] Percentage (0.0 to 100.0), or 0.0 if limit unavailable
+      def context_usage_percentage
+        limit = context_limit
+        return 0.0 if limit.nil? || limit.zero?
+
+        (cumulative_total_tokens.to_f / limit * 100).round(2)
+      end
+
+      # Calculate remaining tokens in context window
+      #
+      # @return [Integer, nil] Tokens remaining, or nil if limit unavailable
+      def tokens_remaining
+        limit = context_limit
+        return if limit.nil?
+
+        limit - cumulative_total_tokens
+      end
+
+      # Compact the conversation history to reduce token usage
+      #
+      # Uses the Hybrid Production Strategy to intelligently compress the conversation:
+      # 1. Tool result pruning - Truncate tool outputs (they're 80%+ of tokens!)
+      # 2. Checkpoint creation - LLM-generated summary of conversation chunks
+      # 3. Sliding window - Keep recent messages in full detail
+      #
+      # This is a manual operation - call it when you need to free up context space.
+      # The method emits compression events via LogStream for monitoring.
+      #
+      # ## Usage
+      #
+      #   # Use defaults
+      #   metrics = agent.compact_context
+      #   puts metrics.summary
+      #
+      #   # With custom options
+      #   metrics = agent.compact_context(
+      #     tool_result_max_length: 300,
+      #     checkpoint_threshold: 40,
+      #     sliding_window_size: 15
+      #   )
+      #
+      # @param options [Hash] Compression options (see ContextCompactor::DEFAULT_OPTIONS)
+      # @return [ContextCompactor::Metrics] Compression statistics
+      def compact_context(**options)
+        compactor = ContextCompactor.new(self, options)
+        compactor.compact
+      end
+
+      private
+
+      # Build custom RubyLLM context for base_url/timeout overrides
+      #
+      # @param provider [String, Symbol] Provider name
+      # @param base_url [String, nil] Custom API base URL
+      # @param timeout [Integer] Request timeout in seconds
+      # @return [RubyLLM::Context] Configured context
+      def build_custom_context(provider:, base_url:, timeout:)
+        RubyLLM.context do |config|
+          # Set timeout for all providers
+          config.request_timeout = timeout
+
+          # Configure base_url if specified
+          next unless base_url
+
+          case provider.to_s
+          when "openai", "deepseek", "perplexity", "mistral", "openrouter"
+            config.openai_api_base = base_url
+            config.openai_api_key = ENV["OPENAI_API_KEY"] || "dummy-key-for-local"
+          when "ollama"
+            config.ollama_api_base = base_url
+          when "gpustack"
+            config.gpustack_api_base = base_url
+            config.gpustack_api_key = ENV["GPUSTACK_API_KEY"] || "dummy-key"
+          else
+            raise ArgumentError,
+              "Provider '#{provider}' doesn't support custom base_url. " \
+              "Only OpenAI-compatible providers (openai, deepseek, perplexity, mistral, openrouter), " \
+              "ollama, and gpustack support custom endpoints."
+          end
+        end
+      end
+
+      # Fetch real model info for accurate context tracking
+      #
+      # This searches across ALL providers, so it works even when using proxies
+      # (e.g., Claude model through OpenAI-compatible proxy).
+      #
+      # @param model [String] Model ID to lookup
+      # @return [void]
+      def fetch_real_model_info(model)
+        @model_lookup_error = nil
+        @real_model_info = begin
+          RubyLLM.models.find(model) # Searches all providers when no provider specified
+        rescue StandardError => e
+          # Store warning info to emit later through LogStream
+          suggestions = suggest_similar_models(model)
+          @model_lookup_error = {
+            model: model,
+            error_message: e.message,
+            suggestions: suggestions,
+          }
+          nil
+        end
+      end
+
+      # Determine which provider to use based on configuration
+      #
+      # When using base_url with OpenAI-compatible providers and api_version is set to
+      # 'v1/responses', use our custom provider that supports the responses API endpoint.
+      #
+      # @param provider [Symbol, String] The requested provider
+      # @param base_url [String, nil] Custom base URL
+      # @param api_version [String, nil] API endpoint version
+      # @return [Symbol] The provider to use
+      def determine_provider(provider, base_url, api_version)
+        return provider unless base_url
+
+        # Use custom provider for OpenAI-compatible providers when api_version is v1/responses
+        # The custom provider supports both chat/completions and responses endpoints
+        case provider.to_s
+        when "openai", "deepseek", "perplexity", "mistral", "openrouter"
+          if api_version == "v1/responses"
+            :openai_with_responses
+          else
+            provider
+          end
+        else
+          provider
+        end
+      end
+
+      # Configure the custom provider after creation to use responses API
+      #
+      # RubyLLM doesn't support passing custom parameters to provider initialization,
+      # so we configure the provider after the chat is created.
+      def configure_responses_api_provider
+        return unless provider.is_a?(SwarmSDK::Providers::OpenAIWithResponses)
+
+        provider.use_responses_api = true
+        RubyLLM.logger.debug("SwarmSDK: Configured provider to use responses API")
+      end
+
+      # Configure LLM parameters with proper temperature normalization
+      #
+      # Note: RubyLLM only normalizes temperature (for models that require specific values
+      # like gpt-5-mini which requires temperature=1.0) when using with_temperature().
+      # The with_params() method is designed for sending unparsed parameters directly to
+      # the LLM without provider-specific normalization. Therefore, we extract temperature
+      # and call with_temperature() separately to ensure proper normalization.
+      #
+      # @param params [Hash] Parameter hash (may include temperature and other params)
+      # @return [self] Returns self for method chaining
+      def configure_parameters(params)
+        return self if params.nil? || params.empty?
+
+        # Extract temperature for separate handling
+        if params[:temperature]
+          with_temperature(params[:temperature])
+          params = params.except(:temperature)
+        end
+
+        # Apply remaining parameters
+        with_params(**params) if params.any?
+
+        self
+      end
+
+      # Configure custom HTTP headers for LLM requests
+      #
+      # @param headers [Hash, nil] Custom HTTP headers
+      # @return [self] Returns self for method chaining
+      def configure_headers(headers)
+        return self if headers.nil? || headers.empty?
+
+        with_headers(**headers)
+
+        self
+      end
+
+      # Acquire both global and local semaphores (if configured).
+      #
+      # Semaphores queue requests when limits are reached, ensuring graceful
+      # degradation instead of API errors.
+      #
+      # Order matters: acquire global first (broader scope), then local
+      def acquire_semaphores(&block)
+        if @global_semaphore && @local_semaphore
+          # Both limits: acquire global first, then local
+          @global_semaphore.acquire do
+            @local_semaphore.acquire(&block)
+          end
+        elsif @global_semaphore
+          # Only global limit
+          @global_semaphore.acquire(&block)
+        elsif @local_semaphore
+          # Only local limit
+          @local_semaphore.acquire(&block)
+        else
+          # No limits: execute immediately
+          yield
+        end
+      end
+
+      # Suggest similar models when a model is not found
+      #
+      # @param query [String] Model name to search for
+      # @return [Array<RubyLLM::Model::Info>] Up to 3 similar models
+      def suggest_similar_models(query)
+        normalized_query = query.to_s.downcase.gsub(/[.\-_]/, "")
+
+        RubyLLM.models.all.select do |model|
+          normalized_id = model.id.downcase.gsub(/[.\-_]/, "")
+          normalized_id.include?(normalized_query) ||
+            model.name&.downcase&.gsub(/[.\-_]/, "")&.include?(normalized_query)
+        end.first(3)
+      rescue StandardError
+        []
+      end
+
+      # Execute a tool with ArgumentError handling for missing parameters
+      #
+      # When a tool is called with missing required parameters, this catches the
+      # ArgumentError and returns a helpful message to the LLM with:
+      # - Which parameter is missing
+      # - Instructions to retry with correct parameters
+      # - System reminder showing all required parameters
+      #
+      # @param tool_call [RubyLLM::ToolCall] Tool call from LLM
+      # @return [String, Object] Tool result or error message
+      def execute_tool_with_error_handling(tool_call)
+        execute_tool(tool_call)
+      rescue ArgumentError => e
+        # Extract parameter info from the error message
+        # ArgumentError messages typically: "missing keyword: parameter_name" or "missing keywords: param1, param2"
+        build_missing_parameter_error(tool_call, e)
+      end
+
+      # Build a helpful error message for missing tool parameters
+      #
+      # @param tool_call [RubyLLM::ToolCall] Tool call that failed
+      # @param error [ArgumentError] The ArgumentError raised
+      # @return [String] Formatted error message with parameter information
+      def build_missing_parameter_error(tool_call, error)
+        tool_name = tool_call.name
+        tool_instance = tools[tool_name.to_sym]
+
+        # Extract which parameters are missing from error message
+        missing_params = if error.message.match(/missing keyword(?:s)?: (.+)/)
+          ::Regexp.last_match(1).split(", ").map(&:strip)
+        else
+          ["unknown"]
+        end
+
+        # Get tool parameter information from RubyLLM::Tool
+        param_info = if tool_instance.respond_to?(:parameters)
+          # RubyLLM tools have a parameters method that returns { name => Parameter }
+          tool_instance.parameters.map do |_param_name, param_obj|
+            {
+              name: param_obj.name.to_s,
+              type: param_obj.type,
+              description: param_obj.description,
+              required: param_obj.required,
+            }
+          end
+        else
+          []
+        end
+
+        # Build error message
+        error_message = "Error calling #{tool_name}: #{error.message}\n\n"
+        error_message += "Please retry the tool call with all required parameters.\n\n"
+
+        # Add system reminder with parameter information
+        if param_info.any?
+          required_params = param_info.select { |p| p[:required] }
+
+          error_message += "<system-reminder>\n"
+          error_message += "The #{tool_name} tool requires the following parameters:\n\n"
+
+          required_params.each do |param|
+            error_message += "- #{param[:name]} (#{param[:type]}, REQUIRED): #{param[:description]}\n"
+          end
+
+          optional_params = param_info.reject { |p| p[:required] }
+          if optional_params.any?
+            error_message += "\nOptional parameters:\n"
+            optional_params.each do |param|
+              error_message += "- #{param[:name]} (#{param[:type]}): #{param[:description]}\n"
+            end
+          end
+
+          error_message += "\nYou were missing: #{missing_params.join(", ")}\n"
+          error_message += "</system-reminder>"
+        else
+          error_message += "Missing parameters: #{missing_params.join(", ")}"
+        end
+
+        error_message
+      end
+    end
+  end
+end
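For orientation, the initializer above drives everything from a single definition hash plus an optional shared semaphore, and the public token-accounting and compaction methods operate on the resulting chat. The following is a minimal usage sketch assembled only from signatures visible in this diff (initialize(definition:, global_semaphore:), Async::Semaphore.new, context_usage_percentage, and compact_context with its documented options); the model id and option values are illustrative assumptions, and real requests additionally require provider credentials configured for RubyLLM.

require "async/semaphore"
require "swarm_sdk"

# Global level: cap total concurrent LLM calls across the whole swarm.
global = Async::Semaphore.new(8)

# Keys below are the ones the initializer extracts from `definition`;
# values are illustrative.
definition = {
  model: "gpt-4o-mini",       # hypothetical model id
  provider: "openai",
  max_concurrent_tools: 4,    # local level: per-agent tool-call concurrency
  context_window: 128_000,    # explicit override (priority 1 in context_limit)
  system_prompt: "You are a helpful agent.",
}

chat = SwarmSDK::Agent::Chat.new(definition: definition, global_semaphore: global)

# Token accounting helpers defined on the class:
puts chat.context_usage_percentage  # => 0.0 before any traffic

# Manual context compaction with the options documented on compact_context:
metrics = chat.compact_context(
  tool_result_max_length: 300,
  checkpoint_threshold: 40,
  sliding_window_size: 15,
)
puts metrics.summary

Per the comments in acquire_semaphores, the global semaphore (broader scope) is acquired before the per-agent one, so both limits apply to every parallel tool call.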