swarm_sdk 2.0.0.pre.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/lib/swarm_sdk/agent/builder.rb +333 -0
  3. data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
  4. data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
  5. data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
  6. data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
  7. data/lib/swarm_sdk/agent/chat.rb +779 -0
  8. data/lib/swarm_sdk/agent/context.rb +108 -0
  9. data/lib/swarm_sdk/agent/definition.rb +335 -0
  10. data/lib/swarm_sdk/configuration.rb +251 -0
  11. data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
  12. data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
  13. data/lib/swarm_sdk/context_compactor.rb +340 -0
  14. data/lib/swarm_sdk/hooks/adapter.rb +359 -0
  15. data/lib/swarm_sdk/hooks/context.rb +163 -0
  16. data/lib/swarm_sdk/hooks/definition.rb +80 -0
  17. data/lib/swarm_sdk/hooks/error.rb +29 -0
  18. data/lib/swarm_sdk/hooks/executor.rb +146 -0
  19. data/lib/swarm_sdk/hooks/registry.rb +143 -0
  20. data/lib/swarm_sdk/hooks/result.rb +150 -0
  21. data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
  22. data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
  23. data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
  24. data/lib/swarm_sdk/log_collector.rb +83 -0
  25. data/lib/swarm_sdk/log_stream.rb +69 -0
  26. data/lib/swarm_sdk/markdown_parser.rb +46 -0
  27. data/lib/swarm_sdk/permissions/config.rb +239 -0
  28. data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
  29. data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
  30. data/lib/swarm_sdk/permissions/validator.rb +173 -0
  31. data/lib/swarm_sdk/permissions_builder.rb +122 -0
  32. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
  33. data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
  34. data/lib/swarm_sdk/result.rb +97 -0
  35. data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
  36. data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
  37. data/lib/swarm_sdk/swarm/builder.rb +240 -0
  38. data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
  39. data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
  40. data/lib/swarm_sdk/swarm.rb +837 -0
  41. data/lib/swarm_sdk/tools/bash.rb +274 -0
  42. data/lib/swarm_sdk/tools/delegate.rb +152 -0
  43. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
  44. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
  45. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
  46. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
  47. data/lib/swarm_sdk/tools/edit.rb +150 -0
  48. data/lib/swarm_sdk/tools/glob.rb +158 -0
  49. data/lib/swarm_sdk/tools/grep.rb +231 -0
  50. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
  51. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
  52. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
  53. data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
  54. data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
  55. data/lib/swarm_sdk/tools/read.rb +251 -0
  56. data/lib/swarm_sdk/tools/registry.rb +73 -0
  57. data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
  58. data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
  59. data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
  60. data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
  61. data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
  62. data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
  63. data/lib/swarm_sdk/tools/todo_write.rb +216 -0
  64. data/lib/swarm_sdk/tools/write.rb +117 -0
  65. data/lib/swarm_sdk/utils.rb +50 -0
  66. data/lib/swarm_sdk/version.rb +5 -0
  67. data/lib/swarm_sdk.rb +69 -0
  68. metadata +169 -0
@@ -0,0 +1,779 @@
+# frozen_string_literal: true
+
+module SwarmSDK
+  module Agent
+    # Chat extends RubyLLM::Chat to enable parallel agent-to-agent tool calling
+    # with two-level rate limiting to prevent API quota exhaustion
+    #
+    # ## Rate Limiting Strategy
+    #
+    # In hierarchical agent trees, unlimited parallelism can cause exponential growth:
+    # Main → 10 agents → 100 agents → 1,000 agents = API meltdown!
+    #
+    # Solution: Two-level semaphore system
+    # 1. **Global semaphore** - Total concurrent LLM calls across entire swarm
+    # 2. **Local semaphore** - Max concurrent tool calls for this specific agent
+    #
+    # ## Architecture
+    #
+    # This class is now organized with clear separation of concerns:
+    # - Core (this file): Initialization, provider setup, rate limiting, parallel execution
+    # - SystemReminderInjector: First message reminders, TodoWrite reminders
+    # - LoggingHelpers: Tool call formatting, result serialization
+    # - ContextTracker: Logging callbacks, delegation tracking
+    # - HookIntegration: Hook system integration (wraps tool execution with hooks)
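To make the two-level scheme concrete, here is a minimal standalone sketch of how a global and a local Async::Semaphore compose. The limits and the names GLOBAL and LOCAL are hypothetical, not SDK API; the nesting order mirrors Chat#acquire_semaphores further down in this file.

    require "async"
    require "async/semaphore"

    # Hypothetical limits: at most 8 concurrent LLM calls swarm-wide, 2 per agent.
    GLOBAL = Async::Semaphore.new(8)  # shared by every agent in the swarm
    LOCAL  = Async::Semaphore.new(2)  # owned by a single agent

    Async do
      tasks = 10.times.map do |i|
        Async do
          # Acquire global first (broader scope), then local; excess tasks
          # queue on the semaphore instead of failing with API errors.
          GLOBAL.acquire do
            LOCAL.acquire { puts "tool call #{i} running" }
          end
        end
      end
      tasks.each(&:wait)
    end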
+    class Chat < RubyLLM::Chat
+      # Include logging helpers for tool call formatting
+      include LoggingHelpers
+
+      # Include hook integration for user_prompt hooks and hook trigger methods
+      # This module overrides ask() to inject user_prompt hooks
+      # and provides trigger methods for pre/post tool use hooks
+      include HookIntegration
+
+      # Register custom provider for responses API support
+      # This is done once at class load time
+      unless RubyLLM::Provider.providers.key?(:openai_with_responses)
+        RubyLLM::Provider.register(:openai_with_responses, SwarmSDK::Providers::OpenAIWithResponses)
+      end
+
+      # Initialize AgentChat with rate limiting
+      #
+      # @param definition [Hash] Agent definition containing all configuration
+      # @param global_semaphore [Async::Semaphore, nil] Shared across all agents (not part of definition)
+      # @param options [Hash] Additional options to pass to RubyLLM::Chat
+      # @raise [ArgumentError] If provider doesn't support custom base_url or provider not specified with base_url
+      def initialize(definition:, global_semaphore: nil, **options)
+        # Extract configuration from definition
+        model = definition[:model]
+        provider = definition[:provider]
+        context_window = definition[:context_window]
+        max_concurrent_tools = definition[:max_concurrent_tools]
+        base_url = definition[:base_url]
+        api_version = definition[:api_version]
+        timeout = definition[:timeout] || Definition::DEFAULT_TIMEOUT
+        assume_model_exists = definition[:assume_model_exists]
+        system_prompt = definition[:system_prompt]
+        parameters = definition[:parameters]
+        headers = definition[:headers]
+
+        # Create isolated context if custom base_url or timeout specified
+        if base_url || timeout != Definition::DEFAULT_TIMEOUT
+          # Provider is required when using custom base_url
+          raise ArgumentError, "Provider must be specified when base_url is set" if base_url && !provider
+
+          # Determine actual provider to use
+          actual_provider = determine_provider(provider, base_url, api_version)
+          RubyLLM.logger.debug("SwarmSDK Agent::Chat: Using provider '#{actual_provider}' (requested='#{provider}', api_version='#{api_version}')")
+
+          context = build_custom_context(provider: provider, base_url: base_url, timeout: timeout)
+
+          # Use assume_model_exists to bypass model validation for custom endpoints
+          # Default to true when base_url is set, false otherwise (unless explicitly specified)
+          assume_model_exists = base_url ? true : false if assume_model_exists.nil?
+
+          super(model: model, provider: actual_provider, assume_model_exists: assume_model_exists, context: context, **options)
+
+          # Configure custom provider after creation (RubyLLM doesn't support custom init params)
+          if actual_provider == :openai_with_responses && api_version == "v1/responses"
+            configure_responses_api_provider
+          end
+        elsif provider
+          # No custom base_url or timeout: use RubyLLM's defaults (with optional provider override)
+          assume_model_exists = false if assume_model_exists.nil?
+          super(model: model, provider: provider, assume_model_exists: assume_model_exists, **options)
+        else
+          # No custom base_url, timeout, or provider: use RubyLLM's defaults
+          assume_model_exists = false if assume_model_exists.nil?
+          super(model: model, assume_model_exists: assume_model_exists, **options)
+        end
+
+        # Rate limiting semaphores
+        @global_semaphore = global_semaphore
+        @local_semaphore = max_concurrent_tools ? Async::Semaphore.new(max_concurrent_tools) : nil
+        @explicit_context_window = context_window
+
+        # Track TodoWrite usage for periodic reminders
+        @last_todowrite_message_index = nil
+
+        # Agent context for logging (set via setup_context)
+        @agent_context = nil
+
+        # Context tracker (created after agent_context is set)
+        @context_tracker = nil
+
+        # Try to fetch real model info for accurate context tracking
+        # This searches across ALL providers, so it works even when using proxies
+        # (e.g., Claude model through OpenAI-compatible proxy)
+        fetch_real_model_info(model)
+
+        # Configure system prompt, parameters, and headers after parent initialization
+        with_instructions(system_prompt) if system_prompt
+        configure_parameters(parameters)
+        configure_headers(headers)
+      end
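For orientation, a minimal construction sketch follows. The definition keys are the ones #initialize extracts above; the concrete values (model id, URL, limits) are placeholders:

    # Hypothetical values; a custom base_url requires an explicit provider.
    chat = SwarmSDK::Agent::Chat.new(
      definition: {
        model: "gpt-4o",
        provider: :openai,
        base_url: "http://localhost:8080/v1", # takes the isolated-context path
        api_version: "v1/responses",          # routes to :openai_with_responses
        max_concurrent_tools: 2,              # builds the local semaphore
        system_prompt: "You are a helpful agent.",
        parameters: { temperature: 0.7 },
      },
      global_semaphore: Async::Semaphore.new(8),
    )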
+
+      # Setup agent context
+      #
+      # Sets the agent context for this chat, enabling delegation tracking.
+      # This is always called, regardless of whether logging is enabled.
+      #
+      # @param context [Agent::Context] Agent context for this chat
+      # @return [void]
+      def setup_context(context)
+        @agent_context = context
+        @context_tracker = ContextTracker.new(self, context)
+      end
+
+      # Setup logging callbacks
+      #
+      # This configures the chat to emit log events via LogStream.
+      # Should only be called when LogStream.emitter is set.
+      #
+      # @return [void]
+      def setup_logging
+        raise StateError, "Agent context not set. Call setup_context first." unless @agent_context
+
+        @context_tracker.setup_logging
+      end
+
+      # Emit model lookup warning if one occurred during initialization
+      #
+      # If a model wasn't found in the registry during initialization, this will
+      # emit a proper JSON log event through LogStream.
+      #
+      # @param agent_name [Symbol, String] The agent name for logging context
+      def emit_model_lookup_warning(agent_name)
+        return unless @model_lookup_error
+
+        LogStream.emit(
+          type: "model_lookup_warning",
+          agent: agent_name,
+          model: @model_lookup_error[:model],
+          error_message: @model_lookup_error[:error_message],
+          suggestions: @model_lookup_error[:suggestions].map { |s| { id: s.id, name: s.name, context_window: s.context_window } },
+        )
+      end
+
+      # Override ask to inject system reminders and periodic TodoWrite reminders
+      #
+      # Note: This is called BEFORE HookIntegration#ask (due to module include order),
+      # so HookIntegration will wrap this and inject user_prompt hooks.
+      #
+      # @param prompt [String] User prompt
+      # @param options [Hash] Additional options to pass to complete
+      # @return [RubyLLM::Message] LLM response
+      def ask(prompt, **options)
+        # Check if this is the first user message
+        if SystemReminderInjector.first_message?(self)
+          # Manually construct the first message sequence with system reminders
+          SystemReminderInjector.inject_first_message_reminders(self, prompt)
+
+          # Trigger user_prompt hook manually since we're bypassing the normal ask flow
+          if @hook_executor
+            hook_result = trigger_user_prompt(prompt)
+
+            # Check if hook halted execution
+            if hook_result[:halted]
+              # Return a halted message instead of calling LLM
+              return RubyLLM::Message.new(
+                role: :assistant,
+                content: hook_result[:halt_message],
+                model_id: model.id,
+              )
+            end
+
+            # NOTE: We ignore modified_prompt for first message since reminders already injected
+          end
+
+          # Call complete to get LLM response
+          complete(**options)
+        else
+          # Inject periodic TodoWrite reminder if needed
+          if SystemReminderInjector.should_inject_todowrite_reminder?(self, @last_todowrite_message_index)
+            add_message(role: :user, content: SystemReminderInjector::TODOWRITE_PERIODIC_REMINDER)
+            # Update tracking
+            @last_todowrite_message_index = SystemReminderInjector.find_last_todowrite_index(self)
+          end
+
+          # Normal ask behavior for subsequent messages
+          # This calls super which goes to HookIntegration's ask override
+          super(prompt, **options)
+        end
+      end
+
+      # Override handle_tool_calls to execute multiple tool calls in parallel with rate limiting.
+      #
+      # RubyLLM's default implementation executes tool calls one at a time. This
+      # override uses Async to execute all tool calls concurrently, with semaphores
+      # to prevent API quota exhaustion. Hooks are integrated via HookIntegration module.
+      #
+      # @param response [RubyLLM::Message] LLM response with tool calls
+      # @param block [Proc] Optional block passed through to complete
+      # @return [RubyLLM::Message] Final response when loop completes
+      def handle_tool_calls(response, &block)
+        # Single tool call: sequential execution with hooks
+        if response.tool_calls.size == 1
+          tool_call = response.tool_calls.values.first
+
+          # Handle pre_tool_use hook (skip for delegation tools)
+          unless delegation_tool_call?(tool_call)
+            # Trigger pre_tool_use hook (can block or provide custom result)
+            pre_result = trigger_pre_tool_use(tool_call)
+
+            # Handle finish_agent marker
+            if pre_result[:finish_agent]
+              message = RubyLLM::Message.new(
+                role: :assistant,
+                content: pre_result[:custom_result],
+                model_id: model.id,
+              )
+              # Set custom finish reason before triggering on_end_message
+              @context_tracker.finish_reason_override = "finish_agent" if @context_tracker
+              # Trigger on_end_message to ensure agent_stop event is emitted
+              @on[:end_message]&.call(message)
+              return message
+            end
+
+            # Handle finish_swarm marker
+            if pre_result[:finish_swarm]
+              return { __finish_swarm__: true, message: pre_result[:custom_result] }
+            end
+
+            # Handle blocked execution
+            unless pre_result[:proceed]
+              content = pre_result[:custom_result] || "Tool execution blocked by hook"
+              message = add_message(
+                role: :tool,
+                content: content,
+                tool_call_id: tool_call.id,
+              )
+              @on[:end_message]&.call(message)
+              return complete(&block)
+            end
+          end
+
+          # Execute tool
+          @on[:tool_call]&.call(tool_call)
+
+          result = execute_tool_with_error_handling(tool_call)
+
+          @on[:tool_result]&.call(result)
+
+          # Trigger post_tool_use hook (skip for delegation tools)
+          unless delegation_tool_call?(tool_call)
+            result = trigger_post_tool_use(result, tool_call: tool_call)
+          end
+
+          # Check for finish markers from hooks
+          if result.is_a?(Hash)
+            if result[:__finish_agent__]
+              # Finish this agent with the provided message
+              message = RubyLLM::Message.new(
+                role: :assistant,
+                content: result[:message],
+                model_id: model.id,
+              )
+              # Set custom finish reason before triggering on_end_message
+              @context_tracker.finish_reason_override = "finish_agent" if @context_tracker
+              # Trigger on_end_message to ensure agent_stop event is emitted
+              @on[:end_message]&.call(message)
+              return message
+            elsif result[:__finish_swarm__]
+              # Propagate finish_swarm marker up (don't add to conversation)
+              return result
+            end
+          end
+
+          # Check for halt result
+          return result if result.is_a?(RubyLLM::Tool::Halt)
+
+          # Add tool result to conversation
+          content = result.is_a?(RubyLLM::Content) ? result : result.to_s
+          message = add_message(
+            role: :tool,
+            content: content,
+            tool_call_id: tool_call.id,
+          )
+          @on[:end_message]&.call(message)
+
+          # Continue loop
+          return complete(&block)
+        end
+
+        # Multiple tool calls: execute in parallel with rate limiting and hooks
+        halt_result = nil
+
+        results = Async do
+          tasks = response.tool_calls.map do |_id, tool_call|
+            Async do
+              # Acquire semaphores (queues if limit reached)
+              acquire_semaphores do
+                @on[:tool_call]&.call(tool_call)
+
+                # Handle pre_tool_use hook (skip for delegation tools)
+                unless delegation_tool_call?(tool_call)
+                  pre_result = trigger_pre_tool_use(tool_call)
+
+                  # Handle finish markers first (early exit)
+                  # Don't call on_tool_result for finish markers - they're not tool results
+                  if pre_result[:finish_agent]
+                    result = { __finish_agent__: true, message: pre_result[:custom_result] }
+                    next { tool_call: tool_call, result: result, message: nil }
+                  end
+
+                  if pre_result[:finish_swarm]
+                    result = { __finish_swarm__: true, message: pre_result[:custom_result] }
+                    next { tool_call: tool_call, result: result, message: nil }
+                  end
+
+                  # Handle blocked execution
+                  unless pre_result[:proceed]
+                    result = pre_result[:custom_result] || "Tool execution blocked by hook"
+                    @on[:tool_result]&.call(result)
+
+                    content = result.is_a?(RubyLLM::Content) ? result : result.to_s
+                    message = add_message(
+                      role: :tool,
+                      content: content,
+                      tool_call_id: tool_call.id,
+                    )
+                    @on[:end_message]&.call(message)
+
+                    next { tool_call: tool_call, result: result, message: message }
+                  end
+                end
+
+                # Execute tool - Faraday yields during HTTP I/O
+                result = execute_tool_with_error_handling(tool_call)
+
+                @on[:tool_result]&.call(result)
+
+                # Trigger post_tool_use hook (skip for delegation tools)
+                unless delegation_tool_call?(tool_call)
+                  result = trigger_post_tool_use(result, tool_call: tool_call)
+                end
+
+                # Check if result is a finish marker (don't add to conversation)
+                if result.is_a?(Hash) && (result[:__finish_agent__] || result[:__finish_swarm__])
+                  # Finish markers will be detected after parallel execution completes
+                  { tool_call: tool_call, result: result, message: nil }
+                else
+                  # Add tool result to conversation
+                  content = result.is_a?(RubyLLM::Content) ? result : result.to_s
+                  message = add_message(
+                    role: :tool,
+                    content: content,
+                    tool_call_id: tool_call.id,
+                  )
+                  @on[:end_message]&.call(message)
+
+                  # Return result data for collection
+                  { tool_call: tool_call, result: result, message: message }
+                end
+              end
+            end
+          end
+
+          # Wait for all tasks to complete
+          tasks.map(&:wait)
+        end.wait
+
+        # Check for halt and finish results
+        results.each do |data|
+          result = data[:result]
+
+          # Check for halt result (from tool execution errors)
+          if result.is_a?(RubyLLM::Tool::Halt)
+            halt_result = result
+            # Continue checking for finish markers below
+          end
+
+          # Check for finish markers (from hooks)
+          if result.is_a?(Hash)
+            if result[:__finish_agent__]
+              message = RubyLLM::Message.new(
+                role: :assistant,
+                content: result[:message],
+                model_id: model.id,
+              )
+              # Set custom finish reason before triggering on_end_message
+              @context_tracker.finish_reason_override = "finish_agent" if @context_tracker
+              # Trigger on_end_message to ensure agent_stop event is emitted
+              @on[:end_message]&.call(message)
+              return message
+            elsif result[:__finish_swarm__]
+              # Propagate finish_swarm marker up
+              return result
+            end
+          end
+        end
+
+        # Return halt result if we found one (but no finish markers)
+        halt_result = results.find { |data| data[:result].is_a?(RubyLLM::Tool::Halt) }&.dig(:result)
+
+        # Continue automatic loop (recursive call to complete)
+        halt_result || complete(&block)
+      end
+
+      # Get the provider instance
+      #
+      # Exposes the RubyLLM provider instance for configuration.
+      # This is needed for setting agent_name and other provider-specific settings.
+      #
+      # @return [RubyLLM::Provider::Base] Provider instance
+      attr_reader :provider, :global_semaphore, :local_semaphore, :real_model_info, :context_tracker
+
+      # Get context window limit for the current model
+      #
+      # Priority order:
+      # 1. Explicit context_window parameter (user override)
+      # 2. Real model info from RubyLLM registry (searched across all providers)
+      # 3. Model info from chat (may be nil if assume_model_exists was used)
+      #
+      # @return [Integer, nil] Maximum context tokens, or nil if not available
+      def context_limit
+        # Priority 1: Explicit override
+        return @explicit_context_window if @explicit_context_window
+
+        # Priority 2: Real model info from registry (searched across all providers)
+        return @real_model_info.context_window if @real_model_info&.context_window
+
+        # Priority 3: Fall back to model from chat
+        model.context_window
+      rescue StandardError
+        nil
+      end
+
+      # Calculate cumulative input tokens for the conversation
+      #
+      # The latest assistant message's input_tokens already includes the cumulative
+      # total for the entire conversation (all previous messages, system instructions,
+      # tool definitions, etc.). We don't sum across messages as that would double-count.
+      #
+      # @return [Integer] Total input tokens used in conversation
+      def cumulative_input_tokens
+        # Find the latest assistant message with input_tokens
+        messages.reverse.find { |msg| msg.role == :assistant && msg.input_tokens }&.input_tokens || 0
+      end
+
+      # Calculate cumulative output tokens across all assistant messages
+      #
+      # Unlike input tokens, output tokens are per-response and should be summed.
+      #
+      # @return [Integer] Total output tokens used in conversation
+      def cumulative_output_tokens
+        messages.select { |msg| msg.role == :assistant }.sum { |msg| msg.output_tokens || 0 }
+      end
+
+      # Calculate total tokens used (input + output)
+      #
+      # @return [Integer] Total tokens used in conversation
+      def cumulative_total_tokens
+        cumulative_input_tokens + cumulative_output_tokens
+      end
+
+      # Calculate percentage of context window used
+      #
+      # @return [Float] Percentage (0.0 to 100.0), or 0.0 if limit unavailable
+      def context_usage_percentage
+        limit = context_limit
+        return 0.0 if limit.nil? || limit.zero?
+
+        (cumulative_total_tokens.to_f / limit * 100).round(2)
+      end
+
+      # Calculate remaining tokens in context window
+      #
+      # @return [Integer, nil] Tokens remaining, or nil if limit unavailable
+      def tokens_remaining
+        limit = context_limit
+        return if limit.nil?
+
+        limit - cumulative_total_tokens
+      end
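A worked example of the accounting above, with hypothetical numbers:

    # Suppose context_limit returns 200_000, the newest assistant message
    # reports input_tokens = 50_000 (already a cumulative figure), and the
    # assistant messages' output_tokens sum to 2_000. Then:
    #   cumulative_total_tokens  #=> 52_000
    #   context_usage_percentage #=> (52_000.0 / 200_000 * 100).round(2) => 26.0
    #   tokens_remaining         #=> 148_000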
+
+      # Compact the conversation history to reduce token usage
+      #
+      # Uses the Hybrid Production Strategy to intelligently compress the conversation:
+      # 1. Tool result pruning - Truncate tool outputs (they're 80%+ of tokens!)
+      # 2. Checkpoint creation - LLM-generated summary of conversation chunks
+      # 3. Sliding window - Keep recent messages in full detail
+      #
+      # This is a manual operation - call it when you need to free up context space.
+      # The method emits compression events via LogStream for monitoring.
+      #
+      # ## Usage
+      #
+      #   # Use defaults
+      #   metrics = agent.compact_context
+      #   puts metrics.summary
+      #
+      #   # With custom options
+      #   metrics = agent.compact_context(
+      #     tool_result_max_length: 300,
+      #     checkpoint_threshold: 40,
+      #     sliding_window_size: 15
+      #   )
+      #
+      # @param options [Hash] Compression options (see ContextCompactor::DEFAULT_OPTIONS)
+      # @return [ContextCompactor::Metrics] Compression statistics
+      def compact_context(**options)
+        compactor = ContextCompactor.new(self, options)
+        compactor.compact
+      end
+
+      private
+
+      # Build custom RubyLLM context for base_url/timeout overrides
+      #
+      # @param provider [String, Symbol] Provider name
+      # @param base_url [String, nil] Custom API base URL
+      # @param timeout [Integer] Request timeout in seconds
+      # @return [RubyLLM::Context] Configured context
+      def build_custom_context(provider:, base_url:, timeout:)
+        RubyLLM.context do |config|
+          # Set timeout for all providers
+          config.request_timeout = timeout
+
+          # Configure base_url if specified
+          next unless base_url
+
+          case provider.to_s
+          when "openai", "deepseek", "perplexity", "mistral", "openrouter"
+            config.openai_api_base = base_url
+            config.openai_api_key = ENV["OPENAI_API_KEY"] || "dummy-key-for-local"
+          when "ollama"
+            config.ollama_api_base = base_url
+          when "gpustack"
+            config.gpustack_api_base = base_url
+            config.gpustack_api_key = ENV["GPUSTACK_API_KEY"] || "dummy-key"
+          else
+            raise ArgumentError,
+              "Provider '#{provider}' doesn't support custom base_url. " \
+              "Only OpenAI-compatible providers (openai, deepseek, perplexity, mistral, openrouter), " \
+              "ollama, and gpustack support custom endpoints."
+          end
+        end
+      end
+
+      # Fetch real model info for accurate context tracking
+      #
+      # This searches across ALL providers, so it works even when using proxies
+      # (e.g., Claude model through OpenAI-compatible proxy).
+      #
+      # @param model [String] Model ID to lookup
+      # @return [void]
+      def fetch_real_model_info(model)
+        @model_lookup_error = nil
+        @real_model_info = begin
+          RubyLLM.models.find(model) # Searches all providers when no provider specified
+        rescue StandardError => e
+          # Store warning info to emit later through LogStream
+          suggestions = suggest_similar_models(model)
+          @model_lookup_error = {
+            model: model,
+            error_message: e.message,
+            suggestions: suggestions,
+          }
+          nil
+        end
+      end
+
+      # Determine which provider to use based on configuration
+      #
+      # When using base_url with OpenAI-compatible providers and api_version is set to
+      # 'v1/responses', use our custom provider that supports the responses API endpoint.
+      #
+      # @param provider [Symbol, String] The requested provider
+      # @param base_url [String, nil] Custom base URL
+      # @param api_version [String, nil] API endpoint version
+      # @return [Symbol] The provider to use
+      def determine_provider(provider, base_url, api_version)
+        return provider unless base_url
+
+        # Use custom provider for OpenAI-compatible providers when api_version is v1/responses
+        # The custom provider supports both chat/completions and responses endpoints
+        case provider.to_s
+        when "openai", "deepseek", "perplexity", "mistral", "openrouter"
+          if api_version == "v1/responses"
+            :openai_with_responses
+          else
+            provider
+          end
+        else
+          provider
+        end
+      end
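Illustrative expectations for this routing (the method is private, so these calls are for intuition only; URLs are placeholders):

    determine_provider(:openai, nil, nil)                                 #=> :openai (no base_url)
    determine_provider(:openai, "http://localhost:8080", "v1/responses")  #=> :openai_with_responses
    determine_provider(:openai, "http://localhost:8080", nil)             #=> :openai
    determine_provider(:ollama, "http://localhost:11434", "v1/responses") #=> :ollama (not in the OpenAI-compatible list)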
+
+      # Configure the custom provider after creation to use responses API
+      #
+      # RubyLLM doesn't support passing custom parameters to provider initialization,
+      # so we configure the provider after the chat is created.
+      def configure_responses_api_provider
+        return unless provider.is_a?(SwarmSDK::Providers::OpenAIWithResponses)
+
+        provider.use_responses_api = true
+        RubyLLM.logger.debug("SwarmSDK: Configured provider to use responses API")
+      end
+
+      # Configure LLM parameters with proper temperature normalization
+      #
+      # Note: RubyLLM only normalizes temperature (for models that require specific values
+      # like gpt-5-mini which requires temperature=1.0) when using with_temperature().
+      # The with_params() method is designed for sending unparsed parameters directly to
+      # the LLM without provider-specific normalization. Therefore, we extract temperature
+      # and call with_temperature() separately to ensure proper normalization.
+      #
+      # @param params [Hash] Parameter hash (may include temperature and other params)
+      # @return [self] Returns self for method chaining
+      def configure_parameters(params)
+        return self if params.nil? || params.empty?
+
+        # Extract temperature for separate handling
+        if params[:temperature]
+          with_temperature(params[:temperature])
+          params = params.except(:temperature)
+        end
+
+        # Apply remaining parameters
+        with_params(**params) if params.any?
+
+        self
+      end
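Given a hypothetical params hash of { temperature: 0.7, top_p: 0.9 }, the method above is equivalent to:

    with_temperature(0.7)    # normalized per-model by RubyLLM
    with_params(top_p: 0.9)  # sent through without normalization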
+
+      # Configure custom HTTP headers for LLM requests
+      #
+      # @param headers [Hash, nil] Custom HTTP headers
+      # @return [self] Returns self for method chaining
+      def configure_headers(headers)
+        return self if headers.nil? || headers.empty?
+
+        with_headers(**headers)
+
+        self
+      end
+
+      # Acquire both global and local semaphores (if configured).
+      #
+      # Semaphores queue requests when limits are reached, ensuring graceful
+      # degradation instead of API errors.
+      #
+      # Order matters: acquire global first (broader scope), then local
+      def acquire_semaphores(&block)
+        if @global_semaphore && @local_semaphore
+          # Both limits: acquire global first, then local
+          @global_semaphore.acquire do
+            @local_semaphore.acquire(&block)
+          end
+        elsif @global_semaphore
+          # Only global limit
+          @global_semaphore.acquire(&block)
+        elsif @local_semaphore
+          # Only local limit
+          @local_semaphore.acquire(&block)
+        else
+          # No limits: execute immediately
+          yield
+        end
+      end
+
+      # Suggest similar models when a model is not found
+      #
+      # @param query [String] Model name to search for
+      # @return [Array<RubyLLM::Model::Info>] Up to 3 similar models
+      def suggest_similar_models(query)
+        normalized_query = query.to_s.downcase.gsub(/[.\-_]/, "")
+
+        RubyLLM.models.all.select do |model|
+          normalized_id = model.id.downcase.gsub(/[.\-_]/, "")
+          normalized_id.include?(normalized_query) ||
+            model.name&.downcase&.gsub(/[.\-_]/, "")&.include?(normalized_query)
+        end.first(3)
+      rescue StandardError
+        []
+      end
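A quick sketch of the normalization, with illustrative model ids:

    # "gpt-4.1" normalizes to "gpt41", and a registry id like "gpt-4.1-mini"
    # normalizes to "gpt41mini", so the query substring-matches it:
    "gpt-4.1".downcase.gsub(/[.\-_]/, "") #=> "gpt41"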
+
+      # Execute a tool with ArgumentError handling for missing parameters
+      #
+      # When a tool is called with missing required parameters, this catches the
+      # ArgumentError and returns a helpful message to the LLM with:
+      # - Which parameter is missing
+      # - Instructions to retry with correct parameters
+      # - System reminder showing all required parameters
+      #
+      # @param tool_call [RubyLLM::ToolCall] Tool call from LLM
+      # @return [String, Object] Tool result or error message
+      def execute_tool_with_error_handling(tool_call)
+        execute_tool(tool_call)
+      rescue ArgumentError => e
+        # Extract parameter info from the error message
+        # ArgumentError messages typically look like "missing keyword: parameter_name" or "missing keywords: param1, param2"
+        build_missing_parameter_error(tool_call, e)
+      end
+
+      # Build a helpful error message for missing tool parameters
+      #
+      # @param tool_call [RubyLLM::ToolCall] Tool call that failed
+      # @param error [ArgumentError] The ArgumentError raised
+      # @return [String] Formatted error message with parameter information
+      def build_missing_parameter_error(tool_call, error)
+        tool_name = tool_call.name
+        tool_instance = tools[tool_name.to_sym]
+
+        # Extract which parameters are missing from error message
+        missing_params = if error.message.match(/missing keyword(?:s)?: (.+)/)
+          ::Regexp.last_match(1).split(", ").map(&:strip)
+        else
+          ["unknown"]
+        end
+
+        # Get tool parameter information from RubyLLM::Tool
+        param_info = if tool_instance.respond_to?(:parameters)
+          # RubyLLM tools have a parameters method that returns { name => Parameter }
+          tool_instance.parameters.map do |_param_name, param_obj|
+            {
+              name: param_obj.name.to_s,
+              type: param_obj.type,
+              description: param_obj.description,
+              required: param_obj.required,
+            }
+          end
+        else
+          []
+        end
+
+        # Build error message
+        error_message = "Error calling #{tool_name}: #{error.message}\n\n"
+        error_message += "Please retry the tool call with all required parameters.\n\n"
+
+        # Add system reminder with parameter information
+        if param_info.any?
+          required_params = param_info.select { |p| p[:required] }
+
+          error_message += "<system-reminder>\n"
+          error_message += "The #{tool_name} tool requires the following parameters:\n\n"
+
+          required_params.each do |param|
+            error_message += "- #{param[:name]} (#{param[:type]}, REQUIRED): #{param[:description]}\n"
+          end
+
+          optional_params = param_info.reject { |p| p[:required] }
+          if optional_params.any?
+            error_message += "\nOptional parameters:\n"
+            optional_params.each do |param|
+              error_message += "- #{param[:name]} (#{param[:type]}): #{param[:description]}\n"
+            end
+          end
+
+          error_message += "\nYou were missing: #{missing_params.join(", ")}\n"
+          error_message += "</system-reminder>"
+        else
+          error_message += "Missing parameters: #{missing_params.join(", ")}"
+        end
+
+        error_message
+      end
+    end
+  end
+end
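For intuition, a message built by build_missing_parameter_error might render as follows. The tool name and parameter descriptions here are hypothetical, but the surrounding format comes directly from the string templates above:

    Error calling Write: missing keyword: content

    Please retry the tool call with all required parameters.

    <system-reminder>
    The Write tool requires the following parameters:

    - file_path (string, REQUIRED): Path of the file to write
    - content (string, REQUIRED): Content to write to the file

    You were missing: content
    </system-reminder>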