swarm_sdk 2.0.0.pre.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/lib/swarm_sdk/agent/builder.rb +333 -0
  3. data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
  4. data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
  5. data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
  6. data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
  7. data/lib/swarm_sdk/agent/chat.rb +779 -0
  8. data/lib/swarm_sdk/agent/context.rb +108 -0
  9. data/lib/swarm_sdk/agent/definition.rb +335 -0
  10. data/lib/swarm_sdk/configuration.rb +251 -0
  11. data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
  12. data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
  13. data/lib/swarm_sdk/context_compactor.rb +340 -0
  14. data/lib/swarm_sdk/hooks/adapter.rb +359 -0
  15. data/lib/swarm_sdk/hooks/context.rb +163 -0
  16. data/lib/swarm_sdk/hooks/definition.rb +80 -0
  17. data/lib/swarm_sdk/hooks/error.rb +29 -0
  18. data/lib/swarm_sdk/hooks/executor.rb +146 -0
  19. data/lib/swarm_sdk/hooks/registry.rb +143 -0
  20. data/lib/swarm_sdk/hooks/result.rb +150 -0
  21. data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
  22. data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
  23. data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
  24. data/lib/swarm_sdk/log_collector.rb +83 -0
  25. data/lib/swarm_sdk/log_stream.rb +69 -0
  26. data/lib/swarm_sdk/markdown_parser.rb +46 -0
  27. data/lib/swarm_sdk/permissions/config.rb +239 -0
  28. data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
  29. data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
  30. data/lib/swarm_sdk/permissions/validator.rb +173 -0
  31. data/lib/swarm_sdk/permissions_builder.rb +122 -0
  32. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
  33. data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
  34. data/lib/swarm_sdk/result.rb +97 -0
  35. data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
  36. data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
  37. data/lib/swarm_sdk/swarm/builder.rb +240 -0
  38. data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
  39. data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
  40. data/lib/swarm_sdk/swarm.rb +837 -0
  41. data/lib/swarm_sdk/tools/bash.rb +274 -0
  42. data/lib/swarm_sdk/tools/delegate.rb +152 -0
  43. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
  44. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
  45. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
  46. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
  47. data/lib/swarm_sdk/tools/edit.rb +150 -0
  48. data/lib/swarm_sdk/tools/glob.rb +158 -0
  49. data/lib/swarm_sdk/tools/grep.rb +231 -0
  50. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
  51. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
  52. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
  53. data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
  54. data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
  55. data/lib/swarm_sdk/tools/read.rb +251 -0
  56. data/lib/swarm_sdk/tools/registry.rb +73 -0
  57. data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
  58. data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
  59. data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
  60. data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
  61. data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
  62. data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
  63. data/lib/swarm_sdk/tools/todo_write.rb +216 -0
  64. data/lib/swarm_sdk/tools/write.rb +117 -0
  65. data/lib/swarm_sdk/utils.rb +50 -0
  66. data/lib/swarm_sdk/version.rb +5 -0
  67. data/lib/swarm_sdk.rb +69 -0
  68. metadata +169 -0
@@ -0,0 +1,582 @@
1
# frozen_string_literal: true

module SwarmSDK
  module Providers
    # Extended OpenAI provider with responses API support
    #
    # RubyLLM's OpenAI provider only supports v1/chat/completions.
    # This provider extends it to also support v1/responses for models
    # that require it (e.g., gpt-5-pro, o-series models).
    #
    # ## Usage
    #
    # Set via AgentChat when api_version is configured:
    #
    # @example Via SwarmSDK AgentChat (automatic)
    #   # In swarm.yml:
    #   agents:
    #     researcher:
    #       model: gpt-5-pro
    #       api_version: "v1/responses" # Automatically uses this provider
    #
    # @example Direct instantiation
    #   provider = OpenAIWithResponses.new(config, use_responses_api: true)
    #   chat = RubyLLM::Chat.new(model: "gpt-5-pro", provider: provider)
    #
    # ## Features
    #
    # - **Stateful mode**: Uses `previous_response_id` with `store: true` for efficient multi-turn
    # - **Stateless fallback**: Automatically falls back to sending full history if server doesn't store responses
    # - **TTL tracking**: Expires response IDs after 5 minutes to prevent "not found" errors
    # - **Auto-recovery**: Detects repeated failures and disables `previous_response_id` entirely
    #
    class OpenAIWithResponses < RubyLLM::Providers::OpenAI
      # Explicit endpoint selection: true => v1/responses, false => chat/completions,
      # nil => auto-detect (currently defaults to chat/completions, see #requires_responses_api?).
      attr_accessor :use_responses_api
      # Agent name used purely for logging context; set externally by the SDK.
      attr_writer :agent_name

      # OpenAI Responses API expires response IDs after inactivity
      # Conservative estimate: 5 minutes (300 seconds)
      RESPONSE_ID_TTL = 300

      # Initialize the provider
      #
      # @param config [RubyLLM::Configuration] Configuration object
      # @param use_responses_api [Boolean, nil] Force endpoint choice (nil = auto-detect)
      def initialize(config, use_responses_api: nil)
        super(config)
        @use_responses_api = use_responses_api
        @model_id = nil
        @last_response_id = nil # Track last response ID for conversation state
        @last_response_time = nil # Track when response ID was created
        @response_id_failures = 0 # Track consecutive failures with response IDs
        @disable_response_id = false # Disable previous_response_id if repeatedly failing
        @agent_name = nil # Agent name for logging context (set externally)
      end

      # Return the completion endpoint URL
      #
      # @return [String] Either 'responses' or 'chat/completions'
      def completion_url
        endpoint = determine_endpoint
        RubyLLM.logger.debug("SwarmSDK OpenAIWithResponses: Using endpoint '#{endpoint}' (use_responses_api=#{@use_responses_api}, model=#{@model_id})")
        endpoint
      end

      # Return the streaming endpoint URL
      #
      # @return [String] Same as completion_url
      def stream_url
        completion_url
      end

      # Override complete to capture model_id before making request
      #
      # This allows auto-detection to work by inspecting the model being used.
      #
      # Error recovery strategy (two rescue clauses):
      # - BadRequestError mentioning "not found" while we hold a response ID:
      #   the stored response has expired/was never stored; drop the ID and retry.
      #   After 2 consecutive failures, previous_response_id is disabled for the
      #   whole session (bounds the retry loop).
      # - Other RubyLLM::Error mentioning the responses API while we are on
      #   chat/completions: flip to the responses API once and retry
      #   (should_retry_with_responses_api? returns false once @use_responses_api
      #   is true, so this retry cannot loop).
      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &block)
        @model_id = model.id
        super
      rescue RubyLLM::BadRequestError => e
        # Handle "response not found" errors by starting a fresh conversation
        if e.message.include?("not found") && @last_response_id
          @response_id_failures += 1

          # After 2 failures, disable previous_response_id entirely
          if @response_id_failures >= 2
            RubyLLM.logger.debug("SwarmSDK: Response IDs repeatedly not found (#{@response_id_failures} failures). " \
              "The server may not support storing responses. Disabling previous_response_id for this session.")
            @disable_response_id = true
          else
            RubyLLM.logger.debug("SwarmSDK: Response ID '#{@last_response_id}' not found (failure ##{@response_id_failures}), starting fresh conversation")
          end

          @last_response_id = nil
          @last_response_time = nil
          retry
        else
          raise
        end
      rescue RubyLLM::Error => e
        # If error explicitly mentions responses API and we're not using it, retry with responses API
        if should_retry_with_responses_api?(e)
          RubyLLM.logger.warn("SwarmSDK: Retrying with responses API for model: #{@model_id}")
          @use_responses_api = true
          retry
        else
          raise
        end
      end

      # Override render_payload to transform request body for Responses API
      #
      # The Responses API uses 'input' instead of 'messages' parameter
      #
      # @param messages [Array<RubyLLM::Message>] Conversation messages
      # @param tools [Hash] Available tools
      # @param temperature [Float, nil] Sampling temperature
      # @param model [RubyLLM::Model] Model to use
      # @param stream [Boolean] Enable streaming
      # @param schema [Hash, nil] Response format schema
      # @return [Hash] Request payload
      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
        if should_use_responses_api?
          render_responses_payload(messages, tools: tools, temperature: temperature, model: model, stream: stream, schema: schema)
        else
          # Use original OpenAI chat/completions format
          super
        end
      end

      # Override parse_completion_response to handle Responses API response format
      #
      # Returns nil (rather than raising) on unparseable/nil bodies so the caller
      # can decide how to proceed; the failure is logged via log_parse_error.
      #
      # @param response [Faraday::Response] HTTP response
      # @return [RubyLLM::Message, nil] Parsed message or nil
      def parse_completion_response(response)
        # Guard against nil response body before delegating to parsers
        if response.body.nil?
          log_parse_error("nil", "Received nil response body from API", response.body)
          return
        end

        if should_use_responses_api?
          parse_responses_api_response(response)
        else
          super
        end
      rescue NoMethodError => e
        # Catch fetch/dig errors on nil and provide better context
        if e.message.include?("undefined method") && (e.message.include?("fetch") || e.message.include?("dig"))
          log_parse_error(e.class.name, e.message, response.body)
          nil
        else
          raise
        end
      end

      private

      # Determine which endpoint to use based on configuration and model
      #
      # @return [String] 'responses' or 'chat/completions'
      def determine_endpoint
        if @use_responses_api.nil?
          # Auto-detect based on model name
          requires_responses_api? ? "responses" : "chat/completions"
        elsif @use_responses_api
          "responses"
        else
          "chat/completions"
        end
      end

      # Check if the current model requires the responses API
      #
      # Since we control this via api_version configuration, we don't auto-detect.
      # This method is only called when use_responses_api is nil (no explicit setting).
      #
      # @return [Boolean] false - default to chat/completions for auto-detect
      def requires_responses_api?
        # Default to chat/completions when not explicitly configured
        # Users should set api_version: "v1/responses" to use responses API
        false
      end

      # Check if we should use responses API for the current request
      #
      # @return [Boolean] true if responses API should be used
      def should_use_responses_api?
        if @use_responses_api.nil?
          # Auto-detect based on model
          requires_responses_api?
        else
          @use_responses_api
        end
      end

      # Build request body for Responses API
      #
      # The Responses API uses conversation state via previous_response_id.
      # For multi-turn conversations:
      # 1. First turn: Send input with user message
      # 2. Get response with tool calls in output
      # 3. Next turn: Send previous_response_id + input with function_call_output items
      #
      # @param messages [Array<RubyLLM::Message>] Conversation messages
      # @param tools [Hash] Available tools
      # @param temperature [Float, nil] Sampling temperature
      # @param model [RubyLLM::Model] Model to use
      # @param stream [Boolean] Enable streaming
      # @param schema [Hash, nil] Response format schema
      # @return [Hash] Request payload
      def render_responses_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
        payload = {
          model: model.id,
          stream: stream,
        }

        # Use previous_response_id for multi-turn conversations
        # Only use it if:
        # 1. Not disabled due to repeated failures
        # 2. We have a response ID and timestamp
        # 3. It hasn't expired (based on TTL)
        # 4. There are new messages to send
        use_previous_response = !@disable_response_id &&
          @last_response_id &&
          @last_response_time &&
          (Time.now - @last_response_time) < RESPONSE_ID_TTL &&
          has_new_messages?(messages)

        if use_previous_response
          RubyLLM.logger.debug("SwarmSDK: Multi-turn request with previous_response_id=#{@last_response_id}")
          payload[:previous_response_id] = @last_response_id
          # Only send NEW input (messages after the last response)
          new_input = format_new_input_messages(messages)
          payload[:input] = new_input
          RubyLLM.logger.debug("SwarmSDK: New input for multi-turn: #{JSON.pretty_generate(new_input)}")
        else
          if @last_response_id && @last_response_time && (Time.now - @last_response_time) >= RESPONSE_ID_TTL
            RubyLLM.logger.debug("SwarmSDK: Response ID expired (age: #{(Time.now - @last_response_time).round}s), starting new conversation chain")
          else
            RubyLLM.logger.debug("SwarmSDK: First turn request (no previous_response_id)")
          end
          # First turn or no conversation state or expired
          initial_input = format_input_messages(messages)
          payload[:input] = initial_input
          RubyLLM.logger.debug("SwarmSDK: Initial input: #{JSON.pretty_generate(initial_input)}")
        end

        payload[:temperature] = temperature unless temperature.nil?

        # CRITICAL: Explicitly set store: true to ensure responses are saved
        # Without this, previous_response_id will not work because the response won't be retrievable
        payload[:store] = true

        # Use flat tool format for Responses API
        payload[:tools] = tools.map { |_, tool| responses_tool_for(tool) } if tools.any?

        if schema
          strict = schema[:strict] != false
          # NOTE(review): this mirrors the chat/completions structured-output shape;
          # confirm the v1/responses endpoint accepts response_format (newer API
          # revisions describe structured output under a `text.format` key instead).
          payload[:response_format] = {
            type: "json_schema",
            json_schema: {
              name: "response",
              schema: schema,
              strict: strict,
            },
          }
        end

        # NOTE(review): stream_options is a chat/completions parameter — verify it
        # is honored (or at least ignored) by the responses endpoint.
        payload[:stream_options] = { include_usage: true } if stream
        payload
      end

      # Check if there are new messages since the last response
      #
      # @param messages [Array<RubyLLM::Message>] All conversation messages
      # @return [Boolean] True if there are new messages (tool results or user messages)
      def has_new_messages?(messages)
        return false if messages.empty?

        # Check if the last few messages include tool results (role: :tool)
        # This indicates we need to send them with previous_response_id
        # NOTE(review): heuristic — only inspects the trailing 5 messages and only
        # looks for tool results; a new user message alone will not trigger the
        # stateful path (the full history is re-sent instead, which is safe).
        messages.last(5).any? { |msg| msg.role == :tool }
      end

      # Format only NEW messages for Responses API (used with previous_response_id)
      #
      # When using previous_response_id, only send new input that wasn't in the previous request.
      # This typically includes:
      # - Tool results (as function_call_output items)
      # - New user messages
      #
      # @param messages [Array<RubyLLM::Message>] All conversation messages
      # @return [Array<Hash>] Formatted input array with only new messages
      def format_new_input_messages(messages)
        formatted = []

        # Find messages after the last assistant response
        # Typically this will be tool results and potentially new user input
        # (returns [] if no assistant message exists yet)
        last_assistant_idx = messages.rindex { |msg| msg.role == :assistant }

        if last_assistant_idx
          new_messages = messages[(last_assistant_idx + 1)..-1]

          new_messages.each do |msg|
            case msg.role
            when :tool
              # Tool results become function_call_output items
              formatted << {
                type: "function_call_output",
                call_id: msg.tool_call_id,
                output: msg.content.to_s,
              }
            when :user
              # New user messages
              formatted << {
                role: "user",
                content: Media.format_content(msg.content),
              }
            when :system
              # New system messages (rare but possible)
              formatted << {
                role: "developer",
                content: Media.format_content(msg.content),
              }
            end
          end
        end

        formatted
      end

      # Format messages for Responses API input (first turn)
      #
      # For the first request in a conversation, include all user/system/assistant messages.
      # Tool calls and tool results are excluded as they're part of the conversation state.
      #
      # @param messages [Array<RubyLLM::Message>] Conversation messages
      # @return [Array<Hash>] Formatted input array
      def format_input_messages(messages)
        formatted = []

        messages.each do |msg|
          case msg.role
          when :user
            formatted << {
              role: "user",
              content: Media.format_content(msg.content),
            }
          when :system
            formatted << {
              role: "developer", # Responses API uses 'developer' instead of 'system'
              content: Media.format_content(msg.content),
            }
          when :assistant
            # Assistant messages - only include if they have text content (not just tool calls)
            unless msg.content.nil? || msg.content.empty?
              formatted << {
                role: "assistant",
                content: Media.format_content(msg.content),
              }
            end
            # NOTE: Tool calls are NOT included in input - they're part of the output/conversation state
          when :tool
            # Tool result messages should NOT be in the first request
            # They're only sent with previous_response_id
            nil
          end
        end

        formatted
      end

      # Convert tool to Responses API format (flat structure)
      #
      # Responses API uses a flat format with type at top level:
      #   { type: "function", name: "tool_name", description: "...", parameters: {...} }
      #
      # This differs from chat/completions which nests under 'function':
      #   { type: "function", function: { name: "tool_name", ... } }
      #
      # @param tool [RubyLLM::Tool] Tool to convert
      # @return [Hash] Tool definition in Responses API format
      def responses_tool_for(tool)
        {
          type: "function",
          name: tool.name,
          description: tool.description,
          parameters: {
            type: "object",
            properties: tool.parameters.transform_values { |param| param_schema(param) },
            required: tool.parameters.select { |_, p| p.required }.keys,
          },
        }
      end

      # Build parameter schema for a tool parameter
      #
      # @param param [RubyLLM::Tool::Parameter] Parameter to convert
      # @return [Hash] Parameter schema (nil entries removed by #compact)
      def param_schema(param)
        {
          type: param.type,
          description: param.description,
        }.compact
      end

      # Parse Responses API response
      #
      # The Responses API may have a different response structure than chat/completions.
      # This method tries multiple possible paths to find the message data.
      # IMPORTANT: Also captures the response ID for multi-turn conversations.
      #
      # @param response [Faraday::Response] HTTP response
      # @return [RubyLLM::Message, nil] Parsed message or nil
      # @raise [RubyLLM::Error] when the body carries an API error payload
      def parse_responses_api_response(response)
        data = response.body

        # Handle nil or non-hash response body
        unless data.is_a?(Hash)
          log_parse_error("TypeError", "Expected response body to be Hash, got #{data.class}", data)
          return
        end

        # Debug logging to see actual response structure
        RubyLLM.logger.debug("SwarmSDK Responses API response: #{JSON.pretty_generate(data)}")

        return if data.empty?

        raise RubyLLM::Error.new(response, data.dig("error", "message")) if data.dig("error", "message")

        # Capture response ID and timestamp for conversation state (if not disabled)
        unless @disable_response_id
          @last_response_id = data["id"]
          @last_response_time = Time.now
          @response_id_failures = 0 # Reset failure counter on success
          RubyLLM.logger.debug("SwarmSDK captured response_id: #{@last_response_id} at #{@last_response_time}")
        end

        # Try different possible paths for the message data
        message_data = extract_message_data(data)

        RubyLLM.logger.debug("SwarmSDK extracted message_data: #{message_data.inspect} (class: #{message_data.class})")

        return unless message_data

        # Ensure message_data is a hash
        unless message_data.is_a?(Hash)
          RubyLLM.logger.error("SwarmSDK expected message_data to be Hash, got #{message_data.class}")
          return
        end

        RubyLLM::Message.new(
          role: :assistant,
          content: message_data["content"] || "", # Provide empty string as fallback
          tool_calls: parse_tool_calls(message_data["tool_calls"]),
          input_tokens: extract_input_tokens(data),
          output_tokens: extract_output_tokens(data),
          model_id: data["model"],
          raw: response,
        )
      end

      # Extract message data from Responses API response
      #
      # The Responses API uses an 'output' array with different item types:
      # - reasoning: Model's internal reasoning
      # - function_call: Tool call to execute
      # - message: Text response
      #
      # @param data [Hash] Response body
      # @return [Hash, nil] Message data synthesized from output array (nil only
      #   when every fallback path is absent)
      def extract_message_data(data)
        output = data["output"]

        # If no output array, try fallback paths
        unless output.is_a?(Array)
          return data.dig("choices", 0, "message") || # Standard OpenAI format
            data.dig("response") || # Another possible format
            data.dig("message") # Direct message format
        end

        # Parse the output array to extract content and tool calls
        content_parts = []
        tool_calls = []

        output.each do |item|
          case item["type"]
          when "message"
            # Message contains a content array with typed items
            if item["content"].is_a?(Array)
              item["content"].each do |content_item|
                case content_item["type"]
                when "output_text"
                  content_parts << content_item["text"]
                when "text"
                  content_parts << content_item["text"]
                end
              end
            elsif item["content"].is_a?(String)
              content_parts << item["content"]
            elsif item["text"]
              content_parts << item["text"]
            end
          when "function_call"
            # Convert to RubyLLM tool call format
            tool_calls << {
              "id" => item["call_id"],
              "type" => "function",
              "function" => {
                "name" => item["name"],
                "arguments" => item["arguments"],
              },
            }
          when "reasoning"
            # Skip reasoning items (internal model thought process)
            nil
          end
        end

        # Synthesize a message data hash
        {
          "role" => "assistant",
          "content" => content_parts.join("\n"),
          "tool_calls" => tool_calls.empty? ? nil : tool_calls,
        }
      end

      # Extract input tokens from various possible locations
      #
      # @param data [Hash] Response body
      # @return [Integer] Input token count (0 when usage is absent)
      def extract_input_tokens(data)
        data.dig("usage", "prompt_tokens") ||
          data.dig("usage", "input_tokens") ||
          0
      end

      # Extract output tokens from various possible locations
      #
      # @param data [Hash] Response body
      # @return [Integer] Output token count (0 when usage is absent)
      def extract_output_tokens(data)
        data.dig("usage", "completion_tokens") ||
          data.dig("usage", "output_tokens") ||
          0
      end

      # Check if we should retry with responses API after an error
      #
      # @param error [RubyLLM::Error] The error that occurred
      # @return [Boolean] true if we should retry with responses API
      def should_retry_with_responses_api?(error)
        # Only retry if we haven't already tried responses API
        return false if @use_responses_api

        # Check if error message explicitly mentions responses API
        # NOTE: && binds tighter than ||, so this reads as
        # A || (B && C) — matching either the literal endpoint name, or an
        # "only supported in ... responses" style message.
        error.message.include?("v1/responses") ||
          error.message.include?("only supported in") && error.message.include?("responses")
      end

      # Log response parsing errors as JSON events through LogStream
      #
      # @param error_class [String] Error class name
      # @param error_message [String] Error message
      # @param response_body [Object] Response body that failed to parse
      def log_parse_error(error_class, error_message, response_body)
        if @agent_name
          # Emit structured JSON log through LogStream
          LogStream.emit(
            type: "response_parse_error",
            agent: @agent_name,
            error_class: error_class,
            error_message: error_message,
            response_body: response_body.inspect,
          )
        else
          # Fallback to RubyLLM logger if agent name not set
          RubyLLM.logger.error("SwarmSDK: #{error_class}: #{error_message}\nResponse: #{response_body.inspect}")
        end
      end
    end
  end
end
@@ -0,0 +1,97 @@
1
# frozen_string_literal: true

module SwarmSDK
  # Immutable-ish value object describing the outcome of a swarm execution.
  #
  # Aggregates the final content, the agent that produced it, cost/token
  # accounting derived from the structured log entries, and any error.
  class Result
    attr_reader :content, :agent, :cost, :tokens, :duration, :logs, :error, :metadata

    # @param content [String, nil] Final answer text (nil on failure)
    # @param agent [Symbol, String] Agent that produced the result
    # @param cost [Float] Pre-computed cost (if supplied by the caller)
    # @param tokens [Hash] Pre-computed token counts (if supplied by the caller)
    # @param duration [Float] Wall-clock execution time in seconds
    # @param logs [Array<Hash>] Structured log entries (symbol keys) emitted during execution
    # @param error [Exception, nil] Error that aborted execution, if any
    # @param metadata [Hash] Arbitrary extra data
    def initialize(content: nil, agent:, cost: 0.0, tokens: {}, duration: 0.0, logs: [], error: nil, metadata: {})
      @content = content
      @agent = agent
      @cost = cost
      @tokens = tokens
      @duration = duration
      @logs = logs
      @error = error
      @metadata = metadata
    end

    # @return [Boolean] true when execution completed without an error
    def success?
      @error.nil?
    end

    # @return [Boolean] inverse of #success?
    def failure?
      !success?
    end

    # @return [Hash] serializable summary (nil values removed by #compact)
    def to_h
      {
        content: @content,
        agent: @agent,
        cost: @cost,
        tokens: @tokens,
        duration: @duration,
        success: success?,
        error: @error&.message,
        metadata: @metadata,
      }.compact
    end

    def to_json(*args)
      to_h.to_json(*args)
    end

    # Calculate total cost across all LLM responses
    #
    # Cost accumulation works as follows:
    # - Input cost: The LAST response's input_cost already includes the cost for the
    #   full conversation history (all previous messages + current context)
    # - Output cost: Each response generates NEW tokens, so we SUM all output_costs
    # - Total = Last input_cost + Sum of all output_costs
    #
    # IMPORTANT: Do NOT sum total_cost across all entries - that would count
    # input costs multiple times since each call includes the full history!
    #
    # @return [Float] total cost in dollars (0.0 when no usage entries exist)
    def total_cost
      entries_with_usage = @logs.select { |entry| entry.dig(:usage, :total_cost) }
      return 0.0 if entries_with_usage.empty?

      # Last entry's input cost (includes full conversation history)
      last_input_cost = entries_with_usage.last.dig(:usage, :input_cost) || 0.0

      # Sum all output costs (each response generates new tokens)
      total_output_cost = entries_with_usage.sum { |entry| entry.dig(:usage, :output_cost) || 0.0 }

      last_input_cost + total_output_cost
    end

    # Get total tokens from the last LLM response with cumulative tracking
    #
    # Token accumulation works as follows:
    # - Input tokens: Each API call sends the full conversation history, so the latest
    #   response's cumulative_input_tokens already represents the full context
    # - Output tokens: Each response generates new tokens, cumulative_output_tokens sums them
    # - The cumulative_total_tokens in the last response already does this correctly
    #
    # IMPORTANT: Do NOT sum total_tokens across all log entries - that would count
    # input tokens multiple times since each call includes the full history!
    #
    # @return [Integer] cumulative token count (0 when no entry carries usage)
    def total_tokens
      # reverse_each avoids allocating a reversed copy of the log array
      last_entry = @logs.reverse_each.find { |entry| entry.dig(:usage, :cumulative_total_tokens) }
      last_entry&.dig(:usage, :cumulative_total_tokens) || 0
    end

    # Get list of all agents involved in execution
    #
    # @return [Array<Symbol>] unique agent names in first-seen order
    def agents_involved
      @logs.map { |entry| entry[:agent] }.compact.uniq.map(&:to_sym)
    end

    # Count total LLM requests made
    # Each LLM API call produces either agent_step (tool calls) or agent_stop (final answer)
    #
    # @return [Integer] number of LLM API calls recorded in the logs
    def llm_requests
      @logs.count { |entry| ["agent_step", "agent_stop"].include?(entry[:type]) }
    end

    # Count total tool calls made
    #
    # @return [Integer] number of tool_call log entries
    def tool_calls_count
      @logs.count { |entry| entry[:type] == "tool_call" }
    end
  end
end