swarm_sdk 2.0.0.pre.2
This diff shows the content of publicly available package versions as released to the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- checksums.yaml +7 -0
- data/lib/swarm_sdk/agent/builder.rb +333 -0
- data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
- data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
- data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
- data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
- data/lib/swarm_sdk/agent/chat.rb +779 -0
- data/lib/swarm_sdk/agent/context.rb +108 -0
- data/lib/swarm_sdk/agent/definition.rb +335 -0
- data/lib/swarm_sdk/configuration.rb +251 -0
- data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
- data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
- data/lib/swarm_sdk/context_compactor.rb +340 -0
- data/lib/swarm_sdk/hooks/adapter.rb +359 -0
- data/lib/swarm_sdk/hooks/context.rb +163 -0
- data/lib/swarm_sdk/hooks/definition.rb +80 -0
- data/lib/swarm_sdk/hooks/error.rb +29 -0
- data/lib/swarm_sdk/hooks/executor.rb +146 -0
- data/lib/swarm_sdk/hooks/registry.rb +143 -0
- data/lib/swarm_sdk/hooks/result.rb +150 -0
- data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
- data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
- data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
- data/lib/swarm_sdk/log_collector.rb +83 -0
- data/lib/swarm_sdk/log_stream.rb +69 -0
- data/lib/swarm_sdk/markdown_parser.rb +46 -0
- data/lib/swarm_sdk/permissions/config.rb +239 -0
- data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
- data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
- data/lib/swarm_sdk/permissions/validator.rb +173 -0
- data/lib/swarm_sdk/permissions_builder.rb +122 -0
- data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
- data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
- data/lib/swarm_sdk/result.rb +97 -0
- data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
- data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
- data/lib/swarm_sdk/swarm/builder.rb +240 -0
- data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
- data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
- data/lib/swarm_sdk/swarm.rb +837 -0
- data/lib/swarm_sdk/tools/bash.rb +274 -0
- data/lib/swarm_sdk/tools/delegate.rb +152 -0
- data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
- data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
- data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
- data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
- data/lib/swarm_sdk/tools/edit.rb +150 -0
- data/lib/swarm_sdk/tools/glob.rb +158 -0
- data/lib/swarm_sdk/tools/grep.rb +231 -0
- data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
- data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
- data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
- data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
- data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
- data/lib/swarm_sdk/tools/read.rb +251 -0
- data/lib/swarm_sdk/tools/registry.rb +73 -0
- data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
- data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
- data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
- data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
- data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
- data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
- data/lib/swarm_sdk/tools/todo_write.rb +216 -0
- data/lib/swarm_sdk/tools/write.rb +117 -0
- data/lib/swarm_sdk/utils.rb +50 -0
- data/lib/swarm_sdk/version.rb +5 -0
- data/lib/swarm_sdk.rb +69 -0
- metadata +169 -0
data/lib/swarm_sdk/providers/openai_with_responses.rb
@@ -0,0 +1,582 @@
+# frozen_string_literal: true
+
+module SwarmSDK
+  module Providers
+    # Extended OpenAI provider with responses API support
+    #
+    # RubyLLM's OpenAI provider only supports v1/chat/completions.
+    # This provider extends it to also support v1/responses for models
+    # that require it (e.g., gpt-5-pro, o-series models).
+    #
+    # ## Usage
+    #
+    # Set via AgentChat when api_version is configured:
+    #
+    # @example Via SwarmSDK AgentChat (automatic)
+    #   # In swarm.yml:
+    #   agents:
+    #     researcher:
+    #       model: gpt-5-pro
+    #       api_version: "v1/responses" # Automatically uses this provider
+    #
+    # @example Direct instantiation
+    #   provider = OpenAIWithResponses.new(config, use_responses_api: true)
+    #   chat = RubyLLM::Chat.new(model: "gpt-5-pro", provider: provider)
+    #
+    # ## Features
+    #
+    # - **Stateful mode**: Uses `previous_response_id` with `store: true` for efficient multi-turn
+    # - **Stateless fallback**: Automatically falls back to sending full history if server doesn't store responses
+    # - **TTL tracking**: Expires response IDs after 5 minutes to prevent "not found" errors
+    # - **Auto-recovery**: Detects repeated failures and disables `previous_response_id` entirely
+    #
+    class OpenAIWithResponses < RubyLLM::Providers::OpenAI
+      attr_accessor :use_responses_api
+      attr_writer :agent_name
+
+      # OpenAI Responses API expires response IDs after inactivity
+      # Conservative estimate: 5 minutes (300 seconds)
+      RESPONSE_ID_TTL = 300
+
+      # Initialize the provider
+      #
+      # @param config [RubyLLM::Configuration] Configuration object
+      # @param use_responses_api [Boolean, nil] Force endpoint choice (nil = auto-detect)
+      def initialize(config, use_responses_api: nil)
+        super(config)
+        @use_responses_api = use_responses_api
+        @model_id = nil
+        @last_response_id = nil # Track last response ID for conversation state
+        @last_response_time = nil # Track when response ID was created
+        @response_id_failures = 0 # Track consecutive failures with response IDs
+        @disable_response_id = false # Disable previous_response_id if repeatedly failing
+        @agent_name = nil # Agent name for logging context (set externally)
+      end
+
+      # Return the completion endpoint URL
+      #
+      # @return [String] Either 'responses' or 'chat/completions'
+      def completion_url
+        endpoint = determine_endpoint
+        RubyLLM.logger.debug("SwarmSDK OpenAIWithResponses: Using endpoint '#{endpoint}' (use_responses_api=#{@use_responses_api}, model=#{@model_id})")
+        endpoint
+      end
+
+      # Return the streaming endpoint URL
+      #
+      # @return [String] Same as completion_url
+      def stream_url
+        completion_url
+      end
+
+      # Override complete to capture model_id before making request
+      #
+      # This allows auto-detection to work by inspecting the model being used
+      def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, &block)
+        @model_id = model.id
+        super
+      rescue RubyLLM::BadRequestError => e
+        # Handle "response not found" errors by starting a fresh conversation
+        if e.message.include?("not found") && @last_response_id
+          @response_id_failures += 1
+
+          # After 2 failures, disable previous_response_id entirely
+          if @response_id_failures >= 2
+            RubyLLM.logger.debug("SwarmSDK: Response IDs repeatedly not found (#{@response_id_failures} failures). " \
+              "The server may not support storing responses. Disabling previous_response_id for this session.")
+            @disable_response_id = true
+          else
+            RubyLLM.logger.debug("SwarmSDK: Response ID '#{@last_response_id}' not found (failure ##{@response_id_failures}), starting fresh conversation")
+          end
+
+          @last_response_id = nil
+          @last_response_time = nil
+          retry
+        else
+          raise
+        end
+      rescue RubyLLM::Error => e
+        # If error explicitly mentions responses API and we're not using it, retry with responses API
+        if should_retry_with_responses_api?(e)
+          RubyLLM.logger.warn("SwarmSDK: Retrying with responses API for model: #{@model_id}")
+          @use_responses_api = true
+          retry
+        else
+          raise
+        end
+      end
+
+      # Override render_payload to transform request body for Responses API
+      #
+      # The Responses API uses 'input' instead of 'messages' parameter
+      #
+      # @param messages [Array<RubyLLM::Message>] Conversation messages
+      # @param tools [Hash] Available tools
+      # @param temperature [Float, nil] Sampling temperature
+      # @param model [RubyLLM::Model] Model to use
+      # @param stream [Boolean] Enable streaming
+      # @param schema [Hash, nil] Response format schema
+      # @return [Hash] Request payload
+      def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+        if should_use_responses_api?
+          render_responses_payload(messages, tools: tools, temperature: temperature, model: model, stream: stream, schema: schema)
+        else
+          # Use original OpenAI chat/completions format
+          super
+        end
+      end
+
+      # Override parse_completion_response to handle Responses API response format
+      #
+      # @param response [Faraday::Response] HTTP response
+      # @return [RubyLLM::Message, nil] Parsed message or nil
+      def parse_completion_response(response)
+        # Guard against nil response body before delegating to parsers
+        if response.body.nil?
+          log_parse_error("nil", "Received nil response body from API", response.body)
+          return
+        end
+
+        if should_use_responses_api?
+          parse_responses_api_response(response)
+        else
+          super
+        end
+      rescue NoMethodError => e
+        # Catch fetch/dig errors on nil and provide better context
+        if e.message.include?("undefined method") && (e.message.include?("fetch") || e.message.include?("dig"))
+          log_parse_error(e.class.name, e.message, response.body)
+          nil
+        else
+          raise
+        end
+      end
+
+      private
+
+      # Determine which endpoint to use based on configuration and model
+      #
+      # @return [String] 'responses' or 'chat/completions'
+      def determine_endpoint
+        if @use_responses_api.nil?
+          # Auto-detect based on model name
+          requires_responses_api? ? "responses" : "chat/completions"
+        elsif @use_responses_api
+          "responses"
+        else
+          "chat/completions"
+        end
+      end
+
+      # Check if the current model requires the responses API
+      #
+      # Since we control this via api_version configuration, we don't auto-detect.
+      # This method is only called when use_responses_api is nil (no explicit setting).
+      #
+      # @return [Boolean] false - default to chat/completions for auto-detect
+      def requires_responses_api?
+        # Default to chat/completions when not explicitly configured
+        # Users should set api_version: "v1/responses" to use responses API
+        false
+      end
+
+      # Check if we should use responses API for the current request
+      #
+      # @return [Boolean] true if responses API should be used
+      def should_use_responses_api?
+        if @use_responses_api.nil?
+          # Auto-detect based on model
+          requires_responses_api?
+        else
+          @use_responses_api
+        end
+      end
+
+      # Build request body for Responses API
+      #
+      # The Responses API uses conversation state via previous_response_id.
+      # For multi-turn conversations:
+      #   1. First turn: Send input with user message
+      #   2. Get response with tool calls in output
+      #   3. Next turn: Send previous_response_id + input with function_call_output items
+      #
+      # @param messages [Array<RubyLLM::Message>] Conversation messages
+      # @param tools [Hash] Available tools
+      # @param temperature [Float, nil] Sampling temperature
+      # @param model [RubyLLM::Model] Model to use
+      # @param stream [Boolean] Enable streaming
+      # @param schema [Hash, nil] Response format schema
+      # @return [Hash] Request payload
+      def render_responses_payload(messages, tools:, temperature:, model:, stream: false, schema: nil)
+        payload = {
+          model: model.id,
+          stream: stream,
+        }
+
+        # Use previous_response_id for multi-turn conversations
+        # Only use it if:
+        #   1. Not disabled due to repeated failures
+        #   2. We have a response ID and timestamp
+        #   3. It hasn't expired (based on TTL)
+        #   4. There are new messages to send
+        use_previous_response = !@disable_response_id &&
+          @last_response_id &&
+          @last_response_time &&
+          (Time.now - @last_response_time) < RESPONSE_ID_TTL &&
+          has_new_messages?(messages)
+
+        if use_previous_response
+          RubyLLM.logger.debug("SwarmSDK: Multi-turn request with previous_response_id=#{@last_response_id}")
+          payload[:previous_response_id] = @last_response_id
+          # Only send NEW input (messages after the last response)
+          new_input = format_new_input_messages(messages)
+          payload[:input] = new_input
+          RubyLLM.logger.debug("SwarmSDK: New input for multi-turn: #{JSON.pretty_generate(new_input)}")
+        else
+          if @last_response_id && @last_response_time && (Time.now - @last_response_time) >= RESPONSE_ID_TTL
+            RubyLLM.logger.debug("SwarmSDK: Response ID expired (age: #{(Time.now - @last_response_time).round}s), starting new conversation chain")
+          else
+            RubyLLM.logger.debug("SwarmSDK: First turn request (no previous_response_id)")
+          end
+          # First turn or no conversation state or expired
+          initial_input = format_input_messages(messages)
+          payload[:input] = initial_input
+          RubyLLM.logger.debug("SwarmSDK: Initial input: #{JSON.pretty_generate(initial_input)}")
+        end
+
+        payload[:temperature] = temperature unless temperature.nil?
+
+        # CRITICAL: Explicitly set store: true to ensure responses are saved
+        # Without this, previous_response_id will not work because the response won't be retrievable
+        payload[:store] = true
+
+        # Use flat tool format for Responses API
+        payload[:tools] = tools.map { |_, tool| responses_tool_for(tool) } if tools.any?
+
+        if schema
+          strict = schema[:strict] != false
+          payload[:response_format] = {
+            type: "json_schema",
+            json_schema: {
+              name: "response",
+              schema: schema,
+              strict: strict,
+            },
+          }
+        end
+
+        payload[:stream_options] = { include_usage: true } if stream
+        payload
+      end
+
+      # Check if there are new messages since the last response
+      #
+      # @param messages [Array<RubyLLM::Message>] All conversation messages
+      # @return [Boolean] True if there are new messages (tool results or user messages)
+      def has_new_messages?(messages)
+        return false if messages.empty?
+
+        # Check if the last few messages include tool results (role: :tool)
+        # This indicates we need to send them with previous_response_id
+        messages.last(5).any? { |msg| msg.role == :tool }
+      end
+
+      # Format only NEW messages for Responses API (used with previous_response_id)
+      #
+      # When using previous_response_id, only send new input that wasn't in the previous request.
+      # This typically includes:
+      #   - Tool results (as function_call_output items)
+      #   - New user messages
+      #
+      # @param messages [Array<RubyLLM::Message>] All conversation messages
+      # @return [Array<Hash>] Formatted input array with only new messages
+      def format_new_input_messages(messages)
+        formatted = []
+
+        # Find messages after the last assistant response
+        # Typically this will be tool results and potentially new user input
+        last_assistant_idx = messages.rindex { |msg| msg.role == :assistant }
+
+        if last_assistant_idx
+          new_messages = messages[(last_assistant_idx + 1)..-1]
+
+          new_messages.each do |msg|
+            case msg.role
+            when :tool
+              # Tool results become function_call_output items
+              formatted << {
+                type: "function_call_output",
+                call_id: msg.tool_call_id,
+                output: msg.content.to_s,
+              }
+            when :user
+              # New user messages
+              formatted << {
+                role: "user",
+                content: Media.format_content(msg.content),
+              }
+            when :system
+              # New system messages (rare but possible)
+              formatted << {
+                role: "developer",
+                content: Media.format_content(msg.content),
+              }
+            end
+          end
+        end
+
+        formatted
+      end
+
+      # Format messages for Responses API input (first turn)
+      #
+      # For the first request in a conversation, include all user/system/assistant messages.
+      # Tool calls and tool results are excluded as they're part of the conversation state.
+      #
+      # @param messages [Array<RubyLLM::Message>] Conversation messages
+      # @return [Array<Hash>] Formatted input array
+      def format_input_messages(messages)
+        formatted = []
+
+        messages.each do |msg|
+          case msg.role
+          when :user
+            formatted << {
+              role: "user",
+              content: Media.format_content(msg.content),
+            }
+          when :system
+            formatted << {
+              role: "developer", # Responses API uses 'developer' instead of 'system'
+              content: Media.format_content(msg.content),
+            }
+          when :assistant
+            # Assistant messages - only include if they have text content (not just tool calls)
+            unless msg.content.nil? || msg.content.empty?
+              formatted << {
+                role: "assistant",
+                content: Media.format_content(msg.content),
+              }
+            end
+            # NOTE: Tool calls are NOT included in input - they're part of the output/conversation state
+          when :tool
+            # Tool result messages should NOT be in the first request
+            # They're only sent with previous_response_id
+            nil
+          end
+        end
+
+        formatted
+      end
+
+      # Convert tool to Responses API format (flat structure)
+      #
+      # Responses API uses a flat format with type at top level:
+      #   { type: "function", name: "tool_name", description: "...", parameters: {...} }
+      #
+      # This differs from chat/completions which nests under 'function':
+      #   { type: "function", function: { name: "tool_name", ... } }
+      #
+      # @param tool [RubyLLM::Tool] Tool to convert
+      # @return [Hash] Tool definition in Responses API format
+      def responses_tool_for(tool)
+        {
+          type: "function",
+          name: tool.name,
+          description: tool.description,
+          parameters: {
+            type: "object",
+            properties: tool.parameters.transform_values { |param| param_schema(param) },
+            required: tool.parameters.select { |_, p| p.required }.keys,
+          },
+        }
+      end
+
+      # Build parameter schema for a tool parameter
+      #
+      # @param param [RubyLLM::Tool::Parameter] Parameter to convert
+      # @return [Hash] Parameter schema
+      def param_schema(param)
+        {
+          type: param.type,
+          description: param.description,
+        }.compact
+      end
+
+      # Parse Responses API response
+      #
+      # The Responses API may have a different response structure than chat/completions.
+      # This method tries multiple possible paths to find the message data.
+      # IMPORTANT: Also captures the response ID for multi-turn conversations.
+      #
+      # @param response [Faraday::Response] HTTP response
+      # @return [RubyLLM::Message, nil] Parsed message or nil
+      def parse_responses_api_response(response)
+        data = response.body
+
+        # Handle nil or non-hash response body
+        unless data.is_a?(Hash)
+          log_parse_error("TypeError", "Expected response body to be Hash, got #{data.class}", data)
+          return
+        end
+
+        # Debug logging to see actual response structure
+        RubyLLM.logger.debug("SwarmSDK Responses API response: #{JSON.pretty_generate(data)}")
+
+        return if data.empty?
+
+        raise RubyLLM::Error.new(response, data.dig("error", "message")) if data.dig("error", "message")
+
+        # Capture response ID and timestamp for conversation state (if not disabled)
+        unless @disable_response_id
+          @last_response_id = data["id"]
+          @last_response_time = Time.now
+          @response_id_failures = 0 # Reset failure counter on success
+          RubyLLM.logger.debug("SwarmSDK captured response_id: #{@last_response_id} at #{@last_response_time}")
+        end
+
+        # Try different possible paths for the message data
+        message_data = extract_message_data(data)
+
+        RubyLLM.logger.debug("SwarmSDK extracted message_data: #{message_data.inspect} (class: #{message_data.class})")
+
+        return unless message_data
+
+        # Ensure message_data is a hash
+        unless message_data.is_a?(Hash)
+          RubyLLM.logger.error("SwarmSDK expected message_data to be Hash, got #{message_data.class}")
+          return
+        end
+
+        RubyLLM::Message.new(
+          role: :assistant,
+          content: message_data["content"] || "", # Provide empty string as fallback
+          tool_calls: parse_tool_calls(message_data["tool_calls"]),
+          input_tokens: extract_input_tokens(data),
+          output_tokens: extract_output_tokens(data),
+          model_id: data["model"],
+          raw: response,
+        )
+      end
+
+      # Extract message data from Responses API response
+      #
+      # The Responses API uses an 'output' array with different item types:
+      #   - reasoning: Model's internal reasoning
+      #   - function_call: Tool call to execute
+      #   - message: Text response
+      #
+      # @param data [Hash] Response body
+      # @return [Hash] Message data synthesized from output array
+      def extract_message_data(data)
+        output = data["output"]
+
+        # If no output array, try fallback paths
+        unless output.is_a?(Array)
+          return data.dig("choices", 0, "message") || # Standard OpenAI format
+            data.dig("response") ||                   # Another possible format
+            data.dig("message")                       # Direct message format
+        end
+
+        # Parse the output array to extract content and tool calls
+        content_parts = []
+        tool_calls = []
+
+        output.each do |item|
+          case item["type"]
+          when "message"
+            # Message contains a content array with typed items
+            if item["content"].is_a?(Array)
+              item["content"].each do |content_item|
+                case content_item["type"]
+                when "output_text"
+                  content_parts << content_item["text"]
+                when "text"
+                  content_parts << content_item["text"]
+                end
+              end
+            elsif item["content"].is_a?(String)
+              content_parts << item["content"]
+            elsif item["text"]
+              content_parts << item["text"]
+            end
+          when "function_call"
+            # Convert to RubyLLM tool call format
+            tool_calls << {
+              "id" => item["call_id"],
+              "type" => "function",
+              "function" => {
+                "name" => item["name"],
+                "arguments" => item["arguments"],
+              },
+            }
+          when "reasoning"
+            # Skip reasoning items (internal model thought process)
+            nil
+          end
+        end
+
+        # Synthesize a message data hash
+        {
+          "role" => "assistant",
+          "content" => content_parts.join("\n"),
+          "tool_calls" => tool_calls.empty? ? nil : tool_calls,
+        }
+      end
+
+      # Extract input tokens from various possible locations
+      #
+      # @param data [Hash] Response body
+      # @return [Integer] Input token count
+      def extract_input_tokens(data)
+        data.dig("usage", "prompt_tokens") ||
+          data.dig("usage", "input_tokens") ||
+          0
+      end
+
+      # Extract output tokens from various possible locations
+      #
+      # @param data [Hash] Response body
+      # @return [Integer] Output token count
+      def extract_output_tokens(data)
+        data.dig("usage", "completion_tokens") ||
+          data.dig("usage", "output_tokens") ||
+          0
+      end
+
+      # Check if we should retry with responses API after an error
+      #
+      # @param error [RubyLLM::Error] The error that occurred
+      # @return [Boolean] true if we should retry with responses API
+      def should_retry_with_responses_api?(error)
+        # Only retry if we haven't already tried responses API
+        return false if @use_responses_api
+
+        # Check if error message explicitly mentions responses API
+        error.message.include?("v1/responses") ||
+          error.message.include?("only supported in") && error.message.include?("responses")
+      end
+
+      # Log response parsing errors as JSON events through LogStream
+      #
+      # @param error_class [String] Error class name
+      # @param error_message [String] Error message
+      # @param response_body [Object] Response body that failed to parse
+      def log_parse_error(error_class, error_message, response_body)
+        if @agent_name
+          # Emit structured JSON log through LogStream
+          LogStream.emit(
+            type: "response_parse_error",
+            agent: @agent_name,
+            error_class: error_class,
+            error_message: error_message,
+            response_body: response_body.inspect,
+          )
+        else
+          # Fallback to RubyLLM logger if agent name not set
+          RubyLLM.logger.error("SwarmSDK: #{error_class}: #{error_message}\nResponse: #{response_body.inspect}")
+        end
+      end
+    end
+  end
+end
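The multi-turn mechanics in render_responses_payload above are easiest to see as the two request shapes the provider builds. A minimal sketch of those shapes follows; the model name, IDs, prompt text, and tool output are all invented for illustration, not captured gem output:

# First turn: the full formatted history goes into :input, and store: true
# asks the server to keep the response so later turns can reference it.
first_turn = {
  model: "gpt-5-pro",
  stream: false,
  input: [
    { role: "developer", content: "You are a researcher." }, # :system is mapped to "developer"
    { role: "user", content: "Summarize the report." },
  ],
  store: true,
}

# A follow-up turn inside the 300-second RESPONSE_ID_TTL: only the new items
# are sent, chained to the stored conversation via previous_response_id.
second_turn = {
  model: "gpt-5-pro",
  stream: false,
  previous_response_id: "resp_abc123", # invented ID
  input: [
    { type: "function_call_output", call_id: "call_1", output: "42 files matched" }, # invented tool result
  ],
  store: true,
}

Once the TTL lapses, or the server reports the stored ID "not found" twice, the provider falls back to the first shape for every request, matching the stateless-fallback behavior described in the class docs.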
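Similarly, extract_message_data flattens the Responses API output array into the single hash shape RubyLLM expects. A hedged sketch of that mapping with an invented response body (the Glob tool name and arguments are illustrative):

output = [
  { "type" => "reasoning" }, # skipped: internal model reasoning
  { "type" => "message", "content" => [
    { "type" => "output_text", "text" => "Checking the files." },
  ] },
  { "type" => "function_call", "call_id" => "call_1",
    "name" => "Glob", "arguments" => "{\"pattern\":\"**/*.rb\"}" },
]

# extract_message_data({ "output" => output }) would synthesize:
# {
#   "role"       => "assistant",
#   "content"    => "Checking the files.",
#   "tool_calls" => [
#     { "id" => "call_1", "type" => "function",
#       "function" => { "name" => "Glob", "arguments" => "{\"pattern\":\"**/*.rb\"}" } },
#   ],
# }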
data/lib/swarm_sdk/result.rb
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+module SwarmSDK
+  class Result
+    attr_reader :content, :agent, :cost, :tokens, :duration, :logs, :error, :metadata
+
+    def initialize(content: nil, agent:, cost: 0.0, tokens: {}, duration: 0.0, logs: [], error: nil, metadata: {})
+      @content = content
+      @agent = agent
+      @cost = cost
+      @tokens = tokens
+      @duration = duration
+      @logs = logs
+      @error = error
+      @metadata = metadata
+    end
+
+    def success?
+      @error.nil?
+    end
+
+    def failure?
+      !success?
+    end
+
+    def to_h
+      {
+        content: @content,
+        agent: @agent,
+        cost: @cost,
+        tokens: @tokens,
+        duration: @duration,
+        success: success?,
+        error: @error&.message,
+        metadata: @metadata,
+      }.compact
+    end
+
+    def to_json(*args)
+      to_h.to_json(*args)
+    end
+
+    # Calculate total cost across all LLM responses
+    #
+    # Cost accumulation works as follows:
+    # - Input cost: The LAST response's input_cost already includes the cost for the
+    #   full conversation history (all previous messages + current context)
+    # - Output cost: Each response generates NEW tokens, so we SUM all output_costs
+    # - Total = Last input_cost + Sum of all output_costs
+    #
+    # IMPORTANT: Do NOT sum total_cost across all entries - that would count
+    # input costs multiple times since each call includes the full history!
+    def total_cost
+      entries_with_usage = @logs.select { |entry| entry.dig(:usage, :total_cost) }
+      return 0.0 if entries_with_usage.empty?
+
+      # Last entry's input cost (includes full conversation history)
+      last_input_cost = entries_with_usage.last.dig(:usage, :input_cost) || 0.0
+
+      # Sum all output costs (each response generates new tokens)
+      total_output_cost = entries_with_usage.sum { |entry| entry.dig(:usage, :output_cost) || 0.0 }
+
+      last_input_cost + total_output_cost
+    end
+
+    # Get total tokens from the last LLM response with cumulative tracking
+    #
+    # Token accumulation works as follows:
+    # - Input tokens: Each API call sends the full conversation history, so the latest
+    #   response's cumulative_input_tokens already represents the full context
+    # - Output tokens: Each response generates new tokens, cumulative_output_tokens sums them
+    # - The cumulative_total_tokens in the last response already does this correctly
+    #
+    # IMPORTANT: Do NOT sum total_tokens across all log entries - that would count
+    # input tokens multiple times since each call includes the full history!
+    def total_tokens
+      last_entry = @logs.reverse.find { |entry| entry.dig(:usage, :cumulative_total_tokens) }
+      last_entry&.dig(:usage, :cumulative_total_tokens) || 0
+    end
+
+    # Get list of all agents involved in execution
+    def agents_involved
+      @logs.map { |entry| entry[:agent] }.compact.uniq.map(&:to_sym)
+    end
+
+    # Count total LLM requests made
+    # Each LLM API call produces either agent_step (tool calls) or agent_stop (final answer)
+    def llm_requests
+      @logs.count { |entry| entry[:type] == "agent_step" || entry[:type] == "agent_stop" }
+    end
+
+    # Count total tool calls made
+    def tool_calls_count
+      @logs.count { |entry| entry[:type] == "tool_call" }
+    end
+  end
+end
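The accounting rule in total_cost is easier to verify with numbers. A worked example with invented log entries (the field names match the code; every value is made up):

logs = [
  { type: "agent_step", usage: { input_cost: 0.010, output_cost: 0.002, total_cost: 0.012 } },
  { type: "agent_stop", usage: { input_cost: 0.014, output_cost: 0.003, total_cost: 0.017 } },
]

# Naively summing total_cost gives 0.012 + 0.017 = 0.029, which overcounts:
# the second call's input_cost already re-billed the first call's history.
# Result#total_cost instead takes the last input_cost plus all output_costs:
#   0.014 + (0.002 + 0.003) = 0.019

The same reasoning explains why total_tokens reads only the last entry's cumulative_total_tokens instead of summing across entries.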