swarm_sdk 2.7.7 → 2.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b83fd113813e0667b02d12c5ef6b9f4d47c134afa0665e2b6e482a6c9d3dacd7
4
- data.tar.gz: 8a758e06817af7690c21fc446597b0e1a91b876643d20a4a111a62bc953d3d5e
3
+ metadata.gz: 28f97819b8742950ef0a081490c54c1ff25c5e69e6befde0e48af177160b42cf
4
+ data.tar.gz: d31dc35d85816fd10f02de92bf1f047278b2ad91c176cd3dc17d6f137ca7b73a
5
5
  SHA512:
6
- metadata.gz: 5bfcfaf82ca105e6b1e1f04e009c703b641c25af6797ccd72c945a0c8dbbfbe60a0d783fc9e8d43322bbefac95b16123c4e7232c4ac9e1850a770d1120568344
7
- data.tar.gz: 0aa16f8d6ebe0fd2acb4cb52baba570000c82c4607d015d3a4569d671f9743cb1199edd80953002b8dc0fb673fb464ebfc2d5d0c645f3afb50258c7451281304
6
+ metadata.gz: 44a557f8935a59242fe3a22bebdf0bd10492852c6a9c7b54124ab1496ced76fc75b3ad5d2fa53c190cf65de3f5332238aa49de96270f390bfad648cd520538a5
7
+ data.tar.gz: c52b1b1b502b7bf3113e68730cc1cfde970e022144bb6481cc7dfabf94cb5051a874cc7d5857b61f51a5f0ff92f71742f28d5672322dfbeb15cb369abcd57737
@@ -52,23 +52,23 @@ module SwarmSDK
52
52
  # Execute request
53
53
  @app.call(env).on_complete do |response_env|
54
54
  end_time = Time.now
55
- duration = end_time - start_time
55
+
56
+ # Determine if this was a streaming request based on whether chunks were accumulated
57
+ # This is more reliable than parsing response content
58
+ is_streaming = accumulated_raw_chunks.any?
56
59
 
57
60
  # For streaming: use accumulated raw SSE chunks
58
61
  # For non-streaming: use response body
59
- raw_body = if accumulated_raw_chunks.any?
60
- accumulated_raw_chunks.join
61
- else
62
- response_env.body
63
- end
62
+ raw_body = is_streaming ? accumulated_raw_chunks.join : response_env.body
64
63
 
65
64
  # Store SSE body in Fiber-local for citation extraction
66
65
  # This allows append_citations_to_content to access the full SSE body
67
66
  # even though response.body is empty for streaming responses
68
- Fiber[:last_sse_body] = raw_body if accumulated_raw_chunks.any?
67
+ Fiber[:last_sse_body] = raw_body if is_streaming
69
68
 
70
69
  # Emit response event
71
- emit_response_event(response_env, start_time, end_time, duration, raw_body)
70
+ timing = { start_time: start_time, end_time: end_time, duration: end_time - start_time }
71
+ emit_response_event(response_env, timing, raw_body, is_streaming)
72
72
  end
73
73
  end
74
74
 
@@ -96,21 +96,17 @@ module SwarmSDK
96
96
  # Emit response event
97
97
  #
98
98
  # @param env [Faraday::Env] Response environment
99
- # @param start_time [Time] Request start time
100
- # @param end_time [Time] Request end time
101
- # @param duration [Float] Request duration in seconds
99
+ # @param timing [Hash] Timing information with :start_time, :end_time, :duration keys
102
100
  # @param raw_body [String, nil] Raw response body (SSE stream for streaming, JSON for non-streaming)
101
+ # @param streaming [Boolean] Whether this was a streaming response (determined by chunk accumulation)
103
102
  # @return [void]
104
- def emit_response_event(env, start_time, end_time, duration, raw_body)
105
- # Detect if this is a streaming response (starts with "data:")
106
- streaming = raw_body.is_a?(String) && raw_body.start_with?("data:")
107
-
103
+ def emit_response_event(env, timing, raw_body, streaming)
108
104
  response_data = {
109
105
  provider: @provider_name,
110
106
  body: parse_body(raw_body),
111
107
  streaming: streaming,
112
- duration_seconds: duration.round(3),
113
- timestamp: end_time.utc.iso8601,
108
+ duration_seconds: timing[:duration].round(3),
109
+ timestamp: timing[:end_time].utc.iso8601,
114
110
  status: env.status,
115
111
  }
116
112
 
@@ -166,6 +162,9 @@ module SwarmSDK
166
162
 
167
163
  # Parse request/response body
168
164
  #
165
+ # For requests: returns parsed JSON hash
166
+ # For responses: returns full body (JSON parsed or raw string for SSE)
167
+ #
169
168
  # @param body [String, Hash, nil] HTTP body
170
169
  # @return [Hash, String, nil] Parsed body
171
170
  def parse_body(body)
@@ -177,8 +176,9 @@ module SwarmSDK
177
176
  # Try to parse JSON
178
177
  JSON.parse(body)
179
178
  rescue JSON::ParserError
180
- # Return truncated string if not JSON
181
- body.to_s[0..1000]
179
+ # Return full body for SSE/non-JSON responses
180
+ # Don't truncate - let consumers decide how to handle large bodies
181
+ body.to_s
182
182
  rescue StandardError
183
183
  nil
184
184
  end
@@ -92,6 +92,7 @@ module SwarmSDK
92
92
  mcp_log_level: ["SWARM_SDK_MCP_LOG_LEVEL", -> { Defaults::Logging::MCP_LOG_LEVEL }],
93
93
  default_execution_timeout: ["SWARM_SDK_DEFAULT_EXECUTION_TIMEOUT", -> { Defaults::Timeouts::EXECUTION_TIMEOUT_SECONDS }],
94
94
  default_turn_timeout: ["SWARM_SDK_DEFAULT_TURN_TIMEOUT", -> { Defaults::Timeouts::TURN_TIMEOUT_SECONDS }],
95
+ mcp_request_timeout: ["SWARM_SDK_MCP_REQUEST_TIMEOUT", -> { Defaults::Timeouts::MCP_REQUEST_SECONDS }],
95
96
  }.freeze
96
97
 
97
98
  # WebFetch and control settings
@@ -94,6 +94,47 @@ module SwarmSDK
94
94
  # Time-to-live for cached response IDs. 5 minutes allows conversation
95
95
  # continuity while preventing stale cache issues.
96
96
  RESPONSES_API_TTL_SECONDS = 300
97
+
98
+ # MCP client request timeout (seconds)
99
+ #
100
+ # Default timeout for MCP server connections. 5 minutes accommodates
101
+ # long-running SSE streams and tool executions. This timeout applies to
102
+ # the entire operation (operation_timeout in HTTPX), so it must be long
103
+ # enough for SSE connections that may run for extended periods.
104
+ MCP_REQUEST_SECONDS = 300
105
+ end
106
+
107
+ # MCP reconnection configuration
108
+ #
109
+ # Settings for automatic reconnection when SSE/streamable connections drop.
110
+ # Note: The background SSE notification stream uses operation_timeout which
111
+ # limits total connection duration. Since this stream is meant to stay open
112
+ # indefinitely for server notifications, we configure aggressive reconnection
113
+ # so timeouts are transparent to users. Tool calls use separate connections
114
+ # and are unaffected by SSE stream timeouts.
115
+ module McpReconnection
116
+ # Maximum number of reconnection attempts
117
+ #
118
+ # Very high value (effectively infinite) because the SSE notification stream
119
+ # is expected to timeout periodically due to operation_timeout limitations.
120
+ # Reconnection is transparent - tool calls continue working regardless.
121
+ MAX_RETRIES = 1000
122
+
123
+ # Initial delay between reconnection attempts (milliseconds)
124
+ #
125
+ # Fast initial reconnect (500ms) to minimize notification gaps.
126
+ INITIAL_DELAY_MS = 500
127
+
128
+ # Exponential backoff growth factor
129
+ #
130
+ # Slow growth (1.2x) because we expect frequent reconnections.
131
+ # 500ms -> 600ms -> 720ms -> 864ms -> 1037ms -> ...
132
+ DELAY_GROW_FACTOR = 1.2
133
+
134
+ # Maximum delay between reconnection attempts (milliseconds)
135
+ #
136
+ # Caps at 10 seconds to ensure responsive reconnection even after many retries.
137
+ MAX_DELAY_MS = 10_000
97
138
  end
98
139
 
99
140
  # Output and content size limits
@@ -117,38 +117,92 @@ module SwarmSDK
117
117
  #
118
118
  # Agents that are ONLY delegates with shared_across_delegations: false
119
119
  # are NOT created here - they'll be created as delegation instances in pass 2a.
120
+ #
121
+ # Agent creation is parallelized using Async::Barrier for faster initialization.
120
122
  def pass_1_create_agents
121
123
  # Create plugin storages for agents
122
124
  create_plugin_storages
123
125
 
124
126
  tool_configurator = ToolConfigurator.new(@swarm, @swarm.scratchpad_storage, @swarm.plugin_storages)
125
127
 
126
- @swarm.agent_definitions.each do |name, agent_definition|
127
- # Skip if this agent will only exist as delegation instances
128
- next if should_skip_primary_creation?(name, agent_definition)
128
+ # Filter agents that need primary creation
129
+ agents_to_create = @swarm.agent_definitions.reject do |name, agent_definition|
130
+ should_skip_primary_creation?(name, agent_definition)
131
+ end
132
+
133
+ # Create agents in parallel using Async::Barrier
134
+ results = create_agents_in_parallel(agents_to_create, tool_configurator)
129
135
 
130
- chat = create_agent_chat(name, agent_definition, tool_configurator)
136
+ # Store results and notify plugins (sequential for safety)
137
+ results.each do |name, chat, agent_definition|
131
138
  @agents[name] = chat
132
-
133
- # Notify plugins that agent was initialized
134
139
  notify_plugins_agent_initialized(name, chat, agent_definition, tool_configurator)
135
140
  end
136
141
  end
137
142
 
138
- # Pass 2: Create delegation instances and wire delegation tools
143
+ # Create multiple agents in parallel using Async fibers
139
144
  #
140
- # This pass has three sub-steps that must happen in order:
141
- # 2a. Create delegation instances (ONLY for agents with shared_across_delegations: false)
142
- # 2b. Wire primary agents to delegation instances OR shared primaries
143
- # 2c. Wire delegation instances to their delegates (nested delegation support)
144
- def pass_2_register_delegation_tools
145
- tool_configurator = ToolConfigurator.new(@swarm, @swarm.scratchpad_storage, @swarm.plugin_storages)
145
+ # @param agents_to_create [Hash] Hash of { name => agent_definition }
146
+ # @param tool_configurator [ToolConfigurator] Shared tool configurator
147
+ # @return [Array<Array>] Array of [name, chat, agent_definition] tuples
148
+ def create_agents_in_parallel(agents_to_create, tool_configurator)
149
+ return [] if agents_to_create.empty?
150
+
151
+ results = []
152
+ errors = []
153
+ mutex = Mutex.new
154
+
155
+ Sync do
156
+ barrier = Async::Barrier.new
157
+
158
+ agents_to_create.each do |name, agent_definition|
159
+ barrier.async do
160
+ chat = create_agent_chat(name, agent_definition, tool_configurator)
161
+ mutex.synchronize { results << [name, chat, agent_definition] }
162
+ rescue StandardError => e
163
+ # Catch errors to avoid Async warning logs (which fail in tests with StringIO)
164
+ mutex.synchronize { errors << [name, e] }
165
+ end
166
+ end
167
+
168
+ barrier.wait
169
+ end
170
+
171
+ # Re-raise first error if any occurred
172
+ unless errors.empty?
173
+ # Emit events for all errors (not just the first)
174
+ errors.each do |agent_name, err|
175
+ LogStream.emit(
176
+ type: "agent_initialization_error",
177
+ agent: agent_name,
178
+ error_class: err.class.name,
179
+ error_message: err.message,
180
+ timestamp: Time.now.utc.iso8601,
181
+ )
182
+ end
183
+
184
+ # Re-raise first error with context
185
+ name, error = errors.first
186
+ raise error.class, "Agent '#{name}' initialization failed: #{error.message}", error.backtrace
187
+ end
188
+
189
+ results
190
+ end
191
+
192
+ # Collect all delegation instances that need to be created
193
+ #
194
+ # Validates delegation configs and returns a list of instances to create.
195
+ # This is done sequentially to fail fast on configuration errors.
196
+ #
197
+ # @return [Array<Hash>] Array of { instance_name:, base_name:, definition: }
198
+ def collect_delegation_instances_to_create
199
+ instances = []
146
200
 
147
- # Sub-pass 2a: Create delegation instances for isolated agents
148
201
  @swarm.agent_definitions.each do |delegator_name, delegator_def|
149
202
  delegator_def.delegation_configs.each do |delegation_config|
150
203
  delegate_base_name = delegation_config[:agent]
151
204
 
205
+ # Validate delegate exists
152
206
  unless @swarm.agent_definitions.key?(delegate_base_name)
153
207
  raise ConfigurationError,
154
208
  "Agent '#{delegator_name}' delegates to unknown agent '#{delegate_base_name}'"
@@ -156,24 +210,95 @@ module SwarmSDK
156
210
 
157
211
  delegate_definition = @swarm.agent_definitions[delegate_base_name]
158
212
 
159
- # Check isolation mode of the DELEGATE agent
160
- # If delegate wants to be shared, skip instance creation (use primary)
213
+ # Skip if delegate wants to be shared (use primary instead)
161
214
  next if delegate_definition.shared_across_delegations
162
215
 
163
- # Create unique delegation instance (isolated mode)
164
216
  instance_name = "#{delegate_base_name}@#{delegator_name}"
165
217
 
166
- # V7.0: Use existing register_all_tools (no new method needed!)
167
- delegation_chat = create_agent_chat_for_delegation(
218
+ instances << {
168
219
  instance_name: instance_name,
169
220
  base_name: delegate_base_name,
170
- agent_definition: delegate_definition,
171
- tool_configurator: tool_configurator,
172
- )
221
+ definition: delegate_definition,
222
+ }
223
+ end
224
+ end
225
+
226
+ instances
227
+ end
228
+
229
+ # Create multiple delegation instances in parallel using Async fibers
230
+ #
231
+ # @param instances_to_create [Array<Hash>] Array of instance configs
232
+ # @param tool_configurator [ToolConfigurator] Shared tool configurator
233
+ # @return [Array<Array>] Array of [instance_name, chat] tuples
234
+ def create_delegation_instances_in_parallel(instances_to_create, tool_configurator)
235
+ return [] if instances_to_create.empty?
236
+
237
+ results = []
238
+ errors = []
239
+ mutex = Mutex.new
240
+
241
+ Sync do
242
+ barrier = Async::Barrier.new
243
+
244
+ instances_to_create.each do |config|
245
+ barrier.async do
246
+ delegation_chat = create_agent_chat_for_delegation(
247
+ instance_name: config[:instance_name],
248
+ base_name: config[:base_name],
249
+ agent_definition: config[:definition],
250
+ tool_configurator: tool_configurator,
251
+ )
252
+ mutex.synchronize { results << [config[:instance_name], delegation_chat] }
253
+ rescue StandardError => e
254
+ # Catch errors to avoid Async warning logs (which fail in tests with StringIO)
255
+ mutex.synchronize { errors << [config[:instance_name], e] }
256
+ end
257
+ end
258
+
259
+ barrier.wait
260
+ end
173
261
 
174
- # Store in delegation_instances hash
175
- @swarm.delegation_instances[instance_name] = delegation_chat
262
+ # Re-raise first error if any occurred
263
+ unless errors.empty?
264
+ # Emit events for all errors (not just the first)
265
+ errors.each do |inst_name, err|
266
+ LogStream.emit(
267
+ type: "delegation_instance_initialization_error",
268
+ instance_name: inst_name,
269
+ error_class: err.class.name,
270
+ error_message: err.message,
271
+ timestamp: Time.now.utc.iso8601,
272
+ )
176
273
  end
274
+
275
+ # Re-raise first error with context
276
+ instance_name, error = errors.first
277
+ raise error.class, "Delegation instance '#{instance_name}' initialization failed: #{error.message}", error.backtrace
278
+ end
279
+
280
+ results
281
+ end
282
+
283
+ # Pass 2: Create delegation instances and wire delegation tools
284
+ #
285
+ # This pass has three sub-steps that must happen in order:
286
+ # 2a. Create delegation instances (ONLY for agents with shared_across_delegations: false)
287
+ # 2b. Wire primary agents to delegation instances OR shared primaries
288
+ # 2c. Wire delegation instances to their delegates (nested delegation support)
289
+ #
290
+ # Sub-pass 2a is parallelized using Async::Barrier for faster initialization.
291
+ def pass_2_register_delegation_tools
292
+ tool_configurator = ToolConfigurator.new(@swarm, @swarm.scratchpad_storage, @swarm.plugin_storages)
293
+
294
+ # Sub-pass 2a: Create delegation instances for isolated agents (parallelized)
295
+ delegation_instances_to_create = collect_delegation_instances_to_create
296
+
297
+ results = create_delegation_instances_in_parallel(delegation_instances_to_create, tool_configurator)
298
+
299
+ # Store results after all parallel creation completes
300
+ results.each do |instance_name, delegation_chat|
301
+ @swarm.delegation_instances[instance_name] = delegation_chat
177
302
  end
178
303
 
179
304
  # Sub-pass 2b: Wire primary agents to delegation instances OR shared primaries OR registered swarms
@@ -130,7 +130,8 @@ module SwarmSDK
130
130
  # @return [RubyLLM::MCP::Client] Initialized MCP client
131
131
  def initialize_mcp_client(config)
132
132
  # Convert timeout from seconds to milliseconds
133
- timeout_seconds = config[:timeout] || 30
133
+ # Use explicit config[:timeout] if provided, otherwise use global default
134
+ timeout_seconds = config[:timeout] || SwarmSDK.config.mcp_request_timeout
134
135
  timeout_ms = timeout_seconds * 1000
135
136
 
136
137
  # Determine transport type
@@ -179,11 +180,16 @@ module SwarmSDK
179
180
  # @param config [Hash] MCP server configuration
180
181
  # @return [Hash] SSE configuration
181
182
  def build_sse_config(config)
182
- {
183
+ sse_config = {
183
184
  url: config[:url],
184
185
  headers: config[:headers] || {},
185
186
  version: config[:version]&.to_sym || :http2,
186
187
  }
188
+
189
+ # Add reconnection options for resilient SSE connections
190
+ sse_config[:reconnection] = build_reconnection_options(config)
191
+
192
+ sse_config
187
193
  end
188
194
 
189
195
  # Build streamable (HTTP) transport configuration
@@ -200,9 +206,30 @@ module SwarmSDK
200
206
  # Only include rate_limit if present
201
207
  streamable_config[:rate_limit] = config[:rate_limit] if config[:rate_limit]
202
208
 
209
+ # Add reconnection options for resilient streamable connections
210
+ streamable_config[:reconnection] = build_reconnection_options(config)
211
+
203
212
  streamable_config
204
213
  end
205
214
 
215
+ # Build reconnection options from config or defaults
216
+ #
217
+ # Provides exponential backoff reconnection for SSE/streamable transports.
218
+ # Can be customized per-server or uses global defaults.
219
+ #
220
+ # @param config [Hash] MCP server configuration
221
+ # @return [Hash] Reconnection options
222
+ def build_reconnection_options(config)
223
+ reconnection_config = config[:reconnection] || {}
224
+
225
+ {
226
+ max_retries: reconnection_config[:max_retries] || Defaults::McpReconnection::MAX_RETRIES,
227
+ initial_reconnection_delay: reconnection_config[:initial_delay] || Defaults::McpReconnection::INITIAL_DELAY_MS,
228
+ reconnection_delay_grow_factor: reconnection_config[:delay_grow_factor] || Defaults::McpReconnection::DELAY_GROW_FACTOR,
229
+ max_reconnection_delay: reconnection_config[:max_delay] || Defaults::McpReconnection::MAX_DELAY_MS,
230
+ }
231
+ end
232
+
206
233
  # Emit MCP server initialization start event
207
234
  #
208
235
  # @param agent_name [Symbol] Agent name
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SwarmSDK
4
- VERSION = "2.7.7"
4
+ VERSION = "2.7.8"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swarm_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.7.7
4
+ version: 2.7.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paulo Arruda