RubyGems - swarm_sdk - Versions diffs - 2.7.7 → 2.7.8 - Mend

swarm_sdk 2.7.7 → 2.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +19 -19
data/lib/swarm_sdk/config.rb +1 -0
data/lib/swarm_sdk/defaults.rb +41 -0
data/lib/swarm_sdk/swarm/agent_initializer.rb +149 -24
data/lib/swarm_sdk/swarm/mcp_configurator.rb +29 -2
data/lib/swarm_sdk/version.rb +1 -1
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b83fd113813e0667b02d12c5ef6b9f4d47c134afa0665e2b6e482a6c9d3dacd7
-  data.tar.gz: 8a758e06817af7690c21fc446597b0e1a91b876643d20a4a111a62bc953d3d5e
+  metadata.gz: 28f97819b8742950ef0a081490c54c1ff25c5e69e6befde0e48af177160b42cf
+  data.tar.gz: d31dc35d85816fd10f02de92bf1f047278b2ad91c176cd3dc17d6f137ca7b73a
 SHA512:
-  metadata.gz: 5bfcfaf82ca105e6b1e1f04e009c703b641c25af6797ccd72c945a0c8dbbfbe60a0d783fc9e8d43322bbefac95b16123c4e7232c4ac9e1850a770d1120568344
-  data.tar.gz: 0aa16f8d6ebe0fd2acb4cb52baba570000c82c4607d015d3a4569d671f9743cb1199edd80953002b8dc0fb673fb464ebfc2d5d0c645f3afb50258c7451281304
+  metadata.gz: 44a557f8935a59242fe3a22bebdf0bd10492852c6a9c7b54124ab1496ced76fc75b3ad5d2fa53c190cf65de3f5332238aa49de96270f390bfad648cd520538a5
+  data.tar.gz: c52b1b1b502b7bf3113e68730cc1cfde970e022144bb6481cc7dfabf94cb5051a874cc7d5857b61f51a5f0ff92f71742f28d5672322dfbeb15cb369abcd57737

data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb CHANGED Viewed

@@ -52,23 +52,23 @@ module SwarmSDK
         # Execute request
         @app.call(env).on_complete do |response_env|
           end_time = Time.now
-          duration = end_time - start_time
+          # Determine if this was a streaming request based on whether chunks were accumulated
+          # This is more reliable than parsing response content
+          is_streaming = accumulated_raw_chunks.any?
           # For streaming: use accumulated raw SSE chunks
           # For non-streaming: use response body
-          raw_body = if accumulated_raw_chunks.any?
-            accumulated_raw_chunks.join
-          else
-            response_env.body
-          end
+          raw_body = is_streaming ? accumulated_raw_chunks.join : response_env.body
           # Store SSE body in Fiber-local for citation extraction
           # This allows append_citations_to_content to access the full SSE body
           # even though response.body is empty for streaming responses
-          Fiber[:last_sse_body] = raw_body if accumulated_raw_chunks.any?
+          Fiber[:last_sse_body] = raw_body if is_streaming
           # Emit response event
-          emit_response_event(response_env, start_time, end_time, duration, raw_body)
+          timing = { start_time: start_time, end_time: end_time, duration: end_time - start_time }
+          emit_response_event(response_env, timing, raw_body, is_streaming)
         end
       end
@@ -96,21 +96,17 @@ module SwarmSDK
       # Emit response event
       #
       # @param env [Faraday::Env] Response environment
-      # @param start_time [Time] Request start time
-      # @param end_time [Time] Request end time
-      # @param duration [Float] Request duration in seconds
+      # @param timing [Hash] Timing information with :start_time, :end_time, :duration keys
       # @param raw_body [String, nil] Raw response body (SSE stream for streaming, JSON for non-streaming)
+      # @param streaming [Boolean] Whether this was a streaming response (determined by chunk accumulation)
       # @return [void]
-      def emit_response_event(env, start_time, end_time, duration, raw_body)
-        # Detect if this is a streaming response (starts with "data:")
-        streaming = raw_body.is_a?(String) && raw_body.start_with?("data:")
+      def emit_response_event(env, timing, raw_body, streaming)
         response_data = {
           provider: @provider_name,
           body: parse_body(raw_body),
           streaming: streaming,
-          duration_seconds: duration.round(3),
-          timestamp: end_time.utc.iso8601,
+          duration_seconds: timing[:duration].round(3),
+          timestamp: timing[:end_time].utc.iso8601,
           status: env.status,
         }
@@ -166,6 +162,9 @@ module SwarmSDK
       # Parse request/response body
       #
+      # For requests: returns parsed JSON hash
+      # For responses: returns full body (JSON parsed or raw string for SSE)
+      #
       # @param body [String, Hash, nil] HTTP body
       # @return [Hash, String, nil] Parsed body
       def parse_body(body)
@@ -177,8 +176,9 @@ module SwarmSDK
         # Try to parse JSON
         JSON.parse(body)
       rescue JSON::ParserError
-        # Return truncated string if not JSON
-        body.to_s[0..1000]
+        # Return full body for SSE/non-JSON responses
+        # Don't truncate - let consumers decide how to handle large bodies
+        body.to_s
       rescue StandardError
         nil
       end

data/lib/swarm_sdk/config.rb CHANGED Viewed

@@ -92,6 +92,7 @@ module SwarmSDK
       mcp_log_level: ["SWARM_SDK_MCP_LOG_LEVEL", -> { Defaults::Logging::MCP_LOG_LEVEL }],
       default_execution_timeout: ["SWARM_SDK_DEFAULT_EXECUTION_TIMEOUT", -> { Defaults::Timeouts::EXECUTION_TIMEOUT_SECONDS }],
       default_turn_timeout: ["SWARM_SDK_DEFAULT_TURN_TIMEOUT", -> { Defaults::Timeouts::TURN_TIMEOUT_SECONDS }],
+      mcp_request_timeout: ["SWARM_SDK_MCP_REQUEST_TIMEOUT", -> { Defaults::Timeouts::MCP_REQUEST_SECONDS }],
     }.freeze
     # WebFetch and control settings

data/lib/swarm_sdk/defaults.rb CHANGED Viewed

@@ -94,6 +94,47 @@ module SwarmSDK
       # Time-to-live for cached response IDs. 5 minutes allows conversation
       # continuity while preventing stale cache issues.
       RESPONSES_API_TTL_SECONDS = 300
+      # MCP client request timeout (seconds)
+      #
+      # Default timeout for MCP server connections. 5 minutes accommodates
+      # long-running SSE streams and tool executions. This timeout applies to
+      # the entire operation (operation_timeout in HTTPX), so it must be long
+      # enough for SSE connections that may run for extended periods.
+      MCP_REQUEST_SECONDS = 300
+    end
+    # MCP reconnection configuration
+    #
+    # Settings for automatic reconnection when SSE/streamable connections drop.
+    # Note: The background SSE notification stream uses operation_timeout which
+    # limits total connection duration. Since this stream is meant to stay open
+    # indefinitely for server notifications, we configure aggressive reconnection
+    # so timeouts are transparent to users. Tool calls use separate connections
+    # and are unaffected by SSE stream timeouts.
+    module McpReconnection
+      # Maximum number of reconnection attempts
+      #
+      # Very high value (effectively infinite) because the SSE notification stream
+      # is expected to timeout periodically due to operation_timeout limitations.
+      # Reconnection is transparent - tool calls continue working regardless.
+      MAX_RETRIES = 1000
+      # Initial delay between reconnection attempts (milliseconds)
+      #
+      # Fast initial reconnect (500ms) to minimize notification gaps.
+      INITIAL_DELAY_MS = 500
+      # Exponential backoff growth factor
+      #
+      # Slow growth (1.2x) because we expect frequent reconnections.
+      # 500ms -> 600ms -> 720ms -> 864ms -> 1037ms -> ...
+      DELAY_GROW_FACTOR = 1.2
+      # Maximum delay between reconnection attempts (milliseconds)
+      #
+      # Caps at 10 seconds to ensure responsive reconnection even after many retries.
+      MAX_DELAY_MS = 10_000
     end
     # Output and content size limits

data/lib/swarm_sdk/swarm/agent_initializer.rb CHANGED Viewed

@@ -117,38 +117,92 @@ module SwarmSDK
       #
       # Agents that are ONLY delegates with shared_across_delegations: false
       # are NOT created here - they'll be created as delegation instances in pass 2a.
+      #
+      # Agent creation is parallelized using Async::Barrier for faster initialization.
       def pass_1_create_agents
         # Create plugin storages for agents
         create_plugin_storages
         tool_configurator = ToolConfigurator.new(@swarm, @swarm.scratchpad_storage, @swarm.plugin_storages)
-        @swarm.agent_definitions.each do |name, agent_definition|
-          # Skip if this agent will only exist as delegation instances
-          next if should_skip_primary_creation?(name, agent_definition)
+        # Filter agents that need primary creation
+        agents_to_create = @swarm.agent_definitions.reject do |name, agent_definition|
+          should_skip_primary_creation?(name, agent_definition)
+        end
+        # Create agents in parallel using Async::Barrier
+        results = create_agents_in_parallel(agents_to_create, tool_configurator)
-          chat = create_agent_chat(name, agent_definition, tool_configurator)
+        # Store results and notify plugins (sequential for safety)
+        results.each do |name, chat, agent_definition|
           @agents[name] = chat
-          # Notify plugins that agent was initialized
           notify_plugins_agent_initialized(name, chat, agent_definition, tool_configurator)
         end
       end
-      # Pass 2: Create delegation instances and wire delegation tools
+      # Create multiple agents in parallel using Async fibers
       #
-      # This pass has three sub-steps that must happen in order:
-      # 2a. Create delegation instances (ONLY for agents with shared_across_delegations: false)
-      # 2b. Wire primary agents to delegation instances OR shared primaries
-      # 2c. Wire delegation instances to their delegates (nested delegation support)
-      def pass_2_register_delegation_tools
-        tool_configurator = ToolConfigurator.new(@swarm, @swarm.scratchpad_storage, @swarm.plugin_storages)
+      # @param agents_to_create [Hash] Hash of { name => agent_definition }
+      # @param tool_configurator [ToolConfigurator] Shared tool configurator
+      # @return [Array<Array>] Array of [name, chat, agent_definition] tuples
+      def create_agents_in_parallel(agents_to_create, tool_configurator)
+        return [] if agents_to_create.empty?
+        results = []
+        errors = []
+        mutex = Mutex.new
+        Sync do
+          barrier = Async::Barrier.new
+          agents_to_create.each do |name, agent_definition|
+            barrier.async do
+              chat = create_agent_chat(name, agent_definition, tool_configurator)
+              mutex.synchronize { results << [name, chat, agent_definition] }
+            rescue StandardError => e
+              # Catch errors to avoid Async warning logs (which fail in tests with StringIO)
+              mutex.synchronize { errors << [name, e] }
+            end
+          end
+          barrier.wait
+        end
+        # Re-raise first error if any occurred
+        unless errors.empty?
+          # Emit events for all errors (not just the first)
+          errors.each do |agent_name, err|
+            LogStream.emit(
+              type: "agent_initialization_error",
+              agent: agent_name,
+              error_class: err.class.name,
+              error_message: err.message,
+              timestamp: Time.now.utc.iso8601,
+            )
+          end
+          # Re-raise first error with context
+          name, error = errors.first
+          raise error.class, "Agent '#{name}' initialization failed: #{error.message}", error.backtrace
+        end
+        results
+      end
+      # Collect all delegation instances that need to be created
+      #
+      # Validates delegation configs and returns a list of instances to create.
+      # This is done sequentially to fail fast on configuration errors.
+      #
+      # @return [Array<Hash>] Array of { instance_name:, base_name:, definition: }
+      def collect_delegation_instances_to_create
+        instances = []
-        # Sub-pass 2a: Create delegation instances for isolated agents
         @swarm.agent_definitions.each do |delegator_name, delegator_def|
           delegator_def.delegation_configs.each do |delegation_config|
             delegate_base_name = delegation_config[:agent]
+            # Validate delegate exists
             unless @swarm.agent_definitions.key?(delegate_base_name)
               raise ConfigurationError,
                 "Agent '#{delegator_name}' delegates to unknown agent '#{delegate_base_name}'"
@@ -156,24 +210,95 @@ module SwarmSDK
             delegate_definition = @swarm.agent_definitions[delegate_base_name]
-            # Check isolation mode of the DELEGATE agent
-            # If delegate wants to be shared, skip instance creation (use primary)
+            # Skip if delegate wants to be shared (use primary instead)
             next if delegate_definition.shared_across_delegations
-            # Create unique delegation instance (isolated mode)
             instance_name = "#{delegate_base_name}@#{delegator_name}"
-            # V7.0: Use existing register_all_tools (no new method needed!)
-            delegation_chat = create_agent_chat_for_delegation(
+            instances << {
               instance_name: instance_name,
               base_name: delegate_base_name,
-              agent_definition: delegate_definition,
-              tool_configurator: tool_configurator,
-            )
+              definition: delegate_definition,
+            }
+          end
+        end
+        instances
+      end
+      # Create multiple delegation instances in parallel using Async fibers
+      #
+      # @param instances_to_create [Array<Hash>] Array of instance configs
+      # @param tool_configurator [ToolConfigurator] Shared tool configurator
+      # @return [Array<Array>] Array of [instance_name, chat] tuples
+      def create_delegation_instances_in_parallel(instances_to_create, tool_configurator)
+        return [] if instances_to_create.empty?
+        results = []
+        errors = []
+        mutex = Mutex.new
+        Sync do
+          barrier = Async::Barrier.new
+          instances_to_create.each do |config|
+            barrier.async do
+              delegation_chat = create_agent_chat_for_delegation(
+                instance_name: config[:instance_name],
+                base_name: config[:base_name],
+                agent_definition: config[:definition],
+                tool_configurator: tool_configurator,
+              )
+              mutex.synchronize { results << [config[:instance_name], delegation_chat] }
+            rescue StandardError => e
+              # Catch errors to avoid Async warning logs (which fail in tests with StringIO)
+              mutex.synchronize { errors << [config[:instance_name], e] }
+            end
+          end
+          barrier.wait
+        end
-            # Store in delegation_instances hash
-            @swarm.delegation_instances[instance_name] = delegation_chat
+        # Re-raise first error if any occurred
+        unless errors.empty?
+          # Emit events for all errors (not just the first)
+          errors.each do |inst_name, err|
+            LogStream.emit(
+              type: "delegation_instance_initialization_error",
+              instance_name: inst_name,
+              error_class: err.class.name,
+              error_message: err.message,
+              timestamp: Time.now.utc.iso8601,
+            )
           end
+          # Re-raise first error with context
+          instance_name, error = errors.first
+          raise error.class, "Delegation instance '#{instance_name}' initialization failed: #{error.message}", error.backtrace
+        end
+        results
+      end
+      # Pass 2: Create delegation instances and wire delegation tools
+      #
+      # This pass has three sub-steps that must happen in order:
+      # 2a. Create delegation instances (ONLY for agents with shared_across_delegations: false)
+      # 2b. Wire primary agents to delegation instances OR shared primaries
+      # 2c. Wire delegation instances to their delegates (nested delegation support)
+      #
+      # Sub-pass 2a is parallelized using Async::Barrier for faster initialization.
+      def pass_2_register_delegation_tools
+        tool_configurator = ToolConfigurator.new(@swarm, @swarm.scratchpad_storage, @swarm.plugin_storages)
+        # Sub-pass 2a: Create delegation instances for isolated agents (parallelized)
+        delegation_instances_to_create = collect_delegation_instances_to_create
+        results = create_delegation_instances_in_parallel(delegation_instances_to_create, tool_configurator)
+        # Store results after all parallel creation completes
+        results.each do |instance_name, delegation_chat|
+          @swarm.delegation_instances[instance_name] = delegation_chat
         end
         # Sub-pass 2b: Wire primary agents to delegation instances OR shared primaries OR registered swarms

data/lib/swarm_sdk/swarm/mcp_configurator.rb CHANGED Viewed

@@ -130,7 +130,8 @@ module SwarmSDK
       # @return [RubyLLM::MCP::Client] Initialized MCP client
       def initialize_mcp_client(config)
         # Convert timeout from seconds to milliseconds
-        timeout_seconds = config[:timeout] || 30
+        # Use explicit config[:timeout] if provided, otherwise use global default
+        timeout_seconds = config[:timeout] || SwarmSDK.config.mcp_request_timeout
         timeout_ms = timeout_seconds * 1000
         # Determine transport type
@@ -179,11 +180,16 @@ module SwarmSDK
       # @param config [Hash] MCP server configuration
       # @return [Hash] SSE configuration
       def build_sse_config(config)
-        {
+        sse_config = {
           url: config[:url],
           headers: config[:headers] || {},
           version: config[:version]&.to_sym || :http2,
         }
+        # Add reconnection options for resilient SSE connections
+        sse_config[:reconnection] = build_reconnection_options(config)
+        sse_config
       end
       # Build streamable (HTTP) transport configuration
@@ -200,9 +206,30 @@ module SwarmSDK
         # Only include rate_limit if present
         streamable_config[:rate_limit] = config[:rate_limit] if config[:rate_limit]
+        # Add reconnection options for resilient streamable connections
+        streamable_config[:reconnection] = build_reconnection_options(config)
         streamable_config
       end
+      # Build reconnection options from config or defaults
+      #
+      # Provides exponential backoff reconnection for SSE/streamable transports.
+      # Can be customized per-server or uses global defaults.
+      #
+      # @param config [Hash] MCP server configuration
+      # @return [Hash] Reconnection options
+      def build_reconnection_options(config)
+        reconnection_config = config[:reconnection] || {}
+        {
+          max_retries: reconnection_config[:max_retries] || Defaults::McpReconnection::MAX_RETRIES,
+          initial_reconnection_delay: reconnection_config[:initial_delay] || Defaults::McpReconnection::INITIAL_DELAY_MS,
+          reconnection_delay_grow_factor: reconnection_config[:delay_grow_factor] || Defaults::McpReconnection::DELAY_GROW_FACTOR,
+          max_reconnection_delay: reconnection_config[:max_delay] || Defaults::McpReconnection::MAX_DELAY_MS,
+        }
+      end
       # Emit MCP server initialization start event
       #
       # @param agent_name [Symbol] Agent name

data/lib/swarm_sdk/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module SwarmSDK
-  VERSION = "2.7.7"
+  VERSION = "2.7.8"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: swarm_sdk
 version: !ruby/object:Gem::Version
-  version: 2.7.7
+  version: 2.7.8
 platform: ruby
 authors:
 - Paulo Arruda