swarm_sdk 2.5.4 → 2.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4729a555c9f839d1c507a4353c74d522cfe21b8fdf50a7727d6ee078c89609e6
4
- data.tar.gz: f21e5971305b0011f924861afc3738e30f3517e25da1ba2e1b26ad3b9052ccca
3
+ metadata.gz: 6a4f3f78d9207417839e82926b9960b56f6217cf030db8c741f8ef01111c7866
4
+ data.tar.gz: 49b9bb1af8c79090dcb061dc519e9c9c286829f695feb02385be1a44082d8f24
5
5
  SHA512:
6
- metadata.gz: 3117334f14af1d526b949b9a21d20b5bd2098a34b06aaf1dfde2499a98b94ec8e284269eeff2030856d4a6fa1ad3fe37d3cd05f6121c57495c7a11baf217d804
7
- data.tar.gz: e076f6ccde790b5a5b209cd2c46e7d534e7c45a703338f306c191a131cb8e40eb5c40e064f3410e5a897e6d654f586bb89a641404910604c90f40e48d0b2472f
6
+ metadata.gz: 4760b9d3c722515a641c28da4a0575b8695074fcf8de8ccea0f71a14f23e1f53bfab6eec846132318fe159f253f7d70c166d36aaf06eb13c437043eb1d3bf97d
7
+ data.tar.gz: 60b24194187ab0a14e532e44b06c47e562fbf6edea5e35f369823bbf697558f8dbd4705ed3c91aea8d061573eb21d45651c6d665a46e9c3121109e7cd1728f30
@@ -712,17 +712,64 @@ module SwarmSDK
712
712
 
713
713
  # --- LLM Call Retry Logic ---
714
714
 
715
- # Call LLM provider with retry logic for transient failures
715
+ # Call LLM provider with smart retry logic based on error type
716
716
  #
717
- # Includes special handling for 400 Bad Request errors:
717
+ # ## Error Categorization
718
+ #
719
+ # **Non-Retryable Client Errors (4xx)**: Return error message immediately
720
+ # - 400 Bad Request (after orphan tool call recovery attempt)
721
+ # - 401 Unauthorized (invalid API key)
722
+ # - 402 Payment Required (billing issue)
723
+ # - 403 Forbidden (permission denied)
724
+ # - 422 Unprocessable Entity (invalid parameters)
725
+ # - Other 4xx errors
726
+ #
727
+ # **Retryable Provider Errors (429 and 5xx)**: Retry with delays
728
+ # - 429 Rate Limit (RubyLLM already retried 3x)
729
+ # - 500 Server Error (RubyLLM already retried 3x)
730
+ # - 502-503 Service Unavailable (RubyLLM already retried 3x)
731
+ # - 529 Overloaded (RubyLLM already retried 3x)
732
+ # Note: If we see these errors, RubyLLM has already tried 3 times
733
+ #
734
+ # **Network Errors**: Retry with delays
735
+ # - Timeouts, connection failures, etc.
736
+ #
737
+ # ## Special Handling
738
+ #
739
+ # **400 Bad Request with Orphan Tool Calls**:
718
740
  # - Attempts to prune orphan tool calls (tool_use without tool_result)
719
741
  # - If pruning succeeds, retries immediately without counting as retry
742
+ # - If pruning fails or not applicable, returns error message immediately
720
743
  #
721
- # @param max_retries [Integer] Maximum retry attempts
744
+ # ## Error Response Format
745
+ #
746
+ # Non-retryable errors return as assistant messages for natural delegation flow:
747
+ # ```ruby
748
+ # RubyLLM::Message.new(
749
+ # role: :assistant,
750
+ # content: "I encountered an error while processing your request: [details]"
751
+ # )
752
+ # ```
753
+ #
754
+ # @param max_retries [Integer] Maximum retry attempts at SDK level
755
+ # Note: RubyLLM already retries 429/5xx errors 3 times before this
722
756
  # @param delay [Integer] Delay between retries in seconds
723
757
  # @yield Block that performs the LLM call
724
- # @return [Object] Result from block
725
- def call_llm_with_retry(max_retries: 10, delay: 10, &block)
758
+ # @return [RubyLLM::Message, Object] Result from block or error message
759
+ #
760
+ # @example Handling 401 Unauthorized
761
+ # result = call_llm_with_retry do
762
+ # @llm_chat.complete
763
+ # end
764
+ # # Returns immediately: Message with "Unauthorized" error
765
+ #
766
+ # @example Handling 500 Server Error
767
+ # result = call_llm_with_retry(max_retries: 3, delay: 15) do
768
+ # @llm_chat.complete
769
+ # end
770
+ # # Retries up to 3 times with 15s delays
771
+ # # (RubyLLM already tried 3x, so 6 total attempts)
772
+ def call_llm_with_retry(max_retries: 3, delay: 15, &block)
726
773
  attempts = 0
727
774
  pruning_attempted = false
728
775
 
@@ -731,22 +778,68 @@ module SwarmSDK
731
778
 
732
779
  begin
733
780
  return yield
781
+
782
+ # === CATEGORY A: NON-RETRYABLE CLIENT ERRORS ===
734
783
  rescue RubyLLM::BadRequestError => e
735
- # Try to recover from 400 Bad Request by pruning orphan tool calls
736
- # This can happen when tool execution is interrupted mid-stream
784
+ # Special case: Try orphan tool call recovery ONCE
785
+ # This handles interrupted tool executions (tool_use without tool_result)
737
786
  unless pruning_attempted
738
787
  pruned = recover_from_orphan_tool_calls(e)
739
788
  if pruned > 0
740
789
  pruning_attempted = true
741
- # Don't count this as a regular retry, try again immediately
742
- attempts -= 1
790
+ attempts -= 1 # Don't count as retry
743
791
  next
744
792
  end
745
793
  end
746
794
 
747
- # Fall through to standard retry logic
795
+ # No recovery possible - fail immediately with error message
796
+ emit_non_retryable_error(e, "BadRequest")
797
+ return build_error_message(e)
798
+ rescue RubyLLM::UnauthorizedError => e
799
+ # 401: Authentication failed - won't fix by retrying
800
+ emit_non_retryable_error(e, "Unauthorized")
801
+ return build_error_message(e)
802
+ rescue RubyLLM::PaymentRequiredError => e
803
+ # 402: Billing issue - won't fix by retrying
804
+ emit_non_retryable_error(e, "PaymentRequired")
805
+ return build_error_message(e)
806
+ rescue RubyLLM::ForbiddenError => e
807
+ # 403: Permission denied - won't fix by retrying
808
+ emit_non_retryable_error(e, "Forbidden")
809
+ return build_error_message(e)
810
+
811
+ # === CATEGORY B: RETRYABLE SERVER ERRORS ===
812
+ # IMPORTANT: Must come BEFORE generic RubyLLM::Error to avoid being caught by it
813
+ rescue RubyLLM::RateLimitError,
814
+ RubyLLM::ServerError,
815
+ RubyLLM::ServiceUnavailableError,
816
+ RubyLLM::OverloadedError => e
817
+ # These errors indicate temporary provider issues
818
+ # RubyLLM already retried 3 times with exponential backoff (~0.7s)
819
+ # Retry a few more times with longer delays to give provider time
748
820
  handle_retry_or_raise(e, attempts, max_retries, delay)
821
+
822
+ # === CATEGORY A (CONTINUED): OTHER CLIENT ERRORS ===
823
+ # IMPORTANT: Must come AFTER specific error classes (including server errors)
824
+ rescue RubyLLM::Error => e
825
+ # Generic RubyLLM::Error - check for specific status codes
826
+ if e.response&.status == 422
827
+ # 422: Unprocessable Entity - semantic validation failure
828
+ emit_non_retryable_error(e, "UnprocessableEntity")
829
+ return build_error_message(e)
830
+ elsif e.response&.status && (400..499).include?(e.response.status)
831
+ # Other 4xx errors - conservative: don't retry unknown client errors
832
+ emit_non_retryable_error(e, "ClientError")
833
+ return build_error_message(e)
834
+ end
835
+
836
+ # No status code, or a status outside 4xx - conservative: don't retry
837
+ emit_non_retryable_error(e, "UnknownAPIError")
838
+ return build_error_message(e)
839
+
840
+ # === CATEGORY C: NETWORK/OTHER ERRORS ===
749
841
  rescue StandardError => e
842
+ # Network errors, timeouts, unknown errors - retry with delays
750
843
  handle_retry_or_raise(e, attempts, max_retries, delay)
751
844
  end
752
845
  end
@@ -792,6 +885,95 @@ module SwarmSDK
792
885
  sleep(delay)
793
886
  end
794
887
 
888
+ # Wrap an error in an assistant-role message instead of raising it
889
+ #
890
+ # The message content comes from format_error_message(error).
891
+ # Returning a message rather than raising lets errors flow through delegation -
892
+ # parent agents can see child agent errors and respond appropriately.
893
+ #
894
+ # @param error [RubyLLM::Error, StandardError] The error that occurred
895
+ # @return [RubyLLM::Message] Message with role :assistant, the formatted error text as content, and model_id set
896
+ #
897
+ # @example Error message for delegation
898
+ # error = RubyLLM::UnauthorizedError.new(response, "Invalid API key")
899
+ # message = build_error_message(error)
900
+ # # => Message with role: :assistant, content: "I encountered an error while processing your request: ..."
901
+ def build_error_message(error)
902
+ content = format_error_message(error)
903
+
904
+ RubyLLM::Message.new(
905
+ role: :assistant,
906
+ content: content,
907
+ model_id: model_id,
908
+ )
909
+ end
910
+
911
+ # Format error details into a user-friendly message
912
+ #
913
+ # @param error [RubyLLM::Error, StandardError] The error to format (status is read only if it responds to #response)
914
+ # @return [String] Error class name, status code (only when present), error message, and a guidance sentence
915
+ #
916
+ # @example Formatting 401 error
917
+ # format_error_message(unauthorized_error)
918
+ # # => "I encountered an error while processing your request:
919
+ # # **Error Type:** UnauthorizedError
920
+ # # **Status Code:** 401
921
+ # # **Message:** Invalid API key
922
+ # # This error indicates a problem that cannot be automatically recovered. Please check your API credentials."
923
+ def format_error_message(error)
924
+ status = error.respond_to?(:response) ? error.response&.status : nil
925
+
926
+ msg = "I encountered an error while processing your request:\n\n"
927
+ msg += "**Error Type:** #{error.class.name.split("::").last}\n"
928
+ msg += "**Status Code:** #{status}\n" if status
929
+ msg += "**Message:** #{error.message}\n\n"
930
+ msg += "This error indicates a problem that cannot be automatically recovered. "
931
+
932
+ # Append guidance chosen by error class; classes not listed get the generic fallback
933
+ msg += case error
934
+ when RubyLLM::UnauthorizedError
935
+ "Please check your API credentials."
936
+ when RubyLLM::PaymentRequiredError
937
+ "Please check your account billing status."
938
+ when RubyLLM::ForbiddenError
939
+ "You may not have permission to access this resource."
940
+ when RubyLLM::BadRequestError
941
+ "The request format may be invalid."
942
+ else
943
+ "Please review the error and try again."
944
+ end
945
+
946
+ msg
947
+ end
948
+
949
+ # Emit an llm_request_failed event through LogStream for a non-retryable error
950
+ #
951
+ # This event provides visibility into errors that fail immediately
952
+ # without retry attempts. Useful for monitoring auth failures,
953
+ # billing issues, and other non-transient problems.
954
+ #
955
+ # @param error [RubyLLM::Error, StandardError] The error that occurred (status_code is nil unless it responds to #response)
956
+ # @param error_type [String] Friendly error type label, emitted as the event's error_type field
957
+ # @return [void]
958
+ #
959
+ # @example Emitting unauthorized error event
960
+ # emit_non_retryable_error(error, "Unauthorized")
961
+ # # Emits: { type: "llm_request_failed", error_type: "Unauthorized", retryable: false, ... }
962
+ def emit_non_retryable_error(error, error_type)
963
+ LogStream.emit(
964
+ type: "llm_request_failed",
965
+ agent: @agent_name,
966
+ swarm_id: @agent_context&.swarm_id,
967
+ parent_swarm_id: @agent_context&.parent_swarm_id,
968
+ model: model_id,
969
+ error_type: error_type,
970
+ error_class: error.class.name,
971
+ error_message: error.message,
972
+ status_code: error.respond_to?(:response) ? error.response&.status : nil,
973
+ retryable: false,
974
+ )
975
+ end
976
+
795
977
  # Recover from 400 Bad Request by pruning orphan tool calls
796
978
  #
797
979
  # @param error [RubyLLM::BadRequestError] The error that occurred
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SwarmSDK
4
- VERSION = "2.5.4"
4
+ VERSION = "2.5.5"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swarm_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.4
4
+ version: 2.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paulo Arruda