swarm_sdk 2.5.4 → 2.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/swarm_sdk/agent/chat.rb +192 -10
- data/lib/swarm_sdk/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6a4f3f78d9207417839e82926b9960b56f6217cf030db8c741f8ef01111c7866
|
|
4
|
+
data.tar.gz: 49b9bb1af8c79090dcb061dc519e9c9c286829f695feb02385be1a44082d8f24
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4760b9d3c722515a641c28da4a0575b8695074fcf8de8ccea0f71a14f23e1f53bfab6eec846132318fe159f253f7d70c166d36aaf06eb13c437043eb1d3bf97d
|
|
7
|
+
data.tar.gz: 60b24194187ab0a14e532e44b06c47e562fbf6edea5e35f369823bbf697558f8dbd4705ed3c91aea8d061573eb21d45651c6d665a46e9c3121109e7cd1728f30
|
data/lib/swarm_sdk/agent/chat.rb
CHANGED
|
@@ -712,17 +712,64 @@ module SwarmSDK
|
|
|
712
712
|
|
|
713
713
|
# --- LLM Call Retry Logic ---
|
|
714
714
|
|
|
715
|
-
# Call LLM provider with retry logic
|
|
715
|
+
# Call LLM provider with smart retry logic based on error type
|
|
716
716
|
#
|
|
717
|
-
#
|
|
717
|
+
# ## Error Categorization
|
|
718
|
+
#
|
|
719
|
+
# **Non-Retryable Client Errors (4xx)**: Return error message immediately
|
|
720
|
+
# - 400 Bad Request (after orphan tool call recovery attempt)
|
|
721
|
+
# - 401 Unauthorized (invalid API key)
|
|
722
|
+
# - 402 Payment Required (billing issue)
|
|
723
|
+
# - 403 Forbidden (permission denied)
|
|
724
|
+
# - 422 Unprocessable Entity (invalid parameters)
|
|
725
|
+
# - Other 4xx errors
|
|
726
|
+
#
|
|
727
|
+
# **Retryable Provider Errors (429 and 5xx)**: Retry with delays
|
|
728
|
+
# - 429 Rate Limit (RubyLLM already retried 3x)
|
|
729
|
+
# - 500 Server Error (RubyLLM already retried 3x)
|
|
730
|
+
# - 502-503 Bad Gateway / Service Unavailable (RubyLLM already retried 3x)
|
|
731
|
+
# - 529 Overloaded (RubyLLM already retried 3x)
|
|
732
|
+
# Note: If we see these errors, RubyLLM has already tried 3 times
|
|
733
|
+
#
|
|
734
|
+
# **Network Errors**: Retry with delays
|
|
735
|
+
# - Timeouts, connection failures, etc.
|
|
736
|
+
#
|
|
737
|
+
# ## Special Handling
|
|
738
|
+
#
|
|
739
|
+
# **400 Bad Request with Orphan Tool Calls**:
|
|
718
740
|
# - Attempts to prune orphan tool calls (tool_use without tool_result)
|
|
719
741
|
# - If pruning succeeds, retries immediately without counting as retry
|
|
742
|
+
# - If pruning fails or not applicable, returns error message immediately
|
|
720
743
|
#
|
|
721
|
-
#
|
|
744
|
+
# ## Error Response Format
|
|
745
|
+
#
|
|
746
|
+
# Non-retryable errors are returned as assistant messages so they flow naturally through delegation:
|
|
747
|
+
# ```ruby
|
|
748
|
+
# RubyLLM::Message.new(
|
|
749
|
+
# role: :assistant,
|
|
750
|
+
# content: "I encountered an error while processing your request: [details]"
|
|
751
|
+
# )
|
|
752
|
+
# ```
|
|
753
|
+
#
|
|
754
|
+
# @param max_retries [Integer] Maximum retry attempts at SDK level
|
|
755
|
+
# Note: RubyLLM already retries 429/5xx errors 3 times before this
|
|
722
756
|
# @param delay [Integer] Delay between retries in seconds
|
|
723
757
|
# @yield Block that performs the LLM call
|
|
724
|
-
# @return [Object] Result from block
|
|
725
|
-
|
|
758
|
+
# @return [RubyLLM::Message, Object] Result from block or error message
|
|
759
|
+
#
|
|
760
|
+
# @example Handling 401 Unauthorized
|
|
761
|
+
# result = call_llm_with_retry do
|
|
762
|
+
# @llm_chat.complete
|
|
763
|
+
# end
|
|
764
|
+
# # Returns immediately: Message with "Unauthorized" error
|
|
765
|
+
#
|
|
766
|
+
# @example Handling 500 Server Error
|
|
767
|
+
# result = call_llm_with_retry(max_retries: 3, delay: 15) do
|
|
768
|
+
# @llm_chat.complete
|
|
769
|
+
# end
|
|
770
|
+
# # Retries up to 3 times with 15s delays
|
|
771
|
+
# # (RubyLLM already tried 3x, so 6 total attempts)
|
|
772
|
+
def call_llm_with_retry(max_retries: 3, delay: 15, &block)
|
|
726
773
|
attempts = 0
|
|
727
774
|
pruning_attempted = false
|
|
728
775
|
|
|
@@ -731,22 +778,68 @@ module SwarmSDK
|
|
|
731
778
|
|
|
732
779
|
begin
|
|
733
780
|
return yield
|
|
781
|
+
|
|
782
|
+
# === CATEGORY A: NON-RETRYABLE CLIENT ERRORS ===
|
|
734
783
|
rescue RubyLLM::BadRequestError => e
|
|
735
|
-
#
|
|
736
|
-
# This
|
|
784
|
+
# Special case: Try orphan tool call recovery ONCE
|
|
785
|
+
# This handles interrupted tool executions (tool_use without tool_result)
|
|
737
786
|
unless pruning_attempted
|
|
738
787
|
pruned = recover_from_orphan_tool_calls(e)
|
|
739
788
|
if pruned > 0
|
|
740
789
|
pruning_attempted = true
|
|
741
|
-
# Don't count
|
|
742
|
-
attempts -= 1
|
|
790
|
+
attempts -= 1 # Don't count as retry
|
|
743
791
|
next
|
|
744
792
|
end
|
|
745
793
|
end
|
|
746
794
|
|
|
747
|
-
#
|
|
795
|
+
# No recovery possible - fail immediately with error message
|
|
796
|
+
emit_non_retryable_error(e, "BadRequest")
|
|
797
|
+
return build_error_message(e)
|
|
798
|
+
rescue RubyLLM::UnauthorizedError => e
|
|
799
|
+
# 401: Authentication failed - won't fix by retrying
|
|
800
|
+
emit_non_retryable_error(e, "Unauthorized")
|
|
801
|
+
return build_error_message(e)
|
|
802
|
+
rescue RubyLLM::PaymentRequiredError => e
|
|
803
|
+
# 402: Billing issue - won't fix by retrying
|
|
804
|
+
emit_non_retryable_error(e, "PaymentRequired")
|
|
805
|
+
return build_error_message(e)
|
|
806
|
+
rescue RubyLLM::ForbiddenError => e
|
|
807
|
+
# 403: Permission denied - won't fix by retrying
|
|
808
|
+
emit_non_retryable_error(e, "Forbidden")
|
|
809
|
+
return build_error_message(e)
|
|
810
|
+
|
|
811
|
+
# === CATEGORY B: RETRYABLE PROVIDER ERRORS (429 RATE LIMIT AND 5xx) ===
|
|
812
|
+
# IMPORTANT: Must come BEFORE generic RubyLLM::Error to avoid being caught by it
|
|
813
|
+
rescue RubyLLM::RateLimitError,
|
|
814
|
+
RubyLLM::ServerError,
|
|
815
|
+
RubyLLM::ServiceUnavailableError,
|
|
816
|
+
RubyLLM::OverloadedError => e
|
|
817
|
+
# These errors indicate temporary provider issues
|
|
818
|
+
# RubyLLM already retried 3 times with exponential backoff (~0.7s)
|
|
819
|
+
# Retry a few more times with longer delays to give provider time
|
|
748
820
|
handle_retry_or_raise(e, attempts, max_retries, delay)
|
|
821
|
+
|
|
822
|
+
# === CATEGORY A (CONTINUED): OTHER CLIENT ERRORS ===
|
|
823
|
+
# IMPORTANT: Must come AFTER specific error classes (including server errors)
|
|
824
|
+
rescue RubyLLM::Error => e
|
|
825
|
+
# Generic RubyLLM::Error - check for specific status codes
|
|
826
|
+
if e.response&.status == 422
|
|
827
|
+
# 422: Unprocessable Entity - semantic validation failure
|
|
828
|
+
emit_non_retryable_error(e, "UnprocessableEntity")
|
|
829
|
+
return build_error_message(e)
|
|
830
|
+
elsif e.response&.status && (400..499).include?(e.response.status)
|
|
831
|
+
# Other 4xx errors - conservative: don't retry unknown client errors
|
|
832
|
+
emit_non_retryable_error(e, "ClientError")
|
|
833
|
+
return build_error_message(e)
|
|
834
|
+
end
|
|
835
|
+
|
|
836
|
+
# Unknown error type (no status code, or a non-4xx status not rescued above) - conservative: don't retry
|
|
837
|
+
emit_non_retryable_error(e, "UnknownAPIError")
|
|
838
|
+
return build_error_message(e)
|
|
839
|
+
|
|
840
|
+
# === CATEGORY C: NETWORK/OTHER ERRORS ===
|
|
749
841
|
rescue StandardError => e
|
|
842
|
+
# Network errors, timeouts, unknown errors - retry with delays
|
|
750
843
|
handle_retry_or_raise(e, attempts, max_retries, delay)
|
|
751
844
|
end
|
|
752
845
|
end
|
|
@@ -792,6 +885,95 @@ module SwarmSDK
|
|
|
792
885
|
sleep(delay)
|
|
793
886
|
end
|
|
794
887
|
|
|
888
|
+
# Build an error message as an assistant response
|
|
889
|
+
#
|
|
890
|
+
# Non-retryable errors are returned as assistant messages instead of raising.
|
|
891
|
+
# This allows errors to flow naturally through delegation - parent agents
|
|
892
|
+
# can see child agent errors and respond appropriately.
|
|
893
|
+
#
|
|
894
|
+
# @param error [RubyLLM::Error, StandardError] The error that occurred
|
|
895
|
+
# @return [RubyLLM::Message] Assistant message containing formatted error
|
|
896
|
+
#
|
|
897
|
+
# @example Error message for delegation
|
|
898
|
+
# error = RubyLLM::UnauthorizedError.new(response, "Invalid API key")
|
|
899
|
+
# message = build_error_message(error)
|
|
900
|
+
# # => Message with role: :assistant, content: "I encountered an error: ..."
|
|
901
|
+
def build_error_message(error)
|
|
902
|
+
content = format_error_message(error)
|
|
903
|
+
|
|
904
|
+
RubyLLM::Message.new(
|
|
905
|
+
role: :assistant,
|
|
906
|
+
content: content,
|
|
907
|
+
model_id: model_id,
|
|
908
|
+
)
|
|
909
|
+
end
|
|
910
|
+
|
|
911
|
+
# Format error details into user-friendly message
|
|
912
|
+
#
|
|
913
|
+
# @param error [RubyLLM::Error, StandardError] The error to format
|
|
914
|
+
# @return [String] Formatted error message with type, status, and guidance
|
|
915
|
+
#
|
|
916
|
+
# @example Formatting 401 error
|
|
917
|
+
# format_error_message(unauthorized_error)
|
|
918
|
+
# # => "I encountered an error while processing your request:
|
|
919
|
+
# # **Error Type:** UnauthorizedError
|
|
920
|
+
# # **Status Code:** 401
|
|
921
|
+
# # **Message:** Invalid API key
|
|
922
|
+
# # This error indicates a problem that cannot be automatically recovered. Please check your API credentials."
|
|
923
|
+
def format_error_message(error)
|
|
924
|
+
status = error.respond_to?(:response) ? error.response&.status : nil
|
|
925
|
+
|
|
926
|
+
msg = "I encountered an error while processing your request:\n\n"
|
|
927
|
+
msg += "**Error Type:** #{error.class.name.split("::").last}\n"
|
|
928
|
+
msg += "**Status Code:** #{status}\n" if status
|
|
929
|
+
msg += "**Message:** #{error.message}\n\n"
|
|
930
|
+
msg += "This error indicates a problem that cannot be automatically recovered. "
|
|
931
|
+
|
|
932
|
+
# Add context-specific guidance based on error type
|
|
933
|
+
msg += case error
|
|
934
|
+
when RubyLLM::UnauthorizedError
|
|
935
|
+
"Please check your API credentials."
|
|
936
|
+
when RubyLLM::PaymentRequiredError
|
|
937
|
+
"Please check your account billing status."
|
|
938
|
+
when RubyLLM::ForbiddenError
|
|
939
|
+
"You may not have permission to access this resource."
|
|
940
|
+
when RubyLLM::BadRequestError
|
|
941
|
+
"The request format may be invalid."
|
|
942
|
+
else
|
|
943
|
+
"Please review the error and try again."
|
|
944
|
+
end
|
|
945
|
+
|
|
946
|
+
msg
|
|
947
|
+
end
|
|
948
|
+
|
|
949
|
+
# Emit llm_request_failed event for non-retryable errors
|
|
950
|
+
#
|
|
951
|
+
# This event provides visibility into errors that fail immediately
|
|
952
|
+
# without retry attempts. Useful for monitoring auth failures,
|
|
953
|
+
# billing issues, and other non-transient problems.
|
|
954
|
+
#
|
|
955
|
+
# @param error [RubyLLM::Error, StandardError] The error that occurred
|
|
956
|
+
# @param error_type [String] Friendly error type name for logging
|
|
957
|
+
# @return [void]
|
|
958
|
+
#
|
|
959
|
+
# @example Emitting unauthorized error event
|
|
960
|
+
# emit_non_retryable_error(error, "Unauthorized")
|
|
961
|
+
# # Emits: { type: "llm_request_failed", error_type: "Unauthorized", ... }
|
|
962
|
+
def emit_non_retryable_error(error, error_type)
|
|
963
|
+
LogStream.emit(
|
|
964
|
+
type: "llm_request_failed",
|
|
965
|
+
agent: @agent_name,
|
|
966
|
+
swarm_id: @agent_context&.swarm_id,
|
|
967
|
+
parent_swarm_id: @agent_context&.parent_swarm_id,
|
|
968
|
+
model: model_id,
|
|
969
|
+
error_type: error_type,
|
|
970
|
+
error_class: error.class.name,
|
|
971
|
+
error_message: error.message,
|
|
972
|
+
status_code: error.respond_to?(:response) ? error.response&.status : nil,
|
|
973
|
+
retryable: false,
|
|
974
|
+
)
|
|
975
|
+
end
|
|
976
|
+
|
|
795
977
|
# Recover from 400 Bad Request by pruning orphan tool calls
|
|
796
978
|
#
|
|
797
979
|
# @param error [RubyLLM::BadRequestError] The error that occurred
|
data/lib/swarm_sdk/version.rb
CHANGED