ruby_llm-agents 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ruby_llm/agents/core/configuration.rb +1 -1
- data/lib/ruby_llm/agents/core/version.rb +1 -1
- data/lib/ruby_llm/agents/dsl/reliability.rb +27 -2
- data/lib/ruby_llm/agents/infrastructure/reliability.rb +30 -2
- data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +49 -0
- data/lib/ruby_llm/agents/pipeline/middleware/reliability.rb +45 -5
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f5ca913fd17a05f6a541111f194a07259002280967bedb74d74173b53298eaed
|
|
4
|
+
data.tar.gz: 537f02ab5b3f40b4edcb225118ebd320f0a4a76976b9e077acfecd4bd47c5e5a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6016113d5051912f51f1dc1d957c9a5eb45d62028d5920284aae300189ea0e6a5347f231a252e7f76d002156421ab442f2b55299f4e7b68a73e12ef14eacb64d
|
|
7
|
+
data.tar.gz: 063ce841e9405b5afd7b9468c1db4646a7621702e5a4121c29d775339cfab550f0e85783fbcc63d12b5553efac90e7a3a9aa93a8f704d17a8a023d2a13f35fe7
|
|
@@ -621,7 +621,7 @@ module RubyLLM
|
|
|
621
621
|
@default_fallback_models = []
|
|
622
622
|
@default_total_timeout = nil
|
|
623
623
|
@default_retryable_patterns = {
|
|
624
|
-
rate_limiting: ["rate limit", "rate_limit", "too many requests", "429"],
|
|
624
|
+
rate_limiting: ["rate limit", "rate_limit", "too many requests", "429", "quota"],
|
|
625
625
|
server_errors: ["500", "502", "503", "504", "service unavailable",
|
|
626
626
|
"internal server error", "bad gateway", "gateway timeout"],
|
|
627
627
|
capacity: ["overloaded", "capacity"]
|
|
@@ -54,6 +54,7 @@ module RubyLLM
|
|
|
54
54
|
@total_timeout = builder.total_timeout_value if builder.total_timeout_value
|
|
55
55
|
@circuit_breaker_config = builder.circuit_breaker_config if builder.circuit_breaker_config
|
|
56
56
|
@retryable_patterns = builder.retryable_patterns_list if builder.retryable_patterns_list
|
|
57
|
+
@non_fallback_errors = builder.non_fallback_errors_list if builder.non_fallback_errors_list
|
|
57
58
|
end
|
|
58
59
|
|
|
59
60
|
# Returns the complete reliability configuration hash
|
|
@@ -70,7 +71,8 @@ module RubyLLM
|
|
|
70
71
|
fallback_providers: fallback_providers,
|
|
71
72
|
total_timeout: total_timeout,
|
|
72
73
|
circuit_breaker: circuit_breaker_config,
|
|
73
|
-
retryable_patterns: retryable_patterns
|
|
74
|
+
retryable_patterns: retryable_patterns,
|
|
75
|
+
non_fallback_errors: non_fallback_errors
|
|
74
76
|
}.compact
|
|
75
77
|
end
|
|
76
78
|
|
|
@@ -196,6 +198,17 @@ module RubyLLM
|
|
|
196
198
|
@retryable_patterns || inherited_retryable_patterns
|
|
197
199
|
end
|
|
198
200
|
|
|
201
|
+
# Sets or returns additional error classes that should never trigger fallback
|
|
202
|
+
#
|
|
203
|
+
# @param error_classes [Array<Class>] Error classes that should fail immediately
|
|
204
|
+
# @return [Array<Class>, nil] The current non-fallback error classes
|
|
205
|
+
# @example
|
|
206
|
+
# non_fallback_errors MyValidationError, MySchemaError
|
|
207
|
+
def non_fallback_errors(*error_classes)
|
|
208
|
+
@non_fallback_errors = error_classes.flatten if error_classes.any?
|
|
209
|
+
@non_fallback_errors || inherited_non_fallback_errors
|
|
210
|
+
end
|
|
211
|
+
|
|
199
212
|
# @!endgroup
|
|
200
213
|
|
|
201
214
|
private
|
|
@@ -236,6 +249,12 @@ module RubyLLM
|
|
|
236
249
|
superclass.retryable_patterns
|
|
237
250
|
end
|
|
238
251
|
|
|
252
|
+
def inherited_non_fallback_errors
|
|
253
|
+
return nil unless superclass.respond_to?(:non_fallback_errors)
|
|
254
|
+
|
|
255
|
+
superclass.non_fallback_errors
|
|
256
|
+
end
|
|
257
|
+
|
|
239
258
|
def default_retries_config
|
|
240
259
|
{
|
|
241
260
|
max: 0,
|
|
@@ -249,7 +268,8 @@ module RubyLLM
|
|
|
249
268
|
# Inner builder class for block-style configuration
|
|
250
269
|
class ReliabilityBuilder
|
|
251
270
|
attr_reader :retries_config, :fallback_models_list, :total_timeout_value,
|
|
252
|
-
:circuit_breaker_config, :retryable_patterns_list, :fallback_providers_list
|
|
271
|
+
:circuit_breaker_config, :retryable_patterns_list, :fallback_providers_list,
|
|
272
|
+
:non_fallback_errors_list
|
|
253
273
|
|
|
254
274
|
def initialize
|
|
255
275
|
@retries_config = nil
|
|
@@ -258,6 +278,7 @@ module RubyLLM
|
|
|
258
278
|
@circuit_breaker_config = nil
|
|
259
279
|
@retryable_patterns_list = nil
|
|
260
280
|
@fallback_providers_list = []
|
|
281
|
+
@non_fallback_errors_list = nil
|
|
261
282
|
end
|
|
262
283
|
|
|
263
284
|
def retries(max: 0, backoff: :exponential, base: 0.4, max_delay: 3.0, on: [])
|
|
@@ -300,6 +321,10 @@ module RubyLLM
|
|
|
300
321
|
def retryable_patterns(*patterns)
|
|
301
322
|
@retryable_patterns_list = patterns.flatten
|
|
302
323
|
end
|
|
324
|
+
|
|
325
|
+
def non_fallback_errors(*error_classes)
|
|
326
|
+
@non_fallback_errors_list = error_classes.flatten
|
|
327
|
+
end
|
|
303
328
|
end
|
|
304
329
|
end
|
|
305
330
|
end
|
|
@@ -88,18 +88,46 @@ module RubyLLM
|
|
|
88
88
|
#
|
|
89
89
|
# @api public
|
|
90
90
|
class AllModelsExhaustedError < Error
|
|
91
|
-
attr_reader :models_tried, :last_error
|
|
91
|
+
attr_reader :models_tried, :last_error, :attempts
|
|
92
92
|
|
|
93
93
|
# @param models_tried [Array<String>] List of models that were attempted
|
|
94
94
|
# @param last_error [Exception] The last error that occurred
|
|
95
|
-
|
|
95
|
+
# @param attempts [Array<Hash>, nil] Per-model attempt data from AttemptTracker
|
|
96
|
+
def initialize(models_tried, last_error, attempts: nil)
|
|
96
97
|
@models_tried = models_tried
|
|
97
98
|
@last_error = last_error
|
|
99
|
+
@attempts = attempts
|
|
98
100
|
super("All models exhausted: #{models_tried.join(', ')}. Last error: #{last_error.message}")
|
|
99
101
|
end
|
|
100
102
|
end
|
|
101
103
|
|
|
102
104
|
class << self
|
|
105
|
+
# Default list of error classes that should never trigger fallback
|
|
106
|
+
#
|
|
107
|
+
# These errors indicate programming bugs that won't be fixed by trying
|
|
108
|
+
# a different model. They should fail immediately.
|
|
109
|
+
#
|
|
110
|
+
# @return [Array<Class>] Error classes that are non-fallback by default
|
|
111
|
+
def default_non_fallback_errors
|
|
112
|
+
@default_non_fallback_errors ||= [
|
|
113
|
+
ArgumentError,
|
|
114
|
+
TypeError,
|
|
115
|
+
NameError,
|
|
116
|
+
NoMethodError,
|
|
117
|
+
NotImplementedError
|
|
118
|
+
]
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Determines if an error is a programming error that should not trigger fallback
|
|
122
|
+
#
|
|
123
|
+
# @param error [Exception] The error to check
|
|
124
|
+
# @param custom_errors [Array<Class>] Additional error classes to consider non-fallback
|
|
125
|
+
# @return [Boolean] true if the error should fail immediately
|
|
126
|
+
def non_fallback_error?(error, custom_errors: [])
|
|
127
|
+
all = default_non_fallback_errors + Array(custom_errors)
|
|
128
|
+
all.any? { |klass| error.is_a?(klass) }
|
|
129
|
+
end
|
|
130
|
+
|
|
103
131
|
# Default list of error classes that are considered retryable
|
|
104
132
|
#
|
|
105
133
|
# These errors typically indicate transient issues that may resolve on retry.
|
|
@@ -232,6 +232,17 @@ module RubyLLM
|
|
|
232
232
|
data[:tool_calls_count] = context[:tool_calls].size
|
|
233
233
|
end
|
|
234
234
|
|
|
235
|
+
# Add reliability attempts if present
|
|
236
|
+
if context[:reliability_attempts].present?
|
|
237
|
+
data[:attempts] = context[:reliability_attempts]
|
|
238
|
+
data[:attempts_count] = context[:reliability_attempts].size
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
# Add response if persist_responses is enabled
|
|
242
|
+
if global_config.persist_responses && context.output.respond_to?(:content)
|
|
243
|
+
data[:response] = serialize_response(context)
|
|
244
|
+
end
|
|
245
|
+
|
|
235
246
|
data
|
|
236
247
|
end
|
|
237
248
|
|
|
@@ -304,6 +315,17 @@ module RubyLLM
|
|
|
304
315
|
data[:tool_calls_count] = context[:tool_calls].size
|
|
305
316
|
end
|
|
306
317
|
|
|
318
|
+
# Add reliability attempts if present
|
|
319
|
+
if context[:reliability_attempts].present?
|
|
320
|
+
data[:attempts] = context[:reliability_attempts]
|
|
321
|
+
data[:attempts_count] = context[:reliability_attempts].size
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
# Add response if persist_responses is enabled
|
|
325
|
+
if global_config.persist_responses && context.output.respond_to?(:content)
|
|
326
|
+
data[:response] = serialize_response(context)
|
|
327
|
+
end
|
|
328
|
+
|
|
307
329
|
data
|
|
308
330
|
end
|
|
309
331
|
|
|
@@ -359,6 +381,33 @@ module RubyLLM
|
|
|
359
381
|
message.to_s[0, 1000]
|
|
360
382
|
end
|
|
361
383
|
|
|
384
|
+
# Serializes the response content for storage
|
|
385
|
+
#
|
|
386
|
+
# @param context [Context] The execution context
|
|
387
|
+
# @return [Hash, nil] Serialized response data
|
|
388
|
+
def serialize_response(context)
|
|
389
|
+
return nil unless context.output
|
|
390
|
+
|
|
391
|
+
content = context.output.content
|
|
392
|
+
return nil if content.nil?
|
|
393
|
+
|
|
394
|
+
# Build response hash similar to core instrumentation
|
|
395
|
+
response_data = { content: content }
|
|
396
|
+
|
|
397
|
+
# Add model_id if available
|
|
398
|
+
response_data[:model_id] = context.model_used if context.model_used
|
|
399
|
+
|
|
400
|
+
# Add token info if available
|
|
401
|
+
response_data[:input_tokens] = context.input_tokens if context.input_tokens
|
|
402
|
+
response_data[:output_tokens] = context.output_tokens if context.output_tokens
|
|
403
|
+
|
|
404
|
+
# Apply redaction for sensitive data
|
|
405
|
+
Redactor.redact(response_data)
|
|
406
|
+
rescue StandardError => e
|
|
407
|
+
error("Failed to serialize response: #{e.message}")
|
|
408
|
+
nil
|
|
409
|
+
end
|
|
410
|
+
|
|
362
411
|
# Queues async logging via background job
|
|
363
412
|
#
|
|
364
413
|
# @param data [Hash] Execution data
|
|
@@ -97,12 +97,14 @@ module RubyLLM
|
|
|
97
97
|
started_at = Time.current
|
|
98
98
|
last_error = nil
|
|
99
99
|
context.attempts_made = 0
|
|
100
|
+
tracker = Agents::AttemptTracker.new
|
|
100
101
|
|
|
101
102
|
models_to_try.each do |current_model|
|
|
102
103
|
# Check circuit breaker for this model
|
|
103
104
|
breaker = get_circuit_breaker(current_model, context)
|
|
104
105
|
if breaker&.open?
|
|
105
106
|
debug("Circuit breaker open for #{current_model}, skipping")
|
|
107
|
+
tracker.record_short_circuit(current_model)
|
|
106
108
|
next
|
|
107
109
|
end
|
|
108
110
|
|
|
@@ -112,17 +114,27 @@ module RubyLLM
|
|
|
112
114
|
config: config,
|
|
113
115
|
total_deadline: total_deadline,
|
|
114
116
|
started_at: started_at,
|
|
115
|
-
breaker: breaker
|
|
117
|
+
breaker: breaker,
|
|
118
|
+
tracker: tracker
|
|
116
119
|
)
|
|
117
120
|
|
|
118
|
-
|
|
121
|
+
if result
|
|
122
|
+
context[:reliability_attempts] = tracker.to_json_array
|
|
123
|
+
return result
|
|
124
|
+
end
|
|
119
125
|
|
|
120
126
|
# Capture the last error from context for the final error
|
|
121
127
|
last_error = context.error
|
|
122
128
|
end
|
|
123
129
|
|
|
130
|
+
# Store attempts even on total failure
|
|
131
|
+
context[:reliability_attempts] = tracker.to_json_array
|
|
132
|
+
|
|
124
133
|
# All models exhausted
|
|
125
|
-
raise Agents::Reliability::AllModelsExhaustedError.new(
|
|
134
|
+
raise Agents::Reliability::AllModelsExhaustedError.new(
|
|
135
|
+
models_to_try, last_error,
|
|
136
|
+
attempts: tracker.to_json_array
|
|
137
|
+
)
|
|
126
138
|
end
|
|
127
139
|
|
|
128
140
|
# Tries a model with retry logic
|
|
@@ -134,7 +146,7 @@ module RubyLLM
|
|
|
134
146
|
# @param started_at [Time] When execution started
|
|
135
147
|
# @param breaker [CircuitBreaker, nil] The circuit breaker for this model
|
|
136
148
|
# @return [Context, nil] The context if successful, nil to try next model
|
|
137
|
-
def try_model_with_retries(context:, model:, config:, total_deadline:, started_at:, breaker:)
|
|
149
|
+
def try_model_with_retries(context:, model:, config:, total_deadline:, started_at:, breaker:, tracker:)
|
|
138
150
|
retries_config = config[:retries] || {}
|
|
139
151
|
max_retries = retries_config[:max] || 0
|
|
140
152
|
attempt_index = 0
|
|
@@ -146,6 +158,8 @@ module RubyLLM
|
|
|
146
158
|
context.attempt = attempt_index + 1
|
|
147
159
|
context.attempts_made += 1
|
|
148
160
|
|
|
161
|
+
attempt = tracker.start_attempt(model)
|
|
162
|
+
|
|
149
163
|
begin
|
|
150
164
|
# Override the model for this attempt
|
|
151
165
|
original_model = context.model
|
|
@@ -153,14 +167,19 @@ module RubyLLM
|
|
|
153
167
|
|
|
154
168
|
@app.call(context)
|
|
155
169
|
|
|
156
|
-
# Success - record in circuit breaker
|
|
170
|
+
# Success - record in circuit breaker and tracker
|
|
157
171
|
breaker&.record_success!
|
|
172
|
+
tracker.complete_attempt(attempt, success: true, response: context.output)
|
|
158
173
|
|
|
159
174
|
return context
|
|
160
175
|
|
|
161
176
|
rescue StandardError => e
|
|
162
177
|
context.error = e
|
|
163
178
|
breaker&.record_failure!
|
|
179
|
+
tracker.complete_attempt(attempt, success: false, error: e)
|
|
180
|
+
|
|
181
|
+
# Programming errors fail immediately — no retry, no fallback
|
|
182
|
+
raise if non_fallback_error?(e, config)
|
|
164
183
|
|
|
165
184
|
# Check if we should retry
|
|
166
185
|
if should_retry?(e, config, attempt_index, max_retries, total_deadline)
|
|
@@ -202,10 +221,31 @@ module RubyLLM
|
|
|
202
221
|
def should_retry?(error, config, attempt_index, max_retries, total_deadline)
|
|
203
222
|
return false if attempt_index >= max_retries
|
|
204
223
|
return false if total_deadline && Time.current > total_deadline
|
|
224
|
+
# Don't retry if fallback models are available — move to next model instead
|
|
225
|
+
return false if has_fallback_models?(config)
|
|
205
226
|
|
|
206
227
|
retryable_error?(error, config)
|
|
207
228
|
end
|
|
208
229
|
|
|
230
|
+
# Checks if an error is a programming error that should not trigger fallback
|
|
231
|
+
#
|
|
232
|
+
# @param error [Exception] The error to check
|
|
233
|
+
# @param config [Hash] The reliability configuration
|
|
234
|
+
# @return [Boolean] Whether the error should fail immediately
|
|
235
|
+
def non_fallback_error?(error, config)
|
|
236
|
+
custom_errors = config[:non_fallback_errors] || []
|
|
237
|
+
Agents::Reliability.non_fallback_error?(error, custom_errors: custom_errors)
|
|
238
|
+
end
|
|
239
|
+
|
|
240
|
+
# Returns whether fallback models are configured
|
|
241
|
+
#
|
|
242
|
+
# @param config [Hash] The reliability configuration
|
|
243
|
+
# @return [Boolean]
|
|
244
|
+
def has_fallback_models?(config)
|
|
245
|
+
fallbacks = config[:fallback_models]
|
|
246
|
+
fallbacks.is_a?(Array) && fallbacks.any?
|
|
247
|
+
end
|
|
248
|
+
|
|
209
249
|
# Checks if an error is retryable
|
|
210
250
|
#
|
|
211
251
|
# @param error [Exception] The error to check
|