ruby_llm-agents 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/README.md +273 -0
  3. data/app/channels/ruby_llm/agents/executions_channel.rb +24 -1
  4. data/app/controllers/concerns/ruby_llm/agents/filterable.rb +81 -0
  5. data/app/controllers/concerns/ruby_llm/agents/paginatable.rb +51 -0
  6. data/app/controllers/ruby_llm/agents/agents_controller.rb +228 -59
  7. data/app/controllers/ruby_llm/agents/dashboard_controller.rb +167 -12
  8. data/app/controllers/ruby_llm/agents/executions_controller.rb +189 -31
  9. data/app/controllers/ruby_llm/agents/settings_controller.rb +20 -0
  10. data/app/helpers/ruby_llm/agents/application_helper.rb +307 -7
  11. data/app/models/ruby_llm/agents/execution/analytics.rb +224 -20
  12. data/app/models/ruby_llm/agents/execution/metrics.rb +41 -25
  13. data/app/models/ruby_llm/agents/execution/scopes.rb +234 -14
  14. data/app/models/ruby_llm/agents/execution.rb +259 -16
  15. data/app/services/ruby_llm/agents/agent_registry.rb +49 -12
  16. data/app/views/layouts/rubyllm/agents/application.html.erb +351 -85
  17. data/app/views/rubyllm/agents/agents/_version_comparison.html.erb +186 -0
  18. data/app/views/rubyllm/agents/agents/show.html.erb +233 -10
  19. data/app/views/rubyllm/agents/dashboard/_action_center.html.erb +62 -0
  20. data/app/views/rubyllm/agents/dashboard/_alerts_feed.html.erb +62 -0
  21. data/app/views/rubyllm/agents/dashboard/_breaker_strip.html.erb +47 -0
  22. data/app/views/rubyllm/agents/dashboard/_budgets_bar.html.erb +165 -0
  23. data/app/views/rubyllm/agents/dashboard/_now_strip.html.erb +10 -0
  24. data/app/views/rubyllm/agents/dashboard/_now_strip_values.html.erb +71 -0
  25. data/app/views/rubyllm/agents/dashboard/index.html.erb +215 -109
  26. data/app/views/rubyllm/agents/executions/_filters.html.erb +152 -155
  27. data/app/views/rubyllm/agents/executions/_list.html.erb +103 -12
  28. data/app/views/rubyllm/agents/executions/dry_run.html.erb +149 -0
  29. data/app/views/rubyllm/agents/executions/index.html.erb +17 -72
  30. data/app/views/rubyllm/agents/executions/index.turbo_stream.erb +16 -2
  31. data/app/views/rubyllm/agents/executions/show.html.erb +693 -14
  32. data/app/views/rubyllm/agents/settings/show.html.erb +369 -0
  33. data/app/views/rubyllm/agents/shared/_filter_dropdown.html.erb +121 -0
  34. data/app/views/rubyllm/agents/shared/_select_dropdown.html.erb +85 -0
  35. data/config/routes.rb +7 -0
  36. data/lib/generators/ruby_llm_agents/templates/add_attempts_migration.rb.tt +27 -0
  37. data/lib/generators/ruby_llm_agents/templates/add_caching_migration.rb.tt +23 -0
  38. data/lib/generators/ruby_llm_agents/templates/add_finish_reason_migration.rb.tt +19 -0
  39. data/lib/generators/ruby_llm_agents/templates/add_routing_migration.rb.tt +19 -0
  40. data/lib/generators/ruby_llm_agents/templates/add_streaming_migration.rb.tt +8 -0
  41. data/lib/generators/ruby_llm_agents/templates/add_tracing_migration.rb.tt +34 -0
  42. data/lib/generators/ruby_llm_agents/templates/agent.rb.tt +66 -4
  43. data/lib/generators/ruby_llm_agents/templates/application_agent.rb.tt +53 -6
  44. data/lib/generators/ruby_llm_agents/templates/initializer.rb.tt +139 -8
  45. data/lib/generators/ruby_llm_agents/templates/migration.rb.tt +38 -1
  46. data/lib/generators/ruby_llm_agents/upgrade_generator.rb +78 -0
  47. data/lib/ruby_llm/agents/alert_manager.rb +207 -0
  48. data/lib/ruby_llm/agents/attempt_tracker.rb +295 -0
  49. data/lib/ruby_llm/agents/base.rb +580 -112
  50. data/lib/ruby_llm/agents/budget_tracker.rb +360 -0
  51. data/lib/ruby_llm/agents/circuit_breaker.rb +197 -0
  52. data/lib/ruby_llm/agents/configuration.rb +279 -1
  53. data/lib/ruby_llm/agents/engine.rb +58 -6
  54. data/lib/ruby_llm/agents/execution_logger_job.rb +17 -6
  55. data/lib/ruby_llm/agents/inflections.rb +13 -2
  56. data/lib/ruby_llm/agents/instrumentation.rb +538 -87
  57. data/lib/ruby_llm/agents/redactor.rb +130 -0
  58. data/lib/ruby_llm/agents/reliability.rb +185 -0
  59. data/lib/ruby_llm/agents/version.rb +3 -1
  60. data/lib/ruby_llm/agents.rb +52 -0
  61. metadata +41 -2
  62. data/app/controllers/ruby_llm/agents/application_controller.rb +0 -37
data/lib/ruby_llm/agents/instrumentation.rb +538 -87
@@ -4,36 +4,148 @@ module RubyLLM
4
4
  module Agents
5
5
  # Instrumentation concern for tracking agent executions
6
6
  #
7
- # Provides execution timing, token tracking, cost calculation, and error handling.
8
- # Logs all executions to the database via a background job.
7
+ # Provides comprehensive execution tracking including:
8
+ # - Timing metrics (started_at, completed_at, duration_ms)
9
+ # - Token usage tracking (input, output, cached)
10
+ # - Cost calculation via RubyLLM pricing data
11
+ # - Error and timeout handling with status tracking
12
+ # - Safe parameter sanitization for logging
9
13
  #
10
- # == Usage
14
+ # Included automatically in {RubyLLM::Agents::Base}.
11
15
  #
12
- # Included automatically in RubyLLM::Agents::Base
13
- #
14
- # == Customization
15
- #
16
- # Override `execution_metadata` in your agent to add custom data:
17
- #
18
- # def execution_metadata
19
- # { query: query, user_id: Current.user&.id }
16
+ # @example Adding custom metadata to executions
17
+ # class MyAgent < ApplicationAgent
18
+ # def execution_metadata
19
+ # { user_id: Current.user&.id, request_id: request.uuid }
20
+ # end
20
21
  # end
21
22
  #
23
+ # @see RubyLLM::Agents::Execution
24
+ # @see RubyLLM::Agents::ExecutionLoggerJob
25
+ # @api private
22
26
  module Instrumentation
23
27
  extend ActiveSupport::Concern
24
28
 
25
29
  included do
30
+ # @!attribute [rw] execution_id
31
+ # The ID of the current execution record
32
+ # @return [Integer, nil]
26
33
  attr_accessor :execution_id
27
34
  end
28
35
 
29
- # Wrap agent execution with metrics tracking
30
- # Creates execution record at start with 'running' status, updates on completion
31
- # Uses ensure block to guarantee status is updated even if complete_execution fails
36
+ # Wraps agent execution with comprehensive metrics tracking (for reliability-enabled agents)
37
+ #
38
+ # Creates a single execution record and tracks multiple attempts within it.
39
+ # Used by execute_with_reliability for retry/fallback scenarios.
40
+ #
41
+ # Uses catch/throw pattern because the yielded block uses `throw :execution_success`
42
+ # to exit early on success. Regular `return` from within a block would bypass
43
+ # our completion code, so we use throw/catch to properly intercept success cases.
44
+ #
45
+ # @param models_to_try [Array<String>] List of models in the fallback chain
46
+ # @yield [AttemptTracker] Block receives attempt tracker for recording attempts
47
+ # @return [Object] The result from the yielded block
48
+ # @raise [Timeout::Error] Re-raised after logging timeout status
49
+ # @raise [StandardError] Re-raised after logging error status
50
+ def instrument_execution_with_attempts(models_to_try:, &block)
51
+ started_at = Time.current
52
+ @last_response = nil
53
+ @status_update_completed = false
54
+ raised_exception = nil
55
+ completion_error = nil
56
+
57
+ attempt_tracker = AttemptTracker.new
58
+
59
+ # Create execution record with running status and fallback chain
60
+ execution = create_running_execution(started_at, fallback_chain: models_to_try)
61
+ self.execution_id = execution&.id
62
+
63
+ # Use catch to intercept successful early returns from the block
64
+ # The block uses `throw :execution_success, result` instead of `return`
65
+ result = catch(:execution_success) do
66
+ begin
67
+ yield(attempt_tracker)
68
+ # If we reach here normally (no throw), the block completed without success
69
+ # This happens when AllModelsExhaustedError is raised
70
+ nil
71
+ rescue Timeout::Error, Reliability::TotalTimeoutError => e
72
+ raised_exception = e
73
+ begin
74
+ complete_execution_with_attempts(
75
+ execution,
76
+ attempt_tracker: attempt_tracker,
77
+ completed_at: Time.current,
78
+ status: "timeout",
79
+ error: e
80
+ )
81
+ @status_update_completed = true
82
+ rescue StandardError => completion_err
83
+ completion_error = completion_err
84
+ end
85
+ raise
86
+ rescue StandardError => e
87
+ raised_exception = e
88
+ begin
89
+ complete_execution_with_attempts(
90
+ execution,
91
+ attempt_tracker: attempt_tracker,
92
+ completed_at: Time.current,
93
+ status: "error",
94
+ error: e
95
+ )
96
+ @status_update_completed = true
97
+ rescue StandardError => completion_err
98
+ completion_error = completion_err
99
+ end
100
+ raise
101
+ ensure
102
+ # Only run emergency fallback if we haven't completed AND we're not in success path
103
+ # The success path completion happens AFTER the catch block
104
+ unless @status_update_completed || !$!
105
+ actual_error = completion_error || raised_exception || $!
106
+ mark_execution_failed!(execution, error: actual_error)
107
+ end
108
+ end
109
+ end
110
+
111
+ # If we caught a successful throw, complete the execution properly
112
+ # result will be non-nil if throw :execution_success was called
113
+ if result && !@status_update_completed
114
+ begin
115
+ complete_execution_with_attempts(
116
+ execution,
117
+ attempt_tracker: attempt_tracker,
118
+ completed_at: Time.current,
119
+ status: "success"
120
+ )
121
+ @status_update_completed = true
122
+ rescue StandardError => e
123
+ Rails.logger.error("[RubyLLM::Agents] Failed to complete successful execution: #{e.class}: #{e.message}")
124
+ mark_execution_failed!(execution, error: e)
125
+ end
126
+ end
127
+
128
+ result
129
+ end
130
+
131
+ # Wraps agent execution with comprehensive metrics tracking
132
+ #
133
+ # Execution lifecycle:
134
+ # 1. Creates execution record immediately with 'running' status
135
+ # 2. Yields to the block for actual agent execution
136
+ # 3. Updates record with final status and metrics
137
+ # 4. Uses ensure block to guarantee status update even on failures
138
+ #
139
+ # @yield The block containing the actual agent execution
140
+ # @return [Object] The result from the yielded block
141
+ # @raise [Timeout::Error] Re-raised after logging timeout status
142
+ # @raise [StandardError] Re-raised after logging error status
32
143
  def instrument_execution(&block)
33
144
  started_at = Time.current
34
145
  @last_response = nil
35
- @execution_status_updated = false
36
- original_error = nil
146
+ @status_update_completed = false
147
+ raised_exception = nil
148
+ completion_error = nil
37
149
 
38
150
  # Create execution record immediately with running status
39
151
  execution = create_running_execution(started_at)
@@ -43,44 +155,69 @@ module RubyLLM
43
155
  result = yield
44
156
 
45
157
  # Update to success
46
- complete_execution(
47
- execution,
48
- completed_at: Time.current,
49
- status: "success",
50
- response: @last_response
51
- )
52
- @execution_status_updated = true
158
+ # NOTE: If this fails, we capture the error but DON'T re-raise
159
+ # The ensure block will handle it via mark_execution_failed!
160
+ begin
161
+ complete_execution(
162
+ execution,
163
+ completed_at: Time.current,
164
+ status: "success",
165
+ response: @last_response
166
+ )
167
+ @status_update_completed = true
168
+ rescue StandardError => e
169
+ completion_error = e
170
+ # Don't re-raise - let ensure block handle via mark_execution_failed!
171
+ end
53
172
 
54
173
  result
55
174
  rescue Timeout::Error => e
56
- original_error = e
57
- complete_execution(
58
- execution,
59
- completed_at: Time.current,
60
- status: "timeout",
61
- error: e
62
- )
63
- @execution_status_updated = true
175
+ raised_exception = e
176
+ begin
177
+ complete_execution(
178
+ execution,
179
+ completed_at: Time.current,
180
+ status: "timeout",
181
+ error: e
182
+ )
183
+ @status_update_completed = true
184
+ rescue StandardError => completion_err
185
+ completion_error = completion_err
186
+ end
64
187
  raise
65
- rescue => e
66
- original_error = e
67
- complete_execution(
68
- execution,
69
- completed_at: Time.current,
70
- status: "error",
71
- error: e
72
- )
73
- @execution_status_updated = true
188
+ rescue StandardError => e
189
+ raised_exception = e
190
+ begin
191
+ complete_execution(
192
+ execution,
193
+ completed_at: Time.current,
194
+ status: "error",
195
+ error: e
196
+ )
197
+ @status_update_completed = true
198
+ rescue StandardError => completion_err
199
+ completion_error = completion_err
200
+ end
74
201
  raise
75
202
  ensure
76
- # Guarantee execution is marked as error if complete_execution failed
77
- unless @execution_status_updated
78
- mark_execution_failed!(execution, error: original_error)
203
+ # Emergency fallback: mark as error if complete_execution itself failed
204
+ # This ensures executions never remain stuck in 'running' status
205
+ unless @status_update_completed
206
+ # Prefer completion_error (from update! failure) over raised_exception (from execution)
207
+ # Use $! as final fallback - it holds the current exception being propagated
208
+ actual_error = completion_error || raised_exception || $!
209
+ mark_execution_failed!(execution, error: actual_error)
79
210
  end
80
211
  end
81
212
  end
82
213
 
83
- # Store response for metrics extraction
214
+ # Stores the LLM response for metrics extraction
215
+ #
216
+ # Called by the agent after receiving a response from the LLM.
217
+ # The response is used to extract token counts and model information.
218
+ #
219
+ # @param response [RubyLLM::Message] The response from the LLM
220
+ # @return [RubyLLM::Message] The same response (for method chaining)
84
221
  def capture_response(response)
85
222
  @last_response = response
86
223
  response
@@ -88,8 +225,15 @@ module RubyLLM
88
225
 
89
226
  private
90
227
 
91
- # Create execution record with running status at start
92
- def create_running_execution(started_at)
228
+ # Creates initial execution record with 'running' status
229
+ #
230
+ # @param started_at [Time] When the execution started
231
+ # @param fallback_chain [Array<String>] Optional list of models in fallback chain
232
+ # @return [RubyLLM::Agents::Execution, nil] The created record, or nil on failure
233
+ def create_running_execution(started_at, fallback_chain: [])
234
+ config = RubyLLM::Agents.configuration
235
+ metadata = execution_metadata
236
+
93
237
  execution_data = {
94
238
  agent_type: self.class.name,
95
239
  agent_version: self.class.version,
@@ -97,20 +241,45 @@ module RubyLLM
97
241
  temperature: temperature,
98
242
  started_at: started_at,
99
243
  status: "running",
100
- parameters: sanitized_parameters,
101
- metadata: execution_metadata,
102
- system_prompt: safe_system_prompt,
103
- user_prompt: safe_user_prompt
244
+ parameters: redacted_parameters,
245
+ metadata: metadata,
246
+ system_prompt: config.persist_prompts ? redacted_system_prompt : nil,
247
+ user_prompt: config.persist_prompts ? redacted_user_prompt : nil,
248
+ streaming: self.class.streaming
104
249
  }
105
250
 
251
+ # Extract tracing fields from metadata if present
252
+ execution_data[:request_id] = metadata[:request_id] if metadata[:request_id]
253
+ execution_data[:trace_id] = metadata[:trace_id] if metadata[:trace_id]
254
+ execution_data[:span_id] = metadata[:span_id] if metadata[:span_id]
255
+ execution_data[:parent_execution_id] = metadata[:parent_execution_id] if metadata[:parent_execution_id]
256
+ execution_data[:root_execution_id] = metadata[:root_execution_id] if metadata[:root_execution_id]
257
+
258
+ # Add fallback chain if provided (for reliability-enabled executions)
259
+ if fallback_chain.any?
260
+ execution_data[:fallback_chain] = fallback_chain
261
+ execution_data[:attempts] = []
262
+ execution_data[:attempts_count] = 0
263
+ end
264
+
106
265
  RubyLLM::Agents::Execution.create!(execution_data)
107
266
  rescue StandardError => e
108
- # Log error but don't fail the execution
267
+ # Log error but don't fail the agent execution itself
109
268
  Rails.logger.error("[RubyLLM::Agents] Failed to create execution record: #{e.message}")
110
269
  nil
111
270
  end
112
271
 
113
- # Update execution record on completion
272
+ # Updates execution record with completion data
273
+ #
274
+ # Calculates duration, extracts response metrics, and saves final status.
275
+ # Falls back to legacy logging if the initial execution record is nil.
276
+ #
277
+ # @param execution [Execution, nil] The execution record to update
278
+ # @param completed_at [Time] When the execution completed
279
+ # @param status [String] Final status ("success", "error", "timeout")
280
+ # @param response [RubyLLM::Message, nil] The LLM response (if successful)
281
+ # @param error [Exception, nil] The exception (if failed)
282
+ # @return [void]
114
283
  def complete_execution(execution, completed_at:, status:, response: nil, error: nil)
115
284
  return legacy_log_execution(completed_at: completed_at, status: status, response: response, error: error) unless execution
116
285
 
@@ -123,6 +292,9 @@ module RubyLLM
123
292
  status: status
124
293
  }
125
294
 
295
+ # Add streaming metrics if available
296
+ update_data[:time_to_first_token_ms] = time_to_first_token_ms if respond_to?(:time_to_first_token_ms) && time_to_first_token_ms
297
+
126
298
  # Add response data if available (using safe extraction)
127
299
  response_data = safe_extract_response_data(response)
128
300
  if response_data.any?
@@ -149,12 +321,113 @@ module RubyLLM
149
321
  Rails.logger.warn("[RubyLLM::Agents] Cost calculation failed: #{cost_error.message}")
150
322
  end
151
323
  end
324
+ rescue ActiveRecord::RecordInvalid => e
325
+ Rails.logger.error("[RubyLLM::Agents] Validation failed for execution #{execution&.id}: #{e.record.errors.full_messages.join(', ')}")
326
+ if Rails.env.development? || Rails.env.test?
327
+ Rails.logger.error("[RubyLLM::Agents] Update data: #{update_data.inspect}")
328
+ end
329
+ raise
152
330
  rescue StandardError => e
153
- Rails.logger.error("[RubyLLM::Agents] Failed to update execution record: #{e.message}")
154
- raise # Re-raise so ensure block can handle emergency update
331
+ Rails.logger.error("[RubyLLM::Agents] Failed to update execution record #{execution&.id}: #{e.class}: #{e.message}")
332
+ if Rails.env.development? || Rails.env.test?
333
+ Rails.logger.error("[RubyLLM::Agents] Update data: #{update_data.inspect}")
334
+ end
335
+ raise
155
336
  end
156
337
 
157
- # Fallback for when initial execution creation failed
338
+ # Updates execution record with completion data and attempt tracking
339
+ #
340
+ # Similar to complete_execution but handles multi-attempt scenarios with
341
+ # aggregated token counts and costs from all attempts.
342
+ #
343
+ # @param execution [Execution, nil] The execution record to update
344
+ # @param attempt_tracker [AttemptTracker] The attempt tracker with attempt data
345
+ # @param completed_at [Time] When the execution completed
346
+ # @param status [String] Final status ("success", "error", "timeout")
347
+ # @param error [Exception, nil] The exception (if failed)
348
+ # @return [void]
349
+ def complete_execution_with_attempts(execution, attempt_tracker:, completed_at:, status:, error: nil)
350
+ return unless execution
351
+
352
+ started_at = execution.started_at
353
+ duration_ms = ((completed_at - started_at) * 1000).round
354
+
355
+ config = RubyLLM::Agents.configuration
356
+
357
+ update_data = {
358
+ completed_at: completed_at,
359
+ duration_ms: duration_ms,
360
+ status: status,
361
+ attempts: attempt_tracker.to_json_array,
362
+ attempts_count: attempt_tracker.attempts_count,
363
+ chosen_model_id: attempt_tracker.chosen_model_id,
364
+ input_tokens: attempt_tracker.total_input_tokens,
365
+ output_tokens: attempt_tracker.total_output_tokens,
366
+ total_tokens: attempt_tracker.total_tokens,
367
+ cached_tokens: attempt_tracker.total_cached_tokens
368
+ }
369
+
370
+ # Add streaming metrics if available
371
+ update_data[:time_to_first_token_ms] = time_to_first_token_ms if respond_to?(:time_to_first_token_ms) && time_to_first_token_ms
372
+
373
+ # Add finish reason from response if available
374
+ if @last_response
375
+ finish_reason = safe_extract_finish_reason(@last_response)
376
+ update_data[:finish_reason] = finish_reason if finish_reason
377
+ end
378
+
379
+ # Add routing/retry tracking fields
380
+ routing_data = extract_routing_data(attempt_tracker, error)
381
+ update_data.merge!(routing_data)
382
+
383
+ # Add response data if we have a last response
384
+ if @last_response && config.persist_responses
385
+ update_data[:response] = redacted_response(@last_response)
386
+ end
387
+
388
+ # Add error data if failed
389
+ if error
390
+ update_data.merge!(
391
+ error_message: error.message.to_s.truncate(65535),
392
+ error_class: error.class.name
393
+ )
394
+ end
395
+
396
+ execution.update!(update_data)
397
+
398
+ # Calculate costs from all attempts
399
+ if attempt_tracker.attempts_count > 0
400
+ begin
401
+ execution.aggregate_attempt_costs!
402
+ execution.save!
403
+ rescue StandardError => cost_error
404
+ Rails.logger.warn("[RubyLLM::Agents] Cost calculation failed: #{cost_error.message}")
405
+ end
406
+ end
407
+ rescue ActiveRecord::RecordInvalid => e
408
+ Rails.logger.error("[RubyLLM::Agents] Validation failed for execution #{execution&.id}: #{e.record.errors.full_messages.join(', ')}")
409
+ if Rails.env.development? || Rails.env.test?
410
+ Rails.logger.error("[RubyLLM::Agents] Update data: #{update_data.inspect}")
411
+ end
412
+ raise
413
+ rescue StandardError => e
414
+ Rails.logger.error("[RubyLLM::Agents] Failed to update execution record #{execution&.id}: #{e.class}: #{e.message}")
415
+ if Rails.env.development? || Rails.env.test?
416
+ Rails.logger.error("[RubyLLM::Agents] Update data: #{update_data.inspect}")
417
+ end
418
+ raise
419
+ end
420
+
421
+ # Fallback logging when initial execution record creation failed
422
+ #
423
+ # Creates execution via background job or synchronously based on configuration.
424
+ # Used as a last resort to ensure execution data is captured.
425
+ #
426
+ # @param completed_at [Time] When the execution completed
427
+ # @param status [String] Final status
428
+ # @param response [RubyLLM::Message, nil] The LLM response
429
+ # @param error [Exception, nil] The exception if failed
430
+ # @return [void]
158
431
  def legacy_log_execution(completed_at:, status:, response: nil, error: nil)
159
432
  execution_data = {
160
433
  agent_type: self.class.name,
@@ -192,37 +465,72 @@ module RubyLLM
192
465
  end
193
466
  end
194
467
 
195
- # Sanitize parameters to remove sensitive data
468
+ # Sanitizes parameters by removing sensitive data
469
+ #
470
+ # @deprecated Use {#redacted_parameters} instead
471
+ # @return [Hash] Sanitized parameters safe for logging
196
472
  def sanitized_parameters
197
- params = @options.dup
198
-
199
- # Remove sensitive keys
200
- sensitive_keys = %i[password token api_key secret credential auth key]
201
- sensitive_keys.each { |key| params.delete(key) }
202
-
203
- # Convert ActiveRecord objects to IDs
204
- params.transform_values do |value|
205
- case value
206
- when defined?(ActiveRecord::Base) && ActiveRecord::Base
207
- { id: value.id, type: value.class.name }
208
- when Array
209
- if value.first.is_a?(ActiveRecord::Base)
210
- { ids: value.first(10).map(&:id), type: value.first.class.name, count: value.size }
211
- else
212
- value.first(10)
213
- end
214
- else
215
- value
216
- end
217
- end
473
+ redacted_parameters
474
+ end
475
+
476
+ # Returns parameters with sensitive data redacted using the Redactor
477
+ #
478
+ # Uses the configured redaction rules to remove sensitive fields and
479
+ # apply pattern-based redaction. Also converts ActiveRecord objects
480
+ # to ID references.
481
+ #
482
+ # @return [Hash] Redacted parameters safe for logging
483
+ def redacted_parameters
484
+ params = @options.except(:skip_cache, :dry_run)
485
+ Redactor.redact(params)
486
+ end
487
+
488
+ # Returns the system prompt with redaction applied
489
+ #
490
+ # @return [String, nil] The redacted system prompt
491
+ def redacted_system_prompt
492
+ prompt = safe_system_prompt
493
+ return nil unless prompt
494
+
495
+ Redactor.redact_string(prompt)
496
+ end
497
+
498
+ # Returns the user prompt with redaction applied
499
+ #
500
+ # @return [String, nil] The redacted user prompt
501
+ def redacted_user_prompt
502
+ prompt = safe_user_prompt
503
+ return nil unless prompt
504
+
505
+ Redactor.redact_string(prompt)
506
+ end
507
+
508
+ # Returns the response with redaction applied
509
+ #
510
+ # @param response [RubyLLM::Message] The LLM response
511
+ # @return [Hash] Redacted response data
512
+ def redacted_response(response)
513
+ data = safe_serialize_response(response)
514
+ Redactor.redact(data)
218
515
  end
219
516
 
220
- # Hook for subclasses to add custom metadata
517
+ # Hook for subclasses to add custom metadata to executions
518
+ #
519
+ # Override this method in your agent to include application-specific
520
+ # data like user IDs, request IDs, or feature flags.
521
+ #
522
+ # @return [Hash] Custom metadata to store with the execution
523
+ # @example
524
+ # def execution_metadata
525
+ # { user_id: Current.user&.id, experiment: "v2" }
526
+ # end
221
527
  def execution_metadata
222
528
  {}
223
529
  end
224
530
 
225
- # Safely capture system prompt (may raise or return nil)
531
+ # Safely captures system prompt, handling errors gracefully
532
+ #
533
+ # @return [String, nil] The system prompt or nil if unavailable
226
534
  def safe_system_prompt
227
535
  respond_to?(:system_prompt) ? system_prompt.to_s : nil
228
536
  rescue StandardError => e
@@ -230,7 +538,9 @@ module RubyLLM
230
538
  nil
231
539
  end
232
540
 
233
- # Safely capture user prompt (may raise or return nil)
541
+ # Safely captures user prompt, handling errors gracefully
542
+ #
543
+ # @return [String, nil] The user prompt or nil if unavailable
234
544
  def safe_user_prompt
235
545
  respond_to?(:user_prompt) ? user_prompt.to_s : nil
236
546
  rescue StandardError => e
@@ -238,7 +548,12 @@ module RubyLLM
238
548
  nil
239
549
  end
240
550
 
241
- # Safely extract a value from response, returning default if method doesn't exist
551
+ # Safely extracts a value from response object
552
+ #
553
+ # @param response [Object] The response object
554
+ # @param method [Symbol] The method to call
555
+ # @param default [Object] Default value if method unavailable
556
+ # @return [Object] The extracted value or default
242
557
  def safe_response_value(response, method, default = nil)
243
558
  return default unless response.respond_to?(method)
244
559
  response.public_send(method)
@@ -246,7 +561,10 @@ module RubyLLM
246
561
  default
247
562
  end
248
563
 
249
- # Safely extract all response data with fallbacks
564
+ # Extracts all response metrics with safe fallbacks
565
+ #
566
+ # @param response [RubyLLM::Message, nil] The LLM response
567
+ # @return [Hash] Extracted response data (empty if response invalid)
250
568
  def safe_extract_response_data(response)
251
569
  return {} unless response.is_a?(RubyLLM::Message)
252
570
 
@@ -256,11 +574,120 @@ module RubyLLM
256
574
  cached_tokens: safe_response_value(response, :cached_tokens, 0),
257
575
  cache_creation_tokens: safe_response_value(response, :cache_creation_tokens, 0),
258
576
  model_id: safe_response_value(response, :model_id),
577
+ finish_reason: safe_extract_finish_reason(response),
259
578
  response: safe_serialize_response(response)
260
579
  }.compact
261
580
  end
262
581
 
263
- # Safe version of serialize_response
582
+ # Extracts finish reason from response, normalizing to standard values
583
+ #
584
+ # @param response [RubyLLM::Message] The LLM response
585
+ # @return [String, nil] Normalized finish reason
586
+ def safe_extract_finish_reason(response)
587
+ reason = safe_response_value(response, :finish_reason) ||
588
+ safe_response_value(response, :stop_reason)
589
+ return nil unless reason
590
+
591
+ # Normalize to standard values
592
+ normalized = reason.to_s.downcase
593
+ case normalized
594
+ when "stop", "end_turn", "stop_sequence"
595
+ "stop"
596
+ when "length", "max_tokens"
597
+ "length"
598
+ when "content_filter", "safety"
599
+ "content_filter"
600
+ when "tool_calls", "tool_use", "function_call"
601
+ "tool_calls"
602
+ else
603
+ "other"
604
+ end
605
+ end
606
+
607
+ # Extracts routing/retry tracking data from attempt tracker
608
+ #
609
+ # Analyzes the execution attempts to determine:
610
+ # - Why a fallback was used (fallback_reason)
611
+ # - Whether the error is retryable
612
+ # - Whether rate limiting occurred
613
+ #
614
+ # @param attempt_tracker [AttemptTracker] The attempt tracker
615
+ # @param error [Exception, nil] The final error (if any)
616
+ # @return [Hash] Routing data to merge into execution
617
+ def extract_routing_data(attempt_tracker, error)
618
+ data = {}
619
+
620
+ # Determine if a fallback was used and why
621
+ if attempt_tracker.used_fallback?
622
+ data[:fallback_reason] = determine_fallback_reason(attempt_tracker)
623
+ end
624
+
625
+ # Check if error is retryable
626
+ if error
627
+ data[:retryable] = retryable_error?(error)
628
+ data[:rate_limited] = rate_limit_error?(error)
629
+ end
630
+
631
+ data
632
+ end
633
+
634
+ # Determines the reason for using a fallback model
635
+ #
636
+ # @param attempt_tracker [AttemptTracker] The attempt tracker
637
+ # @return [String] Fallback reason
638
+ def determine_fallback_reason(attempt_tracker)
639
+ # Analyze failed attempts to determine why fallback was needed
640
+ failed = attempt_tracker.failed_attempts
641
+ return "other" if failed.empty?
642
+
643
+ last_failed = failed.last
644
+ error_class = last_failed[:error_class]
645
+
646
+ case error_class
647
+ when /RateLimitError/, /TooManyRequestsError/
648
+ "rate_limit"
649
+ when /Timeout/
650
+ "timeout"
651
+ when /ContentFilter/, /SafetyError/
652
+ "safety"
653
+ when /BudgetExceeded/
654
+ "price_limit"
655
+ else
656
+ "error"
657
+ end
658
+ end
659
+
660
+ # Checks if an error is retryable
661
+ #
662
+ # @param error [Exception] The error
663
+ # @return [Boolean] true if retryable
664
+ def retryable_error?(error)
665
+ return false unless error
666
+
667
+ # Check against known retryable error patterns
668
+ error_class = error.class.name
669
+ error_class.match?(/Timeout|ConnectionError|RateLimitError|ServiceUnavailable|BadGateway/)
670
+ end
671
+
672
+ # Checks if an error indicates rate limiting
673
+ #
674
+ # @param error [Exception] The error
675
+ # @return [Boolean] true if rate limited
676
+ def rate_limit_error?(error)
677
+ return false unless error
678
+
679
+ error_class = error.class.name
680
+ error_message = error.message.to_s.downcase
681
+
682
+ error_class.match?(/RateLimitError|TooManyRequests/) ||
683
+ error_message.include?("rate limit") ||
684
+ error_message.include?("too many requests")
685
+ end
686
+
687
+ # Serializes response to a hash for storage
688
+ #
689
+ # @param response [RubyLLM::Message] The LLM response
690
+ # @return [Hash] Serialized response data
264
691
  def safe_serialize_response(response)
265
692
  {
266
693
  content: safe_response_value(response, :content),
@@ -272,17 +699,41 @@ module RubyLLM
272
699
  }.compact
273
700
  end
274
701
 
275
- # Emergency fallback - mark execution as error using update_columns
276
- # Bypasses callbacks/validations to ensure status is always updated
702
+ # Emergency fallback to mark execution as failed
703
+ #
704
+ # Uses update_all to bypass ActiveRecord callbacks and validations,
705
+ # ensuring the status is updated even if the model is in an invalid state.
706
+ # Only updates records that are still in 'running' status to prevent
707
+ # race conditions.
708
+ #
709
+ # @param execution [Execution, nil] The execution record
710
+ # @param error [Exception, nil] The exception that caused the failure
711
+ # @return [void]
277
712
  def mark_execution_failed!(execution, error: nil)
278
713
  return unless execution&.id
279
714
  return unless execution.status == "running"
280
715
 
716
+ # If no error was captured, create a synthetic one with current stack trace
717
+ # This helps debug cases where error details are lost
718
+ if error.nil?
719
+ Rails.logger.error("[RubyLLM::Agents] BUG: mark_execution_failed! called with nil error")
720
+ Rails.logger.error("[RubyLLM::Agents] Stack trace:\n #{caller.first(15).join("\n ")}")
721
+
722
+ synthetic_error = RuntimeError.new("No error was captured - check logs for stack trace")
723
+ synthetic_error.set_backtrace(caller)
724
+ error = synthetic_error
725
+ end
726
+
727
+ # Build a detailed error message including backtrace for debugging
728
+ backtrace_info = error.backtrace&.first(5)&.join("\n ") || ""
729
+ error_message = "#{error.class}: #{error.message}"
730
+ error_message += "\n #{backtrace_info}" if backtrace_info.present?
731
+
281
732
  update_data = {
282
733
  status: "error",
283
734
  completed_at: Time.current,
284
- error_class: error&.class&.name || "InstrumentationError",
285
- error_message: (error&.message || "Execution status update failed").to_s.truncate(65535)
735
+ error_class: error.class.name,
736
+ error_message: error_message.to_s.truncate(65535)
286
737
  }
287
738
 
288
739
  execution.class.where(id: execution.id, status: "running").update_all(update_data)