ruby_llm-agents 3.5.5 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. checksums.yaml +4 -4
  2. data/README.md +21 -0
  3. data/app/controllers/ruby_llm/agents/dashboard_controller.rb +155 -10
  4. data/app/controllers/ruby_llm/agents/executions_controller.rb +1 -3
  5. data/app/helpers/ruby_llm/agents/application_helper.rb +15 -28
  6. data/app/models/ruby_llm/agents/execution/replayable.rb +124 -0
  7. data/app/models/ruby_llm/agents/execution/scopes.rb +42 -1
  8. data/app/models/ruby_llm/agents/execution.rb +50 -1
  9. data/app/models/ruby_llm/agents/tenant/budgetable.rb +28 -4
  10. data/app/views/layouts/ruby_llm/agents/application.html.erb +41 -28
  11. data/app/views/ruby_llm/agents/agents/show.html.erb +16 -1
  12. data/app/views/ruby_llm/agents/dashboard/_top_tenants.html.erb +47 -0
  13. data/app/views/ruby_llm/agents/dashboard/index.html.erb +404 -107
  14. data/app/views/ruby_llm/agents/system_config/show.html.erb +0 -13
  15. data/lib/generators/ruby_llm_agents/rename_agent_generator.rb +53 -0
  16. data/lib/generators/ruby_llm_agents/templates/initializer.rb.tt +0 -15
  17. data/lib/generators/ruby_llm_agents/templates/rename_agent_migration.rb.tt +19 -0
  18. data/lib/ruby_llm/agents/agent_tool.rb +125 -0
  19. data/lib/ruby_llm/agents/audio/speaker.rb +5 -3
  20. data/lib/ruby_llm/agents/audio/speech_pricing.rb +63 -187
  21. data/lib/ruby_llm/agents/audio/transcriber.rb +5 -3
  22. data/lib/ruby_llm/agents/audio/transcription_pricing.rb +5 -7
  23. data/lib/ruby_llm/agents/base_agent.rb +144 -5
  24. data/lib/ruby_llm/agents/core/configuration.rb +178 -53
  25. data/lib/ruby_llm/agents/core/errors.rb +3 -77
  26. data/lib/ruby_llm/agents/core/instrumentation.rb +0 -17
  27. data/lib/ruby_llm/agents/core/version.rb +1 -1
  28. data/lib/ruby_llm/agents/dsl/base.rb +0 -8
  29. data/lib/ruby_llm/agents/dsl/queryable.rb +124 -0
  30. data/lib/ruby_llm/agents/dsl.rb +1 -0
  31. data/lib/ruby_llm/agents/eval/eval_result.rb +73 -0
  32. data/lib/ruby_llm/agents/eval/eval_run.rb +124 -0
  33. data/lib/ruby_llm/agents/eval/eval_suite.rb +264 -0
  34. data/lib/ruby_llm/agents/eval.rb +5 -0
  35. data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +2 -1
  36. data/lib/ruby_llm/agents/image/generator/pricing.rb +75 -217
  37. data/lib/ruby_llm/agents/image/generator.rb +5 -3
  38. data/lib/ruby_llm/agents/infrastructure/attempt_tracker.rb +8 -0
  39. data/lib/ruby_llm/agents/infrastructure/circuit_breaker.rb +4 -2
  40. data/lib/ruby_llm/agents/pipeline/builder.rb +43 -0
  41. data/lib/ruby_llm/agents/pipeline/context.rb +11 -1
  42. data/lib/ruby_llm/agents/pipeline/executor.rb +1 -25
  43. data/lib/ruby_llm/agents/pipeline/middleware/budget.rb +26 -1
  44. data/lib/ruby_llm/agents/pipeline/middleware/cache.rb +18 -0
  45. data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +90 -0
  46. data/lib/ruby_llm/agents/pipeline/middleware/reliability.rb +29 -0
  47. data/lib/ruby_llm/agents/pipeline/middleware/tenant.rb +11 -4
  48. data/lib/ruby_llm/agents/pipeline.rb +0 -92
  49. data/lib/ruby_llm/agents/results/background_removal_result.rb +11 -1
  50. data/lib/ruby_llm/agents/results/base.rb +23 -1
  51. data/lib/ruby_llm/agents/results/embedding_result.rb +14 -1
  52. data/lib/ruby_llm/agents/results/image_analysis_result.rb +11 -1
  53. data/lib/ruby_llm/agents/results/image_edit_result.rb +11 -1
  54. data/lib/ruby_llm/agents/results/image_generation_result.rb +12 -3
  55. data/lib/ruby_llm/agents/results/image_pipeline_result.rb +11 -1
  56. data/lib/ruby_llm/agents/results/image_transform_result.rb +11 -1
  57. data/lib/ruby_llm/agents/results/image_upscale_result.rb +11 -1
  58. data/lib/ruby_llm/agents/results/image_variation_result.rb +11 -1
  59. data/lib/ruby_llm/agents/results/speech_result.rb +20 -1
  60. data/lib/ruby_llm/agents/results/transcription_result.rb +20 -1
  61. data/lib/ruby_llm/agents/text/embedder.rb +23 -18
  62. data/lib/ruby_llm/agents.rb +73 -5
  63. data/lib/tasks/ruby_llm_agents.rake +21 -0
  64. metadata +11 -6
  65. data/lib/ruby_llm/agents/infrastructure/reliability/breaker_manager.rb +0 -80
  66. data/lib/ruby_llm/agents/infrastructure/reliability/execution_constraints.rb +0 -69
  67. data/lib/ruby_llm/agents/infrastructure/reliability/executor.rb +0 -125
  68. data/lib/ruby_llm/agents/infrastructure/reliability/fallback_routing.rb +0 -72
  69. data/lib/ruby_llm/agents/infrastructure/reliability/retry_strategy.rb +0 -82
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Agents
5
+ module DSL
6
# Adds execution querying capabilities to agent classes.
#
# Mixed into BaseAgent via `extend DSL::Queryable`, making all methods
# available as class methods on agent classes.
#
# @example Basic queries
#   SupportAgent.executions.successful.recent
#   SupportAgent.executions.today.expensive(0.50)
#
# @example Convenience methods
#   SupportAgent.last_run
#   SupportAgent.stats
#   SupportAgent.total_spent(since: 1.week)
#
module Queryable
  # Returns the executions recorded for this agent, looked up through
  # `Execution.by_agent` with the extending class's `name`.
  #
  # @return [ActiveRecord::Relation]
  #
  # @example
  #   SupportAgent.executions.successful.last(5)
  #   SupportAgent.executions.where("total_cost > ?", 0.01)
  #
  def executions
    RubyLLM::Agents::Execution.by_agent(name)
  end

  # Returns the most recent execution for this agent.
  #
  # @return [RubyLLM::Agents::Execution, nil]
  #
  def last_run
    executions.order(created_at: :desc).first
  end

  # Returns recent failed executions.
  #
  # @param since [ActiveSupport::Duration, nil] Time window (default: 24.hours);
  #   nil disables the window, consistent with the other query helpers
  # @return [ActiveRecord::Relation]
  #
  def failures(since: 24.hours)
    executions_since(since).failed
  end

  # Returns total cost spent by this agent.
  #
  # @param since [ActiveSupport::Duration, nil] Optional time window
  # @return [BigDecimal] Total cost in USD
  #
  def total_spent(since: nil)
    executions_since(since).sum(:total_cost)
  end

  # Returns a stats summary hash for this agent.
  #
  # @param since [ActiveSupport::Duration, nil] Time window
  # @return [Hash] Stats summary with :total, :successful, :failed,
  #   :success_rate, :avg_duration_ms, :avg_cost, :total_cost,
  #   :total_tokens and :avg_tokens
  #
  def stats(since: nil)
    scope = executions_since(since)

    total = scope.count
    successful = scope.successful.count
    # Summed once and reused for both :avg_cost and :total_cost.
    total_cost = scope.sum(:total_cost)

    {
      total: total,
      successful: successful,
      failed: scope.failed.count,
      success_rate: total.zero? ? 0.0 : (successful.to_f / total * 100).round(1),
      avg_duration_ms: scope.average(:duration_ms)&.round,
      avg_cost: total.zero? ? 0 : (total_cost.to_f / total).round(6),
      total_cost: total_cost,
      total_tokens: scope.sum(:total_tokens),
      avg_tokens: scope.average(:total_tokens)&.round
    }
  end

  # Returns cost breakdown by model for this agent.
  #
  # @param since [ActiveSupport::Duration, nil] Time window
  # @return [Hash{String => Hash}] Per-model :count, :total_cost, :avg_cost
  #
  def cost_by_model(since: nil)
    rows = executions_since(since).group(:model_id).pluck(
      :model_id,
      Arel.sql("COUNT(*)"),
      Arel.sql("SUM(total_cost)"),
      Arel.sql("AVG(total_cost)")
    )

    rows.each_with_object({}) do |(model, count, total, avg), hash|
      hash[model] = {
        count: count,
        # Aggregates may come back nil; report those as 0.
        total_cost: total&.to_f&.round(6) || 0,
        avg_cost: avg&.to_f&.round(6) || 0
      }
    end
  end

  # Returns executions matching specific parameter values.
  #
  # @param params [Hash] Parameter key-value pairs to match
  # @return [ActiveRecord::Relation]
  #
  def with_params(**params)
    params.reduce(executions) do |scope, (key, value)|
      scope.with_parameter(key, value)
    end
  end

  private

  # Scopes this agent's executions to those created after `since.ago`;
  # returns the unfiltered scope when `since` is nil.
  def executions_since(since)
    since ? executions.where("created_at > ?", since.ago) : executions
  end
end
122
+ end
123
+ end
124
+ end
@@ -3,6 +3,7 @@
3
3
  require_relative "dsl/base"
4
4
  require_relative "dsl/reliability"
5
5
  require_relative "dsl/caching"
6
+ require_relative "dsl/queryable"
6
7
 
7
8
  module RubyLLM
8
9
  module Agents
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Agents
5
+ module Eval
6
# Holds the result of evaluating a single test case.
#
# Contains the test case definition, the agent's result, the score,
# and any error that occurred during execution.
class EvalResult
  attr_reader :test_case, :agent_result, :score, :execution_id, :error

  # @param test_case [Object] the test case definition (responds to name/input/expected)
  # @param agent_result [Object, nil] whatever the agent returned; nil when the run errored
  # @param score [Object] score object responding to value, reason, passed?, failed?
  # @param execution_id [Object, nil] id of the recorded execution, when available
  # @param error [Exception, nil] exception raised while running the case
  def initialize(test_case:, agent_result:, score:, execution_id: nil, error: nil)
    @test_case = test_case
    @agent_result = agent_result
    @score = score
    @execution_id = execution_id
    @error = error
  end

  # @return [Object] name of the underlying test case
  def test_case_name
    test_case.name
  end

  # @return [Object] input declared on the test case
  def input
    test_case.input
  end

  # @return [Object] expected value declared on the test case
  def expected
    test_case.expected
  end

  # @param threshold [Numeric] minimum score that counts as a pass
  # @return [Boolean]
  def passed?(threshold = 0.5)
    score.passed?(threshold)
  end

  # @param threshold [Numeric] minimum score that counts as a pass
  # @return [Boolean]
  def failed?(threshold = 0.5)
    score.failed?(threshold)
  end

  # @return [Boolean] true when an exception was captured for this case
  def errored?
    !error.nil?
  end

  # The value the agent produced, reduced to a comparable form:
  # `{route: ...}` for results exposing `route`, `content` for results
  # exposing `content`, otherwise the raw result. nil when there is no result.
  #
  # @return [Object, nil]
  def actual
    return nil unless agent_result

    if agent_result.respond_to?(:route)
      {route: agent_result.route}
    elsif agent_result.respond_to?(:content)
      agent_result.content
    else
      agent_result
    end
  end

  # Serializes this result to a plain Hash.
  #
  # @param threshold [Numeric] pass threshold used for the :passed flag;
  #   defaults to 0.5 so existing zero-argument callers are unaffected
  # @return [Hash]
  def to_h(threshold = 0.5)
    {
      name: test_case_name,
      score: score.value,
      reason: score.reason,
      passed: passed?(threshold),
      input: input,
      expected: expected,
      actual: actual,
      execution_id: execution_id,
      error: error&.message
    }
  end
end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Agents
5
+ module Eval
6
# Aggregate results from running an eval suite.
#
# Provides score calculation, pass/fail counts, failure details,
# and a formatted summary string.
class EvalRun
  attr_reader :suite, :results, :model, :pass_threshold,
    :started_at, :completed_at

  # @param suite [Object] the suite that produced the results
  # @param results [Array] per-test-case result objects
  # @param model [Object, nil] model the run was executed with
  # @param pass_threshold [Numeric] minimum score that counts as a pass
  # @param started_at [Time, nil]
  # @param completed_at [Time, nil]
  def initialize(suite:, results:, model:, pass_threshold:, started_at:, completed_at:)
    @suite = suite
    @results = results
    @model = model
    @pass_threshold = pass_threshold
    @started_at = started_at
    @completed_at = completed_at
  end

  # @return [Object] the suite's agent class, or the suite itself when it
  #   does not expose one
  def agent_class
    suite.respond_to?(:agent_class) ? suite.agent_class : suite
  end

  # Average score across all test cases (0.0 to 1.0)
  def score
    return 0.0 if results.empty?

    results.sum { |r| r.score.value } / results.size.to_f
  end

  # @return [Float] average score as a percentage, rounded to one decimal
  def score_pct
    (score * 100).round(1)
  end

  # @return [Integer]
  def total_cases
    results.size
  end

  # @return [Integer] number of results passing at pass_threshold
  def passed
    results.count { |r| r.passed?(pass_threshold) }
  end

  # @return [Integer] number of results failing at pass_threshold
  def failed
    results.count { |r| r.failed?(pass_threshold) }
  end

  # @return [Array] results failing at pass_threshold
  def failures
    results.select { |r| r.failed?(pass_threshold) }
  end

  # @return [Array] results that captured an exception
  def errors
    results.select(&:errored?)
  end

  # Total recorded cost of the executions behind this run.
  #
  # Looks all execution ids up in one call instead of one `find_by` per
  # result. Best-effort: returns 0 when there are no ids, when the
  # Execution model is not loaded, or when the lookup raises.
  #
  # @return [Numeric]
  def total_cost
    ids = results.map(&:execution_id).compact
    return 0 if ids.empty?
    return 0 unless defined?(RubyLLM::Agents::Execution)

    RubyLLM::Agents::Execution.where(id: ids).sum(:total_cost)
  rescue StandardError
    0
  end

  # @return [Integer] wall-clock duration in milliseconds; 0 when either
  #   timestamp is missing
  def duration_ms
    return 0 unless started_at && completed_at

    ((completed_at - started_at) * 1000).to_i
  end

  # Builds a multi-line, human-readable report: a header, score/cost lines,
  # then a "Failures:" and an "Errors:" section when there are any.
  #
  # @return [String]
  def summary
    # started_at may be nil (duration_ms tolerates that too), so don't call
    # strftime on it unconditionally.
    started = started_at ? started_at.strftime("%Y-%m-%d %H:%M") : "n/a"
    failing = failures
    errored = errors

    lines = ["#{agent_name} Eval — #{started}"]
    lines << "Model: #{model} | Score: #{score_pct}% | #{passed}/#{total_cases} passed"
    lines << "Cost: $#{"%.4f" % total_cost} | Duration: #{(duration_ms / 1000.0).round(1)}s"

    if failing.any?
      lines << ""
      lines << "Failures:"
      failing.each do |r|
        lines << "  - #{r.test_case_name}: expected #{r.expected.inspect}, got #{r.actual.inspect} (#{r.score.reason})"
      end
    end

    if errored.any?
      lines << ""
      lines << "Errors:"
      errored.each do |r|
        lines << "  - #{r.test_case_name}: #{r.error.message}"
      end
    end

    lines.join("\n")
  end

  # Serializes the run to a plain Hash. Each entry under :results has its
  # :passed flag evaluated at this run's pass_threshold so it agrees with
  # the aggregate :passed / :failed counts.
  #
  # @return [Hash]
  def to_h
    {
      agent: agent_name,
      model: model,
      score: score,
      score_pct: score_pct,
      total_cases: total_cases,
      passed: passed,
      failed: failed,
      total_cost: total_cost,
      duration_ms: duration_ms,
      results: results.map { |r| r.to_h.merge(passed: r.passed?(pass_threshold)) }
    }
  end

  # @return [String] JSON encoding of #to_h
  def to_json(*args)
    to_h.to_json(*args)
  end

  private

  # Display name for the evaluated agent.
  def agent_name
    agent_class.respond_to?(:name) ? agent_class.name : agent_class.to_s
  end
end
122
+ end
123
+ end
124
+ end
@@ -0,0 +1,264 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Agents
5
+ module Eval
6
# Score value object — returned by every scorer.
#
# `value` is coerced with `to_f` and clamped into 0.0..1.0 on construction;
# `reason` is an optional explanation.
Score = Struct.new(:value, :reason, keyword_init: true) do
  def initialize(value:, reason: nil)
    bounded = value.to_f.clamp(0.0, 1.0)
    super(value: bounded, reason: reason)
  end

  # True when the value reaches the given threshold.
  def passed?(threshold = 0.5)
    value >= threshold
  end

  # Logical inverse of #passed? for the same threshold.
  def failed?(threshold = 0.5)
    return false if passed?(threshold)

    true
  end
end
20
+
21
# A single test case definition: name, input, expected value, scorer and
# any extra options.
TestCase = Struct.new(:name, :input, :expected, :scorer, :options, keyword_init: true) do
  # Returns the input, calling it first when it was given as a Proc.
  def resolve_input
    return input.call if input.is_a?(Proc)

    input
  end
end
27
+
28
# Defines test cases for an agent, runs them, scores results.
#
# @example
#   class SupportRouter::Eval < RubyLLM::Agents::EvalSuite
#     agent SupportRouter
#     test_case "billing", input: { message: "charged twice" }, expected: { route: :billing }
#   end
#
#   run = SupportRouter::Eval.run!
#   puts run.summary
class EvalSuite
  class << self
    attr_reader :agent_class, :test_cases, :eval_options

    # Gives every subclass its own empty test-case list and options hash.
    def inherited(subclass)
      super
      subclass.instance_variable_set(:@test_cases, [])
      subclass.instance_variable_set(:@eval_options, {})
    end

    # --- DSL ---

    # Sets the agent class under evaluation.
    def agent(klass)
      @agent_class = klass
    end

    # Registers a test case.
    #
    # @param name [Object] case name (matched by `run!(only:)`)
    # @param input [Hash, Proc] keyword arguments for the agent, or a Proc
    #   producing them when the case runs
    # @param expected [Object, nil] expected value handed to the scorer
    # @param score [Symbol, Proc, nil] :exact_match (default), :contains,
    #   :llm_judge, or a Proc called with (agent_result, expected)
    # @param options [Hash] extra options (e.g. :criteria, :judge_model)
    def test_case(name, input:, expected: nil, score: nil, **options)
      @test_cases << TestCase.new(
        name: name,
        input: input,
        expected: expected,
        scorer: score,
        options: options
      )
    end

    # Loads test cases from a YAML file holding a list of mappings with
    # :name, :input, :expected, optional :score and extra options.
    #
    # @param path [String, Pathname] absolute path, or a path relative to
    #   Rails.root
    def dataset(path)
      location = path.to_s
      # File.absolute_path? also accepts Pathname-derived and drive-letter
      # paths, which a plain start_with?("/") check does not.
      location = Rails.root.join(location).to_s unless File.absolute_path?(location)
      cases = YAML.safe_load_file(location, permitted_classes: [Symbol], symbolize_names: true)

      # An empty YAML document loads as nil/false; treat it as "no cases".
      (cases || []).each do |tc|
        test_case(
          tc[:name],
          input: tc[:input],
          expected: tc[:expected],
          score: tc[:score]&.to_sym,
          **tc.except(:name, :input, :expected, :score)
        )
      end
    end

    # Sets the default model passed to the agent for every case.
    def eval_model(value)
      @eval_options[:model] = value
    end

    # Sets the temperature passed to the agent for every case.
    def eval_temperature(value)
      @eval_options[:temperature] = value
    end

    # --- Running ---

    # Validates the suite, runs the selected cases and wraps the results.
    #
    # @param model [Object, nil] overrides the suite's eval_model
    # @param only [Object, Array, nil] name(s) of the cases to run
    # @param pass_threshold [Numeric] pass threshold recorded on the run
    # @param overrides [Hash] extra keyword arguments merged into each call
    # @return [EvalRun]
    def run!(model: nil, only: nil, pass_threshold: 0.5, overrides: {}, **options)
      validate!
      wanted = Array(only) if only
      cases = wanted ? @test_cases.select { |tc| wanted.include?(tc.name) } : @test_cases
      resolved_model = model || @eval_options[:model]
      temperature = @eval_options[:temperature]
      started_at = Time.current

      results = cases.map do |tc|
        run_single(tc, model: resolved_model, temperature: temperature, overrides: overrides)
      end

      EvalRun.new(
        suite: self,
        results: results,
        model: resolved_model || (agent_class.respond_to?(:model) ? agent_class.model : nil),
        pass_threshold: pass_threshold,
        started_at: started_at,
        completed_at: Time.current
      )
    end

    # Checks that an agent and at least one test case are configured and,
    # when the agent exposes `params`, that every non-lazy input supplies
    # all required params.
    #
    # @raise [ConfigurationError] when the suite is misconfigured
    # @return [true]
    def validate!
      raise ConfigurationError, "No agent class set" unless @agent_class
      raise ConfigurationError, "No test cases defined" if @test_cases.empty?

      @test_cases.each do |tc|
        next if tc.input.is_a?(Proc)
        next unless @agent_class.respond_to?(:params)

        agent_params = @agent_class.params
        required = agent_params.select { |_, v| v[:required] }.keys
        # A nil or otherwise non-Hash input supplies no params, so report
        # every required one as missing rather than failing on `.keys`.
        provided = tc.input.respond_to?(:keys) ? tc.input.keys : []
        missing = required - provided
        if missing.any?
          raise ConfigurationError,
            "Test case '#{tc.name}' missing required params: #{missing.join(", ")}"
        end
      end
      true
    end

    # Builds an anonymous suite for the given agent; the block is evaluated
    # on the new suite so it can call the DSL methods directly.
    def for(agent_klass, &block)
      suite = Class.new(self)
      suite.agent(agent_klass)
      suite.instance_eval(&block) if block
      suite
    end

    private

    # Runs one case and scores it. ArgumentError is re-raised; any other
    # StandardError becomes a zero-score result carrying the error.
    def run_single(tc, model:, temperature:, overrides:)
      input = tc.resolve_input
      call_options = input.dup
      call_options.merge!(overrides) if overrides.any?
      call_options[:model] = model if model
      call_options[:temperature] = temperature if temperature

      agent_result = agent_class.call(**call_options)
      score = evaluate(tc, agent_result, input)

      EvalResult.new(
        test_case: tc,
        agent_result: agent_result,
        score: score,
        execution_id: agent_result.respond_to?(:execution_id) ? agent_result.execution_id : nil
      )
    rescue ArgumentError
      raise
    rescue => e
      EvalResult.new(
        test_case: tc,
        agent_result: nil,
        score: Score.new(value: 0.0, reason: "Error: #{e.class}: #{e.message}"),
        error: e
      )
    end

    # Dispatches to the scorer configured on the test case.
    # `input` is the already-resolved input the agent was called with.
    def evaluate(tc, agent_result, input = tc.input)
      case tc.scorer
      when Proc
        coerce_score(tc.scorer.call(agent_result, tc.expected))
      when :contains
        score_contains(agent_result, tc.expected)
      when :llm_judge
        score_llm_judge(agent_result, tc, input)
      when :exact_match, nil
        score_exact_match(agent_result, tc.expected)
      else
        raise ArgumentError, "Unknown scorer: #{tc.scorer}"
      end
    end

    # Normalizes a custom scorer's return value into a Score.
    def coerce_score(value)
      case value
      when Score then value
      when Numeric then Score.new(value: value)
      when true then Score.new(value: 1.0)
      when false then Score.new(value: 0.0)
      else Score.new(value: 0.0, reason: "Scorer returned #{value.class}")
      end
    end

    # --- Built-in scorers ---

    # 1.0 when the comparable form of the result equals the normalized
    # expectation, otherwise 0.0 with an explanatory reason.
    def score_exact_match(result, expected)
      actual = extract_comparable(result)
      expected_val = normalize_expected(expected)

      if actual == expected_val
        Score.new(value: 1.0)
      else
        Score.new(value: 0.0, reason: "Expected #{expected_val.inspect}, got #{actual.inspect}")
      end
    end

    # 1.0 when every expected fragment occurs in the content
    # (case-insensitive), otherwise 0.0 listing the missing fragments.
    def score_contains(result, expected)
      content = result.respond_to?(:content) ? result.content.to_s : result.to_s
      haystack = content.downcase
      missing = Array(expected).reject { |e| haystack.include?(e.to_s.downcase) }

      if missing.empty?
        Score.new(value: 1.0)
      else
        Score.new(value: 0.0, reason: "Missing: #{missing.join(", ")}")
      end
    end

    # Asks a judge model to grade the response from 0 to 10 against the
    # case's :criteria option, then scales that to 0..1. Any failure is
    # reported as a zero score with a "Judge error" reason.
    def score_llm_judge(result, tc, input = tc.input)
      content = result.respond_to?(:content) ? result.content.to_s : result.to_s
      criteria = tc.options[:criteria]
      judge_model = tc.options[:judge_model] || "gpt-4o-mini"

      # `input` is the resolved input, so lazy (Proc) inputs show their
      # value in the prompt instead of the Proc's inspect string.
      prompt = <<~PROMPT
        You are evaluating an AI agent's response. Score it from 0 to 10.

        ## Input
        #{input.inspect}

        ## Agent Response
        #{content}

        ## Criteria
        #{criteria}

        Respond with ONLY a JSON object:
        {"score": <0-10>, "reason": "<brief explanation>"}
      PROMPT

      chat = RubyLLM.chat(model: judge_model)
      parsed = parse_judge_reply(chat.ask(prompt).content)
      Score.new(value: parsed["score"].to_f / 10.0, reason: parsed["reason"])
    rescue => e
      Score.new(value: 0.0, reason: "Judge error: #{e.message}")
    end

    # Parses the judge's reply, tolerating code fences or prose around the
    # JSON object by parsing from the first "{" to the last "}".
    def parse_judge_reply(raw)
      text = raw.to_s
      JSON.parse(text[/\{.*\}/m] || text)
    end

    # Reduces an agent result to the value compared by exact match.
    def extract_comparable(result)
      if result.respond_to?(:route)
        {route: result.route}
      elsif result.respond_to?(:content)
        content = result.content
        content.is_a?(Hash) ? content.transform_keys(&:to_sym) : content.to_s.strip
      else
        result
      end
    end

    # Symbolizes Hash keys and strips Strings so expectations compare
    # against the output of extract_comparable.
    def normalize_expected(expected)
      case expected
      when Hash then expected.transform_keys(&:to_sym)
      when String then expected.strip
      else expected
      end
    end
  end
end
262
+ end
263
+ end
264
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "eval/eval_suite"
4
+ require_relative "eval/eval_result"
5
+ require_relative "eval/eval_run"
@@ -85,7 +85,8 @@ module RubyLLM
85
85
  if config.async_logging && defined?(ExecutionLoggerJob)
86
86
  ExecutionLoggerJob.perform_later(execution_data)
87
87
  else
88
- RubyLLM::Agents::Execution.create!(execution_data)
88
+ execution = RubyLLM::Agents::Execution.create!(execution_data)
89
+ result.execution_id = execution.id if result.respond_to?(:execution_id=)
89
90
  end
90
91
  rescue => e
91
92
  Rails.logger.error("[RubyLLM::Agents] Failed to record #{execution_type} execution: #{e.message}") if defined?(Rails)