braintrust 0.2.0 → 0.2.1

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,178 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../api"
4
- require_relative "scorer"
5
- require "opentelemetry/sdk"
6
- require "json"
3
+ require_relative "../functions"
7
4
 
8
5
  module Braintrust
9
6
  module Eval
10
- # Functions provides remote function execution capabilities
11
- # Allows calling prompts hosted on Braintrust servers as tasks or scorers
7
+ # @deprecated Use {Braintrust::Functions} instead.
12
8
  module Functions
13
9
  class << self
14
- # Create a task callable that invokes a remote function
15
- # @param project [String] Project name
16
- # @param slug [String] Function slug
17
- # @param state [State, nil] Braintrust state (defaults to global)
18
- # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider
19
- # @return [Proc] Callable that accepts input and returns output
20
- def task(project:, slug:, state: nil, tracer_provider: nil)
21
- state ||= Braintrust.current_state
22
- raise Error, "No state available" unless state
23
-
24
- # Resolve function ID from project + slug
25
- api = API.new(state: state)
26
- function_metadata = resolve_function(api, project, slug)
27
- function_id = function_metadata["id"]
28
- function_name = function_metadata["name"] || slug
29
-
30
- # Get tracer for creating spans
31
- tracer_provider ||= OpenTelemetry.tracer_provider
32
- tracer = tracer_provider.tracer("braintrust.functions")
33
-
34
- # Return a lambda that invokes the remote function with tracing
35
- lambda do |input|
36
- # Create a span for the function invocation
37
- tracer.in_span("function: #{slug}") do |span|
38
- span.set_attribute("braintrust.span_attributes", JSON.dump({type: "function"}))
39
- span.set_attribute("braintrust.input_json", JSON.dump(input))
40
- span.set_attribute("braintrust.function.name", function_name)
41
- span.set_attribute("braintrust.function.id", function_id)
42
- span.set_attribute("braintrust.function.slug", slug)
43
-
44
- begin
45
- # Invoke the function via API
46
- output = api.functions.invoke(id: function_id, input: input)
47
- span.set_attribute("braintrust.output_json", JSON.dump(output))
48
- output
49
- rescue => e
50
- # Record exception and set error status
51
- span.record_exception(e)
52
- span.status = OpenTelemetry::Trace::Status.error(e.message)
53
- raise
54
- end
55
- end
56
- end
57
- end
58
-
59
- # Create a scorer that invokes a remote function by ID
60
- # @param id [String] Function UUID
61
- # @param version [String, nil] Optional version to pin to
62
- # @param state [State, nil] Braintrust state (defaults to global)
63
- # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider
64
- # @return [Scorer] Scorer object that invokes remote function
65
- def scorer_by_id(id:, state: nil, version: nil, tracer_provider: nil)
66
- state ||= Braintrust.current_state
67
- api = API.new(state: state)
68
- api.login
69
-
70
- function_metadata = api.functions.get(id: id, version: version)
71
- function_id = function_metadata["id"]
72
- function_name = function_metadata["name"] || id
73
-
74
- tracer_provider ||= OpenTelemetry.tracer_provider
75
- tracer = tracer_provider.tracer("braintrust.functions")
76
-
77
- build_scorer(function_id: function_id, function_name: function_name, api: api, tracer: tracer)
10
+ # @deprecated Use {Braintrust::Functions.task} instead.
11
+ def task(**kwargs)
12
+ Log.warn_once(:eval_functions_task, "Braintrust::Eval::Functions.task is deprecated: use Braintrust::Functions.task instead.")
13
+ Braintrust::Functions.task(**kwargs)
78
14
  end
79
15
 
80
- # Create a scorer that invokes a remote function
81
- # @param project [String] Project name
82
- # @param slug [String] Function slug
83
- # @param state [State, nil] Braintrust state (defaults to global)
84
- # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider
85
- # @return [Scorer] Scorer object that invokes remote function
86
- def scorer(project:, slug:, state: nil, tracer_provider: nil)
87
- state ||= Braintrust.current_state
88
- raise Error, "No state available" unless state
89
-
90
- # Resolve function ID from project + slug
91
- api = API.new(state: state)
92
- function_metadata = resolve_function(api, project, slug)
93
- function_id = function_metadata["id"]
94
- function_name = function_metadata["name"] || slug
95
-
96
- # Get tracer for creating spans
97
- tracer_provider ||= OpenTelemetry.tracer_provider
98
- tracer = tracer_provider.tracer("braintrust.functions")
99
-
100
- build_scorer(function_id: function_id, function_name: function_name, api: api, tracer: tracer)
101
- end
102
-
103
- private
104
-
105
- # Build a Scorer that invokes a remote function
106
- # Shared implementation used by both scorer and scorer_by_id
107
- # @param function_id [String] Function UUID
108
- # @param function_name [String] Function display name
109
- # @param api [API] Braintrust API client
110
- # @param tracer [OpenTelemetry::Trace::Tracer] Tracer instance
111
- # @return [Scorer]
112
- def build_scorer(function_id:, function_name:, api:, tracer:)
113
- Scorer.new(function_name) do |input, expected, output, metadata|
114
- tracer.in_span("function: #{function_name}") do |span|
115
- scorer_input = {
116
- input: input,
117
- expected: expected,
118
- output: output,
119
- metadata: metadata
120
- }
121
-
122
- span.set_attribute("braintrust.span_attributes", JSON.dump({type: "function"}))
123
- span.set_attribute("braintrust.input_json", JSON.dump(scorer_input))
124
- span.set_attribute("braintrust.function.name", function_name)
125
- span.set_attribute("braintrust.function.id", function_id)
126
-
127
- begin
128
- result = api.functions.invoke(id: function_id, input: scorer_input)
129
-
130
- score = case result
131
- when Numeric
132
- result.to_f
133
- when true
134
- 1.0
135
- when false
136
- 0.0
137
- when Hash
138
- if result.key?("score")
139
- result["score"].to_f
140
- else
141
- raise Error, "Hash result must contain 'score' key"
142
- end
143
- when String
144
- result.to_f
145
- when nil
146
- nil
147
- else
148
- raise Error, "Unsupported result type: #{result.class}"
149
- end
150
-
151
- span.set_attribute("braintrust.output_json", JSON.dump(score))
152
- score
153
- rescue => e
154
- span.record_exception(e)
155
- span.status = OpenTelemetry::Trace::Status.error(e.message)
156
- raise
157
- end
158
- end
159
- end
160
- end
161
-
162
- # Resolve function ID from project name and slug
163
- # @param api [API] API client
164
- # @param project [String] Project name
165
- # @param slug [String] Function slug
166
- # @return [Hash] Function metadata
167
- def resolve_function(api, project, slug)
168
- result = api.functions.list(project_name: project, slug: slug)
169
- functions = result["objects"]
170
-
171
- if functions.nil? || functions.empty?
172
- raise Error, "Function '#{slug}' not found in project '#{project}'"
173
- end
174
-
175
- functions.first
16
+ # @deprecated Use {Braintrust::Functions.scorer} instead.
17
+ def scorer(**kwargs)
18
+ Log.warn_once(:eval_functions_scorer, "Braintrust::Eval::Functions.scorer is deprecated: use Braintrust::Functions.scorer instead.")
19
+ Braintrust::Functions.scorer(**kwargs)
176
20
  end
177
21
  end
178
22
  end
@@ -1,59 +1,52 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "case"
4
- require_relative "cases"
5
- require_relative "scorer"
6
4
  require_relative "result"
7
5
  require_relative "summary"
6
+ require_relative "trace"
8
7
  require_relative "../internal/thread_pool"
8
+ require_relative "../api/internal/btql"
9
9
 
10
10
  require "opentelemetry/sdk"
11
11
  require "json"
12
12
 
13
13
  module Braintrust
14
14
  module Eval
15
- # Internal runner class that performs the execution of the Eval and returns the result
15
+ # Internal runner class that performs the execution of the Eval and returns the result.
16
+ # Receives a fully-normalized Context — all callables are already typed wrappers.
16
17
  class Runner
17
18
  # Maximum parallelism allowed (mirrors Internal::ThreadPool::MAX_PARALLELISM)
18
19
  MAX_PARALLELISM = Internal::ThreadPool::MAX_PARALLELISM
19
20
 
20
- def initialize(task:, scorers:, experiment_id: nil, experiment_name: nil,
21
- project_id: nil, project_name: nil, state: nil, tracer_provider: nil,
22
- on_progress: nil, parent: nil)
23
- @experiment_id = experiment_id
24
- @experiment_name = experiment_name
25
- @project_id = project_id
26
- @project_name = project_name
27
- @task = task
28
- @scorers = normalize_scorers(scorers)
29
- @state = state
30
- @tracer_provider = tracer_provider || OpenTelemetry.tracer_provider
31
- @tracer = @tracer_provider.tracer("braintrust-eval")
32
- @parent_attr = parent ? "#{parent[:object_type]}:#{parent[:object_id]}" : nil
33
- @generation = parent&.dig(:generation)
34
- @on_progress = on_progress
21
+ # Per-case mutable accumulator. Built from Case, populated by task and scoring stages.
22
+ CaseContext = Struct.new(:input, :expected, :output, :metadata, :tags, :trace, :origin, keyword_init: true)
23
+
24
+ # @param eval_context [Context] Normalized eval context
25
+ def initialize(eval_context)
26
+ @eval_context = eval_context
27
+ tracer_provider = eval_context.tracer_provider || OpenTelemetry.tracer_provider
28
+ @tracer = tracer_provider.tracer("braintrust-eval")
35
29
 
36
30
  # Mutex for thread-safe score collection
37
31
  @score_mutex = Mutex.new
38
32
  end
39
33
 
40
34
  # Run evaluation and return Result
41
- # @param cases [Array, Enumerable] Test cases
42
35
  # @param parallelism [Integer] Number of parallel workers (default: 1)
43
36
  # @return [Result]
44
- def run(cases, parallelism: 1)
37
+ def run(parallelism: 1)
45
38
  start_time = Time.now
46
- normalized_cases = normalize_cases(cases)
39
+ eval_cases = eval_context.cases
47
40
  errors = Queue.new
48
41
  @scores = {} # Reset for each run: { scorer_name => Array<Numeric> }
49
42
 
50
43
  if parallelism && parallelism > 1
51
- Internal::ThreadPool.each(normalized_cases, parallelism: parallelism) do |test_case|
52
- run_case(test_case, errors)
44
+ Internal::ThreadPool.each(eval_cases, parallelism: parallelism) do |eval_case|
45
+ run_eval_case(build_case_context(eval_case), errors)
53
46
  end
54
47
  else
55
- normalized_cases.each do |test_case|
56
- run_case(test_case, errors)
48
+ eval_cases.each do |eval_case|
49
+ run_eval_case(build_case_context(eval_case), errors)
57
50
  end
58
51
  end
59
52
 
@@ -64,15 +57,15 @@ module Braintrust
64
57
  duration = Time.now - start_time
65
58
 
66
59
  # Generate permalink (only when state and experiment are available)
67
- permalink = if @state && experiment_id
68
- @state.object_permalink(object_type: "experiment", object_id: experiment_id)
60
+ permalink = if eval_context.state && eval_context.experiment_id
61
+ eval_context.state.object_permalink(object_type: "experiment", object_id: eval_context.experiment_id)
69
62
  end
70
63
 
71
64
  Result.new(
72
- experiment_id: experiment_id,
73
- experiment_name: experiment_name,
74
- project_id: project_id,
75
- project_name: project_name,
65
+ experiment_id: eval_context.experiment_id,
66
+ experiment_name: eval_context.experiment_name,
67
+ project_id: eval_context.project_id,
68
+ project_name: eval_context.project_name,
76
69
  permalink: permalink,
77
70
  errors: error_array,
78
71
  duration: duration,
@@ -82,86 +75,71 @@ module Braintrust
82
75
 
83
76
  private
84
77
 
85
- attr_reader :experiment_id, :experiment_name, :project_id, :project_name,
86
- :task, :scorers, :tracer, :parent_attr
78
+ attr_reader :eval_context, :tracer
87
79
 
88
80
  # Run a single test case with OpenTelemetry tracing
89
81
  # Creates eval span (parent) with task and score as children
90
- # @param test_case [Case] The test case
82
+ # @param case_context [CaseContext] The per-case accumulator
91
83
  # @param errors [Queue] Thread-safe error collection queue
92
- def run_case(test_case, errors)
84
+ def run_eval_case(case_context, errors)
93
85
  tracer.in_span("eval") do |eval_span|
94
- eval_span.set_attribute("braintrust.parent", parent_attr) if parent_attr
86
+ eval_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
95
87
 
96
88
  # Set tags early so they're present even if task fails
97
- eval_span.set_attribute("braintrust.tags", test_case.tags) if test_case.tags
89
+ eval_span.set_attribute("braintrust.tags", case_context.tags) if case_context.tags
98
90
 
99
91
  # Run task
100
- output = nil
101
92
  begin
102
- output = run_task(test_case)
93
+ case_context.output = run_task(case_context)
103
94
  rescue => e
104
95
  # Error already recorded on task span, set eval span status
105
96
  eval_span.status = OpenTelemetry::Trace::Status.error(e.message)
106
- errors << "Task failed for input '#{test_case.input}': #{e.message}"
107
- if @on_progress
108
- error_progress = {
109
- "id" => eval_span.context.hex_span_id,
110
- "error" => e.message
111
- }
112
- if test_case.origin
113
- error_progress["origin"] = test_case.origin.is_a?(String) ? JSON.parse(test_case.origin) : test_case.origin
114
- end
115
- @on_progress.call(error_progress)
116
- end
97
+ errors << "Task failed for input '#{case_context.input}': #{e.message}"
98
+ report_progress(eval_span, case_context, error: e.message)
117
99
  next
118
100
  end
119
101
 
102
+ # Flush spans so they're queryable via BTQL, then build trace
103
+ eval_context.tracer_provider&.force_flush
104
+ case_context.trace = build_trace(eval_span)
105
+
120
106
  # Run scorers
121
107
  case_scores = nil
122
108
  begin
123
- case_scores = run_scorers(test_case, output)
109
+ case_scores = run_scorers(case_context)
124
110
  rescue => e
125
111
  # Error already recorded on score span, set eval span status
126
112
  eval_span.status = OpenTelemetry::Trace::Status.error(e.message)
127
- errors << "Scorers failed for input '#{test_case.input}': #{e.message}"
113
+ errors << "Scorers failed for input '#{case_context.input}': #{e.message}"
128
114
  end
129
115
 
130
116
  # Set eval span attributes (after task and scorers complete)
131
117
  set_json_attr(eval_span, "braintrust.span_attributes", build_span_attributes("eval"))
132
- set_json_attr(eval_span, "braintrust.input_json", test_case.input)
133
- set_json_attr(eval_span, "braintrust.output_json", output)
134
- set_json_attr(eval_span, "braintrust.expected", test_case.expected) if test_case.expected
118
+ set_json_attr(eval_span, "braintrust.input_json", case_context.input)
119
+ set_json_attr(eval_span, "braintrust.output_json", case_context.output)
120
+ set_json_attr(eval_span, "braintrust.expected", case_context.expected) if case_context.expected
135
121
 
136
122
  # Set origin for cases from remote sources (already JSON-serialized)
137
- eval_span.set_attribute("braintrust.origin", test_case.origin) if test_case.origin
138
-
139
- if @on_progress
140
- progress = {
141
- "id" => eval_span.context.hex_span_id,
142
- "data" => output,
143
- "scores" => case_scores || {}
144
- }
145
- if test_case.origin
146
- progress["origin"] = test_case.origin.is_a?(String) ? JSON.parse(test_case.origin) : test_case.origin
147
- end
148
- @on_progress.call(progress)
149
- end
123
+ eval_span.set_attribute("braintrust.origin", case_context.origin) if case_context.origin
124
+
125
+ report_progress(eval_span, case_context, data: case_context.output, scores: case_scores || {})
150
126
  end
151
127
  end
152
128
 
153
129
  # Run task with OpenTelemetry tracing
154
130
  # Creates task span with input and output
155
- # @param test_case [Case] The test case
131
+ # @param case_context [CaseContext] The per-case context
156
132
  # @return [Object] Task output
157
- def run_task(test_case)
133
+ def run_task(case_context)
158
134
  tracer.in_span("task") do |task_span|
159
- task_span.set_attribute("braintrust.parent", parent_attr) if parent_attr
135
+ task_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
160
136
  set_json_attr(task_span, "braintrust.span_attributes", build_span_attributes("task"))
161
- set_json_attr(task_span, "braintrust.input_json", test_case.input)
137
+ set_json_attr(task_span, "braintrust.input_json", case_context.input)
162
138
 
163
139
  begin
164
- output = task.call(test_case.input)
140
+ output = eval_context.task.call(
141
+ input: case_context.input
142
+ )
165
143
  set_json_attr(task_span, "braintrust.output_json", output)
166
144
  output
167
145
  rescue => e
@@ -175,18 +153,24 @@ module Braintrust
175
153
 
176
154
  # Run scorers with OpenTelemetry tracing
177
155
  # Creates single score span for all scorers
178
- # @param test_case [Case] The test case
179
- # @param output [Object] Task output
156
+ # @param case_context [CaseContext] The per-case context (output must be populated)
180
157
  # @return [Hash] Scores hash { scorer_name => score_value }
181
- def run_scorers(test_case, output)
158
+ def run_scorers(case_context)
182
159
  tracer.in_span("score") do |score_span|
183
- score_span.set_attribute("braintrust.parent", parent_attr) if parent_attr
160
+ score_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
184
161
  set_json_attr(score_span, "braintrust.span_attributes", build_span_attributes("score"))
185
162
 
163
+ scorer_kwargs = {
164
+ input: case_context.input,
165
+ expected: case_context.expected,
166
+ output: case_context.output,
167
+ metadata: case_context.metadata || {},
168
+ trace: case_context.trace
169
+ }
186
170
  scores = {}
187
171
  scorer_error = nil
188
- scorers.each do |scorer|
189
- score_value = scorer.call(test_case.input, test_case.expected, output, test_case.metadata || {})
172
+ eval_context.scorers.each do |scorer|
173
+ score_value = scorer.call(**scorer_kwargs)
190
174
  scores[scorer.name] = score_value
191
175
 
192
176
  # Collect raw score for summary (thread-safe)
@@ -207,36 +191,44 @@ module Braintrust
207
191
  end
208
192
  end
209
193
 
210
- # Normalize cases input to Cases wrapper
211
- # @param cases_input [Array, Enumerable, Cases] The cases input
212
- # @return [Cases]
213
- def normalize_cases(cases_input)
214
- case cases_input
215
- when Cases
216
- cases_input
217
- when Array, Enumerable
218
- Cases.new(cases_input)
219
- else
220
- if cases_input.respond_to?(:each)
221
- Cases.new(cases_input)
222
- else
223
- raise ArgumentError, "cases must be Array or Enumerable"
224
- end
225
- end
194
+ # Build a lazy Trace for a case, backed by BTQL.
195
+ # Returns nil when state or experiment_id are unavailable (local-only mode).
196
+ # @param eval_span [OpenTelemetry::Trace::Span] The eval span for this case
197
+ # @return [Eval::Trace, nil]
198
+ def build_trace(eval_span)
199
+ return nil unless eval_context.state && eval_context.experiment_id
200
+
201
+ root_span_id = eval_span.context.hex_trace_id
202
+ object_type = "experiment"
203
+ object_id = eval_context.experiment_id
204
+ btql = API::Internal::BTQL.new(eval_context.state)
205
+
206
+ Eval::Trace.new(
207
+ spans: -> { btql.trace_spans(object_type: object_type, object_id: object_id, root_span_id: root_span_id) }
208
+ )
226
209
  end
227
210
 
228
- # Normalize scorers to Scorer objects
229
- # @param scorers_input [Array] The scorers input (Scorer objects or callables)
230
- # @return [Array<Scorer>]
231
- def normalize_scorers(scorers_input)
232
- scorers_input.map do |scorer|
233
- case scorer
234
- when Scorer
235
- scorer
236
- else
237
- Scorer.new(scorer)
238
- end
211
+ # Build a CaseContext from a Case struct
212
+ # @param eval_case [Case] The eval case
213
+ # @return [CaseContext]
214
+ def build_case_context(eval_case)
215
+ CaseContext.new(
216
+ input: eval_case.input, expected: eval_case.expected,
217
+ metadata: eval_case.metadata, tags: eval_case.tags, origin: eval_case.origin
218
+ )
219
+ end
220
+
221
+ # Report progress for a case via on_progress callback.
222
+ # Rescues errors in the callback so a broken handler never crashes the eval.
223
+ def report_progress(eval_span, case_context, **fields)
224
+ return unless eval_context.on_progress
225
+ progress = {"id" => eval_span.context.hex_span_id}.merge(fields.transform_keys(&:to_s))
226
+ if case_context.origin
227
+ progress["origin"] = case_context.origin.is_a?(String) ? JSON.parse(case_context.origin) : case_context.origin
239
228
  end
229
+ eval_context.on_progress.call(progress)
230
+ rescue => e
231
+ Braintrust.logger.warn("on_progress callback error: #{e.message}")
240
232
  end
241
233
 
242
234
  # Record error on span with exception event and error status
@@ -258,8 +250,8 @@ module Braintrust
258
250
  # @return [Hash]
259
251
  def build_span_attributes(type)
260
252
  attrs = {type: type}
261
- attrs[:name] = experiment_name if experiment_name
262
- attrs[:generation] = @generation if @generation
253
+ attrs[:name] = eval_context.experiment_name if eval_context.experiment_name
254
+ attrs[:generation] = eval_context.generation if eval_context.generation
263
255
  attrs
264
256
  end
265
257
 
@@ -1,112 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../scorer"
4
+
3
5
  module Braintrust
4
6
  module Eval
5
- # Scorer wraps a scoring function that evaluates task output against expected values
6
- # Scorers can accept 3 params (input, expected, output) or 4 params (input, expected, output, metadata)
7
- # They can return a float, hash, or array of hashes
8
- class Scorer
9
- attr_reader :name
10
-
11
- # Create a new scorer
12
- # @param name_or_callable [String, Symbol, #call] Name or callable (if callable, name is auto-detected)
13
- # @param callable [#call, nil] Callable if name was provided separately
14
- # @param block [Proc, nil] Block if no callable provided
15
- def initialize(name_or_callable = nil, callable = nil, &block)
16
- # Determine name and callable from arguments
17
- if name_or_callable.nil? && callable.nil? && block.nil?
18
- raise ArgumentError, "Must provide callable or block"
19
- end
7
+ # @deprecated Use {Braintrust::Scorer} instead.
8
+ module Scorer
9
+ # @deprecated Use {Braintrust::Scorer.new} instead.
10
+ def self.new(name_or_callable = nil, callable = nil, &block)
11
+ Log.warn_once(:eval_scorer_class, "Braintrust::Eval::Scorer is deprecated: use Braintrust::Scorer.new instead.")
20
12
 
21
- # If first arg is a string/symbol, it's the name
22
13
  if name_or_callable.is_a?(String) || name_or_callable.is_a?(Symbol)
23
- @name = name_or_callable.to_s
24
- @callable = callable || block
25
- raise ArgumentError, "Must provide callable or block" unless @callable
14
+ name = name_or_callable.to_s
15
+ block = callable.method(:call) if callable && !block
26
16
  else
27
- # First arg is the callable, try to auto-detect name
28
- @callable = name_or_callable || callable || block
29
- @name = detect_name(@callable)
17
+ resolved = name_or_callable || callable
18
+ block = resolved.method(:call) if resolved && !block
19
+ name = nil
30
20
  end
31
21
 
32
- # Validate callable
33
- unless @callable.respond_to?(:call)
34
- raise ArgumentError, "Scorer must be callable (respond to :call)"
35
- end
36
-
37
- # Detect arity and wrap callable if needed
38
- @wrapped_callable = wrap_callable(@callable)
22
+ scorer = Braintrust::Scorer.new(name, &block)
23
+ scorer.singleton_class.prepend(PositionalArgsRemapping)
24
+ scorer
39
25
  end
40
26
 
41
- # Call the scorer
42
- # @param input [Object] The input to the task
43
- # @param expected [Object] The expected output
44
- # @param output [Object] The actual output from the task
45
- # @param metadata [Hash] Optional metadata
46
- # @return [Float, Hash, Array] Score value(s)
47
- def call(input, expected, output, metadata = {})
48
- @wrapped_callable.call(input, expected, output, metadata)
49
- end
50
-
51
- private
52
-
53
- # Detect the name from a callable object
54
- # @param callable [#call] The callable
55
- # @return [String] The detected name
56
- def detect_name(callable)
57
- # Method objects have .name
58
- if callable.is_a?(Method)
59
- return callable.name.to_s
60
- end
61
-
62
- # Objects with .name method
63
- if callable.respond_to?(:name)
64
- return callable.name.to_s
65
- end
66
-
67
- # Fallback
68
- "scorer"
69
- end
70
-
71
- # Wrap the callable to always accept 4 parameters
72
- # @param callable [#call] The callable to wrap
73
- # @return [Proc] Wrapped callable that accepts 4 params
74
- def wrap_callable(callable)
75
- arity = callable_arity(callable)
76
-
77
- case arity
78
- when 3
79
- # Callable takes 3 params - wrap to ignore metadata
80
- ->(input, expected, output, metadata) {
81
- callable.call(input, expected, output)
82
- }
83
- when 4, -4, -1
84
- # Callable takes 4 params (or variadic with 4+)
85
- # -4 means optional 4th param
86
- # -1 means variadic (*args)
87
- callable
88
- else
89
- raise ArgumentError, "Scorer must accept 3 or 4 parameters (got arity #{arity})"
90
- end
91
- end
92
-
93
- # Get the arity of a callable
94
- # @param callable [#call] The callable
95
- # @return [Integer] The arity
96
- def callable_arity(callable)
97
- if callable.respond_to?(:arity)
98
- callable.arity
99
- elsif callable.respond_to?(:method)
100
- callable.method(:call).arity
101
- else
102
- # Assume 3 params if we can't detect
103
- 3
27
+ # @deprecated Maps positional #call(input, expected, output, metadata) to keyword args.
28
+ # Will be removed when the legacy Eval::Scorer API is removed.
29
+ module PositionalArgsRemapping
30
+ def call(*args, **kwargs)
31
+ if args.any?
32
+ Log.warn_once(:scorer_positional_call, "Calling a Scorer with positional args is deprecated: use keyword args (input:, expected:, output:, metadata:) instead.")
33
+ kwargs = {input: args[0], expected: args[1], output: args[2], metadata: args[3]}
34
+ end
35
+ super(**kwargs)
104
36
  end
105
37
  end
106
38
  end
107
39
  end
108
-
109
- # Value object wrapping a remote scorer function UUID.
110
- # Used by Eval.run to distinguish remote scorers from local callables.
111
- ScorerId = Struct.new(:function_id, :version, keyword_init: true)
112
40
  end