braintrust 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19c996fdd8b5b96cb52af3d3dfd855d26fc73338efffa2c6c4ada7d71fdc0e76
4
- data.tar.gz: fada237f8610fee6f54aa0c820777900c31e6417312e5e0305df60744995e190
3
+ metadata.gz: 0f30760b63f57dfa236f8f8f74c60aabad6e693f86a57bf8699b028eb00e8639
4
+ data.tar.gz: fcae112dc4175b2248a853405587921f16eb2c67d2b8930e2a3877cc09b9e9d1
5
5
  SHA512:
6
- metadata.gz: de4d8d52ecb56254ef041df2ae0beb085ff69a23f2cc66cf472862ded5380736fb79d6fa05b8fa3b566a3de0539a9164d81b94c37a7a4bad13d7920df2e27889
7
- data.tar.gz: 1c285c2a42009decb4a0bb51b224bed27d19650fc4bb71b66c27ad074d557cb4932b07818614f5e9219f4d713d1cc3c4b9fb82756ce37a7c5e1aa12e3fbcc438
6
+ metadata.gz: 16e96c1f75646d2b581cb7a5c1c50ca66de3e625c75da11c6a9fd263313adea9a936e1e30f4a9733e16e56d3120737731d47ab89d858aafd7543b75011cbc9de
7
+ data.tar.gz: b926449904f3dafe6803f76105ee8d85b134c777b827c93b877390e318c167e1f8212eab7f207e1fc9a8e8b77cd6a2d48c5c374f412fc4af0a0f614a6c4de94e
@@ -164,7 +164,7 @@ module Braintrust
164
164
  raise ArgumentError, "Unsupported HTTP method: #{method}"
165
165
  end
166
166
 
167
- request["Authorization"] = "Bearer #{@state.api_key}"
167
+ request["Authorization"] = "Bearer #{@state.api_key!}"
168
168
 
169
169
  # Execute request with timing
170
170
  start_time = Time.now
@@ -239,7 +239,7 @@ module Braintrust
239
239
  raise ArgumentError, "Unsupported HTTP method: #{method}"
240
240
  end
241
241
 
242
- request["Authorization"] = "Bearer #{@state.api_key}"
242
+ request["Authorization"] = "Bearer #{@state.api_key!}"
243
243
 
244
244
  # Execute request with timing
245
245
  start_time = Time.now
@@ -63,7 +63,7 @@ module Braintrust
63
63
 
64
64
  request = Net::HTTP::Post.new(uri)
65
65
  request["Content-Type"] = "application/json"
66
- request["Authorization"] = "Bearer #{@state.api_key}"
66
+ request["Authorization"] = "Bearer #{@state.api_key!}"
67
67
  request["Accept"] = "application/x-jsonlines"
68
68
  request.body = JSON.dump(payload)
69
69
 
@@ -39,7 +39,7 @@ module Braintrust
39
39
 
40
40
  request = Net::HTTP::Post.new(uri)
41
41
  request["Content-Type"] = "application/json"
42
- request["Authorization"] = "Bearer #{@state.api_key}"
42
+ request["Authorization"] = "Bearer #{@state.api_key!}"
43
43
  request.body = JSON.dump(payload)
44
44
 
45
45
  response = Braintrust::Internal::Http.with_redirects(uri, request)
@@ -59,7 +59,7 @@ module Braintrust
59
59
  uri = URI("#{@state.api_url}/v1/experiment/#{id}")
60
60
 
61
61
  request = Net::HTTP::Delete.new(uri)
62
- request["Authorization"] = "Bearer #{@state.api_key}"
62
+ request["Authorization"] = "Bearer #{@state.api_key!}"
63
63
 
64
64
  response = Braintrust::Internal::Http.with_redirects(uri, request)
65
65
 
@@ -24,7 +24,7 @@ module Braintrust
24
24
 
25
25
  request = Net::HTTP::Post.new(uri)
26
26
  request["Content-Type"] = "application/json"
27
- request["Authorization"] = "Bearer #{@state.api_key}"
27
+ request["Authorization"] = "Bearer #{@state.api_key!}"
28
28
  request.body = JSON.dump({name: name})
29
29
 
30
30
  response = Braintrust::Internal::Http.with_redirects(uri, request)
@@ -44,7 +44,7 @@ module Braintrust
44
44
  uri = URI("#{@state.api_url}/v1/project/#{id}")
45
45
 
46
46
  request = Net::HTTP::Delete.new(uri)
47
- request["Authorization"] = "Bearer #{@state.api_key}"
47
+ request["Authorization"] = "Bearer #{@state.api_key!}"
48
48
 
49
49
  response = Braintrust::Internal::Http.with_redirects(uri, request)
50
50
 
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "internal/callable"
4
+
5
+ module Braintrust
6
+ # Classifier wraps a classification function that categorizes and labels eval outputs.
7
+ #
8
+ # Unlike scorers (which return numeric 0-1 values), classifiers return structured
9
+ # {Classification} items with an id and optional label and metadata.
10
+ #
11
+ # Use inline with a block (keyword args):
12
+ # classifier = Classifier.new("category") { |output:| {name: "category", id: "greeting", label: "Greeting"} }
13
+ #
14
+ # Or include in a class and define #call with keyword args:
15
+ # class CategoryClassifier
16
+ # include Braintrust::Classifier
17
+ #
18
+ # def call(output:)
19
+ # {name: "category", id: "greeting", label: "Greeting"}
20
+ # end
21
+ # end
22
+ #
23
+ # Classifiers may return a single Classification hash, an Array of them, or nil
24
+ # (meaning no classifications for this case).
25
+ module Classifier
26
+ DEFAULT_NAME = "classifier"
27
+
28
+ # @param base [Class] the class including Classifier
29
+ def self.included(base)
30
+ base.include(Callable)
31
+ end
32
+
33
+ # Create a block-based classifier.
34
+ #
35
+ # @param name [String, nil] optional name (defaults to "classifier")
36
+ # @param block [Proc] the classification implementation; declare only the keyword
37
+ # args you need. Extra kwargs are filtered out automatically.
38
+ #
39
+ # Supported kwargs: +input:+, +expected:+, +output:+, +metadata:+, +trace:+, +parameters:+
40
+ # @return [Classifier::Block]
41
+ # @raise [ArgumentError] if the block has unsupported arity
42
+ def self.new(name = nil, &block)
43
+ Block.new(name: name || DEFAULT_NAME, &block)
44
+ end
45
+
46
+ # Included into classes that +include Classifier+. Prepends KeywordFilter and
47
+ # ClassificationNormalizer so #call receives only declared kwargs and always returns
48
+ # Array<Hash>. Also provides a default #name and #call_parameters.
49
+ module Callable
50
+ # Normalizes the raw return value of #call into Array<Hash>.
51
+ # Nested inside Callable because it depends on #name which Callable provides.
52
+ module ClassificationNormalizer
53
+ # @return [Array<Hash>] normalized classification hashes with :name, :id, and optional :label, :metadata keys
54
+ def call(**kwargs)
55
+ normalize_classification_result(super)
56
+ end
57
+
58
+ private
59
+
60
+ # @param result [Hash, Array<Hash>, nil] raw return value from #call
61
+ # @return [Array<Hash>] [] for nil; otherwise one normalized hash per item (a lone Hash becomes a one-element Array)
62
+ # @raise [ArgumentError] if result is not nil, an Array, or a Hash, or if any item is not a non-empty Hash
63
+ def normalize_classification_result(result)
64
+ case result
65
+ when nil then []
66
+ when Array then result.map { |item| normalize_classification_item(item) }
67
+ when Hash then [normalize_classification_item(result)]
68
+ else
69
+ raise ArgumentError, "When returning structured classifier results, each classification must be a non-empty object. Got: #{result.inspect}"
70
+ end
71
+ end
72
+
73
+ # Checks that the item is a non-empty Hash and fills in a missing :name from the classifier. NOTE(review): :id is not checked here - confirm whether that is intended.
74
+ # @param item [Hash] a classification hash
75
+ # @return [Hash] the item itself, or a merged copy with :name set to the classifier's #name
76
+ # @raise [ArgumentError] if item is not a non-empty Hash
77
+ def normalize_classification_item(item)
78
+ unless item.is_a?(Hash) && !item.empty?
79
+ raise ArgumentError, "When returning structured classifier results, each classification must be a non-empty object. Got: #{item.inspect}"
80
+ end
81
+
82
+ # :name defaults to the classifier's resolved name when missing, empty, or non-string (only the Symbol key :name is consulted)
83
+ unless item[:name].is_a?(String) && !item[:name].empty?
84
+ item = item.merge(name: name)
85
+ end
86
+
87
+ item
88
+ end
89
+ end
90
+
91
+ # Infrastructure modules prepended onto every classifier class.
92
+ # Used both to set up the ancestor chain and to skip past them in
93
+ # #call_parameters so KeywordFilter sees the real call signature.
94
+ PREPENDED = [Internal::Callable::KeywordFilter, ClassificationNormalizer].freeze
95
+
96
+ # @param base [Class] the class including Callable
97
+ def self.included(base)
98
+ PREPENDED.each { |mod| base.prepend(mod) }
99
+ end
100
+
101
+ # Default name derived from the last segment of the class name (e.g. CategoryClassifier -> "category_classifier"); only lowercase-to-uppercase boundaries get an underscore.
102
+ # @return [String] the downcased name, or Classifier::DEFAULT_NAME when the class has no name (anonymous class)
103
+ def name
104
+ klass = self.class.name&.split("::")&.last
105
+ return Classifier::DEFAULT_NAME unless klass
106
+ klass.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
107
+ end
108
+
109
+ # Provides KeywordFilter with the actual call signature of the subclass.
110
+ # Walks past PREPENDED modules in the ancestor chain so that user-defined
111
+ # #call keyword params are correctly introspected.
112
+ # Block overrides this to point directly at @block.parameters.
113
+ # @return [Array<Array>] parameter list
114
+ def call_parameters
115
+ meth = method(:call)
116
+ meth = meth.super_method while meth.super_method && PREPENDED.include?(meth.owner)
117
+ meth.parameters
118
+ end
119
+ end
120
+
121
+ # Block-based classifier. Stores a Proc and delegates #call to it.
122
+ # Includes Classifier so it satisfies +Classifier ===+ checks.
123
+ # Exposes #call_parameters so KeywordFilter can introspect the block's
124
+ # declared kwargs rather than Block#call's **kwargs signature.
125
+ class Block
126
+ include Classifier
127
+
128
+ # @return [String]
129
+ attr_reader :name
130
+
131
+ # @param name [String] classifier name, exposed through the #name reader
132
+ # @param block [Proc] classification implementation; must use keyword args or zero-arity. NOTE(review): a missing block would reach nil.parameters below - confirm callers always pass one.
133
+ # @raise [ArgumentError] if KeywordFilter.has_any_keywords? is false for the block's parameters and the block's arity is not 0
134
+ def initialize(name: DEFAULT_NAME, &block)
135
+ @name = name
136
+ params = block.parameters
137
+ unless Internal::Callable::KeywordFilter.has_any_keywords?(params) || block.arity == 0
138
+ raise ArgumentError, "Classifier block must use keyword args (got arity #{block.arity})"
139
+ end
140
+ @block = block
141
+ end
142
+
143
+ # @param kwargs [Hash] keyword arguments, forwarded unchanged to the stored block
144
+ # @return [Object] the block's raw result; the prepended ClassificationNormalizer#call is what turns it into Array<Hash> for callers
145
+ def call(**kwargs)
146
+ @block.call(**kwargs)
147
+ end
148
+
149
+ # Exposes the block's parameter list so KeywordFilter can filter
150
+ # kwargs to match the block's declared keywords.
151
+ # @return [Array<Array>] parameter list from Proc#parameters
152
+ def call_parameters
153
+ @block.parameters
154
+ end
155
+ end
156
+ end
157
+ end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "internal/api_key_resolver"
4
+
3
5
  module Braintrust
4
6
  # Configuration object that reads from environment variables
5
7
  # and allows overriding with explicit options
@@ -39,7 +41,7 @@ module Braintrust
39
41
  end
40
42
 
41
43
  new(
42
- api_key: api_key || ((ENV["BRAINTRUST_API_KEY"] && ENV["BRAINTRUST_API_KEY"].empty?) ? nil : ENV["BRAINTRUST_API_KEY"]),
44
+ api_key: Internal::ApiKeyResolver.resolve(explicit_api_key: api_key),
43
45
  org_name: org_name || ENV["BRAINTRUST_ORG_NAME"],
44
46
  default_project: default_project || ENV["BRAINTRUST_DEFAULT_PROJECT"],
45
47
  app_url: app_url || ENV["BRAINTRUST_APP_URL"] || "https://www.braintrust.dev",
@@ -1,18 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "cases"
4
+ require_relative "../classifier"
4
5
 
5
6
  module Braintrust
6
7
  module Eval
7
8
  # Holds all normalized, ready-to-execute eval components.
8
9
  # Use Context.build to construct from raw user inputs.
9
10
  class Context
10
- attr_reader :task, :scorers, :cases, :experiment_id, :experiment_name,
11
- :project_id, :project_name, :state, :tracer_provider,
11
+ attr_reader :task, :scorers, :classifiers, :cases, :experiment_id,
12
+ :experiment_name, :project_id, :project_name, :state, :tracer_provider,
12
13
  :on_progress, :parent_span_attr, :generation, :parameters
13
14
 
14
15
  # @param task [Task] Normalized task wrapper
15
16
  # @param scorers [Array<Scorer>] Normalized scorer wrappers
17
+ # @param classifiers [Array<Classifier>] Normalized classifier wrappers
16
18
  # @param cases [Cases] Normalized eval cases
17
19
  # @param experiment_id [String, nil] Experiment ID for logging and trace linkage
18
20
  # @param experiment_name [String, nil] Experiment name, included in span attributes
@@ -24,11 +26,13 @@ module Braintrust
24
26
  # @param parent_span_attr [String, nil] Formatted parent span identifier ("type:id"), linking spans to a parent context
25
27
  # @param generation [Integer, nil] Generation number from the parent span context, used to link spans in a trace hierarchy
26
28
  # @param parameters [Hash, nil] Runtime parameters passed to task, scorers, and classifiers as a `parameters:` keyword argument
27
- def initialize(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil,
28
- project_id: nil, project_name: nil, state: nil, tracer_provider: nil,
29
- on_progress: nil, parent_span_attr: nil, generation: nil, parameters: nil)
29
+ def initialize(task:, scorers:, cases:, classifiers: [],
30
+ experiment_id: nil, experiment_name: nil, project_id: nil,
31
+ project_name: nil, state: nil, tracer_provider: nil, on_progress: nil,
32
+ parent_span_attr: nil, generation: nil, parameters: nil)
30
33
  @task = task
31
34
  @scorers = scorers
35
+ @classifiers = classifiers
32
36
  @cases = cases
33
37
  @experiment_id = experiment_id
34
38
  @experiment_name = experiment_name
@@ -46,6 +50,7 @@ module Braintrust
46
50
  # Delegates to Factory for normalization.
47
51
  # @param task [Task, Proc, #call] Task to evaluate; wrapped into a {Task} if needed
48
52
  # @param scorers [Array<Scorer, Proc, String, Scorer::ID, #call>] Scorers; each is normalized into a {Scorer}
53
+ # @param classifiers [Array<Classifier, Proc, #call>] Classifiers; each is normalized into a {Classifier}
49
54
  # @param cases [Cases, Array, Enumerable] Eval cases; wrapped into {Cases} if needed
50
55
  # @param experiment_id [String, nil] Experiment ID for logging
51
56
  # @param experiment_name [String, nil] Experiment name, included in span attributes
@@ -57,14 +62,15 @@ module Braintrust
57
62
  # @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation
58
63
  # @param parameters [Hash, nil] Runtime parameters passed to task, scorers, and classifiers as a `parameters:` keyword argument
59
64
  # @return [Context]
60
- def self.build(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil,
61
- project_id: nil, project_name: nil, state: nil, tracer_provider: nil,
62
- on_progress: nil, parent: nil, parameters: nil)
65
+ def self.build(task:, scorers:, cases:, classifiers: [],
66
+ experiment_id: nil, experiment_name: nil, project_id: nil,
67
+ project_name: nil, state: nil, tracer_provider: nil, on_progress: nil,
68
+ parent: nil, parameters: nil)
63
69
  Factory.new(
64
70
  state: state, tracer_provider: tracer_provider,
65
71
  project_id: project_id, project_name: project_name
66
72
  ).build(
67
- task: task, scorers: scorers, cases: cases,
73
+ task: task, scorers: scorers, classifiers: classifiers, cases: cases,
68
74
  experiment_id: experiment_id, experiment_name: experiment_name,
69
75
  on_progress: on_progress, parent: parent, parameters: parameters
70
76
  )
@@ -86,17 +92,19 @@ module Braintrust
86
92
  # Normalize raw inputs and construct a {Context}.
87
93
  # @param task [Task, Proc, #call] Raw task
88
94
  # @param scorers [Array] Raw scorers
95
+ # @param classifiers [Array] Raw classifiers
89
96
  # @param cases [Cases, Array, Enumerable] Raw eval cases
90
97
  # @param experiment_id [String, nil]
91
98
  # @param experiment_name [String, nil]
92
99
  # @param on_progress [Proc, nil]
93
100
  # @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation
94
101
  # @return [Context]
95
- def build(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil,
96
- on_progress: nil, parent: nil, parameters: nil)
102
+ def build(task:, scorers:, cases:, classifiers: [], experiment_id: nil,
103
+ experiment_name: nil, on_progress: nil, parent: nil, parameters: nil)
97
104
  Context.new(
98
105
  task: normalize_task(task),
99
106
  scorers: normalize_scorers(scorers),
107
+ classifiers: normalize_classifiers(classifiers),
100
108
  cases: normalize_cases(cases),
101
109
  experiment_id: experiment_id,
102
110
  experiment_name: experiment_name,
@@ -188,6 +196,23 @@ module Braintrust
188
196
  end
189
197
  end
190
198
  end
199
+
200
+ # @param raw [Array<Classifier, Proc, #call>] classifiers as supplied by the caller
201
+ # @return [Array<Classifier>] Classifier instances unchanged; Procs and other objects wrapped via Braintrust::Classifier.new
202
+ def normalize_classifiers(raw)
203
+ raw.map do |classifier|
204
+ case classifier
205
+ when Braintrust::Classifier
206
+ classifier
207
+ when Proc
208
+ # Pass Proc/Lambda directly to preserve keyword arg info; no name is passed, so Classifier.new applies its default
209
+ Braintrust::Classifier.new(&classifier)
210
+ else
211
+ name = classifier.respond_to?(:name) ? classifier.name : nil
212
+ Braintrust::Classifier.new(name, &classifier.method(:call))
213
+ end
214
+ end
215
+ end
191
216
  end
192
217
  end
193
218
  end
@@ -40,11 +40,12 @@ module Braintrust
40
40
  # }
41
41
  # )
42
42
  class Evaluator
43
- attr_accessor :task, :scorers, :parameters
43
+ attr_accessor :task, :scorers, :classifiers, :parameters
44
44
 
45
- def initialize(task: nil, scorers: [], parameters: {})
45
+ def initialize(task: nil, scorers: [], classifiers: [], parameters: {})
46
46
  @task = task
47
47
  @scorers = scorers
48
+ @classifiers = classifiers
48
49
  @parameters = parameters
49
50
  end
50
51
 
@@ -68,6 +69,7 @@ module Braintrust
68
69
  # @param project_id [String, nil] Project UUID (skips project creation)
69
70
  # @param dataset [String, Hash, Dataset, Dataset::ID, nil] Dataset to fetch
70
71
  # @param scorers [Array, nil] Additional scorers (merged with evaluator's own)
72
+ # @param classifiers [Array, nil] Additional classifiers (merged with evaluator's own)
71
73
  # @param parent [Hash, nil] Parent span context
72
74
  # @param state [State, nil] Braintrust state
73
75
  # @param update [Boolean] If true, allow reusing existing experiment (default: false)
@@ -75,16 +77,19 @@ module Braintrust
75
77
  # @return [Result]
76
78
  def run(cases, on_progress: nil, quiet: false,
77
79
  project: nil, experiment: nil, project_id: nil,
78
- dataset: nil, scorers: nil, parent: nil,
80
+ dataset: nil, scorers: nil, classifiers: nil, parent: nil,
79
81
  state: nil, update: false, tracer_provider: nil,
80
82
  parameters: nil)
81
83
  all_scorers = scorers ? self.scorers + scorers : self.scorers
84
+ all_classifiers = classifiers ?
85
+ self.classifiers + classifiers :
86
+ self.classifiers
82
87
  Braintrust::Eval.run(
83
- task: task, scorers: all_scorers, cases: cases, dataset: dataset,
84
- project: project, experiment: experiment, project_id: project_id,
85
- parent: parent, on_progress: on_progress, quiet: quiet,
86
- state: state, update: update, tracer_provider: tracer_provider,
87
- parameters: parameters
88
+ task: task, scorers: all_scorers, classifiers: all_classifiers,
89
+ cases: cases, dataset: dataset, project: project,
90
+ experiment: experiment, project_id: project_id, parent: parent,
91
+ on_progress: on_progress, quiet: quiet, state: state, update: update,
92
+ tracer_provider: tracer_provider, parameters: parameters
88
93
  )
89
94
  end
90
95
  end
@@ -9,7 +9,7 @@ module Braintrust
9
9
  # Contains experiment metadata, errors, timing information, and raw score and classification data
10
10
  class Result
11
11
  attr_reader :experiment_id, :experiment_name, :project_id, :project_name,
12
- :permalink, :errors, :duration, :scores
12
+ :permalink, :errors, :duration, :scores, :classifications
13
13
 
14
14
  # Create a new result
15
15
  # @param experiment_id [String] The experiment ID
@@ -20,8 +20,9 @@ module Braintrust
20
20
  # @param errors [Array<String>] List of errors that occurred
21
21
  # @param duration [Float] Duration in seconds
22
22
  # @param scores [Hash, nil] Raw score data { scorer_name => Array<Numeric> }
23
+ # @param classifications [Hash, nil] Classification results { name => Array<ClassificationItem> }, nil when no classifiers ran
23
24
  def initialize(experiment_id:, experiment_name:, project_id:, project_name:,
24
- permalink:, errors:, duration:, scores: nil)
25
+ permalink:, errors:, duration:, scores: nil, classifications: nil)
25
26
  @experiment_id = experiment_id
26
27
  @experiment_name = experiment_name
27
28
  @project_id = project_id
@@ -30,6 +31,7 @@ module Braintrust
30
31
  @errors = errors
31
32
  @duration = duration
32
33
  @scores = scores
34
+ @classifications = classifications
33
35
  end
34
36
 
35
37
  # Check if the evaluation was successful (no errors)
@@ -27,8 +27,9 @@ module Braintrust
27
27
  @eval_context = eval_context
28
28
  @tracer = eval_context.tracer_provider.tracer("braintrust-eval")
29
29
 
30
- # Mutex for thread-safe score collection
30
+ # Mutexes for thread-safe result collection
31
31
  @score_mutex = Mutex.new
32
+ @classification_mutex = Mutex.new
32
33
  end
33
34
 
34
35
  # Run evaluation and return Result
@@ -39,6 +40,7 @@ module Braintrust
39
40
  eval_cases = eval_context.cases
40
41
  errors = Queue.new
41
42
  @scores = {} # Reset for each run: { scorer_name => Array<Numeric> }
43
+ @classifications = {} # Reset for each run: { classifier_name => Array<ClassificationItem> }
42
44
 
43
45
  if parallelism && parallelism > 1
44
46
  Internal::ThreadPool.each(eval_cases, parallelism: parallelism) do |eval_case|
@@ -69,7 +71,8 @@ module Braintrust
69
71
  permalink: permalink,
70
72
  errors: error_array,
71
73
  duration: duration,
72
- scores: @scores
74
+ scores: @scores,
75
+ classifications: @classifications.empty? ? nil : @classifications
73
76
  )
74
77
  end
75
78
 
@@ -119,6 +122,17 @@ module Braintrust
119
122
  errors << "Scorers failed for input '#{kase.input}': #{e.message}"
120
123
  end
121
124
 
125
+ # Run classifiers (independent of scorers; errors do not abort eval)
126
+ classifier_errors = run_classifiers(kase, eval_span)
127
+ unless classifier_errors.empty?
128
+ existing_metadata = kase.metadata || {}
129
+ classifier_errors_metadata = existing_metadata.merge(classifier_errors: classifier_errors)
130
+ set_json_attr(eval_span, "braintrust.metadata", classifier_errors_metadata)
131
+ classifier_errors.each do |classifier_name, message|
132
+ errors << "Classifier '#{classifier_name}' failed for input '#{kase.input}': #{message}"
133
+ end
134
+ end
135
+
122
136
  # Set output after task completes
123
137
  set_json_attr(eval_span, "braintrust.output_json", {output: kase.output})
124
138
 
@@ -318,6 +332,104 @@ module Braintrust
318
332
  score_results.each { |s| (@scores[s[:name]] ||= []) << s[:score] }
319
333
  end
320
334
  end
335
+
336
+ # Run every classifier for a case. A failing classifier is logged and recorded in the returned map; the remaining classifiers still run.
337
+ # @param kase [CaseContext] The per-case context (output must be populated)
338
+ # @param eval_span [OpenTelemetry::Trace::Span] The eval span for this case; receives "braintrust.classifications" when any classifier returned items
339
+ # @return [Hash] classifier_errors map (name -> error message), empty if no errors or no classifiers are configured
340
+ def run_classifiers(kase, eval_span)
341
+ return {} if eval_context.classifiers.empty?
342
+ # classifier_kwargs is passed to each classifier; classifier_input (the same minus :trace) is what run_classifier logs on the span.
343
+ classifier_kwargs = {
344
+ input: kase.input,
345
+ expected: kase.expected,
346
+ output: kase.output,
347
+ metadata: kase.metadata || {},
348
+ trace: kase.trace,
349
+ parameters: eval_context.parameters || {}
350
+ }
351
+ classifier_input = {
352
+ input: kase.input,
353
+ expected: kase.expected,
354
+ output: kase.output,
355
+ metadata: kase.metadata || {},
356
+ parameters: eval_context.parameters || {}
357
+ }
358
+
359
+ case_classifications = {}
360
+ classifier_errors = {}
361
+
362
+ eval_context.classifiers.each_with_index do |classifier, index|
363
+ classifier_name = classifier.name || "classifier_#{index}"
364
+ begin
365
+ results = run_classifier(classifier, classifier_kwargs, classifier_input)
366
+ results.each do |item|
367
+ item_name = item[:name]
368
+ classification_item = item.except(:name)
369
+ (case_classifications[item_name] ||= []) << classification_item
370
+ end
371
+ collect_classifications(results)
372
+ rescue => e
373
+ Braintrust::Log.warn("[Classifier] #{classifier_name} failed: #{e.message}")
374
+ classifier_errors[classifier_name] = e.message
375
+ end
376
+ end
377
+
378
+ unless case_classifications.empty?
379
+ set_json_attr(eval_span, "braintrust.classifications", case_classifications)
380
+ end
381
+
382
+ classifier_errors
383
+ end
384
+
385
+ # Run a single classifier inside its own span, logging its input and name-keyed output as span attributes.
386
+ # @param classifier [Classifier] The classifier to run; its #name is used as the span name
387
+ # @param classifier_kwargs [Hash] Keyword arguments for the classifier
388
+ # @param classifier_input [Hash] Input to log on the span
389
+ # @return [Array<Hash>] Classification results as returned by classifier.call; any error is recorded on the span via record_span_error and re-raised
390
+ def run_classifier(classifier, classifier_kwargs, classifier_input)
391
+ tracer.in_span(classifier.name) do |classifier_span|
392
+ classifier_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
393
+ set_json_attr(classifier_span, "braintrust.span_attributes", build_classifier_span_attributes(classifier.name))
394
+ set_json_attr(classifier_span, "braintrust.input_json", classifier_input)
395
+
396
+ classification_results = classifier.call(**classifier_kwargs)
397
+
398
+ # Build output dict keyed by name -> array of items with :name removed (for span logging)
399
+ output_by_name = {}
400
+ classification_results.each do |item|
401
+ (output_by_name[item[:name]] ||= []) << item.except(:name)
402
+ end
403
+
404
+ set_json_attr(classifier_span, "braintrust.output_json", output_by_name)
405
+
406
+ classification_results
407
+ rescue => e
408
+ record_span_error(classifier_span, e, "ClassifierError")
409
+ raise
410
+ end
411
+ end
412
+
413
+ # Build span_attributes for a classifier span. NOTE(review): purpose is "scorer" although type is "classifier" - confirm this is what the backend expects.
414
+ # @param classifier_name [String] The classifier name
415
+ # @return [Hash] :type, :name and :purpose, plus :generation when the eval context has one
416
+ def build_classifier_span_attributes(classifier_name)
417
+ attrs = {type: "classifier", name: classifier_name, purpose: "scorer"}
418
+ attrs[:generation] = eval_context.generation if eval_context.generation
419
+ attrs
420
+ end
421
+
422
+ # Append classification results to @classifications, grouped by :name, while holding @classification_mutex.
423
+ # Each stored item is the result hash with :name removed (Classification -> ClassificationItem).
424
+ # @param classification_results [Array<Hash>] Classification results from a classifier
425
+ def collect_classifications(classification_results)
426
+ @classification_mutex.synchronize do
427
+ classification_results.each do |item|
428
+ item_name = item[:name]
429
+ (@classifications[item_name] ||= []) << item.except(:name)
430
+ end
431
+ end
432
+ end
321
433
  end
322
434
  end
323
435
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "classifier"
3
4
  require_relative "scorer"
4
5
  require_relative "task"
5
6
  require_relative "functions"
@@ -160,7 +161,10 @@ module Braintrust
160
161
  # - String: dataset name (fetches from same project)
161
162
  # - Hash: {name:, id:, project:, version:, limit:}
162
163
  # @param task [#call] The task to evaluate (must be callable)
163
- # @param scorers [Array<String, Scorer, #call>] The scorers to use (String names, Scorer objects, or callables)
164
+ # @param scorers [Array<String, Scorer, #call>, nil] The scorers to use (String names, Scorer objects, or callables).
165
+ # At least one of scorers or classifiers must be provided.
166
+ # @param classifiers [Array<Classifier, #call>, nil] The classifiers to use.
167
+ # At least one of scorers or classifiers must be provided.
164
168
  # @param on_progress [#call, nil] Optional callback fired after each test case.
165
169
  # Receives a Hash: {"data" => output, "scores" => {name => value}} on success,
166
170
  # or {"error" => message} on failure.
@@ -177,13 +181,16 @@ module Braintrust
177
181
  # @param parent [Hash, nil] Parent span context ({object_type:, object_id:, generation:})
178
182
  # @param parameters [Hash, nil] Runtime parameters passed to task, scorers, and classifiers as a `parameters:` keyword argument
179
183
  # @return [Result]
180
- def run(task:, scorers:, project: nil, experiment: nil,
181
- cases: nil, dataset: nil, on_progress: nil,
184
+ def run(task:, scorers: nil, classifiers: nil, project: nil,
185
+ experiment: nil, cases: nil, dataset: nil, on_progress: nil,
182
186
  parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
183
187
  state: nil, tracer_provider: nil, project_id: nil, parent: nil,
184
188
  parameters: nil)
185
189
  # Validate required parameters
186
- validate_params!(task: task, scorers: scorers, cases: cases, dataset: dataset)
190
+ validate_params!(task: task, scorers: scorers,
191
+ classifiers: classifiers, cases: cases, dataset: dataset)
192
+ scorers ||= []
193
+ classifiers ||= []
187
194
 
188
195
  experiment_id = nil
189
196
  project_name = project
@@ -216,6 +223,7 @@ module Braintrust
216
223
  context = Context.build(
217
224
  task: task,
218
225
  scorers: scorers,
226
+ classifiers: classifiers,
219
227
  cases: cases,
220
228
  experiment_id: experiment_id,
221
229
  experiment_name: experiment,
@@ -245,9 +253,19 @@ module Braintrust
245
253
 
246
254
  # Validate required parameters
247
255
  # @raise [ArgumentError] if validation fails
248
- def validate_params!(task:, scorers:, cases:, dataset:)
256
+ def validate_params!(task:, scorers:, classifiers:, cases:, dataset:)
249
257
  raise ArgumentError, "task is required" unless task
250
- raise ArgumentError, "scorers is required" unless scorers
258
+
259
+ # Validate task is callable before anything else
260
+ unless task.respond_to?(:call)
261
+ raise ArgumentError, "task must be callable (respond to :call)"
262
+ end
263
+
264
+ has_scorers = scorers && !scorers.empty?
265
+ has_classifiers = classifiers && !classifiers.empty?
266
+ unless has_scorers || has_classifiers
267
+ raise ArgumentError, "at least one of scorers or classifiers is required"
268
+ end
251
269
 
252
270
  # Validate cases and dataset are mutually exclusive
253
271
  if cases && dataset
@@ -258,11 +276,6 @@ module Braintrust
258
276
  unless cases || dataset
259
277
  raise ArgumentError, "must specify either 'cases' or 'dataset'"
260
278
  end
261
-
262
- # Validate task is callable
263
- unless task.respond_to?(:call)
264
- raise ArgumentError, "task must be callable (respond to :call)"
265
- end
266
279
  end
267
280
 
268
281
  # Resolve project by name or ID. Creates if needed.
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Braintrust
6
+ module Internal
7
+ # Resolves the Braintrust API key, in order of precedence, from an explicit option, ENV, or the nearest
8
+ # .braintrust.json file (start_dir, then up to SEARCH_PARENT_LIMIT parent directories) without mutating the process environment.
9
+ class ApiKeyResolver
10
+ ENV_KEY = "BRAINTRUST_API_KEY"
11
+ CONFIG_FILE = ".braintrust.json"
12
+ SEARCH_PARENT_LIMIT = 64
13
+ # @return [String, nil] explicit_api_key when it is not nil, else a non-blank ENV value, else the file lookup result. NOTE(review): an explicit "" is returned as-is and skips ENV and file lookup - confirm intended.
14
+ def self.resolve(explicit_api_key: nil, start_dir: Dir.pwd)
15
+ return explicit_api_key unless explicit_api_key.nil?
16
+
17
+ env_api_key = ENV[ENV_KEY]
18
+ return env_api_key if env_api_key && !env_api_key.strip.empty?
19
+
20
+ find_file_api_key(start_dir)
21
+ end
22
+ # Walks upward from start_dir; the first config file that can be read decides the result (even if it yields nil), and any other error yields nil.
23
+ def self.find_file_api_key(start_dir = Dir.pwd)
24
+ dir = start_dir
25
+
26
+ 0.upto(SEARCH_PARENT_LIMIT) do
27
+ config_path = File.join(dir, CONFIG_FILE)
28
+
29
+ begin
30
+ contents = File.read(config_path)
31
+ rescue Errno::ENOENT, Errno::ENOTDIR
32
+ # Missing candidates are not boundaries; keep walking upward.
33
+ rescue
34
+ return nil
35
+ else
36
+ return parse_api_key(contents)
37
+ end
38
+
39
+ parent = File.dirname(dir)
40
+ break if parent == dir
41
+ dir = parent
42
+ end
43
+
44
+ nil
45
+ rescue
46
+ nil
47
+ end
48
+ # @return [String, nil] the "BRAINTRUST_API_KEY" entry when contents parse to a Hash holding a non-blank String there; nil otherwise, including on parse errors
49
+ def self.parse_api_key(contents)
50
+ config = JSON.parse(contents)
51
+ return nil unless config.is_a?(Hash)
52
+
53
+ value = config[ENV_KEY]
54
+ (value.is_a?(String) && !value.strip.empty?) ? value : nil
55
+ rescue JSON::ParserError, TypeError
56
+ nil
57
+ end
58
+
59
+ private_class_method :find_file_api_key, :parse_api_key
60
+ end
61
+ end
62
+ end
@@ -20,6 +20,11 @@ module Braintrust
20
20
  {"name" => scorer_name}
21
21
  end
22
22
  entry = {"scores" => scores}
23
+ classifiers = (evaluator.classifiers || []).each_with_index.map do |classifier, i|
24
+ classifier_name = classifier.respond_to?(:name) ? classifier.name : "classifier_#{i}"
25
+ {"name" => classifier_name}
26
+ end
27
+ entry["classifiers"] = classifiers unless classifiers.empty?
23
28
  params = serialize_parameters(evaluator.parameters)
24
29
  entry["parameters"] = params if params
25
30
  result[name] = entry
@@ -11,7 +11,7 @@
11
11
  # require "braintrust/setup"
12
12
  #
13
13
  # Environment variables:
14
- # BRAINTRUST_API_KEY - Required for tracing to work
14
+ # BRAINTRUST_API_KEY - Required for tracing to work; falls back to .braintrust.json
15
15
  # BRAINTRUST_AUTO_INSTRUMENT - Set to "false" to disable (default: true)
16
16
  # BRAINTRUST_INSTRUMENT_ONLY - Comma-separated whitelist
17
17
  # BRAINTRUST_INSTRUMENT_EXCEPT - Comma-separated blacklist
@@ -6,6 +6,8 @@ module Braintrust
6
6
  # State object that holds Braintrust configuration
7
7
  # Thread-safe global state management
8
8
  class State
9
+ class MissingAPIKeyError < ArgumentError; end
10
+
9
11
  attr_reader :api_key, :org_name, :org_id, :default_project, :app_url, :api_url, :proxy_url, :logged_in, :config
10
12
 
11
13
  @mutex = Mutex.new
@@ -66,7 +68,7 @@ module Braintrust
66
68
  def initialize(api_key: nil, org_name: nil, org_id: nil, default_project: nil, app_url: nil, api_url: nil, proxy_url: nil, blocking_login: false, enable_tracing: true, tracer_provider: nil, config: nil, exporter: nil)
67
69
  # Instance-level mutex for thread-safe login
68
70
  @login_mutex = Mutex.new
69
- raise ArgumentError, "api_key is required" if api_key.nil? || api_key.empty?
71
+ raise MissingAPIKeyError, "api_key is required" if api_key.nil? || api_key.empty?
70
72
 
71
73
  @api_key = api_key
72
74
  @org_name = org_name
@@ -101,6 +103,11 @@ module Braintrust
101
103
  end
102
104
  end
103
105
 
106
# Strict reader for the API key: returns the stored key or fails loudly.
#
# @return [String] the value held in @api_key
# @raise [MissingAPIKeyError] when @api_key is nil or empty
def api_key!
  key = @api_key
  return key unless key.nil? || key.empty?

  raise MissingAPIKeyError, "api_key is required"
end
110
+
104
111
  # Thread-safe global state getter
105
112
  def self.global
106
113
  @mutex.synchronize { @global_state }
@@ -121,9 +128,10 @@ module Braintrust
121
128
  @login_mutex.synchronize do
122
129
  # Return early if already logged in
123
130
  return self if @logged_in
131
+ api_key = api_key!
124
132
 
125
133
  result = API::Internal::Auth.login(
126
- api_key: @api_key,
134
+ api_key: api_key,
127
135
  app_url: @app_url,
128
136
  org_name: @org_name
129
137
  )
@@ -167,6 +175,9 @@ module Braintrust
167
175
  login
168
176
  Log.debug("Background login succeeded")
169
177
  break
178
+ rescue MissingAPIKeyError => e
179
+ Log.debug("Background login skipped: #{e.message}")
180
+ break
170
181
  rescue => e
171
182
  retry_count += 1
172
183
  delay = [0.001 * 2**(retry_count - 1), max_delay].min
@@ -190,7 +201,7 @@ module Braintrust
190
201
  # Raises ArgumentError if state is invalid
191
202
  # @return [self]
192
203
  def validate
193
- raise ArgumentError, "api_key is required" if @api_key.nil? || @api_key.empty?
204
+ api_key!
194
205
  raise ArgumentError, "api_url is required" if @api_url.nil? || @api_url.empty?
195
206
  raise ArgumentError, "app_url is required" if @app_url.nil? || @app_url.empty?
196
207
 
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "opentelemetry/exporter/otlp"
4
+ require_relative "../state"
4
5
 
5
6
  module Braintrust
6
7
  module Trace
@@ -18,6 +19,8 @@ module Braintrust
18
19
  FAILURE = OpenTelemetry::SDK::Trace::Export::FAILURE
19
20
 
20
21
  def initialize(endpoint:, api_key:)
22
+ raise State::MissingAPIKeyError, "api_key is required" if api_key.nil? || api_key.empty?
23
+
21
24
  super(endpoint: endpoint, headers: {"Authorization" => "Bearer #{api_key}"})
22
25
  end
23
26
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Braintrust
4
- VERSION = "0.3.2"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: braintrust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braintrust
@@ -90,6 +90,7 @@ files:
90
90
  - lib/braintrust/api/internal/btql.rb
91
91
  - lib/braintrust/api/internal/experiments.rb
92
92
  - lib/braintrust/api/internal/projects.rb
93
+ - lib/braintrust/classifier.rb
93
94
  - lib/braintrust/config.rb
94
95
  - lib/braintrust/contrib.rb
95
96
  - lib/braintrust/contrib/anthropic/deprecated.rb
@@ -147,6 +148,7 @@ files:
147
148
  - lib/braintrust/eval/summary.rb
148
149
  - lib/braintrust/eval/trace.rb
149
150
  - lib/braintrust/functions.rb
151
+ - lib/braintrust/internal/api_key_resolver.rb
150
152
  - lib/braintrust/internal/callable.rb
151
153
  - lib/braintrust/internal/encoding.rb
152
154
  - lib/braintrust/internal/env.rb
@@ -213,7 +215,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
213
215
  - !ruby/object:Gem::Version
214
216
  version: '0'
215
217
  requirements: []
216
- rubygems_version: 3.6.9
218
+ rubygems_version: 4.0.10
217
219
  specification_version: 4
218
220
  summary: Ruby SDK for Braintrust
219
221
  test_files: []