braintrust 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/braintrust/api/datasets.rb +1 -1
- data/lib/braintrust/api/functions.rb +1 -1
- data/lib/braintrust/api/internal/btql.rb +1 -1
- data/lib/braintrust/api/internal/experiments.rb +2 -2
- data/lib/braintrust/api/internal/projects.rb +2 -2
- data/lib/braintrust/classifier.rb +157 -0
- data/lib/braintrust/config.rb +3 -1
- data/lib/braintrust/eval/context.rb +36 -11
- data/lib/braintrust/eval/evaluator.rb +13 -8
- data/lib/braintrust/eval/result.rb +4 -2
- data/lib/braintrust/eval/runner.rb +114 -2
- data/lib/braintrust/eval.rb +24 -11
- data/lib/braintrust/internal/api_key_resolver.rb +62 -0
- data/lib/braintrust/server/services/list_service.rb +5 -0
- data/lib/braintrust/setup.rb +1 -1
- data/lib/braintrust/state.rb +14 -3
- data/lib/braintrust/trace/span_exporter.rb +3 -0
- data/lib/braintrust/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0f30760b63f57dfa236f8f8f74c60aabad6e693f86a57bf8699b028eb00e8639
|
|
4
|
+
data.tar.gz: fcae112dc4175b2248a853405587921f16eb2c67d2b8930e2a3877cc09b9e9d1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 16e96c1f75646d2b581cb7a5c1c50ca66de3e625c75da11c6a9fd263313adea9a936e1e30f4a9733e16e56d3120737731d47ab89d858aafd7543b75011cbc9de
|
|
7
|
+
data.tar.gz: b926449904f3dafe6803f76105ee8d85b134c777b827c93b877390e318c167e1f8212eab7f207e1fc9a8e8b77cd6a2d48c5c374f412fc4af0a0f614a6c4de94e
|
|
@@ -164,7 +164,7 @@ module Braintrust
|
|
|
164
164
|
raise ArgumentError, "Unsupported HTTP method: #{method}"
|
|
165
165
|
end
|
|
166
166
|
|
|
167
|
-
request["Authorization"] = "Bearer #{@state.api_key}"
|
|
167
|
+
request["Authorization"] = "Bearer #{@state.api_key!}"
|
|
168
168
|
|
|
169
169
|
# Execute request with timing
|
|
170
170
|
start_time = Time.now
|
|
@@ -239,7 +239,7 @@ module Braintrust
|
|
|
239
239
|
raise ArgumentError, "Unsupported HTTP method: #{method}"
|
|
240
240
|
end
|
|
241
241
|
|
|
242
|
-
request["Authorization"] = "Bearer #{@state.api_key}"
|
|
242
|
+
request["Authorization"] = "Bearer #{@state.api_key!}"
|
|
243
243
|
|
|
244
244
|
# Execute request with timing
|
|
245
245
|
start_time = Time.now
|
|
@@ -63,7 +63,7 @@ module Braintrust
|
|
|
63
63
|
|
|
64
64
|
request = Net::HTTP::Post.new(uri)
|
|
65
65
|
request["Content-Type"] = "application/json"
|
|
66
|
-
request["Authorization"] = "Bearer #{@state.api_key}"
|
|
66
|
+
request["Authorization"] = "Bearer #{@state.api_key!}"
|
|
67
67
|
request["Accept"] = "application/x-jsonlines"
|
|
68
68
|
request.body = JSON.dump(payload)
|
|
69
69
|
|
|
@@ -39,7 +39,7 @@ module Braintrust
|
|
|
39
39
|
|
|
40
40
|
request = Net::HTTP::Post.new(uri)
|
|
41
41
|
request["Content-Type"] = "application/json"
|
|
42
|
-
request["Authorization"] = "Bearer #{@state.api_key}"
|
|
42
|
+
request["Authorization"] = "Bearer #{@state.api_key!}"
|
|
43
43
|
request.body = JSON.dump(payload)
|
|
44
44
|
|
|
45
45
|
response = Braintrust::Internal::Http.with_redirects(uri, request)
|
|
@@ -59,7 +59,7 @@ module Braintrust
|
|
|
59
59
|
uri = URI("#{@state.api_url}/v1/experiment/#{id}")
|
|
60
60
|
|
|
61
61
|
request = Net::HTTP::Delete.new(uri)
|
|
62
|
-
request["Authorization"] = "Bearer #{@state.api_key}"
|
|
62
|
+
request["Authorization"] = "Bearer #{@state.api_key!}"
|
|
63
63
|
|
|
64
64
|
response = Braintrust::Internal::Http.with_redirects(uri, request)
|
|
65
65
|
|
|
@@ -24,7 +24,7 @@ module Braintrust
|
|
|
24
24
|
|
|
25
25
|
request = Net::HTTP::Post.new(uri)
|
|
26
26
|
request["Content-Type"] = "application/json"
|
|
27
|
-
request["Authorization"] = "Bearer #{@state.api_key}"
|
|
27
|
+
request["Authorization"] = "Bearer #{@state.api_key!}"
|
|
28
28
|
request.body = JSON.dump({name: name})
|
|
29
29
|
|
|
30
30
|
response = Braintrust::Internal::Http.with_redirects(uri, request)
|
|
@@ -44,7 +44,7 @@ module Braintrust
|
|
|
44
44
|
uri = URI("#{@state.api_url}/v1/project/#{id}")
|
|
45
45
|
|
|
46
46
|
request = Net::HTTP::Delete.new(uri)
|
|
47
|
-
request["Authorization"] = "Bearer #{@state.api_key}"
|
|
47
|
+
request["Authorization"] = "Bearer #{@state.api_key!}"
|
|
48
48
|
|
|
49
49
|
response = Braintrust::Internal::Http.with_redirects(uri, request)
|
|
50
50
|
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "internal/callable"
|
|
4
|
+
|
|
5
|
+
module Braintrust
|
|
6
|
+
# Classifier wraps a classification function that categorizes and labels eval outputs.
|
|
7
|
+
#
|
|
8
|
+
# Unlike scorers (which return numeric 0-1 values), classifiers return structured
|
|
9
|
+
# {Classification} items with an id and optional label and metadata.
|
|
10
|
+
#
|
|
11
|
+
# Use inline with a block (keyword args):
|
|
12
|
+
# classifier = Classifier.new("category") { |output:| {name: "category", id: "greeting", label: "Greeting"} }
|
|
13
|
+
#
|
|
14
|
+
# Or include in a class and define #call with keyword args:
|
|
15
|
+
# class CategoryClassifier
|
|
16
|
+
# include Braintrust::Classifier
|
|
17
|
+
#
|
|
18
|
+
# def call(output:)
|
|
19
|
+
# {name: "category", id: "greeting", label: "Greeting"}
|
|
20
|
+
# end
|
|
21
|
+
# end
|
|
22
|
+
#
|
|
23
|
+
# Classifiers may return a single Classification hash, an Array of them, or nil
|
|
24
|
+
# (meaning no classifications for this case).
|
|
25
|
+
module Classifier
|
|
26
|
+
DEFAULT_NAME = "classifier"
|
|
27
|
+
|
|
28
|
+
# @param base [Class] the class including Classifier
|
|
29
|
+
def self.included(base)
|
|
30
|
+
base.include(Callable)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Create a block-based classifier.
|
|
34
|
+
#
|
|
35
|
+
# @param name [String, nil] optional name (defaults to "classifier")
|
|
36
|
+
# @param block [Proc] the classification implementation; declare only the keyword
|
|
37
|
+
# args you need. Extra kwargs are filtered out automatically.
|
|
38
|
+
#
|
|
39
|
+
# Supported kwargs: +input:+, +expected:+, +output:+, +metadata:+, +trace:+, +parameters:+
|
|
40
|
+
# @return [Classifier::Block]
|
|
41
|
+
# @raise [ArgumentError] if the block has unsupported arity
|
|
42
|
+
def self.new(name = nil, &block)
|
|
43
|
+
Block.new(name: name || DEFAULT_NAME, &block)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Included into classes that +include Classifier+. Prepends KeywordFilter and
|
|
47
|
+
# ClassificationNormalizer so #call receives only declared kwargs and always returns
|
|
48
|
+
# Array<Hash>. Also provides a default #name and #call_parameters.
|
|
49
|
+
module Callable
|
|
50
|
+
# Normalizes the raw return value of #call into Array<Hash>.
|
|
51
|
+
# Nested inside Callable because it depends on #name which Callable provides.
|
|
52
|
+
module ClassificationNormalizer
|
|
53
|
+
# @return [Array<Hash>] normalized classification hashes with :name, :id, and optional :label, :metadata keys
|
|
54
|
+
def call(**kwargs)
|
|
55
|
+
normalize_classification_result(super)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
private
|
|
59
|
+
|
|
60
|
+
# @param result [Hash, Array<Hash>, nil] raw return value from #call
|
|
61
|
+
# @return [Array<Hash>] zero or more classification hashes with :name, :id keys
|
|
62
|
+
# @raise [ArgumentError] if any item is not a non-empty object
|
|
63
|
+
def normalize_classification_result(result)
|
|
64
|
+
case result
|
|
65
|
+
when nil then []
|
|
66
|
+
when Array then result.map { |item| normalize_classification_item(item) }
|
|
67
|
+
when Hash then [normalize_classification_item(result)]
|
|
68
|
+
else
|
|
69
|
+
raise ArgumentError, "When returning structured classifier results, each classification must be a non-empty object. Got: #{result.inspect}"
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Fills in missing :name from the classifier, validates :id.
|
|
74
|
+
# @param item [Hash] a classification hash
|
|
75
|
+
# @return [Hash] the item with :name defaulted and validated
|
|
76
|
+
# @raise [ArgumentError] if item is not a non-empty Hash
|
|
77
|
+
def normalize_classification_item(item)
|
|
78
|
+
unless item.is_a?(Hash) && !item.empty?
|
|
79
|
+
raise ArgumentError, "When returning structured classifier results, each classification must be a non-empty object. Got: #{item.inspect}"
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# :name defaults to the classifier's resolved name when missing, empty, or non-string
|
|
83
|
+
unless item[:name].is_a?(String) && !item[:name].empty?
|
|
84
|
+
item = item.merge(name: name)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
item
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Infrastructure modules prepended onto every classifier class.
|
|
92
|
+
# Used both to set up the ancestor chain and to skip past them in
|
|
93
|
+
# #call_parameters so KeywordFilter sees the real call signature.
|
|
94
|
+
PREPENDED = [Internal::Callable::KeywordFilter, ClassificationNormalizer].freeze
|
|
95
|
+
|
|
96
|
+
# @param base [Class] the class including Callable
|
|
97
|
+
def self.included(base)
|
|
98
|
+
PREPENDED.each { |mod| base.prepend(mod) }
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Default name derived from the class name (e.g. CategoryClassifier -> "category_classifier").
|
|
102
|
+
# @return [String]
|
|
103
|
+
def name
|
|
104
|
+
klass = self.class.name&.split("::")&.last
|
|
105
|
+
return Classifier::DEFAULT_NAME unless klass
|
|
106
|
+
klass.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Provides KeywordFilter with the actual call signature of the subclass.
|
|
110
|
+
# Walks past PREPENDED modules in the ancestor chain so that user-defined
|
|
111
|
+
# #call keyword params are correctly introspected.
|
|
112
|
+
# Block overrides this to point directly at @block.parameters.
|
|
113
|
+
# @return [Array<Array>] parameter list
|
|
114
|
+
def call_parameters
|
|
115
|
+
meth = method(:call)
|
|
116
|
+
meth = meth.super_method while meth.super_method && PREPENDED.include?(meth.owner)
|
|
117
|
+
meth.parameters
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Block-based classifier. Stores a Proc and delegates #call to it.
|
|
122
|
+
# Includes Classifier so it satisfies +Classifier ===+ checks.
|
|
123
|
+
# Exposes #call_parameters so KeywordFilter can introspect the block's
|
|
124
|
+
# declared kwargs rather than Block#call's **kwargs signature.
|
|
125
|
+
class Block
|
|
126
|
+
include Classifier
|
|
127
|
+
|
|
128
|
+
# @return [String]
|
|
129
|
+
attr_reader :name
|
|
130
|
+
|
|
131
|
+
# @param name [String] classifier name
|
|
132
|
+
# @param block [Proc] classification implementation; must use keyword args or zero-arity
|
|
133
|
+
# @raise [ArgumentError] if the block uses positional params
|
|
134
|
+
def initialize(name: DEFAULT_NAME, &block)
|
|
135
|
+
@name = name
|
|
136
|
+
params = block.parameters
|
|
137
|
+
unless Internal::Callable::KeywordFilter.has_any_keywords?(params) || block.arity == 0
|
|
138
|
+
raise ArgumentError, "Classifier block must use keyword args (got arity #{block.arity})"
|
|
139
|
+
end
|
|
140
|
+
@block = block
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# @param kwargs [Hash] keyword arguments (filtered by KeywordFilter)
|
|
144
|
+
# @return [Array<Hash>] normalized classification results
|
|
145
|
+
def call(**kwargs)
|
|
146
|
+
@block.call(**kwargs)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Exposes the block's parameter list so KeywordFilter can filter
|
|
150
|
+
# kwargs to match the block's declared keywords.
|
|
151
|
+
# @return [Array<Array>] parameter list from Proc#parameters
|
|
152
|
+
def call_parameters
|
|
153
|
+
@block.parameters
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
end
|
data/lib/braintrust/config.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "internal/api_key_resolver"
|
|
4
|
+
|
|
3
5
|
module Braintrust
|
|
4
6
|
# Configuration object that reads from environment variables
|
|
5
7
|
# and allows overriding with explicit options
|
|
@@ -39,7 +41,7 @@ module Braintrust
|
|
|
39
41
|
end
|
|
40
42
|
|
|
41
43
|
new(
|
|
42
|
-
api_key:
|
|
44
|
+
api_key: Internal::ApiKeyResolver.resolve(explicit_api_key: api_key),
|
|
43
45
|
org_name: org_name || ENV["BRAINTRUST_ORG_NAME"],
|
|
44
46
|
default_project: default_project || ENV["BRAINTRUST_DEFAULT_PROJECT"],
|
|
45
47
|
app_url: app_url || ENV["BRAINTRUST_APP_URL"] || "https://www.braintrust.dev",
|
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "cases"
|
|
4
|
+
require_relative "../classifier"
|
|
4
5
|
|
|
5
6
|
module Braintrust
|
|
6
7
|
module Eval
|
|
7
8
|
# Holds all normalized, ready-to-execute eval components.
|
|
8
9
|
# Use Context.build to construct from raw user inputs.
|
|
9
10
|
class Context
|
|
10
|
-
attr_reader :task, :scorers, :
|
|
11
|
-
:project_id, :project_name, :state, :tracer_provider,
|
|
11
|
+
attr_reader :task, :scorers, :classifiers, :cases, :experiment_id,
|
|
12
|
+
:experiment_name, :project_id, :project_name, :state, :tracer_provider,
|
|
12
13
|
:on_progress, :parent_span_attr, :generation, :parameters
|
|
13
14
|
|
|
14
15
|
# @param task [Task] Normalized task wrapper
|
|
15
16
|
# @param scorers [Array<Scorer>] Normalized scorer wrappers
|
|
17
|
+
# @param classifiers [Array<Classifier>] Normalized classifier wrappers
|
|
16
18
|
# @param cases [Cases] Normalized eval cases
|
|
17
19
|
# @param experiment_id [String, nil] Experiment ID for logging and trace linkage
|
|
18
20
|
# @param experiment_name [String, nil] Experiment name, included in span attributes
|
|
@@ -24,11 +26,13 @@ module Braintrust
|
|
|
24
26
|
# @param parent_span_attr [String, nil] Formatted parent span identifier ("type:id"), linking spans to a parent context
|
|
25
27
|
# @param generation [Integer, nil] Generation number from the parent span context, used to link spans in a trace hierarchy
|
|
26
28
|
# @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument
|
|
27
|
-
def initialize(task:, scorers:, cases:,
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
def initialize(task:, scorers:, cases:, classifiers: [],
|
|
30
|
+
experiment_id: nil, experiment_name: nil, project_id: nil,
|
|
31
|
+
project_name: nil, state: nil, tracer_provider: nil, on_progress: nil,
|
|
32
|
+
parent_span_attr: nil, generation: nil, parameters: nil)
|
|
30
33
|
@task = task
|
|
31
34
|
@scorers = scorers
|
|
35
|
+
@classifiers = classifiers
|
|
32
36
|
@cases = cases
|
|
33
37
|
@experiment_id = experiment_id
|
|
34
38
|
@experiment_name = experiment_name
|
|
@@ -46,6 +50,7 @@ module Braintrust
|
|
|
46
50
|
# Delegates to Factory for normalization.
|
|
47
51
|
# @param task [Task, Proc, #call] Task to evaluate; wrapped into a {Task} if needed
|
|
48
52
|
# @param scorers [Array<Scorer, Proc, String, Scorer::ID, #call>] Scorers; each is normalized into a {Scorer}
|
|
53
|
+
# @param classifiers [Array<Classifier, Proc, #call>] Classifiers; each is normalized into a {Classifier}
|
|
49
54
|
# @param cases [Cases, Array, Enumerable] Eval cases; wrapped into {Cases} if needed
|
|
50
55
|
# @param experiment_id [String, nil] Experiment ID for logging
|
|
51
56
|
# @param experiment_name [String, nil] Experiment name, included in span attributes
|
|
@@ -57,14 +62,15 @@ module Braintrust
|
|
|
57
62
|
# @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation
|
|
58
63
|
# @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument
|
|
59
64
|
# @return [Context]
|
|
60
|
-
def self.build(task:, scorers:, cases:,
|
|
61
|
-
|
|
62
|
-
|
|
65
|
+
def self.build(task:, scorers:, cases:, classifiers: [],
|
|
66
|
+
experiment_id: nil, experiment_name: nil, project_id: nil,
|
|
67
|
+
project_name: nil, state: nil, tracer_provider: nil, on_progress: nil,
|
|
68
|
+
parent: nil, parameters: nil)
|
|
63
69
|
Factory.new(
|
|
64
70
|
state: state, tracer_provider: tracer_provider,
|
|
65
71
|
project_id: project_id, project_name: project_name
|
|
66
72
|
).build(
|
|
67
|
-
task: task, scorers: scorers, cases: cases,
|
|
73
|
+
task: task, scorers: scorers, classifiers: classifiers, cases: cases,
|
|
68
74
|
experiment_id: experiment_id, experiment_name: experiment_name,
|
|
69
75
|
on_progress: on_progress, parent: parent, parameters: parameters
|
|
70
76
|
)
|
|
@@ -86,17 +92,19 @@ module Braintrust
|
|
|
86
92
|
# Normalize raw inputs and construct a {Context}.
|
|
87
93
|
# @param task [Task, Proc, #call] Raw task
|
|
88
94
|
# @param scorers [Array] Raw scorers
|
|
95
|
+
# @param classifiers [Array] Raw classifiers
|
|
89
96
|
# @param cases [Cases, Array, Enumerable] Raw eval cases
|
|
90
97
|
# @param experiment_id [String, nil]
|
|
91
98
|
# @param experiment_name [String, nil]
|
|
92
99
|
# @param on_progress [Proc, nil]
|
|
93
100
|
# @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation
|
|
94
101
|
# @return [Context]
|
|
95
|
-
def build(task:, scorers:, cases:,
|
|
96
|
-
on_progress: nil, parent: nil, parameters: nil)
|
|
102
|
+
def build(task:, scorers:, cases:, classifiers: [], experiment_id: nil,
|
|
103
|
+
experiment_name: nil, on_progress: nil, parent: nil, parameters: nil)
|
|
97
104
|
Context.new(
|
|
98
105
|
task: normalize_task(task),
|
|
99
106
|
scorers: normalize_scorers(scorers),
|
|
107
|
+
classifiers: normalize_classifiers(classifiers),
|
|
100
108
|
cases: normalize_cases(cases),
|
|
101
109
|
experiment_id: experiment_id,
|
|
102
110
|
experiment_name: experiment_name,
|
|
@@ -188,6 +196,23 @@ module Braintrust
|
|
|
188
196
|
end
|
|
189
197
|
end
|
|
190
198
|
end
|
|
199
|
+
|
|
200
|
+
# @param raw [Array<Classifier, Proc, #call>]
|
|
201
|
+
# @return [Array<Classifier>]
|
|
202
|
+
def normalize_classifiers(raw)
|
|
203
|
+
raw.map do |classifier|
|
|
204
|
+
case classifier
|
|
205
|
+
when Braintrust::Classifier
|
|
206
|
+
classifier
|
|
207
|
+
when Proc
|
|
208
|
+
# Pass Proc/Lambda directly to preserve keyword arg info
|
|
209
|
+
Braintrust::Classifier.new(&classifier)
|
|
210
|
+
else
|
|
211
|
+
name = classifier.respond_to?(:name) ? classifier.name : nil
|
|
212
|
+
Braintrust::Classifier.new(name, &classifier.method(:call))
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
191
216
|
end
|
|
192
217
|
end
|
|
193
218
|
end
|
|
@@ -40,11 +40,12 @@ module Braintrust
|
|
|
40
40
|
# }
|
|
41
41
|
# )
|
|
42
42
|
class Evaluator
|
|
43
|
-
attr_accessor :task, :scorers, :parameters
|
|
43
|
+
attr_accessor :task, :scorers, :classifiers, :parameters
|
|
44
44
|
|
|
45
|
-
def initialize(task: nil, scorers: [], parameters: {})
|
|
45
|
+
def initialize(task: nil, scorers: [], classifiers: [], parameters: {})
|
|
46
46
|
@task = task
|
|
47
47
|
@scorers = scorers
|
|
48
|
+
@classifiers = classifiers
|
|
48
49
|
@parameters = parameters
|
|
49
50
|
end
|
|
50
51
|
|
|
@@ -68,6 +69,7 @@ module Braintrust
|
|
|
68
69
|
# @param project_id [String, nil] Project UUID (skips project creation)
|
|
69
70
|
# @param dataset [String, Hash, Dataset, Dataset::ID, nil] Dataset to fetch
|
|
70
71
|
# @param scorers [Array, nil] Additional scorers (merged with evaluator's own)
|
|
72
|
+
# @param classifiers [Array, nil] Additional classifiers (merged with evaluator's own)
|
|
71
73
|
# @param parent [Hash, nil] Parent span context
|
|
72
74
|
# @param state [State, nil] Braintrust state
|
|
73
75
|
# @param update [Boolean] If true, allow reusing existing experiment (default: false)
|
|
@@ -75,16 +77,19 @@ module Braintrust
|
|
|
75
77
|
# @return [Result]
|
|
76
78
|
def run(cases, on_progress: nil, quiet: false,
|
|
77
79
|
project: nil, experiment: nil, project_id: nil,
|
|
78
|
-
dataset: nil, scorers: nil, parent: nil,
|
|
80
|
+
dataset: nil, scorers: nil, classifiers: nil, parent: nil,
|
|
79
81
|
state: nil, update: false, tracer_provider: nil,
|
|
80
82
|
parameters: nil)
|
|
81
83
|
all_scorers = scorers ? self.scorers + scorers : self.scorers
|
|
84
|
+
all_classifiers = classifiers ?
|
|
85
|
+
self.classifiers + classifiers :
|
|
86
|
+
self.classifiers
|
|
82
87
|
Braintrust::Eval.run(
|
|
83
|
-
task: task, scorers: all_scorers,
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
state: state, update: update,
|
|
87
|
-
parameters: parameters
|
|
88
|
+
task: task, scorers: all_scorers, classifiers: all_classifiers,
|
|
89
|
+
cases: cases, dataset: dataset, project: project,
|
|
90
|
+
experiment: experiment, project_id: project_id, parent: parent,
|
|
91
|
+
on_progress: on_progress, quiet: quiet, state: state, update: update,
|
|
92
|
+
tracer_provider: tracer_provider, parameters: parameters
|
|
88
93
|
)
|
|
89
94
|
end
|
|
90
95
|
end
|
|
@@ -9,7 +9,7 @@ module Braintrust
|
|
|
9
9
|
# Contains experiment metadata, errors, timing information, and raw score data
|
|
10
10
|
class Result
|
|
11
11
|
attr_reader :experiment_id, :experiment_name, :project_id, :project_name,
|
|
12
|
-
:permalink, :errors, :duration, :scores
|
|
12
|
+
:permalink, :errors, :duration, :scores, :classifications
|
|
13
13
|
|
|
14
14
|
# Create a new result
|
|
15
15
|
# @param experiment_id [String] The experiment ID
|
|
@@ -20,8 +20,9 @@ module Braintrust
|
|
|
20
20
|
# @param errors [Array<String>] List of errors that occurred
|
|
21
21
|
# @param duration [Float] Duration in seconds
|
|
22
22
|
# @param scores [Hash, nil] Raw score data { scorer_name => Array<Numeric> }
|
|
23
|
+
# @param classifications [Hash, nil] Classification results { name => Array<ClassificationItem> }, nil when no classifiers ran
|
|
23
24
|
def initialize(experiment_id:, experiment_name:, project_id:, project_name:,
|
|
24
|
-
permalink:, errors:, duration:, scores: nil)
|
|
25
|
+
permalink:, errors:, duration:, scores: nil, classifications: nil)
|
|
25
26
|
@experiment_id = experiment_id
|
|
26
27
|
@experiment_name = experiment_name
|
|
27
28
|
@project_id = project_id
|
|
@@ -30,6 +31,7 @@ module Braintrust
|
|
|
30
31
|
@errors = errors
|
|
31
32
|
@duration = duration
|
|
32
33
|
@scores = scores
|
|
34
|
+
@classifications = classifications
|
|
33
35
|
end
|
|
34
36
|
|
|
35
37
|
# Check if the evaluation was successful (no errors)
|
|
@@ -27,8 +27,9 @@ module Braintrust
|
|
|
27
27
|
@eval_context = eval_context
|
|
28
28
|
@tracer = eval_context.tracer_provider.tracer("braintrust-eval")
|
|
29
29
|
|
|
30
|
-
#
|
|
30
|
+
# Mutexes for thread-safe result collection
|
|
31
31
|
@score_mutex = Mutex.new
|
|
32
|
+
@classification_mutex = Mutex.new
|
|
32
33
|
end
|
|
33
34
|
|
|
34
35
|
# Run evaluation and return Result
|
|
@@ -39,6 +40,7 @@ module Braintrust
|
|
|
39
40
|
eval_cases = eval_context.cases
|
|
40
41
|
errors = Queue.new
|
|
41
42
|
@scores = {} # Reset for each run: { scorer_name => Array<Numeric> }
|
|
43
|
+
@classifications = {} # Reset for each run: { classifier_name => Array<ClassificationItem> }
|
|
42
44
|
|
|
43
45
|
if parallelism && parallelism > 1
|
|
44
46
|
Internal::ThreadPool.each(eval_cases, parallelism: parallelism) do |eval_case|
|
|
@@ -69,7 +71,8 @@ module Braintrust
|
|
|
69
71
|
permalink: permalink,
|
|
70
72
|
errors: error_array,
|
|
71
73
|
duration: duration,
|
|
72
|
-
scores: @scores
|
|
74
|
+
scores: @scores,
|
|
75
|
+
classifications: @classifications.empty? ? nil : @classifications
|
|
73
76
|
)
|
|
74
77
|
end
|
|
75
78
|
|
|
@@ -119,6 +122,17 @@ module Braintrust
|
|
|
119
122
|
errors << "Scorers failed for input '#{kase.input}': #{e.message}"
|
|
120
123
|
end
|
|
121
124
|
|
|
125
|
+
# Run classifiers (independent of scorers; errors do not abort eval)
|
|
126
|
+
classifier_errors = run_classifiers(kase, eval_span)
|
|
127
|
+
unless classifier_errors.empty?
|
|
128
|
+
existing_metadata = kase.metadata || {}
|
|
129
|
+
classifier_errors_metadata = existing_metadata.merge(classifier_errors: classifier_errors)
|
|
130
|
+
set_json_attr(eval_span, "braintrust.metadata", classifier_errors_metadata)
|
|
131
|
+
classifier_errors.each do |classifier_name, message|
|
|
132
|
+
errors << "Classifier '#{classifier_name}' failed for input '#{kase.input}': #{message}"
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
122
136
|
# Set output after task completes
|
|
123
137
|
set_json_attr(eval_span, "braintrust.output_json", {output: kase.output})
|
|
124
138
|
|
|
@@ -318,6 +332,104 @@ module Braintrust
|
|
|
318
332
|
score_results.each { |s| (@scores[s[:name]] ||= []) << s[:score] }
|
|
319
333
|
end
|
|
320
334
|
end
|
|
335
|
+
|
|
336
|
+
# Run all classifiers for a case. Classifier errors are non-fatal and stored in metadata.
|
|
337
|
+
# @param kase [CaseContext] The per-case context (output must be populated)
|
|
338
|
+
# @param eval_span [OpenTelemetry::Trace::Span] The eval span for this case
|
|
339
|
+
# @return [Hash] classifier_errors map (name -> error message), empty if no errors
|
|
340
|
+
def run_classifiers(kase, eval_span)
|
|
341
|
+
return {} if eval_context.classifiers.empty?
|
|
342
|
+
|
|
343
|
+
classifier_kwargs = {
|
|
344
|
+
input: kase.input,
|
|
345
|
+
expected: kase.expected,
|
|
346
|
+
output: kase.output,
|
|
347
|
+
metadata: kase.metadata || {},
|
|
348
|
+
trace: kase.trace,
|
|
349
|
+
parameters: eval_context.parameters || {}
|
|
350
|
+
}
|
|
351
|
+
classifier_input = {
|
|
352
|
+
input: kase.input,
|
|
353
|
+
expected: kase.expected,
|
|
354
|
+
output: kase.output,
|
|
355
|
+
metadata: kase.metadata || {},
|
|
356
|
+
parameters: eval_context.parameters || {}
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
case_classifications = {}
|
|
360
|
+
classifier_errors = {}
|
|
361
|
+
|
|
362
|
+
eval_context.classifiers.each_with_index do |classifier, index|
|
|
363
|
+
classifier_name = classifier.name || "classifier_#{index}"
|
|
364
|
+
begin
|
|
365
|
+
results = run_classifier(classifier, classifier_kwargs, classifier_input)
|
|
366
|
+
results.each do |item|
|
|
367
|
+
item_name = item[:name]
|
|
368
|
+
classification_item = item.except(:name)
|
|
369
|
+
(case_classifications[item_name] ||= []) << classification_item
|
|
370
|
+
end
|
|
371
|
+
collect_classifications(results)
|
|
372
|
+
rescue => e
|
|
373
|
+
Braintrust::Log.warn("[Classifier] #{classifier_name} failed: #{e.message}")
|
|
374
|
+
classifier_errors[classifier_name] = e.message
|
|
375
|
+
end
|
|
376
|
+
end
|
|
377
|
+
|
|
378
|
+
unless case_classifications.empty?
|
|
379
|
+
set_json_attr(eval_span, "braintrust.classifications", case_classifications)
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
classifier_errors
|
|
383
|
+
end
|
|
384
|
+
|
|
385
|
+
# Run a single classifier inside its own span.
|
|
386
|
+
# @param classifier [Classifier] The classifier to run
|
|
387
|
+
# @param classifier_kwargs [Hash] Keyword arguments for the classifier
|
|
388
|
+
# @param classifier_input [Hash] Input to log on the span
|
|
389
|
+
# @return [Array<Hash>] Normalized classification results from the classifier
|
|
390
|
+
def run_classifier(classifier, classifier_kwargs, classifier_input)
|
|
391
|
+
tracer.in_span(classifier.name) do |classifier_span|
|
|
392
|
+
classifier_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
|
|
393
|
+
set_json_attr(classifier_span, "braintrust.span_attributes", build_classifier_span_attributes(classifier.name))
|
|
394
|
+
set_json_attr(classifier_span, "braintrust.input_json", classifier_input)
|
|
395
|
+
|
|
396
|
+
classification_results = classifier.call(**classifier_kwargs)
|
|
397
|
+
|
|
398
|
+
# Build output dict keyed by name -> array of items (for span logging)
|
|
399
|
+
output_by_name = {}
|
|
400
|
+
classification_results.each do |item|
|
|
401
|
+
(output_by_name[item[:name]] ||= []) << item.except(:name)
|
|
402
|
+
end
|
|
403
|
+
|
|
404
|
+
set_json_attr(classifier_span, "braintrust.output_json", output_by_name)
|
|
405
|
+
|
|
406
|
+
classification_results
|
|
407
|
+
rescue => e
|
|
408
|
+
record_span_error(classifier_span, e, "ClassifierError")
|
|
409
|
+
raise
|
|
410
|
+
end
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
# Build span_attributes for a classifier span.
|
|
414
|
+
# @param classifier_name [String] The classifier name
|
|
415
|
+
# @return [Hash]
|
|
416
|
+
def build_classifier_span_attributes(classifier_name)
|
|
417
|
+
attrs = {type: "classifier", name: classifier_name, purpose: "scorer"}
|
|
418
|
+
attrs[:generation] = eval_context.generation if eval_context.generation
|
|
419
|
+
attrs
|
|
420
|
+
end
|
|
421
|
+
|
|
422
|
+
# Collect classification results into the global accumulator (thread-safe).
|
|
423
|
+
# Converts Classification to ClassificationItem by dropping :name.
|
|
424
|
+
# @param classification_results [Array<Hash>] Classification results from a classifier
|
|
425
|
+
def collect_classifications(classification_results)
|
|
426
|
+
@classification_mutex.synchronize do
|
|
427
|
+
classification_results.each do |item|
|
|
428
|
+
item_name = item[:name]
|
|
429
|
+
(@classifications[item_name] ||= []) << item.except(:name)
|
|
430
|
+
end
|
|
431
|
+
end
|
|
432
|
+
end
|
|
321
433
|
end
|
|
322
434
|
end
|
|
323
435
|
end
|
data/lib/braintrust/eval.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "classifier"
|
|
3
4
|
require_relative "scorer"
|
|
4
5
|
require_relative "task"
|
|
5
6
|
require_relative "functions"
|
|
@@ -160,7 +161,10 @@ module Braintrust
|
|
|
160
161
|
# - String: dataset name (fetches from same project)
|
|
161
162
|
# - Hash: {name:, id:, project:, version:, limit:}
|
|
162
163
|
# @param task [#call] The task to evaluate (must be callable)
|
|
163
|
-
# @param scorers [Array<String, Scorer, #call
|
|
164
|
+
# @param scorers [Array<String, Scorer, #call>, nil] The scorers to use (String names, Scorer objects, or callables).
|
|
165
|
+
# At least one of scorers or classifiers must be provided.
|
|
166
|
+
# @param classifiers [Array<Classifier, #call>, nil] The classifiers to use.
|
|
167
|
+
# At least one of scorers or classifiers must be provided.
|
|
164
168
|
# @param on_progress [#call, nil] Optional callback fired after each test case.
|
|
165
169
|
# Receives a Hash: {"data" => output, "scores" => {name => value}} on success,
|
|
166
170
|
# or {"error" => message} on failure.
|
|
@@ -177,13 +181,16 @@ module Braintrust
|
|
|
177
181
|
# @param parent [Hash, nil] Parent span context ({object_type:, object_id:, generation:})
|
|
178
182
|
# @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument
|
|
179
183
|
# @return [Result]
|
|
180
|
-
def run(task:, scorers
|
|
181
|
-
cases: nil, dataset: nil, on_progress: nil,
|
|
184
|
+
def run(task:, scorers: nil, classifiers: nil, project: nil,
|
|
185
|
+
experiment: nil, cases: nil, dataset: nil, on_progress: nil,
|
|
182
186
|
parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
|
|
183
187
|
state: nil, tracer_provider: nil, project_id: nil, parent: nil,
|
|
184
188
|
parameters: nil)
|
|
185
189
|
# Validate required parameters
|
|
186
|
-
validate_params!(task: task, scorers: scorers,
|
|
190
|
+
validate_params!(task: task, scorers: scorers,
|
|
191
|
+
classifiers: classifiers, cases: cases, dataset: dataset)
|
|
192
|
+
scorers ||= []
|
|
193
|
+
classifiers ||= []
|
|
187
194
|
|
|
188
195
|
experiment_id = nil
|
|
189
196
|
project_name = project
|
|
@@ -216,6 +223,7 @@ module Braintrust
|
|
|
216
223
|
context = Context.build(
|
|
217
224
|
task: task,
|
|
218
225
|
scorers: scorers,
|
|
226
|
+
classifiers: classifiers,
|
|
219
227
|
cases: cases,
|
|
220
228
|
experiment_id: experiment_id,
|
|
221
229
|
experiment_name: experiment,
|
|
@@ -245,9 +253,19 @@ module Braintrust
|
|
|
245
253
|
|
|
246
254
|
# Validate required parameters
|
|
247
255
|
# @raise [ArgumentError] if validation fails
|
|
248
|
-
def validate_params!(task:, scorers:, cases:, dataset:)
|
|
256
|
+
def validate_params!(task:, scorers:, classifiers:, cases:, dataset:)
|
|
249
257
|
raise ArgumentError, "task is required" unless task
|
|
250
|
-
|
|
258
|
+
|
|
259
|
+
# Validate task is callable before anything else
|
|
260
|
+
unless task.respond_to?(:call)
|
|
261
|
+
raise ArgumentError, "task must be callable (respond to :call)"
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
has_scorers = scorers && !scorers.empty?
|
|
265
|
+
has_classifiers = classifiers && !classifiers.empty?
|
|
266
|
+
unless has_scorers || has_classifiers
|
|
267
|
+
raise ArgumentError, "at least one of scorers or classifiers is required"
|
|
268
|
+
end
|
|
251
269
|
|
|
252
270
|
# Validate cases and dataset are mutually exclusive
|
|
253
271
|
if cases && dataset
|
|
@@ -258,11 +276,6 @@ module Braintrust
|
|
|
258
276
|
unless cases || dataset
|
|
259
277
|
raise ArgumentError, "must specify either 'cases' or 'dataset'"
|
|
260
278
|
end
|
|
261
|
-
|
|
262
|
-
# Validate task is callable
|
|
263
|
-
unless task.respond_to?(:call)
|
|
264
|
-
raise ArgumentError, "task must be callable (respond to :call)"
|
|
265
|
-
end
|
|
266
279
|
end
|
|
267
280
|
|
|
268
281
|
# Resolve project by name or ID. Creates if needed.
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Braintrust
|
|
6
|
+
module Internal
|
|
7
|
+
# Resolves the Braintrust API key from explicit options, ENV, or the nearest
|
|
8
|
+
# .braintrust.json file without mutating the process environment.
|
|
9
|
+
class ApiKeyResolver
|
|
10
|
+
ENV_KEY = "BRAINTRUST_API_KEY"
|
|
11
|
+
CONFIG_FILE = ".braintrust.json"
|
|
12
|
+
SEARCH_PARENT_LIMIT = 64
|
|
13
|
+
|
|
14
|
+
def self.resolve(explicit_api_key: nil, start_dir: Dir.pwd)
|
|
15
|
+
return explicit_api_key unless explicit_api_key.nil?
|
|
16
|
+
|
|
17
|
+
env_api_key = ENV[ENV_KEY]
|
|
18
|
+
return env_api_key if env_api_key && !env_api_key.strip.empty?
|
|
19
|
+
|
|
20
|
+
find_file_api_key(start_dir)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def self.find_file_api_key(start_dir = Dir.pwd)
|
|
24
|
+
dir = start_dir
|
|
25
|
+
|
|
26
|
+
0.upto(SEARCH_PARENT_LIMIT) do
|
|
27
|
+
config_path = File.join(dir, CONFIG_FILE)
|
|
28
|
+
|
|
29
|
+
begin
|
|
30
|
+
contents = File.read(config_path)
|
|
31
|
+
rescue Errno::ENOENT, Errno::ENOTDIR
|
|
32
|
+
# Missing candidates are not boundaries; keep walking upward.
|
|
33
|
+
rescue
|
|
34
|
+
return nil
|
|
35
|
+
else
|
|
36
|
+
return parse_api_key(contents)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
parent = File.dirname(dir)
|
|
40
|
+
break if parent == dir
|
|
41
|
+
dir = parent
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
nil
|
|
45
|
+
rescue
|
|
46
|
+
nil
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def self.parse_api_key(contents)
|
|
50
|
+
config = JSON.parse(contents)
|
|
51
|
+
return nil unless config.is_a?(Hash)
|
|
52
|
+
|
|
53
|
+
value = config[ENV_KEY]
|
|
54
|
+
(value.is_a?(String) && !value.strip.empty?) ? value : nil
|
|
55
|
+
rescue JSON::ParserError, TypeError
|
|
56
|
+
nil
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private_class_method :find_file_api_key, :parse_api_key
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -20,6 +20,11 @@ module Braintrust
|
|
|
20
20
|
{"name" => scorer_name}
|
|
21
21
|
end
|
|
22
22
|
entry = {"scores" => scores}
|
|
23
|
+
classifiers = (evaluator.classifiers || []).each_with_index.map do |classifier, i|
|
|
24
|
+
classifier_name = classifier.respond_to?(:name) ? classifier.name : "classifier_#{i}"
|
|
25
|
+
{"name" => classifier_name}
|
|
26
|
+
end
|
|
27
|
+
entry["classifiers"] = classifiers unless classifiers.empty?
|
|
23
28
|
params = serialize_parameters(evaluator.parameters)
|
|
24
29
|
entry["parameters"] = params if params
|
|
25
30
|
result[name] = entry
|
data/lib/braintrust/setup.rb
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# require "braintrust/setup"
|
|
12
12
|
#
|
|
13
13
|
# Environment variables:
|
|
14
|
-
# BRAINTRUST_API_KEY - Required for tracing to work
|
|
14
|
+
# BRAINTRUST_API_KEY - Required for tracing to work; falls back to .braintrust.json
|
|
15
15
|
# BRAINTRUST_AUTO_INSTRUMENT - Set to "false" to disable (default: true)
|
|
16
16
|
# BRAINTRUST_INSTRUMENT_ONLY - Comma-separated whitelist
|
|
17
17
|
# BRAINTRUST_INSTRUMENT_EXCEPT - Comma-separated blacklist
|
data/lib/braintrust/state.rb
CHANGED
|
@@ -6,6 +6,8 @@ module Braintrust
|
|
|
6
6
|
# State object that holds Braintrust configuration
|
|
7
7
|
# Thread-safe global state management
|
|
8
8
|
class State
|
|
9
|
+
class MissingAPIKeyError < ArgumentError; end
|
|
10
|
+
|
|
9
11
|
attr_reader :api_key, :org_name, :org_id, :default_project, :app_url, :api_url, :proxy_url, :logged_in, :config
|
|
10
12
|
|
|
11
13
|
@mutex = Mutex.new
|
|
@@ -66,7 +68,7 @@ module Braintrust
|
|
|
66
68
|
def initialize(api_key: nil, org_name: nil, org_id: nil, default_project: nil, app_url: nil, api_url: nil, proxy_url: nil, blocking_login: false, enable_tracing: true, tracer_provider: nil, config: nil, exporter: nil)
|
|
67
69
|
# Instance-level mutex for thread-safe login
|
|
68
70
|
@login_mutex = Mutex.new
|
|
69
|
-
raise
|
|
71
|
+
raise MissingAPIKeyError, "api_key is required" if api_key.nil? || api_key.empty?
|
|
70
72
|
|
|
71
73
|
@api_key = api_key
|
|
72
74
|
@org_name = org_name
|
|
@@ -101,6 +103,11 @@ module Braintrust
|
|
|
101
103
|
end
|
|
102
104
|
end
|
|
103
105
|
|
|
106
|
+
def api_key!
|
|
107
|
+
raise MissingAPIKeyError, "api_key is required" if @api_key.nil? || @api_key.empty?
|
|
108
|
+
@api_key
|
|
109
|
+
end
|
|
110
|
+
|
|
104
111
|
# Thread-safe global state getter
|
|
105
112
|
def self.global
|
|
106
113
|
@mutex.synchronize { @global_state }
|
|
@@ -121,9 +128,10 @@ module Braintrust
|
|
|
121
128
|
@login_mutex.synchronize do
|
|
122
129
|
# Return early if already logged in
|
|
123
130
|
return self if @logged_in
|
|
131
|
+
api_key = api_key!
|
|
124
132
|
|
|
125
133
|
result = API::Internal::Auth.login(
|
|
126
|
-
api_key:
|
|
134
|
+
api_key: api_key,
|
|
127
135
|
app_url: @app_url,
|
|
128
136
|
org_name: @org_name
|
|
129
137
|
)
|
|
@@ -167,6 +175,9 @@ module Braintrust
|
|
|
167
175
|
login
|
|
168
176
|
Log.debug("Background login succeeded")
|
|
169
177
|
break
|
|
178
|
+
rescue MissingAPIKeyError => e
|
|
179
|
+
Log.debug("Background login skipped: #{e.message}")
|
|
180
|
+
break
|
|
170
181
|
rescue => e
|
|
171
182
|
retry_count += 1
|
|
172
183
|
delay = [0.001 * 2**(retry_count - 1), max_delay].min
|
|
@@ -190,7 +201,7 @@ module Braintrust
|
|
|
190
201
|
# Raises ArgumentError if state is invalid
|
|
191
202
|
# @return [self]
|
|
192
203
|
def validate
|
|
193
|
-
|
|
204
|
+
api_key!
|
|
194
205
|
raise ArgumentError, "api_url is required" if @api_url.nil? || @api_url.empty?
|
|
195
206
|
raise ArgumentError, "app_url is required" if @app_url.nil? || @app_url.empty?
|
|
196
207
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "opentelemetry/exporter/otlp"
|
|
4
|
+
require_relative "../state"
|
|
4
5
|
|
|
5
6
|
module Braintrust
|
|
6
7
|
module Trace
|
|
@@ -18,6 +19,8 @@ module Braintrust
|
|
|
18
19
|
FAILURE = OpenTelemetry::SDK::Trace::Export::FAILURE
|
|
19
20
|
|
|
20
21
|
def initialize(endpoint:, api_key:)
|
|
22
|
+
raise State::MissingAPIKeyError, "api_key is required" if api_key.nil? || api_key.empty?
|
|
23
|
+
|
|
21
24
|
super(endpoint: endpoint, headers: {"Authorization" => "Bearer #{api_key}"})
|
|
22
25
|
end
|
|
23
26
|
|
data/lib/braintrust/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: braintrust
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Braintrust
|
|
@@ -90,6 +90,7 @@ files:
|
|
|
90
90
|
- lib/braintrust/api/internal/btql.rb
|
|
91
91
|
- lib/braintrust/api/internal/experiments.rb
|
|
92
92
|
- lib/braintrust/api/internal/projects.rb
|
|
93
|
+
- lib/braintrust/classifier.rb
|
|
93
94
|
- lib/braintrust/config.rb
|
|
94
95
|
- lib/braintrust/contrib.rb
|
|
95
96
|
- lib/braintrust/contrib/anthropic/deprecated.rb
|
|
@@ -147,6 +148,7 @@ files:
|
|
|
147
148
|
- lib/braintrust/eval/summary.rb
|
|
148
149
|
- lib/braintrust/eval/trace.rb
|
|
149
150
|
- lib/braintrust/functions.rb
|
|
151
|
+
- lib/braintrust/internal/api_key_resolver.rb
|
|
150
152
|
- lib/braintrust/internal/callable.rb
|
|
151
153
|
- lib/braintrust/internal/encoding.rb
|
|
152
154
|
- lib/braintrust/internal/env.rb
|
|
@@ -213,7 +215,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
213
215
|
- !ruby/object:Gem::Version
|
|
214
216
|
version: '0'
|
|
215
217
|
requirements: []
|
|
216
|
-
rubygems_version:
|
|
218
|
+
rubygems_version: 4.0.10
|
|
217
219
|
specification_version: 4
|
|
218
220
|
summary: Ruby SDK for Braintrust
|
|
219
221
|
test_files: []
|