braintrust 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +71 -2
- data/lib/braintrust/api/datasets.rb +10 -0
- data/lib/braintrust/api/internal/experiments.rb +1 -1
- data/lib/braintrust/dataset.rb +10 -6
- data/lib/braintrust/eval/evaluator.rb +72 -0
- data/lib/braintrust/eval/functions.rb +44 -10
- data/lib/braintrust/eval/runner.rb +55 -13
- data/lib/braintrust/eval/scorer.rb +4 -0
- data/lib/braintrust/eval.rb +97 -50
- data/lib/braintrust/server/auth/clerk_token.rb +68 -0
- data/lib/braintrust/server/auth/no_auth.rb +14 -0
- data/lib/braintrust/server/handlers/eval.rb +217 -0
- data/lib/braintrust/server/handlers/health.rb +16 -0
- data/lib/braintrust/server/handlers/list.rb +74 -0
- data/lib/braintrust/server/middleware/auth.rb +29 -0
- data/lib/braintrust/server/middleware/cors.rb +87 -0
- data/lib/braintrust/server/rack/app.rb +38 -0
- data/lib/braintrust/server/rack.rb +36 -0
- data/lib/braintrust/server/router.rb +37 -0
- data/lib/braintrust/server/sse.rb +52 -0
- data/lib/braintrust/server.rb +8 -0
- data/lib/braintrust/trace/span_exporter.rb +36 -0
- data/lib/braintrust/trace.rb +3 -4
- data/lib/braintrust/version.rb +1 -1
- metadata +15 -1
data/lib/braintrust/eval.rb
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "eval/scorer"
|
|
4
|
+
require_relative "eval/evaluator"
|
|
4
5
|
require_relative "eval/runner"
|
|
6
|
+
require_relative "eval/functions"
|
|
5
7
|
require_relative "api/internal/projects"
|
|
6
8
|
require_relative "api/internal/experiments"
|
|
7
9
|
require_relative "dataset"
|
|
@@ -186,14 +188,17 @@ module Braintrust
|
|
|
186
188
|
end
|
|
187
189
|
|
|
188
190
|
# Run an evaluation
|
|
189
|
-
# @param project [String] The project name
|
|
190
|
-
# @param experiment [String] The experiment name
|
|
191
|
+
# @param project [String, nil] The project name (triggers full API mode: creates project + experiment)
|
|
192
|
+
# @param experiment [String, nil] The experiment name
|
|
191
193
|
# @param cases [Array, Enumerable, nil] The test cases (mutually exclusive with dataset)
|
|
192
194
|
# @param dataset [String, Hash, nil] Dataset to fetch (mutually exclusive with cases)
|
|
193
195
|
# - String: dataset name (fetches from same project)
|
|
194
196
|
# - Hash: {name:, id:, project:, version:, limit:}
|
|
195
197
|
# @param task [#call] The task to evaluate (must be callable)
|
|
196
198
|
# @param scorers [Array<Scorer, #call>] The scorers to use (Scorer objects or callables)
|
|
199
|
+
# @param on_progress [#call, nil] Optional callback fired after each test case.
|
|
200
|
+
# Receives a Hash: {"data" => output, "scores" => {name => value}} on success,
|
|
201
|
+
# or {"error" => message} on failure.
|
|
197
202
|
# @param parallelism [Integer] Number of parallel workers (default: 1).
|
|
198
203
|
# When parallelism > 1, test cases are executed concurrently using a thread pool.
|
|
199
204
|
# The task and scorers MUST be thread-safe when using parallelism > 1.
|
|
@@ -201,53 +206,45 @@ module Braintrust
|
|
|
201
206
|
# @param metadata [Hash] Optional experiment metadata
|
|
202
207
|
# @param update [Boolean] If true, allow reusing existing experiment (default: false)
|
|
203
208
|
# @param quiet [Boolean] If true, suppress result output (default: false)
|
|
204
|
-
# @param
|
|
209
|
+
# @param state [State, nil] Braintrust state (defaults to global state)
|
|
205
210
|
# @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider (defaults to global)
|
|
206
211
|
# @return [Result]
|
|
207
|
-
def run(
|
|
208
|
-
cases: nil, dataset: nil,
|
|
212
|
+
def run(task:, scorers:, project: nil, experiment: nil,
|
|
213
|
+
cases: nil, dataset: nil, on_progress: nil,
|
|
209
214
|
parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
|
|
210
|
-
|
|
215
|
+
state: nil, tracer_provider: nil, project_id: nil, parent: nil)
|
|
211
216
|
# Validate required parameters
|
|
212
|
-
validate_params!(
|
|
213
|
-
cases: cases, dataset: dataset, task: task, scorers: scorers)
|
|
217
|
+
validate_params!(task: task, scorers: scorers, cases: cases, dataset: dataset)
|
|
214
218
|
|
|
215
|
-
#
|
|
216
|
-
|
|
219
|
+
# Resolve any ScorerId entries to real Scorer objects
|
|
220
|
+
scorers = resolve_scorers(scorers, state: state, tracer_provider: tracer_provider)
|
|
217
221
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
api.login
|
|
222
|
+
experiment_id = nil
|
|
223
|
+
project_name = project
|
|
221
224
|
|
|
222
|
-
#
|
|
223
|
-
|
|
224
|
-
|
|
225
|
+
# Full API mode: project name or project_id provided, resolve via API
|
|
226
|
+
if project || project_id
|
|
227
|
+
state ||= Braintrust.current_state
|
|
228
|
+
state.login
|
|
225
229
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
dataset_version = resolved[:dataset_version]
|
|
231
|
-
end
|
|
232
|
-
|
|
233
|
-
# Register project and experiment via internal API
|
|
234
|
-
projects_api = API::Internal::Projects.new(api.state)
|
|
235
|
-
experiments_api = API::Internal::Experiments.new(api.state)
|
|
230
|
+
if dataset
|
|
231
|
+
resolved = resolve_dataset(dataset, project, state)
|
|
232
|
+
cases = resolved[:cases]
|
|
233
|
+
end
|
|
236
234
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
project_name = project_result["name"]
|
|
235
|
+
# Skip experiment creation for remote evals (parent present).
|
|
236
|
+
# The OTLP backend creates experiments from ingested spans.
|
|
237
|
+
unless parent
|
|
238
|
+
project_id, project_name = resolve_project(state, project, project_id)
|
|
239
|
+
experiment_id = create_experiment(
|
|
240
|
+
state, experiment, project_id,
|
|
241
|
+
update: update, tags: tags, metadata: metadata,
|
|
242
|
+
dataset_id: resolved&.dig(:dataset_id),
|
|
243
|
+
dataset_version: resolved&.dig(:dataset_version)
|
|
244
|
+
)
|
|
245
|
+
parent = {object_type: "experiment_id", object_id: experiment_id}
|
|
246
|
+
end
|
|
247
|
+
end
|
|
251
248
|
|
|
252
249
|
# Instantiate Runner and run evaluation
|
|
253
250
|
runner = Runner.new(
|
|
@@ -257,8 +254,10 @@ module Braintrust
|
|
|
257
254
|
project_name: project_name,
|
|
258
255
|
task: task,
|
|
259
256
|
scorers: scorers,
|
|
260
|
-
|
|
261
|
-
tracer_provider: tracer_provider
|
|
257
|
+
state: state,
|
|
258
|
+
tracer_provider: tracer_provider,
|
|
259
|
+
on_progress: on_progress,
|
|
260
|
+
parent: parent
|
|
262
261
|
)
|
|
263
262
|
result = runner.run(cases, parallelism: parallelism)
|
|
264
263
|
|
|
@@ -276,11 +275,29 @@ module Braintrust
|
|
|
276
275
|
puts result.to_pretty
|
|
277
276
|
end
|
|
278
277
|
|
|
278
|
+
# Swap every ScorerId in the list for the scorer it refers to; all other entries are returned untouched.
# @param scorers [Array] Scorer objects, callables, or ScorerId references
# @param state [State, nil] Braintrust state forwarded to the ScorerId lookup
# @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider forwarded to the lookup
# @return [Array<Scorer, #call>] Same order as the input, with ScorerId entries replaced
def resolve_scorers(scorers, state: nil, tracer_provider: nil)
  scorers.map do |entry|
    # Anything that is not a ScorerId is already usable as a scorer.
    next entry unless entry.is_a?(ScorerId)

    Functions.scorer_by_id(
      id: entry.function_id,
      version: entry.version,
      state: state,
      tracer_provider: tracer_provider
    )
  end
end
|
|
297
|
+
|
|
279
298
|
# Validate required parameters
|
|
280
299
|
# @raise [ArgumentError] if validation fails
|
|
281
|
-
def validate_params!(
|
|
282
|
-
raise ArgumentError, "project is required" unless project
|
|
283
|
-
raise ArgumentError, "experiment is required" unless experiment
|
|
300
|
+
def validate_params!(task:, scorers:, cases:, dataset:)
|
|
284
301
|
raise ArgumentError, "task is required" unless task
|
|
285
302
|
raise ArgumentError, "scorers is required" unless scorers
|
|
286
303
|
|
|
@@ -300,27 +317,57 @@ module Braintrust
|
|
|
300
317
|
end
|
|
301
318
|
end
|
|
302
319
|
|
|
320
|
+
# Work out the [project_id, project_name] pair for an eval run.
# A caller-supplied project_id is used as-is and paired with whatever name was given
# (possibly nil); otherwise the project is resolved by name through the internal
# Projects API `create` call (presumably get-or-create — confirm against that client).
# @param state [State] Braintrust state handed to the Projects API client
# @param project [String, nil] Project name
# @param project_id [String, nil] Already-known project ID, if any
# @return [Array(String, String)] [project_id, project_name]
def resolve_project(state, project, project_id)
  return [project_id, project] if project_id

  created = API::Internal::Projects.new(state).create(name: project)
  [created["id"], created["name"]]
end
|
|
330
|
+
|
|
331
|
+
# Register an experiment under the given project via the internal Experiments API.
# @param state [State] Braintrust state handed to the Experiments API client
# @param name [String, nil] Experiment name
# @param project_id [String] ID of the owning project
# @param update [Boolean] When true, `ensure_new` is sent as false (and vice versa)
# @param tags [Array, nil] Experiment tags, forwarded unchanged
# @param metadata [Hash, nil] Experiment metadata, forwarded unchanged
# @param dataset_id [String, nil] Dataset to link, forwarded unchanged
# @param dataset_version [String, nil] Dataset version to link, forwarded unchanged
# @return [String] experiment_id (the "id" field of the API response)
def create_experiment(state, name, project_id,
  update: false, tags: nil, metadata: nil,
  dataset_id: nil, dataset_version: nil)
  attributes = {
    name: name,
    project_id: project_id,
    ensure_new: !update,
    tags: tags,
    metadata: metadata,
    dataset_id: dataset_id,
    dataset_version: dataset_version
  }
  client = API::Internal::Experiments.new(state)
  client.create(**attributes)["id"]
end
|
|
347
|
+
|
|
303
348
|
# Resolve dataset parameter to cases with metadata for experiment linking
|
|
304
349
|
# @param dataset [String, Hash, Dataset] Dataset specifier or instance
|
|
305
350
|
# @param project [String] Project name (used as default if not specified)
|
|
306
|
-
# @param
|
|
351
|
+
# @param state [State] Braintrust state
|
|
307
352
|
# @return [Hash] Hash with :cases, :dataset_id, and :dataset_version
|
|
308
|
-
def resolve_dataset(dataset, project,
|
|
353
|
+
def resolve_dataset(dataset, project, state)
|
|
309
354
|
limit = nil
|
|
310
355
|
|
|
311
356
|
dataset_obj = case dataset
|
|
312
357
|
when Dataset
|
|
313
358
|
dataset
|
|
359
|
+
when DatasetId
|
|
360
|
+
Dataset.new(id: dataset.id, state: state)
|
|
314
361
|
when String
|
|
315
|
-
Dataset.new(name: dataset, project: project,
|
|
362
|
+
Dataset.new(name: dataset, project: project, state: state)
|
|
316
363
|
when Hash
|
|
317
364
|
opts = dataset.dup
|
|
318
365
|
limit = opts.delete(:limit)
|
|
319
366
|
opts[:project] ||= project
|
|
320
|
-
opts[:
|
|
367
|
+
opts[:state] = state
|
|
321
368
|
Dataset.new(**opts)
|
|
322
369
|
else
|
|
323
|
-
raise ArgumentError, "dataset must be String, Hash, or
|
|
370
|
+
raise ArgumentError, "dataset must be String, Hash, Dataset, or DatasetId, got #{dataset.class}"
|
|
324
371
|
end
|
|
325
372
|
|
|
326
373
|
cases = dataset_obj.fetch_all(limit: limit)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module Braintrust
  module Server
    module Auth
      # Validates Clerk JWT session tokens via the Braintrust app endpoint.
      # The browser forwards the Clerk session token which is validated by
      # POST /api/apikey/login on the app server.
      class ClerkToken
        DEFAULT_APP_URL = "https://www.braintrust.dev"
        RACK_AUTH_HEADER = "HTTP_AUTHORIZATION"
        RACK_ORG_NAME_HEADER = "HTTP_X_BT_ORG_NAME"
        BEARER_PATTERN = /\ABearer (.+)\z/
        LOGIN_PATH = "/api/apikey/login"

        # @param app_url [String, nil] Base URL of the app server (defaults to DEFAULT_APP_URL)
        def initialize(app_url: nil)
          @app_url = app_url || DEFAULT_APP_URL
        end

        # Authenticate a Rack request from its Authorization header.
        # @param env [Hash] Rack environment
        # @return [Hash, nil] String-keyed auth context ("api_key", "org_id", "org_name",
        #   "app_url", "api_url"), or nil when the header is missing/malformed or the
        #   token could not be validated.
        def authenticate(env)
          token = extract_bearer_token(env)
          return nil unless token

          login_response = validate_token(token)
          # A 200 reply whose JSON body is not an object (array, string, number, null)
          # is not a usable login response. Reject it here instead of letting the
          # string-key lookups below raise.
          return nil unless login_response.is_a?(Hash)

          # An explicit org-name request header takes precedence over the login response.
          org_name = env[RACK_ORG_NAME_HEADER]

          {
            "api_key" => token,
            "org_id" => login_response["org_id"],
            "org_name" => org_name || login_response["org_name"],
            "app_url" => @app_url,
            "api_url" => login_response["api_url"] || @app_url
          }
        end

        private

        # @param env [Hash] Rack environment
        # @return [String, nil] The token following "Bearer ", or nil if the header
        #   is absent or does not match BEARER_PATTERN
        def extract_bearer_token(env)
          header = env[RACK_AUTH_HEADER]
          return nil unless header
          header[BEARER_PATTERN, 1]
        end

        # POST the token to the app server's login endpoint.
        # Best-effort: any StandardError (network failure, invalid JSON, ...) is
        # deliberately turned into nil so the caller treats it as "not authenticated".
        # @param token [String] Clerk session token
        # @return [Object, nil] Parsed JSON body on HTTP 200, otherwise nil
        def validate_token(token)
          uri = URI("#{@app_url}#{LOGIN_PATH}")
          http = Net::HTTP.new(uri.host, uri.port)
          http.use_ssl = uri.scheme == "https"

          request = Net::HTTP::Post.new(uri)
          request["Content-Type"] = "application/json"
          request.body = JSON.dump({token: token})

          response = http.request(request)
          return nil unless response.code == "200"

          JSON.parse(response.body)
        rescue
          nil
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Braintrust
  module Server
    module Handlers
      # POST /eval — adapter that maps HTTP request to Evaluator#run and streams SSE results.
      # Handles auth passthrough, datasets, remote scorers, project_id, and parent.
      #
      # Malformed requests are answered with a JSON error response before any
      # streaming starts.
      class Eval
        # Keys of the request's "data" object; each one names a possible data source.
        DATA_SOURCE_KEYS = %w[data dataset_name dataset_id].freeze
        # Maximum number of State objects kept in the per-handler cache.
        STATE_CACHE_LIMIT = 64

        # @param evaluators [Hash] Evaluators looked up by the request's "name" field
        def initialize(evaluators)
          @evaluators = evaluators
          # Created eagerly: creating the mutex lazily inside build_state would
          # itself be racy when the first requests arrive concurrently.
          @state_mutex = Mutex.new
          @state_cache = {}
        end

        # @param env [Hash] Rack environment
        # @return [Array] Rack response triplet: a JSON error (400/404) or a 200
        #   text/event-stream whose body runs the evaluator
        def call(env)
          body = parse_body(env)
          return error_response(400, "Invalid JSON body") unless body

          name = body["name"]
          return error_response(400, "Missing required field: name") unless name

          evaluator = @evaluators[name]
          return error_response(404, "Evaluator '#{name}' not found") unless evaluator

          data = body["data"]
          return error_response(400, "Missing required field: data") unless data
          return error_response(400, "Invalid field: data must be an object") unless data.is_a?(Hash)

          # Validate exactly one data source
          data_sources = DATA_SOURCE_KEYS.count { |k| data.key?(k) }
          return error_response(400, "Exactly one data source required") if data_sources != 1

          # Inline rows are read with row["input"] / row["expected"], so they must
          # be an array of objects.
          if data.key?("data") && !inline_rows?(data["data"])
            return error_response(400, "Invalid field: data.data must be an array of objects")
          end

          experiment_name = body["experiment_name"]

          # Resolve data source
          cases, dataset = resolve_data_source(data)

          # Resolve remote scorers from request
          remote_scorer_ids = resolve_remote_scorers(body["scores"])

          # Resolve parent span context
          parent = resolve_parent(body["parent"])

          # Build state from auth context (if present)
          state = build_state(env)

          # The protocol-rack adapter (used by Falcon and any server built on
          # protocol-http) buffers `each`-based bodies through an Enumerable path.
          # Detect it via the "protocol.http.request" env key it injects, and use
          # SSEStreamBody (call-only) so it dispatches through the Streaming path.
          body_class = env.key?("protocol.http.request") ? SSEStreamBody : SSEBody

          sse_body = body_class.new do |sse|
            # Only pass project/experiment params when state is available
            run_opts = {
              on_progress: ->(progress_data) {
                # Build remote eval protocol events from generic progress data.
                # Runner provides: id, data/error, scores (optional), origin (optional).
                # Protocol requires: id, object_type, origin, name, format, output_type, event, data.
                base = {
                  "object_type" => "task",
                  "name" => name,
                  "format" => "code",
                  "output_type" => "completion"
                }
                base["id"] = progress_data["id"] if progress_data["id"]
                base["origin"] = progress_data["origin"] if progress_data["origin"]

                if progress_data.key?("error")
                  sse.event("progress", JSON.dump(base.merge("event" => "error", "data" => progress_data["error"])))
                else
                  sse.event("progress", JSON.dump(base.merge("event" => "json_delta", "data" => JSON.dump(progress_data["data"]))))
                end

                # Signal per-cell completion so the UI exits "Streaming..." state
                # and updates the progress bar immediately.
                sse.event("progress", JSON.dump(base.merge("event" => "done", "data" => "")))
              },
              quiet: true
            }
            run_opts[:parent] = parent if parent
            run_opts[:scorers] = remote_scorer_ids if remote_scorer_ids
            run_opts[:dataset] = dataset if dataset

            if state
              run_opts[:state] = state
              run_opts[:experiment] = experiment_name if experiment_name
              run_opts[:project_id] = body["project_id"] if body["project_id"]
            end

            # cases is nil when a dataset reference was supplied instead of inline rows.
            result = evaluator.run(cases, **run_opts)

            # Flush buffered OTLP spans before sending completion events.
            # The BatchSpanProcessor exports every ~5s; fast evals can finish
            # before a single export fires, causing the UI to see no results.
            Braintrust::Trace.flush_spans

            # Build summary from result scores
            averaged_scores = {}
            result.scorer_stats.each do |scorer_name, stats|
              averaged_scores[scorer_name] = stats.score_mean
            end

            sse.event("summary", JSON.dump({
              "scores" => averaged_scores,
              "experiment_name" => experiment_name,
              "experiment_id" => result.experiment_id,
              "project_id" => result.project_id
            }))

            sse.event("done", "")
          end

          [200, {"content-type" => "text/event-stream", "cache-control" => "no-cache", "connection" => "keep-alive"}, sse_body]
        end

        private

        # @param rows [Object] Value of data["data"] from the request
        # @return [Boolean] true when rows is an Array whose elements are all Hashes
        def inline_rows?(rows)
          rows.is_a?(Array) && rows.all? { |row| row.is_a?(Hash) }
        end

        # Resolve data source from the data field.
        # Returns [cases, dataset] where exactly one is non-nil
        # (both are nil only if none of the known keys is present).
        def resolve_data_source(data)
          if data.key?("data")
            cases = data["data"].map do |d|
              {input: d["input"], expected: d["expected"]}
            end
            [cases, nil]
          elsif data.key?("dataset_id")
            [nil, Braintrust::DatasetId.new(id: data["dataset_id"])]
          elsif data.key?("dataset_name")
            dataset_opts = {name: data["dataset_name"]}
            dataset_opts[:project] = data["project_name"] if data["project_name"]
            [nil, dataset_opts]
          else
            [nil, nil]
          end
        end

        # Map request scores array to ScorerId structs.
        # The UI sends function_id as a nested object: {"function_id": "uuid"}.
        # @return [Array<ScorerId>, nil] nil when no scores were requested
        def resolve_remote_scorers(scores)
          return nil if scores.nil? || scores.empty?
          scores.map do |s|
            func_id = s["function_id"]
            func_id = func_id["function_id"] if func_id.is_a?(Hash)
            Braintrust::ScorerId.new(
              function_id: func_id,
              version: s["version"]
            )
          end
        end

        # Map request parent to symbol-keyed Hash.
        # Hardcode playground_id to match Java SDK behavior.
        # Also extracts generation from propagated_event for span_attributes.
        # @return [Hash, nil] nil when parent is not a Hash or carries no object_id
        def resolve_parent(parent)
          return nil unless parent.is_a?(Hash)
          object_id = parent["object_id"]
          return nil unless object_id

          generation = parent.dig("propagated_event", "span_attributes", "generation")

          result = {object_type: "playground_id", object_id: object_id}
          result[:generation] = generation if generation
          result
        end

        # Build State from auth context set by Auth middleware.
        # Returns nil when no auth context is present (e.g. NoAuth strategy).
        # Uses an LRU cache (max STATE_CACHE_LIMIT entries) keyed by
        # [api_key, app_url, org_name]; Ruby hashes keep insertion order, so the
        # first key is always the least recently used one.
        def build_state(env)
          auth = env["braintrust.auth"]
          return nil unless auth.is_a?(Hash)

          cache_key = [auth["api_key"], auth["app_url"], auth["org_name"]]

          @state_mutex.synchronize do
            cached = @state_cache.delete(cache_key)
            if cached
              # Re-insert on a hit so this entry becomes the most recently used.
              @state_cache[cache_key] = cached
              return cached
            end

            state = Braintrust::State.new(
              api_key: auth["api_key"],
              org_id: auth["org_id"],
              org_name: auth["org_name"],
              app_url: auth["app_url"],
              api_url: auth["api_url"],
              enable_tracing: false
            )

            # Evict the least recently used entry if cache is full
            @state_cache.shift if @state_cache.size >= STATE_CACHE_LIMIT

            @state_cache[cache_key] = state
            state
          end
        end

        # Read and parse the request body.
        # @return [Hash, nil] The parsed JSON object; nil when the body is missing,
        #   empty, not valid JSON, or valid JSON that is not an object
        def parse_body(env)
          body = env["rack.input"]&.read
          return nil if body.nil? || body.empty?
          parsed = JSON.parse(body)
          # Every field is read with string keys, so only a JSON object is usable.
          parsed.is_a?(Hash) ? parsed : nil
        rescue JSON::ParserError
          nil
        end

        # @return [Array] Rack response triplet carrying {"error": message} as JSON
        def error_response(status, message)
          [status, {"content-type" => "application/json"},
            [JSON.dump({"error" => message})]]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Braintrust
  module Server
    module Handlers
      # GET / — health check. The request is ignored entirely; the handler always
      # answers 200 with the JSON document {"status":"ok"}.
      class Health
        # @param _env [Hash] Rack environment (unused)
        # @return [Array] Rack response triplet [status, headers, body]
        def call(_env)
          payload = JSON.dump({"status" => "ok"})
          headers = {"content-type" => "application/json"}
          [200, headers, [payload]]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Braintrust
  module Server
    module Handlers
      # GET/POST /list — returns all evaluators keyed by name.
      #
      # Response format (Braintrust dev server protocol):
      # {
      #   "evaluator-name": {
      #     "parameters": {                        # omitted when the evaluator declares none
      #       "type": "braintrust.staticParameters",
      #       "schema": {
      #         "param_name": { "type": "data", "schema": {...}, "default": ..., "description": ... }
      #       },
      #       "source": null
      #     },
      #     "scores": [{ "name": "scorer_name" }, ...]
      #   }
      # }
      class List
        # @param evaluators [Hash] Evaluators keyed by name
        def initialize(evaluators)
          @evaluators = evaluators
        end

        # @param _env [Hash] Rack environment (unused)
        # @return [Array] Rack response triplet with the JSON listing as body
        def call(_env)
          listing = @evaluators.each_with_object({}) do |(name, evaluator), acc|
            acc[name] = describe(evaluator)
          end

          [200, {"content-type" => "application/json"}, [JSON.dump(listing)]]
        end

        private

        # Build the listing entry for one evaluator: always "scores", plus
        # "parameters" when the evaluator declares any.
        def describe(evaluator)
          entry = {"scores" => score_entries(evaluator.scorers)}
          serialized = serialize_parameters(evaluator.parameters)
          entry["parameters"] = serialized if serialized
          entry
        end

        # One {"name" => ...} hash per scorer. Scorers that do not respond to
        # #name are labelled by their position ("score_0", "score_1", ...).
        def score_entries(scorers)
          (scorers || []).map.with_index do |scorer, position|
            label = scorer.respond_to?(:name) ? scorer.name : "score_#{position}"
            {"name" => label}
          end
        end

        # Convert user-defined parameters to the dev server protocol format.
        # Wraps in a staticParameters container with "data" typed entries.
        # Specs that are not Hashes are left out of the schema.
        # @return [Hash, nil] nil when there are no parameters
        def serialize_parameters(parameters)
          return nil if !parameters || parameters.empty?

          schema = parameters.each_with_object({}) do |(key, spec), acc|
            next unless spec.is_a?(Hash)

            # Accept both symbol- and string-keyed specs.
            fields = spec.transform_keys(&:to_s)
            acc[key.to_s] = {
              "type" => "data",
              "schema" => {"type" => fields["type"] || "string"},
              "default" => fields["default"],
              "description" => fields["description"]
            }
          end

          {
            "type" => "braintrust.staticParameters",
            "schema" => schema,
            "source" => nil
          }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Braintrust
  module Server
    module Middleware
      # Rack middleware that gates requests behind a pluggable auth strategy.
      # The strategy's #authenticate(env) result is stored in env["braintrust.auth"]
      # when truthy; a falsy result short-circuits with a 401 JSON response.
      class Auth
        # @param app [#call] Downstream Rack application
        # @param strategy [#authenticate] Object deciding whether a request is allowed
        def initialize(app, strategy:)
          @strategy = strategy
          @app = app
        end

        # @param env [Hash] Rack environment
        # @return [Array] The downstream response, or a 401 response triplet
        def call(env)
          identity = @strategy.authenticate(env)
          return unauthorized unless identity

          env["braintrust.auth"] = identity
          @app.call(env)
        end

        private

        # @return [Array] Rack response triplet for a rejected request
        def unauthorized
          payload = JSON.dump({"error" => "Unauthorized"})
          [401, {"content-type" => "application/json"}, [payload]]
        end
      end
    end
  end
end
|