braintrust 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +163 -10
- data/lib/braintrust/api/functions.rb +3 -1
- data/lib/braintrust/api/internal/btql.rb +3 -33
- data/lib/braintrust/contrib/rails/server/application_controller.rb +34 -0
- data/lib/braintrust/contrib/rails/server/engine.rb +72 -0
- data/lib/braintrust/contrib/rails/server/eval_controller.rb +36 -0
- data/lib/braintrust/contrib/rails/server/generator.rb +43 -0
- data/lib/braintrust/contrib/rails/server/health_controller.rb +15 -0
- data/lib/braintrust/contrib/rails/server/list_controller.rb +16 -0
- data/lib/braintrust/contrib/rails/server/routes.rb +8 -0
- data/lib/braintrust/contrib/rails/server.rb +20 -0
- data/lib/braintrust/eval/context.rb +84 -21
- data/lib/braintrust/eval/evaluator.rb +16 -2
- data/lib/braintrust/eval/runner.rb +120 -75
- data/lib/braintrust/eval.rb +22 -2
- data/lib/braintrust/internal/retry.rb +41 -0
- data/lib/braintrust/prompt.rb +11 -5
- data/lib/braintrust/scorer.rb +55 -4
- data/lib/braintrust/server/handlers/eval.rb +8 -168
- data/lib/braintrust/server/handlers/list.rb +3 -41
- data/lib/braintrust/server/rack.rb +2 -0
- data/lib/braintrust/server/services/eval_service.rb +226 -0
- data/lib/braintrust/server/services/list_service.rb +64 -0
- data/lib/braintrust/trace/span_processor.rb +0 -5
- data/lib/braintrust/version.rb +1 -1
- metadata +26 -127
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 27e146b06451b844b1e6416353b20f6bd572c3d1169a12a439745cb7280ce0ec
|
|
4
|
+
data.tar.gz: d726e3a146a2180bf2714846d56e65fa9d3ef1ce773adb116a8e6b1b79ba823c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 69e5150452e9dde1491664af1137cc05a9a5b651dbb5fdee27ff8a09e0e11b51c283c163019566045e1771679ed6f2eece4dd1753aa06f899e3681e7c6b99d15
|
|
7
|
+
data.tar.gz: 28cc8c86bdc13db8d33ad0dc28325c0d858f37ba1b9f41212c52e514eed649b14596c66153bca58de251c4c6dd1ddcb170d24ae100a33f912f49349671821f7a
|
data/README.md
CHANGED
|
@@ -21,6 +21,7 @@ This is the official Ruby SDK for [Braintrust](https://www.braintrust.dev), for
|
|
|
21
21
|
- [Attachments](#attachments)
|
|
22
22
|
- [Viewing traces](#viewing-traces)
|
|
23
23
|
- [Evals](#evals)
|
|
24
|
+
- [Tasks](#tasks)
|
|
24
25
|
- [Datasets](#datasets)
|
|
25
26
|
- [Scorers](#scorers)
|
|
26
27
|
- [Dev Server](#dev-server)
|
|
@@ -259,6 +260,50 @@ Braintrust::Eval.run(
|
|
|
259
260
|
)
|
|
260
261
|
```
|
|
261
262
|
|
|
263
|
+
See [eval.rb](./examples/eval.rb) for a full example.
|
|
264
|
+
|
|
265
|
+
### Tasks
|
|
266
|
+
|
|
267
|
+
Define the code being evaluated as a lambda or a class. Tasks receive `input:` as a keyword argument:
|
|
268
|
+
|
|
269
|
+
```ruby
|
|
270
|
+
# Lambda
|
|
271
|
+
task = ->(input:) { classify(input) }
|
|
272
|
+
|
|
273
|
+
# Class-based (auto-derives name from class: "food_classifier")
|
|
274
|
+
class FoodClassifier
|
|
275
|
+
include Braintrust::Task
|
|
276
|
+
|
|
277
|
+
def call(input:)
|
|
278
|
+
classify(input)
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
#### With parameters
|
|
284
|
+
|
|
285
|
+
Tasks can accept `parameters:` as input to drive their behavior:
|
|
286
|
+
|
|
287
|
+
```ruby
|
|
288
|
+
task = ->(input:, parameters:) {
|
|
289
|
+
value = parameters["value"]
|
|
290
|
+
from_unit = parameters["from_unit"] || 'c'
|
|
291
|
+
to_unit = parameters["to_unit"] || 'f'
|
|
292
|
+
|
|
293
|
+
convert_temp(temperature: value, from_unit: from_unit, to_unit: to_unit)
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
Braintrust::Eval.run(
|
|
297
|
+
project: "my-project",
|
|
298
|
+
cases: [...],
|
|
299
|
+
task: task,
|
|
300
|
+
scorers: [...],
|
|
301
|
+
parameters: {"value" => 23.0}
|
|
302
|
+
)
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
See [parameters.rb](./examples/eval/parameters.rb) for a full example.
|
|
306
|
+
|
|
262
307
|
### Datasets
|
|
263
308
|
|
|
264
309
|
Use test cases from a Braintrust dataset:
|
|
@@ -287,6 +332,8 @@ Braintrust::Eval.run(
|
|
|
287
332
|
)
|
|
288
333
|
```
|
|
289
334
|
|
|
335
|
+
See [dataset.rb](./examples/eval/dataset.rb) for a full example.
|
|
336
|
+
|
|
290
337
|
### Scorers
|
|
291
338
|
|
|
292
339
|
Use scoring functions defined in Braintrust:
|
|
@@ -315,6 +362,46 @@ Braintrust::Eval.run(
|
|
|
315
362
|
)
|
|
316
363
|
```
|
|
317
364
|
|
|
365
|
+
See [remote_functions.rb](./examples/eval/remote_functions.rb) for a full example.
|
|
366
|
+
|
|
367
|
+
#### Scorer metadata
|
|
368
|
+
|
|
369
|
+
Scorers can return a Hash with `:score` and `:metadata` to attach structured context to the score. The metadata is logged on the scorer's span and visible in the Braintrust UI for debugging and filtering:
|
|
370
|
+
|
|
371
|
+
```ruby
|
|
372
|
+
Braintrust::Scorer.new("translation") do |expected:, output:|
|
|
373
|
+
common_words = output.downcase.split & expected.downcase.split
|
|
374
|
+
overlap = common_words.size.to_f / expected.split.size
|
|
375
|
+
{
|
|
376
|
+
score: overlap,
|
|
377
|
+
metadata: {word_overlap: common_words.size, missing_words: expected.downcase.split - output.downcase.split}
|
|
378
|
+
}
|
|
379
|
+
end
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
See [scorer_metadata.rb](./examples/eval/scorer_metadata.rb) for a full example.
|
|
383
|
+
|
|
384
|
+
#### Multiple scores from one scorer
|
|
385
|
+
|
|
386
|
+
When several scores can be computed together (e.g. in one LLM call), you can return an `Array` of score `Hash`es instead of a single value. Each metric appears as a separate score column in the Braintrust UI:
|
|
387
|
+
|
|
388
|
+
```ruby
|
|
389
|
+
Braintrust::Scorer.new("summary_quality") do |output:, expected:|
|
|
390
|
+
words = output.downcase.split
|
|
391
|
+
key_terms = expected[:key_terms]
|
|
392
|
+
covered = key_terms.count { |t| words.include?(t) }
|
|
393
|
+
|
|
394
|
+
[
|
|
395
|
+
{name: "coverage", score: covered.to_f / key_terms.size, metadata: {missing: key_terms - words}},
|
|
396
|
+
{name: "conciseness", score: words.size <= expected[:max_words] ? 1.0 : 0.0}
|
|
397
|
+
]
|
|
398
|
+
end
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
`name` and `score` are required, `metadata` is optional.
|
|
402
|
+
|
|
403
|
+
See [multi_score.rb](./examples/eval/multi_score.rb) for a full example.
|
|
404
|
+
|
|
318
405
|
#### Trace scoring
|
|
319
406
|
|
|
320
407
|
Scorers can access the full evaluation trace (all spans generated by the task) by declaring a `trace:` keyword parameter. This is useful for inspecting intermediate LLM calls, validating tool usage, or checking the message thread:
|
|
@@ -344,11 +431,28 @@ Braintrust::Eval.run(
|
|
|
344
431
|
)
|
|
345
432
|
```
|
|
346
433
|
|
|
347
|
-
See
|
|
434
|
+
See [trace_scoring.rb](./examples/eval/trace_scoring.rb) for a full example.
|
|
435
|
+
|
|
436
|
+
#### Scorer parameters
|
|
437
|
+
|
|
438
|
+
Scorers can also accept `parameters:` to use runtime configuration in their scoring logic. Like tasks, scorers that don't declare `parameters:` are unaffected:
|
|
439
|
+
|
|
440
|
+
```ruby
|
|
441
|
+
Braintrust::Scorer.new("threshold_match") do |expected:, output:, parameters:|
|
|
442
|
+
threshold = parameters["threshold"] || 0.8
|
|
443
|
+
similarity(output, expected) >= threshold ? 1.0 : 0.0
|
|
444
|
+
end
|
|
445
|
+
```
|
|
446
|
+
|
|
447
|
+
See [parameters.rb](./examples/eval/parameters.rb) for a full example.
|
|
348
448
|
|
|
349
449
|
### Dev Server
|
|
350
450
|
|
|
351
|
-
Run evaluations from the Braintrust web UI against code in your own application.
|
|
451
|
+
Run evaluations from the Braintrust web UI against code in your own application.
|
|
452
|
+
|
|
453
|
+
#### Run as a Rack app
|
|
454
|
+
|
|
455
|
+
Define evaluators, pass them to the dev server, and start serving:
|
|
352
456
|
|
|
353
457
|
```ruby
|
|
354
458
|
# eval_server.ru
|
|
@@ -374,10 +478,21 @@ run Braintrust::Server::Rack.app(
|
|
|
374
478
|
)
|
|
375
479
|
```
|
|
376
480
|
|
|
481
|
+
Add your Rack server to your Gemfile:
|
|
482
|
+
|
|
483
|
+
```ruby
|
|
484
|
+
gem "rack"
|
|
485
|
+
gem "puma" # recommended
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
Then start the server:
|
|
489
|
+
|
|
377
490
|
```bash
|
|
378
491
|
bundle exec rackup eval_server.ru -p 8300 -o 0.0.0.0
|
|
379
492
|
```
|
|
380
493
|
|
|
494
|
+
See example: [server/eval.ru](./examples/server/eval.ru)
|
|
495
|
+
|
|
381
496
|
**Custom evaluators**
|
|
382
497
|
|
|
383
498
|
Evaluators can also be defined as subclasses:
|
|
@@ -394,6 +509,51 @@ class FoodClassifier < Braintrust::Eval::Evaluator
|
|
|
394
509
|
end
|
|
395
510
|
```
|
|
396
511
|
|
|
512
|
+
#### Run as a Rails engine
|
|
513
|
+
|
|
514
|
+
Use the Rails engine when your evaluators live inside an existing Rails app and you want to mount the Braintrust eval server into that application.
|
|
515
|
+
|
|
516
|
+
Define each evaluator in its own file, for example under `app/evaluators/`:
|
|
517
|
+
|
|
518
|
+
```ruby
|
|
519
|
+
# app/evaluators/food_classifier.rb
|
|
520
|
+
class FoodClassifier < Braintrust::Eval::Evaluator
|
|
521
|
+
def task
|
|
522
|
+
->(input:) { classify(input) }
|
|
523
|
+
end
|
|
524
|
+
|
|
525
|
+
def scorers
|
|
526
|
+
[Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }]
|
|
527
|
+
end
|
|
528
|
+
end
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
Then generate the Braintrust initializer:
|
|
532
|
+
|
|
533
|
+
```bash
|
|
534
|
+
bin/rails generate braintrust:server
|
|
535
|
+
```
|
|
536
|
+
|
|
537
|
+
```ruby
|
|
538
|
+
# config/routes.rb
|
|
539
|
+
Rails.application.routes.draw do
|
|
540
|
+
mount Braintrust::Contrib::Rails::Server::Engine, at: "/braintrust"
|
|
541
|
+
end
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
The generator writes `config/initializers/braintrust_server.rb`, where you can review or customize the slug-to-evaluator mapping it discovers from `app/evaluators/**/*.rb` and `evaluators/**/*.rb`.
|
|
545
|
+
|
|
546
|
+
See example: [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb)
|
|
547
|
+
|
|
548
|
+
**Developing locally**
|
|
549
|
+
|
|
550
|
+
If you want to skip authentication on incoming eval requests while developing locally:
|
|
551
|
+
|
|
552
|
+
- **For Rack**: Pass `auth: :none` to `Braintrust::Server::Rack.app(...)`
|
|
553
|
+
- **For Rails**: Set `config.auth = :none` in `config/initializers/braintrust_server.rb`
|
|
554
|
+
|
|
555
|
+
*NOTE: Setting `:none` disables authentication on incoming requests into your server; executing evals requires a `BRAINTRUST_API_KEY` to fetch resources.*
|
|
556
|
+
|
|
397
557
|
**Supported web servers**
|
|
398
558
|
|
|
399
559
|
The dev server requires the `rack` gem and a Rack-compatible web server.
|
|
@@ -405,14 +565,7 @@ The dev server requires the `rack` gem and a Rack-compatible web server.
|
|
|
405
565
|
| [Passenger](https://www.phusionpassenger.com/) | 6.x | |
|
|
406
566
|
| [WEBrick](https://github.com/ruby/webrick) | Not supported | Does not support server-sent events. |
|
|
407
567
|
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
```ruby
|
|
411
|
-
gem "rack"
|
|
412
|
-
gem "puma" # recommended
|
|
413
|
-
```
|
|
414
|
-
|
|
415
|
-
See example: [server/eval.ru](./examples/server/eval.ru)
|
|
568
|
+
See examples: [server/eval.ru](./examples/server/eval.ru), [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb)
|
|
416
569
|
|
|
417
570
|
## Documentation
|
|
418
571
|
|
|
@@ -25,13 +25,15 @@ module Braintrust
|
|
|
25
25
|
# List functions with optional filters
|
|
26
26
|
# GET /v1/function?project_name=X&...
|
|
27
27
|
# @param project_name [String, nil] Filter by project name
|
|
28
|
+
# @param project_id [String, nil] Filter by project ID (UUID)
|
|
28
29
|
# @param function_name [String, nil] Filter by function name
|
|
29
30
|
# @param slug [String, nil] Filter by slug
|
|
30
31
|
# @param limit [Integer, nil] Limit number of results
|
|
31
32
|
# @return [Hash] Response with "objects" array
|
|
32
|
-
def list(project_name: nil, function_name: nil, slug: nil, limit: nil)
|
|
33
|
+
def list(project_name: nil, project_id: nil, function_name: nil, slug: nil, limit: nil)
|
|
33
34
|
params = {}
|
|
34
35
|
params["project_name"] = project_name if project_name
|
|
36
|
+
params["project_id"] = project_id if project_id
|
|
35
37
|
params["function_name"] = function_name if function_name
|
|
36
38
|
params["slug"] = slug if slug
|
|
37
39
|
params["limit"] = limit if limit
|
|
@@ -11,19 +11,6 @@ module Braintrust
|
|
|
11
11
|
# Internal BTQL client for querying spans.
|
|
12
12
|
# Not part of the public API — instantiated directly where needed.
|
|
13
13
|
class BTQL
|
|
14
|
-
# Maximum number of retries before returning partial results.
|
|
15
|
-
# Covers both freshness lag (partially indexed) and ingestion lag
|
|
16
|
-
# (spans not yet visible to BTQL after OTel flush).
|
|
17
|
-
MAX_FRESHNESS_RETRIES = 7
|
|
18
|
-
|
|
19
|
-
# Base delay (seconds) between retries (doubles each attempt, capped).
|
|
20
|
-
FRESHNESS_BASE_DELAY = 1.0
|
|
21
|
-
|
|
22
|
-
# Maximum delay (seconds) between retries. Caps exponential growth
|
|
23
|
-
# so we keep polling at a reasonable rate in the later window.
|
|
24
|
-
# Schedule: 1, 2, 4, 8, 8, 8, 8 = ~39s total worst-case.
|
|
25
|
-
MAX_FRESHNESS_DELAY = 8.0
|
|
26
|
-
|
|
27
14
|
def initialize(state)
|
|
28
15
|
@state = state
|
|
29
16
|
end
|
|
@@ -31,36 +18,19 @@ module Braintrust
|
|
|
31
18
|
# Query spans belonging to a specific trace within an object.
|
|
32
19
|
#
|
|
33
20
|
# Builds a BTQL SQL query that matches the root_span_id and excludes scorer spans.
|
|
34
|
-
#
|
|
21
|
+
# Returns a single-shot result; callers are responsible for retry and error handling.
|
|
35
22
|
#
|
|
36
23
|
# @param object_type [String] e.g. "experiment"
|
|
37
24
|
# @param object_id [String] Object UUID
|
|
38
25
|
# @param root_span_id [String] Hex trace ID of the root span
|
|
39
|
-
# @return [Array<Hash
|
|
26
|
+
# @return [Array(Array<Hash>, String)] [rows, freshness]
|
|
40
27
|
def trace_spans(object_type:, object_id:, root_span_id:)
|
|
41
28
|
query = build_trace_query(
|
|
42
29
|
object_type: object_type,
|
|
43
30
|
object_id: object_id,
|
|
44
31
|
root_span_id: root_span_id
|
|
45
32
|
)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
retries = 0
|
|
49
|
-
loop do
|
|
50
|
-
rows, freshness = execute_query(payload)
|
|
51
|
-
# Return when data is fresh AND non-empty, or we've exhausted retries.
|
|
52
|
-
# We retry on empty even when "complete" because there is ingestion lag
|
|
53
|
-
# between OTel flush and BTQL indexing — the server may report "complete"
|
|
54
|
-
# before it knows about newly-flushed spans.
|
|
55
|
-
return rows if (freshness == "complete" && !rows.empty?) || retries >= MAX_FRESHNESS_RETRIES
|
|
56
|
-
|
|
57
|
-
retries += 1
|
|
58
|
-
delay = [FRESHNESS_BASE_DELAY * (2**(retries - 1)), MAX_FRESHNESS_DELAY].min
|
|
59
|
-
sleep(delay)
|
|
60
|
-
end
|
|
61
|
-
rescue => e
|
|
62
|
-
Braintrust::Log.warn("[BTQL] Query failed: #{e.message}")
|
|
63
|
-
[]
|
|
33
|
+
execute_query(query: query, fmt: "jsonl")
|
|
64
34
|
end
|
|
65
35
|
|
|
66
36
|
private
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Braintrust
|
|
4
|
+
module Contrib
|
|
5
|
+
module Rails
|
|
6
|
+
module Server
|
|
7
|
+
class ApplicationController < ActionController::API
|
|
8
|
+
before_action :authenticate!
|
|
9
|
+
|
|
10
|
+
private
|
|
11
|
+
|
|
12
|
+
def authenticate!
|
|
13
|
+
auth_result = Engine.auth_strategy.authenticate(request.env)
|
|
14
|
+
unless auth_result
|
|
15
|
+
render json: {"error" => "Unauthorized"}, status: :unauthorized
|
|
16
|
+
return
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
request.env["braintrust.auth"] = auth_result
|
|
20
|
+
@braintrust_auth = auth_result
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def parse_json_body
|
|
24
|
+
body = request.body.read
|
|
25
|
+
return nil if body.nil? || body.empty?
|
|
26
|
+
JSON.parse(body)
|
|
27
|
+
rescue JSON::ParserError
|
|
28
|
+
nil
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Braintrust
|
|
4
|
+
module Contrib
|
|
5
|
+
module Rails
|
|
6
|
+
module Server
|
|
7
|
+
class Engine < ::Rails::Engine
|
|
8
|
+
isolate_namespace Braintrust::Contrib::Rails::Server
|
|
9
|
+
|
|
10
|
+
config.evaluators = {}
|
|
11
|
+
config.auth = :clerk_token
|
|
12
|
+
|
|
13
|
+
# Register the engine's routes file so Rails loads it during initialization.
|
|
14
|
+
paths["config/routes.rb"] << File.expand_path("routes.rb", __dir__)
|
|
15
|
+
|
|
16
|
+
initializer "braintrust.server.cors" do |app|
|
|
17
|
+
app.middleware.use Braintrust::Server::Middleware::Cors
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Class-level helpers that read from engine config.
|
|
21
|
+
|
|
22
|
+
def self.evaluators
|
|
23
|
+
config.evaluators
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def self.auth_strategy
|
|
27
|
+
resolve_auth(config.auth)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def self.list_service
|
|
31
|
+
Braintrust::Server::Services::List.new(-> { config.evaluators })
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Long-lived so the state cache persists across requests.
|
|
35
|
+
def self.eval_service
|
|
36
|
+
@eval_service ||= Braintrust::Server::Services::Eval.new(-> { config.evaluators })
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Support the explicit `|config|` style used by this integration while
|
|
40
|
+
# still delegating zero-arity DSL blocks to Rails' native implementation.
|
|
41
|
+
def self.configure(&block)
|
|
42
|
+
return super if block&.arity == 0
|
|
43
|
+
yield config if block
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def self.resolve_auth(auth)
|
|
47
|
+
case auth
|
|
48
|
+
when :none
|
|
49
|
+
Braintrust::Server::Auth::NoAuth.new
|
|
50
|
+
when :clerk_token
|
|
51
|
+
Braintrust::Server::Auth::ClerkToken.new
|
|
52
|
+
when Symbol, String
|
|
53
|
+
raise ArgumentError, "Unknown auth strategy #{auth.inspect}. Expected :none, :clerk_token, or an auth object."
|
|
54
|
+
else
|
|
55
|
+
auth
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
private_class_method :resolve_auth
|
|
59
|
+
|
|
60
|
+
generators do
|
|
61
|
+
require "braintrust/contrib/rails/server/generator"
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
require_relative "application_controller"
|
|
70
|
+
require_relative "health_controller"
|
|
71
|
+
require_relative "list_controller"
|
|
72
|
+
require_relative "eval_controller"
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Braintrust
|
|
4
|
+
module Contrib
|
|
5
|
+
module Rails
|
|
6
|
+
module Server
|
|
7
|
+
class EvalController < ApplicationController
|
|
8
|
+
include ActionController::Live
|
|
9
|
+
|
|
10
|
+
def create
|
|
11
|
+
body = parse_json_body
|
|
12
|
+
unless body
|
|
13
|
+
render json: {"error" => "Invalid JSON body"}, status: :bad_request
|
|
14
|
+
return
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
result = Engine.eval_service.validate(body)
|
|
18
|
+
if result[:error]
|
|
19
|
+
render json: {"error" => result[:error]}, status: result[:status]
|
|
20
|
+
return
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
response.headers["Content-Type"] = "text/event-stream"
|
|
24
|
+
response.headers["Cache-Control"] = "no-cache"
|
|
25
|
+
response.headers["Connection"] = "keep-alive"
|
|
26
|
+
|
|
27
|
+
sse = Braintrust::Server::SSEWriter.new { |chunk| response.stream.write(chunk) }
|
|
28
|
+
Engine.eval_service.stream(result, auth: @braintrust_auth, sse: sse)
|
|
29
|
+
ensure
|
|
30
|
+
response.stream.close
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
|
|
5
|
+
module Braintrust
|
|
6
|
+
module Contrib
|
|
7
|
+
module Rails
|
|
8
|
+
module Server
|
|
9
|
+
module Generators
|
|
10
|
+
class ServerGenerator < ::Rails::Generators::Base
|
|
11
|
+
namespace "braintrust:server"
|
|
12
|
+
source_root File.expand_path("templates", __dir__)
|
|
13
|
+
|
|
14
|
+
def create_initializer
|
|
15
|
+
@evaluators = discovered_evaluators
|
|
16
|
+
template "initializer.rb.tt", "config/initializers/braintrust_server.rb"
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
private
|
|
20
|
+
|
|
21
|
+
def discovered_evaluators
|
|
22
|
+
evaluator_roots.flat_map do |root|
|
|
23
|
+
Dir[File.join(destination_root, root, "**/*.rb")].sort.map do |file|
|
|
24
|
+
relative_path = file.delete_prefix("#{File.join(destination_root, root)}/").sub(/\.rb\z/, "")
|
|
25
|
+
{
|
|
26
|
+
class_name: relative_path.split("/").map(&:camelize).join("::"),
|
|
27
|
+
slug: relative_path.tr("/", "-").tr("_", "-")
|
|
28
|
+
}
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def evaluator_roots
|
|
34
|
+
%w[app/evaluators evaluators].select do |root|
|
|
35
|
+
Dir.exist?(File.join(destination_root, root))
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Braintrust
|
|
4
|
+
module Contrib
|
|
5
|
+
module Rails
|
|
6
|
+
module Server
|
|
7
|
+
class ListController < ApplicationController
|
|
8
|
+
def show
|
|
9
|
+
result = Engine.list_service.call
|
|
10
|
+
render json: result
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
begin
|
|
4
|
+
require "action_controller"
|
|
5
|
+
require "rails/engine"
|
|
6
|
+
rescue LoadError
|
|
7
|
+
raise LoadError,
|
|
8
|
+
"Rails (actionpack + railties) is required for the Braintrust Rails server engine. " \
|
|
9
|
+
"Add `gem 'rails'` or `gem 'actionpack'` and `gem 'railties'` to your Gemfile."
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
require "json"
|
|
13
|
+
require_relative "../../eval"
|
|
14
|
+
require_relative "../../server/sse"
|
|
15
|
+
require_relative "../../server/auth/no_auth"
|
|
16
|
+
require_relative "../../server/auth/clerk_token"
|
|
17
|
+
require_relative "../../server/middleware/cors"
|
|
18
|
+
require_relative "../../server/services/list_service"
|
|
19
|
+
require_relative "../../server/services/eval_service"
|
|
20
|
+
require_relative "server/engine"
|