braintrust 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +148 -24
  3. data/lib/braintrust/api/internal/btql.rb +124 -0
  4. data/lib/braintrust/api/internal/experiments.rb +19 -0
  5. data/lib/braintrust/api/internal/projects.rb +19 -0
  6. data/lib/braintrust/contrib/rails/server/application_controller.rb +34 -0
  7. data/lib/braintrust/contrib/rails/server/engine.rb +72 -0
  8. data/lib/braintrust/contrib/rails/server/eval_controller.rb +36 -0
  9. data/lib/braintrust/contrib/rails/server/generator.rb +43 -0
  10. data/lib/braintrust/contrib/rails/server/health_controller.rb +15 -0
  11. data/lib/braintrust/contrib/rails/server/list_controller.rb +16 -0
  12. data/lib/braintrust/contrib/rails/server/routes.rb +8 -0
  13. data/lib/braintrust/contrib/rails/server.rb +20 -0
  14. data/lib/braintrust/dataset.rb +6 -3
  15. data/lib/braintrust/eval/context.rb +131 -0
  16. data/lib/braintrust/eval/evaluator.rb +11 -5
  17. data/lib/braintrust/eval/functions.rb +10 -166
  18. data/lib/braintrust/eval/runner.rb +165 -145
  19. data/lib/braintrust/eval/scorer.rb +24 -96
  20. data/lib/braintrust/eval/trace.rb +129 -0
  21. data/lib/braintrust/eval.rb +60 -132
  22. data/lib/braintrust/functions.rb +168 -0
  23. data/lib/braintrust/internal/callable.rb +83 -0
  24. data/lib/braintrust/logger.rb +9 -0
  25. data/lib/braintrust/scorer.rb +173 -0
  26. data/lib/braintrust/server/handlers/eval.rb +8 -168
  27. data/lib/braintrust/server/handlers/list.rb +3 -41
  28. data/lib/braintrust/server/rack.rb +2 -0
  29. data/lib/braintrust/server/services/eval_service.rb +214 -0
  30. data/lib/braintrust/server/services/list_service.rb +64 -0
  31. data/lib/braintrust/task.rb +108 -0
  32. data/lib/braintrust/trace/span_processor.rb +0 -5
  33. data/lib/braintrust/version.rb +1 -1
  34. metadata +18 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d67e6d0faeb24297af8a5f43ac1bd1ceacff1f37df2610244ae5f81e34c4ae5f
4
- data.tar.gz: 489ec68fee424aa8aa1880b73b58f1f26529493d8898cd0ae5876d3b919fcb7c
3
+ metadata.gz: c07be3c454a924c5c97c2653136a2b9cdd1098409af16326b1db8676c5c8b0d2
4
+ data.tar.gz: c1eb75eefdcacebc2c955ae23aa3196d276a76d6ab828cdfb817c7e9168325b3
5
5
  SHA512:
6
- metadata.gz: cd876122ad92c5439ff45e975fd84418bfcc7d72d6f9398e48b1ac4c60f09fb96c2b85b46ee1c8de6a75291c0b7d2754ee2fa069f77f8a2f8a4c069132c59d94
7
- data.tar.gz: 45d3f80f69ac9725d93aa0db24815da093bfd992b5418f8551c8d25e8caef9299f270a92fa922a4bc4bf3190d9f823a35c7203f9a74bd58daee31869b987f103
6
+ metadata.gz: d02058bd5321ed16ea2f785aaeb24f4d4f105c5357c3c7ceb2a8a02c090b69c7187623b23e14d5026bb0cf236e64dddae7025509d7b2d6769bb50f110612120f
7
+ data.tar.gz: 15627209b382c023c2640e1d2219b6d33b84cb7c67ba1a3b8e3ebbe1aa912d3df832583a1e37b3831699b67ea81f3b4242b67a606dfdd727827e648a6509fea7
data/README.md CHANGED
@@ -252,13 +252,15 @@ Braintrust::Eval.run(
252
252
  {input: "apple", expected: "fruit"},
253
253
  {input: "carrot", expected: "vegetable"}
254
254
  ],
255
- task: ->(input) { classify(input) },
255
+ task: ->(input:) { classify(input) },
256
256
  scorers: [
257
- ->(input, expected, output) { output == expected ? 1.0 : 0.0 }
257
+ ->(expected:, output:) { output == expected ? 1.0 : 0.0 }
258
258
  ]
259
259
  )
260
260
  ```
261
261
 
262
+ See [eval.rb](./examples/eval.rb) for a full example.
263
+
262
264
  ### Datasets
263
265
 
264
266
  Use test cases from a Braintrust dataset:
@@ -267,7 +269,7 @@ Use test cases from a Braintrust dataset:
267
269
  Braintrust::Eval.run(
268
270
  project: "my-project",
269
271
  dataset: "my-dataset",
270
- task: ->(input) { classify(input) },
272
+ task: ->(input:) { classify(input) },
271
273
  scorers: [...]
272
274
  )
273
275
  ```
@@ -282,11 +284,13 @@ Braintrust::Eval.run(
282
284
  {input: "apple", expected: "fruit", tags: ["produce"], metadata: {difficulty: "easy"}},
283
285
  {input: "salmon", expected: "protein", tags: ["seafood"], metadata: {difficulty: "medium"}}
284
286
  ],
285
- task: ->(input) { classify(input) },
287
+ task: ->(input:) { classify(input) },
286
288
  scorers: [...]
287
289
  )
288
290
  ```
289
291
 
292
+ See [dataset.rb](./examples/eval/dataset.rb) for a full example.
293
+
290
294
  ### Scorers
291
295
 
292
296
  Use scoring functions defined in Braintrust:
@@ -295,33 +299,104 @@ Use scoring functions defined in Braintrust:
295
299
  Braintrust::Eval.run(
296
300
  project: "my-project",
297
301
  cases: [...],
298
- task: ->(input) { ... },
302
+ task: ->(input:) { ... },
303
+ scorers: ["accuracy-scorer"]
304
+ )
305
+ ```
306
+
307
+ Or define scorers inline with `Scorer.new`:
308
+
309
+ ```ruby
310
+ Braintrust::Eval.run(
311
+ project: "my-project",
312
+ cases: [...],
313
+ task: ->(input:) { ... },
299
314
  scorers: [
300
- Braintrust::Eval::Functions.scorer(project: "my-project", slug: "accuracy-scorer")
315
+ Braintrust::Scorer.new("exact_match") do |expected:, output:|
316
+ output == expected ? 1.0 : 0.0
317
+ end
301
318
  ]
302
319
  )
303
320
  ```
304
321
 
305
- Or define scorers inline with `Eval.scorer`:
322
+ See [remote_functions.rb](./examples/eval/remote_functions.rb) for a full example.
323
+
324
+ #### Scorer metadata
325
+
326
+ Scorers can return a Hash with `:score` and `:metadata` to attach structured context to the score. The metadata is logged on the scorer's span and visible in the Braintrust UI for debugging and filtering:
327
+
328
+ ```ruby
329
+ Braintrust::Scorer.new("translation") do |expected:, output:|
330
+ common_words = output.downcase.split & expected.downcase.split
331
+ overlap = common_words.size.to_f / expected.split.size
332
+ {
333
+ score: overlap,
334
+ metadata: {word_overlap: common_words.size, missing_words: expected.downcase.split - output.downcase.split}
335
+ }
336
+ end
337
+ ```
338
+
339
+ See [scorer_metadata.rb](./examples/eval/scorer_metadata.rb) for a full example.
340
+
341
+ #### Multiple scores from one scorer
342
+
343
+ When several scores can be computed together (e.g. in one LLM call), you can return an `Array` of score `Hash`es instead of a single value. Each metric appears as a separate score column in the Braintrust UI:
344
+
345
+ ```ruby
346
+ Braintrust::Scorer.new("summary_quality") do |output:, expected:|
347
+ words = output.downcase.split
348
+ key_terms = expected[:key_terms]
349
+ covered = key_terms.count { |t| words.include?(t) }
350
+
351
+ [
352
+ {name: "coverage", score: covered.to_f / key_terms.size, metadata: {missing: key_terms - words}},
353
+ {name: "conciseness", score: words.size <= expected[:max_words] ? 1.0 : 0.0}
354
+ ]
355
+ end
356
+ ```
357
+
358
+ `name` and `score` are required, `metadata` is optional.
359
+
360
+ See [multi_score.rb](./examples/eval/multi_score.rb) for a full example.
361
+
362
+ #### Trace scoring
363
+
364
+ Scorers can access the full evaluation trace (all spans generated by the task) by declaring a `trace:` keyword parameter. This is useful for inspecting intermediate LLM calls, validating tool usage, or checking the message thread:
306
365
 
307
366
  ```ruby
308
367
  Braintrust::Eval.run(
309
368
  project: "my-project",
310
- cases: [...],
311
- task: ->(input) { ... },
369
+ cases: [{input: "What is 2+2?", expected: "4"}],
370
+ task: Braintrust::Task.new { |input:| my_llm_pipeline(input) },
312
371
  scorers: [
313
- Braintrust::Eval.scorer("exact_match") do |input, expected, output|
372
+ # Access the full trace to inspect LLM spans
373
+ Braintrust::Scorer.new("uses_system_prompt") do |output:, trace:|
374
+ messages = trace.thread # reconstructed message thread from LLM spans
375
+ messages.any? { |m| m["role"] == "system" } ? 1.0 : 0.0
376
+ end,
377
+
378
+ # Filter spans by type
379
+ Braintrust::Scorer.new("single_llm_call") do |output:, trace:|
380
+ trace.spans(span_type: "llm").length == 1 ? 1.0 : 0.0
381
+ end,
382
+
383
+ # Scorers that do not declare `trace:` still work — the parameter is filtered out automatically
384
+ Braintrust::Scorer.new("exact_match") do |output:, expected:|
314
385
  output == expected ? 1.0 : 0.0
315
386
  end
316
387
  ]
317
388
  )
318
389
  ```
319
390
 
320
- See examples: [eval.rb](./examples/eval.rb), [dataset.rb](./examples/eval/dataset.rb), [remote_functions.rb](./examples/eval/remote_functions.rb)
391
+ See [trace_scoring.rb](./examples/eval/trace_scoring.rb) for a full example.
321
392
 
322
393
  ### Dev Server
323
394
 
324
- Run evaluations from the Braintrust web UI against code in your own application. Define evaluators, pass them to the dev server, and start serving:
395
+ Run evaluations from the Braintrust web UI against code in your own application.
396
+
397
+ #### Run as a Rack app
398
+
399
+ Define evaluators, pass them to the dev server, and start serving:
325
400
 
326
401
  ```ruby
327
402
  # eval_server.ru
@@ -330,9 +405,9 @@ require "braintrust/server"
330
405
 
331
406
  # Define evaluators — these can reference your application code (models, services, etc.)
332
407
  food_classifier = Braintrust::Eval::Evaluator.new(
333
- task: ->(input) { FoodClassifier.classify(input) },
408
+ task: ->(input:) { FoodClassifier.classify(input) },
334
409
  scorers: [
335
- Braintrust::Eval.scorer("exact_match") { |input, expected, output| output == expected ? 1.0 : 0.0 }
410
+ Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }
336
411
  ]
337
412
  )
338
413
 
@@ -347,10 +422,21 @@ run Braintrust::Server::Rack.app(
347
422
  )
348
423
  ```
349
424
 
425
+ Add your Rack server to your Gemfile:
426
+
427
+ ```ruby
428
+ gem "rack"
429
+ gem "puma" # recommended
430
+ ```
431
+
432
+ Then start the server:
433
+
350
434
  ```bash
351
435
  bundle exec rackup eval_server.ru -p 8300 -o 0.0.0.0
352
436
  ```
353
437
 
438
+ See example: [server/eval.ru](./examples/server/eval.ru)
439
+
354
440
  **Custom evaluators**
355
441
 
356
442
  Evaluators can also be defined as subclasses:
@@ -358,15 +444,60 @@ Evaluators can also be defined as subclasses:
358
444
  ```ruby
359
445
  class FoodClassifier < Braintrust::Eval::Evaluator
360
446
  def task
361
- ->(input) { classify(input) }
447
+ ->(input:) { classify(input) }
448
+ end
449
+
450
+ def scorers
451
+ [Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }]
452
+ end
453
+ end
454
+ ```
455
+
456
+ #### Run as a Rails engine
457
+
458
+ Use the Rails engine when your evaluators live inside an existing Rails app and you want to mount the Braintrust eval server into that application.
459
+
460
+ Define each evaluator in its own file, for example under `app/evaluators/`:
461
+
462
+ ```ruby
463
+ # app/evaluators/food_classifier.rb
464
+ class FoodClassifier < Braintrust::Eval::Evaluator
465
+ def task
466
+ ->(input:) { classify(input) }
362
467
  end
363
468
 
364
469
  def scorers
365
- [Braintrust::Eval.scorer("exact_match") { |i, e, o| o == e ? 1.0 : 0.0 }]
470
+ [Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }]
366
471
  end
367
472
  end
368
473
  ```
369
474
 
475
+ Then generate the Braintrust initializer:
476
+
477
+ ```bash
478
+ bin/rails generate braintrust:server
479
+ ```
480
+
481
+ ```ruby
482
+ # config/routes.rb
483
+ Rails.application.routes.draw do
484
+ mount Braintrust::Contrib::Rails::Engine, at: "/braintrust"
485
+ end
486
+ ```
487
+
488
+ The generator writes `config/initializers/braintrust_server.rb`, where you can review or customize the slug-to-evaluator mapping it discovers from `app/evaluators/**/*.rb` and `evaluators/**/*.rb`.
489
+
490
+ See example: [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb)
491
+
492
+ **Developing locally**
493
+
494
+ If you want to skip authentication on incoming eval requests while developing locally:
495
+
496
+ - **For Rack**: Pass `auth: :none` to `Braintrust::Server::Rack.app(...)`
497
+ - **For Rails**: Set `config.auth = :none` in `config/initializers/braintrust_server.rb`
498
+
499
+ *NOTE: Setting `:none` only disables authentication on requests coming into your server; executing evals still requires a `BRAINTRUST_API_KEY` to fetch resources.*
500
+
370
501
  **Supported web servers**
371
502
 
372
503
  The dev server requires the `rack` gem and a Rack-compatible web server.
@@ -378,14 +509,7 @@ The dev server requires the `rack` gem and a Rack-compatible web server.
378
509
  | [Passenger](https://www.phusionpassenger.com/) | 6.x | |
379
510
  | [WEBrick](https://github.com/ruby/webrick) | Not supported | Does not support server-sent events. |
380
511
 
381
- Add your chosen server to your Gemfile:
382
-
383
- ```ruby
384
- gem "rack"
385
- gem "puma" # recommended
386
- ```
387
-
388
- See example: [server/eval.ru](./examples/server/eval.ru)
512
+ See examples: [server/eval.ru](./examples/server/eval.ru), [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb)
389
513
 
390
514
  ## Documentation
391
515
 
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+ require_relative "../../internal/http"
7
+
8
+ module Braintrust
9
+ class API
10
+ module Internal
11
+ # Internal BTQL client for querying spans.
12
+ # Not part of the public API — instantiated directly where needed.
13
+ class BTQL
14
+ # Maximum number of retries before returning partial results.
15
+ # Covers both freshness lag (partially indexed) and ingestion lag
16
+ # (spans not yet visible to BTQL after OTel flush).
17
+ MAX_FRESHNESS_RETRIES = 7
18
+
19
+ # Base delay (seconds) between retries (doubles each attempt, capped).
20
+ FRESHNESS_BASE_DELAY = 1.0
21
+
22
+ # Maximum delay (seconds) between retries. Caps exponential growth
23
+ # so we keep polling at a reasonable rate in the later window.
24
+ # Schedule: 1, 2, 4, 8, 8, 8, 8 = ~39s total worst-case.
25
+ MAX_FRESHNESS_DELAY = 8.0
26
+
27
+ def initialize(state)
28
+ @state = state
29
+ end
30
+
31
+ # Query spans belonging to a specific trace within an object.
32
+ #
33
+ # Builds a BTQL SQL query that matches the root_span_id and excludes scorer spans.
34
+ # Retries with exponential backoff if the response indicates data is not yet fresh.
35
+ #
36
+ # @param object_type [String] e.g. "experiment"
37
+ # @param object_id [String] Object UUID
38
+ # @param root_span_id [String] Hex trace ID of the root span
39
+ # @return [Array<Hash>] Parsed span data
40
+ def trace_spans(object_type:, object_id:, root_span_id:)
41
+ query = build_trace_query(
42
+ object_type: object_type,
43
+ object_id: object_id,
44
+ root_span_id: root_span_id
45
+ )
46
+ payload = {query: query, fmt: "jsonl"}
47
+
48
+ retries = 0
49
+ loop do
50
+ rows, freshness = execute_query(payload)
51
+ # Return when data is fresh AND non-empty, or we've exhausted retries.
52
+ # We retry on empty even when "complete" because there is ingestion lag
53
+ # between OTel flush and BTQL indexing — the server may report "complete"
54
+ # before it knows about newly-flushed spans.
55
+ return rows if (freshness == "complete" && !rows.empty?) || retries >= MAX_FRESHNESS_RETRIES
56
+
57
+ retries += 1
58
+ delay = [FRESHNESS_BASE_DELAY * (2**(retries - 1)), MAX_FRESHNESS_DELAY].min
59
+ sleep(delay)
60
+ end
61
+ rescue => e
62
+ Braintrust::Log.warn("[BTQL] Query failed: #{e.message}")
63
+ []
64
+ end
65
+
66
+ private
67
+
68
+ # Build a BTQL SQL query string for fetching trace spans.
69
+ #
70
+ # Selects all spans for a given root_span_id, excluding scorer spans
71
+ # (span_attributes.type = 'score').
72
+ #
73
+ # @param object_type [String] e.g. "experiment"
74
+ # @param object_id [String] Object UUID
75
+ # @param root_span_id [String] Hex trace ID
76
+ # @return [String] BTQL SQL query
77
+ def build_trace_query(object_type:, object_id:, root_span_id:)
78
+ escaped_root = root_span_id.gsub("'", "''")
79
+ escaped_id = object_id.gsub("'", "''")
80
+
81
+ "SELECT * FROM #{object_type}('#{escaped_id}') " \
82
+ "WHERE root_span_id = '#{escaped_root}' " \
83
+ "AND span_attributes.type != 'score' " \
84
+ "LIMIT 1000"
85
+ end
86
+
87
+ # Execute a BTQL query and parse the JSONL response.
88
+ #
89
+ # @param payload [Hash] BTQL request payload
90
+ # @return [Array(Array<Hash>, String)] [parsed_rows, freshness_state]
91
+ def execute_query(payload)
92
+ uri = URI("#{@state.api_url}/btql")
93
+
94
+ request = Net::HTTP::Post.new(uri)
95
+ request["Content-Type"] = "application/json"
96
+ request["Authorization"] = "Bearer #{@state.api_key}"
97
+ request["Accept"] = "application/x-jsonlines"
98
+ request.body = JSON.dump(payload)
99
+
100
+ response = Braintrust::Internal::Http.with_redirects(uri, request)
101
+
102
+ unless response.is_a?(Net::HTTPSuccess)
103
+ raise Braintrust::Error, "HTTP #{response.code} for POST #{uri}: #{response.body}"
104
+ end
105
+
106
+ freshness = response["x-bt-freshness-state"] || "complete"
107
+ [parse_jsonl(response.body), freshness]
108
+ end
109
+
110
+ # Parse a JSONL response body into an array of hashes.
111
+ #
112
+ # @param body [String] JSONL response body
113
+ # @return [Array<Hash>]
114
+ def parse_jsonl(body)
115
+ body.each_line.filter_map do |line|
116
+ line = line.strip
117
+ next if line.empty?
118
+ JSON.parse(line)
119
+ end
120
+ end
121
+ end
122
+ end
123
+ end
124
+ end
@@ -50,6 +50,25 @@ module Braintrust
50
50
 
51
51
  JSON.parse(response.body)
52
52
  end
53
+
54
+ # Delete an experiment
55
+ # DELETE /v1/experiment/:id
56
+ # @param id [String] Experiment ID
57
+ # @return [Hash] Deleted experiment data
58
+ def delete(id:)
59
+ uri = URI("#{@state.api_url}/v1/experiment/#{id}")
60
+
61
+ request = Net::HTTP::Delete.new(uri)
62
+ request["Authorization"] = "Bearer #{@state.api_key}"
63
+
64
+ response = Braintrust::Internal::Http.with_redirects(uri, request)
65
+
66
+ unless response.is_a?(Net::HTTPSuccess)
67
+ raise Error, "HTTP #{response.code} for DELETE #{uri}: #{response.body}"
68
+ end
69
+
70
+ JSON.parse(response.body)
71
+ end
53
72
  end
54
73
  end
55
74
  end
@@ -35,6 +35,25 @@ module Braintrust
35
35
 
36
36
  JSON.parse(response.body)
37
37
  end
38
+
39
+ # Delete a project
40
+ # DELETE /v1/project/:id
41
+ # @param id [String] Project UUID
42
+ # @return [Hash] Deleted project data
43
+ def delete(id:)
44
+ uri = URI("#{@state.api_url}/v1/project/#{id}")
45
+
46
+ request = Net::HTTP::Delete.new(uri)
47
+ request["Authorization"] = "Bearer #{@state.api_key}"
48
+
49
+ response = Braintrust::Internal::Http.with_redirects(uri, request)
50
+
51
+ unless response.is_a?(Net::HTTPSuccess)
52
+ raise Error, "HTTP #{response.code} for DELETE #{uri}: #{response.body}"
53
+ end
54
+
55
+ JSON.parse(response.body)
56
+ end
38
57
  end
39
58
  end
40
59
  end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Braintrust
4
+ module Contrib
5
+ module Rails
6
+ module Server
7
+ class ApplicationController < ActionController::API
8
+ before_action :authenticate!
9
+
10
+ private
11
+
12
+ def authenticate!
13
+ auth_result = Engine.auth_strategy.authenticate(request.env)
14
+ unless auth_result
15
+ render json: {"error" => "Unauthorized"}, status: :unauthorized
16
+ return
17
+ end
18
+
19
+ request.env["braintrust.auth"] = auth_result
20
+ @braintrust_auth = auth_result
21
+ end
22
+
23
+ def parse_json_body
24
+ body = request.body.read
25
+ return nil if body.nil? || body.empty?
26
+ JSON.parse(body)
27
+ rescue JSON::ParserError
28
+ nil
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Braintrust
4
+ module Contrib
5
+ module Rails
6
+ module Server
7
+ class Engine < ::Rails::Engine
8
+ isolate_namespace Braintrust::Contrib::Rails::Server
9
+
10
+ config.evaluators = {}
11
+ config.auth = :clerk_token
12
+
13
+ # Register the engine's routes file so Rails loads it during initialization.
14
+ paths["config/routes.rb"] << File.expand_path("routes.rb", __dir__)
15
+
16
+ initializer "braintrust.server.cors" do |app|
17
+ app.middleware.use Braintrust::Server::Middleware::Cors
18
+ end
19
+
20
+ # Class-level helpers that read from engine config.
21
+
22
+ def self.evaluators
23
+ config.evaluators
24
+ end
25
+
26
+ def self.auth_strategy
27
+ resolve_auth(config.auth)
28
+ end
29
+
30
+ def self.list_service
31
+ Braintrust::Server::Services::List.new(-> { config.evaluators })
32
+ end
33
+
34
+ # Long-lived so the state cache persists across requests.
35
+ def self.eval_service
36
+ @eval_service ||= Braintrust::Server::Services::Eval.new(-> { config.evaluators })
37
+ end
38
+
39
+ # Support the explicit `|config|` style used by this integration while
40
+ # still delegating zero-arity DSL blocks to Rails' native implementation.
41
+ def self.configure(&block)
42
+ return super if block&.arity == 0
43
+ yield config if block
44
+ end
45
+
46
+ def self.resolve_auth(auth)
47
+ case auth
48
+ when :none
49
+ Braintrust::Server::Auth::NoAuth.new
50
+ when :clerk_token
51
+ Braintrust::Server::Auth::ClerkToken.new
52
+ when Symbol, String
53
+ raise ArgumentError, "Unknown auth strategy #{auth.inspect}. Expected :none, :clerk_token, or an auth object."
54
+ else
55
+ auth
56
+ end
57
+ end
58
+ private_class_method :resolve_auth
59
+
60
+ generators do
61
+ require "braintrust/contrib/rails/server/generator"
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+
69
+ require_relative "application_controller"
70
+ require_relative "health_controller"
71
+ require_relative "list_controller"
72
+ require_relative "eval_controller"
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Braintrust
4
+ module Contrib
5
+ module Rails
6
+ module Server
7
+ class EvalController < ApplicationController
8
+ include ActionController::Live
9
+
10
+ def create
11
+ body = parse_json_body
12
+ unless body
13
+ render json: {"error" => "Invalid JSON body"}, status: :bad_request
14
+ return
15
+ end
16
+
17
+ result = Engine.eval_service.validate(body)
18
+ if result[:error]
19
+ render json: {"error" => result[:error]}, status: result[:status]
20
+ return
21
+ end
22
+
23
+ response.headers["Content-Type"] = "text/event-stream"
24
+ response.headers["Cache-Control"] = "no-cache"
25
+ response.headers["Connection"] = "keep-alive"
26
+
27
+ sse = Braintrust::Server::SSEWriter.new { |chunk| response.stream.write(chunk) }
28
+ Engine.eval_service.stream(result, auth: @braintrust_auth, sse: sse)
29
+ ensure
30
+ response.stream.close
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+
5
+ module Braintrust
6
+ module Contrib
7
+ module Rails
8
+ module Server
9
+ module Generators
10
+ class ServerGenerator < ::Rails::Generators::Base
11
+ namespace "braintrust:server"
12
+ source_root File.expand_path("templates", __dir__)
13
+
14
+ def create_initializer
15
+ @evaluators = discovered_evaluators
16
+ template "initializer.rb.tt", "config/initializers/braintrust_server.rb"
17
+ end
18
+
19
+ private
20
+
21
+ def discovered_evaluators
22
+ evaluator_roots.flat_map do |root|
23
+ Dir[File.join(destination_root, root, "**/*.rb")].sort.map do |file|
24
+ relative_path = file.delete_prefix("#{File.join(destination_root, root)}/").sub(/\.rb\z/, "")
25
+ {
26
+ class_name: relative_path.split("/").map(&:camelize).join("::"),
27
+ slug: relative_path.tr("/", "-").tr("_", "-")
28
+ }
29
+ end
30
+ end
31
+ end
32
+
33
+ def evaluator_roots
34
+ %w[app/evaluators evaluators].select do |root|
35
+ Dir.exist?(File.join(destination_root, root))
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Braintrust
4
+ module Contrib
5
+ module Rails
6
+ module Server
7
+ class HealthController < ApplicationController
8
+ def show
9
+ render json: {"status" => "ok"}
10
+ end
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Braintrust
4
+ module Contrib
5
+ module Rails
6
+ module Server
7
+ class ListController < ApplicationController
8
+ def show
9
+ result = Engine.list_service.call
10
+ render json: result
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ Braintrust::Contrib::Rails::Server::Engine.routes.draw do
4
+ get "/", to: "health#show"
5
+ get "/list", to: "list#show"
6
+ post "/list", to: "list#show"
7
+ post "/eval", to: "eval#create"
8
+ end