braintrust 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -0
- data/lib/braintrust/api/functions.rb +3 -1
- data/lib/braintrust/api/internal/btql.rb +3 -33
- data/lib/braintrust/eval/context.rb +84 -21
- data/lib/braintrust/eval/evaluator.rb +16 -2
- data/lib/braintrust/eval/runner.rb +56 -39
- data/lib/braintrust/eval.rb +22 -2
- data/lib/braintrust/internal/retry.rb +41 -0
- data/lib/braintrust/prompt.rb +11 -5
- data/lib/braintrust/server/services/eval_service.rb +13 -1
- data/lib/braintrust/version.rb +1 -1
- metadata +16 -127
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 27e146b06451b844b1e6416353b20f6bd572c3d1169a12a439745cb7280ce0ec
|
|
4
|
+
data.tar.gz: d726e3a146a2180bf2714846d56e65fa9d3ef1ce773adb116a8e6b1b79ba823c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 69e5150452e9dde1491664af1137cc05a9a5b651dbb5fdee27ff8a09e0e11b51c283c163019566045e1771679ed6f2eece4dd1753aa06f899e3681e7c6b99d15
|
|
7
|
+
data.tar.gz: 28cc8c86bdc13db8d33ad0dc28325c0d858f37ba1b9f41212c52e514eed649b14596c66153bca58de251c4c6dd1ddcb170d24ae100a33f912f49349671821f7a
|
data/README.md
CHANGED
|
@@ -21,6 +21,7 @@ This is the official Ruby SDK for [Braintrust](https://www.braintrust.dev), for
|
|
|
21
21
|
- [Attachments](#attachments)
|
|
22
22
|
- [Viewing traces](#viewing-traces)
|
|
23
23
|
- [Evals](#evals)
|
|
24
|
+
- [Tasks](#tasks)
|
|
24
25
|
- [Datasets](#datasets)
|
|
25
26
|
- [Scorers](#scorers)
|
|
26
27
|
- [Dev Server](#dev-server)
|
|
@@ -261,6 +262,48 @@ Braintrust::Eval.run(
|
|
|
261
262
|
|
|
262
263
|
See [eval.rb](./examples/eval.rb) for a full example.
|
|
263
264
|
|
|
265
|
+
### Tasks
|
|
266
|
+
|
|
267
|
+
Define the code being evaluated as a lambda or a class. Tasks receive `input:` as a keyword argument:
|
|
268
|
+
|
|
269
|
+
```ruby
|
|
270
|
+
# Lambda
|
|
271
|
+
task = ->(input:) { classify(input) }
|
|
272
|
+
|
|
273
|
+
# Class-based (auto-derives name from class: "food_classifier")
|
|
274
|
+
class FoodClassifier
|
|
275
|
+
include Braintrust::Task
|
|
276
|
+
|
|
277
|
+
def call(input:)
|
|
278
|
+
classify(input)
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
#### With parameters
|
|
284
|
+
|
|
285
|
+
Tasks can accept `parameters:` as input to drive their behavior:
|
|
286
|
+
|
|
287
|
+
```ruby
|
|
288
|
+
task = ->(input:, parameters:) {
|
|
289
|
+
value = parameters["value"]
|
|
290
|
+
from_unit = parameters["from_unit"] || 'c'
|
|
291
|
+
to_unit = parameters["to_unit"] || 'f'
|
|
292
|
+
|
|
293
|
+
convert_temp(temperature: value, from_unit: from_unit, to_unit: to_unit)
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
Braintrust::Eval.run(
|
|
297
|
+
project: "my-project",
|
|
298
|
+
cases: [...],
|
|
299
|
+
task: task,
|
|
300
|
+
scorers: [...],
|
|
301
|
+
parameters: {"value" => 23.0}
|
|
302
|
+
)
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
See [parameters.rb](./examples/eval/parameters.rb) for a full example.
|
|
306
|
+
|
|
264
307
|
### Datasets
|
|
265
308
|
|
|
266
309
|
Use test cases from a Braintrust dataset:
|
|
@@ -390,6 +433,19 @@ Braintrust::Eval.run(
|
|
|
390
433
|
|
|
391
434
|
See [trace_scoring.rb](./examples/eval/trace_scoring.rb) for a full example.
|
|
392
435
|
|
|
436
|
+
#### Scorer parameters
|
|
437
|
+
|
|
438
|
+
Scorers can also accept `parameters:` to use runtime configuration in their scoring logic. Like tasks, scorers that don't declare `parameters:` are unaffected:
|
|
439
|
+
|
|
440
|
+
```ruby
|
|
441
|
+
Braintrust::Scorer.new("threshold_match") do |expected:, output:, parameters:|
|
|
442
|
+
threshold = parameters["threshold"] || 0.8
|
|
443
|
+
similarity(output, expected) >= threshold ? 1.0 : 0.0
|
|
444
|
+
end
|
|
445
|
+
```
|
|
446
|
+
|
|
447
|
+
See [parameters.rb](./examples/eval/parameters.rb) for a full example.
|
|
448
|
+
|
|
393
449
|
### Dev Server
|
|
394
450
|
|
|
395
451
|
Run evaluations from the Braintrust web UI against code in your own application.
|
data/lib/braintrust/api/functions.rb
CHANGED
|
@@ -25,13 +25,15 @@ module Braintrust
|
|
|
25
25
|
# List functions with optional filters
|
|
26
26
|
# GET /v1/function?project_name=X&...
|
|
27
27
|
# @param project_name [String, nil] Filter by project name
|
|
28
|
+
# @param project_id [String, nil] Filter by project ID (UUID)
|
|
28
29
|
# @param function_name [String, nil] Filter by function name
|
|
29
30
|
# @param slug [String, nil] Filter by slug
|
|
30
31
|
# @param limit [Integer, nil] Limit number of results
|
|
31
32
|
# @return [Hash] Response with "objects" array
|
|
32
|
-
def list(project_name: nil, function_name: nil, slug: nil, limit: nil)
|
|
33
|
+
def list(project_name: nil, project_id: nil, function_name: nil, slug: nil, limit: nil)
|
|
33
34
|
params = {}
|
|
34
35
|
params["project_name"] = project_name if project_name
|
|
36
|
+
params["project_id"] = project_id if project_id
|
|
35
37
|
params["function_name"] = function_name if function_name
|
|
36
38
|
params["slug"] = slug if slug
|
|
37
39
|
params["limit"] = limit if limit
|
data/lib/braintrust/api/internal/btql.rb
CHANGED
|
@@ -11,19 +11,6 @@ module Braintrust
|
|
|
11
11
|
# Internal BTQL client for querying spans.
|
|
12
12
|
# Not part of the public API — instantiated directly where needed.
|
|
13
13
|
class BTQL
|
|
14
|
-
# Maximum number of retries before returning partial results.
|
|
15
|
-
# Covers both freshness lag (partially indexed) and ingestion lag
|
|
16
|
-
# (spans not yet visible to BTQL after OTel flush).
|
|
17
|
-
MAX_FRESHNESS_RETRIES = 7
|
|
18
|
-
|
|
19
|
-
# Base delay (seconds) between retries (doubles each attempt, capped).
|
|
20
|
-
FRESHNESS_BASE_DELAY = 1.0
|
|
21
|
-
|
|
22
|
-
# Maximum delay (seconds) between retries. Caps exponential growth
|
|
23
|
-
# so we keep polling at a reasonable rate in the later window.
|
|
24
|
-
# Schedule: 1, 2, 4, 8, 8, 8, 8 = ~39s total worst-case.
|
|
25
|
-
MAX_FRESHNESS_DELAY = 8.0
|
|
26
|
-
|
|
27
14
|
def initialize(state)
|
|
28
15
|
@state = state
|
|
29
16
|
end
|
|
@@ -31,36 +18,19 @@ module Braintrust
|
|
|
31
18
|
# Query spans belonging to a specific trace within an object.
|
|
32
19
|
#
|
|
33
20
|
# Builds a BTQL SQL query that matches the root_span_id and excludes scorer spans.
|
|
34
|
-
#
|
|
21
|
+
# Returns a single-shot result; callers are responsible for retry and error handling.
|
|
35
22
|
#
|
|
36
23
|
# @param object_type [String] e.g. "experiment"
|
|
37
24
|
# @param object_id [String] Object UUID
|
|
38
25
|
# @param root_span_id [String] Hex trace ID of the root span
|
|
39
|
-
# @return [Array<Hash
|
|
26
|
+
# @return [Array(Array<Hash>, String)] [rows, freshness]
|
|
40
27
|
def trace_spans(object_type:, object_id:, root_span_id:)
|
|
41
28
|
query = build_trace_query(
|
|
42
29
|
object_type: object_type,
|
|
43
30
|
object_id: object_id,
|
|
44
31
|
root_span_id: root_span_id
|
|
45
32
|
)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
retries = 0
|
|
49
|
-
loop do
|
|
50
|
-
rows, freshness = execute_query(payload)
|
|
51
|
-
# Return when data is fresh AND non-empty, or we've exhausted retries.
|
|
52
|
-
# We retry on empty even when "complete" because there is ingestion lag
|
|
53
|
-
# between OTel flush and BTQL indexing — the server may report "complete"
|
|
54
|
-
# before it knows about newly-flushed spans.
|
|
55
|
-
return rows if (freshness == "complete" && !rows.empty?) || retries >= MAX_FRESHNESS_RETRIES
|
|
56
|
-
|
|
57
|
-
retries += 1
|
|
58
|
-
delay = [FRESHNESS_BASE_DELAY * (2**(retries - 1)), MAX_FRESHNESS_DELAY].min
|
|
59
|
-
sleep(delay)
|
|
60
|
-
end
|
|
61
|
-
rescue => e
|
|
62
|
-
Braintrust::Log.warn("[BTQL] Query failed: #{e.message}")
|
|
63
|
-
[]
|
|
33
|
+
execute_query(query: query, fmt: "jsonl")
|
|
64
34
|
end
|
|
65
35
|
|
|
66
36
|
private
|
data/lib/braintrust/eval/context.rb
CHANGED
|
@@ -9,11 +9,24 @@ module Braintrust
|
|
|
9
9
|
class Context
|
|
10
10
|
attr_reader :task, :scorers, :cases, :experiment_id, :experiment_name,
|
|
11
11
|
:project_id, :project_name, :state, :tracer_provider,
|
|
12
|
-
:on_progress, :parent_span_attr, :generation
|
|
12
|
+
:on_progress, :parent_span_attr, :generation, :parameters
|
|
13
13
|
|
|
14
|
+
# @param task [Task] Normalized task wrapper
|
|
15
|
+
# @param scorers [Array<Scorer>] Normalized scorer wrappers
|
|
16
|
+
# @param cases [Cases] Normalized eval cases
|
|
17
|
+
# @param experiment_id [String, nil] Experiment ID for logging and trace linkage
|
|
18
|
+
# @param experiment_name [String, nil] Experiment name, included in span attributes
|
|
19
|
+
# @param project_id [String, nil] Project ID
|
|
20
|
+
# @param project_name [String, nil] Project name
|
|
21
|
+
# @param state [Braintrust::State, nil] Authenticated API state; nil for local-only evals
|
|
22
|
+
# @param tracer_provider [#tracer, nil] OpenTelemetry tracer provider
|
|
23
|
+
# @param on_progress [Proc, nil] Callback invoked after each case completes, receiving a progress Hash
|
|
24
|
+
# @param parent_span_attr [String, nil] Formatted parent span identifier ("type:id"), linking spans to a parent context
|
|
25
|
+
# @param generation [Integer, nil] Generation number from the parent span context, used to link spans in a trace hierarchy
|
|
26
|
+
# @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument
|
|
14
27
|
def initialize(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil,
|
|
15
28
|
project_id: nil, project_name: nil, state: nil, tracer_provider: nil,
|
|
16
|
-
on_progress: nil, parent_span_attr: nil, generation: nil)
|
|
29
|
+
on_progress: nil, parent_span_attr: nil, generation: nil, parameters: nil)
|
|
17
30
|
@task = task
|
|
18
31
|
@scorers = scorers
|
|
19
32
|
@cases = cases
|
|
@@ -26,40 +39,83 @@ module Braintrust
|
|
|
26
39
|
@on_progress = on_progress
|
|
27
40
|
@parent_span_attr = parent_span_attr
|
|
28
41
|
@generation = generation
|
|
42
|
+
@parameters = parameters
|
|
29
43
|
end
|
|
30
44
|
|
|
31
45
|
# Build a Context from raw user inputs.
|
|
32
|
-
#
|
|
33
|
-
#
|
|
46
|
+
# Delegates to Factory for normalization.
|
|
47
|
+
# @param task [Task, Proc, #call] Task to evaluate; wrapped into a {Task} if needed
|
|
48
|
+
# @param scorers [Array<Scorer, Proc, String, Scorer::ID, #call>] Scorers; each is normalized into a {Scorer}
|
|
49
|
+
# @param cases [Cases, Array, Enumerable] Eval cases; wrapped into {Cases} if needed
|
|
50
|
+
# @param experiment_id [String, nil] Experiment ID for logging
|
|
51
|
+
# @param experiment_name [String, nil] Experiment name, included in span attributes
|
|
52
|
+
# @param project_id [String, nil] Project ID
|
|
53
|
+
# @param project_name [String, nil] Project name; required when resolving scorer slugs
|
|
54
|
+
# @param state [Braintrust::State, nil] Authenticated API state; nil for local-only evals
|
|
55
|
+
# @param tracer_provider [#tracer, nil] OpenTelemetry tracer provider; defaults to global provider
|
|
56
|
+
# @param on_progress [Proc, nil] Callback invoked after each case completes, receiving a progress Hash
|
|
57
|
+
# @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation
|
|
58
|
+
# @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument
|
|
59
|
+
# @return [Context]
|
|
34
60
|
def self.build(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil,
|
|
35
61
|
project_id: nil, project_name: nil, state: nil, tracer_provider: nil,
|
|
36
|
-
on_progress: nil, parent: nil)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
scorers:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
experiment_name: experiment_name,
|
|
45
|
-
project_id: project_id,
|
|
46
|
-
project_name: project_name,
|
|
47
|
-
state: state,
|
|
48
|
-
tracer_provider: tracer_provider,
|
|
49
|
-
on_progress: on_progress,
|
|
50
|
-
parent_span_attr: factory.resolve_parent_span_attr(parent),
|
|
51
|
-
generation: parent&.dig(:generation)
|
|
62
|
+
on_progress: nil, parent: nil, parameters: nil)
|
|
63
|
+
Factory.new(
|
|
64
|
+
state: state, tracer_provider: tracer_provider,
|
|
65
|
+
project_id: project_id, project_name: project_name
|
|
66
|
+
).build(
|
|
67
|
+
task: task, scorers: scorers, cases: cases,
|
|
68
|
+
experiment_id: experiment_id, experiment_name: experiment_name,
|
|
69
|
+
on_progress: on_progress, parent: parent, parameters: parameters
|
|
52
70
|
)
|
|
53
71
|
end
|
|
54
72
|
|
|
55
73
|
# Encapsulates normalization of raw user inputs into typed wrappers.
|
|
56
74
|
class Factory
|
|
57
|
-
|
|
75
|
+
# @param state [Braintrust::State, nil] Authenticated API state; passed through to scorer resolution
|
|
76
|
+
# @param tracer_provider [#tracer, nil] OpenTelemetry tracer provider; passed through to remote scorers
|
|
77
|
+
# @param project_id [String, nil] Project ID; passed through to the built Context
|
|
78
|
+
# @param project_name [String, nil] Project name; required when resolving scorer slugs
|
|
79
|
+
def initialize(state: nil, tracer_provider: nil, project_id: nil, project_name: nil)
|
|
58
80
|
@state = state
|
|
59
81
|
@tracer_provider = tracer_provider
|
|
82
|
+
@project_id = project_id
|
|
60
83
|
@project_name = project_name
|
|
61
84
|
end
|
|
62
85
|
|
|
86
|
+
# Normalize raw inputs and construct a {Context}.
|
|
87
|
+
# @param task [Task, Proc, #call] Raw task
|
|
88
|
+
# @param scorers [Array] Raw scorers
|
|
89
|
+
# @param cases [Cases, Array, Enumerable] Raw eval cases
|
|
90
|
+
# @param experiment_id [String, nil]
|
|
91
|
+
# @param experiment_name [String, nil]
|
|
92
|
+
# @param on_progress [Proc, nil]
|
|
93
|
+
# @param parent [Hash, nil] Parent span info with keys :object_type, :object_id, and optionally :generation
|
|
94
|
+
# @return [Context]
|
|
95
|
+
def build(task:, scorers:, cases:, experiment_id: nil, experiment_name: nil,
|
|
96
|
+
on_progress: nil, parent: nil, parameters: nil)
|
|
97
|
+
Context.new(
|
|
98
|
+
task: normalize_task(task),
|
|
99
|
+
scorers: normalize_scorers(scorers),
|
|
100
|
+
cases: normalize_cases(cases),
|
|
101
|
+
experiment_id: experiment_id,
|
|
102
|
+
experiment_name: experiment_name,
|
|
103
|
+
project_id: @project_id,
|
|
104
|
+
project_name: @project_name,
|
|
105
|
+
state: @state,
|
|
106
|
+
tracer_provider: @tracer_provider || OpenTelemetry.tracer_provider,
|
|
107
|
+
on_progress: on_progress,
|
|
108
|
+
parent_span_attr: resolve_parent_span_attr(parent),
|
|
109
|
+
generation: parent&.dig(:generation),
|
|
110
|
+
parameters: parameters
|
|
111
|
+
)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
private
|
|
115
|
+
|
|
116
|
+
# @param raw [Cases, Array, Enumerable, #each]
|
|
117
|
+
# @return [Cases]
|
|
118
|
+
# @raise [ArgumentError] if raw is not enumerable
|
|
63
119
|
def normalize_cases(raw)
|
|
64
120
|
case raw
|
|
65
121
|
when Cases
|
|
@@ -75,11 +131,15 @@ module Braintrust
|
|
|
75
131
|
end
|
|
76
132
|
end
|
|
77
133
|
|
|
134
|
+
# @param parent [Hash, nil]
|
|
135
|
+
# @return [String, nil] Formatted as "type:id", e.g. "experiment_id:abc-123"
|
|
78
136
|
def resolve_parent_span_attr(parent)
|
|
79
137
|
return nil unless parent
|
|
80
138
|
"#{parent[:object_type]}:#{parent[:object_id]}"
|
|
81
139
|
end
|
|
82
140
|
|
|
141
|
+
# @param raw [Task, Proc, #call]
|
|
142
|
+
# @return [Task]
|
|
83
143
|
def normalize_task(raw)
|
|
84
144
|
case raw
|
|
85
145
|
when Task
|
|
@@ -95,6 +155,9 @@ module Braintrust
|
|
|
95
155
|
end
|
|
96
156
|
end
|
|
97
157
|
|
|
158
|
+
# @param raw [Array<Scorer, Proc, String, Scorer::ID, #call>]
|
|
159
|
+
# @return [Array<Scorer>]
|
|
160
|
+
# @raise [ArgumentError] if a String slug is given without a project name
|
|
98
161
|
def normalize_scorers(raw)
|
|
99
162
|
raw.map do |scorer|
|
|
100
163
|
case scorer
|
data/lib/braintrust/eval/evaluator.rb
CHANGED
|
@@ -27,6 +27,18 @@ module Braintrust
|
|
|
27
27
|
# Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }
|
|
28
28
|
# ]
|
|
29
29
|
# )
|
|
30
|
+
#
|
|
31
|
+
# @example Remote eval with parameters (for Playground UI)
|
|
32
|
+
# Braintrust::Eval::Evaluator.new(
|
|
33
|
+
# task: ->(input:, parameters:) {
|
|
34
|
+
# model = parameters["model"] || "gpt-4"
|
|
35
|
+
# # Use model to generate response...
|
|
36
|
+
# },
|
|
37
|
+
# scorers: [Braintrust::Scorer.new("exact") { |expected:, output:| output == expected ? 1.0 : 0.0 }],
|
|
38
|
+
# parameters: {
|
|
39
|
+
# "model" => {type: "string", default: "gpt-4", description: "Model to use"}
|
|
40
|
+
# }
|
|
41
|
+
# )
|
|
30
42
|
class Evaluator
|
|
31
43
|
attr_accessor :task, :scorers, :parameters
|
|
32
44
|
|
|
@@ -64,13 +76,15 @@ module Braintrust
|
|
|
64
76
|
def run(cases, on_progress: nil, quiet: false,
|
|
65
77
|
project: nil, experiment: nil, project_id: nil,
|
|
66
78
|
dataset: nil, scorers: nil, parent: nil,
|
|
67
|
-
state: nil, update: false, tracer_provider: nil
|
|
79
|
+
state: nil, update: false, tracer_provider: nil,
|
|
80
|
+
parameters: nil)
|
|
68
81
|
all_scorers = scorers ? self.scorers + scorers : self.scorers
|
|
69
82
|
Braintrust::Eval.run(
|
|
70
83
|
task: task, scorers: all_scorers, cases: cases, dataset: dataset,
|
|
71
84
|
project: project, experiment: experiment, project_id: project_id,
|
|
72
85
|
parent: parent, on_progress: on_progress, quiet: quiet,
|
|
73
|
-
state: state, update: update, tracer_provider: tracer_provider
|
|
86
|
+
state: state, update: update, tracer_provider: tracer_provider,
|
|
87
|
+
parameters: parameters
|
|
74
88
|
)
|
|
75
89
|
end
|
|
76
90
|
end
|
data/lib/braintrust/eval/runner.rb
CHANGED
|
@@ -6,6 +6,7 @@ require_relative "summary"
|
|
|
6
6
|
require_relative "trace"
|
|
7
7
|
require_relative "../internal/thread_pool"
|
|
8
8
|
require_relative "../api/internal/btql"
|
|
9
|
+
require_relative "../internal/retry"
|
|
9
10
|
|
|
10
11
|
require "opentelemetry/sdk"
|
|
11
12
|
require "json"
|
|
@@ -24,8 +25,7 @@ module Braintrust
|
|
|
24
25
|
# @param eval_context [Context] Normalized eval context
|
|
25
26
|
def initialize(eval_context)
|
|
26
27
|
@eval_context = eval_context
|
|
27
|
-
|
|
28
|
-
@tracer = tracer_provider.tracer("braintrust-eval")
|
|
28
|
+
@tracer = eval_context.tracer_provider.tracer("braintrust-eval")
|
|
29
29
|
|
|
30
30
|
# Mutex for thread-safe score collection
|
|
31
31
|
@score_mutex = Mutex.new
|
|
@@ -79,50 +79,50 @@ module Braintrust
|
|
|
79
79
|
|
|
80
80
|
# Run a single test case with OpenTelemetry tracing
|
|
81
81
|
# Creates eval span (parent) with task and score as children
|
|
82
|
-
# @param
|
|
82
|
+
# @param kase [CaseContext] The per-case accumulator
|
|
83
83
|
# @param errors [Queue] Thread-safe error collection queue
|
|
84
|
-
def run_eval_case(
|
|
84
|
+
def run_eval_case(kase, errors)
|
|
85
85
|
# Each eval case starts its own trace — detach from any ambient span context
|
|
86
86
|
eval_span = tracer.start_root_span("eval")
|
|
87
87
|
OpenTelemetry::Trace.with_span(eval_span) do
|
|
88
88
|
# Set attributes known before task execution
|
|
89
89
|
eval_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
|
|
90
90
|
set_json_attr(eval_span, "braintrust.span_attributes", build_span_attributes("eval"))
|
|
91
|
-
set_json_attr(eval_span, "braintrust.input_json", {input:
|
|
92
|
-
set_json_attr(eval_span, "braintrust.expected",
|
|
93
|
-
set_json_attr(eval_span, "braintrust.metadata",
|
|
94
|
-
eval_span.set_attribute("braintrust.tags",
|
|
95
|
-
eval_span.set_attribute("braintrust.origin",
|
|
91
|
+
set_json_attr(eval_span, "braintrust.input_json", {input: kase.input})
|
|
92
|
+
set_json_attr(eval_span, "braintrust.expected", kase.expected) if kase.expected
|
|
93
|
+
set_json_attr(eval_span, "braintrust.metadata", kase.metadata) if kase.metadata
|
|
94
|
+
eval_span.set_attribute("braintrust.tags", kase.tags) if kase.tags
|
|
95
|
+
eval_span.set_attribute("braintrust.origin", kase.origin) if kase.origin
|
|
96
96
|
|
|
97
97
|
# Run task
|
|
98
98
|
begin
|
|
99
|
-
|
|
99
|
+
kase.output = run_task(kase)
|
|
100
100
|
rescue => e
|
|
101
101
|
# Error already recorded on task span, set eval span status
|
|
102
102
|
eval_span.status = OpenTelemetry::Trace::Status.error(e.message)
|
|
103
103
|
set_json_attr(eval_span, "braintrust.output_json", {output: nil})
|
|
104
|
-
errors << "Task failed for input '#{
|
|
105
|
-
report_progress(eval_span,
|
|
104
|
+
errors << "Task failed for input '#{kase.input}': #{e.message}"
|
|
105
|
+
report_progress(eval_span, kase, error: e.message)
|
|
106
106
|
next
|
|
107
107
|
end
|
|
108
108
|
|
|
109
109
|
# Flush spans so they're queryable via BTQL, then build trace
|
|
110
|
-
eval_context.tracer_provider
|
|
111
|
-
|
|
110
|
+
eval_context.tracer_provider.force_flush if eval_context.tracer_provider.respond_to?(:force_flush)
|
|
111
|
+
kase.trace = build_trace(eval_span)
|
|
112
112
|
|
|
113
113
|
# Run scorers
|
|
114
114
|
begin
|
|
115
|
-
run_scorers(
|
|
115
|
+
run_scorers(kase)
|
|
116
116
|
rescue => e
|
|
117
117
|
# Error already recorded on score span, set eval span status
|
|
118
118
|
eval_span.status = OpenTelemetry::Trace::Status.error(e.message)
|
|
119
|
-
errors << "Scorers failed for input '#{
|
|
119
|
+
errors << "Scorers failed for input '#{kase.input}': #{e.message}"
|
|
120
120
|
end
|
|
121
121
|
|
|
122
122
|
# Set output after task completes
|
|
123
|
-
set_json_attr(eval_span, "braintrust.output_json", {output:
|
|
123
|
+
set_json_attr(eval_span, "braintrust.output_json", {output: kase.output})
|
|
124
124
|
|
|
125
|
-
report_progress(eval_span,
|
|
125
|
+
report_progress(eval_span, kase, data: kase.output)
|
|
126
126
|
end
|
|
127
127
|
ensure
|
|
128
128
|
eval_span&.finish
|
|
@@ -130,17 +130,18 @@ module Braintrust
|
|
|
130
130
|
|
|
131
131
|
# Run task with OpenTelemetry tracing
|
|
132
132
|
# Creates task span with input and output
|
|
133
|
-
# @param
|
|
133
|
+
# @param kase [CaseContext] The per-case context
|
|
134
134
|
# @return [Object] Task output
|
|
135
|
-
def run_task(
|
|
135
|
+
def run_task(kase)
|
|
136
136
|
tracer.in_span("task") do |task_span|
|
|
137
137
|
task_span.set_attribute("braintrust.parent", eval_context.parent_span_attr) if eval_context.parent_span_attr
|
|
138
138
|
set_json_attr(task_span, "braintrust.span_attributes", build_span_attributes("task"))
|
|
139
|
-
set_json_attr(task_span, "braintrust.input_json",
|
|
139
|
+
set_json_attr(task_span, "braintrust.input_json", kase.input)
|
|
140
140
|
|
|
141
141
|
begin
|
|
142
142
|
output = eval_context.task.call(
|
|
143
|
-
input:
|
|
143
|
+
input: kase.input,
|
|
144
|
+
parameters: eval_context.parameters || {}
|
|
144
145
|
)
|
|
145
146
|
set_json_attr(task_span, "braintrust.output_json", output)
|
|
146
147
|
output
|
|
@@ -155,20 +156,22 @@ module Braintrust
|
|
|
155
156
|
|
|
156
157
|
# Run scorers with OpenTelemetry tracing.
|
|
157
158
|
# Creates one span per scorer, each a direct child of the current (eval) span.
|
|
158
|
-
# @param
|
|
159
|
-
def run_scorers(
|
|
159
|
+
# @param kase [CaseContext] The per-case context (output must be populated)
|
|
160
|
+
def run_scorers(kase)
|
|
160
161
|
scorer_kwargs = {
|
|
161
|
-
input:
|
|
162
|
-
expected:
|
|
163
|
-
output:
|
|
164
|
-
metadata:
|
|
165
|
-
trace:
|
|
162
|
+
input: kase.input,
|
|
163
|
+
expected: kase.expected,
|
|
164
|
+
output: kase.output,
|
|
165
|
+
metadata: kase.metadata || {},
|
|
166
|
+
trace: kase.trace,
|
|
167
|
+
parameters: eval_context.parameters || {}
|
|
166
168
|
}
|
|
167
169
|
scorer_input = {
|
|
168
|
-
input:
|
|
169
|
-
expected:
|
|
170
|
-
output:
|
|
171
|
-
metadata:
|
|
170
|
+
input: kase.input,
|
|
171
|
+
expected: kase.expected,
|
|
172
|
+
output: kase.output,
|
|
173
|
+
metadata: kase.metadata || {},
|
|
174
|
+
parameters: eval_context.parameters || {}
|
|
172
175
|
}
|
|
173
176
|
|
|
174
177
|
scorer_error = nil
|
|
@@ -224,9 +227,23 @@ module Braintrust
|
|
|
224
227
|
object_id = eval_context.experiment_id
|
|
225
228
|
btql = API::Internal::BTQL.new(eval_context.state)
|
|
226
229
|
|
|
227
|
-
Eval::Trace.new(
|
|
228
|
-
|
|
229
|
-
|
|
230
|
+
Eval::Trace.new(spans: -> { fetch_trace_spans(btql, object_type, object_id, root_span_id) })
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
# Fetch trace spans with retry to handle freshness and ingestion lag.
|
|
234
|
+
# @return [Array<Hash>] Parsed span data
|
|
235
|
+
def fetch_trace_spans(btql, object_type, object_id, root_span_id)
|
|
236
|
+
rows, _freshness = Internal::Retry.with_backoff(
|
|
237
|
+
max_retries: 7, base_delay: 1.0, max_delay: 8.0,
|
|
238
|
+
until: ->(result) {
|
|
239
|
+
r, f = result
|
|
240
|
+
f == "complete" && !r.empty?
|
|
241
|
+
}
|
|
242
|
+
) { btql.trace_spans(object_type: object_type, object_id: object_id, root_span_id: root_span_id) }
|
|
243
|
+
rows || []
|
|
244
|
+
rescue => e
|
|
245
|
+
Braintrust::Log.warn("[BTQL] Query failed: #{e.message}")
|
|
246
|
+
[]
|
|
230
247
|
end
|
|
231
248
|
|
|
232
249
|
# Build a CaseContext from a Case struct
|
|
@@ -241,11 +258,11 @@ module Braintrust
|
|
|
241
258
|
|
|
242
259
|
# Report progress for a case via on_progress callback.
|
|
243
260
|
# Rescues errors in the callback so a broken handler never crashes the eval.
|
|
244
|
-
def report_progress(eval_span,
|
|
261
|
+
def report_progress(eval_span, kase, **fields)
|
|
245
262
|
return unless eval_context.on_progress
|
|
246
263
|
progress = {"id" => eval_span.context.hex_span_id}.merge(fields.transform_keys(&:to_s))
|
|
247
|
-
if
|
|
248
|
-
progress["origin"] =
|
|
264
|
+
if kase.origin
|
|
265
|
+
progress["origin"] = kase.origin.is_a?(String) ? JSON.parse(kase.origin) : kase.origin
|
|
249
266
|
end
|
|
250
267
|
eval_context.on_progress.call(progress)
|
|
251
268
|
rescue => e
|
data/lib/braintrust/eval.rb
CHANGED
|
@@ -105,6 +105,21 @@ module Braintrust
|
|
|
105
105
|
# scorers: [->(expected:, output:) { output == expected ? 1.0 : 0.0 }]
|
|
106
106
|
# )
|
|
107
107
|
#
|
|
108
|
+
# @example Using parameters for configurable tasks
|
|
109
|
+
# # Tasks and scorers that declare `parameters:` receive it automatically.
|
|
110
|
+
# # Those that don't are unaffected — KeywordFilter strips unknown kwargs.
|
|
111
|
+
# Braintrust::Eval.run(
|
|
112
|
+
# project: "my-project",
|
|
113
|
+
# experiment: "with-params",
|
|
114
|
+
# cases: [{input: "hello", expected: "HELLO!"}],
|
|
115
|
+
# task: ->(input:, parameters:) {
|
|
116
|
+
# suffix = parameters["suffix"] || ""
|
|
117
|
+
# input.upcase + suffix
|
|
118
|
+
# },
|
|
119
|
+
# scorers: [->(expected:, output:) { output == expected ? 1.0 : 0.0 }],
|
|
120
|
+
# parameters: {"suffix" => "!"}
|
|
121
|
+
# )
|
|
122
|
+
#
|
|
108
123
|
# @example Using metadata and tags
|
|
109
124
|
# Braintrust::Eval.run(
|
|
110
125
|
# project: "my-project",
|
|
@@ -158,11 +173,15 @@ module Braintrust
|
|
|
158
173
|
# @param quiet [Boolean] If true, suppress result output (default: false)
|
|
159
174
|
# @param state [State, nil] Braintrust state (defaults to global state)
|
|
160
175
|
# @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider (defaults to global)
|
|
176
|
+
# @param project_id [String, nil] Project UUID (skips project creation when provided)
|
|
177
|
+
# @param parent [Hash, nil] Parent span context ({object_type:, object_id:, generation:})
|
|
178
|
+
# @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument
|
|
161
179
|
# @return [Result]
|
|
162
180
|
def run(task:, scorers:, project: nil, experiment: nil,
|
|
163
181
|
cases: nil, dataset: nil, on_progress: nil,
|
|
164
182
|
parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
|
|
165
|
-
state: nil, tracer_provider: nil, project_id: nil, parent: nil
|
|
183
|
+
state: nil, tracer_provider: nil, project_id: nil, parent: nil,
|
|
184
|
+
parameters: nil)
|
|
166
185
|
# Validate required parameters
|
|
167
186
|
validate_params!(task: task, scorers: scorers, cases: cases, dataset: dataset)
|
|
168
187
|
|
|
@@ -205,7 +224,8 @@ module Braintrust
|
|
|
205
224
|
state: state,
|
|
206
225
|
tracer_provider: tracer_provider,
|
|
207
226
|
on_progress: on_progress,
|
|
208
|
-
parent: parent
|
|
227
|
+
parent: parent,
|
|
228
|
+
parameters: parameters
|
|
209
229
|
)
|
|
210
230
|
result = Runner.new(context).run(parallelism: parallelism)
|
|
211
231
|
|
data/lib/braintrust/internal/retry.rb
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Braintrust
|
|
4
|
+
module Internal
|
|
5
|
+
module Retry
|
|
6
|
+
MAX_RETRIES = 7
|
|
7
|
+
BASE_DELAY = 1.0
|
|
8
|
+
MAX_DELAY = 8.0
|
|
9
|
+
|
|
10
|
+
# Retry a block with exponential backoff.
|
|
11
|
+
#
|
|
12
|
+
# The block is the task to attempt. Its return value is captured each attempt.
|
|
13
|
+
#
|
|
14
|
+
# @param max_retries [Integer] Maximum number of retries after the first attempt
|
|
15
|
+
# @param base_delay [Float] Initial delay in seconds (doubles each retry)
|
|
16
|
+
# @param max_delay [Float] Cap on delay between retries
|
|
17
|
+
# @param until [Proc, nil] Optional condition — receives block result, truthy stops retrying.
|
|
18
|
+
# When omitted, the block result's own truthiness decides.
|
|
19
|
+
# @return The last block result (whether retries were exhausted or condition was met)
|
|
20
|
+
#
|
|
21
|
+
# @example Simple: retry until truthy
|
|
22
|
+
# conn = Retry.with_backoff(max_retries: 5) { try_connect }
|
|
23
|
+
#
|
|
24
|
+
# @example With condition: retry until non-empty
|
|
25
|
+
# data = Retry.with_backoff(until: ->(r) { r.any? }) { api.fetch }
|
|
26
|
+
#
|
|
27
|
+
def self.with_backoff(max_retries: MAX_RETRIES, base_delay: BASE_DELAY, max_delay: MAX_DELAY, until: nil, &task)
|
|
28
|
+
check = binding.local_variable_get(:until)
|
|
29
|
+
result = task.call
|
|
30
|
+
retries = 0
|
|
31
|
+
while retries < max_retries && !(check ? check.call(result) : result)
|
|
32
|
+
retries += 1
|
|
33
|
+
delay = [base_delay * (2**(retries - 1)), max_delay].min
|
|
34
|
+
sleep(delay)
|
|
35
|
+
result = task.call
|
|
36
|
+
end
|
|
37
|
+
result
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
data/lib/braintrust/prompt.rb
CHANGED
|
@@ -11,23 +11,28 @@ module Braintrust
|
|
|
11
11
|
# params = prompt.build(text: "Article to summarize...")
|
|
12
12
|
# client.messages.create(**params)
|
|
13
13
|
class Prompt
|
|
14
|
-
attr_reader :id, :name, :slug, :project_id
|
|
14
|
+
attr_reader :id, :name, :slug, :project_id, :version
|
|
15
15
|
|
|
16
16
|
# Load a prompt from Braintrust
|
|
17
17
|
#
|
|
18
|
-
# @param project [String] Project name
|
|
18
|
+
# @param project [String, nil] Project name (provide either project or project_id)
|
|
19
|
+
# @param project_id [String, nil] Project ID (UUID, provide either project or project_id)
|
|
19
20
|
# @param slug [String] Prompt slug
|
|
20
21
|
# @param version [String, nil] Specific version (default: latest)
|
|
21
22
|
# @param defaults [Hash] Default variable values for build()
|
|
22
23
|
# @param api [API, nil] Braintrust API client (default: creates one using global state)
|
|
23
24
|
# @return [Prompt]
|
|
24
|
-
def self.load(
|
|
25
|
+
def self.load(slug:, project: nil, project_id: nil, version: nil, defaults: {}, api: nil)
|
|
26
|
+
raise ArgumentError, "Either project or project_id is required" unless project || project_id
|
|
27
|
+
|
|
25
28
|
api ||= API.new
|
|
26
29
|
|
|
27
30
|
# Find the function by project + slug
|
|
28
|
-
result = api.functions.list(project_name: project, slug: slug)
|
|
31
|
+
result = api.functions.list(project_name: project, project_id: project_id, slug: slug)
|
|
29
32
|
function = result.dig("objects")&.first
|
|
30
|
-
|
|
33
|
+
|
|
34
|
+
identifier = project ? "project '#{project}'" : "project_id '#{project_id}'"
|
|
35
|
+
raise Error, "Prompt '#{slug}' not found in #{identifier}" unless function
|
|
31
36
|
|
|
32
37
|
# Fetch full function data including prompt_data
|
|
33
38
|
full_data = api.functions.get(id: function["id"], version: version)
|
|
@@ -47,6 +52,7 @@ module Braintrust
|
|
|
47
52
|
@name = data["name"]
|
|
48
53
|
@slug = data["slug"]
|
|
49
54
|
@project_id = data["project_id"]
|
|
55
|
+
@version = data["_xact_id"]
|
|
50
56
|
end
|
|
51
57
|
|
|
52
58
|
# Get the raw prompt definition
|
data/lib/braintrust/server/services/eval_service.rb
CHANGED
|
@@ -40,7 +40,8 @@ module Braintrust
|
|
|
40
40
|
experiment_name: body["experiment_name"],
|
|
41
41
|
remote_scorer_ids: resolve_remote_scorers(body["scores"]),
|
|
42
42
|
parent: resolve_parent(body["parent"]),
|
|
43
|
-
project_id: body["project_id"]
|
|
43
|
+
project_id: body["project_id"],
|
|
44
|
+
parameters: resolve_parameters(body["parameters"], evaluator)
|
|
44
45
|
}
|
|
45
46
|
end
|
|
46
47
|
|
|
@@ -57,6 +58,7 @@ module Braintrust
|
|
|
57
58
|
remote_scorer_ids = validated[:remote_scorer_ids]
|
|
58
59
|
parent = validated[:parent]
|
|
59
60
|
project_id = validated[:project_id]
|
|
61
|
+
parameters = validated[:parameters]
|
|
60
62
|
|
|
61
63
|
state = build_state(auth)
|
|
62
64
|
|
|
@@ -89,6 +91,7 @@ module Braintrust
|
|
|
89
91
|
}
|
|
90
92
|
run_opts[:parent] = parent if parent
|
|
91
93
|
run_opts[:scorers] = remote_scorer_ids if remote_scorer_ids
|
|
94
|
+
run_opts[:parameters] = parameters if parameters && !parameters.empty?
|
|
92
95
|
run_opts[:dataset] = dataset if dataset
|
|
93
96
|
|
|
94
97
|
if state
|
|
@@ -161,6 +164,15 @@ module Braintrust
|
|
|
161
164
|
@evaluators
|
|
162
165
|
end
|
|
163
166
|
|
|
167
|
+
# Merge request parameters with the evaluator's parameter defaults.
# Request values override defaults.
#
# A default is read from each Hash spec under :default, falling back to
# "default" when the symbol-key value is nil. Specs that are not Hashes, and
# defaults that are nil, contribute nothing. An explicit `false` default is
# kept (a plain `||` between the two lookups would discard it).
#
# @param raw_params [Hash, nil] parameters from the request; nil is treated as empty
# @param evaluator [#parameters] object whose #parameters maps names to specs (may return nil)
# @return [Hash] defaults keyed by name.to_s, merged with raw_params (its keys are used as given)
def resolve_parameters(raw_params, evaluator)
  defaults = (evaluator.parameters || {}).each_with_object({}) do |(name, spec), acc|
    next unless spec.is_a?(Hash)

    value = spec[:default]
    value = spec["default"] if value.nil?
    acc[name.to_s] = value unless value.nil?
  end
  defaults.merge(raw_params || {})
end
|
|
175
|
+
|
|
164
176
|
# Resolve data source from the data field.
|
|
165
177
|
# Returns [cases, dataset] where exactly one is non-nil.
|
|
166
178
|
def resolve_data_source(data)
|
data/lib/braintrust/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: braintrust
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Braintrust
|
|
@@ -9,6 +9,20 @@ bindir: exe
|
|
|
9
9
|
cert_chain: []
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: logger
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '1.0'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '1.0'
|
|
12
26
|
- !ruby/object:Gem::Dependency
|
|
13
27
|
name: opentelemetry-sdk
|
|
14
28
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -51,132 +65,6 @@ dependencies:
|
|
|
51
65
|
- - "~>"
|
|
52
66
|
- !ruby/object:Gem::Version
|
|
53
67
|
version: 3.3.1
|
|
54
|
-
- !ruby/object:Gem::Dependency
|
|
55
|
-
name: minitest
|
|
56
|
-
requirement: !ruby/object:Gem::Requirement
|
|
57
|
-
requirements:
|
|
58
|
-
- - "~>"
|
|
59
|
-
- !ruby/object:Gem::Version
|
|
60
|
-
version: '5.0'
|
|
61
|
-
type: :development
|
|
62
|
-
prerelease: false
|
|
63
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
-
requirements:
|
|
65
|
-
- - "~>"
|
|
66
|
-
- !ruby/object:Gem::Version
|
|
67
|
-
version: '5.0'
|
|
68
|
-
- !ruby/object:Gem::Dependency
|
|
69
|
-
name: rake
|
|
70
|
-
requirement: !ruby/object:Gem::Requirement
|
|
71
|
-
requirements:
|
|
72
|
-
- - "~>"
|
|
73
|
-
- !ruby/object:Gem::Version
|
|
74
|
-
version: '13.0'
|
|
75
|
-
type: :development
|
|
76
|
-
prerelease: false
|
|
77
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
-
requirements:
|
|
79
|
-
- - "~>"
|
|
80
|
-
- !ruby/object:Gem::Version
|
|
81
|
-
version: '13.0'
|
|
82
|
-
- !ruby/object:Gem::Dependency
|
|
83
|
-
name: standard
|
|
84
|
-
requirement: !ruby/object:Gem::Requirement
|
|
85
|
-
requirements:
|
|
86
|
-
- - "~>"
|
|
87
|
-
- !ruby/object:Gem::Version
|
|
88
|
-
version: '1.0'
|
|
89
|
-
type: :development
|
|
90
|
-
prerelease: false
|
|
91
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
-
requirements:
|
|
93
|
-
- - "~>"
|
|
94
|
-
- !ruby/object:Gem::Version
|
|
95
|
-
version: '1.0'
|
|
96
|
-
- !ruby/object:Gem::Dependency
|
|
97
|
-
name: simplecov
|
|
98
|
-
requirement: !ruby/object:Gem::Requirement
|
|
99
|
-
requirements:
|
|
100
|
-
- - "~>"
|
|
101
|
-
- !ruby/object:Gem::Version
|
|
102
|
-
version: '0.22'
|
|
103
|
-
type: :development
|
|
104
|
-
prerelease: false
|
|
105
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
106
|
-
requirements:
|
|
107
|
-
- - "~>"
|
|
108
|
-
- !ruby/object:Gem::Version
|
|
109
|
-
version: '0.22'
|
|
110
|
-
- !ruby/object:Gem::Dependency
|
|
111
|
-
name: vcr
|
|
112
|
-
requirement: !ruby/object:Gem::Requirement
|
|
113
|
-
requirements:
|
|
114
|
-
- - "~>"
|
|
115
|
-
- !ruby/object:Gem::Version
|
|
116
|
-
version: '6.0'
|
|
117
|
-
type: :development
|
|
118
|
-
prerelease: false
|
|
119
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
120
|
-
requirements:
|
|
121
|
-
- - "~>"
|
|
122
|
-
- !ruby/object:Gem::Version
|
|
123
|
-
version: '6.0'
|
|
124
|
-
- !ruby/object:Gem::Dependency
|
|
125
|
-
name: webmock
|
|
126
|
-
requirement: !ruby/object:Gem::Requirement
|
|
127
|
-
requirements:
|
|
128
|
-
- - "~>"
|
|
129
|
-
- !ruby/object:Gem::Version
|
|
130
|
-
version: '3.0'
|
|
131
|
-
type: :development
|
|
132
|
-
prerelease: false
|
|
133
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
134
|
-
requirements:
|
|
135
|
-
- - "~>"
|
|
136
|
-
- !ruby/object:Gem::Version
|
|
137
|
-
version: '3.0'
|
|
138
|
-
- !ruby/object:Gem::Dependency
|
|
139
|
-
name: appraisal
|
|
140
|
-
requirement: !ruby/object:Gem::Requirement
|
|
141
|
-
requirements:
|
|
142
|
-
- - "~>"
|
|
143
|
-
- !ruby/object:Gem::Version
|
|
144
|
-
version: '2.5'
|
|
145
|
-
type: :development
|
|
146
|
-
prerelease: false
|
|
147
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
148
|
-
requirements:
|
|
149
|
-
- - "~>"
|
|
150
|
-
- !ruby/object:Gem::Version
|
|
151
|
-
version: '2.5'
|
|
152
|
-
- !ruby/object:Gem::Dependency
|
|
153
|
-
name: yard
|
|
154
|
-
requirement: !ruby/object:Gem::Requirement
|
|
155
|
-
requirements:
|
|
156
|
-
- - "~>"
|
|
157
|
-
- !ruby/object:Gem::Version
|
|
158
|
-
version: '0.9'
|
|
159
|
-
type: :development
|
|
160
|
-
prerelease: false
|
|
161
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
162
|
-
requirements:
|
|
163
|
-
- - "~>"
|
|
164
|
-
- !ruby/object:Gem::Version
|
|
165
|
-
version: '0.9'
|
|
166
|
-
- !ruby/object:Gem::Dependency
|
|
167
|
-
name: kramdown
|
|
168
|
-
requirement: !ruby/object:Gem::Requirement
|
|
169
|
-
requirements:
|
|
170
|
-
- - "~>"
|
|
171
|
-
- !ruby/object:Gem::Version
|
|
172
|
-
version: '2.0'
|
|
173
|
-
type: :development
|
|
174
|
-
prerelease: false
|
|
175
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
176
|
-
requirements:
|
|
177
|
-
- - "~>"
|
|
178
|
-
- !ruby/object:Gem::Version
|
|
179
|
-
version: '2.0'
|
|
180
68
|
description: 'Braintrust Ruby SDK for evals, tracing and more. '
|
|
181
69
|
email:
|
|
182
70
|
- info@braintrust.dev
|
|
@@ -258,6 +146,7 @@ files:
|
|
|
258
146
|
- lib/braintrust/internal/env.rb
|
|
259
147
|
- lib/braintrust/internal/http.rb
|
|
260
148
|
- lib/braintrust/internal/origin.rb
|
|
149
|
+
- lib/braintrust/internal/retry.rb
|
|
261
150
|
- lib/braintrust/internal/template.rb
|
|
262
151
|
- lib/braintrust/internal/thread_pool.rb
|
|
263
152
|
- lib/braintrust/internal/time.rb
|