braintrust 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +91 -4
- data/lib/braintrust/config.rb +21 -4
- data/lib/braintrust/eval.rb +164 -0
- data/lib/braintrust/state.rb +14 -6
- data/lib/braintrust/trace/attachment.rb +138 -0
- data/lib/braintrust/trace/contrib/anthropic.rb +439 -0
- data/lib/braintrust/trace/span_filter.rb +59 -0
- data/lib/braintrust/trace/span_processor.rb +29 -3
- data/lib/braintrust/trace.rb +47 -7
- data/lib/braintrust/version.rb +1 -1
- data/lib/braintrust.rb +8 -2
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 39d85e02bd85a931ee7f16de103d48d1184048e3ad8d791eda37bc323a653716
|
|
4
|
+
data.tar.gz: a0b1d5493e8ad3004007e78d608154077a33c92a436bce23eb36cfbe94c3bdd4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a5dcbd1b2bf2c0ab2355ff36c9cfce4fe10e175c0aa8df80ea3176be4002271744ca3a9fd7ef52cec888e0b326772518554921f7d657a79ba347b26c4c93b80c
|
|
7
|
+
data.tar.gz: 78677bd57e6ed1778f74b87e050dd5bbfdc8390e73f919aa57ea680cd2cd4338086e5df4982274c3fd62e690d34ec81078d7c76e75a467ab2f5b0667e6d530d6
|
data/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Braintrust Ruby SDK
|
|
2
2
|
|
|
3
|
-
[](https://rubygems.org/gems/braintrust)
|
|
4
|
+
[](https://gemdocs.org/gems/braintrust/)
|
|
5
5
|

|
|
6
6
|
|
|
7
7
|
## Overview
|
|
@@ -136,10 +136,95 @@ puts "View trace at: #{Braintrust::Trace.permalink(root_span)}"
|
|
|
136
136
|
OpenTelemetry.tracer_provider.shutdown
|
|
137
137
|
```
|
|
138
138
|
|
|
139
|
+
### Anthropic Tracing
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
require "braintrust"
|
|
143
|
+
require "anthropic"
|
|
144
|
+
|
|
145
|
+
Braintrust.init
|
|
146
|
+
|
|
147
|
+
client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"])
|
|
148
|
+
|
|
149
|
+
Braintrust::Trace::Anthropic.wrap(client)
|
|
150
|
+
|
|
151
|
+
tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app")
|
|
152
|
+
root_span = nil
|
|
153
|
+
|
|
154
|
+
message = tracer.in_span("chat-message") do |span|
|
|
155
|
+
root_span = span
|
|
156
|
+
|
|
157
|
+
client.messages.create(
|
|
158
|
+
model: "claude-3-5-sonnet-20241022",
|
|
159
|
+
max_tokens: 100,
|
|
160
|
+
system: "You are a helpful assistant.",
|
|
161
|
+
messages: [
|
|
162
|
+
{role: "user", content: "Say hello!"}
|
|
163
|
+
]
|
|
164
|
+
)
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
puts "Response: #{message.content[0].text}"
|
|
168
|
+
|
|
169
|
+
puts "View trace at: #{Braintrust::Trace.permalink(root_span)}"
|
|
170
|
+
|
|
171
|
+
OpenTelemetry.tracer_provider.shutdown
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Attachments
|
|
175
|
+
|
|
176
|
+
Attachments allow you to log binary data (images, PDFs, audio, etc.) as part of your traces. This is particularly useful for multimodal AI applications like vision models.
|
|
177
|
+
|
|
178
|
+
```ruby
|
|
179
|
+
require "braintrust"
|
|
180
|
+
require "braintrust/trace/attachment"
|
|
181
|
+
|
|
182
|
+
Braintrust.init
|
|
183
|
+
|
|
184
|
+
tracer = OpenTelemetry.tracer_provider.tracer("vision-app")
|
|
185
|
+
|
|
186
|
+
tracer.in_span("analyze-image") do |span|
|
|
187
|
+
# Create attachment from file
|
|
188
|
+
att = Braintrust::Trace::Attachment.from_file(
|
|
189
|
+
Braintrust::Trace::Attachment::IMAGE_PNG,
|
|
190
|
+
"./photo.png"
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
# Build message with attachment (OpenAI/Anthropic format)
|
|
194
|
+
messages = [
|
|
195
|
+
{
|
|
196
|
+
role: "user",
|
|
197
|
+
content: [
|
|
198
|
+
{type: "text", text: "What's in this image?"},
|
|
199
|
+
att.to_h # Converts to {"type" => "base64_attachment", "content" => "data:..."}
|
|
200
|
+
]
|
|
201
|
+
}
|
|
202
|
+
]
|
|
203
|
+
|
|
204
|
+
# Log to trace
|
|
205
|
+
span.set_attribute("braintrust.input_json", JSON.generate(messages))
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
OpenTelemetry.tracer_provider.shutdown
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
You can create attachments from bytes, files, or URLs:
|
|
212
|
+
|
|
213
|
+
```ruby
|
|
214
|
+
# From bytes
|
|
215
|
+
att = Braintrust::Trace::Attachment.from_bytes("image/jpeg", image_data)
|
|
216
|
+
|
|
217
|
+
# From file
|
|
218
|
+
att = Braintrust::Trace::Attachment.from_file("application/pdf", "./doc.pdf")
|
|
219
|
+
|
|
220
|
+
# From URL
|
|
221
|
+
att = Braintrust::Trace::Attachment.from_url("https://example.com/image.png")
|
|
222
|
+
```
|
|
223
|
+
|
|
139
224
|
## Features
|
|
140
225
|
|
|
141
226
|
- **Evaluations**: Run systematic evaluations of your AI systems with custom scoring functions
|
|
142
|
-
- **Tracing**: Automatic instrumentation for OpenAI API calls with OpenTelemetry
|
|
227
|
+
- **Tracing**: Automatic instrumentation for OpenAI and Anthropic API calls with OpenTelemetry
|
|
143
228
|
- **Datasets**: Manage and version your evaluation datasets
|
|
144
229
|
- **Experiments**: Track different versions and configurations of your AI systems
|
|
145
230
|
- **Observability**: Monitor your AI applications in production
|
|
@@ -151,13 +236,15 @@ Check out the [`examples/`](./examples/) directory for complete working examples
|
|
|
151
236
|
- [eval.rb](./examples/eval.rb) - Create and run evaluations with custom test cases and scoring functions
|
|
152
237
|
- [trace.rb](./examples/trace.rb) - Manual span creation and tracing
|
|
153
238
|
- [openai.rb](./examples/openai.rb) - Automatically trace OpenAI API calls
|
|
239
|
+
- [anthropic.rb](./examples/anthropic.rb) - Automatically trace Anthropic API calls
|
|
240
|
+
- [trace/trace_attachments.rb](./examples/trace/trace_attachments.rb) - Log attachments (images, PDFs) in traces
|
|
154
241
|
- [eval/dataset.rb](./examples/eval/dataset.rb) - Run evaluations using datasets stored in Braintrust
|
|
155
242
|
- [eval/remote_functions.rb](./examples/eval/remote_functions.rb) - Use remote scoring functions
|
|
156
243
|
|
|
157
244
|
## Documentation
|
|
158
245
|
|
|
159
246
|
- [Braintrust Documentation](https://www.braintrust.dev/docs)
|
|
160
|
-
- [API Documentation](https://
|
|
247
|
+
- [API Documentation](https://gemdocs.org/gems/braintrust/)
|
|
161
248
|
|
|
162
249
|
## Contributing
|
|
163
250
|
|
data/lib/braintrust/config.rb
CHANGED
|
@@ -4,14 +4,18 @@ module Braintrust
|
|
|
4
4
|
# Configuration object that reads from environment variables
|
|
5
5
|
# and allows overriding with explicit options
|
|
6
6
|
class Config
|
|
7
|
-
attr_reader :api_key, :org_name, :default_project, :app_url, :api_url
|
|
7
|
+
attr_reader :api_key, :org_name, :default_project, :app_url, :api_url,
|
|
8
|
+
:filter_ai_spans, :span_filter_funcs
|
|
8
9
|
|
|
9
|
-
def initialize(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil
|
|
10
|
+
def initialize(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil,
|
|
11
|
+
filter_ai_spans: nil, span_filter_funcs: nil)
|
|
10
12
|
@api_key = api_key
|
|
11
13
|
@org_name = org_name
|
|
12
14
|
@default_project = default_project
|
|
13
15
|
@app_url = app_url
|
|
14
16
|
@api_url = api_url
|
|
17
|
+
@filter_ai_spans = filter_ai_spans
|
|
18
|
+
@span_filter_funcs = span_filter_funcs || []
|
|
15
19
|
end
|
|
16
20
|
|
|
17
21
|
# Create a Config from environment variables, with option overrides
|
|
@@ -21,14 +25,27 @@ module Braintrust
|
|
|
21
25
|
# @param default_project [String, nil] Default project (overrides BRAINTRUST_DEFAULT_PROJECT env var)
|
|
22
26
|
# @param app_url [String, nil] App URL (overrides BRAINTRUST_APP_URL env var)
|
|
23
27
|
# @param api_url [String, nil] API URL (overrides BRAINTRUST_API_URL env var)
|
|
28
|
+
# @param filter_ai_spans [Boolean, nil] Enable AI span filtering (overrides BRAINTRUST_OTEL_FILTER_AI_SPANS env var)
|
|
29
|
+
# @param span_filter_funcs [Array<Proc>, nil] Custom span filter functions
|
|
24
30
|
# @return [Config] the created config
|
|
25
|
-
def self.from_env(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil
|
|
31
|
+
def self.from_env(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil,
|
|
32
|
+
filter_ai_spans: nil, span_filter_funcs: nil)
|
|
33
|
+
# Parse filter_ai_spans from ENV if not explicitly provided
|
|
34
|
+
env_filter_ai_spans = ENV["BRAINTRUST_OTEL_FILTER_AI_SPANS"]
|
|
35
|
+
filter_ai_spans_value = if filter_ai_spans.nil?
|
|
36
|
+
env_filter_ai_spans&.downcase == "true"
|
|
37
|
+
else
|
|
38
|
+
filter_ai_spans
|
|
39
|
+
end
|
|
40
|
+
|
|
26
41
|
new(
|
|
27
42
|
api_key: api_key || ENV["BRAINTRUST_API_KEY"],
|
|
28
43
|
org_name: org_name || ENV["BRAINTRUST_ORG_NAME"],
|
|
29
44
|
default_project: default_project || ENV["BRAINTRUST_DEFAULT_PROJECT"],
|
|
30
45
|
app_url: app_url || ENV["BRAINTRUST_APP_URL"] || "https://www.braintrust.dev",
|
|
31
|
-
api_url: api_url || ENV["BRAINTRUST_API_URL"] || "https://api.braintrust.dev"
|
|
46
|
+
api_url: api_url || ENV["BRAINTRUST_API_URL"] || "https://api.braintrust.dev",
|
|
47
|
+
filter_ai_spans: filter_ai_spans_value,
|
|
48
|
+
span_filter_funcs: span_filter_funcs
|
|
32
49
|
)
|
|
33
50
|
end
|
|
34
51
|
end
|
data/lib/braintrust/eval.rb
CHANGED
|
@@ -9,6 +9,170 @@ require "opentelemetry/sdk"
|
|
|
9
9
|
require "json"
|
|
10
10
|
|
|
11
11
|
module Braintrust
|
|
12
|
+
# Evaluation framework for testing AI systems with custom test cases and scoring functions.
|
|
13
|
+
#
|
|
14
|
+
# The Eval module provides tools for running systematic evaluations of your AI systems. An
|
|
15
|
+
# evaluation consists of:
|
|
16
|
+
# - **Cases**: Test inputs with optional expected outputs
|
|
17
|
+
# - **Task**: The code/model being evaluated
|
|
18
|
+
# - **Scorers**: Functions that judge the quality of outputs
|
|
19
|
+
#
|
|
20
|
+
# @example Basic evaluation with inline cases
|
|
21
|
+
# require "braintrust"
|
|
22
|
+
#
|
|
23
|
+
# Braintrust.init
|
|
24
|
+
#
|
|
25
|
+
# # Define a simple task (the code being evaluated)
|
|
26
|
+
# task = ->(input) { input.include?("a") ? "fruit" : "vegetable" }
|
|
27
|
+
#
|
|
28
|
+
# # Run evaluation with inline cases
|
|
29
|
+
# Braintrust::Eval.run(
|
|
30
|
+
# project: "my-project",
|
|
31
|
+
# experiment: "food-classifier",
|
|
32
|
+
# cases: [
|
|
33
|
+
# {input: "apple", expected: "fruit"},
|
|
34
|
+
# {input: "carrot", expected: "vegetable"},
|
|
35
|
+
# {input: "banana", expected: "fruit"}
|
|
36
|
+
# ],
|
|
37
|
+
# task: task,
|
|
38
|
+
# scorers: [
|
|
39
|
+
# # Named scorer with Eval.scorer
|
|
40
|
+
# Braintrust::Eval.scorer("exact_match") do |input, expected, output|
|
|
41
|
+
# output == expected ? 1.0 : 0.0
|
|
42
|
+
# end
|
|
43
|
+
# ]
|
|
44
|
+
# )
|
|
45
|
+
#
|
|
46
|
+
# @example Different ways to define scorers (recommended patterns)
|
|
47
|
+
# # Method reference (auto-uses method name as scorer name)
|
|
48
|
+
# def exact_match(input, expected, output)
|
|
49
|
+
# output == expected ? 1.0 : 0.0
|
|
50
|
+
# end
|
|
51
|
+
#
|
|
52
|
+
# # Named scorer with Eval.scorer
|
|
53
|
+
# case_insensitive = Braintrust::Eval.scorer("case_insensitive") do |input, expected, output|
|
|
54
|
+
# output.downcase == expected.downcase ? 1.0 : 0.0
|
|
55
|
+
# end
|
|
56
|
+
#
|
|
57
|
+
# # Callable class with name method
|
|
58
|
+
# class FuzzyMatch
|
|
59
|
+
# def name
|
|
60
|
+
# "fuzzy_match"
|
|
61
|
+
# end
|
|
62
|
+
#
|
|
63
|
+
# def call(input, expected, output, metadata = {})
|
|
64
|
+
# threshold = metadata[:threshold] || 0.8
|
|
65
|
+
# # scoring logic here
|
|
66
|
+
# 1.0
|
|
67
|
+
# end
|
|
68
|
+
# end
|
|
69
|
+
#
|
|
70
|
+
# # Anonymous lambda that returns an array of named score objects
|
|
71
|
+
# multi_score = ->(input, expected, output) {
|
|
72
|
+
# [
|
|
73
|
+
# {name: "exact_match", score: output == expected ? 1.0 : 0.0},
|
|
74
|
+
# {name: "length_match", score: output.length == expected.length ? 1.0 : 0.0}
|
|
75
|
+
# ]
|
|
76
|
+
# }
|
|
77
|
+
#
|
|
78
|
+
# # All can be used together
|
|
79
|
+
# Braintrust::Eval.run(
|
|
80
|
+
# project: "my-project",
|
|
81
|
+
# experiment: "scorer-examples",
|
|
82
|
+
# cases: [{input: "test", expected: "test"}],
|
|
83
|
+
# task: ->(input) { input },
|
|
84
|
+
# scorers: [method(:exact_match), case_insensitive, FuzzyMatch.new, multi_score]
|
|
85
|
+
# )
|
|
86
|
+
#
|
|
87
|
+
# @example Different ways to define tasks
|
|
88
|
+
# # Lambda
|
|
89
|
+
# task_lambda = ->(input) { "result" }
|
|
90
|
+
#
|
|
91
|
+
# # Proc
|
|
92
|
+
# task_proc = proc { |input| "result" }
|
|
93
|
+
#
|
|
94
|
+
# # Method reference
|
|
95
|
+
# def my_task(input)
|
|
96
|
+
# "result"
|
|
97
|
+
# end
|
|
98
|
+
# task_method = method(:my_task)
|
|
99
|
+
#
|
|
100
|
+
# # Callable class
|
|
101
|
+
# class MyTask
|
|
102
|
+
# def call(input)
|
|
103
|
+
# "result"
|
|
104
|
+
# end
|
|
105
|
+
# end
|
|
106
|
+
# task_class = MyTask.new
|
|
107
|
+
#
|
|
108
|
+
# # All of these can be used as the task parameter
|
|
109
|
+
# Braintrust::Eval.run(
|
|
110
|
+
# project: "my-project",
|
|
111
|
+
# experiment: "task-examples",
|
|
112
|
+
# cases: [{input: "test"}],
|
|
113
|
+
# task: task_lambda, # or task_proc, task_method, task_class
|
|
114
|
+
# scorers: [
|
|
115
|
+
# Braintrust::Eval.scorer("my_scorer") { |input, expected, output| 1.0 }
|
|
116
|
+
# ]
|
|
117
|
+
# )
|
|
118
|
+
#
|
|
119
|
+
# @example Using datasets instead of inline cases
|
|
120
|
+
# # Fetch cases from a dataset stored in Braintrust
|
|
121
|
+
# Braintrust::Eval.run(
|
|
122
|
+
# project: "my-project",
|
|
123
|
+
# experiment: "with-dataset",
|
|
124
|
+
# dataset: "my-dataset-name", # fetches from same project
|
|
125
|
+
# task: ->(input) { "result" },
|
|
126
|
+
# scorers: [
|
|
127
|
+
# Braintrust::Eval.scorer("my_scorer") { |input, expected, output| 1.0 }
|
|
128
|
+
# ]
|
|
129
|
+
# )
|
|
130
|
+
#
|
|
131
|
+
# # Or with more options
|
|
132
|
+
# Braintrust::Eval.run(
|
|
133
|
+
# project: "my-project",
|
|
134
|
+
# experiment: "with-dataset-options",
|
|
135
|
+
# dataset: {
|
|
136
|
+
# name: "my-dataset",
|
|
137
|
+
# project: "other-project",
|
|
138
|
+
# version: "1.0",
|
|
139
|
+
# limit: 100
|
|
140
|
+
# },
|
|
141
|
+
# task: ->(input) { "result" },
|
|
142
|
+
# scorers: [
|
|
143
|
+
# Braintrust::Eval.scorer("my_scorer") { |input, expected, output| 1.0 }
|
|
144
|
+
# ]
|
|
145
|
+
# )
|
|
146
|
+
#
|
|
147
|
+
# @example Using metadata and tags
|
|
148
|
+
# Braintrust::Eval.run(
|
|
149
|
+
# project: "my-project",
|
|
150
|
+
# experiment: "with-metadata",
|
|
151
|
+
# cases: [
|
|
152
|
+
# {
|
|
153
|
+
# input: "apple",
|
|
154
|
+
# expected: "fruit",
|
|
155
|
+
# tags: ["tropical", "sweet"],
|
|
156
|
+
# metadata: {threshold: 0.9, category: "produce"}
|
|
157
|
+
# }
|
|
158
|
+
# ],
|
|
159
|
+
# task: ->(input) { "fruit" },
|
|
160
|
+
# scorers: [
|
|
161
|
+
# # Scorer can access case metadata
|
|
162
|
+
# Braintrust::Eval.scorer("threshold_match") do |input, expected, output, metadata|
|
|
163
|
+
# threshold = metadata[:threshold] || 0.5
|
|
164
|
+
# # scoring logic using threshold
|
|
165
|
+
# 1.0
|
|
166
|
+
# end
|
|
167
|
+
# ],
|
|
168
|
+
# # Experiment-level tags and metadata
|
|
169
|
+
# tags: ["v1", "production"],
|
|
170
|
+
# metadata: {
|
|
171
|
+
# model: "gpt-4",
|
|
172
|
+
# temperature: 0.7,
|
|
173
|
+
# version: "1.0.0"
|
|
174
|
+
# }
|
|
175
|
+
# )
|
|
12
176
|
module Eval
|
|
13
177
|
class << self
|
|
14
178
|
# Create a scorer with a name and callable
|
data/lib/braintrust/state.rb
CHANGED
|
@@ -6,7 +6,7 @@ module Braintrust
|
|
|
6
6
|
# State object that holds Braintrust configuration
|
|
7
7
|
# Thread-safe global state management
|
|
8
8
|
class State
|
|
9
|
-
attr_reader :api_key, :org_name, :org_id, :default_project, :app_url, :api_url, :proxy_url, :logged_in
|
|
9
|
+
attr_reader :api_key, :org_name, :org_id, :default_project, :app_url, :api_url, :proxy_url, :logged_in, :config
|
|
10
10
|
|
|
11
11
|
@mutex = Mutex.new
|
|
12
12
|
@global_state = nil
|
|
@@ -20,15 +20,20 @@ module Braintrust
|
|
|
20
20
|
# @param blocking_login [Boolean] whether to block and login synchronously (default: false)
|
|
21
21
|
# @param enable_tracing [Boolean] whether to enable OpenTelemetry tracing (default: true)
|
|
22
22
|
# @param tracer_provider [TracerProvider, nil] Optional tracer provider to use
|
|
23
|
+
# @param filter_ai_spans [Boolean, nil] Enable AI span filtering
|
|
24
|
+
# @param span_filter_funcs [Array<Proc>, nil] Custom span filter functions
|
|
25
|
+
# @param exporter [Exporter, nil] Optional exporter override (for testing)
|
|
23
26
|
# @return [State] the created state
|
|
24
|
-
def self.from_env(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, blocking_login: false, enable_tracing: true, tracer_provider: nil)
|
|
27
|
+
def self.from_env(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, blocking_login: false, enable_tracing: true, tracer_provider: nil, filter_ai_spans: nil, span_filter_funcs: nil, exporter: nil)
|
|
25
28
|
require_relative "config"
|
|
26
29
|
config = Config.from_env(
|
|
27
30
|
api_key: api_key,
|
|
28
31
|
org_name: org_name,
|
|
29
32
|
default_project: default_project,
|
|
30
33
|
app_url: app_url,
|
|
31
|
-
api_url: api_url
|
|
34
|
+
api_url: api_url,
|
|
35
|
+
filter_ai_spans: filter_ai_spans,
|
|
36
|
+
span_filter_funcs: span_filter_funcs
|
|
32
37
|
)
|
|
33
38
|
new(
|
|
34
39
|
api_key: config.api_key,
|
|
@@ -38,11 +43,13 @@ module Braintrust
|
|
|
38
43
|
api_url: config.api_url,
|
|
39
44
|
blocking_login: blocking_login,
|
|
40
45
|
enable_tracing: enable_tracing,
|
|
41
|
-
tracer_provider: tracer_provider
|
|
46
|
+
tracer_provider: tracer_provider,
|
|
47
|
+
config: config,
|
|
48
|
+
exporter: exporter
|
|
42
49
|
)
|
|
43
50
|
end
|
|
44
51
|
|
|
45
|
-
def initialize(api_key: nil, org_name: nil, org_id: nil, default_project: nil, app_url: nil, api_url: nil, proxy_url: nil, blocking_login: false, enable_tracing: true, tracer_provider: nil)
|
|
52
|
+
def initialize(api_key: nil, org_name: nil, org_id: nil, default_project: nil, app_url: nil, api_url: nil, proxy_url: nil, blocking_login: false, enable_tracing: true, tracer_provider: nil, config: nil, exporter: nil)
|
|
46
53
|
# Instance-level mutex for thread-safe login
|
|
47
54
|
@login_mutex = Mutex.new
|
|
48
55
|
raise ArgumentError, "api_key is required" if api_key.nil? || api_key.empty?
|
|
@@ -55,6 +62,7 @@ module Braintrust
|
|
|
55
62
|
@api_url = api_url
|
|
56
63
|
@proxy_url = proxy_url
|
|
57
64
|
@logged_in = false
|
|
65
|
+
@config = config
|
|
58
66
|
|
|
59
67
|
# Perform login after state setup
|
|
60
68
|
if blocking_login
|
|
@@ -66,7 +74,7 @@ module Braintrust
|
|
|
66
74
|
# Setup tracing if requested
|
|
67
75
|
if enable_tracing
|
|
68
76
|
require_relative "trace"
|
|
69
|
-
Trace.setup(self, tracer_provider)
|
|
77
|
+
Trace.setup(self, tracer_provider, exporter: exporter)
|
|
70
78
|
end
|
|
71
79
|
end
|
|
72
80
|
|
|
data/lib/braintrust/trace/attachment.rb
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "base64"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "uri"
|
|
6
|
+
|
|
7
|
+
module Braintrust
|
|
8
|
+
module Trace
|
|
9
|
+
# Attachment represents binary data (images, audio, PDFs, etc.) that can be logged
|
|
10
|
+
# as part of traces in Braintrust. Attachments are stored securely and can be viewed
|
|
11
|
+
# in the Braintrust UI.
|
|
12
|
+
#
|
|
13
|
+
# Attachments are particularly useful for multimodal AI applications, such as vision
|
|
14
|
+
# models that process images.
|
|
15
|
+
#
|
|
16
|
+
# @example Create attachment from file
|
|
17
|
+
# att = Braintrust::Trace::Attachment.from_file("image/png", "./photo.png")
|
|
18
|
+
# data_url = att.to_data_url
|
|
19
|
+
# # => "data:image/png;base64,iVBORw0KGgo..."
|
|
20
|
+
#
|
|
21
|
+
# @example Create attachment from bytes
|
|
22
|
+
# att = Braintrust::Trace::Attachment.from_bytes("image/jpeg", image_bytes)
|
|
23
|
+
# message = att.to_message
|
|
24
|
+
# # => {"type" => "base64_attachment", "content" => "data:image/jpeg;base64,..."}
|
|
25
|
+
#
|
|
26
|
+
# @example Use in a trace span
|
|
27
|
+
# att = Braintrust::Trace::Attachment.from_file("image/png", "./photo.png")
|
|
28
|
+
# messages = [
|
|
29
|
+
# {
|
|
30
|
+
# role: "user",
|
|
31
|
+
# content: [
|
|
32
|
+
# {type: "text", text: "What's in this image?"},
|
|
33
|
+
# att.to_h # Converts to {"type" => "base64_attachment", "content" => "..."}
|
|
34
|
+
# ]
|
|
35
|
+
# }
|
|
36
|
+
# ]
|
|
37
|
+
# span.set_attribute("braintrust.input_json", JSON.generate(messages))
|
|
38
|
+
class Attachment
|
|
39
|
+
# Common MIME type constants for convenience
|
|
40
|
+
IMAGE_PNG = "image/png"
|
|
41
|
+
IMAGE_JPEG = "image/jpeg"
|
|
42
|
+
IMAGE_JPG = "image/jpg"
|
|
43
|
+
IMAGE_GIF = "image/gif"
|
|
44
|
+
IMAGE_WEBP = "image/webp"
|
|
45
|
+
TEXT_PLAIN = "text/plain"
|
|
46
|
+
APPLICATION_PDF = "application/pdf"
|
|
47
|
+
|
|
48
|
+
# @!visibility private
|
|
49
|
+
def initialize(content_type, data)
|
|
50
|
+
@content_type = content_type
|
|
51
|
+
@data = data
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Creates an attachment from raw bytes.
|
|
55
|
+
#
|
|
56
|
+
# @param content_type [String] MIME type of the data (e.g., "image/png")
|
|
57
|
+
# @param data [String] Binary data as a string
|
|
58
|
+
# @return [Attachment] New attachment instance
|
|
59
|
+
#
|
|
60
|
+
# @example
|
|
61
|
+
# image_data = File.binread("photo.png")
|
|
62
|
+
# att = Braintrust::Trace::Attachment.from_bytes("image/png", image_data)
|
|
63
|
+
def self.from_bytes(content_type, data)
|
|
64
|
+
new(content_type, data)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Creates an attachment by reading from a file.
|
|
68
|
+
#
|
|
69
|
+
# @param content_type [String] MIME type of the file (e.g., "image/png")
|
|
70
|
+
# @param path [String] Path to the file to read
|
|
71
|
+
# @return [Attachment] New attachment instance
|
|
72
|
+
# @raise [Errno::ENOENT] If the file does not exist
|
|
73
|
+
#
|
|
74
|
+
# @example
|
|
75
|
+
# att = Braintrust::Trace::Attachment.from_file("image/png", "./photo.png")
|
|
76
|
+
def self.from_file(content_type, path)
|
|
77
|
+
data = File.binread(path)
|
|
78
|
+
new(content_type, data)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Creates an attachment by fetching data from a URL.
|
|
82
|
+
#
|
|
83
|
+
# The content type is inferred from the Content-Type header in the HTTP response.
|
|
84
|
+
# If the header is not present, it falls back to "application/octet-stream".
|
|
85
|
+
#
|
|
86
|
+
# @param url [String] URL to fetch
|
|
87
|
+
# @return [Attachment] New attachment instance
|
|
88
|
+
# @raise [StandardError] If the HTTP request fails
|
|
89
|
+
#
|
|
90
|
+
# @example
|
|
91
|
+
# att = Braintrust::Trace::Attachment.from_url("https://example.com/image.png")
|
|
92
|
+
def self.from_url(url)
|
|
93
|
+
uri = URI.parse(url)
|
|
94
|
+
response = Net::HTTP.get_response(uri)
|
|
95
|
+
|
|
96
|
+
unless response.is_a?(Net::HTTPSuccess)
|
|
97
|
+
raise StandardError, "Failed to fetch URL: #{response.code} #{response.message}"
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
content_type = response.content_type || "application/octet-stream"
|
|
101
|
+
new(content_type, response.body)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Converts the attachment to a data URL format.
|
|
105
|
+
#
|
|
106
|
+
# @return [String] Data URL in the format "data:<content-type>;base64,<encoded-data>"
|
|
107
|
+
#
|
|
108
|
+
# @example
|
|
109
|
+
# att = Braintrust::Trace::Attachment.from_bytes("image/png", image_data)
|
|
110
|
+
# att.to_data_url
|
|
111
|
+
# # => "data:image/png;base64,iVBORw0KGgo..."
|
|
112
|
+
def to_data_url
|
|
113
|
+
encoded = Base64.strict_encode64(@data)
|
|
114
|
+
"data:#{@content_type};base64,#{encoded}"
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Converts the attachment to a message format suitable for LLM APIs.
|
|
118
|
+
#
|
|
119
|
+
# @return [Hash] Message hash with "type" and "content" keys
|
|
120
|
+
#
|
|
121
|
+
# @example
|
|
122
|
+
# att = Braintrust::Trace::Attachment.from_bytes("image/png", image_data)
|
|
123
|
+
# att.to_message
|
|
124
|
+
# # => {"type" => "base64_attachment", "content" => "data:image/png;base64,..."}
|
|
125
|
+
def to_message
|
|
126
|
+
{
|
|
127
|
+
"type" => "base64_attachment",
|
|
128
|
+
"content" => to_data_url
|
|
129
|
+
}
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Alias for {#to_message}. Converts the attachment to a hash representation.
|
|
133
|
+
#
|
|
134
|
+
# @return [Hash] Same as {#to_message}
|
|
135
|
+
alias_method :to_h, :to_message
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
data/lib/braintrust/trace/contrib/anthropic.rb
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "opentelemetry/sdk"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module Braintrust
|
|
7
|
+
module Trace
|
|
8
|
+
module Anthropic
|
|
9
|
+
# Helper to safely set a JSON attribute on a span
|
|
10
|
+
# Only sets the attribute if obj is present
|
|
11
|
+
# @param span [OpenTelemetry::Trace::Span] the span to set attribute on
|
|
12
|
+
# @param attr_name [String] the attribute name (e.g., "braintrust.output_json")
|
|
13
|
+
# @param obj [Object] the object to serialize to JSON
|
|
14
|
+
# @return [void]
|
|
15
|
+
def self.set_json_attr(span, attr_name, obj)
|
|
16
|
+
return unless obj
|
|
17
|
+
span.set_attribute(attr_name, JSON.generate(obj))
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Parse usage tokens from Anthropic API response, handling cache tokens
|
|
21
|
+
# Maps Anthropic field names to Braintrust standard names:
|
|
22
|
+
# - input_tokens → contributes to prompt_tokens
|
|
23
|
+
# - cache_creation_input_tokens → prompt_cache_creation_tokens (and adds to prompt_tokens)
|
|
24
|
+
# - cache_read_input_tokens → prompt_cached_tokens (and adds to prompt_tokens)
|
|
25
|
+
# - output_tokens → completion_tokens
|
|
26
|
+
# - total_tokens → tokens (or calculated if missing)
|
|
27
|
+
#
|
|
28
|
+
# @param usage [Hash, Object] usage object from Anthropic response
|
|
29
|
+
# @return [Hash<String, Integer>] metrics hash with normalized names
|
|
30
|
+
def self.parse_usage_tokens(usage)
|
|
31
|
+
metrics = {}
|
|
32
|
+
return metrics unless usage
|
|
33
|
+
|
|
34
|
+
# Convert to hash if it's an object
|
|
35
|
+
usage_hash = usage.respond_to?(:to_h) ? usage.to_h : usage
|
|
36
|
+
|
|
37
|
+
# Extract base values for calculation
|
|
38
|
+
input_tokens = 0
|
|
39
|
+
cache_creation_tokens = 0
|
|
40
|
+
cache_read_tokens = 0
|
|
41
|
+
|
|
42
|
+
usage_hash.each do |key, value|
|
|
43
|
+
next unless value.is_a?(Numeric)
|
|
44
|
+
key_str = key.to_s
|
|
45
|
+
|
|
46
|
+
case key_str
|
|
47
|
+
when "input_tokens"
|
|
48
|
+
input_tokens = value.to_i
|
|
49
|
+
when "cache_creation_input_tokens"
|
|
50
|
+
cache_creation_tokens = value.to_i
|
|
51
|
+
metrics["prompt_cache_creation_tokens"] = value.to_i
|
|
52
|
+
when "cache_read_input_tokens"
|
|
53
|
+
cache_read_tokens = value.to_i
|
|
54
|
+
metrics["prompt_cached_tokens"] = value.to_i
|
|
55
|
+
when "output_tokens"
|
|
56
|
+
metrics["completion_tokens"] = value.to_i
|
|
57
|
+
when "total_tokens"
|
|
58
|
+
metrics["tokens"] = value.to_i
|
|
59
|
+
else
|
|
60
|
+
# Keep other numeric fields as-is (future-proofing)
|
|
61
|
+
metrics[key_str] = value.to_i
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Calculate total prompt tokens (input + cache creation + cache read)
|
|
66
|
+
total_prompt_tokens = input_tokens + cache_creation_tokens + cache_read_tokens
|
|
67
|
+
metrics["prompt_tokens"] = total_prompt_tokens
|
|
68
|
+
|
|
69
|
+
# Calculate total tokens if not provided by Anthropic
|
|
70
|
+
if !metrics.key?("tokens") && metrics.key?("completion_tokens")
|
|
71
|
+
metrics["tokens"] = total_prompt_tokens + metrics["completion_tokens"]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
metrics
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Wrap an Anthropic::Client to automatically create spans for messages and responses
|
|
78
|
+
# Supports both synchronous and streaming requests
|
|
79
|
+
# @param client [Anthropic::Client] the Anthropic client to wrap
|
|
80
|
+
# @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global)
|
|
81
|
+
def self.wrap(client, tracer_provider: nil)
|
|
82
|
+
tracer_provider ||= ::OpenTelemetry.tracer_provider
|
|
83
|
+
|
|
84
|
+
# Wrap messages.create
|
|
85
|
+
wrap_messages_create(client, tracer_provider)
|
|
86
|
+
|
|
87
|
+
# Wrap messages.stream (Anthropic SDK always has this method)
|
|
88
|
+
wrap_messages_stream(client, tracer_provider)
|
|
89
|
+
|
|
90
|
+
client
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Wrap messages.create API
|
|
94
|
+
# @param client [Anthropic::Client] the Anthropic client
|
|
95
|
+
# @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
|
|
96
|
+
def self.wrap_messages_create(client, tracer_provider)
|
|
97
|
+
# Create a wrapper module that intercepts messages.create
|
|
98
|
+
wrapper = Module.new do
|
|
99
|
+
define_method(:create) do |**params|
|
|
100
|
+
tracer = tracer_provider.tracer("braintrust")
|
|
101
|
+
|
|
102
|
+
tracer.in_span("anthropic.messages.create") do |span|
|
|
103
|
+
# Initialize metadata hash
|
|
104
|
+
metadata = {
|
|
105
|
+
"provider" => "anthropic",
|
|
106
|
+
"endpoint" => "/v1/messages"
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
# Capture request metadata fields
|
|
110
|
+
metadata_fields = %i[
|
|
111
|
+
model max_tokens temperature top_p top_k stop_sequences
|
|
112
|
+
stream tools tool_choice thinking metadata service_tier
|
|
113
|
+
]
|
|
114
|
+
|
|
115
|
+
metadata_fields.each do |field|
|
|
116
|
+
metadata[field.to_s] = params[field] if params.key?(field)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Build input messages array, prepending system prompt if present
|
|
120
|
+
input_messages = []
|
|
121
|
+
|
|
122
|
+
# Prepend system prompt as a message if present
|
|
123
|
+
if params[:system]
|
|
124
|
+
# System can be a string or array of text blocks
|
|
125
|
+
system_content = params[:system]
|
|
126
|
+
if system_content.is_a?(Array)
|
|
127
|
+
# Extract text from array of text blocks
|
|
128
|
+
system_text = system_content.map { |block|
|
|
129
|
+
block.is_a?(Hash) ? block[:text] : block
|
|
130
|
+
}.join("\n")
|
|
131
|
+
input_messages << {role: "system", content: system_text}
|
|
132
|
+
else
|
|
133
|
+
input_messages << {role: "system", content: system_content}
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
# Add user/assistant messages
|
|
138
|
+
if params[:messages]
|
|
139
|
+
messages_array = params[:messages].map(&:to_h)
|
|
140
|
+
input_messages.concat(messages_array)
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# Set input messages as JSON
|
|
144
|
+
if input_messages.any?
|
|
145
|
+
span.set_attribute("braintrust.input_json", JSON.generate(input_messages))
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Call the original method
|
|
149
|
+
response = super(**params)
|
|
150
|
+
|
|
151
|
+
# Format output as array of messages (same format as input)
|
|
152
|
+
if response.respond_to?(:content) && response.content
|
|
153
|
+
content_array = response.content.map(&:to_h)
|
|
154
|
+
output = [{
|
|
155
|
+
role: response.respond_to?(:role) ? response.role : "assistant",
|
|
156
|
+
content: content_array
|
|
157
|
+
}]
|
|
158
|
+
span.set_attribute("braintrust.output_json", JSON.generate(output))
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
# Set metrics (token usage with Anthropic-specific cache tokens)
|
|
162
|
+
if response.respond_to?(:usage) && response.usage
|
|
163
|
+
metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(response.usage)
|
|
164
|
+
span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Add response metadata fields
|
|
168
|
+
if response.respond_to?(:stop_reason) && response.stop_reason
|
|
169
|
+
metadata["stop_reason"] = response.stop_reason
|
|
170
|
+
end
|
|
171
|
+
if response.respond_to?(:stop_sequence) && response.stop_sequence
|
|
172
|
+
metadata["stop_sequence"] = response.stop_sequence
|
|
173
|
+
end
|
|
174
|
+
# Update model if present in response (in case it was resolved from "latest")
|
|
175
|
+
if response.respond_to?(:model) && response.model
|
|
176
|
+
metadata["model"] = response.model
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
# Set metadata ONCE at the end with complete hash
|
|
180
|
+
span.set_attribute("braintrust.metadata", JSON.generate(metadata))
|
|
181
|
+
|
|
182
|
+
response
|
|
183
|
+
end
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# Prepend the wrapper to the messages resource
|
|
188
|
+
client.messages.singleton_class.prepend(wrapper)
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# Wrap messages.stream API
|
|
192
|
+
# @param client [Anthropic::Client] the Anthropic client
|
|
193
|
+
# @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
|
|
194
|
+
def self.wrap_messages_stream(client, tracer_provider)
  # Request parameters copied verbatim into the span metadata when present.
  metadata_fields = %i[
    model max_tokens temperature top_p top_k stop_sequences
    tools tool_choice thinking metadata service_tier
  ].freeze

  # Module prepended onto the client's messages resource so that `stream`
  # is intercepted and `super` still reaches the original implementation.
  wrapper = Module.new do
    define_method(:stream) do |**params, &block|
      tracer = tracer_provider.tracer("braintrust")
      aggregated_events = []

      metadata = {
        "provider" => "anthropic",
        "endpoint" => "/v1/messages",
        "stream" => true
      }

      # The span is started here and finished only once the stream has been
      # consumed through `each` (or immediately if the request itself fails).
      span = tracer.start_span("anthropic.messages.create")

      metadata_fields.each do |field|
        metadata[field.to_s] = params[field] if params.key?(field)
      end

      # Build the logged input: optional system prompt first, then messages.
      input_messages = []

      if params[:system]
        system_content = params[:system]
        if system_content.is_a?(Array)
          # Array form: join the text of each part with newlines.
          system_content = system_content.map { |part|
            part.is_a?(Hash) ? part[:text] : part
          }.join("\n")
        end
        input_messages << {role: "system", content: system_content}
      end

      input_messages.concat(params[:messages].map(&:to_h)) if params[:messages]

      if input_messages.any?
        span.set_attribute("braintrust.input_json", JSON.generate(input_messages))
      end

      # Initial metadata; rewritten with response fields after streaming.
      span.set_attribute("braintrust.metadata", JSON.generate(metadata))

      # Call the original stream method WITHOUT the block; the block is
      # driven through the wrapped `each` below so events can be recorded.
      begin
        stream = super(**params)
      rescue => e
        span.record_exception(e)
        span.status = ::OpenTelemetry::Trace::Status.error("Anthropic API error: #{e.message}")
        span.finish
        raise
      end

      # Keep the tracing state on the stream object itself.
      stream.instance_variable_set(:@braintrust_aggregated_events, aggregated_events)
      stream.instance_variable_set(:@braintrust_span, span)
      stream.instance_variable_set(:@braintrust_metadata, metadata)

      # Replace `each` with a version that records every event, forwards it
      # to the caller's block, and finalizes the span afterwards.
      original_each = stream.method(:each)
      stream.define_singleton_method(:each) do |&user_block|
        events = instance_variable_get(:@braintrust_aggregated_events)
        span_obj = instance_variable_get(:@braintrust_span)
        meta = instance_variable_get(:@braintrust_metadata)

        begin
          original_each.call do |event|
            events << event.to_h if event.respond_to?(:to_h)
            user_block&.call(event)
          end
        rescue => e
          span_obj.record_exception(e)
          span_obj.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
          raise
        ensure
          # The inner ensure guarantees the span is finished even when
          # aggregation or attribute serialization raises; previously such
          # an error skipped `finish` and left the span open.
          begin
            unless events.empty?
              aggregated_output = Braintrust::Trace::Anthropic.aggregate_streaming_events(events)

              if aggregated_output[:content]
                output = [{
                  role: "assistant",
                  content: aggregated_output[:content]
                }]
                Braintrust::Trace::Anthropic.set_json_attr(span_obj, "braintrust.output_json", output)
              end

              if aggregated_output[:usage]
                metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(aggregated_output[:usage])
                Braintrust::Trace::Anthropic.set_json_attr(span_obj, "braintrust.metrics", metrics) unless metrics.empty?
              end

              # Merge response-derived fields into the request metadata.
              meta["stop_reason"] = aggregated_output[:stop_reason] if aggregated_output[:stop_reason]
              meta["model"] = aggregated_output[:model] if aggregated_output[:model]
              Braintrust::Trace::Anthropic.set_json_attr(span_obj, "braintrust.metadata", meta)
            end
          ensure
            span_obj.finish
          end
        end
      end

      # If a block was given to stream(), consume the stream with it now.
      stream.each(&block) if block

      stream
    end
  end

  # Prepend the wrapper to the messages resource
  client.messages.singleton_class.prepend(wrapper)
end
|
|
323
|
+
|
|
324
|
+
# Aggregate streaming events into a single response structure
|
|
325
|
+
# @param events [Array<Hash>] array of event hashes from stream
|
|
326
|
+
# @return [Hash] aggregated response with content, usage, etc.
|
|
327
|
+
def self.aggregate_streaming_events(events)
  return {} if events.empty?

  # Event hashes may use symbol or string keys; read either form.
  read = ->(hash, name) { hash[name] || hash[name.to_s] }

  # Write a field under ONE key, following the key style the block already
  # uses (symbol by default). Writing both :name and "name" makes
  # JSON.generate emit the same object key twice.
  write = lambda do |block, name, value|
    string_style = !block.key?(name) &&
      (block.key?(name.to_s) || block.each_key.any? { |k| k.is_a?(String) })
    block[string_style ? name.to_s : name] = value
  end

  result = {
    content: [],
    usage: {},
    stop_reason: nil,
    model: nil
  }

  content_blocks = {}   # index => content block hash
  content_builders = {} # index => accumulated text / partial JSON string

  events.each do |event|
    event_type = read.call(event, :type)
    next unless event_type

    # to_s so a symbol-typed event is matched the same as a string-typed one.
    case event_type.to_s
    when "message_start"
      # Model name and initial usage counters.
      message = read.call(event, :message)
      if message
        result[:model] = read.call(message, :model)
        usage = read.call(message, :usage)
        result[:usage].merge!(usage) if usage
      end

    when "content_block_start"
      index = read.call(event, :index)
      content_block = read.call(event, :content_block)
      # Copy so that filling in the block later does not mutate the
      # caller's event hashes.
      content_blocks[index] = content_block.dup if index && content_block

    when "content_block_delta"
      index = read.call(event, :index)
      delta = read.call(event, :delta)
      next unless index && delta

      block = (content_blocks[index] ||= {})

      case read.call(delta, :type).to_s
      when "text_delta"
        text = read.call(delta, :text)
        if text
          (content_builders[index] ||= +"") << text
          write.call(block, :type, "text")
        end
      when "input_json_delta"
        partial_json = read.call(delta, :partial_json)
        if partial_json
          (content_builders[index] ||= +"") << partial_json
          write.call(block, :type, "tool_use")
        end
      end

    when "message_delta"
      # Final stop reason and later usage counters.
      delta = read.call(event, :delta)
      if delta
        stop_reason = read.call(delta, :stop_reason)
        result[:stop_reason] = stop_reason if stop_reason
      end

      usage = read.call(event, :usage)
      result[:usage].merge!(usage) if usage
    end
  end

  # Fold the accumulated strings back into their content blocks.
  content_builders.each do |index, text|
    block = content_blocks[index]
    next unless block

    case read.call(block, :type).to_s
    when "text"
      write.call(block, :text, text)
    when "tool_use"
      # Nothing accumulated: keep the input from content_block_start
      # instead of replacing it with an empty string.
      next if text.empty?

      parsed = begin
        JSON.parse(text)
      rescue JSON::ParserError
        text # incomplete or invalid JSON: keep the raw string
      end
      write.call(block, :input, parsed)
    end
  end

  # Blocks are emitted in index order.
  if content_blocks.any?
    result[:content] = content_blocks.sort_by { |index, _block| index }.map { |_index, block| block }
  end

  result
end
|
|
437
|
+
end
|
|
438
|
+
end
|
|
439
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Braintrust
  module Trace
    # Span filters decide which spans are exported to Braintrust, which helps
    # cut noise and cost by leaving out spans unrelated to AI calls.
    #
    # A filter receives a span and answers with an Integer:
    #    1 = keep the span
    #    0 = no influence (the next filter decides)
    #   -1 = drop the span
    module SpanFilter
      # Attribute keys skipped when looking for AI indicators, even though
      # they start with the "braintrust." prefix.
      SYSTEM_ATTRIBUTES = %w[
        braintrust.parent
        braintrust.org
        braintrust.app_url
      ].freeze

      # Name/attribute prefixes that mark a span as AI-related.
      AI_PREFIXES = %w[
        gen_ai.
        braintrust.
        llm.
        ai.
        traceloop.
      ].freeze

      # Keeps spans whose name, or any non-system attribute key, begins with
      # one of AI_PREFIXES; every other span is dropped.
      #
      # @param span [OpenTelemetry::SDK::Trace::SpanData] The span to filter
      # @return [Integer] 1 to keep, -1 to drop (this filter never returns 0)
      def self.ai_filter(span)
        return 1 if span.name.start_with?(*AI_PREFIXES)

        # A missing attribute collection is treated as empty.
        ai_attribute = (span.attributes || {}).any? do |key, _value|
          key_name = key.to_s
          !SYSTEM_ATTRIBUTES.include?(key_name) && key_name.start_with?(*AI_PREFIXES)
        end

        ai_attribute ? 1 : -1
      end
    end
  end
end
|
|
@@ -5,14 +5,16 @@ require "opentelemetry/sdk"
|
|
|
5
5
|
module Braintrust
|
|
6
6
|
module Trace
|
|
7
7
|
# Custom span processor that adds Braintrust-specific attributes to spans
|
|
8
|
+
# and optionally filters spans based on custom filter functions.
|
|
8
9
|
class SpanProcessor
|
|
9
10
|
PARENT_ATTR_KEY = "braintrust.parent"
|
|
10
11
|
ORG_ATTR_KEY = "braintrust.org"
|
|
11
12
|
APP_URL_ATTR_KEY = "braintrust.app_url"
|
|
12
13
|
|
|
13
|
-
def initialize(wrapped_processor, state)
|
|
14
|
+
def initialize(wrapped_processor, state, filters = [])
|
|
14
15
|
@wrapped = wrapped_processor
|
|
15
16
|
@state = state
|
|
17
|
+
@filters = filters || []
|
|
16
18
|
end
|
|
17
19
|
|
|
18
20
|
def on_start(span, parent_context)
|
|
@@ -33,9 +35,10 @@ module Braintrust
|
|
|
33
35
|
@wrapped.on_start(span, parent_context)
|
|
34
36
|
end
|
|
35
37
|
|
|
36
|
-
# Called when a span ends
|
|
38
|
+
# Called when a span ends - apply filters before forwarding
|
|
37
39
|
def on_finish(span)
|
|
38
|
-
|
|
40
|
+
# Only forward span if it passes filters
|
|
41
|
+
@wrapped.on_finish(span) if should_forward_span?(span)
|
|
39
42
|
end
|
|
40
43
|
|
|
41
44
|
# Shutdown the processor
|
|
@@ -73,6 +76,29 @@ module Braintrust
|
|
|
73
76
|
# Return the parent attribute from the parent span
|
|
74
77
|
parent_span.attributes&.[](PARENT_ATTR_KEY)
|
|
75
78
|
end
|
|
79
|
+
|
|
80
|
+
# Determine if a span should be forwarded to the wrapped processor
|
|
81
|
+
# based on configured filters
|
|
82
|
+
def should_forward_span?(span)
  # Root spans (parent id is the invalid/zero span id) are always kept.
  return true if span.parent_span_id == OpenTelemetry::Trace::INVALID_SPAN_ID

  # Filters run in order; the first non-zero verdict wins.
  @filters.each do |filter|
    verdict = filter.call(span)
    # A filter that returns nil or another non-numeric value is treated as
    # "no influence" instead of raising NoMethodError while the span ends.
    next unless verdict.is_a?(Numeric)

    return true if verdict > 0  # keep
    return false if verdict < 0 # drop
  end

  # No filters configured, or none expressed an opinion: keep the span.
  true
end
|
|
76
102
|
end
|
|
77
103
|
end
|
|
78
104
|
end
|
data/lib/braintrust/trace.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require "opentelemetry/sdk"
|
|
4
4
|
require "opentelemetry/exporter/otlp"
|
|
5
5
|
require_relative "trace/span_processor"
|
|
6
|
+
require_relative "trace/span_filter"
|
|
6
7
|
require_relative "logger"
|
|
7
8
|
|
|
8
9
|
# OpenAI integration is optional - automatically loaded if openai gem is available
|
|
@@ -13,13 +14,22 @@ rescue LoadError
|
|
|
13
14
|
# OpenAI gem not installed - integration will not be available
|
|
14
15
|
end
|
|
15
16
|
|
|
17
|
+
# Anthropic integration is optional - automatically loaded if anthropic gem is available
|
|
18
|
+
begin
|
|
19
|
+
require "anthropic"
|
|
20
|
+
require_relative "trace/contrib/anthropic"
|
|
21
|
+
rescue LoadError
|
|
22
|
+
# Anthropic gem not installed - integration will not be available
|
|
23
|
+
end
|
|
24
|
+
|
|
16
25
|
module Braintrust
|
|
17
26
|
module Trace
|
|
18
27
|
# Set up OpenTelemetry tracing with Braintrust
|
|
19
28
|
# @param state [State] Braintrust state
|
|
20
29
|
# @param tracer_provider [TracerProvider, nil] Optional tracer provider
|
|
30
|
+
# @param exporter [Exporter, nil] Optional exporter override (for testing)
|
|
21
31
|
# @return [void]
|
|
22
|
-
def self.setup(state, tracer_provider = nil)
|
|
32
|
+
def self.setup(state, tracer_provider = nil, exporter: nil)
|
|
23
33
|
if tracer_provider
|
|
24
34
|
# Use the explicitly provided tracer provider
|
|
25
35
|
# DO NOT set as global - user is managing it themselves
|
|
@@ -41,13 +51,17 @@ module Braintrust
|
|
|
41
51
|
end
|
|
42
52
|
|
|
43
53
|
# Enable Braintrust tracing (adds span processor)
|
|
44
|
-
|
|
54
|
+
config = state.config
|
|
55
|
+
enable(tracer_provider, state: state, config: config, exporter: exporter)
|
|
45
56
|
end
|
|
46
57
|
|
|
47
|
-
def self.enable(tracer_provider, state: nil, exporter: nil)
|
|
58
|
+
def self.enable(tracer_provider, state: nil, exporter: nil, config: nil)
|
|
48
59
|
state ||= Braintrust.current_state
|
|
49
60
|
raise Error, "No state available" unless state
|
|
50
61
|
|
|
62
|
+
# Get config from state if available
|
|
63
|
+
config ||= state.respond_to?(:config) ? state.config : nil
|
|
64
|
+
|
|
51
65
|
# Create OTLP HTTP exporter unless override provided
|
|
52
66
|
exporter ||= OpenTelemetry::Exporter::OTLP::Exporter.new(
|
|
53
67
|
endpoint: "#{state.api_url}/otel/v1/traces",
|
|
@@ -56,11 +70,18 @@ module Braintrust
|
|
|
56
70
|
}
|
|
57
71
|
)
|
|
58
72
|
|
|
59
|
-
#
|
|
60
|
-
|
|
73
|
+
# Use SimpleSpanProcessor for InMemorySpanExporter (testing), BatchSpanProcessor for production
|
|
74
|
+
span_processor = if exporter.is_a?(OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter)
|
|
75
|
+
OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(exporter)
|
|
76
|
+
else
|
|
77
|
+
OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(exporter)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Build filters array from config
|
|
81
|
+
filters = build_filters(config)
|
|
61
82
|
|
|
62
|
-
# Wrap
|
|
63
|
-
processor = SpanProcessor.new(
|
|
83
|
+
# Wrap span processor in our custom span processor to add Braintrust attributes and filters
|
|
84
|
+
processor = SpanProcessor.new(span_processor, state, filters)
|
|
64
85
|
|
|
65
86
|
# Register with tracer provider
|
|
66
87
|
tracer_provider.add_span_processor(processor)
|
|
@@ -75,6 +96,25 @@ module Braintrust
|
|
|
75
96
|
self
|
|
76
97
|
end
|
|
77
98
|
|
|
99
|
+
# Build filters array from config
|
|
100
|
+
# @param config [Config, nil] Configuration object
|
|
101
|
+
# @return [Array<Proc>] Array of filter functions
|
|
102
|
+
def self.build_filters(config)
  # Without a config there is nothing to build.
  return [] if config.nil?

  # User-supplied filters come first so they take priority over the
  # built-in AI filter; they are copied into a fresh array.
  custom_filters = config.span_filter_funcs
  filters = custom_filters&.any? ? [].concat(custom_filters) : []

  # The built-in AI filter is appended last, only when enabled.
  filters << SpanFilter.method(:ai_filter) if config.filter_ai_spans
  filters
end
|
|
117
|
+
|
|
78
118
|
# Generate a permalink URL for a span to view in the Braintrust UI
|
|
79
119
|
# Returns an empty string if the permalink cannot be generated
|
|
80
120
|
# @param span [OpenTelemetry::Trace::Span] The span to generate a permalink for
|
data/lib/braintrust/version.rb
CHANGED
data/lib/braintrust.rb
CHANGED
|
@@ -37,8 +37,11 @@ module Braintrust
|
|
|
37
37
|
# @param blocking_login [Boolean] Whether to block and login synchronously (default: false - async background login)
|
|
38
38
|
# @param enable_tracing [Boolean] Whether to enable OpenTelemetry tracing (default: true)
|
|
39
39
|
# @param tracer_provider [TracerProvider, nil] Optional tracer provider to use instead of creating one
|
|
40
|
+
# @param filter_ai_spans [Boolean, nil] Enable AI span filtering (overrides BRAINTRUST_OTEL_FILTER_AI_SPANS env var)
|
|
41
|
+
# @param span_filter_funcs [Array<Proc>, nil] Custom span filter functions
|
|
42
|
+
# @param exporter [Exporter, nil] Optional exporter override (for testing)
|
|
40
43
|
# @return [State] the created state
|
|
41
|
-
def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, set_global: true, blocking_login: false, enable_tracing: true, tracer_provider: nil)
|
|
44
|
+
def self.init(api_key: nil, org_name: nil, default_project: nil, app_url: nil, api_url: nil, set_global: true, blocking_login: false, enable_tracing: true, tracer_provider: nil, filter_ai_spans: nil, span_filter_funcs: nil, exporter: nil)
|
|
42
45
|
state = State.from_env(
|
|
43
46
|
api_key: api_key,
|
|
44
47
|
org_name: org_name,
|
|
@@ -47,7 +50,10 @@ module Braintrust
|
|
|
47
50
|
api_url: api_url,
|
|
48
51
|
blocking_login: blocking_login,
|
|
49
52
|
enable_tracing: enable_tracing,
|
|
50
|
-
tracer_provider: tracer_provider
|
|
53
|
+
tracer_provider: tracer_provider,
|
|
54
|
+
filter_ai_spans: filter_ai_spans,
|
|
55
|
+
span_filter_funcs: span_filter_funcs,
|
|
56
|
+
exporter: exporter
|
|
51
57
|
)
|
|
52
58
|
|
|
53
59
|
State.global = state if set_global
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: braintrust
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.2
|
|
4
|
+
version: 0.0.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Braintrust
|
|
@@ -202,7 +202,10 @@ files:
|
|
|
202
202
|
- lib/braintrust/logger.rb
|
|
203
203
|
- lib/braintrust/state.rb
|
|
204
204
|
- lib/braintrust/trace.rb
|
|
205
|
+
- lib/braintrust/trace/attachment.rb
|
|
206
|
+
- lib/braintrust/trace/contrib/anthropic.rb
|
|
205
207
|
- lib/braintrust/trace/contrib/openai.rb
|
|
208
|
+
- lib/braintrust/trace/span_filter.rb
|
|
206
209
|
- lib/braintrust/trace/span_processor.rb
|
|
207
210
|
- lib/braintrust/version.rb
|
|
208
211
|
homepage: https://github.com/braintrustdata/braintrust-sdk-ruby
|