braintrust 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3330f067e867f2d0d81ea12db80bb2dbf83d9ca447ef819e2bff7c97138e7f9e
4
- data.tar.gz: f2e4f8e34bfe2292d5b3883923d4b638de89b6c930144c02a4c7941b831ca7e6
3
+ metadata.gz: 127f10c355ef8d5b0968dcb3197d9612a68455087ad704fa17e5dcb41512ad6d
4
+ data.tar.gz: 0e1d31073d9d71f43a74f7d4b37cea8644b119afc282501ee4b311c6cad059ad
5
5
  SHA512:
6
- metadata.gz: feb0969a48253f3729b9f38d243abcb6f0367176b9cddf9df112be9e415f8b5dadf90ca50588bd2e09be84a4efb406f40aa13b4ccab88310bba1a73e993205e6
7
- data.tar.gz: 67e9c6163c4b20944953e7063286f98b2e079e424fd80991376110864971f251cb1ca6a663a99d923d9cd92d74be17030dba90a9ba5619f610d307d4075aa38f
6
+ metadata.gz: 654fae04c4cf51fa32b27864b92ac832e3e37472bfaabe20871aa1899ba027ae4bff6e0a054f833fcb7afe3ef0d3870479ecb824f4c7af8180ca8ea65b21a41c
7
+ data.tar.gz: c03683f9793b38477986ade0694f38178434b70c1eea7a1870c2b80a89ad45278fe54f5c7f880eec22719ca0698fab0ad8de5efba5123ee1692c18b0a258d94c
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Braintrust Ruby SDK
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/braintrust.svg)](https://badge.fury.io/rb/braintrust)
3
+ [![Gem Version](https://img.shields.io/gem/v/braintrust.svg)](https://rubygems.org/gems/braintrust)
4
4
  [![Documentation](https://img.shields.io/badge/docs-rubydoc.info-blue.svg)](https://rubydoc.info/gems/braintrust)
5
5
  ![Beta](https://img.shields.io/badge/status-beta-yellow)
6
6
 
@@ -136,10 +136,45 @@ puts "View trace at: #{Braintrust::Trace.permalink(root_span)}"
136
136
  OpenTelemetry.tracer_provider.shutdown
137
137
  ```
138
138
 
139
+ ### Anthropic Tracing
140
+
141
+ ```ruby
142
+ require "braintrust"
143
+ require "anthropic"
144
+
145
+ Braintrust.init
146
+
147
+ client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"])
148
+
149
+ Braintrust::Trace::Anthropic.wrap(client)
150
+
151
+ tracer = OpenTelemetry.tracer_provider.tracer("anthropic-app")
152
+ root_span = nil
153
+
154
+ message = tracer.in_span("chat-message") do |span|
155
+ root_span = span
156
+
157
+ client.messages.create(
158
+ model: "claude-3-5-sonnet-20241022",
159
+ max_tokens: 100,
160
+ system: "You are a helpful assistant.",
161
+ messages: [
162
+ {role: "user", content: "Say hello!"}
163
+ ]
164
+ )
165
+ end
166
+
167
+ puts "Response: #{message.content[0].text}"
168
+
169
+ puts "View trace at: #{Braintrust::Trace.permalink(root_span)}"
170
+
171
+ OpenTelemetry.tracer_provider.shutdown
172
+ ```
173
+
139
174
  ## Features
140
175
 
141
176
  - **Evaluations**: Run systematic evaluations of your AI systems with custom scoring functions
142
- - **Tracing**: Automatic instrumentation for OpenAI API calls with OpenTelemetry
177
+ - **Tracing**: Automatic instrumentation for OpenAI and Anthropic API calls with OpenTelemetry
143
178
  - **Datasets**: Manage and version your evaluation datasets
144
179
  - **Experiments**: Track different versions and configurations of your AI systems
145
180
  - **Observability**: Monitor your AI applications in production
@@ -151,6 +186,7 @@ Check out the [`examples/`](./examples/) directory for complete working examples
151
186
  - [eval.rb](./examples/eval.rb) - Create and run evaluations with custom test cases and scoring functions
152
187
  - [trace.rb](./examples/trace.rb) - Manual span creation and tracing
153
188
  - [openai.rb](./examples/openai.rb) - Automatically trace OpenAI API calls
189
+ - [anthropic.rb](./examples/anthropic.rb) - Automatically trace Anthropic API calls
154
190
  - [eval/dataset.rb](./examples/eval/dataset.rb) - Run evaluations using datasets stored in Braintrust
155
191
  - [eval/remote_functions.rb](./examples/eval/remote_functions.rb) - Use remote scoring functions
156
192
 
@@ -0,0 +1,439 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "opentelemetry/sdk"
4
+ require "json"
5
+
6
+ module Braintrust
7
+ module Trace
8
+ module Anthropic
9
# Serialize +obj+ to JSON and store the resulting string on +span+.
# When +obj+ is nil or false nothing is written and nil is returned.
# @param span [OpenTelemetry::Trace::Span] span that receives the attribute
# @param attr_name [String] attribute key (e.g., "braintrust.output_json")
# @param obj [Object] value passed to JSON.generate
# @return [void]
def self.set_json_attr(span, attr_name, obj)
  span.set_attribute(attr_name, JSON.generate(obj)) if obj
end
19
+
20
# Normalize an Anthropic usage object into Braintrust metric names.
#
# Renames applied to numeric fields:
# - cache_creation_input_tokens → prompt_cache_creation_tokens
# - cache_read_input_tokens     → prompt_cached_tokens
# - output_tokens               → completion_tokens
# - total_tokens                → tokens
# Any other numeric field is copied under its own name; non-numeric values
# are skipped. "prompt_tokens" is always written as
# input_tokens + cache_creation_input_tokens + cache_read_input_tokens, and
# "tokens" is derived as prompt_tokens + completion_tokens when the usage did
# not carry total_tokens but did carry output_tokens.
#
# @param usage [Hash, Object] usage from an Anthropic response; converted
#   with #to_h when it responds to it
# @return [Hash<String, Integer>] normalized metrics ({} when usage is nil/false)
def self.parse_usage_tokens(usage)
  return {} unless usage

  fields = usage.respond_to?(:to_h) ? usage.to_h : usage

  renames = {
    "cache_creation_input_tokens" => "prompt_cache_creation_tokens",
    "cache_read_input_tokens" => "prompt_cached_tokens",
    "output_tokens" => "completion_tokens",
    "total_tokens" => "tokens"
  }
  # The three counters that together make up the prompt size.
  prompt_parts = {
    "input_tokens" => 0,
    "cache_creation_input_tokens" => 0,
    "cache_read_input_tokens" => 0
  }

  metrics = {}
  fields.each do |key, value|
    next unless value.is_a?(Numeric)

    name = key.to_s
    count = value.to_i
    prompt_parts[name] = count if prompt_parts.key?(name)
    # input_tokens only feeds the prompt total; it has no metric of its own.
    metrics[renames.fetch(name, name)] = count unless name == "input_tokens"
  end

  prompt_total = prompt_parts.values.sum
  metrics["prompt_tokens"] = prompt_total

  if metrics.key?("completion_tokens") && !metrics.key?("tokens")
    metrics["tokens"] = prompt_total + metrics["completion_tokens"]
  end

  metrics
end
76
+
77
# Instrument an Anthropic client so that `messages.create` and
# `messages.stream` calls are traced, then hand the same client back.
# @param client [Anthropic::Client] the client to instrument
# @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider, nil]
#   provider used to obtain tracers; falls back to the global
#   OpenTelemetry.tracer_provider when omitted
# @return [Anthropic::Client] the client that was passed in
def self.wrap(client, tracer_provider: nil)
  provider = tracer_provider || ::OpenTelemetry.tracer_provider

  # Non-streaming requests first, then the streaming entry point.
  wrap_messages_create(client, provider)
  wrap_messages_stream(client, provider)

  client
end
92
+
93
# Install tracing around `client.messages.create`.
#
# Prepends an anonymous module to the singleton class of `client.messages`.
# Its `create` runs the original call inside an "anthropic.messages.create"
# span and records JSON-encoded attributes on that span:
# - braintrust.input_json:  system prompt (when given) followed by the request messages
# - braintrust.output_json: the response content wrapped as a single message
# - braintrust.metrics:     normalized token usage, when the response carries usage
# - braintrust.metadata:    provider, endpoint, selected request parameters and
#   stop_reason / stop_sequence / model taken from the response
#
# @param client [Anthropic::Client] the Anthropic client
# @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
def self.wrap_messages_create(client, tracer_provider)
  # `system` may be a plain string or an array of text blocks. Blocks are read
  # with either symbol or string keys so string-keyed hashes are not dropped.
  # NOTE(review): non-Hash blocks are assumed to expose #text when they are SDK
  # param objects — confirm against the anthropic gem; anything else is joined as-is.
  system_text = lambda do |system|
    if system.is_a?(Array)
      system.map { |part|
        if part.is_a?(Hash)
          part[:text] || part["text"]
        elsif part.respond_to?(:text)
          part.text
        else
          part
        end
      }.join("\n")
    else
      system
    end
  end

  wrapper = Module.new do
    define_method(:create) do |**params|
      tracer = tracer_provider.tracer("braintrust")

      tracer.in_span("anthropic.messages.create") do |span|
        metadata = {
          "provider" => "anthropic",
          "endpoint" => "/v1/messages"
        }

        # Copy over only the request parameters that were actually supplied.
        request_fields = %i[
          model max_tokens temperature top_p top_k stop_sequences
          stream tools tool_choice thinking metadata service_tier
        ]
        request_fields.each do |field|
          metadata[field.to_s] = params[field] if params.key?(field)
        end

        # Input is logged as one message list: system prompt first, then the conversation.
        input_messages = []
        if params[:system]
          input_messages << {role: "system", content: system_text.call(params[:system])}
        end
        input_messages.concat(params[:messages].map(&:to_h)) if params[:messages]

        if input_messages.any?
          span.set_attribute("braintrust.input_json", JSON.generate(input_messages))
        end

        # Delegate to the original implementation.
        response = super(**params)

        # Output uses the same message-list shape as the input.
        if response.respond_to?(:content) && response.content
          output = [{
            role: response.respond_to?(:role) ? response.role : "assistant",
            content: response.content.map(&:to_h)
          }]
          span.set_attribute("braintrust.output_json", JSON.generate(output))
        end

        if response.respond_to?(:usage) && response.usage
          metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(response.usage)
          span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty?
        end

        if response.respond_to?(:stop_reason) && response.stop_reason
          metadata["stop_reason"] = response.stop_reason
        end
        if response.respond_to?(:stop_sequence) && response.stop_sequence
          metadata["stop_sequence"] = response.stop_sequence
        end
        # The response's model overrides the requested one (e.g. a resolved alias).
        if response.respond_to?(:model) && response.model
          metadata["model"] = response.model
        end

        # Metadata is written once, after request and response fields are merged.
        span.set_attribute("braintrust.metadata", JSON.generate(metadata))

        response
      end
    end
  end

  client.messages.singleton_class.prepend(wrapper)
end
190
+
191
# Install tracing around `client.messages.stream`.
#
# Prepends an anonymous module to the singleton class of `client.messages`.
# Its `stream` starts an "anthropic.messages.create" span, records the request
# (braintrust.input_json / braintrust.metadata), calls the original `stream`
# without the caller's block, and replaces `each` on the returned stream so
# that every event is collected while it is yielded to the caller. When
# iteration ends (normally or with an error) the collected events are
# aggregated into braintrust.output_json / braintrust.metrics /
# braintrust.metadata and the span is finished.
#
# The span is finished on every path: request-capture failure, API failure,
# streaming failure and aggregation failure. If the returned stream is never
# iterated, nothing finishes the span.
#
# @param client [Anthropic::Client] the Anthropic client
# @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider
def self.wrap_messages_stream(client, tracer_provider)
  # `system` may be a plain string or an array of text blocks. Blocks are read
  # with either symbol or string keys so string-keyed hashes are not dropped.
  # NOTE(review): non-Hash blocks are assumed to expose #text when they are SDK
  # param objects — confirm against the anthropic gem; anything else is joined as-is.
  system_text = lambda do |system|
    if system.is_a?(Array)
      system.map { |part|
        if part.is_a?(Hash)
          part[:text] || part["text"]
        elsif part.respond_to?(:text)
          part.text
        else
          part
        end
      }.join("\n")
    else
      system
    end
  end

  wrapper = Module.new do
    define_method(:stream) do |**params, &block|
      tracer = tracer_provider.tracer("braintrust")
      events = []
      metadata = {
        "provider" => "anthropic",
        "endpoint" => "/v1/messages",
        "stream" => true
      }

      span = tracer.start_span("anthropic.messages.create")

      # Everything between start_span and the returned stream is guarded, so a
      # failure while capturing the request cannot leave the span unfinished.
      failure_label = "Request capture error"
      begin
        request_fields = %i[
          model max_tokens temperature top_p top_k stop_sequences
          tools tool_choice thinking metadata service_tier
        ]
        request_fields.each do |field|
          metadata[field.to_s] = params[field] if params.key?(field)
        end

        input_messages = []
        if params[:system]
          input_messages << {role: "system", content: system_text.call(params[:system])}
        end
        input_messages.concat(params[:messages].map(&:to_h)) if params[:messages]

        if input_messages.any?
          span.set_attribute("braintrust.input_json", JSON.generate(input_messages))
        end

        # Initial metadata; rewritten with response fields once the stream ends.
        span.set_attribute("braintrust.metadata", JSON.generate(metadata))

        # The caller's block is deliberately not forwarded: it is driven through
        # the wrapped `each` below so events can be collected.
        failure_label = "Anthropic API error"
        stream = super(**params)
      rescue => e
        span.record_exception(e)
        span.status = ::OpenTelemetry::Trace::Status.error("#{failure_label}: #{e.message}")
        span.finish
        raise
      end

      original_each = stream.method(:each)
      finalized = false

      stream.define_singleton_method(:each) do |&user_block|
        begin
          original_each.call do |event|
            events << event.to_h if event.respond_to?(:to_h)
            user_block&.call(event)
          end
        rescue => e
          span.record_exception(e)
          span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}")
          raise
        ensure
          # Finalize once, even if the stream is iterated again.
          unless finalized
            finalized = true
            begin
              unless events.empty?
                aggregated = Braintrust::Trace::Anthropic.aggregate_streaming_events(events)

                if aggregated[:content]
                  output = [{role: "assistant", content: aggregated[:content]}]
                  Braintrust::Trace::Anthropic.set_json_attr(span, "braintrust.output_json", output)
                end

                if aggregated[:usage]
                  metrics = Braintrust::Trace::Anthropic.parse_usage_tokens(aggregated[:usage])
                  Braintrust::Trace::Anthropic.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty?
                end

                metadata["stop_reason"] = aggregated[:stop_reason] if aggregated[:stop_reason]
                metadata["model"] = aggregated[:model] if aggregated[:model]
                Braintrust::Trace::Anthropic.set_json_attr(span, "braintrust.metadata", metadata)
              end
            rescue => aggregation_error
              # Telemetry is best effort: record the problem on the span rather
              # than masking the stream's own outcome.
              span.record_exception(aggregation_error)
            ensure
              span.finish
            end
          end
        end
      end

      # A block given to stream() consumes the stream immediately.
      stream.each(&block) if block

      stream
    end
  end

  client.messages.singleton_class.prepend(wrapper)
end
323
+
324
# Fold a list of streaming event hashes into one response-like hash.
#
# Handled event types:
# - message_start:       takes the model and merges the initial usage
# - content_block_start: registers the content block at its index
# - content_block_delta: accumulates text_delta text and input_json_delta JSON
# - message_delta:       takes the stop reason and merges the usage
# Events without a type, and other event types, are ignored.
#
# Event hashes may use symbol or string keys. Aggregated fields are written
# back using the key style the content block already uses (symbols for blocks
# that have no keys yet), so each field appears once when the result is
# serialized. The hashes inside +events+ are not modified.
#
# @param events [Array<Hash>] array of event hashes from stream
# @return [Hash] {} for no events, otherwise a hash with :content (blocks
#   ordered by index), :usage, :stop_reason and :model
def self.aggregate_streaming_events(events)
  return {} if events.empty?

  read = ->(hash, key) { hash[key] || hash[key.to_s] }
  write = lambda do |hash, key, value|
    string_keyed = !hash.key?(key) && hash.each_key.any? { |k| k.is_a?(String) }
    hash[string_keyed ? key.to_s : key] = value
  end

  result = {
    content: [],
    usage: {},
    stop_reason: nil,
    model: nil
  }

  blocks = {}    # index => content block hash
  fragments = {} # index => accumulated text / partial JSON

  events.each do |event|
    case read.call(event, :type)
    when "message_start"
      message = read.call(event, :message)
      next unless message

      result[:model] = read.call(message, :model)
      usage = read.call(message, :usage)
      result[:usage].merge!(usage) if usage

    when "content_block_start"
      index = read.call(event, :index)
      block = read.call(event, :content_block)
      # Copy so later writes do not leak into the caller's event.
      blocks[index] = block.dup if index && block

    when "content_block_delta"
      index = read.call(event, :index)
      delta = read.call(event, :delta)
      next unless index && delta

      blocks[index] ||= {}

      case read.call(delta, :type)
      when "text_delta"
        text = read.call(delta, :text)
        if text
          (fragments[index] ||= +"") << text
          write.call(blocks[index], :type, "text")
        end
      when "input_json_delta"
        partial_json = read.call(delta, :partial_json)
        if partial_json
          (fragments[index] ||= +"") << partial_json
          write.call(blocks[index], :type, "tool_use")
        end
      end

    when "message_delta"
      delta = read.call(event, :delta)
      if delta
        stop_reason = read.call(delta, :stop_reason)
        result[:stop_reason] = stop_reason if stop_reason
      end

      usage = read.call(event, :usage)
      result[:usage].merge!(usage) if usage
    end
  end

  # Fold the accumulated fragments back into their blocks.
  fragments.each do |index, buffer|
    block = blocks[index]
    next unless block

    case read.call(block, :type)
    when "text"
      write.call(block, :text, buffer)
    when "tool_use"
      input =
        if buffer.strip.empty?
          # No JSON was streamed: keep the input the block started with.
          read.call(block, :input) || {}
        else
          begin
            JSON.parse(buffer)
          rescue JSON::ParserError
            # Unparseable (e.g. truncated) JSON is kept as the raw string.
            buffer
          end
        end
      write.call(block, :input, input)
    end
  end

  result[:content] = blocks.sort_by { |index, _block| index }.map { |_index, block| block }

  result
end
437
+ end
438
+ end
439
+ end
@@ -13,6 +13,14 @@ rescue LoadError
13
13
  # OpenAI gem not installed - integration will not be available
14
14
  end
15
15
 
16
+ # Anthropic integration is optional - automatically loaded if anthropic gem is available
17
+ begin
18
+ require "anthropic"
19
+ require_relative "trace/contrib/anthropic"
20
+ rescue LoadError
21
+ # Anthropic gem not installed - integration will not be available
22
+ end
23
+
16
24
  module Braintrust
17
25
  module Trace
18
26
  # Set up OpenTelemetry tracing with Braintrust
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Braintrust
4
- VERSION = "0.0.2"
4
+ VERSION = "0.0.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: braintrust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braintrust
@@ -202,6 +202,7 @@ files:
202
202
  - lib/braintrust/logger.rb
203
203
  - lib/braintrust/state.rb
204
204
  - lib/braintrust/trace.rb
205
+ - lib/braintrust/trace/contrib/anthropic.rb
205
206
  - lib/braintrust/trace/contrib/openai.rb
206
207
  - lib/braintrust/trace/span_processor.rb
207
208
  - lib/braintrust/version.rb