braintrust 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 626876b443795d28b4ba5d12f8bf10381c3052d5d196adb01207d545303f3d1e
4
- data.tar.gz: 347ca89ea9f485ca6521a38c067bdd15074db4e6a4757523901888a8d4cc3e9c
3
+ metadata.gz: e1a5c8840f707c7b4da95e4ccc8abea32591606d667a309432f2955d5df26eca
4
+ data.tar.gz: a45e62f34a1d59dd11e1cc46ff8d128a495a45a80fa1ce2026c76b648b58de89
5
5
  SHA512:
6
- metadata.gz: 7b827a4f92e2bc4b39e41174e62dacdc431fdc0b6c8d13882bdcaaa369af9621174fc01bafa3d0d594b71464ea374c6904e9834631957951dad87d6583a58dc9
7
- data.tar.gz: bb6f2d3807765ef4ad591849e0972379fc3f97ef8d90bda0785e1d4dab87ce5e91d954d9d3c8fc7eff6c9295d120d6cbe07acb5bb348873c842d791a3fbdce84
6
+ metadata.gz: 75b71465a80ed2cfd3c6600113dd62357d01e0bd672f2043045f56d7d0223882cc2c5fd9f8927973ae546f99b68971bbe66fd34d66ec6fd62fafd65ca52abcd7
7
+ data.tar.gz: 06eb21fec07c05755aacd0a214cd16594f47a7033e723a2798df26692a0c15ccd9d5ed614588cc805620d303bb0f1a1c577c66b25c92c6dbbaa40283023cd662
data/README.md CHANGED
@@ -22,7 +22,7 @@ This is the official Ruby SDK for [Braintrust](https://www.braintrust.dev), for
22
22
  - [Viewing traces](#viewing-traces)
23
23
  - [Evals](#evals)
24
24
  - [Datasets](#datasets)
25
- - [Remote scorers](#remote-scorers)
25
+ - [Scorers](#scorers)
26
26
  - [Documentation](#documentation)
27
27
  - [Troubleshooting](#troubleshooting)
28
28
  - [Contributing](#contributing)
@@ -260,7 +260,7 @@ Braintrust::Eval.run(
260
260
 
261
261
  ### Datasets
262
262
 
263
- Load test cases from a Braintrust dataset:
263
+ Use test cases from a Braintrust dataset:
264
264
 
265
265
  ```ruby
266
266
  Braintrust::Eval.run(
@@ -271,7 +271,22 @@ Braintrust::Eval.run(
271
271
  )
272
272
  ```
273
273
 
274
- ### Remote scorers
274
+ Or define test cases inline with metadata and tags:
275
+
276
+ ```ruby
277
+ Braintrust::Eval.run(
278
+ project: "my-project",
279
+ experiment: "classifier-v1",
280
+ cases: [
281
+ {input: "apple", expected: "fruit", tags: ["produce"], metadata: {difficulty: "easy"}},
282
+ {input: "salmon", expected: "protein", tags: ["seafood"], metadata: {difficulty: "medium"}}
283
+ ],
284
+ task: ->(input) { classify(input) },
285
+ scorers: [...]
286
+ )
287
+ ```
288
+
289
+ ### Scorers
275
290
 
276
291
  Use scoring functions defined in Braintrust:
277
292
 
@@ -281,7 +296,22 @@ Braintrust::Eval.run(
281
296
  cases: [...],
282
297
  task: ->(input) { ... },
283
298
  scorers: [
284
- Braintrust::Scorer.remote("my-project", "accuracy-scorer")
299
+ Braintrust::Eval::Functions.scorer(project: "my-project", slug: "accuracy-scorer")
300
+ ]
301
+ )
302
+ ```
303
+
304
+ Or define scorers inline with `Eval.scorer`:
305
+
306
+ ```ruby
307
+ Braintrust::Eval.run(
308
+ project: "my-project",
309
+ cases: [...],
310
+ task: ->(input) { ... },
311
+ scorers: [
312
+ Braintrust::Eval.scorer("exact_match") do |input, expected, output|
313
+ output == expected ? 1.0 : 0.0
314
+ end
285
315
  ]
286
316
  )
287
317
  ```
@@ -4,6 +4,7 @@ require "net/http"
4
4
  require "json"
5
5
  require "uri"
6
6
  require_relative "../logger"
7
+ require_relative "../internal/http"
7
8
 
8
9
  module Braintrust
9
10
  class API
@@ -85,7 +86,7 @@ module Braintrust
85
86
  # @param id [String] Dataset UUID
86
87
  # @return [String] Permalink URL
87
88
  def permalink(id:)
88
- "#{@state.app_url}/app/#{@state.org_name}/object?object_type=dataset&object_id=#{id}"
89
+ @state.object_permalink(object_type: "dataset", object_id: id)
89
90
  end
90
91
 
91
92
  # Fetch records from dataset using BTQL
@@ -111,6 +112,7 @@ module Braintrust
111
112
  payload[:version] = version if version
112
113
 
113
114
  response = http_post_json_raw("/btql", payload)
115
+ Braintrust::Internal::Http.decompress_response!(response)
114
116
 
115
117
  # Parse JSONL response
116
118
  records = response.body.lines
@@ -158,9 +160,7 @@ module Braintrust
158
160
  start_time = Time.now
159
161
  Log.debug("[API] #{method.upcase} #{uri}")
160
162
 
161
- http = Net::HTTP.new(uri.host, uri.port)
162
- http.use_ssl = (uri.scheme == "https")
163
- response = http.request(request)
163
+ response = Braintrust::Internal::Http.with_redirects(uri, request)
164
164
 
165
165
  duration_ms = ((Time.now - start_time) * 1000).round(2)
166
166
  Log.debug("[API] #{method.upcase} #{uri} -> #{response.code} (#{duration_ms}ms, #{response.body.bytesize} bytes)")
@@ -4,6 +4,7 @@ require "net/http"
4
4
  require "json"
5
5
  require "uri"
6
6
  require_relative "../logger"
7
+ require_relative "../internal/http"
7
8
 
8
9
  module Braintrust
9
10
  class API
@@ -242,9 +243,7 @@ module Braintrust
242
243
  start_time = Time.now
243
244
  Log.debug("[API] #{method.upcase} #{uri}")
244
245
 
245
- http = Net::HTTP.new(uri.host, uri.port)
246
- http.use_ssl = (uri.scheme == "https")
247
- response = http.request(request)
246
+ response = Braintrust::Internal::Http.with_redirects(uri, request)
248
247
 
249
248
  duration_ms = ((Time.now - start_time) * 1000).round(2)
250
249
  Log.debug("[API] #{method.upcase} #{uri} -> #{response.code} (#{duration_ms}ms, #{response.body.bytesize} bytes)")
@@ -4,6 +4,7 @@ require "net/http"
4
4
  require "json"
5
5
  require "uri"
6
6
  require_relative "../../logger"
7
+ require_relative "../../internal/http"
7
8
 
8
9
  module Braintrust
9
10
  class API
@@ -44,12 +45,7 @@ module Braintrust
44
45
  request = Net::HTTP::Post.new(uri)
45
46
  request["Authorization"] = "Bearer #{api_key}"
46
47
 
47
- http = Net::HTTP.new(uri.hostname, uri.port)
48
- http.use_ssl = true if uri.scheme == "https"
49
-
50
- response = http.start do |http_session|
51
- http_session.request(request)
52
- end
48
+ response = Braintrust::Internal::Http.with_redirects(uri, request)
53
49
 
54
50
  Log.debug("Login: received response [#{response.code}]")
55
51
 
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+ require_relative "../../internal/http"
7
+
8
module Braintrust
  class API
    module Internal
      # Client for the experiment endpoint used while setting up an eval run.
      # Internal: not part of the public API - use through Eval.run.
      class Experiments
        # @param state [Object] Provides `api_url` and `api_key` for requests
        def initialize(state)
          @state = state
        end

        # Create an experiment via POST /v1/experiment.
        #
        # Optional fields are only included in the request payload when they
        # are truthy, so nil values are never sent.
        #
        # @param name [String] Experiment name
        # @param project_id [String] Project ID
        # @param ensure_new [Boolean] If true (default), fail if exists; if false, return existing
        # @param tags [Array<String>, nil] Optional tags
        # @param metadata [Hash, nil] Optional metadata
        # @param dataset_id [String, nil] Optional dataset ID, sent as `dataset_id`
        # @param dataset_version [String, nil] Optional dataset version, sent as `dataset_version`
        # @return [Hash] Parsed JSON response with "id", "name", "project_id", etc.
        # @raise [Error] When the response is not a 2xx success
        def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil,
          dataset_id: nil, dataset_version: nil)
          endpoint = URI("#{@state.api_url}/v1/experiment")

          body = {project_id: project_id, name: name, ensure_new: ensure_new}
          optional_fields = {
            tags: tags,
            metadata: metadata,
            dataset_id: dataset_id,
            dataset_version: dataset_version
          }
          optional_fields.each { |key, value| body[key] = value if value }

          post = Net::HTTP::Post.new(endpoint)
          post["Content-Type"] = "application/json"
          post["Authorization"] = "Bearer #{@state.api_key}"
          post.body = JSON.dump(body)

          response = Braintrust::Internal::Http.with_redirects(endpoint, post)
          return JSON.parse(response.body) if response.is_a?(Net::HTTPSuccess)

          raise Error, "HTTP #{response.code} for POST #{endpoint}: #{response.body}"
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+ require_relative "../../internal/http"
7
+
8
module Braintrust
  class API
    module Internal
      # Client for the project endpoint used while setting up an eval run.
      # Internal: not part of the public API - use through Eval.run.
      class Projects
        # @param state [Object] Provides `api_url` and `api_key` for requests
        def initialize(state)
          @state = state
        end

        # Create or get a project by name (idempotent) via POST /v1/project.
        #
        # @param name [String] Project name
        # @return [Hash] Parsed JSON response with "id", "name", "org_id", etc.
        # @raise [Error] When the response is not a 2xx success
        def create(name:)
          endpoint = URI("#{@state.api_url}/v1/project")

          post = Net::HTTP::Post.new(endpoint)
          post["Content-Type"] = "application/json"
          post["Authorization"] = "Bearer #{@state.api_key}"
          post.body = JSON.dump({name: name})

          response = Braintrust::Internal::Http.with_redirects(endpoint, post)
          return JSON.parse(response.body) if response.is_a?(Net::HTTPSuccess)

          raise Error, "HTTP #{response.code} for POST #{endpoint}: #{response.body}"
        end
      end
    end
  end
end
@@ -25,5 +25,22 @@ module Braintrust
25
25
  def functions
26
26
  @functions ||= API::Functions.new(self)
27
27
  end
28
+
29
+ # Login to Braintrust API (idempotent)
30
+ # @return [self]
31
+ def login
32
+ @state.login
33
+ self
34
+ end
35
+
36
+ # Generate a permalink URL to view an object in the Braintrust UI
37
+ # This is for the /object endpoint (experiments, datasets, etc.)
38
+ # For trace span permalinks, use Trace.permalink instead.
39
+ # @param object_type [String] Type of object (e.g., "experiment", "dataset")
40
+ # @param object_id [String] Object UUID
41
+ # @return [String] Permalink URL
42
+ def object_permalink(object_type:, object_id:)
43
+ @state.object_permalink(object_type: object_type, object_id: object_id)
44
+ end
28
45
  end
29
46
  end
@@ -169,8 +169,8 @@ module Braintrust
169
169
  input_messages = []
170
170
 
171
171
  begin
172
- if params[:system]
173
- system_content = params[:system]
172
+ if params[:system_]
173
+ system_content = params[:system_]
174
174
  if system_content.is_a?(Array)
175
175
  system_text = system_content.map { |blk|
176
176
  blk.is_a?(Hash) ? blk[:text] : blk
@@ -98,8 +98,8 @@ module Braintrust
98
98
  def set_input(span, params)
99
99
  input_messages = []
100
100
 
101
- if params[:system]
102
- system_content = params[:system]
101
+ if params[:system_]
102
+ system_content = params[:system_]
103
103
  if system_content.is_a?(Array)
104
104
  system_text = system_content.map { |blk|
105
105
  blk.is_a?(Hash) ? blk[:text] : blk
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "api"
4
+ require_relative "internal/origin"
5
+
6
module Braintrust
  # High-level interface for working with Braintrust datasets.
  # Provides both eager loading and lazy enumeration for efficient access to dataset records.
  #
  # @example Basic usage (uses global state)
  #   Braintrust.init(api_key: "...")
  #   dataset = Braintrust::Dataset.new(name: "my-dataset", project: "my-project")
  #   dataset.each { |record| puts record[:input] }
  #
  # @example With explicit API client
  #   api = Braintrust::API.new(state: my_state)
  #   dataset = Braintrust::Dataset.new(name: "my-dataset", project: "my-project", api: api)
  #
  # @example Eager loading for small datasets
  #   records = dataset.fetch_all(limit: 100)
  #
  # @example Using Enumerable methods
  #   dataset.take(10)
  #   dataset.select { |r| r[:tags]&.include?("important") }
  #
  # @example With version pinning
  #   dataset = Braintrust::Dataset.new(name: "my-dataset", project: "my-project", version: "1.0")
  class Dataset
    include Enumerable

    # Default number of records to fetch per API page
    DEFAULT_PAGE_SIZE = 1000

    # Raw record keys that are copied (as symbols) into normalized records
    RECORD_FIELDS = %w[input expected tags metadata].freeze
    private_constant :RECORD_FIELDS

    attr_reader :name, :project, :version

    # Initialize a dataset reference
    # @param name [String, nil] Dataset name (required if id not provided)
    # @param id [String, nil] Dataset UUID (required if name not provided)
    # @param project [String, nil] Project name (required if using name)
    # @param version [String, nil] Optional version to pin to
    # @param api [API, nil] Braintrust API client (defaults to API.new using global state)
    # @raise [ArgumentError] If neither name nor id is given, or name is given without project
    def initialize(name: nil, id: nil, project: nil, version: nil, api: nil)
      @name = name
      @provided_id = id
      @project = project
      @version = version
      @api = api || API.new
      @resolved_id = nil
      @metadata = nil

      validate_params!
    end

    # Get the dataset ID, resolving from name if necessary
    # @return [String] Dataset UUID
    def id
      return @provided_id if @provided_id
      resolve_name! unless @resolved_id
      @resolved_id
    end

    # Get the dataset metadata from the API
    # Makes an API call if metadata hasn't been fetched yet.
    # Note: When initialized with name, metadata is fetched during name resolution.
    # When initialized with ID, this triggers a separate get_by_id call.
    # @return [Hash] Dataset metadata as returned by the API
    def metadata
      fetch_metadata! unless @metadata
      @metadata
    end

    # Fetch all records eagerly into an array
    # @param limit [Integer, nil] Maximum records to return (nil for all);
    #   a limit of zero or less returns an empty array without calling the API
    # @return [Array<Hash>] Array of records with :input, :expected, :tags, :metadata, :origin
    def fetch_all(limit: nil)
      records = []
      each_record(limit: limit) { |record| records << record }
      records
    end

    # Iterate over records lazily (implements Enumerable)
    # Fetches pages on demand for memory efficiency with large datasets.
    # @yield [Hash] Each record with :input, :expected, :tags, :metadata, :origin
    # @return [Enumerator] When called without a block
    def each(&block)
      return enum_for(:each) unless block_given?
      each_record(&block)
    end

    private

    def validate_params!
      if @provided_id.nil? && @name.nil?
        raise ArgumentError, "must specify either :name or :id"
      end

      if @name && @project.nil?
        raise ArgumentError, ":project is required when using :name"
      end
    end

    # Resolve dataset name to ID (also fetches metadata as side effect)
    def resolve_name!
      @metadata = @api.datasets.get(project_name: @project, name: @name)
      @resolved_id = @metadata["id"]
    end

    # Fetch metadata explicitly (for when ID was provided directly)
    def fetch_metadata!
      if @provided_id
        @metadata = @api.datasets.get_by_id(id: @provided_id)
      else
        resolve_name! unless @metadata
      end
    end

    # Core iteration with pagination
    # @param limit [Integer, nil] Maximum records to yield
    def each_record(limit: nil, &block)
      # A non-positive limit can never yield a record. Without this guard the
      # loop below would request a page of size <= 0 and, because the limit is
      # only checked after a record is yielded, could still emit one record.
      return if limit && limit <= 0

      dataset_id = id # Resolve once
      cursor = nil
      count = 0

      loop do
        # Never ask for more than what is still needed to reach the limit
        page_limit = limit ? [DEFAULT_PAGE_SIZE, limit - count].min : DEFAULT_PAGE_SIZE

        result = @api.datasets.fetch(
          id: dataset_id,
          limit: page_limit,
          cursor: cursor,
          version: @version
        )

        result[:records].each do |raw_record|
          block.call(build_record(raw_record, dataset_id))
          count += 1
          break if limit && count >= limit
        end

        # Stop if we've hit the limit or no more pages
        break if limit && count >= limit

        cursor = result[:cursor]
        break unless cursor
      end
    end

    # Build a normalized record hash from raw API response.
    # Only keys present in the raw record are copied, so absent fields stay absent
    # rather than becoming nil entries.
    # @param raw [Hash] Raw record from API (string keys)
    # @param dataset_id [String] Dataset ID for origin
    # @return [Hash] Normalized record (symbol keys) with :origin when available
    def build_record(raw, dataset_id)
      record = RECORD_FIELDS.each_with_object({}) do |field, acc|
        acc[field.to_sym] = raw[field] if raw.key?(field)
      end

      origin = build_origin(raw, dataset_id)
      record[:origin] = origin if origin

      record
    end

    # Build origin JSON for tracing/linking
    # @param raw [Hash] Raw record from API
    # @param dataset_id [String] Dataset ID (fallback if not in record)
    # @return [String, nil] JSON-serialized origin, or nil if record lacks required fields
    def build_origin(raw, dataset_id)
      return nil unless raw["id"] && raw["_xact_id"]

      Internal::Origin.to_json(
        object_type: "dataset",
        object_id: raw["dataset_id"] || dataset_id,
        id: raw["id"],
        xact_id: raw["_xact_id"],
        created: raw["created"]
      )
    end
  end
end
@@ -7,6 +7,8 @@ module Braintrust
7
7
  # @attr expected [Object, nil] The expected output (optional)
8
8
  # @attr tags [Array<String>, nil] Optional tags for filtering/grouping
9
9
  # @attr metadata [Hash, nil] Optional metadata for the case
10
- Case = Struct.new(:input, :expected, :tags, :metadata, keyword_init: true)
10
+ # @attr origin [String, nil] JSON-serialized origin pointer for cases from remote sources (e.g., datasets).
11
+ # Contains: object_type, object_id, id, _xact_id, created
12
+ Case = Struct.new(:input, :expected, :tags, :metadata, :origin, keyword_init: true)
11
13
  end
12
14
  end
@@ -98,9 +98,18 @@ module Braintrust
98
98
  # The remote scorer receives all scorer arguments
99
99
  result = api.functions.invoke(id: function_id, input: scorer_input)
100
100
 
101
- # Parse result as float score
102
- # The remote function should return a number
103
- score = result.is_a?(Numeric) ? result.to_f : result.to_s.to_f
101
+ score = case result
102
+ when Hash
103
+ if result.key?("score")
104
+ result["score"].to_f
105
+ else
106
+ raise Error, "Hash result must contain 'score' key"
107
+ end
108
+ when String
109
+ result.to_f
110
+ else
111
+ raise Error, "Unsupported result type: #{result.class}"
112
+ end
104
113
 
105
114
  span.set_attribute("braintrust.output_json", JSON.dump(score))
106
115
  score
@@ -18,14 +18,14 @@ module Braintrust
18
18
  MAX_PARALLELISM = Internal::ThreadPool::MAX_PARALLELISM
19
19
 
20
20
  def initialize(experiment_id:, experiment_name:, project_id:, project_name:,
21
- task:, scorers:, state:, tracer_provider: nil)
21
+ task:, scorers:, api:, tracer_provider: nil)
22
22
  @experiment_id = experiment_id
23
23
  @experiment_name = experiment_name
24
24
  @project_id = project_id
25
25
  @project_name = project_name
26
26
  @task = task
27
27
  @scorers = normalize_scorers(scorers)
28
- @state = state
28
+ @api = api
29
29
  @tracer_provider = tracer_provider || OpenTelemetry.tracer_provider
30
30
  @tracer = @tracer_provider.tracer("braintrust-eval")
31
31
  @parent_attr = "experiment_id:#{experiment_id}"
@@ -61,7 +61,7 @@ module Braintrust
61
61
  duration = Time.now - start_time
62
62
 
63
63
  # Generate permalink
64
- permalink = "#{state.app_url}/app/#{state.org_name}/object?object_type=experiment&object_id=#{experiment_id}"
64
+ permalink = @api.object_permalink(object_type: "experiment", object_id: experiment_id)
65
65
 
66
66
  Result.new(
67
67
  experiment_id: experiment_id,
@@ -78,7 +78,7 @@ module Braintrust
78
78
  private
79
79
 
80
80
  attr_reader :experiment_id, :experiment_name, :project_id, :project_name,
81
- :task, :scorers, :state, :tracer, :parent_attr
81
+ :task, :scorers, :tracer, :parent_attr
82
82
 
83
83
  # Run a single test case with OpenTelemetry tracing
84
84
  # Creates eval span (parent) with task and score as children
@@ -116,6 +116,9 @@ module Braintrust
116
116
  set_json_attr(eval_span, "braintrust.input_json", test_case.input)
117
117
  set_json_attr(eval_span, "braintrust.output_json", output)
118
118
  set_json_attr(eval_span, "braintrust.expected", test_case.expected) if test_case.expected
119
+
120
+ # Set origin for cases from remote sources (already JSON-serialized)
121
+ eval_span.set_attribute("braintrust.origin", test_case.origin) if test_case.origin
119
122
  end
120
123
  end
121
124
 
@@ -2,7 +2,9 @@
2
2
 
3
3
  require_relative "eval/scorer"
4
4
  require_relative "eval/runner"
5
- require_relative "internal/experiments"
5
+ require_relative "api/internal/projects"
6
+ require_relative "api/internal/experiments"
7
+ require_relative "dataset"
6
8
 
7
9
  require "opentelemetry/sdk"
8
10
  require "json"
@@ -199,39 +201,53 @@ module Braintrust
199
201
  # @param metadata [Hash] Optional experiment metadata
200
202
  # @param update [Boolean] If true, allow reusing existing experiment (default: false)
201
203
  # @param quiet [Boolean] If true, suppress result output (default: false)
202
- # @param state [State, nil] Braintrust state (defaults to global state)
204
+ # @param api [API, nil] Braintrust API client (defaults to API.new using global state)
203
205
  # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider (defaults to global)
204
206
  # @return [Result]
205
207
  def run(project:, experiment:, task:, scorers:,
206
208
  cases: nil, dataset: nil,
207
209
  parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
208
- state: nil, tracer_provider: nil)
210
+ api: nil, tracer_provider: nil)
209
211
  # Validate required parameters
210
212
  validate_params!(project: project, experiment: experiment,
211
213
  cases: cases, dataset: dataset, task: task, scorers: scorers)
212
214
 
213
- # Get state from parameter or global
214
- state ||= Braintrust.current_state
215
- raise Error, "No state available" unless state
215
+ # Get API from parameter or create from global state
216
+ api ||= API.new
216
217
 
217
- # Ensure state is logged in (to populate org_name, etc.)
218
+ # Ensure logged in (to populate org_name, etc.)
218
219
  # login is idempotent and returns early if already logged in
219
- state.login
220
+ api.login
220
221
 
221
222
  # Resolve dataset to cases if dataset parameter provided
223
+ dataset_id = nil
224
+ dataset_version = nil
225
+
222
226
  if dataset
223
- cases = resolve_dataset(dataset, project, state)
227
+ resolved = resolve_dataset(dataset, project, api)
228
+ cases = resolved[:cases]
229
+ dataset_id = resolved[:dataset_id]
230
+ dataset_version = resolved[:dataset_version]
224
231
  end
225
232
 
226
- # Register project and experiment via API
227
- result = Internal::Experiments.get_or_create(
228
- experiment, project, state: state,
229
- tags: tags, metadata: metadata, update: update
233
+ # Register project and experiment via internal API
234
+ projects_api = API::Internal::Projects.new(api.state)
235
+ experiments_api = API::Internal::Experiments.new(api.state)
236
+
237
+ project_result = projects_api.create(name: project)
238
+ experiment_result = experiments_api.create(
239
+ name: experiment,
240
+ project_id: project_result["id"],
241
+ ensure_new: !update,
242
+ tags: tags,
243
+ metadata: metadata,
244
+ dataset_id: dataset_id,
245
+ dataset_version: dataset_version
230
246
  )
231
247
 
232
- experiment_id = result[:experiment_id]
233
- project_id = result[:project_id]
234
- project_name = result[:project_name]
248
+ experiment_id = experiment_result["id"]
249
+ project_id = project_result["id"]
250
+ project_name = project_result["name"]
235
251
 
236
252
  # Instantiate Runner and run evaluation
237
253
  runner = Runner.new(
@@ -241,7 +257,7 @@ module Braintrust
241
257
  project_name: project_name,
242
258
  task: task,
243
259
  scorers: scorers,
244
- state: state,
260
+ api: api,
245
261
  tracer_provider: tracer_provider
246
262
  )
247
263
  result = runner.run(cases, parallelism: parallelism)
@@ -284,85 +300,38 @@ module Braintrust
284
300
  end
285
301
  end
286
302
 
287
- # Resolve dataset parameter to an array of case records
288
- # @param dataset [String, Hash] Dataset specifier
289
- # @param project [String] Project name (used as default if not specified in hash)
290
- # @param state [State] Braintrust state
291
- # @return [Array<Hash>] Array of case records
292
- def resolve_dataset(dataset, project, state)
293
- require_relative "api"
303
+ # Resolve dataset parameter to cases with metadata for experiment linking
304
+ # @param dataset [String, Hash, Dataset] Dataset specifier or instance
305
+ # @param project [String] Project name (used as default if not specified)
306
+ # @param api [API] Braintrust API client
307
+ # @return [Hash] Hash with :cases, :dataset_id, and :dataset_version
308
+ def resolve_dataset(dataset, project, api)
309
+ limit = nil
294
310
 
295
- # Parse dataset parameter
296
- dataset_opts = case dataset
311
+ dataset_obj = case dataset
312
+ when Dataset
313
+ dataset
297
314
  when String
298
- # String: dataset name in same project
299
- {name: dataset, project: project}
315
+ Dataset.new(name: dataset, project: project, api: api)
300
316
  when Hash
301
- # Hash: explicit options
302
- dataset.dup
303
- else
304
- raise ArgumentError, "dataset must be String or Hash, got #{dataset.class}"
305
- end
306
-
307
- # Apply defaults
308
- dataset_opts[:project] ||= project
309
-
310
- # Create API client
311
- api = API.new(state: state)
312
-
313
- # Resolve dataset ID
314
- dataset_id = if dataset_opts[:id]
315
- # ID provided directly
316
- dataset_opts[:id]
317
- elsif dataset_opts[:name]
318
- # Fetch by name + project
319
- metadata = api.datasets.get(
320
- project_name: dataset_opts[:project],
321
- name: dataset_opts[:name]
322
- )
323
- metadata["id"]
317
+ opts = dataset.dup
318
+ limit = opts.delete(:limit)
319
+ opts[:project] ||= project
320
+ opts[:api] = api
321
+ Dataset.new(**opts)
324
322
  else
325
- raise ArgumentError, "dataset hash must specify either :name or :id"
323
+ raise ArgumentError, "dataset must be String, Hash, or Dataset, got #{dataset.class}"
326
324
  end
327
325
 
328
- # Fetch records with pagination
329
- limit_per_page = 1000
330
- max_records = dataset_opts[:limit]
331
- version = dataset_opts[:version]
332
- records = []
333
- cursor = nil
334
-
335
- loop do
336
- result = api.datasets.fetch(
337
- id: dataset_id,
338
- limit: limit_per_page,
339
- cursor: cursor,
340
- version: version
341
- )
342
-
343
- records.concat(result[:records])
326
+ cases = dataset_obj.fetch_all(limit: limit)
344
327
 
345
- # Check if we've hit the user-specified limit
346
- if max_records && records.length >= max_records
347
- records = records.take(max_records)
348
- break
349
- end
328
+ # Use pinned version if available, otherwise compute from max(_xact_id)
329
+ version = dataset_obj.version
330
+ version ||= cases
331
+ .filter_map { |c| c[:origin] && JSON.parse(c[:origin])["_xact_id"] }
332
+ .max
350
333
 
351
- # Check if there's more data
352
- cursor = result[:cursor]
353
- break unless cursor
354
- end
355
-
356
- # Filter records to only include Case-compatible fields
357
- # Case accepts: input, expected, tags, metadata
358
- records.map do |record|
359
- filtered = {}
360
- filtered[:input] = record["input"] if record.key?("input")
361
- filtered[:expected] = record["expected"] if record.key?("expected")
362
- filtered[:tags] = record["tags"] if record.key?("tags")
363
- filtered[:metadata] = record["metadata"] if record.key?("metadata")
364
- filtered
365
- end
334
+ {cases: cases, dataset_id: dataset_obj.id, dataset_version: version}
366
335
  end
367
336
  end
368
337
  end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+ require "zlib"
6
+ require "stringio"
7
+ require_relative "../logger"
8
+
9
module Braintrust
  module Internal
    # HTTP utilities for redirect following and response decompression.
    # Drop-in enhancement for raw Net::HTTP request calls throughout the SDK.
    module Http
      DEFAULT_MAX_REDIRECTS = 5

      # Execute an HTTP request, following redirects as needed.
      #
      # The Authorization header of the initial request is only forwarded to
      # redirect targets on the same host as the initial URI. Once a redirect
      # chain leaves that host, later hops on the foreign host never receive it.
      #
      # @param uri [URI] The request URI
      # @param request [Net::HTTPRequest] The prepared request object
      # @param max_redirects [Integer] Maximum number of redirects to follow
      # @return [Net::HTTPResponse] The final response
      # @raise [Braintrust::Error] On too many redirects or missing Location header
      def self.with_redirects(uri, request, max_redirects: DEFAULT_MAX_REDIRECTS)
        # Host the credentials were issued for; every hop is compared against
        # this, not against the previous hop (which may already be foreign).
        origin_uri = uri
        original_request = request

        response = perform_request(uri, request)
        redirects = 0

        while response.is_a?(Net::HTTPRedirection)
          redirects += 1
          if redirects > max_redirects
            raise Error, "Too many redirects (max #{max_redirects})"
          end

          location = response["location"]
          unless location
            raise Error, "Redirect response #{response.code} without Location header"
          end

          # Relative Location values are resolved against the current URI
          redirect_uri = URI(location)
          redirect_uri = uri + redirect_uri unless redirect_uri.host

          Log.debug("[HTTP] Following #{response.code} redirect to #{redirect_uri}")

          request = build_redirect_request(response, redirect_uri, original_request, origin_uri)
          uri = redirect_uri
          response = perform_request(uri, request)
        end

        response
      end

      # Decompress an HTTP response body in place based on Content-Encoding.
      # No-op if the response has no recognized encoding or no body.
      #
      # @param response [Net::HTTPResponse] The response to decompress
      # @return [void]
      def self.decompress_response!(response)
        encoding = response["content-encoding"]&.downcase
        return unless encoding == "gzip" || encoding == "x-gzip"

        body = response.body
        # Nothing to inflate (e.g. bodiless responses); avoid StringIO.new(nil)
        return if body.nil? || body.empty?

        # wrap closes the reader even when reading raises
        inflated = Zlib::GzipReader.wrap(StringIO.new(body), &:read)
        body.replace(inflated)
        response.delete("content-encoding")
        nil
      end

      # Send a single request to the host/port of +uri+, using TLS for https.
      def self.perform_request(uri, request)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = (uri.scheme == "https")
        http.request(request)
      end
      private_class_method :perform_request

      # Build the follow-up request for a redirect response.
      #
      # @param response [Net::HTTPResponse] The redirect response
      # @param redirect_uri [URI] Absolute target of the redirect
      # @param original_request [Net::HTTPRequest] The caller's initial request
      # @param original_uri [URI] URI whose host is allowed to receive Authorization
      # @return [Net::HTTPRequest]
      def self.build_redirect_request(response, redirect_uri, original_request, original_uri)
        if response.code == "307" || response.code == "308"
          # 307/308 preserve method and body
          request = original_request.class.new(redirect_uri)
          request.body = original_request.body
          request["Content-Type"] = original_request["Content-Type"] if original_request["Content-Type"]
        else
          # 301, 302, 303: follow with GET, no body
          request = Net::HTTP::Get.new(redirect_uri)
        end

        # Strip Authorization when redirecting to a different host (e.g. S3)
        if original_uri.host == redirect_uri.host
          auth = original_request["Authorization"]
          request["Authorization"] = auth if auth
        end

        request
      end
      private_class_method :build_redirect_request
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
module Braintrust
  module Internal
    # Origin provides serialization for source object pointers in Braintrust.
    # Used internally to link spans back to their source records (e.g., dataset rows).
    module Origin
      # Serialize an origin pointer to JSON
      # @param object_type [String] Type of source object (e.g., "dataset", "playground_logs")
      # @param object_id [String] ID of the source object
      # @param id [String] ID of the specific record within the source
      # @param xact_id [String] Transaction ID (emitted under the "_xact_id" key)
      # @param created [String, nil] Creation timestamp; optional, serialized as null when omitted
      # @return [String] JSON-serialized origin
      def self.to_json(object_type:, object_id:, id:, xact_id:, created: nil)
        JSON.dump({
          object_type: object_type,
          object_id: object_id,
          id: id,
          _xact_id: xact_id,
          created: created
        })
      end
    end
  end
end
@@ -139,6 +139,16 @@ module Braintrust
139
139
  end
140
140
  end
141
141
 
142
# Build the Braintrust UI link for an object (experiment, dataset, ...).
# The link targets the /object endpoint under the app URL and org name held
# by this instance. Trace span permalinks come from Trace.permalink instead.
# Values are inserted into the URL as-is, without URL-encoding.
# @param object_type [String] Type of object (e.g., "experiment", "dataset")
# @param object_id [String] Object UUID
# @return [String] Permalink URL
def object_permalink(object_type:, object_id:)
  format(
    "%s/app/%s/object?object_type=%s&object_id=%s",
    @app_url, @org_name, object_type, object_id
  )
end
151
+
142
152
  # Login to Braintrust API in a background thread with retry logic
143
153
  # Retries indefinitely with exponential backoff until success
144
154
  # Idempotent: returns early if already logged in
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "net/http"
4
4
  require_relative "../internal/encoding"
5
+ require_relative "../internal/http"
5
6
  require "uri"
6
7
 
7
8
  module Braintrust
@@ -91,7 +92,8 @@ module Braintrust
91
92
  # att = Braintrust::Trace::Attachment.from_url("https://example.com/image.png")
92
93
  def self.from_url(url)
93
94
  uri = URI.parse(url)
94
- response = Net::HTTP.get_response(uri)
95
+ request = Net::HTTP::Get.new(uri)
96
+ response = Braintrust::Internal::Http.with_redirects(uri, request)
95
97
 
96
98
  unless response.is_a?(Net::HTTPSuccess)
97
99
  raise StandardError, "Failed to fetch URL: #{response.code} #{response.message}"
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Braintrust
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.4"
5
5
  end
data/lib/braintrust.rb CHANGED
@@ -6,7 +6,7 @@ require_relative "braintrust/state"
6
6
  require_relative "braintrust/trace"
7
7
  require_relative "braintrust/api"
8
8
  require_relative "braintrust/prompt"
9
- require_relative "braintrust/internal/experiments"
9
+ require_relative "braintrust/dataset"
10
10
  require_relative "braintrust/internal/env"
11
11
  require_relative "braintrust/eval"
12
12
  require_relative "braintrust/contrib"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: braintrust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braintrust
@@ -193,6 +193,8 @@ files:
193
193
  - lib/braintrust/api/datasets.rb
194
194
  - lib/braintrust/api/functions.rb
195
195
  - lib/braintrust/api/internal/auth.rb
196
+ - lib/braintrust/api/internal/experiments.rb
197
+ - lib/braintrust/api/internal/projects.rb
196
198
  - lib/braintrust/config.rb
197
199
  - lib/braintrust/contrib.rb
198
200
  - lib/braintrust/contrib/anthropic/deprecated.rb
@@ -228,6 +230,7 @@ files:
228
230
  - lib/braintrust/contrib/setup.rb
229
231
  - lib/braintrust/contrib/support/openai.rb
230
232
  - lib/braintrust/contrib/support/otel.rb
233
+ - lib/braintrust/dataset.rb
231
234
  - lib/braintrust/eval.rb
232
235
  - lib/braintrust/eval/case.rb
233
236
  - lib/braintrust/eval/cases.rb
@@ -239,7 +242,8 @@ files:
239
242
  - lib/braintrust/eval/summary.rb
240
243
  - lib/braintrust/internal/encoding.rb
241
244
  - lib/braintrust/internal/env.rb
242
- - lib/braintrust/internal/experiments.rb
245
+ - lib/braintrust/internal/http.rb
246
+ - lib/braintrust/internal/origin.rb
243
247
  - lib/braintrust/internal/template.rb
244
248
  - lib/braintrust/internal/thread_pool.rb
245
249
  - lib/braintrust/internal/time.rb
@@ -1,129 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "net/http"
4
- require "json"
5
- require "uri"
6
- require_relative "../logger"
7
-
8
- module Braintrust
9
- module Internal
10
- # Experiments module provides internal API methods for registering projects and experiments
11
- # Methods are marked private to prevent direct user access - use through Eval.run
12
- module Experiments
13
- # Public convenience method to register/get both project and experiment
14
- # @param experiment_name [String] The experiment name
15
- # @param project_name [String] The project name
16
- # @param state [State] Braintrust state with API key and URL
17
- # @param tags [Array<String>, nil] Optional experiment tags
18
- # @param metadata [Hash, nil] Optional experiment metadata
19
- # @param update [Boolean] If true, allow reusing existing experiment (default: false)
20
- # @return [Hash] Hash with :experiment_id, :experiment_name, :project_id, :project_name
21
- def self.get_or_create(experiment_name, project_name, state:,
22
- tags: nil, metadata: nil, update: false)
23
- # Register/get project first
24
- project = register_project(project_name, state)
25
-
26
- # Then register/get experiment
27
- experiment = register_experiment(
28
- experiment_name,
29
- project["id"],
30
- state,
31
- tags: tags,
32
- metadata: metadata,
33
- update: update
34
- )
35
-
36
- {
37
- experiment_id: experiment["id"],
38
- experiment_name: experiment["name"],
39
- project_id: project["id"],
40
- project_name: project["name"]
41
- }
42
- end
43
-
44
- # Register or get a project by name
45
- # POST /v1/project with {name: "project-name"}
46
- # Returns existing project if already exists
47
- # @param name [String] Project name
48
- # @param state [State] Braintrust state
49
- # @return [Hash] Project data with "id", "name", "org_id", etc.
50
- # @raise [Braintrust::Error] if API call fails
51
- def self.register_project(name, state)
52
- Log.debug("Registering project: #{name}")
53
-
54
- uri = URI("#{state.api_url}/v1/project")
55
- request = Net::HTTP::Post.new(uri)
56
- request["Content-Type"] = "application/json"
57
- request["Authorization"] = "Bearer #{state.api_key}"
58
- request.body = JSON.dump({name: name})
59
-
60
- http = Net::HTTP.new(uri.hostname, uri.port)
61
- http.use_ssl = true if uri.scheme == "https"
62
-
63
- response = http.start do |http_session|
64
- http_session.request(request)
65
- end
66
-
67
- Log.debug("Register project response: [#{response.code}]")
68
-
69
- # Handle response codes
70
- unless response.is_a?(Net::HTTPSuccess)
71
- raise Error, "Failed to register project '#{name}': [#{response.code}] #{response.body}"
72
- end
73
-
74
- project = JSON.parse(response.body)
75
- Log.debug("Project registered: #{project["id"]} (#{project["name"]})")
76
- project
77
- end
78
- private_class_method :register_project
79
-
80
- # Register or get an experiment by name
81
- # POST /v1/experiment with {project_id:, name:, ensure_new:, tags:[], metadata:{}}
82
- # @param name [String] Experiment name
83
- # @param project_id [String] Project ID
84
- # @param state [State] Braintrust state
85
- # @param tags [Array<String>, nil] Optional tags
86
- # @param metadata [Hash, nil] Optional metadata
87
- # @param update [Boolean] If true, allow reusing existing experiment (ensure_new: false)
88
- # @return [Hash] Experiment data with "id", "name", "project_id", etc.
89
- # @raise [Braintrust::Error] if API call fails
90
- def self.register_experiment(name, project_id, state, tags: nil, metadata: nil, update: false)
91
- Log.debug("Registering experiment: #{name} (project: #{project_id}, update: #{update})")
92
-
93
- uri = URI("#{state.api_url}/v1/experiment")
94
- request = Net::HTTP::Post.new(uri)
95
- request["Content-Type"] = "application/json"
96
- request["Authorization"] = "Bearer #{state.api_key}"
97
-
98
- payload = {
99
- project_id: project_id,
100
- name: name,
101
- ensure_new: !update # When update=true, allow reusing existing experiment
102
- }
103
- payload[:tags] = tags if tags
104
- payload[:metadata] = metadata if metadata
105
-
106
- request.body = JSON.dump(payload)
107
-
108
- http = Net::HTTP.new(uri.hostname, uri.port)
109
- http.use_ssl = true if uri.scheme == "https"
110
-
111
- response = http.start do |http_session|
112
- http_session.request(request)
113
- end
114
-
115
- Log.debug("Register experiment response: [#{response.code}]")
116
-
117
- # Handle response codes
118
- unless response.is_a?(Net::HTTPSuccess)
119
- raise Error, "Failed to register experiment '#{name}': [#{response.code}] #{response.body}"
120
- end
121
-
122
- experiment = JSON.parse(response.body)
123
- Log.debug("Experiment registered: #{experiment["id"]} (#{experiment["name"]})")
124
- experiment
125
- end
126
- private_class_method :register_experiment
127
- end
128
- end
129
- end