braintrust 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 626876b443795d28b4ba5d12f8bf10381c3052d5d196adb01207d545303f3d1e
4
- data.tar.gz: 347ca89ea9f485ca6521a38c067bdd15074db4e6a4757523901888a8d4cc3e9c
3
+ metadata.gz: 83ff9b69dc144333dba85a5f68e5a40d482ca99b3cae4bb55abe24ef2d05c296
4
+ data.tar.gz: 1a52913de27b3536c7881203f91d3d6050d53e66afeb90900d3c8a04b180951d
5
5
  SHA512:
6
- metadata.gz: 7b827a4f92e2bc4b39e41174e62dacdc431fdc0b6c8d13882bdcaaa369af9621174fc01bafa3d0d594b71464ea374c6904e9834631957951dad87d6583a58dc9
7
- data.tar.gz: bb6f2d3807765ef4ad591849e0972379fc3f97ef8d90bda0785e1d4dab87ce5e91d954d9d3c8fc7eff6c9295d120d6cbe07acb5bb348873c842d791a3fbdce84
6
+ metadata.gz: fb7da28ba278c6a1cff5bd143e28808c723f1bf1507a6fe73d55b76f81d17e74ffc62f5c5dde030a1a5101797f3399592d13d525d46ad39b6b96047f7e47a3d6
7
+ data.tar.gz: d49db21d70faba9e3e9b59a61b88d55897cd5420ef636ae18c7b68a4c50d1428be2fbe0c0efc0b26f3a159bdbfb629025f0cc1aa8489187ecf5b586d57c8e1d2
data/README.md CHANGED
@@ -22,7 +22,7 @@ This is the official Ruby SDK for [Braintrust](https://www.braintrust.dev), for
22
22
  - [Viewing traces](#viewing-traces)
23
23
  - [Evals](#evals)
24
24
  - [Datasets](#datasets)
25
- - [Remote scorers](#remote-scorers)
25
+ - [Scorers](#scorers)
26
26
  - [Documentation](#documentation)
27
27
  - [Troubleshooting](#troubleshooting)
28
28
  - [Contributing](#contributing)
@@ -260,7 +260,7 @@ Braintrust::Eval.run(
260
260
 
261
261
  ### Datasets
262
262
 
263
- Load test cases from a Braintrust dataset:
263
+ Use test cases from a Braintrust dataset:
264
264
 
265
265
  ```ruby
266
266
  Braintrust::Eval.run(
@@ -271,7 +271,22 @@ Braintrust::Eval.run(
271
271
  )
272
272
  ```
273
273
 
274
- ### Remote scorers
274
+ Or define test cases inline with metadata and tags:
275
+
276
+ ```ruby
277
+ Braintrust::Eval.run(
278
+ project: "my-project",
279
+ experiment: "classifier-v1",
280
+ cases: [
281
+ {input: "apple", expected: "fruit", tags: ["produce"], metadata: {difficulty: "easy"}},
282
+ {input: "salmon", expected: "protein", tags: ["seafood"], metadata: {difficulty: "medium"}}
283
+ ],
284
+ task: ->(input) { classify(input) },
285
+ scorers: [...]
286
+ )
287
+ ```
288
+
289
+ ### Scorers
275
290
 
276
291
  Use scoring functions defined in Braintrust:
277
292
 
@@ -281,7 +296,22 @@ Braintrust::Eval.run(
281
296
  cases: [...],
282
297
  task: ->(input) { ... },
283
298
  scorers: [
284
- Braintrust::Scorer.remote("my-project", "accuracy-scorer")
299
+ Braintrust::Eval::Functions.scorer(project: "my-project", slug: "accuracy-scorer")
300
+ ]
301
+ )
302
+ ```
303
+
304
+ Or define scorers inline with `Eval.scorer`:
305
+
306
+ ```ruby
307
+ Braintrust::Eval.run(
308
+ project: "my-project",
309
+ cases: [...],
310
+ task: ->(input) { ... },
311
+ scorers: [
312
+ Braintrust::Eval.scorer("exact_match") do |input, expected, output|
313
+ output == expected ? 1.0 : 0.0
314
+ end
285
315
  ]
286
316
  )
287
317
  ```
@@ -85,7 +85,7 @@ module Braintrust
85
85
  # @param id [String] Dataset UUID
86
86
  # @return [String] Permalink URL
87
87
  def permalink(id:)
88
- "#{@state.app_url}/app/#{@state.org_name}/object?object_type=dataset&object_id=#{id}"
88
+ @state.object_permalink(object_type: "dataset", object_id: id)
89
89
  end
90
90
 
91
91
  # Fetch records from dataset using BTQL
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module Braintrust
8
+ class API
9
+ module Internal
10
+ # Internal Experiments API
11
+ # Not part of the public API - use through Eval.run
12
+ class Experiments
13
+ def initialize(state)
14
+ @state = state
15
+ end
16
+
17
+ # Create an experiment
18
+ # POST /v1/experiment
19
+ # @param name [String] Experiment name
20
+ # @param project_id [String] Project ID
21
+ # @param ensure_new [Boolean] If true (default), fail if exists; if false, return existing
22
+ # @param tags [Array<String>, nil] Optional tags
23
+ # @param metadata [Hash, nil] Optional metadata
24
+ # @return [Hash] Experiment data with "id", "name", "project_id", etc.
25
+ def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil)
26
+ uri = URI("#{@state.api_url}/v1/experiment")
27
+
28
+ payload = {
29
+ project_id: project_id,
30
+ name: name,
31
+ ensure_new: ensure_new
32
+ }
33
+ payload[:tags] = tags if tags
34
+ payload[:metadata] = metadata if metadata
35
+
36
+ request = Net::HTTP::Post.new(uri)
37
+ request["Content-Type"] = "application/json"
38
+ request["Authorization"] = "Bearer #{@state.api_key}"
39
+ request.body = JSON.dump(payload)
40
+
41
+ http = Net::HTTP.new(uri.host, uri.port)
42
+ http.use_ssl = (uri.scheme == "https")
43
+ response = http.request(request)
44
+
45
+ unless response.is_a?(Net::HTTPSuccess)
46
+ raise Error, "HTTP #{response.code} for POST #{uri}: #{response.body}"
47
+ end
48
+
49
+ JSON.parse(response.body)
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ module Braintrust
8
+ class API
9
+ module Internal
10
+ # Internal Projects API
11
+ # Not part of the public API - use through Eval.run
12
+ class Projects
13
+ def initialize(state)
14
+ @state = state
15
+ end
16
+
17
+ # Create or get a project by name (idempotent)
18
+ # POST /v1/project
19
+ # @param name [String] Project name
20
+ # @return [Hash] Project data with "id", "name", "org_id", etc.
21
+ def create(name:)
22
+ uri = URI("#{@state.api_url}/v1/project")
23
+
24
+ request = Net::HTTP::Post.new(uri)
25
+ request["Content-Type"] = "application/json"
26
+ request["Authorization"] = "Bearer #{@state.api_key}"
27
+ request.body = JSON.dump({name: name})
28
+
29
+ http = Net::HTTP.new(uri.host, uri.port)
30
+ http.use_ssl = (uri.scheme == "https")
31
+ response = http.request(request)
32
+
33
+ unless response.is_a?(Net::HTTPSuccess)
34
+ raise Error, "HTTP #{response.code} for POST #{uri}: #{response.body}"
35
+ end
36
+
37
+ JSON.parse(response.body)
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -25,5 +25,22 @@ module Braintrust
25
25
  def functions
26
26
  @functions ||= API::Functions.new(self)
27
27
  end
28
+
29
+ # Login to Braintrust API (idempotent)
30
+ # @return [self]
31
+ def login
32
+ @state.login
33
+ self
34
+ end
35
+
36
+ # Generate a permalink URL to view an object in the Braintrust UI
37
+ # This is for the /object endpoint (experiments, datasets, etc.)
38
+ # For trace span permalinks, use Trace.permalink instead.
39
+ # @param object_type [String] Type of object (e.g., "experiment", "dataset")
40
+ # @param object_id [String] Object UUID
41
+ # @return [String] Permalink URL
42
+ def object_permalink(object_type:, object_id:)
43
+ @state.object_permalink(object_type: object_type, object_id: object_id)
44
+ end
28
45
  end
29
46
  end
@@ -169,8 +169,8 @@ module Braintrust
169
169
  input_messages = []
170
170
 
171
171
  begin
172
- if params[:system]
173
- system_content = params[:system]
172
+ if params[:system_]
173
+ system_content = params[:system_]
174
174
  if system_content.is_a?(Array)
175
175
  system_text = system_content.map { |blk|
176
176
  blk.is_a?(Hash) ? blk[:text] : blk
@@ -98,8 +98,8 @@ module Braintrust
98
98
  def set_input(span, params)
99
99
  input_messages = []
100
100
 
101
- if params[:system]
102
- system_content = params[:system]
101
+ if params[:system_]
102
+ system_content = params[:system_]
103
103
  if system_content.is_a?(Array)
104
104
  system_text = system_content.map { |blk|
105
105
  blk.is_a?(Hash) ? blk[:text] : blk
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "api"
4
+ require_relative "internal/origin"
5
+
6
+ module Braintrust
7
+ # High-level interface for working with Braintrust datasets.
8
+ # Provides both eager loading and lazy enumeration for efficient access to dataset records.
9
+ #
10
+ # @example Basic usage (uses global state)
11
+ # Braintrust.init(api_key: "...")
12
+ # dataset = Braintrust::Dataset.new(name: "my-dataset", project: "my-project")
13
+ # dataset.each { |record| puts record[:input] }
14
+ #
15
+ # @example With explicit API client
16
+ # api = Braintrust::API.new(state: my_state)
17
+ # dataset = Braintrust::Dataset.new(name: "my-dataset", project: "my-project", api: api)
18
+ #
19
+ # @example Eager loading for small datasets
20
+ # records = dataset.fetch_all(limit: 100)
21
+ #
22
+ # @example Using Enumerable methods
23
+ # dataset.take(10)
24
+ # dataset.select { |r| r[:tags]&.include?("important") }
25
+ #
26
+ # @example With version pinning
27
+ # dataset = Braintrust::Dataset.new(name: "my-dataset", project: "my-project", version: "1.0")
28
+ class Dataset
29
+ include Enumerable
30
+
31
+ # Default number of records to fetch per API page
32
+ DEFAULT_PAGE_SIZE = 1000
33
+
34
+ attr_reader :name, :project, :version
35
+
36
+ # Initialize a dataset reference
37
+ # @param name [String, nil] Dataset name (required if id not provided)
38
+ # @param id [String, nil] Dataset UUID (required if name not provided)
39
+ # @param project [String, nil] Project name (required if using name)
40
+ # @param version [String, nil] Optional version to pin to
41
+ # @param api [API, nil] Braintrust API client (defaults to API.new using global state)
42
+ def initialize(name: nil, id: nil, project: nil, version: nil, api: nil)
43
+ @name = name
44
+ @provided_id = id
45
+ @project = project
46
+ @version = version
47
+ @api = api || API.new
48
+ @resolved_id = nil
49
+ @metadata = nil
50
+
51
+ validate_params!
52
+ end
53
+
54
+ # Get the dataset ID, resolving from name if necessary
55
+ # @return [String] Dataset UUID
56
+ def id
57
+ return @provided_id if @provided_id
58
+ resolve_name! unless @resolved_id
59
+ @resolved_id
60
+ end
61
+
62
+ # Get the dataset metadata from the API
63
+ # Makes an API call if metadata hasn't been fetched yet.
64
+ # Note: When initialized with name, metadata is fetched during name resolution.
65
+ # When initialized with ID, this triggers a separate get_by_id call.
66
+ # @return [Hash] Dataset metadata including name, description, created, etc.
67
+ def metadata
68
+ fetch_metadata! unless @metadata
69
+ @metadata
70
+ end
71
+
72
+ # Fetch all records eagerly into an array
73
+ # @param limit [Integer, nil] Maximum records to return (nil for all)
74
+ # @return [Array<Hash>] Array of records; each may include :input, :expected, :tags, :metadata, :origin (only keys available for the record are set)
75
+ def fetch_all(limit: nil)
76
+ records = []
77
+ each_record(limit: limit) { |record| records << record }
78
+ records
79
+ end
80
+
81
+ # Iterate over records lazily (implements Enumerable)
82
+ # Fetches pages on demand for memory efficiency with large datasets.
83
+ # @yield [Hash] Each record with :input, :expected, :tags, :metadata, :origin
84
+ def each(&block)
85
+ return enum_for(:each) unless block_given?
86
+ each_record(&block)
87
+ end
88
+
89
+ private
90
+
91
+ def validate_params!
92
+ if @provided_id.nil? && @name.nil?
93
+ raise ArgumentError, "must specify either :name or :id"
94
+ end
95
+
96
+ if @name && @project.nil?
97
+ raise ArgumentError, ":project is required when using :name"
98
+ end
99
+ end
100
+
101
+ # Resolve dataset name to ID (also fetches metadata as side effect)
102
+ def resolve_name!
103
+ @metadata = @api.datasets.get(project_name: @project, name: @name)
104
+ @resolved_id = @metadata["id"]
105
+ end
106
+
107
+ # Fetch metadata explicitly (for when ID was provided directly)
108
+ def fetch_metadata!
109
+ if @provided_id
110
+ @metadata = @api.datasets.get_by_id(id: @provided_id)
111
+ else
112
+ resolve_name! unless @metadata
113
+ end
114
+ end
115
+
116
+ # Core iteration with pagination
117
+ # @param limit [Integer, nil] Maximum records to return
118
+ def each_record(limit: nil, &block)
119
+ dataset_id = id # Resolve once
120
+ cursor = nil
121
+ count = 0
122
+
123
+ loop do
124
+ page_limit = if limit
125
+ [DEFAULT_PAGE_SIZE, limit - count].min
126
+ else
127
+ DEFAULT_PAGE_SIZE
128
+ end
129
+
130
+ result = @api.datasets.fetch(
131
+ id: dataset_id,
132
+ limit: page_limit,
133
+ cursor: cursor,
134
+ version: @version
135
+ )
136
+
137
+ result[:records].each do |raw_record|
138
+ record = build_record(raw_record, dataset_id)
139
+ block.call(record)
140
+ count += 1
141
+ break if limit && count >= limit
142
+ end
143
+
144
+ # Stop if we've hit the limit or no more pages
145
+ break if limit && count >= limit
146
+
147
+ cursor = result[:cursor]
148
+ break unless cursor
149
+ end
150
+ end
151
+
152
+ # Build a normalized record hash from raw API response
153
+ # @param raw [Hash] Raw record from API
154
+ # @param dataset_id [String] Dataset ID for origin
155
+ # @return [Hash] Normalized record; :origin is included only when it can be built
156
+ def build_record(raw, dataset_id)
157
+ record = {}
158
+ record[:input] = raw["input"] if raw.key?("input")
159
+ record[:expected] = raw["expected"] if raw.key?("expected")
160
+ record[:tags] = raw["tags"] if raw.key?("tags")
161
+ record[:metadata] = raw["metadata"] if raw.key?("metadata")
162
+
163
+ origin = build_origin(raw, dataset_id)
164
+ record[:origin] = origin if origin
165
+
166
+ record
167
+ end
168
+
169
+ # Build origin JSON for tracing/linking
170
+ # @param raw [Hash] Raw record from API
171
+ # @param dataset_id [String] Dataset ID (fallback if not in record)
172
+ # @return [String, nil] JSON-serialized origin, or nil if record lacks required fields
173
+ def build_origin(raw, dataset_id)
174
+ return nil unless raw["id"] && raw["_xact_id"]
175
+
176
+ Internal::Origin.to_json(
177
+ object_type: "dataset",
178
+ object_id: raw["dataset_id"] || dataset_id,
179
+ id: raw["id"],
180
+ xact_id: raw["_xact_id"],
181
+ created: raw["created"]
182
+ )
183
+ end
184
+ end
185
+ end
@@ -7,6 +7,8 @@ module Braintrust
7
7
  # @attr expected [Object, nil] The expected output (optional)
8
8
  # @attr tags [Array<String>, nil] Optional tags for filtering/grouping
9
9
  # @attr metadata [Hash, nil] Optional metadata for the case
10
- Case = Struct.new(:input, :expected, :tags, :metadata, keyword_init: true)
10
+ # @attr origin [String, nil] JSON-serialized origin pointer for cases from remote sources (e.g., datasets).
11
+ # Encodes: object_type, object_id, id, _xact_id, created
12
+ Case = Struct.new(:input, :expected, :tags, :metadata, :origin, keyword_init: true)
11
13
  end
12
14
  end
@@ -18,14 +18,14 @@ module Braintrust
18
18
  MAX_PARALLELISM = Internal::ThreadPool::MAX_PARALLELISM
19
19
 
20
20
  def initialize(experiment_id:, experiment_name:, project_id:, project_name:,
21
- task:, scorers:, state:, tracer_provider: nil)
21
+ task:, scorers:, api:, tracer_provider: nil)
22
22
  @experiment_id = experiment_id
23
23
  @experiment_name = experiment_name
24
24
  @project_id = project_id
25
25
  @project_name = project_name
26
26
  @task = task
27
27
  @scorers = normalize_scorers(scorers)
28
- @state = state
28
+ @api = api
29
29
  @tracer_provider = tracer_provider || OpenTelemetry.tracer_provider
30
30
  @tracer = @tracer_provider.tracer("braintrust-eval")
31
31
  @parent_attr = "experiment_id:#{experiment_id}"
@@ -61,7 +61,7 @@ module Braintrust
61
61
  duration = Time.now - start_time
62
62
 
63
63
  # Generate permalink
64
- permalink = "#{state.app_url}/app/#{state.org_name}/object?object_type=experiment&object_id=#{experiment_id}"
64
+ permalink = @api.object_permalink(object_type: "experiment", object_id: experiment_id)
65
65
 
66
66
  Result.new(
67
67
  experiment_id: experiment_id,
@@ -78,7 +78,7 @@ module Braintrust
78
78
  private
79
79
 
80
80
  attr_reader :experiment_id, :experiment_name, :project_id, :project_name,
81
- :task, :scorers, :state, :tracer, :parent_attr
81
+ :task, :scorers, :tracer, :parent_attr
82
82
 
83
83
  # Run a single test case with OpenTelemetry tracing
84
84
  # Creates eval span (parent) with task and score as children
@@ -116,6 +116,9 @@ module Braintrust
116
116
  set_json_attr(eval_span, "braintrust.input_json", test_case.input)
117
117
  set_json_attr(eval_span, "braintrust.output_json", output)
118
118
  set_json_attr(eval_span, "braintrust.expected", test_case.expected) if test_case.expected
119
+
120
+ # Set origin for cases from remote sources (already JSON-serialized)
121
+ eval_span.set_attribute("braintrust.origin", test_case.origin) if test_case.origin
119
122
  end
120
123
  end
121
124
 
@@ -2,7 +2,9 @@
2
2
 
3
3
  require_relative "eval/scorer"
4
4
  require_relative "eval/runner"
5
- require_relative "internal/experiments"
5
+ require_relative "api/internal/projects"
6
+ require_relative "api/internal/experiments"
7
+ require_relative "dataset"
6
8
 
7
9
  require "opentelemetry/sdk"
8
10
  require "json"
@@ -199,39 +201,45 @@ module Braintrust
199
201
  # @param metadata [Hash] Optional experiment metadata
200
202
  # @param update [Boolean] If true, allow reusing existing experiment (default: false)
201
203
  # @param quiet [Boolean] If true, suppress result output (default: false)
202
- # @param state [State, nil] Braintrust state (defaults to global state)
204
+ # @param api [API, nil] Braintrust API client (defaults to API.new using global state)
203
205
  # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider (defaults to global)
204
206
  # @return [Result]
205
207
  def run(project:, experiment:, task:, scorers:,
206
208
  cases: nil, dataset: nil,
207
209
  parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
208
- state: nil, tracer_provider: nil)
210
+ api: nil, tracer_provider: nil)
209
211
  # Validate required parameters
210
212
  validate_params!(project: project, experiment: experiment,
211
213
  cases: cases, dataset: dataset, task: task, scorers: scorers)
212
214
 
213
- # Get state from parameter or global
214
- state ||= Braintrust.current_state
215
- raise Error, "No state available" unless state
215
+ # Get API from parameter or create from global state
216
+ api ||= API.new
216
217
 
217
- # Ensure state is logged in (to populate org_name, etc.)
218
+ # Ensure logged in (to populate org_name, etc.)
218
219
  # login is idempotent and returns early if already logged in
219
- state.login
220
+ api.login
220
221
 
221
222
  # Resolve dataset to cases if dataset parameter provided
222
223
  if dataset
223
- cases = resolve_dataset(dataset, project, state)
224
+ cases = resolve_dataset(dataset, project, api)
224
225
  end
225
226
 
226
- # Register project and experiment via API
227
- result = Internal::Experiments.get_or_create(
228
- experiment, project, state: state,
229
- tags: tags, metadata: metadata, update: update
227
+ # Register project and experiment via internal API
228
+ projects_api = API::Internal::Projects.new(api.state)
229
+ experiments_api = API::Internal::Experiments.new(api.state)
230
+
231
+ project_result = projects_api.create(name: project)
232
+ experiment_result = experiments_api.create(
233
+ name: experiment,
234
+ project_id: project_result["id"],
235
+ ensure_new: !update,
236
+ tags: tags,
237
+ metadata: metadata
230
238
  )
231
239
 
232
- experiment_id = result[:experiment_id]
233
- project_id = result[:project_id]
234
- project_name = result[:project_name]
240
+ experiment_id = experiment_result["id"]
241
+ project_id = project_result["id"]
242
+ project_name = project_result["name"]
235
243
 
236
244
  # Instantiate Runner and run evaluation
237
245
  runner = Runner.new(
@@ -241,7 +249,7 @@ module Braintrust
241
249
  project_name: project_name,
242
250
  task: task,
243
251
  scorers: scorers,
244
- state: state,
252
+ api: api,
245
253
  tracer_provider: tracer_provider
246
254
  )
247
255
  result = runner.run(cases, parallelism: parallelism)
@@ -285,84 +293,29 @@ module Braintrust
285
293
  end
286
294
 
287
295
  # Resolve dataset parameter to an array of case records
288
- # @param dataset [String, Hash] Dataset specifier
289
- # @param project [String] Project name (used as default if not specified in hash)
290
- # @param state [State] Braintrust state
296
+ # @param dataset [String, Hash, Dataset] Dataset specifier or instance
297
+ # @param project [String] Project name (used as default if not specified)
298
+ # @param api [API] Braintrust API client
291
299
  # @return [Array<Hash>] Array of case records
292
- def resolve_dataset(dataset, project, state)
293
- require_relative "api"
300
+ def resolve_dataset(dataset, project, api)
301
+ limit = nil
294
302
 
295
- # Parse dataset parameter
296
- dataset_opts = case dataset
303
+ dataset_obj = case dataset
304
+ when Dataset
305
+ dataset
297
306
  when String
298
- # String: dataset name in same project
299
- {name: dataset, project: project}
307
+ Dataset.new(name: dataset, project: project, api: api)
300
308
  when Hash
301
- # Hash: explicit options
302
- dataset.dup
309
+ opts = dataset.dup
310
+ limit = opts.delete(:limit)
311
+ opts[:project] ||= project
312
+ opts[:api] = api
313
+ Dataset.new(**opts)
303
314
  else
304
- raise ArgumentError, "dataset must be String or Hash, got #{dataset.class}"
315
+ raise ArgumentError, "dataset must be String, Hash, or Dataset, got #{dataset.class}"
305
316
  end
306
317
 
307
- # Apply defaults
308
- dataset_opts[:project] ||= project
309
-
310
- # Create API client
311
- api = API.new(state: state)
312
-
313
- # Resolve dataset ID
314
- dataset_id = if dataset_opts[:id]
315
- # ID provided directly
316
- dataset_opts[:id]
317
- elsif dataset_opts[:name]
318
- # Fetch by name + project
319
- metadata = api.datasets.get(
320
- project_name: dataset_opts[:project],
321
- name: dataset_opts[:name]
322
- )
323
- metadata["id"]
324
- else
325
- raise ArgumentError, "dataset hash must specify either :name or :id"
326
- end
327
-
328
- # Fetch records with pagination
329
- limit_per_page = 1000
330
- max_records = dataset_opts[:limit]
331
- version = dataset_opts[:version]
332
- records = []
333
- cursor = nil
334
-
335
- loop do
336
- result = api.datasets.fetch(
337
- id: dataset_id,
338
- limit: limit_per_page,
339
- cursor: cursor,
340
- version: version
341
- )
342
-
343
- records.concat(result[:records])
344
-
345
- # Check if we've hit the user-specified limit
346
- if max_records && records.length >= max_records
347
- records = records.take(max_records)
348
- break
349
- end
350
-
351
- # Check if there's more data
352
- cursor = result[:cursor]
353
- break unless cursor
354
- end
355
-
356
- # Filter records to only include Case-compatible fields
357
- # Case accepts: input, expected, tags, metadata
358
- records.map do |record|
359
- filtered = {}
360
- filtered[:input] = record["input"] if record.key?("input")
361
- filtered[:expected] = record["expected"] if record.key?("expected")
362
- filtered[:tags] = record["tags"] if record.key?("tags")
363
- filtered[:metadata] = record["metadata"] if record.key?("metadata")
364
- filtered
365
- end
318
+ dataset_obj.fetch_all(limit: limit)
366
319
  end
367
320
  end
368
321
  end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Braintrust
6
+ module Internal
7
+ # Origin provides serialization for source object pointers in Braintrust.
8
+ # Used internally to link spans back to their source records (e.g., dataset rows).
9
+ module Origin
10
+ # Serialize an origin pointer to JSON
11
+ # @param object_type [String] Type of source object (e.g., "dataset", "playground_logs")
12
+ # @param object_id [String] ID of the source object
13
+ # @param id [String] ID of the specific record within the source
14
+ # @param xact_id [String] Transaction ID
15
+ # @param created [String, nil] Creation timestamp
16
+ # @return [String] JSON-serialized origin
17
+ def self.to_json(object_type:, object_id:, id:, xact_id:, created:)
18
+ JSON.dump({
19
+ object_type: object_type,
20
+ object_id: object_id,
21
+ id: id,
22
+ _xact_id: xact_id,
23
+ created: created
24
+ })
25
+ end
26
+ end
27
+ end
28
+ end
@@ -139,6 +139,16 @@ module Braintrust
139
139
  end
140
140
  end
141
141
 
142
+ # Generate a permalink URL to view an object in the Braintrust UI
143
+ # This is for the /object endpoint (experiments, datasets, etc.)
144
+ # For trace span permalinks, use Trace.permalink instead.
145
+ # @param object_type [String] Type of object (e.g., "experiment", "dataset")
146
+ # @param object_id [String] Object UUID
147
+ # @return [String] Permalink URL
148
+ def object_permalink(object_type:, object_id:)
149
+ "#{@app_url}/app/#{@org_name}/object?object_type=#{object_type}&object_id=#{object_id}"
150
+ end
151
+
142
152
  # Login to Braintrust API in a background thread with retry logic
143
153
  # Retries indefinitely with exponential backoff until success
144
154
  # Idempotent: returns early if already logged in
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Braintrust
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.3"
5
5
  end
data/lib/braintrust.rb CHANGED
@@ -6,7 +6,7 @@ require_relative "braintrust/state"
6
6
  require_relative "braintrust/trace"
7
7
  require_relative "braintrust/api"
8
8
  require_relative "braintrust/prompt"
9
- require_relative "braintrust/internal/experiments"
9
+ require_relative "braintrust/dataset"
10
10
  require_relative "braintrust/internal/env"
11
11
  require_relative "braintrust/eval"
12
12
  require_relative "braintrust/contrib"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: braintrust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Braintrust
@@ -193,6 +193,8 @@ files:
193
193
  - lib/braintrust/api/datasets.rb
194
194
  - lib/braintrust/api/functions.rb
195
195
  - lib/braintrust/api/internal/auth.rb
196
+ - lib/braintrust/api/internal/experiments.rb
197
+ - lib/braintrust/api/internal/projects.rb
196
198
  - lib/braintrust/config.rb
197
199
  - lib/braintrust/contrib.rb
198
200
  - lib/braintrust/contrib/anthropic/deprecated.rb
@@ -228,6 +230,7 @@ files:
228
230
  - lib/braintrust/contrib/setup.rb
229
231
  - lib/braintrust/contrib/support/openai.rb
230
232
  - lib/braintrust/contrib/support/otel.rb
233
+ - lib/braintrust/dataset.rb
231
234
  - lib/braintrust/eval.rb
232
235
  - lib/braintrust/eval/case.rb
233
236
  - lib/braintrust/eval/cases.rb
@@ -239,7 +242,7 @@ files:
239
242
  - lib/braintrust/eval/summary.rb
240
243
  - lib/braintrust/internal/encoding.rb
241
244
  - lib/braintrust/internal/env.rb
242
- - lib/braintrust/internal/experiments.rb
245
+ - lib/braintrust/internal/origin.rb
243
246
  - lib/braintrust/internal/template.rb
244
247
  - lib/braintrust/internal/thread_pool.rb
245
248
  - lib/braintrust/internal/time.rb
@@ -1,129 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "net/http"
4
- require "json"
5
- require "uri"
6
- require_relative "../logger"
7
-
8
- module Braintrust
9
- module Internal
10
- # Experiments module provides internal API methods for registering projects and experiments
11
- # Methods are marked private to prevent direct user access - use through Eval.run
12
- module Experiments
13
- # Public convenience method to register/get both project and experiment
14
- # @param experiment_name [String] The experiment name
15
- # @param project_name [String] The project name
16
- # @param state [State] Braintrust state with API key and URL
17
- # @param tags [Array<String>, nil] Optional experiment tags
18
- # @param metadata [Hash, nil] Optional experiment metadata
19
- # @param update [Boolean] If true, allow reusing existing experiment (default: false)
20
- # @return [Hash] Hash with :experiment_id, :experiment_name, :project_id, :project_name
21
- def self.get_or_create(experiment_name, project_name, state:,
22
- tags: nil, metadata: nil, update: false)
23
- # Register/get project first
24
- project = register_project(project_name, state)
25
-
26
- # Then register/get experiment
27
- experiment = register_experiment(
28
- experiment_name,
29
- project["id"],
30
- state,
31
- tags: tags,
32
- metadata: metadata,
33
- update: update
34
- )
35
-
36
- {
37
- experiment_id: experiment["id"],
38
- experiment_name: experiment["name"],
39
- project_id: project["id"],
40
- project_name: project["name"]
41
- }
42
- end
43
-
44
- # Register or get a project by name
45
- # POST /v1/project with {name: "project-name"}
46
- # Returns existing project if already exists
47
- # @param name [String] Project name
48
- # @param state [State] Braintrust state
49
- # @return [Hash] Project data with "id", "name", "org_id", etc.
50
- # @raise [Braintrust::Error] if API call fails
51
- def self.register_project(name, state)
52
- Log.debug("Registering project: #{name}")
53
-
54
- uri = URI("#{state.api_url}/v1/project")
55
- request = Net::HTTP::Post.new(uri)
56
- request["Content-Type"] = "application/json"
57
- request["Authorization"] = "Bearer #{state.api_key}"
58
- request.body = JSON.dump({name: name})
59
-
60
- http = Net::HTTP.new(uri.hostname, uri.port)
61
- http.use_ssl = true if uri.scheme == "https"
62
-
63
- response = http.start do |http_session|
64
- http_session.request(request)
65
- end
66
-
67
- Log.debug("Register project response: [#{response.code}]")
68
-
69
- # Handle response codes
70
- unless response.is_a?(Net::HTTPSuccess)
71
- raise Error, "Failed to register project '#{name}': [#{response.code}] #{response.body}"
72
- end
73
-
74
- project = JSON.parse(response.body)
75
- Log.debug("Project registered: #{project["id"]} (#{project["name"]})")
76
- project
77
- end
78
- private_class_method :register_project
79
-
80
- # Register or get an experiment by name
81
- # POST /v1/experiment with {project_id:, name:, ensure_new:, tags:[], metadata:{}}
82
- # @param name [String] Experiment name
83
- # @param project_id [String] Project ID
84
- # @param state [State] Braintrust state
85
- # @param tags [Array<String>, nil] Optional tags
86
- # @param metadata [Hash, nil] Optional metadata
87
- # @param update [Boolean] If true, allow reusing existing experiment (ensure_new: false)
88
- # @return [Hash] Experiment data with "id", "name", "project_id", etc.
89
- # @raise [Braintrust::Error] if API call fails
90
- def self.register_experiment(name, project_id, state, tags: nil, metadata: nil, update: false)
91
- Log.debug("Registering experiment: #{name} (project: #{project_id}, update: #{update})")
92
-
93
- uri = URI("#{state.api_url}/v1/experiment")
94
- request = Net::HTTP::Post.new(uri)
95
- request["Content-Type"] = "application/json"
96
- request["Authorization"] = "Bearer #{state.api_key}"
97
-
98
- payload = {
99
- project_id: project_id,
100
- name: name,
101
- ensure_new: !update # When update=true, allow reusing existing experiment
102
- }
103
- payload[:tags] = tags if tags
104
- payload[:metadata] = metadata if metadata
105
-
106
- request.body = JSON.dump(payload)
107
-
108
- http = Net::HTTP.new(uri.hostname, uri.port)
109
- http.use_ssl = true if uri.scheme == "https"
110
-
111
- response = http.start do |http_session|
112
- http_session.request(request)
113
- end
114
-
115
- Log.debug("Register experiment response: [#{response.code}]")
116
-
117
- # Handle response codes
118
- unless response.is_a?(Net::HTTPSuccess)
119
- raise Error, "Failed to register experiment '#{name}': [#{response.code}] #{response.body}"
120
- end
121
-
122
- experiment = JSON.parse(response.body)
123
- Log.debug("Experiment registered: #{experiment["id"]} (#{experiment["name"]})")
124
- experiment
125
- end
126
- private_class_method :register_experiment
127
- end
128
- end
129
- end