braintrust 0.0.1.alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,418 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "eval/case"
4
+ require_relative "eval/cases"
5
+ require_relative "eval/scorer"
6
+ require_relative "eval/result"
7
+ require_relative "internal/experiments"
8
+ require "opentelemetry/sdk"
9
+ require "json"
10
+
11
+ module Braintrust
12
+ module Eval
13
+ class << self
14
# Build a Scorer from a name plus either a callable or a block.
# All three arguments are handed to Scorer.new unchanged.
# @param name [String] Name to give the scorer
# @param callable [#call, nil] Scoring callable (omit when passing a block)
# @param block [Proc] Scoring block (omit when passing a callable)
# @return [Scorer] The newly constructed scorer
def scorer(name, callable = nil, &block)
  Scorer.new(name, callable, &block)
end
22
+
23
# Run an evaluation end to end: validate arguments, log in, resolve the
# data source, register the project/experiment, execute every case and
# (optionally) print the result.
# @param project [String] The project name
# @param experiment [String] The experiment name
# @param cases [Array, Enumerable, nil] The test cases (mutually exclusive with dataset)
# @param dataset [String, Hash, nil] Dataset to fetch (mutually exclusive with cases)
#   - String: dataset name (looked up in the same project)
#   - Hash: {name:, id:, project:, version:, limit:}
# @param task [#call] The task to evaluate (must be callable)
# @param scorers [Array<Scorer, #call>] Scorer objects or plain callables
# @param parallelism [Integer] Number of parallel workers (default: 1).
#   NOTE: accepted for API compatibility; this method does not forward it.
# @param tags [Array<String>] Optional experiment tags
# @param metadata [Hash] Optional experiment metadata
# @param update [Boolean] If true, allow reusing an existing experiment (default: false)
# @param quiet [Boolean] If true, do not print the result (default: false)
# @param state [State, nil] Braintrust state (falls back to Braintrust.current_state)
# @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider (falls back to the global one)
# @return [Result]
# @raise [ArgumentError] if the arguments fail validation
# @raise [Error] if no state is available
def run(project:, experiment:, task:, scorers:,
  cases: nil, dataset: nil,
  parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
  state: nil, tracer_provider: nil)
  validate_params!(project: project, experiment: experiment,
    cases: cases, dataset: dataset, task: task, scorers: scorers)

  # Explicit state wins; otherwise use the process-wide one.
  state ||= Braintrust.current_state
  raise Error, "No state available" unless state

  # Logging in populates org_name etc.; it is a no-op when already logged in.
  state.login

  # A dataset specifier replaces the (necessarily absent) cases argument.
  cases = resolve_dataset(dataset, project, state) if dataset

  # Register (or look up) the project and experiment through the API.
  registration = Internal::Experiments.get_or_create(
    experiment, project,
    state: state, tags: tags, metadata: metadata, update: update
  )

  outcome = run_internal(
    experiment_id: registration[:experiment_id],
    experiment_name: experiment,
    project_id: registration[:project_id],
    project_name: registration[:project_name],
    cases: cases,
    task: task,
    scorers: scorers,
    state: state,
    tracer_provider: tracer_provider
  )

  # Print the summary unless the caller asked for silence, then return it.
  outcome.tap { |summary| print_result(summary) unless quiet }
end
89
+
90
+ private
91
+
92
# Internal eval runner that doesn't touch the API: runs every case under
# OpenTelemetry tracing and packages the outcome.
# @param experiment_id [String] Resolved experiment ID
# @param experiment_name [String] Experiment name
# @param project_id [String] Resolved project ID
# @param project_name [String] Project name (accepted for symmetry; not used here)
# @param cases [Array, Enumerable, Cases] Test cases
# @param task [#call] Task callable
# @param scorers [Array] Scorers (Scorer objects or callables)
# @param state [State] Braintrust state (supplies app_url and org_name for the permalink)
# @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider (defaults to global)
# @return [Result]
def run_internal(experiment_id:, experiment_name:, project_id:, project_name:,
  cases:, task:, scorers:, state:, tracer_provider: nil)
  # Use the monotonic clock: wall-clock time can jump (NTP, DST) and would
  # yield a wrong or even negative duration.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Get tracer for creating spans
  tracer_provider ||= OpenTelemetry.tracer_provider
  tracer = tracer_provider.tracer("braintrust-eval")

  # Parent attribute shared by all eval spans
  parent_attr = "experiment_id:#{experiment_id}"

  normalized_cases = normalize_cases(cases)
  normalized_scorers = normalize_scorers(scorers)

  # Failures are collected rather than raised so every case gets a chance to run
  errors = []

  normalized_cases.each do |test_case|
    run_case(test_case, task, normalized_scorers, errors, tracer, parent_attr)
  end

  # Elapsed seconds as a Float
  duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  # Permalink: {app_url}/app/{org}/object?object_type=experiment&object_id={experiment_id}
  permalink = "#{state.app_url}/app/#{state.org_name}/object?object_type=experiment&object_id=#{experiment_id}"

  Result.new(
    experiment_id: experiment_id,
    experiment_name: experiment_name,
    project_id: project_id,
    permalink: permalink,
    errors: errors,
    duration: duration
  )
end
145
+
146
# Write the result summary to standard output.
# @param result [Result] The evaluation result (rendered the way puts renders it)
# @return [nil]
def print_result(result)
  $stdout.puts(result)
end
151
+
152
# Check the arguments given to an eval run before any work is done.
# Checks happen in this order: required values, cases/dataset exclusivity,
# presence of a data source, callability of the task.
# @param project [String] The project name
# @param experiment [String] The experiment name
# @param cases [Array, Enumerable, nil] Inline test cases
# @param dataset [String, Hash, nil] Dataset specifier
# @param task [#call] The task to evaluate
# @param scorers [Array] The scorers
# @return [nil]
# @raise [ArgumentError] if validation fails
def validate_params!(project:, experiment:, cases:, dataset:, task:, scorers:)
  required = {project: project, experiment: experiment, task: task, scorers: scorers}
  required.each do |label, value|
    raise ArgumentError, "#{label} is required" unless value
  end

  # Exactly one data source must be supplied.
  if cases && dataset
    raise ArgumentError, "cannot specify both 'cases' and 'dataset' - they are mutually exclusive"
  end
  raise ArgumentError, "must specify either 'cases' or 'dataset'" unless cases || dataset

  return if task.respond_to?(:call)

  raise ArgumentError, "task must be callable (respond to :call)"
end
175
+
176
# Turn a dataset specifier into an array of case hashes by looking up the
# dataset and paging through its records.
# @param dataset [String, Hash] Dataset name, or a hash with :name/:id and
#   optional :project, :version, :limit
# @param project [String] Project name (used when the hash gives no :project)
# @param state [State] Braintrust state used to build the API client
# @return [Array<Hash>] One hash per record containing only the keys
#   :input, :expected, :tags, :metadata that were present on the record
# @raise [ArgumentError] if dataset is neither String nor Hash, or the hash
#   has neither :name nor :id
def resolve_dataset(dataset, project, state)
  # Loaded lazily, at call time.
  require_relative "api"

  opts =
    if dataset.is_a?(String)
      # A bare string is a dataset name in the same project
      {name: dataset, project: project}
    elsif dataset.is_a?(Hash)
      # Work on a copy so the caller's hash is not modified
      dataset.dup
    else
      raise ArgumentError, "dataset must be String or Hash, got #{dataset.class}"
    end
  opts[:project] ||= project

  api = API.new(state: state)

  # An explicit :id wins; otherwise look the dataset up by name + project.
  dataset_id = opts[:id]
  unless dataset_id
    raise ArgumentError, "dataset hash must specify either :name or :id" unless opts[:name]

    dataset_id = api.datasets.get(project_name: opts[:project], name: opts[:name])["id"]
  end

  page_size = 1000
  cap = opts[:limit]
  version = opts[:version]
  collected = []
  cursor = nil

  loop do
    page = api.datasets.fetch(id: dataset_id, limit: page_size, cursor: cursor, version: version)
    collected.concat(page[:records])

    # Stop (and trim) once the caller's limit has been reached
    if cap && collected.length >= cap
      collected = collected.take(cap)
      break
    end

    # No cursor means there are no further pages
    cursor = page[:cursor]
    break unless cursor
  end

  # Keep only the fields a Case accepts, as symbol keys
  collected.map do |record|
    %w[input expected tags metadata].each_with_object({}) do |field, kept|
      kept[field.to_sym] = record[field] if record.key?(field)
    end
  end
end
256
+
257
# Coerce the cases argument into a Cases wrapper.
# A Cases instance is returned as-is; anything Enumerable, or anything that
# responds to #each, is wrapped in a new Cases.
# @param cases_input [Array, Enumerable, Cases, #each] The cases input
# @return [Cases]
# @raise [ArgumentError] if the input cannot be iterated
def normalize_cases(cases_input)
  return cases_input if cases_input.is_a?(Cases)

  iterable = cases_input.is_a?(Enumerable) || cases_input.respond_to?(:each)
  raise ArgumentError, "cases must be Array or Enumerable" unless iterable

  Cases.new(cases_input)
end
274
+
275
# Make sure every entry is a Scorer.
# Existing Scorer objects are kept; anything else is passed to Scorer.new
# as its only argument (naming is left to Scorer).
# @param scorers_input [Array] Scorer objects and/or callables
# @return [Array<Scorer>]
def normalize_scorers(scorers_input)
  scorers_input.map { |entry| entry.is_a?(Scorer) ? entry : Scorer.new(entry) }
end
290
+
291
# Run a single test case with OpenTelemetry tracing.
# Creates an "eval" span (parent) with the task and score spans as children.
# Failures are appended to +errors+ and flagged on the eval span; they are
# not re-raised, so the remaining cases still run.
# @param test_case [Case] The test case
# @param task [#call] The task
# @param scorers [Array<Scorer>] The scorers
# @param errors [Array<String>] Error collection array (appended to in place)
# @param tracer [Tracer] OpenTelemetry tracer
# @param parent_attr [String] Value for the "braintrust.parent" span attribute
def run_case(test_case, task, scorers, errors, tracer, parent_attr)
  tracer.in_span("eval") do |eval_span|
    eval_span.set_attribute("braintrust.parent", parent_attr)

    # Everything known up front is recorded before the task runs, so a
    # failed eval span still carries its type, tags, input and expected value.
    eval_span.set_attribute("braintrust.tags", test_case.tags) if test_case.tags
    set_json_attr(eval_span, "braintrust.span_attributes", {type: "eval"})
    set_json_attr(eval_span, "braintrust.input_json", test_case.input)
    # nil means "no expectation"; false is a legitimate expected value.
    set_json_attr(eval_span, "braintrust.expected", test_case.expected) unless test_case.expected.nil?

    begin
      output = run_task(test_case, task, tracer, parent_attr)
    rescue => e
      # The exception is already recorded on the task span; mirror the status here
      eval_span.status = OpenTelemetry::Trace::Status.error(e.message)
      errors << "Task failed for input '#{test_case.input}': #{e.message}"
      # Nothing to score without an output
      next
    end

    set_json_attr(eval_span, "braintrust.output_json", output)

    begin
      run_scorers(test_case, output, scorers, tracer, parent_attr)
    rescue => e
      # The exception is already recorded on the score span; mirror the status here
      eval_span.status = OpenTelemetry::Trace::Status.error(e.message)
      errors << "Scorers failed for input '#{test_case.input}': #{e.message}"
    end
  end
end
334
+
335
# Execute the task for one case inside a "task" span.
# The span records the parent, its type and the JSON-encoded input up front,
# and the JSON-encoded output once the task returns.
# @param test_case [Case] The test case (only #input is read)
# @param task [#call] The task; called with the case input
# @param tracer [Tracer] OpenTelemetry tracer
# @param parent_attr [String] Value for the "braintrust.parent" span attribute
# @return [Object] Whatever the task returned
# @raise [StandardError] re-raises any error from the task (or from encoding
#   its output) after recording it on the span
def run_task(test_case, task, tracer, parent_attr)
  tracer.in_span("task") do |span|
    span.set_attribute("braintrust.parent", parent_attr)
    set_json_attr(span, "braintrust.span_attributes", {type: "task"})
    set_json_attr(span, "braintrust.input_json", test_case.input)

    begin
      result = task.call(test_case.input)
      set_json_attr(span, "braintrust.output_json", result)
      result
    rescue => failure
      # Attach the exception event first, then mark the span as errored
      span.record_exception(failure)
      span.status = OpenTelemetry::Trace::Status.error(failure.message)
      raise
    end
  end
end
360
+
361
# Run every scorer for one case inside a single "score" span.
# A failing scorer does not stop the others: its error is recorded on the
# span, the remaining scorers still run, and the scores that were produced
# are written to the span before the first failure is raised.
# @param test_case [Case] The test case (input, expected and metadata are read)
# @param output [Object] Task output
# @param scorers [Array<Scorer>] The scorers
# @param tracer [Tracer] OpenTelemetry tracer
# @param parent_attr [String] Value for the "braintrust.parent" span attribute
# @raise [RuntimeError] naming the first scorer that failed, if any did
def run_scorers(test_case, output, scorers, tracer, parent_attr)
  tracer.in_span("score") do |span|
    span.set_attribute("braintrust.parent", parent_attr)
    set_json_attr(span, "braintrust.span_attributes", {type: "score"})

    first_failure = nil
    results = scorers.each_with_object({}) do |scorer, collected|
      value = scorer.call(test_case.input, test_case.expected, output, test_case.metadata || {})
      collected[scorer.name] = value
    rescue => e
      # Keep only the first message, but record every exception on the span
      first_failure ||= "Scorer '#{scorer.name}' failed: #{e.message}"
      record_span_error(span, e, "ScorerError")
    end

    # Written even on partial failure so the successful scores stay visible
    set_json_attr(span, "braintrust.scores", results)

    raise first_failure if first_failure
  end
end
391
+
392
# Attach an exception event to the span and mark the span as errored.
# @param span [OpenTelemetry::Trace::Span] The span to record the error on
# @param error [Exception] The error that occurred
# @param error_type [String, nil] When given, sent as the "exception.type"
#   attribute of the exception event (custom error classification)
def record_span_error(span, error, error_type = nil)
  # Only pass the attributes keyword when there is a type override to send
  error_type ? span.record_exception(error, attributes: {"exception.type" => error_type}) : span.record_exception(error)

  span.status = OpenTelemetry::Trace::Status.error(error.message)
end
408
+
409
# Store a value on a span as a JSON string.
# @param span [OpenTelemetry::Trace::Span] The span to update
# @param key [String] The attribute key
# @param value [Object] The value; serialized with JSON.dump
def set_json_attr(span, key, value)
  encoded = JSON.dump(value)
  span.set_attribute(key, encoded)
end
416
+ end
417
+ end
418
+ end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+ require_relative "../logger"
7
+
8
module Braintrust
  module Internal
    # Internal API helpers for registering projects and experiments.
    # Only get_or_create is public; the per-resource methods are private
    # class methods and are meant to be reached through Eval.run.
    module Experiments
      # Register (or fetch) a project and then an experiment inside it.
      # @param experiment_name [String] The experiment name
      # @param project_name [String] The project name
      # @param state [State] Braintrust state providing api_url and api_key
      # @param tags [Array<String>, nil] Optional experiment tags
      # @param metadata [Hash, nil] Optional experiment metadata
      # @param update [Boolean] If true, allow reusing an existing experiment (default: false)
      # @return [Hash] Hash with :experiment_id, :experiment_name, :project_id, :project_name
      # @raise [Braintrust::Error] if either API call fails
      def self.get_or_create(experiment_name, project_name, state:,
        tags: nil, metadata: nil, update: false)
        # The experiment needs the project's ID, so the project comes first
        project = register_project(project_name, state)

        experiment = register_experiment(
          experiment_name,
          project["id"],
          state,
          tags: tags,
          metadata: metadata,
          update: update
        )

        {
          experiment_id: experiment["id"],
          experiment_name: experiment["name"],
          project_id: project["id"],
          project_name: project["name"]
        }
      end

      # Register or get a project by name.
      # POST /v1/project with {name: "project-name"}
      # @param name [String] Project name
      # @param state [State] Braintrust state
      # @return [Hash] Parsed JSON response (read by callers for "id" and "name")
      # @raise [Braintrust::Error] if the API does not answer with a 2xx status
      def self.register_project(name, state)
        Log.debug("Registering project: #{name}")

        project = post_json(state, "/v1/project", {name: name}, kind: "project", name: name)

        Log.debug("Project registered: #{project["id"]} (#{project["name"]})")
        project
      end
      private_class_method :register_project

      # Register or get an experiment by name.
      # POST /v1/experiment with {project_id:, name:, ensure_new:, tags:[], metadata:{}}
      # @param name [String] Experiment name
      # @param project_id [String] Project ID
      # @param state [State] Braintrust state
      # @param tags [Array<String>, nil] Optional tags (omitted from the payload when nil)
      # @param metadata [Hash, nil] Optional metadata (omitted from the payload when nil)
      # @param update [Boolean] If true, allow reusing an existing experiment (ensure_new: false)
      # @return [Hash] Parsed JSON response (read by callers for "id" and "name")
      # @raise [Braintrust::Error] if the API does not answer with a 2xx status
      def self.register_experiment(name, project_id, state, tags: nil, metadata: nil, update: false)
        Log.debug("Registering experiment: #{name} (project: #{project_id}, update: #{update})")

        payload = {
          project_id: project_id,
          name: name,
          ensure_new: !update # When update=true, allow reusing existing experiment
        }
        payload[:tags] = tags if tags
        payload[:metadata] = metadata if metadata

        experiment = post_json(state, "/v1/experiment", payload, kind: "experiment", name: name)

        Log.debug("Experiment registered: #{experiment["id"]} (#{experiment["name"]})")
        experiment
      end
      private_class_method :register_experiment

      # POST a JSON payload to the Braintrust API and parse the JSON reply.
      # Shared by register_project and register_experiment so both build
      # requests and handle failures identically.
      # @param state [State] Braintrust state providing api_url and api_key
      # @param path [String] Request path appended to state.api_url (e.g. "/v1/project")
      # @param payload [Hash] Request body, sent JSON-encoded
      # @param kind [String] Resource kind used in log/error messages ("project", "experiment")
      # @param name [String] Resource name used in the error message
      # @return [Hash] Parsed JSON response body
      # @raise [Braintrust::Error] if the response is not a Net::HTTPSuccess
      def self.post_json(state, path, payload, kind:, name:)
        uri = URI("#{state.api_url}#{path}")
        request = Net::HTTP::Post.new(uri)
        request["Content-Type"] = "application/json"
        request["Authorization"] = "Bearer #{state.api_key}"
        request.body = JSON.dump(payload)

        http = Net::HTTP.new(uri.hostname, uri.port)
        http.use_ssl = true if uri.scheme == "https"

        # The block form of start closes the connection when the block exits
        response = http.start { |session| session.request(request) }

        Log.debug("Register #{kind} response: [#{response.code}]")

        unless response.is_a?(Net::HTTPSuccess)
          raise Error, "Failed to register #{kind} '#{name}': [#{response.code}] #{response.body}"
        end

        JSON.parse(response.body)
      end
      private_class_method :post_json
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "logger"
4
+
5
module Braintrust
  # Minimal logging facade for the Braintrust SDK.
  # Messages go to a Logger on $stderr; the level is DEBUG when the
  # BRAINTRUST_DEBUG environment variable is set (to anything) and WARN
  # otherwise. The logger can be replaced through Log.logger=.
  module Log
    @logger = Logger.new($stderr, level: ENV["BRAINTRUST_DEBUG"] ? Logger::DEBUG : Logger::WARN)

    class << self
      # @return [Logger] The logger every level method writes to
      attr_accessor :logger

      # Defines Log.debug, Log.info, Log.warn and Log.error. Each takes one
      # message and forwards it to the same-named method of the current logger.
      %i[debug info warn error].each do |severity|
        define_method(severity) { |message| @logger.public_send(severity, message) }
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "api/internal/auth"
4
+
5
module Braintrust
  # Holds Braintrust configuration (credentials, org info, endpoints).
  # A single instance can be published process-wide through State.global,
  # which is guarded by a class-level mutex.
  class State
    attr_reader :api_key, :org_name, :org_id, :default_parent, :app_url, :api_url, :proxy_url, :logged_in

    # Class-level (not per-instance) storage for the global state
    @mutex = Mutex.new
    @global_state = nil

    # @param api_key [String] API key (required, must be non-empty)
    # @param org_name [String, nil] Organization name
    # @param org_id [String, nil] Organization ID
    # @param default_parent [Object, nil] Stored as-is and exposed via #default_parent
    # @param app_url [String, nil] App URL (defaults to "https://www.braintrust.dev")
    # @param api_url [String, nil] API URL (filled in by #login when not given)
    # @param proxy_url [String, nil] Proxy URL (filled in by #login when not given)
    # @param logged_in [Boolean] Whether login has already been performed
    # @raise [ArgumentError] if api_key is nil or empty
    def initialize(api_key: nil, org_name: nil, org_id: nil, default_parent: nil, app_url: nil, api_url: nil, proxy_url: nil, logged_in: false)
      # Instance-level mutex for thread-safe login
      @login_mutex = Mutex.new
      raise ArgumentError, "api_key is required" if api_key.nil? || api_key.empty?

      @api_key = api_key
      @org_name = org_name
      @org_id = org_id
      @default_parent = default_parent
      @app_url = app_url || "https://www.braintrust.dev"
      @api_url = api_url
      @proxy_url = proxy_url
      @logged_in = logged_in
    end

    # Thread-safe global state getter
    # @return [State, nil]
    def self.global
      @mutex.synchronize { @global_state }
    end

    # Thread-safe global state setter
    # @param state [State, nil]
    def self.global=(state)
      @mutex.synchronize { @global_state = state }
    end

    # Login to the Braintrust API and update this state with org info.
    # Makes a synchronous call through API::Internal::Auth.login and then
    # overwrites @org_id, @org_name, @api_url, @proxy_url and sets @logged_in.
    # Idempotent: returns early if already logged in.
    # Thread-safe: the whole operation runs under the instance's login mutex.
    # @return [self]
    def login
      @login_mutex.synchronize do
        # Return early if already logged in
        return self if @logged_in

        result = API::Internal::Auth.login(
          api_key: @api_key,
          app_url: @app_url,
          org_name: @org_name
        )

        # Update state with org info
        @org_id = result.org_id
        @org_name = result.org_name
        @api_url = result.api_url
        @proxy_url = result.proxy_url
        @logged_in = true

        self
      end
    end

    # Login in a background thread, retrying until it succeeds.
    # The delay between attempts doubles from 1ms and is capped at 5s.
    # Idempotent: does nothing if already logged in, or if a background
    # login started by an earlier call is still running.
    # @return [self]
    def login_in_thread
      # Return early if already logged in (without spawning thread)
      return self if @logged_in

      # Reuse a worker that is still retrying instead of stacking a second
      # one on top of it; this also keeps wait_for_login joined to the thread
      # that is actually doing the work.
      return self if @login_thread&.alive?

      @login_thread = Thread.new do
        retry_count = 0
        max_delay = 5.0

        loop do
          Log.debug("Background login attempt #{retry_count + 1}")
          login
          Log.debug("Background login succeeded")
          break
        rescue => e
          retry_count += 1
          # Exponential backoff: 1ms, 2ms, 4ms, ... capped at max_delay
          delay = [0.001 * 2**(retry_count - 1), max_delay].min
          Log.debug("Background login failed (attempt #{retry_count}): #{e.message}. Retrying in #{delay}s...")
          sleep delay
        end
      end

      self
    end

    # Wait for the background login thread to finish (for testing).
    # Returns immediately when no background login was ever started.
    # @param timeout [Numeric, nil] Optional timeout in seconds
    # @return [self]
    def wait_for_login(timeout = nil)
      @login_thread&.join(timeout)
      self
    end

    # Validate that the state is properly configured.
    # @return [self]
    # @raise [ArgumentError] if api_key, api_url or app_url is missing, or if
    #   logged_in is true while org_id or org_name is missing
    def validate
      raise ArgumentError, "api_key is required" if @api_key.nil? || @api_key.empty?
      raise ArgumentError, "api_url is required" if @api_url.nil? || @api_url.empty?
      raise ArgumentError, "app_url is required" if @app_url.nil? || @app_url.empty?

      # If logged_in is true, org_id and org_name should be present
      if @logged_in
        raise ArgumentError, "org_id is required when logged_in is true" if @org_id.nil? || @org_id.empty?
        raise ArgumentError, "org_name is required when logged_in is true" if @org_name.nil? || @org_name.empty?
      end

      self
    end
  end
end