braintrust 0.0.1.alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../api"
4
+ require_relative "scorer"
5
+ require "opentelemetry/sdk"
6
+ require "json"
7
+
8
+ module Braintrust
9
+ module Eval
10
# Functions provides remote function execution capabilities.
# It lets functions hosted on Braintrust servers be used as eval tasks or
# scorers; every invocation is wrapped in an OpenTelemetry span.
module Functions
  class << self
    # Create a task callable that invokes a remote function
    # @param project [String] Project name
    # @param slug [String] Function slug
    # @param state [State, nil] Braintrust state (defaults to global)
    # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider
    # @return [Proc] Lambda that accepts one input and returns the remote output
    # @raise [Error] If no state is available or the function cannot be found
    def task(project:, slug:, state: nil, tracer_provider: nil)
      remote = resolve_remote(project: project, slug: slug, state: state, tracer_provider: tracer_provider)

      lambda do |input|
        invoke_with_span(remote, input)
      end
    end

    # Create a scorer that invokes a remote function
    # The remote function receives a hash with :input, :expected, :output and
    # :metadata, and its result is converted to a float score.
    # @param project [String] Project name
    # @param slug [String] Function slug
    # @param state [State, nil] Braintrust state (defaults to global)
    # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider
    # @return [Scorer] Scorer object (named after the slug) that invokes the remote function
    # @raise [Error] If no state is available or the function cannot be found
    def scorer(project:, slug:, state: nil, tracer_provider: nil)
      remote = resolve_remote(project: project, slug: slug, state: state, tracer_provider: tracer_provider)

      Scorer.new(slug) do |input, expected, output, metadata|
        scorer_input = {
          input: input,
          expected: expected,
          output: output,
          metadata: metadata
        }

        invoke_with_span(remote, scorer_input) { |result| extract_score(result) }
      end
    end

    private

    # Resolve the remote function once and collect what each invocation needs
    # @param project [String] Project name
    # @param slug [String] Function slug
    # @param state [State, nil] Braintrust state (defaults to global)
    # @param tracer_provider [TracerProvider, nil] OpenTelemetry tracer provider
    # @return [Hash] Keys :api, :tracer, :id, :name and :slug
    # @raise [Error] If no state is available or the function cannot be found
    def resolve_remote(project:, slug:, state:, tracer_provider:)
      state ||= Braintrust.current_state
      raise Error, "No state available" unless state

      api = API.new(state: state)
      function_metadata = resolve_function(api, project, slug)
      tracer_provider ||= OpenTelemetry.tracer_provider

      {
        api: api,
        tracer: tracer_provider.tracer("braintrust.functions"),
        id: function_metadata["id"],
        # Fall back to the slug when the metadata carries no display name
        name: function_metadata["name"] || slug,
        slug: slug
      }
    end

    # Invoke the remote function inside a span, recording input, output and errors
    # @param remote [Hash] Value returned by resolve_remote
    # @param payload [Object] Input sent to the remote function
    # @yield [result] Optional post-processing of the raw result; it runs inside
    #   the rescue so that its failures are recorded on the span as well
    # @return [Object] The raw result, or the block's value when a block is given
    def invoke_with_span(remote, payload)
      remote[:tracer].in_span("function: #{remote[:slug]}") do |span|
        span.set_attribute("braintrust.span_attributes", JSON.dump({type: "function"}))
        span.set_attribute("braintrust.input_json", JSON.dump(payload))
        span.set_attribute("braintrust.function.name", remote[:name])
        span.set_attribute("braintrust.function.id", remote[:id])
        span.set_attribute("braintrust.function.slug", remote[:slug])

        begin
          result = remote[:api].functions.invoke(id: remote[:id], input: payload)
          result = yield(result) if block_given?
          span.set_attribute("braintrust.output_json", JSON.dump(result))
          result
        rescue => e
          # Record exception and set error status, then let the caller see it
          span.record_exception(e)
          span.status = OpenTelemetry::Trace::Status.error(e.message)
          raise
        end
      end
    end

    # Convert a remote scorer result into a float score
    # A Hash result contributes its "score" (or :score) entry; previously the
    # whole Hash was stringified, which always produced 0.0.
    # NOTE(review): assumes remote scorers may return a hash with a score key -
    # confirm against the Braintrust function invoke API.
    # Anything non-numeric still goes through the best-effort to_s.to_f conversion.
    # @param result [Object] Raw value returned by the remote function
    # @return [Float] The score
    def extract_score(result)
      value = result
      if result.is_a?(Hash)
        value = result.key?("score") ? result["score"] : result[:score]
      end

      value.is_a?(Numeric) ? value.to_f : value.to_s.to_f
    end

    # Resolve function metadata from project name and slug
    # @param api [API] API client
    # @param project [String] Project name
    # @param slug [String] Function slug
    # @return [Hash] Metadata of the first matching function
    # @raise [Error] If the listing contains no function
    def resolve_function(api, project, slug)
      result = api.functions.list(project_name: project, slug: slug)
      functions = result["objects"]

      if functions.nil? || functions.empty?
        raise Error, "Function '#{slug}' not found in project '#{project}'"
      end

      functions.first
    end
  end
end
136
+ end
137
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Braintrust
4
+ module Eval
5
# Result represents the outcome of an evaluation run
# Contains experiment metadata, errors, and timing information
class Result
  attr_reader :experiment_id, :experiment_name, :project_id,
    :permalink, :errors, :duration

  # Create a new result
  # @param experiment_id [String] The experiment ID
  # @param experiment_name [String] The experiment name
  # @param project_id [String] The project ID
  # @param permalink [String] Link to view the experiment in Braintrust UI
  # @param errors [Array<String>, nil] List of errors that occurred (nil is treated as none)
  # @param duration [Numeric, nil] Duration in seconds (nil when it was not measured)
  def initialize(experiment_id:, experiment_name:, project_id:,
    permalink:, errors:, duration:)
    @experiment_id = experiment_id
    @experiment_name = experiment_name
    @project_id = project_id
    @permalink = permalink
    # Array() returns an array argument unchanged (same object) and turns nil
    # into [], so success?/failed?/to_s cannot fail on a missing list
    @errors = Array(errors)
    @duration = duration
  end

  # Check if the evaluation was successful (no errors)
  # @return [Boolean]
  def success?
    errors.empty?
  end

  # Check if the evaluation failed (has errors)
  # @return [Boolean]
  def failed?
    !success?
  end

  # Format the result as a human-readable string (Go SDK format)
  # A missing duration is printed as "n/a" instead of raising.
  # @return [String]
  def to_s
    shown_duration = duration ? "#{duration.round(2)}s" : "n/a"

    [
      "Experiment: #{experiment_name}",
      "ID: #{experiment_id}",
      "Link: #{permalink}",
      "Duration: #{shown_duration}",
      "Errors: #{errors.length}"
    ].join("\n")
  end
end
52
+ end
53
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Braintrust
4
+ module Eval
5
# Scorer wraps a scoring function that evaluates task output against expected values
# Scorers can accept 3 params (input, expected, output) or 4 params (input, expected, output, metadata)
# They can return a float, hash, or array of hashes
class Scorer
  attr_reader :name

  # Create a new scorer
  # @param name_or_callable [String, Symbol, #call] Name or callable (if callable, name is auto-detected)
  # @param callable [#call, nil] Callable if name was provided separately
  # @param block [Proc, nil] Block if no callable provided
  # @raise [ArgumentError] If nothing callable is given or its signature
  #   cannot take 3 or 4 positional arguments
  def initialize(name_or_callable = nil, callable = nil, &block)
    if name_or_callable.nil? && callable.nil? && block.nil?
      raise ArgumentError, "Must provide callable or block"
    end

    case name_or_callable
    when String, Symbol
      # First argument is the name; the callable comes second or as a block
      @name = name_or_callable.to_s
      @callable = callable || block
      raise ArgumentError, "Must provide callable or block" unless @callable
    else
      # First argument (if any) is the callable; the name is auto-detected
      @callable = name_or_callable || callable || block
      @name = detect_name(@callable)
    end

    unless @callable.respond_to?(:call)
      raise ArgumentError, "Scorer must be callable (respond to :call)"
    end

    @wrapped_callable = wrap_callable(@callable)
  end

  # Call the scorer
  # @param input [Object] The input to the task
  # @param expected [Object] The expected output
  # @param output [Object] The actual output from the task
  # @param metadata [Hash] Optional metadata
  # @return [Float, Hash, Array] Score value(s)
  def call(input, expected, output, metadata = {})
    @wrapped_callable.call(input, expected, output, metadata)
  end

  private

  # Detect the name from a callable object
  # Uses the callable's own #name (Method objects included) and falls back
  # to "scorer" when there is none or it is nil/empty.
  # @param callable [#call] The callable
  # @return [String] The detected name
  def detect_name(callable)
    detected = callable.respond_to?(:name) ? callable.name.to_s : ""
    detected.empty? ? "scorer" : detected
  end

  # Wrap the callable to always accept 4 parameters
  # The declared parameters are inspected rather than the arity alone, so a
  # block with an optional 4th parameter (which reports arity 3) still
  # receives metadata, and variadic signatures such as (input, *rest) work.
  # @param callable [#call] The callable to wrap
  # @return [#call] The callable itself, or a lambda that drops metadata
  # @raise [ArgumentError] If the callable can take neither 3 nor 4 arguments
  def wrap_callable(callable)
    return callable if accepts_positional?(callable, 4)

    if accepts_positional?(callable, 3)
      # Callable takes 3 params - wrap to ignore metadata
      return ->(input, expected, output, _metadata) { callable.call(input, expected, output) }
    end

    raise ArgumentError, "Scorer must accept 3 or 4 parameters (got arity #{callable_arity(callable)})"
  end

  # Check whether the callable's declared signature fits a positional argument count
  # Non-lambda procs report every parameter as optional, so for them this
  # compares against the number of declared parameters.
  # @param callable [#call] The callable
  # @param count [Integer] Number of positional arguments to pass
  # @return [Boolean]
  def accepts_positional?(callable, count)
    target = callable.respond_to?(:parameters) ? callable : callable.method(:call)
    kinds = target.parameters.map(&:first)
    required = kinds.count(:req)
    return false if required > count

    kinds.include?(:rest) || required + kinds.count(:opt) >= count
  end

  # Get the arity of a callable (used for the error message)
  # @param callable [#call] The callable
  # @return [Integer] The arity
  def callable_arity(callable)
    callable.respond_to?(:arity) ? callable.arity : callable.method(:call).arity
  end
end
107
+ end
108
+ end