braintrust 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -15
- data/lib/braintrust/api/internal/btql.rb +124 -0
- data/lib/braintrust/api/internal/experiments.rb +19 -0
- data/lib/braintrust/api/internal/projects.rb +19 -0
- data/lib/braintrust/dataset.rb +6 -3
- data/lib/braintrust/eval/context.rb +131 -0
- data/lib/braintrust/eval/evaluator.rb +11 -5
- data/lib/braintrust/eval/functions.rb +10 -166
- data/lib/braintrust/eval/runner.rb +100 -108
- data/lib/braintrust/eval/scorer.rb +24 -96
- data/lib/braintrust/eval/trace.rb +129 -0
- data/lib/braintrust/eval.rb +60 -132
- data/lib/braintrust/functions.rb +168 -0
- data/lib/braintrust/internal/callable.rb +83 -0
- data/lib/braintrust/logger.rb +9 -0
- data/lib/braintrust/scorer.rb +122 -0
- data/lib/braintrust/server/handlers/eval.rb +3 -3
- data/lib/braintrust/task.rb +108 -0
- data/lib/braintrust/version.rb +1 -1
- metadata +8 -1
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Braintrust
  module Internal
    module Callable
      # Filters keyword arguments so callers can pass a superset of kwargs
      # and the receiver only gets the ones it declared. This avoids the
      # ArgumentError Ruby raises for unknown keywords without requiring **
      # on every definition.
      #
      # When prepended on a class, intercepts #call and slices kwargs to match
      # the declared parameters before forwarding. Methods with **keyrest
      # receive all kwargs unfiltered.
      #
      # @example
      #   class Greeter
      #     prepend Internal::Callable::KeywordFilter
      #     def call(name:)
      #       "hello #{name}"
      #     end
      #   end
      #   Greeter.new.call(name: "world", extra: "ignored")  # => "hello world"
      module KeywordFilter
        # Filter kwargs to only the keyword params declared by the given parameters list.
        # Returns kwargs unchanged (the same Hash object) if parameters include **keyrest.
        #
        # @param params [Array<Array>] parameter list from Proc#parameters or Method#parameters
        # @param kwargs [Hash] keyword arguments to filter
        # @return [Hash] filtered keyword arguments
        def self.filter(params, kwargs)
          return kwargs if has_keyword_splat?(params)

          declared_keys = params
            .select { |type, _| type == :keyreq || type == :key }
            .map(&:last)
          kwargs.slice(*declared_keys)
        end

        # Wrap a Proc to filter kwargs to only its declared keyword params.
        # Returns the block unchanged if it accepts **keyrest.
        #
        # @param block [Proc] the block to wrap
        # @return [Proc] a wrapper that filters kwargs, or the original block
        def self.wrap_block(block)
          # A proc's parameter list is fixed, so read it once instead of on every call.
          params = block.parameters
          return block if has_keyword_splat?(params)

          ->(**kw) { block.call(**filter(params, kw)) }
        end

        # Whether params include ** (keyword splat / keyrest).
        #
        # @param params [Array<Array>] parameter list
        # @return [Boolean]
        def self.has_keyword_splat?(params)
          params.any? { |type, _| type == :keyrest }
        end

        # Whether params include any keyword parameters (key, keyreq, or keyrest).
        #
        # @param params [Array<Array>] parameter list
        # @return [Boolean]
        def self.has_any_keywords?(params)
          params.any? { |type, _| type == :keyreq || type == :key || type == :keyrest }
        end

        # When prepended, filters kwargs before the next #call in the ancestor chain.
        # If the instance defines #call_parameters, uses those.
        # Otherwise introspects the #call that sits directly below this module.
        #
        # @param kwargs [Hash] keyword arguments
        # @return [Object] result of the filtered #call
        def call(**kwargs)
          params = if respond_to?(:call_parameters)
            call_parameters
          else
            # method(:call) is the *first* #call in the ancestor chain. That is
            # not necessarily this one: a subclass (or singleton) may override
            # #call and reach us through super. Walk down to the entry owned by
            # KeywordFilter so we introspect the implementation below the
            # filter rather than the filter's own **kwargs signature.
            impl = method(:call)
            impl = impl.super_method while impl && !impl.owner.equal?(KeywordFilter)
            impl = impl&.super_method
            # Nothing below us implements #call: let super raise as usual.
            return super unless impl
            impl.parameters
          end
          super(**KeywordFilter.filter(params, kwargs))
        end
      end
    end
  end
end
|
data/lib/braintrust/logger.rb
CHANGED
|
@@ -8,6 +8,7 @@ module Braintrust
|
|
|
8
8
|
# Default to WARN unless BRAINTRUST_DEBUG is set
|
|
9
9
|
level = ENV["BRAINTRUST_DEBUG"] ? Logger::DEBUG : Logger::WARN
|
|
10
10
|
@logger = Logger.new($stderr, level: level)
|
|
11
|
+
@warned = Set.new
|
|
11
12
|
|
|
12
13
|
class << self
|
|
13
14
|
attr_accessor :logger
|
|
@@ -24,6 +25,14 @@ module Braintrust
|
|
|
24
25
|
@logger.warn(message)
|
|
25
26
|
end
|
|
26
27
|
|
|
28
|
+
# Emit a warning only once per unique key.
|
|
29
|
+
# Subsequent calls with the same key are silently ignored.
|
|
30
|
+
def warn_once(key, message)
  # Set#add? returns nil when the key is already recorded, so a repeated key
  # exits here before anything is logged.
  return unless @warned.add?(key)

  @logger.warn(message)
end
|
|
35
|
+
|
|
27
36
|
def error(message)
|
|
28
37
|
@logger.error(message)
|
|
29
38
|
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "internal/callable"
|
|
4
|
+
|
|
5
|
+
module Braintrust
  # Scorer wraps a scoring function that evaluates task output against expected values.
  #
  # Use inline with a block (keyword args):
  #   scorer = Scorer.new("my_scorer") { |expected:, output:| output == expected ? 1.0 : 0.0 }
  #
  # Or include in a class and define #call with keyword args:
  #   class FuzzyMatch
  #     include Braintrust::Scorer
  #
  #     def call(expected:, output:)
  #       output == expected ? 1.0 : 0.0
  #     end
  #   end
  #
  # Legacy callables with 3 or 4 positional params are auto-wrapped for
  # backwards compatibility but emit a deprecation warning.
  module Scorer
    DEFAULT_NAME = "scorer"

    # Hook run when a class does +include Scorer+; mixes in Scorer::Callable,
    # whose own hook prepends the keyword filter.
    #
    # @param base [Class] the class including Scorer
    def self.included(base)
      base.include(Callable)
    end

    # Create a block-based scorer.
    #
    # @param name [String, nil] optional name (defaults to "scorer")
    # @param block [Proc] the scoring implementation; declare only the keyword
    #   args you need. Extra kwargs are filtered out automatically.
    #
    #   Supported kwargs: +input:+, +expected:+, +output:+, +metadata:+, +trace:+
    # @return [Scorer::Block]
    # @raise [ArgumentError] if no block is given or the block has unsupported arity
    def self.new(name = nil, &block)
      Block.new(name: name || DEFAULT_NAME, &block)
    end

    # Included into classes that +include Scorer+. Prepends KeywordFilter
    # so #call receives only its declared kwargs, and provides a default #name.
    module Callable
      # @param base [Class] the class including Callable
      def self.included(base)
        base.prepend(Internal::Callable::KeywordFilter)
      end

      # Default name derived from the class name (e.g. FuzzyMatch -> "fuzzy_match").
      # Falls back to Scorer::DEFAULT_NAME when the class has no name
      # (anonymous classes return nil from Class#name).
      # @return [String]
      def name
        klass = self.class.name&.split("::")&.last
        return Scorer::DEFAULT_NAME unless klass
        klass.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
      end
    end

    # Block-based scorer. Stores a Proc and delegates #call to it.
    # Includes Scorer so it satisfies +Scorer ===+ checks (e.g. in Context::Factory).
    # Exposes #call_parameters so KeywordFilter can introspect the block's
    # declared kwargs rather than Block#call's **kwargs signature.
    class Block
      include Scorer

      # @return [String]
      attr_reader :name

      # @param name [String] scorer name
      # @param block [Proc] scoring implementation
      # @raise [ArgumentError] if no block is given or the block has unsupported arity
      def initialize(name: DEFAULT_NAME, &block)
        @name = name
        @block = wrap_block(block)
      end

      # @param kwargs [Hash] keyword arguments (filtered by KeywordFilter)
      # @return [Float, Hash, Array] score result
      def call(**kwargs)
        @block.call(**kwargs)
      end

      # Exposes the block's parameter list so KeywordFilter can filter
      # kwargs to match the block's declared keywords.
      # @return [Array<Array>] parameter list from Proc#parameters
      def call_parameters
        @block.parameters
      end

      private

      # Legacy positional wrapping: arity 3/4/-4/-1 maps to (input, expected, output[, metadata]).
      # Keyword and zero-arity blocks are stored raw; KeywordFilter handles filtering at call time.
      # @param block [Proc, nil]
      # @return [Proc]
      # @raise [ArgumentError] if block is nil or has an unsupported arity
      def wrap_block(block)
        # Without this guard a missing block surfaces as NoMethodError on nil,
        # which hides the actual mistake from the caller.
        raise ArgumentError, "Scorer requires a block" unless block

        params = block.parameters
        if Internal::Callable::KeywordFilter.has_any_keywords?(params) || block.arity == 0
          block
        else
          case block.arity
          when 3
            Log.warn_once(:scorer_positional_3, "Scorer with positional params (input, expected, output) is deprecated. Use keyword args: |input:, expected:, output:| instead.")
            ->(**kw) { block.call(kw[:input], kw[:expected], kw[:output]) }
          when 4, -4, -1
            Log.warn_once(:scorer_positional_4, "Scorer with positional params (input, expected, output, metadata) is deprecated. Use keyword args: |input:, expected:, output:, metadata:| instead.")
            ->(**kw) { block.call(kw[:input], kw[:expected], kw[:output], kw[:metadata]) }
          else
            raise ArgumentError, "Scorer must accept keyword args or 3-4 positional params (got arity #{block.arity})"
          end
        end
      end
    end

    # Value object wrapping a remote scorer function UUID.
    # Used by Eval.run to distinguish remote scorers from local callables.
    ID = Struct.new(:function_id, :version, keyword_init: true)
  end

  # @deprecated Use {Braintrust::Scorer::ID} instead.
  ScorerId = Scorer::ID
end
|
|
@@ -124,7 +124,7 @@ module Braintrust
|
|
|
124
124
|
end
|
|
125
125
|
[cases, nil]
|
|
126
126
|
elsif data.key?("dataset_id")
|
|
127
|
-
[nil, Braintrust::
|
|
127
|
+
[nil, Braintrust::Dataset::ID.new(id: data["dataset_id"])]
|
|
128
128
|
elsif data.key?("dataset_name")
|
|
129
129
|
dataset_opts = {name: data["dataset_name"]}
|
|
130
130
|
dataset_opts[:project] = data["project_name"] if data["project_name"]
|
|
@@ -134,14 +134,14 @@ module Braintrust
|
|
|
134
134
|
end
|
|
135
135
|
end
|
|
136
136
|
|
|
137
|
-
# Map request scores array to
|
|
137
|
+
# Map request scores array to Scorer::ID structs.
|
|
138
138
|
# The UI sends function_id as a nested object: {"function_id": "uuid"}.
|
|
139
139
|
def resolve_remote_scorers(scores)
|
|
140
140
|
return nil if scores.nil? || scores.empty?
|
|
141
141
|
scores.map do |s|
|
|
142
142
|
func_id = s["function_id"]
|
|
143
143
|
func_id = func_id["function_id"] if func_id.is_a?(Hash)
|
|
144
|
-
Braintrust::
|
|
144
|
+
Braintrust::Scorer::ID.new(
|
|
145
145
|
function_id: func_id,
|
|
146
146
|
version: s["version"]
|
|
147
147
|
)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "internal/callable"
|
|
4
|
+
|
|
5
|
+
module Braintrust
  # Task wraps a callable that processes inputs.
  #
  # Use inline with a block (keyword args):
  #   task = Task.new("my_task") { |input:| process(input) }
  #
  # Or include in a class and define #call with keyword args:
  #   class MyTask
  #     include Braintrust::Task
  #
  #     def call(input:)
  #       process(input)
  #     end
  #   end
  #
  # Legacy callables with 1 positional param are auto-wrapped for
  # backwards compatibility but emit a deprecation warning.
  module Task
    DEFAULT_NAME = "task"

    # Hook run when a class does +include Task+; mixes in Task::Callable,
    # whose own hook prepends the keyword filter.
    #
    # @param base [Class] the class including Task
    def self.included(base)
      base.include(Callable)
    end

    # Create a block-based task.
    #
    # @param name [String, nil] optional name (defaults to "task")
    # @param block [Proc] the task implementation; declare only the keyword
    #   args you need (e.g. +|input:|+). Extra kwargs passed by the caller
    #   are filtered out automatically.
    # @return [Task::Block]
    # @raise [ArgumentError] if no block is given or the block has unsupported arity
    def self.new(name = nil, &block)
      Block.new(name: name || DEFAULT_NAME, &block)
    end

    # Included into classes that +include Task+. Prepends KeywordFilter
    # so #call receives only its declared kwargs, and provides a default #name.
    module Callable
      # @param base [Class] the class including Callable
      def self.included(base)
        base.prepend(Internal::Callable::KeywordFilter)
      end

      # Default name derived from the class name (e.g. MyTask -> "my_task").
      # Falls back to Task::DEFAULT_NAME when the class has no name
      # (anonymous classes return nil from Class#name).
      # @return [String]
      def name
        klass = self.class.name&.split("::")&.last
        return Task::DEFAULT_NAME unless klass
        klass.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
      end
    end

    # Block-based task. Stores a Proc and delegates #call to it.
    # Includes Task so it satisfies +Task ===+ checks (e.g. in Context::Factory).
    # Exposes #call_parameters so KeywordFilter can introspect the block's
    # declared kwargs rather than Block#call's **kwargs signature.
    class Block
      include Task

      # @return [String]
      attr_reader :name

      # @param name [String] task name
      # @param block [Proc] task implementation
      # @raise [ArgumentError] if no block is given or the block has unsupported arity
      def initialize(name: DEFAULT_NAME, &block)
        @name = name
        @block = wrap_block(block)
      end

      # @param kwargs [Hash] keyword arguments (filtered by KeywordFilter)
      # @return [Object] result of the block
      def call(**kwargs)
        @block.call(**kwargs)
      end

      # Exposes the block's parameter list so KeywordFilter can filter
      # kwargs to match the block's declared keywords.
      # @return [Array<Array>] parameter list from Proc#parameters
      def call_parameters
        @block.parameters
      end

      private

      # Legacy positional wrapping: arity 1/-1 gets :input extracted.
      # Keyword and zero-arity blocks are stored raw; KeywordFilter handles filtering at call time.
      # @param block [Proc, nil]
      # @return [Proc]
      # @raise [ArgumentError] if block is nil or has an unsupported arity
      def wrap_block(block)
        # Without this guard a missing block surfaces as NoMethodError on nil,
        # which hides the actual mistake from the caller.
        raise ArgumentError, "Task requires a block" unless block

        params = block.parameters
        if Internal::Callable::KeywordFilter.has_any_keywords?(params) || block.arity == 0
          block
        elsif block.arity == 1 || block.arity == -1
          Log.warn_once(:task_positional, "Task with positional param (input) is deprecated. Use keyword args: ->(input:) { ... } instead.")
          ->(**kw) { block.call(kw[:input]) }
        else
          raise ArgumentError, "Task must accept keyword args or 1 positional param (got arity #{block.arity})"
        end
      end
    end
  end
end
|
data/lib/braintrust/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: braintrust
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Braintrust
|
|
@@ -193,6 +193,7 @@ files:
|
|
|
193
193
|
- lib/braintrust/api/datasets.rb
|
|
194
194
|
- lib/braintrust/api/functions.rb
|
|
195
195
|
- lib/braintrust/api/internal/auth.rb
|
|
196
|
+
- lib/braintrust/api/internal/btql.rb
|
|
196
197
|
- lib/braintrust/api/internal/experiments.rb
|
|
197
198
|
- lib/braintrust/api/internal/projects.rb
|
|
198
199
|
- lib/braintrust/config.rb
|
|
@@ -234,6 +235,7 @@ files:
|
|
|
234
235
|
- lib/braintrust/eval.rb
|
|
235
236
|
- lib/braintrust/eval/case.rb
|
|
236
237
|
- lib/braintrust/eval/cases.rb
|
|
238
|
+
- lib/braintrust/eval/context.rb
|
|
237
239
|
- lib/braintrust/eval/evaluator.rb
|
|
238
240
|
- lib/braintrust/eval/formatter.rb
|
|
239
241
|
- lib/braintrust/eval/functions.rb
|
|
@@ -241,6 +243,9 @@ files:
|
|
|
241
243
|
- lib/braintrust/eval/runner.rb
|
|
242
244
|
- lib/braintrust/eval/scorer.rb
|
|
243
245
|
- lib/braintrust/eval/summary.rb
|
|
246
|
+
- lib/braintrust/eval/trace.rb
|
|
247
|
+
- lib/braintrust/functions.rb
|
|
248
|
+
- lib/braintrust/internal/callable.rb
|
|
244
249
|
- lib/braintrust/internal/encoding.rb
|
|
245
250
|
- lib/braintrust/internal/env.rb
|
|
246
251
|
- lib/braintrust/internal/http.rb
|
|
@@ -250,6 +255,7 @@ files:
|
|
|
250
255
|
- lib/braintrust/internal/time.rb
|
|
251
256
|
- lib/braintrust/logger.rb
|
|
252
257
|
- lib/braintrust/prompt.rb
|
|
258
|
+
- lib/braintrust/scorer.rb
|
|
253
259
|
- lib/braintrust/server.rb
|
|
254
260
|
- lib/braintrust/server/auth/clerk_token.rb
|
|
255
261
|
- lib/braintrust/server/auth/no_auth.rb
|
|
@@ -264,6 +270,7 @@ files:
|
|
|
264
270
|
- lib/braintrust/server/sse.rb
|
|
265
271
|
- lib/braintrust/setup.rb
|
|
266
272
|
- lib/braintrust/state.rb
|
|
273
|
+
- lib/braintrust/task.rb
|
|
267
274
|
- lib/braintrust/trace.rb
|
|
268
275
|
- lib/braintrust/trace/attachment.rb
|
|
269
276
|
- lib/braintrust/trace/span_exporter.rb
|