langsmithrb_rails 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec_status +158 -79
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +1 -1
- data/README.md +153 -0
- data/langsmithrb_rails-0.1.1.gem +0 -0
- data/lib/langsmithrb_rails/client.rb +217 -2
- data/lib/langsmithrb_rails/config.rb +143 -46
- data/lib/langsmithrb_rails/evaluation/evaluator.rb +178 -0
- data/lib/langsmithrb_rails/evaluation/llm_evaluator.rb +154 -0
- data/lib/langsmithrb_rails/evaluation/string_evaluator.rb +158 -0
- data/lib/langsmithrb_rails/evaluation.rb +76 -0
- data/lib/langsmithrb_rails/otel/exporter.rb +120 -0
- data/lib/langsmithrb_rails/otel.rb +135 -0
- data/lib/langsmithrb_rails/run_trees.rb +157 -0
- data/lib/langsmithrb_rails/version.rb +1 -1
- data/lib/langsmithrb_rails/wrappers/anthropic.rb +146 -0
- data/lib/langsmithrb_rails/wrappers/base.rb +81 -0
- data/lib/langsmithrb_rails/wrappers/llm.rb +151 -0
- data/lib/langsmithrb_rails/wrappers/openai.rb +193 -0
- data/lib/langsmithrb_rails/wrappers.rb +41 -0
- data/lib/langsmithrb_rails.rb +121 -1
- data/pkg/langsmithrb_rails-0.3.0.gem +0 -0
- metadata +16 -2
@@ -3,6 +3,7 @@
|
|
3
3
|
require "net/http"
|
4
4
|
require "json"
|
5
5
|
require "uri"
|
6
|
+
require "securerandom"
|
6
7
|
|
7
8
|
module LangsmithrbRails
|
8
9
|
# Direct REST client for LangSmith API
|
@@ -30,8 +31,196 @@ module LangsmithrbRails
|
|
30
31
|
patch("/runs/#{id}", payload)
|
31
32
|
end
|
32
33
|
|
34
|
+
# Fetch a single run by its ID.
# @param id [String] Run ID
# @return [Hash] Response with status and body
def get_run(id)
  path = "/runs/#{id}"
  get(path)
end
|
40
|
+
|
41
|
+
# List runs, optionally filtered.
# Nil-valued filters are dropped before the query string is built.
# @param project_name [String] Filter by project name
# @param trace_id [String] Filter by trace ID
# @param run_type [String] Filter by run type
# @param limit [Integer] Maximum number of runs to return
# @param offset [Integer] Offset for pagination
# @return [Hash] Response with status and body
def list_runs(project_name: nil, trace_id: nil, run_type: nil, limit: 100, offset: 0)
  filters = {
    project_name: project_name,
    trace_id: trace_id,
    run_type: run_type,
    limit: limit,
    offset: offset
  }.compact
  # URI.encode_www_form encodes each key/value pair and joins with "&",
  # equivalent to encoding each value with encode_www_form_component.
  get("/runs?#{URI.encode_www_form(filters)}")
end
|
60
|
+
|
61
|
+
# Create a dataset in LangSmith.
# The description is only sent when one is supplied.
# @param name [String] Dataset name
# @param description [String] Dataset description
# @return [Hash] Response with status and body
def create_dataset(name, description: nil)
  body = { name: name }
  body[:description] = description unless description.nil?
  post("/datasets", body)
end
|
72
|
+
|
73
|
+
# Fetch a single dataset by its ID.
# @param id [String] Dataset ID
# @return [Hash] Response with status and body
def get_dataset(id)
  path = "/datasets/#{id}"
  get(path)
end
|
79
|
+
|
80
|
+
# List datasets with simple pagination.
# @param limit [Integer] Maximum number of datasets to return
# @param offset [Integer] Offset for pagination
# @return [Hash] Response with status and body
def list_datasets(limit: 100, offset: 0)
  query = "limit=#{limit}&offset=#{offset}"
  get("/datasets?#{query}")
end
|
87
|
+
|
88
|
+
# Create an example in a dataset.
# Outputs are only included in the payload when provided.
# @param dataset_id [String] Dataset ID
# @param inputs [Hash] Example inputs
# @param outputs [Hash] Example outputs (optional)
# @return [Hash] Response with status and body
def create_example(dataset_id, inputs, outputs = nil)
  body = { dataset_id: dataset_id, inputs: inputs }
  body[:outputs] = outputs unless outputs.nil?
  post("/examples", body)
end
|
101
|
+
|
102
|
+
# Fetch a single example by its ID.
# @param id [String] Example ID
# @return [Hash] Response with status and body
def get_example(id)
  path = "/examples/#{id}"
  get(path)
end
|
108
|
+
|
109
|
+
# List examples belonging to a dataset, with pagination.
# @param dataset_id [String] Dataset ID
# @param limit [Integer] Maximum number of examples to return
# @param offset [Integer] Offset for pagination
# @return [Hash] Response with status and body
def list_examples(dataset_id, limit: 100, offset: 0)
  query = "limit=#{limit}&offset=#{offset}"
  get("/datasets/#{dataset_id}/examples?#{query}")
end
|
117
|
+
|
118
|
+
# Attach feedback to a run. A fresh UUID is generated client-side so the
# request is identifiable; nil fields are stripped from the payload.
# @param run_id [String] Run ID
# @param key [String] Feedback key
# @param score [Float, Integer, Boolean] Feedback score
# @param value [Hash] Additional feedback data (optional)
# @param comment [String] Feedback comment (optional)
# @return [Hash] Response with status and body
def create_feedback(run_id, key, score, value: nil, comment: nil)
  body = {
    id: SecureRandom.uuid,
    run_id: run_id,
    key: key,
    score: score,
    value: value,
    comment: comment
  }
  post("/feedback", body.reject { |_k, v| v.nil? })
end
|
136
|
+
|
137
|
+
# Fetch a single feedback record by its ID.
# @param id [String] Feedback ID
# @return [Hash] Response with status and body
def get_feedback(id)
  path = "/feedback/#{id}"
  get(path)
end
|
143
|
+
|
144
|
+
# Fetch all feedback attached to a run.
# NOTE(review): endpoint shape (/runs/:id/feedback) assumed correct for the
# LangSmith API — confirm against the API reference.
# @param run_id [String] Run ID
# @return [Hash] Response with status and body
def list_feedback(run_id)
  path = "/runs/#{run_id}/feedback"
  get(path)
end
|
150
|
+
|
151
|
+
# Create a project in LangSmith.
# The description is only sent when one is supplied.
# @param name [String] Project name
# @param description [String] Project description (optional)
# @return [Hash] Response with status and body
def create_project(name, description: nil)
  body = { name: name }
  body[:description] = description unless description.nil?
  post("/projects", body)
end
|
162
|
+
|
163
|
+
# Fetch a project by name. The name is form-encoded so spaces and
# punctuation survive the URL path.
# @param name [String] Project name
# @return [Hash] Response with status and body
def get_project(name)
  encoded = URI.encode_www_form_component(name)
  get("/projects/#{encoded}")
end
|
169
|
+
|
170
|
+
# List projects with simple pagination.
# @param limit [Integer] Maximum number of projects to return
# @param offset [Integer] Offset for pagination
# @return [Hash] Response with status and body
def list_projects(limit: 100, offset: 0)
  query = "limit=#{limit}&offset=#{offset}"
  get("/projects?#{query}")
end
|
177
|
+
|
178
|
+
# Upload a file attachment as a single-part multipart/form-data POST to
# "#{@api_url}/files".
# @param file_path [String] Path to the file
# @return [Hash] Response with status and body containing the file ID;
#   on any error, { status: 0, error: <message> }
def upload_file(file_path)
  uri = URI.parse("#{@api_url}/files")

  # Block form of File.open guarantees the handle is closed even if the
  # request raises.
  File.open(file_path, 'rb') do |file|
    boundary = SecureRandom.hex(16)

    req = Net::HTTP::Post.new(uri.request_uri)
    req['Authorization'] = "Bearer #{@api_key}" if @api_key
    req['Content-Type'] = "multipart/form-data; boundary=#{boundary}"

    # Hand-built multipart body: one "file" part, CRLF-delimited per RFC 2046.
    # The whole file is read into memory, so very large files will be costly.
    body = []
    body << "--#{boundary}\r\n"
    body << "Content-Disposition: form-data; name=\"file\"; filename=\"#{File.basename(file_path)}\"\r\n"
    body << "Content-Type: #{content_type_for_file(file_path)}\r\n\r\n"
    body << file.read
    body << "\r\n--#{boundary}--\r\n"

    req.body = body.join

    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = uri.scheme == "https"
    # NOTE(review): Config[:...] uses hash-style access, but the Singleton
    # Config class in this release defines no `self.[]` — if that method is
    # absent this raises NoMethodError, which the blanket rescue below turns
    # into { status: 0 }. Confirm Config.[] exists or switch to
    # Config.instance.open_timeout_seconds.
    http.open_timeout = Config[:open_timeout_seconds]
    http.read_timeout = Config[:timeout_seconds]

    res = http.request(req)
    {
      status: res.code.to_i,
      # Non-JSON responses are preserved raw rather than raising.
      body: (JSON.parse(res.body) rescue { "raw" => res.body })
    }
  end
rescue => e
  # Broad rescue keeps uploads best-effort: callers get a status-0 hash
  # instead of an exception. This also hides programming errors — see NOTE above.
  { status: 0, error: e.message }
end
|
214
|
+
|
33
215
|
private
|
34
216
|
|
217
|
+
# Make a GET request
# @param path [String] API path
# @return [Hash] Response with status and body
def get(path)
  # Delegates to the shared #request helper; GET carries no payload.
  request(Net::HTTP::Get, path, nil)
end
|
223
|
+
|
35
224
|
# Make a POST request
|
36
225
|
# @param path [String] API path
|
37
226
|
# @param payload [Hash] Request payload
|
@@ -48,6 +237,13 @@ module LangsmithrbRails
|
|
48
237
|
request(Net::HTTP::Patch, path, payload)
|
49
238
|
end
|
50
239
|
|
240
|
+
# Make a DELETE request
# @param path [String] API path
# @return [Hash] Response with status and body
def delete(path)
  # Delegates to the shared #request helper; DELETE carries no payload.
  request(Net::HTTP::Delete, path, nil)
end
|
246
|
+
|
51
247
|
# Make an HTTP request
|
52
248
|
# @param klass [Class] Net::HTTP request class
|
53
249
|
# @param path [String] API path
|
@@ -62,8 +258,11 @@ module LangsmithrbRails
|
|
62
258
|
|
63
259
|
req = klass.new(uri.request_uri)
|
64
260
|
req["Authorization"] = "Bearer #{@api_key}" if @api_key
|
65
|
-
|
66
|
-
|
261
|
+
|
262
|
+
if payload
|
263
|
+
req["Content-Type"] = "application/json"
|
264
|
+
req.body = JSON.generate(payload)
|
265
|
+
end
|
67
266
|
|
68
267
|
res = http.request(req)
|
69
268
|
{
|
@@ -73,5 +272,21 @@ module LangsmithrbRails
|
|
73
272
|
rescue => e
|
74
273
|
{ status: 0, error: e.message }
|
75
274
|
end
|
275
|
+
|
276
|
+
# Determine the MIME content type for a file from its extension.
# Unknown extensions fall back to the generic binary type.
# @param file_path [String] Path to the file
# @return [String] Content type
def content_type_for_file(file_path)
  types = {
    ".jpg" => "image/jpeg",
    ".jpeg" => "image/jpeg",
    ".png" => "image/png",
    ".pdf" => "application/pdf",
    ".txt" => "text/plain",
    ".json" => "application/json",
    ".csv" => "text/csv"
  }
  types.fetch(File.extname(file_path).downcase, "application/octet-stream")
end
|
76
291
|
end
|
77
292
|
end
|
@@ -2,71 +2,168 @@
|
|
2
2
|
|
3
3
|
require "yaml"
|
4
4
|
require "ostruct"
|
5
|
+
require "logger"
|
6
|
+
require "singleton"
|
5
7
|
|
6
8
|
module LangsmithrbRails
|
7
9
|
# Configuration class for LangsmithrbRails.
#
# A process-wide Singleton: read/write settings via Config.instance, or
# read them hash-style via Config[:key] (kept for backward compatibility
# with pre-0.3 call sites such as the HTTP client's timeout lookups).
class Config
  include Singleton

  # Default configuration values.
  # NOTE: ENV-derived defaults are captured once, when this class is loaded.
  DEFAULTS = {
    api_url: "https://api.smith.langchain.com",
    project_name: "default",
    api_key: ENV["LANGSMITH_API_KEY"],
    sampling: 1.0,
    redact_by_default: true,
    timeout_seconds: 3.0,
    open_timeout_seconds: 1.0,
    env: ENV["RAILS_ENV"] || "development",
    enabled: true,
    # Advanced tracing options
    trace_all: ENV.fetch("LANGSMITH_TRACE_ALL", "false") == "true",
    trace_level: ENV.fetch("LANGSMITH_TRACE_LEVEL", "info").to_sym,
    # OpenTelemetry options
    otel_enabled: ENV.fetch("LANGSMITH_OTEL_ENABLED", "false") == "true",
    otel_service_name: ENV.fetch("LANGSMITH_OTEL_SERVICE_NAME", "langsmithrb_rails"),
    # Evaluation options
    evaluation_enabled: ENV.fetch("LANGSMITH_EVALUATION_ENABLED", "false") == "true",
    # Logging options
    log_level: ENV.fetch("LANGSMITH_LOG_LEVEL", "info").to_sym,
    log_to_stdout: ENV.fetch("LANGSMITH_LOG_TO_STDOUT", "false") == "true"
  }.freeze

  # Configuration attributes
  attr_accessor :api_key, :api_url, :project_name, :enabled, :sampling,
                :trace_all, :trace_level, :otel_enabled, :otel_service_name,
                :evaluation_enabled, :log_level, :log_to_stdout
  # Accessors for the remaining DEFAULTS keys so every configured value is
  # reachable through the public interface (and through Config.[]).
  attr_accessor :redact_by_default, :timeout_seconds, :open_timeout_seconds, :env
  attr_reader :logger

  # Hash-style read access, e.g. Config[:timeout_seconds].
  # Restores the pre-Singleton access pattern that client code still uses.
  # @param key [Symbol, String] Configuration key
  # @return [Object] The configured value
  def self.[](key)
    instance.public_send(key)
  end

  # Initialize with defaults, apply ENV overrides, then build the logger.
  def initialize
    # Set defaults first
    DEFAULTS.each do |key, value|
      instance_variable_set("@#{key}", value)
    end

    # Then override with environment variables if present
    load_from_env

    # Initialize logger
    @logger = create_logger(@log_level, @log_to_stdout)
  end

  # Load configuration from environment variables.
  # Boolean vars use ENV.key? so an explicit "false" still overrides.
  # @return [Config] Self for chaining
  def load_from_env
    @api_key = ENV["LANGSMITH_API_KEY"] if ENV["LANGSMITH_API_KEY"]
    @api_url = ENV["LANGSMITH_API_URL"] if ENV["LANGSMITH_API_URL"]
    @project_name = ENV["LANGSMITH_PROJECT"] if ENV["LANGSMITH_PROJECT"]
    @enabled = ENV["LANGSMITH_ENABLED"] == "true" if ENV.key?("LANGSMITH_ENABLED")
    @sampling = ENV["LANGSMITH_SAMPLING"].to_f if ENV["LANGSMITH_SAMPLING"]
    @trace_all = ENV["LANGSMITH_TRACE_ALL"] == "true" if ENV.key?("LANGSMITH_TRACE_ALL")
    @trace_level = ENV["LANGSMITH_TRACE_LEVEL"].to_sym if ENV["LANGSMITH_TRACE_LEVEL"]
    @otel_enabled = ENV["LANGSMITH_OTEL_ENABLED"] == "true" if ENV.key?("LANGSMITH_OTEL_ENABLED")
    @otel_service_name = ENV["LANGSMITH_OTEL_SERVICE_NAME"] if ENV["LANGSMITH_OTEL_SERVICE_NAME"]
    @evaluation_enabled = ENV["LANGSMITH_EVALUATION_ENABLED"] == "true" if ENV.key?("LANGSMITH_EVALUATION_ENABLED")
    @log_level = ENV["LANGSMITH_LOG_LEVEL"].to_sym if ENV["LANGSMITH_LOG_LEVEL"]
    @log_to_stdout = ENV["LANGSMITH_LOG_TO_STDOUT"] == "true" if ENV.key?("LANGSMITH_LOG_TO_STDOUT")
    self
  end

  # Load configuration from a YAML file, using the RAILS_ENV section.
  # Unknown keys are ignored (only keys with a matching writer are applied).
  # @param path [String] Path to YAML file
  # @return [Config] Self for chaining
  def load_from_yaml(path)
    return self unless File.exist?(path)

    yml = YAML.load_file(path)
    env = (ENV["RAILS_ENV"] || "development").to_s
    config = yml.fetch(env, {})

    config.each do |key, value|
      send("#{key}=", value) if respond_to?("#{key}=")
    end

    # Reset logger if log settings changed
    @logger = create_logger(@log_level, @log_to_stdout)

    self
  end

  # Check if LangSmith is enabled
  # @return [Boolean] Whether LangSmith is enabled
  def enabled?
    @enabled
  end

  # Check if a trace should be sampled
  # @return [Boolean] Whether to sample the trace
  def should_sample?
    return true if @sampling >= 1.0
    return false if @sampling <= 0.0
    Random.rand < @sampling
  end

  # Check if OpenTelemetry is enabled
  # @return [Boolean] Whether OpenTelemetry is enabled
  def otel_enabled?
    @otel_enabled
  end

  # Check if evaluation is enabled
  # @return [Boolean] Whether evaluation is enabled
  def evaluation_enabled?
    @evaluation_enabled
  end

  # Set log level, validating against the known levels, and rebuild the logger.
  # @param level [Symbol, String] Log level
  # @raise [ArgumentError] If the level is not one of debug/info/warn/error/fatal
  # @return [Symbol] Log level
  def log_level=(level)
    level = level.to_sym if level.is_a?(String)
    unless [:debug, :info, :warn, :error, :fatal].include?(level)
      raise ArgumentError, "Invalid log level: #{level}"
    end
    @log_level = level
    @logger = create_logger(@log_level, @log_to_stdout)
    @log_level
  end

  # Set the log sink and rebuild the logger (consistent with #log_level=).
  # @param flag [Boolean] Whether to log to STDOUT
  # @return [Boolean] The new flag
  def log_to_stdout=(flag)
    @log_to_stdout = flag
    @logger = create_logger(@log_level, @log_to_stdout)
    @log_to_stdout
  end

  # Get logger instance (lazily built if not yet initialized)
  # @return [Logger] Logger instance
  def logger
    @logger ||= create_logger(@log_level, @log_to_stdout)
  end

  private

  # Create a logger. Logs go to STDOUT or are discarded (IO::NULL).
  # @param level [Symbol] Log level
  # @param to_stdout [Boolean] Whether to log to STDOUT
  # @return [Logger] Logger instance
  def create_logger(level, to_stdout)
    logger = Logger.new(to_stdout ? STDOUT : IO::NULL)
    logger.level = parse_log_level(level)
    logger.formatter = proc do |severity, _datetime, _progname, msg|
      "[LangsmithrbRails] [#{severity}] #{msg}\n"
    end
    logger
  end

  # Parse a log level name into a Logger constant; unknown names map to INFO.
  # @param level [Symbol, String] Log level
  # @return [Integer] Logger level constant
  def parse_log_level(level)
    case level.to_s.downcase
    when "debug" then Logger::DEBUG
    when "info" then Logger::INFO
    when "warn" then Logger::WARN
    when "error" then Logger::ERROR
    when "fatal" then Logger::FATAL
    else Logger::INFO
    end
  end
end
|
72
169
|
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LangsmithrbRails
|
4
|
+
module Evaluation
|
5
|
+
# Base evaluator class.
#
# Subclasses implement #evaluate and get run/dataset-level evaluation plus
# automatic feedback submission for free. Results are hashes with :score,
# optional :metadata and :comment keys (the contract the private helpers rely on).
class Evaluator
  attr_reader :client, :project_name, :tags

  # Initialize a new evaluator
  # @param client [LangsmithrbRails::Client] LangSmith client (a new one is
  #   built when none is supplied)
  # @param project_name [String] Optional project name for evaluations
  # @param tags [Array<String>] Optional tags for evaluations
  def initialize(client: nil, project_name: nil, tags: [])
    @client = client || LangsmithrbRails::Client.new
    @project_name = project_name
    @tags = tags
  end

  # Evaluate a prediction against a reference
  # @param prediction [String, Hash] The prediction to evaluate
  # @param reference [String, Hash] The reference to compare against
  # @param input [Hash] Optional input that generated the prediction
  # @return [Hash] Evaluation result with score and metadata
  # @raise [NotImplementedError] Always — subclasses must override.
  def evaluate(prediction, reference = nil, input = nil)
    raise NotImplementedError, "Subclasses must implement evaluate method"
  end

  # Evaluate a run: fetch it, score its outputs, and record feedback.
  # @param run_id [String] The ID of the run to evaluate
  # @param reference [String, Hash] The reference to compare against
  # @return [Hash] Evaluation result with score and metadata
  # @raise [RuntimeError] If the run cannot be fetched.
  def evaluate_run(run_id, reference = nil)
    # Get the run
    response = client.get_run(run_id)

    unless response[:status] >= 200 && response[:status] < 300
      raise "Failed to get run: #{response[:error] || response[:body]}"
    end

    # NOTE(review): assumes the response body is a run hash with string keys
    # "inputs"/"outputs" — confirm against the client's JSON parsing.
    run = response[:body]

    # Extract prediction from run outputs
    prediction = extract_prediction_from_run(run)

    # Extract input from run inputs
    input = run["inputs"]

    # Evaluate
    result = evaluate(prediction, reference, input)

    # Create feedback
    create_feedback(run_id, result)

    result
  end

  # Evaluate multiple runs. Runs are fetched and scored sequentially
  # (one API round-trip per run).
  # @param run_ids [Array<String>] The IDs of the runs to evaluate
  # @param references [Hash<String, Object>] Map of run IDs to references
  # @return [Hash<String, Hash>] Map of run IDs to evaluation results
  def evaluate_runs(run_ids, references = {})
    results = {}

    run_ids.each do |run_id|
      reference = references[run_id]
      results[run_id] = evaluate_run(run_id, reference)
    end

    results
  end

  # Evaluate a dataset example-by-example.
  # @param dataset_id [String] The ID of the dataset to evaluate
  # @param experiment_name [String] Name for the experiment
  # @param target_llm [Object] Optional LLM to use for generating predictions;
  #   without one, each example's recorded outputs are scored against themselves.
  # @return [Hash] Evaluation results for the dataset
  # @raise [RuntimeError] If the examples cannot be fetched.
  def evaluate_dataset(dataset_id, experiment_name, target_llm = nil)
    # Get the dataset examples
    response = client.list_examples(dataset_id)

    unless response[:status] >= 200 && response[:status] < 300
      raise "Failed to get dataset examples: #{response[:error] || response[:body]}"
    end

    # NOTE(review): assumes the body is an enumerable of example hashes; only
    # the first page (client default limit 100) is processed — no pagination here.
    examples = response[:body]

    results = {
      experiment_name: experiment_name,
      dataset_id: dataset_id,
      evaluator_name: self.class.name,
      results: []
    }

    examples.each do |example|
      # If target LLM is provided, generate a prediction
      if target_llm
        prediction = generate_prediction(target_llm, example["inputs"])
      else
        # Otherwise use the example's outputs as the prediction
        prediction = example["outputs"]
      end

      # Evaluate
      result = evaluate(prediction, example["outputs"], example["inputs"])

      results[:results] << {
        example_id: example["id"],
        score: result[:score],
        metadata: result[:metadata]
      }
    end

    results
  end

  private

  # Extract prediction from run outputs by probing common output keys;
  # falls back to the whole outputs hash when none match.
  # @param run [Hash] The run data
  # @return [Object] The prediction
  def extract_prediction_from_run(run)
    outputs = run["outputs"] || {}

    # Try common output keys
    %w[output response result text completion answer].each do |key|
      return outputs[key] if outputs.key?(key)
    end

    # If no common keys found, return the entire outputs
    outputs
  end

  # Generate a prediction using an LLM, duck-typing across the common
  # call/generate/complete interfaces.
  # @param llm [Object] The LLM to use
  # @param input [Hash] The input to generate from
  # @return [Object] The generated prediction
  # @raise [RuntimeError] If the object exposes none of the known methods.
  def generate_prediction(llm, input)
    if llm.respond_to?(:call)
      llm.call(input)
    elsif llm.respond_to?(:generate)
      llm.generate(input)
    elsif llm.respond_to?(:complete)
      llm.complete(input)
    else
      raise "Unsupported LLM interface"
    end
  end

  # Create feedback for a run from an evaluation result. Failures are
  # logged, not raised, so evaluation can proceed.
  # The feedback key is the evaluator's demodulized, downcased class name.
  # @param run_id [String] The ID of the run
  # @param result [Hash] The evaluation result (:score, :metadata, :comment)
  # @return [Hash] The created feedback
  def create_feedback(run_id, result)
    feedback_data = {
      run_id: run_id,
      key: self.class.name.split("::").last.downcase,
      score: result[:score],
      value: result[:metadata],
      comment: result[:comment]
    }.compact

    response = client.create_feedback(
      feedback_data[:run_id],
      feedback_data[:key],
      feedback_data[:score],
      value: feedback_data[:value],
      comment: feedback_data[:comment]
    )

    unless response[:status] >= 200 && response[:status] < 300
      # NOTE(review): relies on LangsmithrbRails.logger being defined at the
      # module level — confirmed nowhere in this file.
      LangsmithrbRails.logger.error("Failed to create feedback: #{response[:error] || response[:body]}")
    end

    response[:body]
  end
end
|
177
|
+
end
|
178
|
+
end
|