langsmith-sdk 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +39 -2
- data/README.md +1 -39
- data/examples/LLM_TRACING.md +0 -58
- data/examples/complex_agent.rb +8 -14
- data/examples/llm_tracing.rb +10 -18
- data/examples/openai_integration.rb +24 -30
- data/lib/langsmith/batch_processor.rb +148 -29
- data/lib/langsmith/client.rb +95 -24
- data/lib/langsmith/configuration.rb +4 -0
- data/lib/langsmith/context.rb +45 -2
- data/lib/langsmith/evaluation/experiment_runner.rb +131 -0
- data/lib/langsmith/evaluation.rb +38 -0
- data/lib/langsmith/run.rb +15 -1
- data/lib/langsmith/run_tree.rb +17 -12
- data/lib/langsmith/version.rb +1 -1
- data/lib/langsmith.rb +1 -1
- metadata +5 -5
- data/langsmith.gemspec +0 -38
- data/lib/langsmith/traceable.rb +0 -120
data/lib/langsmith/client.rb
CHANGED
|
@@ -66,29 +66,6 @@ module Langsmith
|
|
|
66
66
|
patch("/runs/#{run.id}", run.to_h, tenant_id: run.tenant_id)
|
|
67
67
|
end
|
|
68
68
|
|
|
69
|
-
# Batch create/update runs.
|
|
70
|
-
# All runs in a batch should have the same tenant_id for optimal performance.
|
|
71
|
-
#
|
|
72
|
-
# @param post_runs [Array<Run>] runs to create
|
|
73
|
-
# @param patch_runs [Array<Run>] runs to update
|
|
74
|
-
# @param tenant_id [String, nil] tenant ID (inferred from runs if not provided)
|
|
75
|
-
# @return [Hash, nil] API response
|
|
76
|
-
# @raise [APIError] if the request fails
|
|
77
|
-
def batch_ingest(post_runs: [], patch_runs: [], tenant_id: nil)
|
|
78
|
-
return if post_runs.empty? && patch_runs.empty?
|
|
79
|
-
|
|
80
|
-
payload = {}
|
|
81
|
-
payload[:post] = post_runs.map(&:to_h) unless post_runs.empty?
|
|
82
|
-
payload[:patch] = patch_runs.map(&:to_h) unless patch_runs.empty?
|
|
83
|
-
|
|
84
|
-
# Use tenant_id from first run if not explicitly provided
|
|
85
|
-
effective_tenant_id = tenant_id ||
|
|
86
|
-
post_runs.first&.tenant_id ||
|
|
87
|
-
patch_runs.first&.tenant_id
|
|
88
|
-
|
|
89
|
-
post("/runs/batch", payload, tenant_id: effective_tenant_id)
|
|
90
|
-
end
|
|
91
|
-
|
|
92
69
|
# Batch create/update runs using pre-serialized hashes.
|
|
93
70
|
# Used by BatchProcessor which snapshots run data at enqueue time.
|
|
94
71
|
#
|
|
@@ -97,7 +74,7 @@ module Langsmith
|
|
|
97
74
|
# @param tenant_id [String, nil] tenant ID for the request
|
|
98
75
|
# @return [Hash, nil] API response
|
|
99
76
|
# @raise [APIError] if the request fails
|
|
100
|
-
def
|
|
77
|
+
def batch_ingest(post_runs: [], patch_runs: [], tenant_id: nil)
|
|
101
78
|
return if post_runs.empty? && patch_runs.empty?
|
|
102
79
|
|
|
103
80
|
payload = {}
|
|
@@ -107,8 +84,102 @@ module Langsmith
|
|
|
107
84
|
post("/runs/batch", payload, tenant_id: tenant_id)
|
|
108
85
|
end
|
|
109
86
|
|
|
87
|
+
# List examples from a LangSmith dataset.
|
|
88
|
+
#
|
|
89
|
+
# @param dataset_id [String] the dataset ID to fetch examples from
|
|
90
|
+
# @param limit [Integer, nil] max number of examples to return (API max: 100)
|
|
91
|
+
# @param offset [Integer, nil] number of examples to skip
|
|
92
|
+
# @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
|
|
93
|
+
# @return [Array<Hash>] array of example objects
|
|
94
|
+
# @raise [APIError] if the request fails
|
|
95
|
+
def list_examples(dataset_id:, limit: nil, offset: nil, tenant_id: nil)
|
|
96
|
+
params = { dataset: dataset_id }
|
|
97
|
+
params[:limit] = limit if limit
|
|
98
|
+
params[:offset] = offset if offset
|
|
99
|
+
|
|
100
|
+
get("/api/v1/examples", params: params, tenant_id: resolve_tenant_id(tenant_id))
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Create a new experiment (tracer session) linked to a dataset.
|
|
104
|
+
#
|
|
105
|
+
# @param name [String] experiment name
|
|
106
|
+
# @param dataset_id [String] reference dataset ID
|
|
107
|
+
# @param description [String, nil] optional experiment description
|
|
108
|
+
# @param metadata [Hash, nil] optional metadata (stored as `extra`)
|
|
109
|
+
# @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
|
|
110
|
+
# @return [Hash] the created experiment object
|
|
111
|
+
# @raise [APIError] if the request fails
|
|
112
|
+
def create_experiment(name:, dataset_id:, description: nil, metadata: nil, tenant_id: nil)
|
|
113
|
+
payload = {
|
|
114
|
+
name: name,
|
|
115
|
+
reference_dataset_id: dataset_id,
|
|
116
|
+
start_time: Time.now.utc.iso8601
|
|
117
|
+
}
|
|
118
|
+
payload[:description] = description if description
|
|
119
|
+
payload[:extra] = metadata if metadata
|
|
120
|
+
|
|
121
|
+
post("/api/v1/sessions", payload, tenant_id: resolve_tenant_id(tenant_id))
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Close an experiment by setting its end time.
|
|
125
|
+
#
|
|
126
|
+
# @param experiment_id [String] the experiment (session) ID
|
|
127
|
+
# @param end_time [String] ISO-8601 end time
|
|
128
|
+
# @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
|
|
129
|
+
# @return [Hash] the updated experiment object
|
|
130
|
+
# @raise [APIError] if the request fails
|
|
131
|
+
def close_experiment(experiment_id:, end_time:, tenant_id: nil)
|
|
132
|
+
patch("/api/v1/sessions/#{experiment_id}", { end_time: end_time }, tenant_id: resolve_tenant_id(tenant_id))
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Create feedback (a score/annotation) on a run.
|
|
136
|
+
#
|
|
137
|
+
# @param run_id [String] UUID of the run to attach feedback to
|
|
138
|
+
# @param key [String] metric name (e.g. "correctness")
|
|
139
|
+
# @param score [Numeric, nil] numeric score (typically 0.0-1.0)
|
|
140
|
+
# @param value [String, nil] categorical value (alternative to score)
|
|
141
|
+
# @param comment [String, nil] explanation or reasoning
|
|
142
|
+
# @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
|
|
143
|
+
# @return [Hash] the created feedback object
|
|
144
|
+
# @raise [APIError] if the request fails
|
|
145
|
+
def create_feedback(run_id:, key:, score: nil, value: nil, comment: nil, tenant_id: nil)
|
|
146
|
+
payload = { run_id: run_id, key: key }
|
|
147
|
+
payload[:score] = score if score
|
|
148
|
+
payload[:value] = value if value
|
|
149
|
+
payload[:comment] = comment if comment
|
|
150
|
+
|
|
151
|
+
post("/api/v1/feedback", payload, tenant_id: resolve_tenant_id(tenant_id))
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Read a single run by ID.
|
|
155
|
+
#
|
|
156
|
+
# @param run_id [String] UUID of the run to fetch
|
|
157
|
+
# @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
|
|
158
|
+
# @return [Hash] the run object (inputs, outputs, child_run_ids, tokens, etc.)
|
|
159
|
+
# @raise [APIError] if the request fails
|
|
160
|
+
def read_run(run_id:, tenant_id: nil)
|
|
161
|
+
get("/api/v1/runs/#{run_id}", tenant_id: resolve_tenant_id(tenant_id))
|
|
162
|
+
end
|
|
163
|
+
|
|
110
164
|
private
|
|
111
165
|
|
|
166
|
+
def resolve_tenant_id(tenant_id)
|
|
167
|
+
tenant_id || Langsmith.configuration.tenant_id
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
def get(path, params: {}, tenant_id: nil)
|
|
171
|
+
response = connection.get(path, params) do |req|
|
|
172
|
+
req.headers["X-Tenant-Id"] = tenant_id if tenant_id
|
|
173
|
+
end
|
|
174
|
+
handle_response(response)
|
|
175
|
+
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
176
|
+
raise APIError, "Network error: #{e.message}"
|
|
177
|
+
rescue Faraday::Error => e
|
|
178
|
+
raise APIError, "Request failed: #{e.message}" unless e.respond_to?(:response) && e.response
|
|
179
|
+
|
|
180
|
+
handle_response(e.response)
|
|
181
|
+
end
|
|
182
|
+
|
|
112
183
|
def connection
|
|
113
184
|
@connection ||= Faraday.new(url: @endpoint) do |f|
|
|
114
185
|
f.request :json
|
|
data/lib/langsmith/configuration.rb
CHANGED
|
@@ -42,6 +42,9 @@ module Langsmith
|
|
|
42
42
|
# @return [String, nil] Tenant ID for multi-tenant scenarios
|
|
43
43
|
attr_accessor :tenant_id
|
|
44
44
|
|
|
45
|
+
# @return [Integer, nil] Maximum buffered run entries (queue + pending); nil means unlimited
|
|
46
|
+
attr_accessor :max_pending_entries
|
|
47
|
+
|
|
45
48
|
def initialize
|
|
46
49
|
@api_key = ENV.fetch("LANGSMITH_API_KEY", nil)
|
|
47
50
|
@endpoint = ENV.fetch("LANGSMITH_ENDPOINT", "https://api.smith.langchain.com")
|
|
@@ -52,6 +55,7 @@ module Langsmith
|
|
|
52
55
|
@timeout = ENV.fetch("LANGSMITH_TIMEOUT", 10).to_i
|
|
53
56
|
@max_retries = ENV.fetch("LANGSMITH_MAX_RETRIES", 3).to_i
|
|
54
57
|
@tenant_id = ENV.fetch("LANGSMITH_TENANT_ID", nil)
|
|
58
|
+
@max_pending_entries = ENV.fetch("LANGSMITH_MAX_PENDING_ENTRIES", nil)&.to_i
|
|
55
59
|
end
|
|
56
60
|
|
|
57
61
|
# Returns whether tracing is enabled in configuration.
|
data/lib/langsmith/context.rb
CHANGED
|
@@ -12,7 +12,9 @@ module Langsmith
|
|
|
12
12
|
# and caused test failures on Ruby 3.2.
|
|
13
13
|
module Context
|
|
14
14
|
CONTEXT_KEY = :langsmith_run_stack
|
|
15
|
-
|
|
15
|
+
EVALUATION_CONTEXT_KEY = :langsmith_evaluation_context
|
|
16
|
+
EVALUATION_ROOT_RUN_ID_KEY = :langsmith_evaluation_root_run_id
|
|
17
|
+
private_constant :CONTEXT_KEY, :EVALUATION_CONTEXT_KEY, :EVALUATION_ROOT_RUN_ID_KEY
|
|
16
18
|
|
|
17
19
|
class << self
|
|
18
20
|
# Returns the current run stack for this thread.
|
|
@@ -49,9 +51,11 @@ module Langsmith
|
|
|
49
51
|
pop
|
|
50
52
|
end
|
|
51
53
|
|
|
52
|
-
# Clear the entire run stack (useful for testing)
|
|
54
|
+
# Clear the entire run stack and evaluation context (useful for testing)
|
|
53
55
|
def clear!
|
|
54
56
|
Thread.current[CONTEXT_KEY] = []
|
|
57
|
+
Thread.current[EVALUATION_CONTEXT_KEY] = nil
|
|
58
|
+
Thread.current[EVALUATION_ROOT_RUN_ID_KEY] = nil
|
|
55
59
|
end
|
|
56
60
|
|
|
57
61
|
# Check if there's an active trace context
|
|
@@ -68,6 +72,45 @@ module Langsmith
|
|
|
68
72
|
def root_run
|
|
69
73
|
run_stack.first
|
|
70
74
|
end
|
|
75
|
+
|
|
76
|
+
# Returns the current evaluation context, or nil when not in evaluation.
|
|
77
|
+
# @return [Hash, nil] hash with :experiment_id and :example_id, or nil
|
|
78
|
+
def evaluation_context
|
|
79
|
+
Thread.current[EVALUATION_CONTEXT_KEY]
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Returns true when evaluation context is set.
|
|
83
|
+
# @return [Boolean]
|
|
84
|
+
def evaluating?
|
|
85
|
+
!evaluation_context.nil?
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Stores the root run ID for the current evaluation example.
|
|
89
|
+
# Called by RunTree when creating the first root run inside an evaluation block.
|
|
90
|
+
#
|
|
91
|
+
# @param run_id [String] the root run's ID
|
|
92
|
+
def set_evaluation_root_run_id(run_id)
|
|
93
|
+
Thread.current[EVALUATION_ROOT_RUN_ID_KEY] = run_id
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Returns the root run ID for the current evaluation example, or nil.
|
|
97
|
+
# @return [String, nil]
|
|
98
|
+
def evaluation_root_run_id
|
|
99
|
+
Thread.current[EVALUATION_ROOT_RUN_ID_KEY]
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Execute a block with evaluation context set.
|
|
103
|
+
# Context is cleared in ensure block even if the block raises.
|
|
104
|
+
#
|
|
105
|
+
# @param experiment_id [String] the experiment session ID
|
|
106
|
+
# @param example_id [String] the dataset example ID
|
|
107
|
+
def with_evaluation(experiment_id:, example_id:)
|
|
108
|
+
Thread.current[EVALUATION_CONTEXT_KEY] = { experiment_id: experiment_id, example_id: example_id }
|
|
109
|
+
yield
|
|
110
|
+
ensure
|
|
111
|
+
Thread.current[EVALUATION_CONTEXT_KEY] = nil
|
|
112
|
+
Thread.current[EVALUATION_ROOT_RUN_ID_KEY] = nil
|
|
113
|
+
end
|
|
71
114
|
end
|
|
72
115
|
end
|
|
73
116
|
end
|
|
data/lib/langsmith/evaluation/experiment_runner.rb
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Langsmith
|
|
4
|
+
module Evaluation
|
|
5
|
+
# Orchestrates running an evaluation experiment against a dataset.
|
|
6
|
+
#
|
|
7
|
+
# Fetches examples, creates an experiment, runs each example through
|
|
8
|
+
# the user-provided block with evaluation context set, scores outputs
|
|
9
|
+
# with evaluators, and returns a summary of results.
|
|
10
|
+
class ExperimentRunner
|
|
11
|
+
# @param dataset_id [String] the dataset to evaluate against
|
|
12
|
+
# @param experiment_name [String] name for the experiment
|
|
13
|
+
# @param description [String, nil] optional experiment description
|
|
14
|
+
# @param metadata [Hash, nil] optional experiment metadata
|
|
15
|
+
# @param evaluators [Hash] map of evaluator key to callable
|
|
16
|
+
# @param block [Proc] block that receives each example and produces a result
|
|
17
|
+
def initialize(dataset_id:, experiment_name:, description: nil, metadata: nil, evaluators: {}, &block)
|
|
18
|
+
@dataset_id = dataset_id
|
|
19
|
+
@experiment_name = experiment_name
|
|
20
|
+
@description = description
|
|
21
|
+
@metadata = metadata
|
|
22
|
+
@evaluators = evaluators
|
|
23
|
+
@block = block
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Run the evaluation experiment.
|
|
27
|
+
#
|
|
28
|
+
# @return [Hash] summary with :experiment_id, :total, :succeeded, :failed, :results
|
|
29
|
+
def run
|
|
30
|
+
examples = client.list_examples(dataset_id: @dataset_id)
|
|
31
|
+
|
|
32
|
+
experiment = client.create_experiment(
|
|
33
|
+
name: @experiment_name,
|
|
34
|
+
dataset_id: @dataset_id,
|
|
35
|
+
description: @description,
|
|
36
|
+
metadata: @metadata
|
|
37
|
+
)
|
|
38
|
+
experiment_id = experiment[:id]
|
|
39
|
+
|
|
40
|
+
results = examples.map { |example| run_example(example, experiment_id) }
|
|
41
|
+
|
|
42
|
+
Langsmith.flush
|
|
43
|
+
client.close_experiment(experiment_id: experiment_id, end_time: Time.now.utc.iso8601)
|
|
44
|
+
|
|
45
|
+
build_summary(experiment_id, results)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
def client
|
|
51
|
+
Langsmith.client
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def run_example(example, experiment_id)
|
|
55
|
+
outputs = nil
|
|
56
|
+
run_id = nil
|
|
57
|
+
|
|
58
|
+
begin
|
|
59
|
+
Context.with_evaluation(experiment_id: experiment_id, example_id: example[:id]) do
|
|
60
|
+
outputs = @block.call(example)
|
|
61
|
+
run_id = Context.evaluation_root_run_id
|
|
62
|
+
end
|
|
63
|
+
rescue StandardError => e
|
|
64
|
+
return { example_id: example[:id], run_id: nil, status: :error, error: e.message, feedback: nil }
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
feedback = run_evaluators(example, outputs, run_id)
|
|
68
|
+
{ example_id: example[:id], run_id: run_id, status: :success, error: nil, feedback: feedback }
|
|
69
|
+
rescue StandardError => e
|
|
70
|
+
{ example_id: example[:id], run_id: run_id, status: :success, error: e.message, feedback: nil }
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def run_evaluators(example, outputs, run_id)
|
|
74
|
+
return nil if @evaluators.empty? || run_id.nil?
|
|
75
|
+
|
|
76
|
+
Langsmith.flush
|
|
77
|
+
run = fetch_run_with_retry(run_id)
|
|
78
|
+
|
|
79
|
+
@evaluators.each_with_object({}) do |(key, evaluator), feedback|
|
|
80
|
+
feedback[key] = execute_evaluator(key, evaluator, example, outputs, run_id, run)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# LangSmith has indexing lag after batch ingest — the run may not be
|
|
85
|
+
# queryable immediately. Retry a few times with a short delay.
|
|
86
|
+
def fetch_run_with_retry(run_id, retries: 3, delay: 1)
|
|
87
|
+
client.read_run(run_id: run_id)
|
|
88
|
+
rescue Client::APIError => e
|
|
89
|
+
raise unless e.status_code == 404 && retries.positive?
|
|
90
|
+
|
|
91
|
+
sleep(delay)
|
|
92
|
+
fetch_run_with_retry(run_id, retries: retries - 1, delay: delay)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def execute_evaluator(key, evaluator, example, outputs, run_id, run)
|
|
96
|
+
result = evaluator.call(
|
|
97
|
+
outputs: outputs,
|
|
98
|
+
reference_outputs: example[:outputs],
|
|
99
|
+
inputs: example[:inputs],
|
|
100
|
+
run: run
|
|
101
|
+
)
|
|
102
|
+
return { score: nil, success: true, skipped: true } if result.nil?
|
|
103
|
+
|
|
104
|
+
normalized = normalize_result(result)
|
|
105
|
+
client.create_feedback(run_id: run_id, key: key.to_s, **normalized)
|
|
106
|
+
normalized.merge(success: true)
|
|
107
|
+
rescue StandardError => e
|
|
108
|
+
{ score: nil, success: false, error: e.message }
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def normalize_result(result)
|
|
112
|
+
case result
|
|
113
|
+
when true then { score: 1.0, value: nil, comment: nil }
|
|
114
|
+
when false then { score: 0.0, value: nil, comment: nil }
|
|
115
|
+
when Hash then { score: result[:score], value: result[:value], comment: result[:comment] }
|
|
116
|
+
else { score: result, value: nil, comment: nil }
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def build_summary(experiment_id, results)
|
|
121
|
+
{
|
|
122
|
+
experiment_id: experiment_id,
|
|
123
|
+
total: results.size,
|
|
124
|
+
succeeded: results.count { |r| r[:status] == :success },
|
|
125
|
+
failed: results.count { |r| r[:status] == :error },
|
|
126
|
+
results: results
|
|
127
|
+
}
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
end
|
|
data/lib/langsmith/evaluation.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "evaluation/experiment_runner"
|
|
4
|
+
|
|
5
|
+
module Langsmith
|
|
6
|
+
# Public API for running evaluations against LangSmith datasets.
|
|
7
|
+
#
|
|
8
|
+
# @example
|
|
9
|
+
# Langsmith::Evaluation.run(
|
|
10
|
+
# dataset_id: "dataset-uuid",
|
|
11
|
+
# experiment_name: "my-experiment"
|
|
12
|
+
# ) do |example|
|
|
13
|
+
# Langsmith.trace("eval", run_type: "chain", inputs: example[:inputs]) do
|
|
14
|
+
# my_app.call(example[:inputs])
|
|
15
|
+
# end
|
|
16
|
+
# end
|
|
17
|
+
module Evaluation
|
|
18
|
+
# Run an evaluation experiment against a dataset.
|
|
19
|
+
#
|
|
20
|
+
# @param dataset_id [String] the dataset to evaluate against
|
|
21
|
+
# @param experiment_name [String] name for the experiment
|
|
22
|
+
# @param description [String, nil] optional experiment description
|
|
23
|
+
# @param metadata [Hash, nil] optional experiment metadata
|
|
24
|
+
# @param evaluators [Hash] map of evaluator key to callable (see ExperimentRunner)
|
|
25
|
+
# @yield [Hash] each dataset example
|
|
26
|
+
# @return [Hash] summary with :experiment_id, :total, :succeeded, :failed, :results
|
|
27
|
+
def self.run(dataset_id:, experiment_name:, description: nil, metadata: nil, evaluators: {}, &block)
|
|
28
|
+
ExperimentRunner.new(
|
|
29
|
+
dataset_id: dataset_id,
|
|
30
|
+
experiment_name: experiment_name,
|
|
31
|
+
description: description,
|
|
32
|
+
metadata: metadata,
|
|
33
|
+
evaluators: evaluators,
|
|
34
|
+
&block
|
|
35
|
+
).run
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
data/lib/langsmith/run.rb
CHANGED
|
@@ -37,6 +37,12 @@ module Langsmith
|
|
|
37
37
|
# @return [String, nil] tenant ID for multi-tenant scenarios
|
|
38
38
|
attr_reader :tenant_id
|
|
39
39
|
|
|
40
|
+
# @return [String, nil] links this run to a dataset example (for evaluations)
|
|
41
|
+
attr_reader :reference_example_id
|
|
42
|
+
|
|
43
|
+
# @return [String, nil] links this run to an experiment session (overrides project routing)
|
|
44
|
+
attr_reader :session_id
|
|
45
|
+
|
|
40
46
|
# @return [String] trace ID (root run's ID)
|
|
41
47
|
attr_reader :trace_id
|
|
42
48
|
|
|
@@ -81,6 +87,8 @@ module Langsmith
|
|
|
81
87
|
# @param tenant_id [String, nil] tenant ID for multi-tenant scenarios
|
|
82
88
|
# @param trace_id [String, nil] trace ID (defaults to own ID for root runs)
|
|
83
89
|
# @param parent_dotted_order [String, nil] parent's dotted order for tree ordering
|
|
90
|
+
# @param reference_example_id [String, nil] dataset example ID (for evaluations)
|
|
91
|
+
# @param session_id [String, nil] experiment session ID (overrides project routing)
|
|
84
92
|
#
|
|
85
93
|
# @raise [ArgumentError] if run_type is invalid
|
|
86
94
|
def initialize(
|
|
@@ -95,7 +103,9 @@ module Langsmith
|
|
|
95
103
|
id: nil,
|
|
96
104
|
tenant_id: nil,
|
|
97
105
|
trace_id: nil,
|
|
98
|
-
parent_dotted_order: nil
|
|
106
|
+
parent_dotted_order: nil,
|
|
107
|
+
reference_example_id: nil,
|
|
108
|
+
session_id: nil
|
|
99
109
|
)
|
|
100
110
|
@id = id || SecureRandom.uuid
|
|
101
111
|
@name = name
|
|
@@ -114,6 +124,8 @@ module Langsmith
|
|
|
114
124
|
@tags = tags || []
|
|
115
125
|
@extra = extra || {}
|
|
116
126
|
@events = []
|
|
127
|
+
@reference_example_id = reference_example_id
|
|
128
|
+
@session_id = session_id
|
|
117
129
|
# dotted_order is used for ordering runs in the trace tree
|
|
118
130
|
@dotted_order = build_dotted_order(parent_dotted_order)
|
|
119
131
|
end
|
|
@@ -242,6 +254,8 @@ module Langsmith
|
|
|
242
254
|
outputs:,
|
|
243
255
|
error:,
|
|
244
256
|
parent_run_id:,
|
|
257
|
+
reference_example_id:,
|
|
258
|
+
session_id:,
|
|
245
259
|
trace_id:,
|
|
246
260
|
dotted_order:,
|
|
247
261
|
session_name:,
|
data/lib/langsmith/run_tree.rb
CHANGED
|
@@ -20,23 +20,19 @@ module Langsmith
|
|
|
20
20
|
tenant_id: nil,
|
|
21
21
|
project: nil
|
|
22
22
|
)
|
|
23
|
-
# If no explicit parent, check context for current parent
|
|
24
23
|
effective_parent_id = parent_run_id || Context.current_parent_run_id
|
|
25
|
-
|
|
26
|
-
# Inherit tenant_id from parent run if not explicitly set
|
|
27
24
|
effective_tenant_id = tenant_id || Context.current_run&.tenant_id
|
|
28
25
|
|
|
29
26
|
# Child traces must use the same project as their parent to keep the trace tree together.
|
|
30
|
-
# Only root traces can set the project; children always inherit from parent.
|
|
31
27
|
effective_project = Context.current_run&.session_name || project
|
|
32
|
-
|
|
33
|
-
# Inherit trace_id from root run (parent's trace_id)
|
|
34
|
-
# For root runs, trace_id will default to the run's own ID
|
|
35
28
|
effective_trace_id = Context.current_run&.trace_id
|
|
36
|
-
|
|
37
|
-
# Inherit dotted_order from parent for proper trace ordering
|
|
38
29
|
parent_dotted_order = Context.current_run&.dotted_order
|
|
39
30
|
|
|
31
|
+
# Inject evaluation context: session_id on ALL runs, reference_example_id only on root runs.
|
|
32
|
+
eval_ctx = Context.evaluation_context
|
|
33
|
+
effective_session_id = eval_ctx&.dig(:experiment_id)
|
|
34
|
+
effective_ref_example_id = eval_ctx&.dig(:example_id) unless effective_parent_id
|
|
35
|
+
|
|
40
36
|
@run = Run.new(
|
|
41
37
|
name: name,
|
|
42
38
|
run_type: run_type,
|
|
@@ -48,11 +44,13 @@ module Langsmith
|
|
|
48
44
|
tenant_id: effective_tenant_id,
|
|
49
45
|
session_name: effective_project,
|
|
50
46
|
trace_id: effective_trace_id,
|
|
51
|
-
parent_dotted_order: parent_dotted_order
|
|
47
|
+
parent_dotted_order: parent_dotted_order,
|
|
48
|
+
reference_example_id: effective_ref_example_id,
|
|
49
|
+
session_id: effective_session_id
|
|
52
50
|
)
|
|
53
51
|
|
|
54
|
-
|
|
55
|
-
@posted_end = false
|
|
52
|
+
register_evaluation_root_run(effective_parent_id)
|
|
53
|
+
@posted_start = @posted_end = false
|
|
56
54
|
end
|
|
57
55
|
|
|
58
56
|
# Post the run start to LangSmith
|
|
@@ -138,6 +136,13 @@ module Langsmith
|
|
|
138
136
|
|
|
139
137
|
private
|
|
140
138
|
|
|
139
|
+
# Register the root run ID in evaluation context so the runner can
|
|
140
|
+
# attach feedback to it later. Only root runs (no parent) register;
|
|
141
|
+
# child runs must not overwrite.
|
|
142
|
+
def register_evaluation_root_run(effective_parent_id)
|
|
143
|
+
Context.set_evaluation_root_run_id(@run.id) if effective_parent_id.nil? && Context.evaluating?
|
|
144
|
+
end
|
|
145
|
+
|
|
141
146
|
# Sanitize block results to prevent circular references.
|
|
142
147
|
# When users call methods like `run.add_metadata(...)` as the last line,
|
|
143
148
|
# the Run object itself becomes the result, creating a circular reference.
|
data/lib/langsmith/version.rb
CHANGED
data/lib/langsmith.rb
CHANGED
|
@@ -8,7 +8,7 @@ require_relative "langsmith/context"
|
|
|
8
8
|
require_relative "langsmith/client"
|
|
9
9
|
require_relative "langsmith/batch_processor"
|
|
10
10
|
require_relative "langsmith/run_tree"
|
|
11
|
-
require_relative "langsmith/traceable"
|
|
11
|
+
require_relative "langsmith/evaluation"
|
|
12
12
|
|
|
13
13
|
module Langsmith
|
|
14
14
|
class << self
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: langsmith-sdk
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Felipe Cabezudo
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-02-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: concurrent-ruby
|
|
@@ -91,17 +91,17 @@ files:
|
|
|
91
91
|
- examples/complex_agent.rb
|
|
92
92
|
- examples/llm_tracing.rb
|
|
93
93
|
- examples/openai_integration.rb
|
|
94
|
-
- langsmith.gemspec
|
|
95
94
|
- lib/langsmith.rb
|
|
96
95
|
- lib/langsmith/batch_processor.rb
|
|
97
96
|
- lib/langsmith/client.rb
|
|
98
97
|
- lib/langsmith/configuration.rb
|
|
99
98
|
- lib/langsmith/context.rb
|
|
100
99
|
- lib/langsmith/errors.rb
|
|
100
|
+
- lib/langsmith/evaluation.rb
|
|
101
|
+
- lib/langsmith/evaluation/experiment_runner.rb
|
|
101
102
|
- lib/langsmith/railtie.rb
|
|
102
103
|
- lib/langsmith/run.rb
|
|
103
104
|
- lib/langsmith/run_tree.rb
|
|
104
|
-
- lib/langsmith/traceable.rb
|
|
105
105
|
- lib/langsmith/version.rb
|
|
106
106
|
homepage: https://github.com/felipekb/langsmith-ruby-sdk
|
|
107
107
|
licenses:
|
|
@@ -127,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
127
127
|
- !ruby/object:Gem::Version
|
|
128
128
|
version: '0'
|
|
129
129
|
requirements: []
|
|
130
|
-
rubygems_version: 3.
|
|
130
|
+
rubygems_version: 3.4.19
|
|
131
131
|
signing_key:
|
|
132
132
|
specification_version: 4
|
|
133
133
|
summary: Ruby SDK for LangSmith tracing and observability
|
data/langsmith.gemspec
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "lib/langsmith/version"
|
|
4
|
-
|
|
5
|
-
Gem::Specification.new do |spec|
|
|
6
|
-
spec.name = "langsmith-sdk"
|
|
7
|
-
spec.version = Langsmith::VERSION
|
|
8
|
-
spec.authors = ["Felipe Cabezudo"]
|
|
9
|
-
spec.email = ["felipecabedilo@gmail.com"]
|
|
10
|
-
|
|
11
|
-
spec.summary = "Ruby SDK for LangSmith tracing and observability"
|
|
12
|
-
spec.description = "A Ruby client for LangSmith, providing tracing and observability for LLM applications"
|
|
13
|
-
spec.homepage = "https://github.com/felipekb/langsmith-ruby-sdk"
|
|
14
|
-
spec.license = "MIT"
|
|
15
|
-
spec.required_ruby_version = ">= 3.1.0"
|
|
16
|
-
|
|
17
|
-
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
|
18
|
-
spec.metadata["homepage_uri"] = spec.homepage
|
|
19
|
-
spec.metadata["source_code_uri"] = spec.homepage
|
|
20
|
-
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
|
|
21
|
-
spec.metadata["rubygems_mfa_required"] = "true"
|
|
22
|
-
|
|
23
|
-
spec.files = Dir.chdir(__dir__) do
|
|
24
|
-
`git ls-files -z`.split("\x0").reject do |f|
|
|
25
|
-
(File.expand_path(f) == __FILE__) ||
|
|
26
|
-
f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile])
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
spec.bindir = "exe"
|
|
30
|
-
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
31
|
-
spec.require_paths = ["lib"]
|
|
32
|
-
|
|
33
|
-
# Runtime dependencies
|
|
34
|
-
spec.add_dependency "concurrent-ruby", ">= 1.1", "< 3.0"
|
|
35
|
-
spec.add_dependency "faraday", "~> 2.0"
|
|
36
|
-
spec.add_dependency "faraday-net_http_persistent", "~> 2.0"
|
|
37
|
-
spec.add_dependency "faraday-retry", "~> 2.0"
|
|
38
|
-
end
|