langsmith-sdk 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5dbe9ea720616e2913af73fd43f00815f5ba0f4abc1003a28362800d25df651f
4
- data.tar.gz: b9f37149e9d81794dced53aa493e76507ebb223470bc88c4036bb5a06ac20ecc
3
+ metadata.gz: 011ff7c7fef392b6ad9e53c8f0e281eceb63eb28fa74ceac651e5b313a0cd81e
4
+ data.tar.gz: aa7714d5f8c12b85dc139cf8ee53f54c14337acea77dec108ee71a5239bf368f
5
5
  SHA512:
6
- metadata.gz: b3d6e11333b5324f986d5fbe5a75caeb44ae5a8008a330eec1af39d1f8800268a00129861373f8fa514a14a5cb4d7d486a2d49cb82e4484df7b25bdadf513538
7
- data.tar.gz: fdc400de8ebc5fb807f2b64762fdb496bc290294b2592a4a41289847de254ad81c29b039ced662f1141f991c55157c6261ef9b3589f5d61c795ee3d0a3631fc5
6
+ metadata.gz: 9e4e254230a934ed50aa5acfccc89624074cc136cb5e9013e3f778f2c00ef3eaf6a84504b40ecada07b2bba5f7eb1a08da8946f076cc2ab4f747fb6047c57708
7
+ data.tar.gz: a041143b6706394415bc9e14c2fdfc92a328bb69d20068f381eb0dd9a2dfa7adc2e29ebd63f9b4b99d15bb80f18dedad345371b05da4d2400eee306f47ec89eb
data/.rubocop.yml CHANGED
@@ -37,7 +37,7 @@ Metrics/BlockLength:
37
37
  - 'Rakefile'
38
38
 
39
39
  Metrics/ClassLength:
40
- Max: 200
40
+ Max: 280
41
41
 
42
42
  Metrics/AbcSize:
43
43
  Max: 32
data/CHANGELOG.md CHANGED
@@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.3.0] - 2026-02-11
11
+
12
+ ### Added
13
+
14
+ - Evaluation module and `ExperimentRunner` for running experiments against datasets
15
+ - Evaluator protocol with support for custom evaluators in `ExperimentRunner`
16
+ - `Client#create_feedback` for submitting evaluation feedback (`POST /api/v1/feedback`)
17
+ - `Client#read_run` for fetching run details (`GET /api/v1/runs/:run_id`)
18
+ - Evaluation API methods: `list_examples`, `create_experiment`, `close_experiment`
19
+ - Evaluation context wired into run creation
20
+ - Root run ID tracking in evaluation context and `RunTree`
21
+ - `reference_example_id` and `session_id` attributes on `Run`
22
+
23
+ ### Fixed
24
+
25
+ - Retry with delay for `read_run` after flush for reliable evaluation reads
26
+ - Graceful error handling separating user block errors from evaluator errors
27
+ - Root run ID read before `with_evaluation` clears it
28
+
10
29
  ## [0.2.0] - 2025-12-21
11
30
 
12
31
  ### Added
@@ -58,7 +77,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
58
77
  - `prompt` - Prompt template rendering
59
78
  - `parser` - Output parsing operations
60
79
 
61
- [Unreleased]: https://github.com/felipekb/langsmith-ruby-sdk/compare/v0.2.0...HEAD
80
+ [Unreleased]: https://github.com/felipekb/langsmith-ruby-sdk/compare/v0.3.0...HEAD
81
+ [0.3.0]: https://github.com/felipekb/langsmith-ruby-sdk/compare/v0.2.0...v0.3.0
62
82
  [0.2.0]: https://github.com/felipekb/langsmith-ruby-sdk/compare/v0.1.1...v0.2.0
63
83
  [0.1.1]: https://github.com/felipekb/langsmith-ruby-sdk/compare/v0.1.0...v0.1.1
64
84
  [0.1.0]: https://github.com/felipekb/langsmith-ruby-sdk/releases/tag/v0.1.0
@@ -312,9 +312,7 @@ module Langsmith
312
312
  def trim_buffer_if_needed
313
313
  return unless @max_pending_entries
314
314
 
315
- while current_buffer_size > @max_pending_entries
316
- drop_one_entry
317
- end
315
+ drop_one_entry while current_buffer_size > @max_pending_entries
318
316
  end
319
317
 
320
318
  def current_buffer_size
@@ -346,7 +344,9 @@ module Langsmith
346
344
  def log_dropped(entry)
347
345
  return unless ENV["LANGSMITH_DEBUG"]
348
346
 
349
- log_error("Dropped run entry due to max_pending_entries cap (type: #{entry[:type]}, tenant: #{entry[:tenant_id]})")
347
+ log_error(
348
+ "Dropped run entry due to max_pending_entries cap (type: #{entry[:type]}, tenant: #{entry[:tenant_id]})"
349
+ )
350
350
  end
351
351
 
352
352
  def log_error(message, force: false)
@@ -84,8 +84,102 @@ module Langsmith
84
84
  post("/runs/batch", payload, tenant_id: tenant_id)
85
85
  end
86
86
 
87
+ # List examples from a LangSmith dataset.
88
+ #
89
+ # @param dataset_id [String] the dataset ID to fetch examples from
90
+ # @param limit [Integer, nil] max number of examples to return (API max: 100)
91
+ # @param offset [Integer, nil] number of examples to skip
92
+ # @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
93
+ # @return [Array<Hash>] array of example objects
94
+ # @raise [APIError] if the request fails
95
+ def list_examples(dataset_id:, limit: nil, offset: nil, tenant_id: nil)
96
+ params = { dataset: dataset_id }
97
+ params[:limit] = limit if limit
98
+ params[:offset] = offset if offset
99
+
100
+ get("/api/v1/examples", params: params, tenant_id: resolve_tenant_id(tenant_id))
101
+ end
102
+
103
+ # Create a new experiment (tracer session) linked to a dataset.
104
+ #
105
+ # @param name [String] experiment name
106
+ # @param dataset_id [String] reference dataset ID
107
+ # @param description [String, nil] optional experiment description
108
+ # @param metadata [Hash, nil] optional metadata (stored as `extra`)
109
+ # @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
110
+ # @return [Hash] the created experiment object
111
+ # @raise [APIError] if the request fails
112
+ def create_experiment(name:, dataset_id:, description: nil, metadata: nil, tenant_id: nil)
113
+ payload = {
114
+ name: name,
115
+ reference_dataset_id: dataset_id,
116
+ start_time: Time.now.utc.iso8601
117
+ }
118
+ payload[:description] = description if description
119
+ payload[:extra] = metadata if metadata
120
+
121
+ post("/api/v1/sessions", payload, tenant_id: resolve_tenant_id(tenant_id))
122
+ end
123
+
124
+ # Close an experiment by setting its end time.
125
+ #
126
+ # @param experiment_id [String] the experiment (session) ID
127
+ # @param end_time [String] ISO-8601 end time
128
+ # @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
129
+ # @return [Hash] the updated experiment object
130
+ # @raise [APIError] if the request fails
131
+ def close_experiment(experiment_id:, end_time:, tenant_id: nil)
132
+ patch("/api/v1/sessions/#{experiment_id}", { end_time: end_time }, tenant_id: resolve_tenant_id(tenant_id))
133
+ end
134
+
135
+ # Create feedback (a score/annotation) on a run.
136
+ #
137
+ # @param run_id [String] UUID of the run to attach feedback to
138
+ # @param key [String] metric name (e.g. "correctness")
139
+ # @param score [Numeric, nil] numeric score (typically 0.0-1.0)
140
+ # @param value [String, nil] categorical value (alternative to score)
141
+ # @param comment [String, nil] explanation or reasoning
142
+ # @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
143
+ # @return [Hash] the created feedback object
144
+ # @raise [APIError] if the request fails
145
+ def create_feedback(run_id:, key:, score: nil, value: nil, comment: nil, tenant_id: nil)
146
+ payload = { run_id: run_id, key: key }
147
+ payload[:score] = score if score
148
+ payload[:value] = value if value
149
+ payload[:comment] = comment if comment
150
+
151
+ post("/api/v1/feedback", payload, tenant_id: resolve_tenant_id(tenant_id))
152
+ end
153
+
154
+ # Read a single run by ID.
155
+ #
156
+ # @param run_id [String] UUID of the run to fetch
157
+ # @param tenant_id [String, nil] tenant ID (falls back to configured tenant_id)
158
+ # @return [Hash] the run object (inputs, outputs, child_run_ids, tokens, etc.)
159
+ # @raise [APIError] if the request fails
160
+ def read_run(run_id:, tenant_id: nil)
161
+ get("/api/v1/runs/#{run_id}", tenant_id: resolve_tenant_id(tenant_id))
162
+ end
163
+
87
164
  private
88
165
 
166
+ def resolve_tenant_id(tenant_id)
167
+ tenant_id || Langsmith.configuration.tenant_id
168
+ end
169
+
170
+ def get(path, params: {}, tenant_id: nil)
171
+ response = connection.get(path, params) do |req|
172
+ req.headers["X-Tenant-Id"] = tenant_id if tenant_id
173
+ end
174
+ handle_response(response)
175
+ rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
176
+ raise APIError, "Network error: #{e.message}"
177
+ rescue Faraday::Error => e
178
+ raise APIError, "Request failed: #{e.message}" unless e.respond_to?(:response) && e.response
179
+
180
+ handle_response(e.response)
181
+ end
182
+
89
183
  def connection
90
184
  @connection ||= Faraday.new(url: @endpoint) do |f|
91
185
  f.request :json
@@ -12,7 +12,9 @@ module Langsmith
12
12
  # and caused test failures on Ruby 3.2.
13
13
  module Context
14
14
  CONTEXT_KEY = :langsmith_run_stack
15
- private_constant :CONTEXT_KEY
15
+ EVALUATION_CONTEXT_KEY = :langsmith_evaluation_context
16
+ EVALUATION_ROOT_RUN_ID_KEY = :langsmith_evaluation_root_run_id
17
+ private_constant :CONTEXT_KEY, :EVALUATION_CONTEXT_KEY, :EVALUATION_ROOT_RUN_ID_KEY
16
18
 
17
19
  class << self
18
20
  # Returns the current run stack for this thread.
@@ -49,9 +51,11 @@ module Langsmith
49
51
  pop
50
52
  end
51
53
 
52
- # Clear the entire run stack (useful for testing)
54
+ # Clear the entire run stack and evaluation context (useful for testing)
53
55
  def clear!
54
56
  Thread.current[CONTEXT_KEY] = []
57
+ Thread.current[EVALUATION_CONTEXT_KEY] = nil
58
+ Thread.current[EVALUATION_ROOT_RUN_ID_KEY] = nil
55
59
  end
56
60
 
57
61
  # Check if there's an active trace context
@@ -68,6 +72,45 @@ module Langsmith
68
72
  def root_run
69
73
  run_stack.first
70
74
  end
75
+
76
+ # Returns the current evaluation context, or nil when not in evaluation.
77
+ # @return [Hash, nil] hash with :experiment_id and :example_id, or nil
78
+ def evaluation_context
79
+ Thread.current[EVALUATION_CONTEXT_KEY]
80
+ end
81
+
82
+ # Returns true when evaluation context is set.
83
+ # @return [Boolean]
84
+ def evaluating?
85
+ !evaluation_context.nil?
86
+ end
87
+
88
+ # Stores the root run ID for the current evaluation example.
89
+ # Called by RunTree when creating the first root run inside an evaluation block.
90
+ #
91
+ # @param run_id [String] the root run's ID
92
+ def set_evaluation_root_run_id(run_id)
93
+ Thread.current[EVALUATION_ROOT_RUN_ID_KEY] = run_id
94
+ end
95
+
96
+ # Returns the root run ID for the current evaluation example, or nil.
97
+ # @return [String, nil]
98
+ def evaluation_root_run_id
99
+ Thread.current[EVALUATION_ROOT_RUN_ID_KEY]
100
+ end
101
+
102
+ # Execute a block with evaluation context set.
103
+ # Context is cleared in ensure block even if the block raises.
104
+ #
105
+ # @param experiment_id [String] the experiment session ID
106
+ # @param example_id [String] the dataset example ID
107
+ def with_evaluation(experiment_id:, example_id:)
108
+ Thread.current[EVALUATION_CONTEXT_KEY] = { experiment_id: experiment_id, example_id: example_id }
109
+ yield
110
+ ensure
111
+ Thread.current[EVALUATION_CONTEXT_KEY] = nil
112
+ Thread.current[EVALUATION_ROOT_RUN_ID_KEY] = nil
113
+ end
71
114
  end
72
115
  end
73
116
  end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langsmith
4
+ module Evaluation
5
+ # Orchestrates running an evaluation experiment against a dataset.
6
+ #
7
+ # Fetches examples, creates an experiment, runs each example through
8
+ # the user-provided block with evaluation context set, scores outputs
9
+ # with evaluators, and returns a summary of results.
10
+ class ExperimentRunner
11
+ # @param dataset_id [String] the dataset to evaluate against
12
+ # @param experiment_name [String] name for the experiment
13
+ # @param description [String, nil] optional experiment description
14
+ # @param metadata [Hash, nil] optional experiment metadata
15
+ # @param evaluators [Hash] map of evaluator key to callable
16
+ # @param block [Proc] block that receives each example and produces a result
17
+ def initialize(dataset_id:, experiment_name:, description: nil, metadata: nil, evaluators: {}, &block)
18
+ @dataset_id = dataset_id
19
+ @experiment_name = experiment_name
20
+ @description = description
21
+ @metadata = metadata
22
+ @evaluators = evaluators
23
+ @block = block
24
+ end
25
+
26
+ # Run the evaluation experiment.
27
+ #
28
+ # @return [Hash] summary with :experiment_id, :total, :succeeded, :failed, :results
29
+ def run
30
+ examples = client.list_examples(dataset_id: @dataset_id)
31
+
32
+ experiment = client.create_experiment(
33
+ name: @experiment_name,
34
+ dataset_id: @dataset_id,
35
+ description: @description,
36
+ metadata: @metadata
37
+ )
38
+ experiment_id = experiment[:id]
39
+
40
+ results = examples.map { |example| run_example(example, experiment_id) }
41
+
42
+ Langsmith.flush
43
+ client.close_experiment(experiment_id: experiment_id, end_time: Time.now.utc.iso8601)
44
+
45
+ build_summary(experiment_id, results)
46
+ end
47
+
48
+ private
49
+
50
+ def client
51
+ Langsmith.client
52
+ end
53
+
54
+ def run_example(example, experiment_id)
55
+ outputs = nil
56
+ run_id = nil
57
+
58
+ begin
59
+ Context.with_evaluation(experiment_id: experiment_id, example_id: example[:id]) do
60
+ outputs = @block.call(example)
61
+ run_id = Context.evaluation_root_run_id
62
+ end
63
+ rescue StandardError => e
64
+ return { example_id: example[:id], run_id: nil, status: :error, error: e.message, feedback: nil }
65
+ end
66
+
67
+ feedback = run_evaluators(example, outputs, run_id)
68
+ { example_id: example[:id], run_id: run_id, status: :success, error: nil, feedback: feedback }
69
+ rescue StandardError => e
70
+ { example_id: example[:id], run_id: run_id, status: :success, error: e.message, feedback: nil }
71
+ end
72
+
73
+ def run_evaluators(example, outputs, run_id)
74
+ return nil if @evaluators.empty? || run_id.nil?
75
+
76
+ Langsmith.flush
77
+ run = fetch_run_with_retry(run_id)
78
+
79
+ @evaluators.each_with_object({}) do |(key, evaluator), feedback|
80
+ feedback[key] = execute_evaluator(key, evaluator, example, outputs, run_id, run)
81
+ end
82
+ end
83
+
84
+ # LangSmith has indexing lag after batch ingest — the run may not be
85
+ # queryable immediately. Retry a few times with a short delay.
86
+ def fetch_run_with_retry(run_id, retries: 3, delay: 1)
87
+ client.read_run(run_id: run_id)
88
+ rescue Client::APIError => e
89
+ raise unless e.status_code == 404 && retries.positive?
90
+
91
+ sleep(delay)
92
+ fetch_run_with_retry(run_id, retries: retries - 1, delay: delay)
93
+ end
94
+
95
+ def execute_evaluator(key, evaluator, example, outputs, run_id, run)
96
+ result = evaluator.call(
97
+ outputs: outputs,
98
+ reference_outputs: example[:outputs],
99
+ inputs: example[:inputs],
100
+ run: run
101
+ )
102
+ return { score: nil, success: true, skipped: true } if result.nil?
103
+
104
+ normalized = normalize_result(result)
105
+ client.create_feedback(run_id: run_id, key: key.to_s, **normalized)
106
+ normalized.merge(success: true)
107
+ rescue StandardError => e
108
+ { score: nil, success: false, error: e.message }
109
+ end
110
+
111
+ def normalize_result(result)
112
+ case result
113
+ when true then { score: 1.0, value: nil, comment: nil }
114
+ when false then { score: 0.0, value: nil, comment: nil }
115
+ when Hash then { score: result[:score], value: result[:value], comment: result[:comment] }
116
+ else { score: result, value: nil, comment: nil }
117
+ end
118
+ end
119
+
120
+ def build_summary(experiment_id, results)
121
+ {
122
+ experiment_id: experiment_id,
123
+ total: results.size,
124
+ succeeded: results.count { |r| r[:status] == :success },
125
+ failed: results.count { |r| r[:status] == :error },
126
+ results: results
127
+ }
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "evaluation/experiment_runner"
4
+
5
+ module Langsmith
6
+ # Public API for running evaluations against LangSmith datasets.
7
+ #
8
+ # @example
9
+ # Langsmith::Evaluation.run(
10
+ # dataset_id: "dataset-uuid",
11
+ # experiment_name: "my-experiment"
12
+ # ) do |example|
13
+ # Langsmith.trace("eval", run_type: "chain", inputs: example[:inputs]) do
14
+ # my_app.call(example[:inputs])
15
+ # end
16
+ # end
17
+ module Evaluation
18
+ # Run an evaluation experiment against a dataset.
19
+ #
20
+ # @param dataset_id [String] the dataset to evaluate against
21
+ # @param experiment_name [String] name for the experiment
22
+ # @param description [String, nil] optional experiment description
23
+ # @param metadata [Hash, nil] optional experiment metadata
24
+ # @param evaluators [Hash] map of evaluator key to callable (see ExperimentRunner)
25
+ # @yield [Hash] each dataset example
26
+ # @return [Hash] summary with :experiment_id, :total, :succeeded, :failed, :results
27
+ def self.run(dataset_id:, experiment_name:, description: nil, metadata: nil, evaluators: {}, &block)
28
+ ExperimentRunner.new(
29
+ dataset_id: dataset_id,
30
+ experiment_name: experiment_name,
31
+ description: description,
32
+ metadata: metadata,
33
+ evaluators: evaluators,
34
+ &block
35
+ ).run
36
+ end
37
+ end
38
+ end
data/lib/langsmith/run.rb CHANGED
@@ -37,6 +37,12 @@ module Langsmith
37
37
  # @return [String, nil] tenant ID for multi-tenant scenarios
38
38
  attr_reader :tenant_id
39
39
 
40
+ # @return [String, nil] links this run to a dataset example (for evaluations)
41
+ attr_reader :reference_example_id
42
+
43
+ # @return [String, nil] links this run to an experiment session (overrides project routing)
44
+ attr_reader :session_id
45
+
40
46
  # @return [String] trace ID (root run's ID)
41
47
  attr_reader :trace_id
42
48
 
@@ -81,6 +87,8 @@ module Langsmith
81
87
  # @param tenant_id [String, nil] tenant ID for multi-tenant scenarios
82
88
  # @param trace_id [String, nil] trace ID (defaults to own ID for root runs)
83
89
  # @param parent_dotted_order [String, nil] parent's dotted order for tree ordering
90
+ # @param reference_example_id [String, nil] dataset example ID (for evaluations)
91
+ # @param session_id [String, nil] experiment session ID (overrides project routing)
84
92
  #
85
93
  # @raise [ArgumentError] if run_type is invalid
86
94
  def initialize(
@@ -95,7 +103,9 @@ module Langsmith
95
103
  id: nil,
96
104
  tenant_id: nil,
97
105
  trace_id: nil,
98
- parent_dotted_order: nil
106
+ parent_dotted_order: nil,
107
+ reference_example_id: nil,
108
+ session_id: nil
99
109
  )
100
110
  @id = id || SecureRandom.uuid
101
111
  @name = name
@@ -114,6 +124,8 @@ module Langsmith
114
124
  @tags = tags || []
115
125
  @extra = extra || {}
116
126
  @events = []
127
+ @reference_example_id = reference_example_id
128
+ @session_id = session_id
117
129
  # dotted_order is used for ordering runs in the trace tree
118
130
  @dotted_order = build_dotted_order(parent_dotted_order)
119
131
  end
@@ -242,6 +254,8 @@ module Langsmith
242
254
  outputs:,
243
255
  error:,
244
256
  parent_run_id:,
257
+ reference_example_id:,
258
+ session_id:,
245
259
  trace_id:,
246
260
  dotted_order:,
247
261
  session_name:,
@@ -20,23 +20,19 @@ module Langsmith
20
20
  tenant_id: nil,
21
21
  project: nil
22
22
  )
23
- # If no explicit parent, check context for current parent
24
23
  effective_parent_id = parent_run_id || Context.current_parent_run_id
25
-
26
- # Inherit tenant_id from parent run if not explicitly set
27
24
  effective_tenant_id = tenant_id || Context.current_run&.tenant_id
28
25
 
29
26
  # Child traces must use the same project as their parent to keep the trace tree together.
30
- # Only root traces can set the project; children always inherit from parent.
31
27
  effective_project = Context.current_run&.session_name || project
32
-
33
- # Inherit trace_id from root run (parent's trace_id)
34
- # For root runs, trace_id will default to the run's own ID
35
28
  effective_trace_id = Context.current_run&.trace_id
36
-
37
- # Inherit dotted_order from parent for proper trace ordering
38
29
  parent_dotted_order = Context.current_run&.dotted_order
39
30
 
31
+ # Inject evaluation context: session_id on ALL runs, reference_example_id only on root runs.
32
+ eval_ctx = Context.evaluation_context
33
+ effective_session_id = eval_ctx&.dig(:experiment_id)
34
+ effective_ref_example_id = eval_ctx&.dig(:example_id) unless effective_parent_id
35
+
40
36
  @run = Run.new(
41
37
  name: name,
42
38
  run_type: run_type,
@@ -48,11 +44,13 @@ module Langsmith
48
44
  tenant_id: effective_tenant_id,
49
45
  session_name: effective_project,
50
46
  trace_id: effective_trace_id,
51
- parent_dotted_order: parent_dotted_order
47
+ parent_dotted_order: parent_dotted_order,
48
+ reference_example_id: effective_ref_example_id,
49
+ session_id: effective_session_id
52
50
  )
53
51
 
54
- @posted_start = false
55
- @posted_end = false
52
+ register_evaluation_root_run(effective_parent_id)
53
+ @posted_start = @posted_end = false
56
54
  end
57
55
 
58
56
  # Post the run start to LangSmith
@@ -138,6 +136,13 @@ module Langsmith
138
136
 
139
137
  private
140
138
 
139
+ # Register the root run ID in evaluation context so the runner can
140
+ # attach feedback to it later. Only root runs (no parent) register;
141
+ # child runs must not overwrite.
142
+ def register_evaluation_root_run(effective_parent_id)
143
+ Context.set_evaluation_root_run_id(@run.id) if effective_parent_id.nil? && Context.evaluating?
144
+ end
145
+
141
146
  # Sanitize block results to prevent circular references.
142
147
  # When users call methods like `run.add_metadata(...)` as the last line,
143
148
  # the Run object itself becomes the result, creating a circular reference.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langsmith
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/langsmith.rb CHANGED
@@ -8,6 +8,7 @@ require_relative "langsmith/context"
8
8
  require_relative "langsmith/client"
9
9
  require_relative "langsmith/batch_processor"
10
10
  require_relative "langsmith/run_tree"
11
+ require_relative "langsmith/evaluation"
11
12
 
12
13
  module Langsmith
13
14
  class << self
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langsmith-sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felipe Cabezudo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-12-21 00:00:00.000000000 Z
11
+ date: 2026-02-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -91,13 +91,14 @@ files:
91
91
  - examples/complex_agent.rb
92
92
  - examples/llm_tracing.rb
93
93
  - examples/openai_integration.rb
94
- - langsmith.gemspec
95
94
  - lib/langsmith.rb
96
95
  - lib/langsmith/batch_processor.rb
97
96
  - lib/langsmith/client.rb
98
97
  - lib/langsmith/configuration.rb
99
98
  - lib/langsmith/context.rb
100
99
  - lib/langsmith/errors.rb
100
+ - lib/langsmith/evaluation.rb
101
+ - lib/langsmith/evaluation/experiment_runner.rb
101
102
  - lib/langsmith/railtie.rb
102
103
  - lib/langsmith/run.rb
103
104
  - lib/langsmith/run_tree.rb
@@ -126,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
127
  - !ruby/object:Gem::Version
127
128
  version: '0'
128
129
  requirements: []
129
- rubygems_version: 3.5.17
130
+ rubygems_version: 3.4.19
130
131
  signing_key:
131
132
  specification_version: 4
132
133
  summary: Ruby SDK for LangSmith tracing and observability
data/langsmith.gemspec DELETED
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "lib/langsmith/version"
4
-
5
- Gem::Specification.new do |spec|
6
- spec.name = "langsmith-sdk"
7
- spec.version = Langsmith::VERSION
8
- spec.authors = ["Felipe Cabezudo"]
9
- spec.email = ["felipecabedilo@gmail.com"]
10
-
11
- spec.summary = "Ruby SDK for LangSmith tracing and observability"
12
- spec.description = "A Ruby client for LangSmith, providing tracing and observability for LLM applications"
13
- spec.homepage = "https://github.com/felipekb/langsmith-ruby-sdk"
14
- spec.license = "MIT"
15
- spec.required_ruby_version = ">= 3.1.0"
16
-
17
- spec.metadata["allowed_push_host"] = "https://rubygems.org"
18
- spec.metadata["homepage_uri"] = spec.homepage
19
- spec.metadata["source_code_uri"] = spec.homepage
20
- spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
21
- spec.metadata["rubygems_mfa_required"] = "true"
22
-
23
- spec.files = Dir.chdir(__dir__) do
24
- `git ls-files -z`.split("\x0").reject do |f|
25
- (File.expand_path(f) == __FILE__) ||
26
- f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile])
27
- end
28
- end
29
- spec.bindir = "exe"
30
- spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
- spec.require_paths = ["lib"]
32
-
33
- # Runtime dependencies
34
- spec.add_dependency "concurrent-ruby", ">= 1.1", "< 3.0"
35
- spec.add_dependency "faraday", "~> 2.0"
36
- spec.add_dependency "faraday-net_http_persistent", "~> 2.0"
37
- spec.add_dependency "faraday-retry", "~> 2.0"
38
- end