langsmith-sdk 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/langsmith.gemspec ADDED
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/langsmith/version"
4
+
5
# Gem specification for the langsmith-sdk package.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name = "langsmith-sdk"
  spec.version = Langsmith::VERSION
  spec.authors = ["Felipe Cabezudo"]
  spec.email = ["felipecabedilo@gmail.com"]

  # Human-readable description, project location and licensing.
  spec.summary = "Ruby SDK for LangSmith tracing and observability"
  spec.description = "A Ruby client for LangSmith, providing tracing and observability for LLM applications"
  spec.homepage = "https://github.com/felipekb/langsmith-ruby-sdk"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.1.0"

  # Registry metadata; the URIs are derived from the homepage set above.
  spec.metadata.merge!(
    "allowed_push_host" => "https://rubygems.org",
    "homepage_uri" => spec.homepage,
    "source_code_uri" => spec.homepage,
    "changelog_uri" => "#{spec.homepage}/blob/main/CHANGELOG.md",
    "rubygems_mfa_required" => "true"
  )

  # Package every git-tracked file except this gemspec itself and anything
  # whose path starts with one of the excluded prefixes.
  excluded_prefixes = %w[bin/ test/ spec/ features/ .git .github appveyor Gemfile]
  spec.files = Dir.chdir(__dir__) do
    tracked_paths = `git ls-files -z`.split("\x0")
    tracked_paths.reject do |path|
      File.expand_path(path) == __FILE__ || path.start_with?(*excluded_prefixes)
    end
  end
  spec.bindir = "exe"
  # Executables are the basenames of the packaged files under exe/.
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "concurrent-ruby", ">= 1.1", "< 3.0"
  spec.add_dependency "faraday", "~> 2.0"
  spec.add_dependency "faraday-net_http_persistent", "~> 2.0"
  spec.add_dependency "faraday-retry", "~> 2.0"
end
@@ -0,0 +1,237 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent"
4
+
5
module Langsmith
  # Background processor that batches trace runs and sends them to LangSmith.
  # Uses concurrent-ruby for thread-safe operations.
  #
  # Data flow: callers enqueue serialized run snapshots onto a Queue; a single
  # worker thread moves them into the pending create/update buffers; the
  # buffers are flushed through the client when a batch is full, when the
  # periodic timer task fires, on an explicit #flush, and on #shutdown.
  #
  # Thread Safety:
  # - Uses AtomicBoolean for atomic start/shutdown
  # - Uses a Mutex to protect flush_pending from concurrent access
  # - Uses Concurrent::Array for thread-safe pending queues
  class BatchProcessor
    # Entry types for the queue
    CREATE = :create
    UPDATE = :update
    SHUTDOWN = :shutdown

    # @param client [Client, nil] client used to send batches (a new Client when nil)
    # @param batch_size [Integer, nil] pending-entry count that triggers a flush
    #   (defaults to configuration)
    # @param flush_interval [Numeric, nil] seconds between timer-driven flushes
    #   (defaults to configuration)
    def initialize(client: nil, batch_size: nil, flush_interval: nil)
      config = Langsmith.configuration
      @client = client || Client.new
      @batch_size = batch_size || config.batch_size
      @flush_interval = flush_interval || config.flush_interval

      @queue = Queue.new
      @running = Concurrent::AtomicBoolean.new(false)
      @worker_thread = Concurrent::AtomicReference.new(nil)
      @pending_creates = Concurrent::Array.new
      @pending_updates = Concurrent::Array.new
      @flush_task = nil
      @flush_mutex = Mutex.new
      @shutdown_hook_registered = false
    end

    # Starts the worker thread and the periodic flush task.
    # Does nothing when the processor is already running.
    #
    # @return [void]
    def start
      return unless @running.make_true

      @worker_thread.set(create_worker_thread)
      @flush_task = create_flush_task
      @flush_task.execute

      register_shutdown_hook
    end

    # Stops the processor: stops the flush task, asks the worker to drain the
    # queue, waits up to 5 seconds for it, then flushes whatever is pending.
    # Does nothing when the processor is not running.
    #
    # @return [void]
    def shutdown
      return unless @running.make_false

      @flush_task&.shutdown
      @queue << { type: SHUTDOWN }

      # Give the worker time to drain the queue gracefully
      worker = @worker_thread.get
      if worker&.alive? && !worker.join(5)
        log_error("Worker thread did not terminate within timeout", force: true)
      end

      flush_pending
    end

    # Queues a run for creation (POST).
    #
    # @param run [Run] the run to create
    # @return [void]
    def enqueue_create(run)
      enqueue(CREATE, run)
    end

    # Queues a run for update (PATCH).
    #
    # @param run [Run] the run to update
    # @return [void]
    def enqueue_update(run)
      enqueue(UPDATE, run)
    end

    # Sends everything currently in the pending buffers.
    # Entries still sitting in the queue (not yet picked up by the worker)
    # are not included.
    #
    # @return [void]
    def flush
      flush_pending
    end

    # @return [Boolean] whether the processor has been started and not shut down
    def running?
      @running.true?
    end

    private

    # Snapshots the run and pushes it onto the queue, starting the processor
    # on first use. Non-Run arguments are rejected with a (debug-only) log line.
    def enqueue(type, run)
      unless run.is_a?(Run)
        log_error("enqueue expects a Run instance, got #{run.class}")
        return
      end

      ensure_started
      # Use to_h for creates (full data), to_update_h for updates (minimal PATCH payload)
      run_data = type == CREATE ? run.to_h : run.to_update_h
      @queue << { type: type, run_data: run_data, tenant_id: run.tenant_id }
    end

    # Spawns the worker thread; exceptions inside it neither abort the
    # process nor get reported on stderr.
    def create_worker_thread
      Thread.new { worker_loop }.tap do |t|
        t.abort_on_exception = false
        t.report_on_exception = false
      end
    end

    # Builds (but does not start) the timer task that flushes periodically.
    def create_flush_task
      Concurrent::TimerTask.new(
        execution_interval: @flush_interval,
        run_now: false
      ) { safe_flush }
    end

    # Registers, at most once per processor, an at_exit hook that shuts the
    # processor down if it is still running when the process exits.
    def register_shutdown_hook
      return if @shutdown_hook_registered

      @shutdown_hook_registered = true
      processor = self
      at_exit do
        processor.shutdown if processor.running?
      rescue StandardError => e
        warn "[Langsmith] Error during shutdown: #{e.message}" if ENV["LANGSMITH_DEBUG"]
      end
    end

    def ensure_started
      start unless running?
    end

    # Worker body: blocks on the queue, buffers each entry and flushes when
    # the batch is full. Exits once a SHUTDOWN entry has been processed.
    # Errors are logged (debug-only) and the loop keeps going.
    def worker_loop
      loop do
        entry = @queue.pop
        break if process_entry(entry) == :shutdown

        flush_if_batch_full
      rescue StandardError => e
        log_error("Batch processor error: #{e.message}")
      end
    end

    # Buffers a CREATE/UPDATE entry, or handles SHUTDOWN by draining the
    # queue and flushing.
    #
    # @return [Symbol, Object] :shutdown for a SHUTDOWN entry; callers only
    #   compare the result against :shutdown
    def process_entry(entry)
      case entry[:type]
      when CREATE
        @pending_creates << build_pending_entry(entry)
      when UPDATE
        @pending_updates << build_pending_entry(entry)
      when SHUTDOWN
        drain_queue
        flush_pending
        :shutdown
      end
    end

    def build_pending_entry(entry)
      { data: entry[:run_data], tenant_id: entry[:tenant_id] }
    end

    # Moves every entry still in the queue into the pending buffers without
    # blocking. Queue#pop(true) raises ThreadError once the queue is empty,
    # which ends the loop. Extra SHUTDOWN markers are skipped.
    def drain_queue
      loop do
        entry = @queue.pop(true)
        process_entry(entry) unless entry[:type] == SHUTDOWN
      rescue ThreadError
        break
      end
    end

    # Timer-task entry point: flushes if anything is pending and never raises.
    def safe_flush
      flush_pending if has_pending?
    rescue StandardError => e
      log_error("Flush task error: #{e.message}")
    end

    def flush_if_batch_full
      flush_pending if batch_full?
    end

    def batch_full?
      pending_count >= @batch_size
    end

    def has_pending?
      pending_count.positive?
    end

    def pending_count
      @pending_creates.size + @pending_updates.size
    end

    # Takes everything out of the pending buffers and sends it. Serialized by
    # @flush_mutex so only one flush talks to the API at a time.
    def flush_pending
      @flush_mutex.synchronize do
        # Drain updates BEFORE creates. The worker appends to the buffers
        # without holding @flush_mutex, so entries can arrive between the two
        # drains. Assuming a run's create is enqueued before its update, any
        # update captured here has its create either taken by an earlier flush
        # or still in @pending_creates for the drain below. Draining creates
        # first could capture an update whose create arrived a moment later,
        # sending the PATCH ahead of the POST.
        updates = extract_all(@pending_updates)
        creates = extract_all(@pending_creates)

        return if creates.empty? && updates.empty?

        send_batches(creates, updates)
      end
    end

    # Removes and returns the elements currently in +array+.
    # Takes a bounded snapshot (the size at call time) rather than looping
    # until empty, so a steady stream of appends cannot prolong the drain.
    #
    # @param array [Array] one of the pending buffers
    # @return [Array] the removed entries, oldest first
    def extract_all(array)
      array.shift(array.size)
    end

    def send_batches(creates, updates)
      by_tenant = group_by_tenant(creates, updates)

      # Send POSTs first, then PATCHes (LangSmith needs runs created before updating)
      send_batch_type(by_tenant, :creates, :post_runs)
      send_batch_type(by_tenant, :updates, :patch_runs)
    end

    # @return [Hash] { creates: { tenant_id => entries }, updates: { tenant_id => entries } }
    def group_by_tenant(creates, updates)
      {
        creates: creates.group_by { |e| e[:tenant_id] },
        updates: updates.group_by { |e| e[:tenant_id] }
      }
    end

    # Sends one request per tenant for the given entry kind.
    def send_batch_type(by_tenant, type_key, param_key)
      by_tenant[type_key].each do |tenant_id, entries|
        runs = entries.map { |e| e[:data] }
        next if runs.empty?

        send_to_api(tenant_id, param_key, runs)
      end
    end

    # Sends one batch through the client. Failures are logged and swallowed,
    # so the runs of a failed batch are not retried here.
    def send_to_api(tenant_id, param_key, runs)
      params = { post_runs: [], patch_runs: [], tenant_id: tenant_id }
      params[param_key] = runs

      @client.batch_ingest_raw(**params)
    rescue Client::APIError => e
      log_error("Failed to send #{param_key} for tenant #{tenant_id}: #{e.message}", force: true)
    rescue StandardError => e
      log_error("Unexpected error sending #{param_key}: #{e.message}")
    end

    # Writes to stderr when forced or when LANGSMITH_DEBUG is set.
    def log_error(message, force: false)
      warn "[Langsmith] #{message}" if force || ENV["LANGSMITH_DEBUG"]
    end
  end
end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "faraday/net_http_persistent"
5
+ require "faraday/retry"
6
+ require "json"
7
+
8
module Langsmith
  # HTTP client for communicating with the LangSmith API.
  # Handles authentication, retries, and batch operations.
  class Client
    # Raised when API requests fail.
    class APIError < Langsmith::Error
      # @return [Integer, nil] HTTP status code
      attr_reader :status_code

      # @return [Hash, String, nil] response body
      attr_reader :response_body

      # @param message [String] error message
      # @param status_code [Integer, nil] HTTP status code
      # @param response_body [Hash, String, nil] response body
      def initialize(message, status_code: nil, response_body: nil)
        super(message)
        @status_code = status_code
        @response_body = response_body
      end
    end

    RETRYABLE_EXCEPTIONS = [
      Faraday::ConnectionFailed,
      Faraday::TimeoutError
    ].freeze

    RETRY_STATUSES = [429, 500, 502, 503, 504].freeze

    # HTTP methods the retry middleware is allowed to retry. faraday-retry
    # only retries idempotent methods by default, which excludes the POST and
    # PATCH requests this client sends; without listing them here the
    # configured max_retries would never take effect. The default idempotent
    # methods are kept so the list is a superset of the middleware default.
    RETRY_METHODS = %i[delete get head options put post patch].freeze

    # Creates a new Client instance.
    #
    # @param api_key [String, nil] API key (defaults to configuration)
    # @param endpoint [String, nil] API endpoint (defaults to configuration)
    # @param timeout [Integer, nil] request timeout in seconds (defaults to configuration)
    # @param max_retries [Integer, nil] max retry attempts (defaults to configuration)
    def initialize(api_key: nil, endpoint: nil, timeout: nil, max_retries: nil)
      config = Langsmith.configuration
      @api_key = api_key || config.api_key
      @endpoint = endpoint || config.endpoint
      @timeout = timeout || config.timeout
      @max_retries = max_retries || config.max_retries
    end

    # Create a new run.
    #
    # @param run [Run] the run to create
    # @return [Hash] API response
    # @raise [APIError] if the request fails
    def create_run(run)
      post("/runs", run.to_h, tenant_id: run.tenant_id)
    end

    # Update an existing run (typically when it ends).
    #
    # @param run [Run] the run to update
    # @return [Hash] API response
    # @raise [APIError] if the request fails
    def update_run(run)
      patch("/runs/#{run.id}", run.to_h, tenant_id: run.tenant_id)
    end

    # Batch create/update runs.
    # All runs in a batch should have the same tenant_id for optimal performance.
    #
    # @param post_runs [Array<Run>] runs to create
    # @param patch_runs [Array<Run>] runs to update
    # @param tenant_id [String, nil] tenant ID (inferred from runs if not provided)
    # @return [Hash, nil] API response (nil when both lists are empty)
    # @raise [APIError] if the request fails
    def batch_ingest(post_runs: [], patch_runs: [], tenant_id: nil)
      return if post_runs.empty? && patch_runs.empty?

      payload = {}
      payload[:post] = post_runs.map(&:to_h) unless post_runs.empty?
      payload[:patch] = patch_runs.map(&:to_h) unless patch_runs.empty?

      # Use tenant_id from first run if not explicitly provided
      effective_tenant_id = tenant_id ||
                            post_runs.first&.tenant_id ||
                            patch_runs.first&.tenant_id

      post("/runs/batch", payload, tenant_id: effective_tenant_id)
    end

    # Batch create/update runs using pre-serialized hashes.
    # Used by BatchProcessor which snapshots run data at enqueue time.
    #
    # @param post_runs [Array<Hash>] run hashes to create
    # @param patch_runs [Array<Hash>] run hashes to update
    # @param tenant_id [String, nil] tenant ID for the request
    # @return [Hash, nil] API response (nil when both lists are empty)
    # @raise [APIError] if the request fails
    def batch_ingest_raw(post_runs: [], patch_runs: [], tenant_id: nil)
      return if post_runs.empty? && patch_runs.empty?

      payload = {}
      payload[:post] = post_runs unless post_runs.empty?
      payload[:patch] = patch_runs unless patch_runs.empty?

      post("/runs/batch", payload, tenant_id: tenant_id)
    end

    private

    # Lazily builds and memoizes the Faraday connection: JSON request and
    # response handling, retries with exponential backoff, auth headers,
    # timeouts and the persistent net_http adapter.
    def connection
      @connection ||= Faraday.new(url: @endpoint) do |f|
        f.request :json
        f.response :json, parser_options: { symbolize_names: true }
        f.request :retry,
                  max: @max_retries,
                  interval: 0.5,
                  interval_randomness: 0.5,
                  backoff_factor: 2,
                  methods: RETRY_METHODS,
                  exceptions: RETRYABLE_EXCEPTIONS,
                  retry_statuses: RETRY_STATUSES

        f.headers["X-API-Key"] = @api_key
        f.headers["User-Agent"] = "langsmith-sdk-ruby/#{Langsmith::VERSION}"

        f.options.timeout = @timeout
        f.options.open_timeout = @timeout

        f.adapter :net_http_persistent
      end
    end

    # Sends a POST request. See #request.
    def post(path, body, tenant_id: nil)
      request(:post, path, body, tenant_id: tenant_id)
    end

    # Sends a PATCH request. See #request.
    def patch(path, body, tenant_id: nil)
      request(:patch, path, body, tenant_id: tenant_id)
    end

    # Performs a request with the given verb and maps the outcome to either
    # the parsed response body or an APIError.
    #
    # @param verb [Symbol] :post or :patch
    # @param path [String] path relative to the endpoint
    # @param body [Hash] JSON-serializable payload
    # @param tenant_id [String, nil] sent as X-Tenant-Id when present
    # @return [Hash, String, nil] response body for 2xx responses
    # @raise [APIError] on network failures and non-2xx responses
    def request(verb, path, body, tenant_id: nil)
      response = connection.public_send(verb, path, body) do |req|
        req.headers["X-Tenant-Id"] = tenant_id if tenant_id
      end
      handle_response(response)
    rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
      raise APIError, "Network error: #{e.message}"
    rescue Faraday::Error => e
      # Any other Faraday failure: when it carries a response, run it through
      # the normal status mapping; otherwise report the error message.
      raise APIError, "Request failed: #{e.message}" unless e.respond_to?(:response) && e.response

      handle_response(e.response)
    end

    # Returns the body of a successful response or raises an APIError that
    # describes the failure.
    #
    # @param response [Faraday::Response, Hash] response object, or the Hash
    #   form (:status / :body keys) a Faraday error may carry
    # @return [Hash, String, nil] response body
    # @raise [APIError] for any non-2xx status
    def handle_response(response)
      status, body = response_status_and_body(response)

      case status
      when 200..299
        body
      when 401
        raise APIError.new("Unauthorized: Invalid API key", status_code: 401, response_body: body)
      when 404
        raise APIError.new("Not found", status_code: 404, response_body: body)
      when 422
        raise APIError.new("Unprocessable entity: #{body}", status_code: 422, response_body: body)
      when 429
        raise APIError.new("Rate limited", status_code: 429, response_body: body)
      when 500..599
        raise APIError.new("Server error", status_code: status, response_body: body)
      else
        raise APIError.new("Request failed", status_code: status, response_body: body)
      end
    end

    # Extracts [status, body] from either a response object or a Hash, so a
    # Hash-shaped error response does not fail with NoMethodError on #status.
    def response_status_and_body(response)
      return [response[:status], response[:body]] if response.is_a?(Hash)

      [response.status, response.body]
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
module Langsmith
  # Configuration settings for the Langsmith SDK.
  #
  # Values are read from environment variables when the object is created and
  # can be overridden through the accessors. Environment values that are
  # blank (empty or whitespace-only) are treated as unset, and numeric values
  # that cannot be parsed or are out of range fall back to the defaults, so a
  # stray `LANGSMITH_BATCH_SIZE=` cannot produce a batch size of 0.
  #
  # @example Configure via block
  #   Langsmith.configure do |config|
  #     config.api_key = "ls_..."
  #     config.tracing_enabled = true
  #     config.project = "my-project"
  #   end
  #
  # @example Configure via environment variables
  #   # LANGSMITH_API_KEY=ls_...
  #   # LANGSMITH_TRACING=true
  #   # LANGSMITH_PROJECT=my-project
  class Configuration
    DEFAULT_ENDPOINT = "https://api.smith.langchain.com"
    DEFAULT_PROJECT = "default"
    DEFAULT_BATCH_SIZE = 100
    DEFAULT_FLUSH_INTERVAL = 1.0
    DEFAULT_TIMEOUT = 10
    DEFAULT_MAX_RETRIES = 3

    # Environment values (case-insensitive) that enable a boolean setting.
    TRUTHY_VALUES = %w[true 1 yes on].freeze

    # @return [String, nil] LangSmith API key (required for tracing)
    attr_accessor :api_key

    # @return [String] LangSmith API endpoint
    attr_accessor :endpoint

    # @return [String] Project name for organizing traces
    attr_accessor :project

    # @return [Boolean] Enable/disable tracing
    attr_accessor :tracing_enabled

    # @return [Integer] Batch size for sending traces
    attr_accessor :batch_size

    # @return [Float] Flush interval in seconds
    attr_accessor :flush_interval

    # @return [Integer] Request timeout in seconds
    attr_accessor :timeout

    # @return [Integer] Maximum retry attempts for failed requests
    attr_accessor :max_retries

    # @return [String, nil] Tenant ID for multi-tenant scenarios
    attr_accessor :tenant_id

    # Loads every setting from its LANGSMITH_* environment variable, using the
    # DEFAULT_* constants for anything unset, blank or invalid.
    def initialize
      @api_key = env_string("LANGSMITH_API_KEY")
      @endpoint = env_string("LANGSMITH_ENDPOINT", DEFAULT_ENDPOINT)
      @project = env_string("LANGSMITH_PROJECT", DEFAULT_PROJECT)
      @tracing_enabled = env_boolean("LANGSMITH_TRACING", false)
      @batch_size = env_integer("LANGSMITH_BATCH_SIZE", DEFAULT_BATCH_SIZE, minimum: 1)
      @flush_interval = env_positive_float("LANGSMITH_FLUSH_INTERVAL", DEFAULT_FLUSH_INTERVAL)
      @timeout = env_integer("LANGSMITH_TIMEOUT", DEFAULT_TIMEOUT, minimum: 1)
      # Zero is a legitimate value here: it means "do not retry".
      @max_retries = env_integer("LANGSMITH_MAX_RETRIES", DEFAULT_MAX_RETRIES, minimum: 0)
      @tenant_id = env_string("LANGSMITH_TENANT_ID")
    end

    # Returns whether tracing is enabled in configuration.
    # Note: This only checks the configuration flag, not whether tracing can actually occur.
    # @return [Boolean]
    # @see #tracing_possible?
    def tracing_enabled?
      @tracing_enabled
    end

    # Returns whether tracing can actually occur (enabled AND has API key).
    # Use this to check if traces will be sent.
    # @return [Boolean]
    def tracing_possible?
      @tracing_enabled && api_key_present?
    end

    # Returns whether an API key is configured.
    # A nil, empty or whitespace-only key counts as missing.
    # @return [Boolean]
    def api_key_present?
      !@api_key.to_s.strip.empty?
    end

    # Validates the configuration, raising an error if invalid.
    # @raise [ConfigurationError] if tracing is enabled but API key is missing
    # @return [void]
    def validate!
      return unless @tracing_enabled

      raise ConfigurationError, "LANGSMITH_API_KEY is required when tracing is enabled" unless api_key_present?
    end

    private

    # Reads an environment variable with surrounding whitespace removed.
    #
    # @param key [String] environment variable name
    # @param default [Object] value returned when the variable is unset or blank
    # @return [String, Object]
    def env_string(key, default = nil)
      value = ENV.fetch(key, nil)
      return default if value.nil?

      stripped = value.strip
      stripped.empty? ? default : stripped
    end

    # Reads a boolean flag; anything outside TRUTHY_VALUES is false.
    #
    # @param key [String] environment variable name
    # @param default [Boolean] value returned when the variable is unset or blank
    # @return [Boolean]
    def env_boolean(key, default)
      value = env_string(key)
      return default if value.nil?

      TRUTHY_VALUES.include?(value.downcase)
    end

    # Parses an environment variable as a finite number.
    #
    # @param key [String] environment variable name
    # @return [Float, nil] nil when unset, blank, unparseable or not finite
    def env_number(key)
      raw = env_string(key)
      number = raw && Float(raw, exception: false)
      number if number&.finite?
    end

    # Reads an integer setting (fractions are truncated), falling back to
    # +default+ when the value is missing, unparseable or below +minimum+.
    #
    # @param key [String] environment variable name
    # @param default [Integer] fallback value
    # @param minimum [Integer] smallest accepted value
    # @return [Integer]
    def env_integer(key, default, minimum:)
      number = env_number(key)
      return default if number.nil?

      value = number.to_i
      value >= minimum ? value : default
    end

    # Reads a strictly positive float setting, falling back to +default+
    # when the value is missing, unparseable, zero or negative.
    #
    # @param key [String] environment variable name
    # @param default [Float] fallback value
    # @return [Float]
    def env_positive_float(key, default)
      number = env_number(key)
      number&.positive? ? number : default
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module Langsmith
  # Context manager for maintaining the current trace stack.
  # This allows nested traces to automatically link to their parent runs.
  #
  # The stack is stored with Thread.current[], which Ruby scopes to the
  # current fiber: every thread starts with an empty stack, and so does every
  # fiber created inside a thread (for example the fiber behind an external
  # Enumerator). Runs pushed in one fiber are not visible from another.
  #
  # Note: We use Thread.current instead of Fiber.storage for compatibility
  # across Ruby versions. Fiber.storage behavior differs between Ruby versions
  # and caused test failures on Ruby 3.2.
  module Context
    CONTEXT_KEY = :langsmith_run_stack
    private_constant :CONTEXT_KEY

    class << self
      # Returns the run stack for the current execution context, creating an
      # empty one on first access.
      #
      # @return [Array] the live stack (mutations affect the context)
      def run_stack
        Thread.current[CONTEXT_KEY] ||= []
      end

      # Returns the current (topmost) run, or nil if no active trace
      def current_run
        run_stack.last
      end

      # Returns the current parent run ID for creating child runs
      # (nil when the stack is empty)
      def current_parent_run_id
        current_run&.id
      end

      # Push a run onto the context stack
      #
      # @return [Object] the run that was pushed
      def push(run)
        run_stack.push(run)
        run
      end

      # Pop a run from the context stack
      #
      # @return [Object, nil] the removed run, or nil when the stack is empty
      def pop
        run_stack.pop
      end

      # Execute a block with a run pushed onto the stack.
      #
      # The stack is restored to the depth it had before the call even when
      # the block raises or leaves the stack unbalanced (extra pushes are
      # discarded; if the block already popped the run nothing more is
      # removed), so one misbehaving block cannot detach later runs from
      # their real parent.
      #
      # @param run [Object] the run to make current for the block
      # @yieldparam run [Object] the same run
      # @return [Object] the block's return value
      def with_run(run)
        depth_before = run_stack.size
        push(run)
        begin
          yield run
        ensure
          stack = run_stack
          stack.pop while stack.size > depth_before
        end
      end

      # Clear the entire run stack (useful for testing)
      def clear!
        Thread.current[CONTEXT_KEY] = []
      end

      # Check if there's an active trace context
      def active?
        !run_stack.empty?
      end

      # Get the depth of the current trace (0 = root level)
      def depth
        run_stack.size
      end

      # Get the root run of the current trace tree
      def root_run
        run_stack.first
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module Langsmith
  # Root of the Langsmith exception hierarchy.
  # Rescue this class to handle any error raised by the SDK.
  class Error < StandardError
  end

  # Signals that the SDK configuration is invalid or incomplete.
  class ConfigurationError < Error
  end

  # Signals that a tracing operation failed.
  class TracingError < Error
  end
end