langsmith-sdk 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +4 -0
- data/.rubocop.yml +120 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +48 -0
- data/LICENSE +22 -0
- data/README.md +224 -0
- data/Rakefile +8 -0
- data/examples/LLM_TRACING.md +439 -0
- data/examples/complex_agent.rb +472 -0
- data/examples/llm_tracing.rb +304 -0
- data/examples/openai_integration.rb +751 -0
- data/langsmith.gemspec +38 -0
- data/lib/langsmith/batch_processor.rb +237 -0
- data/lib/langsmith/client.rb +181 -0
- data/lib/langsmith/configuration.rb +96 -0
- data/lib/langsmith/context.rb +73 -0
- data/lib/langsmith/errors.rb +13 -0
- data/lib/langsmith/railtie.rb +86 -0
- data/lib/langsmith/run.rb +320 -0
- data/lib/langsmith/run_tree.rb +154 -0
- data/lib/langsmith/traceable.rb +120 -0
- data/lib/langsmith/version.rb +5 -0
- data/lib/langsmith.rb +144 -0
- metadata +134 -0
data/examples/LLM_TRACING.md
@@ -0,0 +1,439 @@
# LLM Tracing Examples

This guide shows how to trace LLM calls with the LangSmith Ruby SDK, including token usage tracking, streaming, and multi-step chains.

## Table of Contents

- [Basic LLM Call with Token Usage](#basic-llm-call-with-token-usage)
- [Adding Metadata](#adding-metadata)
- [Streaming LLM Calls](#streaming-llm-calls)
- [Multi-Step Chains](#multi-step-chains)
- [Using the Traceable Module](#using-the-traceable-module)
- [OpenAI Integration](#openai-integration)
- [Anthropic Integration](#anthropic-integration)
- [Error Handling](#error-handling)
- [Multi-Tenant Tracing](#multi-tenant-tracing)

---
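All of the examples assume the SDK has already been configured and that buffered traces are flushed at exit. A minimal setup sketch; the `api_key` and `project_name` accessors are assumptions for illustration (only `tenant_id` and `Langsmith.shutdown` appear elsewhere in this guide):

```ruby
require "langsmith"

Langsmith.configure do |config|
  config.api_key      = ENV["LANGSMITH_API_KEY"] # assumed accessor; adjust to your configuration
  config.project_name = "my-app"                 # assumed accessor
end

# Flush buffered traces when the process exits (see Best Practices)
at_exit { Langsmith.shutdown }
```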
## Basic LLM Call with Token Usage

Track token usage from your LLM API responses:

```ruby
Langsmith.trace("openai_chat", run_type: "llm", inputs: { prompt: user_message }) do |run|
  response = openai_client.chat(
    parameters: {
      model: "gpt-4",
      messages: [{ role: "user", content: user_message }]
    }
  )

  # Set token usage from the API response
  run.set_token_usage(
    prompt_tokens: response["usage"]["prompt_tokens"],
    completion_tokens: response["usage"]["completion_tokens"],
    total_tokens: response["usage"]["total_tokens"]
  )

  response.dig("choices", 0, "message", "content")
end
```

---

## Adding Metadata

Enrich your traces with model configuration and response details:

```ruby
Langsmith.trace("llm_call", run_type: "llm") do |run|
  # Add request metadata
  run.add_metadata(
    model: "gpt-4",
    temperature: 0.7,
    max_tokens: 1000,
    provider: "openai"
  )

  response = call_llm(messages)

  # Add response metadata
  run.add_metadata(
    finish_reason: response.dig("choices", 0, "finish_reason"),
    response_id: response["id"],
    model_version: response["model"]
  )

  # Add tags for filtering in LangSmith UI
  run.add_tags("production", "gpt-4", "chat")

  response.dig("choices", 0, "message", "content")
end
```

---

## Streaming LLM Calls

For streaming responses, accumulate tokens and track chunks:

```ruby
Langsmith.trace("streaming_chat", run_type: "llm", inputs: { prompt: prompt }) do |run|
  run.add_metadata(model: "gpt-4", streaming: true)

  full_response = ""
  chunk_count = 0

  openai_client.chat(
    parameters: {
      model: "gpt-4",
      messages: [{ role: "user", content: prompt }],
      stream: proc do |chunk, _bytesize|
        content = chunk.dig("choices", 0, "delta", "content")
        if content
          full_response += content
          chunk_count += 1

          # Optionally track each chunk as an event
          run.add_event(name: "chunk", content_length: content.length)
        end
      end
    }
  )

  # Estimate tokens for streaming (OpenAI doesn't return usage for streams)
  run.set_token_usage(
    prompt_tokens: (prompt.length / 4.0).ceil,
    completion_tokens: (full_response.length / 4.0).ceil
  )

  run.add_metadata(chunk_count: chunk_count, response_length: full_response.length)

  full_response
end
```
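The character-count estimate above is needed because streamed chat completions don't include usage by default. Newer OpenAI API versions can return it when `stream_options: { include_usage: true }` is passed; whether this works depends on your API version and on your ruby-openai version forwarding the parameter, so treat the following fragment as a sketch that would replace the estimate inside the example above:

```ruby
# Sketch: capture real usage from the final streamed chunk (assumes the API
# supports stream_options and the gem forwards it unchanged).
usage = nil

openai_client.chat(
  parameters: {
    model: "gpt-4",
    messages: [{ role: "user", content: prompt }],
    stream_options: { include_usage: true },
    stream: proc do |chunk, _bytesize|
      usage = chunk["usage"] if chunk["usage"] # only the last chunk carries usage
    end
  }
)

if usage
  run.set_token_usage(
    prompt_tokens: usage["prompt_tokens"],
    completion_tokens: usage["completion_tokens"]
  )
end
```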
---

## Multi-Step Chains

Trace complex workflows with nested calls:

```ruby
Langsmith.trace("rag_chain", run_type: "chain", inputs: { question: question }) do |chain|
  chain.add_metadata(chain_type: "retrieval_qa")
  chain.add_tags("rag", "production")

  # Step 1: Embed the question
  embedding = Langsmith.trace("embed_question", run_type: "llm") do |run|
    response = openai_client.embeddings(
      parameters: { model: "text-embedding-3-small", input: question }
    )

    run.set_token_usage(prompt_tokens: response["usage"]["prompt_tokens"], completion_tokens: 0)
    run.add_metadata(model: "text-embedding-3-small", dimensions: 1536)

    response.dig("data", 0, "embedding")
  end

  # Step 2: Retrieve relevant documents
  documents = Langsmith.trace("retrieve_docs", run_type: "retriever") do |run|
    run.add_metadata(index: "knowledge_base", top_k: 5)

    results = vector_store.similarity_search(embedding, limit: 5)

    run.add_metadata(results_count: results.length)
    results
  end

  # Step 3: Generate answer
  answer = Langsmith.trace("generate_answer", run_type: "llm") do |run|
    context = documents.map(&:content).join("\n\n")

    response = openai_client.chat(
      parameters: {
        model: "gpt-4",
        messages: [
          { role: "system", content: "Answer based on context:\n#{context}" },
          { role: "user", content: question }
        ]
      }
    )

    run.set_token_usage(
      prompt_tokens: response["usage"]["prompt_tokens"],
      completion_tokens: response["usage"]["completion_tokens"]
    )
    run.add_metadata(model: "gpt-4", context_docs: documents.length)

    response.dig("choices", 0, "message", "content")
  end

  answer
end
```

---

## Using the Traceable Module

Decorate methods for automatic tracing:

```ruby
class LLMService
  include Langsmith::Traceable

  def initialize(model: "gpt-4")
    @model = model
    @client = OpenAI::Client.new
  end

  traceable run_type: "llm", name: "llm_service.chat"
  def chat(messages, temperature: 0.7)
    response = @client.chat(
      parameters: {
        model: @model,
        messages: messages,
        temperature: temperature
      }
    )

    # Access current run to set token usage
    if (run = Langsmith.current_run)
      run.set_token_usage(
        prompt_tokens: response["usage"]["prompt_tokens"],
        completion_tokens: response["usage"]["completion_tokens"]
      )
      run.add_metadata(model: @model, temperature: temperature)
    end

    response.dig("choices", 0, "message", "content")
  end

  traceable run_type: "llm", name: "llm_service.embed"
  def embed(text)
    response = @client.embeddings(
      parameters: { model: "text-embedding-3-small", input: text }
    )

    Langsmith.current_run&.set_token_usage(
      prompt_tokens: response["usage"]["prompt_tokens"],
      completion_tokens: 0
    )

    response.dig("data", 0, "embedding")
  end
end

# Usage
service = LLMService.new(model: "gpt-4")
response = service.chat([{ role: "user", content: "Hello!" }])
```

---

## OpenAI Integration

Complete wrapper for the ruby-openai gem:

```ruby
require "openai"

module TracedOpenAI
  CLIENT = OpenAI::Client.new

  module_function

  def chat(messages:, model: "gpt-4", **options)
    Langsmith.trace("openai.chat", run_type: "llm", inputs: { messages: messages }) do |run|
      run.add_metadata(model: model, **options.slice(:temperature, :max_tokens))

      response = CLIENT.chat(
        parameters: { model: model, messages: messages, **options }
      )

      run.set_token_usage(
        prompt_tokens: response["usage"]["prompt_tokens"],
        completion_tokens: response["usage"]["completion_tokens"]
      )

      run.add_metadata(
        finish_reason: response.dig("choices", 0, "finish_reason"),
        response_id: response["id"]
      )

      response
    end
  end

  def embed(input:, model: "text-embedding-3-small")
    Langsmith.trace("openai.embed", run_type: "llm", inputs: { input: input }) do |run|
      run.add_metadata(model: model)

      response = CLIENT.embeddings(parameters: { model: model, input: input })

      run.set_token_usage(prompt_tokens: response["usage"]["prompt_tokens"], completion_tokens: 0)
      run.add_metadata(dimensions: response.dig("data", 0, "embedding")&.length)

      response
    end
  end
end

# Usage
response = TracedOpenAI.chat(
  messages: [{ role: "user", content: "What is Ruby?" }],
  model: "gpt-4",
  temperature: 0.7
)
```
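The `embed` helper above returns the raw embeddings response, so it is called the same way:

```ruby
response = TracedOpenAI.embed(input: "What is Ruby?")
vector = response.dig("data", 0, "embedding")
```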
---

## Anthropic Integration

Wrapper for the anthropic gem:

```ruby
require "anthropic"

module TracedAnthropic
  CLIENT = Anthropic::Client.new

  module_function

  def message(messages:, model: "claude-3-sonnet-20240229", max_tokens: 1024, **options)
    Langsmith.trace("anthropic.message", run_type: "llm", inputs: { messages: messages }) do |run|
      run.add_metadata(model: model, max_tokens: max_tokens, provider: "anthropic")

      response = CLIENT.messages(
        parameters: {
          model: model,
          messages: messages,
          max_tokens: max_tokens,
          **options
        }
      )

      # Anthropic reports usage as input_tokens / output_tokens
      run.set_token_usage(
        prompt_tokens: response["usage"]["input_tokens"],
        completion_tokens: response["usage"]["output_tokens"]
      )

      run.add_metadata(
        stop_reason: response["stop_reason"],
        response_id: response["id"]
      )

      response
    end
  end
end

# Usage
response = TracedAnthropic.message(
  messages: [{ role: "user", content: "Explain Ruby in one sentence." }],
  model: "claude-3-sonnet-20240229"
)
```

---

## Error Handling

Track errors and retries in your traces:

```ruby
Langsmith.trace("llm_with_retry", run_type: "llm", inputs: { prompt: prompt }) do |run|
  run.add_metadata(max_retries: 3)

  retries = 0
  # RateLimitError / APIError below stand in for your HTTP client's error classes
  begin
    response = call_llm(prompt)

    run.set_token_usage(
      prompt_tokens: response["usage"]["prompt_tokens"],
      completion_tokens: response["usage"]["completion_tokens"]
    )
    run.add_metadata(retries: retries, success: true)

    response.dig("choices", 0, "message", "content")

  rescue RateLimitError => e
    retries += 1
    run.add_event(name: "retry", attempt: retries, reason: "rate_limit", wait: 2**retries)

    if retries <= 3
      sleep(2**retries)
      retry
    end

    run.add_metadata(success: false, final_error: e.message)
    raise

  rescue APIError => e
    run.add_event(name: "error", type: e.class.name, message: e.message)
    run.add_metadata(success: false, error_type: e.class.name)
    raise
  end
end
```

---

## Multi-Tenant Tracing

Route traces to different tenants:

```ruby
# Global default tenant
Langsmith.configure do |config|
  config.tenant_id = "default-tenant"
end

# Per-request tenant override
def process_customer_request(customer_id, prompt)
  tenant_id = "customer-#{customer_id}"

  Langsmith.trace("customer_llm_call", run_type: "llm", tenant_id: tenant_id) do |run|
    run.add_metadata(customer_id: customer_id)

    response = call_llm(prompt)

    run.set_token_usage(
      prompt_tokens: response["usage"]["prompt_tokens"],
      completion_tokens: response["usage"]["completion_tokens"]
    )

    response.dig("choices", 0, "message", "content")
  end
end

# Nested traces inherit tenant_id
Langsmith.trace("parent", tenant_id: "tenant-123") do
  Langsmith.trace("child") do |run|
    # This trace also goes to tenant-123
    run.add_metadata(inherited_tenant: true)
  end
end
```

---

## Best Practices

1. **Always set token usage** - It enables cost tracking in LangSmith.
2. **Add model metadata** - Include model name, temperature, and other parameters.
3. **Use meaningful names** - Name your traces descriptively (e.g., `"generate_summary"`, not `"llm_call"`).
4. **Track finish reasons** - Helps identify truncated responses.
5. **Use events for streaming** - Track chunk counts and timing.
6. **Handle errors gracefully** - Add error events before re-raising.
7. **Flush before exit** - Call `Langsmith.shutdown` to ensure all traces are sent.

```ruby
# At application shutdown
at_exit { Langsmith.shutdown }
```