langsmith-sdk 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +4 -0
- data/.rubocop.yml +120 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +48 -0
- data/LICENSE +22 -0
- data/README.md +224 -0
- data/Rakefile +8 -0
- data/examples/LLM_TRACING.md +439 -0
- data/examples/complex_agent.rb +472 -0
- data/examples/llm_tracing.rb +304 -0
- data/examples/openai_integration.rb +751 -0
- data/langsmith.gemspec +38 -0
- data/lib/langsmith/batch_processor.rb +237 -0
- data/lib/langsmith/client.rb +181 -0
- data/lib/langsmith/configuration.rb +96 -0
- data/lib/langsmith/context.rb +73 -0
- data/lib/langsmith/errors.rb +13 -0
- data/lib/langsmith/railtie.rb +86 -0
- data/lib/langsmith/run.rb +320 -0
- data/lib/langsmith/run_tree.rb +154 -0
- data/lib/langsmith/traceable.rb +120 -0
- data/lib/langsmith/version.rb +5 -0
- data/lib/langsmith.rb +144 -0
- metadata +134 -0
data/examples/llm_tracing.rb
@@ -0,0 +1,304 @@
# frozen_string_literal: true

# Example: Tracing LLM calls with token usage
#
# This example demonstrates how to trace LLM API calls and capture
# token usage, model information, and other metadata.
#
# Follows Python SDK patterns for compatibility with the LangSmith UI:
# - Uses set_model() for model/provider metadata (ls_model_name, ls_provider)
# - Uses input_tokens/output_tokens in set_token_usage()
# - Uses the "new_token" event for streaming TTFT tracking
#
# Run with: ruby examples/llm_tracing.rb

require_relative "../lib/langsmith"
require "securerandom" # stdlib; used by the simulated response IDs below

# Configure Langsmith
Langsmith.configure do |config|
  config.api_key = ENV.fetch("LANGSMITH_API_KEY", "your-api-key")
  config.tracing_enabled = true
  config.project = "llm-examples"
end

# Example 1: Basic LLM call tracing with token usage
def trace_openai_chat(messages, model: "gpt-4")
  Langsmith.trace("openai_chat", run_type: "llm", inputs: { messages:, model: }) do |run|
    # Set model info using the Python SDK pattern (stored in extra.metadata)
    run.set_model(model:, provider: "openai")
    run.add_metadata(temperature: 0.7)

    # Simulate an OpenAI API call
    # In real code, you'd call: response = client.chat.completions.create(...)
    response = simulate_openai_response(messages, model)

    # Set token usage from the API response (Python SDK uses input_tokens/output_tokens)
    run.set_token_usage(
      input_tokens: response[:usage][:prompt_tokens],
      output_tokens: response[:usage][:completion_tokens],
      total_tokens: response[:usage][:total_tokens]
    )

    # Add response metadata
    run.add_metadata(
      finish_reason: response[:choices].first[:finish_reason],
      response_id: response[:id]
    )

    # Return the response content
    response[:choices].first[:message][:content]
  end
end
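
# A minimal sketch, not from the gem's example file: the set_token_usage triplet
# above is repeated throughout this file, so a small helper can normalize an
# OpenAI-style usage hash (string or symbol keys) into the SDK's
# input_tokens/output_tokens convention. The helper name is hypothetical.
def record_openai_usage(run, usage)
  usage = usage.transform_keys(&:to_sym) # tolerate string keys from JSON responses
  run.set_token_usage(
    input_tokens: usage[:prompt_tokens],
    output_tokens: usage[:completion_tokens],
    total_tokens: usage[:total_tokens] || (usage[:prompt_tokens] + usage[:completion_tokens])
  )
end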

# Example 2: Streaming LLM call with TTFT tracking (Python SDK pattern)
def trace_streaming_llm(prompt)
  Langsmith.trace("streaming_chat", run_type: "llm", inputs: { prompt: }) do |run|
    run.set_model(model: "gpt-4", provider: "openai")
    run.add_metadata(streaming: true)

    # Track tokens and timing as we stream
    total_output_tokens = 0
    full_response = ""
    first_token_logged = false
    stream_start_time = Time.now

    # Simulate streaming chunks
    chunks = simulate_streaming_response(prompt)
    chunks.each_with_index do |chunk, index|
      # Add "new_token" event for FIRST token only (Python SDK pattern)
      # LangSmith uses this to calculate time-to-first-token
      unless first_token_logged
        run.add_event(name: "new_token", time: Time.now.utc, token: chunk[:content])
        first_token_logged = true
      end

      full_response += chunk[:content]
      total_output_tokens += chunk[:tokens]

      # Simulate streaming delay
      sleep(0.05) if index < chunks.length - 1
    end

    stream_end_time = Time.now
    tokens_per_second = (total_output_tokens / (stream_end_time - stream_start_time)).round(2)

    # Set final token usage (Python SDK uses input_tokens/output_tokens)
    run.set_token_usage(
      input_tokens: estimate_prompt_tokens(prompt),
      output_tokens: total_output_tokens,
      total_tokens: estimate_prompt_tokens(prompt) + total_output_tokens
    )

    run.add_metadata(tokens_per_second:)

    full_response
  end
end
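
# A hedged extension of Example 2, not from the gem's example file: besides the
# "new_token" event, you may also want time-to-first-token as plain metadata on
# the run. The metadata key and function name are hypothetical; only APIs shown
# above (trace, set_model, add_event, add_metadata) are used.
def trace_streaming_with_ttft(prompt)
  Langsmith.trace("streaming_chat_ttft", run_type: "llm", inputs: { prompt: }) do |run|
    run.set_model(model: "gpt-4", provider: "openai")
    started = Time.now
    first_token_at = nil
    response = +"" # mutable buffer under frozen_string_literal

    simulate_streaming_response(prompt).each do |chunk|
      if first_token_at.nil?
        first_token_at = Time.now
        run.add_event(name: "new_token", time: first_token_at.utc, token: chunk[:content])
      end
      response << chunk[:content]
    end

    # Record TTFT in milliseconds alongside the event-based tracking
    run.add_metadata(time_to_first_token_ms: ((first_token_at - started) * 1000).round(1)) if first_token_at
    response
  end
end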

# Example 3: Chain with multiple LLM calls
def trace_llm_chain(user_question)
  Langsmith.trace("question_answer_chain", run_type: "chain", inputs: { question: user_question }) do |chain_run|
    chain_run.add_metadata(chain_type: "qa_with_context")
    chain_run.add_tags("qa", "production")

    # Step 1: Generate a search query
    search_query = Langsmith.trace("generate_search_query", run_type: "llm") do |run|
      run.set_model(model: "gpt-3.5-turbo", provider: "openai")
      run.add_metadata(purpose: "query_generation")

      prompt = "Generate a search query for: #{user_question}"
      response = simulate_quick_llm_call(prompt)

      run.set_token_usage(input_tokens: 25, output_tokens: 15)
      response
    end

    # Step 2: Retrieve context (tool call) using the generated query
    context = Langsmith.trace("retrieve_context", run_type: "retriever") do |run|
      run.add_metadata(index: "knowledge_base", top_k: 3, query: search_query)

      # Simulate retrieval
      ["Context 1: Ruby is a programming language.",
       "Context 2: LangSmith provides observability.",
       "Context 3: Tracing helps debug LLM apps."]
    end

    # Step 3: Generate the final answer
    answer = Langsmith.trace("generate_answer", run_type: "llm") do |run|
      run.set_model(model: "gpt-4", provider: "openai")
      run.add_metadata(purpose: "answer_generation")

      messages = [
        { role: "system", content: "Answer based on context: #{context.join("\n")}" },
        { role: "user", content: user_question }
      ]

      response = simulate_openai_response(messages, "gpt-4")

      run.set_token_usage(
        input_tokens: response[:usage][:prompt_tokens],
        output_tokens: response[:usage][:completion_tokens],
        total_tokens: response[:usage][:total_tokens]
      )

      response[:choices].first[:message][:content]
    end

    answer
  end
end
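
# A minimal sketch, not from the gem's example file: the LLM steps in Example 3
# share the same boilerplate (open an llm run, set model/provider, add metadata).
# The wrapper below only composes APIs shown above; its name and signature are
# hypothetical.
def traced_llm_step(name, model:, provider: "openai", **metadata, &block)
  Langsmith.trace(name, run_type: "llm") do |run|
    run.set_model(model: model, provider: provider)
    run.add_metadata(**metadata) unless metadata.empty?
    block.call(run)
  end
end

# Usage, mirroring Step 1 of the chain above:
#   traced_llm_step("generate_search_query", model: "gpt-3.5-turbo", purpose: "query_generation") do |run|
#     run.set_token_usage(input_tokens: 25, output_tokens: 15)
#     simulate_quick_llm_call("Generate a search query for: ...")
#   end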

# Example 4: Using Traceable module for LLM service class
class LLMService
  include Langsmith::Traceable

  def initialize(model: "gpt-4", temperature: 0.7)
    @model = model
    @temperature = temperature
  end

  traceable run_type: "llm", name: "llm_service.chat"
  def chat(messages)
    # In real code: response = @client.chat.completions.create(...)
    response = simulate_openai_response(messages, @model)

    # Access current run to set model and token usage (Python SDK pattern)
    if (run = Langsmith.current_run)
      run.set_model(model: @model, provider: "openai")
      run.set_token_usage(
        input_tokens: response[:usage][:prompt_tokens],
        output_tokens: response[:usage][:completion_tokens],
        total_tokens: response[:usage][:total_tokens]
      )
      run.add_metadata(temperature: @temperature)
    end

    response[:choices].first[:message][:content]
  end

  traceable run_type: "llm", name: "llm_service.embed"
  def embed(text)
    # Simulate embedding call
    tokens_used = (text.length / 4.0).ceil

    if (run = Langsmith.current_run)
      run.set_model(model: "text-embedding-3-small", provider: "openai")
      # Embeddings only have input tokens, no output tokens
      run.set_token_usage(input_tokens: tokens_used)
      run.add_metadata(dimensions: 1536)
    end

    Array.new(1536) { rand(-1.0..1.0) }
  end
end
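
# A hedged sketch, not from the gem's example file: composing traceable methods.
# Each traceable call opens its own run, so a wrapper method marked
# run_type: "chain" that calls chat and embed is expected (an assumption, based
# on the nesting shown in Example 3) to record them as child runs. The class and
# method names below are hypothetical.
class DocumentAnswerer
  include Langsmith::Traceable

  def initialize
    @llm = LLMService.new
  end

  traceable run_type: "chain", name: "document_answerer.answer"
  def answer(question)
    @llm.embed(question)                             # expected to appear as a nested llm run
    @llm.chat([{ role: "user", content: question }]) # expected to appear as a nested llm run
  end
end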

# Example 5: Error handling with LLM calls
def trace_with_error_handling(prompt)
  Langsmith.trace("llm_with_retry", run_type: "llm", inputs: { prompt: }) do |run|
    run.set_model(model: "gpt-4", provider: "openai")
    run.add_metadata(max_retries: 3)

    retries = 0
    begin
      # Simulate potential failure
      if rand < 0.3 && retries < 2
        retries += 1
        run.add_event(name: "retry", attempt: retries, reason: "rate_limited")
        raise "Rate limited"
      end

      response = simulate_openai_response([{ role: "user", content: prompt }], "gpt-4")
      run.set_token_usage(
        input_tokens: response[:usage][:prompt_tokens],
        output_tokens: response[:usage][:completion_tokens],
        total_tokens: response[:usage][:total_tokens]
      )
      run.add_metadata(retries:)

      response[:choices].first[:message][:content]
    rescue StandardError => e
      run.add_event(name: "error", message: e.message)
      retry if retries < 3
      raise
    end
  end
end
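
# A hedged variant of Example 5, not from the gem's example file: the retry loop
# above retries immediately, while real clients usually back off between attempts.
# Same trace/add_event/set_model APIs as above; the function name is hypothetical.
def trace_with_backoff(prompt, max_retries: 3)
  Langsmith.trace("llm_with_backoff", run_type: "llm", inputs: { prompt: }) do |run|
    run.set_model(model: "gpt-4", provider: "openai")
    attempts = 0
    begin
      attempts += 1
      raise "Rate limited" if rand < 0.3 && attempts < max_retries # simulate transient failures

      response = simulate_openai_response([{ role: "user", content: prompt }], "gpt-4")
      run.add_metadata(attempts:)
      response[:choices].first[:message][:content]
    rescue StandardError => e
      run.add_event(name: "retry", attempt: attempts, reason: e.message)
      if attempts < max_retries
        sleep(0.1 * (2**attempts)) # exponential backoff: 0.2s, 0.4s, 0.8s, ...
        retry
      end
      raise
    end
  end
end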

# ============================================================================
# Helper methods to simulate API responses (replace with real API calls)
# ============================================================================

def simulate_openai_response(messages, model)
  prompt_tokens = messages.sum { |m| (m[:content].length / 4.0).ceil }
  completion_tokens = rand(50..200)

  {
    id: "chatcmpl-#{SecureRandom.hex(12)}",
    model: model,
    choices: [
      {
        index: 0,
        message: { role: "assistant", content: "This is a simulated response from #{model}." },
        finish_reason: "stop"
      }
    ],
    usage: {
      prompt_tokens: prompt_tokens,
      completion_tokens: completion_tokens,
      total_tokens: prompt_tokens + completion_tokens
    }
  }
end

def simulate_streaming_response(prompt)
  words = %w[This is a simulated streaming response from the LLM model.]
  words.map { |word| { content: "#{word} ", tokens: 1 } }
end

def simulate_quick_llm_call(prompt)
  "search query for: #{prompt.split(":").last.strip}"
end

def estimate_prompt_tokens(text)
  (text.length / 4.0).ceil
end

# ============================================================================
# Run the examples
# ============================================================================

if __FILE__ == $PROGRAM_NAME
  puts "=" * 60
  puts "LangSmith LLM Tracing Examples"
  puts "=" * 60

  puts "\n1. Basic LLM call with token usage:"
  result = trace_openai_chat([{ role: "user", content: "What is Ruby?" }])
  puts " Response: #{result}"

  puts "\n2. Streaming LLM call:"
  result = trace_streaming_llm("Tell me about Ruby programming")
  puts " Response: #{result}"

  puts "\n3. Multi-step LLM chain:"
  result = trace_llm_chain("How do I trace LLM calls?")
  puts " Response: #{result}"

  puts "\n4. Using Traceable module:"
  service = LLMService.new(model: "gpt-4", temperature: 0.5)
  result = service.chat([{ role: "user", content: "Hello!" }])
  puts " Chat response: #{result}"
  embedding = service.embed("Hello world")
  puts " Embedding dimensions: #{embedding.length}"

  puts "\n5. Error handling:"
  result = trace_with_error_handling("Test prompt")
  puts " Response: #{result}"

  # Ensure all traces are sent before exiting
  Langsmith.shutdown

  puts "\n" + "=" * 60
  puts "All examples completed! Check LangSmith for traces."
  puts "=" * 60
end