langsmith-sdk 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,305 @@
+ # frozen_string_literal: true
+
+ # Example: Tracing LLM calls with token usage
+ #
+ # This example demonstrates how to trace LLM API calls and capture
+ # token usage, model information, and other metadata.
+ #
+ # Follows Python SDK patterns for compatibility with LangSmith UI:
+ # - Uses set_model() for model/provider metadata (ls_model_name, ls_provider)
+ # - Uses input_tokens/output_tokens in set_token_usage()
+ # - Uses "new_token" event for streaming TTFT tracking
+ #
+ # Run with: ruby examples/llm_tracing.rb
+
+ require_relative "../lib/langsmith"
+ require "securerandom" # SecureRandom.hex is used by the simulated responses below
+
+ # Configure Langsmith
+ Langsmith.configure do |config|
+   config.api_key = ENV.fetch("LANGSMITH_API_KEY", "your-api-key")
+   config.tracing_enabled = true
+   config.project = "llm-examples"
+ end
+
+ # Example 1: Basic LLM call tracing with token usage
+ def trace_openai_chat(messages, model: "gpt-4")
+   Langsmith.trace("openai_chat", run_type: "llm", inputs: { messages:, model: }) do |run|
+     # Set model info using Python SDK pattern (stored in extra.metadata)
+     run.set_model(model:, provider: "openai")
+     run.add_metadata(temperature: 0.7)
+
+     # Simulate OpenAI API call
+     # In real code, you'd call: response = client.chat.completions.create(...)
+     response = simulate_openai_response(messages, model)
+
+     # Set token usage from the API response (Python SDK uses input_tokens/output_tokens)
+     run.set_token_usage(
+       input_tokens: response[:usage][:prompt_tokens],
+       output_tokens: response[:usage][:completion_tokens],
+       total_tokens: response[:usage][:total_tokens]
+     )
+
+     # Add response metadata
+     run.add_metadata(
+       finish_reason: response[:choices].first[:finish_reason],
+       response_id: response[:id]
+     )
+
+     # Return the response content
+     response[:choices].first[:message][:content]
+   end
+ end
+
+ # Example 2: Streaming LLM call with TTFT tracking (Python SDK pattern)
+ def trace_streaming_llm(prompt)
+   Langsmith.trace("streaming_chat", run_type: "llm", inputs: { prompt: }) do |run|
+     run.set_model(model: "gpt-4", provider: "openai")
+     run.add_metadata(streaming: true)
+
+     # Track tokens and timing as we stream
+     total_output_tokens = 0
+     full_response = ""
+     first_token_logged = false
+     stream_start_time = Time.now
+
+     # Simulate streaming chunks
+     chunks = simulate_streaming_response(prompt)
+     chunks.each_with_index do |chunk, index|
+       # Add "new_token" event for FIRST token only (Python SDK pattern)
+       # LangSmith uses this to calculate time-to-first-token
+       unless first_token_logged
+         run.add_event(name: "new_token", time: Time.now.utc, token: chunk[:content])
+         first_token_logged = true
+       end
+
+       full_response += chunk[:content]
+       total_output_tokens += chunk[:tokens]
+
+       # Simulate streaming delay
+       sleep(0.05) if index < chunks.length - 1
+     end
+
+     stream_end_time = Time.now
+     tokens_per_second = (total_output_tokens / (stream_end_time - stream_start_time)).round(2)
+
+     # Set final token usage (Python SDK uses input_tokens/output_tokens)
+     run.set_token_usage(
+       input_tokens: estimate_prompt_tokens(prompt),
+       output_tokens: total_output_tokens,
+       total_tokens: estimate_prompt_tokens(prompt) + total_output_tokens
+     )
+
+     run.add_metadata(tokens_per_second:)
+
+     full_response
+   end
+ end
+
+ # Example 3: Chain with multiple LLM calls
+ def trace_llm_chain(user_question)
+   Langsmith.trace("question_answer_chain", run_type: "chain", inputs: { question: user_question }) do |chain_run|
+     chain_run.add_metadata(chain_type: "qa_with_context")
+     chain_run.add_tags("qa", "production")
+
+     # Step 1: Generate search query
+     search_query = Langsmith.trace("generate_search_query", run_type: "llm") do |run|
+       run.set_model(model: "gpt-3.5-turbo", provider: "openai")
+       run.add_metadata(purpose: "query_generation")
+
+       prompt = "Generate a search query for: #{user_question}"
+       response = simulate_quick_llm_call(prompt)
+
+       run.set_token_usage(input_tokens: 25, output_tokens: 15)
+       response
+     end
+
+     # Step 2: Retrieve context (retriever run)
+     context = Langsmith.trace("retrieve_context", run_type: "retriever") do |run|
+       run.add_metadata(index: "knowledge_base", top_k: 3)
+
+       # Simulate retrieval; a real implementation would search with search_query
+       ["Context 1: Ruby is a programming language.",
+        "Context 2: LangSmith provides observability.",
+        "Context 3: Tracing helps debug LLM apps."]
+     end
+
+     # Step 3: Generate final answer
+     answer = Langsmith.trace("generate_answer", run_type: "llm") do |run|
+       run.set_model(model: "gpt-4", provider: "openai")
+       run.add_metadata(purpose: "answer_generation")
+
+       messages = [
+         { role: "system", content: "Answer based on context: #{context.join("\n")}" },
+         { role: "user", content: user_question }
+       ]
+
+       response = simulate_openai_response(messages, "gpt-4")
+
+       run.set_token_usage(
+         input_tokens: response[:usage][:prompt_tokens],
+         output_tokens: response[:usage][:completion_tokens],
+         total_tokens: response[:usage][:total_tokens]
+       )
+
+       response[:choices].first[:message][:content]
+     end
+
+     answer
+   end
+ end
+
+ # Example 4: Using Traceable module for LLM service class
+ class LLMService
+   include Langsmith::Traceable
+
+   def initialize(model: "gpt-4", temperature: 0.7)
+     @model = model
+     @temperature = temperature
+   end
+
+   traceable run_type: "llm", name: "llm_service.chat"
+   def chat(messages)
+     # In real code: response = @client.chat.completions.create(...)
+     response = simulate_openai_response(messages, @model)
+
+     # Access current run to set model and token usage (Python SDK pattern)
+     if (run = Langsmith.current_run)
+       run.set_model(model: @model, provider: "openai")
+       run.set_token_usage(
+         input_tokens: response[:usage][:prompt_tokens],
+         output_tokens: response[:usage][:completion_tokens],
+         total_tokens: response[:usage][:total_tokens]
+       )
+       run.add_metadata(temperature: @temperature)
+     end
+
+     response[:choices].first[:message][:content]
+   end
+
+   traceable run_type: "llm", name: "llm_service.embed"
+   def embed(text)
+     # Simulate embedding call
+     tokens_used = (text.length / 4.0).ceil
+
+     if (run = Langsmith.current_run)
+       run.set_model(model: "text-embedding-3-small", provider: "openai")
+       # Embeddings only have input tokens, no output tokens
+       run.set_token_usage(input_tokens: tokens_used)
+       run.add_metadata(dimensions: 1536)
+     end
+
+     Array.new(1536) { rand(-1.0..1.0) }
+   end
+ end
+
+ # Example 5: Error handling with LLM calls
+ def trace_with_error_handling(prompt)
+   Langsmith.trace("llm_with_retry", run_type: "llm", inputs: { prompt: }) do |run|
+     run.set_model(model: "gpt-4", provider: "openai")
+     run.add_metadata(max_retries: 3)
+
+     retries = 0
+     begin
+       # Simulate potential failure
+       if rand < 0.3 && retries < 2
+         retries += 1
+         run.add_event(name: "retry", attempt: retries, reason: "rate_limited")
+         raise "Rate limited"
+       end
+
+       response = simulate_openai_response([{ role: "user", content: prompt }], "gpt-4")
+       run.set_token_usage(
+         input_tokens: response[:usage][:prompt_tokens],
+         output_tokens: response[:usage][:completion_tokens],
+         total_tokens: response[:usage][:total_tokens]
+       )
+       run.add_metadata(retries:)
+
+       response[:choices].first[:message][:content]
+     rescue StandardError => e
+       run.add_event(name: "error", message: e.message)
+       retry if retries < 3
+       raise
+     end
+   end
+ end
+
+ # ============================================================================
+ # Helper methods to simulate API responses (replace with real API calls)
+ # ============================================================================
+
+ def simulate_openai_response(messages, model)
+   prompt_tokens = messages.sum { |m| (m[:content].length / 4.0).ceil }
+   completion_tokens = rand(50..200)
+
+   {
+     id: "chatcmpl-#{SecureRandom.hex(12)}",
+     model: model,
+     choices: [
+       {
+         index: 0,
+         message: { role: "assistant", content: "This is a simulated response from #{model}." },
+         finish_reason: "stop"
+       }
+     ],
+     usage: {
+       prompt_tokens: prompt_tokens,
+       completion_tokens: completion_tokens,
+       total_tokens: prompt_tokens + completion_tokens
+     }
+   }
+ end
+
+ def simulate_streaming_response(prompt)
+   words = %w[This is a simulated streaming response from the LLM model.]
+   words.map { |word| { content: "#{word} ", tokens: 1 } }
+ end
+
+ def simulate_quick_llm_call(prompt)
+   "search query for: #{prompt.split(":").last.strip}"
+ end
+
+ def estimate_prompt_tokens(text)
+   (text.length / 4.0).ceil
+ end
+
+ # ============================================================================
+ # Run the examples
+ # ============================================================================
+
+ if __FILE__ == $PROGRAM_NAME
+   puts "=" * 60
+   puts "LangSmith LLM Tracing Examples"
+   puts "=" * 60
+
+   puts "\n1. Basic LLM call with token usage:"
+   result = trace_openai_chat([{ role: "user", content: "What is Ruby?" }])
+   puts " Response: #{result}"
+
+   puts "\n2. Streaming LLM call:"
+   result = trace_streaming_llm("Tell me about Ruby programming")
+   puts " Response: #{result}"
+
+   puts "\n3. Multi-step LLM chain:"
+   result = trace_llm_chain("How do I trace LLM calls?")
+   puts " Response: #{result}"
+
+   puts "\n4. Using Traceable module:"
+   service = LLMService.new(model: "gpt-4", temperature: 0.5)
+   result = service.chat([{ role: "user", content: "Hello!" }])
+   puts " Chat response: #{result}"
+   embedding = service.embed("Hello world")
+   puts " Embedding dimensions: #{embedding.length}"
+
+   puts "\n5. Error handling:"
+   result = trace_with_error_handling("Test prompt")
+   puts " Response: #{result}"
+
+   # Ensure all traces are sent before exiting
+   Langsmith.shutdown
+
+   puts "\n" + "=" * 60
+   puts "All examples completed! Check LangSmith for traces."
+   puts "=" * 60
+ end