llmops-observability 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmops_observability/__init__.py +27 -0
- llmops_observability/asgi_middleware.py +146 -0
- llmops_observability/config.py +79 -0
- llmops_observability/llm.py +580 -0
- llmops_observability/models.py +32 -0
- llmops_observability/pricing.py +132 -0
- llmops_observability/trace_manager.py +641 -0
- llmops_observability-8.0.0.dist-info/METADATA +263 -0
- llmops_observability-8.0.0.dist-info/RECORD +11 -0
- llmops_observability-8.0.0.dist-info/WHEEL +5 -0
- llmops_observability-8.0.0.dist-info/top_level.txt +1 -0
llmops_observability/llm.py
@@ -0,0 +1,580 @@
"""
LLM tracking decorator for LLMOps Observability
Direct Langfuse integration for tracking LLM calls
Enhanced with veriskGO-style input/output handling
"""
from __future__ import annotations
import functools
import inspect
import sys
import time
import traceback
from typing import Optional, Dict, Any, List, Union
from .trace_manager import TraceManager


def extract_text(resp: Any) -> str:
    """
    Extract text from various LLM response formats.
    Supports: Bedrock Converse, Bedrock InvokeModel, OpenAI, LangChain, etc.
    """
    if isinstance(resp, str):
        return resp

    if not isinstance(resp, dict):
        return str(resp)

    # Bedrock Converse API
    try:
        return resp["output"]["message"]["content"][0]["text"]
    except (KeyError, IndexError, TypeError):
        pass

    # Anthropic Messages API
    try:
        return resp["content"][0]["text"]
    except (KeyError, IndexError, TypeError):
        pass

    # Amazon Titan
    try:
        return resp["results"][0]["outputText"]
    except (KeyError, IndexError, TypeError):
        pass

    # Cohere
    try:
        return resp["generation"]
    except (KeyError, TypeError):
        pass

    # AI21
    try:
        return resp["outputs"][0]["text"]
    except (KeyError, IndexError, TypeError):
        pass

    # Generic text field
    try:
        return resp["text"]
    except (KeyError, TypeError):
        pass

    # OpenAI format
    try:
        return resp["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        pass

    return str(resp)
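
# --- Illustrative example (not part of the packaged file): sample inputs for the
# --- fallback chain above. The payloads are hypothetical but follow the shapes
# --- this function probes, ending with the str() fallback.
#
#     >>> extract_text({"output": {"message": {"content": [{"text": "Hello"}]}}})
#     'Hello'
#     >>> extract_text({"choices": [{"message": {"content": "Hi"}}]})
#     'Hi'
#     >>> extract_text(42)
#     '42'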


def extract_usage(result: Any, kwargs: Dict[str, Any]) -> Optional[Dict[str, int]]:
    """
    Extract token usage from LLM response or callback.

    Returns:
        Dict with input_tokens, output_tokens, total_tokens or None
    """
    usage = {}

    # Check if result has usage attribute (OpenAI, Anthropic direct)
    if hasattr(result, 'usage'):
        usage_obj = result.usage
        if hasattr(usage_obj, 'prompt_tokens'):
            usage['input_tokens'] = usage_obj.prompt_tokens
        if hasattr(usage_obj, 'completion_tokens'):
            usage['output_tokens'] = usage_obj.completion_tokens
        if hasattr(usage_obj, 'total_tokens'):
            usage['total_tokens'] = usage_obj.total_tokens
        return usage if usage else None

    # Check Bedrock response format
    if isinstance(result, dict):
        # Bedrock Converse API
        if 'usage' in result:
            usage_data = result['usage']
            if 'inputTokens' in usage_data:
                usage['input_tokens'] = usage_data['inputTokens']
            if 'outputTokens' in usage_data:
                usage['output_tokens'] = usage_data['outputTokens']
            if 'totalTokens' in usage_data:
                usage['total_tokens'] = usage_data['totalTokens']
            return usage if usage else None

    # Check for LangChain callbacks in kwargs
    config = kwargs.get('config', {})
    callbacks = config.get('callbacks', [])
    for callback in callbacks:
        # Bedrock Anthropic callback
        if hasattr(callback, 'prompt_tokens'):
            usage['input_tokens'] = callback.prompt_tokens
        if hasattr(callback, 'completion_tokens'):
            usage['output_tokens'] = callback.completion_tokens
        if hasattr(callback, 'total_tokens'):
            usage['total_tokens'] = callback.total_tokens
        if usage:
            return usage

    return None
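
# --- Illustrative example (not part of the packaged file): a minimal sketch of
# --- what extract_usage() returns, assuming a Bedrock Converse-style response
# --- dict; a dict without a "usage" key and no callbacks yields None.
#
#     >>> extract_usage({"usage": {"inputTokens": 12, "outputTokens": 34, "totalTokens": 46}}, {})
#     {'input_tokens': 12, 'output_tokens': 34, 'total_tokens': 46}
#     >>> extract_usage({"statusCode": 200}, {}) is None
#     True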


def extract_model_info(args: tuple, kwargs: Dict[str, Any]) -> Optional[str]:
    """
    Extract model name from function arguments.

    Returns:
        Model name string or None
    """
    # Check kwargs for common model parameter names
    for key in ['model', 'model_id', 'model_name', 'modelId']:
        if key in kwargs:
            return str(kwargs[key])

    # Check if first arg has model attribute (LangChain model objects)
    if args:
        first_arg = args[0]
        if hasattr(first_arg, 'model'):
            return str(first_arg.model)
        if hasattr(first_arg, 'model_id'):
            return str(first_arg.model_id)
        if hasattr(first_arg, 'model_name'):
            return str(first_arg.model_name)

    return None
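
# --- Illustrative example (not part of the packaged file): how extract_model_info()
# --- resolves the model name. FakeChatModel is a hypothetical stand-in for a
# --- LangChain model object carrying a model_id attribute.
#
#     >>> extract_model_info((), {"modelId": "anthropic.claude-3-sonnet-20240229-v1:0"})
#     'anthropic.claude-3-sonnet-20240229-v1:0'
#     >>> class FakeChatModel:
#     ...     model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
#     >>> extract_model_info((FakeChatModel(),), {})
#     'anthropic.claude-3-sonnet-20240229-v1:0'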


def track_llm_call(
    name: Optional[str] = None,
    *,
    tags: Optional[Dict[str, Any]] = None,
    extract_output: bool = True,
    model: Optional[str] = None,  # Allow specifying model explicitly
    capture_locals: Union[bool, List[str]] = False,
    capture_self: bool = False,
):
    """
    Decorator to track LLM calls with Langfuse as generations.

    Enhanced version inspired by veriskGO with proper input/output handling.

    Args:
        name: Custom name for the generation (defaults to function name)
        tags: Metadata tags to attach to the generation
        extract_output: Whether to extract text from LLM response (default True)
        model: Model ID for cost calculation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
        capture_locals: Capture local variables (True/False or list of var names)
        capture_self: Whether to capture 'self' variable (default False)

    Usage:
        @track_llm_call()
        def call_bedrock(prompt):
            response = bedrock.converse(...)
            return response

        @track_llm_call(name="summarize", tags={"model": "claude-3"})
        async def summarize(text):
            return await chain.ainvoke(...)
    """
    def decorator(func):
        span_name = name or func.__name__
        is_async = inspect.iscoroutinefunction(func)

        if is_async:
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                if not TraceManager.has_active_trace():
                    return await func(*args, **kwargs)

                # Setup local variable capture
                from .trace_manager import capture_function_locals, serialize_value as tm_serialize
                tracer, locals_before, locals_after = capture_function_locals(
                    func, capture_locals=capture_locals, capture_self=capture_self
                )

                # Extract callback BEFORE execution for post-execution token reading
                bedrock_callback = None
                config = kwargs.get('config', {})
                callbacks = config.get('callbacks', [])
                for cb in callbacks:
                    # Check if it's a Bedrock token usage callback
                    if hasattr(cb, 'prompt_tokens') and hasattr(cb, 'completion_tokens'):
                        bedrock_callback = cb
                        break

                # Build input - extract prompt if first arg is string
                if args and isinstance(args[0], str):
                    input_data = {
                        "prompt": args[0],
                        "args": args[1:],
                        "kwargs": kwargs,
                    }
                else:
                    input_data = {
                        "args": args,
                        "kwargs": kwargs,
                    }

                # Start observation context (this will be parent for nested calls)
                obs_ctx = TraceManager.start_observation_context(span_name, "generation", input_data)

                if not obs_ctx:
                    return await func(*args, **kwargs)

                error = None
                result = None
                start_time = time.time()

                # Use the observation context properly with 'with' statement
                with obs_ctx as obs:
                    # Push observation onto stack so nested calls become children
                    TraceManager.push_observation(obs)

                    if tracer:
                        sys.settrace(tracer)

                    try:
                        result = await func(*args, **kwargs)
                    except Exception as e:
                        error = e
                        raise
                    finally:
                        if tracer:
                            sys.settrace(None)

                        # Pop observation from stack
                        TraceManager.pop_observation()
                        duration_ms = int((time.time() - start_time) * 1000)

                        # Extract model info - use decorator param if provided
                        model_name = model or extract_model_info(args, kwargs)

                        # Extract token usage - Try BOTH callback AND response
                        usage_info = None
                        total_cost = None

                        # First, try callback (for LangChain with BedrockAnthropicTokenUsageCallbackHandler)
                        if bedrock_callback:
                            if hasattr(bedrock_callback, 'total_tokens') and bedrock_callback.total_tokens > 0:
                                usage_info = {
                                    "input_tokens": getattr(bedrock_callback, 'prompt_tokens', 0),
                                    "output_tokens": getattr(bedrock_callback, 'completion_tokens', 0),
                                    "total_tokens": getattr(bedrock_callback, 'total_tokens', 0),
                                }

                            # Get cost from callback if available
                            if hasattr(bedrock_callback, 'total_cost'):
                                total_cost = getattr(bedrock_callback, 'total_cost', 0)

                        # Also try extracting from response (works for direct Bedrock calls)
                        if result and not error:
                            response_usage = extract_usage(result, kwargs)
                            if response_usage:
                                # Use response usage if no callback usage
                                if not usage_info:
                                    usage_info = response_usage

                        # Build output
                        if error:
                            output_data = {
                                "status": "error",
                                "error": str(error),
                                "stacktrace": traceback.format_exc(),
                                "locals_before": locals_before,
                                "locals_after": locals_after,
                            }
                        else:
                            # Extract text from response if enabled
                            if extract_output:
                                try:
                                    text_output = extract_text(result)
                                    output_data = {
                                        "status": "success",
                                        "text": text_output,
                                        "raw": result,
                                        "locals_before": locals_before,
                                        "locals_after": locals_after,
                                    }
                                except Exception:
                                    output_data = {
                                        "status": "success",
                                        "raw": result,
                                        "locals_before": locals_before,
                                        "locals_after": locals_after,
                                    }
                            else:
                                output_data = {
                                    "status": "success",
                                    "raw": result,
                                    "locals_before": locals_before,
                                    "locals_after": locals_after,
                                }

                        # Update observation with output, usage, and model
                        from .trace_manager import serialize_value
                        from .config import get_langfuse_client

                        # Build base update params
                        update_params = {
                            "output": serialize_value(output_data),
                            "metadata": tags or {},
                            "level": "ERROR" if error else "DEFAULT",
                            "status_message": str(error) if error else None,
                        }

                        # Add model info if available
                        if model_name:
                            update_params["model"] = model_name

                        # Add usage info using Langfuse's usage_details parameter
                        if usage_info:
                            # Langfuse expects usage_details with input/output/total keys
                            update_params["usage_details"] = {
                                "input": usage_info.get("input_tokens", 0),
                                "output": usage_info.get("output_tokens", 0),
                                "total": usage_info.get("total_tokens", 0),
                            }

                            # Calculate cost based on model and tokens
                            from .pricing import calculate_cost

                            # Always calculate cost breakdown from tokens and model
                            cost_dict = calculate_cost(
                                input_tokens=usage_info.get("input_tokens", 0),
                                output_tokens=usage_info.get("output_tokens", 0),
                                model_id=model_name
                            )

                            # Use callback cost if available, otherwise use calculated
                            if total_cost is None or total_cost == 0:
                                total_cost = cost_dict["total"]

                            # Add cost_details if available
                            if total_cost is not None and total_cost > 0:
                                cost_details_value = {
                                    "input": cost_dict["input"],
                                    "output": cost_dict["output"],
                                    "total": total_cost,
                                }
                                update_params["cost_details"] = cost_details_value

                        # Use Langfuse's update_current_generation() instead of obs.update()
                        langfuse = get_langfuse_client()
                        langfuse.update_current_generation(**update_params)

                # Flush after exiting context
                from .config import get_langfuse_client
                langfuse = get_langfuse_client()
                langfuse.flush()

                status_str = " (error)" if error else ""
                usage_str = f" [{usage_info.get('total_tokens', 0)} tokens]" if usage_info else ""
                print(f"[LLMOps-Observability] Generation sent{status_str}: {span_name} ({duration_ms}ms){usage_str}")

                return result

            return async_wrapper
        else:
            @functools.wraps(func)
            def sync_wrapper(*args, **kwargs):
                if not TraceManager.has_active_trace():
                    return func(*args, **kwargs)

                # Setup local variable capture
                from .trace_manager import capture_function_locals, serialize_value as tm_serialize
                tracer, locals_before, locals_after = capture_function_locals(
                    func, capture_locals=capture_locals, capture_self=capture_self
                )

                # Extract callback BEFORE execution for post-execution token reading
                bedrock_callback = None
                config = kwargs.get('config', {})
                callbacks = config.get('callbacks', [])
                for cb in callbacks:
                    # Check if it's a Bedrock token usage callback
                    if hasattr(cb, 'prompt_tokens') and hasattr(cb, 'completion_tokens'):
                        bedrock_callback = cb
                        break

                # Build input - extract prompt if first arg is string
                if args and isinstance(args[0], str):
                    input_data = {
                        "prompt": args[0],
                        "args": args[1:],
                        "kwargs": kwargs,
                    }
                else:
                    input_data = {
                        "args": args,
                        "kwargs": kwargs,
                    }

                # Start observation context (this will be parent for nested calls)
                obs_ctx = TraceManager.start_observation_context(span_name, "generation", input_data)

                if not obs_ctx:
                    return func(*args, **kwargs)

                error = None
                result = None
                start_time = time.time()

                # Use the observation context properly with 'with' statement
                with obs_ctx as obs:
                    # Push observation onto stack so nested calls become children
                    TraceManager.push_observation(obs)

                    if tracer:
                        sys.settrace(tracer)

                    try:
                        result = func(*args, **kwargs)
                    except Exception as e:
                        error = e
                        raise
                    finally:
                        if tracer:
                            sys.settrace(None)

                        # Pop observation from stack
                        TraceManager.pop_observation()

                        duration_ms = int((time.time() - start_time) * 1000)

                        # Extract model info - use decorator param if provided
                        model_name = model or extract_model_info(args, kwargs)

                        # Extract token usage - Try BOTH callback AND response
                        usage_info = None
                        total_cost = None

                        # First, try callback (for LangChain with BedrockAnthropicTokenUsageCallbackHandler)
                        if bedrock_callback:
                            if hasattr(bedrock_callback, 'total_tokens') and bedrock_callback.total_tokens > 0:
                                usage_info = {
                                    "input_tokens": getattr(bedrock_callback, 'prompt_tokens', 0),
                                    "output_tokens": getattr(bedrock_callback, 'completion_tokens', 0),
                                    "total_tokens": getattr(bedrock_callback, 'total_tokens', 0),
                                }

                            # Get cost from callback if available
                            if hasattr(bedrock_callback, 'total_cost'):
                                total_cost = getattr(bedrock_callback, 'total_cost', 0)

                        # Also try extracting from response (works for direct Bedrock calls)
                        if result and not error:
                            response_usage = extract_usage(result, kwargs)
                            if response_usage:
                                # Use response usage if no callback usage
                                if not usage_info:
                                    usage_info = response_usage

                        # Build output
                        if error:
                            output_data = {
                                "status": "error",
                                "error": str(error),
                                "stacktrace": traceback.format_exc(),
                                "locals_before": locals_before,
                                "locals_after": locals_after,
                            }
                        else:
                            # Extract text from response if enabled
                            if extract_output:
                                try:
                                    text_output = extract_text(result)
                                    output_data = {
                                        "status": "success",
                                        "text": text_output,
                                        "raw": result,
                                        "locals_before": locals_before,
                                        "locals_after": locals_after,
                                    }
                                except Exception:
                                    output_data = {
                                        "status": "success",
                                        "raw": result,
                                        "locals_before": locals_before,
                                        "locals_after": locals_after,
                                    }
                            else:
                                output_data = {
                                    "status": "success",
                                    "raw": result,
                                    "locals_before": locals_before,
                                    "locals_after": locals_after,
                                }

                        # Update observation with output, usage, and model
                        from .trace_manager import serialize_value
                        from .config import get_langfuse_client

                        # Build base update params
                        update_params = {
                            "output": serialize_value(output_data),
                            "metadata": tags or {},
                            "level": "ERROR" if error else "DEFAULT",
                            "status_message": str(error) if error else None,
                        }

                        # Add model info if available
                        if model_name:
                            update_params["model"] = model_name

                        # Add usage info using Langfuse's usage_details parameter
                        if usage_info:
                            # Langfuse expects usage_details with input/output/total keys
                            update_params["usage_details"] = {
                                "input": usage_info.get("input_tokens", 0),
                                "output": usage_info.get("output_tokens", 0),
                                "total": usage_info.get("total_tokens", 0),
                            }

                            # Calculate cost based on model and tokens
                            from .pricing import calculate_cost

                            # Always calculate cost breakdown from tokens and model
                            cost_dict = calculate_cost(
                                input_tokens=usage_info.get("input_tokens", 0),
                                output_tokens=usage_info.get("output_tokens", 0),
                                model_id=model_name
                            )

                            # Use callback cost if available, otherwise use calculated
                            if total_cost is None or total_cost == 0:
                                total_cost = cost_dict["total"]

                            # Add cost_details if available
                            if total_cost is not None and total_cost > 0:
                                cost_details_value = {
                                    "input": cost_dict["input"],
                                    "output": cost_dict["output"],
                                    "total": total_cost,
                                }
                                update_params["cost_details"] = cost_details_value

                        # Use Langfuse's update_current_generation() instead of obs.update()
                        langfuse = get_langfuse_client()
                        langfuse.update_current_generation(**update_params)

                # Flush after exiting context
                from .config import get_langfuse_client
                langfuse = get_langfuse_client()
                langfuse.flush()

                status_str = " (error)" if error else ""
                usage_str = f" [{usage_info.get('total_tokens', 0)} tokens]" if usage_info else ""
                print(f"[LLMOps-Observability] Generation sent{status_str}: {span_name} ({duration_ms}ms){usage_str}")

                return result

            return sync_wrapper

    return decorator
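
# --- Illustrative usage sketch (not part of the packaged file), based on the
# --- docstring above. bedrock_client is a hypothetical boto3 bedrock-runtime
# --- client; the wrapper is a no-op unless TraceManager.has_active_trace()
# --- reports an active trace.
#
#     @track_llm_call(name="summarize",
#                     model="anthropic.claude-3-sonnet-20240229-v1:0",
#                     tags={"feature": "summarization"})
#     def summarize(prompt: str):
#         return bedrock_client.converse(
#             modelId="anthropic.claude-3-sonnet-20240229-v1:0",
#             messages=[{"role": "user", "content": [{"text": prompt}]}],
#         )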
llmops_observability/models.py
@@ -0,0 +1,32 @@
"""
Data models for LLMOps Observability
"""
from dataclasses import dataclass, field
from typing import Optional, Dict, Any
import time


@dataclass
class SpanContext:
    """
    Context holder for span execution.
    Provides all necessary data for span creation and finalization.
    """
    trace_id: str
    span_id: str
    parent_span_id: Optional[str]
    start_time: float
    span_name: str
    span_type: str = "span"  # "span" or "generation"
    tags: Optional[Dict[str, Any]] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Captured inputs/outputs
    input_data: Dict[str, Any] = field(default_factory=dict)
    output_data: Optional[Any] = None
    error: Optional[Exception] = None

    @property
    def duration_ms(self) -> int:
        """Calculate duration in milliseconds"""
        return int((time.time() - self.start_time) * 1000)