llmops-observability 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,580 @@
+ """
+ LLM tracking decorator for LLMOps Observability
+ Direct Langfuse integration for tracking LLM calls
+ Enhanced with veriskGO-style input/output handling
+ """
+ from __future__ import annotations
+ import functools
+ import inspect
+ import sys
+ import time
+ import traceback
+ from typing import Optional, Dict, Any, List, Union
+ from .trace_manager import TraceManager
+
+
+ def extract_text(resp: Any) -> str:
+     """
+     Extract text from various LLM response formats.
+     Supports: Bedrock Converse, Bedrock InvokeModel, OpenAI, LangChain, etc.
+     """
+     if isinstance(resp, str):
+         return resp
+
+     if not isinstance(resp, dict):
+         return str(resp)
+
+     # Bedrock Converse API
+     try:
+         return resp["output"]["message"]["content"][0]["text"]
+     except (KeyError, IndexError, TypeError):
+         pass
+
+     # Anthropic Messages API
+     try:
+         return resp["content"][0]["text"]
+     except (KeyError, IndexError, TypeError):
+         pass
+
+     # Amazon Titan
+     try:
+         return resp["results"][0]["outputText"]
+     except (KeyError, IndexError, TypeError):
+         pass
+
+     # Cohere
+     try:
+         return resp["generation"]
+     except (KeyError, TypeError):
+         pass
+
+     # AI21
+     try:
+         return resp["outputs"][0]["text"]
+     except (KeyError, IndexError, TypeError):
+         pass
+
+     # Generic text field
+     try:
+         return resp["text"]
+     except (KeyError, TypeError):
+         pass
+
+     # OpenAI format
+     try:
+         return resp["choices"][0]["message"]["content"]
+     except (KeyError, IndexError, TypeError):
+         pass
+
+     return str(resp)
+
+
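
As a quick illustration of the fall-through matching above, a minimal sketch (the response dicts are hand-written samples rather than captured provider output):

    converse_resp = {"output": {"message": {"content": [{"text": "Hello from Claude"}]}}}
    titan_resp = {"results": [{"outputText": "Hello from Titan"}]}
    openai_resp = {"choices": [{"message": {"content": "Hello from GPT"}}]}

    assert extract_text(converse_resp) == "Hello from Claude"
    assert extract_text(titan_resp) == "Hello from Titan"
    assert extract_text(openai_resp) == "Hello from GPT"
    assert extract_text({"foo": "bar"}) == str({"foo": "bar"})  # unknown shape falls back to str()
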
+ def extract_usage(result: Any, kwargs: Dict[str, Any]) -> Optional[Dict[str, int]]:
+     """
+     Extract token usage from LLM response or callback.
+
+     Returns:
+         Dict with input_tokens, output_tokens, total_tokens or None
+     """
+     usage = {}
+
+     # Check if result has usage attribute (OpenAI, Anthropic direct)
+     if hasattr(result, 'usage'):
+         usage_obj = result.usage
+         if hasattr(usage_obj, 'prompt_tokens'):
+             usage['input_tokens'] = usage_obj.prompt_tokens
+         if hasattr(usage_obj, 'completion_tokens'):
+             usage['output_tokens'] = usage_obj.completion_tokens
+         if hasattr(usage_obj, 'total_tokens'):
+             usage['total_tokens'] = usage_obj.total_tokens
+         return usage if usage else None
+
+     # Check Bedrock response format
+     if isinstance(result, dict):
+         # Bedrock Converse API
+         if 'usage' in result:
+             usage_data = result['usage']
+             if 'inputTokens' in usage_data:
+                 usage['input_tokens'] = usage_data['inputTokens']
+             if 'outputTokens' in usage_data:
+                 usage['output_tokens'] = usage_data['outputTokens']
+             if 'totalTokens' in usage_data:
+                 usage['total_tokens'] = usage_data['totalTokens']
+             return usage if usage else None
+
+     # Check for LangChain callbacks in kwargs
+     config = kwargs.get('config', {})
+     callbacks = config.get('callbacks', [])
+     for callback in callbacks:
+         # Bedrock Anthropic callback
+         if hasattr(callback, 'prompt_tokens'):
+             usage['input_tokens'] = callback.prompt_tokens
+         if hasattr(callback, 'completion_tokens'):
+             usage['output_tokens'] = callback.completion_tokens
+         if hasattr(callback, 'total_tokens'):
+             usage['total_tokens'] = callback.total_tokens
+         if usage:
+             return usage
+
+     return None
+
+
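
For the dict-based Bedrock Converse path above, a small sketch with sample token counts:

    bedrock_result = {"usage": {"inputTokens": 120, "outputTokens": 45, "totalTokens": 165}}
    assert extract_usage(bedrock_result, {}) == {
        "input_tokens": 120,
        "output_tokens": 45,
        "total_tokens": 165,
    }
    # No usage on the response and no callbacks in kwargs -> None
    assert extract_usage({"output": "..."}, {"config": {"callbacks": []}}) is None
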
+ def extract_model_info(args: tuple, kwargs: Dict[str, Any]) -> Optional[str]:
+     """
+     Extract model name from function arguments.
+
+     Returns:
+         Model name string or None
+     """
+     # Check kwargs for common model parameter names
+     for key in ['model', 'model_id', 'model_name', 'modelId']:
+         if key in kwargs:
+             return str(kwargs[key])
+
+     # Check if first arg has model attribute (LangChain model objects)
+     if args:
+         first_arg = args[0]
+         if hasattr(first_arg, 'model'):
+             return str(first_arg.model)
+         if hasattr(first_arg, 'model_id'):
+             return str(first_arg.model_id)
+         if hasattr(first_arg, 'model_name'):
+             return str(first_arg.model_name)
+
+     return None
+
+
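
For example (the values and the fake LangChain-style model object are hypothetical):

    model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
    assert extract_model_info((), {"modelId": model_id}) == model_id

    class FakeChatModel:
        model_id = "example-model-id"

    assert extract_model_info((FakeChatModel(),), {}) == "example-model-id"
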
+ def track_llm_call(
+     name: Optional[str] = None,
+     *,
+     tags: Optional[Dict[str, Any]] = None,
+     extract_output: bool = True,
+     model: Optional[str] = None,  # Allow specifying model explicitly
+     capture_locals: Union[bool, List[str]] = False,
+     capture_self: bool = False,
+ ):
+     """
+     Decorator to track LLM calls with Langfuse as generations.
+
+     Enhanced version inspired by veriskGO with proper input/output handling.
+
+     Args:
+         name: Custom name for the generation (defaults to function name)
+         tags: Metadata tags to attach to the generation
+         extract_output: Whether to extract text from LLM response (default True)
+         model: Model ID for cost calculation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+         capture_locals: Capture local variables (True/False or list of var names)
+         capture_self: Whether to capture 'self' variable (default False)
+
+     Usage:
+         @track_llm_call()
+         def call_bedrock(prompt):
+             response = bedrock.converse(...)
+             return response
+
+         @track_llm_call(name="summarize", tags={"model": "claude-3"})
+         async def summarize(text):
+             return await chain.ainvoke(...)
+     """
+     def decorator(func):
+         span_name = name or func.__name__
+         is_async = inspect.iscoroutinefunction(func)
+
+         if is_async:
+             @functools.wraps(func)
+             async def async_wrapper(*args, **kwargs):
+                 if not TraceManager.has_active_trace():
+                     return await func(*args, **kwargs)
+
+                 # Setup local variable capture
+                 from .trace_manager import capture_function_locals
+                 tracer, locals_before, locals_after = capture_function_locals(
+                     func, capture_locals=capture_locals, capture_self=capture_self
+                 )
+
+                 # Extract callback BEFORE execution for post-execution token reading
+                 bedrock_callback = None
+                 config = kwargs.get('config', {})
+                 callbacks = config.get('callbacks', [])
+                 for cb in callbacks:
+                     # Check if it's a Bedrock token usage callback
+                     if hasattr(cb, 'prompt_tokens') and hasattr(cb, 'completion_tokens'):
+                         bedrock_callback = cb
+                         break
+
+                 # Build input - extract prompt if first arg is string
+                 if args and isinstance(args[0], str):
+                     input_data = {
+                         "prompt": args[0],
+                         "args": args[1:],
+                         "kwargs": kwargs,
+                     }
+                 else:
+                     input_data = {
+                         "args": args,
+                         "kwargs": kwargs,
+                     }
+
+                 # Start observation context (this will be parent for nested calls)
+                 obs_ctx = TraceManager.start_observation_context(span_name, "generation", input_data)
+
+                 if not obs_ctx:
+                     return await func(*args, **kwargs)
+
+                 error = None
+                 result = None
+                 start_time = time.time()
+
+                 # Use the observation context properly with 'with' statement
+                 with obs_ctx as obs:
+                     # Push observation onto stack so nested calls become children
+                     TraceManager.push_observation(obs)
+
+                     if tracer:
+                         sys.settrace(tracer)
+
+                     try:
+                         result = await func(*args, **kwargs)
+                     except Exception as e:
+                         error = e
+                         raise
+                     finally:
+                         if tracer:
+                             sys.settrace(None)
+
+                         # Pop observation from stack
+                         TraceManager.pop_observation()
+                         duration_ms = int((time.time() - start_time) * 1000)
+
+                         # Extract model info - use decorator param if provided
+                         model_name = model or extract_model_info(args, kwargs)
+
+                         # Extract token usage - Try BOTH callback AND response
+                         usage_info = None
+                         total_cost = None
+
+                         # First, try callback (for LangChain with BedrockAnthropicTokenUsageCallbackHandler)
+                         if bedrock_callback:
+                             if hasattr(bedrock_callback, 'total_tokens') and bedrock_callback.total_tokens > 0:
+                                 usage_info = {
+                                     "input_tokens": getattr(bedrock_callback, 'prompt_tokens', 0),
+                                     "output_tokens": getattr(bedrock_callback, 'completion_tokens', 0),
+                                     "total_tokens": getattr(bedrock_callback, 'total_tokens', 0)
+                                 }
+
+                             # Get cost from callback if available
+                             if hasattr(bedrock_callback, 'total_cost'):
+                                 total_cost = getattr(bedrock_callback, 'total_cost', 0)
+
+                         # Also try extracting from response (works for direct Bedrock calls)
+                         if result and not error:
+                             response_usage = extract_usage(result, kwargs)
+                             if response_usage:
+                                 # Use response usage if no callback usage
+                                 if not usage_info:
+                                     usage_info = response_usage
+
+                         # Build output
+                         if error:
+                             output_data = {
+                                 "status": "error",
+                                 "error": str(error),
+                                 "stacktrace": traceback.format_exc(),
+                                 "locals_before": locals_before,
+                                 "locals_after": locals_after,
+                             }
+                         else:
+                             # Extract text from response if enabled
+                             if extract_output:
+                                 try:
+                                     text_output = extract_text(result)
+                                     output_data = {
+                                         "status": "success",
+                                         "text": text_output,
+                                         "raw": result,
+                                         "locals_before": locals_before,
+                                         "locals_after": locals_after,
+                                     }
+                                 except Exception:
+                                     output_data = {
+                                         "status": "success",
+                                         "raw": result,
+                                         "locals_before": locals_before,
+                                         "locals_after": locals_after,
+                                     }
+                             else:
+                                 output_data = {
+                                     "status": "success",
+                                     "raw": result,
+                                     "locals_before": locals_before,
+                                     "locals_after": locals_after,
+                                 }
+
+                         # Update observation with output, usage, and model
+                         from .trace_manager import serialize_value
+                         from .config import get_langfuse_client
+
+                         # Build base update params
+                         update_params = {
+                             "output": serialize_value(output_data),
+                             "metadata": tags or {},
+                             "level": "ERROR" if error else "DEFAULT",
+                             "status_message": str(error) if error else None,
+                         }
+
+                         # Add model info if available
+                         if model_name:
+                             update_params["model"] = model_name
+
+                         # Add usage info using Langfuse's usage_details parameter
+                         if usage_info:
+                             # Langfuse expects usage_details with input/output/total keys
+                             update_params["usage_details"] = {
+                                 "input": usage_info.get("input_tokens", 0),
+                                 "output": usage_info.get("output_tokens", 0),
+                                 "total": usage_info.get("total_tokens", 0),
+                             }
+
+                             # Calculate cost based on model and tokens
+                             from .pricing import calculate_cost
+
+                             # Always calculate cost breakdown from tokens and model
+                             cost_dict = calculate_cost(
+                                 input_tokens=usage_info.get("input_tokens", 0),
+                                 output_tokens=usage_info.get("output_tokens", 0),
+                                 model_id=model_name
+                             )
+
+                             # Use callback cost if available, otherwise use calculated
+                             if total_cost is None or total_cost == 0:
+                                 total_cost = cost_dict["total"]
+
+                             # Add cost_details if available
+                             if total_cost is not None and total_cost > 0:
+                                 cost_details_value = {
+                                     "input": cost_dict["input"],
+                                     "output": cost_dict["output"],
+                                     "total": total_cost,
+                                 }
+                                 update_params["cost_details"] = cost_details_value
+
+                         # Use Langfuse's update_current_generation() instead of obs.update()
+                         langfuse = get_langfuse_client()
+                         langfuse.update_current_generation(**update_params)
+
+                 # Flush after exiting context
+                 from .config import get_langfuse_client
+                 langfuse = get_langfuse_client()
+                 langfuse.flush()
+
+                 status_str = " (error)" if error else ""
+                 usage_str = f" [{usage_info.get('total_tokens', 0)} tokens]" if usage_info else ""
+                 print(f"[LLMOps-Observability] Generation sent{status_str}: {span_name} ({duration_ms}ms){usage_str}")
+
+                 return result
+
+             return async_wrapper
+         else:
+             @functools.wraps(func)
+             def sync_wrapper(*args, **kwargs):
+                 if not TraceManager.has_active_trace():
+                     return func(*args, **kwargs)
+
+                 # Setup local variable capture
+                 from .trace_manager import capture_function_locals
+                 tracer, locals_before, locals_after = capture_function_locals(
+                     func, capture_locals=capture_locals, capture_self=capture_self
+                 )
+
+                 # Extract callback BEFORE execution for post-execution token reading
+                 bedrock_callback = None
+                 config = kwargs.get('config', {})
+                 callbacks = config.get('callbacks', [])
+                 for cb in callbacks:
+                     # Check if it's a Bedrock token usage callback
+                     if hasattr(cb, 'prompt_tokens') and hasattr(cb, 'completion_tokens'):
+                         bedrock_callback = cb
+                         break
+
+                 # Build input - extract prompt if first arg is string
+                 if args and isinstance(args[0], str):
+                     input_data = {
+                         "prompt": args[0],
+                         "args": args[1:],
+                         "kwargs": kwargs,
+                     }
+                 else:
+                     input_data = {
+                         "args": args,
+                         "kwargs": kwargs,
+                     }
+
+                 # Start observation context (this will be parent for nested calls)
+                 obs_ctx = TraceManager.start_observation_context(span_name, "generation", input_data)
+
+                 if not obs_ctx:
+                     return func(*args, **kwargs)
+
+                 error = None
+                 result = None
+                 start_time = time.time()
+
+                 # Use the observation context properly with 'with' statement
+                 with obs_ctx as obs:
+                     # Push observation onto stack so nested calls become children
+                     TraceManager.push_observation(obs)
+
+                     if tracer:
+                         sys.settrace(tracer)
+
+                     try:
+                         result = func(*args, **kwargs)
+                     except Exception as e:
+                         error = e
+                         raise
+                     finally:
+                         if tracer:
+                             sys.settrace(None)
+
+                         # Pop observation from stack
+                         TraceManager.pop_observation()
+
+                         duration_ms = int((time.time() - start_time) * 1000)
+
+                         # Extract model info - use decorator param if provided
+                         model_name = model or extract_model_info(args, kwargs)
+
+                         # Extract token usage - Try BOTH callback AND response
+                         usage_info = None
+                         total_cost = None
+
+                         # First, try callback (for LangChain with BedrockAnthropicTokenUsageCallbackHandler)
+                         if bedrock_callback:
+                             if hasattr(bedrock_callback, 'total_tokens') and bedrock_callback.total_tokens > 0:
+                                 usage_info = {
+                                     "input_tokens": getattr(bedrock_callback, 'prompt_tokens', 0),
+                                     "output_tokens": getattr(bedrock_callback, 'completion_tokens', 0),
+                                     "total_tokens": getattr(bedrock_callback, 'total_tokens', 0)
+                                 }
+
+                             # Get cost from callback if available
+                             if hasattr(bedrock_callback, 'total_cost'):
+                                 total_cost = getattr(bedrock_callback, 'total_cost', 0)
+
+                         # Also try extracting from response (works for direct Bedrock calls)
+                         if result and not error:
+                             response_usage = extract_usage(result, kwargs)
+                             if response_usage:
+                                 # Use response usage if no callback usage
+                                 if not usage_info:
+                                     usage_info = response_usage
+
+                         # Build output
+                         if error:
+                             output_data = {
+                                 "status": "error",
+                                 "error": str(error),
+                                 "stacktrace": traceback.format_exc(),
+                                 "locals_before": locals_before,
+                                 "locals_after": locals_after,
+                             }
+                         else:
+                             # Extract text from response if enabled
+                             if extract_output:
+                                 try:
+                                     text_output = extract_text(result)
+                                     output_data = {
+                                         "status": "success",
+                                         "text": text_output,
+                                         "raw": result,
+                                         "locals_before": locals_before,
+                                         "locals_after": locals_after,
+                                     }
+                                 except Exception:
+                                     output_data = {
+                                         "status": "success",
+                                         "raw": result,
+                                         "locals_before": locals_before,
+                                         "locals_after": locals_after,
+                                     }
+                             else:
+                                 output_data = {
+                                     "status": "success",
+                                     "raw": result,
+                                     "locals_before": locals_before,
+                                     "locals_after": locals_after,
+                                 }
+
+                         # Update observation with output, usage, and model
+                         from .trace_manager import serialize_value
+                         from .config import get_langfuse_client
+
+                         # Build base update params
+                         update_params = {
+                             "output": serialize_value(output_data),
+                             "metadata": tags or {},
+                             "level": "ERROR" if error else "DEFAULT",
+                             "status_message": str(error) if error else None,
+                         }
+
+                         # Add model info if available
+                         if model_name:
+                             update_params["model"] = model_name
+
+                         # Add usage info using Langfuse's usage_details parameter
+                         if usage_info:
+                             # Langfuse expects usage_details with input/output/total keys
+                             update_params["usage_details"] = {
+                                 "input": usage_info.get("input_tokens", 0),
+                                 "output": usage_info.get("output_tokens", 0),
+                                 "total": usage_info.get("total_tokens", 0),
+                             }
+
+                             # Calculate cost based on model and tokens
+                             from .pricing import calculate_cost
+
+                             # Always calculate cost breakdown from tokens and model
+                             cost_dict = calculate_cost(
+                                 input_tokens=usage_info.get("input_tokens", 0),
+                                 output_tokens=usage_info.get("output_tokens", 0),
+                                 model_id=model_name
+                             )
+
+                             # Use callback cost if available, otherwise use calculated
+                             if total_cost is None or total_cost == 0:
+                                 total_cost = cost_dict["total"]
+
+                             # Add cost_details if available
+                             if total_cost is not None and total_cost > 0:
+                                 cost_details_value = {
+                                     "input": cost_dict["input"],
+                                     "output": cost_dict["output"],
+                                     "total": total_cost,
+                                 }
+                                 update_params["cost_details"] = cost_details_value
+
+                         # Use Langfuse's update_current_generation() instead of obs.update()
+                         langfuse = get_langfuse_client()
+                         langfuse.update_current_generation(**update_params)
+
+                 # Flush after exiting context
+                 from .config import get_langfuse_client
+                 langfuse = get_langfuse_client()
+                 langfuse.flush()
+
+                 status_str = " (error)" if error else ""
+                 usage_str = f" [{usage_info.get('total_tokens', 0)} tokens]" if usage_info else ""
+                 print(f"[LLMOps-Observability] Generation sent{status_str}: {span_name} ({duration_ms}ms){usage_str}")
+
+                 return result
+
+             return sync_wrapper
+
+     return decorator
+
+
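Taken together, a hedged usage sketch for the decorator with a direct Bedrock Converse call (the boto3 bedrock-runtime client and the surrounding TraceManager trace are assumed to be set up elsewhere; the model ID and token counts are illustrative):

    @track_llm_call(name="answer_question",
                    model="anthropic.claude-3-sonnet-20240229-v1:0",
                    tags={"team": "search"})
    def answer_question(prompt: str):
        # bedrock_runtime is an assumed boto3 "bedrock-runtime" client created elsewhere
        return bedrock_runtime.converse(
            modelId="anthropic.claude-3-sonnet-20240229-v1:0",
            messages=[{"role": "user", "content": [{"text": prompt}]}],
        )

If the Converse response carries {"usage": {"inputTokens": 200, "outputTokens": 80, "totalTokens": 280}}, the wrapper records usage_details={"input": 200, "output": 80, "total": 280} on the generation and, when calculate_cost() knows the model's per-token pricing, a matching cost_details entry. Calls made while no TraceManager trace is active simply pass through to the wrapped function.
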
@@ -0,0 +1,32 @@
+ """
+ Data models for LLMOps Observability
+ """
+ from dataclasses import dataclass, field
+ from typing import Optional, Dict, Any
+ import time
+
+
+ @dataclass
+ class SpanContext:
+     """
+     Context holder for span execution.
+     Provides all necessary data for span creation and finalization.
+     """
+     trace_id: str
+     span_id: str
+     parent_span_id: Optional[str]
+     start_time: float
+     span_name: str
+     span_type: str = "span"  # "span" or "generation"
+     tags: Optional[Dict[str, Any]] = None
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+     # Captured inputs/outputs
+     input_data: Dict[str, Any] = field(default_factory=dict)
+     output_data: Optional[Any] = None
+     error: Optional[Exception] = None
+
+     @property
+     def duration_ms(self) -> int:
+         """Calculate duration in milliseconds"""
+         return int((time.time() - self.start_time) * 1000)
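
A small usage sketch for SpanContext (the IDs are placeholders):

    ctx = SpanContext(
        trace_id="trace-123",
        span_id="span-456",
        parent_span_id=None,
        start_time=time.time(),
        span_name="summarize",
        span_type="generation",
    )
    # ... do the work being measured ...
    elapsed = ctx.duration_ms  # milliseconds since start_time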