llmops-observability 10.0.4__tar.gz → 10.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/PKG-INFO +5 -4
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/README.md +2 -2
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/pyproject.toml +3 -2
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/__init__.py +1 -1
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/asgi_middleware.py +1 -1
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/config.py +26 -2
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/llm.py +24 -1
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/pricing.py +9 -0
- llmops_observability-10.0.5/src/llmops_observability/sqs.py +395 -0
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/trace_manager.py +59 -1
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/PKG-INFO +5 -4
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/SOURCES.txt +1 -0
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/requires.txt +1 -0
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/setup.cfg +0 -0
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/models.py +0 -0
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/dependency_links.txt +0 -0
- {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/top_level.txt +0 -0
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llmops-observability
|
|
3
|
-
Version: 10.0.
|
|
4
|
-
Summary: LLMOps Observability SDK with direct Langfuse integration
|
|
3
|
+
Version: 10.0.5
|
|
4
|
+
Summary: LLMOps Observability SDK with direct Langfuse integration and SQS event streaming
|
|
5
5
|
Requires-Python: >=3.9
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
7
|
Requires-Dist: langfuse>=2.0.0
|
|
8
8
|
Requires-Dist: httpx
|
|
9
9
|
Requires-Dist: python-dotenv
|
|
10
|
+
Requires-Dist: boto3
|
|
10
11
|
|
|
11
12
|
# LLMOps Observability SDK
|
|
12
13
|
|
|
@@ -15,7 +16,7 @@ A lightweight Python SDK for LLM observability with **direct Langfuse integratio
|
|
|
15
16
|
## Key Features
|
|
16
17
|
|
|
17
18
|
- ⚡ **Instant Tracing**: Sends traces directly to Langfuse in real-time
|
|
18
|
-
- 🎯 **Simple API**:
|
|
19
|
+
- 🎯 **Simple API**: (`@track_function`, `@track_llm_call`)
|
|
19
20
|
- 🚫 **No Complexity**: No SQS queues, no batching, no background workers
|
|
20
21
|
- 🔄 **Sync & Async**: Supports both synchronous and asynchronous functions
|
|
21
22
|
- 🎨 **Provider Agnostic**: Works with any LLM provider (Bedrock, OpenAI, Anthropic, etc.)
|
|
@@ -134,7 +135,7 @@ TraceManager.start_trace(
|
|
|
134
135
|
TraceManager.end_trace()
|
|
135
136
|
```
|
|
136
137
|
|
|
137
|
-
**Method 3: Using `finalize_and_send()` (
|
|
138
|
+
**Method 3: Using `finalize_and_send()` (llmops-observability)**
|
|
138
139
|
```python
|
|
139
140
|
# Start trace
|
|
140
141
|
TraceManager.start_trace(name="chat_session")
|
|
@@ -5,7 +5,7 @@ A lightweight Python SDK for LLM observability with **direct Langfuse integratio
|
|
|
5
5
|
## Key Features
|
|
6
6
|
|
|
7
7
|
- ⚡ **Instant Tracing**: Sends traces directly to Langfuse in real-time
|
|
8
|
-
- 🎯 **Simple API**:
|
|
8
|
+
- 🎯 **Simple API**: (`@track_function`, `@track_llm_call`)
|
|
9
9
|
- 🚫 **No Complexity**: No SQS queues, no batching, no background workers
|
|
10
10
|
- 🔄 **Sync & Async**: Supports both synchronous and asynchronous functions
|
|
11
11
|
- 🎨 **Provider Agnostic**: Works with any LLM provider (Bedrock, OpenAI, Anthropic, etc.)
|
|
@@ -124,7 +124,7 @@ TraceManager.start_trace(
|
|
|
124
124
|
TraceManager.end_trace()
|
|
125
125
|
```
|
|
126
126
|
|
|
127
|
-
**Method 3: Using `finalize_and_send()` (
|
|
127
|
+
**Method 3: Using `finalize_and_send()` (llmops-observability)**
|
|
128
128
|
```python
|
|
129
129
|
# Start trace
|
|
130
130
|
TraceManager.start_trace(name="chat_session")
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "llmops-observability"
|
|
3
|
-
version = "10.0.
|
|
4
|
-
description = "LLMOps Observability SDK with direct Langfuse integration
|
|
3
|
+
version = "10.0.5"
|
|
4
|
+
description = "LLMOps Observability SDK with direct Langfuse integration and SQS event streaming"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.9"
|
|
7
7
|
dependencies = [
|
|
8
8
|
"langfuse>=2.0.0",
|
|
9
9
|
"httpx",
|
|
10
10
|
"python-dotenv",
|
|
11
|
+
"boto3",
|
|
11
12
|
]
|
|
12
13
|
|
|
13
14
|
[build-system]
|
{llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/__init__.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
LLMOps Observability SDK – Public API
|
|
3
3
|
Direct Langfuse integration for LLM tracing without SQS/batching.
|
|
4
|
-
Enhanced with
|
|
4
|
+
Enhanced with llmops-observability features: locals capture, nested spans, instant sending.
|
|
5
5
|
"""
|
|
6
6
|
from importlib.metadata import version, PackageNotFoundError
|
|
7
7
|
|
{llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/config.py
RENAMED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Configuration management for LLMOps Observability
|
|
3
|
-
Direct Langfuse client configuration
|
|
3
|
+
Direct Langfuse client configuration + SQS event streaming
|
|
4
4
|
"""
|
|
5
5
|
import os
|
|
6
6
|
import logging
|
|
7
|
-
from typing import Optional
|
|
7
|
+
from typing import Optional, Dict, Any
|
|
8
8
|
from langfuse import Langfuse
|
|
9
9
|
import httpx
|
|
10
10
|
from dotenv import load_dotenv
|
|
@@ -113,3 +113,27 @@ def configure(
|
|
|
113
113
|
)
|
|
114
114
|
|
|
115
115
|
print(f"[LLMOps-Observability] Langfuse client configured: {base_url}")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ============================================================
|
|
119
|
+
# SQS Configuration
|
|
120
|
+
# ============================================================
|
|
121
|
+
|
|
122
|
+
def get_sqs_config() -> Dict[str, Any]:
    """
    Read the SQS settings from the process environment.

    Environment variables:
    - AWS_SQS_URL: SQS queue URL; the value is None when the variable is unset
    - AWS_PROFILE: AWS profile name (default: "default")
    - AWS_REGION: AWS region (default: "us-east-1")

    Returns:
        Dict with the keys "aws_sqs_url", "aws_profile" and "aws_region"
    """
    # (result key, environment variable, value used when the variable is unset)
    settings = (
        ("aws_sqs_url", "AWS_SQS_URL", None),
        ("aws_profile", "AWS_PROFILE", "default"),
        ("aws_region", "AWS_REGION", "us-east-1"),
    )
    return {key: os.getenv(variable, fallback) for key, variable, fallback in settings}
|
|
139
|
+
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
LLM tracking decorator for LLMOps Observability
|
|
3
3
|
Direct Langfuse integration for tracking LLM calls
|
|
4
|
-
Enhanced with
|
|
4
|
+
Enhanced with robust input/output handling and SQS event streaming
|
|
5
5
|
"""
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
import functools
|
|
@@ -11,6 +11,7 @@ import time
|
|
|
11
11
|
import traceback
|
|
12
12
|
from typing import Optional, Dict, Any, List, Union
|
|
13
13
|
from .trace_manager import TraceManager
|
|
14
|
+
from .sqs import send_to_sqs, is_sqs_enabled
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def extract_text(resp: Any) -> str:
|
|
@@ -569,6 +570,28 @@ def track_llm_call(
|
|
|
569
570
|
# Use Langfuse's update_current_generation() instead of obs.update()
|
|
570
571
|
langfuse = get_langfuse_client()
|
|
571
572
|
langfuse.update_current_generation(**update_params)
|
|
573
|
+
|
|
574
|
+
# Send span event to SQS (non-blocking, independent of Langfuse)
|
|
575
|
+
if is_sqs_enabled() and TraceManager.has_active_trace():
|
|
576
|
+
trace_id = TraceManager._active.get("trace_id")
|
|
577
|
+
if trace_id:
|
|
578
|
+
span_event = {
|
|
579
|
+
"event_type": "span",
|
|
580
|
+
"trace_id": trace_id,
|
|
581
|
+
"span_id": obs.id if hasattr(obs, 'id') else "unknown",
|
|
582
|
+
"parent_span_id": None,
|
|
583
|
+
"name": span_name,
|
|
584
|
+
"timestamp": TraceManager._now(),
|
|
585
|
+
"duration_ms": duration_ms,
|
|
586
|
+
"input": input_data,
|
|
587
|
+
"output": output_data,
|
|
588
|
+
"metadata": update_params.get("metadata", {})
|
|
589
|
+
}
|
|
590
|
+
if usage_info:
|
|
591
|
+
span_event["usage"] = usage_info
|
|
592
|
+
if "cost_details" in update_params:
|
|
593
|
+
span_event["cost"] = update_params["cost_details"]
|
|
594
|
+
send_to_sqs(span_event)
|
|
572
595
|
|
|
573
596
|
# Flush after exiting context
|
|
574
597
|
from .config import get_langfuse_client
|
{llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/pricing.py
RENAMED
|
@@ -17,6 +17,15 @@ BEDROCK_PRICING = {
|
|
|
17
17
|
"input": 0.003,
|
|
18
18
|
"output": 0.015,
|
|
19
19
|
},
|
|
20
|
+
# Claude 4 Sonnet (Cross-region inference)
|
|
21
|
+
"us.anthropic.claude-sonnet-4-20250514-v1:0": {
|
|
22
|
+
"input": 0.003, # $3 per 1M tokens
|
|
23
|
+
"output": 0.015, # $15 per 1M tokens
|
|
24
|
+
},
|
|
25
|
+
"us.anthropic.claude-sonnet-4-5-20250929-v1:0": {
|
|
26
|
+
"input": 0.003, # $3 per 1M tokens
|
|
27
|
+
"output": 0.015, # $15 per 1M tokens
|
|
28
|
+
},
|
|
20
29
|
# Claude 3 Sonnet
|
|
21
30
|
"anthropic.claude-3-sonnet-20240229-v1:0": {
|
|
22
31
|
"input": 0.003, # $3 per 1M tokens
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
# src/llmops_observability/sqs.py
|
|
2
|
+
"""
|
|
3
|
+
Production-grade SQS sender with batching, spillover, and clean shutdown.
|
|
4
|
+
Ported from veriskGO with enhanced error handling and resilience.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import boto3
|
|
9
|
+
import queue
|
|
10
|
+
import threading
|
|
11
|
+
import time
|
|
12
|
+
import os
|
|
13
|
+
import atexit
|
|
14
|
+
import tempfile
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Optional, Dict, Any
|
|
17
|
+
|
|
18
|
+
from .config import get_sqs_config
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
if not logger.handlers:
|
|
22
|
+
handler = logging.StreamHandler()
|
|
23
|
+
formatter = logging.Formatter('[llmops_observability] %(levelname)s: %(message)s')
|
|
24
|
+
handler.setFormatter(formatter)
|
|
25
|
+
logger.addHandler(handler)
|
|
26
|
+
logger.setLevel(logging.INFO)
|
|
27
|
+
|
|
28
|
+
# JSON-lines file in the system temp directory. Messages that could not be sent
# are appended here and read back when the sender object is constructed.
SPILLOVER_FILE = os.path.join(tempfile.gettempdir(), "llmops_observability_spillover_queue.jsonl")
# PID captured when this module is imported; the atexit handler compares it with
# the current PID and does nothing when they differ.
MAIN_PID = os.getpid()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class _LLMOpsObservabilitySQS:
    """
    Background SQS sender with batching, disk spillover and clean shutdown.

    - Daemon worker threads (never block interpreter shutdown)
    - Force-flush on exit (best-effort delivery of whatever is still queued)
    - Clean shutdown (prevents "Event loop is closed" errors on Windows)
    - Auto spillover to a local JSONL file for failed sends
    - Resilient to SQS outages: a failed connection is retried after a cooldown
    """

    SHUTDOWN_SENTINEL = None  # Used to tell workers to stop

    # SendMessageBatch accepts at most this many entries per request.
    MAX_BATCH_ENTRIES = 10
    # Longest time a partially filled batch waits in a worker before being sent.
    FLUSH_INTERVAL_SECONDS = 1.0
    # Minimum delay between connection attempts after a failed initialization,
    # so an outage does not turn every send into a blocking network probe.
    INIT_RETRY_SECONDS = 30.0

    def __init__(self):
        """Restore spilled messages, start the workers, then try to connect."""
        self.client: Optional[Any] = None
        self.queue_url: Optional[str] = None
        self.sqs_enabled = False
        self._init_once = False
        # Monotonic time before which a failed connection is not retried.
        self._next_init_attempt: Optional[float] = None

        # Internal queue for batching (unbounded)
        self._q: queue.Queue = queue.Queue(maxsize=0)

        # Flag to stop workers cleanly
        self._shutting_down = False

        # Restore spillover messages from disk
        self._load_spillover()

        # Start worker threads
        self.worker_count = 4
        self.workers = []
        for _ in range(self.worker_count):
            t = threading.Thread(target=self._safe_worker_loop, daemon=True)
            t.start()
            self.workers.append(t)

        # Initialize AWS SQS connection
        self._auto_initialize()

    # -------------------------------------------------------
    # CLEAN SHUTDOWN SUPPORT
    # -------------------------------------------------------
    def shutdown(self):
        """Ask every worker to stop and wait up to one second for each.

        Each worker sends the batch it is holding before it exits. Calling
        this more than once is a no-op.
        """
        if self._shutting_down:
            return
        self._shutting_down = True

        # Signal workers to exit (one sentinel per worker)
        for _ in range(self.worker_count):
            self._q.put(self.SHUTDOWN_SENTINEL)

        # Wait for them to finish
        for t in self.workers:
            try:
                t.join(timeout=1.0)
            except RuntimeError as e:
                # join() raises RuntimeError for a thread that cannot be joined.
                logger.debug(f"Worker join skipped: {e}")

    # -------------------------------------------------------
    # SERIALIZATION
    # -------------------------------------------------------
    @staticmethod
    def _serialize(message: Any) -> str:
        """Return *message* as JSON text.

        Values the json module cannot encode are converted with ``str`` so
        that one odd value does not cost the whole event.
        """
        return json.dumps(message, default=str)

    # -------------------------------------------------------
    # SPILLOVER SAVE (Fallback storage)
    # -------------------------------------------------------
    def _spillover_save(self, message: Dict[str, Any]):
        """Append *message* to the spillover file; failures are logged, never raised."""
        try:
            with open(SPILLOVER_FILE, "a", encoding="utf-8") as f:
                f.write(self._serialize(message) + "\n")
            logger.debug(f"Message saved to spillover: {message.get('event_type')}")
        except Exception as e:
            logger.error(f"Spillover save failed: {e}")

    # -------------------------------------------------------
    # SPILLOVER LOAD (Recovery from disk)
    # -------------------------------------------------------
    def _load_spillover(self):
        """Queue every message found in the spillover file, then delete the file."""
        if not os.path.exists(SPILLOVER_FILE):
            return

        try:
            logger.info("Restoring spillover queue from disk...")
            with open(SPILLOVER_FILE, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        message = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # A literal "null" line decodes to None, which is the
                    # shutdown sentinel and would stop a worker.
                    if message is self.SHUTDOWN_SENTINEL:
                        continue
                    self._q.put(message)
            os.remove(SPILLOVER_FILE)
            logger.info("Spillover restored and cleaned.")
        except Exception as e:
            logger.error(f"Spillover load failed: {e}")

    # -------------------------------------------------------
    # SAFE WORKER LOOP (auto-restarting on crash)
    # -------------------------------------------------------
    def _safe_worker_loop(self):
        """Run the worker loop, restarting it after any unexpected exception."""
        while True:
            try:
                self._worker_loop()
                return
            except Exception as e:
                logger.error(f"Worker crashed: {e}")
                time.sleep(0.5)
                logger.info("Restarting worker...")

    # -------------------------------------------------------
    # REAL WORKER LOOP (batch processing)
    # -------------------------------------------------------
    def _worker_loop(self):
        """Collect queued messages into batches and send them.

        A batch is sent when it reaches MAX_BATCH_ENTRIES messages or when its
        oldest message has waited FLUSH_INTERVAL_SECONDS. On the shutdown
        sentinel the batch in hand is sent before the loop returns.
        """
        batch = []
        batch_started = 0.0
        while True:
            try:
                msg = self._q.get(timeout=0.2)
            except queue.Empty:
                pass
            else:
                # Shutdown signal: deliver what we hold, then stop.
                if msg is self.SHUTDOWN_SENTINEL:
                    if batch:
                        self._flush_from_worker(batch)
                    return
                if not batch:
                    batch_started = time.monotonic()
                batch.append(msg)

            if not batch:
                continue

            flush_size = len(batch) >= self.MAX_BATCH_ENTRIES
            flush_time = time.monotonic() - batch_started >= self.FLUSH_INTERVAL_SECONDS
            if flush_size or flush_time:
                if not self._flush_from_worker(batch):
                    return
                batch = []

    def _flush_from_worker(self, batch) -> bool:
        """Send *batch*; return False when the worker should stop.

        A RuntimeError mentioning "Event loop is closed" is treated as
        interpreter teardown and ends the worker quietly; any other error
        propagates to the restarting wrapper.
        """
        try:
            self._send_batch(batch)
        except RuntimeError as e:
            if "Event loop is closed" in str(e):
                # Safe ignore — Windows cleanup issue
                return False
            raise
        return True

    # -------------------------------------------------------
    # FORCE FLUSH
    # -------------------------------------------------------
    def force_flush(self):
        """Synchronously send every message still in the queue (used on shutdown)."""
        batch = []
        while True:
            try:
                msg = self._q.get_nowait()
            except queue.Empty:
                break
            if msg is not self.SHUTDOWN_SENTINEL:
                batch.append(msg)

        if batch:
            self._send_batch(batch)

        time.sleep(0.1)

    # -------------------------------------------------------
    # AWS INIT (Lazy initialization with fallback)
    # -------------------------------------------------------
    def _auto_initialize(self):
        """Create and probe the SQS client from config; never raises.

        After a failed attempt, further attempts are skipped until
        INIT_RETRY_SECONDS have passed.
        """
        if self._init_once and self.client:
            return

        if self._next_init_attempt is not None and time.monotonic() < self._next_init_attempt:
            return

        cfg = get_sqs_config()
        self.queue_url = cfg.get("aws_sqs_url")

        if not self.queue_url:
            # Log once; this path runs again on later sends.
            if not self._init_once:
                logger.info("No SQS URL configured → SQS disabled.")
            self.sqs_enabled = False
            self._init_once = True
            return

        try:
            session = self._build_session(cfg.get("aws_profile"), cfg.get("aws_region"))
            client = session.client("sqs")

            # Test connection
            client.get_queue_attributes(
                QueueUrl=self.queue_url,
                AttributeNames=["QueueArn"]
            )

            # Publish the client only after the probe succeeded.
            self.client = client
            self.sqs_enabled = True
            self._next_init_attempt = None
            logger.info(f"SQS connected → {self.queue_url}")

        except Exception as e:
            logger.warning(f"SQS initialization failed: {e} → Spillover enabled.")
            self.client = None
            self.sqs_enabled = False
            self._next_init_attempt = time.monotonic() + self.INIT_RETRY_SECONDS

        self._init_once = True

    @staticmethod
    def _build_session(profile: Optional[str], region: Optional[str]):
        """Create a boto3 session, tolerating a missing "default" profile.

        The config layer reports the profile as "default" when AWS_PROFILE is
        unset. NOTE(review): boto3 is expected to reject a named profile that
        does not exist, which would disable SQS on hosts that authenticate via
        environment variables or an attached role — confirm against the boto3
        Session documentation. An explicitly chosen profile that fails still
        raises.
        """
        try:
            return boto3.Session(profile_name=profile, region_name=region)
        except Exception as e:
            if profile not in (None, "", "default"):
                raise
            logger.debug(f"Profile {profile!r} unavailable ({e}) → default credential chain")
            return boto3.Session(region_name=region)

    # -------------------------------------------------------
    # PUBLIC SEND API
    # -------------------------------------------------------
    def send(self, message: Optional[Dict[str, Any]]) -> bool:
        """
        Queue a message for batched send to SQS.
        If the message cannot be queued it is written to the spillover file.

        Args:
            message: Dictionary message to send

        Returns:
            bool: True if queued successfully, False for an empty message or
            when the message had to be spilled to disk
        """
        if not message:
            return False

        if not self.sqs_enabled:
            self._auto_initialize()

        try:
            self._q.put_nowait(message)
            return True
        except Exception as e:
            logger.debug(f"Queue full → spillover: {e}")
            self._spillover_save(message)
            return False

    def send_immediate(self, message: Optional[Dict[str, Any]]) -> bool:
        """
        Send message immediately without batching.
        Use for critical messages like trace_end.
        Falls back to spillover if SQS unavailable.

        Args:
            message: Dictionary message to send

        Returns:
            bool: True if sent successfully
        """
        if not message:
            return False

        if not self.sqs_enabled:
            self._auto_initialize()

        client = self.client
        if not client:
            logger.debug("SQS unavailable for immediate send → spillover")
            self._spillover_save(message)
            return False

        try:
            client.send_message(
                QueueUrl=self.queue_url,
                MessageBody=self._serialize(message)
            )
            logger.debug(f"Immediate send OK: {message.get('event_type')}")
            return True
        except Exception as e:
            logger.warning(f"Immediate send failed: {e} → spillover")
            self._spillover_save(message)
            return False

    # -------------------------------------------------------
    # BATCH SEND
    # -------------------------------------------------------
    def _send_batch(self, batch):
        """Send *batch* to SQS in requests of at most MAX_BATCH_ENTRIES messages.

        Every message is attempted. When no client is available the whole
        batch goes to the spillover file.
        """
        if not batch:
            return

        if not self.client:
            self._auto_initialize()

        client = self.client
        if not client:
            logger.debug(f"SQS unavailable → spillover {len(batch)} messages")
            for msg in batch:
                self._spillover_save(msg)
            return

        for start in range(0, len(batch), self.MAX_BATCH_ENTRIES):
            self._send_chunk(client, batch[start:start + self.MAX_BATCH_ENTRIES])

    def _send_chunk(self, client, chunk):
        """Send one request's worth of messages, retrying rejected ones individually."""
        entries = []
        by_id = {}
        for msg in chunk:
            try:
                body = self._serialize(msg)
            except (TypeError, ValueError) as e:
                # Cannot be sent or spilled (spillover needs the same JSON text).
                logger.error(f"Dropping unserializable message: {e}")
                continue
            entry_id = str(len(entries))
            entries.append({"Id": entry_id, "MessageBody": body})
            by_id[entry_id] = msg

        if not entries:
            return

        try:
            response = client.send_message_batch(
                QueueUrl=self.queue_url,
                Entries=entries
            )
        except Exception as e:
            logger.warning(f"Batch send failed: {e} → retry individual")
            self._retry_individual(list(by_id.values()))
            return

        # The request can succeed while individual entries are rejected;
        # those are reported under "Failed", keyed by the entry Id.
        failed = response.get("Failed", []) if isinstance(response, dict) else []
        rejected = [by_id[item["Id"]] for item in failed if item.get("Id") in by_id]

        logger.debug(f"Batch send OK: {len(entries) - len(rejected)} messages")
        if rejected:
            logger.warning(f"Batch send rejected {len(rejected)} messages → retry individual")
            self._retry_individual(rejected)

    # -------------------------------------------------------
    # RETRY INDIVIDUAL MESSAGES
    # -------------------------------------------------------
    def _retry_individual(self, batch):
        """Send each message of *batch* on its own; spill the ones that still fail."""
        # Ensure SQS client exists
        if not self.client:
            self._auto_initialize()

        client = self.client
        if not client:
            logger.debug(f"Client unavailable → spilling {len(batch)} messages")
            for msg in batch:
                self._spillover_save(msg)
            return

        for msg in batch:
            try:
                client.send_message(
                    QueueUrl=self.queue_url,
                    MessageBody=self._serialize(msg)
                )
                logger.debug(f"Individual send OK: {msg.get('event_type')}")
            except Exception as e:
                logger.warning(f"Individual send FAILED: {e} → spillover")
                self._spillover_save(msg)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# -------------------------------------------------------
|
|
353
|
+
# SINGLETON INSTANCE
|
|
354
|
+
# -------------------------------------------------------
|
|
355
|
+
# Shared sender used by the module-level helper functions. It is constructed
# when this module is imported, which starts the worker threads and runs the
# SQS initialization.
_sqs_instance = _LLMOpsObservabilitySQS()
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def send_to_sqs(bundle: Optional[Dict[str, Any]]) -> bool:
    """Hand *bundle* to the shared sender for batched delivery.

    Returns whatever the sender's ``send`` reports: True when the message was
    queued, False otherwise.
    """
    queued = _sqs_instance.send(bundle)
    return queued
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def send_to_sqs_immediate(bundle: Optional[Dict[str, Any]]) -> bool:
    """Send *bundle* through the shared sender right away, bypassing the batch queue.

    Returns whatever the sender's ``send_immediate`` reports: True when the
    message was sent, False otherwise.
    """
    delivered = _sqs_instance.send_immediate(bundle)
    return delivered
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def flush_sqs():
    """Drain the shared sender's queue synchronously and return its result."""
    outcome = _sqs_instance.force_flush()
    return outcome
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def is_sqs_enabled() -> bool:
    """Report the shared sender's ``sqs_enabled`` flag.

    The flag is set once the sender has connected to the configured queue.
    """
    enabled = _sqs_instance.sqs_enabled
    return enabled
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
# -------------------------------------------------------
|
|
379
|
+
# AUTO-FLUSH + CLEAN SHUTDOWN
|
|
380
|
+
# -------------------------------------------------------
|
|
381
|
+
def _cleanup_at_exit():
    """Exit hook: stop the workers, then send whatever is still queued.

    Does nothing when the current PID differs from the PID recorded at import
    time. Any error raised while stopping or flushing is logged, not raised.
    """
    if os.getpid() == MAIN_PID:
        logger.info("Flushing and shutting down SQS...")

        try:
            # Order matters: workers are stopped first so the final flush
            # is the only consumer of the queue.
            _sqs_instance.shutdown()
            _sqs_instance.force_flush()
        except Exception as exc:
            logger.error(f"Exit flush failed: {exc}")
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
# Run the cleanup hook at normal interpreter exit.
atexit.register(_cleanup_at_exit)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Trace Manager for LLMOps Observability
|
|
3
3
|
Handles tracing and tracking of LLM operations with direct Langfuse integration
|
|
4
|
-
|
|
4
|
+
Direct Langfuse integration with SQS event streaming
|
|
5
5
|
"""
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
import uuid
|
|
@@ -17,6 +17,7 @@ from datetime import datetime, timezone
|
|
|
17
17
|
from typing import Optional, Dict, Any, List, Union
|
|
18
18
|
from .models import SpanContext, TraceConfig
|
|
19
19
|
from .config import get_langfuse_client
|
|
20
|
+
from .sqs import send_to_sqs, send_to_sqs_immediate, is_sqs_enabled
|
|
20
21
|
|
|
21
22
|
# Configure logger
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
@@ -259,6 +260,26 @@ class TraceManager:
|
|
|
259
260
|
cls._pending_spans.clear()
|
|
260
261
|
|
|
261
262
|
logger.info(f"Trace started: {trace_config.trace_name} | Operation: {trace_config.name} | Env: {trace_config.environment} (ID: {trace_id})")
|
|
263
|
+
|
|
264
|
+
# Send trace_start event to SQS (non-blocking)
|
|
265
|
+
if is_sqs_enabled():
|
|
266
|
+
trace_start_event = {
|
|
267
|
+
"event_type": "trace_start",
|
|
268
|
+
"trace_id": trace_id,
|
|
269
|
+
"trace_name": trace_config.trace_name,
|
|
270
|
+
"operation": trace_config.name,
|
|
271
|
+
"timestamp": cls._now(),
|
|
272
|
+
"metadata": {
|
|
273
|
+
"project_id": trace_config.project_id,
|
|
274
|
+
"environment": trace_config.environment,
|
|
275
|
+
"user_id": trace_config.user_id,
|
|
276
|
+
"session_id": trace_config.session_id,
|
|
277
|
+
**(trace_config.metadata or {})
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
send_to_sqs(trace_start_event)
|
|
281
|
+
logger.debug(f"Trace start event sent to SQS: {trace_id}")
|
|
282
|
+
|
|
262
283
|
return trace_id
|
|
263
284
|
|
|
264
285
|
@classmethod
|
|
@@ -384,6 +405,24 @@ class TraceManager:
|
|
|
384
405
|
with cls._lock:
|
|
385
406
|
cls._end_trace_internal()
|
|
386
407
|
|
|
408
|
+
# Send trace_end event to SQS (immediate, critical message)
|
|
409
|
+
if is_sqs_enabled():
|
|
410
|
+
trace_end_event = {
|
|
411
|
+
"event_type": "trace_end",
|
|
412
|
+
"trace_id": trace_id,
|
|
413
|
+
"user_id": user_id,
|
|
414
|
+
"session_id": session_id,
|
|
415
|
+
"trace_name": trace_name,
|
|
416
|
+
"trace_input": serialize_value(trace_input),
|
|
417
|
+
"trace_output": serialize_value(trace_output),
|
|
418
|
+
"timestamp": cls._now(),
|
|
419
|
+
"metadata": {
|
|
420
|
+
"project_id": cls._active.get("trace_config", {}).project_id if cls._active.get("trace_config") else "unknown"
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
send_to_sqs_immediate(trace_end_event)
|
|
424
|
+
logger.debug(f"Trace end event sent to SQS: {trace_id}")
|
|
425
|
+
|
|
387
426
|
logger.info(f"Trace finalized and sent: {trace_name} (ID: {trace_id})")
|
|
388
427
|
return True
|
|
389
428
|
|
|
@@ -704,6 +743,25 @@ def track_function(
|
|
|
704
743
|
status_message=str(error) if error else None,
|
|
705
744
|
)
|
|
706
745
|
|
|
746
|
+
# Send span event to SQS (non-blocking, independent of Langfuse)
|
|
747
|
+
if is_sqs_enabled() and TraceManager.has_active_trace():
|
|
748
|
+
trace_id = TraceManager._active.get("trace_id")
|
|
749
|
+
if trace_id:
|
|
750
|
+
span_event = {
|
|
751
|
+
"event_type": "span",
|
|
752
|
+
"trace_id": trace_id,
|
|
753
|
+
"span_id": obs.id if hasattr(obs, 'id') else "unknown",
|
|
754
|
+
"parent_span_id": None, # Will be updated by decorator context
|
|
755
|
+
"name": span_name,
|
|
756
|
+
"timestamp": TraceManager._now(),
|
|
757
|
+
"duration_ms": duration_ms,
|
|
758
|
+
"input": input_data,
|
|
759
|
+
"output": output_data,
|
|
760
|
+
"metadata": span_metadata
|
|
761
|
+
}
|
|
762
|
+
send_to_sqs(span_event)
|
|
763
|
+
logger.debug(f"Span event sent to SQS: {span_name}")
|
|
764
|
+
|
|
707
765
|
# Note: flush happens after context exit
|
|
708
766
|
|
|
709
767
|
# Flush after exiting context
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: llmops-observability
|
|
3
|
-
Version: 10.0.
|
|
4
|
-
Summary: LLMOps Observability SDK with direct Langfuse integration
|
|
3
|
+
Version: 10.0.5
|
|
4
|
+
Summary: LLMOps Observability SDK with direct Langfuse integration and SQS event streaming
|
|
5
5
|
Requires-Python: >=3.9
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
7
|
Requires-Dist: langfuse>=2.0.0
|
|
8
8
|
Requires-Dist: httpx
|
|
9
9
|
Requires-Dist: python-dotenv
|
|
10
|
+
Requires-Dist: boto3
|
|
10
11
|
|
|
11
12
|
# LLMOps Observability SDK
|
|
12
13
|
|
|
@@ -15,7 +16,7 @@ A lightweight Python SDK for LLM observability with **direct Langfuse integratio
|
|
|
15
16
|
## Key Features
|
|
16
17
|
|
|
17
18
|
- ⚡ **Instant Tracing**: Sends traces directly to Langfuse in real-time
|
|
18
|
-
- 🎯 **Simple API**:
|
|
19
|
+
- 🎯 **Simple API**: (`@track_function`, `@track_llm_call`)
|
|
19
20
|
- 🚫 **No Complexity**: No SQS queues, no batching, no background workers
|
|
20
21
|
- 🔄 **Sync & Async**: Supports both synchronous and asynchronous functions
|
|
21
22
|
- 🎨 **Provider Agnostic**: Works with any LLM provider (Bedrock, OpenAI, Anthropic, etc.)
|
|
@@ -134,7 +135,7 @@ TraceManager.start_trace(
|
|
|
134
135
|
TraceManager.end_trace()
|
|
135
136
|
```
|
|
136
137
|
|
|
137
|
-
**Method 3: Using `finalize_and_send()` (
|
|
138
|
+
**Method 3: Using `finalize_and_send()` (llmops-observability)**
|
|
138
139
|
```python
|
|
139
140
|
# Start trace
|
|
140
141
|
TraceManager.start_trace(name="chat_session")
|
|
@@ -6,6 +6,7 @@ src/llmops_observability/config.py
|
|
|
6
6
|
src/llmops_observability/llm.py
|
|
7
7
|
src/llmops_observability/models.py
|
|
8
8
|
src/llmops_observability/pricing.py
|
|
9
|
+
src/llmops_observability/sqs.py
|
|
9
10
|
src/llmops_observability/trace_manager.py
|
|
10
11
|
src/llmops_observability.egg-info/PKG-INFO
|
|
11
12
|
src/llmops_observability.egg-info/SOURCES.txt
|
|
File without changes
|
{llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/models.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|