llmops-observability 10.0.4__tar.gz → 10.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/PKG-INFO +5 -4
  2. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/README.md +2 -2
  3. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/pyproject.toml +3 -2
  4. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/__init__.py +1 -1
  5. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/asgi_middleware.py +1 -1
  6. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/config.py +26 -2
  7. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/llm.py +24 -1
  8. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/pricing.py +9 -0
  9. llmops_observability-10.0.5/src/llmops_observability/sqs.py +395 -0
  10. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/trace_manager.py +59 -1
  11. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/PKG-INFO +5 -4
  12. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/SOURCES.txt +1 -0
  13. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/requires.txt +1 -0
  14. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/setup.cfg +0 -0
  15. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability/models.py +0 -0
  16. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/dependency_links.txt +0 -0
  17. {llmops_observability-10.0.4 → llmops_observability-10.0.5}/src/llmops_observability.egg-info/top_level.txt +0 -0
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llmops-observability
3
- Version: 10.0.4
4
- Summary: LLMOps Observability SDK with direct Langfuse integration (no SQS/batching)
3
+ Version: 10.0.5
4
+ Summary: LLMOps Observability SDK with direct Langfuse integration and SQS event streaming
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: langfuse>=2.0.0
8
8
  Requires-Dist: httpx
9
9
  Requires-Dist: python-dotenv
10
+ Requires-Dist: boto3
10
11
 
11
12
  # LLMOps Observability SDK
12
13
 
@@ -15,7 +16,7 @@ A lightweight Python SDK for LLM observability with **direct Langfuse integratio
15
16
  ## Key Features
16
17
 
17
18
  - ⚡ **Instant Tracing**: Sends traces directly to Langfuse in real-time
18
- - 🎯 **Simple API**: Same decorators as veriskGO (`@track_function`, `@track_llm_call`)
19
+ - 🎯 **Simple API**: (`@track_function`, `@track_llm_call`)
19
20
  - 🚫 **No Complexity**: No SQS queues, no batching, no background workers
20
21
  - 🔄 **Sync & Async**: Supports both synchronous and asynchronous functions
21
22
  - 🎨 **Provider Agnostic**: Works with any LLM provider (Bedrock, OpenAI, Anthropic, etc.)
@@ -134,7 +135,7 @@ TraceManager.start_trace(
134
135
  TraceManager.end_trace()
135
136
  ```
136
137
 
137
- **Method 3: Using `finalize_and_send()` (veriskGO-compatible)**
138
+ **Method 3: Using `finalize_and_send()` (llmops-observability)**
138
139
  ```python
139
140
  # Start trace
140
141
  TraceManager.start_trace(name="chat_session")
@@ -5,7 +5,7 @@ A lightweight Python SDK for LLM observability with **direct Langfuse integratio
5
5
  ## Key Features
6
6
 
7
7
  - ⚡ **Instant Tracing**: Sends traces directly to Langfuse in real-time
8
- - 🎯 **Simple API**: Same decorators as veriskGO (`@track_function`, `@track_llm_call`)
8
+ - 🎯 **Simple API**: (`@track_function`, `@track_llm_call`)
9
9
  - 🚫 **No Complexity**: No SQS queues, no batching, no background workers
10
10
  - 🔄 **Sync & Async**: Supports both synchronous and asynchronous functions
11
11
  - 🎨 **Provider Agnostic**: Works with any LLM provider (Bedrock, OpenAI, Anthropic, etc.)
@@ -124,7 +124,7 @@ TraceManager.start_trace(
124
124
  TraceManager.end_trace()
125
125
  ```
126
126
 
127
- **Method 3: Using `finalize_and_send()` (veriskGO-compatible)**
127
+ **Method 3: Using `finalize_and_send()` (llmops-observability)**
128
128
  ```python
129
129
  # Start trace
130
130
  TraceManager.start_trace(name="chat_session")
@@ -1,13 +1,14 @@
1
1
  [project]
2
2
  name = "llmops-observability"
3
- version = "10.0.4"
4
- description = "LLMOps Observability SDK with direct Langfuse integration (no SQS/batching)"
3
+ version = "10.0.5"
4
+ description = "LLMOps Observability SDK with direct Langfuse integration and SQS event streaming"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.9"
7
7
  dependencies = [
8
8
  "langfuse>=2.0.0",
9
9
  "httpx",
10
10
  "python-dotenv",
11
+ "boto3",
11
12
  ]
12
13
 
13
14
  [build-system]
@@ -1,7 +1,7 @@
1
1
  """
2
2
  LLMOps Observability SDK – Public API
3
3
  Direct Langfuse integration for LLM tracing without SQS/batching.
4
- Enhanced with veriskGO-style features: locals capture, nested spans, instant sending.
4
+ Enhanced with llmops-observability features: locals capture, nested spans, instant sending.
5
5
  """
6
6
  from importlib.metadata import version, PackageNotFoundError
7
7
 
@@ -1,7 +1,7 @@
1
1
  """
2
2
  ASGI Middleware for LLMOps Observability
3
3
  Automatic tracing for FastAPI and other ASGI applications
4
- Based on veriskGO's asgi_middleware with direct Langfuse integration
4
+ Direct Langfuse integration for ASGI applications
5
5
  """
6
6
  import uuid
7
7
  import time
@@ -1,10 +1,10 @@
1
1
  """
2
2
  Configuration management for LLMOps Observability
3
- Direct Langfuse client configuration
3
+ Direct Langfuse client configuration + SQS event streaming
4
4
  """
5
5
  import os
6
6
  import logging
7
- from typing import Optional
7
+ from typing import Optional, Dict, Any
8
8
  from langfuse import Langfuse
9
9
  import httpx
10
10
  from dotenv import load_dotenv
@@ -113,3 +113,27 @@ def configure(
113
113
  )
114
114
 
115
115
  print(f"[LLMOps-Observability] Langfuse client configured: {base_url}")
116
+
117
+
118
# ============================================================
# SQS Configuration
# ============================================================

def get_sqs_config() -> Dict[str, Any]:
    """
    Read SQS settings from the process environment.

    Environment variables:
        AWS_SQS_URL: SQS queue URL; when unset, SQS streaming stays disabled.
        AWS_PROFILE: AWS credentials profile name (defaults to "default").
        AWS_REGION:  AWS region (defaults to "us-east-1").

    Returns:
        Dict with keys ``aws_sqs_url``, ``aws_profile`` and ``aws_region``.
    """
    config: Dict[str, Any] = {"aws_sqs_url": os.getenv("AWS_SQS_URL")}
    config["aws_profile"] = os.getenv("AWS_PROFILE", "default")
    config["aws_region"] = os.getenv("AWS_REGION", "us-east-1")
    return config
139
+
@@ -1,7 +1,7 @@
1
1
  """
2
2
  LLM tracking decorator for LLMOps Observability
3
3
  Direct Langfuse integration for tracking LLM calls
4
- Enhanced with veriskGO-style input/output handling
4
+ Enhanced with robust input/output handling and SQS event streaming
5
5
  """
6
6
  from __future__ import annotations
7
7
  import functools
@@ -11,6 +11,7 @@ import time
11
11
  import traceback
12
12
  from typing import Optional, Dict, Any, List, Union
13
13
  from .trace_manager import TraceManager
14
+ from .sqs import send_to_sqs, is_sqs_enabled
14
15
 
15
16
 
16
17
  def extract_text(resp: Any) -> str:
@@ -569,6 +570,28 @@ def track_llm_call(
569
570
  # Use Langfuse's update_current_generation() instead of obs.update()
570
571
  langfuse = get_langfuse_client()
571
572
  langfuse.update_current_generation(**update_params)
573
+
574
+ # Send span event to SQS (non-blocking, independent of Langfuse)
575
+ if is_sqs_enabled() and TraceManager.has_active_trace():
576
+ trace_id = TraceManager._active.get("trace_id")
577
+ if trace_id:
578
+ span_event = {
579
+ "event_type": "span",
580
+ "trace_id": trace_id,
581
+ "span_id": obs.id if hasattr(obs, 'id') else "unknown",
582
+ "parent_span_id": None,
583
+ "name": span_name,
584
+ "timestamp": TraceManager._now(),
585
+ "duration_ms": duration_ms,
586
+ "input": input_data,
587
+ "output": output_data,
588
+ "metadata": update_params.get("metadata", {})
589
+ }
590
+ if usage_info:
591
+ span_event["usage"] = usage_info
592
+ if "cost_details" in update_params:
593
+ span_event["cost"] = update_params["cost_details"]
594
+ send_to_sqs(span_event)
572
595
 
573
596
  # Flush after exiting context
574
597
  from .config import get_langfuse_client
@@ -17,6 +17,15 @@ BEDROCK_PRICING = {
17
17
  "input": 0.003,
18
18
  "output": 0.015,
19
19
  },
20
+ # Claude 4 Sonnet (Cross-region inference)
21
+ "us.anthropic.claude-sonnet-4-20250514-v1:0": {
22
+ "input": 0.003, # $3 per 1M tokens
23
+ "output": 0.015, # $15 per 1M tokens
24
+ },
25
+ "us.anthropic.claude-sonnet-4-5-20250929-v1:0": {
26
+ "input": 0.003, # $3 per 1M tokens
27
+ "output": 0.015, # $15 per 1M tokens
28
+ },
20
29
  # Claude 3 Sonnet
21
30
  "anthropic.claude-3-sonnet-20240229-v1:0": {
22
31
  "input": 0.003, # $3 per 1M tokens
@@ -0,0 +1,395 @@
1
+ # src/llmops_observability/sqs.py
2
+ """
3
+ Production-grade SQS sender with batching, spillover, and clean shutdown.
4
+ Ported from veriskGO with enhanced error handling and resilience.
5
+ """
6
+
7
+ import json
8
+ import boto3
9
+ import queue
10
+ import threading
11
+ import time
12
+ import os
13
+ import atexit
14
+ import tempfile
15
+ import logging
16
+ from typing import Optional, Dict, Any
17
+
18
+ from .config import get_sqs_config
19
+
20
logger = logging.getLogger(__name__)
if not logger.handlers:
    # Attach a default stream handler only when the host application
    # has not configured one for this logger already.
    _handler = logging.StreamHandler()
    _handler.setFormatter(
        logging.Formatter('[llmops_observability] %(levelname)s: %(message)s')
    )
    logger.addHandler(_handler)
    logger.setLevel(logging.INFO)

# On-disk overflow file for messages that could not be delivered to SQS.
SPILLOVER_FILE = os.path.join(
    tempfile.gettempdir(), "llmops_observability_spillover_queue.jsonl"
)
# PID of the importing process; forked children must skip the atexit flush.
MAIN_PID = os.getpid()
30
+
31
+
32
class _LLMOpsObservabilitySQS:
    """
    PRODUCTION-GRADE SQS SENDER
    - Daemon worker threads (never block shutdown)
    - Force-flush on exit (best-effort delivery of queued events)
    - Clean shutdown (prevents Event loop is closed errors on Windows)
    - Auto spillover for failed sends
    - Resilient to SQS outages

    Fixes over the original port:
    - batches are sent in chunks of 10 (the SendMessageBatch API limit)
      instead of silently dropping everything past the first 10 entries
    - per-entry failures reported by send_message_batch (which do NOT
      raise) are retried individually instead of being ignored
    - time-based flushing uses an elapsed-time deadline instead of the
      unreliable ``time.time() % 1`` wall-clock-phase heuristic
    - a worker receiving the shutdown sentinel flushes its in-flight
      batch before exiting instead of discarding it
    """

    SHUTDOWN_SENTINEL = None  # Queued once per worker to tell it to stop

    # AWS SendMessageBatch accepts at most 10 entries per call.
    _SQS_BATCH_MAX = 10
    # Seconds a partially filled batch may wait before being flushed.
    _FLUSH_INTERVAL = 1.0

    def __init__(self):
        # boto3 SQS client; None until _auto_initialize() succeeds.
        self.client: Optional[Any] = None
        self.queue_url: Optional[str] = None
        self.sqs_enabled = False
        self._init_once = False

        # Internal unbounded queue used to batch outgoing messages.
        self._q: queue.Queue = queue.Queue(maxsize=0)

        # Flag to stop workers cleanly.
        self._shutting_down = False

        # Restore spillover messages persisted by a previous run.
        self._load_spillover()

        # Start daemon worker threads (daemon=True: never block exit).
        self.worker_count = 4
        self.workers = []
        for _ in range(self.worker_count):
            t = threading.Thread(target=self._safe_worker_loop, daemon=True)
            t.start()
            self.workers.append(t)

        # Initialize AWS SQS connection (fails gracefully if misconfigured).
        self._auto_initialize()

    # -------------------------------------------------------
    # CLEAN SHUTDOWN SUPPORT
    # -------------------------------------------------------
    def shutdown(self):
        """Safely stop worker threads without touching any asyncio loop."""
        if self._shutting_down:
            return
        self._shutting_down = True

        # One sentinel per worker so each loop sees exactly one.
        for _ in range(self.worker_count):
            self._q.put(self.SHUTDOWN_SENTINEL)

        # Bounded join: never hang interpreter shutdown on a stuck worker.
        for t in self.workers:
            try:
                t.join(timeout=1.0)
            except Exception:
                pass

    # -------------------------------------------------------
    # SPILLOVER SAVE (Fallback storage)
    # -------------------------------------------------------
    def _spillover_save(self, message: Dict[str, Any]):
        """Append *message* to the on-disk spillover file for later recovery."""
        try:
            with open(SPILLOVER_FILE, "a") as f:
                f.write(json.dumps(message) + "\n")
            logger.debug(f"Message saved to spillover: {message.get('event_type')}")
        except Exception as e:
            logger.error(f"Spillover save failed: {e}")

    # -------------------------------------------------------
    # SPILLOVER LOAD (Recovery from disk)
    # -------------------------------------------------------
    def _load_spillover(self):
        """Re-queue messages persisted by a previous process, then delete the file."""
        if not os.path.exists(SPILLOVER_FILE):
            return

        try:
            logger.info("Restoring spillover queue from disk...")
            with open(SPILLOVER_FILE, "r") as f:
                for line in f:
                    try:
                        self._q.put(json.loads(line.strip()))
                    except json.JSONDecodeError:
                        # Skip corrupted lines; keep recovering the rest.
                        continue
            os.remove(SPILLOVER_FILE)
            logger.info("Spillover restored and cleaned.")
        except Exception as e:
            logger.error(f"Spillover load failed: {e}")

    # -------------------------------------------------------
    # SAFE WORKER LOOP (auto-restarting on crash)
    # -------------------------------------------------------
    def _safe_worker_loop(self):
        """Run _worker_loop, restarting it if it ever crashes."""
        while True:
            try:
                self._worker_loop()
                return
            except Exception as e:
                logger.error(f"Worker crashed: {e}")
                time.sleep(0.5)
                logger.info("Restarting worker...")

    # -------------------------------------------------------
    # REAL WORKER LOOP (batch processing)
    # -------------------------------------------------------
    def _worker_loop(self):
        """Accumulate messages into a batch; flush on size or elapsed time."""
        batch = []
        last_flush = time.time()
        while True:
            try:
                msg = self._q.get(timeout=0.2)

                # Shutdown signal: deliver the in-flight batch, then exit.
                if msg is self.SHUTDOWN_SENTINEL:
                    if batch:
                        self._send_batch(batch)
                    return

                batch.append(msg)

            except queue.Empty:
                pass

            # Flush when the batch is full or has waited long enough.
            now = time.time()
            if batch and (
                len(batch) >= self._SQS_BATCH_MAX
                or now - last_flush >= self._FLUSH_INTERVAL
            ):
                try:
                    self._send_batch(batch)
                except RuntimeError as e:
                    if "Event loop is closed" in str(e):
                        # Safe ignore — Windows cleanup issue
                        return
                    raise
                batch = []
                last_flush = now

    # -------------------------------------------------------
    # FORCE FLUSH
    # -------------------------------------------------------
    def force_flush(self):
        """Synchronously send all remaining messages (used on shutdown)."""
        batch = []
        while not self._q.empty():
            try:
                msg = self._q.get_nowait()
                if msg is not self.SHUTDOWN_SENTINEL:
                    batch.append(msg)
            except Exception:
                break

        if batch:
            # _send_batch chunks internally, so nothing is dropped even
            # when more than 10 messages were still queued.
            self._send_batch(batch)

        time.sleep(0.1)

    # -------------------------------------------------------
    # AWS INIT (Lazy initialization with fallback)
    # -------------------------------------------------------
    def _auto_initialize(self):
        """Initialize AWS SQS client from config. Fails gracefully if misconfigured."""
        if self._init_once and self.client:
            return

        cfg = get_sqs_config()
        self.queue_url = cfg.get("aws_sqs_url")

        if not self.queue_url:
            logger.info("No SQS URL configured → SQS disabled.")
            self.sqs_enabled = False
            self._init_once = True
            return

        try:
            session = boto3.Session(
                profile_name=cfg.get("aws_profile"),
                region_name=cfg.get("aws_region")
            )
            self.client = session.client("sqs")

            # Cheap round-trip to verify credentials and queue existence.
            self.client.get_queue_attributes(
                QueueUrl=self.queue_url,
                AttributeNames=["QueueArn"]
            )

            self.sqs_enabled = True
            logger.info(f"SQS connected → {self.queue_url}")

        except Exception as e:
            logger.warning(f"SQS initialization failed: {e} → Spillover enabled.")
            self.client = None
            self.sqs_enabled = False

        self._init_once = True

    # -------------------------------------------------------
    # PUBLIC SEND API
    # -------------------------------------------------------
    def send(self, message: Optional[Dict[str, Any]]) -> bool:
        """
        Queue a message for batched send to SQS.
        Non-blocking; falls back to disk spillover if queueing fails.

        Args:
            message: Dictionary message to send

        Returns:
            bool: True if queued successfully
        """
        if not message:
            return False

        if not self.sqs_enabled:
            self._auto_initialize()

        try:
            self._q.put_nowait(message)
            return True
        except Exception as e:
            logger.debug(f"Queue full → spillover: {e}")
            self._spillover_save(message)
            return False

    def send_immediate(self, message: Optional[Dict[str, Any]]) -> bool:
        """
        Send message immediately without batching.
        Use for critical messages like trace_end.
        Falls back to spillover if SQS unavailable.

        Args:
            message: Dictionary message to send

        Returns:
            bool: True if sent successfully
        """
        if not message:
            return False

        if not self.sqs_enabled:
            self._auto_initialize()

        if not self.client:
            logger.debug("SQS unavailable for immediate send → spillover")
            self._spillover_save(message)
            return False

        try:
            self.client.send_message(
                QueueUrl=self.queue_url,
                MessageBody=json.dumps(message)
            )
            logger.debug(f"Immediate send OK: {message.get('event_type')}")
            return True
        except Exception as e:
            logger.warning(f"Immediate send failed: {e} → spillover")
            self._spillover_save(message)
            return False

    # -------------------------------------------------------
    # BATCH SEND
    # -------------------------------------------------------
    def _send_batch(self, batch):
        """
        Send messages to SQS in chunks of at most 10 (SendMessageBatch limit).
        Entries reported as Failed in the response are retried individually;
        a fully failed chunk is also retried individually. Nothing is dropped.
        """
        if not batch:
            return

        if not self.client:
            self._auto_initialize()

        if not self.client:
            logger.debug(f"SQS unavailable → spillover {len(batch)} messages")
            for msg in batch:
                self._spillover_save(msg)
            return

        for start in range(0, len(batch), self._SQS_BATCH_MAX):
            chunk = batch[start:start + self._SQS_BATCH_MAX]
            entries = [
                {"Id": str(i), "MessageBody": json.dumps(msg)}
                for i, msg in enumerate(chunk)
            ]

            try:
                response = self.client.send_message_batch(
                    QueueUrl=self.queue_url,
                    Entries=entries
                )
                # send_message_batch reports per-entry failures WITHOUT
                # raising; the Entry Id maps back to the chunk index.
                failed = response.get("Failed") or []
                if failed:
                    retry = [chunk[int(entry["Id"])] for entry in failed]
                    logger.warning(
                        f"Batch partial failure: {len(retry)} → retry individual"
                    )
                    self._retry_individual(retry)
                logger.debug(f"Batch send OK: {len(entries) - len(failed)} messages")
            except Exception as e:
                logger.warning(f"Batch send failed: {e} → retry individual")
                self._retry_individual(chunk)

    # -------------------------------------------------------
    # RETRY INDIVIDUAL MESSAGES
    # -------------------------------------------------------
    def _retry_individual(self, batch):
        """Retry messages one at a time; spill to disk on persistent failure."""
        # Ensure SQS client exists
        if not self.client:
            self._auto_initialize()

        client = self.client
        if not client:
            logger.debug(f"Client unavailable → spilling {len(batch)} messages")
            for msg in batch:
                self._spillover_save(msg)
            return

        for msg in batch:
            try:
                client.send_message(
                    QueueUrl=self.queue_url,
                    MessageBody=json.dumps(msg)
                )
                logger.debug(f"Individual send OK: {msg.get('event_type')}")
            except Exception as e:
                logger.warning(f"Individual send FAILED: {e} → spillover")
                self._spillover_save(msg)
351
+
352
+ # -------------------------------------------------------
353
+ # SINGLETON INSTANCE
354
+ # -------------------------------------------------------
355
+ _sqs_instance = _LLMOpsObservabilitySQS()
356
+
357
+
358
+ def send_to_sqs(bundle: Optional[Dict[str, Any]]) -> bool:
359
+ """Send a message to SQS queue (batched)."""
360
+ return _sqs_instance.send(bundle)
361
+
362
+
363
+ def send_to_sqs_immediate(bundle: Optional[Dict[str, Any]]) -> bool:
364
+ """Send a message to SQS queue (immediate, no batching)."""
365
+ return _sqs_instance.send_immediate(bundle)
366
+
367
+
368
+ def flush_sqs():
369
+ """Force flush all pending messages to SQS."""
370
+ return _sqs_instance.force_flush()
371
+
372
+
373
+ def is_sqs_enabled() -> bool:
374
+ """Check if SQS is enabled and initialized."""
375
+ return _sqs_instance.sqs_enabled
376
+
377
+
378
# -------------------------------------------------------
# AUTO-FLUSH + CLEAN SHUTDOWN
# -------------------------------------------------------
def _cleanup_at_exit():
    """atexit hook: drain and stop the SQS sender in the main process only."""
    # Forked children share this module but must not flush the parent's queue.
    if os.getpid() != MAIN_PID:
        return

    logger.info("Flushing and shutting down SQS...")

    try:
        # Stop the background threads first, then push whatever remains.
        _sqs_instance.shutdown()
        _sqs_instance.force_flush()
    except Exception as exc:
        logger.error(f"Exit flush failed: {exc}")


atexit.register(_cleanup_at_exit)
@@ -1,7 +1,7 @@
1
1
  """
2
2
  Trace Manager for LLMOps Observability
3
3
  Handles tracing and tracking of LLM operations with direct Langfuse integration
4
- Inspired by veriskGO's trace_manager with instant span sending to Langfuse
4
+ Direct Langfuse integration with SQS event streaming
5
5
  """
6
6
  from __future__ import annotations
7
7
  import uuid
@@ -17,6 +17,7 @@ from datetime import datetime, timezone
17
17
  from typing import Optional, Dict, Any, List, Union
18
18
  from .models import SpanContext, TraceConfig
19
19
  from .config import get_langfuse_client
20
+ from .sqs import send_to_sqs, send_to_sqs_immediate, is_sqs_enabled
20
21
 
21
22
  # Configure logger
22
23
  logger = logging.getLogger(__name__)
@@ -259,6 +260,26 @@ class TraceManager:
259
260
  cls._pending_spans.clear()
260
261
 
261
262
  logger.info(f"Trace started: {trace_config.trace_name} | Operation: {trace_config.name} | Env: {trace_config.environment} (ID: {trace_id})")
263
+
264
+ # Send trace_start event to SQS (non-blocking)
265
+ if is_sqs_enabled():
266
+ trace_start_event = {
267
+ "event_type": "trace_start",
268
+ "trace_id": trace_id,
269
+ "trace_name": trace_config.trace_name,
270
+ "operation": trace_config.name,
271
+ "timestamp": cls._now(),
272
+ "metadata": {
273
+ "project_id": trace_config.project_id,
274
+ "environment": trace_config.environment,
275
+ "user_id": trace_config.user_id,
276
+ "session_id": trace_config.session_id,
277
+ **(trace_config.metadata or {})
278
+ }
279
+ }
280
+ send_to_sqs(trace_start_event)
281
+ logger.debug(f"Trace start event sent to SQS: {trace_id}")
282
+
262
283
  return trace_id
263
284
 
264
285
  @classmethod
@@ -384,6 +405,24 @@ class TraceManager:
384
405
  with cls._lock:
385
406
  cls._end_trace_internal()
386
407
 
408
+ # Send trace_end event to SQS (immediate, critical message)
409
+ if is_sqs_enabled():
410
+ trace_end_event = {
411
+ "event_type": "trace_end",
412
+ "trace_id": trace_id,
413
+ "user_id": user_id,
414
+ "session_id": session_id,
415
+ "trace_name": trace_name,
416
+ "trace_input": serialize_value(trace_input),
417
+ "trace_output": serialize_value(trace_output),
418
+ "timestamp": cls._now(),
419
+ "metadata": {
420
+ "project_id": cls._active.get("trace_config", {}).project_id if cls._active.get("trace_config") else "unknown"
421
+ }
422
+ }
423
+ send_to_sqs_immediate(trace_end_event)
424
+ logger.debug(f"Trace end event sent to SQS: {trace_id}")
425
+
387
426
  logger.info(f"Trace finalized and sent: {trace_name} (ID: {trace_id})")
388
427
  return True
389
428
 
@@ -704,6 +743,25 @@ def track_function(
704
743
  status_message=str(error) if error else None,
705
744
  )
706
745
 
746
+ # Send span event to SQS (non-blocking, independent of Langfuse)
747
+ if is_sqs_enabled() and TraceManager.has_active_trace():
748
+ trace_id = TraceManager._active.get("trace_id")
749
+ if trace_id:
750
+ span_event = {
751
+ "event_type": "span",
752
+ "trace_id": trace_id,
753
+ "span_id": obs.id if hasattr(obs, 'id') else "unknown",
754
+ "parent_span_id": None, # Will be updated by decorator context
755
+ "name": span_name,
756
+ "timestamp": TraceManager._now(),
757
+ "duration_ms": duration_ms,
758
+ "input": input_data,
759
+ "output": output_data,
760
+ "metadata": span_metadata
761
+ }
762
+ send_to_sqs(span_event)
763
+ logger.debug(f"Span event sent to SQS: {span_name}")
764
+
707
765
  # Note: flush happens after context exit
708
766
 
709
767
  # Flush after exiting context
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llmops-observability
3
- Version: 10.0.4
4
- Summary: LLMOps Observability SDK with direct Langfuse integration (no SQS/batching)
3
+ Version: 10.0.5
4
+ Summary: LLMOps Observability SDK with direct Langfuse integration and SQS event streaming
5
5
  Requires-Python: >=3.9
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: langfuse>=2.0.0
8
8
  Requires-Dist: httpx
9
9
  Requires-Dist: python-dotenv
10
+ Requires-Dist: boto3
10
11
 
11
12
  # LLMOps Observability SDK
12
13
 
@@ -15,7 +16,7 @@ A lightweight Python SDK for LLM observability with **direct Langfuse integratio
15
16
  ## Key Features
16
17
 
17
18
  - ⚡ **Instant Tracing**: Sends traces directly to Langfuse in real-time
18
- - 🎯 **Simple API**: Same decorators as veriskGO (`@track_function`, `@track_llm_call`)
19
+ - 🎯 **Simple API**: (`@track_function`, `@track_llm_call`)
19
20
  - 🚫 **No Complexity**: No SQS queues, no batching, no background workers
20
21
  - 🔄 **Sync & Async**: Supports both synchronous and asynchronous functions
21
22
  - 🎨 **Provider Agnostic**: Works with any LLM provider (Bedrock, OpenAI, Anthropic, etc.)
@@ -134,7 +135,7 @@ TraceManager.start_trace(
134
135
  TraceManager.end_trace()
135
136
  ```
136
137
 
137
- **Method 3: Using `finalize_and_send()` (veriskGO-compatible)**
138
+ **Method 3: Using `finalize_and_send()` (llmops-observability)**
138
139
  ```python
139
140
  # Start trace
140
141
  TraceManager.start_trace(name="chat_session")
@@ -6,6 +6,7 @@ src/llmops_observability/config.py
6
6
  src/llmops_observability/llm.py
7
7
  src/llmops_observability/models.py
8
8
  src/llmops_observability/pricing.py
9
+ src/llmops_observability/sqs.py
9
10
  src/llmops_observability/trace_manager.py
10
11
  src/llmops_observability.egg-info/PKG-INFO
11
12
  src/llmops_observability.egg-info/SOURCES.txt