lucidicai-1.3.5-py3-none-any.whl → lucidicai-2.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lucidicai/__init__.py +451 -398
- lucidicai/client.py +324 -49
- lucidicai/constants.py +7 -37
- lucidicai/context.py +25 -0
- lucidicai/dataset.py +112 -0
- lucidicai/decorators.py +96 -325
- lucidicai/errors.py +33 -0
- lucidicai/event.py +50 -59
- lucidicai/event_queue.py +466 -0
- lucidicai/feature_flag.py +336 -0
- lucidicai/session.py +9 -71
- lucidicai/singleton.py +20 -17
- lucidicai/streaming.py +15 -50
- lucidicai/telemetry/context_capture_processor.py +65 -0
- lucidicai/telemetry/extract.py +192 -0
- lucidicai/telemetry/litellm_bridge.py +80 -45
- lucidicai/telemetry/lucidic_exporter.py +125 -142
- lucidicai/telemetry/telemetry_init.py +189 -0
- {lucidicai-1.3.5.dist-info → lucidicai-2.0.1.dist-info}/METADATA +1 -1
- {lucidicai-1.3.5.dist-info → lucidicai-2.0.1.dist-info}/RECORD +22 -16
- {lucidicai-1.3.5.dist-info → lucidicai-2.0.1.dist-info}/WHEEL +0 -0
- {lucidicai-1.3.5.dist-info → lucidicai-2.0.1.dist-info}/top_level.txt +0 -0
lucidicai/event.py
CHANGED
@@ -1,62 +1,53 @@
-"""Event
-from
-from
+"""Typed Event model for the Lucidic API"""
+from enum import Enum
+from typing import Optional, Dict, Any, List
+from datetime import datetime
+
+
+class EventType(Enum):
+    LLM_GENERATION = "llm_generation"
+    FUNCTION_CALL = "function_call"
+    ERROR_TRACEBACK = "error_traceback"
+    GENERIC = "generic"
+
 
 class Event:
-
-
-
-
-
-
-
-
-        self.event_id =
-        self.
-
-
-
-
-
-
-        if
-
+    """Client-side representation of an Event returned by the backend.
+
+    Note: This object is a thin data container; creation and updates are
+    performed by the Client. This model reflects the new typed event schema.
+    """
+
+    def __init__(self, event_data: Dict[str, Any], client):
+        # Identifiers
+        self.event_id: Optional[str] = event_data.get("event_id")
+        self.session_id: Optional[str] = event_data.get("session_id")
+
+        # Hierarchy and timing
+        self.type: EventType = EventType(event_data.get("type", "generic"))
+        self.parent_event_id: Optional[str] = event_data.get("parent_event_id")
+        self.created_at: Optional[str] = event_data.get("created_at")
+        occurred_at_val = event_data.get("occurred_at")
+        # Store occurred_at as datetime if provided in ISO format
+        if isinstance(occurred_at_val, str):
+            try:
+                self.occurred_at: Optional[datetime] = datetime.fromisoformat(occurred_at_val.replace("Z", "+00:00"))
+            except Exception:
+                self.occurred_at = None
+        elif isinstance(occurred_at_val, datetime):
+            self.occurred_at = occurred_at_val
         else:
-
-
-
-
-        self.
-
-
-
-
-
-
-
-
-
-        self._upload_screenshots(**kwargs)
-
-    def _build_request_data(self, **kwargs) -> dict:
-        from .client import Client
-        num_new_screenshots = len(kwargs.get("screenshots", []) or [])
-        return {
-            "description": Client().mask(kwargs.get("description", None)),
-            "result": Client().mask(kwargs.get("result", None)),
-            "is_finished": self.is_finished,
-            "cost_added": kwargs.get("cost_added", None),
-            "model": kwargs.get("model", None),
-            "nscreenshots": len(self.screenshots) + num_new_screenshots,
-            "duration": kwargs.get("duration", None),
-            "function_name": kwargs.get("function_name", None),
-            "arguments": kwargs.get("arguments", None),
-        }
-
-    def _upload_screenshots(self, **kwargs) -> None:
-        from .client import Client
-        if 'screenshots' in kwargs and kwargs['screenshots'] is not None:
-            for i in range(len(kwargs['screenshots'])):
-                presigned_url, bucket_name, object_key = get_presigned_url(Client().agent_id, session_id=self.session_id, event_id=self.event_id, nthscreenshot=len(self.screenshots))
-                upload_image_to_s3(presigned_url, kwargs['screenshots'][i], "JPEG")
-                self.screenshots.append(kwargs['screenshots'][i])
+            self.occurred_at = None
+        self.duration: Optional[float] = event_data.get("duration")
+
+        # Tags/metadata
+        self.tags: List[str] = event_data.get("tags", []) or []
+        self.metadata: Dict[str, Any] = event_data.get("metadata", {}) or {}
+
+        # Typed payload
+        self.payload: Dict[str, Any] = event_data.get("payload", {}) or {}
+
+        # Local client reference
+        self._client = client
+        # Completion flag (kept for legacy-like usage; not authoritative)
+        self.is_finished: bool = event_data.get("is_finished", False)
lucidicai/event_queue.py
ADDED
@@ -0,0 +1,466 @@
+"""Asynchronous, non-blocking event queue with client-side UUIDs and blob handling.
+
+This module implements the TypeScript-style EventQueue for the Python SDK:
+- Immediate return of client_event_id (UUID) on event creation
+- Background batching and retries
+- Client-side blob size detection, preview generation, and gzip upload
+"""
+
+import gzip
+import io
+import json
+import logging
+import os
+import queue
+import threading
+import time
+import requests
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger("Lucidic")
+DEBUG = os.getenv("LUCIDIC_DEBUG", "False") == "True"
+VERBOSE = os.getenv("LUCIDIC_VERBOSE", "False") == "True"
+
+
+class EventQueue:
+    def __init__(self, client):
+        # Configuration
+        self.max_queue_size: int = int(os.getenv("LUCIDIC_MAX_QUEUE_SIZE", 100000))
+        self.flush_interval_ms: int = int(os.getenv("LUCIDIC_FLUSH_INTERVAL", 100))
+        self.flush_at_count: int = int(os.getenv("LUCIDIC_FLUSH_AT", 100))
+        self.blob_threshold: int = int(os.getenv("LUCIDIC_BLOB_THRESHOLD", 64 * 1024))
+        self._daemon_mode = os.getenv("LUCIDIC_DAEMON_QUEUE", "true").lower() == "true"
+
+        # Runtime state
+        self._client = client
+        self._queue = queue.Queue(maxsize=self.max_queue_size)
+        self._stopped = threading.Event()
+        self._flush_event = threading.Event()
+        self._worker: Optional[threading.Thread] = None
+        self._sent_ids: set[str] = set()
+        self._deferred_queue: List[Dict[str, Any]] = []
+        self._deferred_lock = threading.Lock()
+
+        # Thread safety for flush operations
+        self._flush_lock = threading.Lock()
+        self._processing_count = 0
+        self._processing_lock = threading.Lock()
+        self._flush_complete = threading.Event()
+
+        # Start background worker
+        self._start_worker()
+
+    # --- Public API ---
+    def queue_event(self, event_request: Dict[str, Any]) -> None:
+        """Enqueue an event for background processing.
+
+        event_request must include:
+        - session_id
+        - client_event_id (client-side uuid)
+        - type
+        - payload (typed payload)
+        - occurred_at (ISO string)
+        - Optional: duration, tags, metadata, client_parent_event_id
+        """
+        # Ensure a defer counter exists for parent-order deferrals
+        if "defer_count" not in event_request:
+            event_request["defer_count"] = 0
+
+        try:
+            # Try to put with a small timeout to handle full queue
+            self._queue.put(event_request, block=True, timeout=0.001)
+
+            if DEBUG:
+                logger.debug(f"[EventQueue] Queued event {event_request.get('client_event_id')}, queue size: {self._queue.qsize()}")
+            if VERBOSE:
+                logger.debug(f"[EventQueue] Event payload: {json.dumps(event_request, indent=2)}")
+
+            # Wake worker if batch large enough
+            if self._queue.qsize() >= self.flush_at_count:
+                self._flush_event.set()
+
+        except queue.Full:
+            if DEBUG:
+                logger.debug(f"[EventQueue] Queue at max size {self.max_queue_size}, dropping event")
+            # In the original implementation, oldest was dropped. With Queue, we drop the new one.
+            # To match original behavior exactly, we'd need a deque, but this is simpler.
+
+    def force_flush(self, timeout_seconds: float = 5.0) -> None:
+        """Flush current queue synchronously (best-effort). Thread-safe."""
+        with self._flush_lock:
+            if DEBUG:
+                logger.debug(f"[EventQueue] Force flush requested, queue size: {self._queue.qsize()}")
+
+            # Signal the worker to flush immediately
+            self._flush_event.set()
+
+            # Wait for the queue to be processed
+            end_time = time.time() + timeout_seconds
+            last_size = -1
+            stable_count = 0
+
+            while time.time() < end_time:
+                current_size = self._queue.qsize()
+
+                # Check if we're making progress
+                if current_size == 0 and self._processing_count == 0:
+                    # Queue is empty and nothing being processed
+                    if stable_count >= 2:  # Wait for 2 cycles to ensure stability
+                        if DEBUG:
+                            logger.debug("[EventQueue] Force flush complete - queue empty")
+                        return
+                    stable_count += 1
+                else:
+                    stable_count = 0
+
+                # If size hasn't changed, we might be stuck
+                if current_size == last_size:
+                    stable_count += 1
+                    if stable_count >= 10:  # 0.5 seconds of no progress
+                        if DEBUG:
+                            logger.debug(f"[EventQueue] Force flush timeout - queue stuck at {current_size}")
+                        break
+                else:
+                    stable_count = 0
+                last_size = current_size
+
+                # Signal flush again in case worker missed it
+                self._flush_event.set()
+                time.sleep(0.05)
+
+            if DEBUG:
+                logger.debug(f"[EventQueue] Force flush ended, remaining: {self._queue.qsize()}")
+
+    def is_empty(self) -> bool:
+        """Check if queue is completely empty and no events are being processed."""
+        with self._processing_lock:
+            queue_empty = self._queue.empty()
+            not_processing = self._processing_count == 0
+        with self._deferred_lock:
+            deferred_empty = len(self._deferred_queue) == 0
+        return queue_empty and not_processing and deferred_empty
+
+    def shutdown(self) -> None:
+        """Enhanced shutdown with better flushing."""
+        if DEBUG:
+            logger.debug(f"[EventQueue] Shutdown requested, queue size: {self._queue.qsize()}")
+
+        # First try to flush remaining events
+        self.force_flush(timeout_seconds=2.0)
+
+        # Wait for queue to be truly empty
+        wait_start = time.time()
+        while not self.is_empty() and (time.time() - wait_start < 2.0):
+            time.sleep(0.01)
+
+        if not self.is_empty() and DEBUG:
+            logger.debug(f"[EventQueue] Not empty after wait: queue={self._queue.qsize()}, processing={self._processing_count}, deferred={len(self._deferred_queue)}")
+
+        # Then signal stop
+        self._stopped.set()
+        self._flush_event.set()  # Wake up worker
+
+        # Wait for worker with timeout
+        if self._worker and self._worker.is_alive():
+            self._worker.join(timeout=5.0)  # Increased timeout
+            if self._worker.is_alive() and DEBUG:
+                logger.debug("[EventQueue] Worker thread did not terminate in time")
+
+    # --- Internals ---
+    def _start_worker(self) -> None:
+        if self._worker and self._worker.is_alive():
+            return
+        # Use configurable daemon mode
+        self._worker = threading.Thread(
+            target=self._run_loop,
+            name="LucidicEventQueue",
+            daemon=self._daemon_mode
+        )
+        self._worker.start()
+        if DEBUG:
+            logger.debug(f"[EventQueue] Started worker thread (daemon={self._daemon_mode})")
+
+    def _run_loop(self) -> None:
+        """Main worker loop using queue.Queue for simpler implementation."""
+        while not self._stopped.is_set():
+            batch: List[Dict[str, Any]] = []
+            deadline = time.time() + (self.flush_interval_ms / 1000.0)
+            force_flush = False
+
+            # Collect batch up to flush_at_count or until deadline
+            while True:
+                # Check if flush was requested
+                if self._flush_event.is_set():
+                    force_flush = True
+                    self._flush_event.clear()
+
+                # During force flush, get ALL events
+                if force_flush:
+                    # Drain entire queue when flushing
+                    while not self._queue.empty():
+                        try:
+                            item = self._queue.get_nowait()
+                            batch.append(item)
+                        except queue.Empty:
+                            break
+                    # Process what we have
+                    if batch:
+                        break
+                    # If still empty after draining, wait a bit for stragglers
+                    if not batch:
+                        time.sleep(0.01)
+                        continue
+                else:
+                    # Normal batching logic
+                    if len(batch) >= self.flush_at_count:
+                        break
+
+                    remaining_time = deadline - time.time()
+                    if remaining_time <= 0:
+                        break
+
+                    try:
+                        # Wait for item with timeout
+                        timeout = min(remaining_time, 0.05)  # Check more frequently
+                        item = self._queue.get(block=True, timeout=timeout)
+                        batch.append(item)
+                    except queue.Empty:
+                        # Check if stopped
+                        if self._stopped.is_set():
+                            # Drain remaining queue on shutdown
+                            while not self._queue.empty():
+                                try:
+                                    batch.append(self._queue.get_nowait())
+                                except queue.Empty:
+                                    break
+                            break
+                    # If we have events and deadline passed, process them
+                    if batch and time.time() >= deadline:
+                        break
+
+            # Process batch if we have events
+            if batch:
+                with self._processing_lock:
+                    self._processing_count = len(batch)
+                try:
+                    self._process_batch(batch)
+                except Exception as e:
+                    if DEBUG:
+                        logger.debug(f"[EventQueue] Batch processing error: {e}")
+                finally:
+                    with self._processing_lock:
+                        self._processing_count = 0
+
+        # Final drain on shutdown
+        final_batch = []
+        while not self._queue.empty():
+            try:
+                final_batch.append(self._queue.get_nowait())
+            except queue.Empty:
+                break
+        if final_batch:
+            with self._processing_lock:
+                self._processing_count = len(final_batch)
+            try:
+                self._process_batch(final_batch)
+            except Exception:
+                pass
+            finally:
+                with self._processing_lock:
+                    self._processing_count = 0
+
+    def _process_batch(self, batch: List[Dict[str, Any]]) -> None:
+        """Process a batch of events with parent-child ordering."""
+        if DEBUG:
+            logger.debug(f"[EventQueue] Processing batch of {len(batch)} events")
+
+        # Add any deferred events back to the batch
+        with self._deferred_lock:
+            if self._deferred_queue:
+                batch.extend(self._deferred_queue)
+                self._deferred_queue.clear()
+
+        # Reorder within batch to respect parent -> child when both present
+        id_to_evt = {e.get("client_event_id"): e for e in batch if e.get("client_event_id")}
+        remaining = list(batch)
+        ordered: List[Dict[str, Any]] = []
+
+        processed_ids: set[str] = set()
+        max_iterations = len(remaining) * 2 if remaining else 0
+        iters = 0
+        while remaining and iters < max_iterations:
+            iters += 1
+            progressed = False
+            next_remaining: List[Dict[str, Any]] = []
+            for ev in remaining:
+                parent_id = ev.get("client_parent_event_id")
+                if not parent_id or (parent_id not in id_to_evt) or (parent_id in processed_ids) or (parent_id in self._sent_ids):
+                    ordered.append(ev)
+                    if ev.get("client_event_id"):
+                        processed_ids.add(ev["client_event_id"])
+                    progressed = True
+                else:
+                    next_remaining.append(ev)
+            remaining = next_remaining if progressed else []
+            if not progressed and next_remaining:
+                # Break potential cycles by appending the rest
+                ordered.extend(next_remaining)
+                remaining = []
+
+        for event_request in ordered:
+            if DEBUG:
+                logger.debug(f"[EventQueue] Sending event {event_request.get('client_event_id')}")
+
+            # Retry up to 3 times with exponential backoff
+            attempt = 0
+            backoff = 0.25
+            while attempt < 3:
+                try:
+                    if self._send_event(event_request):
+                        # Mark as sent if it has id
+                        ev_id = event_request.get("client_event_id")
+                        if ev_id:
+                            self._sent_ids.add(ev_id)
+                        if DEBUG:
+                            logger.debug(f"[EventQueue] Successfully sent event {ev_id}")
+                    break
+                except Exception as e:
+                    attempt += 1
+                    if DEBUG:
+                        logger.debug(f"[EventQueue] Failed to send event (attempt {attempt}/3): {e}")
+                    if attempt >= 3:
+                        logger.error(f"[EventQueue] Failed to send event after 3 attempts: {event_request.get('client_event_id')}")
+                        break
+                    time.sleep(backoff)
+                    backoff *= 2
+
+    def _send_event(self, event_request: Dict[str, Any]) -> bool:
+        """Send event with enhanced error handling."""
+        try:
+            # If parent exists and not yet sent, defer up to 5 times
+            parent_id = event_request.get("client_parent_event_id")
+            if parent_id and parent_id not in self._sent_ids:
+                # Defer unless we've tried several times already
+                if event_request.get("defer_count", 0) < 5:
+                    event_request["defer_count"] = event_request.get("defer_count", 0) + 1
+                    # Add to deferred queue for next batch
+                    with self._deferred_lock:
+                        self._deferred_queue.append(event_request)
+                    return True
+
+            # Offload large payloads to blob storage
+            payload = event_request.get("payload", {})
+            raw_bytes = json.dumps(payload, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
+            should_offload = len(raw_bytes) > self.blob_threshold
+
+            if DEBUG:
+                logger.debug(f"[EventQueue] Event size: {len(raw_bytes)} bytes, offload: {should_offload}")
+
+            send_body: Dict[str, Any] = dict(event_request)
+            if should_offload:
+                send_body["needs_blob"] = True
+                send_body["payload"] = self._to_preview(send_body.get("type"), payload)
+            else:
+                send_body["needs_blob"] = False
+
+            if VERBOSE and not should_offload:
+                logger.debug(f"[EventQueue] Sending body: {json.dumps(send_body, indent=2)}")
+
+            # POST /events
+            response = self._client.make_request("events", "POST", send_body)
+
+            # If offloading, synchronously upload compressed payload
+            if should_offload:
+                blob_url = response.get("blob_url")
+                if blob_url:
+                    compressed = self._compress_json(payload)
+                    self._upload_blob(blob_url, compressed)
+                else:
+                    logger.error("[EventQueue] No blob_url received for large payload")
+                    return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"[EventQueue] Failed to send event: {e}")
+            raise  # Re-raise for retry logic
+
+    # --- Helpers for blob handling ---
+    @staticmethod
+    def _compress_json(payload: Dict[str, Any]) -> bytes:
+        raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
+        buf = io.BytesIO()
+        with gzip.GzipFile(fileobj=buf, mode="wb") as gz:
+            gz.write(raw)
+        return buf.getvalue()
+
+    def _upload_blob(self, blob_url: str, data: bytes) -> None:
+        """Upload blob with proper error handling and logging."""
+        try:
+            if DEBUG:
+                logger.debug(f"[EventQueue] Uploading blob, size: {len(data)} bytes")
+
+            headers = {"Content-Type": "application/json", "Content-Encoding": "gzip"}
+            resp = requests.put(blob_url, data=data, headers=headers)
+            resp.raise_for_status()
+
+            if DEBUG:
+                logger.debug(f"[EventQueue] Blob upload successful, status: {resp.status_code}")
+
+        except Exception as e:
+            # Log error but don't fail silently
+            logger.error(f"[EventQueue] Blob upload failed: {e}")
+            # Re-raise to trigger retry logic
+            raise
+
+    @staticmethod
+    def _to_preview(event_type: Optional[str], payload: Dict[str, Any]) -> Dict[str, Any]:
+        t = (event_type or "generic").lower()
+        try:
+            if t == "llm_generation":
+                req = payload.get("request", {})
+                usage = payload.get("usage", {})
+                messages = req.get("messages", [])[:5]
+                output = payload.get("response", {}).get("output", {})
+                compressed_messages = []
+                for i, m in enumerate(messages):
+                    compressed_message_item = {}
+                    for k, v in m.items():
+                        compressed_message_item[k] = str(v)[:200] if v else None
+                    compressed_messages.append(compressed_message_item)
+                return {
+                    "request": {
+                        "model": req.get("model")[:200] if req.get("model") else None,
+                        "provider": req.get("provider")[:200] if req.get("provider") else None,
+                        "messages": compressed_messages,
+                    },
+                    "usage": {
+                        k: usage.get(k) for k in ("input_tokens", "output_tokens", "cost") if k in usage
+                    },
+                    "response": {
+                        "output": str(output)[:200] if output else None,
+                    }
+                }
+            if t == "function_call":
+                args = payload.get("arguments")
+                truncated_args = (
+                    {k: (str(v)[:200] if v is not None else None) for k, v in args.items()}
+                    if isinstance(args, dict)
+                    else (str(args)[:200] if args is not None else None)
+                )
+                return {
+                    "function_name": (payload.get("function_name")[:200] if payload.get("function_name") else None),
+                    "arguments": truncated_args,
+                }
+            if t == "error_traceback":
+                return {
+                    "error": (payload.get("error")[:200] if payload.get("error") else None),
+                }
+            if t == "generic":
+                return {
+                    "details": (payload.get("details")[:200] if payload.get("details") else None),
+                }
+        except Exception:
+            pass
+        return {"details": "preview_unavailable"}