lucidicai 2.0.2__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. lucidicai/__init__.py +367 -899
  2. lucidicai/api/__init__.py +1 -0
  3. lucidicai/api/client.py +218 -0
  4. lucidicai/api/resources/__init__.py +1 -0
  5. lucidicai/api/resources/dataset.py +192 -0
  6. lucidicai/api/resources/event.py +88 -0
  7. lucidicai/api/resources/session.py +126 -0
  8. lucidicai/core/__init__.py +1 -0
  9. lucidicai/core/config.py +223 -0
  10. lucidicai/core/errors.py +60 -0
  11. lucidicai/core/types.py +35 -0
  12. lucidicai/sdk/__init__.py +1 -0
  13. lucidicai/sdk/context.py +231 -0
  14. lucidicai/sdk/decorators.py +187 -0
  15. lucidicai/sdk/error_boundary.py +299 -0
  16. lucidicai/sdk/event.py +126 -0
  17. lucidicai/sdk/event_builder.py +304 -0
  18. lucidicai/sdk/features/__init__.py +1 -0
  19. lucidicai/sdk/features/dataset.py +605 -0
  20. lucidicai/sdk/features/feature_flag.py +383 -0
  21. lucidicai/sdk/init.py +361 -0
  22. lucidicai/sdk/shutdown_manager.py +302 -0
  23. lucidicai/telemetry/context_bridge.py +82 -0
  24. lucidicai/telemetry/context_capture_processor.py +25 -9
  25. lucidicai/telemetry/litellm_bridge.py +20 -24
  26. lucidicai/telemetry/lucidic_exporter.py +99 -60
  27. lucidicai/telemetry/openai_patch.py +295 -0
  28. lucidicai/telemetry/openai_uninstrument.py +87 -0
  29. lucidicai/telemetry/telemetry_init.py +16 -1
  30. lucidicai/telemetry/utils/model_pricing.py +278 -0
  31. lucidicai/utils/__init__.py +1 -0
  32. lucidicai/utils/images.py +337 -0
  33. lucidicai/utils/logger.py +168 -0
  34. lucidicai/utils/queue.py +393 -0
  35. {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/METADATA +1 -1
  36. {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/RECORD +38 -9
  37. {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/WHEEL +0 -0
  38. {lucidicai-2.0.2.dist-info → lucidicai-2.1.1.dist-info}/top_level.txt +0 -0
lucidicai/sdk/init.py ADDED
@@ -0,0 +1,361 @@
1
+ """SDK initialization module.
2
+
3
+ This module handles SDK initialization, separating concerns from the main __init__.py
4
+ """
5
+ import uuid
6
+ from typing import List, Optional
7
+ import asyncio
8
+ import threading
9
+ from weakref import WeakKeyDictionary
10
+
11
+ from ..api.client import HttpClient
12
+ from ..api.resources.event import EventResource
13
+ from ..api.resources.session import SessionResource
14
+ from ..api.resources.dataset import DatasetResource
15
+ from ..core.config import SDKConfig, get_config, set_config
16
+ from ..utils.queue import EventQueue
17
+ from ..utils.logger import debug, info, warning, error, truncate_id
18
+ from .context import set_active_session, current_session_id
19
+ from .error_boundary import register_cleanup_handler
20
+ from .shutdown_manager import get_shutdown_manager, SessionState
21
+ from ..telemetry.telemetry_init import instrument_providers
22
+ from opentelemetry.sdk.trace import TracerProvider
23
+
24
+
25
+ class SDKState:
26
+ """Container for SDK runtime state."""
27
+
28
+ def __init__(self):
29
+ self.http: Optional[HttpClient] = None
30
+ self.event_queue: Optional[EventQueue] = None
31
+ self.session_id: Optional[str] = None
32
+ self.tracer_provider: Optional[TracerProvider] = None
33
+ self.resources = {}
34
+ # Task-local storage for async task isolation
35
+ self.task_sessions: WeakKeyDictionary = WeakKeyDictionary()
36
+ # Thread-local storage for thread isolation
37
+ self.thread_local = threading.local()
38
+
39
+ def reset(self):
40
+ """Reset SDK state."""
41
+ # Shutdown telemetry first to ensure all spans are exported
42
+ if self.tracer_provider:
43
+ try:
44
+ # Force flush all pending spans with 5 second timeout
45
+ debug("[SDK] Flushing OpenTelemetry spans...")
46
+ self.tracer_provider.force_flush(timeout_millis=5000)
47
+ # Shutdown the tracer provider and all processors
48
+ self.tracer_provider.shutdown()
49
+ debug("[SDK] TracerProvider shutdown complete")
50
+ except Exception as e:
51
+ error(f"[SDK] Error shutting down TracerProvider: {e}")
52
+
53
+ if self.event_queue:
54
+ self.event_queue.shutdown()
55
+ if self.http:
56
+ self.http.close()
57
+
58
+ self.http = None
59
+ self.event_queue = None
60
+ self.session_id = None
61
+ self.tracer_provider = None
62
+ self.resources = {}
63
+ self.task_sessions.clear()
64
+ # Clear thread-local storage for current thread
65
+ if hasattr(self.thread_local, 'session_id'):
66
+ delattr(self.thread_local, 'session_id')
67
+
68
+
69
+ # Global SDK state
70
+ _sdk_state = SDKState()
71
+
72
+
73
def init(
    session_name: Optional[str] = None,
    session_id: Optional[str] = None,
    api_key: Optional[str] = None,
    agent_id: Optional[str] = None,
    task: Optional[str] = None,
    providers: Optional[List[str]] = None,
    production_monitoring: bool = False,
    experiment_id: Optional[str] = None,
    evaluators: Optional[List] = None,
    tags: Optional[List] = None,
    datasetitem_id: Optional[str] = None,
    masking_function: Optional[callable] = None,
    auto_end: bool = True,
    capture_uncaught: bool = True,
) -> str:
    """Initialize the Lucidic SDK and create a session.

    The HTTP client, API resources and event queue are created lazily on
    the first call and reused by later calls; a session is created through
    the sessions resource on every call.

    Args:
        session_name: Name for the session ('Unnamed Session' if omitted)
        session_id: Custom session ID (a random UUID4 is used if omitted)
        api_key: API key (uses env if not provided)
        agent_id: Agent ID (uses env if not provided)
        task: Task description
        providers: List of telemetry providers to instrument
        production_monitoring: Enable production monitoring
        experiment_id: Experiment ID to associate with session
        evaluators: Evaluators to use
        tags: Session tags
        datasetitem_id: Dataset item ID
        masking_function: Function to mask sensitive data.
            NOTE(review): this argument is not referenced anywhere in this
            function body - confirm where it is meant to be wired in.
        auto_end: Automatically end session on exit
        capture_uncaught: Capture uncaught exceptions

    Returns:
        Session ID (the one reported by the backend when it returns one,
        otherwise the locally chosen ID)

    Raises:
        ValueError: If the assembled configuration fails validation
        APIKeyVerificationError: If API credentials are invalid (presumably
            raised by the HTTP/session layer - it is not raised directly here)
    """
    # Create or update configuration
    config = SDKConfig.from_env(
        api_key=api_key,
        agent_id=agent_id,
        auto_end=auto_end,
        production_monitoring=production_monitoring
    )

    if providers:
        config.telemetry.providers = providers

    config.error_handling.capture_uncaught = capture_uncaught

    # Validate configuration
    errors = config.validate()
    if errors:
        raise ValueError(f"Invalid configuration: {', '.join(errors)}")

    # Set global config
    set_config(config)

    # Initialize HTTP client
    if not _sdk_state.http:
        debug("[SDK] Initializing HTTP client")
        _sdk_state.http = HttpClient(config)

    # Initialize resources
    if not _sdk_state.resources:
        _sdk_state.resources = {
            'events': EventResource(_sdk_state.http),
            'sessions': SessionResource(_sdk_state.http),
            'datasets': DatasetResource(_sdk_state.http)
        }

    # Initialize event queue
    if not _sdk_state.event_queue:
        debug("[SDK] Initializing event queue")

        # Adapter kept for backward compatibility: the queue expects a
        # client object exposing make_request(endpoint, method, data).
        class ClientAdapter:
            def make_request(self, endpoint, method, data):
                return _sdk_state.http.request(method, endpoint, json=data)

        _sdk_state.event_queue = EventQueue(ClientAdapter())

        # The handler looks the queue up when it runs, so it must tolerate
        # the state having been reset in the meantime (event_queue is None);
        # an unguarded attribute access would raise AttributeError there.
        def _flush_event_queue() -> None:
            queue = _sdk_state.event_queue
            if queue is not None:
                queue.force_flush()

        register_cleanup_handler(_flush_event_queue)
        debug("[SDK] Event queue initialized and cleanup handler registered")

    # Use the caller's session ID when given, otherwise generate one
    real_session_id = session_id if session_id else str(uuid.uuid4())

    # Create session via API - only send values that are set
    session_params = {
        'session_id': real_session_id,
        'session_name': session_name or 'Unnamed Session',
        'agent_id': config.agent_id,
    }
    optional_fields = {
        'task': task,
        'tags': tags,
        'experiment_id': experiment_id,
        'datasetitem_id': datasetitem_id,
        'evaluators': evaluators,
        'production_monitoring': production_monitoring,
    }
    session_params.update(
        {key: value for key, value in optional_fields.items() if value}
    )

    debug(f"[SDK] Creating session with params: {session_params}")
    session_resource = _sdk_state.resources['sessions']
    session_data = session_resource.create_session(session_params)

    # Prefer the session_id returned by the backend, but never replace the
    # local ID with an empty/None value (missing key, null id, or an empty
    # response payload).
    returned_id = session_data.get('session_id') if session_data else None
    real_session_id = returned_id or real_session_id
    _sdk_state.session_id = real_session_id

    info(f"[SDK] Session created: {truncate_id(real_session_id)} (name: {session_name or 'Unnamed Session'})")

    # Set active session in context
    set_active_session(real_session_id)

    # Register session with shutdown manager
    debug(f"[SDK] Registering session with shutdown manager (auto_end={auto_end})")
    shutdown_manager = get_shutdown_manager()
    session_state = SessionState(
        session_id=real_session_id,
        http_client=_sdk_state.resources,  # Pass resources dict which has sessions
        event_queue=_sdk_state.event_queue,
        auto_end=auto_end
    )
    shutdown_manager.register_session(real_session_id, session_state)

    # Initialize telemetry if providers specified
    if providers:
        debug(f"[SDK] Initializing telemetry for providers: {providers}")
        _initialize_telemetry(providers)

    return real_session_id
223
+
224
+
225
def _initialize_telemetry(providers: List[str]) -> None:
    """Set up the tracer provider (first call only) and instrument providers.

    Args:
        providers: Names of the telemetry providers to instrument
    """
    if not _sdk_state.tracer_provider:
        # Deferred imports: importing these at module level would create a
        # circular dependency.
        from ..telemetry.lucidic_exporter import LucidicSpanExporter
        from ..telemetry.context_capture_processor import ContextCaptureProcessor
        from opentelemetry.sdk.trace.export import BatchSpanProcessor

        provider = TracerProvider()
        _sdk_state.tracer_provider = provider

        # The context-capture processor is added before the exporting
        # processor so context is captured ahead of export.
        provider.add_span_processor(ContextCaptureProcessor())
        provider.add_span_processor(BatchSpanProcessor(LucidicSpanExporter()))

    # Instrumentation runs on every call, against the shared provider.
    instrument_providers(providers, _sdk_state.tracer_provider, {})

    info(f"[Telemetry] Initialized for providers: {providers}")
255
+
256
+
257
def set_task_session(session_id: str) -> None:
    """Bind *session_id* to the currently running asyncio task.

    Does nothing when called outside a running event loop or when there is
    no current task.
    """
    try:
        running = asyncio.current_task()
        if not running:
            return
        _sdk_state.task_sessions[running] = session_id
        debug(f"[SDK] Set task-local session {truncate_id(session_id)} for task {running.get_name()}")
    except RuntimeError:
        # asyncio.current_task() raises when no event loop is running.
        return
266
+
267
+
268
def clear_task_session() -> None:
    """Remove the session binding of the currently running asyncio task.

    Does nothing when called outside a running event loop or when there is
    no current task.
    """
    try:
        running = asyncio.current_task()
        if not running:
            return
        _sdk_state.task_sessions.pop(running, None)
        debug(f"[SDK] Cleared task-local session for task {running.get_name()}")
    except RuntimeError:
        # asyncio.current_task() raises when no event loop is running.
        return
277
+
278
+
279
def set_thread_session(session_id: str) -> None:
    """Store *session_id* for the calling thread only.

    The value lives on a ``threading.local`` object, so it is visible only
    to the thread that set it and is not inherited from a parent thread.
    """
    local_store = _sdk_state.thread_local
    local_store.session_id = session_id
    thread = threading.current_thread()
    debug(f"[SDK] Set thread-local session {truncate_id(session_id)} for thread {thread.name}")
287
+
288
+
289
def clear_thread_session() -> None:
    """Drop the calling thread's session ID, if it has one."""
    if not hasattr(_sdk_state.thread_local, 'session_id'):
        return
    del _sdk_state.thread_local.session_id
    thread = threading.current_thread()
    debug(f"[SDK] Cleared thread-local session for thread {thread.name}")
295
+
296
+
297
def get_thread_session() -> Optional[str]:
    """Return the calling thread's session ID, or None when none is set."""
    local_store = _sdk_state.thread_local
    try:
        return local_store.session_id
    except AttributeError:
        # No session has been stored for this thread.
        return None
300
+
301
+
302
def is_main_thread() -> bool:
    """Return True when the caller runs on the interpreter's main thread."""
    caller = threading.current_thread()
    return caller is threading.main_thread()
305
+
306
+
307
def get_session_id() -> Optional[str]:
    """Resolve the session ID that applies to the caller.

    Lookup order:
        1. Session bound to the current asyncio task, if any
        2. On non-main threads: the thread-local session, and nothing else
        3. On the main thread: the SDK state session
        4. On the main thread: the context variable, as a last resort
    """
    # Async isolation comes first: a task-bound session wins everywhere.
    try:
        running = asyncio.current_task()
        if running:
            bound = _sdk_state.task_sessions.get(running)
            if bound:
                debug(f"[SDK] Using task-local session {truncate_id(bound)}")
                return bound
    except RuntimeError:
        # No running event loop - continue with the thread-based lookup.
        pass

    if is_main_thread():
        # Main thread only: SDK state first, then the context variable.
        return _sdk_state.session_id or current_session_id.get()

    # Worker threads deliberately get no fallback, so they never pick up a
    # session that belongs to the parent thread.
    thread_session = get_thread_session()
    if thread_session:
        debug(f"[SDK] Using thread-local session {truncate_id(thread_session)}")
    else:
        debug(f"[SDK] Thread {threading.current_thread().name} has no thread-local session")
    return thread_session
339
+
340
+
341
def get_http() -> Optional[HttpClient]:
    """Return the shared HTTP client, or None if it has not been created."""
    client = _sdk_state.http
    return client
344
+
345
+
346
def get_event_queue() -> Optional[EventQueue]:
    """Return the shared event queue, or None if it has not been created."""
    queue = _sdk_state.event_queue
    return queue
349
+
350
+
351
def get_resources() -> dict:
    """Return the dict of API resource objects held in the SDK state.

    The dict itself is returned, not a copy; it is empty until init() has
    populated it.
    """
    resources = _sdk_state.resources
    return resources
354
+
355
+
356
def clear_state() -> None:
    """Tear down the current SDK state and install a fresh one (for testing)."""
    global _sdk_state
    debug("[SDK] Clearing SDK state")
    stale = _sdk_state
    # Reset first; the fresh container is only installed if reset() returns.
    stale.reset()
    _sdk_state = SDKState()
@@ -0,0 +1,302 @@
1
+ """Shutdown manager for graceful cleanup.
2
+
3
+ Coordinates shutdown across all active sessions, ensuring proper cleanup
4
+ on process exit. Inspired by TypeScript SDK's shutdown-manager.ts.
5
+ """
6
+ import atexit
7
+ import signal
8
+ import sys
9
+ import threading
10
+ import time
11
+ from typing import Dict, Optional, Set, Callable
12
+ from dataclasses import dataclass
13
+
14
+ from ..utils.logger import debug, info, warning, error, truncate_id
15
+
16
+
17
@dataclass
class SessionState:
    """Per-session bookkeeping record kept by the shutdown manager."""

    # Identifier of the session this record describes.
    session_id: str
    # Object used to end the session through the API. ShutdownManager only
    # acts on it when it is a dict containing a 'sessions' entry.
    http_client: Optional[object] = None
    # Event queue associated with the session, if any.
    event_queue: Optional[object] = None
    # Set once shutdown has started for this session, so it is ended once.
    is_shutting_down: bool = False
    # Whether the session should be ended automatically at process shutdown.
    auto_end: bool = True
25
+
26
+
27
class ShutdownManager:
    """Singleton manager for coordinating shutdown across all active sessions.

    Ensures process listeners (atexit, SIGINT/SIGTERM handlers and the
    uncaught-exception hook) are only registered once and that all
    auto-ending sessions are ended on exit.
    """

    _instance: Optional['ShutdownManager'] = None
    _lock = threading.Lock()

    def __new__(cls):
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    # Prepare the instance completely before publishing it,
                    # so a concurrent caller taking the lock-free fast path
                    # never sees an object without `_initialized`.
                    instance = super().__new__(cls)
                    instance._initialized = False
                    cls._instance = instance
        return cls._instance

    def __init__(self):
        # __init__ runs on every ShutdownManager() call; only initialize once
        if self._initialized:
            return

        self._initialized = True
        self.active_sessions: Dict[str, 'SessionState'] = {}
        self.is_shutting_down = False
        self.shutdown_complete = threading.Event()
        self.listeners_registered = False
        self._session_lock = threading.Lock()

        debug("[ShutdownManager] Initialized")

    def register_session(self, session_id: str, state: 'SessionState') -> None:
        """Register a new active session.

        Also installs the process-wide shutdown listeners on first use.

        Args:
            session_id: Session identifier
            state: Session state information
        """
        with self._session_lock:
            debug(f"[ShutdownManager] Registering session {truncate_id(session_id)}, auto_end={state.auto_end}")
            self.active_sessions[session_id] = state

        # ensure listeners are registered
        self._ensure_listeners_registered()

    def unregister_session(self, session_id: str) -> None:
        """Unregister a session after it ends.

        Unknown session ids are ignored.

        Args:
            session_id: Session identifier
        """
        with self._session_lock:
            debug(f"[ShutdownManager] Unregistering session {truncate_id(session_id)}")
            self.active_sessions.pop(session_id, None)

    def get_active_session_count(self) -> int:
        """Get count of active sessions."""
        with self._session_lock:
            return len(self.active_sessions)

    def is_session_active(self, session_id: str) -> bool:
        """Check if a session is active.

        Args:
            session_id: Session identifier

        Returns:
            True if session is active
        """
        with self._session_lock:
            return session_id in self.active_sessions

    def _ensure_listeners_registered(self) -> None:
        """Register process exit listeners once."""
        if self.listeners_registered:
            return

        self.listeners_registered = True
        debug("[ShutdownManager] Registering global shutdown listeners (atexit, SIGINT, SIGTERM, uncaught exceptions)")

        # register atexit handler for normal termination
        atexit.register(self._handle_exit)

        # register signal handlers for interrupts.
        # signal.signal() may only be called from the main thread of the
        # main interpreter and raises ValueError otherwise; the first
        # session may well be registered from a worker thread, and that
        # must not make SDK initialization fail.
        if threading.current_thread() is threading.main_thread():
            for signum in (signal.SIGINT, signal.SIGTERM):
                try:
                    signal.signal(signum, self._signal_handler)
                except (ValueError, OSError) as e:
                    warning(f"[ShutdownManager] Could not register handler for signal {signum}: {e}")
        else:
            debug("[ShutdownManager] Not on main thread, skipping signal handler registration")

        # register uncaught exception handler
        sys.excepthook = self._exception_handler

    def _signal_handler(self, signum, frame):
        """Handle shutdown signals: clean up, then exit with status 0."""
        info(f"[ShutdownManager] Received signal {signum}, initiating graceful shutdown")
        self._handle_shutdown(f"signal_{signum}")
        # exit after cleanup
        sys.exit(0)

    def _exception_handler(self, exc_type, exc_value, exc_traceback):
        """Handle uncaught exceptions.

        Records an ``error_traceback`` event (best effort), performs the
        shutdown and finally delegates to the default excepthook so the
        traceback is still printed.
        """
        # log the exception
        error(f"[ShutdownManager] Uncaught exception: {exc_type.__name__}: {exc_value}")

        # Create an error event for the uncaught exception
        try:
            from ..sdk.event import create_event
            import traceback

            error_message = f"{exc_type.__name__}: {exc_value}"
            traceback_str = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))

            create_event(
                type="error_traceback",
                error=error_message,
                traceback=traceback_str
            )
            debug(f"[ShutdownManager] Created error_traceback event for uncaught exception")
        except Exception as e:
            # Reporting must never mask the original exception.
            debug(f"[ShutdownManager] Failed to create error_traceback event: {e}")

        # perform shutdown
        self._handle_shutdown("uncaught_exception")

        # call default handler
        sys.__excepthook__(exc_type, exc_value, exc_traceback)

    def _handle_exit(self):
        """Handle normal process exit."""
        debug("[ShutdownManager] Normal process exit triggered (atexit)")
        self._handle_shutdown("atexit")

    def _handle_shutdown(self, trigger: str) -> None:
        """Coordinate shutdown of all sessions.

        Only the first trigger does any work; later ones are ignored.

        Args:
            trigger: What triggered the shutdown
        """
        if self.is_shutting_down:
            debug(f"[ShutdownManager] Already shutting down, ignoring {trigger}")
            return

        self.is_shutting_down = True

        with self._session_lock:
            session_count = len(self.active_sessions)

        if session_count == 0:
            debug("[ShutdownManager] No active sessions to clean up")
            self.shutdown_complete.set()
            return

        info(f"[ShutdownManager] Shutdown initiated by {trigger}, ending {session_count} active session(s)")

        # perform shutdown in separate thread to avoid deadlocks
        shutdown_thread = threading.Thread(
            target=self._perform_shutdown,
            name="ShutdownThread"
        )
        shutdown_thread.daemon = True
        try:
            shutdown_thread.start()
        except RuntimeError as e:
            # Python 3.12+ refuses to start threads from atexit handlers
            # (interpreter finalization). Fall back to running the shutdown
            # inline so sessions are still ended.
            debug(f"[ShutdownManager] Could not start shutdown thread ({e}), running shutdown inline")
            self._perform_shutdown()
            return

        # wait for shutdown with timeout
        if not self.shutdown_complete.wait(timeout=30):
            warning("[ShutdownManager] Shutdown timeout after 30s")

    def _perform_shutdown(self) -> None:
        """Perform the actual shutdown of all sessions.

        Always sets ``shutdown_complete`` when done, even on error.
        """
        debug("[ShutdownManager] _perform_shutdown thread started")
        try:
            sessions_to_end = []

            with self._session_lock:
                # collect sessions that need ending; marking them here
                # guarantees each session is ended at most once
                for session_id, state in self.active_sessions.items():
                    if state.auto_end and not state.is_shutting_down:
                        state.is_shutting_down = True
                        sessions_to_end.append((session_id, state))

            debug(f"[ShutdownManager] Found {len(sessions_to_end)} sessions to end")

            # end all sessions (outside the lock: _end_session unregisters,
            # which takes the same non-reentrant lock)
            for session_id, state in sessions_to_end:
                try:
                    debug(f"[ShutdownManager] Ending session {truncate_id(session_id)}")
                    self._end_session(session_id, state)
                except Exception as e:
                    error(f"[ShutdownManager] Error ending session {truncate_id(session_id)}: {e}")

            # Final telemetry shutdown after all sessions are ended
            try:
                from ..sdk.init import _sdk_state
                if getattr(_sdk_state, 'tracer_provider', None):
                    debug("[ShutdownManager] Final OpenTelemetry shutdown")
                    try:
                        # Final flush (5 second timeout) and shutdown
                        _sdk_state.tracer_provider.force_flush(timeout_millis=5000)
                        _sdk_state.tracer_provider.shutdown()
                        debug("[ShutdownManager] OpenTelemetry shutdown complete")
                    except Exception as e:
                        error(f"[ShutdownManager] Error in final telemetry shutdown: {e}")
            except ImportError:
                pass  # SDK not initialized

            info("[ShutdownManager] Shutdown complete")

        except Exception as e:
            error(f"[ShutdownManager] Unexpected error in _perform_shutdown: {e}")
            import traceback
            error(f"[ShutdownManager] Traceback: {traceback.format_exc()}")
        finally:
            debug("[ShutdownManager] Setting shutdown_complete event")
            self.shutdown_complete.set()

    def _end_session(self, session_id: str, state: 'SessionState') -> None:
        """End a single session with cleanup.

        Flushes telemetry spans, ends the session through the API when a
        usable resources dict is available, then unregisters the session.

        Args:
            session_id: Session identifier
            state: Session state
        """
        # Flush OpenTelemetry spans first (before event queue)
        try:
            # Get the global tracer provider if it exists
            from ..sdk.init import _sdk_state
            if getattr(_sdk_state, 'tracer_provider', None):
                debug(f"[ShutdownManager] Flushing OpenTelemetry spans for session {truncate_id(session_id)}")
                try:
                    # Force flush with 3 second timeout
                    _sdk_state.tracer_provider.force_flush(timeout_millis=3000)
                except Exception as e:
                    error(f"[ShutdownManager] Error flushing spans: {e}")
        except ImportError:
            pass  # SDK not initialized

        # Skip event queue flush during shutdown to avoid hanging
        # The queue worker is a daemon thread and will flush on its own
        if state.event_queue:
            debug(f"[ShutdownManager] Skipping event queue flush during shutdown for session {truncate_id(session_id)}")

        # end session via API if http client present
        if state.http_client and session_id:
            try:
                debug(f"[ShutdownManager] Ending session {truncate_id(session_id)} via API")
                debug(f"[ShutdownManager] http_client type: {type(state.http_client)}, keys: {state.http_client.keys() if isinstance(state.http_client, dict) else 'not a dict'}")
                # state.http_client is a resources dict with 'sessions' key
                if isinstance(state.http_client, dict) and 'sessions' in state.http_client:
                    state.http_client['sessions'].end_session(
                        session_id,
                        is_successful=False,
                        session_eval_reason="Process shutdown"
                    )
                    debug(f"[ShutdownManager] Session {truncate_id(session_id)} ended via API")
                else:
                    debug(f"[ShutdownManager] Cannot end session - http_client not properly configured")
            except Exception as e:
                error(f"[ShutdownManager] Error ending session via API: {e}")

        # unregister the session
        self.unregister_session(session_id)

    def reset(self) -> None:
        """Reset shutdown manager (for testing)."""
        with self._session_lock:
            self.active_sessions.clear()
            self.is_shutting_down = False
            self.shutdown_complete.clear()
            # note: we don't reset listeners_registered as they persist
294
+
295
+
296
# Process-wide singleton, created when this module is imported.
_shutdown_manager = ShutdownManager()


def get_shutdown_manager() -> ShutdownManager:
    """Return the module-level ShutdownManager singleton."""
    manager = _shutdown_manager
    return manager