lucidicai 1.3.2__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lucidicai/__init__.py CHANGED
@@ -2,31 +2,50 @@ import atexit
2
2
  import logging
3
3
  import os
4
4
  import signal
5
+ import sys
6
+ import traceback
7
+ import threading
5
8
  from typing import List, Literal, Optional
6
9
 
10
+ from dotenv import load_dotenv
11
+
7
12
  from .client import Client
8
13
  from .errors import APIKeyVerificationError, InvalidOperationError, LucidicNotInitializedError, PromptError
9
14
  from .event import Event
10
15
  from .session import Session
11
- from .step import Step
12
-
13
- # Import OpenTelemetry-based handlers
14
- from .telemetry.otel_handlers import (
15
- OTelOpenAIHandler,
16
- OTelAnthropicHandler,
17
- OTelLangChainHandler,
18
- OTelPydanticAIHandler,
19
- OTelOpenAIAgentsHandler,
20
- OTelLiteLLMHandler
21
- )
22
-
23
- # Import telemetry manager
24
- from .telemetry.otel_init import LucidicTelemetry
16
+ from .singleton import clear_singletons
25
17
 
26
18
  # Import decorators
27
- from .decorators import step, event
19
+ from .decorators import event
20
+ from .context import (
21
+ set_active_session,
22
+ bind_session,
23
+ bind_session_async,
24
+ clear_active_session,
25
+ current_session_id,
26
+ session,
27
+ session_async,
28
+ run_session,
29
+ run_in_session,
30
+ )
28
31
 
29
- ProviderType = Literal["openai", "anthropic", "langchain", "pydantic_ai", "openai_agents", "litellm"]
32
+ ProviderType = Literal[
33
+ "openai",
34
+ "anthropic",
35
+ "langchain",
36
+ "pydantic_ai",
37
+ "openai_agents",
38
+ "litellm",
39
+ "bedrock",
40
+ "aws_bedrock",
41
+ "amazon_bedrock",
42
+ "google",
43
+ "google_generativeai",
44
+ "vertexai",
45
+ "vertex_ai",
46
+ "cohere",
47
+ "groq",
48
+ ]
30
49
 
31
50
  # Configure logging
32
51
  logger = logging.getLogger("Lucidic")
@@ -38,63 +57,175 @@ if not logger.handlers:
38
57
  logger.setLevel(logging.INFO)
39
58
 
40
59
 
41
- def _setup_providers(client: Client, providers: List[ProviderType]) -> None:
42
- """Set up providers for the client, avoiding duplication
43
-
44
- Args:
45
- client: The Lucidic client instance
46
- providers: List of provider types to set up
60
+ # Crash/exit capture configuration
61
+ MAX_ERROR_DESCRIPTION_LENGTH = 16384
62
+ _crash_handlers_installed = False
63
+ _original_sys_excepthook = None
64
+ _original_threading_excepthook = None
65
+ _shutdown_lock = threading.Lock()
66
+ _is_shutting_down = False
67
+
68
+
69
+ def _mask_and_truncate(text: Optional[str]) -> Optional[str]:
70
+ """Apply masking and truncate to a safe length. Best effort; never raises."""
71
+ if text is None:
72
+ return text
73
+ try:
74
+ masked = Client().mask(text)
75
+ except Exception:
76
+ masked = text
77
+ if masked is None:
78
+ return masked
79
+ return masked[:MAX_ERROR_DESCRIPTION_LENGTH]
80
+
81
+
82
+ def _post_fatal_event(exit_code: int, description: str, extra: Optional[dict] = None) -> None:
83
+ """Best-effort creation of a final Lucidic event on fatal paths.
84
+
85
+ - Idempotent using a process-wide shutdown flag to avoid duplicates when
86
+ multiple hooks fire (signal + excepthook).
87
+ - Swallows all exceptions to avoid interfering with shutdown.
47
88
  """
48
- # Track which providers have been set up to avoid duplication
49
- setup_providers = set()
50
-
51
- # Initialize telemetry if using OpenTelemetry
52
- if providers:
53
- telemetry = LucidicTelemetry()
54
- if not telemetry.is_initialized():
55
- telemetry.initialize(agent_id=client.agent_id)
56
-
57
- for provider in providers:
58
- if provider in setup_providers:
59
- continue
89
+ global _is_shutting_down
90
+ with _shutdown_lock:
91
+ if _is_shutting_down:
92
+ return
93
+ _is_shutting_down = True
94
+ try:
95
+ client = Client()
96
+ session = getattr(client, 'session', None)
97
+ if not session or getattr(session, 'is_finished', False):
98
+ return
99
+ arguments = {"exit_code": exit_code}
100
+ if extra:
101
+ try:
102
+ arguments.update(extra)
103
+ except Exception:
104
+ pass
105
+
106
+ # Create a single immutable event describing the crash
107
+ session.create_event(
108
+ type="error_traceback",
109
+ error=_mask_and_truncate(description),
110
+ traceback="",
111
+ metadata={"exit_code": exit_code, **({} if not extra else extra)},
112
+ )
113
+ except Exception:
114
+ # Never raise during shutdown
115
+ pass
116
+
117
+
118
+ def _install_crash_handlers() -> None:
119
+ """Install global uncaught exception handlers (idempotent)."""
120
+ global _crash_handlers_installed, _original_sys_excepthook, _original_threading_excepthook
121
+ if _crash_handlers_installed:
122
+ return
123
+
124
+ _original_sys_excepthook = sys.excepthook
125
+
126
+ def _sys_hook(exc_type, exc, tb):
127
+ try:
128
+ trace_str = ''.join(traceback.format_exception(exc_type, exc, tb))
129
+ except Exception:
130
+ trace_str = f"Uncaught exception: {getattr(exc_type, '__name__', str(exc_type))}: {exc}"
131
+
132
+ # Emit final event and end the session as unsuccessful
133
+ _post_fatal_event(1, trace_str, {
134
+ "exception_type": getattr(exc_type, "__name__", str(exc_type)),
135
+ "exception_message": str(exc),
136
+ "thread_name": threading.current_thread().name,
137
+ })
138
+
139
+ # Follow proper shutdown sequence to prevent broken pipes
140
+ try:
141
+ client = Client()
142
+
143
+ # 1. Flush OpenTelemetry spans first
144
+ if hasattr(client, '_tracer_provider'):
145
+ try:
146
+ client._tracer_provider.force_flush(timeout_millis=5000)
147
+ except Exception:
148
+ pass
149
+
150
+ # 2. Flush and shutdown EventQueue (with active sessions cleared)
151
+ if hasattr(client, "_event_queue"):
152
+ try:
153
+ # Clear active sessions to allow shutdown
154
+ client._event_queue._active_sessions.clear()
155
+ client._event_queue.force_flush()
156
+ client._event_queue.shutdown(timeout=5.0)
157
+ except Exception:
158
+ pass
60
159
 
61
- if provider == "openai":
62
- client.set_provider(OTelOpenAIHandler())
63
- setup_providers.add("openai")
64
- elif provider == "anthropic":
65
- client.set_provider(OTelAnthropicHandler())
66
- setup_providers.add("anthropic")
67
- elif provider == "langchain":
68
- client.set_provider(OTelLangChainHandler())
69
- logger.info("For LangChain, make sure to create a handler and attach it to your top-level Agent class.")
70
- setup_providers.add("langchain")
71
- elif provider == "pydantic_ai":
72
- client.set_provider(OTelPydanticAIHandler())
73
- setup_providers.add("pydantic_ai")
74
- elif provider == "openai_agents":
160
+ # 3. Shutdown TracerProvider after EventQueue
161
+ if hasattr(client, '_tracer_provider'):
162
+ try:
163
+ client._tracer_provider.shutdown()
164
+ except Exception:
165
+ pass
166
+
167
+ # 4. Mark client as shutting down to prevent new requests
168
+ client._shutdown = True
169
+
170
+ # 5. Prevent auto_end double work
75
171
  try:
76
- client.set_provider(OTelOpenAIAgentsHandler())
77
- setup_providers.add("openai_agents")
78
- except Exception as e:
79
- logger.error(f"Failed to set up OpenAI Agents provider: {e}")
80
- raise
81
- elif provider == "litellm":
82
- client.set_provider(OTelLiteLLMHandler())
83
- setup_providers.add("litellm")
172
+ client.auto_end = False
173
+ except Exception:
174
+ pass
175
+
176
+ # 6. End session explicitly as unsuccessful
177
+ end_session()
178
+
179
+ except Exception:
180
+ pass
181
+
182
+ # Chain to original to preserve default printing/behavior
183
+ try:
184
+ _original_sys_excepthook(exc_type, exc, tb)
185
+ except Exception:
186
+ # Avoid recursion/errors in fatal path
187
+ pass
188
+
189
+ sys.excepthook = _sys_hook
190
+
191
+ # For Python 3.8+, only treat main-thread exceptions as fatal (process-exiting)
192
+ if hasattr(threading, 'excepthook'):
193
+ _original_threading_excepthook = threading.excepthook
194
+
195
+ def _thread_hook(args):
196
+ try:
197
+ if args.thread is threading.main_thread():
198
+ # For main thread exceptions, use full shutdown sequence
199
+ _sys_hook(args.exc_type, args.exc_value, args.exc_traceback)
200
+ else:
201
+ # For non-main threads, just flush spans without full shutdown
202
+ try:
203
+ client = Client()
204
+ # Flush any pending spans from this thread
205
+ if hasattr(client, '_tracer_provider'):
206
+ client._tracer_provider.force_flush(timeout_millis=1000)
207
+ # Force flush events but don't shutdown
208
+ if hasattr(client, "_event_queue"):
209
+ client._event_queue.force_flush()
210
+ except Exception:
211
+ pass
212
+ except Exception:
213
+ pass
214
+ try:
215
+ _original_threading_excepthook(args)
216
+ except Exception:
217
+ pass
218
+
219
+ threading.excepthook = _thread_hook
220
+
221
+ _crash_handlers_installed = True
84
222
 
85
223
  __all__ = [
86
- 'Client',
87
224
  'Session',
88
- 'Step',
89
225
  'Event',
90
226
  'init',
91
- 'continue_session',
92
- 'create_step',
93
- 'end_step',
94
- 'update_step',
227
+ 'create_experiment',
95
228
  'create_event',
96
- 'update_event',
97
- 'end_event',
98
229
  'end_session',
99
230
  'get_prompt',
100
231
  'get_session',
@@ -103,8 +234,15 @@ __all__ = [
103
234
  'LucidicNotInitializedError',
104
235
  'PromptError',
105
236
  'InvalidOperationError',
106
- 'step',
107
237
  'event',
238
+ 'set_active_session',
239
+ 'bind_session',
240
+ 'bind_session_async',
241
+ 'clear_active_session',
242
+ 'session',
243
+ 'session_async',
244
+ 'run_session',
245
+ 'run_in_session',
108
246
  ]
109
247
 
110
248
 
@@ -116,12 +254,12 @@ def init(
116
254
  task: Optional[str] = None,
117
255
  providers: Optional[List[ProviderType]] = [],
118
256
  production_monitoring: Optional[bool] = False,
119
- mass_sim_id: Optional[str] = None,
120
257
  experiment_id: Optional[str] = None,
121
258
  rubrics: Optional[list] = None,
122
259
  tags: Optional[list] = None,
123
260
  masking_function = None,
124
261
  auto_end: Optional[bool] = True,
262
+ capture_uncaught: Optional[bool] = True,
125
263
  ) -> str:
126
264
  """
127
265
  Initialize the Lucidic client.
@@ -133,7 +271,6 @@ def init(
133
271
  agent_id: Agent ID. If not provided, will use the LUCIDIC_AGENT_ID environment variable.
134
272
  task: Task description.
135
273
  providers: List of provider types ("openai", "anthropic", "langchain", "pydantic_ai").
136
- mass_sim_id: Optional mass simulation ID, if session is to be part of a mass simulation.
137
274
  experiment_id: Optional experiment ID, if session is to be part of an experiment.
138
275
  rubrics: Optional rubrics for evaluation, list of strings.
139
276
  tags: Optional tags for the session, list of strings.
@@ -144,6 +281,11 @@ def init(
144
281
  InvalidOperationError: If the client is already initialized.
145
282
  APIKeyVerificationError: If the API key is invalid.
146
283
  """
284
+
285
+ load_dotenv()
286
+
287
+ if os.getenv("LUCIDIC_DEBUG", "False").lower() == "true":
288
+ logger.setLevel(logging.DEBUG)
147
289
 
148
290
  # get current client which will be NullClient if never lai is never initialized
149
291
  client = Client()
@@ -173,10 +315,13 @@ def init(
173
315
  auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
174
316
 
175
317
  # Set up providers
176
- _setup_providers(client, providers)
318
+ # Use the client's singleton telemetry initialization
319
+ if providers:
320
+ success = client.initialize_telemetry(providers)
321
+ if not success:
322
+ logger.warning("[Telemetry] Failed to initialize telemetry for some providers")
177
323
  real_session_id = client.init_session(
178
324
  session_name=session_name,
179
- mass_sim_id=mass_sim_id,
180
325
  task=task,
181
326
  rubrics=rubrics,
182
327
  tags=tags,
@@ -189,52 +334,28 @@ def init(
189
334
 
190
335
  # Set the auto_end flag on the client
191
336
  client.auto_end = auto_end
337
+ # Bind this session id to the current execution context for async-safety
338
+ try:
339
+ set_active_session(real_session_id)
340
+ except Exception:
341
+ pass
342
+ # Install crash handlers unless explicitly disabled
343
+ try:
344
+ if capture_uncaught:
345
+ _install_crash_handlers()
346
+ # Also install error event handler for uncaught exceptions
347
+ try:
348
+ from .errors import install_error_handler
349
+ install_error_handler()
350
+ except Exception:
351
+ pass
352
+ except Exception:
353
+ pass
192
354
 
193
355
  logger.info("Session initialized successfully")
194
356
  return real_session_id
195
357
 
196
358
 
197
- def continue_session(
198
- session_id: str,
199
- api_key: Optional[str] = None,
200
- agent_id: Optional[str] = None,
201
- providers: Optional[List[ProviderType]] = [],
202
- masking_function = None,
203
- auto_end: Optional[bool] = True,
204
- ):
205
- if api_key is None:
206
- api_key = os.getenv("LUCIDIC_API_KEY", None)
207
- if api_key is None:
208
- raise APIKeyVerificationError("Make sure to either pass your API key into lai.init() or set the LUCIDIC_API_KEY environment variable.")
209
- if agent_id is None:
210
- agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
211
- if agent_id is None:
212
- raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into lai.init() or set the LUCIDIC_AGENT_ID environment variable.")
213
-
214
- client = Client()
215
- if client.session:
216
- raise InvalidOperationError("[Lucidic] Session already in progress. Please call lai.end_session() or lai.reset_sdk() first.")
217
- # if not yet initialized or still the NullClient -> create a real client when init is called
218
- if not getattr(client, 'initialized', False):
219
- client = Client(api_key=api_key, agent_id=agent_id)
220
-
221
- # Handle auto_end with environment variable support
222
- if auto_end is None:
223
- auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
224
-
225
- # Set up providers
226
- _setup_providers(client, providers)
227
- session_id = client.continue_session(session_id=session_id)
228
- if masking_function:
229
- client.masking_function = masking_function
230
-
231
- # Set the auto_end flag on the client
232
- client.auto_end = auto_end
233
-
234
- logger.info(f"Session {session_id} continuing...")
235
- return session_id # For consistency
236
-
237
-
238
359
  def update_session(
239
360
  task: Optional[str] = None,
240
361
  session_eval: Optional[float] = None,
@@ -252,17 +373,28 @@ def update_session(
252
373
  is_successful: Whether the session was successful.
253
374
  is_successful_reason: Session success reason.
254
375
  """
376
+ # Prefer context-bound session over global active session
255
377
  client = Client()
256
- if not client.session:
378
+ target_sid = None
379
+ try:
380
+ target_sid = current_session_id.get(None)
381
+ except Exception:
382
+ target_sid = None
383
+ if not target_sid and client.session:
384
+ target_sid = client.session.session_id
385
+ if not target_sid:
257
386
  return
258
- client.session.update_session(**locals())
387
+ # Use ephemeral session facade to avoid mutating global state
388
+ session = client.session if (client.session and client.session.session_id == target_sid) else Session(agent_id=client.agent_id, session_id=target_sid)
389
+ session.update_session(**locals())
259
390
 
260
391
 
261
392
  def end_session(
262
393
  session_eval: Optional[float] = None,
263
394
  session_eval_reason: Optional[str] = None,
264
395
  is_successful: Optional[bool] = None,
265
- is_successful_reason: Optional[str] = None
396
+ is_successful_reason: Optional[str] = None,
397
+ wait_for_flush: bool = True
266
398
  ) -> None:
267
399
  """
268
400
  End the current session.
@@ -272,48 +404,194 @@ def end_session(
272
404
  session_eval_reason: Session evaluation reason.
273
405
  is_successful: Whether the session was successful.
274
406
  is_successful_reason: Session success reason.
407
+ wait_for_flush: Whether to block until event queue is empty (default True).
408
+ Set to False during signal handling to prevent hangs.
275
409
  """
276
410
  client = Client()
277
- if not client.session:
411
+ # Prefer context-bound session id
412
+ target_sid = None
413
+ try:
414
+ target_sid = current_session_id.get(None)
415
+ except Exception:
416
+ target_sid = None
417
+ if not target_sid and client.session:
418
+ target_sid = client.session.session_id
419
+ if not target_sid:
278
420
  return
421
+
422
+ # If ending the globally active session, perform cleanup
423
+ if client.session and client.session.session_id == target_sid:
424
+ # Best-effort: wait for LiteLLM callbacks to flush before ending
425
+ try:
426
+ import litellm
427
+ cbs = getattr(litellm, 'callbacks', None)
428
+ if cbs:
429
+ for cb in cbs:
430
+ try:
431
+ if hasattr(cb, 'wait_for_pending_callbacks'):
432
+ cb.wait_for_pending_callbacks(timeout=1)
433
+ except Exception:
434
+ pass
435
+ except Exception:
436
+ pass
437
+ # CRITICAL: Flush OpenTelemetry spans FIRST (blocking)
438
+ # This ensures all spans are converted to events before we flush the event queue
439
+ try:
440
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
441
+ logger.debug("[Session] Flushing OpenTelemetry spans before session end...")
442
+ # Force flush with generous timeout to ensure all spans are exported
443
+ # The BatchSpanProcessor now exports every 100ms, so this should be quick
444
+ success = client._tracer_provider.force_flush(timeout_millis=10000) # 10 second timeout
445
+ if not success:
446
+ logger.warning("[Session] OpenTelemetry flush timed out - some spans may be lost")
447
+ else:
448
+ logger.debug("[Session] OpenTelemetry spans flushed successfully")
449
+ except Exception as e:
450
+ logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
451
+
452
+ # THEN flush event queue (which now contains events from flushed spans)
453
+ try:
454
+ if hasattr(client, '_event_queue'):
455
+ logger.debug("[Session] Flushing event queue...")
456
+ client._event_queue.force_flush(timeout_seconds=10.0)
457
+
458
+ # Wait for queue to be completely empty (only if blocking)
459
+ if wait_for_flush:
460
+ import time
461
+ wait_start = time.time()
462
+ max_wait = 10.0 # seconds - timeout for blob uploads
463
+ while not client._event_queue.is_empty():
464
+ if time.time() - wait_start > max_wait:
465
+ logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
466
+ break
467
+ time.sleep(0.1)
468
+
469
+ if client._event_queue.is_empty():
470
+ logger.debug("[Session] EventQueue confirmed empty")
471
+ else:
472
+ logger.debug("[Session] Non-blocking mode - skipping wait for empty queue")
473
+ except Exception as e:
474
+ logger.debug(f"[Session] Failed to flush event queue: {e}")
475
+
476
+ # Mark session as inactive FIRST (prevents race conditions)
477
+ client.mark_session_inactive(target_sid)
478
+
479
+ # Send only expected fields to update endpoint
480
+ update_kwargs = {
481
+ "is_finished": True,
482
+ "session_eval": session_eval,
483
+ "session_eval_reason": session_eval_reason,
484
+ "is_successful": is_successful,
485
+ "is_successful_reason": is_successful_reason,
486
+ }
487
+ try:
488
+ client.session.update_session(**update_kwargs)
489
+ except Exception as e:
490
+ logger.warning(f"[Session] Failed to update session: {e}")
491
+
492
+ # Clear only the global session reference, not the singleton
493
+ # This preserves the client and event queue for other threads
494
+ client.session = None
495
+ logger.debug(f"[Session] Ended global session {target_sid}")
496
+ # DO NOT shutdown event queue - other threads may be using it
497
+ # DO NOT call client.clear() - preserve singleton for other threads
498
+ return
499
+
500
+ # Otherwise, end the specified session id without clearing global state
501
+ # First flush telemetry and event queue for non-global sessions too
502
+ try:
503
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
504
+ logger.debug(f"[Session] Flushing OpenTelemetry spans for session {target_sid[:8]}...")
505
+ success = client._tracer_provider.force_flush(timeout_millis=10000)
506
+ if not success:
507
+ logger.warning("[Session] OpenTelemetry flush timed out")
508
+ except Exception as e:
509
+ logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
279
510
 
280
- # Wait for any pending LiteLLM callbacks before ending session
281
- for provider in client.providers:
282
- if hasattr(provider, '_callback') and hasattr(provider._callback, 'wait_for_pending_callbacks'):
283
- logger.info("Waiting for LiteLLM callbacks to complete before ending session...")
284
- provider._callback.wait_for_pending_callbacks(timeout=5.0)
285
-
286
- client.session.update_session(is_finished=True, **locals())
287
- client.clear()
511
+ # Flush and wait for event queue to empty
512
+ try:
513
+ if hasattr(client, '_event_queue'):
514
+ logger.debug(f"[Session] Flushing event queue for session {target_sid[:8]}...")
515
+ client._event_queue.force_flush(timeout_seconds=10.0)
516
+
517
+ # Wait for queue to be completely empty (only if blocking)
518
+ if wait_for_flush:
519
+ import time
520
+ wait_start = time.time()
521
+ max_wait = 10.0 # seconds - timeout for blob uploads
522
+ while not client._event_queue.is_empty():
523
+ if time.time() - wait_start > max_wait:
524
+ logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
525
+ break
526
+ time.sleep(0.1)
527
+
528
+ if client._event_queue.is_empty():
529
+ logger.debug(f"[Session] EventQueue confirmed empty for session {target_sid[:8]}")
530
+ else:
531
+ logger.debug(f"[Session] Non-blocking mode - skipping wait for session {target_sid[:8]}")
532
+ except Exception as e:
533
+ logger.debug(f"[Session] Failed to flush event queue: {e}")
534
+
535
+ # CRITICAL: Mark session as inactive FIRST for ALL sessions
536
+ client.mark_session_inactive(target_sid)
537
+
538
+ temp = Session(agent_id=client.agent_id, session_id=target_sid)
539
+ update_kwargs = {
540
+ "is_finished": True,
541
+ "session_eval": session_eval,
542
+ "session_eval_reason": session_eval_reason,
543
+ "is_successful": is_successful,
544
+ "is_successful_reason": is_successful_reason,
545
+ }
546
+ try:
547
+ temp.update_session(**update_kwargs)
548
+ except Exception as e:
549
+ logger.warning(f"[Session] Failed to update session: {e}")
288
550
 
289
551
 
290
- def reset_sdk() -> None:
552
+ def flush(timeout_seconds: float = 2.0) -> bool:
291
553
  """
292
- DEPRECATED: Reset the SDK.
293
- """
294
- return
295
-
296
- client = Client()
297
- if not client.initialized:
298
- return
554
+ Manually flush all pending telemetry data.
299
555
 
300
- # Shutdown OpenTelemetry if it was initialized
301
- telemetry = LucidicTelemetry()
302
- if telemetry.is_initialized():
303
- telemetry.uninstrument_all()
556
+ Flushes both OpenTelemetry spans and queued events to ensure
557
+ all telemetry data is sent to the backend. This is called
558
+ automatically on process exit but can be called manually
559
+ for explicit control.
304
560
 
305
- client.clear()
306
-
307
-
308
- def _cleanup_telemetry():
309
- """Cleanup function for OpenTelemetry shutdown"""
561
+ Args:
562
+ timeout_seconds: Maximum time to wait for flush
563
+
564
+ Returns:
565
+ True if all flushes succeeded, False otherwise
566
+
567
+ Example:
568
+ ```python
569
+ import lucidicai as lai
570
+
571
+ # ... your code using Lucidic ...
572
+
573
+ # Manually flush before critical operation
574
+ lai.flush()
575
+ ```
576
+ """
310
577
  try:
311
- telemetry = LucidicTelemetry()
312
- if telemetry.is_initialized():
313
- telemetry.uninstrument_all()
314
- logger.info("OpenTelemetry instrumentation cleaned up")
578
+ client = Client()
579
+ success = True
580
+
581
+ # Flush OpenTelemetry spans first
582
+ if hasattr(client, 'flush_telemetry'):
583
+ span_success = client.flush_telemetry(timeout_seconds)
584
+ success = success and span_success
585
+
586
+ # Then flush event queue
587
+ if hasattr(client, '_event_queue'):
588
+ client._event_queue.force_flush(timeout_seconds)
589
+
590
+ logger.debug(f"[Flush] Manual flush completed (success={success})")
591
+ return success
315
592
  except Exception as e:
316
- logger.error(f"Error during telemetry cleanup: {e}")
593
+ logger.error(f"Failed to flush telemetry: {e}")
594
+ return False
317
595
 
318
596
 
319
597
  def _auto_end_session():
@@ -323,252 +601,271 @@ def _auto_end_session():
323
601
  if hasattr(client, 'auto_end') and client.auto_end and client.session and not client.session.is_finished:
324
602
  logger.info("Auto-ending active session on exit")
325
603
  client.auto_end = False # To avoid repeating auto-end on exit
326
- end_session()
604
+
605
+ # Flush telemetry
606
+ if hasattr(client, '_tracer_provider'):
607
+ client._tracer_provider.force_flush(timeout_millis=5000)
608
+
609
+ # Force flush event queue before ending session
610
+ if hasattr(client, '_event_queue'):
611
+ if logger.isEnabledFor(logging.DEBUG):
612
+ logger.debug("[Shutdown] Flushing event queue before session end")
613
+ client._event_queue.force_flush(timeout_seconds=5.0)
614
+
615
+ # Use non-blocking mode during shutdown to prevent hangs
616
+ # The actual wait for queue empty happens in _cleanup_singleton_on_exit
617
+ end_session(wait_for_flush=False)
618
+
327
619
  except Exception as e:
328
620
  logger.debug(f"Error during auto-end session: {e}")
329
621
 
330
622
 
623
+ def _cleanup_singleton_on_exit():
624
+ """
625
+ Clean up singleton resources only on process exit.
626
+
627
+ CRITICAL ORDER:
628
+ 1. Flush OpenTelemetry spans (blocking) - ensures spans become events
629
+ 2. Flush EventQueue - sends all events including those from spans
630
+ 3. Close HTTP session - graceful TCP FIN prevents broken pipes
631
+ 4. Clear singletons - final cleanup
632
+
633
+ This order is essential to prevent lost events and broken connections.
634
+ """
635
+ try:
636
+ client = Client()
637
+
638
+ # 1. FIRST: Flush OpenTelemetry spans (blocking until exported)
639
+ # This is the critical fix - we must flush spans before events
640
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
641
+ try:
642
+ # Small delay to ensure spans have reached the processor
643
+ import time
644
+ time.sleep(0.1) # 100ms to let spans reach BatchSpanProcessor
645
+
646
+ logger.debug("[Exit] Flushing OpenTelemetry spans...")
647
+ # force_flush() blocks until all spans are exported or timeout
648
+ success = client._tracer_provider.force_flush(timeout_millis=3000)
649
+ if success:
650
+ logger.debug("[Exit] OpenTelemetry spans flushed successfully")
651
+ else:
652
+ logger.warning("[Exit] OpenTelemetry flush timed out - some spans may be lost")
653
+
654
+ # DON'T shutdown TracerProvider yet - wait until after EventQueue
655
+ # This prevents losing spans that are still being processed
656
+ except Exception as e:
657
+ logger.debug(f"[Exit] Telemetry cleanup error: {e}")
658
+
659
+ # 2. SECOND: Flush and shutdown EventQueue
660
+ # Now it contains all events from the flushed spans
661
+ if hasattr(client, '_event_queue'):
662
+ try:
663
+ logger.debug("[Exit] Flushing event queue...")
664
+ client._event_queue.force_flush(timeout_seconds=2.0)
665
+
666
+ # Wait for queue to be completely empty before proceeding
667
+ import time
668
+ max_wait = 5.0 # seconds
669
+ start_time = time.time()
670
+ while not client._event_queue.is_empty():
671
+ if time.time() - start_time > max_wait:
672
+ logger.warning("[Exit] EventQueue not empty after timeout")
673
+ break
674
+ time.sleep(0.01) # Small sleep to avoid busy waiting
675
+
676
+ if client._event_queue.is_empty():
677
+ logger.debug("[Exit] EventQueue is empty, proceeding with shutdown")
678
+
679
+ # Clear any stale active sessions (threads may have died without cleanup)
680
+ if hasattr(client, '_active_sessions'):
681
+ with client._active_sessions_lock:
682
+ if client._active_sessions:
683
+ logger.debug(f"[Exit] Clearing {len(client._active_sessions)} remaining active sessions")
684
+ client._active_sessions.clear()
685
+
686
+ # Now shutdown EventQueue
687
+ client._event_queue.shutdown()
688
+ logger.debug("[Exit] Event queue shutdown complete")
689
+ except Exception as e:
690
+ logger.debug(f"[Exit] Event queue cleanup error: {e}")
691
+
692
+ # 3. THIRD: Shutdown TracerProvider after EventQueue is done
693
+ # This ensures all spans can be exported before shutdown
694
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
695
+ try:
696
+ logger.debug("[Exit] Shutting down TracerProvider...")
697
+ client._tracer_provider.shutdown()
698
+ logger.debug("[Exit] TracerProvider shutdown complete")
699
+ except Exception as e:
700
+ logger.debug(f"[Exit] TracerProvider shutdown error: {e}")
701
+
702
+ # 4. FOURTH: Close HTTP session ONLY after everything else
703
+ # This prevents broken pipes by ensuring all events are sent first
704
+ if hasattr(client, 'request_session'):
705
+ try:
706
+ # Mark client as shutting down to prevent new requests
707
+ client._shutdown = True
708
+ logger.debug("[Exit] Closing HTTP session (queue empty, worker stopped)")
709
+ client.request_session.close()
710
+ logger.debug("[Exit] HTTP session closed gracefully")
711
+ except Exception as e:
712
+ logger.debug(f"[Exit] HTTP session cleanup error: {e}")
713
+
714
+ # 5. FINALLY: Clear singletons
715
+ # Safe to destroy now that all data is flushed
716
+ clear_singletons()
717
+ logger.debug("[Exit] Singleton cleanup complete")
718
+
719
+ except Exception as e:
720
+ # Silent fail on exit to avoid disrupting process termination
721
+ if logger.isEnabledFor(logging.DEBUG):
722
+ logger.debug(f"[Exit] Cleanup error: {e}")
723
+
724
+
331
725
  def _signal_handler(signum, frame):
332
- """Handle interruption signals"""
726
+ """Handle interruption signals with better queue flushing."""
727
+ # Best-effort final event for signal exits
728
+ try:
729
+ try:
730
+ name = signal.Signals(signum).name
731
+ except Exception:
732
+ name = str(signum)
733
+ try:
734
+ stack_str = ''.join(traceback.format_stack(frame)) if frame else ''
735
+ except Exception:
736
+ stack_str = ''
737
+ desc = _mask_and_truncate(f"Received signal {name}\n{stack_str}")
738
+ _post_fatal_event(128 + signum, desc, {"signal": name, "signum": signum})
739
+ except Exception:
740
+ pass
741
+
742
+ # Proper shutdown sequence matching atexit handler
743
+ try:
744
+ client = Client()
745
+
746
+ # 1. FIRST: Flush OpenTelemetry spans
747
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
748
+ try:
749
+ logger.debug(f"[Signal] Flushing OpenTelemetry spans on signal {signum}")
750
+ client._tracer_provider.force_flush(timeout_millis=2000) # Shorter timeout for signals
751
+ except Exception:
752
+ pass
753
+
754
+ # 2. SECOND: Flush and shutdown EventQueue
755
+ if hasattr(client, "_event_queue"):
756
+ logger.debug(f"[Signal] Flushing event queue on signal {signum}")
757
+ client._event_queue.force_flush(timeout_seconds=2.0)
758
+
759
+ # Clear active sessions to allow shutdown
760
+ if hasattr(client, '_active_sessions'):
761
+ with client._active_sessions_lock:
762
+ client._active_sessions.clear()
763
+
764
+ client._event_queue.shutdown()
765
+
766
+ # 3. THIRD: Shutdown TracerProvider after EventQueue
767
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
768
+ logger.debug(f"[Signal] Shutting down TracerProvider on signal {signum}")
769
+ try:
770
+ client._tracer_provider.shutdown()
771
+ except Exception:
772
+ pass
773
+
774
+ # 4. Mark client as shutting down
775
+ client._shutdown = True
776
+
777
+ except Exception:
778
+ pass
779
+
780
+ logger.debug(f"[Signal] Auto-ending session on signal {signum}")
333
781
  _auto_end_session()
334
- _cleanup_telemetry()
335
782
  # Re-raise the signal for default handling
336
783
  signal.signal(signum, signal.SIG_DFL)
337
784
  os.kill(os.getpid(), signum)
338
785
 
339
786
 
340
- # Register cleanup functions (auto-end runs first due to LIFO order)
341
- atexit.register(_cleanup_telemetry)
342
- atexit.register(_auto_end_session)
787
+ # Register cleanup functions
788
+ atexit.register(_cleanup_singleton_on_exit) # Clean up singleton resources on exit
789
+ atexit.register(_auto_end_session) # Auto-end session if enabled
343
790
 
344
791
  # Register signal handlers for graceful shutdown
345
792
  signal.signal(signal.SIGINT, _signal_handler)
346
793
  signal.signal(signal.SIGTERM, _signal_handler)
347
794
 
348
795
 
349
- def create_mass_sim(
350
- mass_sim_name: str,
351
- total_num_sessions: int,
796
+ def create_experiment(
797
+ experiment_name: str,
798
+ pass_fail_rubrics: Optional[list] = None,
799
+ score_rubrics: Optional[list] = None,
800
+ description: Optional[str] = None,
801
+ tags: Optional[list] = None,
352
802
  api_key: Optional[str] = None,
353
803
  agent_id: Optional[str] = None,
354
- task: Optional[str] = None,
355
- tags: Optional[list] = None
356
804
  ) -> str:
357
805
  """
358
- Create a new mass simulation.
359
-
360
- Args:
361
- mass_sim_name: Name of the mass simulation.
362
- total_num_sessions: Total intended number of sessions. More sessions can be added later.
363
- api_key: API key for authentication. If not provided, will use the LUCIDIC_API_KEY environment variable.
364
- agent_id: Agent ID. If not provided, will use the LUCIDIC_AGENT_ID environment variable.
806
+ Create a new experiment for grouping and analyzing sessions.
807
+
808
+ Args:
809
+ experiment_name: Name of the experiment (required)
810
+ pass_fail_rubrics: List of pass/fail rubric names to associate
811
+ description: Description of the experiment
365
812
  task: Task description.
366
- tags: Tags for the mass simulation.
367
-
368
- Returns:
369
- mass_sim_id: ID of the created mass simulation. Pass this to lai.init() to create a new session in the mass sim.
813
+ tags: List of tags for categorization
814
+ score_rubrics: List of score rubric names to associate
815
+ api_key: API key (uses env if not provided)
816
+ agent_id: Agent ID (uses env if not provided)
817
+
818
+ Returns:
819
+ experiment_id: UUID of the created experiment
820
+
821
+ Raises:
822
+ APIKeyVerificationError: If API key is invalid or missing
823
+ InvalidOperationError: If experiment creation fails
824
+ ValueError: If name is empty
370
825
  """
826
+
827
+ # validation
828
+ if not experiment_name:
829
+ raise ValueError("Experiment name is required")
830
+
371
831
  if api_key is None:
372
832
  api_key = os.getenv("LUCIDIC_API_KEY", None)
373
833
  if api_key is None:
374
- raise APIKeyVerificationError("Make sure to either pass your API key into lai.init() or set the LUCIDIC_API_KEY environment variable.")
834
+ raise APIKeyVerificationError("Make sure to either pass your API key into create_experiment() or set the LUCIDIC_API_KEY environment variable.")
375
835
  if agent_id is None:
376
836
  agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
377
837
  if agent_id is None:
378
- raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into lai.init() or set the LUCIDIC_AGENT_ID environment variable.")
379
- try:
380
- client = Client()
381
- except LucidicNotInitializedError:
382
- client = Client( # TODO: fail hard if incorrect API key or agent ID provided and wrong, fail silently if not provided
383
- api_key=api_key,
384
- agent_id=agent_id,
385
- )
386
- mass_sim_id = client.init_mass_sim(mass_sim_name=mass_sim_name, total_num_sims=total_num_sessions, task=task, tags=tags) # TODO: change total_num_sims to total_num_sessions everywhere
387
- logger.info(f"Created mass simulation with ID: {mass_sim_id}")
388
- return mass_sim_id
389
-
390
-
391
- def create_step(
392
- state: Optional[str] = None,
393
- action: Optional[str] = None,
394
- goal: Optional[str] = None,
395
- eval_score: Optional[float] = None,
396
- eval_description: Optional[str] = None,
397
- screenshot: Optional[str] = None,
398
- screenshot_path: Optional[str] = None
399
- ) -> None:
400
- """
401
- Create a new step. Previous step must be finished to create a new step.
402
-
403
- Args:
404
- state: State description.
405
- action: Action description.
406
- goal: Goal description.
407
- eval_score: Evaluation score.
408
- eval_description: Evaluation description.
409
- screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
410
- screenshot_path: Screenshot path. Provide either screenshot or screenshot_path.
411
- """
412
- client = Client()
413
- if not client.session:
414
- return
415
- return client.session.create_step(**locals())
416
-
417
-
418
- def update_step(
419
- step_id: Optional[str] = None,
420
- state: Optional[str] = None,
421
- action: Optional[str] = None,
422
- goal: Optional[str] = None,
423
- eval_score: Optional[float] = None,
424
- eval_description: Optional[str] = None,
425
- screenshot: Optional[str] = None,
426
- screenshot_path: Optional[str] = None
427
- ) -> None:
428
- """
429
- Update the current step.
430
-
431
- Args:
432
- step_id: ID of the step to update.
433
- state: State description.
434
- action: Action description.
435
- goal: Goal description.
436
- eval_score: Evaluation score.
437
- eval_description: Evaluation description.
438
- screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
439
- screenshot_path: Screenshot path. Provide either screenshot or screenshot_path.
440
- """
441
- client = Client()
442
- if not client.session:
443
- return
444
- if not client.session.active_step:
445
- raise InvalidOperationError("No active step to update")
446
- client.session.update_step(**locals())
447
-
448
-
449
- def end_step(
450
- step_id: Optional[str] = None,
451
- state: Optional[str] = None,
452
- action: Optional[str] = None,
453
- goal: Optional[str] = None,
454
- eval_score: Optional[float] = None,
455
- eval_description: Optional[str] = None,
456
- screenshot: Optional[str] = None,
457
- screenshot_path: Optional[str] = None
458
- ) -> None:
459
- """
460
- End the current step.
461
-
462
- Args:
463
- step_id: ID of the step to end.
464
- state: State description.
465
- action: Action description.
466
- goal: Goal description.
467
- eval_score: Evaluation score.
468
- eval_description: Evaluation description.
469
- screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
470
- screenshot_path: Screenshot path.
471
- """
472
- client = Client()
473
- if not client.session:
474
- return
475
-
476
- if not client.session.active_step and step_id is None:
477
- raise InvalidOperationError("No active step to end")
478
-
479
- # Filter out None values from locals
480
- params = locals()
481
- kwargs = {k: v for k, v in params.items() if v is not None and k not in ['client', 'params']}
482
- kwargs['is_finished'] = True
483
-
484
- client.session.update_step(**kwargs)
838
+ raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into create_experiment() or set the LUCIDIC_AGENT_ID environment variable.")
485
839
 
840
+ # combine rubrics into single list
841
+ rubric_names = (pass_fail_rubrics or []) + (score_rubrics or [])
486
842
 
487
- def create_event(
488
- step_id: Optional[str] = None,
489
- description: Optional[str] = None,
490
- result: Optional[str] = None,
491
- cost_added: Optional[float] = None,
492
- model: Optional[str] = None,
493
- screenshots: Optional[List[str]] = None,
494
- function_name: Optional[str] = None,
495
- arguments: Optional[dict] = None,
496
- ) -> str:
497
- """
498
- Create a new event in the current step. Current step must not be finished.
499
-
500
- Args:
501
- description: Description of the event.
502
- result: Result of the event.
503
- cost_added: Cost added by the event.
504
- model: Model used for the event.
505
- screenshots: List of screenshots encoded in base64.
506
- function_name: Name of the function that created the event.
507
- arguments: Arguments of the function that created the event.
508
- """
509
-
843
 + # get current client, which will be the NullClient if lai.init() was never called
510
844
  client = Client()
511
- if not client.session:
512
- return
513
- return client.session.create_event(**locals())
845
+ # if not yet initialized or still the NullClient -> create a real client when init is called
846
+ if not getattr(client, 'initialized', False):
847
+ client = Client(api_key=api_key, agent_id=agent_id)
848
+ else:
849
+ # Already initialized, this is a re-init
850
+ if api_key is not None and agent_id is not None and (api_key != client.api_key or agent_id != client.agent_id):
851
+ client.set_api_key(api_key)
852
+ client.agent_id = agent_id
514
853
 
854
+ # create experiment
855
+ experiment_id = client.create_experiment(experiment_name=experiment_name, rubric_names=rubric_names, description=description, tags=tags)
856
+ logger.info(f"Created experiment with ID: {experiment_id}")
515
857
 
516
- def update_event(
517
- event_id: Optional[str] = None,
518
- description: Optional[str] = None,
519
- result: Optional[str] = None,
520
- cost_added: Optional[float] = None,
521
- model: Optional[str] = None,
522
- screenshots: Optional[List[str]] = None,
523
- function_name: Optional[str] = None,
524
- arguments: Optional[dict] = None,
525
- ) -> None:
526
- """
527
- Update the event with the given ID in the current step.
528
-
529
- Args:
530
- event_id: ID of the event to update.
531
- description: Description of the event.
532
- result: Result of the event.
533
- cost_added: Cost added by the event.
534
- model: Model used for the event.
535
- screenshots: List of screenshots encoded in base64.
536
- function_name: Name of the function that created the event.
537
- arguments: Arguments of the function that created the event.
538
- """
539
- client = Client()
540
- if not client.session:
541
- return
542
- client.session.update_event(**locals())
858
+ return experiment_id
543
859
 
544
860
 
545
- def end_event(
546
- event_id: Optional[str] = None,
547
- description: Optional[str] = None,
548
- result: Optional[str] = None,
549
- cost_added: Optional[float] = None,
550
- model: Optional[str] = None,
551
- screenshots: Optional[List[str]] = None,
552
- function_name: Optional[str] = None,
553
- arguments: Optional[dict] = None,
554
- ) -> None:
555
- """
556
- End the latest event in the current step.
557
-
558
- Args:
559
- event_id: ID of the event to end.
560
- description: Description of the event.
561
- result: Result of the event.
562
- cost_added: Cost added by the event.
563
- model: Model used for the event.
564
- screenshots: List of screenshots encoded in base64.
565
- function_name: Name of the function that created the event.
566
- arguments: Arguments of the function that created the event.
567
- """
861
+ def create_event(
862
+ type: str = "generic",
863
+ **kwargs
864
+ ) -> str:
568
865
  client = Client()
569
866
  if not client.session:
570
867
  return
571
- client.session.update_event(is_finished=True, **locals())
868
+ return client.session.create_event(type=type, **kwargs)
572
869
 
573
870
 
574
871
  def get_prompt(