lucidicai 1.3.5__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lucidicai/__init__.py CHANGED
@@ -7,27 +7,16 @@ import traceback
  import threading
  from typing import List, Literal, Optional
 
+ from dotenv import load_dotenv
+
  from .client import Client
  from .errors import APIKeyVerificationError, InvalidOperationError, LucidicNotInitializedError, PromptError
  from .event import Event
  from .session import Session
- from .step import Step
-
- # Import OpenTelemetry-based handlers
- from .telemetry.otel_handlers import (
- OTelOpenAIHandler,
- OTelAnthropicHandler,
- OTelLangChainHandler,
- OTelPydanticAIHandler,
- OTelOpenAIAgentsHandler,
- OTelLiteLLMHandler
- )
-
- # Import telemetry manager
- from .telemetry.otel_init import LucidicTelemetry
+ from .singleton import clear_singletons
 
  # Import decorators
- from .decorators import step, event
+ from .decorators import event
  from .context import (
  set_active_session,
  bind_session,
@@ -114,13 +103,13 @@ def _post_fatal_event(exit_code: int, description: str, extra: Optional[dict] =
  except Exception:
  pass
 
- event_id = session.create_event(
- description=_mask_and_truncate(description),
- result=f"process exited with code {exit_code}",
- function_name="__process_exit__",
- arguments=arguments,
+ # Create a single immutable event describing the crash
+ session.create_event(
+ type="error_traceback",
+ error=_mask_and_truncate(description),
+ traceback="",
+ metadata={"exit_code": exit_code, **({} if not extra else extra)},
  )
- session.update_event(event_id=event_id, is_finished=True)
  except Exception:
  # Never raise during shutdown
  pass
@@ -146,31 +135,50 @@ def _install_crash_handlers() -> None:
  "exception_message": str(exc),
  "thread_name": threading.current_thread().name,
  })
+
+ # Follow proper shutdown sequence to prevent broken pipes
  try:
- # Prevent auto_end double work
  client = Client()
- try:
- client.auto_end = False
- except Exception:
- pass
- # End session explicitly as unsuccessful
- end_session()
- except Exception:
- pass
- # Best-effort force flush and shutdown telemetry
- try:
- telemetry = LucidicTelemetry()
- if telemetry.is_initialized():
+
+ # 1. Flush OpenTelemetry spans first
+ if hasattr(client, '_tracer_provider'):
+ try:
+ client._tracer_provider.force_flush(timeout_millis=5000)
+ except Exception:
+ pass
+
+ # 2. Flush and shutdown EventQueue (with active sessions cleared)
+ if hasattr(client, "_event_queue"):
  try:
- telemetry.force_flush()
+ # Clear active sessions to allow shutdown
+ client._event_queue._active_sessions.clear()
+ client._event_queue.force_flush()
+ client._event_queue.shutdown(timeout=5.0)
  except Exception:
  pass
+
+ # 3. Shutdown TracerProvider after EventQueue
+ if hasattr(client, '_tracer_provider'):
  try:
- telemetry.uninstrument_all()
+ client._tracer_provider.shutdown()
  except Exception:
  pass
+
+ # 4. Mark client as shutting down to prevent new requests
+ client._shutdown = True
+
+ # 5. Prevent auto_end double work
+ try:
+ client.auto_end = False
+ except Exception:
+ pass
+
+ # 6. End session explicitly as unsuccessful
+ end_session()
+
  except Exception:
  pass
+
  # Chain to original to preserve default printing/behavior
  try:
  _original_sys_excepthook(exc_type, exc, tb)
@@ -187,7 +195,20 @@ def _install_crash_handlers() -> None:
  def _thread_hook(args):
  try:
  if args.thread is threading.main_thread():
+ # For main thread exceptions, use full shutdown sequence
  _sys_hook(args.exc_type, args.exc_value, args.exc_traceback)
+ else:
+ # For non-main threads, just flush spans without full shutdown
+ try:
+ client = Client()
+ # Flush any pending spans from this thread
+ if hasattr(client, '_tracer_provider'):
+ client._tracer_provider.force_flush(timeout_millis=1000)
+ # Force flush events but don't shutdown
+ if hasattr(client, "_event_queue"):
+ client._event_queue.force_flush()
+ except Exception:
+ pass
  except Exception:
  pass
  try:
@@ -199,83 +220,12 @@ def _install_crash_handlers() -> None:
 
  _crash_handlers_installed = True
 
- def _setup_providers(client: Client, providers: List[ProviderType]) -> None:
- """Set up providers for the client, avoiding duplication
-
- Args:
- client: The Lucidic client instance
- providers: List of provider types to set up
- """
- # Track which providers have been set up to avoid duplication
- setup_providers = set()
-
- # Initialize telemetry if using OpenTelemetry
- if providers:
- telemetry = LucidicTelemetry()
- if not telemetry.is_initialized():
- telemetry.initialize(agent_id=client.agent_id)
-
- for provider in providers:
- if provider in setup_providers:
- continue
-
- if provider == "openai":
- client.set_provider(OTelOpenAIHandler())
- setup_providers.add("openai")
- elif provider == "anthropic":
- client.set_provider(OTelAnthropicHandler())
- setup_providers.add("anthropic")
- elif provider == "langchain":
- client.set_provider(OTelLangChainHandler())
- logger.info("For LangChain, make sure to create a handler and attach it to your top-level Agent class.")
- setup_providers.add("langchain")
- elif provider == "pydantic_ai":
- client.set_provider(OTelPydanticAIHandler())
- setup_providers.add("pydantic_ai")
- elif provider == "openai_agents":
- try:
- client.set_provider(OTelOpenAIAgentsHandler())
- setup_providers.add("openai_agents")
- except Exception as e:
- logger.error(f"Failed to set up OpenAI Agents provider: {e}")
- raise
- elif provider == "litellm":
- client.set_provider(OTelLiteLLMHandler())
- setup_providers.add("litellm")
- elif provider in ("bedrock", "aws_bedrock", "amazon_bedrock"):
- from .telemetry.otel_handlers import OTelBedrockHandler
- client.set_provider(OTelBedrockHandler())
- setup_providers.add("bedrock")
- elif provider in ("google", "google_generativeai"):
- from .telemetry.otel_handlers import OTelGoogleGenerativeAIHandler
- client.set_provider(OTelGoogleGenerativeAIHandler())
- setup_providers.add("google")
- elif provider in ("vertexai", "vertex_ai"):
- from .telemetry.otel_handlers import OTelVertexAIHandler
- client.set_provider(OTelVertexAIHandler())
- setup_providers.add("vertexai")
- elif provider == "cohere":
- from .telemetry.otel_handlers import OTelCohereHandler
- client.set_provider(OTelCohereHandler())
- setup_providers.add("cohere")
- elif provider == "groq":
- from .telemetry.otel_handlers import OTelGroqHandler
- client.set_provider(OTelGroqHandler())
- setup_providers.add("groq")
-
  __all__ = [
- 'Client',
  'Session',
- 'Step',
  'Event',
  'init',
- 'continue_session',
- 'create_step',
- 'end_step',
- 'update_step',
+ 'create_experiment',
  'create_event',
- 'update_event',
- 'end_event',
  'end_session',
  'get_prompt',
  'get_session',
@@ -284,7 +234,6 @@ __all__ = [
  'LucidicNotInitializedError',
  'PromptError',
  'InvalidOperationError',
- 'step',
  'event',
  'set_active_session',
  'bind_session',
@@ -305,7 +254,6 @@ def init(
  task: Optional[str] = None,
  providers: Optional[List[ProviderType]] = [],
  production_monitoring: Optional[bool] = False,
- mass_sim_id: Optional[str] = None,
  experiment_id: Optional[str] = None,
  rubrics: Optional[list] = None,
  tags: Optional[list] = None,
@@ -323,7 +271,6 @@ def init(
  agent_id: Agent ID. If not provided, will use the LUCIDIC_AGENT_ID environment variable.
  task: Task description.
  providers: List of provider types ("openai", "anthropic", "langchain", "pydantic_ai").
- mass_sim_id: Optional mass simulation ID, if session is to be part of a mass simulation.
  experiment_id: Optional experiment ID, if session is to be part of an experiment.
  rubrics: Optional rubrics for evaluation, list of strings.
  tags: Optional tags for the session, list of strings.
@@ -334,6 +281,11 @@ def init(
  InvalidOperationError: If the client is already initialized.
  APIKeyVerificationError: If the API key is invalid.
  """
+
+ load_dotenv()
+
+ if os.getenv("LUCIDIC_DEBUG", "False").lower() == "true":
+ logger.setLevel(logging.DEBUG)
 
  # get current client which will be NullClient if never lai is never initialized
  client = Client()
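
Worth noting for callers: the new init() path loads a local .env file via load_dotenv() and switches the SDK logger to DEBUG when LUCIDIC_DEBUG is set, before the client is created. A minimal sketch of relying on this behavior (the session name and provider list below are placeholders, not values from this diff):

```python
import os

# LUCIDIC_DEBUG is read inside lai.init(); other settings such as LUCIDIC_API_KEY
# and LUCIDIC_AGENT_ID can come from a .env file, which init() now loads itself.
os.environ.setdefault("LUCIDIC_DEBUG", "true")

import lucidicai as lai

session_id = lai.init(
    session_name="example-run",   # placeholder
    providers=["openai"],         # forwarded to client.initialize_telemetry()
)
```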
@@ -363,10 +315,13 @@ def init(
  auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
 
  # Set up providers
- _setup_providers(client, providers)
+ # Use the client's singleton telemetry initialization
+ if providers:
+ success = client.initialize_telemetry(providers)
+ if not success:
+ logger.warning("[Telemetry] Failed to initialize telemetry for some providers")
  real_session_id = client.init_session(
  session_name=session_name,
- mass_sim_id=mass_sim_id,
  task=task,
  rubrics=rubrics,
  tags=tags,
@@ -388,6 +343,12 @@ def init(
  try:
  if capture_uncaught:
  _install_crash_handlers()
+ # Also install error event handler for uncaught exceptions
+ try:
+ from .errors import install_error_handler
+ install_error_handler()
+ except Exception:
+ pass
  except Exception:
  pass
 
@@ -395,52 +356,6 @@ def init(
  return real_session_id
 
 
- def continue_session(
- session_id: str,
- api_key: Optional[str] = None,
- agent_id: Optional[str] = None,
- providers: Optional[List[ProviderType]] = [],
- masking_function = None,
- auto_end: Optional[bool] = True,
- ):
- if api_key is None:
- api_key = os.getenv("LUCIDIC_API_KEY", None)
- if api_key is None:
- raise APIKeyVerificationError("Make sure to either pass your API key into lai.init() or set the LUCIDIC_API_KEY environment variable.")
- if agent_id is None:
- agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
- if agent_id is None:
- raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into lai.init() or set the LUCIDIC_AGENT_ID environment variable.")
-
- client = Client()
- if client.session:
- raise InvalidOperationError("[Lucidic] Session already in progress. Please call lai.end_session() or lai.reset_sdk() first.")
- # if not yet initialized or still the NullClient -> create a real client when init is called
- if not getattr(client, 'initialized', False):
- client = Client(api_key=api_key, agent_id=agent_id)
-
- # Handle auto_end with environment variable support
- if auto_end is None:
- auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
-
- # Set up providers
- _setup_providers(client, providers)
- session_id = client.continue_session(session_id=session_id)
- if masking_function:
- client.masking_function = masking_function
-
- # Set the auto_end flag on the client
- client.auto_end = auto_end
-
- logger.info(f"Session {session_id} continuing...")
- # Bind this session id to the current execution context for async-safety
- try:
- set_active_session(session_id)
- except Exception:
- pass
- return session_id # For consistency
-
-
  def update_session(
  task: Optional[str] = None,
  session_eval: Optional[float] = None,
@@ -478,7 +393,8 @@ def end_session(
  session_eval: Optional[float] = None,
  session_eval_reason: Optional[str] = None,
  is_successful: Optional[bool] = None,
- is_successful_reason: Optional[str] = None
+ is_successful_reason: Optional[str] = None,
+ wait_for_flush: bool = True
  ) -> None:
  """
  End the current session.
@@ -488,6 +404,8 @@ def end_session(
  session_eval_reason: Session evaluation reason.
  is_successful: Whether the session was successful.
  is_successful_reason: Session success reason.
+ wait_for_flush: Whether to block until event queue is empty (default True).
+ Set to False during signal handling to prevent hangs.
  """
  client = Client()
  # Prefer context-bound session id
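
A short sketch of the two call styles implied by the new wait_for_flush flag (assuming a session is already active):

```python
import lucidicai as lai

# Default: block until spans and queued events (including blob uploads) are flushed.
lai.end_session(is_successful=True)

# Non-blocking variant used by the signal/atexit paths to avoid hangs;
# the exit handlers finish draining the queue afterwards.
lai.end_session(is_successful=False, wait_for_flush=False)
```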
@@ -501,49 +419,179 @@ def end_session(
  if not target_sid:
  return
 
- # If ending the globally active session, keep existing cleanup behavior
+ # If ending the globally active session, perform cleanup
  if client.session and client.session.session_id == target_sid:
- # Wait for any pending LiteLLM callbacks before ending session
- for provider in client.providers:
- if hasattr(provider, '_callback') and hasattr(provider._callback, 'wait_for_pending_callbacks'):
- logger.info("Waiting for LiteLLM callbacks to complete before ending session...")
- provider._callback.wait_for_pending_callbacks(timeout=5.0)
- client.session.update_session(is_finished=True, **locals())
- client.clear()
+ # Best-effort: wait for LiteLLM callbacks to flush before ending
+ try:
+ import litellm
+ cbs = getattr(litellm, 'callbacks', None)
+ if cbs:
+ for cb in cbs:
+ try:
+ if hasattr(cb, 'wait_for_pending_callbacks'):
+ cb.wait_for_pending_callbacks(timeout=1)
+ except Exception:
+ pass
+ except Exception:
+ pass
+ # CRITICAL: Flush OpenTelemetry spans FIRST (blocking)
+ # This ensures all spans are converted to events before we flush the event queue
+ try:
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
+ logger.debug("[Session] Flushing OpenTelemetry spans before session end...")
+ # Force flush with generous timeout to ensure all spans are exported
+ # The BatchSpanProcessor now exports every 100ms, so this should be quick
+ success = client._tracer_provider.force_flush(timeout_millis=10000) # 10 second timeout
+ if not success:
+ logger.warning("[Session] OpenTelemetry flush timed out - some spans may be lost")
+ else:
+ logger.debug("[Session] OpenTelemetry spans flushed successfully")
+ except Exception as e:
+ logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
+
+ # THEN flush event queue (which now contains events from flushed spans)
+ try:
+ if hasattr(client, '_event_queue'):
+ logger.debug("[Session] Flushing event queue...")
+ client._event_queue.force_flush(timeout_seconds=10.0)
+
+ # Wait for queue to be completely empty (only if blocking)
+ if wait_for_flush:
+ import time
+ wait_start = time.time()
+ max_wait = 10.0 # seconds - timeout for blob uploads
+ while not client._event_queue.is_empty():
+ if time.time() - wait_start > max_wait:
+ logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
+ break
+ time.sleep(0.1)
+
+ if client._event_queue.is_empty():
+ logger.debug("[Session] EventQueue confirmed empty")
+ else:
+ logger.debug("[Session] Non-blocking mode - skipping wait for empty queue")
+ except Exception as e:
+ logger.debug(f"[Session] Failed to flush event queue: {e}")
+
+ # Mark session as inactive FIRST (prevents race conditions)
+ client.mark_session_inactive(target_sid)
+
+ # Send only expected fields to update endpoint
+ update_kwargs = {
+ "is_finished": True,
+ "session_eval": session_eval,
+ "session_eval_reason": session_eval_reason,
+ "is_successful": is_successful,
+ "is_successful_reason": is_successful_reason,
+ }
+ try:
+ client.session.update_session(**update_kwargs)
+ except Exception as e:
+ logger.warning(f"[Session] Failed to update session: {e}")
+
+ # Clear only the global session reference, not the singleton
+ # This preserves the client and event queue for other threads
+ client.session = None
+ logger.debug(f"[Session] Ended global session {target_sid}")
+ # DO NOT shutdown event queue - other threads may be using it
+ # DO NOT call client.clear() - preserve singleton for other threads
  return
 
  # Otherwise, end the specified session id without clearing global state
+ # First flush telemetry and event queue for non-global sessions too
+ try:
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
+ logger.debug(f"[Session] Flushing OpenTelemetry spans for session {target_sid[:8]}...")
+ success = client._tracer_provider.force_flush(timeout_millis=10000)
+ if not success:
+ logger.warning("[Session] OpenTelemetry flush timed out")
+ except Exception as e:
+ logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
+
+ # Flush and wait for event queue to empty
+ try:
+ if hasattr(client, '_event_queue'):
+ logger.debug(f"[Session] Flushing event queue for session {target_sid[:8]}...")
+ client._event_queue.force_flush(timeout_seconds=10.0)
+
+ # Wait for queue to be completely empty (only if blocking)
+ if wait_for_flush:
+ import time
+ wait_start = time.time()
+ max_wait = 10.0 # seconds - timeout for blob uploads
+ while not client._event_queue.is_empty():
+ if time.time() - wait_start > max_wait:
+ logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
+ break
+ time.sleep(0.1)
+
+ if client._event_queue.is_empty():
+ logger.debug(f"[Session] EventQueue confirmed empty for session {target_sid[:8]}")
+ else:
+ logger.debug(f"[Session] Non-blocking mode - skipping wait for session {target_sid[:8]}")
+ except Exception as e:
+ logger.debug(f"[Session] Failed to flush event queue: {e}")
+
+ # CRITICAL: Mark session as inactive FIRST for ALL sessions
+ client.mark_session_inactive(target_sid)
+
  temp = Session(agent_id=client.agent_id, session_id=target_sid)
- temp.update_session(is_finished=True, **locals())
+ update_kwargs = {
+ "is_finished": True,
+ "session_eval": session_eval,
+ "session_eval_reason": session_eval_reason,
+ "is_successful": is_successful,
+ "is_successful_reason": is_successful_reason,
+ }
+ try:
+ temp.update_session(**update_kwargs)
+ except Exception as e:
+ logger.warning(f"[Session] Failed to update session: {e}")
 
 
- def reset_sdk() -> None:
- """
- DEPRECATED: Reset the SDK.
+ def flush(timeout_seconds: float = 2.0) -> bool:
  """
- return
-
- client = Client()
- if not client.initialized:
- return
+ Manually flush all pending telemetry data.
 
- # Shutdown OpenTelemetry if it was initialized
- telemetry = LucidicTelemetry()
- if telemetry.is_initialized():
- telemetry.uninstrument_all()
+ Flushes both OpenTelemetry spans and queued events to ensure
+ all telemetry data is sent to the backend. This is called
+ automatically on process exit but can be called manually
+ for explicit control.
 
- client.clear()
-
-
- def _cleanup_telemetry():
- """Cleanup function for OpenTelemetry shutdown"""
+ Args:
+ timeout_seconds: Maximum time to wait for flush
+
+ Returns:
+ True if all flushes succeeded, False otherwise
+
+ Example:
+ ```python
+ import lucidicai as lai
+
+ # ... your code using Lucidic ...
+
+ # Manually flush before critical operation
+ lai.flush()
+ ```
+ """
  try:
- telemetry = LucidicTelemetry()
- if telemetry.is_initialized():
- telemetry.uninstrument_all()
- logger.info("OpenTelemetry instrumentation cleaned up")
+ client = Client()
+ success = True
+
+ # Flush OpenTelemetry spans first
+ if hasattr(client, 'flush_telemetry'):
+ span_success = client.flush_telemetry(timeout_seconds)
+ success = success and span_success
+
+ # Then flush event queue
+ if hasattr(client, '_event_queue'):
+ client._event_queue.force_flush(timeout_seconds)
+
+ logger.debug(f"[Flush] Manual flush completed (success={success})")
+ return success
  except Exception as e:
- logger.error(f"Error during telemetry cleanup: {e}")
+ logger.error(f"Failed to flush telemetry: {e}")
+ return False
 
 
  def _auto_end_session():
@@ -553,13 +601,129 @@ def _auto_end_session():
  if hasattr(client, 'auto_end') and client.auto_end and client.session and not client.session.is_finished:
  logger.info("Auto-ending active session on exit")
  client.auto_end = False # To avoid repeating auto-end on exit
- end_session()
+
+ # Flush telemetry
+ if hasattr(client, '_tracer_provider'):
+ client._tracer_provider.force_flush(timeout_millis=5000)
+
+ # Force flush event queue before ending session
+ if hasattr(client, '_event_queue'):
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug("[Shutdown] Flushing event queue before session end")
+ client._event_queue.force_flush(timeout_seconds=5.0)
+
+ # Use non-blocking mode during shutdown to prevent hangs
+ # The actual wait for queue empty happens in _cleanup_singleton_on_exit
+ end_session(wait_for_flush=False)
+
  except Exception as e:
  logger.debug(f"Error during auto-end session: {e}")
 
 
+ def _cleanup_singleton_on_exit():
+ """
+ Clean up singleton resources only on process exit.
+
+ CRITICAL ORDER:
+ 1. Flush OpenTelemetry spans (blocking) - ensures spans become events
+ 2. Flush EventQueue - sends all events including those from spans
+ 3. Close HTTP session - graceful TCP FIN prevents broken pipes
+ 4. Clear singletons - final cleanup
+
+ This order is essential to prevent lost events and broken connections.
+ """
+ try:
+ client = Client()
+
+ # 1. FIRST: Flush OpenTelemetry spans (blocking until exported)
+ # This is the critical fix - we must flush spans before events
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
+ try:
+ # Small delay to ensure spans have reached the processor
+ import time
+ time.sleep(0.1) # 100ms to let spans reach BatchSpanProcessor
+
+ logger.debug("[Exit] Flushing OpenTelemetry spans...")
+ # force_flush() blocks until all spans are exported or timeout
+ success = client._tracer_provider.force_flush(timeout_millis=3000)
+ if success:
+ logger.debug("[Exit] OpenTelemetry spans flushed successfully")
+ else:
+ logger.warning("[Exit] OpenTelemetry flush timed out - some spans may be lost")
+
+ # DON'T shutdown TracerProvider yet - wait until after EventQueue
+ # This prevents losing spans that are still being processed
+ except Exception as e:
+ logger.debug(f"[Exit] Telemetry cleanup error: {e}")
+
+ # 2. SECOND: Flush and shutdown EventQueue
+ # Now it contains all events from the flushed spans
+ if hasattr(client, '_event_queue'):
+ try:
+ logger.debug("[Exit] Flushing event queue...")
+ client._event_queue.force_flush(timeout_seconds=2.0)
+
+ # Wait for queue to be completely empty before proceeding
+ import time
+ max_wait = 5.0 # seconds
+ start_time = time.time()
+ while not client._event_queue.is_empty():
+ if time.time() - start_time > max_wait:
+ logger.warning("[Exit] EventQueue not empty after timeout")
+ break
+ time.sleep(0.01) # Small sleep to avoid busy waiting
+
+ if client._event_queue.is_empty():
+ logger.debug("[Exit] EventQueue is empty, proceeding with shutdown")
+
+ # Clear any stale active sessions (threads may have died without cleanup)
+ if hasattr(client, '_active_sessions'):
+ with client._active_sessions_lock:
+ if client._active_sessions:
+ logger.debug(f"[Exit] Clearing {len(client._active_sessions)} remaining active sessions")
+ client._active_sessions.clear()
+
+ # Now shutdown EventQueue
+ client._event_queue.shutdown()
+ logger.debug("[Exit] Event queue shutdown complete")
+ except Exception as e:
+ logger.debug(f"[Exit] Event queue cleanup error: {e}")
+
+ # 3. THIRD: Shutdown TracerProvider after EventQueue is done
+ # This ensures all spans can be exported before shutdown
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
+ try:
+ logger.debug("[Exit] Shutting down TracerProvider...")
+ client._tracer_provider.shutdown()
+ logger.debug("[Exit] TracerProvider shutdown complete")
+ except Exception as e:
+ logger.debug(f"[Exit] TracerProvider shutdown error: {e}")
+
+ # 4. FOURTH: Close HTTP session ONLY after everything else
+ # This prevents broken pipes by ensuring all events are sent first
+ if hasattr(client, 'request_session'):
+ try:
+ # Mark client as shutting down to prevent new requests
+ client._shutdown = True
+ logger.debug("[Exit] Closing HTTP session (queue empty, worker stopped)")
+ client.request_session.close()
+ logger.debug("[Exit] HTTP session closed gracefully")
+ except Exception as e:
+ logger.debug(f"[Exit] HTTP session cleanup error: {e}")
+
+ # 5. FINALLY: Clear singletons
+ # Safe to destroy now that all data is flushed
+ clear_singletons()
+ logger.debug("[Exit] Singleton cleanup complete")
+
+ except Exception as e:
+ # Silent fail on exit to avoid disrupting process termination
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug(f"[Exit] Cleanup error: {e}")
+
+
  def _signal_handler(signum, frame):
- """Handle interruption signals"""
+ """Handle interruption signals with better queue flushing."""
  # Best-effort final event for signal exits
  try:
  try:
@@ -574,245 +738,134 @@ def _signal_handler(signum, frame):
  _post_fatal_event(128 + signum, desc, {"signal": name, "signum": signum})
  except Exception:
  pass
+
+ # Proper shutdown sequence matching atexit handler
+ try:
+ client = Client()
+
+ # 1. FIRST: Flush OpenTelemetry spans
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
+ try:
+ logger.debug(f"[Signal] Flushing OpenTelemetry spans on signal {signum}")
+ client._tracer_provider.force_flush(timeout_millis=2000) # Shorter timeout for signals
+ except Exception:
+ pass
+
+ # 2. SECOND: Flush and shutdown EventQueue
+ if hasattr(client, "_event_queue"):
+ logger.debug(f"[Signal] Flushing event queue on signal {signum}")
+ client._event_queue.force_flush(timeout_seconds=2.0)
+
+ # Clear active sessions to allow shutdown
+ if hasattr(client, '_active_sessions'):
+ with client._active_sessions_lock:
+ client._active_sessions.clear()
+
+ client._event_queue.shutdown()
+
+ # 3. THIRD: Shutdown TracerProvider after EventQueue
+ if hasattr(client, '_tracer_provider') and client._tracer_provider:
+ logger.debug(f"[Signal] Shutting down TracerProvider on signal {signum}")
+ try:
+ client._tracer_provider.shutdown()
+ except Exception:
+ pass
+
+ # 4. Mark client as shutting down
+ client._shutdown = True
+
+ except Exception:
+ pass
+
+ logger.debug(f"[Signal] Auto-ending session on signal {signum}")
  _auto_end_session()
- _cleanup_telemetry()
  # Re-raise the signal for default handling
  signal.signal(signum, signal.SIG_DFL)
  os.kill(os.getpid(), signum)
 
 
- # Register cleanup functions (auto-end runs first due to LIFO order)
- atexit.register(_cleanup_telemetry)
- atexit.register(_auto_end_session)
+ # Register cleanup functions
+ atexit.register(_cleanup_singleton_on_exit) # Clean up singleton resources on exit
+ atexit.register(_auto_end_session) # Auto-end session if enabled
 
  # Register signal handlers for graceful shutdown
  signal.signal(signal.SIGINT, _signal_handler)
  signal.signal(signal.SIGTERM, _signal_handler)
 
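
The registration order still matters here: atexit runs handlers in last-in, first-out order, so _auto_end_session (registered last) executes before _cleanup_singleton_on_exit, letting the session end before the singletons and queues are torn down. A standalone illustration of that LIFO behavior:

```python
import atexit

def cleanup_singletons():
    print("runs second: tear down shared resources")

def auto_end_session():
    print("runs first: end the active session")

# atexit executes callbacks in reverse registration order (LIFO),
# so auto_end_session() fires before cleanup_singletons() at interpreter exit.
atexit.register(cleanup_singletons)
atexit.register(auto_end_session)
```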
 
- def create_mass_sim(
- mass_sim_name: str,
- total_num_sessions: int,
+ def create_experiment(
+ experiment_name: str,
+ pass_fail_rubrics: Optional[list] = None,
+ score_rubrics: Optional[list] = None,
+ description: Optional[str] = None,
+ tags: Optional[list] = None,
  api_key: Optional[str] = None,
  agent_id: Optional[str] = None,
- task: Optional[str] = None,
- tags: Optional[list] = None
  ) -> str:
  """
- Create a new mass simulation.
-
- Args:
- mass_sim_name: Name of the mass simulation.
- total_num_sessions: Total intended number of sessions. More sessions can be added later.
- api_key: API key for authentication. If not provided, will use the LUCIDIC_API_KEY environment variable.
- agent_id: Agent ID. If not provided, will use the LUCIDIC_AGENT_ID environment variable.
+ Create a new experiment for grouping and analyzing sessions.
+
+ Args:
+ experiment_name: Name of the experiment (required)
+ pass_fail_rubrics: List of pass/fail rubric names to associate
+ description: Description of the experiment
  task: Task description.
- tags: Tags for the mass simulation.
-
- Returns:
- mass_sim_id: ID of the created mass simulation. Pass this to lai.init() to create a new session in the mass sim.
+ tags: List of tags for categorization
+ score_rubrics: List of score rubric names to associate
+ api_key: API key (uses env if not provided)
+ agent_id: Agent ID (uses env if not provided)
+
+ Returns:
+ experiment_id: UUID of the created experiment
+
+ Raises:
+ APIKeyVerificationError: If API key is invalid or missing
+ InvalidOperationError: If experiment creation fails
+ ValueError: If name is empty
  """
+
+ # validation
+ if not experiment_name:
+ raise ValueError("Experiment name is required")
+
  if api_key is None:
  api_key = os.getenv("LUCIDIC_API_KEY", None)
  if api_key is None:
- raise APIKeyVerificationError("Make sure to either pass your API key into lai.init() or set the LUCIDIC_API_KEY environment variable.")
+ raise APIKeyVerificationError("Make sure to either pass your API key into create_experiment() or set the LUCIDIC_API_KEY environment variable.")
  if agent_id is None:
  agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
  if agent_id is None:
- raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into lai.init() or set the LUCIDIC_AGENT_ID environment variable.")
- try:
- client = Client()
- except LucidicNotInitializedError:
- client = Client( # TODO: fail hard if incorrect API key or agent ID provided and wrong, fail silently if not provided
- api_key=api_key,
- agent_id=agent_id,
- )
- mass_sim_id = client.init_mass_sim(mass_sim_name=mass_sim_name, total_num_sims=total_num_sessions, task=task, tags=tags) # TODO: change total_num_sims to total_num_sessions everywhere
- logger.info(f"Created mass simulation with ID: {mass_sim_id}")
- return mass_sim_id
-
-
- def create_step(
- state: Optional[str] = None,
- action: Optional[str] = None,
- goal: Optional[str] = None,
- eval_score: Optional[float] = None,
- eval_description: Optional[str] = None,
- screenshot: Optional[str] = None,
- screenshot_path: Optional[str] = None
- ) -> None:
- """
- Create a new step. Previous step must be finished to create a new step.
-
- Args:
- state: State description.
- action: Action description.
- goal: Goal description.
- eval_score: Evaluation score.
- eval_description: Evaluation description.
- screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
- screenshot_path: Screenshot path. Provide either screenshot or screenshot_path.
- """
- client = Client()
- if not client.session:
- return
- return client.session.create_step(**locals())
-
-
- def update_step(
- step_id: Optional[str] = None,
- state: Optional[str] = None,
- action: Optional[str] = None,
- goal: Optional[str] = None,
- eval_score: Optional[float] = None,
- eval_description: Optional[str] = None,
- screenshot: Optional[str] = None,
- screenshot_path: Optional[str] = None
- ) -> None:
- """
- Update the current step.
-
- Args:
- step_id: ID of the step to update.
- state: State description.
- action: Action description.
- goal: Goal description.
- eval_score: Evaluation score.
- eval_description: Evaluation description.
- screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
- screenshot_path: Screenshot path. Provide either screenshot or screenshot_path.
- """
- client = Client()
- if not client.session:
- return
- if not client.session.active_step:
- raise InvalidOperationError("No active step to update")
- client.session.update_step(**locals())
-
-
- def end_step(
- step_id: Optional[str] = None,
- state: Optional[str] = None,
- action: Optional[str] = None,
- goal: Optional[str] = None,
- eval_score: Optional[float] = None,
- eval_description: Optional[str] = None,
- screenshot: Optional[str] = None,
- screenshot_path: Optional[str] = None
- ) -> None:
- """
- End the current step.
-
- Args:
- step_id: ID of the step to end.
- state: State description.
- action: Action description.
- goal: Goal description.
- eval_score: Evaluation score.
- eval_description: Evaluation description.
- screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
- screenshot_path: Screenshot path.
- """
- client = Client()
- if not client.session:
- return
-
- if not client.session.active_step and step_id is None:
- raise InvalidOperationError("No active step to end")
-
- # Filter out None values from locals
- params = locals()
- kwargs = {k: v for k, v in params.items() if v is not None and k not in ['client', 'params']}
- kwargs['is_finished'] = True
-
- client.session.update_step(**kwargs)
+ raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into create_experiment() or set the LUCIDIC_AGENT_ID environment variable.")
 
+ # combine rubrics into single list
+ rubric_names = (pass_fail_rubrics or []) + (score_rubrics or [])
 
- def create_event(
- step_id: Optional[str] = None,
- description: Optional[str] = None,
- result: Optional[str] = None,
- cost_added: Optional[float] = None,
- model: Optional[str] = None,
- screenshots: Optional[List[str]] = None,
- function_name: Optional[str] = None,
- arguments: Optional[dict] = None,
- ) -> str:
- """
- Create a new event in the current step. Current step must not be finished.
-
- Args:
- description: Description of the event.
- result: Result of the event.
- cost_added: Cost added by the event.
- model: Model used for the event.
- screenshots: List of screenshots encoded in base64.
- function_name: Name of the function that created the event.
- arguments: Arguments of the function that created the event.
- """
-
+ # get current client which will be NullClient if never lai.init() is never called
  client = Client()
- if not client.session:
- return
- return client.session.create_event(**locals())
+ # if not yet initialized or still the NullClient -> create a real client when init is called
+ if not getattr(client, 'initialized', False):
+ client = Client(api_key=api_key, agent_id=agent_id)
+ else:
+ # Already initialized, this is a re-init
+ if api_key is not None and agent_id is not None and (api_key != client.api_key or agent_id != client.agent_id):
+ client.set_api_key(api_key)
+ client.agent_id = agent_id
 
+ # create experiment
+ experiment_id = client.create_experiment(experiment_name=experiment_name, rubric_names=rubric_names, description=description, tags=tags)
+ logger.info(f"Created experiment with ID: {experiment_id}")
 
- def update_event(
- event_id: Optional[str] = None,
- description: Optional[str] = None,
- result: Optional[str] = None,
- cost_added: Optional[float] = None,
- model: Optional[str] = None,
- screenshots: Optional[List[str]] = None,
- function_name: Optional[str] = None,
- arguments: Optional[dict] = None,
- ) -> None:
- """
- Update the event with the given ID in the current step.
-
- Args:
- event_id: ID of the event to update.
- description: Description of the event.
- result: Result of the event.
- cost_added: Cost added by the event.
- model: Model used for the event.
- screenshots: List of screenshots encoded in base64.
- function_name: Name of the function that created the event.
- arguments: Arguments of the function that created the event.
- """
- client = Client()
- if not client.session:
- return
- client.session.update_event(**locals())
+ return experiment_id
 
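
A hedged usage sketch of the new create_experiment helper, based only on the signature and docstring above (the experiment name, rubric names, and tags are placeholders); the returned ID is what init() accepts through its experiment_id parameter:

```python
import lucidicai as lai

experiment_id = lai.create_experiment(
    experiment_name="prompt-v2-eval",        # placeholder
    pass_fail_rubrics=["task_completed"],    # placeholder rubric names
    score_rubrics=["answer_quality"],
    description="Compare prompt v2 against the baseline",
    tags=["prompt-v2"],
)

# Sessions can then be grouped under the experiment:
lai.init(session_name="run-001", experiment_id=experiment_id)
```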
 
- def end_event(
- event_id: Optional[str] = None,
- description: Optional[str] = None,
- result: Optional[str] = None,
- cost_added: Optional[float] = None,
- model: Optional[str] = None,
- screenshots: Optional[List[str]] = None,
- function_name: Optional[str] = None,
- arguments: Optional[dict] = None,
- ) -> None:
- """
- End the latest event in the current step.
-
- Args:
- event_id: ID of the event to end.
- description: Description of the event.
- result: Result of the event.
- cost_added: Cost added by the event.
- model: Model used for the event.
- screenshots: List of screenshots encoded in base64.
- function_name: Name of the function that created the event.
- arguments: Arguments of the function that created the event.
- """
+ def create_event(
+ type: str = "generic",
+ **kwargs
+ ) -> str:
  client = Client()
  if not client.session:
  return
- client.session.update_event(is_finished=True, **locals())
+ return client.session.create_event(type=type, **kwargs)
 
 
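The replacement create_event is now a thin pass-through: type defaults to "generic" and every other field travels through **kwargs to Session.create_event, whose accepted keys are defined outside this file. A sketch under that assumption:

```python
import lucidicai as lai

lai.init(session_name="run-002")   # placeholder session name

# Only `type` is named in the new signature; other fields are forwarded as **kwargs,
# so the keys below are illustrative assumptions, not documented parameters.
lai.create_event(
    type="generic",
    description="fetched user profile",
)

lai.end_session(is_successful=True)
```
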
  def get_prompt(