lucidicai 1.3.5__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lucidicai/__init__.py CHANGED
@@ -7,27 +7,16 @@ import traceback
 import threading
 from typing import List, Literal, Optional
 
+from dotenv import load_dotenv
+
 from .client import Client
 from .errors import APIKeyVerificationError, InvalidOperationError, LucidicNotInitializedError, PromptError
 from .event import Event
 from .session import Session
-from .step import Step
-
-# Import OpenTelemetry-based handlers
-from .telemetry.otel_handlers import (
-    OTelOpenAIHandler,
-    OTelAnthropicHandler,
-    OTelLangChainHandler,
-    OTelPydanticAIHandler,
-    OTelOpenAIAgentsHandler,
-    OTelLiteLLMHandler
-)
-
-# Import telemetry manager
-from .telemetry.otel_init import LucidicTelemetry
+from .singleton import clear_singletons
 
 # Import decorators
-from .decorators import step, event
+from .decorators import event
 from .context import (
     set_active_session,
     bind_session,
@@ -39,6 +28,17 @@ from .context import (
     run_session,
     run_in_session,
 )
+from .dataset import get_dataset, get_dataset_items
+from .feature_flag import (
+    get_feature_flag,
+    get_bool_flag,
+    get_int_flag,
+    get_float_flag,
+    get_string_flag,
+    get_json_flag,
+    clear_feature_flag_cache,
+    FeatureFlagError
+)
 
 ProviderType = Literal[
     "openai",
@@ -114,13 +114,13 @@ def _post_fatal_event(exit_code: int, description: str, extra: Optional[dict] =
         except Exception:
             pass
 
-        event_id = session.create_event(
-            description=_mask_and_truncate(description),
-            result=f"process exited with code {exit_code}",
-            function_name="__process_exit__",
-            arguments=arguments,
+        # Create a single immutable event describing the crash
+        session.create_event(
+            type="error_traceback",
+            error=_mask_and_truncate(description),
+            traceback="",
+            metadata={"exit_code": exit_code, **({} if not extra else extra)},
         )
-        session.update_event(event_id=event_id, is_finished=True)
     except Exception:
         # Never raise during shutdown
         pass
@@ -146,31 +146,50 @@ def _install_crash_handlers() -> None:
                 "exception_message": str(exc),
                 "thread_name": threading.current_thread().name,
             })
+
+        # Follow proper shutdown sequence to prevent broken pipes
         try:
-            # Prevent auto_end double work
             client = Client()
-            try:
-                client.auto_end = False
-            except Exception:
-                pass
-            # End session explicitly as unsuccessful
-            end_session()
-        except Exception:
-            pass
-        # Best-effort force flush and shutdown telemetry
-        try:
-            telemetry = LucidicTelemetry()
-            if telemetry.is_initialized():
+
+            # 1. Flush OpenTelemetry spans first
+            if hasattr(client, '_tracer_provider'):
+                try:
+                    client._tracer_provider.force_flush(timeout_millis=5000)
+                except Exception:
+                    pass
+
+            # 2. Flush and shutdown EventQueue (with active sessions cleared)
+            if hasattr(client, "_event_queue"):
                 try:
-                    telemetry.force_flush()
+                    # Clear active sessions to allow shutdown
+                    client._event_queue._active_sessions.clear()
+                    client._event_queue.force_flush()
+                    client._event_queue.shutdown(timeout=5.0)
                 except Exception:
                     pass
+
+            # 3. Shutdown TracerProvider after EventQueue
+            if hasattr(client, '_tracer_provider'):
                 try:
-                    telemetry.uninstrument_all()
+                    client._tracer_provider.shutdown()
                 except Exception:
                     pass
+
+            # 4. Mark client as shutting down to prevent new requests
+            client._shutdown = True
+
+            # 5. Prevent auto_end double work
+            try:
+                client.auto_end = False
+            except Exception:
+                pass
+
+            # 6. End session explicitly as unsuccessful
+            end_session()
+
         except Exception:
             pass
+
         # Chain to original to preserve default printing/behavior
         try:
            _original_sys_excepthook(exc_type, exc, tb)
@@ -187,7 +206,20 @@ def _install_crash_handlers() -> None:
     def _thread_hook(args):
         try:
             if args.thread is threading.main_thread():
+                # For main thread exceptions, use full shutdown sequence
                 _sys_hook(args.exc_type, args.exc_value, args.exc_traceback)
+            else:
+                # For non-main threads, just flush spans without full shutdown
+                try:
+                    client = Client()
+                    # Flush any pending spans from this thread
+                    if hasattr(client, '_tracer_provider'):
+                        client._tracer_provider.force_flush(timeout_millis=1000)
+                    # Force flush events but don't shutdown
+                    if hasattr(client, "_event_queue"):
+                        client._event_queue.force_flush()
+                except Exception:
+                    pass
         except Exception:
             pass
         try:
@@ -199,92 +231,30 @@ def _install_crash_handlers() -> None:
 
     _crash_handlers_installed = True
 
-def _setup_providers(client: Client, providers: List[ProviderType]) -> None:
-    """Set up providers for the client, avoiding duplication
-
-    Args:
-        client: The Lucidic client instance
-        providers: List of provider types to set up
-    """
-    # Track which providers have been set up to avoid duplication
-    setup_providers = set()
-
-    # Initialize telemetry if using OpenTelemetry
-    if providers:
-        telemetry = LucidicTelemetry()
-        if not telemetry.is_initialized():
-            telemetry.initialize(agent_id=client.agent_id)
-
-    for provider in providers:
-        if provider in setup_providers:
-            continue
-
-        if provider == "openai":
-            client.set_provider(OTelOpenAIHandler())
-            setup_providers.add("openai")
-        elif provider == "anthropic":
-            client.set_provider(OTelAnthropicHandler())
-            setup_providers.add("anthropic")
-        elif provider == "langchain":
-            client.set_provider(OTelLangChainHandler())
-            logger.info("For LangChain, make sure to create a handler and attach it to your top-level Agent class.")
-            setup_providers.add("langchain")
-        elif provider == "pydantic_ai":
-            client.set_provider(OTelPydanticAIHandler())
-            setup_providers.add("pydantic_ai")
-        elif provider == "openai_agents":
-            try:
-                client.set_provider(OTelOpenAIAgentsHandler())
-                setup_providers.add("openai_agents")
-            except Exception as e:
-                logger.error(f"Failed to set up OpenAI Agents provider: {e}")
-                raise
-        elif provider == "litellm":
-            client.set_provider(OTelLiteLLMHandler())
-            setup_providers.add("litellm")
-        elif provider in ("bedrock", "aws_bedrock", "amazon_bedrock"):
-            from .telemetry.otel_handlers import OTelBedrockHandler
-            client.set_provider(OTelBedrockHandler())
-            setup_providers.add("bedrock")
-        elif provider in ("google", "google_generativeai"):
-            from .telemetry.otel_handlers import OTelGoogleGenerativeAIHandler
-            client.set_provider(OTelGoogleGenerativeAIHandler())
-            setup_providers.add("google")
-        elif provider in ("vertexai", "vertex_ai"):
-            from .telemetry.otel_handlers import OTelVertexAIHandler
-            client.set_provider(OTelVertexAIHandler())
-            setup_providers.add("vertexai")
-        elif provider == "cohere":
-            from .telemetry.otel_handlers import OTelCohereHandler
-            client.set_provider(OTelCohereHandler())
-            setup_providers.add("cohere")
-        elif provider == "groq":
-            from .telemetry.otel_handlers import OTelGroqHandler
-            client.set_provider(OTelGroqHandler())
-            setup_providers.add("groq")
-
 __all__ = [
-    'Client',
     'Session',
-    'Step',
     'Event',
     'init',
-    'continue_session',
-    'create_step',
-    'end_step',
-    'update_step',
+    'create_experiment',
     'create_event',
-    'update_event',
-    'end_event',
     'end_session',
     'get_prompt',
     'get_session',
+    'get_dataset',
+    'get_dataset_items',
+    'get_feature_flag',
+    'get_bool_flag',
+    'get_int_flag',
+    'get_float_flag',
+    'get_string_flag',
+    'get_json_flag',
+    'clear_feature_flag_cache',
+    'FeatureFlagError',
     'ProviderType',
     'APIKeyVerificationError',
     'LucidicNotInitializedError',
     'PromptError',
     'InvalidOperationError',
-    'step',
     'event',
     'set_active_session',
     'bind_session',
@@ -305,10 +275,10 @@ def init(
     task: Optional[str] = None,
     providers: Optional[List[ProviderType]] = [],
     production_monitoring: Optional[bool] = False,
-    mass_sim_id: Optional[str] = None,
     experiment_id: Optional[str] = None,
     rubrics: Optional[list] = None,
     tags: Optional[list] = None,
+    dataset_item_id: Optional[str] = None,
     masking_function = None,
     auto_end: Optional[bool] = True,
     capture_uncaught: Optional[bool] = True,
@@ -323,10 +293,10 @@ def init(
         agent_id: Agent ID. If not provided, will use the LUCIDIC_AGENT_ID environment variable.
         task: Task description.
         providers: List of provider types ("openai", "anthropic", "langchain", "pydantic_ai").
-        mass_sim_id: Optional mass simulation ID, if session is to be part of a mass simulation.
         experiment_id: Optional experiment ID, if session is to be part of an experiment.
         rubrics: Optional rubrics for evaluation, list of strings.
         tags: Optional tags for the session, list of strings.
+        dataset_item_id: Optional dataset item ID to link session to a dataset item.
         masking_function: Optional function to mask sensitive data.
         auto_end: If True, automatically end the session on process exit. Defaults to True.
 
@@ -334,6 +304,11 @@ def init(
         InvalidOperationError: If the client is already initialized.
         APIKeyVerificationError: If the API key is invalid.
     """
+
+    load_dotenv()
+
+    if os.getenv("LUCIDIC_DEBUG", "False").lower() == "true":
+        logger.setLevel(logging.DEBUG)
 
     # get current client which will be NullClient if never lai is never initialized
     client = Client()
@@ -363,16 +338,20 @@ def init(
         auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
 
     # Set up providers
-    _setup_providers(client, providers)
+    # Use the client's singleton telemetry initialization
+    if providers:
+        success = client.initialize_telemetry(providers)
+        if not success:
+            logger.warning("[Telemetry] Failed to initialize telemetry for some providers")
     real_session_id = client.init_session(
         session_name=session_name,
-        mass_sim_id=mass_sim_id,
         task=task,
         rubrics=rubrics,
         tags=tags,
         production_monitoring=production_monitoring,
         session_id=session_id,
         experiment_id=experiment_id,
+        dataset_item_id=dataset_item_id,
     )
     if masking_function:
         client.masking_function = masking_function
@@ -388,6 +367,12 @@ def init(
     try:
         if capture_uncaught:
             _install_crash_handlers()
+            # Also install error event handler for uncaught exceptions
+            try:
+                from .errors import install_error_handler
+                install_error_handler()
+            except Exception:
+                pass
     except Exception:
         pass
 
@@ -395,52 +380,6 @@ def init(
     return real_session_id
 
 
-def continue_session(
-    session_id: str,
-    api_key: Optional[str] = None,
-    agent_id: Optional[str] = None,
-    providers: Optional[List[ProviderType]] = [],
-    masking_function = None,
-    auto_end: Optional[bool] = True,
-):
-    if api_key is None:
-        api_key = os.getenv("LUCIDIC_API_KEY", None)
-    if api_key is None:
-        raise APIKeyVerificationError("Make sure to either pass your API key into lai.init() or set the LUCIDIC_API_KEY environment variable.")
-    if agent_id is None:
-        agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
-    if agent_id is None:
-        raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into lai.init() or set the LUCIDIC_AGENT_ID environment variable.")
-
-    client = Client()
-    if client.session:
-        raise InvalidOperationError("[Lucidic] Session already in progress. Please call lai.end_session() or lai.reset_sdk() first.")
-    # if not yet initialized or still the NullClient -> create a real client when init is called
-    if not getattr(client, 'initialized', False):
-        client = Client(api_key=api_key, agent_id=agent_id)
-
-    # Handle auto_end with environment variable support
-    if auto_end is None:
-        auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
-
-    # Set up providers
-    _setup_providers(client, providers)
-    session_id = client.continue_session(session_id=session_id)
-    if masking_function:
-        client.masking_function = masking_function
-
-    # Set the auto_end flag on the client
-    client.auto_end = auto_end
-
-    logger.info(f"Session {session_id} continuing...")
-    # Bind this session id to the current execution context for async-safety
-    try:
-        set_active_session(session_id)
-    except Exception:
-        pass
-    return session_id # For consistency
-
-
 def update_session(
     task: Optional[str] = None,
     session_eval: Optional[float] = None,
@@ -478,7 +417,8 @@ def end_session(
     session_eval: Optional[float] = None,
     session_eval_reason: Optional[str] = None,
     is_successful: Optional[bool] = None,
-    is_successful_reason: Optional[str] = None
+    is_successful_reason: Optional[str] = None,
+    wait_for_flush: bool = True
 ) -> None:
     """
     End the current session.
@@ -488,6 +428,8 @@ def end_session(
         session_eval_reason: Session evaluation reason.
         is_successful: Whether the session was successful.
         is_successful_reason: Session success reason.
+        wait_for_flush: Whether to block until event queue is empty (default True).
+            Set to False during signal handling to prevent hangs.
     """
     client = Client()
     # Prefer context-bound session id
@@ -501,49 +443,179 @@ def end_session(
     if not target_sid:
         return
 
-    # If ending the globally active session, keep existing cleanup behavior
+    # If ending the globally active session, perform cleanup
     if client.session and client.session.session_id == target_sid:
-        # Wait for any pending LiteLLM callbacks before ending session
-        for provider in client.providers:
-            if hasattr(provider, '_callback') and hasattr(provider._callback, 'wait_for_pending_callbacks'):
-                logger.info("Waiting for LiteLLM callbacks to complete before ending session...")
-                provider._callback.wait_for_pending_callbacks(timeout=5.0)
-        client.session.update_session(is_finished=True, **locals())
-        client.clear()
+        # Best-effort: wait for LiteLLM callbacks to flush before ending
+        try:
+            import litellm
+            cbs = getattr(litellm, 'callbacks', None)
+            if cbs:
+                for cb in cbs:
+                    try:
+                        if hasattr(cb, 'wait_for_pending_callbacks'):
+                            cb.wait_for_pending_callbacks(timeout=1)
+                    except Exception:
+                        pass
+        except Exception:
+            pass
+        # CRITICAL: Flush OpenTelemetry spans FIRST (blocking)
+        # This ensures all spans are converted to events before we flush the event queue
+        try:
+            if hasattr(client, '_tracer_provider') and client._tracer_provider:
+                logger.debug("[Session] Flushing OpenTelemetry spans before session end...")
+                # Force flush with generous timeout to ensure all spans are exported
+                # The BatchSpanProcessor now exports every 100ms, so this should be quick
+                success = client._tracer_provider.force_flush(timeout_millis=10000) # 10 second timeout
+                if not success:
+                    logger.warning("[Session] OpenTelemetry flush timed out - some spans may be lost")
+                else:
+                    logger.debug("[Session] OpenTelemetry spans flushed successfully")
+        except Exception as e:
+            logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
+
+        # THEN flush event queue (which now contains events from flushed spans)
+        try:
+            if hasattr(client, '_event_queue'):
+                logger.debug("[Session] Flushing event queue...")
+                client._event_queue.force_flush(timeout_seconds=10.0)
+
+                # Wait for queue to be completely empty (only if blocking)
+                if wait_for_flush:
+                    import time
+                    wait_start = time.time()
+                    max_wait = 10.0 # seconds - timeout for blob uploads
+                    while not client._event_queue.is_empty():
+                        if time.time() - wait_start > max_wait:
+                            logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
+                            break
+                        time.sleep(0.1)
+
+                    if client._event_queue.is_empty():
+                        logger.debug("[Session] EventQueue confirmed empty")
+                else:
+                    logger.debug("[Session] Non-blocking mode - skipping wait for empty queue")
+        except Exception as e:
+            logger.debug(f"[Session] Failed to flush event queue: {e}")
+
+        # Mark session as inactive FIRST (prevents race conditions)
+        client.mark_session_inactive(target_sid)
+
+        # Send only expected fields to update endpoint
+        update_kwargs = {
+            "is_finished": True,
+            "session_eval": session_eval,
+            "session_eval_reason": session_eval_reason,
+            "is_successful": is_successful,
+            "is_successful_reason": is_successful_reason,
+        }
+        try:
+            client.session.update_session(**update_kwargs)
+        except Exception as e:
+            logger.warning(f"[Session] Failed to update session: {e}")
+
+        # Clear only the global session reference, not the singleton
+        # This preserves the client and event queue for other threads
+        client.session = None
+        logger.debug(f"[Session] Ended global session {target_sid}")
+        # DO NOT shutdown event queue - other threads may be using it
+        # DO NOT call client.clear() - preserve singleton for other threads
         return
 
     # Otherwise, end the specified session id without clearing global state
+    # First flush telemetry and event queue for non-global sessions too
+    try:
+        if hasattr(client, '_tracer_provider') and client._tracer_provider:
+            logger.debug(f"[Session] Flushing OpenTelemetry spans for session {target_sid[:8]}...")
+            success = client._tracer_provider.force_flush(timeout_millis=10000)
+            if not success:
+                logger.warning("[Session] OpenTelemetry flush timed out")
+    except Exception as e:
+        logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
+
+    # Flush and wait for event queue to empty
+    try:
+        if hasattr(client, '_event_queue'):
+            logger.debug(f"[Session] Flushing event queue for session {target_sid[:8]}...")
+            client._event_queue.force_flush(timeout_seconds=10.0)
+
+            # Wait for queue to be completely empty (only if blocking)
+            if wait_for_flush:
+                import time
+                wait_start = time.time()
+                max_wait = 10.0 # seconds - timeout for blob uploads
+                while not client._event_queue.is_empty():
+                    if time.time() - wait_start > max_wait:
+                        logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
+                        break
+                    time.sleep(0.1)
+
+                if client._event_queue.is_empty():
+                    logger.debug(f"[Session] EventQueue confirmed empty for session {target_sid[:8]}")
+            else:
+                logger.debug(f"[Session] Non-blocking mode - skipping wait for session {target_sid[:8]}")
+    except Exception as e:
+        logger.debug(f"[Session] Failed to flush event queue: {e}")
+
+    # CRITICAL: Mark session as inactive FIRST for ALL sessions
+    client.mark_session_inactive(target_sid)
+
     temp = Session(agent_id=client.agent_id, session_id=target_sid)
-    temp.update_session(is_finished=True, **locals())
+    update_kwargs = {
+        "is_finished": True,
+        "session_eval": session_eval,
+        "session_eval_reason": session_eval_reason,
+        "is_successful": is_successful,
+        "is_successful_reason": is_successful_reason,
+    }
+    try:
+        temp.update_session(**update_kwargs)
+    except Exception as e:
+        logger.warning(f"[Session] Failed to update session: {e}")
 
 
-def reset_sdk() -> None:
+def flush(timeout_seconds: float = 2.0) -> bool:
     """
-    DEPRECATED: Reset the SDK.
-    """
-    return
-
-    client = Client()
-    if not client.initialized:
-        return
+    Manually flush all pending telemetry data.
 
-    # Shutdown OpenTelemetry if it was initialized
-    telemetry = LucidicTelemetry()
-    if telemetry.is_initialized():
-        telemetry.uninstrument_all()
+    Flushes both OpenTelemetry spans and queued events to ensure
+    all telemetry data is sent to the backend. This is called
+    automatically on process exit but can be called manually
+    for explicit control.
 
-    client.clear()
-
-
-def _cleanup_telemetry():
-    """Cleanup function for OpenTelemetry shutdown"""
+    Args:
+        timeout_seconds: Maximum time to wait for flush
+
+    Returns:
+        True if all flushes succeeded, False otherwise
+
+    Example:
+        ```python
+        import lucidicai as lai
+
+        # ... your code using Lucidic ...
+
+        # Manually flush before critical operation
+        lai.flush()
+        ```
+    """
     try:
-        telemetry = LucidicTelemetry()
-        if telemetry.is_initialized():
-            telemetry.uninstrument_all()
-            logger.info("OpenTelemetry instrumentation cleaned up")
+        client = Client()
+        success = True
+
+        # Flush OpenTelemetry spans first
+        if hasattr(client, 'flush_telemetry'):
+            span_success = client.flush_telemetry(timeout_seconds)
+            success = success and span_success
+
+        # Then flush event queue
+        if hasattr(client, '_event_queue'):
+            client._event_queue.force_flush(timeout_seconds)
+
+        logger.debug(f"[Flush] Manual flush completed (success={success})")
+        return success
     except Exception as e:
-        logger.error(f"Error during telemetry cleanup: {e}")
+        logger.error(f"Failed to flush telemetry: {e}")
+        return False
 
 
 def _auto_end_session():
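The hunk above replaces the deprecated `reset_sdk()` with a public `flush()` helper and threads the new `wait_for_flush` flag through `end_session()`. A minimal usage sketch under those signatures; the session name and evaluation text below are placeholders, not values taken from the package:

```python
import lucidicai as lai

lai.init(session_name="demo-session")  # placeholder session name

# ... run instrumented agent code ...

# Push buffered spans/events before a critical operation
lai.flush(timeout_seconds=2.0)

# Block until the event queue drains (default); pass False in signal handlers
lai.end_session(
    is_successful=True,
    is_successful_reason="completed without errors",
    wait_for_flush=True,
)
```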
@@ -553,13 +625,129 @@ def _auto_end_session():
         if hasattr(client, 'auto_end') and client.auto_end and client.session and not client.session.is_finished:
             logger.info("Auto-ending active session on exit")
             client.auto_end = False # To avoid repeating auto-end on exit
-            end_session()
+
+            # Flush telemetry
+            if hasattr(client, '_tracer_provider'):
+                client._tracer_provider.force_flush(timeout_millis=5000)
+
+            # Force flush event queue before ending session
+            if hasattr(client, '_event_queue'):
+                if logger.isEnabledFor(logging.DEBUG):
+                    logger.debug("[Shutdown] Flushing event queue before session end")
+                client._event_queue.force_flush(timeout_seconds=5.0)
+
+            # Use non-blocking mode during shutdown to prevent hangs
+            # The actual wait for queue empty happens in _cleanup_singleton_on_exit
+            end_session(wait_for_flush=False)
+
     except Exception as e:
         logger.debug(f"Error during auto-end session: {e}")
 
 
+def _cleanup_singleton_on_exit():
+    """
+    Clean up singleton resources only on process exit.
+
+    CRITICAL ORDER:
+    1. Flush OpenTelemetry spans (blocking) - ensures spans become events
+    2. Flush EventQueue - sends all events including those from spans
+    3. Close HTTP session - graceful TCP FIN prevents broken pipes
+    4. Clear singletons - final cleanup
+
+    This order is essential to prevent lost events and broken connections.
+    """
+    try:
+        client = Client()
+
+        # 1. FIRST: Flush OpenTelemetry spans (blocking until exported)
+        # This is the critical fix - we must flush spans before events
+        if hasattr(client, '_tracer_provider') and client._tracer_provider:
+            try:
+                # Small delay to ensure spans have reached the processor
+                import time
+                time.sleep(0.1) # 100ms to let spans reach BatchSpanProcessor
+
+                logger.debug("[Exit] Flushing OpenTelemetry spans...")
+                # force_flush() blocks until all spans are exported or timeout
+                success = client._tracer_provider.force_flush(timeout_millis=3000)
+                if success:
+                    logger.debug("[Exit] OpenTelemetry spans flushed successfully")
+                else:
+                    logger.warning("[Exit] OpenTelemetry flush timed out - some spans may be lost")
+
+                # DON'T shutdown TracerProvider yet - wait until after EventQueue
+                # This prevents losing spans that are still being processed
+            except Exception as e:
+                logger.debug(f"[Exit] Telemetry cleanup error: {e}")
+
+        # 2. SECOND: Flush and shutdown EventQueue
+        # Now it contains all events from the flushed spans
+        if hasattr(client, '_event_queue'):
+            try:
+                logger.debug("[Exit] Flushing event queue...")
+                client._event_queue.force_flush(timeout_seconds=2.0)
+
+                # Wait for queue to be completely empty before proceeding
+                import time
+                max_wait = 5.0 # seconds
+                start_time = time.time()
+                while not client._event_queue.is_empty():
+                    if time.time() - start_time > max_wait:
+                        logger.warning("[Exit] EventQueue not empty after timeout")
+                        break
+                    time.sleep(0.01) # Small sleep to avoid busy waiting
+
+                if client._event_queue.is_empty():
+                    logger.debug("[Exit] EventQueue is empty, proceeding with shutdown")
+
+                # Clear any stale active sessions (threads may have died without cleanup)
+                if hasattr(client, '_active_sessions'):
+                    with client._active_sessions_lock:
+                        if client._active_sessions:
+                            logger.debug(f"[Exit] Clearing {len(client._active_sessions)} remaining active sessions")
+                            client._active_sessions.clear()
+
+                # Now shutdown EventQueue
+                client._event_queue.shutdown()
+                logger.debug("[Exit] Event queue shutdown complete")
+            except Exception as e:
+                logger.debug(f"[Exit] Event queue cleanup error: {e}")
+
+        # 3. THIRD: Shutdown TracerProvider after EventQueue is done
+        # This ensures all spans can be exported before shutdown
+        if hasattr(client, '_tracer_provider') and client._tracer_provider:
+            try:
+                logger.debug("[Exit] Shutting down TracerProvider...")
+                client._tracer_provider.shutdown()
+                logger.debug("[Exit] TracerProvider shutdown complete")
+            except Exception as e:
+                logger.debug(f"[Exit] TracerProvider shutdown error: {e}")
+
+        # 4. FOURTH: Close HTTP session ONLY after everything else
+        # This prevents broken pipes by ensuring all events are sent first
+        if hasattr(client, 'request_session'):
+            try:
+                # Mark client as shutting down to prevent new requests
+                client._shutdown = True
+                logger.debug("[Exit] Closing HTTP session (queue empty, worker stopped)")
+                client.request_session.close()
+                logger.debug("[Exit] HTTP session closed gracefully")
+            except Exception as e:
+                logger.debug(f"[Exit] HTTP session cleanup error: {e}")
+
+        # 5. FINALLY: Clear singletons
+        # Safe to destroy now that all data is flushed
+        clear_singletons()
+        logger.debug("[Exit] Singleton cleanup complete")
+
+    except Exception as e:
+        # Silent fail on exit to avoid disrupting process termination
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(f"[Exit] Cleanup error: {e}")
+
+
 def _signal_handler(signum, frame):
-    """Handle interruption signals"""
+    """Handle interruption signals with better queue flushing."""
     # Best-effort final event for signal exits
     try:
         try:
@@ -574,245 +762,134 @@ def _signal_handler(signum, frame):
         _post_fatal_event(128 + signum, desc, {"signal": name, "signum": signum})
     except Exception:
         pass
+
+    # Proper shutdown sequence matching atexit handler
+    try:
+        client = Client()
+
+        # 1. FIRST: Flush OpenTelemetry spans
+        if hasattr(client, '_tracer_provider') and client._tracer_provider:
+            try:
+                logger.debug(f"[Signal] Flushing OpenTelemetry spans on signal {signum}")
+                client._tracer_provider.force_flush(timeout_millis=2000) # Shorter timeout for signals
+            except Exception:
+                pass
+
+        # 2. SECOND: Flush and shutdown EventQueue
+        if hasattr(client, "_event_queue"):
+            logger.debug(f"[Signal] Flushing event queue on signal {signum}")
+            client._event_queue.force_flush(timeout_seconds=2.0)
+
+            # Clear active sessions to allow shutdown
+            if hasattr(client, '_active_sessions'):
+                with client._active_sessions_lock:
+                    client._active_sessions.clear()
+
+            client._event_queue.shutdown()
+
+        # 3. THIRD: Shutdown TracerProvider after EventQueue
+        if hasattr(client, '_tracer_provider') and client._tracer_provider:
+            logger.debug(f"[Signal] Shutting down TracerProvider on signal {signum}")
+            try:
+                client._tracer_provider.shutdown()
+            except Exception:
+                pass
+
+        # 4. Mark client as shutting down
+        client._shutdown = True
+
+    except Exception:
+        pass
+
+    logger.debug(f"[Signal] Auto-ending session on signal {signum}")
     _auto_end_session()
-    _cleanup_telemetry()
     # Re-raise the signal for default handling
     signal.signal(signum, signal.SIG_DFL)
     os.kill(os.getpid(), signum)
 
 
-# Register cleanup functions (auto-end runs first due to LIFO order)
-atexit.register(_cleanup_telemetry)
-atexit.register(_auto_end_session)
+# Register cleanup functions
+atexit.register(_cleanup_singleton_on_exit) # Clean up singleton resources on exit
+atexit.register(_auto_end_session) # Auto-end session if enabled
 
 # Register signal handlers for graceful shutdown
 signal.signal(signal.SIGINT, _signal_handler)
 signal.signal(signal.SIGTERM, _signal_handler)
 
 
-def create_mass_sim(
-    mass_sim_name: str,
-    total_num_sessions: int,
+def create_experiment(
+    experiment_name: str,
+    pass_fail_rubrics: Optional[list] = None,
+    score_rubrics: Optional[list] = None,
+    description: Optional[str] = None,
+    tags: Optional[list] = None,
     api_key: Optional[str] = None,
     agent_id: Optional[str] = None,
-    task: Optional[str] = None,
-    tags: Optional[list] = None
 ) -> str:
     """
-    Create a new mass simulation.
-
-    Args:
-        mass_sim_name: Name of the mass simulation.
-        total_num_sessions: Total intended number of sessions. More sessions can be added later.
-        api_key: API key for authentication. If not provided, will use the LUCIDIC_API_KEY environment variable.
-        agent_id: Agent ID. If not provided, will use the LUCIDIC_AGENT_ID environment variable.
+    Create a new experiment for grouping and analyzing sessions.
+
+    Args:
+        experiment_name: Name of the experiment (required)
+        pass_fail_rubrics: List of pass/fail rubric names to associate
+        description: Description of the experiment
         task: Task description.
-        tags: Tags for the mass simulation.
-
-    Returns:
-        mass_sim_id: ID of the created mass simulation. Pass this to lai.init() to create a new session in the mass sim.
+        tags: List of tags for categorization
+        score_rubrics: List of score rubric names to associate
+        api_key: API key (uses env if not provided)
+        agent_id: Agent ID (uses env if not provided)
+
+    Returns:
+        experiment_id: UUID of the created experiment
+
+    Raises:
+        APIKeyVerificationError: If API key is invalid or missing
+        InvalidOperationError: If experiment creation fails
+        ValueError: If name is empty
     """
+
+    # validation
+    if not experiment_name:
+        raise ValueError("Experiment name is required")
+
     if api_key is None:
         api_key = os.getenv("LUCIDIC_API_KEY", None)
     if api_key is None:
-        raise APIKeyVerificationError("Make sure to either pass your API key into lai.init() or set the LUCIDIC_API_KEY environment variable.")
+        raise APIKeyVerificationError("Make sure to either pass your API key into create_experiment() or set the LUCIDIC_API_KEY environment variable.")
     if agent_id is None:
         agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
     if agent_id is None:
-        raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into lai.init() or set the LUCIDIC_AGENT_ID environment variable.")
-    try:
-        client = Client()
-    except LucidicNotInitializedError:
-        client = Client( # TODO: fail hard if incorrect API key or agent ID provided and wrong, fail silently if not provided
-            api_key=api_key,
-            agent_id=agent_id,
-        )
-    mass_sim_id = client.init_mass_sim(mass_sim_name=mass_sim_name, total_num_sims=total_num_sessions, task=task, tags=tags) # TODO: change total_num_sims to total_num_sessions everywhere
-    logger.info(f"Created mass simulation with ID: {mass_sim_id}")
-    return mass_sim_id
-
-
-def create_step(
-    state: Optional[str] = None,
-    action: Optional[str] = None,
-    goal: Optional[str] = None,
-    eval_score: Optional[float] = None,
-    eval_description: Optional[str] = None,
-    screenshot: Optional[str] = None,
-    screenshot_path: Optional[str] = None
-) -> None:
-    """
-    Create a new step. Previous step must be finished to create a new step.
-
-    Args:
-        state: State description.
-        action: Action description.
-        goal: Goal description.
-        eval_score: Evaluation score.
-        eval_description: Evaluation description.
-        screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
-        screenshot_path: Screenshot path. Provide either screenshot or screenshot_path.
-    """
-    client = Client()
-    if not client.session:
-        return
-    return client.session.create_step(**locals())
-
-
-def update_step(
-    step_id: Optional[str] = None,
-    state: Optional[str] = None,
-    action: Optional[str] = None,
-    goal: Optional[str] = None,
-    eval_score: Optional[float] = None,
-    eval_description: Optional[str] = None,
-    screenshot: Optional[str] = None,
-    screenshot_path: Optional[str] = None
-) -> None:
-    """
-    Update the current step.
-
-    Args:
-        step_id: ID of the step to update.
-        state: State description.
-        action: Action description.
-        goal: Goal description.
-        eval_score: Evaluation score.
-        eval_description: Evaluation description.
-        screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
-        screenshot_path: Screenshot path. Provide either screenshot or screenshot_path.
-    """
-    client = Client()
-    if not client.session:
-        return
-    if not client.session.active_step:
-        raise InvalidOperationError("No active step to update")
-    client.session.update_step(**locals())
-
-
-def end_step(
-    step_id: Optional[str] = None,
-    state: Optional[str] = None,
-    action: Optional[str] = None,
-    goal: Optional[str] = None,
-    eval_score: Optional[float] = None,
-    eval_description: Optional[str] = None,
-    screenshot: Optional[str] = None,
-    screenshot_path: Optional[str] = None
-) -> None:
-    """
-    End the current step.
-
-    Args:
-        step_id: ID of the step to end.
-        state: State description.
-        action: Action description.
-        goal: Goal description.
-        eval_score: Evaluation score.
-        eval_description: Evaluation description.
-        screenshot: Screenshot encoded in base64. Provide either screenshot or screenshot_path.
-        screenshot_path: Screenshot path.
-    """
-    client = Client()
-    if not client.session:
-        return
-
-    if not client.session.active_step and step_id is None:
-        raise InvalidOperationError("No active step to end")
-
-    # Filter out None values from locals
-    params = locals()
-    kwargs = {k: v for k, v in params.items() if v is not None and k not in ['client', 'params']}
-    kwargs['is_finished'] = True
-
-    client.session.update_step(**kwargs)
+        raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into create_experiment() or set the LUCIDIC_AGENT_ID environment variable.")
 
+    # combine rubrics into single list
+    rubric_names = (pass_fail_rubrics or []) + (score_rubrics or [])
 
-def create_event(
-    step_id: Optional[str] = None,
-    description: Optional[str] = None,
-    result: Optional[str] = None,
-    cost_added: Optional[float] = None,
-    model: Optional[str] = None,
-    screenshots: Optional[List[str]] = None,
-    function_name: Optional[str] = None,
-    arguments: Optional[dict] = None,
-) -> str:
-    """
-    Create a new event in the current step. Current step must not be finished.
-
-    Args:
-        description: Description of the event.
-        result: Result of the event.
-        cost_added: Cost added by the event.
-        model: Model used for the event.
-        screenshots: List of screenshots encoded in base64.
-        function_name: Name of the function that created the event.
-        arguments: Arguments of the function that created the event.
-    """
-
+    # get current client which will be NullClient if never lai.init() is never called
     client = Client()
-    if not client.session:
-        return
-    return client.session.create_event(**locals())
+    # if not yet initialized or still the NullClient -> create a real client when init is called
+    if not getattr(client, 'initialized', False):
+        client = Client(api_key=api_key, agent_id=agent_id)
+    else:
+        # Already initialized, this is a re-init
+        if api_key is not None and agent_id is not None and (api_key != client.api_key or agent_id != client.agent_id):
+            client.set_api_key(api_key)
+            client.agent_id = agent_id
 
+    # create experiment
+    experiment_id = client.create_experiment(experiment_name=experiment_name, rubric_names=rubric_names, description=description, tags=tags)
+    logger.info(f"Created experiment with ID: {experiment_id}")
 
-def update_event(
-    event_id: Optional[str] = None,
-    description: Optional[str] = None,
-    result: Optional[str] = None,
-    cost_added: Optional[float] = None,
-    model: Optional[str] = None,
-    screenshots: Optional[List[str]] = None,
-    function_name: Optional[str] = None,
-    arguments: Optional[dict] = None,
-) -> None:
-    """
-    Update the event with the given ID in the current step.
-
-    Args:
-        event_id: ID of the event to update.
-        description: Description of the event.
-        result: Result of the event.
-        cost_added: Cost added by the event.
-        model: Model used for the event.
-        screenshots: List of screenshots encoded in base64.
-        function_name: Name of the function that created the event.
-        arguments: Arguments of the function that created the event.
-    """
-    client = Client()
-    if not client.session:
-        return
-    client.session.update_event(**locals())
+    return experiment_id
 
 
-def end_event(
-    event_id: Optional[str] = None,
-    description: Optional[str] = None,
-    result: Optional[str] = None,
-    cost_added: Optional[float] = None,
-    model: Optional[str] = None,
-    screenshots: Optional[List[str]] = None,
-    function_name: Optional[str] = None,
-    arguments: Optional[dict] = None,
-) -> None:
-    """
-    End the latest event in the current step.
-
-    Args:
-        event_id: ID of the event to end.
-        description: Description of the event.
-        result: Result of the event.
-        cost_added: Cost added by the event.
-        model: Model used for the event.
-        screenshots: List of screenshots encoded in base64.
-        function_name: Name of the function that created the event.
-        arguments: Arguments of the function that created the event.
-    """
+def create_event(
+    type: str = "generic",
+    **kwargs
+) -> str:
     client = Client()
     if not client.session:
         return
-    client.session.update_event(is_finished=True, **locals())
+    return client.session.create_event(type=type, **kwargs)
 
 
 def get_prompt(
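Taken together, the 2.x API in this diff drops the step and mass-simulation helpers in favor of experiments, datasets, feature flags, and typed events. A minimal sketch of the new entry points, assuming LUCIDIC_API_KEY and LUCIDIC_AGENT_ID are set in the environment; the rubric names, tags, and event fields below are illustrative placeholders, not values from the package:

```python
import lucidicai as lai

# Group related sessions under an experiment (rubric names are placeholders)
experiment_id = lai.create_experiment(
    experiment_name="prompt-v2-rollout",
    pass_fail_rubrics=["task_completed"],
    score_rubrics=["answer_quality"],
    tags=["rollout"],
)

# Start an instrumented session tied to that experiment
lai.init(
    session_name="prompt-v2-run",
    providers=["openai"],
    experiment_id=experiment_id,
    dataset_item_id=None,  # optionally link the session to a dataset item
)

# Events are now typed; extra fields are forwarded as keyword arguments
lai.create_event(type="generic", description="retrieval step finished")

lai.end_session(is_successful=True)
```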