lucidicai 1.3.2__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lucidicai/__init__.py +648 -351
- lucidicai/client.py +327 -37
- lucidicai/constants.py +7 -37
- lucidicai/context.py +144 -0
- lucidicai/dataset.py +112 -0
- lucidicai/decorators.py +96 -325
- lucidicai/errors.py +33 -0
- lucidicai/event.py +50 -59
- lucidicai/event_queue.py +466 -0
- lucidicai/feature_flag.py +336 -0
- lucidicai/model_pricing.py +11 -0
- lucidicai/session.py +9 -71
- lucidicai/singleton.py +20 -17
- lucidicai/streaming.py +15 -50
- lucidicai/telemetry/context_capture_processor.py +65 -0
- lucidicai/telemetry/extract.py +192 -0
- lucidicai/telemetry/litellm_bridge.py +80 -45
- lucidicai/telemetry/lucidic_exporter.py +139 -144
- lucidicai/telemetry/lucidic_span_processor.py +67 -49
- lucidicai/telemetry/otel_handlers.py +207 -59
- lucidicai/telemetry/otel_init.py +163 -51
- lucidicai/telemetry/otel_provider.py +15 -5
- lucidicai/telemetry/telemetry_init.py +189 -0
- lucidicai/telemetry/utils/universal_image_interceptor.py +89 -0
- {lucidicai-1.3.2.dist-info → lucidicai-2.0.1.dist-info}/METADATA +1 -1
- {lucidicai-1.3.2.dist-info → lucidicai-2.0.1.dist-info}/RECORD +28 -21
- {lucidicai-1.3.2.dist-info → lucidicai-2.0.1.dist-info}/WHEEL +0 -0
- {lucidicai-1.3.2.dist-info → lucidicai-2.0.1.dist-info}/top_level.txt +0 -0
lucidicai/__init__.py
CHANGED
|
@@ -2,31 +2,50 @@ import atexit
|
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
4
|
import signal
|
|
5
|
+
import sys
|
|
6
|
+
import traceback
|
|
7
|
+
import threading
|
|
5
8
|
from typing import List, Literal, Optional
|
|
6
9
|
|
|
10
|
+
from dotenv import load_dotenv
|
|
11
|
+
|
|
7
12
|
from .client import Client
|
|
8
13
|
from .errors import APIKeyVerificationError, InvalidOperationError, LucidicNotInitializedError, PromptError
|
|
9
14
|
from .event import Event
|
|
10
15
|
from .session import Session
|
|
11
|
-
from .
|
|
12
|
-
|
|
13
|
-
# Import OpenTelemetry-based handlers
|
|
14
|
-
from .telemetry.otel_handlers import (
|
|
15
|
-
OTelOpenAIHandler,
|
|
16
|
-
OTelAnthropicHandler,
|
|
17
|
-
OTelLangChainHandler,
|
|
18
|
-
OTelPydanticAIHandler,
|
|
19
|
-
OTelOpenAIAgentsHandler,
|
|
20
|
-
OTelLiteLLMHandler
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
# Import telemetry manager
|
|
24
|
-
from .telemetry.otel_init import LucidicTelemetry
|
|
16
|
+
from .singleton import clear_singletons
|
|
25
17
|
|
|
26
18
|
# Import decorators
|
|
27
|
-
from .decorators import
|
|
19
|
+
from .decorators import event
|
|
20
|
+
from .context import (
|
|
21
|
+
set_active_session,
|
|
22
|
+
bind_session,
|
|
23
|
+
bind_session_async,
|
|
24
|
+
clear_active_session,
|
|
25
|
+
current_session_id,
|
|
26
|
+
session,
|
|
27
|
+
session_async,
|
|
28
|
+
run_session,
|
|
29
|
+
run_in_session,
|
|
30
|
+
)
|
|
28
31
|
|
|
29
|
-
ProviderType = Literal[
|
|
32
|
+
ProviderType = Literal[
|
|
33
|
+
"openai",
|
|
34
|
+
"anthropic",
|
|
35
|
+
"langchain",
|
|
36
|
+
"pydantic_ai",
|
|
37
|
+
"openai_agents",
|
|
38
|
+
"litellm",
|
|
39
|
+
"bedrock",
|
|
40
|
+
"aws_bedrock",
|
|
41
|
+
"amazon_bedrock",
|
|
42
|
+
"google",
|
|
43
|
+
"google_generativeai",
|
|
44
|
+
"vertexai",
|
|
45
|
+
"vertex_ai",
|
|
46
|
+
"cohere",
|
|
47
|
+
"groq",
|
|
48
|
+
]
|
|
30
49
|
|
|
31
50
|
# Configure logging
|
|
32
51
|
logger = logging.getLogger("Lucidic")
|
|
@@ -38,63 +57,175 @@ if not logger.handlers:
|
|
|
38
57
|
logger.setLevel(logging.INFO)
|
|
39
58
|
|
|
40
59
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
60
|
+
# Crash/exit capture configuration
|
|
61
|
+
MAX_ERROR_DESCRIPTION_LENGTH = 16384
|
|
62
|
+
_crash_handlers_installed = False
|
|
63
|
+
_original_sys_excepthook = None
|
|
64
|
+
_original_threading_excepthook = None
|
|
65
|
+
_shutdown_lock = threading.Lock()
|
|
66
|
+
_is_shutting_down = False
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _mask_and_truncate(text: Optional[str]) -> Optional[str]:
|
|
70
|
+
"""Apply masking and truncate to a safe length. Best effort; never raises."""
|
|
71
|
+
if text is None:
|
|
72
|
+
return text
|
|
73
|
+
try:
|
|
74
|
+
masked = Client().mask(text)
|
|
75
|
+
except Exception:
|
|
76
|
+
masked = text
|
|
77
|
+
if masked is None:
|
|
78
|
+
return masked
|
|
79
|
+
return masked[:MAX_ERROR_DESCRIPTION_LENGTH]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _post_fatal_event(exit_code: int, description: str, extra: Optional[dict] = None) -> None:
|
|
83
|
+
"""Best-effort creation of a final Lucidic event on fatal paths.
|
|
84
|
+
|
|
85
|
+
- Idempotent using a process-wide shutdown flag to avoid duplicates when
|
|
86
|
+
multiple hooks fire (signal + excepthook).
|
|
87
|
+
- Swallows all exceptions to avoid interfering with shutdown.
|
|
47
88
|
"""
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
89
|
+
global _is_shutting_down
|
|
90
|
+
with _shutdown_lock:
|
|
91
|
+
if _is_shutting_down:
|
|
92
|
+
return
|
|
93
|
+
_is_shutting_down = True
|
|
94
|
+
try:
|
|
95
|
+
client = Client()
|
|
96
|
+
session = getattr(client, 'session', None)
|
|
97
|
+
if not session or getattr(session, 'is_finished', False):
|
|
98
|
+
return
|
|
99
|
+
arguments = {"exit_code": exit_code}
|
|
100
|
+
if extra:
|
|
101
|
+
try:
|
|
102
|
+
arguments.update(extra)
|
|
103
|
+
except Exception:
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
# Create a single immutable event describing the crash
|
|
107
|
+
session.create_event(
|
|
108
|
+
type="error_traceback",
|
|
109
|
+
error=_mask_and_truncate(description),
|
|
110
|
+
traceback="",
|
|
111
|
+
metadata={"exit_code": exit_code, **({} if not extra else extra)},
|
|
112
|
+
)
|
|
113
|
+
except Exception:
|
|
114
|
+
# Never raise during shutdown
|
|
115
|
+
pass
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _install_crash_handlers() -> None:
|
|
119
|
+
"""Install global uncaught exception handlers (idempotent)."""
|
|
120
|
+
global _crash_handlers_installed, _original_sys_excepthook, _original_threading_excepthook
|
|
121
|
+
if _crash_handlers_installed:
|
|
122
|
+
return
|
|
123
|
+
|
|
124
|
+
_original_sys_excepthook = sys.excepthook
|
|
125
|
+
|
|
126
|
+
def _sys_hook(exc_type, exc, tb):
|
|
127
|
+
try:
|
|
128
|
+
trace_str = ''.join(traceback.format_exception(exc_type, exc, tb))
|
|
129
|
+
except Exception:
|
|
130
|
+
trace_str = f"Uncaught exception: {getattr(exc_type, '__name__', str(exc_type))}: {exc}"
|
|
131
|
+
|
|
132
|
+
# Emit final event and end the session as unsuccessful
|
|
133
|
+
_post_fatal_event(1, trace_str, {
|
|
134
|
+
"exception_type": getattr(exc_type, "__name__", str(exc_type)),
|
|
135
|
+
"exception_message": str(exc),
|
|
136
|
+
"thread_name": threading.current_thread().name,
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
# Follow proper shutdown sequence to prevent broken pipes
|
|
140
|
+
try:
|
|
141
|
+
client = Client()
|
|
142
|
+
|
|
143
|
+
# 1. Flush OpenTelemetry spans first
|
|
144
|
+
if hasattr(client, '_tracer_provider'):
|
|
145
|
+
try:
|
|
146
|
+
client._tracer_provider.force_flush(timeout_millis=5000)
|
|
147
|
+
except Exception:
|
|
148
|
+
pass
|
|
149
|
+
|
|
150
|
+
# 2. Flush and shutdown EventQueue (with active sessions cleared)
|
|
151
|
+
if hasattr(client, "_event_queue"):
|
|
152
|
+
try:
|
|
153
|
+
# Clear active sessions to allow shutdown
|
|
154
|
+
client._event_queue._active_sessions.clear()
|
|
155
|
+
client._event_queue.force_flush()
|
|
156
|
+
client._event_queue.shutdown(timeout=5.0)
|
|
157
|
+
except Exception:
|
|
158
|
+
pass
|
|
60
159
|
|
|
61
|
-
|
|
62
|
-
client
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
client
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
client.set_provider(OTelPydanticAIHandler())
|
|
73
|
-
setup_providers.add("pydantic_ai")
|
|
74
|
-
elif provider == "openai_agents":
|
|
160
|
+
# 3. Shutdown TracerProvider after EventQueue
|
|
161
|
+
if hasattr(client, '_tracer_provider'):
|
|
162
|
+
try:
|
|
163
|
+
client._tracer_provider.shutdown()
|
|
164
|
+
except Exception:
|
|
165
|
+
pass
|
|
166
|
+
|
|
167
|
+
# 4. Mark client as shutting down to prevent new requests
|
|
168
|
+
client._shutdown = True
|
|
169
|
+
|
|
170
|
+
# 5. Prevent auto_end double work
|
|
75
171
|
try:
|
|
76
|
-
client.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
172
|
+
client.auto_end = False
|
|
173
|
+
except Exception:
|
|
174
|
+
pass
|
|
175
|
+
|
|
176
|
+
# 6. End session explicitly as unsuccessful
|
|
177
|
+
end_session()
|
|
178
|
+
|
|
179
|
+
except Exception:
|
|
180
|
+
pass
|
|
181
|
+
|
|
182
|
+
# Chain to original to preserve default printing/behavior
|
|
183
|
+
try:
|
|
184
|
+
_original_sys_excepthook(exc_type, exc, tb)
|
|
185
|
+
except Exception:
|
|
186
|
+
# Avoid recursion/errors in fatal path
|
|
187
|
+
pass
|
|
188
|
+
|
|
189
|
+
sys.excepthook = _sys_hook
|
|
190
|
+
|
|
191
|
+
# For Python 3.8+, only treat main-thread exceptions as fatal (process-exiting)
|
|
192
|
+
if hasattr(threading, 'excepthook'):
|
|
193
|
+
_original_threading_excepthook = threading.excepthook
|
|
194
|
+
|
|
195
|
+
def _thread_hook(args):
|
|
196
|
+
try:
|
|
197
|
+
if args.thread is threading.main_thread():
|
|
198
|
+
# For main thread exceptions, use full shutdown sequence
|
|
199
|
+
_sys_hook(args.exc_type, args.exc_value, args.exc_traceback)
|
|
200
|
+
else:
|
|
201
|
+
# For non-main threads, just flush spans without full shutdown
|
|
202
|
+
try:
|
|
203
|
+
client = Client()
|
|
204
|
+
# Flush any pending spans from this thread
|
|
205
|
+
if hasattr(client, '_tracer_provider'):
|
|
206
|
+
client._tracer_provider.force_flush(timeout_millis=1000)
|
|
207
|
+
# Force flush events but don't shutdown
|
|
208
|
+
if hasattr(client, "_event_queue"):
|
|
209
|
+
client._event_queue.force_flush()
|
|
210
|
+
except Exception:
|
|
211
|
+
pass
|
|
212
|
+
except Exception:
|
|
213
|
+
pass
|
|
214
|
+
try:
|
|
215
|
+
_original_threading_excepthook(args)
|
|
216
|
+
except Exception:
|
|
217
|
+
pass
|
|
218
|
+
|
|
219
|
+
threading.excepthook = _thread_hook
|
|
220
|
+
|
|
221
|
+
_crash_handlers_installed = True
|
|
84
222
|
|
|
85
223
|
__all__ = [
|
|
86
|
-
'Client',
|
|
87
224
|
'Session',
|
|
88
|
-
'Step',
|
|
89
225
|
'Event',
|
|
90
226
|
'init',
|
|
91
|
-
'
|
|
92
|
-
'create_step',
|
|
93
|
-
'end_step',
|
|
94
|
-
'update_step',
|
|
227
|
+
'create_experiment',
|
|
95
228
|
'create_event',
|
|
96
|
-
'update_event',
|
|
97
|
-
'end_event',
|
|
98
229
|
'end_session',
|
|
99
230
|
'get_prompt',
|
|
100
231
|
'get_session',
|
|
@@ -103,8 +234,15 @@ __all__ = [
|
|
|
103
234
|
'LucidicNotInitializedError',
|
|
104
235
|
'PromptError',
|
|
105
236
|
'InvalidOperationError',
|
|
106
|
-
'step',
|
|
107
237
|
'event',
|
|
238
|
+
'set_active_session',
|
|
239
|
+
'bind_session',
|
|
240
|
+
'bind_session_async',
|
|
241
|
+
'clear_active_session',
|
|
242
|
+
'session',
|
|
243
|
+
'session_async',
|
|
244
|
+
'run_session',
|
|
245
|
+
'run_in_session',
|
|
108
246
|
]
|
|
109
247
|
|
|
110
248
|
|
|
@@ -116,12 +254,12 @@ def init(
|
|
|
116
254
|
task: Optional[str] = None,
|
|
117
255
|
providers: Optional[List[ProviderType]] = [],
|
|
118
256
|
production_monitoring: Optional[bool] = False,
|
|
119
|
-
mass_sim_id: Optional[str] = None,
|
|
120
257
|
experiment_id: Optional[str] = None,
|
|
121
258
|
rubrics: Optional[list] = None,
|
|
122
259
|
tags: Optional[list] = None,
|
|
123
260
|
masking_function = None,
|
|
124
261
|
auto_end: Optional[bool] = True,
|
|
262
|
+
capture_uncaught: Optional[bool] = True,
|
|
125
263
|
) -> str:
|
|
126
264
|
"""
|
|
127
265
|
Initialize the Lucidic client.
|
|
@@ -133,7 +271,6 @@ def init(
|
|
|
133
271
|
agent_id: Agent ID. If not provided, will use the LUCIDIC_AGENT_ID environment variable.
|
|
134
272
|
task: Task description.
|
|
135
273
|
providers: List of provider types ("openai", "anthropic", "langchain", "pydantic_ai").
|
|
136
|
-
mass_sim_id: Optional mass simulation ID, if session is to be part of a mass simulation.
|
|
137
274
|
experiment_id: Optional experiment ID, if session is to be part of an experiment.
|
|
138
275
|
rubrics: Optional rubrics for evaluation, list of strings.
|
|
139
276
|
tags: Optional tags for the session, list of strings.
|
|
@@ -144,6 +281,11 @@ def init(
|
|
|
144
281
|
InvalidOperationError: If the client is already initialized.
|
|
145
282
|
APIKeyVerificationError: If the API key is invalid.
|
|
146
283
|
"""
|
|
284
|
+
|
|
285
|
+
load_dotenv()
|
|
286
|
+
|
|
287
|
+
if os.getenv("LUCIDIC_DEBUG", "False").lower() == "true":
|
|
288
|
+
logger.setLevel(logging.DEBUG)
|
|
147
289
|
|
|
148
290
|
# get current client which will be NullClient if never lai is never initialized
|
|
149
291
|
client = Client()
|
|
@@ -173,10 +315,13 @@ def init(
|
|
|
173
315
|
auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
|
|
174
316
|
|
|
175
317
|
# Set up providers
|
|
176
|
-
|
|
318
|
+
# Use the client's singleton telemetry initialization
|
|
319
|
+
if providers:
|
|
320
|
+
success = client.initialize_telemetry(providers)
|
|
321
|
+
if not success:
|
|
322
|
+
logger.warning("[Telemetry] Failed to initialize telemetry for some providers")
|
|
177
323
|
real_session_id = client.init_session(
|
|
178
324
|
session_name=session_name,
|
|
179
|
-
mass_sim_id=mass_sim_id,
|
|
180
325
|
task=task,
|
|
181
326
|
rubrics=rubrics,
|
|
182
327
|
tags=tags,
|
|
@@ -189,52 +334,28 @@ def init(
|
|
|
189
334
|
|
|
190
335
|
# Set the auto_end flag on the client
|
|
191
336
|
client.auto_end = auto_end
|
|
337
|
+
# Bind this session id to the current execution context for async-safety
|
|
338
|
+
try:
|
|
339
|
+
set_active_session(real_session_id)
|
|
340
|
+
except Exception:
|
|
341
|
+
pass
|
|
342
|
+
# Install crash handlers unless explicitly disabled
|
|
343
|
+
try:
|
|
344
|
+
if capture_uncaught:
|
|
345
|
+
_install_crash_handlers()
|
|
346
|
+
# Also install error event handler for uncaught exceptions
|
|
347
|
+
try:
|
|
348
|
+
from .errors import install_error_handler
|
|
349
|
+
install_error_handler()
|
|
350
|
+
except Exception:
|
|
351
|
+
pass
|
|
352
|
+
except Exception:
|
|
353
|
+
pass
|
|
192
354
|
|
|
193
355
|
logger.info("Session initialized successfully")
|
|
194
356
|
return real_session_id
|
|
195
357
|
|
|
196
358
|
|
|
197
|
-
def continue_session(
|
|
198
|
-
session_id: str,
|
|
199
|
-
api_key: Optional[str] = None,
|
|
200
|
-
agent_id: Optional[str] = None,
|
|
201
|
-
providers: Optional[List[ProviderType]] = [],
|
|
202
|
-
masking_function = None,
|
|
203
|
-
auto_end: Optional[bool] = True,
|
|
204
|
-
):
|
|
205
|
-
if api_key is None:
|
|
206
|
-
api_key = os.getenv("LUCIDIC_API_KEY", None)
|
|
207
|
-
if api_key is None:
|
|
208
|
-
raise APIKeyVerificationError("Make sure to either pass your API key into lai.init() or set the LUCIDIC_API_KEY environment variable.")
|
|
209
|
-
if agent_id is None:
|
|
210
|
-
agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
|
|
211
|
-
if agent_id is None:
|
|
212
|
-
raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into lai.init() or set the LUCIDIC_AGENT_ID environment variable.")
|
|
213
|
-
|
|
214
|
-
client = Client()
|
|
215
|
-
if client.session:
|
|
216
|
-
raise InvalidOperationError("[Lucidic] Session already in progress. Please call lai.end_session() or lai.reset_sdk() first.")
|
|
217
|
-
# if not yet initialized or still the NullClient -> create a real client when init is called
|
|
218
|
-
if not getattr(client, 'initialized', False):
|
|
219
|
-
client = Client(api_key=api_key, agent_id=agent_id)
|
|
220
|
-
|
|
221
|
-
# Handle auto_end with environment variable support
|
|
222
|
-
if auto_end is None:
|
|
223
|
-
auto_end = os.getenv("LUCIDIC_AUTO_END", "True").lower() == "true"
|
|
224
|
-
|
|
225
|
-
# Set up providers
|
|
226
|
-
_setup_providers(client, providers)
|
|
227
|
-
session_id = client.continue_session(session_id=session_id)
|
|
228
|
-
if masking_function:
|
|
229
|
-
client.masking_function = masking_function
|
|
230
|
-
|
|
231
|
-
# Set the auto_end flag on the client
|
|
232
|
-
client.auto_end = auto_end
|
|
233
|
-
|
|
234
|
-
logger.info(f"Session {session_id} continuing...")
|
|
235
|
-
return session_id # For consistency
|
|
236
|
-
|
|
237
|
-
|
|
238
359
|
def update_session(
|
|
239
360
|
task: Optional[str] = None,
|
|
240
361
|
session_eval: Optional[float] = None,
|
|
@@ -252,17 +373,28 @@ def update_session(
|
|
|
252
373
|
is_successful: Whether the session was successful.
|
|
253
374
|
is_successful_reason: Session success reason.
|
|
254
375
|
"""
|
|
376
|
+
# Prefer context-bound session over global active session
|
|
255
377
|
client = Client()
|
|
256
|
-
|
|
378
|
+
target_sid = None
|
|
379
|
+
try:
|
|
380
|
+
target_sid = current_session_id.get(None)
|
|
381
|
+
except Exception:
|
|
382
|
+
target_sid = None
|
|
383
|
+
if not target_sid and client.session:
|
|
384
|
+
target_sid = client.session.session_id
|
|
385
|
+
if not target_sid:
|
|
257
386
|
return
|
|
258
|
-
|
|
387
|
+
# Use ephemeral session facade to avoid mutating global state
|
|
388
|
+
session = client.session if (client.session and client.session.session_id == target_sid) else Session(agent_id=client.agent_id, session_id=target_sid)
|
|
389
|
+
session.update_session(**locals())
|
|
259
390
|
|
|
260
391
|
|
|
261
392
|
def end_session(
|
|
262
393
|
session_eval: Optional[float] = None,
|
|
263
394
|
session_eval_reason: Optional[str] = None,
|
|
264
395
|
is_successful: Optional[bool] = None,
|
|
265
|
-
is_successful_reason: Optional[str] = None
|
|
396
|
+
is_successful_reason: Optional[str] = None,
|
|
397
|
+
wait_for_flush: bool = True
|
|
266
398
|
) -> None:
|
|
267
399
|
"""
|
|
268
400
|
End the current session.
|
|
@@ -272,48 +404,194 @@ def end_session(
|
|
|
272
404
|
session_eval_reason: Session evaluation reason.
|
|
273
405
|
is_successful: Whether the session was successful.
|
|
274
406
|
is_successful_reason: Session success reason.
|
|
407
|
+
wait_for_flush: Whether to block until event queue is empty (default True).
|
|
408
|
+
Set to False during signal handling to prevent hangs.
|
|
275
409
|
"""
|
|
276
410
|
client = Client()
|
|
277
|
-
|
|
411
|
+
# Prefer context-bound session id
|
|
412
|
+
target_sid = None
|
|
413
|
+
try:
|
|
414
|
+
target_sid = current_session_id.get(None)
|
|
415
|
+
except Exception:
|
|
416
|
+
target_sid = None
|
|
417
|
+
if not target_sid and client.session:
|
|
418
|
+
target_sid = client.session.session_id
|
|
419
|
+
if not target_sid:
|
|
278
420
|
return
|
|
421
|
+
|
|
422
|
+
# If ending the globally active session, perform cleanup
|
|
423
|
+
if client.session and client.session.session_id == target_sid:
|
|
424
|
+
# Best-effort: wait for LiteLLM callbacks to flush before ending
|
|
425
|
+
try:
|
|
426
|
+
import litellm
|
|
427
|
+
cbs = getattr(litellm, 'callbacks', None)
|
|
428
|
+
if cbs:
|
|
429
|
+
for cb in cbs:
|
|
430
|
+
try:
|
|
431
|
+
if hasattr(cb, 'wait_for_pending_callbacks'):
|
|
432
|
+
cb.wait_for_pending_callbacks(timeout=1)
|
|
433
|
+
except Exception:
|
|
434
|
+
pass
|
|
435
|
+
except Exception:
|
|
436
|
+
pass
|
|
437
|
+
# CRITICAL: Flush OpenTelemetry spans FIRST (blocking)
|
|
438
|
+
# This ensures all spans are converted to events before we flush the event queue
|
|
439
|
+
try:
|
|
440
|
+
if hasattr(client, '_tracer_provider') and client._tracer_provider:
|
|
441
|
+
logger.debug("[Session] Flushing OpenTelemetry spans before session end...")
|
|
442
|
+
# Force flush with generous timeout to ensure all spans are exported
|
|
443
|
+
# The BatchSpanProcessor now exports every 100ms, so this should be quick
|
|
444
|
+
success = client._tracer_provider.force_flush(timeout_millis=10000) # 10 second timeout
|
|
445
|
+
if not success:
|
|
446
|
+
logger.warning("[Session] OpenTelemetry flush timed out - some spans may be lost")
|
|
447
|
+
else:
|
|
448
|
+
logger.debug("[Session] OpenTelemetry spans flushed successfully")
|
|
449
|
+
except Exception as e:
|
|
450
|
+
logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
|
|
451
|
+
|
|
452
|
+
# THEN flush event queue (which now contains events from flushed spans)
|
|
453
|
+
try:
|
|
454
|
+
if hasattr(client, '_event_queue'):
|
|
455
|
+
logger.debug("[Session] Flushing event queue...")
|
|
456
|
+
client._event_queue.force_flush(timeout_seconds=10.0)
|
|
457
|
+
|
|
458
|
+
# Wait for queue to be completely empty (only if blocking)
|
|
459
|
+
if wait_for_flush:
|
|
460
|
+
import time
|
|
461
|
+
wait_start = time.time()
|
|
462
|
+
max_wait = 10.0 # seconds - timeout for blob uploads
|
|
463
|
+
while not client._event_queue.is_empty():
|
|
464
|
+
if time.time() - wait_start > max_wait:
|
|
465
|
+
logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
|
|
466
|
+
break
|
|
467
|
+
time.sleep(0.1)
|
|
468
|
+
|
|
469
|
+
if client._event_queue.is_empty():
|
|
470
|
+
logger.debug("[Session] EventQueue confirmed empty")
|
|
471
|
+
else:
|
|
472
|
+
logger.debug("[Session] Non-blocking mode - skipping wait for empty queue")
|
|
473
|
+
except Exception as e:
|
|
474
|
+
logger.debug(f"[Session] Failed to flush event queue: {e}")
|
|
475
|
+
|
|
476
|
+
# Mark session as inactive FIRST (prevents race conditions)
|
|
477
|
+
client.mark_session_inactive(target_sid)
|
|
478
|
+
|
|
479
|
+
# Send only expected fields to update endpoint
|
|
480
|
+
update_kwargs = {
|
|
481
|
+
"is_finished": True,
|
|
482
|
+
"session_eval": session_eval,
|
|
483
|
+
"session_eval_reason": session_eval_reason,
|
|
484
|
+
"is_successful": is_successful,
|
|
485
|
+
"is_successful_reason": is_successful_reason,
|
|
486
|
+
}
|
|
487
|
+
try:
|
|
488
|
+
client.session.update_session(**update_kwargs)
|
|
489
|
+
except Exception as e:
|
|
490
|
+
logger.warning(f"[Session] Failed to update session: {e}")
|
|
491
|
+
|
|
492
|
+
# Clear only the global session reference, not the singleton
|
|
493
|
+
# This preserves the client and event queue for other threads
|
|
494
|
+
client.session = None
|
|
495
|
+
logger.debug(f"[Session] Ended global session {target_sid}")
|
|
496
|
+
# DO NOT shutdown event queue - other threads may be using it
|
|
497
|
+
# DO NOT call client.clear() - preserve singleton for other threads
|
|
498
|
+
return
|
|
499
|
+
|
|
500
|
+
# Otherwise, end the specified session id without clearing global state
|
|
501
|
+
# First flush telemetry and event queue for non-global sessions too
|
|
502
|
+
try:
|
|
503
|
+
if hasattr(client, '_tracer_provider') and client._tracer_provider:
|
|
504
|
+
logger.debug(f"[Session] Flushing OpenTelemetry spans for session {target_sid[:8]}...")
|
|
505
|
+
success = client._tracer_provider.force_flush(timeout_millis=10000)
|
|
506
|
+
if not success:
|
|
507
|
+
logger.warning("[Session] OpenTelemetry flush timed out")
|
|
508
|
+
except Exception as e:
|
|
509
|
+
logger.debug(f"[Session] Failed to flush telemetry spans: {e}")
|
|
279
510
|
|
|
280
|
-
#
|
|
281
|
-
|
|
282
|
-
if hasattr(
|
|
283
|
-
logger.
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
511
|
+
# Flush and wait for event queue to empty
|
|
512
|
+
try:
|
|
513
|
+
if hasattr(client, '_event_queue'):
|
|
514
|
+
logger.debug(f"[Session] Flushing event queue for session {target_sid[:8]}...")
|
|
515
|
+
client._event_queue.force_flush(timeout_seconds=10.0)
|
|
516
|
+
|
|
517
|
+
# Wait for queue to be completely empty (only if blocking)
|
|
518
|
+
if wait_for_flush:
|
|
519
|
+
import time
|
|
520
|
+
wait_start = time.time()
|
|
521
|
+
max_wait = 10.0 # seconds - timeout for blob uploads
|
|
522
|
+
while not client._event_queue.is_empty():
|
|
523
|
+
if time.time() - wait_start > max_wait:
|
|
524
|
+
logger.warning(f"[Session] EventQueue not empty after {max_wait}s timeout")
|
|
525
|
+
break
|
|
526
|
+
time.sleep(0.1)
|
|
527
|
+
|
|
528
|
+
if client._event_queue.is_empty():
|
|
529
|
+
logger.debug(f"[Session] EventQueue confirmed empty for session {target_sid[:8]}")
|
|
530
|
+
else:
|
|
531
|
+
logger.debug(f"[Session] Non-blocking mode - skipping wait for session {target_sid[:8]}")
|
|
532
|
+
except Exception as e:
|
|
533
|
+
logger.debug(f"[Session] Failed to flush event queue: {e}")
|
|
534
|
+
|
|
535
|
+
# CRITICAL: Mark session as inactive FIRST for ALL sessions
|
|
536
|
+
client.mark_session_inactive(target_sid)
|
|
537
|
+
|
|
538
|
+
temp = Session(agent_id=client.agent_id, session_id=target_sid)
|
|
539
|
+
update_kwargs = {
|
|
540
|
+
"is_finished": True,
|
|
541
|
+
"session_eval": session_eval,
|
|
542
|
+
"session_eval_reason": session_eval_reason,
|
|
543
|
+
"is_successful": is_successful,
|
|
544
|
+
"is_successful_reason": is_successful_reason,
|
|
545
|
+
}
|
|
546
|
+
try:
|
|
547
|
+
temp.update_session(**update_kwargs)
|
|
548
|
+
except Exception as e:
|
|
549
|
+
logger.warning(f"[Session] Failed to update session: {e}")
|
|
288
550
|
|
|
289
551
|
|
|
290
|
-
def
|
|
552
|
+
def flush(timeout_seconds: float = 2.0) -> bool:
|
|
291
553
|
"""
|
|
292
|
-
|
|
293
|
-
"""
|
|
294
|
-
return
|
|
295
|
-
|
|
296
|
-
client = Client()
|
|
297
|
-
if not client.initialized:
|
|
298
|
-
return
|
|
554
|
+
Manually flush all pending telemetry data.
|
|
299
555
|
|
|
300
|
-
|
|
301
|
-
telemetry
|
|
302
|
-
|
|
303
|
-
|
|
556
|
+
Flushes both OpenTelemetry spans and queued events to ensure
|
|
557
|
+
all telemetry data is sent to the backend. This is called
|
|
558
|
+
automatically on process exit but can be called manually
|
|
559
|
+
for explicit control.
|
|
304
560
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
561
|
+
Args:
|
|
562
|
+
timeout_seconds: Maximum time to wait for flush
|
|
563
|
+
|
|
564
|
+
Returns:
|
|
565
|
+
True if all flushes succeeded, False otherwise
|
|
566
|
+
|
|
567
|
+
Example:
|
|
568
|
+
```python
|
|
569
|
+
import lucidicai as lai
|
|
570
|
+
|
|
571
|
+
# ... your code using Lucidic ...
|
|
572
|
+
|
|
573
|
+
# Manually flush before critical operation
|
|
574
|
+
lai.flush()
|
|
575
|
+
```
|
|
576
|
+
"""
|
|
310
577
|
try:
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
578
|
+
client = Client()
|
|
579
|
+
success = True
|
|
580
|
+
|
|
581
|
+
# Flush OpenTelemetry spans first
|
|
582
|
+
if hasattr(client, 'flush_telemetry'):
|
|
583
|
+
span_success = client.flush_telemetry(timeout_seconds)
|
|
584
|
+
success = success and span_success
|
|
585
|
+
|
|
586
|
+
# Then flush event queue
|
|
587
|
+
if hasattr(client, '_event_queue'):
|
|
588
|
+
client._event_queue.force_flush(timeout_seconds)
|
|
589
|
+
|
|
590
|
+
logger.debug(f"[Flush] Manual flush completed (success={success})")
|
|
591
|
+
return success
|
|
315
592
|
except Exception as e:
|
|
316
|
-
logger.error(f"
|
|
593
|
+
logger.error(f"Failed to flush telemetry: {e}")
|
|
594
|
+
return False
|
|
317
595
|
|
|
318
596
|
|
|
319
597
|
def _auto_end_session():
|
|
@@ -323,252 +601,271 @@ def _auto_end_session():
|
|
|
323
601
|
if hasattr(client, 'auto_end') and client.auto_end and client.session and not client.session.is_finished:
|
|
324
602
|
logger.info("Auto-ending active session on exit")
|
|
325
603
|
client.auto_end = False # To avoid repeating auto-end on exit
|
|
326
|
-
|
|
604
|
+
|
|
605
|
+
# Flush telemetry
|
|
606
|
+
if hasattr(client, '_tracer_provider'):
|
|
607
|
+
client._tracer_provider.force_flush(timeout_millis=5000)
|
|
608
|
+
|
|
609
|
+
# Force flush event queue before ending session
|
|
610
|
+
if hasattr(client, '_event_queue'):
|
|
611
|
+
if logger.isEnabledFor(logging.DEBUG):
|
|
612
|
+
logger.debug("[Shutdown] Flushing event queue before session end")
|
|
613
|
+
client._event_queue.force_flush(timeout_seconds=5.0)
|
|
614
|
+
|
|
615
|
+
# Use non-blocking mode during shutdown to prevent hangs
|
|
616
|
+
# The actual wait for queue empty happens in _cleanup_singleton_on_exit
|
|
617
|
+
end_session(wait_for_flush=False)
|
|
618
|
+
|
|
327
619
|
except Exception as e:
|
|
328
620
|
logger.debug(f"Error during auto-end session: {e}")
|
|
329
621
|
|
|
330
622
|
|
|
623
|
+
def _cleanup_singleton_on_exit():
|
|
624
|
+
"""
|
|
625
|
+
Clean up singleton resources only on process exit.
|
|
626
|
+
|
|
627
|
+
CRITICAL ORDER:
|
|
628
|
+
1. Flush OpenTelemetry spans (blocking) - ensures spans become events
|
|
629
|
+
2. Flush EventQueue - sends all events including those from spans
|
|
630
|
+
3. Close HTTP session - graceful TCP FIN prevents broken pipes
|
|
631
|
+
4. Clear singletons - final cleanup
|
|
632
|
+
|
|
633
|
+
This order is essential to prevent lost events and broken connections.
|
|
634
|
+
"""
|
|
635
|
+
try:
|
|
636
|
+
client = Client()
|
|
637
|
+
|
|
638
|
+
# 1. FIRST: Flush OpenTelemetry spans (blocking until exported)
|
|
639
|
+
# This is the critical fix - we must flush spans before events
|
|
640
|
+
if hasattr(client, '_tracer_provider') and client._tracer_provider:
|
|
641
|
+
try:
|
|
642
|
+
# Small delay to ensure spans have reached the processor
|
|
643
|
+
import time
|
|
644
|
+
time.sleep(0.1) # 100ms to let spans reach BatchSpanProcessor
|
|
645
|
+
|
|
646
|
+
logger.debug("[Exit] Flushing OpenTelemetry spans...")
|
|
647
|
+
# force_flush() blocks until all spans are exported or timeout
|
|
648
|
+
success = client._tracer_provider.force_flush(timeout_millis=3000)
|
|
649
|
+
if success:
|
|
650
|
+
logger.debug("[Exit] OpenTelemetry spans flushed successfully")
|
|
651
|
+
else:
|
|
652
|
+
logger.warning("[Exit] OpenTelemetry flush timed out - some spans may be lost")
|
|
653
|
+
|
|
654
|
+
# DON'T shutdown TracerProvider yet - wait until after EventQueue
|
|
655
|
+
# This prevents losing spans that are still being processed
|
|
656
|
+
except Exception as e:
|
|
657
|
+
logger.debug(f"[Exit] Telemetry cleanup error: {e}")
|
|
658
|
+
|
|
659
|
+
# 2. SECOND: Flush and shutdown EventQueue
|
|
660
|
+
# Now it contains all events from the flushed spans
|
|
661
|
+
if hasattr(client, '_event_queue'):
|
|
662
|
+
try:
|
|
663
|
+
logger.debug("[Exit] Flushing event queue...")
|
|
664
|
+
client._event_queue.force_flush(timeout_seconds=2.0)
|
|
665
|
+
|
|
666
|
+
# Wait for queue to be completely empty before proceeding
|
|
667
|
+
import time
|
|
668
|
+
max_wait = 5.0 # seconds
|
|
669
|
+
start_time = time.time()
|
|
670
|
+
while not client._event_queue.is_empty():
|
|
671
|
+
if time.time() - start_time > max_wait:
|
|
672
|
+
logger.warning("[Exit] EventQueue not empty after timeout")
|
|
673
|
+
break
|
|
674
|
+
time.sleep(0.01) # Small sleep to avoid busy waiting
|
|
675
|
+
|
|
676
|
+
if client._event_queue.is_empty():
|
|
677
|
+
logger.debug("[Exit] EventQueue is empty, proceeding with shutdown")
|
|
678
|
+
|
|
679
|
+
# Clear any stale active sessions (threads may have died without cleanup)
|
|
680
|
+
if hasattr(client, '_active_sessions'):
|
|
681
|
+
with client._active_sessions_lock:
|
|
682
|
+
if client._active_sessions:
|
|
683
|
+
logger.debug(f"[Exit] Clearing {len(client._active_sessions)} remaining active sessions")
|
|
684
|
+
client._active_sessions.clear()
|
|
685
|
+
|
|
686
|
+
# Now shutdown EventQueue
|
|
687
|
+
client._event_queue.shutdown()
|
|
688
|
+
logger.debug("[Exit] Event queue shutdown complete")
|
|
689
|
+
except Exception as e:
|
|
690
|
+
logger.debug(f"[Exit] Event queue cleanup error: {e}")
|
|
691
|
+
|
|
692
|
+
# 3. THIRD: Shutdown TracerProvider after EventQueue is done
|
|
693
|
+
# This ensures all spans can be exported before shutdown
|
|
694
|
+
if hasattr(client, '_tracer_provider') and client._tracer_provider:
|
|
695
|
+
try:
|
|
696
|
+
logger.debug("[Exit] Shutting down TracerProvider...")
|
|
697
|
+
client._tracer_provider.shutdown()
|
|
698
|
+
logger.debug("[Exit] TracerProvider shutdown complete")
|
|
699
|
+
except Exception as e:
|
|
700
|
+
logger.debug(f"[Exit] TracerProvider shutdown error: {e}")
|
|
701
|
+
|
|
702
|
+
# 4. FOURTH: Close HTTP session ONLY after everything else
|
|
703
|
+
# This prevents broken pipes by ensuring all events are sent first
|
|
704
|
+
if hasattr(client, 'request_session'):
|
|
705
|
+
try:
|
|
706
|
+
# Mark client as shutting down to prevent new requests
|
|
707
|
+
client._shutdown = True
|
|
708
|
+
logger.debug("[Exit] Closing HTTP session (queue empty, worker stopped)")
|
|
709
|
+
client.request_session.close()
|
|
710
|
+
logger.debug("[Exit] HTTP session closed gracefully")
|
|
711
|
+
except Exception as e:
|
|
712
|
+
logger.debug(f"[Exit] HTTP session cleanup error: {e}")
|
|
713
|
+
|
|
714
|
+
# 5. FINALLY: Clear singletons
|
|
715
|
+
# Safe to destroy now that all data is flushed
|
|
716
|
+
clear_singletons()
|
|
717
|
+
logger.debug("[Exit] Singleton cleanup complete")
|
|
718
|
+
|
|
719
|
+
except Exception as e:
|
|
720
|
+
# Silent fail on exit to avoid disrupting process termination
|
|
721
|
+
if logger.isEnabledFor(logging.DEBUG):
|
|
722
|
+
logger.debug(f"[Exit] Cleanup error: {e}")
|
|
723
|
+
|
|
724
|
+
|
|
331
725
|
def _signal_handler(signum, frame):
    """Handle an interruption signal: report it, flush telemetry, re-deliver it.

    Every step is best-effort and isolated, so a failure in one step never
    prevents the following steps from running:

    1. Report the signal (name plus interrupted stack) via ``_post_fatal_event``.
    2. Flush OpenTelemetry spans.
    3. Flush the event queue, clear active sessions, shut the queue down.
    4. Shut down the TracerProvider.
    5. Mark the client as shutting down.
    6. Auto-end the session.
    7. Restore the default signal disposition and re-send the signal to this
       process. This always happens, even if an earlier step raised.

    Args:
        signum: Number of the received signal.
        frame: Stack frame that was interrupted, or None.
    """
    try:
        # Best-effort final event for signal exits
        try:
            try:
                name = signal.Signals(signum).name
            except Exception:
                # Not a member of signal.Signals; fall back to the raw number.
                name = str(signum)
            try:
                stack_str = ''.join(traceback.format_stack(frame)) if frame else ''
            except Exception:
                stack_str = ''
            desc = _mask_and_truncate(f"Received signal {name}\n{stack_str}")
            # NOTE(review): 128 + signum presumably follows the shell
            # exit-status convention for death-by-signal -- confirm.
            _post_fatal_event(128 + signum, desc, {"signal": name, "signum": signum})
        except Exception as e:
            logger.debug(f"[Signal] Failed to post fatal event: {e}")

        # Proper shutdown sequence matching atexit handler
        try:
            client = Client()

            # 1. FIRST: Flush OpenTelemetry spans
            if hasattr(client, '_tracer_provider') and client._tracer_provider:
                try:
                    logger.debug(f"[Signal] Flushing OpenTelemetry spans on signal {signum}")
                    client._tracer_provider.force_flush(timeout_millis=2000)  # Shorter timeout for signals
                except Exception as e:
                    logger.debug(f"[Signal] OpenTelemetry flush error: {e}")

            # 2. SECOND: Flush and shutdown EventQueue
            # Isolated in its own try so that a queue failure cannot skip the
            # TracerProvider shutdown or the _shutdown flag below.
            if hasattr(client, "_event_queue"):
                try:
                    logger.debug(f"[Signal] Flushing event queue on signal {signum}")
                    client._event_queue.force_flush(timeout_seconds=2.0)

                    # Clear active sessions to allow shutdown
                    if hasattr(client, '_active_sessions'):
                        with client._active_sessions_lock:
                            client._active_sessions.clear()

                    client._event_queue.shutdown()
                except Exception as e:
                    logger.debug(f"[Signal] Event queue cleanup error: {e}")

            # 3. THIRD: Shutdown TracerProvider after EventQueue
            if hasattr(client, '_tracer_provider') and client._tracer_provider:
                logger.debug(f"[Signal] Shutting down TracerProvider on signal {signum}")
                try:
                    client._tracer_provider.shutdown()
                except Exception as e:
                    logger.debug(f"[Signal] TracerProvider shutdown error: {e}")

            # 4. Mark client as shutting down
            client._shutdown = True

        except Exception as e:
            logger.debug(f"[Signal] Cleanup error: {e}")

        # NOTE(review): the session is auto-ended only after the event queue
        # and TracerProvider are shut down and _shutdown is set, whereas the
        # atexit path (LIFO) runs _auto_end_session before the cleanup
        # handler. Confirm _auto_end_session does not need those resources.
        logger.debug(f"[Signal] Auto-ending session on signal {signum}")
        try:
            _auto_end_session()
        except Exception as e:
            logger.debug(f"[Signal] Auto-end session error: {e}")
    finally:
        # Re-raise the signal for default handling. Done in ``finally`` so the
        # process still terminates as the sender intended even if cleanup failed.
        signal.signal(signum, signal.SIG_DFL)
        os.kill(os.getpid(), signum)
|
|
338
785
|
|
|
339
786
|
|
|
340
|
-
# Register cleanup functions
|
|
341
|
-
atexit.register(
|
|
342
|
-
atexit.register(_auto_end_session)
|
|
787
|
+
# Register cleanup functions.
# atexit runs handlers in reverse registration order, so _auto_end_session
# (registered last) runs first and _cleanup_singleton_on_exit runs after it.
atexit.register(_cleanup_singleton_on_exit)  # Clean up singleton resources on exit
atexit.register(_auto_end_session)  # Auto-end session if enabled

# Register signal handlers for graceful shutdown.
# signal.signal() raises ValueError when called outside the main thread, so
# only install the handlers there; importing this package from a worker
# thread must not fail.
if threading.current_thread() is threading.main_thread():
    signal.signal(signal.SIGINT, _signal_handler)
    signal.signal(signal.SIGTERM, _signal_handler)
|
|
347
794
|
|
|
348
795
|
|
|
349
|
-
def
|
|
350
|
-
|
|
351
|
-
|
|
796
|
+
def create_experiment(
    experiment_name: str,
    pass_fail_rubrics: Optional[list] = None,
    score_rubrics: Optional[list] = None,
    description: Optional[str] = None,
    tags: Optional[list] = None,
    api_key: Optional[str] = None,
    agent_id: Optional[str] = None,
) -> str:
    """
    Create a new experiment for grouping and analyzing sessions.

    Args:
        experiment_name: Name of the experiment (required)
        pass_fail_rubrics: List of pass/fail rubric names to associate
        score_rubrics: List of score rubric names to associate
        description: Description of the experiment
        tags: List of tags for categorization
        api_key: API key (uses the LUCIDIC_API_KEY env variable if not provided)
        agent_id: Agent ID (uses the LUCIDIC_AGENT_ID env variable if not provided)

    Returns:
        experiment_id: ID of the created experiment, as returned by the client

    Raises:
        ValueError: If experiment_name is empty or None
        APIKeyVerificationError: If the API key or the agent ID is neither
            passed in nor set in the environment
    """

    # validation
    if not experiment_name:
        raise ValueError("Experiment name is required")

    if api_key is None:
        api_key = os.getenv("LUCIDIC_API_KEY", None)
        if api_key is None:
            raise APIKeyVerificationError("Make sure to either pass your API key into create_experiment() or set the LUCIDIC_API_KEY environment variable.")
    if agent_id is None:
        agent_id = os.getenv("LUCIDIC_AGENT_ID", None)
        if agent_id is None:
            raise APIKeyVerificationError("Lucidic agent ID not specified. Make sure to either pass your agent ID into create_experiment() or set the LUCIDIC_AGENT_ID environment variable.")

    # combine rubrics into a single list (pass/fail first, then score)
    rubric_names = (pass_fail_rubrics or []) + (score_rubrics or [])

    # get the current client; per the original comment this is the NullClient
    # if lai.init() was never called
    client = Client()
    if not getattr(client, 'initialized', False):
        # not yet initialized (or still the NullClient) -> create a real client
        client = Client(api_key=api_key, agent_id=agent_id)
    elif api_key != client.api_key or agent_id != client.agent_id:
        # Already initialized: this is a re-init with different credentials.
        # api_key and agent_id are guaranteed non-None by the checks above.
        client.set_api_key(api_key)
        client.agent_id = agent_id

    # create experiment
    experiment_id = client.create_experiment(experiment_name=experiment_name, rubric_names=rubric_names, description=description, tags=tags)
    logger.info(f"Created experiment with ID: {experiment_id}")

    return experiment_id
|
|
543
859
|
|
|
544
860
|
|
|
545
|
-
def
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
cost_added: Optional[float] = None,
|
|
550
|
-
model: Optional[str] = None,
|
|
551
|
-
screenshots: Optional[List[str]] = None,
|
|
552
|
-
function_name: Optional[str] = None,
|
|
553
|
-
arguments: Optional[dict] = None,
|
|
554
|
-
) -> None:
|
|
555
|
-
"""
|
|
556
|
-
End the latest event in the current step.
|
|
557
|
-
|
|
558
|
-
Args:
|
|
559
|
-
event_id: ID of the event to end.
|
|
560
|
-
description: Description of the event.
|
|
561
|
-
result: Result of the event.
|
|
562
|
-
cost_added: Cost added by the event.
|
|
563
|
-
model: Model used for the event.
|
|
564
|
-
screenshots: List of screenshots encoded in base64.
|
|
565
|
-
function_name: Name of the function that created the event.
|
|
566
|
-
arguments: Arguments of the function that created the event.
|
|
567
|
-
"""
|
|
861
|
+
def create_event(
    type: str = "generic",
    **kwargs
) -> Optional[str]:
    """
    Create an event on the current session.

    Args:
        type: Event type, forwarded to the session. Defaults to "generic".
        **kwargs: Additional event fields, forwarded unchanged to
            ``client.session.create_event``.

    Returns:
        The value returned by ``client.session.create_event`` (annotated as
        the event ID string), or None when the client has no session.
    """
    client = Client()
    if not client.session:
        # No session to attach the event to: do nothing.
        return None
    return client.session.create_event(type=type, **kwargs)
|
|
572
869
|
|
|
573
870
|
|
|
574
871
|
def get_prompt(
|