langfun 0.1.2.dev202509210803__py3-none-any.whl → 0.1.2.dev202509230805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic. Click here for more details.

@@ -27,7 +27,6 @@ import functools
27
27
  import threading
28
28
  import time
29
29
  from typing import Annotated, Any, Callable, Iterator, Sequence, Type
30
- import uuid
31
30
 
32
31
  from langfun.env import interface
33
32
  import pyglove as pg
@@ -59,6 +58,16 @@ class BaseSandbox(interface.Sandbox):
59
58
  'Interval to ping the sandbox for keeping it alive..'
60
59
  ] = 60.0
61
60
 
61
+ proactive_session_setup: Annotated[
62
+ bool,
63
+ (
64
+ 'If True, the sandbox will perform setup work before a user session '
65
+ 'is started. This is useful for sandboxes that need to perform '
66
+ 'heavy setup work, which could block the user thread for a long '
67
+ 'time. Applicable only when `reusable` is True.'
68
+ )
69
+ ] = True
70
+
62
71
  #
63
72
  # There are no required methods that subclasses must implement.
64
73
  # Subclasses can override the following methods:
@@ -78,37 +87,92 @@ class BaseSandbox(interface.Sandbox):
78
87
  interface.SandboxStateError: If the sandbox is in a bad state.
79
88
  """
80
89
 
90
+ def _set_status(self, status: interface.Sandbox.Status) -> None:
91
+ """Sets the status of the sandbox."""
92
+ assert self._status != status, (self._status, status)
93
+ self.on_status_change(self._status, status)
94
+ self._status = status
95
+
96
+ def _maybe_report_state_error(self, e: BaseException | None) -> None:
97
+ """Reports sandbox state errors."""
98
+ if isinstance(e, interface.SandboxStateError):
99
+ self._state_errors.append(e)
100
+
81
101
  def _setup_features(self) -> None:
82
102
  """Starts the features in the sandbox."""
103
+ # We keep track of the features that have setup called so we can teardown
104
+ # the features when the sandbox is shutdown.
105
+ self._features_with_setup_called.clear()
106
+
83
107
  for feature in self._features.values():
108
+ self._features_with_setup_called.add(feature.name)
84
109
  feature.setup(self)
85
110
 
86
- def _teardown_features(self) -> None:
87
- """Tears down the features in the sandbox."""
111
+ def _setup_session(self) -> None:
112
+ """Sets up a new session for the sandbox."""
113
+ # We keep track of the features that have setup_session called so we can
114
+ # teardown the session for them when the session ends.
115
+ self._features_with_setup_session_called.clear()
116
+
88
117
  for feature in self._features.values():
89
- feature.teardown()
118
+ self._features_with_setup_session_called.add(feature.name)
119
+ feature.setup_session()
90
120
 
91
- def _start_session(self, session_id: str) -> None:
92
- """Starts a user session."""
93
- self._session_id = session_id
94
- self._session_start_time = time.time()
121
+ def _teardown_features(self) -> interface.FeatureTeardownError | None:
122
+ """Tears down the features in the sandbox.
95
123
 
124
+ IMPORTANT: This method shall not raise any exceptions.
125
+
126
+ Returns:
127
+ FeatureTeardownError: If feature teardown failed with errors.
128
+ Otherwise None.
129
+ """
130
+ errors = {}
96
131
  for feature in self._features.values():
97
- feature.setup_session(session_id)
132
+ if feature.name in self._features_with_setup_called:
133
+ try:
134
+ feature.teardown()
135
+ except BaseException as e: # pylint: disable=broad-except
136
+ self._maybe_report_state_error(e)
137
+ errors[feature.name] = e
138
+ if errors:
139
+ return interface.FeatureTeardownError(sandbox=self, errors=errors)
140
+ return None
141
+
142
+ def _start_session(self) -> None:
143
+ """Starts a user session.
98
144
 
99
- def _end_session(self) -> None:
100
- try:
101
- for feature in self._features.values():
102
- feature.teardown_session(self._session_id)
103
- finally:
104
- pg.logging.info(
105
- '[%s]: User session %s ended. (lifetime: %.2f seconds).',
106
- self.id,
107
- self._session_id,
108
- time.time() - self._session_start_time
109
- )
110
- self._session_id = None
111
- self._session_start_time = None
145
+ Raises:
146
+ BaseException: If feature setup failed with user-defined errors.
147
+ SandboxStateError: If sandbox or feature setup fail due to sandbox state
148
+ errors.
149
+ """
150
+ # When pre-session setup is enabled, the session setup is performed
151
+ # before the session is started. Otherwise we setup the session here.
152
+ if not self._enable_pre_session_setup:
153
+ self._setup_session()
154
+
155
+ def _end_session(self) -> interface.SessionTeardownError | None:
156
+ """Ends a user session.
157
+
158
+ IMPORTANT: This method shall not raise any exceptions.
159
+
160
+ Returns:
161
+ SessionTeardownError: If session teardown failed with errors.
162
+ Otherwise None.
163
+ """
164
+ feature_teardown_errors = {}
165
+ for name, feature in self._features.items():
166
+ if name in self._features_with_setup_session_called:
167
+ try:
168
+ feature.teardown_session()
169
+ except BaseException as e: # pylint: disable=broad-except
170
+ self._maybe_report_state_error(e)
171
+ feature_teardown_errors[name] = e
172
+
173
+ return interface.SessionTeardownError(
174
+ sandbox=self, errors=feature_teardown_errors
175
+ ) if feature_teardown_errors else None
112
176
 
113
177
  def _ping(self) -> None:
114
178
  """Implementation of ping for health checking."""
@@ -120,16 +184,17 @@ class BaseSandbox(interface.Sandbox):
120
184
  def _on_bound(self) -> None:
121
185
  """Called when the sandbox is bound."""
122
186
  super()._on_bound()
187
+
123
188
  self._features = pg.Dict({
124
189
  name: pg.clone(feature)
125
190
  for name, feature in self.environment.features.items()
126
191
  })
127
- self._session_id = None
128
- self._session_start_time = None
129
- self._alive = False
130
- self._start_time = None
192
+ self._event_handlers = []
131
193
 
132
- self._needs_housekeep = (
194
+ self._enable_pre_session_setup = (
195
+ self.reusable and self.proactive_session_setup
196
+ )
197
+ self._enables_housekeep = (
133
198
  self.keepalive_interval is not None
134
199
  or any(
135
200
  feature.housekeep_interval is not None
@@ -139,20 +204,48 @@ class BaseSandbox(interface.Sandbox):
139
204
  self._housekeep_thread = None
140
205
  self._housekeep_count = 0
141
206
 
207
+ # Runtime state.
208
+ self._status = self.Status.CREATED
209
+ self._start_time = None
210
+ self._state_errors = []
211
+ self._features_with_setup_called = set()
212
+ self._features_with_setup_session_called = set()
213
+
214
+ self._session_id = None
215
+ self._session_start_time = None
216
+
142
217
  @functools.cached_property
143
218
  def working_dir(self) -> str | None:
144
219
  """Returns the working directory for the sandbox."""
145
220
  return self.id.working_dir(self.environment.root_dir)
146
221
 
147
222
  @property
148
- def is_alive(self) -> bool:
149
- """Returns whether the sandbox is alive."""
150
- return self._alive
223
+ def status(self) -> interface.Sandbox.Status:
224
+ """Returns the status of the sandbox."""
225
+ return self._status
226
+
227
+ def set_acquired(self) -> None:
228
+ """Marks the sandbox as acquired."""
229
+ self._set_status(self.Status.ACQUIRED)
230
+
231
+ def add_event_handler(
232
+ self,
233
+ event_handler: interface.EnvironmentEventHandler | None
234
+ ) -> None:
235
+ """Adds an event handler to the sandbox."""
236
+ self._event_handlers.append(event_handler)
237
+
238
+ def remove_event_handler(
239
+ self,
240
+ event_handler: interface.EnvironmentEventHandler | None
241
+ ) -> None:
242
+ """Removes the event handler for the sandbox."""
243
+ self._event_handlers.remove(event_handler)
151
244
 
152
245
  @property
153
- def is_busy(self) -> bool:
154
- """Returns whether the sandbox is busy."""
155
- return self._session_id not in (None, 'pending')
246
+ def state_errors(self) -> list[interface.SandboxStateError]:
247
+ """Returns all errors encountered during sandbox lifecycle."""
248
+ return self._state_errors
156
249
 
157
250
  @property
158
251
  def features(self) -> dict[str, interface.Feature]:
@@ -166,74 +259,172 @@ class BaseSandbox(interface.Sandbox):
166
259
  def start(self) -> None:
167
260
  """Starts the sandbox.
168
261
 
262
+ State transitions:
263
+ CREATED -> SETTING_UP -> READY: When all sandbox and feature setup
264
+ succeeds.
265
+ CREATED -> SETTING_UP -> SHUTTING_DOWN -> OFFLINE: When sandbox or feature
266
+ setup fails.
267
+
268
+ `start` and `shutdown` should be called in pairs, even when the sandbox
269
+ fails to start. This ensures proper cleanup.
270
+
271
+ Start may fail with two sources of errors:
272
+
273
+ 1. SandboxStateError: If sandbox or feature setup fail due to environment
274
+ outage or sandbox state errors.
275
+ 2. BaseException: If feature setup failed with user-defined errors, this
276
+ could happen when there is bug in the user code or non-environment code
277
+ failure.
278
+
279
+ In both cases, the sandbox will be shutdown automatically, and the error
280
+ will be added to `errors`. The sandbox is considered dead and will not be
281
+ further used.
282
+
169
283
  Raises:
170
- interface.SandboxStateError: If the sandbox fails to start.
284
+ SandboxStateError: If the sandbox is in a bad state.
285
+ BaseException: If feature setup failed with user-defined errors.
171
286
  """
172
- assert not self._alive, 'Sandbox is already alive.'
287
+ assert self._status == self.Status.CREATED, (
288
+ f'Sandbox {self.id} cannot be started because '
289
+ f'it is in {self._status} status.'
290
+ )
173
291
 
174
- def start_impl():
175
- t = time.time()
292
+ t = time.time()
293
+ self._state = self.Status.SETTING_UP
294
+
295
+ try:
296
+ # Start the sandbox.
176
297
  self._start()
298
+
299
+ # Setup the features.
177
300
  self._setup_features()
178
301
 
179
- # We mark the sandbox as alive after the setup before the maintenance
180
- # thread is started. This is to avoid the maintenance thread from
181
- # immediately shutting down the sandbox because it's not alive yet.
182
- self._alive = True
183
- self._start_time = time.time()
302
+ # Setup the first session if pre-session setup is enabled.
303
+ if self._enable_pre_session_setup:
304
+ self._setup_session()
184
305
 
185
- if self._needs_housekeep:
306
+ if self._enables_housekeep:
186
307
  self._housekeep_thread = threading.Thread(
187
308
  target=self._housekeep_loop, daemon=True
188
309
  )
189
310
  self._housekeep_thread.start()
190
311
 
312
+ self._start_time = time.time()
313
+
314
+ # Mark the sandbox as ready when all setup succeeds.
315
+ self._set_status(self.Status.READY)
316
+
317
+ self.on_start()
191
318
  pg.logging.info(
192
319
  '[%s]: Sandbox started in %.2f seconds.',
193
320
  self.id, time.time() - t
194
321
  )
195
-
196
- interface.call_with_event(
197
- action=start_impl,
198
- event_handler=self.on_start,
199
- )
322
+ except BaseException as e: # pylint: disable=broad-except
323
+ pg.logging.error(
324
+ '[%s]: Sandbox failed to start: %s',
325
+ self.id, e
326
+ )
327
+ self._maybe_report_state_error(e)
328
+ self.on_start(e)
329
+ self.shutdown()
330
+ raise e
200
331
 
201
332
  def shutdown(self) -> None:
202
333
  """Shuts down the sandbox.
203
334
 
335
+ State transitions:
336
+ SHUTTING_DOWN -> SHUTTING_DOWN: No operation.
337
+ OFFLINE -> OFFLINE: No operation.
338
+ SETTING_UP -> SHUTTING_DOWN -> OFFLINE: When sandbox and feature
339
+ setup fails.
340
+ IN_SESSION -> SHUTTING_DOWN -> OFFLINE: When user session exits while
341
+ sandbox is set not to reuse, or session teardown fails.
342
+ FREE -> SHUTTING_DOWN -> OFFLINE: When sandbox is shutdown when the
343
+ environment is shutting down, or housekeeping loop shuts down the
344
+ sandbox due to housekeeping failures.
345
+
346
+ Please be aware that `shutdown` will be called whenever an operation on the
347
+ sandbox encounters a critical error. This means, `shutdown` should not make
348
+ the assumption that the sandbox is in a healthy state, even `start` could
349
+ fail. As a result, `shutdown` must allow re-entry and be thread-safe with
350
+ other sandbox operations.
351
+
352
+ Shutdown may fail with two sources of errors:
353
+
354
+ 1. SandboxStateError: If the sandbox is in a bad state, and feature teardown
355
+ logic depending on a healthy sandbox may fail. In such case, we do not
356
+ raise error to the user as the user session is considered completed. The
357
+ sandbox is abandoned and new user sessions will be served on other
358
+ sandboxes.
359
+
360
+ 2. BaseException: The sandbox is in good state, but user code raises error
361
+ due to bug or non-environment code failure. In such case, errors will be
362
+ raised to the user so the error could be surfaced and handled properly.
363
+ The sandbox is treated as shutdown and will not be further used.
364
+
204
365
  Raises:
205
- interface.SandboxStateError: If the sandbox is in a bad state.
366
+ BaseException: If feature teardown failed with user-defined errors.
206
367
  """
207
- if not self._alive:
368
+
369
+ # Allow re-entry.
370
+ if self._status in (
371
+ interface.Sandbox.Status.SHUTTING_DOWN,
372
+ interface.Sandbox.Status.OFFLINE
373
+ ):
374
+ return
375
+
376
+ # End current session and shutdown the sandbox if the sandbox is in session.
377
+ if self._status == self.Status.IN_SESSION:
378
+ self.end_session(shutdown_sandbox=True)
208
379
  return
209
380
 
210
- self._alive = False
381
+ self._set_status(interface.Sandbox.Status.SHUTTING_DOWN)
211
382
  shutdown_start_time = time.time()
212
- def shutdown_impl():
213
- self._teardown_features()
383
+
384
+ if (self._housekeep_thread is not None
385
+ and threading.current_thread() is not self._housekeep_thread):
386
+ self._housekeep_thread.join()
387
+ self._housekeep_thread = None
388
+
389
+ teardown_error = self._teardown_features()
390
+ try:
214
391
  self._shutdown()
215
- if (self._housekeep_thread is not None
216
- and threading.current_thread() is not self._housekeep_thread):
217
- self._housekeep_thread.join()
218
- self._housekeep_thread = None
392
+ self._set_status(interface.Sandbox.Status.OFFLINE)
393
+
219
394
  pg.logging.info(
220
- '[%s]: Sandbox shutdown in %.2f seconds. (lifetime: %.2f seconds)',
395
+ '[%s]: Sandbox shutdown in %.2f seconds. '
396
+ '(lifetime: %.2f seconds, teardown errors: %s)',
221
397
  self.id,
222
398
  time.time() - shutdown_start_time,
223
- time.time() - self._start_time if self._start_time else 0
399
+ time.time() - self._start_time if self._start_time else 0,
400
+ teardown_error
401
+ )
402
+ self.on_shutdown(teardown_error)
403
+ shutdown_error = None
404
+ except BaseException as e: # pylint: disable=broad-except
405
+ shutdown_error = e
406
+ self._maybe_report_state_error(e)
407
+ self._set_status(interface.Sandbox.Status.OFFLINE)
408
+ pg.logging.error(
409
+ '[%s]: Sandbox shutdown with error: %s',
410
+ self.id, e
224
411
  )
412
+ self.on_shutdown(teardown_error or shutdown_error)
225
413
 
226
- interface.call_with_event(
227
- action=shutdown_impl,
228
- event_handler=self.on_shutdown,
229
- )
414
+ # We raise non-state errors to the user following timely order, so the user
415
+ # code could be surfaced and handled properly.
416
+ if (teardown_error is not None
417
+ and teardown_error.has_non_sandbox_state_error):
418
+ raise teardown_error
419
+
420
+ if shutdown_error is not None and not isinstance(
421
+ shutdown_error, interface.SandboxStateError
422
+ ):
423
+ raise shutdown_error
230
424
 
231
425
  def ping(self) -> None:
232
426
  """Pings the sandbox to check if it is alive."""
233
- interface.call_with_event(
234
- action=self._ping,
235
- event_handler=self.on_ping,
236
- )
427
+ self._ping()
237
428
 
238
429
  #
239
430
  # API related to a user session.
@@ -254,49 +445,183 @@ class BaseSandbox(interface.Sandbox):
254
445
  """
255
446
  return self._session_id
256
447
 
257
- def set_pending(self) -> None:
258
- """Marks the sandbox as pending for new session."""
259
- self._session_id = 'pending'
448
+ def start_session(
449
+ self,
450
+ session_id: str,
451
+ ) -> None:
452
+ """Begins a user session with the sandbox.
453
+
454
+ State transitions:
455
+ ACQUIRED -> SETTING_UP -> IN_SESSION: When session setup succeeds.
456
+ ACQUIRED -> SETTING_UP -> SHUTTING_DOWN -> OFFLINE: When session setup
457
+ fails.
260
458
 
261
- @property
262
- def is_pending(self) -> bool:
263
- """Returns whether the sandbox is pending for new session."""
264
- return self._session_id == 'pending'
459
+ A session is a sequence of stateful interactions with the sandbox.
460
+ Across different sessions the sandbox is considered stateless.
461
+ `start_session` and `end_session` should always be called in pairs, even
462
+ when the session fails to start. `Sandbox.new_session` context manager is
463
+ the recommended way to use `start_session` and `end_session` in pairs.
265
464
 
266
- def start_session(self, session_id: str) -> None:
267
- """Begins a user session with the sandbox.
465
+ Starting a session may fail with two sources of errors:
466
+
467
+ 1. SandboxStateError: If the sandbox is in a bad state or session setup
468
+ failed.
469
+
470
+ 2. BaseException: If session setup failed with user-defined errors.
471
+
472
+ In both cases, the sandbox will be shutdown automatically and the
473
+ session will be considered ended. The error will be added to `errors`.
474
+ Future sessions will be served on other sandboxes.
268
475
 
269
476
  Args:
270
477
  session_id: The identifier for the user session.
271
478
 
272
479
  Raises:
273
- interface.SandboxError: If the sandbox already has a user session
274
- or the session cannot be started.
480
+ SandboxStateError: If the sandbox is already in a bad state or session
481
+ setup failed.
482
+ BaseException: If session setup failed with user-defined errors.
275
483
  """
276
- assert self._session_id in (None, 'pending'), (
277
- 'A user session is already active for this sandbox.'
484
+ assert self._status == self.Status.ACQUIRED, (
485
+ f'Sandbox {self.id} is not in acquired state (status={self._status}).'
278
486
  )
279
- interface.call_with_event(
280
- action=self._start_session,
281
- event_handler=self.on_session_start,
282
- action_kwargs={'session_id': session_id},
283
- event_handler_kwargs={'session_id': session_id},
487
+ assert self._session_id is None, (
488
+ f'A user session {self._session_id} is already active '
489
+ f'for sandbox {self.id}.'
284
490
  )
491
+ self._set_status(self.Status.SETTING_UP)
492
+
493
+ self._session_id = session_id
494
+ self._session_start_time = time.time()
285
495
 
286
- def end_session(self) -> None:
287
- """Ends the user session with the sandbox."""
288
- assert self._session_id not in (None, 'pending'), (
496
+ try:
497
+ self._start_session()
498
+ self._set_status(self.Status.IN_SESSION)
499
+ self.on_session_start(session_id)
500
+ except BaseException as e: # pylint: disable=broad-except
501
+ self._maybe_report_state_error(e)
502
+ self.on_session_start(session_id, e)
503
+ self.shutdown()
504
+ raise e
505
+
506
+ def end_session(self, shutdown_sandbox: bool = False) -> None:
507
+ """Ends the user session with the sandbox.
508
+
509
+ State transitions:
510
+ IN_SESSION -> READY: When user session exits normally, and sandbox is set
511
+ to reuse.
512
+ IN_SESSION -> SHUTTING_DOWN -> OFFLINE: When user session exits while
513
+ sandbox is set not to reuse, or session teardown fails.
514
+ IN_SESSION -> SETTING_UP -> READY: When user session exits normally, and
515
+ sandbox is set to reuse, and proactive session setup is enabled.
516
+ IN_SESSION -> SETTING_UP -> SHUTTING_DOWN -> OFFLINE: When user session
517
+ exits normally, and proactive session setup is enabled but fails.
518
+ not (IN_SESSION) -> same state: No operation
519
+
520
+ `end_session` should always be called for each `start_session` call, even
521
+ when the session fails to start, to ensure proper cleanup.
522
+
523
+ `end_session` may fail with two sources of errors:
524
+
525
+ 1. SandboxStateError: If the sandbox is in a bad state or session teardown
526
+ failed.
527
+
528
+ 2. BaseException: If session teardown failed with user-defined errors.
529
+
530
+ In both cases, the sandbox will be shutdown automatically and the
531
+ session will be considered ended. The error will be added to `errors`.
532
+ Future sessions will be served on other sandboxes.
533
+
534
+ However, SandboxStateError encountered during `end_session` will NOT be
535
+ raised to the user as the user session is considered completed.
536
+
537
+ Args:
538
+ shutdown_sandbox: If True, the sandbox will be shutdown after session
539
+ teardown.
540
+
541
+ Raises:
542
+ BaseException: If session teardown failed with user-defined errors.
543
+ """
544
+ if self._status not in (
545
+ self.Status.IN_SESSION,
546
+ ):
547
+ return
548
+
549
+ assert self._session_id is not None, (
289
550
  'No user session is active for this sandbox'
290
551
  )
291
- try:
292
- interface.call_with_event(
293
- action=self._end_session,
294
- event_handler=self.on_session_end,
295
- event_handler_kwargs={'session_id': self._session_id},
296
- )
297
- finally:
298
- if not self.reusable:
299
- self.shutdown()
552
+ shutdown_sandbox = shutdown_sandbox or not self.reusable
553
+
554
+ # Teardown features for the current session.
555
+ end_session_error = self._end_session()
556
+ previous_session_id = self._session_id
557
+ self._session_id = None
558
+ self._features_with_setup_session_called.clear()
559
+
560
+ # If there is no state error, and proactive session setup is enabled,
561
+ # set up the next session proactively.
562
+ if not self.state_errors:
563
+ if not shutdown_sandbox and self._enable_pre_session_setup:
564
+ def _setup_next_session():
565
+ try:
566
+ self._setup_session()
567
+ self._set_status(interface.Sandbox.Status.READY)
568
+ except BaseException as e: # pylint: disable=broad-except
569
+ pg.logging.error(
570
+ '[%s]: Shutting down sandbox due to practively setting up '
571
+ 'next session failed: %s',
572
+ self.id,
573
+ e
574
+ )
575
+ self._maybe_report_state_error(e)
576
+ self.shutdown()
577
+
578
+ # End session before setting up the next session.
579
+ self.on_session_end(previous_session_id)
580
+
581
+ # Mark the sandbox as setting up to prevent it from being acquired by
582
+ # other threads.
583
+ self._set_status(interface.Sandbox.Status.SETTING_UP)
584
+
585
+ # TODO(daiyip): Consider using a thread pool to perform next session
586
+ # setup.
587
+ threading.Thread(target=_setup_next_session).start()
588
+ else:
589
+ # End session before reporting sandbox status change.
590
+ self.on_session_end(previous_session_id)
591
+
592
+ # If shutdown is requested, mark the sandbox as acquired to prevent it
593
+ # from being acquired by other threads.
594
+ self._set_status(
595
+ interface.Sandbox.Status.ACQUIRED if shutdown_sandbox else
596
+ interface.Sandbox.Status.READY
597
+ )
598
+
599
+ # Otherwise, shutdown the sandbox.
600
+ else:
601
+ self.on_session_end(previous_session_id, self.state_errors[0])
602
+ self._set_status(interface.Sandbox.Status.ACQUIRED)
603
+ shutdown_sandbox = True
604
+
605
+ pg.logging.info(
606
+ '[%s]: User session %s ended. '
607
+ '(lifetime: %.2f seconds, teardown errors: %s).',
608
+ self.id,
609
+ self._session_id,
610
+ time.time() - self._session_start_time,
611
+ end_session_error
612
+ )
613
+ self._session_start_time = None
614
+ self._session_event_handler = None
615
+
616
+ if shutdown_sandbox:
617
+ self.shutdown()
618
+
619
+ # We only raise errors if teardown error contains non-sandbox-state error,
620
+ # meaning that the user code may have bug or other non-environment
621
+ # failures.
622
+ if (end_session_error is not None
623
+ and end_session_error.has_non_sandbox_state_error):
624
+ raise end_session_error # pylint: disable=raising-bad-type
300
625
 
301
626
  #
302
627
  # Housekeeping.
@@ -308,7 +633,7 @@ class BaseSandbox(interface.Sandbox):
308
633
  last_ping = now
309
634
  last_housekeep_time = {name: now for name in self._features.keys()}
310
635
 
311
- while self._alive:
636
+ while self._status not in (self.Status.SHUTTING_DOWN, self.Status.OFFLINE):
312
637
  if self.keepalive_interval is not None:
313
638
  if time.time() - last_ping > self.keepalive_interval:
314
639
  try:
@@ -321,6 +646,7 @@ class BaseSandbox(interface.Sandbox):
321
646
  str(e)
322
647
  )
323
648
  self._housekeep_count += 1
649
+ self._maybe_report_state_error(e)
324
650
  self.shutdown()
325
651
  break
326
652
  last_ping = time.time()
@@ -339,13 +665,141 @@ class BaseSandbox(interface.Sandbox):
339
665
  'Shutting down sandbox...',
340
666
  self.id,
341
667
  feature.name,
342
- str(e)
668
+ e,
343
669
  )
670
+ self._maybe_report_state_error(e)
344
671
  self.shutdown()
345
672
  break
346
673
  self._housekeep_count += 1
347
674
  time.sleep(1)
348
675
 
676
+ #
677
+ # Event handlers subclasses can override.
678
+ #
679
+
680
+ def on_start(self, error: BaseException | None = None) -> None:
681
+ """Called when the sandbox is started."""
682
+ for handler in self._event_handlers:
683
+ handler.on_sandbox_start(self.environment, self, error)
684
+
685
+ def on_status_change(
686
+ self,
687
+ old_status: interface.Sandbox.Status,
688
+ new_status: interface.Sandbox.Status,
689
+ ) -> None:
690
+ """Called when the sandbox status changes."""
691
+ for handler in self._event_handlers:
692
+ handler.on_sandbox_status_change(
693
+ self.environment, self, old_status, new_status
694
+ )
695
+
696
+ def on_shutdown(self, error: BaseException | None = None) -> None:
697
+ """Called when the sandbox is shutdown."""
698
+ for handler in self._event_handlers:
699
+ handler.on_sandbox_shutdown(self.environment, self, error)
700
+
701
+ def on_feature_setup(
702
+ self,
703
+ feature: interface.Feature,
704
+ error: BaseException | None = None
705
+ ) -> None:
706
+ """Called when a feature is setup."""
707
+ for handler in self._event_handlers:
708
+ handler.on_feature_setup(
709
+ self.environment, self, feature, error
710
+ )
711
+
712
+ def on_feature_teardown(
713
+ self,
714
+ feature: interface.Feature,
715
+ error: BaseException | None = None
716
+ ) -> None:
717
+ """Called when a feature is torn down."""
718
+ for handler in self._event_handlers:
719
+ handler.on_feature_teardown(
720
+ self.environment, self, feature, error
721
+ )
722
+
723
+ def on_feature_setup_session(
724
+ self,
725
+ feature: interface.Feature,
726
+ error: BaseException | None = None
727
+ ) -> None:
728
+ """Called when a feature is setup for a user session."""
729
+ for handler in self._event_handlers:
730
+ handler.on_feature_setup_session(
731
+ self.environment, self, feature, self.session_id, error
732
+ )
733
+
734
+ def on_feature_teardown_session(
735
+ self,
736
+ feature: interface.Feature,
737
+ error: BaseException | None = None
738
+ ) -> None:
739
+ """Called when a feature is torn down for a user session."""
740
+ for handler in self._event_handlers:
741
+ handler.on_feature_teardown_session(
742
+ self.environment, self, feature, self.session_id, error
743
+ )
744
+
745
+ def on_feature_housekeep(
746
+ self,
747
+ feature: interface.Feature,
748
+ error: BaseException | None = None
749
+ ) -> None:
750
+ """Called when a feature is housekeeping."""
751
+ for handler in self._event_handlers:
752
+ handler.on_feature_housekeep(
753
+ self.environment, self, feature, error
754
+ )
755
+
756
+ def on_session_start(
757
+ self,
758
+ session_id: str,
759
+ error: BaseException | None = None
760
+ ) -> None:
761
+ """Called when the user session starts."""
762
+ for handler in self._event_handlers:
763
+ handler.on_session_start(
764
+ self.environment, self, session_id, error
765
+ )
766
+
767
+ def on_session_activity(
768
+ self,
769
+ session_id: str,
770
+ name: str,
771
+ feature: interface.Feature | None = None,
772
+ error: BaseException | None = None,
773
+ **kwargs
774
+ ) -> None:
775
+ """Called when a sandbox activity is performed."""
776
+ for handler in self._event_handlers:
777
+ handler.on_session_activity(
778
+ session_id=session_id,
779
+ name=name,
780
+ environment=self.environment,
781
+ sandbox=self,
782
+ feature=feature,
783
+ error=error,
784
+ **kwargs
785
+ )
786
+
787
+ def on_session_end(
788
+ self,
789
+ session_id: str,
790
+ error: BaseException | None = None
791
+ ) -> None:
792
+ """Called when the user session ends."""
793
+ for handler in self._event_handlers:
794
+ handler.on_session_end(
795
+ self.environment, self, session_id, error
796
+ )
797
+
798
+
799
+ #
800
+ # Sandbox service decorator.
801
+ #
802
+
349
803
 
350
804
  def sandbox_service(
351
805
  critical_errors: Sequence[
@@ -358,9 +812,38 @@ def sandbox_service(
358
812
  error matches any of the critical errors. Consequently, the sandbox will be
359
813
  shutdown automatically when the error is raised.
360
814
 
361
- if the decorated method returns a context manager, a wrapper context manager
815
+ Example:
816
+
817
+ ```
818
+ with env:
819
+ with env.sandbox() as sb:
820
+ try:
821
+ sb.test_feature.do_something_with_non_state_error()
822
+ except ValueError:
823
+ # sandbox will not be shutdown.
824
+ pass
825
+
826
+ try:
827
+ sb.test_feature.do_something_with_state_error()
828
+ except ValueError:
829
+ assert sb.status == sb.Status.OFFLINE
830
+ ```
831
+
832
+ If the decorated method returns a context manager, a wrapper context manager
362
833
  will be returned, which will end the session when exiting the context.
363
834
 
835
+ Example:
836
+
837
+ ```
838
+ with env:
839
+ with env.test_feature.do_something_with_context_manager() as result:
840
+ # sandbox will be alive during the whole context manager cycle.
841
+ ```
842
+
843
+ For sandbox service methods, an optional `session_id` argument can be passed
844
+ to create a new session for the service call, even if its signature does not
845
+ contain a `session_id` argument.
846
+
364
847
  Args:
365
848
  critical_errors: A sequence of exception types or tuples of exception type
366
849
  and error messages (described in regular expression), when matched, treat
@@ -379,31 +862,45 @@ def sandbox_service(
379
862
  'service method. Please use `self.session_id` instead.'
380
863
  )
381
864
 
865
+ def to_kwargs(*args, **kwargs):
866
+ num_non_self_args = len(signature.arg_names) - 1
867
+ if len(args) > num_non_self_args:
868
+ assert signature.varargs is not None, (signature, args)
869
+ kwargs[signature.varargs.name] = tuple(args[num_non_self_args:])
870
+ args = args[:num_non_self_args]
871
+ for i in range(len(args)):
872
+ # The first argument is `self`.
873
+ kwargs[signature.arg_names[i + 1]] = args[i]
874
+ return kwargs
875
+
382
876
  @functools.wraps(func)
383
877
  def method_wrapper(self, *args, **kwargs) -> Any:
384
878
  """Helper function to safely execute logics in the sandbox."""
385
- assert isinstance(self, (interface.Sandbox, interface.Feature)), self
879
+ assert isinstance(self, (BaseSandbox, interface.Feature)), self
386
880
  sandbox = self.sandbox if isinstance(self, interface.Feature) else self
387
881
 
388
882
  # When a capability is directly accessed from the environment,
389
- # we scope the function call within a short-lived sandbox session. This
883
+ # we create a new session for the capability call. This
390
884
  # prevents the sandbox from being reused for other feature calls.
391
- if sandbox.is_pending:
885
+ if sandbox.status == interface.Sandbox.Status.ACQUIRED:
392
886
  new_session = True
393
- session_id = kwargs.get('session_id', f'session-{uuid.uuid4().hex[:7]}')
887
+ new_session_id = kwargs.get('session_id')
888
+ if new_session_id is None:
889
+ new_session_id = sandbox.environment.new_session_id()
890
+
891
+ # If it's a feature method called from the environment, start a new
892
+ # session for the feature call.
893
+ sandbox.start_session(new_session_id)
394
894
  else:
395
895
  new_session = False
396
- session_id = sandbox.session_id
397
896
 
398
897
  kwargs.pop('session_id', None)
898
+ session_id = sandbox.session_id
399
899
  result = None
900
+ state_error = None
400
901
  error = None
401
- try:
402
- # If it's a feature method called from the environment, start a new
403
- # session for the feature call.
404
- if new_session:
405
- sandbox.start_session(session_id)
406
902
 
903
+ try:
407
904
  # Execute the service function.
408
905
  result = func(self, *args, **kwargs)
409
906
 
@@ -417,30 +914,39 @@ def sandbox_service(
417
914
  # Otherwise, return the result and end the session in the finally block.
418
915
  return result
419
916
  except interface.SandboxStateError as e:
917
+ sandbox._maybe_report_state_error(e) # pylint: disable=protected-access
918
+ state_error = e
420
919
  error = e
421
920
  raise
422
921
  except BaseException as e:
922
+ error = e
423
923
  if pg.match_error(e, critical_errors):
424
- error = e
425
- raise interface.SandboxStateError(
924
+ state_error = interface.SandboxStateError(
426
925
  'Sandbox encountered an unexpected error executing '
427
926
  f'`{func.__name__}` (args={args!r}, kwargs={kwargs!r}): {e}',
428
927
  sandbox=self
429
- ) from e
928
+ )
929
+ sandbox._maybe_report_state_error(state_error) # pylint: disable=protected-access
930
+ raise state_error from e
430
931
  raise
431
932
  finally:
432
- if error is not None:
933
+ if session_id is not None:
934
+ self.on_session_activity(
935
+ name=func.__name__,
936
+ session_id=session_id,
937
+ error=error,
938
+ **to_kwargs(*args, **kwargs),
939
+ )
940
+
941
+ if state_error is not None:
433
942
  sandbox.shutdown()
434
-
435
- # End the session if it's from a feature method and the result is not a
436
- # context manager.
437
- if (new_session
438
- and not isinstance(result, contextlib.AbstractContextManager)):
439
- sandbox.end_session()
440
-
441
- self.on_session_activity(
442
- session_id=session_id, error=error, args=args, **kwargs
443
- )
943
+ elif (new_session
944
+ and not isinstance(result, contextlib.AbstractContextManager)):
945
+ # End the session if it's from a feature method and the result is not
946
+ # a context manager.
947
+ sandbox.end_session(
948
+ shutdown_sandbox=isinstance(error, interface.SandboxStateError)
949
+ )
444
950
  return method_wrapper
445
951
  return decorator
446
952