langfun 0.1.2.dev202509240805__py3-none-any.whl → 0.1.2.dev202509260805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic. Click here for more details.

langfun/env/__init__.py CHANGED
@@ -13,16 +13,12 @@
13
13
  # limitations under the License.
14
14
  """Environment for LLM agents."""
15
15
 
16
- # pylint: disable=g-importing-member, g-bad-import-order
17
- from langfun.env.interface import EnvironmentId
18
- from langfun.env.interface import SandboxId
19
-
16
+ # pylint: disable=g-importing-member, g-bad-import-order, g-import-not-at-top
20
17
  from langfun.env.interface import EnvironmentError # pylint: disable=redefined-builtin
21
18
  from langfun.env.interface import EnvironmentOutageError
22
19
  from langfun.env.interface import EnvironmentOverloadError
23
20
  from langfun.env.interface import SandboxError
24
21
  from langfun.env.interface import SandboxStateError
25
- # from langfun.env.base import SandboxOverloadError
26
22
 
27
23
  from langfun.env.interface import Environment
28
24
  from langfun.env.interface import Sandbox
@@ -35,4 +31,7 @@ from langfun.env.base_feature import BaseFeature
35
31
  from langfun.env import load_balancers
36
32
  from langfun.env.load_balancers import LoadBalancer
37
33
 
34
+ from langfun.env import event_handlers
35
+ EventHandler = event_handlers.EventHandler
36
+
38
37
  # Google-internal imports.
@@ -34,6 +34,7 @@ import langfun.core as lf
34
34
  from langfun.env import base_sandbox
35
35
  from langfun.env import interface
36
36
  from langfun.env import load_balancers
37
+ from langfun.env.event_handlers import base as event_handler_base
37
38
  import pyglove as pg
38
39
 
39
40
 
@@ -60,15 +61,25 @@ class BaseEnvironment(interface.Environment):
60
61
  'will be used as both min and max size. If 0, sandboxes will be '
61
62
  'created on demand and shutdown when user session ends.'
62
63
  )
63
- ] = 1
64
+ ] = (0, 256)
64
65
 
65
66
  load_balancer: Annotated[
66
67
  load_balancers.LoadBalancer,
67
68
  (
68
- 'The load balancer for the environment.'
69
+ 'The load balancer for the environment to acquire sandboxes.'
69
70
  )
70
71
  ] = load_balancers.RoundRobin()
71
72
 
73
+ sandbox_keepalive_interval: Annotated[
74
+ float | None,
75
+ (
76
+ 'The interval in seconds to send keepalive pings to sandboxes. '
77
+ 'If None, sandbox keepalive is disabled. Please note that sandbox '
78
+ 'keepalive is different from feature housekeeping. Usually sandbox '
79
+ 'keepalive and feature housekeeping are different operations.'
80
+ )
81
+ ] = None
82
+
72
83
  proactive_session_setup: Annotated[
73
84
  bool,
74
85
  (
@@ -79,6 +90,13 @@ class BaseEnvironment(interface.Environment):
79
90
  )
80
91
  ] = True
81
92
 
93
+ event_handlers: Annotated[
94
+ list[event_handler_base.EventHandler],
95
+ (
96
+ 'User handler for the environment events.'
97
+ )
98
+ ] = []
99
+
82
100
  outage_grace_period: Annotated[
83
101
  float,
84
102
  (
@@ -97,13 +115,15 @@ class BaseEnvironment(interface.Environment):
97
115
  )
98
116
  ] = 10.0
99
117
 
100
- stats_report_interval: Annotated[
118
+ housekeep_interval: Annotated[
101
119
  float,
102
120
  (
103
- 'The interval in seconds for reporting the environment stats. '
104
- 'If 0, stats will not be reported.'
121
+ 'The interval in seconds for environment housekeeping. It recycles '
122
+ 'the dead sandboxes in the pool. This interval is the minimal time '
123
+ 'to detect outage while there is no request to obtain new sandboxes.'
124
+ 'This is applicable only when the environment enables pooling.'
105
125
  )
106
- ] = 60.0
126
+ ] = 10.0
107
127
 
108
128
  pool_operation_max_parallelism: Annotated[
109
129
  int,
@@ -132,7 +152,7 @@ class BaseEnvironment(interface.Environment):
132
152
  random if self.random_seed is None else random.Random(self.random_seed)
133
153
  )
134
154
 
135
- self._maintenance_thread = None
155
+ self._housekeep_thread = None
136
156
  self._offline_start_time = None
137
157
 
138
158
  #
@@ -145,6 +165,7 @@ class BaseEnvironment(interface.Environment):
145
165
  sandbox_id: str,
146
166
  reusable: bool,
147
167
  proactive_session_setup: bool,
168
+ keepalive_interval: float | None,
148
169
  ) -> base_sandbox.BaseSandbox:
149
170
  """Creates a sandbox with the given identifier.
150
171
 
@@ -153,6 +174,8 @@ class BaseEnvironment(interface.Environment):
153
174
  reusable: Whether the sandbox is reusable across user sessions.
154
175
  proactive_session_setup: Whether the sandbox performs session setup work
155
176
  before a user session is started.
177
+ keepalive_interval: Interval to ping the sandbox for keeping it alive.
178
+ If None, the sandbox will not be pinged.
156
179
 
157
180
  Returns:
158
181
  The created sandbox.
@@ -170,6 +193,11 @@ class BaseEnvironment(interface.Environment):
170
193
  ).hex[:7]
171
194
  return f'session-{suffix}'
172
195
 
196
+ @property
197
+ def housekeep_counter(self) -> int:
198
+ """Returns the housekeeping counter."""
199
+ return self._housekeep_counter
200
+
173
201
  #
174
202
  # Subclasses can override:
175
203
  #
@@ -189,36 +217,40 @@ class BaseEnvironment(interface.Environment):
189
217
  def _start(self) -> None:
190
218
  """Implementation of starting the environment."""
191
219
  if self.min_pool_size > 0:
192
- self._sandbox_pool = [
193
- sandbox
194
- for _, sandbox, _ in lf.concurrent_map(
195
- lambda i: self._bring_up_sandbox_with_retry(
196
- sandbox_id=str(i), shutdown_env_upon_outage=False
197
- ),
198
- range(self.min_pool_size),
199
- silence_on_errors=None,
200
- max_workers=min(
201
- self.pool_operation_max_parallelism,
202
- self.min_pool_size
203
- ),
204
- )
205
- ]
220
+ # Pre-allocate the sandbox pool before usage.
221
+ self._sandbox_pool = [None] * self.min_pool_size
222
+ for i, sandbox, _ in lf.concurrent_map(
223
+ lambda i: self._bring_up_sandbox_with_retry(
224
+ sandbox_id=str(i), shutdown_env_upon_outage=False
225
+ ),
226
+ range(self.min_pool_size),
227
+ silence_on_errors=None,
228
+ max_workers=min(
229
+ self.pool_operation_max_parallelism,
230
+ self.min_pool_size
231
+ ),
232
+ ):
233
+ self._sandbox_pool[i] = sandbox
234
+
206
235
  self._next_sandbox_id = len(self._sandbox_pool)
207
- self._maintenance_thread = threading.Thread(
208
- target=self._maintenance_loop, daemon=True
209
- )
210
- self._maintenance_count = 0
211
- self._maintenance_thread.start()
236
+
237
+ if self.enable_pooling:
238
+ self._housekeep_thread = threading.Thread(
239
+ target=self._housekeep_loop, daemon=True
240
+ )
241
+ self._housekeep_counter = 0
242
+ self._housekeep_thread.start()
212
243
 
213
244
  def _shutdown(self) -> None:
214
245
  """Implementation of shutting down the environment."""
215
- if (self._maintenance_thread is not None
216
- and threading.current_thread() is not self._maintenance_thread):
217
- self._maintenance_thread.join()
218
- self._maintenance_thread = None
246
+ if (self._housekeep_thread is not None
247
+ and threading.current_thread() is not self._housekeep_thread):
248
+ self._housekeep_thread.join()
249
+ self._housekeep_thread = None
219
250
 
220
251
  def _shutdown_sandbox(sandbox: base_sandbox.BaseSandbox) -> None:
221
- sandbox.shutdown()
252
+ if sandbox is not None:
253
+ sandbox.shutdown()
222
254
 
223
255
  if self._sandbox_pool:
224
256
  _ = list(
@@ -251,7 +283,7 @@ class BaseEnvironment(interface.Environment):
251
283
  @property
252
284
  def enable_pooling(self) -> bool:
253
285
  """Returns whether the environment enables pooling."""
254
- return self.min_pool_size > 0
286
+ return self.max_pool_size > 0
255
287
 
256
288
  @property
257
289
  def status(self) -> interface.Environment.Status:
@@ -303,18 +335,14 @@ class BaseEnvironment(interface.Environment):
303
335
  f'it is in {self._status.value!r} status.'
304
336
  )
305
337
 
338
+ starting_time = time.time()
306
339
  try:
307
- with pg.timeit('env.start') as t:
308
- self._start()
309
- self._start_time = time.time()
310
- pg.logging.info(
311
- '[%s]: %s started in %.2f seconds.',
312
- self.id, self.__class__.__name__, t.elapse
313
- )
340
+ self._start()
341
+ self._start_time = time.time()
314
342
  self._set_status(self.Status.ONLINE)
315
- self.on_start()
343
+ self.on_start(duration=time.time() - starting_time)
316
344
  except BaseException as e:
317
- self.on_start(error=e)
345
+ self.on_start(duration=time.time() - starting_time, error=e)
318
346
  self.shutdown()
319
347
  raise e
320
348
 
@@ -332,13 +360,8 @@ class BaseEnvironment(interface.Environment):
332
360
  self._set_status(self.Status.SHUTTING_DOWN)
333
361
 
334
362
  try:
335
- with pg.timeit('env.shutdown') as t:
336
- self._shutdown()
363
+ self._shutdown()
337
364
  self.on_shutdown()
338
- pg.logging.info(
339
- '[%s]: %s shutdown in %.2f seconds.',
340
- self.id, self.__class__.__name__, t.elapse
341
- )
342
365
  except BaseException as e: # pylint: disable=broad-except
343
366
  self.on_shutdown(error=e)
344
367
  raise e
@@ -410,6 +433,7 @@ class BaseEnvironment(interface.Environment):
410
433
  sandbox_id=sandbox_id,
411
434
  reusable=self.enable_pooling,
412
435
  proactive_session_setup=self.proactive_session_setup,
436
+ keepalive_interval=self.sandbox_keepalive_interval,
413
437
  )
414
438
  for handler in self.event_handlers:
415
439
  sandbox.add_event_handler(handler)
@@ -474,20 +498,10 @@ class BaseEnvironment(interface.Environment):
474
498
  # Environment maintenance loop.
475
499
  #
476
500
 
477
- def _maintenance_loop(self) -> None:
478
- """Maintains the server pool."""
479
- pg.logging.info(
480
- '[%s]: %s maintenance thread started.', self.id, self.__class__.__name__
481
- )
482
- stats_report_time = time.time()
501
+ def _housekeep_loop(self) -> None:
502
+ """Housekeeping loop for the environment."""
483
503
  while self._status not in (self.Status.SHUTTING_DOWN, self.Status.OFFLINE):
484
- if time.time() - stats_report_time > self.stats_report_interval:
485
- pg.logging.info(
486
- '[%s] %s stats: %s.',
487
- self.id, self.__class__.__name__, self.stats()
488
- )
489
- stats_report_time = time.time()
490
-
504
+ housekeep_start_time = time.time()
491
505
  dead_pool_indices = [
492
506
  i for i, s in enumerate(self._sandbox_pool)
493
507
  if s.status == interface.Sandbox.Status.OFFLINE
@@ -496,10 +510,18 @@ class BaseEnvironment(interface.Environment):
496
510
  dead_pool_indices
497
511
  ):
498
512
  self.shutdown()
499
- self._maintenance_count += 1
513
+ self._housekeep_counter += 1
514
+ self.on_housekeep(
515
+ time.time() - housekeep_start_time,
516
+ interface.EnvironmentOutageError(
517
+ environment=self,
518
+ offline_duration=self.offline_duration
519
+ )
520
+ )
500
521
  break
501
- self._maintenance_count += 1
502
- time.sleep(1)
522
+ self._housekeep_counter += 1
523
+ self.on_housekeep(time.time() - housekeep_start_time)
524
+ time.sleep(self.housekeep_interval)
503
525
 
504
526
  def _replace_dead_sandboxes(self, dead_pool_indices: list[int]) -> bool:
505
527
  """Replaces a dead sandbox with a new one.
@@ -526,6 +548,8 @@ class BaseEnvironment(interface.Environment):
526
548
  str(i), shutdown_env_upon_outage=False
527
549
  )
528
550
 
551
+ # TODO(daiyip): Consider loosening the condition to allow some dead
552
+ # sandboxes to be replaced successfully.
529
553
  return not any([
530
554
  error for _, _, error in lf.concurrent_map(
531
555
  _replace, dead_pool_indices,
@@ -540,12 +564,26 @@ class BaseEnvironment(interface.Environment):
540
564
  # Event handlers subclasses can override.
541
565
  #
542
566
 
543
- def on_start(self, error: BaseException | None = None) -> None:
567
+ def on_start(
568
+ self,
569
+ duration: float, error: BaseException | None = None
570
+ ) -> None:
544
571
  """Called when the environment is started."""
545
572
  for handler in self.event_handlers:
546
- handler.on_environment_start(self, error)
573
+ handler.on_environment_start(self, duration, error)
574
+
575
+ def on_housekeep(
576
+ self,
577
+ duration: float,
578
+ error: BaseException | None = None
579
+ ) -> None:
580
+ """Called when the environment finishes a round of housekeeping."""
581
+ housekeep_counter = self.housekeep_counter
582
+ for handler in self.event_handlers:
583
+ handler.on_environment_housekeep(self, housekeep_counter, duration, error)
547
584
 
548
585
  def on_shutdown(self, error: BaseException | None = None) -> None:
549
586
  """Called when the environment is shutdown."""
587
+ lifetime = (time.time() - self.start_time) if self.start_time else 0.0
550
588
  for handler in self.event_handlers:
551
- handler.on_environment_shutdown(self, error)
589
+ handler.on_environment_shutdown(self, lifetime, error)
@@ -23,6 +23,8 @@ the `Environment` and `Sandbox` interfaces directly.
23
23
  """
24
24
 
25
25
  import functools
26
+ import os
27
+ import time
26
28
  from typing import Annotated, Callable
27
29
 
28
30
  from langfun.env import interface
@@ -82,6 +84,7 @@ class BaseFeature(interface.Feature):
82
84
  """Called when the feature is bound."""
83
85
  super()._on_bound()
84
86
  self._sandbox = None
87
+ self._housekeep_counter = 0
85
88
 
86
89
  @functools.cached_property
87
90
  def name(self) -> str:
@@ -104,6 +107,14 @@ class BaseFeature(interface.Feature):
104
107
  assert self._sandbox is not None, 'Feature has not been set up yet.'
105
108
  return self._sandbox
106
109
 
110
+ @property
111
+ def working_dir(self) -> str | None:
112
+ """Returns the working directory of the feature."""
113
+ sandbox_workdir = self.sandbox.working_dir
114
+ if sandbox_workdir is None:
115
+ return None
116
+ return os.path.join(sandbox_workdir, self.name)
117
+
107
118
  #
108
119
  # Setup and teardown of the feature.
109
120
  #
@@ -115,13 +126,14 @@ class BaseFeature(interface.Feature):
115
126
  ) -> None:
116
127
  """Triggers an event handler."""
117
128
  error = None
129
+ start_time = time.time()
118
130
  try:
119
131
  action()
120
132
  except BaseException as e: # pylint: disable=broad-except
121
133
  error = e
122
134
  raise
123
135
  finally:
124
- event_handler(error=error)
136
+ event_handler(duration=time.time() - start_time, error=error)
125
137
 
126
138
  def setup(self, sandbox: interface.Sandbox) -> None:
127
139
  """Sets up the feature."""
@@ -146,7 +158,10 @@ class BaseFeature(interface.Feature):
146
158
 
147
159
  def housekeep(self) -> None:
148
160
  """Performs housekeeping for the feature."""
149
- self._do(self._housekeep, self.on_housekeep)
161
+ try:
162
+ self._do(self._housekeep, self.on_housekeep)
163
+ finally:
164
+ self._housekeep_counter += 1
150
165
 
151
166
  #
152
167
  # Event handlers subclasses can override.
@@ -154,51 +169,58 @@ class BaseFeature(interface.Feature):
154
169
 
155
170
  def on_setup(
156
171
  self,
172
+ duration: float,
157
173
  error: BaseException | None = None
158
174
  ) -> None:
159
175
  """Called when the feature is setup."""
160
- self.sandbox.on_feature_setup(self, error)
176
+ self.sandbox.on_feature_setup(self, duration, error)
161
177
 
162
178
  def on_teardown(
163
179
  self,
180
+ duration: float,
164
181
  error: BaseException | None = None
165
182
  ) -> None:
166
183
  """Called when the feature is teardown."""
167
- self.sandbox.on_feature_teardown(self, error)
184
+ self.sandbox.on_feature_teardown(self, duration, error)
168
185
 
169
186
  def on_housekeep(
170
187
  self,
188
+ duration: float,
171
189
  error: BaseException | None = None
172
190
  ) -> None:
173
191
  """Called when the feature has done housekeeping."""
174
- self.sandbox.on_feature_housekeep(self, error)
192
+ self.sandbox.on_feature_housekeep(
193
+ self, self._housekeep_counter, duration, error
194
+ )
175
195
 
176
196
  def on_setup_session(
177
197
  self,
198
+ duration: float,
178
199
  error: BaseException | None = None,
179
200
  ) -> None:
180
201
  """Called when the feature is setup for a user session."""
181
- self.sandbox.on_feature_setup_session(self, error)
202
+ self.sandbox.on_feature_setup_session(self, duration, error)
182
203
 
183
204
  def on_teardown_session(
184
205
  self,
206
+ duration: float,
185
207
  error: BaseException | None = None,
186
208
  ) -> None:
187
209
  """Called when the feature is teardown for a user session."""
188
- self.sandbox.on_feature_teardown_session(self, error)
210
+ self.sandbox.on_feature_teardown_session(self, duration, error)
189
211
 
190
- def on_session_activity(
212
+ def on_activity(
191
213
  self,
192
- session_id: str,
193
214
  name: str,
194
- error: BaseException | None,
215
+ duration: float,
216
+ error: BaseException | None = None,
195
217
  **kwargs
196
218
  ) -> None:
197
219
  """Called when a sandbox activity is performed."""
198
- self.sandbox.on_session_activity(
199
- session_id=session_id,
200
- name=name,
220
+ self.sandbox.on_activity(
221
+ name=f'{self.name}.{name}',
201
222
  feature=self,
202
223
  error=error,
224
+ duration=duration,
203
225
  **kwargs
204
226
  )