langfun 0.1.2.dev202509020804__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic. Click here for more details.

Files changed (133)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +6 -1
  3. langfun/core/agentic/__init__.py +4 -0
  4. langfun/core/agentic/action.py +412 -103
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +68 -6
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +9 -2
  20. langfun/core/data/conversion/gemini_test.py +12 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +47 -43
  24. langfun/core/eval/base_test.py +4 -4
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +1 -0
  29. langfun/core/eval/v2/checkpointing.py +30 -4
  30. langfun/core/eval/v2/eval_test_helper.py +1 -1
  31. langfun/core/eval/v2/evaluation.py +60 -14
  32. langfun/core/eval/v2/example.py +22 -11
  33. langfun/core/eval/v2/experiment.py +51 -8
  34. langfun/core/eval/v2/metric_values.py +31 -3
  35. langfun/core/eval/v2/metric_values_test.py +32 -0
  36. langfun/core/eval/v2/metrics.py +39 -4
  37. langfun/core/eval/v2/metrics_test.py +14 -0
  38. langfun/core/eval/v2/progress.py +30 -1
  39. langfun/core/eval/v2/progress_test.py +27 -0
  40. langfun/core/eval/v2/progress_tracking_test.py +6 -0
  41. langfun/core/eval/v2/reporting.py +90 -71
  42. langfun/core/eval/v2/reporting_test.py +20 -6
  43. langfun/core/eval/v2/runners.py +27 -7
  44. langfun/core/eval/v2/runners_test.py +3 -0
  45. langfun/core/langfunc.py +45 -130
  46. langfun/core/langfunc_test.py +6 -4
  47. langfun/core/language_model.py +151 -31
  48. langfun/core/language_model_test.py +9 -3
  49. langfun/core/llms/__init__.py +12 -1
  50. langfun/core/llms/anthropic.py +157 -2
  51. langfun/core/llms/azure_openai.py +29 -17
  52. langfun/core/llms/cache/base.py +25 -3
  53. langfun/core/llms/cache/in_memory.py +48 -7
  54. langfun/core/llms/cache/in_memory_test.py +14 -4
  55. langfun/core/llms/compositional.py +25 -1
  56. langfun/core/llms/deepseek.py +30 -2
  57. langfun/core/llms/fake.py +39 -1
  58. langfun/core/llms/fake_test.py +9 -0
  59. langfun/core/llms/gemini.py +43 -7
  60. langfun/core/llms/google_genai.py +34 -1
  61. langfun/core/llms/groq.py +28 -3
  62. langfun/core/llms/llama_cpp.py +23 -4
  63. langfun/core/llms/openai.py +93 -3
  64. langfun/core/llms/openai_compatible.py +148 -27
  65. langfun/core/llms/openai_compatible_test.py +207 -20
  66. langfun/core/llms/openai_test.py +0 -2
  67. langfun/core/llms/rest.py +16 -1
  68. langfun/core/llms/vertexai.py +59 -8
  69. langfun/core/logging.py +1 -1
  70. langfun/core/mcp/__init__.py +10 -0
  71. langfun/core/mcp/client.py +177 -0
  72. langfun/core/mcp/client_test.py +71 -0
  73. langfun/core/mcp/session.py +241 -0
  74. langfun/core/mcp/session_test.py +54 -0
  75. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  76. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  77. langfun/core/mcp/tool.py +256 -0
  78. langfun/core/mcp/tool_test.py +197 -0
  79. langfun/core/memory.py +1 -0
  80. langfun/core/message.py +160 -55
  81. langfun/core/message_test.py +65 -81
  82. langfun/core/modalities/__init__.py +8 -0
  83. langfun/core/modalities/audio.py +21 -1
  84. langfun/core/modalities/image.py +19 -1
  85. langfun/core/modalities/mime.py +62 -3
  86. langfun/core/modalities/pdf.py +19 -1
  87. langfun/core/modalities/video.py +21 -1
  88. langfun/core/modality.py +167 -29
  89. langfun/core/modality_test.py +42 -12
  90. langfun/core/natural_language.py +1 -1
  91. langfun/core/sampling.py +4 -4
  92. langfun/core/sampling_test.py +20 -4
  93. langfun/core/structured/completion.py +34 -44
  94. langfun/core/structured/completion_test.py +23 -43
  95. langfun/core/structured/description.py +54 -50
  96. langfun/core/structured/function_generation.py +29 -12
  97. langfun/core/structured/mapping.py +74 -28
  98. langfun/core/structured/parsing.py +90 -74
  99. langfun/core/structured/parsing_test.py +0 -3
  100. langfun/core/structured/querying.py +242 -156
  101. langfun/core/structured/querying_test.py +95 -64
  102. langfun/core/structured/schema.py +70 -10
  103. langfun/core/structured/schema_generation.py +33 -14
  104. langfun/core/structured/scoring.py +45 -34
  105. langfun/core/structured/tokenization.py +24 -9
  106. langfun/core/subscription.py +2 -2
  107. langfun/core/template.py +175 -50
  108. langfun/core/template_test.py +123 -17
  109. langfun/env/__init__.py +43 -0
  110. langfun/env/base_environment.py +827 -0
  111. langfun/env/base_environment_test.py +473 -0
  112. langfun/env/base_feature.py +304 -0
  113. langfun/env/base_feature_test.py +228 -0
  114. langfun/env/base_sandbox.py +842 -0
  115. langfun/env/base_sandbox_test.py +1235 -0
  116. langfun/env/event_handlers/__init__.py +14 -0
  117. langfun/env/event_handlers/chain.py +233 -0
  118. langfun/env/event_handlers/chain_test.py +253 -0
  119. langfun/env/event_handlers/event_logger.py +472 -0
  120. langfun/env/event_handlers/event_logger_test.py +304 -0
  121. langfun/env/event_handlers/metric_writer.py +726 -0
  122. langfun/env/event_handlers/metric_writer_test.py +214 -0
  123. langfun/env/interface.py +1640 -0
  124. langfun/env/interface_test.py +151 -0
  125. langfun/env/load_balancers.py +59 -0
  126. langfun/env/load_balancers_test.py +139 -0
  127. langfun/env/test_utils.py +497 -0
  128. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
  129. langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
  130. langfun-0.1.2.dev202509020804.dist-info/RECORD +0 -172
  131. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
  132. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
  133. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
@@ -24,7 +24,14 @@ import pyglove as pg
24
24
 
25
25
 
26
26
  class ActionEval(lf.eval.v2.Evaluation):
27
- """Agent evaluation."""
27
+ """Evaluation for agentic actions.
28
+
29
+ `ActionEval` is a specialized evaluation class for executing and evaluating
30
+ agentic actions based on provided inputs. Each input example is expected to
31
+ contain an `action` attribute. The `process` method executes the action
32
+ within a dedicated `Session`, captures the final result, and returns it
33
+ along with the session details in the metadata.
34
+ """
28
35
 
29
36
  action_args: Annotated[
30
37
  dict[str, Any],
@@ -68,7 +75,7 @@ class ExampleView(pg.Object):
68
75
  class ActionEvalV1(lf_eval.Matching):
69
76
  """Base class for action evaluations.
70
77
 
71
- The input function should returns a list of pg.Dict, with `action` and
78
+ The input function should return a list of pg.Dict, with `action` and
72
79
  `groundtruth` fields.
73
80
  """
74
81
  # We override the schema and prompt to dummy values since they are not used.
@@ -34,6 +34,7 @@ class Bar(action_lib.Action):
34
34
  time.sleep(self.simulate_execution_time)
35
35
  session.query('bar', lm=lm)
36
36
  session.add_metadata(note='bar')
37
+ session.update_progress('Query completed')
37
38
  if self.simulate_action_error:
38
39
  raise ValueError('Bar error')
39
40
  return 2 + pg.contextual_value('baz', 0)
@@ -66,12 +67,18 @@ class Foo(action_lib.Action):
66
67
  time.sleep(self.simulate_execution_time[2])
67
68
  return lf_structured.query(f'subtask_{i}', lm=lm)
68
69
 
70
+ self._state = []
69
71
  for i, output, error in session.concurrent_map(
70
- _sub_task, range(3), max_workers=2, silence_on_errors=None,
72
+ _sub_task,
73
+ range(3),
74
+ max_workers=2,
75
+ ordered=True,
76
+ silence_on_errors=None,
71
77
  ):
72
78
  assert isinstance(i, int), i
73
79
  assert isinstance(output, str), output
74
80
  assert error is None, error
81
+ self._state.append(i)
75
82
  return self.x + Bar(
76
83
  simulate_action_error=self.simulate_action_error,
77
84
  simulate_execution_time=self.simulate_execution_time[3]
@@ -118,10 +125,11 @@ class SessionTest(unittest.TestCase):
118
125
  foo = Foo(1)
119
126
  self.assertIsNone(foo.session)
120
127
  self.assertIsNone(foo.invocation)
128
+ self.assertIsNone(foo.state)
121
129
  self.assertIsNone(foo.result)
122
130
  self.assertIsNone(foo.metadata)
123
131
 
124
- session = action_lib.Session(id='agent@1')
132
+ session = action_lib.Session(id='agent@1', verbose=True)
125
133
  self.assertEqual(session.id, 'agent@1')
126
134
  self.assertFalse(session.has_started)
127
135
  self.assertFalse(session.has_stopped)
@@ -130,12 +138,14 @@ class SessionTest(unittest.TestCase):
130
138
  _ = session.to_html()
131
139
 
132
140
  with session:
133
- result = foo(session, lm=lm, verbose=True)
141
+ result = foo(session, lm=lm)
134
142
 
135
143
  self.assertTrue(session.has_started)
136
144
  self.assertTrue(session.has_stopped)
137
145
  self.assertEqual(result, 3)
138
146
  self.assertIsNone(foo.session)
147
+ self.assertEqual(foo.state, [0, 1, 2])
148
+ self.assertIs(foo.invocation.state, foo.state)
139
149
  self.assertEqual(foo.result, 3)
140
150
  self.assertEqual(
141
151
  foo.metadata, dict(note='foo', subtask_0=0, subtask_1=1, subtask_2=2)
@@ -366,7 +376,7 @@ class SessionTest(unittest.TestCase):
366
376
  self.assertFalse(session.has_stopped)
367
377
 
368
378
  session.start()
369
- result = foo(session, lm=lm, verbose=True)
379
+ result = foo(session, lm=lm)
370
380
  session.end(result)
371
381
 
372
382
  self.assertTrue(session.has_started)
@@ -386,7 +396,7 @@ class SessionTest(unittest.TestCase):
386
396
  session = action_lib.Session(id='agent@1')
387
397
  with self.assertRaisesRegex(ValueError, 'Bar error'):
388
398
  with session:
389
- foo(session, lm=lm, verbose=True)
399
+ foo(session, lm=lm)
390
400
  self.assertTrue(session.has_started)
391
401
  self.assertTrue(session.has_stopped)
392
402
  self.assertTrue(session.has_error)
@@ -399,7 +409,7 @@ class SessionTest(unittest.TestCase):
399
409
  foo = Foo(1, simulate_action_error=True)
400
410
  session = action_lib.Session(id='agent@1')
401
411
  with self.assertRaisesRegex(ValueError, 'Please call `Session.start'):
402
- foo(session, lm=lm, verbose=True)
412
+ foo(session, lm=lm)
403
413
 
404
414
  def test_succeed_with_multiple_actions(self):
405
415
  lm = fake.StaticResponse('lm response')
@@ -480,6 +490,33 @@ class SessionTest(unittest.TestCase):
480
490
  ):
481
491
  foo(lm=lm, max_execution_time=1.0)
482
492
 
493
+ def test_event_handler(self):
494
+
495
+ class MyActionHandler(pg.Object, action_lib.SessionEventHandler):
496
+ def _on_bound(self):
497
+ super()._on_bound()
498
+ self.progresses = []
499
+
500
+ def on_action_progress(self, session, action, title, **kwargs):
501
+ self.progresses.append((action.id, title))
502
+
503
+ handler = MyActionHandler()
504
+ session = action_lib.Session(
505
+ id='agent@1',
506
+ event_handler=action_lib.SessionEventHandlerChain(
507
+ handlers=[handler, action_lib.SessionLogging()]
508
+ )
509
+ )
510
+ bar = Bar()
511
+ with session:
512
+ bar(session, lm=fake.StaticResponse('lm response'))
513
+ session.update_progress('Trajectory completed')
514
+
515
+ self.assertEqual(handler.progresses, [
516
+ ('agent@1:/a1', 'Query completed'),
517
+ ('agent@1:', 'Trajectory completed'),
518
+ ])
519
+
483
520
  def test_log(self):
484
521
  session = action_lib.Session()
485
522
  session.debug('hi', x=1, y=2)
@@ -493,6 +530,31 @@ class SessionTest(unittest.TestCase):
493
530
  self.assertIn('agent@', session.id)
494
531
  self.assertIsInstance(session.as_message(), lf.AIMessage)
495
532
 
533
+ def test_query_with_track_if(self):
534
+ lm = fake.StaticResponse('lm response')
535
+ session = action_lib.Session()
536
+
537
+ # Render session to trigger javascript updates to the HTML when
538
+ # operating on the session.
539
+ _ = session.to_html()
540
+ with session:
541
+ # This query will succeed.
542
+ session.query(
543
+ 'prompt1',
544
+ schema=None,
545
+ lm=lm,
546
+ track_if=lambda q: not q.has_error,
547
+ default=None)
548
+ # This query will fail during parsing.
549
+ session.query(
550
+ 'prompt2',
551
+ schema=int,
552
+ lm=lm,
553
+ track_if=lambda q: not q.has_error,
554
+ default=None)
555
+ self.assertEqual(len(session.root.queries), 1)
556
+ self.assertIsNone(session.root.queries[0].error)
557
+
496
558
 
497
559
  if __name__ == '__main__':
498
560
  unittest.main()
@@ -11,18 +11,117 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- """Utility for async IO in Langfun."""
14
+ """Utilities for asynchronous programming in Langfun."""
15
15
 
16
16
  import asyncio
17
- from typing import Any, Callable
17
+ import contextlib
18
+ from typing import Any, Awaitable, Callable, Iterator
19
+ import anyio
18
20
  import pyglove as pg
19
21
 
20
22
 
21
23
  async def invoke_async(
22
- callable_object: Callable[..., Any], *args, **kwargs
24
+ sync_callable: Callable[..., Any], *args, **kwargs
23
25
  ) -> Any:
24
- """Invokes a callable asynchronously with `lf.context` manager enabled."""
26
+ """Invokes a sync callable asynchronously in a separate thread.
27
+
28
+ This is useful for wrapping a sync function into an async function,
29
+ allowing multiple calls of the sync function to run concurrently.
30
+ `lf.context` will be propagated to the thread that runs the sync callable.
31
+
32
+ Args:
33
+ sync_callable: The sync callable to invoke.
34
+ *args: Positional arguments to pass to the callable.
35
+ **kwargs: Keyword arguments to pass to the callable.
36
+
37
+ Returns:
38
+ An awaitable that resolves to the return value of the sync_callable.
39
+ """
25
40
  return await asyncio.to_thread(
26
41
  # Enable `lf.context` manager for async calls.
27
- pg.with_contextual_override(callable_object), *args, **kwargs
42
+ pg.with_contextual_override(sync_callable), *args, **kwargs
28
43
  )
44
+
45
+
46
+ def invoke_sync(
47
+ async_callable: Callable[..., Awaitable[Any]],
48
+ *args,
49
+ **kwargs
50
+ ) -> Any:
51
+ """Invokes an async callable synchronously.
52
+
53
+ This is useful for calling an async function from a sync context.
54
+ If there is an existing async event loop in current thread managed by
55
+ `lf.sync_context_manager`, it will be used for running the async callable.
56
+ Otherwise, `anyio.run` will be used to run the async callable in a new
57
+ event loop.
58
+ `lf.context` will be propagated to the async callable.
59
+
60
+ Args:
61
+ async_callable: The async callable to invoke.
62
+ *args: Positional arguments to pass to the callable.
63
+ **kwargs: Keyword arguments to pass to the callable.
64
+
65
+ Returns:
66
+ The return value of the async_callable.
67
+ """
68
+ async def _invoke():
69
+ return await async_callable(*args, **kwargs)
70
+ invoke_fn = pg.with_contextual_override(_invoke)
71
+ blocking_portal = pg.utils.thread_local_get('__blocking_portal__', None)
72
+ if blocking_portal is None:
73
+ return anyio.run(invoke_fn)
74
+ return blocking_portal.call(invoke_fn)
75
+
76
+
77
+ @contextlib.contextmanager
78
+ def sync_context_manager(
79
+ async_context_manager: contextlib.AbstractAsyncContextManager[Any]
80
+ ) -> Iterator[Any]:
81
+ """Adapts an async context manager to a sync context manager.
82
+
83
+ sync_context_manager installs a blocking portal in current thread to run the
84
+ async context manager in a blocking way. It's useful for running async code in
85
+ sync context managers, e.g. `sync_context_manager` can be nested and share the
86
+ same event loop.
87
+
88
+ Example:
89
+
90
+ ```python
91
+ @contextlib.asynccontextmanager
92
+ async def foo(x):
93
+ try:
94
+ yield x
95
+ finally:
96
+ pass
97
+
98
+ with lf.sync_context_manager(foo(x)) as x:
99
+ with lf.sync_context_manager(foo(y)) as y:
100
+ ...
101
+ ```
102
+
103
+ Args:
104
+ async_context_manager: The async context manager to adapt.
105
+
106
+ Yields:
107
+ The value yielded by the async context manager.
108
+ """
109
+ blocking_portal = pg.utils.thread_local_get('__blocking_portal__', None)
110
+ portal_exit_stack = None
111
+
112
+ try:
113
+ if blocking_portal is None:
114
+ portal_exit_stack = contextlib.ExitStack()
115
+ blocking_portal = portal_exit_stack.enter_context(
116
+ anyio.from_thread.start_blocking_portal()
117
+ )
118
+ pg.utils.thread_local_set('__blocking_portal__', blocking_portal)
119
+ context_manager = blocking_portal.wrap_async_context_manager(
120
+ async_context_manager
121
+ )
122
+ with context_manager as value:
123
+ yield value
124
+ finally:
125
+ if portal_exit_stack is not None:
126
+ portal_exit_stack.close()
127
+ pg.utils.thread_local_del('__blocking_portal__')
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import asyncio
16
+ import contextlib
16
17
  import time
17
18
  import unittest
18
19
 
@@ -34,6 +35,28 @@ class AsyncSupportTest(unittest.TestCase):
34
35
  with pg.contextual_override(z=3):
35
36
  self.assertEqual(asyncio.run(r), 6)
36
37
 
38
+ def test_invoke_sync(self):
39
+ @contextlib.asynccontextmanager
40
+ async def bar(x):
41
+ try:
42
+ yield x
43
+ finally:
44
+ pass
45
+
46
+ async def foo(x, *, y):
47
+ time.sleep(2)
48
+ return x + y + pg.contextual_value('z', 0)
49
+
50
+ with pg.contextual_override(z=3):
51
+ with async_support.sync_context_manager(bar(1)) as x:
52
+ self.assertEqual(x, 1)
53
+ with async_support.sync_context_manager(bar(2)) as y:
54
+ self.assertEqual(y, 2)
55
+ self.assertEqual(async_support.invoke_sync(foo, 1, y=2), 6)
56
+
57
+ with pg.contextual_override(z=2):
58
+ self.assertEqual(async_support.invoke_sync(foo, 1, y=2), 5)
59
+
37
60
 
38
61
  if __name__ == '__main__':
39
62
  unittest.main()
@@ -19,13 +19,23 @@ import pyglove as pg
19
19
 
20
20
 
21
21
  class CodeWithError(pg.Object):
22
- """Python code with error."""
22
+ """A structure representing Python code along with an execution error.
23
+
24
+ This is used as input to a language model for error correction, providing
25
+ the model with the code that failed and the error message it produced.
26
+ """
23
27
 
24
28
  code: str
25
29
  error: str
26
30
 
27
31
 
28
32
  class CorrectedCode(pg.Object):
33
+ """A structure containing corrected Python code.
34
+
35
+ This is used as the output schema when asking a language model to correct
36
+ code, expecting the model to return the fixed code in the `corrected_code`
37
+ field.
38
+ """
29
39
  corrected_code: str
30
40
 
31
41
 
@@ -49,7 +59,7 @@ def run_with_correction(
49
59
  code: The source code that may or may not be problematic.
50
60
  error: An optional initial error for `code` when it's problematic, usually
51
61
  caught from elsewhere when it ran. If None, code will be executed once to
52
- verify if its good and obtain a feedback error message.
62
+ verify if it's good and obtain a feedback error message.
53
63
  global_vars: A dict of str to value as the global variables that could be
54
64
  accessed within the corrected code.
55
65
  lm: Language model to be used. If not specified, it will try to use the `lm`
@@ -57,15 +67,15 @@ def run_with_correction(
57
67
  max_attempts: Max number of attempts for the correction.
58
68
  sandbox: If True, run code in sandbox; If False, run code in current
59
69
  process. If None, run in sandbox first, if the output could not be
60
- serialized and pass to current process, run the code again in current
70
+ serialized and passed to current process, run the code again in current
61
71
  process.
62
72
  permission: The permission to run the code.
63
73
  timeout: The timeout for running the corrected code. If None, there is no
64
74
  timeout. Applicable only when sandbox is set to True.
65
75
  returns_code: If True, the return value is a tuple of (result, final code).
66
76
  Otherwise the return value is the result only.
67
- returns_stdout: If True, the stdout (a str) will be returned.
68
- outputs_intermediate: If True, intermediate output will be outputted as a
77
+ returns_stdout: If True, the stdout (a string) will be returned.
78
+ outputs_intermediate: If True, intermediate output will be output as a
69
79
  dict, with the last line's value accessible by key '__result__'. Otherwise
70
80
  the value of the last line will be returned.
71
81
 
@@ -161,7 +171,7 @@ def correct(
161
171
  code: The source code that may or may not be problematic.
162
172
  error: An optional initial error for `code` when it's problematic, usually
163
173
  caught from elsewhere when it ran. If None, code will be executed once to
164
- verify if its good and obtain a feedback error message.
174
+ verify if it's good and obtain a feedback error message.
165
175
  global_vars: A dict of str to value as the global variables that could be
166
176
  accessed within the corrected code.
167
177
  lm: Language model to be used. If not specified, it will try to use the `lm`
@@ -169,7 +179,7 @@ def correct(
169
179
  max_attempts: Max number of attempts for the correction.
170
180
  sandbox: If True, run code in sandbox; If False, run code in current
171
181
  process. If None, run in sandbox first, if the output could not be
172
- serialized and pass to current process, run the code again in current
182
+ serialized and passed to current process, run the code again in current
173
183
  process.
174
184
  timeout: The timeout for running the corrected code. If None, there is no
175
185
  timeout. Applicable only when sandbox is set to True.
@@ -193,7 +203,7 @@ def correct(
193
203
 
194
204
 
195
205
  def _error_feedback_str(error: Exception) -> str:
196
- """Returns the error str for feedback."""
206
+ """Returns the error string for feedback."""
197
207
  if isinstance(error, pg.coding.CodeError):
198
208
  return pg.decolor(error.format(include_complete_code=False))
199
209
  else:
@@ -201,7 +211,7 @@ def _error_feedback_str(error: Exception) -> str:
201
211
 
202
212
 
203
213
  def _maybe_custom_validate(result: Any) -> Any:
204
- """Apply custom validation through __validate_generation__ method."""
214
+ """Applies custom validation through __validate__ method."""
205
215
  if isinstance(result, dict) and "__result__" in result:
206
216
  r = result["__result__"]
207
217
  else:
@@ -45,17 +45,17 @@ def evaluate(
45
45
  global_vars: An optional dict as the globals that could be referenced by the
46
46
  code.
47
47
  permission: Permission for the Python code to run.
48
- returns_stdout: If True, the stdout (a str) will be returned.
48
+ returns_stdout: If True, the stdout (a string) will be returned.
49
49
  outputs_intermediate: Applicable when returns_stdout is False. If True,
50
- intermediate output will be outputted as a dict, with the last line's
51
- value accessible by key '__result__' and the std output accessible by
50
+ intermediate output will be output as a dict, with the last line's
51
+ value accessible by key '__result__' and the stdout accessible by
52
52
  key '__stdout__'. Otherwise the value of the last line will be returned.
53
53
 
54
54
  Returns:
55
55
  The value of the last line of the code block. Or a dict of variable
56
56
  names of all locals to their evaluated values as the output of the code to
57
57
  run. The value for the last line can be accessed by key '__result__'. Or the
58
- stdout as a str.
58
+ stdout as a string.
59
59
  """
60
60
  return pg.coding.evaluate(
61
61
  parsing.clean(code),
@@ -85,28 +85,30 @@ def run(
85
85
 
86
86
  Args:
87
87
  code: Python code to run.
88
- global_vars: An optional dict of
88
+ global_vars: An optional dict as the globals that could be referenced by the
89
+ code.
89
90
  permission: Permission for the Python code to run.
90
- returns_stdout: If True, the stdout (a str) will be returned.
91
+ returns_stdout: If True, the stdout (a string) will be returned.
91
92
  outputs_intermediate: Applicable when returns_stdout is False. If True,
92
- intermediate output will be outputted as a dict, with the last line's
93
- value accessible by key '__result__' and the std output accessible by
93
+ intermediate output will be output as a dict, with the last line's
94
+ value accessible by key '__result__' and the stdout accessible by
94
95
  key '__stdout__'. Otherwise the value of the last line will be returned.
95
96
  sandbox: If True, run code in sandbox; If False, run code in current
96
97
  process. If None, run in sandbox first, if the output could not be
97
- serialized and pass to current process, run the code again in current
98
+ serialized and passed to current process, run the code again in current
98
99
  process.
99
- timeout: Execution timeout in seconds. If None, wait the code the complete.
100
+ timeout: Execution timeout in seconds. If None, wait for the code to
101
+ complete.
100
102
 
101
103
  Returns:
102
104
  The value of the last line of the code block. Or a dict of variable
103
105
  names of all locals to their evaluated values as the output of the code to
104
106
  run. The value for the last line can be accessed by key '__result__'. Or the
105
- stdout as a str.
107
+ stdout as a string.
106
108
 
107
109
  Raises:
108
110
  TimeoutError: If the execution time exceeds the timeout.
109
- Exception: Exception that are raised from the code.
111
+ Exception: Exceptions that are raised from the code.
110
112
  """
111
113
  return pg.coding.run(
112
114
  parsing.clean(code),
@@ -22,9 +22,13 @@ import pyglove as pg
22
22
 
23
23
 
24
24
  class PythonCode(pg.Object):
25
- """Symbolic class for Python code.
25
+ """Represents a piece of Python code that can be executed.
26
26
 
27
- The value of the last expression of the source will be the returned value.
27
+ When `PythonCode` is instantiated within a `PythonCode.auto_run()` context,
28
+ it automatically executes the code and returns the result of the last
29
+ expression. Otherwise, it acts as a container for the source code, which
30
+ can be executed by calling the instance. The class also supports automatic
31
+ error correction via `lf.coding.run_with_correction` when called.
28
32
  """
29
33
 
30
34
  source: Annotated[
@@ -56,7 +60,7 @@ class PythonCode(pg.Object):
56
60
  Otherwise, auto call will be disabled.
57
61
  sandbox: If True, run code in sandbox; If False, run code in current
58
62
  process. If None, run in sandbox first, if the output could not be
59
- serialized and pass to current process, run the code again in current
63
+ serialized and passed to current process, run the code again in current
60
64
  process. Applicable when `enabled` is set to True.
61
65
  timeout: Timeout in seconds. Applicable when both `enabled` and `sandbox`
62
66
  are set to True.
@@ -98,17 +102,17 @@ class PythonCode(pg.Object):
98
102
  Args:
99
103
  sandbox: If True, run code in sandbox; If False, run code in current
100
104
  process. If None, run in sandbox first, if the output could not be
101
- serialized and pass to current process, run the code again in current
105
+ serialized and passed to current process, run the code again in current
102
106
  process.
103
107
  timeout: Timeout in seconds. If None, there is no timeout. Applicable when
104
108
  sandbox is set to True.
105
109
  global_vars: Global variables that could be accessed from the source code.
106
- returns_stdout: If True, the stdout (a str) will be returned.
110
+ returns_stdout: If True, the stdout (a string) will be returned.
107
111
  outputs_intermediate: Applicable when returns_stdout is False. If True,
108
- intermediate output will be outputted as a dict, with the last line's
109
- value accessible by key '__result__' and the std output accessible by
112
+ intermediate output will be output as a dict, with the last line's
113
+ value accessible by key '__result__' and the stdout accessible by
110
114
  key '__stdout__'. Otherwise the value of the last line will be returned.
111
- autofix: Number of attempts to auto fix the generated code. If 0, autofix
115
+ autofix: Number of attempts to autofix the generated code. If 0, autofix
112
116
  is disabled.
113
117
  autofix_lm: Language model to be used. If not specified, it will try to
114
118
  use the `lm` under `lf.context`.
@@ -117,8 +121,8 @@ class PythonCode(pg.Object):
117
121
  The value of the last expression in the source code. Or a dict of local
118
122
  variable names defined in the source code to their values if
119
123
  `outputs_intermediate` is set to True. The value for the last line can be
120
- accessed by key '__result__'. Or the stdout as a str if `returns_stdout`
121
- is set to True.
124
+ accessed by key '__result__'. Or the stdout as a string if
125
+ `returns_stdout` is set to True.
122
126
 
123
127
  Raises:
124
128
  TimeoutError: If `sandbox` is True and timeout has reached.
@@ -152,12 +156,12 @@ class PythonCode(pg.Object):
152
156
  Args:
153
157
  sandbox: If True, run code in sandbox; If False, run code in current
154
158
  process. If None, run in sandbox first, if the output could not be
155
- serialized and pass to current process, run the code again in current
159
+ serialized and passed to current process, run the code again in current
156
160
  process.
157
161
  timeout: Timeout in seconds. If None, there is no timeout. Applicable when
158
162
  sandbox is set to True.
159
163
  global_vars: Global variables that could be accessed from the source code.
160
- autofix: Number of attempts to auto fix the generated code. If 0, autofix
164
+ autofix: Number of attempts to autofix the generated code. If 0, autofix
161
165
  is disabled. Auto-fix is not supported for 'json' protocol.
162
166
  autofix_lm: Language model to be used. If not specified, it will try to
163
167
  use the `lm` under `lf.context`.
@@ -182,10 +186,11 @@ class PythonCode(pg.Object):
182
186
 
183
187
 
184
188
  class PythonFunction(pg.Object):
185
- """Generated Python function via source code.
189
+ """Represents a Python function defined by source code.
186
190
 
187
- The source code will be directly passed into eval() for execution and the
188
- output of the function will be returned.
191
+ This class takes Python source code that defines a function and makes it
192
+ callable. The source code is evaluated to create a function object, which
193
+ can then be invoked like a regular Python function.
189
194
  """
190
195
 
191
196
  name: str
@@ -214,7 +219,7 @@ class PythonFunction(pg.Object):
214
219
  *args: Positional arguments that will be passed to the implementation.
215
220
  sandbox: If True, run code in sandbox; If False, run code in current
216
221
  process. If None, run in sandbox first, if the output could not be
217
- serialized and pass to current process, run the code again in current
222
+ serialized and passed to current process, run the code again in current
218
223
  process.
219
224
  timeout: Timeout in seconds. If None, there is no timeout. Applicable when
220
225
  sandbox is set to True.
@@ -23,7 +23,14 @@ import pyglove as pg
23
23
 
24
24
 
25
25
  class SandboxOutput(pg.Object):
26
- """Sandbox output."""
26
+ """A structure containing the output from a sandbox execution.
27
+
28
+ Attributes:
29
+ stdout: The standard output captured during execution.
30
+ stderr: The standard error captured during execution.
31
+ output_files: A dictionary of file names to their byte content for files
32
+ generated during execution.
33
+ """
27
34
 
28
35
  stdout: Annotated[
29
36
  str,
@@ -42,7 +49,14 @@ class SandboxOutput(pg.Object):
42
49
 
43
50
 
44
51
  class BaseSandbox(pg.Object):
45
- """Interface and partial implementation for Python sandbox."""
52
+ """Base class for Python code sandboxing.
53
+
54
+ A sandbox provides an isolated environment for executing Python code,
55
+ typically with restrictions on file system access, network calls, or other
56
+ potentially harmful operations. This base class defines the interface for
57
+ sandboxes, including methods for running code (`run`), uploading files
58
+ (`upload`), and managing the sandbox lifecycle (`setup`, `cleanup`).
59
+ """
46
60
 
47
61
  def _on_bound(self):
48
62
  super()._on_bound()
@@ -111,7 +125,13 @@ class BaseSandbox(pg.Object):
111
125
 
112
126
 
113
127
  class MultiProcessingSandbox(BaseSandbox):
114
- """Sandbox using multiprocessing."""
128
+ """A sandbox implementation using Python's `multiprocessing`.
129
+
130
+ This sandbox executes code in a separate process, providing isolation from
131
+ the main process. It uses a temporary directory for file operations,
132
+ which is cleaned up when the sandbox is closed. It relies on
133
+ `pg.coding.run` with `sandbox=True` for execution.
134
+ """
115
135
 
116
136
  def _on_bound(self):
117
137
  super()._on_bound()