PyPI - langfun - Versions diffs - 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl - Mend

langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of langfun might be problematic. Click here for more details.

Files changed (155) hide show

langfun/core/__init__.py +2 -0
langfun/core/agentic/__init__.py +4 -1
langfun/core/agentic/action.py +447 -29
langfun/core/agentic/action_eval.py +9 -2
langfun/core/agentic/action_test.py +149 -21
langfun/core/async_support.py +32 -3
langfun/core/coding/python/correction.py +19 -9
langfun/core/coding/python/execution.py +14 -12
langfun/core/coding/python/generation.py +21 -16
langfun/core/coding/python/sandboxing.py +23 -3
langfun/core/component.py +42 -3
langfun/core/concurrent.py +70 -6
langfun/core/concurrent_test.py +1 -0
langfun/core/console.py +1 -1
langfun/core/data/conversion/anthropic.py +12 -3
langfun/core/data/conversion/anthropic_test.py +8 -6
langfun/core/data/conversion/gemini.py +9 -2
langfun/core/data/conversion/gemini_test.py +12 -9
langfun/core/data/conversion/openai.py +145 -31
langfun/core/data/conversion/openai_test.py +161 -17
langfun/core/eval/base.py +47 -43
langfun/core/eval/base_test.py +5 -5
langfun/core/eval/matching.py +5 -2
langfun/core/eval/patching.py +3 -3
langfun/core/eval/scoring.py +4 -3
langfun/core/eval/v2/__init__.py +1 -0
langfun/core/eval/v2/checkpointing.py +64 -6
langfun/core/eval/v2/checkpointing_test.py +9 -2
langfun/core/eval/v2/eval_test_helper.py +103 -2
langfun/core/eval/v2/evaluation.py +91 -16
langfun/core/eval/v2/evaluation_test.py +9 -3
langfun/core/eval/v2/example.py +50 -40
langfun/core/eval/v2/example_test.py +16 -8
langfun/core/eval/v2/experiment.py +74 -8
langfun/core/eval/v2/experiment_test.py +19 -0
langfun/core/eval/v2/metric_values.py +31 -3
langfun/core/eval/v2/metric_values_test.py +32 -0
langfun/core/eval/v2/metrics.py +157 -44
langfun/core/eval/v2/metrics_test.py +39 -18
langfun/core/eval/v2/progress.py +30 -1
langfun/core/eval/v2/progress_test.py +27 -0
langfun/core/eval/v2/progress_tracking.py +12 -3
langfun/core/eval/v2/progress_tracking_test.py +6 -1
langfun/core/eval/v2/reporting.py +90 -71
langfun/core/eval/v2/reporting_test.py +24 -6
langfun/core/eval/v2/runners/__init__.py +30 -0
langfun/core/eval/v2/{runners.py → runners/base.py} +59 -142
langfun/core/eval/v2/runners/beam.py +341 -0
langfun/core/eval/v2/runners/beam_test.py +131 -0
langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
langfun/core/eval/v2/runners/debug.py +40 -0
langfun/core/eval/v2/runners/debug_test.py +76 -0
langfun/core/eval/v2/runners/parallel.py +100 -0
langfun/core/eval/v2/runners/parallel_test.py +95 -0
langfun/core/eval/v2/runners/sequential.py +47 -0
langfun/core/eval/v2/runners/sequential_test.py +172 -0
langfun/core/langfunc.py +45 -130
langfun/core/langfunc_test.py +7 -5
langfun/core/language_model.py +141 -21
langfun/core/language_model_test.py +54 -3
langfun/core/llms/__init__.py +9 -1
langfun/core/llms/anthropic.py +157 -2
langfun/core/llms/azure_openai.py +29 -17
langfun/core/llms/cache/base.py +25 -3
langfun/core/llms/cache/in_memory.py +48 -7
langfun/core/llms/cache/in_memory_test.py +14 -4
langfun/core/llms/compositional.py +25 -1
langfun/core/llms/deepseek.py +30 -2
langfun/core/llms/fake.py +32 -1
langfun/core/llms/gemini.py +55 -17
langfun/core/llms/gemini_test.py +84 -0
langfun/core/llms/google_genai.py +34 -1
langfun/core/llms/groq.py +28 -3
langfun/core/llms/llama_cpp.py +23 -4
langfun/core/llms/openai.py +36 -3
langfun/core/llms/openai_compatible.py +148 -27
langfun/core/llms/openai_compatible_test.py +207 -20
langfun/core/llms/openai_test.py +0 -2
langfun/core/llms/rest.py +12 -1
langfun/core/llms/vertexai.py +58 -8
langfun/core/logging.py +1 -1
langfun/core/mcp/client.py +77 -22
langfun/core/mcp/client_test.py +8 -35
langfun/core/mcp/session.py +94 -29
langfun/core/mcp/session_test.py +54 -0
langfun/core/mcp/tool.py +151 -22
langfun/core/mcp/tool_test.py +197 -0
langfun/core/memory.py +1 -0
langfun/core/message.py +160 -55
langfun/core/message_test.py +65 -81
langfun/core/modalities/__init__.py +8 -0
langfun/core/modalities/audio.py +21 -1
langfun/core/modalities/image.py +19 -1
langfun/core/modalities/mime.py +64 -3
langfun/core/modalities/mime_test.py +11 -0
langfun/core/modalities/pdf.py +19 -1
langfun/core/modalities/video.py +21 -1
langfun/core/modality.py +167 -29
langfun/core/modality_test.py +42 -12
langfun/core/natural_language.py +1 -1
langfun/core/sampling.py +4 -4
langfun/core/sampling_test.py +20 -4
langfun/core/structured/__init__.py +2 -24
langfun/core/structured/completion.py +34 -44
langfun/core/structured/completion_test.py +23 -43
langfun/core/structured/description.py +54 -50
langfun/core/structured/function_generation.py +29 -12
langfun/core/structured/mapping.py +81 -37
langfun/core/structured/parsing.py +95 -79
langfun/core/structured/parsing_test.py +0 -3
langfun/core/structured/querying.py +215 -142
langfun/core/structured/querying_test.py +65 -29
langfun/core/structured/schema/__init__.py +49 -0
langfun/core/structured/schema/base.py +664 -0
langfun/core/structured/schema/base_test.py +531 -0
langfun/core/structured/schema/json.py +174 -0
langfun/core/structured/schema/json_test.py +121 -0
langfun/core/structured/schema/python.py +316 -0
langfun/core/structured/schema/python_test.py +410 -0
langfun/core/structured/schema_generation.py +33 -14
langfun/core/structured/scoring.py +47 -36
langfun/core/structured/tokenization.py +26 -11
langfun/core/subscription.py +2 -2
langfun/core/template.py +174 -49
langfun/core/template_test.py +123 -17
langfun/env/__init__.py +8 -2
langfun/env/base_environment.py +320 -128
langfun/env/base_environment_test.py +473 -0
langfun/env/base_feature.py +92 -15
langfun/env/base_feature_test.py +228 -0
langfun/env/base_sandbox.py +84 -361
langfun/env/base_sandbox_test.py +1235 -0
langfun/env/event_handlers/__init__.py +1 -1
langfun/env/event_handlers/chain.py +233 -0
langfun/env/event_handlers/chain_test.py +253 -0
langfun/env/event_handlers/event_logger.py +95 -98
langfun/env/event_handlers/event_logger_test.py +21 -21
langfun/env/event_handlers/metric_writer.py +225 -140
langfun/env/event_handlers/metric_writer_test.py +23 -6
langfun/env/interface.py +854 -40
langfun/env/interface_test.py +112 -2
langfun/env/load_balancers_test.py +23 -2
langfun/env/test_utils.py +126 -84
{langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
langfun-0.1.2.dev202511270805.dist-info/RECORD +215 -0
langfun/core/eval/v2/runners_test.py +0 -343
langfun/core/structured/schema.py +0 -987
langfun/core/structured/schema_test.py +0 -982
langfun/env/base_test.py +0 -1481
langfun/env/event_handlers/base.py +0 -350
langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
{langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
{langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0

langfun/core/agentic/action_eval.py CHANGED Viewed

@@ -24,7 +24,14 @@ import pyglove as pg
 class ActionEval(lf.eval.v2.Evaluation):
-  """Agent evaluation."""
+  """Evaluation for agentic actions.
+  `ActionEval` is a specialized evaluation class for executing and evaluating
+  agentic actions based on provided inputs. Each input example is expected to
+  contain an `action` attribute. The `process` method executes the action
+  within a dedicated `Session`, captures the final result, and returns it
+  along with the session details in the metadata.
+  """
   action_args: Annotated[
       dict[str, Any],
@@ -68,7 +75,7 @@ class ExampleView(pg.Object):
 class ActionEvalV1(lf_eval.Matching):
   """Base class for action evaluations.
-  The input function should returns a list of pg.Dict, with `action` and
+  The input function should return a list of pg.Dict, with `action` and
   `groundtruth` fields.
   """
   # We override the schema and prompt to dummy values since they are not used.

langfun/core/agentic/action_test.py CHANGED Viewed

@@ -52,6 +52,7 @@ class Foo(action_lib.Action):
     with session.track_phase('prepare'):
       session.info('Begin Foo', x=1)
       time.sleep(self.simulate_execution_time[0])
+      Bar()(session, lm=lm)
       session.query(
           'foo',
           schema=int if self.simulate_query_error else None,
@@ -65,6 +66,7 @@ class Foo(action_lib.Action):
     def _sub_task(i):
       session.add_metadata(**{f'subtask_{i}': i})
       time.sleep(self.simulate_execution_time[2])
+      Bar()(session, lm=lm)
       return lf_structured.query(f'subtask_{i}', lm=lm)
     self._state = []
@@ -88,6 +90,50 @@ class Foo(action_lib.Action):
     lf_structured.query('additional query', lm=lm)
+class ExecutionUnitPositionTest(unittest.TestCase):
+  def test_basics(self):
+    pos1 = action_lib.ExecutionUnit.Position(None, 0)
+    self.assertEqual(repr(pos1), 'Position(0)')
+    self.assertEqual(str(pos1), '')
+    self.assertIsNone(pos1.parent)
+    self.assertEqual(pos1.index, 0)
+    self.assertEqual(pos1.indices(), (0,))
+    self.assertEqual(pos1, (0,))
+    self.assertEqual(pos1, '')
+    self.assertEqual(pos1, action_lib.ExecutionUnit.Position(None, 0))
+    self.assertNotEqual(pos1, 1)
+    self.assertNotEqual(pos1, (1,))
+    self.assertNotEqual(pos1, action_lib.ExecutionUnit.Position(None, 1))
+    pos2 = action_lib.ExecutionUnit.Position(pos1, 0)
+    self.assertEqual(repr(pos2), 'Position(0, 0)')
+    self.assertEqual(str(pos2), '1')
+    self.assertEqual(pos2, '1')
+    self.assertEqual(pos2.parent, pos1)
+    self.assertEqual(pos2.index, 0)
+    self.assertEqual(pos2.indices(), (0, 0))
+    self.assertNotEqual(pos1, pos2)
+    self.assertLess(pos1, pos2)
+    self.assertGreater(pos2, pos1)
+    self.assertEqual(
+        hash(pos2),
+        hash(
+            action_lib.ExecutionUnit.Position(
+                action_lib.ExecutionUnit.Position(None, 0), 0
+            )
+        )
+    )
+    pos3 = action_lib.ExecutionUnit.Position(pos2, 0)
+    self.assertEqual(str(pos3), '1.1')
+    self.assertEqual(pos3, '1.1')
+    self.assertEqual(pos3.parent, pos2)
+    self.assertEqual(pos3.index, 0)
+    self.assertEqual(pos3.indices(), (0, 0, 0))
+    self.assertEqual(pos3.to_str(separator='>'), '1>1')
 class ActionInvocationTest(unittest.TestCase):
   def test_basics(self):
@@ -108,9 +154,7 @@ class ExecutionTraceTest(unittest.TestCase):
     self.assertEqual(execution.id, '')
     root = action_lib.ActionInvocation(action=action_lib.RootAction())
-    action_invocation = action_lib.ActionInvocation(
-        action=Foo(1)
-    )
+    action_invocation = action_lib.ActionInvocation(action=Foo(1))
     root.execution.append(action_invocation)
     self.assertEqual(action_invocation.execution.id, '/a1')
@@ -153,6 +197,7 @@ class SessionTest(unittest.TestCase):
     self.assertIsInstance(session.root.action, action_lib.RootAction)
     self.assertIs(session.current_action, session.root)
+    self.assertIs(session.metadata, session.root.metadata)
     #
     # Inspecting the root invocation.
@@ -175,20 +220,25 @@ class SessionTest(unittest.TestCase):
     )
     # The root space should have one action (foo), no queries, and no logs.
+    self.assertEqual(len(root.execution_units), 1)
     self.assertEqual(len(root.actions), 1)
     self.assertEqual(len(root.queries), 0)
     self.assertEqual(len(root.logs), 0)
-    # 1 query from Bar, 2 from Foo and 3 from parallel executions.
-    self.assertEqual(len(session.all_queries), 6)
-    self.assertEqual(len(root.all_queries), 6)
-    # 2 actions: Foo and Bar.
-    self.assertEqual(len(session.all_actions), 2)
-    self.assertEqual(len(root.all_actions), 2)
-    # 1 log from Bar and 1 from Foo.
-    self.assertEqual(len(session.all_logs), 2)
-    self.assertEqual(len(root.all_logs), 2)
+    # 2 queries from Bar, 2 from Foo and 2 * 3 from parallel executions.
+    self.assertEqual(len(session.all_queries), 10)
+    self.assertEqual(len(root.all_queries), 10)
+    # 6 actions: Foo and 2 Bar, and 3 Bar from parallel executions.
+    self.assertEqual(len(session.all_actions), 6)
+    self.assertEqual(
+        [str(a.position) for a in session.all_actions],
+        ['1', '1.1', '1.2.1.1', '1.2.2.1', '1.2.3.1', '1.3']
+    )
+    self.assertEqual(len(root.all_actions), 6)
+    # 2 logs from Bar, 1 from Foo, and 3 from Bar in parallel executions.
+    self.assertEqual(len(session.all_logs), 6)
+    self.assertEqual(len(root.all_logs), 6)
     self.assertIs(session.usage_summary, root.usage_summary)
-    self.assertEqual(root.usage_summary.total.num_requests, 6)
+    self.assertEqual(root.usage_summary.total.num_requests, 10)
     # Inspecting the top-level action (Foo)
     foo_invocation = root.execution[0]
@@ -200,15 +250,19 @@ class SessionTest(unittest.TestCase):
     # Prepare phase.
     prepare_phase = foo_invocation.execution[0]
+    self.assertIsNone(prepare_phase.position)
     self.assertIsInstance(prepare_phase, action_lib.ExecutionTrace)
     self.assertEqual(prepare_phase.id, 'agent@1:/a1/prepare')
-    self.assertEqual(len(prepare_phase.items), 2)
+    self.assertEqual(len(prepare_phase.items), 3)
     self.assertTrue(prepare_phase.has_started)
     self.assertTrue(prepare_phase.has_stopped)
-    self.assertEqual(prepare_phase.usage_summary.total.num_requests, 1)
+    self.assertEqual(prepare_phase.usage_summary.total.num_requests, 2)
     self.assertIsInstance(prepare_phase.items[0], lf.logging.LogEntry)
-    self.assertIsInstance(prepare_phase.items[1], lf_structured.QueryInvocation)
-    self.assertEqual(prepare_phase.items[1].id, 'agent@1:/a1/prepare/q1')
+    self.assertIsInstance(prepare_phase.items[1], action_lib.ActionInvocation)
+    self.assertIs(prepare_phase.items[1].parent_execution_unit, foo_invocation)
+    self.assertEqual(prepare_phase.items[1].id, 'agent@1:/a1/prepare/a1')
+    self.assertIsInstance(prepare_phase.items[2], lf_structured.QueryInvocation)
+    self.assertEqual(prepare_phase.items[2].id, 'agent@1:/a1/prepare/q1')
     # Tracked queries.
     query_invocation = foo_invocation.execution[1]
@@ -230,20 +284,44 @@ class SessionTest(unittest.TestCase):
     # Tracked parallel executions.
     parallel_executions = foo_invocation.execution[2]
+    # root (0) > foo (0) > parallel executions (1)
+    self.assertEqual(parallel_executions.position, (0, 0, 1))
     self.assertEqual(parallel_executions.id, 'agent@1:/a1/p1')
     self.assertIsInstance(parallel_executions, action_lib.ParallelExecutions)
+    self.assertIs(
+        parallel_executions.all_actions[0].parent_execution_unit,
+        parallel_executions
+    )
+    self.assertIs(
+        parallel_executions.all_actions[0].parent_action,
+        foo_invocation
+    )
     self.assertEqual(len(parallel_executions), 3)
     self.assertEqual(parallel_executions[0].id, 'agent@1:/a1/p1/b1')
     self.assertEqual(parallel_executions[1].id, 'agent@1:/a1/p1/b2')
     self.assertEqual(parallel_executions[2].id, 'agent@1:/a1/p1/b3')
+    self.assertEqual(len(parallel_executions[0].execution_units), 1)
+    self.assertEqual(len(parallel_executions[1].execution_units), 1)
+    self.assertEqual(len(parallel_executions[2].execution_units), 1)
     self.assertEqual(len(parallel_executions[0].queries), 1)
+    self.assertEqual(len(parallel_executions[0].all_queries), 2)
     self.assertEqual(len(parallel_executions[1].queries), 1)
+    self.assertEqual(len(parallel_executions[1].all_queries), 2)
     self.assertEqual(len(parallel_executions[2].queries), 1)
+    self.assertEqual(len(parallel_executions[2].all_queries), 2)
+    self.assertEqual(len(parallel_executions.execution_units), 0)
+    self.assertEqual(len(parallel_executions.actions), 0)
+    self.assertEqual(len(parallel_executions.queries), 0)
+    self.assertEqual(len(parallel_executions.logs), 0)
+    self.assertEqual(len(parallel_executions.all_actions), 3)
+    self.assertEqual(len(parallel_executions.all_queries), 6)
+    self.assertEqual(len(parallel_executions.all_logs), 3)
     # Invocation to Bar.
     bar_invocation = foo_invocation.execution[3]
     self.assertIs(bar_invocation.parent_action, foo_invocation)
-    self.assertEqual(bar_invocation.id, 'agent@1:/a1/a1')
+    self.assertIs(bar_invocation.parent_execution_unit, foo_invocation)
+    self.assertEqual(bar_invocation.id, 'agent@1:/a1/a5')
     self.assertIsInstance(bar_invocation, action_lib.ActionInvocation)
     self.assertIsInstance(bar_invocation.action, Bar)
     self.assertEqual(bar_invocation.result, 2)
@@ -497,26 +575,51 @@ class SessionTest(unittest.TestCase):
         super()._on_bound()
         self.progresses = []
+      def on_session_start(self, session):
+        session.add_metadata(progresses=pg.Ref(self.progresses))
       def on_action_progress(self, session, action, title, **kwargs):
         self.progresses.append((action.id, title))
     handler = MyActionHandler()
+    self.assertIs(handler.get(MyActionHandler), handler)
+    self.assertIsNone(handler.get(action_lib.SessionLogging))
+    handler_chain = action_lib.SessionEventHandlerChain(
+        handlers=[handler, action_lib.SessionLogging()]
+    )
+    self.assertIs(handler_chain.get(MyActionHandler), handler)
+    self.assertIs(
+        handler_chain.get(action_lib.SessionLogging),
+        handler_chain.handlers[1]
+    )
     session = action_lib.Session(
         id='agent@1',
-        event_handler=action_lib.SessionEventHandlerChain(
-            handlers=[handler, action_lib.SessionLogging()]
-        )
+        event_handler=handler_chain
     )
     bar = Bar()
     with session:
       bar(session, lm=fake.StaticResponse('lm response'))
       session.update_progress('Trajectory completed')
+    self.assertIs(session.metadata['progresses'], handler.progresses)
     self.assertEqual(handler.progresses, [
         ('agent@1:/a1', 'Query completed'),
         ('agent@1:', 'Trajectory completed'),
     ])
+  def test_clone(self):
+    event_handler = action_lib.SessionLogging()
+    session = action_lib.Session(event_handler=event_handler)
+    other = session.clone()
+    self.assertIsNot(session, other)
+    self.assertIs(other.event_handler, event_handler)
+    other = session.clone(deep=True)
+    self.assertIsNot(session, other)
+    self.assertIsNot(other.event_handler, session.event_handler)
   def test_log(self):
     session = action_lib.Session()
     session.debug('hi', x=1, y=2)
@@ -530,6 +633,31 @@ class SessionTest(unittest.TestCase):
     self.assertIn('agent@', session.id)
     self.assertIsInstance(session.as_message(), lf.AIMessage)
+  def test_query_with_track_if(self):
+    lm = fake.StaticResponse('lm response')
+    session = action_lib.Session()
+    # Render session to trigger javascript updates to the HTML when
+    # operating on the session.
+    _ = session.to_html()
+    with session:
+      # This query will succeed.
+      session.query(
+          'prompt1',
+          schema=None,
+          lm=lm,
+          track_if=lambda q: not q.has_error,
+          default=None)
+      # This query will fail during parsing.
+      session.query(
+          'prompt2',
+          schema=int,
+          lm=lm,
+          track_if=lambda q: not q.has_error,
+          default=None)
+    self.assertEqual(len(session.root.queries), 1)
+    self.assertIsNone(session.root.queries[0].error)
 if __name__ == '__main__':
   unittest.main()

langfun/core/async_support.py CHANGED Viewed

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Utility for async IO in Langfun."""
+"""Utilities for asynchronous programming in Langfun."""
 import asyncio
 import contextlib
@@ -23,7 +23,20 @@ import pyglove as pg
 async def invoke_async(
     sync_callable: Callable[..., Any], *args, **kwargs
 ) -> Any:
-  """Invokes a callable asynchronously with `lf.context` manager enabled."""
+  """Invokes a sync callable asynchronously in a separate thread.
+  This is useful for wrapping a sync function into an async function,
+  allowing multiple calls of the sync function to run concurrently.
+  `lf.context` will be propagated to the thread that runs the sync callable.
+  Args:
+    sync_callable: The sync callable to invoke.
+    *args: Positional arguments to pass to the callable.
+    **kwargs: Keyword arguments to pass to the callable.
+  Returns:
+    An awaitable that resolves to the return value of the sync_callable.
+  """
   return await asyncio.to_thread(
       # Enable `lf.context` manager for async calls.
       pg.with_contextual_override(sync_callable), *args, **kwargs
@@ -35,7 +48,23 @@ def invoke_sync(
     *args,
     **kwargs
 ) -> Any:
-  """Invokes a async callable synchronously."""
+  """Invokes an async callable synchronously.
+  This is useful for calling an async function from a sync context.
+  If there is an existing async event loop in current thread managed by
+  `lf.sync_context_manager`, it will be used for running the async callable.
+  Otherwise, `anyio.run` will be used to run the async callable in a new
+  event loop.
+  `lf.context` will be propagated to the async callable.
+  Args:
+    async_callable: The async callable to invoke.
+    *args: Positional arguments to pass to the callable.
+    **kwargs: Keyword arguments to pass to the callable.
+  Returns:
+    The return value of the async_callable.
+  """
   async def _invoke():
     return await async_callable(*args, **kwargs)
   invoke_fn = pg.with_contextual_override(_invoke)

langfun/core/coding/python/correction.py CHANGED Viewed

@@ -19,13 +19,23 @@ import pyglove as pg
 class CodeWithError(pg.Object):
-  """Python code with error."""
+  """A structure representing Python code along with an execution error.
+  This is used as input to a language model for error correction, providing
+  the model with the code that failed and the error message it produced.
+  """
   code: str
   error: str
 class CorrectedCode(pg.Object):
+  """A structure containing corrected Python code.
+  This is used as the output schema when asking a language model to correct
+  code, expecting the model to return the fixed code in the `corrected_code`
+  field.
+  """
   corrected_code: str
@@ -49,7 +59,7 @@ def run_with_correction(
     code: The source code that may or may not be problematic.
     error: An optional initial error for `code` when it's problematic, usually
       caught from elsewhere when it ran. If None, code will be executed once to
-      verify if its good and obtain a feedback error message.
+      verify if it's good and obtain a feedback error message.
     global_vars: A dict of str to value as the global variables that could be
       accessed within the corrected code.
     lm: Language model to be used. If not specified, it will try to use the `lm`
@@ -57,15 +67,15 @@ def run_with_correction(
     max_attempts: Max number of attempts for the correction.
     sandbox: If True, run code in sandbox; If False, run code in current
       process. If None, run in sandbox first, if the output could not be
-      serialized and pass to current process, run the code again in current
+      serialized and passed to current process, run the code again in current
       process.
     permission: The permission to run the code.
     timeout: The timeout for running the corrected code. If None, there is no
       timeout. Applicable only when sandbox is set to True.
     returns_code: If True, the return value is a tuple of (result, final code).
       Otherwise the return value is the result only.
-    returns_stdout: If True, the stdout (a str) will be returned.
-    outputs_intermediate: If True, intermediate output will be outputted as a
+    returns_stdout: If True, the stdout (a string) will be returned.
+    outputs_intermediate: If True, intermediate output will be output as a
       dict, with the last line's value accessible by key '__result__'. Otherwise
       the value of the last line will be returned.
@@ -161,7 +171,7 @@ def correct(
     code: The source code that may or may not be problematic.
     error: An optional initial error for `code` when it's problematic, usually
       caught from elsewhere when it ran. If None, code will be executed once to
-      verify if its good and obtain a feedback error message.
+      verify if it's good and obtain a feedback error message.
     global_vars: A dict of str to value as the global variables that could be
       accessed within the corrected code.
     lm: Language model to be used. If not specified, it will try to use the `lm`
@@ -169,7 +179,7 @@ def correct(
     max_attempts: Max number of attempts for the correction.
     sandbox: If True, run code in sandbox; If False, run code in current
       process. If None, run in sandbox first, if the output could not be
-      serialized and pass to current process, run the code again in current
+      serialized and passed to current process, run the code again in current
       process.
     timeout: The timeout for running the corrected code. If None, there is no
       timeout. Applicable only when sandbox is set to True.
@@ -193,7 +203,7 @@ def correct(
 def _error_feedback_str(error: Exception) -> str:
-  """Returns the error str for feedback."""
+  """Returns the error string for feedback."""
   if isinstance(error, pg.coding.CodeError):
     return pg.decolor(error.format(include_complete_code=False))
   else:
@@ -201,7 +211,7 @@ def _error_feedback_str(error: Exception) -> str:
 def _maybe_custom_validate(result: Any) -> Any:
-  """Apply custom validation through __validate_generation__ method."""
+  """Applies custom validation through __validate__ method."""
   if isinstance(result, dict) and "__result__" in result:
     r = result["__result__"]
   else:

langfun/core/coding/python/execution.py CHANGED Viewed

@@ -45,17 +45,17 @@ def evaluate(
     global_vars: An optional dict as the globals that could be referenced by the
       code.
     permission: Permission for the Python code to run.
-    returns_stdout: If True, the stdout (a str) will be returned.
+    returns_stdout: If True, the stdout (a string) will be returned.
     outputs_intermediate: Applicable when returns_stdout is False. If True,
-      intermediate output will be outputted as a dict, with the last line's
-      value accessible by key '__result__' and the std output accessible by
+      intermediate output will be output as a dict, with the last line's
+      value accessible by key '__result__' and the stdout accessible by
       key '__stdout__'. Otherwise the value of the last line will be returned.
   Returns:
     The value of the last line of the code block. Or a dict of variable
     names of all locals to their evaluated values as the output of the code to
     run. The value for the last line can be accessed by key '__result__'. Or the
-    stdout as a str.
+    stdout as a string.
   """
   return pg.coding.evaluate(
       parsing.clean(code),
@@ -85,28 +85,30 @@ def run(
   Args:
     code: Python code to run.
-    global_vars: An optional dict of
+    global_vars: An optional dict as the globals that could be referenced by the
+      code.
     permission: Permission for the Python code to run.
-    returns_stdout: If True, the stdout (a str) will be returned.
+    returns_stdout: If True, the stdout (a string) will be returned.
     outputs_intermediate: Applicable when returns_stdout is False. If True,
-      intermediate output will be outputted as a dict, with the last line's
-      value accessible by key '__result__' and the std output accessible by
+      intermediate output will be output as a dict, with the last line's
+      value accessible by key '__result__' and the stdout accessible by
       key '__stdout__'. Otherwise the value of the last line will be returned.
     sandbox: If True, run code in sandbox; If False, run code in current
       process. If None, run in sandbox first, if the output could not be
-      serialized and pass to current process, run the code again in current
+      serialized and passed to current process, run the code again in current
       process.
-    timeout: Execution timeout in seconds. If None, wait the code the complete.
+    timeout: Execution timeout in seconds. If None, wait for the code to
+      complete.
   Returns:
     The value of the last line of the code block. Or a dict of variable
     names of all locals to their evaluated values as the output of the code to
     run. The value for the last line can be accessed by key '__result__'. Or the
-    stdout as a str.
+    stdout as a string.
   Raises:
     TimeoutError: If the execution time exceeds the timeout.
-    Exception: Exception  that are raised from the code.
+    Exception: Exceptions that are raised from the code.
   """
   return pg.coding.run(
       parsing.clean(code),

langfun/core/coding/python/generation.py CHANGED Viewed

@@ -22,9 +22,13 @@ import pyglove as pg
 class PythonCode(pg.Object):
-  """Symbolic class for Python code.
+  """Represents a piece of Python code that can be executed.
-  The value of the last expression of the source will be the returned value.
+  When `PythonCode` is instantiated within a `PythonCode.auto_run()` context,
+  it automatically executes the code and returns the result of the last
+  expression. Otherwise, it acts as a container for the source code, which
+  can be executed by calling the instance. The class also supports automatic
+  error correction via `lf.coding.run_with_correction` when called.
   """
   source: Annotated[
@@ -56,7 +60,7 @@ class PythonCode(pg.Object):
         Otherwise, auto call will be disabled.
       sandbox: If True, run code in sandbox; If False, run code in current
         process. If None, run in sandbox first, if the output could not be
-        serialized and pass to current process, run the code again in current
+        serialized and passed to current process, run the code again in current
         process. Applicable when `enabled` is set to True.
       timeout: Timeout in seconds. Applicable when both `enabled` and `sandbox`
         are set to True.
@@ -98,17 +102,17 @@ class PythonCode(pg.Object):
     Args:
       sandbox: If True, run code in sandbox; If False, run code in current
         process. If None, run in sandbox first, if the output could not be
-        serialized and pass to current process, run the code again in current
+        serialized and passed to current process, run the code again in current
         process.
       timeout: Timeout in seconds. If None, there is no timeout. Applicable when
         sandbox is set to True.
       global_vars: Global variables that could be accessed from the source code.
-      returns_stdout: If True, the stdout (a str) will be returned.
+      returns_stdout: If True, the stdout (a string) will be returned.
       outputs_intermediate: Applicable when returns_stdout is False. If True,
-        intermediate output will be outputted as a dict, with the last line's
-        value accessible by key '__result__' and the std output accessible by
+        intermediate output will be output as a dict, with the last line's
+        value accessible by key '__result__' and the stdout accessible by
         key '__stdout__'. Otherwise the value of the last line will be returned.
-      autofix: Number of attempts to auto fix the generated code. If 0, autofix
+      autofix: Number of attempts to autofix the generated code. If 0, autofix
         is disabled.
       autofix_lm: Language model to be used. If not specified, it will try to
         use the `lm` under `lf.context`.
@@ -117,8 +121,8 @@ class PythonCode(pg.Object):
       The value of the last expression in the source code. Or a dict of local
       variable names defined in the source code to their values if
       `outputs_intermediate` is set to True. The value for the last line can be
-      accessed by key '__result__'. Or the stdout as a str if `returns_stdout`
-      is set to True.
+      accessed by key '__result__'. Or the stdout as a string if
+      `returns_stdout` is set to True.
     Raises:
       TimeoutError: If `sandbox` is True and timeout has reached.
@@ -152,12 +156,12 @@ class PythonCode(pg.Object):
     Args:
       sandbox: If True, run code in sandbox; If False, run code in current
         process. If None, run in sandbox first, if the output could not be
-        serialized and pass to current process, run the code again in current
+        serialized and passed to current process, run the code again in current
         process.
       timeout: Timeout in seconds. If None, there is no timeout. Applicable when
         sandbox is set to True.
       global_vars: Global variables that could be accessed from the source code.
-      autofix: Number of attempts to auto fix the generated code. If 0, autofix
+      autofix: Number of attempts to autofix the generated code. If 0, autofix
         is disabled. Auto-fix is not supported for 'json' protocol.
       autofix_lm: Language model to be used. If not specified, it will try to
         use the `lm` under `lf.context`.
@@ -182,10 +186,11 @@ class PythonCode(pg.Object):
 class PythonFunction(pg.Object):
-  """Generated Python function via source code.
+  """Represents a Python function defined by source code.
-  The source code will be directly passed into eval() for execution and the
-  output of the function will be returned.
+  This class takes Python source code that defines a function and makes it
+  callable. The source code is evaluated to create a function object, which
+  can then be invoked like a regular Python function.
   """
   name: str
@@ -214,7 +219,7 @@ class PythonFunction(pg.Object):
       *args: Positional arguments that will be passed to the implementation.
       sandbox: If True, run code in sandbox; If False, run code in current
         process. If None, run in sandbox first, if the output could not be
-        serialized and pass to current process, run the code again in current
+        serialized and passed to current process, run the code again in current
         process.
       timeout: Timeout in seconds. If None, there is no timeout. Applicable when
         sandbox is set to True.

langfun/core/coding/python/sandboxing.py CHANGED Viewed

@@ -23,7 +23,14 @@ import pyglove as pg
 class SandboxOutput(pg.Object):
-  """Sandbox output."""
+  """A structure containing the output from a sandbox execution.
+
+  Attributes:
+    stdout: The standard output captured during execution.
+    stderr: The standard error captured during execution.
+    output_files: A dictionary of file names to their byte content for files
+      generated during execution.
+  """
   stdout: Annotated[
       str,
@@ -42,7 +49,14 @@ class SandboxOutput(pg.Object):
 class BaseSandbox(pg.Object):
-  """Interface and partial implementation for Python sandbox."""
+  """Base class for Python code sandboxing.
+
+  A sandbox provides an isolated environment for executing Python code,
+  typically with restrictions on file system access, network calls, or other
+  potentially harmful operations. This base class defines the interface for
+  sandboxes, including methods for running code (`run`), uploading files
+  (`upload`), and managing the sandbox lifecycle (`setup`, `cleanup`).
+  """
   def _on_bound(self):
     super()._on_bound()
@@ -111,7 +125,13 @@ class BaseSandbox(pg.Object):
 class MultiProcessingSandbox(BaseSandbox):
-  """Sandbox using multiprocessing."""
+  """A sandbox implementation using Python's `multiprocessing`.
+
+  This sandbox executes code in a separate process, providing isolation from
+  the main process. It uses a temporary directory for file operations,
+  which is cleaned up when the sandbox is closed. It relies on
+  `pg.coding.run` with `sandbox=True` for execution.
+  """
   def _on_bound(self):
     super()._on_bound()

langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl

Potentially problematic release.

langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl