PyPI - langfun - Versions diffs - 0.1.2.dev202504280818__py3-none-any.whl → 0.1.2.dev202504300804__py3-none-any.whl - Mend

langfun 0.1.2.dev202504280818__py3-none-any.whl → 0.1.2.dev202504300804__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of langfun might be problematic. Click here for more details.

Files changed (17) hide show

langfun/core/agentic/action_test.py CHANGED Viewed

@@ -22,54 +22,108 @@ import langfun.core.structured as lf_structured
 import pyglove as pg
-class SessionTest(unittest.TestCase):
+class Bar(action_lib.Action):
+  simulate_action_error: bool = False
+  def call(self, session, *, lm, **kwargs):
+    assert session.current_action.action is self
+    session.info('Begin Bar')
+    session.query('bar', lm=lm)
+    session.add_metadata(note='bar')
+    if self.simulate_action_error:
+      raise ValueError('Bar error')
+    return 2
+class Foo(action_lib.Action):
+  x: int
+  simulate_action_error: bool = False
+  simulate_query_error: bool = False
+  def call(self, session, *, lm, **kwargs):
+    assert session.current_action.action is self
+    with session.track_phase('prepare'):
+      session.info('Begin Foo', x=1)
+      session.query(
+          'foo',
+          schema=int if self.simulate_query_error else None,
+          lm=lm
+      )
+    with session.track_queries():
+      self.make_additional_query(lm)
+    session.add_metadata(note='foo')
+    def _sub_task(i):
+      session.add_metadata(**{f'subtask_{i}': i})
+      return lf_structured.query(f'subtask_{i}', lm=lm)
+    for i, output, error in session.concurrent_map(
+        _sub_task, range(3), max_workers=2, silence_on_errors=None,
+    ):
+      assert isinstance(i, int), i
+      assert isinstance(output, str), output
+      assert error is None, error
+    return self.x + Bar(
+        simulate_action_error=self.simulate_action_error
+    )(session, lm=lm)
+  def make_additional_query(self, lm):
+    lf_structured.query('additional query', lm=lm)
+class ActionInvocationTest(unittest.TestCase):
+  def test_basics(self):
+    action_invocation = action_lib.ActionInvocation(
+        action=Foo(1)
+    )
+    self.assertEqual(action_invocation.id, '')
+    root = action_lib.ActionInvocation(action=action_lib.RootAction())
+    root.execution.append(action_invocation)
+    self.assertIs(action_invocation.parent_action, root)
+    self.assertEqual(action_invocation.id, '/a1')
+class ExecutionTraceTest(unittest.TestCase):
   def test_basics(self):
-    test = self
-    class Bar(action_lib.Action):
-      def call(self, session, *, lm, **kwargs):
-        test.assertIs(session.current_action.action, self)
-        session.info('Begin Bar')
-        session.query('bar', lm=lm)
-        session.add_metadata(note='bar')
-        return 2
-    class Foo(action_lib.Action):
-      x: int
-      def call(self, session, *, lm, **kwargs):
-        test.assertIs(session.current_action.action, self)
-        with session.track_phase('prepare'):
-          session.info('Begin Foo', x=1)
-          session.query('foo', lm=lm)
-        with session.track_queries():
-          self.make_additional_query(lm)
-        session.add_metadata(note='foo')
-        def _sub_task(i):
-          session.add_metadata(**{f'subtask_{i}': i})
-          return lf_structured.query(f'subtask_{i}', lm=lm)
-        for i, output, error in session.concurrent_map(
-            _sub_task, range(3), max_workers=2, silence_on_errors=None,
-        ):
-          assert isinstance(i, int), i
-          assert isinstance(output, str), output
-          assert error is None, error
-        return self.x + Bar()(session, lm=lm)
-      def make_additional_query(self, lm):
-        lf_structured.query('additional query', lm=lm)
+    execution = action_lib.ExecutionTrace()
+    self.assertEqual(execution.id, '')
+    root = action_lib.ActionInvocation(action=action_lib.RootAction())
+    action_invocation = action_lib.ActionInvocation(
+        action=Foo(1)
+    )
+    root.execution.append(action_invocation)
+    self.assertEqual(action_invocation.execution.id, '/a1')
+    root.execution.reset()
+    self.assertEqual(len(root.execution), 0)
+class SessionTest(unittest.TestCase):
+  def test_succeeded_trajectory(self):
     lm = fake.StaticResponse('lm response')
     foo = Foo(1)
-    self.assertEqual(foo(lm=lm, verbose=True), 3)
+    self.assertIsNone(foo.session)
+    self.assertIsNone(foo.result)
+    self.assertIsNone(foo.metadata)
+    session = action_lib.Session(id='agent@1')
+    self.assertEqual(session.id, 'agent@1')
+    # Render HTML view to trigger dynamic update during execution.
+    _ = session.to_html()
+    self.assertEqual(foo(session, lm=lm, verbose=True), 3)
+    self.assertIsNone(foo.session)
+    self.assertEqual(foo.result, 3)
+    self.assertEqual(
+        foo.metadata, dict(note='foo', subtask_0=0, subtask_1=1, subtask_2=2)
+    )
-    session = foo.session
-    self.assertIn('session@', session.id)
-    self.assertIsNotNone(session)
     self.assertIsInstance(session.root.action, action_lib.RootAction)
     self.assertIs(session.current_action, session.root)
@@ -78,6 +132,9 @@ class SessionTest(unittest.TestCase):
     #
     root = session.root
+    self.assertIsNone(root.parent_action)
+    self.assertEqual(root.id, 'agent@1:')
+    self.assertEqual(root.execution.id, 'agent@1:')
     self.assertEqual(len(root.execution.items), 1)
     self.assertIs(root.execution.items[0].action, foo)
@@ -104,33 +161,57 @@ class SessionTest(unittest.TestCase):
     # Inspecting the top-level action (Foo)
     foo_invocation = root.execution.items[0]
+    self.assertIs(foo_invocation.parent_action, root)
+    self.assertEqual(foo_invocation.id, 'agent@1:/a1')
+    self.assertEqual(foo_invocation.execution.id, 'agent@1:/a1')
     self.assertEqual(len(foo_invocation.execution.items), 4)
     # Prepare phase.
     prepare_phase = foo_invocation.execution.items[0]
-    self.assertIsInstance(
-        prepare_phase, action_lib.ExecutionTrace
-    )
+    self.assertIsInstance(prepare_phase, action_lib.ExecutionTrace)
+    self.assertEqual(prepare_phase.id, 'agent@1:/a1/prepare')
     self.assertEqual(len(prepare_phase.items), 2)
     self.assertTrue(prepare_phase.has_started)
     self.assertTrue(prepare_phase.has_stopped)
     self.assertEqual(prepare_phase.usage_summary.total.num_requests, 1)
+    self.assertIsInstance(prepare_phase.items[0], lf.logging.LogEntry)
+    self.assertIsInstance(prepare_phase.items[1], lf_structured.QueryInvocation)
+    self.assertEqual(prepare_phase.items[1].id, 'agent@1:/a1/prepare/q1')
     # Tracked queries.
     query_invocation = foo_invocation.execution.items[1]
     self.assertIsInstance(query_invocation, lf_structured.QueryInvocation)
+    self.assertEqual(query_invocation.id, 'agent@1:/a1/q2')
     self.assertIs(query_invocation.lm, lm)
+    self.assertEqual(
+        foo_invocation.execution.indexof(
+            query_invocation, lf_structured.QueryInvocation
+        ),
+        1
+    )
+    self.assertEqual(
+        root.execution.indexof(
+            query_invocation, lf_structured.QueryInvocation
+        ),
+        -1
+    )
     # Tracked parallel executions.
     parallel_executions = foo_invocation.execution.items[2]
+    self.assertEqual(parallel_executions.id, 'agent@1:/a1/p1')
     self.assertIsInstance(parallel_executions, action_lib.ParallelExecutions)
     self.assertEqual(len(parallel_executions), 3)
+    self.assertEqual(parallel_executions[0].id, 'agent@1:/a1/p1/b1')
+    self.assertEqual(parallel_executions[1].id, 'agent@1:/a1/p1/b2')
+    self.assertEqual(parallel_executions[2].id, 'agent@1:/a1/p1/b3')
     self.assertEqual(len(parallel_executions[0].queries), 1)
     self.assertEqual(len(parallel_executions[1].queries), 1)
     self.assertEqual(len(parallel_executions[2].queries), 1)
     # Invocation to Bar.
     bar_invocation = foo_invocation.execution.items[3]
+    self.assertIs(bar_invocation.parent_action, foo_invocation)
+    self.assertEqual(bar_invocation.id, 'agent@1:/a1/a1')
     self.assertIsInstance(bar_invocation, action_lib.ActionInvocation)
     self.assertIsInstance(bar_invocation.action, Bar)
     self.assertEqual(bar_invocation.result, 2)
@@ -144,6 +225,51 @@ class SessionTest(unittest.TestCase):
     json_str = session.to_json_str(save_ref_value=True)
     self.assertIsInstance(pg.from_json_str(json_str), action_lib.Session)
+  def test_failed_action(self):
+    lm = fake.StaticResponse('lm response')
+    foo = Foo(1, simulate_action_error=True)
+    with self.assertRaisesRegex(ValueError, 'Bar error'):
+      foo(lm=lm)
+    session = foo.session
+    self.assertIsNotNone(session)
+    self.assertIsInstance(session.root.action, action_lib.RootAction)
+    self.assertIs(session.current_action, session.root)
+    # Inspecting the root invocation.
+    root = session.root
+    self.assertRegex(root.id, 'agent@.*:')
+    self.assertTrue(root.has_error)
+    foo_invocation = root.execution.items[0]
+    self.assertIsInstance(foo_invocation, action_lib.ActionInvocation)
+    self.assertTrue(foo_invocation.has_error)
+    bar_invocation = foo_invocation.execution.items[3]
+    self.assertIsInstance(bar_invocation, action_lib.ActionInvocation)
+    self.assertTrue(bar_invocation.has_error)
+    # Save to HTML
+    self.assertIn('error', session.to_html().content)
+  def test_failed_query(self):
+    lm = fake.StaticResponse('lm response')
+    foo = Foo(1, simulate_query_error=True)
+    with self.assertRaisesRegex(lf_structured.MappingError, 'SyntaxError'):
+      foo(lm=lm)
+    session = foo.session
+    self.assertIsNotNone(session)
+    self.assertIsInstance(session.root.action, action_lib.RootAction)
+    self.assertIs(session.current_action, session.root)
+    # Inspecting the root invocation.
+    root = session.root
+    self.assertRegex(root.id, 'agent@.*:')
+    self.assertTrue(root.has_error)
+    foo_invocation = root.execution.items[0]
+    self.assertIsInstance(foo_invocation, action_lib.ActionInvocation)
+    self.assertTrue(foo_invocation.has_error)
+    self.assertEqual(len(foo_invocation.execution.items), 2)
   def test_log(self):
     session = action_lib.Session()
     session.debug('hi', x=1, y=2)
@@ -153,8 +279,8 @@ class SessionTest(unittest.TestCase):
     session.fatal('hi', x=1, y=2)
   def test_as_message(self):
-    session = action_lib.Session(id='abc')
-    self.assertEqual(session.id, 'abc')
+    session = action_lib.Session()
+    self.assertIn('agent@', session.id)
     self.assertIsInstance(session.as_message(), lf.AIMessage)

langfun/core/eval/base_test.py CHANGED Viewed

@@ -195,7 +195,7 @@ class EvaluationTest(unittest.TestCase):
             score=1.0,
             logprobs=None,
             is_cached=False,
-            usage=lf.LMSamplingUsage(387, 24, 411),
+            usage=lf.LMSamplingUsage(428, 24, 452),
             tags=['lm-response', 'lm-output', 'transformed'],
         ),
     )
@@ -234,12 +234,12 @@ class EvaluationTest(unittest.TestCase):
                 }
             ),
             usage=dict(
-                total_prompt_tokens=774,
+                total_prompt_tokens=856,
                 total_completion_tokens=25,
                 num_usages=2,
-                average_prompt_tokens=387,
+                average_prompt_tokens=428,
                 average_completion_tokens=12,
-                average_total_tokens=399,
+                average_total_tokens=440,
             ),
         ),
     )

langfun/core/eval/v2/evaluation.py CHANGED Viewed

@@ -167,6 +167,8 @@ class Evaluation(experiment_lib.Experiment):
       example.input = self.example_input_by_id(example.id)
     checkpointed = self._state.ckpt_example(example.id)
+    self._state.update(example, in_progress=True)
     with pg.timeit('evaluate') as timeit, lf.track_usages() as usage_summary:
       if checkpointed is None or checkpointed.has_error:
         if checkpointed is None:
@@ -221,7 +223,7 @@ class Evaluation(experiment_lib.Experiment):
     if example.newly_processed:
       example.end_time = time.time()
-    self._state.update(example)
+    self._state.update(example, in_progress=False)
     return example
   def _process(
@@ -501,6 +503,21 @@ class Evaluation(experiment_lib.Experiment):
           )
       )
+    def _in_progress_tab() -> pg.views.html.controls.Tab | None:
+      """Renders a tab for the in progress examples."""
+      if not self.state.in_progress_examples:
+        return None
+      return pg.views.html.controls.Tab(
+          label='In Progress',
+          content=pg.Html.element(
+              'div', [
+                  self._in_progress_view(
+                      list(self.state.in_progress_examples.values())
+                  )
+              ]
+          )
+      )
     def _metric_tab(metric: metrics_lib.Metric) -> pg.views.html.controls.Tab:
       """Renders a tab for a metric (group)."""
       return pg.views.html.controls.Tab(
@@ -571,10 +588,9 @@ class Evaluation(experiment_lib.Experiment):
               pg.views.html.controls.TabControl(
                   [
                       _definition_tab(),
-                  ] + [
-                      _metric_tab(m) for m in self.metrics
-                  ] + [
-                      _logs_tab()
+                      [_metric_tab(m) for m in self.metrics],
+                      _in_progress_tab(),
+                      _logs_tab(),
                   ],
                   selected=1,
               )
@@ -598,6 +614,27 @@ class Evaluation(experiment_lib.Experiment):
         css_classes=['eval-details'],
     )
+  def _in_progress_view(
+      self, in_progress_examples: list[example_lib.Example]
+  ) -> pg.Html:
+    """Renders a HTML view for the in-progress examples."""
+    current_time = time.time()
+    logs = [f'(Total {len(in_progress_examples)} examples in progress)']
+    for example in in_progress_examples:
+      if example.newly_processed:
+        logs.append(
+            f'Example {example.id}: In progress for '
+            f'{current_time - example.start_time:.2f} seconds.'
+        )
+      else:
+        logs.append(f'Example {example.id}: Recomputing metrics...')
+    return pg.Html.element(
+        'textarea',
+        [pg.Html.escape('\n'.join(logs))],
+        readonly=True,
+        css_classes=['logs-textarea'],
+    )
   def _html_tree_view_config(self) -> dict[str, Any]:
     return dict(
         css_classes=['eval-card'] if self.is_leaf else None
@@ -716,14 +753,27 @@ class EvaluationState:
         'Whether the example is evaluated.'
     ] = False
+    in_progress: Annotated[
+        bool,
+        (
+            'Whether the example is in progress. '
+        )
+    ] = False
     newly_processed: Annotated[
         bool,
-        'Whether the example is newly processed.'
+        (
+            'Whether the example is newly processed. '
+            'Applicable only when evaluated is True.'
+        )
     ] = False
     has_error: Annotated[
         bool,
-        'Whether the example has error.'
+        (
+            'Whether the example has error. '
+            'Applicable only when evaluated is True.'
+        )
     ] = False
   def __init__(self):
@@ -732,6 +782,7 @@ class EvaluationState:
     self._evaluation_status: dict[
         int, EvaluationState.ExampleStatus
     ] = {}
+    self._in_progress_examples: dict[int, example_lib.Example] = {}
   def load(
       self,
@@ -758,6 +809,11 @@ class EvaluationState:
     """Returns the evaluation status of the examples."""
     return self._evaluation_status
+  @property
+  def in_progress_examples(self) -> dict[int, example_lib.Example]:
+    """Returns the in-progress examples."""
+    return self._in_progress_examples
   @property
   def ckpt_examples(self) -> dict[int, example_lib.Example]:
     """Returns the unevaluated examples from checkpoints."""
@@ -773,17 +829,27 @@ class EvaluationState:
         example_id, EvaluationState.ExampleStatus()
     )
-  def update(self, example: example_lib.Example) -> None:
+  def update(self, example: example_lib.Example, in_progress: bool) -> None:
     """Updates the state with the given example."""
-    self._update_status(example)
-    # Processed examples will be removed once it's done.
-    self._ckpt_examples.pop(example.id, None)
+    self._update_status(example, in_progress)
+    if in_progress:
+      self._in_progress_examples[example.id] = example
+    else:
+      self._in_progress_examples.pop(example.id, None)
+      # Processed examples will be removed once it's done.
+      self._ckpt_examples.pop(example.id, None)
-  def _update_status(self, example: example_lib.Example) -> None:
+  def _update_status(
+      self,
+      example: example_lib.Example,
+      in_progress: bool
+  ) -> None:
     """Updates the evaluation status of the example."""
     self._evaluation_status[example.id] = (
         EvaluationState.ExampleStatus(
             evaluated=example.output != pg.MISSING_VALUE,
+            in_progress=in_progress,
             newly_processed=example.newly_processed,
             has_error=example.has_error,
         )

langfun/core/eval/v2/evaluation_test.py CHANGED Viewed

@@ -79,8 +79,10 @@ class EvaluationTest(unittest.TestCase):
     exp = eval_test_helper.TestEvaluation()
     example = exp.evaluate(Example(id=3))
     self.assertTrue(exp.state.get_status(3).evaluated)
+    self.assertFalse(exp.state.get_status(3).in_progress)
     self.assertTrue(exp.state.get_status(3).newly_processed)
     self.assertFalse(exp.state.get_status(3).has_error)
+    self.assertEqual(exp.state.in_progress_examples, {})
     self.assertTrue(example.newly_processed)
     self.assertEqual(example.input, pg.Dict(x=2, y=4, groundtruth=6))
     self.assertEqual(example.output, 6)

langfun/core/structured/__init__.py CHANGED Viewed

@@ -55,8 +55,11 @@ from langfun.core.structured.parsing import call
 from langfun.core.structured.querying import track_queries
 from langfun.core.structured.querying import QueryInvocation
+from langfun.core.structured.querying import LfQuery
 from langfun.core.structured.querying import query
 from langfun.core.structured.querying import query_and_reduce
+from langfun.core.structured.querying import query_protocol
 from langfun.core.structured.querying import query_prompt
 from langfun.core.structured.querying import query_output

langfun/core/structured/mapping.py CHANGED Viewed

@@ -340,8 +340,11 @@ class Mapping(lf.LangFunc):
   schema_title: Annotated[str, 'The section title for schema.'] = 'SCHEMA'
   protocol: Annotated[
-      schema_lib.SchemaProtocol,
-      'The protocol for representing the schema and value.',
+      str,
+      (
+          'A string representing the protocol for formatting the prompt. '
+          'Built-in Langfun protocols are: `python` and `json`.'
+      ),
   ] = 'python'
   #

langfun/core/structured/parsing_test.py CHANGED Viewed

@@ -646,7 +646,7 @@ class CallTest(unittest.TestCase):
             score=1.0,
             logprobs=None,
             is_cached=False,
-            usage=lf.LMSamplingUsage(315, 1, 316),
+            usage=lf.LMSamplingUsage(356, 1, 357),
             tags=['lm-response', 'lm-output', 'transformed']
         ),
     )

langfun 0.1.2.dev202504280818__py3-none-any.whl → 0.1.2.dev202504300804__py3-none-any.whl

Potentially problematic release.

langfun 0.1.2.dev202504280818__py3-none-any.whl → 0.1.2.dev202504300804__py3-none-any.whl