PyPI - langfun - Versions diffs - 0.1.2.dev202505130804__py3-none-any.whl → 0.1.2.dev202505150805__py3-none-any.whl - Mend

langfun 0.1.2.dev202505130804__py3-none-any.whl → 0.1.2.dev202505150805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of langfun might be problematic. Click here for more details.

Files changed (21)

langfun/core/agentic/action.py +237 -108
langfun/core/agentic/action_eval.py +4 -6
langfun/core/agentic/action_test.py +15 -9
langfun/core/coding/python/correction.py +4 -0
langfun/core/console.py +6 -3
langfun/core/language_model.py +4 -2
langfun/core/llms/anthropic.py +4 -8
langfun/core/llms/anthropic_test.py +38 -13
langfun/core/llms/gemini.py +2 -2
langfun/core/logging.py +3 -4
langfun/core/structured/mapping.py +6 -0
langfun/core/structured/querying.py +324 -91
langfun/core/structured/querying_test.py +242 -2
langfun/core/structured/schema.py +8 -0
langfun/core/structured/schema_generation.py +1 -0
langfun/core/structured/schema_test.py +6 -3
{langfun-0.1.2.dev202505130804.dist-info → langfun-0.1.2.dev202505150805.dist-info}/METADATA +1 -1
{langfun-0.1.2.dev202505130804.dist-info → langfun-0.1.2.dev202505150805.dist-info}/RECORD +21 -21
{langfun-0.1.2.dev202505130804.dist-info → langfun-0.1.2.dev202505150805.dist-info}/WHEEL +1 -1
{langfun-0.1.2.dev202505130804.dist-info → langfun-0.1.2.dev202505150805.dist-info}/licenses/LICENSE +0 -0
{langfun-0.1.2.dev202505130804.dist-info → langfun-0.1.2.dev202505150805.dist-info}/top_level.txt +0 -0

langfun/core/agentic/action.py CHANGED Viewed

@@ -209,7 +209,7 @@ class Action(pg.Object):
   ) -> Any:
     """Executes the action."""
     if session is None:
-      session = Session()
+      session = Session(verbose=verbose)
       session.start()
       if show_progress:
@@ -220,45 +220,13 @@ class Action(pg.Object):
     else:
       self._session = None
-    with session.track_action(self) as invocation:
-      if verbose:
-        session.info('Action execution started.', keep=False, action=self)
+    with session.track_action(self):
       try:
-        result = self.call(session=session, verbose=verbose, **kwargs)
+        result = self.call(session=session, **kwargs)
         self._invocation.end(result)
-        if verbose:
-          session.info(
-              (
-                  f'Action execution succeeded in '
-                  f'{self._invocation.execution.elapse:.2f} seconds.'
-              ),
-              keep=False,
-              result=result
-          )
       except BaseException as e:
         error = pg.utils.ErrorInfo.from_exception(e)
         self._invocation.end(result=None, error=error)
-        if invocation.parent_action is session.root:
-          session.error(
-              (
-                  f'Top-level action execution failed in '
-                  f'{self._invocation.execution.elapse:.2f} seconds.'
-              ),
-              keep=True,
-              action=self,
-              error=error
-          )
-        else:
-          session.warning(
-              (
-                  f'Action execution failed in '
-                  f'{self._invocation.execution.elapse:.2f} seconds.'
-              ),
-              keep=False,
-              action=self,
-              error=error
-          )
         if self._session is not None:
           self._session.end(result=None, error=error)
         raise
@@ -477,21 +445,26 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
   def __getitem__(self, index: int) -> TracedItem:
     return self.items[index]
+  def merge_usage_summary(self, usage_summary: lf.UsageSummary) -> None:
+    if usage_summary.total.num_requests == 0:
+      return
+    current_invocation = self
+    while current_invocation is not None:
+      current_invocation.usage_summary.merge(usage_summary)
+      current_invocation = typing.cast(
+          ExecutionTrace,
+          current_invocation.sym_ancestor(
+              lambda x: isinstance(x, ExecutionTrace)
+          )
+      )
   def append(self, item: TracedItem) -> None:
     """Appends an item to the sequence."""
     with pg.notify_on_change(False):
       self.items.append(item)
     if isinstance(item, lf_structured.QueryInvocation):
-      current_invocation = self
-      while current_invocation is not None:
-        current_invocation.usage_summary.merge(item.usage_summary)
-        current_invocation = typing.cast(
-            ExecutionTrace,
-            current_invocation.sym_ancestor(
-                lambda x: isinstance(x, ExecutionTrace)
-            )
-        )
+      self.merge_usage_summary(item.usage_summary)
     if self._tab_control is not None:
       self._tab_control.append(self._execution_item_tab(item))
@@ -519,15 +492,46 @@ class ExecutionTrace(pg.Object, pg.views.html.HtmlTreeView.Extension):
   def execution_summary(self) -> dict[str, Any]:
     """Execution summary string."""
     return pg.Dict(
-        num_queries=len(self.queries),
-        execution_breakdown=[
-            dict(
-                action=action.action.__class__.__name__,
-                usage=action.usage_summary.total,
-                execution_time=action.execution.elapse,
-            )
-            for action in self.actions
-        ]
+        subtree=dict(
+            num_actions=len(self.all_actions),
+            num_action_failures=len([
+                a for a in self.all_actions if a.has_error
+            ]),
+            num_queries=len(self.all_queries),
+            num_oop_failures=len([
+                q for q in self.all_queries if q.has_oop_error
+            ]),
+            num_non_oop_failures=len([
+                q for q in self.all_queries
+                if q.has_error and not q.has_oop_error
+            ]),
+            total_query_time=sum(q.elapse for q in self.all_queries),
+        ),
+        current_level=dict(
+            num_actions=len(self.actions),
+            num_action_failures=len([
+                a for a in self.actions if a.has_error
+            ]),
+            num_queries=len(self.queries),
+            num_oop_failures=len([
+                q for q in self.queries if q.has_oop_error
+            ]),
+            num_non_oop_failures=len([
+                q for q in self.queries
+                if q.has_error and not q.has_oop_error
+            ]),
+            execution_breakdown=[
+                dict(
+                    action=action.action.__class__.__name__,
+                    usage=dict(
+                        total_tokens=action.usage_summary.total.total_tokens,
+                        estimated_cost=action.usage_summary.total.estimated_cost,
+                    ),
+                    execution_time=action.execution.elapse,
+                )
+                for action in self.actions
+            ]
+        )
     )
   #
@@ -894,6 +898,11 @@ class ActionInvocation(pg.Object, pg.views.html.HtmlTreeView.Extension):
     """Returns the usage summary of the action."""
     return self.execution.usage_summary
+  @property
+  def elapse(self) -> float:
+    """Returns the elapsed time of the action."""
+    return self.execution.elapse
   def start(self) -> None:
     """Starts the execution of the action."""
     self.execution.start()
@@ -1025,9 +1034,6 @@ class ActionInvocation(pg.Object, pg.views.html.HtmlTreeView.Extension):
                         self.usage_summary.to_html(  # pylint: disable=g-long-ternary
                             extra_flags=dict(as_badge=True)
                         )
-                        if (interactive
-                            or self.usage_summary.total.num_requests > 0)
-                        else None
                     ),
                 ],
                 css_classes=['execution-tab-title']
@@ -1069,12 +1075,78 @@ class RootAction(Action):
 class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
   """Session for performing an agentic task."""
-  root: ActionInvocation = ActionInvocation(RootAction())
+  root: Annotated[
+      ActionInvocation,
+      'The root action invocation of the session.'
+  ] = ActionInvocation(RootAction())
   id: Annotated[
       str | None,
       'An optional identifier for the sessin, which will be used for logging.'
   ] = None
+  verbose: Annotated[
+      bool,
+      (
+          'If True, the session will be logged with verbose action and query '
+          'activities.'
+      )
+  ] = False
+  #
+  # Shortcut methods for accessing the root action invocation.
+  #
+  @property
+  def all_queries(self) -> list[lf_structured.QueryInvocation]:
+    """Returns all queries made by the session."""
+    return self.root.all_queries
+  @property
+  def all_actions(self) -> list[ActionInvocation]:
+    """Returns all actions made by the session."""
+    return self.root.all_actions
+  @property
+  def all_logs(self) -> list[lf.logging.LogEntry]:
+    """Returns all logs made by the session."""
+    return self.root.all_logs
+  @property
+  def usage_summary(self) -> lf.UsageSummary:
+    """Returns the usage summary of the session."""
+    return self.root.usage_summary
+  @property
+  def has_started(self) -> bool:
+    """Returns True if the session has started."""
+    return self.root.execution.has_started
+  @property
+  def has_stopped(self) -> bool:
+    """Returns True if the session has stopped."""
+    return self.root.execution.has_stopped
+  @property
+  def has_error(self) -> bool:
+    """Returns True if the session has an error."""
+    return self.root.has_error
+  @property
+  def final_result(self) -> Any:
+    """Returns the final result of the session."""
+    return self.root.result
+  @property
+  def final_error(self) -> pg.utils.ErrorInfo | None:
+    """Returns the error of the session."""
+    return self.root.error
+  @property
+  def elapse(self) -> float:
+    """Returns the elapsed time of the session."""
+    return self.root.elapse
   # NOTE(daiyip): Action execution may involve multi-threading, hence current
   # action and execution are thread-local.
@@ -1118,6 +1190,20 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
       metadata: dict[str, Any] | None = None,
   ) -> None:
     """Ends the session."""
+    if error is not None:
+      self.error(
+          f'Trajectory failed in {self.elapse:.2f} seconds.',
+          error=error,
+          metadata=metadata,
+          keep=True,
+      )
+    elif self.verbose:
+      self.info(
+          f'Trajectory succeeded in {self.elapse:.2f} seconds.',
+          result=result,
+          metadata=metadata,
+          keep=False,
+      )
     self.root.end(result, error, metadata)
   def __enter__(self):
@@ -1169,8 +1255,34 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
       self._current_execution = invocation.execution
       # Start the execution of the current action.
       self._current_action.start()
+      if self.verbose:
+        self.info(
+            'Action execution started.',
+            action=invocation.action,
+            keep=False,
+        )
       yield invocation
     finally:
+      if invocation.has_error:
+        self.warning(
+            (
+                f'Action execution failed in '
+                f'{invocation.execution.elapse:.2f} seconds.'
+            ),
+            action=invocation.action,
+            error=invocation.error,
+            keep=True,
+        )
+      elif self.verbose:
+        self.info(
+            (
+                f'Action execution succeeded in '
+                f'{invocation.execution.elapse:.2f} seconds.'
+            ),
+            action=invocation.action,
+            result=invocation.result,
+            keep=False,
+        )
       self._current_execution = parent_execution
       self._current_action = parent_action
@@ -1208,18 +1320,63 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
       A list of `lf.QueryInvocation` objects, each for a single `lf.query`
       call.
     """
-    with self.track_phase(phase) as execution:
-      with lf_structured.track_queries(include_child_scopes=False) as queries:
-        try:
-          yield queries
-        finally:
-          for i, query in enumerate(queries):
-            query.rebind(
-                id=f'{execution.id}/q{len(execution.queries) + i + 1}',
-                skip_notification=False,
-                raise_on_no_change=False
-            )
-          execution.extend(queries)
+    def _query_start(invocation: lf_structured.QueryInvocation):
+      execution = self._current_execution
+      invocation.rebind(
+          id=f'{execution.id}/q{len(execution.queries) + 1}',
+          skip_notification=False, raise_on_no_change=False
+      )
+      execution.append(invocation)
+      if self.verbose:
+        self.info(
+            'Querying LLM started.',
+            lm=invocation.lm.model_id,
+            output_type=(
+                lf_structured.annotation(invocation.schema.spec)
+                if invocation.schema is not None else None
+            ),
+            keep=False,
+        )
+    def _query_end(invocation: lf_structured.QueryInvocation):
+      self._current_execution.merge_usage_summary(invocation.usage_summary)
+      if invocation.has_error:
+        self.warning(
+            (
+                f'Querying LLM failed in '
+                f'{time.time() - invocation.start_time:.2f} seconds.'
+            ),
+            lm=invocation.lm.model_id,
+            output_type=(
+                lf_structured.annotation(invocation.schema.spec)
+                if invocation.schema is not None else None
+            ),
+            error=invocation.error,
+            keep=True,
+        )
+      elif self.verbose:
+        self.info(
+            (
+                f'Querying LLM succeeded in '
+                f'{time.time() - invocation.start_time:.2f} seconds.'
+            ),
+            lm=invocation.lm.model_id,
+            output_type=(
+                lf_structured.annotation(invocation.schema.spec)
+                if invocation.schema is not None else None
+            ),
+            keep=False,
+        )
+    with self.track_phase(phase), lf_structured.track_queries(
+        include_child_scopes=False,
+        start_callabck=_query_start,
+        end_callabck=_query_end,
+    ) as queries:
+      try:
+        yield queries
+      finally:
+        pass
   #
   # Operations with activity tracking.
@@ -1272,24 +1429,14 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
       The result of the query.
     """
     with self.track_queries():
-      start_time = time.time()
-      try:
-        return lf_structured.query(
-            prompt,
-            schema=schema,
-            default=default,
-            lm=lm,
-            examples=examples,
-            **kwargs
-        )
-      except BaseException as e:
-        elapse = time.time() - start_time
-        self.warning(
-            f'Failed to query LLM ({lm.model_id}) in {elapse:.2f} seconds.',
-            error=pg.utils.ErrorInfo.from_exception(e),
-            keep=False,
-        )
-        raise
+      return lf_structured.query(
+          prompt,
+          schema=schema,
+          default=default,
+          lm=lm,
+          examples=examples,
+          **kwargs
+      )
   def concurrent_map(
       self,
@@ -1437,7 +1584,9 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
       for_action = self._current_action
     elif isinstance(for_action, Action):
       for_action = for_action.invocation
-      assert for_action is not None
+      assert for_action is not None, (
+          f'Action must be called before it can be logged: {for_action}'
+      )
     log_entry = lf.logging.log(
         level,
@@ -1522,26 +1671,6 @@ class Session(pg.Object, pg.views.html.HtmlTreeView.Extension):
         result=self.root
     )
-  @property
-  def final_result(self) -> Any:
-    """Returns the final result of the session."""
-    return self.root.result
-  @property
-  def has_started(self) -> bool:
-    """Returns whether the session has started."""
-    return self.root.execution.has_started
-  @property
-  def has_stopped(self) -> bool:
-    """Returns whether the session has stopped."""
-    return self.root.execution.has_stopped
-  @property
-  def has_error(self) -> bool:
-    """Returns whether the session has an error."""
-    return self.root.has_error
   @property
   def current_action(self) -> ActionInvocation:
     """Returns the current invocation."""

langfun/core/agentic/action_eval.py CHANGED Viewed

@@ -36,17 +36,15 @@ class ActionEval(lf.eval.v2.Evaluation):
     action = example_input.action
     # We explicitly create a session here to use a custom session ID.
-    with action_lib.Session(id=f'{self.id}#example-{example.id}') as session:
+    with action_lib.Session(
+        id=f'{self.id}#example-{example.id}', verbose=True
+    ) as session:
       # NOTE(daiyip): Setting session as metadata before action execution, so we
       # could use `Evaluation.state.in_progress_examples` to access the session
       # for status reporting from other threads.
       example.metadata['session'] = session
-      with lf.logging.use_log_level('fatal'):
-        kwargs = self.action_args.copy()
-        kwargs.update(verbose=True)
-        action(session=session, **kwargs)
+      action(session=session, **self.action_args)
     return session.final_result, dict(session=session)

langfun/core/agentic/action_test.py CHANGED Viewed

@@ -155,15 +155,19 @@ class SessionTest(unittest.TestCase):
     )
     # The root space should have one action (foo), no queries, and no logs.
-    self.assertEqual(len(list(root.actions)), 1)
-    self.assertEqual(len(list(root.queries)), 0)
-    self.assertEqual(len(list(root.logs)), 0)
+    self.assertEqual(len(root.actions), 1)
+    self.assertEqual(len(root.queries), 0)
+    self.assertEqual(len(root.logs), 0)
     # 1 query from Bar, 2 from Foo and 3 from parallel executions.
-    self.assertEqual(len(list(root.all_queries)), 6)
+    self.assertEqual(len(session.all_queries), 6)
+    self.assertEqual(len(root.all_queries), 6)
     # 2 actions: Foo and Bar.
-    self.assertEqual(len(list(root.all_actions)), 2)
+    self.assertEqual(len(session.all_actions), 2)
+    self.assertEqual(len(root.all_actions), 2)
     # 1 log from Bar and 1 from Foo.
-    self.assertEqual(len(list(root.all_logs)), 2)
+    self.assertEqual(len(session.all_logs), 2)
+    self.assertEqual(len(root.all_logs), 2)
+    self.assertIs(session.usage_summary, root.usage_summary)
     self.assertEqual(root.usage_summary.total.num_requests, 6)
     # Inspecting the top-level action (Foo)
@@ -276,7 +280,7 @@ class SessionTest(unittest.TestCase):
     foo_invocation = root.execution[0]
     self.assertIsInstance(foo_invocation, action_lib.ActionInvocation)
     self.assertTrue(foo_invocation.has_error)
-    self.assertEqual(len(foo_invocation.execution.items), 2)
+    self.assertEqual(len(foo_invocation.execution.items), 3)
   def test_succeeded_with_implicit_session(self):
     lm = fake.StaticResponse('lm response')
@@ -304,7 +308,7 @@ class SessionTest(unittest.TestCase):
     self.assertTrue(session.has_started)
     self.assertTrue(session.has_stopped)
     self.assertTrue(session.has_error)
-    self.assertIsInstance(session.root.error, pg.utils.ErrorInfo)
+    self.assertIsInstance(session.final_error, pg.utils.ErrorInfo)
     self.assertIn('Bar error', str(session.root.error))
   def test_succeeded_with_explicit_session(self):
@@ -409,7 +413,9 @@ class SessionTest(unittest.TestCase):
     self.assertTrue(session.has_stopped)
     self.assertTrue(session.has_error)
     self.assertIsInstance(session.root.error, pg.utils.ErrorInfo)
-    self.assertEqual(len(session.root.execution), 2)
+    self.assertEqual(len(session.root.execution), 3)
+    self.assertEqual(len(session.root.actions), 2)
+    self.assertEqual(len(session.root.logs), 1)
     self.assertFalse(session.root.execution[0].has_error)
     self.assertTrue(session.root.execution[1].has_error)

langfun/core/coding/python/correction.py CHANGED Viewed

@@ -37,6 +37,7 @@ def run_with_correction(
     lm: lf.LanguageModel | None = None,
     max_attempts: int = 5,
     sandbox: bool | None = None,
+    permission: pg.coding.CodePermission = pg.coding.CodePermission.ALL,
     timeout: int | None = 5,
     returns_code: bool = False,
     returns_stdout: bool = False,
@@ -58,6 +59,7 @@ def run_with_correction(
       process. If None, run in sandbox first, if the output could not be
       serialized and pass to current process, run the code again in current
       process.
+    permission: The permission to run the code.
     timeout: The timeout for running the corrected code. If None, there is no
       timeout. Applicable only when sandbox is set to True.
     returns_code: If True, the return value is a tuple of (result, final code).
@@ -88,6 +90,7 @@ def run_with_correction(
             global_vars=global_vars,
             sandbox=sandbox,
             timeout=timeout,
+            permission=permission,
             returns_stdout=returns_stdout,
             outputs_intermediate=outputs_intermediate,
         )
@@ -102,6 +105,7 @@ def run_with_correction(
               global_vars=global_vars,
               sandbox=sandbox,
               timeout=timeout,
+              permission=permission,
               outputs_intermediate=outputs_intermediate,
           )
       )

langfun/core/console.py CHANGED Viewed

@@ -52,10 +52,13 @@ def write(
   )
+_notebook = None
 try:
-  _notebook = sys.modules['IPython'].display
-except Exception:  # pylint: disable=broad-except
-  _notebook = None
+  ipython_module = sys.modules['IPython']
+  if 'IPKernelApp' in ipython_module.get_ipython().config:
+    _notebook = ipython_module.display
+except (KeyError, AttributeError):  # pylint: disable=broad-except
+  pass
 def under_notebook() -> bool:

langfun/core/language_model.py CHANGED Viewed

@@ -1453,7 +1453,8 @@ class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
       self._usage_badge.update(
           self._badge_text(),
           tooltip=pg.format(
-              self, verbose=False, custom_format=self._tooltip_format
+              self, verbose=False, custom_format=self._tooltip_format,
+              hide_default_values=True,
           ),
           styles=dict(color=self._badge_color()),
       )
@@ -1500,7 +1501,8 @@ class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
         usage_badge = pg.views.html.controls.Badge(
             self._badge_text(),
             tooltip=pg.format(
-                self, custom_format=self._tooltip_format, verbose=False
+                self, custom_format=self._tooltip_format, verbose=False,
+                hide_default_values=True,
             ),
             css_classes=['usage-summary'],
             styles=dict(color=self._badge_color()),

langfun/core/llms/anthropic.py CHANGED Viewed

@@ -509,17 +509,13 @@ class Anthropic(rest.REST):
           raise ValueError(f'Unsupported modality: {chunk!r}.')
       return chunk
-    messages = []
     if system_message := prompt.get('system_message'):
       assert isinstance(system_message, lf.SystemMessage), type(system_message)
-      messages.append(
-          system_message.as_format(
-              'anthropic', chunk_preprocessor=modality_check
-          )
-      )
-    messages.append(
+      request['system'] = system_message.text
+    messages = [
         prompt.as_format('anthropic', chunk_preprocessor=modality_check)
-    )
+    ]
     request.update(messages=messages)
     return request

langfun/core/llms/anthropic_test.py CHANGED Viewed

@@ -31,21 +31,46 @@ def mock_requests_post(url: str, json: dict[str, Any], **kwargs):
   response = requests.Response()
   response.status_code = 200
+  # Construct base text from user/assistant messages payload
+  messages_payload_text = '\n'.join(
+      c['content'][0]['text']
+      for c in json.get('messages', [])
+      if c.get('content')
+      and isinstance(c['content'], list)
+      and c['content']
+      and c['content'][0].get('type') == 'text'
+      and 'text' in c['content'][0]
+  )
+  # Check for a system prompt in the request payload
+  system_prompt_text = json.get('system')
+  processed_text_parts = []
+  if system_prompt_text:
+    processed_text_parts.append(system_prompt_text)
+  if messages_payload_text:
+    processed_text_parts.append(messages_payload_text)
+  processed_text = '\n'.join(processed_text_parts)
+  response_content_text = (
+      f'{processed_text} with temperature={json.get("temperature")}, '
+      f'top_k={json.get("top_k")}, '
+      f'top_p={json.get("top_p")}, '
+      f'max_tokens={json.get("max_tokens")}, '
+      f'stop={json.get("stop_sequences")}.'
+  )
   response._content = pg.to_json_str({
-      'content': [{
-          'type': 'text',
-          'text': (
-              '\n'.join(c['content'][0]['text'] for c in json['messages']) +
-              f' with temperature={json.get("temperature")}, '
-              f'top_k={json.get("top_k")}, '
-              f'top_p={json.get("top_p")}, '
-              f'max_tokens={json.get("max_tokens")}, '
-              f'stop={json.get("stop_sequences")}.'
-          ),
-      }],
+      'content': [{'type': 'text', 'text': response_content_text}],
       'usage': {
-          'input_tokens': 2,
-          'output_tokens': 1,
+          'input_tokens': (
+              2
+          ),  # Placeholder: adjust if tests need accurate token counts
+          'output_tokens': (
+              1
+          ),  # Placeholder: adjust if tests need accurate token counts
       },
   }).encode()
   return response

langfun 0.1.2.dev202505130804__py3-none-any.whl → 0.1.2.dev202505150805__py3-none-any.whl

Potentially problematic release.

langfun 0.1.2.dev202505130804__py3-none-any.whl → 0.1.2.dev202505150805__py3-none-any.whl