PyPI - langfun - Versions diffs - 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl - Mend

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (162) hide show

langfun/__init__.py +1 -1
langfun/core/__init__.py +7 -1
langfun/core/agentic/__init__.py +8 -1
langfun/core/agentic/action.py +740 -112
langfun/core/agentic/action_eval.py +9 -2
langfun/core/agentic/action_test.py +189 -24
langfun/core/async_support.py +104 -5
langfun/core/async_support_test.py +23 -0
langfun/core/coding/python/correction.py +19 -9
langfun/core/coding/python/execution.py +14 -12
langfun/core/coding/python/generation.py +21 -16
langfun/core/coding/python/sandboxing.py +23 -3
langfun/core/component.py +42 -3
langfun/core/concurrent.py +70 -6
langfun/core/concurrent_test.py +9 -2
langfun/core/console.py +1 -1
langfun/core/data/conversion/anthropic.py +12 -3
langfun/core/data/conversion/anthropic_test.py +8 -6
langfun/core/data/conversion/gemini.py +11 -2
langfun/core/data/conversion/gemini_test.py +48 -9
langfun/core/data/conversion/openai.py +145 -31
langfun/core/data/conversion/openai_test.py +161 -17
langfun/core/eval/base.py +48 -44
langfun/core/eval/base_test.py +5 -5
langfun/core/eval/matching.py +5 -2
langfun/core/eval/patching.py +3 -3
langfun/core/eval/scoring.py +4 -3
langfun/core/eval/v2/__init__.py +3 -0
langfun/core/eval/v2/checkpointing.py +148 -46
langfun/core/eval/v2/checkpointing_test.py +9 -2
langfun/core/eval/v2/config_saver.py +37 -0
langfun/core/eval/v2/config_saver_test.py +36 -0
langfun/core/eval/v2/eval_test_helper.py +104 -3
langfun/core/eval/v2/evaluation.py +102 -19
langfun/core/eval/v2/evaluation_test.py +9 -3
langfun/core/eval/v2/example.py +50 -40
langfun/core/eval/v2/example_test.py +16 -8
langfun/core/eval/v2/experiment.py +95 -20
langfun/core/eval/v2/experiment_test.py +19 -0
langfun/core/eval/v2/metric_values.py +31 -3
langfun/core/eval/v2/metric_values_test.py +32 -0
langfun/core/eval/v2/metrics.py +157 -44
langfun/core/eval/v2/metrics_test.py +39 -18
langfun/core/eval/v2/progress.py +31 -1
langfun/core/eval/v2/progress_test.py +27 -0
langfun/core/eval/v2/progress_tracking.py +13 -5
langfun/core/eval/v2/progress_tracking_test.py +9 -1
langfun/core/eval/v2/reporting.py +88 -71
langfun/core/eval/v2/reporting_test.py +24 -6
langfun/core/eval/v2/runners/__init__.py +30 -0
langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
langfun/core/eval/v2/runners/beam.py +354 -0
langfun/core/eval/v2/runners/beam_test.py +153 -0
langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
langfun/core/eval/v2/runners/debug.py +40 -0
langfun/core/eval/v2/runners/debug_test.py +76 -0
langfun/core/eval/v2/runners/parallel.py +243 -0
langfun/core/eval/v2/runners/parallel_test.py +182 -0
langfun/core/eval/v2/runners/sequential.py +47 -0
langfun/core/eval/v2/runners/sequential_test.py +169 -0
langfun/core/langfunc.py +45 -130
langfun/core/langfunc_test.py +7 -5
langfun/core/language_model.py +189 -36
langfun/core/language_model_test.py +54 -3
langfun/core/llms/__init__.py +14 -1
langfun/core/llms/anthropic.py +157 -2
langfun/core/llms/azure_openai.py +29 -17
langfun/core/llms/cache/base.py +25 -3
langfun/core/llms/cache/in_memory.py +48 -7
langfun/core/llms/cache/in_memory_test.py +14 -4
langfun/core/llms/compositional.py +25 -1
langfun/core/llms/deepseek.py +30 -2
langfun/core/llms/fake.py +32 -1
langfun/core/llms/gemini.py +90 -12
langfun/core/llms/gemini_test.py +110 -0
langfun/core/llms/google_genai.py +52 -1
langfun/core/llms/groq.py +28 -3
langfun/core/llms/llama_cpp.py +23 -4
langfun/core/llms/openai.py +120 -3
langfun/core/llms/openai_compatible.py +148 -27
langfun/core/llms/openai_compatible_test.py +207 -20
langfun/core/llms/openai_test.py +0 -2
langfun/core/llms/rest.py +16 -1
langfun/core/llms/vertexai.py +78 -8
langfun/core/logging.py +1 -1
langfun/core/mcp/__init__.py +10 -0
langfun/core/mcp/client.py +177 -0
langfun/core/mcp/client_test.py +71 -0
langfun/core/mcp/session.py +241 -0
langfun/core/mcp/session_test.py +54 -0
langfun/core/mcp/testing/simple_mcp_client.py +33 -0
langfun/core/mcp/testing/simple_mcp_server.py +33 -0
langfun/core/mcp/tool.py +254 -0
langfun/core/mcp/tool_test.py +197 -0
langfun/core/memory.py +1 -0
langfun/core/message.py +160 -55
langfun/core/message_test.py +65 -81
langfun/core/modalities/__init__.py +8 -0
langfun/core/modalities/audio.py +21 -1
langfun/core/modalities/image.py +73 -3
langfun/core/modalities/image_test.py +116 -0
langfun/core/modalities/mime.py +78 -4
langfun/core/modalities/mime_test.py +59 -0
langfun/core/modalities/pdf.py +19 -1
langfun/core/modalities/video.py +21 -1
langfun/core/modality.py +167 -29
langfun/core/modality_test.py +42 -12
langfun/core/natural_language.py +1 -1
langfun/core/sampling.py +4 -4
langfun/core/sampling_test.py +20 -4
langfun/core/structured/__init__.py +2 -24
langfun/core/structured/completion.py +34 -44
langfun/core/structured/completion_test.py +23 -43
langfun/core/structured/description.py +54 -50
langfun/core/structured/function_generation.py +29 -12
langfun/core/structured/mapping.py +81 -37
langfun/core/structured/parsing.py +95 -79
langfun/core/structured/parsing_test.py +0 -3
langfun/core/structured/querying.py +230 -154
langfun/core/structured/querying_test.py +69 -33
langfun/core/structured/schema/__init__.py +49 -0
langfun/core/structured/schema/base.py +664 -0
langfun/core/structured/schema/base_test.py +531 -0
langfun/core/structured/schema/json.py +174 -0
langfun/core/structured/schema/json_test.py +121 -0
langfun/core/structured/schema/python.py +316 -0
langfun/core/structured/schema/python_test.py +410 -0
langfun/core/structured/schema_generation.py +33 -14
langfun/core/structured/scoring.py +47 -36
langfun/core/structured/tokenization.py +26 -11
langfun/core/subscription.py +2 -2
langfun/core/template.py +175 -50
langfun/core/template_test.py +123 -17
langfun/env/__init__.py +43 -0
langfun/env/base_environment.py +827 -0
langfun/env/base_environment_test.py +473 -0
langfun/env/base_feature.py +304 -0
langfun/env/base_feature_test.py +228 -0
langfun/env/base_sandbox.py +842 -0
langfun/env/base_sandbox_test.py +1235 -0
langfun/env/event_handlers/__init__.py +14 -0
langfun/env/event_handlers/chain.py +233 -0
langfun/env/event_handlers/chain_test.py +253 -0
langfun/env/event_handlers/event_logger.py +472 -0
langfun/env/event_handlers/event_logger_test.py +304 -0
langfun/env/event_handlers/metric_writer.py +726 -0
langfun/env/event_handlers/metric_writer_test.py +214 -0
langfun/env/interface.py +1640 -0
langfun/env/interface_test.py +153 -0
langfun/env/load_balancers.py +59 -0
langfun/env/load_balancers_test.py +141 -0
langfun/env/test_utils.py +507 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
langfun/core/eval/v2/runners_test.py +0 -343
langfun/core/structured/schema.py +0 -987
langfun/core/structured/schema_test.py +0 -982
langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0

langfun/core/eval/v2/progress_tracking_test.py CHANGED Viewed

@@ -14,12 +14,14 @@
 import contextlib
 import io
 import os
+import sys
 import tempfile
 import unittest
+from langfun.core import concurrent as lf_concurrent
 from langfun.core import console as lf_console
 from langfun.core.eval.v2 import eval_test_helper
-from langfun.core.eval.v2 import progress_tracking    # pylint: disable=unused-import
+from langfun.core.eval.v2 import progress_tracking
 from langfun.core.eval.v2 import runners as runners_lib  # pylint: disable=unused-import
 import pyglove as pg
@@ -31,6 +33,7 @@ class HtmlProgressTrackerTest(unittest.TestCase):
     def display(x):
       result['view'] = x.to_html()
+    self.assertFalse(progress_tracking._HtmlProgressTracker.is_per_example())
     lf_console._notebook = pg.Dict(
         display=display
     )
@@ -44,11 +47,14 @@ class HtmlProgressTrackerTest(unittest.TestCase):
 class TqdmProgressTrackerTest(unittest.TestCase):
   def test_basic(self):
+    self.assertFalse(progress_tracking._TqdmProgressTracker.is_per_example())
     root_dir = os.path.join(tempfile.mkdtemp(), 'test_tqdm_progress_tracker')
     experiment = eval_test_helper.test_experiment()
     string_io = io.StringIO()
     with contextlib.redirect_stderr(string_io):
       _ = experiment.run(root_dir, 'new', plugins=[])
+      sys.stderr.flush()
+    lf_concurrent.ProgressBar.refresh()
     self.assertIn('All: 100%', string_io.getvalue())
   def test_with_example_ids(self):
@@ -59,6 +65,8 @@ class TqdmProgressTrackerTest(unittest.TestCase):
     string_io = io.StringIO()
     with contextlib.redirect_stderr(string_io):
       _ = experiment.run(root_dir, 'new', example_ids=[1], plugins=[])
+      sys.stderr.flush()
+    lf_concurrent.ProgressBar.refresh()
     self.assertIn('All: 100%', string_io.getvalue())

langfun/core/eval/v2/reporting.py CHANGED Viewed

@@ -32,8 +32,95 @@ _SUMMARY_FILE = 'summary.html'
 _EVALULATION_DETAIL_FILE = 'index.html'
+class ExampleHtmlGenerator(experiment_lib.Plugin):
+  """Plugin for generating HTML views for each evaluation example."""
+  def on_example_complete(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ):
+    self._save_example_html(runner, experiment, example)
+  def _save_example_html(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ) -> None:
+    """Saves the example in HTML format."""
+    current_run = runner.current_run
+    def _generate():
+      try:
+        with pg.timeit() as t:
+          html = example.to_html(
+              collapse_level=None,
+              enable_summary_tooltip=False,
+              extra_flags=dict(
+                  # For properly rendering the next link.
+                  num_examples=getattr(experiment, 'num_examples', None)
+              ),
+          )
+          html.save(
+              runner.current_run.output_path_for(
+                  experiment, f'{example.id}.html'
+              )
+          )
+        experiment.info(
+            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to generate \'{example.id}.html\'. '
+            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
+        )
+        raise e
+    def _copy():
+      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
+      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
+      if src_file == dest_file:
+        return
+      if not pg.io.path_exists(src_file):
+        experiment.warning(
+            f'Skip copying \'{example.id}.html\' as '
+            f'{src_file!r} does not exist.'
+        )
+        return
+      try:
+        with pg.timeit() as t:
+          pg.io.copy(src_file, dest_file)
+        experiment.info(
+            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
+        )
+        raise e
+    generate_example_html = current_run.generate_example_html
+    if (generate_example_html == 'all'
+        or (generate_example_html == 'new' and example.newly_processed)
+        or (isinstance(generate_example_html, list)
+            and example.id in generate_example_html)):
+      op = _generate
+    else:
+      op = _copy
+    runner.background_run(op)
 class HtmlReporter(experiment_lib.Plugin):
-  """Plugin for periodically generating HTML reports for the experiment."""
+  """Plugin for periodically generating HTML reports for the experiment.
+  The `HtmlReporter` plugin generates several HTML files during an experiment
+  run:
+    - A `summary.html` at the root of the run directory, summarizing all
+      evaluations in the experiment.
+    - An `index.html` for each leaf evaluation, detailing the evaluation
+      definition, metrics, and logs.
+  These reports are updated periodically in the background during the run,
+  allowing users to monitor progress in near real-time.
+  """
   summary_interval: Annotated[
       int,
@@ -127,7 +214,6 @@ class HtmlReporter(experiment_lib.Plugin):
   def on_example_complete(
       self, runner: Runner, experiment: Experiment, example: Example
   ):
-    self._save_example_html(runner, experiment, example)
     self._maybe_update_experiment_html(runner, experiment)
     self._maybe_update_summary(runner)
@@ -197,72 +283,3 @@ class HtmlReporter(experiment_lib.Plugin):
         runner.background_run(_save)
       else:
         _save()
-  def _save_example_html(
-      self, runner: Runner, experiment: Experiment, example: Example
-  ) -> None:
-    """Saves the example in HTML format."""
-    current_run = runner.current_run
-    def _generate():
-      try:
-        with pg.timeit() as t:
-          html = example.to_html(
-              collapse_level=None,
-              enable_summary_tooltip=False,
-              extra_flags=dict(
-                  # For properly rendering the next link.
-                  num_examples=getattr(experiment, 'num_examples', None)
-              ),
-          )
-          html.save(
-              runner.current_run.output_path_for(
-                  experiment, f'{example.id}.html'
-              )
-          )
-        experiment.info(
-            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to generate \'{example.id}.html\'. '
-            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
-        )
-        raise e
-    def _copy():
-      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
-      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
-      if src_file == dest_file:
-        return
-      if not pg.io.path_exists(src_file):
-        experiment.warning(
-            f'Skip copying \'{example.id}.html\' as '
-            f'{src_file!r} does not exist.'
-        )
-        return
-      try:
-        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
-          content = src.read()
-          with pg.io.open(dest_file, 'w') as dest:
-            dest.write(content)
-        experiment.info(
-            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
-        )
-      except BaseException as e:  # pylint: disable=broad-except
-        experiment.error(
-            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
-        )
-        raise e
-    generate_example_html = current_run.generate_example_html
-    if (generate_example_html == 'all'
-        or (generate_example_html == 'new' and example.newly_processed)
-        or (isinstance(generate_example_html, list)
-            and example.id in generate_example_html)):
-      op = _generate
-    else:
-      op = _copy
-    runner.background_run(op)

langfun/core/eval/v2/reporting_test.py CHANGED Viewed

@@ -29,7 +29,16 @@ class ReportingTest(unittest.TestCase):
     experiment = eval_test_helper.test_experiment()
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    self.assertFalse(reporter.is_per_example())
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    self.assertTrue(example_html_generator.is_per_example())
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -52,8 +61,10 @@ class ReportingTest(unittest.TestCase):
     root_dir = os.path.join(tempfile.mkdtemp(), 'test_reporting2')
     experiment = eval_test_helper.test_experiment()
     run = experiment.run(
-        root_dir, 'new', plugins=[checkpointer, reporter],
-        warm_start_from=run.output_root
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
@@ -105,7 +116,12 @@ class ReportingTest(unittest.TestCase):
                   .test_experiment_with_example_html_generation_error())
     checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
     reporter = reporting.HtmlReporter()
-    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    example_html_generator = reporting.ExampleHtmlGenerator()
+    run = experiment.run(
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator]
+    )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))
     )
@@ -132,8 +148,10 @@ class ReportingTest(unittest.TestCase):
     experiment = (eval_test_helper
                   .test_experiment_with_example_html_generation_error())
     run = experiment.run(
-        root_dir, 'new', plugins=[checkpointer, reporter],
-        warm_start_from=run.output_root
+        root_dir,
+        'new',
+        plugins=[checkpointer, reporter, example_html_generator],
+        warm_start_from=run.output_root,
     )
     self.assertTrue(
         pg.io.path_exists(os.path.join(run.output_root, 'summary.html'))

langfun/core/eval/v2/runners/__init__.py ADDED Viewed

@@ -0,0 +1,30 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Langfun evaluation runners."""
+# pylint: disable=g-importing-member
+from langfun.core.eval.v2.runners.base import RunnerBase
+from langfun.core.eval.v2.runners.beam import BeamRunner
+from langfun.core.eval.v2.runners.debug import DebugRunner
+from langfun.core.eval.v2.runners.parallel import ParallelRunner
+from langfun.core.eval.v2.runners.sequential import SequentialRunner
+# pylint: enable=g-importing-member
+__all__ = [
+    'RunnerBase',
+    'BeamRunner',
+    'DebugRunner',
+    'ParallelRunner',
+    'SequentialRunner',
+]

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl