PyPI - langfun - Versions diffs - 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl - Mend

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

langfun/__init__.py +20 -2
langfun/core/__init__.py +16 -5
langfun/core/agentic/__init__.py +30 -0
langfun/core/agentic/action.py +854 -0
langfun/core/agentic/action_eval.py +150 -0
langfun/core/agentic/action_eval_test.py +109 -0
langfun/core/agentic/action_test.py +136 -0
langfun/core/coding/python/__init__.py +5 -11
langfun/core/coding/python/correction.py +37 -21
langfun/core/coding/python/correction_test.py +29 -3
langfun/core/coding/python/execution.py +40 -216
langfun/core/coding/python/execution_test.py +29 -89
langfun/core/coding/python/generation.py +21 -11
langfun/core/coding/python/generation_test.py +2 -2
langfun/core/coding/python/parsing.py +108 -193
langfun/core/coding/python/parsing_test.py +2 -105
langfun/core/component.py +63 -2
langfun/core/component_test.py +53 -0
langfun/core/concurrent.py +414 -117
langfun/core/concurrent_test.py +111 -24
langfun/core/console.py +18 -5
langfun/core/console_test.py +17 -0
langfun/core/eval/__init__.py +16 -1
langfun/core/eval/base.py +622 -174
langfun/core/eval/base_test.py +200 -54
langfun/core/eval/matching.py +63 -76
langfun/core/eval/matching_test.py +17 -8
langfun/core/eval/patching.py +130 -0
langfun/core/eval/patching_test.py +170 -0
langfun/core/eval/scoring.py +26 -26
langfun/core/eval/scoring_test.py +19 -2
langfun/core/eval/v2/__init__.py +42 -0
langfun/core/eval/v2/checkpointing.py +380 -0
langfun/core/eval/v2/checkpointing_test.py +228 -0
langfun/core/eval/v2/eval_test_helper.py +136 -0
langfun/core/eval/v2/evaluation.py +725 -0
langfun/core/eval/v2/evaluation_test.py +180 -0
langfun/core/eval/v2/example.py +305 -0
langfun/core/eval/v2/example_test.py +128 -0
langfun/core/eval/v2/experiment.py +1048 -0
langfun/core/eval/v2/experiment_test.py +433 -0
langfun/core/eval/v2/metric_values.py +156 -0
langfun/core/eval/v2/metric_values_test.py +80 -0
langfun/core/eval/v2/metrics.py +357 -0
langfun/core/eval/v2/metrics_test.py +203 -0
langfun/core/eval/v2/progress.py +348 -0
langfun/core/eval/v2/progress_test.py +82 -0
langfun/core/eval/v2/progress_tracking.py +210 -0
langfun/core/eval/v2/progress_tracking_test.py +66 -0
langfun/core/eval/v2/reporting.py +270 -0
langfun/core/eval/v2/reporting_test.py +158 -0
langfun/core/eval/v2/runners.py +488 -0
langfun/core/eval/v2/runners_test.py +334 -0
langfun/core/langfunc.py +4 -17
langfun/core/langfunc_test.py +22 -6
langfun/core/language_model.py +577 -39
langfun/core/language_model_test.py +470 -56
langfun/core/llms/__init__.py +87 -16
langfun/core/llms/anthropic.py +312 -87
langfun/core/llms/anthropic_test.py +71 -3
langfun/core/llms/cache/base.py +21 -2
langfun/core/llms/cache/in_memory.py +13 -0
langfun/core/llms/cache/in_memory_test.py +53 -2
langfun/core/llms/compositional.py +101 -0
langfun/core/llms/compositional_test.py +73 -0
langfun/core/llms/deepseek.py +117 -0
langfun/core/llms/deepseek_test.py +61 -0
langfun/core/llms/fake.py +11 -7
langfun/core/llms/fake_test.py +14 -0
langfun/core/llms/gemini.py +507 -0
langfun/core/llms/gemini_test.py +195 -0
langfun/core/llms/google_genai.py +62 -218
langfun/core/llms/google_genai_test.py +9 -202
langfun/core/llms/groq.py +160 -144
langfun/core/llms/groq_test.py +31 -137
langfun/core/llms/llama_cpp.py +15 -42
langfun/core/llms/llama_cpp_test.py +4 -30
langfun/core/llms/openai.py +395 -203
langfun/core/llms/openai_compatible.py +179 -0
langfun/core/llms/openai_compatible_test.py +495 -0
langfun/core/llms/openai_test.py +30 -395
langfun/core/llms/rest.py +113 -0
langfun/core/llms/rest_test.py +111 -0
langfun/core/llms/vertexai.py +192 -0
langfun/core/llms/vertexai_test.py +52 -0
langfun/core/logging.py +284 -0
langfun/core/logging_test.py +125 -0
langfun/core/message.py +319 -9
langfun/core/message_test.py +190 -13
langfun/core/modalities/__init__.py +6 -2
langfun/core/modalities/audio.py +30 -0
langfun/core/modalities/audio_test.py +63 -0
langfun/core/modalities/image.py +39 -20
langfun/core/modalities/image_test.py +52 -9
langfun/core/modalities/mime.py +206 -29
langfun/core/modalities/mime_test.py +90 -9
langfun/core/modalities/ms_office.py +117 -0
langfun/core/modalities/ms_office_test.py +389 -0
langfun/core/modalities/pdf.py +22 -0
langfun/core/modalities/pdf_test.py +57 -0
langfun/core/modalities/video.py +9 -26
langfun/core/modalities/video_test.py +3 -3
langfun/core/modality.py +26 -3
langfun/core/modality_test.py +2 -2
langfun/core/sampling.py +11 -11
langfun/core/structured/__init__.py +12 -16
langfun/core/structured/completion.py +32 -5
langfun/core/structured/completion_test.py +7 -6
langfun/core/structured/description.py +2 -2
langfun/core/structured/description_test.py +3 -3
langfun/core/structured/function_generation.py +60 -27
langfun/core/structured/function_generation_test.py +72 -2
langfun/core/structured/mapping.py +97 -47
langfun/core/structured/mapping_test.py +90 -2
langfun/core/structured/parsing.py +33 -21
langfun/core/structured/parsing_test.py +53 -9
langfun/core/structured/querying.py +746 -0
langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
langfun/core/structured/schema.py +204 -97
langfun/core/structured/schema_generation.py +1 -1
langfun/core/structured/schema_test.py +130 -29
langfun/core/structured/scoring.py +125 -19
langfun/core/structured/scoring_test.py +30 -0
langfun/core/structured/tokenization.py +64 -0
langfun/core/structured/tokenization_test.py +48 -0
langfun/core/template.py +115 -1
langfun/core/template_test.py +71 -1
langfun/core/templates/conversation.py +9 -0
langfun/core/templates/conversation_test.py +4 -3
langfun/core/templates/selfplay_test.py +10 -2
langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
langfun/core/coding/python/errors.py +0 -108
langfun/core/coding/python/errors_test.py +0 -99
langfun/core/coding/python/permissions.py +0 -90
langfun/core/coding/python/permissions_test.py +0 -86
langfun/core/structured/prompting.py +0 -238
langfun/core/text_formatting.py +0 -162
langfun/core/text_formatting_test.py +0 -47
langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
{langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0

langfun/core/eval/v2/reporting.py ADDED Viewed

@@ -0,0 +1,270 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Reporting evaluation results."""
+import threading
+import time
+import traceback
+from typing import Annotated
+from langfun.core.eval.v2 import example as example_lib
+from langfun.core.eval.v2 import experiment as experiment_lib
+import pyglove as pg
+# Short aliases for the experiment-framework types used in the annotations
+# below.
+Runner = experiment_lib.Runner
+Experiment = experiment_lib.Experiment
+Example = example_lib.Example
+# File name of the summary page written for the root experiment of a run.
+_SUMMARY_FILE = 'summary.html'
+# File name of the detail page written for each experiment.
+# NOTE(review): the identifier misspells "EVALUATION"; it is kept as-is here
+# because it is referenced elsewhere in this module.
+_EVALULATION_DETAIL_FILE = 'index.html'
+class HtmlReporter(experiment_lib.Plugin):
+  """Plugin for periodically generating HTML reports for the experiment."""
+  summary_interval: Annotated[
+      int,
+      'The interval of writing summary in seconds.'
+  ] = 60
+  experiment_report_interval: Annotated[
+      int,
+      'The interval of writing report for inidividual experiments in seconds.'
+  ] = 120
+  def _on_bound(self):
+    super()._on_bound()
+    self._last_summary_time = 0
+    self._last_experiment_report_time = {}
+    self._update_thread = None
+    self._stop_update = False
+    self._stop_update_experiment_ids = set()
+    self._summary_lock = None
+    self._experiment_index_lock = None
+  def on_run_start(
+      self,
+      runner: Runner,
+      root: Experiment
+  ) -> None:
+    self._maybe_update_summary(runner)
+    self._last_experiment_report_time = {leaf.id: 0 for leaf in root.leaf_nodes}
+    self._stop_update = False
+    self._stop_update_experiment_ids = set()
+    self._summary_lock = threading.Lock()
+    self._experiment_index_lock = {
+        leaf.id: threading.Lock() for leaf in root.leaf_nodes
+    }
+    self._update_thread = threading.Thread(
+        target=self._update_thread_func, args=(runner,)
+    )
+    self._update_thread.start()
+  def on_run_complete(
+      self,
+      runner: Runner,
+      root: Experiment
+  ) -> None:
+    self._stop_update = True
+    self._maybe_update_summary(runner, force=True)
+  def on_run_abort(
+      self,
+      runner: Runner,
+      root: Experiment,
+      error: BaseException
+  ) -> None:
+    self._stop_update = True
+    self._maybe_update_summary(runner, force=True)
+  def _update_thread_func(self, runner: Runner):
+    while not self._stop_update:
+      self._maybe_update_summary(runner, background=False)
+      for leaf in runner.current_run.experiment.leaf_nodes:
+        if leaf.id in self._stop_update_experiment_ids:
+          continue
+        self._maybe_update_experiment_html(runner, leaf, background=False)
+        if leaf.progress.is_stopped:
+          self._stop_update_experiment_ids.add(leaf.id)
+      time.sleep(5)
+  def on_experiment_start(
+      self,
+      runner: Runner,
+      experiment: Experiment
+  ) -> None:
+    if experiment.is_leaf:
+      self._maybe_update_experiment_html(runner, experiment)
+  def on_experiment_complete(
+      self, runner: Runner, experiment: Experiment
+  ):
+    if experiment.is_leaf:
+      self._maybe_update_experiment_html(runner, experiment, force=True)
+  def on_experiment_abort(
+      self,
+      runner: Runner,
+      experiment: Experiment,
+      error: BaseException
+  ) -> None:
+    del error
+    assert experiment.is_leaf
+    self._maybe_update_experiment_html(runner, experiment, force=True)
+  def on_example_complete(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ):
+    self._save_example_html(runner, experiment, example)
+    self._maybe_update_experiment_html(runner, experiment)
+    self._maybe_update_summary(runner)
+  def _maybe_update_summary(
+      self,
+      runner: Runner,
+      background: bool = True,
+      force: bool = False) -> None:
+    """Maybe update the summary of current run."""
+    run = runner.current_run
+    def _summary():
+      html = run.experiment.to_html(
+          collapse_level=None,
+          extra_flags=dict(
+              current_run=run, interactive=False, card_view=True,
+          )
+      )
+      with self._summary_lock:
+        html.save(
+            run.output_path_for(run.experiment, _SUMMARY_FILE)
+        )
+    if force or (time.time() - self._last_summary_time > self.summary_interval):
+      self._last_summary_time = time.time()
+      if background:
+        runner.background_run(_summary)
+      else:
+        _summary()
+  def _maybe_update_experiment_html(
+      self,
+      runner: Runner,
+      experiment: Experiment,
+      force: bool = False,
+      background: bool = True,
+  ) -> None:
+    def _save():
+      index_html_path = runner.current_run.output_path_for(
+          experiment, _EVALULATION_DETAIL_FILE
+      )
+      try:
+        with pg.timeit() as t:
+          html = experiment.to_html(
+              collapse_level=None,
+              extra_flags=dict(
+                  current_run=runner.current_run,
+                  interactive=False,
+                  card_view=False,
+              ),
+          )
+          with self._experiment_index_lock[experiment.id]:
+            html.save(index_html_path)
+          experiment.info(
+              f'Updated {index_html_path!r} in {t.elapse:.2f} seconds.',
+          )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to generate {index_html_path!r}. '
+            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
+        )
+        raise e
+    if force or (
+        time.time() - self._last_experiment_report_time[experiment.id]
+        > self.experiment_report_interval
+    ):
+      self._last_experiment_report_time[experiment.id] = time.time()
+      if background:
+        runner.background_run(_save)
+      else:
+        _save()
+  def _save_example_html(
+      self, runner: Runner, experiment: Experiment, example: Example
+  ) -> None:
+    """Saves the example in HTML format."""
+    current_run = runner.current_run
+    def _generate():
+      try:
+        with pg.timeit() as t:
+          html = example.to_html(
+              collapse_level=None,
+              enable_summary_tooltip=False,
+              extra_flags=dict(
+                  # For properly rendering the next link.
+                  num_examples=getattr(experiment, 'num_examples', None)
+              ),
+          )
+          html.save(
+              runner.current_run.output_path_for(
+                  experiment, f'{example.id}.html'
+              )
+          )
+        experiment.info(
+            f'\'{example.id}.html\' generated in {t.elapse:.2f} seconds. '
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to generate \'{example.id}.html\'. '
+            f'Error: {e}, Stacktrace: \n{traceback.format_exc()}.',
+        )
+        raise e
+    def _copy():
+      src_file = current_run.input_path_for(experiment, f'{example.id}.html')
+      dest_file = current_run.output_path_for(experiment, f'{example.id}.html')
+      if src_file == dest_file:
+        return
+      if not pg.io.path_exists(src_file):
+        experiment.warning(
+            f'Skip copying \'{example.id}.html\' as '
+            f'{src_file!r} does not exist.'
+        )
+        return
+      try:
+        with pg.timeit() as t, pg.io.open(src_file, 'r') as src:
+          content = src.read()
+          with pg.io.open(dest_file, 'w') as dest:
+            dest.write(content)
+        experiment.info(
+            f'\'{example.id}.html\' copied in {t.elapse:.2f} seconds.'
+        )
+      except BaseException as e:  # pylint: disable=broad-except
+        experiment.error(
+            f'Failed to copy {src_file!r} to {dest_file!r}. Error: {e}.'
+        )
+        raise e
+    generate_example_html = current_run.generate_example_html
+    if (generate_example_html == 'all'
+        or (generate_example_html == 'new' and example.newly_processed)
+        or (isinstance(generate_example_html, list)
+            and example.id in generate_example_html)):
+      op = _generate
+    else:
+      op = _copy
+    runner.background_run(op)

langfun/core/eval/v2/reporting_test.py ADDED Viewed

@@ -0,0 +1,158 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import tempfile
+import unittest
+from langfun.core.eval.v2 import checkpointing
+from langfun.core.eval.v2 import eval_test_helper
+from langfun.core.eval.v2 import reporting
+from langfun.core.eval.v2 import runners as runners_lib  # pylint: disable=unused-import
+import pyglove as pg
+class ReportingTest(unittest.TestCase):
+  def test_reporting(self):
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_reporting')
+    experiment = eval_test_helper.test_experiment()
+    checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
+    reporter = reporting.HtmlReporter()
+    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+      for i in range(leaf.num_examples):
+        self.assertTrue(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+      found_generation_log = False
+      for log_entry in leaf._log_entries:
+        if 'generated in' in log_entry.message:
+          found_generation_log = True
+          break
+      self.assertTrue(found_generation_log)
+    # Test warm start.
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_reporting2')
+    experiment = eval_test_helper.test_experiment()
+    run = experiment.run(
+        root_dir, 'new', plugins=[checkpointer, reporter],
+        warm_start_from=run.output_root
+    )
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+      for i in range(leaf.num_examples):
+        self.assertTrue(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+      found_copy_log = False
+      for log_entry in leaf._log_entries:
+        if 'copied in' in log_entry.message:
+          found_copy_log = True
+          break
+      self.assertTrue(found_copy_log)
+  def test_index_html_generation_error(self):
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'test_reporting_with_index_html_generation_error'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_index_html_generation_error())
+    reporter = reporting.HtmlReporter()
+    run = experiment.run(root_dir, 'new', plugins=[reporter])
+    self.assertFalse(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertFalse(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+    found_error_log = False
+    for log_entry in experiment._log_entries:
+      if log_entry.message.startswith('Failed to generate'):
+        found_error_log = True
+        break
+    self.assertTrue(found_error_log)
+  def test_example_html_generation_error(self):
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'test_reporting_with_example_html_generation_error'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_example_html_generation_error())
+    checkpointer = checkpointing.BulkCheckpointer('checkpoint.jsonl')
+    reporter = reporting.HtmlReporter()
+    run = experiment.run(root_dir, 'new', plugins=[checkpointer, reporter])
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+      for i in range(leaf.num_examples):
+        self.assertFalse(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+    found_error_log = False
+    for log_entry in experiment._log_entries:
+      if log_entry.message.startswith('Failed to generate'):
+        found_error_log = True
+        break
+    self.assertTrue(found_error_log)
+    # Test warm start.
+    root_dir = os.path.join(
+        tempfile.gettempdir(),
+        'test_reporting_with_example_html_generation_error2'
+    )
+    experiment = (eval_test_helper
+                  .test_experiment_with_example_html_generation_error())
+    run = experiment.run(
+        root_dir, 'new', plugins=[checkpointer, reporter],
+        warm_start_from=run.output_root
+    )
+    self.assertTrue(
+        pg.io.path_exists(run.output_path_for(experiment, 'summary.html'))
+    )
+    for leaf in experiment.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'index.html'))
+      )
+      for i in range(leaf.num_examples):
+        self.assertFalse(
+            pg.io.path_exists(run.output_path_for(leaf, f'{i + 1}.html'))
+        )
+    found_error_log = False
+    for log_entry in experiment._log_entries:
+      if log_entry.message.startswith('Skip copying'):
+        found_error_log = True
+        break
+    self.assertTrue(found_error_log)
+# Run the tests in this module when the file is executed as a script.
+if __name__ == '__main__':
+  unittest.main()

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl