langfun 0.1.2.dev202411090804__py3-none-any.whl → 0.1.2.dev202411140804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/console.py +10 -2
- langfun/core/console_test.py +17 -0
- langfun/core/eval/__init__.py +2 -0
- langfun/core/eval/v2/__init__.py +38 -0
- langfun/core/eval/v2/checkpointing.py +135 -0
- langfun/core/eval/v2/checkpointing_test.py +89 -0
- langfun/core/eval/v2/evaluation.py +627 -0
- langfun/core/eval/v2/evaluation_test.py +156 -0
- langfun/core/eval/v2/example.py +295 -0
- langfun/core/eval/v2/example_test.py +114 -0
- langfun/core/eval/v2/experiment.py +949 -0
- langfun/core/eval/v2/experiment_test.py +304 -0
- langfun/core/eval/v2/metric_values.py +156 -0
- langfun/core/eval/v2/metric_values_test.py +80 -0
- langfun/core/eval/v2/metrics.py +357 -0
- langfun/core/eval/v2/metrics_test.py +203 -0
- langfun/core/eval/v2/progress.py +348 -0
- langfun/core/eval/v2/progress_test.py +82 -0
- langfun/core/eval/v2/progress_tracking.py +209 -0
- langfun/core/eval/v2/progress_tracking_test.py +56 -0
- langfun/core/eval/v2/reporting.py +144 -0
- langfun/core/eval/v2/reporting_test.py +41 -0
- langfun/core/eval/v2/runners.py +417 -0
- langfun/core/eval/v2/runners_test.py +311 -0
- langfun/core/eval/v2/test_helper.py +80 -0
- langfun/core/language_model.py +122 -11
- langfun/core/language_model_test.py +97 -4
- langfun/core/llms/__init__.py +3 -0
- langfun/core/llms/compositional.py +101 -0
- langfun/core/llms/compositional_test.py +73 -0
- langfun/core/llms/vertexai.py +4 -4
- {langfun-0.1.2.dev202411090804.dist-info → langfun-0.1.2.dev202411140804.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202411090804.dist-info → langfun-0.1.2.dev202411140804.dist-info}/RECORD +36 -12
- {langfun-0.1.2.dev202411090804.dist-info → langfun-0.1.2.dev202411140804.dist-info}/WHEEL +1 -1
- {langfun-0.1.2.dev202411090804.dist-info → langfun-0.1.2.dev202411140804.dist-info}/LICENSE +0 -0
- {langfun-0.1.2.dev202411090804.dist-info → langfun-0.1.2.dev202411140804.dist-info}/top_level.txt +0 -0
langfun/core/eval/v2/runners_test.py
ADDED
@@ -0,0 +1,311 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import tempfile
+import threading
+import time
+from typing import Any
+import unittest
+
+from langfun.core.eval.v2 import example as example_lib
+from langfun.core.eval.v2 import experiment as experiment_lib
+from langfun.core.eval.v2 import runners as runners_lib  # pylint: disable=unused-import
+from langfun.core.eval.v2 import test_helper
+import pyglove as pg
+
+
+Runner = experiment_lib.Runner
+Example = example_lib.Example
+Experiment = experiment_lib.Experiment
+Suite = experiment_lib.Suite
+Plugin = experiment_lib.Plugin
+
+
+class TestPlugin(Plugin):
+  started_experiments: list[Experiment] = []
+  completed_experiments: list[Experiment] = []
+  skipped_experiments: list[Experiment] = []
+  started_example_ids: list[int] = []
+  completed_example_ids: list[int] = []
+  skipped_example_ids: list[int] = []
+  start_time: float | None = None
+  complete_time: float | None = None
+
+  def _on_bound(self):
+    super()._on_bound()
+    self._lock = threading.Lock()
+
+  def on_run_start(self, runner: Runner, root: Experiment):
+    del root
+    with pg.notify_on_change(False), pg.allow_writable_accessors(True):
+      self.start_time = time.time()
+
+  def on_run_complete(self, runner: Runner, root: Experiment):
+    del root
+    with pg.notify_on_change(False), pg.allow_writable_accessors(True):
+      self.complete_time = time.time()
+
+  def on_experiment_start(self, runner: Runner, experiment: Experiment):
+    del runner
+    with pg.notify_on_change(False), self._lock:
+      self.started_experiments.append(pg.Ref(experiment))
+
+  def on_experiment_skipped(self, runner: Runner, experiment: Experiment):
+    del runner
+    with pg.notify_on_change(False), self._lock:
+      self.skipped_experiments.append(pg.Ref(experiment))
+
+  def on_experiment_complete(self, runner: Runner, experiment: Experiment):
+    del runner
+    with pg.notify_on_change(False), self._lock:
+      self.completed_experiments.append(pg.Ref(experiment))
+
+  def on_example_start(
+      self, runner: Runner, experiment: Experiment, example: Example):
+    del runner, experiment
+    with pg.notify_on_change(False), self._lock:
+      self.started_example_ids.append(example.id)
+
+  def on_example_skipped(
+      self, runner: Runner, experiment: Experiment, example: Example):
+    del runner, experiment
+    with pg.notify_on_change(False), self._lock:
+      self.skipped_example_ids.append(example.id)
+
+  def on_example_complete(
+      self, runner: Runner, experiment: Experiment, example: Example):
+    del runner, experiment
+    with pg.notify_on_change(False), self._lock:
+      self.completed_example_ids.append(example.id)
+
+
+class RunnerTest(unittest.TestCase):
+
+  def assert_same_list(self, actual: list[Any], expected: list[Any]):
+    self.assertEqual(len(actual), len(expected))
+    for i, (x, y) in enumerate(zip(actual, expected)):
+      if x is not y:
+        print(i, pg.diff(x, y))
+      self.assertIs(x, y)
+
+  def test_basic(self):
+    plugin = TestPlugin()
+    exp = test_helper.test_experiment()
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_sequential_runner')
+    run = exp.run(root_dir, runner='sequential', plugins=[plugin])
+
+    self.assertIsNotNone(plugin.start_time)
+    self.assertIsNotNone(plugin.complete_time)
+    self.assertGreater(plugin.complete_time, plugin.start_time)
+
+    self.assert_same_list(
+        plugin.started_experiments,
+        exp.nonleaf_nodes + exp.leaf_nodes
+    )
+    self.assert_same_list(
+        plugin.completed_experiments,
+        exp.leaf_nodes + list(reversed(exp.nonleaf_nodes))
+    )
+    self.assert_same_list(
+        plugin.started_example_ids, list(range(1, 11)) * 6
+    )
+    self.assert_same_list(
+        plugin.completed_example_ids, list(range(1, 11)) * 6
+    )
+    self.assert_same_list(plugin.skipped_experiments, [])
+    self.assert_same_list(plugin.skipped_example_ids, [])
+    self.assertTrue(
+        pg.io.path_exists(os.path.join(run.output_root, 'run.json'))
+    )
+
+    for node in exp.nodes:
+      self.assertTrue(node.progress.is_started)
+      self.assertTrue(node.progress.is_completed)
+      if node.is_leaf:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_completed, 10)
+        self.assertEqual(node.progress.num_failed, 1)
+      else:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_failed, 0)
+        self.assertEqual(node.progress.num_processed, node.progress.num_total)
+
+  def test_raise_if_has_error(self):
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_raise_if_has_error')
+    exp = test_helper.TestEvaluation()
+    with self.assertRaisesRegex(ValueError, 'x should not be 5'):
+      exp.run(
+          root_dir, runner='sequential', plugins=[], raise_if_has_error=True
+      )
+
+    with self.assertRaisesRegex(ValueError, 'x should not be 5'):
+      exp.run(root_dir, runner='parallel', plugins=[], raise_if_has_error=True)
+
+  def test_example_ids(self):
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_example_ids')
+    exp = test_helper.test_experiment()
+    plugin = TestPlugin()
+    _ = exp.run(
+        root_dir, runner='sequential', plugins=[plugin], example_ids=[5, 7, 9]
+    )
+    self.assertEqual(plugin.started_example_ids, [5, 7, 9] * 6)
+    self.assertEqual(plugin.completed_example_ids, [5, 7, 9] * 6)
+
+  def test_filter(self):
+    plugin = TestPlugin()
+    exp = test_helper.test_experiment()
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_filter')
+
+    _ = exp.run(
+        root_dir, runner='sequential', plugins=[plugin],
+        filter=lambda e: e.lm.offset != 0
+    )
+    self.assert_same_list(
+        plugin.started_experiments,
+        exp.nonleaf_nodes + exp.leaf_nodes[2:]
+    )
+    self.assert_same_list(
+        plugin.skipped_experiments, exp.leaf_nodes[:2]
+    )
+    self.assert_same_list(
+        plugin.completed_experiments,
+        exp.leaf_nodes[2:] + [exp.children[1], exp]
+    )
+
+  def test_use_cache(self):
+    @pg.functor()
+    def test_inputs(num_examples: int = 10):
+      return [
+          pg.Dict(
+              x=i // 2, y=(i // 2) ** 2,
+              groundtruth=(i // 2 + (i // 2) ** 2)
+          ) for i in range(num_examples)
+      ]
+
+    exp = test_helper.TestEvaluation(
+        inputs=test_inputs(num_examples=pg.oneof([2, 4]))
+    )
+    # Global cache.
+    root_dir = os.path.join(tempfile.gettempdir(), 'global_cache')
+    run = exp.run(root_dir, runner='sequential', use_cache='global', plugins=[])
+    self.assertTrue(pg.io.path_exists(run.output_path_for(exp, 'cache.json')))
+    self.assertEqual(exp.usage_summary.cached.total.num_requests, 4)
+    self.assertEqual(exp.usage_summary.uncached.total.num_requests, 2)
+
+    # Per-dataset cache.
+    root_dir = os.path.join(tempfile.gettempdir(), 'per_dataset')
+    run = exp.run(
+        root_dir, runner='sequential', use_cache='per_dataset', plugins=[]
+    )
+    for leaf in exp.leaf_nodes:
+      self.assertTrue(
+          pg.io.path_exists(run.output_path_for(leaf, 'cache.json'))
+      )
+    self.assertEqual(exp.usage_summary.cached.total.num_requests, 3)
+    self.assertEqual(exp.usage_summary.uncached.total.num_requests, 3)
+
+    # No cache.
+    root_dir = os.path.join(tempfile.gettempdir(), 'no')
+    run = exp.run(root_dir, runner='sequential', use_cache='no', plugins=[])
+    self.assertFalse(pg.io.path_exists(run.output_path_for(exp, 'cache.json')))
+    for leaf in exp.leaf_nodes:
+      self.assertFalse(
+          pg.io.path_exists(run.output_path_for(leaf, 'cache.json'))
+      )
+    self.assertEqual(exp.usage_summary.cached.total.num_requests, 0)
+    self.assertEqual(exp.usage_summary.uncached.total.num_requests, 6)
+
+  def test_parallel_runner(self):
+    plugin = TestPlugin()
+    exp = test_helper.test_experiment()
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_parallel_runner')
+    run = exp.run(root_dir, runner='parallel', plugins=[plugin])
+
+    self.assertIsNotNone(plugin.start_time)
+    self.assertIsNotNone(plugin.complete_time)
+    self.assertGreater(plugin.complete_time, plugin.start_time)
+
+    self.assertEqual(
+        len(plugin.started_experiments), len(exp.nodes)
+    )
+    self.assertEqual(
+        len(plugin.completed_experiments), len(exp.nodes)
+    )
+    self.assertEqual(
+        len(plugin.started_example_ids), 6 * 10
+    )
+    self.assertEqual(
+        len(plugin.completed_example_ids), 6 * 10
+    )
+    self.assert_same_list(plugin.skipped_experiments, [])
+    self.assert_same_list(plugin.skipped_example_ids, [])
+    self.assertTrue(
+        pg.io.path_exists(os.path.join(run.output_root, 'run.json'))
+    )
+
+    for node in exp.nodes:
+      self.assertTrue(node.progress.is_started)
+      self.assertTrue(node.progress.is_completed)
+      if node.is_leaf:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_completed, 10)
+        self.assertEqual(node.progress.num_failed, 1)
+      else:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_failed, 0)
+        self.assertEqual(node.progress.num_processed, node.progress.num_total)
+
+  def test_debug_runner(self):
+    plugin = TestPlugin()
+    exp = test_helper.test_experiment()
+    root_dir = os.path.join(tempfile.gettempdir(), 'test_debug_runner')
+    run = exp.run(root_dir, runner='debug', plugins=[plugin])
+
+    self.assertIsNotNone(plugin.start_time)
+    self.assertIsNotNone(plugin.complete_time)
+    self.assertGreater(plugin.complete_time, plugin.start_time)
+
+    self.assertEqual(
+        len(plugin.started_experiments), len(exp.nodes)
+    )
+    self.assertEqual(
+        len(plugin.completed_experiments), len(exp.nodes)
+    )
+    self.assertEqual(
+        len(plugin.started_example_ids), 6 * 1
+    )
+    self.assertEqual(
+        len(plugin.completed_example_ids), 6 * 1
+    )
+    self.assert_same_list(plugin.skipped_experiments, [])
+    self.assert_same_list(plugin.skipped_example_ids, [])
+    self.assertFalse(
+        pg.io.path_exists(os.path.join(run.output_root, 'run.json'))
+    )
+
+    for node in exp.nodes:
+      self.assertTrue(node.progress.is_started)
+      self.assertTrue(node.progress.is_completed)
+      if node.is_leaf:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_completed, 1)
+        self.assertEqual(node.progress.num_failed, 0)
+      else:
+        self.assertEqual(node.progress.num_skipped, 0)
+        self.assertEqual(node.progress.num_failed, 0)
+        self.assertEqual(node.progress.num_processed, node.progress.num_total)
+
+
+if __name__ == '__main__':
+  unittest.main()
langfun/core/eval/v2/test_helper.py
ADDED
@@ -0,0 +1,80 @@
+# Copyright 2024 The Langfun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper classes and functions for evaluation tests."""
+
+from langfun.core import language_model
+from langfun.core import llms
+from langfun.core import message as message_lib
+from langfun.core import structured
+
+from langfun.core.eval.v2 import evaluation as evaluation_lib
+from langfun.core.eval.v2 import example as example_lib
+from langfun.core.eval.v2 import experiment as experiment_lib
+from langfun.core.eval.v2 import metrics as metrics_lib
+
+import pyglove as pg
+
+Example = example_lib.Example
+Suite = experiment_lib.Suite
+Evaluation = evaluation_lib.Evaluation
+RunId = experiment_lib.RunId
+Run = experiment_lib.Run
+
+
+@pg.functor()
+def test_inputs(num_examples: int | None = 10):
+  if num_examples is None:
+    num_examples = 20
+  return [
+      pg.Dict(x=i, y=i ** 2, groundtruth=i + i ** 2)
+      for i in range(num_examples)
+  ]
+
+
+class TestLLM(llms.Fake):
+  """Test language model."""
+
+  offset: int = 0
+
+  def _response_from(self, prompt: message_lib.Message) -> message_lib.Message:
+    return message_lib.AIMessage(
+        str(prompt.metadata.x + prompt.metadata.y + self.offset)
+    )
+
+  @property
+  def resource_id(self) -> str:
+    return f'test_llm:{self.offset}'
+
+
+class TestEvaluation(Evaluation):
+  """Test evaluation class."""
+  inputs = test_inputs()
+  metrics = [metrics_lib.Match()]
+  lm: language_model.LanguageModel = TestLLM()
+
+  def process(self, v):
+    if v.x == 5:
+      raise ValueError('x should not be 5')
+    return structured.query(
+        '{{x}} + {{y}} = ?', int, lm=self.lm, x=v.x, y=v.y,
+        metadata_x=v.x, metadata_y=v.y
+    )
+
+
+def test_experiment():
+  """Returns a test experiment."""
+  return Suite([
+      TestEvaluation(lm=TestLLM(offset=0)),
+      TestEvaluation(lm=TestLLM(offset=pg.oneof(range(5)))),
+  ])
langfun/core/language_model.py
CHANGED
@@ -17,6 +17,8 @@ import abc
 import contextlib
 import dataclasses
 import enum
+import functools
+import math
 import threading
 import time
 from typing import Annotated, Any, Callable, Iterator, Optional, Sequence, Tuple, Type, Union
@@ -875,7 +877,7 @@ class LanguageModel(component.Component):
     return DEFAULT_MAX_CONCURRENCY  # Default of 1


-class UsageSummary(pg.Object):
+class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
   """Usage sumary."""

   class AggregatedUsage(pg.Object):
@@ -897,20 +899,131 @@ class UsageSummary(pg.Object):
       aggregated = self.breakdown.get(model_id, None)
       with pg.notify_on_change(False):
         self.breakdown[model_id] = usage + aggregated
-        self.rebind(
+        self.rebind(
+            total=self.total + usage,
+            raise_on_no_change=False
+        )
+
+    def merge(self, other: 'UsageSummary.AggregatedUsage') -> None:
+      """Merges the usage summary."""
+      with pg.notify_on_change(False):
+        for model_id, usage in other.breakdown.items():
+          self.add(model_id, usage)
+
+  def _on_bound(self):
+    super()._on_bound()
+    self._usage_badge = None
+    self._lock = threading.Lock()

   @property
   def total(self) -> LMSamplingUsage:
     return self.cached.total + self.uncached.total

-  def
+  def add(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
     """Updates the usage summary."""
-
-
-
-
-
+    with self._lock:
+      if is_cached:
+        usage.rebind(estimated_cost=0.0, skip_notification=True)
+        self.cached.add(model_id, usage)
+      else:
+        self.uncached.add(model_id, usage)
+      self._update_view()
+
+  def merge(self, other: 'UsageSummary', as_cached: bool = False) -> None:
+    """Aggregates the usage summary.
+
+    Args:
+      other: The usage summary to merge.
+      as_cached: Whether to merge the usage summary as cached.
+    """
+    with self._lock:
+      self.cached.merge(other.cached)
+      if as_cached:
+        self.cached.merge(other.uncached)
+      else:
+        self.uncached.merge(other.uncached)
+      self._update_view()
+
+  def _sym_nondefault(self) -> dict[str, Any]:
+    """Overrides nondefault values so volatile values are not included."""
+    return dict()
+
+  #
+  # Html views for the usage summary.
+  #
+
+  def _update_view(self):
+    if self._usage_badge is not None:
+      self._usage_badge.update(
+          self._badge_text(),
+          tooltip=pg.format(self.total, verbose=False),
+          styles=dict(color=self._badge_color()),
+      )

+  def _badge_text(self) -> str:
+    if self.total.estimated_cost is not None:
+      return f'{self.total.estimated_cost:.3f}'
+    return '0.000'
+
+  def _badge_color(self) -> str | None:
+    if self.total.estimated_cost is None or self.total.estimated_cost < 1.0:
+      return None
+
+    # Step 1: The normal cost range is around 1e-3 to 1e5.
+    # Therefore we normalize the log10 value from [-3, 5] to [0, 1].
+    normalized_value = (math.log10(self.total.estimated_cost) + 3) / (5 + 3)
+
+    # Step 2: Interpolate between green and red
+    red = int(255 * normalized_value)
+    green = int(255 * (1 - normalized_value))
+    return f'rgb({red}, {green}, 0)'
+
+  def _html_tree_view(
+      self,
+      *,
+      view: pg.views.HtmlTreeView,
+      extra_flags: dict[str, Any] | None = None,
+      **kwargs
+  ) -> pg.Html:
+    extra_flags = extra_flags or {}
+    as_badge = extra_flags.pop('as_badge', False)
+    interactive = extra_flags.get('interactive', True)
+    if as_badge:
+      usage_badge = self._usage_badge
+      if usage_badge is None:
+        usage_badge = pg.views.html.controls.Badge(
+            self._badge_text(),
+            tooltip=pg.format(self.total, verbose=False),
+            css_classes=['usage-summary'],
+            styles=dict(color=self._badge_color()),
+            interactive=True,
+        )
+        if interactive:
+          self._usage_badge = usage_badge
+      return usage_badge.to_html()
+    return super()._html_tree_view(
+        view=view,
+        extra_flags=extra_flags,
+        **kwargs
+    )
+
+  @classmethod
+  @functools.cache
+  def _html_tree_view_css_styles(cls) -> list[str]:
+    return super()._html_tree_view_css_styles() + [
+        """
+        .usage-summary.label {
+          display: inline-flex;
+          border-radius: 5px;
+          padding: 5px;
+          background-color: #f1f1f1;
+          color: #CCC;
+        }
+        .usage-summary.label::before {
+          content: '$';
+        }
+        """
+    ]

 pg.members(
     dict(
@@ -938,12 +1051,10 @@ class _UsageTracker:
   def __init__(self, model_ids: set[str] | None):
     self.model_ids = model_ids
     self.usage_summary = UsageSummary()
-    self._lock = threading.Lock()

   def track(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
     if self.model_ids is None or model_id in self.model_ids:
-
-      self.usage_summary.update(model_id, usage, is_cached)
+      self.usage_summary.add(model_id, usage, is_cached)


 @contextlib.contextmanager

langfun/core/language_model_test.py
CHANGED
@@ -685,7 +685,6 @@ class LanguageModelTest(unittest.TestCase):
       lm2('hi')
       list(concurrent.concurrent_map(call_lm, ['hi', 'hello']))

-    print(usages2)
     self.assertEqual(usages2.uncached.breakdown, {
         'model2': lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
     })
@@ -777,7 +776,7 @@ class UsageSummaryTest(unittest.TestCase):
     self.assertFalse(usage_summary.uncached)

     # Add uncached.
-    usage_summary.
+    usage_summary.add(
         'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
     )
     self.assertEqual(
@@ -788,7 +787,7 @@ class UsageSummaryTest(unittest.TestCase):
     )
     # Add cached.
     self.assertFalse(usage_summary.cached)
-    usage_summary.
+    usage_summary.add(
         'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), True
     )
     self.assertEqual(
@@ -798,7 +797,7 @@ class UsageSummaryTest(unittest.TestCase):
         usage_summary.cached.total, lm_lib.LMSamplingUsage(1, 2, 3, 1, 0.0)
     )
     # Add UsageNotAvailable.
-    usage_summary.
+    usage_summary.add(
         'model1', lm_lib.UsageNotAvailable(num_requests=1), False
     )
     self.assertEqual(
@@ -808,6 +807,100 @@ class UsageSummaryTest(unittest.TestCase):
         usage_summary.uncached.total, lm_lib.UsageNotAvailable(num_requests=2)
     )

+  def test_merge(self):
+    usage_summary = lm_lib.UsageSummary()
+    usage_summary.add(
+        'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    usage_summary.add(
+        'model2', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    usage_summary.add(
+        'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    usage_summary2 = lm_lib.UsageSummary()
+    usage_summary2.add(
+        'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    usage_summary2.add(
+        'model3', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    usage_summary2.merge(usage_summary)
+    self.assertEqual(
+        usage_summary2,
+        lm_lib.UsageSummary(
+            cached=lm_lib.UsageSummary.AggregatedUsage(
+                total=lm_lib.LMSamplingUsage(
+                    prompt_tokens=0,
+                    completion_tokens=0,
+                    total_tokens=0,
+                    num_requests=0,
+                    estimated_cost=0.0,
+                ),
+                breakdown={}
+            ),
+            uncached=lm_lib.UsageSummary.AggregatedUsage(
+                total=lm_lib.LMSamplingUsage(
+                    prompt_tokens=5,
+                    completion_tokens=10,
+                    total_tokens=15,
+                    num_requests=5,
+                    estimated_cost=25.0
+                ),
+                breakdown=dict(
+                    model1=lm_lib.LMSamplingUsage(
+                        prompt_tokens=3,
+                        completion_tokens=6,
+                        total_tokens=9,
+                        num_requests=3,
+                        estimated_cost=15.0
+                    ),
+                    model3=lm_lib.LMSamplingUsage(
+                        prompt_tokens=1,
+                        completion_tokens=2,
+                        total_tokens=3,
+                        num_requests=1,
+                        estimated_cost=5.0
+                    ),
+                    model2=lm_lib.LMSamplingUsage(
+                        prompt_tokens=1,
+                        completion_tokens=2,
+                        total_tokens=3,
+                        num_requests=1,
+                        estimated_cost=5.0
+                    )
+                )
+            )
+        )
+    )
+
+  def test_html_view(self):
+    usage_summary = lm_lib.UsageSummary()
+    usage_summary.add(
+        'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    self.assertIn(
+        '5.000',
+        usage_summary.to_html(extra_flags=dict(as_badge=True)).content
+    )
+    usage_summary.add(
+        'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    self.assertIn(
+        '10.000',
+        usage_summary.to_html(
+            extra_flags=dict(as_badge=True, interactive=True)
+        ).content
+    )
+    self.assertTrue(
+        usage_summary.to_html().content.startswith('<details open')
+    )
+    with pg.views.html.controls.HtmlControl.track_scripts() as scripts:
+      usage_summary.add(
+          'model2', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+      )
+    self.assertEqual(len(scripts), 4)
+

 if __name__ == '__main__':
   unittest.main()
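
Net effect of the language_model.py changes: UsageSummary gains an HTML badge view, the lock moves from _UsageTracker into UsageSummary itself, the old usage_summary.update(...) call becomes add(model_id, usage, is_cached), and a merge() method is introduced for aggregating summaries. A small hedged sketch mirroring UsageSummaryTest above (variable names are illustrative; only APIs visible in this diff are used):

    from langfun.core import language_model as lm_lib

    s1 = lm_lib.UsageSummary()
    s1.add('model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False)  # uncached
    s1.add('model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), True)   # cached; cost zeroed

    s2 = lm_lib.UsageSummary()
    s2.add('model2', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False)
    s2.merge(s1)  # Folds s1's cached and uncached buckets into s2.

    print(s2.total.num_requests)  # 3: two uncached requests plus one cached.
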
langfun/core/llms/__init__.py
CHANGED
@@ -24,6 +24,9 @@ from langfun.core.llms.fake import StaticMapping
 from langfun.core.llms.fake import StaticResponse
 from langfun.core.llms.fake import StaticSequence

+# Compositional models.
+from langfun.core.llms.compositional import RandomChoice
+
 # REST-based models.
 from langfun.core.llms.rest import REST

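
RandomChoice itself lives in the new langfun/core/llms/compositional.py (+101 lines, not shown in this excerpt). A hedged sketch of how such a compositional model might be used follows; the candidates argument and the fake models are assumptions, since the constructor is not part of this diff:

    # Assumption: RandomChoice accepts a list of candidate models; the exact
    # field name and defaults are not visible in this diff excerpt.
    import langfun as lf

    lm = lf.llms.RandomChoice(candidates=[
        lf.llms.StaticResponse('response A'),
        lf.llms.StaticResponse('response B'),
    ])
    print(lm('hi'))  # Each call is served by one of the candidate models.
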