PyPI - langfun - Versions diffs - 0.1.2.dev202410110804__py3-none-any.whl → 0.1.2.dev202410130803__py3-none-any.whl - Mend

langfun 0.1.2.dev202410110804__py3-none-any.whl → 0.1.2.dev202410130803__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

langfun/core/__init__.py +1 -0
langfun/core/eval/base_test.py +1 -0
langfun/core/langfunc_test.py +2 -2
langfun/core/language_model.py +140 -24
langfun/core/language_model_test.py +166 -36
langfun/core/llms/__init__.py +8 -1
langfun/core/llms/anthropic.py +72 -7
langfun/core/llms/cache/in_memory_test.py +3 -2
langfun/core/llms/fake_test.py +7 -0
langfun/core/llms/groq.py +154 -6
langfun/core/llms/openai.py +300 -42
langfun/core/llms/openai_test.py +35 -8
langfun/core/llms/vertexai.py +121 -16
langfun/core/logging.py +9 -3
langfun/core/message.py +23 -12
langfun/core/message_test.py +2 -2
langfun/core/structured/completion_test.py +1 -0
langfun/core/structured/mapping.py +1 -1
langfun/core/structured/parsing_test.py +2 -1
langfun/core/structured/prompting_test.py +1 -0
langfun/core/template.py +8 -5
langfun/core/templates/selfplay_test.py +4 -2
{langfun-0.1.2.dev202410110804.dist-info → langfun-0.1.2.dev202410130803.dist-info}/METADATA +1 -1
{langfun-0.1.2.dev202410110804.dist-info → langfun-0.1.2.dev202410130803.dist-info}/RECORD +27 -27
{langfun-0.1.2.dev202410110804.dist-info → langfun-0.1.2.dev202410130803.dist-info}/LICENSE +0 -0
{langfun-0.1.2.dev202410110804.dist-info → langfun-0.1.2.dev202410130803.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202410110804.dist-info → langfun-0.1.2.dev202410130803.dist-info}/top_level.txt +0 -0

langfun/core/__init__.py CHANGED Viewed

@@ -103,6 +103,7 @@ from langfun.core.language_model import LMSample
 from langfun.core.language_model import LMSamplingOptions
 from langfun.core.language_model import LMSamplingUsage
 from langfun.core.language_model import UsageNotAvailable
+from langfun.core.language_model import UsageSummary
 from langfun.core.language_model import LMSamplingResult
 from langfun.core.language_model import LMScoringResult
 from langfun.core.language_model import LMCache

langfun/core/eval/base_test.py CHANGED Viewed

@@ -194,6 +194,7 @@ class EvaluationTest(unittest.TestCase):
             cache_seed=0,
             score=1.0,
             logprobs=None,
+            is_cached=False,
             usage=lf.LMSamplingUsage(387, 24, 411),
             tags=['lm-response', 'lm-output', 'transformed'],
         ),

langfun/core/langfunc_test.py CHANGED Viewed

@@ -89,7 +89,7 @@ class LangFuncCallTest(unittest.TestCase):
     self.assertEqual(
         r,
         message.AIMessage(
-            'Hello!!!', score=0.0, logprobs=None,
+            'Hello!!!', score=0.0, logprobs=None, is_cached=False,
             usage=language_model.UsageNotAvailable()
         )
     )
@@ -120,7 +120,7 @@ class LangFuncCallTest(unittest.TestCase):
       self.assertEqual(
           r,
           message.AIMessage(
-              'Hello!!!', score=0.0, logprobs=None,
+              'Hello!!!', score=0.0, logprobs=None, is_cached=False,
               usage=language_model.UsageNotAvailable()
           )
       )

langfun/core/language_model.py CHANGED Viewed

@@ -19,7 +19,7 @@ import dataclasses
 import enum
 import threading
 import time
-from typing import Annotated, Any, Callable, Iterator, Sequence, Tuple, Type, Union
+from typing import Annotated, Any, Callable, Iterator, Optional, Sequence, Tuple, Type, Union
 from langfun.core import component
 from langfun.core import concurrent
 from langfun.core import console
@@ -86,25 +86,75 @@ class LMSamplingUsage(pg.Object):
   completion_tokens: int
   total_tokens: int
   num_requests: int = 1
+  estimated_cost: Annotated[
+      float | None,
+      (
+          'Estimated cost in US dollars. If None, cost estimating is not '
+          'suppported on the model being queried.'
+      )
+  ] = None
+  def __bool__(self) -> bool:
+    return self.num_requests > 0
+  @property
+  def average_prompt_tokens(self) -> int:
+    """Returns the average prompt tokens per request."""
+    return self.prompt_tokens // self.num_requests
+  @property
+  def average_completion_tokens(self) -> int:
+    """Returns the average completion tokens per request."""
+    return self.completion_tokens // self.num_requests
+  @property
+  def average_total_tokens(self) -> int:
+    """Returns the average total tokens per request."""
+    return self.total_tokens // self.num_requests
-  def __add__(self, other: 'LMSamplingUsage') -> 'LMSamplingUsage':
+  @property
+  def average_estimated_cost(self) -> float | None:
+    """Returns the average estimated cost per request."""
+    if self.estimated_cost is None:
+      return None
+    return self.estimated_cost / self.num_requests
+  def __add__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
+    if other is None:
+      return self
     return LMSamplingUsage(
         prompt_tokens=self.prompt_tokens + other.prompt_tokens,
         completion_tokens=self.completion_tokens + other.completion_tokens,
         total_tokens=self.total_tokens + other.total_tokens,
         num_requests=self.num_requests + other.num_requests,
+        estimated_cost=(
+            self.estimated_cost + other.estimated_cost    # pylint: disable=g-long-ternary
+            if (self.estimated_cost is not None
+                and other.estimated_cost is not None)
+            else None
+        )
     )
+  def __radd__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
+    return self + other
 class UsageNotAvailable(LMSamplingUsage):
   """Usage information not available."""
   prompt_tokens: pg.typing.Int(0).freeze()       # pytype: disable=invalid-annotation
   completion_tokens: pg.typing.Int(0).freeze()   # pytype: disable=invalid-annotation
   total_tokens: pg.typing.Int(0).freeze()        # pytype: disable=invalid-annotation
-  num_requests: pg.typing.Int(1).freeze()        # pytype: disable=invalid-annotation
+  estimated_cost: pg.typing.Float(default=None, is_noneable=True).freeze()    # pytype: disable=invalid-annotation
-  def __bool__(self) -> bool:
-    return False
+  def __add__(self, other: Optional['LMSamplingUsage']) -> 'UsageNotAvailable':
+    if other is None:
+      return self
+    return UsageNotAvailable(
+        num_requests=self.num_requests + other.num_requests
+    )
+  def __radd__(self, other: Optional['LMSamplingUsage']) -> 'UsageNotAvailable':
+    return self + other
 class LMSamplingResult(pg.Object):
@@ -123,6 +173,11 @@ class LMSamplingResult(pg.Object):
       'Usage information. Currently only OpenAI models are supported.',
   ] = UsageNotAvailable()
+  is_cached: Annotated[
+      bool,
+      'Whether the result is from cache or not.'
+  ] = False
 class LMSamplingOptions(component.Component):
   """Language model sampling options."""
@@ -425,12 +480,13 @@ class LanguageModel(component.Component):
           response = sample.response
           response.metadata.score = sample.score
           response.metadata.logprobs = sample.logprobs
+          response.metadata.is_cached = result.is_cached
           # NOTE(daiyip): Current usage is computed at per-result level,
           # which is accurate when n=1. For n > 1, we average the usage across
           # multiple samples.
           usage = result.usage
-          if len(result.samples) == 1 or not usage:
+          if len(result.samples) == 1 or isinstance(usage, UsageNotAvailable):
             response.metadata.usage = usage
           else:
             n = len(result.samples)
@@ -438,6 +494,9 @@ class LanguageModel(component.Component):
                 prompt_tokens=usage.prompt_tokens // n,
                 completion_tokens=usage.completion_tokens // n,
                 total_tokens=usage.total_tokens // n,
+                estimated_cost=(
+                    usage.estimated_cost / n if usage.estimated_cost else None
+                )
             )
           # Track usage.
@@ -445,7 +504,7 @@ class LanguageModel(component.Component):
           if trackers:
             model_id = self.model_id
             for tracker in trackers:
-              tracker.track(model_id, usage)
+              tracker.track(model_id, usage, result.is_cached)
           # Track the prompt for corresponding response.
           response.source = prompt
@@ -474,7 +533,9 @@ class LanguageModel(component.Component):
         request_to_result_index[len(requests)] = i
         requests.append(prompt)
       else:
-        results[i] = r.clone()
+        result = r.clone()
+        assert result.is_cached, result
+        results[i] = result
     # Sample non-cache-hit prompts.
     if requests:
@@ -491,8 +552,12 @@ class LanguageModel(component.Component):
           sample.response.set('cache_seed', cache_seed)
         if cache_seed is not None:
-          self.cache.put(self, prompt, result.clone(), seed=cache_seed)
+          self.cache.put(
+              self,
+              prompt,
+              result.clone(override=dict(is_cached=True)),
+              seed=cache_seed
+          )
     return results  # pytype: disable=bad-return-type
   @abc.abstractmethod
@@ -800,30 +865,81 @@ class LanguageModel(component.Component):
       return DEFAULT_MAX_CONCURRENCY  # Default of 1
+class UsageSummary(pg.Object):
+  """Usage sumary."""
+  class AggregatedUsage(pg.Object):
+    """Aggregated usage."""
+    total: LMSamplingUsage = LMSamplingUsage(0, 0, 0, 0, 0.0)
+    breakdown: dict[str, LMSamplingUsage] = {}
+    def __bool__(self) -> bool:
+      """Returns True if the usage is non-empty."""
+      return bool(self.breakdown)
+    def add(
+        self,
+        model_id: str,
+        usage: LMSamplingUsage,
+    ) -> None:
+      """Adds an entry to the breakdown."""
+      aggregated = self.breakdown.get(model_id, None)
+      with pg.notify_on_change(False):
+        self.breakdown[model_id] = usage + aggregated
+        self.rebind(total=self.total + usage, skip_notification=True)
+  @property
+  def total(self) -> LMSamplingUsage:
+    return self.cached.total + self.uncached.total
+  def update(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
+    """Updates the usage summary."""
+    if is_cached:
+      usage.rebind(estimated_cost=0.0, skip_notification=True)
+      self.cached.add(model_id, usage)
+    else:
+      self.uncached.add(model_id, usage)
+pg.members(
+    dict(
+        cached=(
+            pg.typing.Object(
+                UsageSummary.AggregatedUsage,
+                default=UsageSummary.AggregatedUsage()
+            ),
+            'Aggregated usages for cached LLM calls.'
+        ),
+        uncached=(
+            pg.typing.Object(
+                UsageSummary.AggregatedUsage,
+                default=UsageSummary.AggregatedUsage()
+            ),
+            'Aggregated usages for uncached LLM calls.'
+        ),
+    )
+)(UsageSummary)
 class _UsageTracker:
   """Usage tracker."""
   def __init__(self, model_ids: set[str] | None):
     self.model_ids = model_ids
+    self.usage_summary = UsageSummary()
     self._lock = threading.Lock()
-    self.usages = {
-        m: LMSamplingUsage(0, 0, 0, 0) for m in model_ids
-    } if model_ids else {}
-  def track(self, model_id: str, usage: LMSamplingUsage):
-    if self.model_ids is not None and model_id not in self.model_ids:
-      return
-    with self._lock:
-      if not isinstance(usage, UsageNotAvailable) and model_id in self.usages:
-        self.usages[model_id] += usage
-      else:
-        self.usages[model_id] = usage
+  def track(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
+    if self.model_ids is None or model_id in self.model_ids:
+      with self._lock:
+        self.usage_summary.update(model_id, usage, is_cached)
 @contextlib.contextmanager
 def track_usages(
     *lm: Union[str, LanguageModel]
-) -> Iterator[dict[str, LMSamplingUsage]]:
+) -> Iterator[UsageSummary]:
   """Context manager to track the usages of all language models in scope.
   `lf.track_usages` works with threads spawned by `lf.concurrent_map` and
@@ -854,6 +970,6 @@ def track_usages(
   tracker = _UsageTracker(set(model_ids) if model_ids else None)
   with component.context(__usage_trackers__=trackers + [tracker]):
     try:
-      yield tracker.usages
+      yield tracker.usage_summary
     finally:
       pass

langfun/core/language_model_test.py CHANGED Viewed

@@ -49,6 +49,7 @@ class MockModel(lm_lib.LanguageModel):
                     prompt_tokens=100,
                     completion_tokens=100,
                     total_tokens=200,
+                    estimated_cost=1.0,
                 ),
             )
             for prompt in prompts
@@ -128,14 +129,15 @@ class LanguageModelTest(unittest.TestCase):
                             'foo',
                             score=-1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=-1.0,
                         logprobs=None,
                     )
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
             lm_lib.LMSamplingResult(
                 [
@@ -144,14 +146,15 @@ class LanguageModelTest(unittest.TestCase):
                             'bar',
                             score=-1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=-1.0,
                         logprobs=None,
                     )
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
         ],
     )
@@ -169,14 +172,15 @@ class LanguageModelTest(unittest.TestCase):
                             'foo' * 2,
                             score=0.5,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=0.5,
                         logprobs=None,
                     ),
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
             lm_lib.LMSamplingResult(
                 [
@@ -185,7 +189,8 @@ class LanguageModelTest(unittest.TestCase):
                             'bar' * 2,
                             score=0.5,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=0.5,
@@ -193,7 +198,8 @@ class LanguageModelTest(unittest.TestCase):
                     ),
                 ],
                 usage=lm_lib.LMSamplingUsage(
-                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200,
+                    num_requests=1, estimated_cost=1.0,
                 ),
             ),
         ]
@@ -209,14 +215,15 @@ class LanguageModelTest(unittest.TestCase):
                             'foo',
                             score=1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=1.0,
                         logprobs=None,
                     ),
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
             lm_lib.LMSamplingResult(
                 [
@@ -225,7 +232,8 @@ class LanguageModelTest(unittest.TestCase):
                             'bar',
                             score=1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=1.0,
@@ -233,7 +241,8 @@ class LanguageModelTest(unittest.TestCase):
                     ),
                 ],
                 usage=lm_lib.LMSamplingUsage(
-                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200,
+                    num_requests=1, estimated_cost=1.0,
                 ),
             ),
         ]
@@ -248,14 +257,15 @@ class LanguageModelTest(unittest.TestCase):
                             'foo' * 2,
                             score=0.7,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=0.7,
                         logprobs=None,
                     ),
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
             lm_lib.LMSamplingResult(
                 [
@@ -264,7 +274,8 @@ class LanguageModelTest(unittest.TestCase):
                             'bar' * 2,
                             score=0.7,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=0.7,
@@ -272,7 +283,8 @@ class LanguageModelTest(unittest.TestCase):
                     ),
                 ],
                 usage=lm_lib.LMSamplingUsage(
-                    prompt_tokens=100, completion_tokens=100, total_tokens=200
+                    prompt_tokens=100, completion_tokens=100, total_tokens=200,
+                    num_requests=1, estimated_cost=1.0,
                 ),
             ),
         ]
@@ -284,7 +296,9 @@ class LanguageModelTest(unittest.TestCase):
     self.assertEqual(response.text, 'foo')
     self.assertEqual(response.score, -1.0)
     self.assertIsNone(response.logprobs)
-    self.assertEqual(response.usage, lm_lib.LMSamplingUsage(100, 100, 200))
+    self.assertEqual(
+        response.usage, lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0)
+    )
     # Test override sampling_options.
     self.assertEqual(
@@ -307,14 +321,17 @@ class LanguageModelTest(unittest.TestCase):
                             cache_seed=0,
                             score=-1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(
+                                100, 100, 200, 1, 1.0
+                            ),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=-1.0,
                         logprobs=None,
                     )
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
             lm_lib.LMSamplingResult(
                 [
@@ -324,14 +341,15 @@ class LanguageModelTest(unittest.TestCase):
                             cache_seed=0,
                             score=-1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=-1.0,
                         logprobs=None,
                     )
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
         ],
     )
@@ -339,7 +357,9 @@ class LanguageModelTest(unittest.TestCase):
     self.assertEqual(cache.stats.num_hits, 0)
     self.assertEqual(cache.stats.num_updates, 2)
-    self.assertEqual(lm('foo'), 'foo')
+    result = lm('foo')
+    self.assertEqual(result, 'foo')
+    self.assertTrue(result.metadata.is_cached)
     self.assertEqual(lm('bar'), 'bar')
     self.assertEqual(cache.stats.num_queries, 4)
     self.assertEqual(cache.stats.num_hits, 2)
@@ -361,14 +381,15 @@ class LanguageModelTest(unittest.TestCase):
                             cache_seed=0,
                             score=1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=1.0,
                         logprobs=None,
                     )
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
             lm_lib.LMSamplingResult(
                 [
@@ -378,14 +399,15 @@ class LanguageModelTest(unittest.TestCase):
                             cache_seed=0,
                             score=1.0,
                             logprobs=None,
-                            usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                            is_cached=False,
+                            usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
                             tags=[message_lib.Message.TAG_LM_RESPONSE],
                         ),
                         score=1.0,
                         logprobs=None,
                     )
                 ],
-                usage=lm_lib.LMSamplingUsage(100, 100, 200),
+                usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
             ),
         ],
     )
@@ -663,20 +685,128 @@ class LanguageModelTest(unittest.TestCase):
             lm2('hi')
             list(concurrent.concurrent_map(call_lm, ['hi', 'hello']))
-    self.assertEqual(usages2, {
-        'model2': lm_lib.LMSamplingUsage(100, 100, 200, 1),
+    print(usages2)
+    self.assertEqual(usages2.uncached.breakdown, {
+        'model2': lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
+    })
+    self.assertFalse(usages2.cached)
+    self.assertEqual(usages3.uncached.breakdown, {
+        'model1': lm_lib.LMSamplingUsage(100 * 4, 100 * 4, 200 * 4, 4, 4.0),
     })
-    self.assertEqual(usages3, {
-        'model1': lm_lib.LMSamplingUsage(100 * 4, 100 * 4, 200 * 4, 4),
+    self.assertFalse(usages3.cached)
+    self.assertEqual(usages4.uncached.breakdown, {
+        'model1': lm_lib.LMSamplingUsage(100 * 4, 100 * 4, 200 * 4, 4, 4.0),
+        'model2': lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
     })
-    self.assertEqual(usages4, {
-        'model1': lm_lib.LMSamplingUsage(100 * 4, 100 * 4, 200 * 4, 4),
-        'model2': lm_lib.LMSamplingUsage(100, 100, 200, 1),
+    self.assertFalse(usages4.cached)
+    self.assertEqual(usages1.uncached.breakdown, {
+        'model1': lm_lib.LMSamplingUsage(100 * 5, 100 * 5, 200 * 5, 5, 5.0),
+        'model2': lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
     })
-    self.assertEqual(usages1, {
-        'model1': lm_lib.LMSamplingUsage(100 * 5, 100 * 5, 200 * 5, 5),
-        'model2': lm_lib.LMSamplingUsage(100, 100, 200, 1),
+    self.assertFalse(usages1.cached)
+    self.assertEqual(
+        usages1.total,
+        lm_lib.LMSamplingUsage(100 * 6, 100 * 6, 200 * 6, 6, 6.0),
+    )
+    cache = in_memory.InMemory()
+    lm = MockModel(cache=cache, name='model1')
+    with lm_lib.track_usages() as usages1:
+      _ = lm('hi')
+    self.assertEqual(usages1.uncached.breakdown, {
+        'model1': lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
     })
+    self.assertFalse(usages1.cached)
+    with lm_lib.track_usages() as usages2:
+      _ = lm('hi')
+    self.assertEqual(usages2.cached.breakdown, {
+        'model1': lm_lib.LMSamplingUsage(100, 100, 200, 1, 0.0),
+    })
+    self.assertFalse(usages2.uncached)
+class LMSamplingUsageTest(unittest.TestCase):
+  def test_basics(self):
+    usage = lm_lib.LMSamplingUsage(100, 200, 300, 4, 5.0)
+    self.assertEqual(usage.num_requests, 4)
+    self.assertEqual(usage.prompt_tokens, 100)
+    self.assertEqual(usage.completion_tokens, 200)
+    self.assertEqual(usage.total_tokens, 300)
+    self.assertEqual(usage.estimated_cost, 5.0)
+    self.assertEqual(usage.average_prompt_tokens, 25)
+    self.assertEqual(usage.average_completion_tokens, 50)
+    self.assertEqual(usage.average_total_tokens, 75)
+    self.assertEqual(usage.average_estimated_cost, 1.25)
+  def test_add(self):
+    usage1 = lm_lib.LMSamplingUsage(100, 200, 300, 4, 5.0)
+    usage2 = lm_lib.LMSamplingUsage(100, 200, 300, 4, 5.0)
+    self.assertEqual(usage1 + usage2, usage1 + usage2)
+    self.assertIs(usage1 + None, usage1)
+    self.assertIs(None + usage1, usage1)
+  def test_usage_not_available(self):
+    usage_not_available = lm_lib.UsageNotAvailable()
+    self.assertEqual(usage_not_available.prompt_tokens, 0)
+    self.assertEqual(usage_not_available.completion_tokens, 0)
+    self.assertEqual(usage_not_available.total_tokens, 0)
+    self.assertEqual(usage_not_available.average_prompt_tokens, 0)
+    self.assertEqual(usage_not_available.average_completion_tokens, 0)
+    self.assertEqual(usage_not_available.average_total_tokens, 0)
+    self.assertIsNone(usage_not_available.average_estimated_cost)
+    self.assertTrue(usage_not_available)
+    self.assertEqual(
+        usage_not_available + lm_lib.LMSamplingUsage(1, 2, 3, 4, 5.0),
+        lm_lib.UsageNotAvailable(num_requests=5)
+    )
+    self.assertEqual(
+        lm_lib.LMSamplingUsage(1, 2, 3, 4, 5.0) + usage_not_available,
+        lm_lib.UsageNotAvailable(num_requests=5)
+    )
+    self.assertIs(None + usage_not_available, usage_not_available)
+    self.assertIs(usage_not_available + None, usage_not_available)
+class UsageSummaryTest(unittest.TestCase):
+  def test_basics(self):
+    usage_summary = lm_lib.UsageSummary()
+    self.assertFalse(usage_summary.total)
+    self.assertFalse(usage_summary.cached)
+    self.assertFalse(usage_summary.uncached)
+    # Add uncached.
+    usage_summary.update(
+        'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), False
+    )
+    self.assertEqual(
+        usage_summary.total, lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0)
+    )
+    self.assertEqual(
+        usage_summary.uncached.total, lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0)
+    )
+    # Add cached.
+    self.assertFalse(usage_summary.cached)
+    usage_summary.update(
+        'model1', lm_lib.LMSamplingUsage(1, 2, 3, 1, 5.0), True
+    )
+    self.assertEqual(
+        usage_summary.total, lm_lib.LMSamplingUsage(2, 4, 6, 2, 5.0)
+    )
+    self.assertEqual(
+        usage_summary.cached.total, lm_lib.LMSamplingUsage(1, 2, 3, 1, 0.0)
+    )
+    # Add UsageNotAvailable.
+    usage_summary.update(
+        'model1', lm_lib.UsageNotAvailable(num_requests=1), False
+    )
+    self.assertEqual(
+        usage_summary.total, lm_lib.UsageNotAvailable(num_requests=3)
+    )
+    self.assertEqual(
+        usage_summary.uncached.total, lm_lib.UsageNotAvailable(num_requests=2)
+    )
 if __name__ == '__main__':

langfun 0.1.2.dev202410110804__py3-none-any.whl → 0.1.2.dev202410130803__py3-none-any.whl

langfun 0.1.2.dev202410110804__py3-none-any.whl → 0.1.2.dev202410130803__py3-none-any.whl