langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501150804__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- langfun/__init__.py +20 -2
- langfun/core/__init__.py +16 -5
- langfun/core/agentic/__init__.py +30 -0
- langfun/core/agentic/action.py +854 -0
- langfun/core/agentic/action_eval.py +150 -0
- langfun/core/agentic/action_eval_test.py +109 -0
- langfun/core/agentic/action_test.py +136 -0
- langfun/core/coding/python/__init__.py +5 -11
- langfun/core/coding/python/correction.py +37 -21
- langfun/core/coding/python/correction_test.py +29 -3
- langfun/core/coding/python/execution.py +40 -216
- langfun/core/coding/python/execution_test.py +29 -89
- langfun/core/coding/python/generation.py +21 -11
- langfun/core/coding/python/generation_test.py +2 -2
- langfun/core/coding/python/parsing.py +108 -193
- langfun/core/coding/python/parsing_test.py +2 -105
- langfun/core/component.py +63 -2
- langfun/core/component_test.py +53 -0
- langfun/core/concurrent.py +414 -117
- langfun/core/concurrent_test.py +111 -24
- langfun/core/console.py +17 -5
- langfun/core/console_test.py +17 -0
- langfun/core/eval/__init__.py +16 -1
- langfun/core/eval/base.py +622 -174
- langfun/core/eval/base_test.py +200 -54
- langfun/core/eval/matching.py +63 -76
- langfun/core/eval/matching_test.py +17 -8
- langfun/core/eval/patching.py +130 -0
- langfun/core/eval/patching_test.py +170 -0
- langfun/core/eval/scoring.py +26 -26
- langfun/core/eval/scoring_test.py +19 -2
- langfun/core/eval/v2/__init__.py +42 -0
- langfun/core/eval/v2/checkpointing.py +380 -0
- langfun/core/eval/v2/checkpointing_test.py +228 -0
- langfun/core/eval/v2/eval_test_helper.py +136 -0
- langfun/core/eval/v2/evaluation.py +725 -0
- langfun/core/eval/v2/evaluation_test.py +180 -0
- langfun/core/eval/v2/example.py +305 -0
- langfun/core/eval/v2/example_test.py +128 -0
- langfun/core/eval/v2/experiment.py +1048 -0
- langfun/core/eval/v2/experiment_test.py +433 -0
- langfun/core/eval/v2/metric_values.py +156 -0
- langfun/core/eval/v2/metric_values_test.py +80 -0
- langfun/core/eval/v2/metrics.py +357 -0
- langfun/core/eval/v2/metrics_test.py +203 -0
- langfun/core/eval/v2/progress.py +348 -0
- langfun/core/eval/v2/progress_test.py +82 -0
- langfun/core/eval/v2/progress_tracking.py +210 -0
- langfun/core/eval/v2/progress_tracking_test.py +66 -0
- langfun/core/eval/v2/reporting.py +270 -0
- langfun/core/eval/v2/reporting_test.py +158 -0
- langfun/core/eval/v2/runners.py +488 -0
- langfun/core/eval/v2/runners_test.py +334 -0
- langfun/core/langfunc.py +4 -17
- langfun/core/langfunc_test.py +22 -6
- langfun/core/language_model.py +577 -39
- langfun/core/language_model_test.py +470 -56
- langfun/core/llms/__init__.py +87 -16
- langfun/core/llms/anthropic.py +312 -87
- langfun/core/llms/anthropic_test.py +71 -3
- langfun/core/llms/cache/base.py +21 -2
- langfun/core/llms/cache/in_memory.py +13 -0
- langfun/core/llms/cache/in_memory_test.py +53 -2
- langfun/core/llms/compositional.py +101 -0
- langfun/core/llms/compositional_test.py +73 -0
- langfun/core/llms/deepseek.py +117 -0
- langfun/core/llms/deepseek_test.py +61 -0
- langfun/core/llms/fake.py +11 -7
- langfun/core/llms/fake_test.py +14 -0
- langfun/core/llms/gemini.py +507 -0
- langfun/core/llms/gemini_test.py +195 -0
- langfun/core/llms/google_genai.py +62 -218
- langfun/core/llms/google_genai_test.py +9 -202
- langfun/core/llms/groq.py +160 -144
- langfun/core/llms/groq_test.py +31 -137
- langfun/core/llms/llama_cpp.py +15 -42
- langfun/core/llms/llama_cpp_test.py +4 -30
- langfun/core/llms/openai.py +395 -203
- langfun/core/llms/openai_compatible.py +179 -0
- langfun/core/llms/openai_compatible_test.py +495 -0
- langfun/core/llms/openai_test.py +30 -395
- langfun/core/llms/rest.py +113 -0
- langfun/core/llms/rest_test.py +111 -0
- langfun/core/llms/vertexai.py +192 -0
- langfun/core/llms/vertexai_test.py +52 -0
- langfun/core/logging.py +284 -0
- langfun/core/logging_test.py +125 -0
- langfun/core/message.py +319 -9
- langfun/core/message_test.py +190 -13
- langfun/core/modalities/__init__.py +6 -2
- langfun/core/modalities/audio.py +30 -0
- langfun/core/modalities/audio_test.py +63 -0
- langfun/core/modalities/image.py +39 -20
- langfun/core/modalities/image_test.py +52 -9
- langfun/core/modalities/mime.py +206 -29
- langfun/core/modalities/mime_test.py +90 -9
- langfun/core/modalities/ms_office.py +117 -0
- langfun/core/modalities/ms_office_test.py +389 -0
- langfun/core/modalities/pdf.py +22 -0
- langfun/core/modalities/pdf_test.py +57 -0
- langfun/core/modalities/video.py +9 -26
- langfun/core/modalities/video_test.py +3 -3
- langfun/core/modality.py +26 -3
- langfun/core/modality_test.py +2 -2
- langfun/core/sampling.py +11 -11
- langfun/core/structured/__init__.py +12 -16
- langfun/core/structured/completion.py +32 -5
- langfun/core/structured/completion_test.py +7 -6
- langfun/core/structured/description.py +2 -2
- langfun/core/structured/description_test.py +3 -3
- langfun/core/structured/function_generation.py +60 -27
- langfun/core/structured/function_generation_test.py +72 -2
- langfun/core/structured/mapping.py +97 -47
- langfun/core/structured/mapping_test.py +90 -2
- langfun/core/structured/parsing.py +33 -21
- langfun/core/structured/parsing_test.py +53 -9
- langfun/core/structured/querying.py +746 -0
- langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
- langfun/core/structured/schema.py +204 -97
- langfun/core/structured/schema_generation.py +1 -1
- langfun/core/structured/schema_test.py +130 -29
- langfun/core/structured/scoring.py +125 -19
- langfun/core/structured/scoring_test.py +30 -0
- langfun/core/structured/tokenization.py +64 -0
- langfun/core/structured/tokenization_test.py +48 -0
- langfun/core/template.py +115 -1
- langfun/core/template_test.py +71 -1
- langfun/core/templates/conversation.py +9 -0
- langfun/core/templates/conversation_test.py +4 -3
- langfun/core/templates/selfplay_test.py +10 -2
- langfun-0.1.2.dev202501150804.dist-info/METADATA +225 -0
- langfun-0.1.2.dev202501150804.dist-info/RECORD +153 -0
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/WHEEL +1 -1
- langfun/core/coding/python/errors.py +0 -108
- langfun/core/coding/python/errors_test.py +0 -99
- langfun/core/coding/python/permissions.py +0 -90
- langfun/core/coding/python/permissions_test.py +0 -86
- langfun/core/structured/prompting.py +0 -238
- langfun/core/text_formatting.py +0 -162
- langfun/core/text_formatting_test.py +0 -47
- langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
- langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501150804.dist-info}/top_level.txt +0 -0
langfun/core/language_model.py
CHANGED
@@ -14,20 +14,51 @@
 """Interface for language model."""
 
 import abc
+import contextlib
 import dataclasses
 import enum
+import functools
+import math
+import threading
 import time
-from typing import Annotated, Any, Callable, Sequence, Tuple, Type, Union
+from typing import Annotated, Any, Callable, Iterator, Optional, Sequence, Tuple, Type, Union
 from langfun.core import component
 from langfun.core import concurrent
 from langfun.core import console
 from langfun.core import message as message_lib
+
 import pyglove as pg
 
 TOKENS_PER_REQUEST = 250  # Estimated num tokens for a single request
 DEFAULT_MAX_CONCURRENCY = 1  # Use this as max concurrency if no RPM or TPM data
 
 
+#
+# Common errors during calling language models.
+#
+
+
+class LMError(RuntimeError):
+  """Base class for language model errors."""
+
+
+class RetryableLMError(LMError):
+  """Base class for LLM errors that can be solved by retrying."""
+
+
+class RateLimitError(RetryableLMError):
+  """Error for rate limit reached."""
+
+
+class TemporaryLMError(RetryableLMError):
+  """Error for temporary service issues that can be retried."""
+
+
+#
+# Language model input/output interfaces.
+#
+
+
 class LMSample(pg.Object):
   """Response candidate."""
 
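Note: the new exception hierarchy gives callers a single base class, `RetryableLMError`, for transient failures (rate limits, temporary service issues) as opposed to permanent ones. A minimal sketch of how client code might use it; the `call_with_retry` helper is hypothetical (langfun's own retry machinery lives in `concurrent.concurrent_execute`):

```
from langfun.core import language_model as lm_lib

def call_with_retry(lm, prompt, max_attempts=3):
  # Hypothetical helper: retry only errors the module marks as retryable.
  for attempt in range(max_attempts):
    try:
      return lm(prompt)
    except lm_lib.RetryableLMError:  # RateLimitError, TemporaryLMError, ...
      if attempt == max_attempts - 1:
        raise
```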
@@ -50,12 +81,140 @@ class LMSample(pg.Object):
   ] = None
 
 
+class RetryStats(pg.Object):
+  """Retry stats, which is aggregated across multiple retry entries."""
+
+  num_occurences: Annotated[
+      int,
+      'Total number of retry attempts on LLM (excluding the first attempt).',
+  ] = 0
+  total_wait_interval: Annotated[
+      float, 'Total wait interval in seconds due to retry.'
+  ] = 0
+  total_call_interval: Annotated[
+      float, 'Total LLM call interval in seconds.'
+  ] = 0
+  errors: Annotated[
+      dict[str, int],
+      'A Counter of error types encountered during the retry attempts.',
+  ] = {}
+
+  @classmethod
+  def from_retry_entries(
+      cls, retry_entries: Sequence[concurrent.RetryEntry]
+  ) -> 'RetryStats':
+    """Creates a RetryStats from a sequence of RetryEntry."""
+    if not retry_entries:
+      return RetryStats()
+    errors = {}
+    for retry in retry_entries:
+      if retry.error is not None:
+        errors[retry.error.__class__.__name__] = (
+            errors.get(retry.error.__class__.__name__, 0) + 1
+        )
+    return RetryStats(
+        num_occurences=len(retry_entries) - 1,
+        total_wait_interval=sum(e.wait_interval for e in retry_entries),
+        total_call_interval=sum(e.call_interval for e in retry_entries),
+        errors=errors,
+    )
+
+  def __add__(self, other: 'RetryStats') -> 'RetryStats':
+    errors = self.errors.copy()
+    for error, count in other.errors.items():
+      errors[error] = errors.get(error, 0) + count
+    return RetryStats(
+        num_occurences=self.num_occurences + other.num_occurences,
+        total_wait_interval=self.total_wait_interval
+        + other.total_wait_interval,
+        total_call_interval=self.total_call_interval
+        + other.total_call_interval,
+        errors=errors,
+    )
+
+  def __radd__(self, other: 'RetryStats') -> 'RetryStats':
+    return self + other
+
+
 class LMSamplingUsage(pg.Object):
   """Usage information per completion."""
 
   prompt_tokens: int
   completion_tokens: int
   total_tokens: int
+  num_requests: int = 1
+  estimated_cost: Annotated[
+      float | None,
+      (
+          'Estimated cost in US dollars. If None, cost estimating is not '
+          'suppported on the model being queried.'
+      ),
+  ] = None
+  retry_stats: RetryStats = RetryStats()
+
+  def __bool__(self) -> bool:
+    return self.num_requests > 0
+
+  @property
+  def average_prompt_tokens(self) -> int:
+    """Returns the average prompt tokens per request."""
+    return self.prompt_tokens // self.num_requests
+
+  @property
+  def average_completion_tokens(self) -> int:
+    """Returns the average completion tokens per request."""
+    return self.completion_tokens // self.num_requests
+
+  @property
+  def average_total_tokens(self) -> int:
+    """Returns the average total tokens per request."""
+    return self.total_tokens // self.num_requests
+
+  @property
+  def average_estimated_cost(self) -> float | None:
+    """Returns the average estimated cost per request."""
+    if self.estimated_cost is None:
+      return None
+    return self.estimated_cost / self.num_requests
+
+  def __add__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
+    if other is None:
+      return self
+    if self.estimated_cost is None:
+      estimated_cost = other.estimated_cost
+    elif other.estimated_cost is None:
+      estimated_cost = self.estimated_cost
+    else:
+      estimated_cost = self.estimated_cost + other.estimated_cost
+    return LMSamplingUsage(
+        prompt_tokens=self.prompt_tokens + other.prompt_tokens,
+        completion_tokens=self.completion_tokens + other.completion_tokens,
+        total_tokens=self.total_tokens + other.total_tokens,
+        num_requests=self.num_requests + other.num_requests,
+        estimated_cost=estimated_cost,
+        retry_stats=self.retry_stats + other.retry_stats,
+    )
+
+  def __radd__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
+    return self + other
+
+
+class UsageNotAvailable(LMSamplingUsage):
+  """Usage information not available."""
+  prompt_tokens: pg.typing.Int(0).freeze()  # pytype: disable=invalid-annotation
+  completion_tokens: pg.typing.Int(0).freeze()  # pytype: disable=invalid-annotation
+  total_tokens: pg.typing.Int(0).freeze()  # pytype: disable=invalid-annotation
+  estimated_cost: pg.typing.Float(default=None, is_noneable=True).freeze()  # pytype: disable=invalid-annotation
+
+  def __add__(self, other: Optional['LMSamplingUsage']) -> 'UsageNotAvailable':
+    if other is None:
+      return self
+    return UsageNotAvailable(
+        num_requests=self.num_requests + other.num_requests
+    )
+
+  def __radd__(self, other: Optional['LMSamplingUsage']) -> 'UsageNotAvailable':
+    return self + other
 
 
 class LMSamplingResult(pg.Object):
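Note: usage records are now additive. `LMSamplingUsage.__add__`/`__radd__` sum token counts, request counts, costs and retry stats, while `UsageNotAvailable` absorbs additions and keeps only the request count. A quick illustration with invented numbers:

```
a = LMSamplingUsage(100, 50, 150, num_requests=1, estimated_cost=0.002)
b = LMSamplingUsage(300, 70, 370, num_requests=1, estimated_cost=0.004)
c = a + b
assert c.total_tokens == 520 and c.num_requests == 2
assert c.average_total_tokens == 260           # 520 // 2
assert abs(c.estimated_cost - 0.006) < 1e-9

u = UsageNotAvailable() + a                    # token fields stay frozen at 0
assert u.num_requests == 2 and u.total_tokens == 0
```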
@@ -70,9 +229,14 @@ class LMSamplingResult(pg.Object):
   ] = []
 
   usage: Annotated[
-      LMSamplingUsage
+      LMSamplingUsage,
       'Usage information. Currently only OpenAI models are supported.',
-  ] =
+  ] = UsageNotAvailable()
+
+  is_cached: Annotated[
+      bool,
+      'Whether the result is from cache or not.'
+  ] = False
 
 
 class LMSamplingOptions(component.Component):
@@ -166,6 +330,11 @@ class LMScoringResult(pg.Object):
       float,
       'The log likelyhood of the requested completion towards the prompt.',
   ]
+  gradients: Annotated[
+      Any | None,
+      '(Optional) gradients from the score method, w.r.t.' +
+      ' prompt.metadata.weights.',
+  ] = None
 
 
 class LMCache(pg.Object):
@@ -180,6 +349,7 @@ class LMCache(pg.Object):
     num_hit_expires: int = 0
     num_misses: int = 0
     num_updates: int = 0
+    num_deletes: int = 0
 
   @abc.abstractmethod
   def get(
@@ -197,6 +367,15 @@ class LMCache(pg.Object):
   ) -> None:
     """Puts the result of a prompt generated by a language model in cache."""
 
+  @abc.abstractmethod
+  def delete(
+      self,
+      lm: 'LanguageModel',
+      prompt: message_lib.Message,
+      seed: int,
+  ) -> bool:
+    """Deletes the result of a prompt generated by a language model in cache."""
+
   @property
   @abc.abstractmethod
   def stats(self) -> Stats:
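Note: `LMCache` subclasses must now implement `delete` alongside `get`/`put`, and `Stats` grows a matching `num_deletes` counter. A toy dict-backed sketch of the expected semantics (not langfun's actual `in_memory` implementation; the key scheme is invented):

```
class _ToyCacheStore:
  def __init__(self):
    self._store = {}
    self.num_deletes = 0

  def delete(self, lm, prompt, seed) -> bool:
    key = (lm.model_id, prompt.text, seed)   # invented key scheme
    if self._store.pop(key, None) is None:
      return False                           # nothing cached for this key
    self.num_deletes += 1                    # mirrors Stats.num_deletes
    return True
```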
@@ -290,6 +469,15 @@ class LanguageModel(component.Component):
       )
   ] = True
 
+  max_retry_interval: Annotated[
+      int,
+      (
+          'The max retry interval in seconds. This is useful when the retry '
+          'interval is exponential, to avoid the wait time to grow '
+          'exponentially.'
+      )
+  ] = 300
+
   debug: Annotated[
       bool | LMDebugMode,
       (
@@ -303,7 +491,10 @@ class LanguageModel(component.Component):
   def __init__(self, *args, **kwargs) -> None:
     """Overrides __init__ to pass through **kwargs to sampling options."""
 
-    sampling_options = kwargs.pop(
+    sampling_options = kwargs.pop(
+        'sampling_options',
+        pg.clone(self.__schema__.fields['sampling_options'].default_value)
+    )
     sampling_options_delta = {}
 
     for k, v in kwargs.items():
@@ -361,12 +552,13 @@ class LanguageModel(component.Component):
       response = sample.response
       response.metadata.score = sample.score
       response.metadata.logprobs = sample.logprobs
+      response.metadata.is_cached = result.is_cached
 
       # NOTE(daiyip): Current usage is computed at per-result level,
       # which is accurate when n=1. For n > 1, we average the usage across
       # multiple samples.
       usage = result.usage
-      if len(result.samples) == 1 or usage
+      if len(result.samples) == 1 or isinstance(usage, UsageNotAvailable):
         response.metadata.usage = usage
       else:
         n = len(result.samples)
@@ -374,8 +566,29 @@ class LanguageModel(component.Component):
             prompt_tokens=usage.prompt_tokens // n,
             completion_tokens=usage.completion_tokens // n,
             total_tokens=usage.total_tokens // n,
+            estimated_cost=(
+                usage.estimated_cost / n if usage.estimated_cost else None
+            ),
+            retry_stats=RetryStats(
+                num_occurences=usage.retry_stats.num_occurences // n,
+                total_wait_interval=usage.retry_stats.total_wait_interval
+                / n,
+                total_call_interval=usage.retry_stats.total_call_interval
+                / n,
+                errors={
+                    error: count // n
+                    for error, count in usage.retry_stats.errors.items()
+                },
+            ),
         )
 
+      # Track usage.
+      trackers = component.context_value('__usage_trackers__', [])
+      if trackers:
+        model_id = self.model_id
+        for tracker in trackers:
+          tracker.track(model_id, usage, result.is_cached)
+
       # Track the prompt for corresponding response.
       response.source = prompt
 
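Note: for `n > 1` samples per request, each response's metadata carries roughly `1/n` of the request-level usage (integer division for token counts), so summing over responses approximately reconstructs the request totals. With invented numbers:

```
usage = LMSamplingUsage(600, 300, 900, num_requests=1, estimated_cost=0.01)
n = 2
per_sample_tokens = usage.total_tokens // n    # 450
per_sample_cost = usage.estimated_cost / n     # 0.005
```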
@@ -403,7 +616,9 @@ class LanguageModel(component.Component):
         request_to_result_index[len(requests)] = i
         requests.append(prompt)
       else:
-
+        result = r.clone()
+        assert result.is_cached, result
+        results[i] = result
 
     # Sample non-cache-hit prompts.
     if requests:
@@ -420,8 +635,12 @@ class LanguageModel(component.Component):
           sample.response.set('cache_seed', cache_seed)
 
         if cache_seed is not None:
-          self.cache.put(
-
+          self.cache.put(
+              self,
+              prompt,
+              result.clone(override=dict(is_cached=True)),
+              seed=cache_seed
+          )
     return results  # pytype: disable=bad-return-type
 
   @abc.abstractmethod
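Note: results are written to the cache with `is_cached=True`, so on a later hit the cloned result (and thus `response.metadata.is_cached`) reflects cache provenance. A hedged sketch, assuming `lm` was constructed with a cache attached:

```
r1 = lm('2 + 2 =', cache_seed=0)   # first call: goes to the model
r2 = lm('2 + 2 =', cache_seed=0)   # second call: served from lm.cache
assert r2.metadata.is_cached       # set via result.clone(override=...)
```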
@@ -433,16 +652,16 @@ class LanguageModel(component.Component):
 
   def _parallel_execute_with_currency_control(
       self,
-      action: Callable[...,
+      action: Callable[..., LMSamplingResult],
       inputs: Sequence[Any],
       retry_on_errors: Union[
           None,
-          Union[Type[
-          Sequence[Union[Type[
-      ] =
-  ) -> Any:
+          Union[Type[BaseException], Tuple[Type[BaseException], str]],
+          Sequence[Union[Type[BaseException], Tuple[Type[BaseException], str]]],
+      ] = RetryableLMError,
+  ) -> list[Any]:
     """Helper method for subclasses for implementing _sample."""
-
+    executed_jobs = concurrent.concurrent_execute(
         action,
         inputs,
         executor=self.resource_id if self.max_concurrency else None,
@@ -451,7 +670,16 @@ class LanguageModel(component.Component):
         max_attempts=self.max_attempts,
         retry_interval=self.retry_interval,
         exponential_backoff=self.exponential_backoff,
+        max_retry_interval=self.max_retry_interval,
+        return_jobs=True,
     )
+    for job in executed_jobs:
+      if isinstance(job.result, LMSamplingResult):
+        job.result.usage.rebind(
+            retry_stats=RetryStats.from_retry_entries(job.retry_entries),
+            skip_notification=True,
+        )
+    return [job.result for job in executed_jobs]
 
   def __call__(
       self, prompt: message_lib.Message, *, cache_seed: int = 0, **kwargs
@@ -479,7 +707,7 @@ class LanguageModel(component.Component):
       prompt: message_lib.Message,
       response: message_lib.Message,
       call_counter: int,
-      usage: LMSamplingUsage
+      usage: LMSamplingUsage,
       elapse: float,
   ) -> None:
     """Outputs debugging information."""
@@ -497,12 +725,13 @@ class LanguageModel(component.Component):
       self._debug_response(response, call_counter, usage, elapse)
 
   def _debug_model_info(
-      self, call_counter: int, usage: LMSamplingUsage
+      self, call_counter: int, usage: LMSamplingUsage) -> None:
     """Outputs debugging information about the model."""
     title_suffix = ''
-    if usage
-      title_suffix =
-          f' (total {usage.total_tokens} tokens)', 'red'
+    if usage.total_tokens != 0:
+      title_suffix = pg.colored(
+          f' (total {usage.total_tokens} tokens)', 'red'
+      )
 
     console.write(
         self.format(compact=True, use_inferred=True),
@@ -514,12 +743,12 @@ class LanguageModel(component.Component):
       self,
       prompt: message_lib.Message,
       call_counter: int,
-      usage: LMSamplingUsage
+      usage: LMSamplingUsage,
   ) -> None:
     """Outputs debugging information about the prompt."""
     title_suffix = ''
-    if usage
-      title_suffix =
+    if usage.prompt_tokens != 0:
+      title_suffix = pg.colored(f' ({usage.prompt_tokens} tokens)', 'red')
 
     console.write(
         # We use metadata 'formatted_text' for scenarios where the prompt text
@@ -542,15 +771,15 @@ class LanguageModel(component.Component):
       self,
       response: message_lib.Message,
       call_counter: int,
-      usage: LMSamplingUsage
+      usage: LMSamplingUsage,
       elapse: float
   ) -> None:
     """Outputs debugging information about the response."""
     title_suffix = ' ('
-    if usage
+    if usage.completion_tokens != 0:
       title_suffix += f'{usage.completion_tokens} tokens '
     title_suffix += f'in {elapse:.2f} seconds)'
-    title_suffix =
+    title_suffix = pg.colored(title_suffix, 'red')
 
     console.write(
         str(response) + '\n',
@@ -560,12 +789,19 @@ class LanguageModel(component.Component):
 
   def score(
       self,
-      prompt: str | message_lib.Message,
+      prompt: str | message_lib.Message | list[message_lib.Message],
       completions: list[str | message_lib.Message],
       **kwargs,
   ) -> list[LMScoringResult]:
     """Scores the given prompt."""
-
+    if isinstance(prompt, list):
+      if len(prompt) != len(completions):
+        raise ValueError(
+            'prompt and completions must have the same length.'
+        )
+      prompt = [message_lib.UserMessage.from_value(p) for p in prompt]
+    else:
+      prompt = message_lib.UserMessage.from_value(prompt)
     completions = [message_lib.UserMessage.from_value(c) for c in completions]
 
     call_counter = self._call_counter
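Note: `score` now also accepts a list of prompts that is zipped pairwise with `completions` (lengths must match, else the `ValueError` above). A usage sketch, assuming the model implements `_score`:

```
results = lm.score(
    ['1 + 1 =', '2 + 2 ='],   # prompt[i] is scored against completions[i]
    ['2', '4'],
)
for r in results:
  print(r.score)              # log likelihood per (prompt, completion) pair
```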
@@ -581,7 +817,8 @@ class LanguageModel(component.Component):
     return scoring_results
 
   def _score(
-      self, prompt: message_lib.Message
+      self, prompt: message_lib.Message | list[message_lib.Message],
+      completions: list[message_lib.Message]
   ) -> list[LMScoringResult]:
     """Subclass to implement."""
     raise NotImplementedError(
@@ -590,7 +827,7 @@ class LanguageModel(component.Component):
 
   def _debug_score(
       self,
-      prompt: message_lib.Message,
+      prompt: message_lib.Message | list[message_lib.Message],
       completions: list[message_lib.Message],
       scoring_results: list[LMScoringResult],
       call_counter: int,
@@ -601,7 +838,7 @@ class LanguageModel(component.Component):
       debug = LMDebugMode.ALL if debug else LMDebugMode.NONE
 
     if debug & LMDebugMode.INFO:
-      self._debug_model_info(call_counter,
+      self._debug_model_info(call_counter, UsageNotAvailable())
 
     if debug & LMDebugMode.PROMPT:
       console.write(
@@ -609,15 +846,19 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] SCORING LM WITH PROMPT:',
           color='green',
       )
-
-
-
-
-
-
-
-
-
+      if isinstance(prompt, list):
+        referred_modalities_lst = [p.referred_modalities() for p in prompt]
+      else:
+        referred_modalities_lst = [prompt.referred_modalities(),]
+      if referred_modalities_lst:
+        for referred_modalities in referred_modalities_lst:
+          console.write(
+              pg.object_utils.kvlist_str(
+                  [(k, repr(v), None) for k, v in referred_modalities.items()]
+              ),
+              title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
+              color='green',
+          )
 
     if debug & LMDebugMode.RESPONSE:
       console.write(
@@ -638,6 +879,72 @@ class LanguageModel(component.Component):
           color='blue',
       )
 
+  def tokenize(
+      self,
+      prompt: str | message_lib.Message,
+      **kwargs,
+  ) -> list[tuple[str | bytes, int]]:
+    """Tokenizes the given prompt."""
+    prompt = message_lib.UserMessage.from_value(prompt)
+    call_counter = self._call_counter
+    self._call_counter += 1
+
+    with component.context(override_attrs=True, **kwargs):
+      request_start = time.time()
+      tokens = self._tokenize(prompt)
+      elapse = time.time() - request_start
+      self._debug_tokenize(prompt, tokens, call_counter, elapse)
+    return tokens
+
+  def _tokenize(
+      self, prompt: message_lib.Message
+  ) -> list[tuple[str | bytes, int]]:
+    """Subclass to implement."""
+    raise NotImplementedError(
+        f'{self.__class__.__name__} does not support tokenization.'
+    )
+
+  def _debug_tokenize(
+      self,
+      prompt: message_lib.Message,
+      tokens: list[tuple[str | bytes, int]],
+      call_counter: int,
+      elapse: float,
+  ):
+    debug = self.debug
+    if isinstance(debug, bool):
+      debug = LMDebugMode.ALL if debug else LMDebugMode.NONE
+
+    if debug & LMDebugMode.INFO:
+      self._debug_model_info(call_counter, UsageNotAvailable())
+
+    if debug & LMDebugMode.PROMPT:
+      console.write(
+          prompt,
+          title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
+          color='green',
+      )
+      referred_modalities_lst = [prompt.referred_modalities(),]
+      if referred_modalities_lst:
+        for referred_modalities in referred_modalities_lst:
+          console.write(
+              pg.object_utils.kvlist_str(
+                  [(k, repr(v), None) for k, v in referred_modalities.items()]
+              ),
+              title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
+              color='green',
+          )
+
+    if debug & LMDebugMode.RESPONSE:
+      console.write(
+          tokens,
+          title=(
+              f'\n[{call_counter}] {len(tokens)} TOKENS RETURNED '
+              f'(in {elapse:.2f} seconds):'
+          ),
+          color='blue',
+      )
+
   def rate_to_max_concurrency(
       self, requests_per_min: float = 0, tokens_per_min: float = 0
   ) -> int:
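Note: the public `tokenize` entry point wraps a model-specific `_tokenize` and returns `(token, token_id)` pairs; models that do not override `_tokenize` raise `NotImplementedError`. Sketch:

```
tokens = lm.tokenize('Hello world')  # assumes lm overrides _tokenize
for text, token_id in tokens:
  print(repr(text), token_id)
```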
@@ -648,3 +955,234 @@ class LanguageModel(component.Component):
       return max(int(requests_per_min / 60), 1)  # Max concurrency can't be zero
     else:
       return DEFAULT_MAX_CONCURRENCY  # Default of 1
+
+
+class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
+  """Usage sumary."""
+
+  class AggregatedUsage(pg.Object):
+    """Aggregated usage."""
+
+    total: LMSamplingUsage = LMSamplingUsage(0, 0, 0, 0, 0.0)
+    breakdown: dict[str, LMSamplingUsage] = {}
+
+    def __bool__(self) -> bool:
+      """Returns True if the usage is non-empty."""
+      return bool(self.breakdown)
+
+    def add(
+        self,
+        model_id: str,
+        usage: LMSamplingUsage,
+    ) -> None:
+      """Adds an entry to the breakdown."""
+      aggregated = self.breakdown.get(model_id, None)
+      with pg.notify_on_change(False):
+        self.breakdown[model_id] = usage + aggregated
+        self.rebind(
+            total=self.total + usage,
+            raise_on_no_change=False
+        )
+
+    def merge(self, other: 'UsageSummary.AggregatedUsage') -> None:
+      """Merges the usage summary."""
+      with pg.notify_on_change(False):
+        for model_id, usage in other.breakdown.items():
+          self.add(model_id, usage)
+
+  def _on_bound(self):
+    super()._on_bound()
+    self._usage_badge = None
+    self._lock = threading.Lock()
+
+  @property
+  def total(self) -> LMSamplingUsage:
+    return self.cached.total + self.uncached.total
+
+  def add(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
+    """Updates the usage summary."""
+    with self._lock:
+      if is_cached:
+        usage.rebind(estimated_cost=0.0, skip_notification=True)
+        self.cached.add(model_id, usage)
+      else:
+        self.uncached.add(model_id, usage)
+      self._update_view()
+
+  def merge(self, other: 'UsageSummary', as_cached: bool = False) -> None:
+    """Aggregates the usage summary.
+
+    Args:
+      other: The usage summary to merge.
+      as_cached: Whether to merge the usage summary as cached.
+    """
+    with self._lock:
+      self.cached.merge(other.cached)
+      if as_cached:
+        self.cached.merge(other.uncached)
+      else:
+        self.uncached.merge(other.uncached)
+      self._update_view()
+
+  def _sym_nondefault(self) -> dict[str, Any]:
+    """Overrides nondefault values so volatile values are not included."""
+    return dict()
+
+  #
+  # Html views for the usage summary.
+  #
+
+  def _update_view(self):
+    if self._usage_badge is not None:
+      self._usage_badge.update(
+          self._badge_text(),
+          tooltip=pg.format(
+              self, verbose=False, custom_format=self._tooltip_format
+          ),
+          styles=dict(color=self._badge_color()),
+      )
+
+  def _badge_text(self) -> str:
+    if self.total.estimated_cost is not None:
+      return f'{self.total.estimated_cost:.3f}'
+    return '0.000'
+
+  def _badge_color(self) -> str | None:
+    if self.total.estimated_cost is None or self.total.estimated_cost < 1.0:
+      return None
+
+    # Step 1: The normal cost range is around 1e-3 to 1e5.
+    # Therefore we normalize the log10 value from [-3, 5] to [0, 1].
+    normalized_value = (math.log10(self.total.estimated_cost) + 3) / (5 + 3)
+
+    # Step 2: Interpolate between green and red
+    red = int(255 * normalized_value)
+    green = int(255 * (1 - normalized_value))
+    return f'rgb({red}, {green}, 0)'
+
+  def _tooltip_format(self, v, root_indent):
+    del root_indent
+    if isinstance(v, int):
+      return f'{v:,}'
+    if isinstance(v, float):
+      return f'{v:,.3f}'
+    return None
+
+  def _html_tree_view(
+      self,
+      *,
+      view: pg.views.HtmlTreeView,
+      extra_flags: dict[str, Any] | None = None,
+      **kwargs
+  ) -> pg.Html:
+    extra_flags = extra_flags or {}
+    as_badge = extra_flags.pop('as_badge', False)
+    interactive = extra_flags.get('interactive', True)
+    if as_badge:
+      usage_badge = self._usage_badge
+      if usage_badge is None:
+        usage_badge = pg.views.html.controls.Badge(
+            self._badge_text(),
+            tooltip=pg.format(
+                self, custom_format=self._tooltip_format, verbose=False
+            ),
+            css_classes=['usage-summary'],
+            styles=dict(color=self._badge_color()),
+            interactive=True,
+        )
+        if interactive:
+          self._usage_badge = usage_badge
+      return usage_badge.to_html()
+    return super()._html_tree_view(
+        view=view,
+        extra_flags=extra_flags,
+        **kwargs
+    )
+
+  @classmethod
+  @functools.cache
+  def _html_tree_view_css_styles(cls) -> list[str]:
+    return super()._html_tree_view_css_styles() + [
+        """
+        .usage-summary.label {
+          display: inline-flex;
+          border-radius: 5px;
+          padding: 5px;
+          background-color: #f1f1f1;
+          color: #CCC;
+        }
+        .usage-summary.label::before {
+          content: '$';
+        }
+        """
+    ]
+
+pg.members(
+    dict(
+        cached=(
+            pg.typing.Object(
+                UsageSummary.AggregatedUsage,
+                default=UsageSummary.AggregatedUsage()
+            ),
+            'Aggregated usages for cached LLM calls.'
+        ),
+        uncached=(
+            pg.typing.Object(
+                UsageSummary.AggregatedUsage,
+                default=UsageSummary.AggregatedUsage()
+            ),
+            'Aggregated usages for uncached LLM calls.'
+        ),
+    )
+)(UsageSummary)
+
+
+class _UsageTracker:
+  """Usage tracker."""
+
+  def __init__(self, model_ids: set[str] | None):
+    self.model_ids = model_ids
+    self.usage_summary = UsageSummary()
+
+  def track(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
+    if self.model_ids is None or model_id in self.model_ids:
+      self.usage_summary.add(model_id, usage, is_cached)
+
+
+@contextlib.contextmanager
+def track_usages(
+    *lm: Union[str, LanguageModel]
+) -> Iterator[UsageSummary]:
+  """Context manager to track the usages of all language models in scope.
+
+  `lf.track_usages` works with threads spawned by `lf.concurrent_map` and
+  `lf.concurrent_execute`.
+
+  Example:
+    ```
+    lm = lf.llms.GeminiPro1()
+    with lf.track_usages() as usages:
+      # invoke any code that will call LLMs.
+
+    print(usages[lm.model_id])
+    ```
+
+  Args:
+    *lm: The language model(s) to track. If None, track all models in scope.
+
+  Yields:
+    A dictionary of model ID to usage. If a model does not supports usage
+    counting, the dict entry will be None.
+  """
+  if not lm:
+    model_ids = None
+  else:
+    model_ids = [m.model_id if isinstance(m, LanguageModel) else m for m in lm]
+
+  trackers = component.context_value('__usage_trackers__', [])
+  tracker = _UsageTracker(set(model_ids) if model_ids else None)
+  with component.context(__usage_trackers__=trackers + [tracker]):
+    try:
+      yield tracker.usage_summary
+    finally:
+      pass
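Note: `track_usages` threads a `_UsageTracker` through `component.context`, so every `LanguageModel` sample in scope (including nested trackers and worker threads) reports into its `UsageSummary`. A hedged usage sketch, assuming the top-level `lf` re-exports implied by the docstring and a model that reports usage (the fake `Echo` model is used here only for illustration):

```
import langfun as lf

lm = lf.llms.Echo()                    # any LanguageModel works here
with lf.track_usages(lm) as usage_summary:
  lm('Hello')
  lm('World')

print(usage_summary.total.num_requests)          # 2
print(dict(usage_summary.uncached.breakdown))    # per-model-id breakdown
```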