langfun 0.0.2.dev20240330__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. langfun/__init__.py +22 -2
  2. langfun/core/__init__.py +17 -5
  3. langfun/core/agentic/__init__.py +30 -0
  4. langfun/core/agentic/action.py +854 -0
  5. langfun/core/agentic/action_eval.py +150 -0
  6. langfun/core/agentic/action_eval_test.py +109 -0
  7. langfun/core/agentic/action_test.py +136 -0
  8. langfun/core/coding/python/__init__.py +5 -11
  9. langfun/core/coding/python/correction.py +37 -28
  10. langfun/core/coding/python/correction_test.py +29 -3
  11. langfun/core/coding/python/execution.py +40 -216
  12. langfun/core/coding/python/execution_test.py +29 -89
  13. langfun/core/coding/python/generation.py +21 -11
  14. langfun/core/coding/python/generation_test.py +2 -2
  15. langfun/core/coding/python/parsing.py +108 -193
  16. langfun/core/coding/python/parsing_test.py +2 -105
  17. langfun/core/component.py +69 -2
  18. langfun/core/component_test.py +54 -0
  19. langfun/core/concurrent.py +414 -117
  20. langfun/core/concurrent_test.py +111 -24
  21. langfun/core/console.py +18 -5
  22. langfun/core/console_test.py +17 -0
  23. langfun/core/eval/__init__.py +17 -0
  24. langfun/core/eval/base.py +767 -140
  25. langfun/core/eval/base_test.py +238 -53
  26. langfun/core/eval/matching.py +80 -76
  27. langfun/core/eval/matching_test.py +19 -9
  28. langfun/core/eval/patching.py +130 -0
  29. langfun/core/eval/patching_test.py +170 -0
  30. langfun/core/eval/scoring.py +37 -28
  31. langfun/core/eval/scoring_test.py +21 -3
  32. langfun/core/eval/v2/__init__.py +42 -0
  33. langfun/core/eval/v2/checkpointing.py +380 -0
  34. langfun/core/eval/v2/checkpointing_test.py +228 -0
  35. langfun/core/eval/v2/eval_test_helper.py +136 -0
  36. langfun/core/eval/v2/evaluation.py +725 -0
  37. langfun/core/eval/v2/evaluation_test.py +180 -0
  38. langfun/core/eval/v2/example.py +305 -0
  39. langfun/core/eval/v2/example_test.py +128 -0
  40. langfun/core/eval/v2/experiment.py +1048 -0
  41. langfun/core/eval/v2/experiment_test.py +433 -0
  42. langfun/core/eval/v2/metric_values.py +156 -0
  43. langfun/core/eval/v2/metric_values_test.py +80 -0
  44. langfun/core/eval/v2/metrics.py +357 -0
  45. langfun/core/eval/v2/metrics_test.py +203 -0
  46. langfun/core/eval/v2/progress.py +348 -0
  47. langfun/core/eval/v2/progress_test.py +82 -0
  48. langfun/core/eval/v2/progress_tracking.py +210 -0
  49. langfun/core/eval/v2/progress_tracking_test.py +66 -0
  50. langfun/core/eval/v2/reporting.py +270 -0
  51. langfun/core/eval/v2/reporting_test.py +158 -0
  52. langfun/core/eval/v2/runners.py +488 -0
  53. langfun/core/eval/v2/runners_test.py +334 -0
  54. langfun/core/langfunc.py +3 -21
  55. langfun/core/langfunc_test.py +26 -8
  56. langfun/core/language_model.py +686 -48
  57. langfun/core/language_model_test.py +681 -44
  58. langfun/core/llms/__init__.py +100 -12
  59. langfun/core/llms/anthropic.py +488 -0
  60. langfun/core/llms/anthropic_test.py +235 -0
  61. langfun/core/llms/cache/base.py +21 -2
  62. langfun/core/llms/cache/in_memory.py +13 -0
  63. langfun/core/llms/cache/in_memory_test.py +88 -28
  64. langfun/core/llms/compositional.py +101 -0
  65. langfun/core/llms/compositional_test.py +73 -0
  66. langfun/core/llms/deepseek.py +117 -0
  67. langfun/core/llms/deepseek_test.py +61 -0
  68. langfun/core/llms/fake.py +39 -26
  69. langfun/core/llms/fake_test.py +136 -11
  70. langfun/core/llms/gemini.py +507 -0
  71. langfun/core/llms/gemini_test.py +195 -0
  72. langfun/core/llms/google_genai.py +62 -218
  73. langfun/core/llms/google_genai_test.py +9 -197
  74. langfun/core/llms/groq.py +276 -0
  75. langfun/core/llms/groq_test.py +64 -0
  76. langfun/core/llms/llama_cpp.py +15 -40
  77. langfun/core/llms/llama_cpp_test.py +4 -30
  78. langfun/core/llms/openai.py +436 -226
  79. langfun/core/llms/openai_compatible.py +179 -0
  80. langfun/core/llms/openai_compatible_test.py +495 -0
  81. langfun/core/llms/openai_test.py +35 -174
  82. langfun/core/llms/rest.py +113 -0
  83. langfun/core/llms/rest_test.py +111 -0
  84. langfun/core/llms/vertexai.py +192 -0
  85. langfun/core/llms/vertexai_test.py +52 -0
  86. langfun/core/logging.py +284 -0
  87. langfun/core/logging_test.py +125 -0
  88. langfun/core/message.py +319 -9
  89. langfun/core/message_test.py +190 -13
  90. langfun/core/modalities/__init__.py +6 -2
  91. langfun/core/modalities/audio.py +30 -0
  92. langfun/core/modalities/audio_test.py +63 -0
  93. langfun/core/modalities/image.py +39 -20
  94. langfun/core/modalities/image_test.py +52 -9
  95. langfun/core/modalities/mime.py +206 -29
  96. langfun/core/modalities/mime_test.py +90 -9
  97. langfun/core/modalities/ms_office.py +117 -0
  98. langfun/core/modalities/ms_office_test.py +389 -0
  99. langfun/core/modalities/pdf.py +22 -0
  100. langfun/core/modalities/pdf_test.py +57 -0
  101. langfun/core/modalities/video.py +9 -23
  102. langfun/core/modalities/video_test.py +3 -3
  103. langfun/core/modality.py +26 -3
  104. langfun/core/modality_test.py +2 -2
  105. langfun/core/sampling.py +11 -11
  106. langfun/core/structured/__init__.py +15 -16
  107. langfun/core/structured/completion.py +32 -5
  108. langfun/core/structured/completion_test.py +9 -8
  109. langfun/core/structured/description.py +2 -2
  110. langfun/core/structured/description_test.py +3 -3
  111. langfun/core/structured/function_generation.py +278 -0
  112. langfun/core/structured/function_generation_test.py +399 -0
  113. langfun/core/structured/mapping.py +150 -46
  114. langfun/core/structured/mapping_test.py +105 -0
  115. langfun/core/structured/parsing.py +33 -21
  116. langfun/core/structured/parsing_test.py +71 -22
  117. langfun/core/structured/querying.py +746 -0
  118. langfun/core/structured/{prompting_test.py → querying_test.py} +545 -60
  119. langfun/core/structured/schema.py +208 -99
  120. langfun/core/structured/schema_generation.py +1 -1
  121. langfun/core/structured/schema_generation_test.py +2 -2
  122. langfun/core/structured/schema_test.py +133 -34
  123. langfun/core/structured/scoring.py +125 -19
  124. langfun/core/structured/scoring_test.py +30 -0
  125. langfun/core/structured/tokenization.py +64 -0
  126. langfun/core/structured/tokenization_test.py +48 -0
  127. langfun/core/template.py +240 -11
  128. langfun/core/template_test.py +146 -1
  129. langfun/core/templates/conversation.py +9 -0
  130. langfun/core/templates/conversation_test.py +4 -3
  131. langfun/core/templates/selfplay_test.py +14 -2
  132. langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
  133. langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
  134. {langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
  135. langfun/core/coding/python/errors.py +0 -108
  136. langfun/core/coding/python/errors_test.py +0 -99
  137. langfun/core/coding/python/permissions.py +0 -90
  138. langfun/core/coding/python/permissions_test.py +0 -86
  139. langfun/core/structured/prompting.py +0 -217
  140. langfun/core/text_formatting.py +0 -162
  141. langfun/core/text_formatting_test.py +0 -47
  142. langfun-0.0.2.dev20240330.dist-info/METADATA +0 -99
  143. langfun-0.0.2.dev20240330.dist-info/RECORD +0 -102
  144. {langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
  145. {langfun-0.0.2.dev20240330.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0
@@ -14,16 +14,50 @@
  """Interface for language model."""

  import abc
+ import contextlib
  import dataclasses
  import enum
+ import functools
+ import math
+ import threading
  import time
- from typing import Annotated, Any, Callable, Sequence, Tuple, Type, Union
+ from typing import Annotated, Any, Callable, Iterator, Optional, Sequence, Tuple, Type, Union
  from langfun.core import component
  from langfun.core import concurrent
  from langfun.core import console
  from langfun.core import message as message_lib
+
  import pyglove as pg

+ TOKENS_PER_REQUEST = 250  # Estimated num tokens for a single request
+ DEFAULT_MAX_CONCURRENCY = 1  # Use this as max concurrency if no RPM or TPM data
+
+
+ #
+ # Common errors during calling language models.
+ #
+
+
+ class LMError(RuntimeError):
+   """Base class for language model errors."""
+
+
+ class RetryableLMError(LMError):
+   """Base class for LLM errors that can be solved by retrying."""
+
+
+ class RateLimitError(RetryableLMError):
+   """Error for rate limit reached."""
+
+
+ class TemporaryLMError(RetryableLMError):
+   """Error for temporary service issues that can be retried."""
+
+
+ #
+ # Language model input/output interfaces.
+ #
+

  class LMSample(pg.Object):
    """Response candidate."""
@@ -47,6 +81,142 @@ class LMSample(pg.Object):
    ] = None


+ class RetryStats(pg.Object):
+   """Retry stats, which is aggregated across multiple retry entries."""
+
+   num_occurences: Annotated[
+       int,
+       'Total number of retry attempts on LLM (excluding the first attempt).',
+   ] = 0
+   total_wait_interval: Annotated[
+       float, 'Total wait interval in seconds due to retry.'
+   ] = 0
+   total_call_interval: Annotated[
+       float, 'Total LLM call interval in seconds.'
+   ] = 0
+   errors: Annotated[
+       dict[str, int],
+       'A Counter of error types encountered during the retry attempts.',
+   ] = {}
+
+   @classmethod
+   def from_retry_entries(
+       cls, retry_entries: Sequence[concurrent.RetryEntry]
+   ) -> 'RetryStats':
+     """Creates a RetryStats from a sequence of RetryEntry."""
+     if not retry_entries:
+       return RetryStats()
+     errors = {}
+     for retry in retry_entries:
+       if retry.error is not None:
+         errors[retry.error.__class__.__name__] = (
+             errors.get(retry.error.__class__.__name__, 0) + 1
+         )
+     return RetryStats(
+         num_occurences=len(retry_entries) - 1,
+         total_wait_interval=sum(e.wait_interval for e in retry_entries),
+         total_call_interval=sum(e.call_interval for e in retry_entries),
+         errors=errors,
+     )
+
+   def __add__(self, other: 'RetryStats') -> 'RetryStats':
+     errors = self.errors.copy()
+     for error, count in other.errors.items():
+       errors[error] = errors.get(error, 0) + count
+     return RetryStats(
+         num_occurences=self.num_occurences + other.num_occurences,
+         total_wait_interval=self.total_wait_interval
+         + other.total_wait_interval,
+         total_call_interval=self.total_call_interval
+         + other.total_call_interval,
+         errors=errors,
+     )
+
+   def __radd__(self, other: 'RetryStats') -> 'RetryStats':
+     return self + other
+
+
+ class LMSamplingUsage(pg.Object):
+   """Usage information per completion."""
+
+   prompt_tokens: int
+   completion_tokens: int
+   total_tokens: int
+   num_requests: int = 1
+   estimated_cost: Annotated[
+       float | None,
+       (
+           'Estimated cost in US dollars. If None, cost estimating is not '
+           'suppported on the model being queried.'
+       ),
+   ] = None
+   retry_stats: RetryStats = RetryStats()
+
+   def __bool__(self) -> bool:
+     return self.num_requests > 0
+
+   @property
+   def average_prompt_tokens(self) -> int:
+     """Returns the average prompt tokens per request."""
+     return self.prompt_tokens // self.num_requests
+
+   @property
+   def average_completion_tokens(self) -> int:
+     """Returns the average completion tokens per request."""
+     return self.completion_tokens // self.num_requests
+
+   @property
+   def average_total_tokens(self) -> int:
+     """Returns the average total tokens per request."""
+     return self.total_tokens // self.num_requests
+
+   @property
+   def average_estimated_cost(self) -> float | None:
+     """Returns the average estimated cost per request."""
+     if self.estimated_cost is None:
+       return None
+     return self.estimated_cost / self.num_requests
+
+   def __add__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
+     if other is None:
+       return self
+     if self.estimated_cost is None:
+       estimated_cost = other.estimated_cost
+     elif other.estimated_cost is None:
+       estimated_cost = self.estimated_cost
+     else:
+       estimated_cost = self.estimated_cost + other.estimated_cost
+     return LMSamplingUsage(
+         prompt_tokens=self.prompt_tokens + other.prompt_tokens,
+         completion_tokens=self.completion_tokens + other.completion_tokens,
+         total_tokens=self.total_tokens + other.total_tokens,
+         num_requests=self.num_requests + other.num_requests,
+         estimated_cost=estimated_cost,
+         retry_stats=self.retry_stats + other.retry_stats,
+     )
+
+   def __radd__(self, other: Optional['LMSamplingUsage']) -> 'LMSamplingUsage':
+     return self + other
+
+
+ class UsageNotAvailable(LMSamplingUsage):
+   """Usage information not available."""
+   prompt_tokens: pg.typing.Int(0).freeze()  # pytype: disable=invalid-annotation
+   completion_tokens: pg.typing.Int(0).freeze()  # pytype: disable=invalid-annotation
+   total_tokens: pg.typing.Int(0).freeze()  # pytype: disable=invalid-annotation
+   estimated_cost: pg.typing.Float(default=None, is_noneable=True).freeze()  # pytype: disable=invalid-annotation
+
+   def __add__(self, other: Optional['LMSamplingUsage']) -> 'UsageNotAvailable':
+     if other is None:
+       return self
+     return UsageNotAvailable(
+         num_requests=self.num_requests + other.num_requests
+     )
+
+   def __radd__(self, other: Optional['LMSamplingUsage']) -> 'UsageNotAvailable':
+     return self + other
+
+
  class LMSamplingResult(pg.Object):
    """Language model response."""

@@ -58,19 +228,39 @@ class LMSamplingResult(pg.Object):
        ),
    ] = []

+   usage: Annotated[
+       LMSamplingUsage,
+       'Usage information. Currently only OpenAI models are supported.',
+   ] = UsageNotAvailable()
+
+   is_cached: Annotated[
+       bool,
+       'Whether the result is from cache or not.'
+   ] = False
+

  class LMSamplingOptions(component.Component):
    """Language model sampling options."""

    temperature: Annotated[
-       float,
+       float | None,
        (
            'Model temperature, which is usually between 0 and 1.0. '
-           'OpenAI models have temperature range from 0.0 to 2.0.'
+           'OpenAI models have temperature range from 0.0 to 2.0. '
+           'If None (default), honor the model\'s default behavior. '
        )
-   ] = 0.0
-   max_tokens: Annotated[int, 'Per example max tokens to generate.'] = 1024
+   ] = None
+
+   max_tokens: Annotated[
+       int | None,
+       (
+           'Per example max tokens to generate. '
+           'If None, use the model default.'
+       )
+   ] = None
+
    n: Annotated[int | None, 'Max number of samples to return.'] = 1
+
    top_k: Annotated[
        int | None,
        (
@@ -78,6 +268,7 @@ class LMSamplingOptions(component.Component):
            'Not applicable to OpenAI models.'
        )
    ] = 40
+
    top_p: Annotated[
        float | None,
        (
@@ -86,6 +277,7 @@ class LMSamplingOptions(component.Component):
            '`top_p` but not both.'
        ),
    ] = None
+
    stop: Annotated[
        list[str] | None,
        (
@@ -95,9 +287,11 @@ class LMSamplingOptions(component.Component):
            '`Model:` is reached.'
        ),
    ] = None
+
    random_seed: Annotated[
        int | None, 'A fixed random seed used during model inference.'
    ] = None
+
    logprobs: Annotated[
        bool,
        (
@@ -106,6 +300,7 @@ class LMSamplingOptions(component.Component):
            'in the content of message.'
        ),
    ] = False
+
    top_logprobs: Annotated[
        int | None,
        (
@@ -135,6 +330,11 @@ class LMScoringResult(pg.Object):
        float,
        'The log likelyhood of the requested completion towards the prompt.',
    ]
+   gradients: Annotated[
+       Any | None,
+       '(Optional) gradients from the score method, w.r.t.' +
+       ' prompt.metadata.weights.',
+   ] = None


  class LMCache(pg.Object):
@@ -149,6 +349,7 @@ class LMCache(pg.Object):
      num_hit_expires: int = 0
      num_misses: int = 0
      num_updates: int = 0
+     num_deletes: int = 0

    @abc.abstractmethod
    def get(
@@ -166,6 +367,15 @@ class LMCache(pg.Object):
    ) -> None:
      """Puts the result of a prompt generated by a language model in cache."""

+   @abc.abstractmethod
+   def delete(
+       self,
+       lm: 'LanguageModel',
+       prompt: message_lib.Message,
+       seed: int,
+   ) -> bool:
+     """Deletes the result of a prompt generated by a language model in cache."""
+
    @property
    @abc.abstractmethod
    def stats(self) -> Stats:
@@ -259,6 +469,15 @@ class LanguageModel(component.Component):
        )
    ] = True

+   max_retry_interval: Annotated[
+       int,
+       (
+           'The max retry interval in seconds. This is useful when the retry '
+           'interval is exponential, to avoid the wait time to grow '
+           'exponentially.'
+       )
+   ] = 300
+
    debug: Annotated[
        bool | LMDebugMode,
        (
@@ -272,7 +491,10 @@ class LanguageModel(component.Component):
    def __init__(self, *args, **kwargs) -> None:
      """Overrides __init__ to pass through **kwargs to sampling options."""

-     sampling_options = kwargs.pop('sampling_options', LMSamplingOptions())
+     sampling_options = kwargs.pop(
+         'sampling_options',
+         pg.clone(self.__schema__.fields['sampling_options'].default_value)
+     )
      sampling_options_delta = {}

      for k, v in kwargs.items():
@@ -315,9 +537,64 @@ class LanguageModel(component.Component):

      with component.context(override_attrs=True, **kwargs):
        if self.cache is None:
-         return self._sample(prompts)
+         results = self._sample(prompts)
        else:
-         return self._sample_with_cache_lookup(prompts, cache_seed)
+         results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+       for prompt, result in zip(prompts, results):
+
+         # Tag LM input.
+         prompt.tag(message_lib.Message.TAG_LM_INPUT)
+
+         for sample in result.samples:
+           # Update metadata for response message.
+
+           response = sample.response
+           response.metadata.score = sample.score
+           response.metadata.logprobs = sample.logprobs
+           response.metadata.is_cached = result.is_cached
+
+           # NOTE(daiyip): Current usage is computed at per-result level,
+           # which is accurate when n=1. For n > 1, we average the usage across
+           # multiple samples.
+           usage = result.usage
+           if len(result.samples) == 1 or isinstance(usage, UsageNotAvailable):
+             response.metadata.usage = usage
+           else:
+             n = len(result.samples)
+             response.metadata.usage = LMSamplingUsage(
+                 prompt_tokens=usage.prompt_tokens // n,
+                 completion_tokens=usage.completion_tokens // n,
+                 total_tokens=usage.total_tokens // n,
+                 estimated_cost=(
+                     usage.estimated_cost / n if usage.estimated_cost else None
+                 ),
+                 retry_stats=RetryStats(
+                     num_occurences=usage.retry_stats.num_occurences // n,
+                     total_wait_interval=usage.retry_stats.total_wait_interval
+                     / n,
+                     total_call_interval=usage.retry_stats.total_call_interval
+                     / n,
+                     errors={
+                         error: count // n
+                         for error, count in usage.retry_stats.errors.items()
+                     },
+                 ),
+             )
+
+           # Track usage.
+           trackers = component.context_value('__usage_trackers__', [])
+           if trackers:
+             model_id = self.model_id
+             for tracker in trackers:
+               tracker.track(model_id, usage, result.is_cached)
+
+           # Track the prompt for corresponding response.
+           response.source = prompt
+
+           # Tag LM response.
+           response.tag(message_lib.Message.TAG_LM_RESPONSE)
+       return results

    def _sample_with_cache_lookup(
        self, prompts: list[str | message_lib.Message], cache_seed: int
@@ -339,7 +616,9 @@ class LanguageModel(component.Component):
          request_to_result_index[len(requests)] = i
          requests.append(prompt)
        else:
-         results[i] = r.clone()
+         result = r.clone()
+         assert result.is_cached, result
+         results[i] = result

      # Sample non-cache-hit prompts.
      if requests:
@@ -356,8 +635,12 @@ class LanguageModel(component.Component):
            sample.response.set('cache_seed', cache_seed)

          if cache_seed is not None:
-           self.cache.put(self, prompt, result.clone(), seed=cache_seed)
-
+           self.cache.put(
+               self,
+               prompt,
+               result.clone(override=dict(is_cached=True)),
+               seed=cache_seed
+           )
      return results  # pytype: disable=bad-return-type

    @abc.abstractmethod
@@ -369,16 +652,16 @@ class LanguageModel(component.Component):

    def _parallel_execute_with_currency_control(
        self,
-       action: Callable[..., Any],
+       action: Callable[..., LMSamplingResult],
        inputs: Sequence[Any],
        retry_on_errors: Union[
            None,
-           Union[Type[Exception], Tuple[Type[Exception], str]],
-           Sequence[Union[Type[Exception], Tuple[Type[Exception], str]]],
-       ] = None,
-   ) -> Any:
+           Union[Type[BaseException], Tuple[Type[BaseException], str]],
+           Sequence[Union[Type[BaseException], Tuple[Type[BaseException], str]]],
+       ] = RetryableLMError,
+   ) -> list[Any]:
      """Helper method for subclasses for implementing _sample."""
-     return concurrent.concurrent_execute(
+     executed_jobs = concurrent.concurrent_execute(
          action,
          inputs,
          executor=self.resource_id if self.max_concurrency else None,
@@ -387,7 +670,16 @@ class LanguageModel(component.Component):
          max_attempts=self.max_attempts,
          retry_interval=self.retry_interval,
          exponential_backoff=self.exponential_backoff,
+         max_retry_interval=self.max_retry_interval,
+         return_jobs=True,
      )
+     for job in executed_jobs:
+       if isinstance(job.result, LMSamplingResult):
+         job.result.usage.rebind(
+             retry_stats=RetryStats.from_retry_entries(job.retry_entries),
+             skip_notification=True,
+         )
+     return [job.result for job in executed_jobs]

    def __call__(
        self, prompt: message_lib.Message, *, cache_seed: int = 0, **kwargs
@@ -405,12 +697,9 @@ class LanguageModel(component.Component):
        result = self.sample(
            [prompt], sampling_options=sampling_options, cache_seed=cache_seed
        )[0]
-       response = result.samples[0].response
-       logprobs = result.samples[0].logprobs
-       response.set('score', result.samples[0].score)
-       response.metadata.logprobs = logprobs
        elapse = time.time() - request_start
-       self._debug(prompt, response, call_counter, elapse)
+       response = result.samples[0].response
+       self._debug(prompt, response, call_counter, result.usage, elapse)
        return response

    def _debug(
@@ -418,35 +707,54 @@ class LanguageModel(component.Component):
        prompt: message_lib.Message,
        response: message_lib.Message,
        call_counter: int,
+       usage: LMSamplingUsage,
        elapse: float,
-   ):
+   ) -> None:
      """Outputs debugging information."""
      debug = self.debug
      if isinstance(debug, bool):
        debug = LMDebugMode.ALL if debug else LMDebugMode.NONE

      if debug & LMDebugMode.INFO:
-       self._debug_model_info(call_counter)
+       self._debug_model_info(call_counter, usage)

      if debug & LMDebugMode.PROMPT:
-       self._debug_prompt(prompt, call_counter)
+       self._debug_prompt(prompt, call_counter, usage)

      if debug & LMDebugMode.RESPONSE:
-       self._debug_response(response, call_counter, elapse)
+       self._debug_response(response, call_counter, usage, elapse)

-   def _debug_model_info(self, call_counter: int):
+   def _debug_model_info(
+       self, call_counter: int, usage: LMSamplingUsage) -> None:
      """Outputs debugging information about the model."""
+     title_suffix = ''
+     if usage.total_tokens != 0:
+       title_suffix = pg.colored(
+           f' (total {usage.total_tokens} tokens)', 'red'
+       )
+
      console.write(
          self.format(compact=True, use_inferred=True),
-         title=f'[{call_counter}] LM INFO:',
+         title=f'[{call_counter}] LM INFO{title_suffix}:',
          color='magenta',
      )

-   def _debug_prompt(self, prompt: message_lib.Message, call_counter: int):
+   def _debug_prompt(
+       self,
+       prompt: message_lib.Message,
+       call_counter: int,
+       usage: LMSamplingUsage,
+   ) -> None:
      """Outputs debugging information about the prompt."""
+     title_suffix = ''
+     if usage.prompt_tokens != 0:
+       title_suffix = pg.colored(f' ({usage.prompt_tokens} tokens)', 'red')
+
      console.write(
-         prompt,
-         title=f'\n[{call_counter}] PROMPT SENT TO LM:',
+         # We use metadata 'formatted_text' for scenarios where the prompt text
+         # is formatted by the LM.
+         prompt.get('formatted_text', prompt.text),
+         title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
          color='green',
      )
      referred_modalities = prompt.referred_modalities()
@@ -460,23 +768,40 @@ class LanguageModel(component.Component):
        )

    def _debug_response(
-       self, response: message_lib.Message, call_counter: int, elapse: float
-   ):
+       self,
+       response: message_lib.Message,
+       call_counter: int,
+       usage: LMSamplingUsage,
+       elapse: float
+   ) -> None:
      """Outputs debugging information about the response."""
+     title_suffix = ' ('
+     if usage.completion_tokens != 0:
+       title_suffix += f'{usage.completion_tokens} tokens '
+     title_suffix += f'in {elapse:.2f} seconds)'
+     title_suffix = pg.colored(title_suffix, 'red')
+
      console.write(
          str(response) + '\n',
-         title=f'\n[{call_counter}] LM RESPONSE (in {elapse:.2f} seconds):',
+         title=f'\n[{call_counter}] LM RESPONSE{title_suffix}:',
          color='blue',
      )

    def score(
        self,
-       prompt: str | message_lib.Message,
+       prompt: str | message_lib.Message | list[message_lib.Message],
        completions: list[str | message_lib.Message],
        **kwargs,
    ) -> list[LMScoringResult]:
      """Scores the given prompt."""
-     prompt = message_lib.UserMessage.from_value(prompt)
+     if isinstance(prompt, list):
+       if len(prompt) != len(completions):
+         raise ValueError(
+             'prompt and completions must have the same length.'
+         )
+       prompt = [message_lib.UserMessage.from_value(p) for p in prompt]
+     else:
+       prompt = message_lib.UserMessage.from_value(prompt)
      completions = [message_lib.UserMessage.from_value(c) for c in completions]

      call_counter = self._call_counter
@@ -492,7 +817,8 @@ class LanguageModel(component.Component):
        return scoring_results

    def _score(
-       self, prompt: message_lib.Message, completions: list[message_lib.Message]
+       self, prompt: message_lib.Message | list[message_lib.Message],
+       completions: list[message_lib.Message]
    ) -> list[LMScoringResult]:
      """Subclass to implement."""
      raise NotImplementedError(
@@ -501,7 +827,7 @@ class LanguageModel(component.Component):

    def _debug_score(
        self,
-       prompt: message_lib.Message,
+       prompt: message_lib.Message | list[message_lib.Message],
        completions: list[message_lib.Message],
        scoring_results: list[LMScoringResult],
        call_counter: int,
@@ -512,7 +838,7 @@ class LanguageModel(component.Component):
        debug = LMDebugMode.ALL if debug else LMDebugMode.NONE

      if debug & LMDebugMode.INFO:
-       self._debug_model_info(call_counter)
+       self._debug_model_info(call_counter, UsageNotAvailable())

      if debug & LMDebugMode.PROMPT:
        console.write(
@@ -520,15 +846,19 @@ class LanguageModel(component.Component):
            title=f'\n[{call_counter}] SCORING LM WITH PROMPT:',
            color='green',
        )
-       referred_modalities = prompt.referred_modalities()
-       if referred_modalities:
-         console.write(
-             pg.object_utils.kvlist_str(
-                 [(k, repr(v), None) for k, v in referred_modalities.items()]
-             ),
-             title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
-             color='green',
-         )
+       if isinstance(prompt, list):
+         referred_modalities_lst = [p.referred_modalities() for p in prompt]
+       else:
+         referred_modalities_lst = [prompt.referred_modalities(),]
+       if referred_modalities_lst:
+         for referred_modalities in referred_modalities_lst:
+           console.write(
+               pg.object_utils.kvlist_str(
+                   [(k, repr(v), None) for k, v in referred_modalities.items()]
+               ),
+               title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
+               color='green',
+           )

      if debug & LMDebugMode.RESPONSE:
        console.write(
@@ -548,3 +878,311 @@ class LanguageModel(component.Component):
            f'score: {r.score}',
            color='blue',
        )
+
+   def tokenize(
+       self,
+       prompt: str | message_lib.Message,
+       **kwargs,
+   ) -> list[tuple[str | bytes, int]]:
+     """Tokenizes the given prompt."""
+     prompt = message_lib.UserMessage.from_value(prompt)
+     call_counter = self._call_counter
+     self._call_counter += 1
+
+     with component.context(override_attrs=True, **kwargs):
+       request_start = time.time()
+       tokens = self._tokenize(prompt)
+       elapse = time.time() - request_start
+       self._debug_tokenize(prompt, tokens, call_counter, elapse)
+       return tokens
+
+   def _tokenize(
+       self, prompt: message_lib.Message
+   ) -> list[tuple[str | bytes, int]]:
+     """Subclass to implement."""
+     raise NotImplementedError(
+         f'{self.__class__.__name__} does not support tokenization.'
+     )
+
+   def _debug_tokenize(
+       self,
+       prompt: message_lib.Message,
+       tokens: list[tuple[str | bytes, int]],
+       call_counter: int,
+       elapse: float,
+   ):
+     debug = self.debug
+     if isinstance(debug, bool):
+       debug = LMDebugMode.ALL if debug else LMDebugMode.NONE
+
+     if debug & LMDebugMode.INFO:
+       self._debug_model_info(call_counter, UsageNotAvailable())
+
+     if debug & LMDebugMode.PROMPT:
+       console.write(
+           prompt,
+           title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
+           color='green',
+       )
+       referred_modalities_lst = [prompt.referred_modalities(),]
+       if referred_modalities_lst:
+         for referred_modalities in referred_modalities_lst:
+           console.write(
+               pg.object_utils.kvlist_str(
+                   [(k, repr(v), None) for k, v in referred_modalities.items()]
+               ),
+               title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
+               color='green',
+           )
+
+     if debug & LMDebugMode.RESPONSE:
+       console.write(
+           tokens,
+           title=(
+               f'\n[{call_counter}] {len(tokens)} TOKENS RETURNED '
+               f'(in {elapse:.2f} seconds):'
+           ),
+           color='blue',
+       )
+
+   def rate_to_max_concurrency(
+       self, requests_per_min: float = 0, tokens_per_min: float = 0
+   ) -> int:
+     """Converts a rate to a max concurrency."""
+     if tokens_per_min > 0:
+       return max(int(tokens_per_min / TOKENS_PER_REQUEST / 60), 1)
+     elif requests_per_min > 0:
+       return max(int(requests_per_min / 60), 1)  # Max concurrency can't be zero
+     else:
+       return DEFAULT_MAX_CONCURRENCY  # Default of 1
+
+
+ class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
+   """Usage sumary."""
+
+   class AggregatedUsage(pg.Object):
+     """Aggregated usage."""
+
+     total: LMSamplingUsage = LMSamplingUsage(0, 0, 0, 0, 0.0)
+     breakdown: dict[str, LMSamplingUsage] = {}
+
+     def __bool__(self) -> bool:
+       """Returns True if the usage is non-empty."""
+       return bool(self.breakdown)
+
+     def add(
+         self,
+         model_id: str,
+         usage: LMSamplingUsage,
+     ) -> None:
+       """Adds an entry to the breakdown."""
+       aggregated = self.breakdown.get(model_id, None)
+       with pg.notify_on_change(False):
+         self.breakdown[model_id] = usage + aggregated
+         self.rebind(
+             total=self.total + usage,
+             raise_on_no_change=False
+         )
+
+     def merge(self, other: 'UsageSummary.AggregatedUsage') -> None:
+       """Merges the usage summary."""
+       with pg.notify_on_change(False):
+         for model_id, usage in other.breakdown.items():
+           self.add(model_id, usage)
+
+   def _on_bound(self):
+     super()._on_bound()
+     self._usage_badge = None
+     self._lock = threading.Lock()
+
+   @property
+   def total(self) -> LMSamplingUsage:
+     return self.cached.total + self.uncached.total
+
+   def add(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
+     """Updates the usage summary."""
+     with self._lock:
+       if is_cached:
+         usage.rebind(estimated_cost=0.0, skip_notification=True)
+         self.cached.add(model_id, usage)
+       else:
+         self.uncached.add(model_id, usage)
+       self._update_view()
+
+   def merge(self, other: 'UsageSummary', as_cached: bool = False) -> None:
+     """Aggregates the usage summary.
+
+     Args:
+       other: The usage summary to merge.
+       as_cached: Whether to merge the usage summary as cached.
+     """
+     with self._lock:
+       self.cached.merge(other.cached)
+       if as_cached:
+         self.cached.merge(other.uncached)
+       else:
+         self.uncached.merge(other.uncached)
+       self._update_view()
+
+   def _sym_nondefault(self) -> dict[str, Any]:
+     """Overrides nondefault values so volatile values are not included."""
+     return dict()
+
+   #
+   # Html views for the usage summary.
+   #
+
+   def _update_view(self):
+     if self._usage_badge is not None:
+       self._usage_badge.update(
+           self._badge_text(),
+           tooltip=pg.format(
+               self, verbose=False, custom_format=self._tooltip_format
+           ),
+           styles=dict(color=self._badge_color()),
+       )
+
+   def _badge_text(self) -> str:
+     if self.total.estimated_cost is not None:
+       return f'{self.total.estimated_cost:.3f}'
+     return '0.000'
+
+   def _badge_color(self) -> str | None:
+     if self.total.estimated_cost is None or self.total.estimated_cost < 1.0:
+       return None
+
+     # Step 1: The normal cost range is around 1e-3 to 1e5.
+     # Therefore we normalize the log10 value from [-3, 5] to [0, 1].
+     normalized_value = (math.log10(self.total.estimated_cost) + 3) / (5 + 3)
+
+     # Step 2: Interpolate between green and red
+     red = int(255 * normalized_value)
+     green = int(255 * (1 - normalized_value))
+     return f'rgb({red}, {green}, 0)'
+
+   def _tooltip_format(self, v, root_indent):
+     del root_indent
+     if isinstance(v, int):
+       return f'{v:,}'
+     if isinstance(v, float):
+       return f'{v:,.3f}'
+     return None
+
+   def _html_tree_view(
+       self,
+       *,
+       view: pg.views.HtmlTreeView,
+       extra_flags: dict[str, Any] | None = None,
+       **kwargs
+   ) -> pg.Html:
+     extra_flags = extra_flags or {}
+     as_badge = extra_flags.pop('as_badge', False)
+     interactive = extra_flags.get('interactive', True)
+     if as_badge:
+       usage_badge = self._usage_badge
+       if usage_badge is None:
+         usage_badge = pg.views.html.controls.Badge(
+             self._badge_text(),
+             tooltip=pg.format(
+                 self, custom_format=self._tooltip_format, verbose=False
+             ),
+             css_classes=['usage-summary'],
+             styles=dict(color=self._badge_color()),
+             interactive=True,
+         )
+         if interactive:
+           self._usage_badge = usage_badge
+       return usage_badge.to_html()
+     return super()._html_tree_view(
+         view=view,
+         extra_flags=extra_flags,
+         **kwargs
+     )
+
+   @classmethod
+   @functools.cache
+   def _html_tree_view_css_styles(cls) -> list[str]:
+     return super()._html_tree_view_css_styles() + [
+         """
+         .usage-summary.label {
+           display: inline-flex;
+           border-radius: 5px;
+           padding: 5px;
+           background-color: #f1f1f1;
+           color: #CCC;
+         }
+         .usage-summary.label::before {
+           content: '$';
+         }
+         """
+     ]
+
+ pg.members(
+     dict(
+         cached=(
+             pg.typing.Object(
+                 UsageSummary.AggregatedUsage,
+                 default=UsageSummary.AggregatedUsage()
+             ),
+             'Aggregated usages for cached LLM calls.'
+         ),
+         uncached=(
+             pg.typing.Object(
+                 UsageSummary.AggregatedUsage,
+                 default=UsageSummary.AggregatedUsage()
+             ),
+             'Aggregated usages for uncached LLM calls.'
+         ),
+     )
+ )(UsageSummary)
+
+
+ class _UsageTracker:
+   """Usage tracker."""
+
+   def __init__(self, model_ids: set[str] | None):
+     self.model_ids = model_ids
+     self.usage_summary = UsageSummary()
+
+   def track(self, model_id: str, usage: LMSamplingUsage, is_cached: bool):
+     if self.model_ids is None or model_id in self.model_ids:
+       self.usage_summary.add(model_id, usage, is_cached)
+
+
+ @contextlib.contextmanager
+ def track_usages(
+     *lm: Union[str, LanguageModel]
+ ) -> Iterator[UsageSummary]:
+   """Context manager to track the usages of all language models in scope.
+
+   `lf.track_usages` works with threads spawned by `lf.concurrent_map` and
+   `lf.concurrent_execute`.
+
+   Example:
+     ```
+     lm = lf.llms.GeminiPro1()
+     with lf.track_usages() as usages:
+       # invoke any code that will call LLMs.
+
+     print(usages[lm.model_id])
+     ```
+
+   Args:
+     *lm: The language model(s) to track. If None, track all models in scope.
+
+   Yields:
+     A dictionary of model ID to usage. If a model does not supports usage
+     counting, the dict entry will be None.
+   """
+   if not lm:
+     model_ids = None
+   else:
+     model_ids = [m.model_id if isinstance(m, LanguageModel) else m for m in lm]
+
+   trackers = component.context_value('__usage_trackers__', [])
+   tracker = _UsageTracker(set(model_ids) if model_ids else None)
+   with component.context(__usage_trackers__=trackers + [tracker]):
+     try:
+       yield tracker.usage_summary
+     finally:
+       pass
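
The hunks above introduce per-call usage accounting (`RetryStats`, `LMSamplingUsage`, `UsageSummary`) and the `lf.track_usages` context manager, which registers a tracker that every `LanguageModel.sample` call in scope reports into. Below is a minimal sketch of how these pieces compose; it is not part of the diff, and the `lf.llms.Echo` fake model and its usage reporting are assumptions about the surrounding library rather than something shown in this file.

# Hypothetical usage sketch for the new usage-accounting API (assumptions noted above).
import langfun as lf
from langfun.core import language_model as lm_lib

# LMSamplingUsage values support `+`: token counts, request counts, estimated
# cost and retry stats are summed, so per-call usage can be aggregated.
a = lm_lib.LMSamplingUsage(prompt_tokens=10, completion_tokens=5, total_tokens=15)
b = lm_lib.LMSamplingUsage(prompt_tokens=20, completion_tokens=10, total_tokens=30)
print((a + b).total_tokens)           # 45
print((a + b).average_prompt_tokens)  # 15, averaged over the 2 requests

# track_usages() yields a UsageSummary that LM calls made in scope update.
with lf.track_usages() as usage_summary:
  lf.llms.Echo()('hello')  # assumed: the fake Echo model reports usage
print(usage_summary.total.num_requests)
print(usage_summary.uncached.breakdown)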