langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/__init__.py +2 -0
- langfun/core/agentic/__init__.py +4 -1
- langfun/core/agentic/action.py +447 -29
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +149 -21
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +1 -0
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +47 -43
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +64 -6
- langfun/core/eval/v2/checkpointing_test.py +9 -2
- langfun/core/eval/v2/eval_test_helper.py +103 -2
- langfun/core/eval/v2/evaluation.py +91 -16
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +50 -40
- langfun/core/eval/v2/example_test.py +16 -8
- langfun/core/eval/v2/experiment.py +74 -8
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking.py +12 -3
- langfun/core/eval/v2/progress_tracking_test.py +6 -1
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +24 -6
- langfun/core/eval/v2/runners/__init__.py +30 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +59 -142
- langfun/core/eval/v2/runners/beam.py +341 -0
- langfun/core/eval/v2/runners/beam_test.py +131 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +76 -0
- langfun/core/eval/v2/runners/parallel.py +100 -0
- langfun/core/eval/v2/runners/parallel_test.py +95 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +172 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +7 -5
- langfun/core/language_model.py +141 -21
- langfun/core/language_model_test.py +54 -3
- langfun/core/llms/__init__.py +9 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +55 -17
- langfun/core/llms/gemini_test.py +84 -0
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +36 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +58 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/client_test.py +8 -35
- langfun/core/mcp/session.py +94 -29
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/tool.py +151 -22
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +64 -3
- langfun/core/modalities/mime_test.py +11 -0
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +215 -142
- langfun/core/structured/querying_test.py +65 -29
- langfun/core/structured/schema/__init__.py +49 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +174 -49
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +8 -2
- langfun/env/base_environment.py +320 -128
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +92 -15
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +84 -361
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +1 -1
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +95 -98
- langfun/env/event_handlers/event_logger_test.py +21 -21
- langfun/env/event_handlers/metric_writer.py +225 -140
- langfun/env/event_handlers/metric_writer_test.py +23 -6
- langfun/env/interface.py +854 -40
- langfun/env/interface_test.py +112 -2
- langfun/env/load_balancers_test.py +23 -2
- langfun/env/test_utils.py +126 -84
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
- langfun-0.1.2.dev202511270805.dist-info/RECORD +215 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun/env/base_test.py +0 -1481
- langfun/env/event_handlers/base.py +0 -350
- langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0
langfun/core/language_model.py
CHANGED
@@ -53,6 +53,10 @@ class RetryableLMError(LMError):
   """Base class for LLM errors that can be solved by retrying."""


+class EmptyGenerationError(RetryableLMError):
+  """Error for empty generaition."""
+
+
 class RateLimitError(RetryableLMError):
   """Error for rate limit reached."""

@@ -478,7 +482,7 @@ class UsageNotAvailable(LMSamplingUsage):


 class LMSamplingResult(pg.Object):
-  """
+  """The result from a language model sampling."""

   samples: Annotated[
       list[LMSample],
@@ -575,6 +579,14 @@ class LMSamplingOptions(component.Component):
       int | None, 'Number of max thinking tokens.'
   ] = None

+  thinking_level: Annotated[
+      Literal['low', 'high'] | None,
+      (
+          'Thinking level for Gemini models. High is for complex tasks, '
+          'while low is for faster responses.'
+      ),
+  ] = None
+
   reasoning_effort: Annotated[
       Literal['low', 'medium', 'high'] | None,
       (
@@ -584,6 +596,15 @@ class LMSamplingOptions(component.Component):
       ),
   ] = None

+  extras: Annotated[
+      dict[str, Any],
+      (
+          'Extra arguments (e.g. configuration for tool calls) to pass to '
+          'the model. This is model-specific, please check model '
+          'implementation to see how to use this.'
+      ),
+  ] = {}
+
   def cache_key(self) -> tuple[Any, ...]:
     """Returns a tuple of current values as cache key."""
     return (
@@ -672,13 +693,91 @@ class LMDebugMode(enum.IntFlag):


 class LanguageModel(component.Component):
-  """Interface
-
-
-
-
-
-
+  """Interface for language model.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` method and `sample` method allow generating
+    text completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options like `temperature`, `max_tokens`, etc., can be customized
+  at model creation, or overridden at call time or via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
   """

   sampling_options: LMSamplingOptions = LMSamplingOptions()
@@ -989,10 +1088,32 @@ class LanguageModel(component.Component):
     prompts = [message_lib.UserMessage.from_value(p) for p in prompts]

     with component.context(override_attrs=True, **kwargs):
-
-
-
-
+
+      def _sample_with_retry():
+        if self.cache is None:
+          results = self._sample(prompts)
+        else:
+          results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+        for i, result in enumerate(results):
+          for sample in result.samples:
+            if not sample.response.text:
+              if self.cache is not None:
+                self.cache.delete(self, prompts[i], seed=cache_seed)
+              raise EmptyGenerationError(
+                  f'Empty generation encountered from model {self.model_id}.'
+              )
+        return results
+
+      retry_fn = concurrent.with_retry(
+          _sample_with_retry,
+          retry_on_errors=EmptyGenerationError,
+          max_attempts=self.max_attempts,
+          retry_interval=self.retry_interval,
+          exponential_backoff=self.exponential_backoff,
+          max_retry_interval=self.max_retry_interval,
+      )
+      results = retry_fn()

     for prompt, result in zip(prompts, results):

@@ -1001,7 +1122,6 @@ class LanguageModel(component.Component):

       for sample in result.samples:
         # Update metadata for response message.
-
         response = sample.response
         response.metadata.score = sample.score
         response.metadata.logprobs = sample.logprobs
@@ -1244,11 +1364,11 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
           color='green',
       )
-
-      if referred_modalities:
+      if prompt.referred_modalities:
         console.write(
             pg.object_utils.kvlist_str(
-                [(k, repr(v), None)
+                [(k, repr(v), None)
+                 for k, v in prompt.referred_modalities.items()]
             ),
             title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
             color='green',
@@ -1334,9 +1454,9 @@ class LanguageModel(component.Component):
           color='green',
       )
       if isinstance(prompt, list):
-        referred_modalities_lst = [p.referred_modalities
+        referred_modalities_lst = [p.referred_modalities for p in prompt]
       else:
-        referred_modalities_lst = [prompt.referred_modalities
+        referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
@@ -1411,7 +1531,7 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
           color='green',
       )
-      referred_modalities_lst = [prompt.referred_modalities
+      referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
@@ -1439,7 +1559,7 @@ class LanguageModel(component.Component):
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.
@@ -1512,7 +1632,7 @@ class _ConcurrencyControl:


 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage
+  """Usage summary."""

   class AggregatedUsage(pg.Object):
     """Aggregated usage."""
langfun/core/language_model_test.py
CHANGED

@@ -591,6 +591,51 @@ class LanguageModelTest(unittest.TestCase):
     lm = MockModel(cache=cache, top_k=1)
     self.assertEqual(lm('a'), 'a')

+  def test_empty_generation_error(self):
+    class MockModelWithEmptyResponse(MockModel):
+      def _sample(self,
+                  prompts: list[message_lib.Message]
+                  ) -> list[lm_lib.LMSamplingResult]:
+        return [lm_lib.LMSamplingResult(
+            [lm_lib.LMSample(response='')],
+            usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0)
+        )]
+    lm = MockModelWithEmptyResponse(max_attempts=1, retry_interval=0)
+    with self.assertRaisesRegex(
+        concurrent.RetryError, 'Empty generation encountered'
+    ):
+      lm('a')
+
+  def test_empty_generation_retry(self):
+    class MockModelWithEmptyThenValid(MockModel):
+      attempt_count: int = 0
+
+      def _sample(
+          self, prompts: list[message_lib.Message]
+      ) -> list[lm_lib.LMSamplingResult]:
+        self.rebind(attempt_count=self.attempt_count + 1)
+        if self.attempt_count == 1:
+          # First attempt returns empty
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='')],
+                  usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0),
+              )
+          ]
+        else:
+          # Subsequent attempts return valid response
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='valid response')],
+                  usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
+              )
+          ]
+
+    lm = MockModelWithEmptyThenValid(max_attempts=3, retry_interval=0)
+    result = lm('a')
+    self.assertEqual(result.text, 'valid response')
+    self.assertEqual(lm.attempt_count, 2)
+
   def test_estimate_max_concurrency(self):
     self.assertIsNone(lm_lib.LanguageModel.estimate_max_concurrency(None, None))
     self.assertEqual(
@@ -656,11 +701,17 @@ class LanguageModelTest(unittest.TestCase):

     string_io = io.StringIO()
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
+    image = Image()
     with contextlib.redirect_stdout(string_io):
       self.assertEqual(
-          lm(
-
-
+          lm(
+              message_lib.UserMessage(
+                  f'hi <<[[{image.id}]]>>',
+                  referred_modalities=[image],
+              ),
+              debug=True
+          ),
+          f'hi <<[[{image.id}]]>>'
       )

     debug_info = string_io.getvalue()
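The new `thinking_level` and `extras` sampling options behave like the existing ones: they can be set at model creation or overridden per call, as described in the new `LanguageModel` docstring. A minimal sketch (not part of the diff; the model class and the `extras` keys below are illustrative, and which fields a given model honors is model-specific):

```python
import langfun as lf

# `thinking_level` is declared for Gemini models; Gemini3ProPreview is one of
# the newly exported classes (see llms/__init__.py below).
lm = lf.llms.Gemini3ProPreview(thinking_level='low')

# `extras` forwards model-specific arguments to the model implementation
# (for example, Anthropic.request merges `options.extras` into the request
# args). The key below is a placeholder, not a documented field.
r = lm('1 + 1 =', extras={'provider_specific_flag': True})
print(r.text)
```

Note that with this change an empty model response raises `EmptyGenerationError` (and evicts the cached entry, if any); the call is retried up to `max_attempts` before surfacing as a `concurrent.RetryError`, as exercised by the tests above.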
langfun/core/llms/__init__.py
CHANGED
@@ -30,7 +30,8 @@ from langfun.core.llms.compositional import RandomChoice

 # Base models by request/response protocol.
 from langfun.core.llms.rest import REST
-from langfun.core.llms.openai_compatible import
+from langfun.core.llms.openai_compatible import OpenAIChatCompletionAPI
+from langfun.core.llms.openai_compatible import OpenAIResponsesAPI
 from langfun.core.llms.gemini import Gemini
 from langfun.core.llms.anthropic import Anthropic

@@ -41,6 +42,7 @@ from langfun.core.llms.azure_openai import AzureOpenAI

 # Gemini models.
 from langfun.core.llms.google_genai import GenAI
+from langfun.core.llms.google_genai import Gemini3ProPreview
 from langfun.core.llms.google_genai import Gemini25Pro
 from langfun.core.llms.google_genai import Gemini25Flash
 from langfun.core.llms.google_genai import Gemini25ProPreview_20250605
@@ -89,6 +91,7 @@ from langfun.core.llms.vertexai import VertexAIGemini25ProPreview_20250605
 from langfun.core.llms.vertexai import VertexAIGemini25Pro
 from langfun.core.llms.vertexai import VertexAIGemini25Flash
 from langfun.core.llms.vertexai import VertexAIGemini25FlashImagePreview
+from langfun.core.llms.vertexai import VertexAIGemini3ProPreview

 # For backward compatibility.
 GeminiPro1_5 = Gemini15Pro
@@ -151,6 +154,9 @@ from langfun.core.llms.openai import Gpt35

 # Anthropic models.

+from langfun.core.llms.anthropic import Claude45
+from langfun.core.llms.anthropic import Claude45Haiku_20251001
+from langfun.core.llms.anthropic import Claude45Sonnet_20250929
 from langfun.core.llms.anthropic import Claude4
 from langfun.core.llms.anthropic import Claude4Sonnet_20250514
 from langfun.core.llms.anthropic import Claude4Opus_20250514
@@ -168,6 +174,8 @@ from langfun.core.llms.anthropic import Claude3Haiku
 from langfun.core.llms.anthropic import Claude3Haiku_20240307

 from langfun.core.llms.vertexai import VertexAIAnthropic
+from langfun.core.llms.vertexai import VertexAIClaude45Haiku_20251001
+from langfun.core.llms.vertexai import VertexAIClaude45Sonnet_20250929
 from langfun.core.llms.vertexai import VertexAIClaude4Opus_20250514
 from langfun.core.llms.vertexai import VertexAIClaude4Sonnet_20250514
 from langfun.core.llms.vertexai import VertexAIClaude37Sonnet_20250219
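The newly exported symbols are ordinary langfun models and can be instantiated like any other. A hedged sketch (not part of the diff; credentials are assumed to come from the usual environment variables or application-default credentials):

```python
import langfun as lf

gemini3 = lf.llms.Gemini3ProPreview()          # Google GenAI API key from env.
claude45 = lf.llms.Claude45Sonnet_20250929()   # ANTHROPIC_API_KEY from env.
vertex_gemini3 = lf.llms.VertexAIGemini3ProPreview()  # Vertex AI credentials.

print(claude45('Say hi in one word.').text)
```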
langfun/core/llms/anthropic.py
CHANGED
@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):


 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            # This rate limit is a total limit that applies to combined traffic
+            # across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}

 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+
+  2. via environment variable `ANTHROPIC_API_KEY`.

-
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """

   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
     args.pop('temperature', None)
     args.pop('top_k', None)
     args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args

   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -679,6 +816,24 @@ class Anthropic(rest.REST):
     return super()._error(status_code, content)


+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""

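Since `Anthropic.request` now merges `options.extras` into the request args, model-specific request fields can be threaded through the sampling options. A sketch under the assumption that call-time overrides apply to `extras` the same way they apply to `temperature` (the payload below is a placeholder, not a vetted Messages API configuration):

```python
import langfun as lf

# One of the Claude 4.5 classes added above.
lm = lf.llms.Claude45Haiku_20251001()

# Keys placed in `extras` end up in the JSON body sent to the Messages API,
# alongside the standard fields computed by Anthropic.request.
r = lm(
    'List three colors.',
    extras={'metadata': {'user_id': 'example-user'}},  # placeholder field
)
print(r.text)
```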
langfun/core/llms/azure_openai.py
CHANGED

@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+  ```
+  2. via environment variable `AZURE_OPENAI_API_KEY`.
   """

   deployment_name: Annotated[
langfun/core/llms/cache/base.py
CHANGED
@@ -22,13 +22,33 @@ import langfun.core as lf

 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None


 class LMCacheBase(lf.LMCache):
-  """
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """

   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
@@ -121,4 +141,6 @@ class LMCacheBase(lf.LMCache):

 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-
+  # prompt text already contains the modality id for referenced modality
+  # objects, so no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)
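For reference, a minimal sketch of how the cache pieces fit together (assuming `lf.llms.cache.InMemory` and a `ttl` attribute on the cache, per the TTL support described in the `LMCacheBase` docstring; check langfun/core/llms/cache/in_memory.py for the exact signature):

```python
import langfun as lf

cache = lf.llms.cache.InMemory(ttl=3600)  # Assumed: entries expire after an hour.
lm = lf.llms.Echo(cache=cache)            # Fake model that echoes the prompt.

lm('hello')         # Miss: sampled and stored under default_key(...).
lm('hello')         # Hit: same prompt text, sampling options and seed.
print(cache.stats)  # Hits/misses/updates tracked by LMCacheBase.
```

Per the new comment in `default_key`, the prompt text already embeds the modality ids of referred modality objects, so the default key of (prompt text, sampling options cache key, seed) distinguishes prompts that reference different images or files.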