langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (155)
  1. langfun/core/__init__.py +2 -0
  2. langfun/core/agentic/__init__.py +4 -1
  3. langfun/core/agentic/action.py +447 -29
  4. langfun/core/agentic/action_eval.py +9 -2
  5. langfun/core/agentic/action_test.py +149 -21
  6. langfun/core/async_support.py +32 -3
  7. langfun/core/coding/python/correction.py +19 -9
  8. langfun/core/coding/python/execution.py +14 -12
  9. langfun/core/coding/python/generation.py +21 -16
  10. langfun/core/coding/python/sandboxing.py +23 -3
  11. langfun/core/component.py +42 -3
  12. langfun/core/concurrent.py +70 -6
  13. langfun/core/concurrent_test.py +1 -0
  14. langfun/core/console.py +1 -1
  15. langfun/core/data/conversion/anthropic.py +12 -3
  16. langfun/core/data/conversion/anthropic_test.py +8 -6
  17. langfun/core/data/conversion/gemini.py +9 -2
  18. langfun/core/data/conversion/gemini_test.py +12 -9
  19. langfun/core/data/conversion/openai.py +145 -31
  20. langfun/core/data/conversion/openai_test.py +161 -17
  21. langfun/core/eval/base.py +47 -43
  22. langfun/core/eval/base_test.py +5 -5
  23. langfun/core/eval/matching.py +5 -2
  24. langfun/core/eval/patching.py +3 -3
  25. langfun/core/eval/scoring.py +4 -3
  26. langfun/core/eval/v2/__init__.py +1 -0
  27. langfun/core/eval/v2/checkpointing.py +64 -6
  28. langfun/core/eval/v2/checkpointing_test.py +9 -2
  29. langfun/core/eval/v2/eval_test_helper.py +103 -2
  30. langfun/core/eval/v2/evaluation.py +91 -16
  31. langfun/core/eval/v2/evaluation_test.py +9 -3
  32. langfun/core/eval/v2/example.py +50 -40
  33. langfun/core/eval/v2/example_test.py +16 -8
  34. langfun/core/eval/v2/experiment.py +74 -8
  35. langfun/core/eval/v2/experiment_test.py +19 -0
  36. langfun/core/eval/v2/metric_values.py +31 -3
  37. langfun/core/eval/v2/metric_values_test.py +32 -0
  38. langfun/core/eval/v2/metrics.py +157 -44
  39. langfun/core/eval/v2/metrics_test.py +39 -18
  40. langfun/core/eval/v2/progress.py +30 -1
  41. langfun/core/eval/v2/progress_test.py +27 -0
  42. langfun/core/eval/v2/progress_tracking.py +12 -3
  43. langfun/core/eval/v2/progress_tracking_test.py +6 -1
  44. langfun/core/eval/v2/reporting.py +90 -71
  45. langfun/core/eval/v2/reporting_test.py +24 -6
  46. langfun/core/eval/v2/runners/__init__.py +30 -0
  47. langfun/core/eval/v2/{runners.py → runners/base.py} +59 -142
  48. langfun/core/eval/v2/runners/beam.py +341 -0
  49. langfun/core/eval/v2/runners/beam_test.py +131 -0
  50. langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
  51. langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
  52. langfun/core/eval/v2/runners/debug.py +40 -0
  53. langfun/core/eval/v2/runners/debug_test.py +76 -0
  54. langfun/core/eval/v2/runners/parallel.py +100 -0
  55. langfun/core/eval/v2/runners/parallel_test.py +95 -0
  56. langfun/core/eval/v2/runners/sequential.py +47 -0
  57. langfun/core/eval/v2/runners/sequential_test.py +172 -0
  58. langfun/core/langfunc.py +45 -130
  59. langfun/core/langfunc_test.py +7 -5
  60. langfun/core/language_model.py +141 -21
  61. langfun/core/language_model_test.py +54 -3
  62. langfun/core/llms/__init__.py +9 -1
  63. langfun/core/llms/anthropic.py +157 -2
  64. langfun/core/llms/azure_openai.py +29 -17
  65. langfun/core/llms/cache/base.py +25 -3
  66. langfun/core/llms/cache/in_memory.py +48 -7
  67. langfun/core/llms/cache/in_memory_test.py +14 -4
  68. langfun/core/llms/compositional.py +25 -1
  69. langfun/core/llms/deepseek.py +30 -2
  70. langfun/core/llms/fake.py +32 -1
  71. langfun/core/llms/gemini.py +55 -17
  72. langfun/core/llms/gemini_test.py +84 -0
  73. langfun/core/llms/google_genai.py +34 -1
  74. langfun/core/llms/groq.py +28 -3
  75. langfun/core/llms/llama_cpp.py +23 -4
  76. langfun/core/llms/openai.py +36 -3
  77. langfun/core/llms/openai_compatible.py +148 -27
  78. langfun/core/llms/openai_compatible_test.py +207 -20
  79. langfun/core/llms/openai_test.py +0 -2
  80. langfun/core/llms/rest.py +12 -1
  81. langfun/core/llms/vertexai.py +58 -8
  82. langfun/core/logging.py +1 -1
  83. langfun/core/mcp/client.py +77 -22
  84. langfun/core/mcp/client_test.py +8 -35
  85. langfun/core/mcp/session.py +94 -29
  86. langfun/core/mcp/session_test.py +54 -0
  87. langfun/core/mcp/tool.py +151 -22
  88. langfun/core/mcp/tool_test.py +197 -0
  89. langfun/core/memory.py +1 -0
  90. langfun/core/message.py +160 -55
  91. langfun/core/message_test.py +65 -81
  92. langfun/core/modalities/__init__.py +8 -0
  93. langfun/core/modalities/audio.py +21 -1
  94. langfun/core/modalities/image.py +19 -1
  95. langfun/core/modalities/mime.py +64 -3
  96. langfun/core/modalities/mime_test.py +11 -0
  97. langfun/core/modalities/pdf.py +19 -1
  98. langfun/core/modalities/video.py +21 -1
  99. langfun/core/modality.py +167 -29
  100. langfun/core/modality_test.py +42 -12
  101. langfun/core/natural_language.py +1 -1
  102. langfun/core/sampling.py +4 -4
  103. langfun/core/sampling_test.py +20 -4
  104. langfun/core/structured/__init__.py +2 -24
  105. langfun/core/structured/completion.py +34 -44
  106. langfun/core/structured/completion_test.py +23 -43
  107. langfun/core/structured/description.py +54 -50
  108. langfun/core/structured/function_generation.py +29 -12
  109. langfun/core/structured/mapping.py +81 -37
  110. langfun/core/structured/parsing.py +95 -79
  111. langfun/core/structured/parsing_test.py +0 -3
  112. langfun/core/structured/querying.py +215 -142
  113. langfun/core/structured/querying_test.py +65 -29
  114. langfun/core/structured/schema/__init__.py +49 -0
  115. langfun/core/structured/schema/base.py +664 -0
  116. langfun/core/structured/schema/base_test.py +531 -0
  117. langfun/core/structured/schema/json.py +174 -0
  118. langfun/core/structured/schema/json_test.py +121 -0
  119. langfun/core/structured/schema/python.py +316 -0
  120. langfun/core/structured/schema/python_test.py +410 -0
  121. langfun/core/structured/schema_generation.py +33 -14
  122. langfun/core/structured/scoring.py +47 -36
  123. langfun/core/structured/tokenization.py +26 -11
  124. langfun/core/subscription.py +2 -2
  125. langfun/core/template.py +174 -49
  126. langfun/core/template_test.py +123 -17
  127. langfun/env/__init__.py +8 -2
  128. langfun/env/base_environment.py +320 -128
  129. langfun/env/base_environment_test.py +473 -0
  130. langfun/env/base_feature.py +92 -15
  131. langfun/env/base_feature_test.py +228 -0
  132. langfun/env/base_sandbox.py +84 -361
  133. langfun/env/base_sandbox_test.py +1235 -0
  134. langfun/env/event_handlers/__init__.py +1 -1
  135. langfun/env/event_handlers/chain.py +233 -0
  136. langfun/env/event_handlers/chain_test.py +253 -0
  137. langfun/env/event_handlers/event_logger.py +95 -98
  138. langfun/env/event_handlers/event_logger_test.py +21 -21
  139. langfun/env/event_handlers/metric_writer.py +225 -140
  140. langfun/env/event_handlers/metric_writer_test.py +23 -6
  141. langfun/env/interface.py +854 -40
  142. langfun/env/interface_test.py +112 -2
  143. langfun/env/load_balancers_test.py +23 -2
  144. langfun/env/test_utils.py +126 -84
  145. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
  146. langfun-0.1.2.dev202511270805.dist-info/RECORD +215 -0
  147. langfun/core/eval/v2/runners_test.py +0 -343
  148. langfun/core/structured/schema.py +0 -987
  149. langfun/core/structured/schema_test.py +0 -982
  150. langfun/env/base_test.py +0 -1481
  151. langfun/env/event_handlers/base.py +0 -350
  152. langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
  153. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
  154. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
  155. {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0
langfun/core/language_model.py

@@ -53,6 +53,10 @@ class RetryableLMError(LMError):
   """Base class for LLM errors that can be solved by retrying."""
 
 
+class EmptyGenerationError(RetryableLMError):
+  """Error for empty generation."""
+
+
 class RateLimitError(RetryableLMError):
   """Error for rate limit reached."""
 
@@ -478,7 +482,7 @@ class UsageNotAvailable(LMSamplingUsage):
 
 
 class LMSamplingResult(pg.Object):
-  """Language model response."""
+  """The result from a language model sampling."""
 
   samples: Annotated[
       list[LMSample],
@@ -575,6 +579,14 @@ class LMSamplingOptions(component.Component):
       int | None, 'Number of max thinking tokens.'
   ] = None
 
+  thinking_level: Annotated[
+      Literal['low', 'high'] | None,
+      (
+          'Thinking level for Gemini models. High is for complex tasks, '
+          'while low is for faster responses.'
+      ),
+  ] = None
+
   reasoning_effort: Annotated[
       Literal['low', 'medium', 'high'] | None,
      (
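
The new `thinking_level` option slots into the existing sampling-options flow, so it can be set at model construction like any other option. A minimal sketch, assuming one of the Gemini classes exported by `lf.llms` (the model class and value are illustrative):

```python
import langfun as lf

# Hedged sketch: per the annotation above, only Gemini models honor
# `thinking_level`; 'high' suits complex tasks, 'low' favors latency.
lm = lf.llms.Gemini25Pro(
    sampling_options=lf.LMSamplingOptions(thinking_level='high')
)
```
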
@@ -584,6 +596,15 @@ class LMSamplingOptions(component.Component):
       ),
   ] = None
 
+  extras: Annotated[
+      dict[str, Any],
+      (
+          'Extra arguments (e.g. configuration for tool calls) to pass to '
+          'the model. This is model-specific; please check the model '
+          'implementation to see how to use it.'
+      ),
+  ] = {}
+
   def cache_key(self) -> tuple[Any, ...]:
     """Returns a tuple of current values as cache key."""
     return (
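
`extras` is the companion escape hatch for provider-specific request arguments; a later hunk in this diff shows the Anthropic client merging them into its request args. A sketch, with a hypothetical payload key:

```python
import langfun as lf

# Illustrative only: keys in `extras` are passed through to the model
# implementation verbatim; supported keys are provider-specific.
lm = lf.llms.Claude45Sonnet_20250929(
    sampling_options=lf.LMSamplingOptions(
        extras={'tool_choice': {'type': 'auto'}}  # hypothetical payload
    )
)
```
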
@@ -672,13 +693,91 @@ class LMDebugMode(enum.IntFlag):
 
 
 class LanguageModel(component.Component):
-  """Interface of a language model.
-
-  Language models are at the center of LLM-based agents. ``LanguageModel``
-  is the interface to interact with different language modles.
-
-  In langfun, users can use different language models with the same agents,
-  allowing fast prototype, as well as side-by-side comparisons.
+  """Interface for language models.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` and `sample` methods allow generating
+    text completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options such as `temperature` and `max_tokens` can be customized
+  at model creation, overridden at call time, or set via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
   """
 
   sampling_options: LMSamplingOptions = LMSamplingOptions()
@@ -989,10 +1088,32 @@ class LanguageModel(component.Component):
     prompts = [message_lib.UserMessage.from_value(p) for p in prompts]
 
     with component.context(override_attrs=True, **kwargs):
-      if self.cache is None:
-        results = self._sample(prompts)
-      else:
-        results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+      def _sample_with_retry():
+        if self.cache is None:
+          results = self._sample(prompts)
+        else:
+          results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+        for i, result in enumerate(results):
+          for sample in result.samples:
+            if not sample.response.text:
+              if self.cache is not None:
+                self.cache.delete(self, prompts[i], seed=cache_seed)
+              raise EmptyGenerationError(
+                  f'Empty generation encountered from model {self.model_id}.'
+              )
+        return results
+
+      retry_fn = concurrent.with_retry(
+          _sample_with_retry,
+          retry_on_errors=EmptyGenerationError,
+          max_attempts=self.max_attempts,
+          retry_interval=self.retry_interval,
+          exponential_backoff=self.exponential_backoff,
+          max_retry_interval=self.max_retry_interval,
+      )
+      results = retry_fn()
 
     for prompt, result in zip(prompts, results):
 
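With this wiring, empty generations are retried transparently, and a persistent failure surfaces as `concurrent.RetryError` once `max_attempts` is exhausted. A minimal sketch of the observable behavior, assuming the built-in fake `StaticResponse` model can be used to force an empty response:

```python
import langfun as lf
from langfun.core import concurrent

# StaticResponse always returns the given text; an empty response now
# triggers EmptyGenerationError, retried up to max_attempts, after which a
# concurrent.RetryError is raised (mirroring the tests later in this diff).
lm = lf.llms.StaticResponse(response='', max_attempts=2, retry_interval=0)
try:
  lm('hello')
except concurrent.RetryError as e:
  print('Gave up after retries:', e)
```
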
@@ -1001,7 +1122,6 @@
 
       for sample in result.samples:
         # Update metadata for response message.
-
         response = sample.response
         response.metadata.score = sample.score
         response.metadata.logprobs = sample.logprobs
@@ -1244,11 +1364,11 @@
           title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
           color='green',
       )
-      referred_modalities = prompt.referred_modalities()
-      if referred_modalities:
+      if prompt.referred_modalities:
         console.write(
             pg.object_utils.kvlist_str(
-                [(k, repr(v), None) for k, v in referred_modalities.items()]
+                [(k, repr(v), None)
+                 for k, v in prompt.referred_modalities.items()]
             ),
             title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
             color='green',
@@ -1334,9 +1454,9 @@
           color='green',
       )
       if isinstance(prompt, list):
-        referred_modalities_lst = [p.referred_modalities() for p in prompt]
+        referred_modalities_lst = [p.referred_modalities for p in prompt]
       else:
-        referred_modalities_lst = [prompt.referred_modalities(),]
+        referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
@@ -1411,7 +1531,7 @@
           title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
           color='green',
       )
-      referred_modalities_lst = [prompt.referred_modalities(),]
+      referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
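
These three call sites track `Message.referred_modalities` changing from a method to a property (see `langfun/core/message.py` and `langfun/core/modality.py` in the file list). A small sketch of the new access pattern, assuming an image modality object:

```python
import langfun as lf

# Hedged sketch mirroring the updated test below: messages now take the
# referenced modality objects explicitly, and referred_modalities is read
# as a property (no call parentheses).
image = lf.Image.from_bytes(b'fake-image-bytes')  # any modality object
msg = lf.UserMessage(f'hi <<[[{image.id}]]>>', referred_modalities=[image])
for key, modality in msg.referred_modalities.items():
  print(key, repr(modality))
```
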
@@ -1439,7 +1559,7 @@
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency concurrency based on the rate limits."""
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.
@@ -1512,7 +1632,7 @@ class _ConcurrencyControl:
 
 
 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage sumary."""
+  """Usage summary."""
 
   class AggregatedUsage(pg.Object):
     """Aggregated usage."""
langfun/core/language_model_test.py

@@ -591,6 +591,51 @@ class LanguageModelTest(unittest.TestCase):
     lm = MockModel(cache=cache, top_k=1)
     self.assertEqual(lm('a'), 'a')
 
+  def test_empty_generation_error(self):
+    class MockModelWithEmptyResponse(MockModel):
+      def _sample(self,
+                  prompts: list[message_lib.Message]
+                  ) -> list[lm_lib.LMSamplingResult]:
+        return [lm_lib.LMSamplingResult(
+            [lm_lib.LMSample(response='')],
+            usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0)
+        )]
+    lm = MockModelWithEmptyResponse(max_attempts=1, retry_interval=0)
+    with self.assertRaisesRegex(
+        concurrent.RetryError, 'Empty generation encountered'
+    ):
+      lm('a')
+
+  def test_empty_generation_retry(self):
+    class MockModelWithEmptyThenValid(MockModel):
+      attempt_count: int = 0
+
+      def _sample(
+          self, prompts: list[message_lib.Message]
+      ) -> list[lm_lib.LMSamplingResult]:
+        self.rebind(attempt_count=self.attempt_count + 1)
+        if self.attempt_count == 1:
+          # First attempt returns empty.
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='')],
+                  usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0),
+              )
+          ]
+        else:
+          # Subsequent attempts return a valid response.
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='valid response')],
+                  usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
+              )
+          ]
+
+    lm = MockModelWithEmptyThenValid(max_attempts=3, retry_interval=0)
+    result = lm('a')
+    self.assertEqual(result.text, 'valid response')
+    self.assertEqual(lm.attempt_count, 2)
+
   def test_estimate_max_concurrency(self):
     self.assertIsNone(lm_lib.LanguageModel.estimate_max_concurrency(None, None))
     self.assertEqual(
@@ -656,11 +701,17 @@
 
     string_io = io.StringIO()
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
+    image = Image()
     with contextlib.redirect_stdout(string_io):
       self.assertEqual(
-          lm(message_lib.UserMessage(
-              'hi <<[[image]]>>', image=Image()), debug=True),
-          'hi <<[[image]]>>'
+          lm(
+              message_lib.UserMessage(
+                  f'hi <<[[{image.id}]]>>',
+                  referred_modalities=[image],
+              ),
+              debug=True
+          ),
+          f'hi <<[[{image.id}]]>>'
       )
 
     debug_info = string_io.getvalue()
langfun/core/llms/__init__.py

@@ -30,7 +30,8 @@ from langfun.core.llms.compositional import RandomChoice
 
 # Base models by request/response protocol.
 from langfun.core.llms.rest import REST
-from langfun.core.llms.openai_compatible import OpenAICompatible
+from langfun.core.llms.openai_compatible import OpenAIChatCompletionAPI
+from langfun.core.llms.openai_compatible import OpenAIResponsesAPI
 from langfun.core.llms.gemini import Gemini
 from langfun.core.llms.anthropic import Anthropic
 
@@ -41,6 +42,7 @@ from langfun.core.llms.azure_openai import AzureOpenAI
 
 # Gemini models.
 from langfun.core.llms.google_genai import GenAI
+from langfun.core.llms.google_genai import Gemini3ProPreview
 from langfun.core.llms.google_genai import Gemini25Pro
 from langfun.core.llms.google_genai import Gemini25Flash
 from langfun.core.llms.google_genai import Gemini25ProPreview_20250605
@@ -89,6 +91,7 @@ from langfun.core.llms.vertexai import VertexAIGemini25ProPreview_20250605
 from langfun.core.llms.vertexai import VertexAIGemini25Pro
 from langfun.core.llms.vertexai import VertexAIGemini25Flash
 from langfun.core.llms.vertexai import VertexAIGemini25FlashImagePreview
+from langfun.core.llms.vertexai import VertexAIGemini3ProPreview
 
 # For backward compatibility.
 GeminiPro1_5 = Gemini15Pro
@@ -151,6 +154,9 @@ from langfun.core.llms.openai import Gpt35
 
 # Anthropic models.
 
+from langfun.core.llms.anthropic import Claude45
+from langfun.core.llms.anthropic import Claude45Haiku_20251001
+from langfun.core.llms.anthropic import Claude45Sonnet_20250929
 from langfun.core.llms.anthropic import Claude4
 from langfun.core.llms.anthropic import Claude4Sonnet_20250514
 from langfun.core.llms.anthropic import Claude4Opus_20250514
@@ -168,6 +174,8 @@ from langfun.core.llms.anthropic import Claude3Haiku
 from langfun.core.llms.anthropic import Claude3Haiku_20240307
 
 from langfun.core.llms.vertexai import VertexAIAnthropic
+from langfun.core.llms.vertexai import VertexAIClaude45Haiku_20251001
+from langfun.core.llms.vertexai import VertexAIClaude45Sonnet_20250929
 from langfun.core.llms.vertexai import VertexAIClaude4Opus_20250514
 from langfun.core.llms.vertexai import VertexAIClaude4Sonnet_20250514
 from langfun.core.llms.vertexai import VertexAIClaude37Sonnet_20250219
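
The newly exported Claude 4.5 and Gemini 3 classes follow the usual instantiation pattern; a quick sketch (credentials come from the standard provider environment variables, e.g. `ANTHROPIC_API_KEY`):

```python
import langfun as lf

# Sketch using classes exported above; requires provider credentials to be
# configured in the environment.
claude = lf.llms.Claude45Sonnet_20250929()
gemini = lf.llms.Gemini3ProPreview()
print(claude('Who are you?'))
```
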
langfun/core/llms/anthropic.py

@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):
 
 
 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits. This is a total limit that applies to
+            # combined traffic across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint.
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint.
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint.
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint.
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic LLMs (Claude) through REST APIs.
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using the API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up the API key:**
+
+  The Anthropic API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+     ```
+
+  2. Via the environment variable `ANTHROPIC_API_KEY`.
 
-  See https://docs.anthropic.com/claude/reference/messages_post
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """
 
   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
       args.pop('temperature', None)
       args.pop('top_k', None)
       args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args
 
   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -679,6 +816,24 @@
     return super()._error(status_code, content)
 
 
+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""
 
langfun/core/llms/azure_openai.py

@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI model service.
-
-  This service interacts with the Azure OpenAI API to generate chat completions.
-  It uses the deployment_name and API version to construct the endpoint, and
-  authenticates using an API key provided via parameter or the
-  AZURE_OPENAI_API_KEY environment variable.
-
-  Example:
-    lm = AzureOpenAI(
-        model='gpt-4o',
-        deployment_name='gpt-4o',
-        api_version='2024-08-01-preview',
-        azure_endpoint='https://trackname.openai.azure.com/',
-        api_key='token'
-    )
-    response = lf.query(prompt="what the capital of France", lm=lm)
-    print(response)
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using the API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up the API key:**
+
+  The Azure OpenAI API key can be specified in the following ways:
+
+  1. At model instantiation:
+
+     ```python
+     lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+     ```
+
+  2. Via the environment variable `AZURE_OPENAI_API_KEY`.
   """
 
   deployment_name: Annotated[
langfun/core/llms/cache/base.py

@@ -22,13 +22,33 @@ import langfun.core as lf
 
 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """LM cache entry."""
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None
 
 
 class LMCacheBase(lf.LMCache):
-  """The common LMCache base."""
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """
 
   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
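
As a hedged sketch of the subclassing contract described in the docstring above, here is a hypothetical in-memory subclass; the `_get`/`_put`/`_delete` signatures are assumptions inferred from the docstring, and `langfun/core/llms/cache/in_memory.py` holds the real reference implementation:

```python
from langfun.core.llms.cache import base

class DictCache(base.LMCacheBase):
  """Hypothetical minimal cache backed by a plain dict."""

  def _on_bound(self):
    super()._on_bound()
    self._store = {}  # maps (model id, key) -> LMCacheEntry

  def _get(self, model_id, key):          # signature is an assumption
    return self._store.get((model_id, key))

  def _put(self, model_id, key, entry):   # signature is an assumption
    self._store[(model_id, key)] = entry

  def _delete(self, model_id, key):       # signature is an assumption
    return self._store.pop((model_id, key), None) is not None
```
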
@@ -121,4 +141,6 @@
 
 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-  return (prompt.text_with_modality_hash, lm.sampling_options.cache_key(), seed)
+  # The prompt text already contains the modality ids of referenced modality
+  # objects, so there is no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)
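
After this change, two prompts with identical text (modality ids included), identical sampling options, and the same seed share one cache entry. A quick illustration using the fake `Echo` model:

```python
import langfun as lf
from langfun.core.llms.cache import base

lm = lf.llms.Echo()
key = base.default_key(lm, lf.UserMessage('hello'), 0)
print(key)  # ('hello', lm.sampling_options.cache_key(), 0)
```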