langfun 0.1.2.dev202508250805__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (133)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +6 -1
  3. langfun/core/agentic/__init__.py +4 -0
  4. langfun/core/agentic/action.py +412 -103
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +68 -6
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +9 -2
  20. langfun/core/data/conversion/gemini_test.py +12 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +47 -43
  24. langfun/core/eval/base_test.py +4 -4
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +1 -0
  29. langfun/core/eval/v2/checkpointing.py +30 -4
  30. langfun/core/eval/v2/eval_test_helper.py +1 -1
  31. langfun/core/eval/v2/evaluation.py +60 -14
  32. langfun/core/eval/v2/example.py +22 -11
  33. langfun/core/eval/v2/experiment.py +51 -8
  34. langfun/core/eval/v2/metric_values.py +31 -3
  35. langfun/core/eval/v2/metric_values_test.py +32 -0
  36. langfun/core/eval/v2/metrics.py +39 -4
  37. langfun/core/eval/v2/metrics_test.py +14 -0
  38. langfun/core/eval/v2/progress.py +30 -1
  39. langfun/core/eval/v2/progress_test.py +27 -0
  40. langfun/core/eval/v2/progress_tracking_test.py +6 -0
  41. langfun/core/eval/v2/reporting.py +90 -71
  42. langfun/core/eval/v2/reporting_test.py +20 -6
  43. langfun/core/eval/v2/runners.py +27 -7
  44. langfun/core/eval/v2/runners_test.py +3 -0
  45. langfun/core/langfunc.py +45 -130
  46. langfun/core/langfunc_test.py +6 -4
  47. langfun/core/language_model.py +151 -31
  48. langfun/core/language_model_test.py +9 -3
  49. langfun/core/llms/__init__.py +12 -1
  50. langfun/core/llms/anthropic.py +157 -2
  51. langfun/core/llms/azure_openai.py +29 -17
  52. langfun/core/llms/cache/base.py +25 -3
  53. langfun/core/llms/cache/in_memory.py +48 -7
  54. langfun/core/llms/cache/in_memory_test.py +14 -4
  55. langfun/core/llms/compositional.py +25 -1
  56. langfun/core/llms/deepseek.py +30 -2
  57. langfun/core/llms/fake.py +39 -1
  58. langfun/core/llms/fake_test.py +9 -0
  59. langfun/core/llms/gemini.py +43 -7
  60. langfun/core/llms/google_genai.py +34 -1
  61. langfun/core/llms/groq.py +28 -3
  62. langfun/core/llms/llama_cpp.py +23 -4
  63. langfun/core/llms/openai.py +93 -3
  64. langfun/core/llms/openai_compatible.py +148 -27
  65. langfun/core/llms/openai_compatible_test.py +207 -20
  66. langfun/core/llms/openai_test.py +0 -2
  67. langfun/core/llms/rest.py +16 -1
  68. langfun/core/llms/vertexai.py +59 -8
  69. langfun/core/logging.py +1 -1
  70. langfun/core/mcp/__init__.py +10 -0
  71. langfun/core/mcp/client.py +177 -0
  72. langfun/core/mcp/client_test.py +71 -0
  73. langfun/core/mcp/session.py +241 -0
  74. langfun/core/mcp/session_test.py +54 -0
  75. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  76. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  77. langfun/core/mcp/tool.py +256 -0
  78. langfun/core/mcp/tool_test.py +197 -0
  79. langfun/core/memory.py +1 -0
  80. langfun/core/message.py +160 -55
  81. langfun/core/message_test.py +65 -81
  82. langfun/core/modalities/__init__.py +8 -0
  83. langfun/core/modalities/audio.py +21 -1
  84. langfun/core/modalities/image.py +19 -1
  85. langfun/core/modalities/mime.py +62 -3
  86. langfun/core/modalities/pdf.py +19 -1
  87. langfun/core/modalities/video.py +21 -1
  88. langfun/core/modality.py +167 -29
  89. langfun/core/modality_test.py +42 -12
  90. langfun/core/natural_language.py +1 -1
  91. langfun/core/sampling.py +4 -4
  92. langfun/core/sampling_test.py +20 -4
  93. langfun/core/structured/completion.py +34 -44
  94. langfun/core/structured/completion_test.py +23 -43
  95. langfun/core/structured/description.py +54 -50
  96. langfun/core/structured/function_generation.py +29 -12
  97. langfun/core/structured/mapping.py +74 -28
  98. langfun/core/structured/parsing.py +90 -74
  99. langfun/core/structured/parsing_test.py +0 -3
  100. langfun/core/structured/querying.py +242 -156
  101. langfun/core/structured/querying_test.py +95 -64
  102. langfun/core/structured/schema.py +70 -10
  103. langfun/core/structured/schema_generation.py +33 -14
  104. langfun/core/structured/scoring.py +45 -34
  105. langfun/core/structured/tokenization.py +24 -9
  106. langfun/core/subscription.py +2 -2
  107. langfun/core/template.py +175 -50
  108. langfun/core/template_test.py +123 -17
  109. langfun/env/__init__.py +43 -0
  110. langfun/env/base_environment.py +827 -0
  111. langfun/env/base_environment_test.py +473 -0
  112. langfun/env/base_feature.py +304 -0
  113. langfun/env/base_feature_test.py +228 -0
  114. langfun/env/base_sandbox.py +842 -0
  115. langfun/env/base_sandbox_test.py +1235 -0
  116. langfun/env/event_handlers/__init__.py +14 -0
  117. langfun/env/event_handlers/chain.py +233 -0
  118. langfun/env/event_handlers/chain_test.py +253 -0
  119. langfun/env/event_handlers/event_logger.py +472 -0
  120. langfun/env/event_handlers/event_logger_test.py +304 -0
  121. langfun/env/event_handlers/metric_writer.py +726 -0
  122. langfun/env/event_handlers/metric_writer_test.py +214 -0
  123. langfun/env/interface.py +1640 -0
  124. langfun/env/interface_test.py +151 -0
  125. langfun/env/load_balancers.py +59 -0
  126. langfun/env/load_balancers_test.py +139 -0
  127. langfun/env/test_utils.py +497 -0
  128. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
  129. langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
  130. langfun-0.1.2.dev202508250805.dist-info/RECORD +0 -172
  131. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
  132. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
  133. {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
@@ -478,7 +478,7 @@ class UsageNotAvailable(LMSamplingUsage):
 
 
 class LMSamplingResult(pg.Object):
-  """Language model response."""
+  """The result from a language model sampling."""
 
   samples: Annotated[
       list[LMSample],
@@ -584,6 +584,15 @@ class LMSamplingOptions(component.Component):
       ),
   ] = None
 
+  extras: Annotated[
+      dict[str, Any],
+      (
+          'Extra arguments (e.g. configuration for tool calls) to pass to '
+          'the model. This is model-specific, please check model '
+          'implementation to see how to use this.'
+      ),
+  ] = {}
+
   def cache_key(self) -> tuple[Any, ...]:
     """Returns a tuple of current values as cache key."""
     return (
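The hunk above adds an `extras` field to `LMSamplingOptions` for model-specific arguments. A minimal usage sketch, assuming the fake `Echo` model only to keep the example key-free; the dict key shown is illustrative, since accepted keys depend on the concrete model implementation:

```python
import langfun as lf

# Hypothetical usage of the new `extras` field: the dict is opaque to
# LMSamplingOptions and is interpreted by the concrete model class.
options = lf.LMSamplingOptions(
    temperature=0.0,
    extras={'some_provider_specific_arg': True},  # illustrative key only
)
lm = lf.llms.Echo(sampling_options=options)
print(lm('hello'))
```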
@@ -672,13 +681,91 @@ class LMDebugMode(enum.IntFlag):
 
 
 class LanguageModel(component.Component):
-  """Interface of a language model.
-
-  Language models are at the center of LLM-based agents. ``LanguageModel``
-  is the interface to interact with different language modles.
-
-  In langfun, users can use different language models with the same agents,
-  allowing fast prototype, as well as side-by-side comparisons.
+  """Interface for language model.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` method and `sample` method allow generating
+    text completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options like `temperature`, `max_tokens`, etc., can be customized
+  at model creation, or overridden at call time or via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
   """
 
   sampling_options: LMSamplingOptions = LMSamplingOptions()
@@ -1159,21 +1246,35 @@ class LanguageModel(component.Component):
   ) -> message_lib.Message:
     """Returns the first candidate."""
     prompt = message_lib.UserMessage.from_value(prompt)
-    with component.context(override_attrs=True, **kwargs):
-      sampling_options = self.sampling_options
-      if sampling_options.n != 1:
-        sampling_options = sampling_options.clone(override=dict(n=1))
-
-      call_counter = self._call_counter
-      self._call_counter += 1
-      request_start = time.time()
-      result = self.sample(
-          [prompt], sampling_options=sampling_options, cache_seed=cache_seed
-      )[0]
-      elapse = time.time() - request_start
-      response = result.samples[0].response
-      self._debug(prompt, response, call_counter, result.usage, elapse)
-      return response
+    start_time = time.time()
+    error_tag = ''
+    try:
+      with component.context(override_attrs=True, **kwargs):
+        sampling_options = self.sampling_options
+        if sampling_options.n != 1:
+          sampling_options = sampling_options.clone(override=dict(n=1))
+
+        call_counter = self._call_counter
+        self._call_counter += 1
+        request_start = time.time()
+        result = self.sample(
+            [prompt], sampling_options=sampling_options, cache_seed=cache_seed
+        )[0]
+        elapse = time.time() - request_start
+        response = result.samples[0].response
+        self._debug(prompt, response, call_counter, result.usage, elapse)
+        return response
+    except BaseException as e:
+      error_tag = pg.ErrorInfo.from_exception(e).tag
+      raise e
+    finally:
+      _METRICS.language_model_calls.increment(
+          model=self.model_id, error=error_tag
+      )
+      _METRICS.language_model_call_duration_ms.record(
+          int((time.time() - start_time) * 1000),
+          model=self.model_id, error=error_tag,
+      )
 
   def _debug(
       self,
@@ -1230,11 +1331,11 @@ class LanguageModel(component.Component):
         title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
         color='green',
     )
-    referred_modalities = prompt.referred_modalities()
-    if referred_modalities:
+    if prompt.referred_modalities:
       console.write(
           pg.object_utils.kvlist_str(
-              [(k, repr(v), None) for k, v in referred_modalities.items()]
+              [(k, repr(v), None)
+               for k, v in prompt.referred_modalities.items()]
           ),
           title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
          color='green',
@@ -1320,9 +1421,9 @@ class LanguageModel(component.Component):
         color='green',
     )
     if isinstance(prompt, list):
-      referred_modalities_lst = [p.referred_modalities() for p in prompt]
+      referred_modalities_lst = [p.referred_modalities for p in prompt]
     else:
-      referred_modalities_lst = [prompt.referred_modalities(),]
+      referred_modalities_lst = [prompt.referred_modalities,]
     if referred_modalities_lst:
       for referred_modalities in referred_modalities_lst:
         console.write(
@@ -1397,7 +1498,7 @@ class LanguageModel(component.Component):
         title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
         color='green',
     )
-    referred_modalities_lst = [prompt.referred_modalities(),]
+    referred_modalities_lst = [prompt.referred_modalities,]
     if referred_modalities_lst:
       for referred_modalities in referred_modalities_lst:
         console.write(
@@ -1425,7 +1526,7 @@ class LanguageModel(component.Component):
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency concurrency based on the rate limits."""
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.
@@ -1438,6 +1539,25 @@ class LanguageModel(component.Component):
     return None
 
 
+class _Metrics:
+  """Metrics for Langfun."""
+
+  def __init__(self):
+    self._metrics = pg.monitoring.metric_collection('/third_party/langfun')
+    self.language_model_calls = self._metrics.get_counter(
+        'language_model_calls',
+        'Number of calls to the language model.',
+        parameters={'model': str, 'error': str},
+    )
+    self.language_model_call_duration_ms = self._metrics.get_distribution(
+        'language_model_call_duration_ms',
+        'Duration of calls to the language model in milliseconds.',
+        parameters={'model': str, 'error': str},
+    )
+
+_METRICS = _Metrics()
+
+
 class _ConcurrencyControl:
   """Controls the max concurrent LLM calls for a given model."""
 
@@ -1479,7 +1599,7 @@ class _ConcurrencyControl:
 
 
 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage sumary."""
+  """Usage summary."""
 
   class AggregatedUsage(pg.Object):
     """Aggregated usage."""
@@ -656,11 +656,17 @@ class LanguageModelTest(unittest.TestCase):
 
     string_io = io.StringIO()
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
+    image = Image()
     with contextlib.redirect_stdout(string_io):
       self.assertEqual(
-          lm(message_lib.UserMessage(
-              'hi <<[[image]]>>', image=Image()), debug=True),
-          'hi <<[[image]]>>'
+          lm(
+              message_lib.UserMessage(
+                  f'hi <<[[{image.id}]]>>',
+                  referred_modalities=[image],
+              ),
+              debug=True
+          ),
+          f'hi <<[[{image.id}]]>>'
       )
 
     debug_info = string_io.getvalue()
@@ -20,6 +20,7 @@
 # LMs for testing.
 from langfun.core.llms.fake import Fake
 from langfun.core.llms.fake import Echo
+from langfun.core.llms.fake import Pseudo
 from langfun.core.llms.fake import StaticMapping
 from langfun.core.llms.fake import StaticResponse
 from langfun.core.llms.fake import StaticSequence
@@ -29,7 +30,8 @@ from langfun.core.llms.compositional import RandomChoice
 
 # Base models by request/response protocol.
 from langfun.core.llms.rest import REST
-from langfun.core.llms.openai_compatible import OpenAICompatible
+from langfun.core.llms.openai_compatible import OpenAIChatCompletionAPI
+from langfun.core.llms.openai_compatible import OpenAIResponsesAPI
 from langfun.core.llms.gemini import Gemini
 from langfun.core.llms.anthropic import Anthropic
 
@@ -61,6 +63,7 @@ from langfun.core.llms.google_genai import Gemini15Flash8B_001
 from langfun.core.llms.google_genai import Gemini2ProExp_20250205
 from langfun.core.llms.google_genai import Gemini2FlashThinkingExp_20250121
 from langfun.core.llms.google_genai import GeminiExp_20241206
+from langfun.core.llms.google_genai import Gemini25FlashImagePreview
 
 from langfun.core.llms.vertexai import VertexAIGemini
 from langfun.core.llms.vertexai import VertexAIGemini2Flash
@@ -86,6 +89,7 @@ from langfun.core.llms.vertexai import VertexAIGemini25FlashPreview_20250520
 from langfun.core.llms.vertexai import VertexAIGemini25ProPreview_20250605
 from langfun.core.llms.vertexai import VertexAIGemini25Pro
 from langfun.core.llms.vertexai import VertexAIGemini25Flash
+from langfun.core.llms.vertexai import VertexAIGemini25FlashImagePreview
 
 # For backward compatibility.
 GeminiPro1_5 = Gemini15Pro
@@ -96,6 +100,8 @@ VertexAIGeminiFlash1_5 = VertexAIGemini15Flash
 # OpenAI models.
 from langfun.core.llms.openai import OpenAI
 
+from langfun.core.llms.openai import Gpt5
+from langfun.core.llms.openai import Gpt5Mini
 from langfun.core.llms.openai import Gpt41
 from langfun.core.llms.openai import GptO3
 from langfun.core.llms.openai import GptO4Mini
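With `Gpt5` and `Gpt5Mini` now exported from `lf.llms`, they can be used like any other Langfun model. A hedged sketch, assuming `OPENAI_API_KEY` is set in the environment:

```python
import langfun as lf

# Newly exported classes in this release; model availability depends on
# the OpenAI account tied to the API key.
lm = lf.llms.Gpt5()
mini = lf.llms.Gpt5Mini()
print(lm('1 + 1 ='))
```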
@@ -146,6 +152,9 @@ from langfun.core.llms.openai import Gpt35
 
 # Anthropic models.
 
+from langfun.core.llms.anthropic import Claude45
+from langfun.core.llms.anthropic import Claude45Haiku_20251001
+from langfun.core.llms.anthropic import Claude45Sonnet_20250929
 from langfun.core.llms.anthropic import Claude4
 from langfun.core.llms.anthropic import Claude4Sonnet_20250514
 from langfun.core.llms.anthropic import Claude4Opus_20250514
@@ -163,6 +172,8 @@ from langfun.core.llms.anthropic import Claude3Haiku
 from langfun.core.llms.anthropic import Claude3Haiku_20240307
 
 from langfun.core.llms.vertexai import VertexAIAnthropic
+from langfun.core.llms.vertexai import VertexAIClaude45Haiku_20251001
+from langfun.core.llms.vertexai import VertexAIClaude45Sonnet_20250929
 from langfun.core.llms.vertexai import VertexAIClaude4Opus_20250514
 from langfun.core.llms.vertexai import VertexAIClaude4Sonnet_20250514
 from langfun.core.llms.vertexai import VertexAIClaude37Sonnet_20250219
@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):
 
 
 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            # This rate limit is a total limit that applies to combined traffic
+            # across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
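These entries register the Claude 4.5 model ids (and their VertexAI aliases) in `SUPPORTED_MODELS`. A hedged sketch of id-based lookup, assuming `lf.LanguageModel.get` (shown in the `LanguageModel` docstring above) resolves the new ids the same way it resolves `'gpt-4'`:

```python
import langfun as lf

# Assumption: a registered model id resolves to the corresponding model class,
# analogous to lf.LanguageModel.get('gpt-4') in the docstring example above.
lm = lf.LanguageModel.get('claude-sonnet-4-5-20250929')
```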
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic LLMs (Claude) through REST APIs.
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+
+  2. via environment variable `ANTHROPIC_API_KEY`.
 
-  See https://docs.anthropic.com/claude/reference/messages_post
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """
 
   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
     args.pop('temperature', None)
     args.pop('top_k', None)
     args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args
 
   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
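This is where the new `LMSamplingOptions.extras` field takes effect for Anthropic models: the dict is merged verbatim into the request arguments. A hedged illustration using the `Claude45Sonnet_20250929` class added in the hunk below; the `metadata` payload is just an example of an Anthropic-specific argument, not something this diff prescribes:

```python
import langfun as lf

lm = lf.llms.Claude45Sonnet_20250929(
    sampling_options=lf.LMSamplingOptions(
        # Forwarded into the Anthropic request body by args.update(...) above.
        extras={'metadata': {'user_id': 'example-user'}},
    )
)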
@@ -679,6 +816,24 @@ class Anthropic(rest.REST):
     return super()._error(status_code, content)
 
 
+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""
 
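The new wrapper classes follow the existing Claude 4 pattern, pinning `model` to the dated Anthropic id. A short usage sketch, assuming `ANTHROPIC_API_KEY` is set in the environment:

```python
import langfun as lf

# Instantiate one of the classes added above and issue a single call.
lm = lf.llms.Claude45Haiku_20251001()
print(lm('Who are you?'))
```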
@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI model service.
-
-  This service interacts with the Azure OpenAI API to generate chat completions.
-  It uses the deployment_name and API version to construct the endpoint, and
-  authenticates using an API key provided via parameter or the
-  AZURE_OPENAI_API_KEY environment variable.
-
-  Example:
-    lm = AzureOpenAI(
-        model='gpt-4o',
-        deployment_name='gpt-4o',
-        api_version='2024-08-01-preview',
-        azure_endpoint='https://trackname.openai.azure.com/',
-        api_key='token'
-    )
-    response = lf.query(prompt="what the capital of France", lm=lm)
-    print(response)
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+  ```
+  2. via environment variable `AZURE_OPENAI_API_KEY`.
   """
 
   deployment_name: Annotated[
@@ -22,13 +22,33 @@ import langfun.core as lf
 
 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """LM cache entry."""
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None
 
 
 class LMCacheBase(lf.LMCache):
-  """The common LMCache base."""
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """
 
   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
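Given the expanded `LMCacheBase` docstring, here is a short usage sketch with the in-memory implementation from `langfun/core/llms/cache/in_memory.py`; the `ttl` argument is assumed to be the TTL knob the docstring describes:

```python
import langfun as lf
from langfun.core.llms.cache import in_memory

# Attach an in-memory cache to a model; identical prompts with the same
# sampling options and seed are then served from the cache.
lm = lf.llms.Echo(cache=in_memory.InMemory(ttl=3600))  # ttl in seconds (assumed)
print(lm('hello'))
print(lm('hello'))  # expected cache hit
```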
@@ -121,4 +141,6 @@ class LMCacheBase(lf.LMCache):
 
 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-  return (prompt.text_with_modality_hash, lm.sampling_options.cache_key(), seed)
+  # prompt text already contains the modality id for referenced modality
+  # objects, so no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)