langfun 0.1.2.dev202510200805__py3-none-any.whl → 0.1.2.dev202511160804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146)
  1. langfun/core/__init__.py +1 -0
  2. langfun/core/agentic/action.py +107 -12
  3. langfun/core/agentic/action_eval.py +9 -2
  4. langfun/core/agentic/action_test.py +25 -0
  5. langfun/core/async_support.py +32 -3
  6. langfun/core/coding/python/correction.py +19 -9
  7. langfun/core/coding/python/execution.py +14 -12
  8. langfun/core/coding/python/generation.py +21 -16
  9. langfun/core/coding/python/sandboxing.py +23 -3
  10. langfun/core/component.py +42 -3
  11. langfun/core/concurrent.py +70 -6
  12. langfun/core/concurrent_test.py +1 -0
  13. langfun/core/console.py +1 -1
  14. langfun/core/data/conversion/anthropic.py +12 -3
  15. langfun/core/data/conversion/anthropic_test.py +8 -6
  16. langfun/core/data/conversion/gemini.py +9 -2
  17. langfun/core/data/conversion/gemini_test.py +12 -9
  18. langfun/core/data/conversion/openai.py +145 -31
  19. langfun/core/data/conversion/openai_test.py +161 -17
  20. langfun/core/eval/base.py +48 -44
  21. langfun/core/eval/base_test.py +4 -4
  22. langfun/core/eval/matching.py +5 -2
  23. langfun/core/eval/patching.py +3 -3
  24. langfun/core/eval/scoring.py +4 -3
  25. langfun/core/eval/v2/__init__.py +1 -0
  26. langfun/core/eval/v2/checkpointing.py +39 -5
  27. langfun/core/eval/v2/checkpointing_test.py +1 -1
  28. langfun/core/eval/v2/eval_test_helper.py +97 -1
  29. langfun/core/eval/v2/evaluation.py +88 -16
  30. langfun/core/eval/v2/evaluation_test.py +9 -3
  31. langfun/core/eval/v2/example.py +45 -39
  32. langfun/core/eval/v2/example_test.py +3 -3
  33. langfun/core/eval/v2/experiment.py +51 -8
  34. langfun/core/eval/v2/metric_values.py +31 -3
  35. langfun/core/eval/v2/metric_values_test.py +32 -0
  36. langfun/core/eval/v2/metrics.py +157 -44
  37. langfun/core/eval/v2/metrics_test.py +39 -18
  38. langfun/core/eval/v2/progress.py +30 -1
  39. langfun/core/eval/v2/progress_test.py +27 -0
  40. langfun/core/eval/v2/progress_tracking_test.py +3 -0
  41. langfun/core/eval/v2/reporting.py +90 -71
  42. langfun/core/eval/v2/reporting_test.py +20 -6
  43. langfun/core/eval/v2/runners/__init__.py +26 -0
  44. langfun/core/eval/v2/{runners.py → runners/base.py} +22 -124
  45. langfun/core/eval/v2/runners/debug.py +40 -0
  46. langfun/core/eval/v2/runners/debug_test.py +79 -0
  47. langfun/core/eval/v2/runners/parallel.py +100 -0
  48. langfun/core/eval/v2/runners/parallel_test.py +98 -0
  49. langfun/core/eval/v2/runners/sequential.py +47 -0
  50. langfun/core/eval/v2/runners/sequential_test.py +175 -0
  51. langfun/core/langfunc.py +45 -130
  52. langfun/core/langfunc_test.py +6 -4
  53. langfun/core/language_model.py +103 -16
  54. langfun/core/language_model_test.py +9 -3
  55. langfun/core/llms/__init__.py +7 -1
  56. langfun/core/llms/anthropic.py +157 -2
  57. langfun/core/llms/azure_openai.py +29 -17
  58. langfun/core/llms/cache/base.py +25 -3
  59. langfun/core/llms/cache/in_memory.py +48 -7
  60. langfun/core/llms/cache/in_memory_test.py +14 -4
  61. langfun/core/llms/compositional.py +25 -1
  62. langfun/core/llms/deepseek.py +30 -2
  63. langfun/core/llms/fake.py +32 -1
  64. langfun/core/llms/gemini.py +14 -9
  65. langfun/core/llms/google_genai.py +29 -1
  66. langfun/core/llms/groq.py +28 -3
  67. langfun/core/llms/llama_cpp.py +23 -4
  68. langfun/core/llms/openai.py +36 -3
  69. langfun/core/llms/openai_compatible.py +148 -27
  70. langfun/core/llms/openai_compatible_test.py +207 -20
  71. langfun/core/llms/openai_test.py +0 -2
  72. langfun/core/llms/rest.py +12 -1
  73. langfun/core/llms/vertexai.py +51 -8
  74. langfun/core/logging.py +1 -1
  75. langfun/core/mcp/client.py +77 -22
  76. langfun/core/mcp/client_test.py +8 -35
  77. langfun/core/mcp/session.py +94 -29
  78. langfun/core/mcp/session_test.py +54 -0
  79. langfun/core/mcp/tool.py +151 -22
  80. langfun/core/mcp/tool_test.py +197 -0
  81. langfun/core/memory.py +1 -0
  82. langfun/core/message.py +160 -55
  83. langfun/core/message_test.py +65 -81
  84. langfun/core/modalities/__init__.py +8 -0
  85. langfun/core/modalities/audio.py +21 -1
  86. langfun/core/modalities/image.py +19 -1
  87. langfun/core/modalities/mime.py +62 -3
  88. langfun/core/modalities/pdf.py +19 -1
  89. langfun/core/modalities/video.py +21 -1
  90. langfun/core/modality.py +167 -29
  91. langfun/core/modality_test.py +42 -12
  92. langfun/core/natural_language.py +1 -1
  93. langfun/core/sampling.py +4 -4
  94. langfun/core/sampling_test.py +20 -4
  95. langfun/core/structured/__init__.py +2 -24
  96. langfun/core/structured/completion.py +34 -44
  97. langfun/core/structured/completion_test.py +23 -43
  98. langfun/core/structured/description.py +54 -50
  99. langfun/core/structured/function_generation.py +29 -12
  100. langfun/core/structured/mapping.py +81 -37
  101. langfun/core/structured/parsing.py +95 -79
  102. langfun/core/structured/parsing_test.py +0 -3
  103. langfun/core/structured/querying.py +215 -142
  104. langfun/core/structured/querying_test.py +65 -29
  105. langfun/core/structured/schema/__init__.py +48 -0
  106. langfun/core/structured/schema/base.py +664 -0
  107. langfun/core/structured/schema/base_test.py +531 -0
  108. langfun/core/structured/schema/json.py +174 -0
  109. langfun/core/structured/schema/json_test.py +121 -0
  110. langfun/core/structured/schema/python.py +316 -0
  111. langfun/core/structured/schema/python_test.py +410 -0
  112. langfun/core/structured/schema_generation.py +33 -14
  113. langfun/core/structured/scoring.py +47 -36
  114. langfun/core/structured/tokenization.py +26 -11
  115. langfun/core/subscription.py +2 -2
  116. langfun/core/template.py +175 -50
  117. langfun/core/template_test.py +123 -17
  118. langfun/env/__init__.py +8 -2
  119. langfun/env/base_environment.py +320 -128
  120. langfun/env/base_environment_test.py +473 -0
  121. langfun/env/base_feature.py +92 -15
  122. langfun/env/base_feature_test.py +228 -0
  123. langfun/env/base_sandbox.py +84 -361
  124. langfun/env/base_sandbox_test.py +1235 -0
  125. langfun/env/event_handlers/__init__.py +1 -1
  126. langfun/env/event_handlers/chain.py +233 -0
  127. langfun/env/event_handlers/chain_test.py +253 -0
  128. langfun/env/event_handlers/event_logger.py +95 -98
  129. langfun/env/event_handlers/event_logger_test.py +21 -21
  130. langfun/env/event_handlers/metric_writer.py +225 -140
  131. langfun/env/event_handlers/metric_writer_test.py +23 -6
  132. langfun/env/interface.py +854 -40
  133. langfun/env/interface_test.py +112 -2
  134. langfun/env/load_balancers_test.py +23 -2
  135. langfun/env/test_utils.py +126 -84
  136. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/METADATA +1 -1
  137. langfun-0.1.2.dev202511160804.dist-info/RECORD +211 -0
  138. langfun/core/eval/v2/runners_test.py +0 -343
  139. langfun/core/structured/schema.py +0 -987
  140. langfun/core/structured/schema_test.py +0 -982
  141. langfun/env/base_test.py +0 -1481
  142. langfun/env/event_handlers/base.py +0 -350
  143. langfun-0.1.2.dev202510200805.dist-info/RECORD +0 -195
  144. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/WHEEL +0 -0
  145. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/licenses/LICENSE +0 -0
  146. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/top_level.txt +0 -0

langfun/core/language_model.py

@@ -478,7 +478,7 @@ class UsageNotAvailable(LMSamplingUsage):
 
 
 class LMSamplingResult(pg.Object):
-  """Language model response."""
+  """The result from a language model sampling."""
 
   samples: Annotated[
       list[LMSample],
@@ -584,6 +584,15 @@ class LMSamplingOptions(component.Component):
       ),
   ] = None
 
+  extras: Annotated[
+      dict[str, Any],
+      (
+          'Extra arguments (e.g. configuration for tool calls) to pass to '
+          'the model. This is model-specific, please check model '
+          'implementation to see how to use this.'
+      ),
+  ] = {}
+
   def cache_key(self) -> tuple[Any, ...]:
     """Returns a tuple of current values as cache key."""
     return (
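
Note: the new `extras` field gives callers a pass-through for provider-specific request arguments; the anthropic.py hunk further down merges `options.extras` into the outgoing request args. A minimal usage sketch follows; the model class choice and the `tools` payload are illustrative assumptions, not taken from this diff.

```python
import langfun as lf

# Sketch only: `extras` is forwarded as-is by models that support it, so the
# accepted keys are model-specific. The 'tools' payload here is a placeholder.
lm = lf.llms.Claude4Sonnet_20250514(
    sampling_options=lf.LMSamplingOptions(
        extras={'tools': [{'name': 'my_tool', 'description': 'placeholder'}]}
    )
)
```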
@@ -672,13 +681,91 @@ class LMDebugMode(enum.IntFlag):
 
 
 class LanguageModel(component.Component):
-  """Interface of a language model.
-
-  Language models are at the center of LLM-based agents. ``LanguageModel``
-  is the interface to interact with different language modles.
-
-  In langfun, users can use different language models with the same agents,
-  allowing fast prototype, as well as side-by-side comparisons.
+  """Interface for language model.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` method and `sample` method allow generating
+    text completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options like `temperature`, `max_tokens`, etc., can be customized
+  at model creation, or overridden at call time or via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
 
   """
 
   sampling_options: LMSamplingOptions = LMSamplingOptions()
@@ -1244,11 +1331,11 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
           color='green',
       )
-      referred_modalities = prompt.referred_modalities()
-      if referred_modalities:
+      if prompt.referred_modalities:
         console.write(
             pg.object_utils.kvlist_str(
-                [(k, repr(v), None) for k, v in referred_modalities.items()]
+                [(k, repr(v), None)
+                 for k, v in prompt.referred_modalities.items()]
             ),
             title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
             color='green',
@@ -1334,9 +1421,9 @@ class LanguageModel(component.Component):
           color='green',
       )
       if isinstance(prompt, list):
-        referred_modalities_lst = [p.referred_modalities() for p in prompt]
+        referred_modalities_lst = [p.referred_modalities for p in prompt]
       else:
-        referred_modalities_lst = [prompt.referred_modalities(),]
+        referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
@@ -1411,7 +1498,7 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
           color='green',
       )
-      referred_modalities_lst = [prompt.referred_modalities(),]
+      referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
@@ -1439,7 +1526,7 @@ class LanguageModel(component.Component):
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency concurrency based on the rate limits."""
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.
@@ -1512,7 +1599,7 @@ class _ConcurrencyControl:
 
 
 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage sumary."""
+  """Usage summary."""
 
   class AggregatedUsage(pg.Object):
     """Aggregated usage."""

langfun/core/language_model_test.py

@@ -656,11 +656,17 @@ class LanguageModelTest(unittest.TestCase):
 
     string_io = io.StringIO()
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
+    image = Image()
     with contextlib.redirect_stdout(string_io):
       self.assertEqual(
-          lm(message_lib.UserMessage(
-              'hi <<[[image]]>>', image=Image()), debug=True),
-          'hi <<[[image]]>>'
+          lm(
+              message_lib.UserMessage(
+                  f'hi <<[[{image.id}]]>>',
+                  referred_modalities=[image],
+              ),
+              debug=True
+          ),
+          f'hi <<[[{image.id}]]>>'
       )
 
     debug_info = string_io.getvalue()
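
Note: this test change reflects the modality-referencing convention used throughout this release: modality objects are passed via `referred_modalities` and referenced in the prompt text by their `id`, rather than as keyword arguments hashed into the text. A hedged sketch of user code following the new convention; `lf.Image.from_uri` is assumed here purely for illustration.

```python
import langfun as lf

# Assumption for illustration: construct an image modality from a URI.
image = lf.Image.from_uri('https://example.com/cat.png')

# Reference the modality by its id in the prompt text and pass the object
# explicitly, mirroring the updated test above.
message = lf.UserMessage(
    f'What is in this picture? <<[[{image.id}]]>>',
    referred_modalities=[image],
)
```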

langfun/core/llms/__init__.py

@@ -30,7 +30,8 @@ from langfun.core.llms.compositional import RandomChoice
 
 # Base models by request/response protocol.
 from langfun.core.llms.rest import REST
-from langfun.core.llms.openai_compatible import OpenAICompatible
+from langfun.core.llms.openai_compatible import OpenAIChatCompletionAPI
+from langfun.core.llms.openai_compatible import OpenAIResponsesAPI
 from langfun.core.llms.gemini import Gemini
 from langfun.core.llms.anthropic import Anthropic
 
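
Note: downstream code that imported `OpenAICompatible` will need to pick one of the two new base classes; judging only from the renamed imports above, `OpenAIChatCompletionAPI` corresponds to the Chat Completions protocol and `OpenAIResponsesAPI` to the Responses protocol. A migration sketch at the import level:

```python
# Before (0.1.2.dev202510200805):
# from langfun.core.llms.openai_compatible import OpenAICompatible

# After (0.1.2.dev202511160804), choose the base matching the wire protocol:
from langfun.core.llms.openai_compatible import OpenAIChatCompletionAPI
from langfun.core.llms.openai_compatible import OpenAIResponsesAPI
```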
@@ -151,6 +152,9 @@ from langfun.core.llms.openai import Gpt35
 
 # Anthropic models.
 
+from langfun.core.llms.anthropic import Claude45
+from langfun.core.llms.anthropic import Claude45Haiku_20251001
+from langfun.core.llms.anthropic import Claude45Sonnet_20250929
 from langfun.core.llms.anthropic import Claude4
 from langfun.core.llms.anthropic import Claude4Sonnet_20250514
 from langfun.core.llms.anthropic import Claude4Opus_20250514
@@ -168,6 +172,8 @@ from langfun.core.llms.anthropic import Claude3Haiku
 from langfun.core.llms.anthropic import Claude3Haiku_20240307
 
 from langfun.core.llms.vertexai import VertexAIAnthropic
+from langfun.core.llms.vertexai import VertexAIClaude45Haiku_20251001
+from langfun.core.llms.vertexai import VertexAIClaude45Sonnet_20250929
 from langfun.core.llms.vertexai import VertexAIClaude4Opus_20250514
 from langfun.core.llms.vertexai import VertexAIClaude4Sonnet_20250514
 from langfun.core.llms.vertexai import VertexAIClaude37Sonnet_20250219

langfun/core/llms/anthropic.py

@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):
 
 
 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            # This rate limit is a total limit that applies to combined traffic
+            # across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}
 
 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic LLMs (Claude) through REST APIs.
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+
+  2. via environment variable `ANTHROPIC_API_KEY`.
 
-  See https://docs.anthropic.com/claude/reference/messages_post
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """
 
   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
       args.pop('temperature', None)
       args.pop('top_k', None)
       args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args
 
   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -679,6 +816,24 @@ class Anthropic(rest.REST):
     return super()._error(status_code, content)
 
 
+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""
 
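
Note: with the classes above, the Claude 4.5 models are used like any other Langfun LLM. A short sketch, assuming `ANTHROPIC_API_KEY` is set as described in the Anthropic docstring earlier in this diff:

```python
import langfun as lf

# Assumes the ANTHROPIC_API_KEY environment variable is set.
lm = lf.llms.Claude45Sonnet_20250929()
print(lm('Who are you?'))

# The VertexAI-served variants added to llms/__init__.py follow the same
# naming pattern, e.g. lf.llms.VertexAIClaude45Haiku_20251001.
```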

langfun/core/llms/azure_openai.py

@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI model service.
-
-  This service interacts with the Azure OpenAI API to generate chat completions.
-  It uses the deployment_name and API version to construct the endpoint, and
-  authenticates using an API key provided via parameter or the
-  AZURE_OPENAI_API_KEY environment variable.
-
-  Example:
-    lm = AzureOpenAI(
-        model='gpt-4o',
-        deployment_name='gpt-4o',
-        api_version='2024-08-01-preview',
-        azure_endpoint='https://trackname.openai.azure.com/',
-        api_key='token'
-    )
-    response = lf.query(prompt="what the capital of France", lm=lm)
-    print(response)
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+  ```
+  2. via environment variable `AZURE_OPENAI_API_KEY`.
   """
 
   deployment_name: Annotated[

langfun/core/llms/cache/base.py

@@ -22,13 +22,33 @@ import langfun.core as lf
 
 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """LM cache entry."""
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None
 
 
 class LMCacheBase(lf.LMCache):
-  """The common LMCache base."""
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """
 
   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
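
Note: a sketch of the keying and TTL features described in the docstring above, using the `InMemory` subclass whose `filename`/`ttl`/`key` init args appear in the in_memory.py hunk further down. The assumption that `ttl` is expressed in seconds is not confirmed by this diff.

```python
import langfun as lf

# Sketch: cache entries expire after roughly an hour (assuming `ttl` is in
# seconds), keyed only by prompt text and seed, following the
# Callable[[lf.LanguageModel, lf.Message, int], Any] signature above.
cache = lf.llms.cache.InMemory(
    ttl=3600,
    key=lambda lm, prompt, seed: (prompt.text, seed),
)
lm = lf.llms.GeminiPro(cache=cache)
```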
@@ -121,4 +141,6 @@ class LMCacheBase(lf.LMCache):
 
 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-  return (prompt.text_with_modality_hash, lm.sampling_options.cache_key(), seed)
+  # prompt text already contains the modality id for referenced modality
+  # objects, so no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)

langfun/core/llms/cache/in_memory.py

@@ -24,7 +24,32 @@ import pyglove as pg
 
 @pg.use_init_args(['filename', 'ttl', 'key'])
 class InMemory(base.LMCacheBase):
-  """In memory cache."""
+  """An in-memory cache for language model lookups.
+
+  `InMemory` stores LM prompts and their corresponding responses in memory,
+  providing a simple and fast caching mechanism for a single session.
+  Optionally, it can persist the cache to a JSON file on disk, allowing
+  results to be reused across sessions.
+
+  When a filename is provided, the cache will be loaded from the file upon
+  initialization and saved to the file when `save()` is called. This is
+  useful for caching results in interactive environments like Colab or
+  when running batch jobs.
+
+  Example:
+
+  ```python
+  import langfun as lf
+  # Using in-memory cache without persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory())
+  r = lm.query('hello')
+
+  # Using in-memory cache with persistence
+  lm = lf.llms.GeminiPro(cache=lf.llms.cache.InMemory('cache.json'))
+  r = lm.query('hello')
+  lm.cache.save()
+  ```
+  """
 
   filename: Annotated[
       str | None,
@@ -144,17 +169,33 @@ class InMemory(base.LMCacheBase):
 
 @contextlib.contextmanager
 def lm_cache(filename: str | None = None) -> Iterator[InMemory]:
-  """Context manager to enable cache for LMs under the context.
+  """Context manager to enable in-memory cache for LMs in the current context.
+
+  This context manager sets an `InMemory` cache as the default cache for
+  any Langfun language model instantiated within its scope, unless a model
+  is explicitly configured with a different cache.
+
+  If a `filename` is provided, the cache will be loaded from the specified
+  file at the beginning of the context and automatically saved back to the
+  file upon exiting the context. This is a convenient way to manage
+  persistent caching for a block of code.
+
+  Example:
 
-  If LMs under the context manager have explicitly specified cache, they will
-  use their own cache. Otherwise they will use the cache created by the context
-  manager.
+  ```python
+  import langfun as lf
+  with lf.lm_cache('my_cache.json'):
+    # LMs created here will use 'my_cache.json' for caching.
+    lm = lf.llms.GeminiPro()
+    print(lm.query('hello'))
+  ```
 
   Args:
-    filename: If not None, JSON file to load and save the cache.
+    filename: If provided, specifies the JSON file for loading and saving
+      the cache.
 
   Yields:
-    A cache object created.
+    The `InMemory` cache instance created for this context.
   """
   cache = InMemory(filename)
   try:

langfun/core/llms/cache/in_memory_test.py

@@ -175,18 +175,28 @@ class InMemoryLMCacheTest(unittest.TestCase):
 
     cache = in_memory.InMemory()
     lm = fake.StaticSequence(['1', '2', '3', '4', '5', '6'], cache=cache)
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('foo')))
-    lm(lf.UserMessage('hi <<[[image]]>>', image=CustomModality('bar')))
+    image_foo = CustomModality('foo')
+    image_bar = CustomModality('bar')
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_foo.id}]]>>', referred_modalities=[image_foo]
+        )
+    )
+    lm(
+        lf.UserMessage(
+            f'hi <<[[{image_bar.id}]]>>', referred_modalities=[image_bar]
+        )
+    )
     self.assertEqual(
         list(cache.keys()),
         [
            (
-                'hi <<[[image]]>><image>acbd18db</image>',
+                f'hi <<[[{image_foo.id}]]>>',
                 (None, None, 1, 40, None, None),
                 0,
             ),
             (
-                'hi <<[[image]]>><image>37b51d19</image>',
+                f'hi <<[[{image_bar.id}]]>>',
                 (None, None, 1, 40, None, None),
                 0,
             ),

langfun/core/llms/compositional.py

@@ -21,7 +21,31 @@ import pyglove as pg
 
 @pg.use_init_args(['candidates', 'seed'])
 class RandomChoice(lf.LanguageModel):
-  """Random choice of a list of LLM models."""
+  """A composite language model that randomly selects from a list of candidates.
+
+  `RandomChoice` acts as a proxy that forwards each request (`sample`, `score`,
+  `tokenize`, or `__call__`) to one of the `candidates` selected randomly.
+  This can be useful for load balancing across multiple LLM endpoints,
+  for A/B testing different models, or for ensembling model outputs
+  by calling it multiple times.
+
+  The selection is determined by the provided `seed`, ensuring reproducibility
+  if needed.
+
+  Example:
+
+  ```python
+  import langfun as lf
+
+  lm = lf.llms.RandomChoice([
+      lf.llms.GeminiPro(),
+      lf.llms.GPT4(),
+  ])
+
+  # This call will be handled by either GeminiPro or GPT4, chosen randomly.
+  r = lm.sample('hello')
+  ```
+  """
 
   candidates: Annotated[
       list[lf.LanguageModel],