langfun 0.1.2.dev202510230805__py3-none-any.whl → 0.1.2.dev202511270805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/core/__init__.py +2 -0
- langfun/core/agentic/__init__.py +4 -1
- langfun/core/agentic/action.py +447 -29
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +149 -21
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +1 -0
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +47 -43
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +64 -6
- langfun/core/eval/v2/checkpointing_test.py +9 -2
- langfun/core/eval/v2/eval_test_helper.py +103 -2
- langfun/core/eval/v2/evaluation.py +91 -16
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +50 -40
- langfun/core/eval/v2/example_test.py +16 -8
- langfun/core/eval/v2/experiment.py +74 -8
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking.py +12 -3
- langfun/core/eval/v2/progress_tracking_test.py +6 -1
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +24 -6
- langfun/core/eval/v2/runners/__init__.py +30 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +59 -142
- langfun/core/eval/v2/runners/beam.py +341 -0
- langfun/core/eval/v2/runners/beam_test.py +131 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +76 -0
- langfun/core/eval/v2/runners/parallel.py +100 -0
- langfun/core/eval/v2/runners/parallel_test.py +95 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +172 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +7 -5
- langfun/core/language_model.py +141 -21
- langfun/core/language_model_test.py +54 -3
- langfun/core/llms/__init__.py +9 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +55 -17
- langfun/core/llms/gemini_test.py +84 -0
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +36 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +58 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/client_test.py +8 -35
- langfun/core/mcp/session.py +94 -29
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/tool.py +151 -22
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +64 -3
- langfun/core/modalities/mime_test.py +11 -0
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +215 -142
- langfun/core/structured/querying_test.py +65 -29
- langfun/core/structured/schema/__init__.py +49 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +174 -49
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +8 -2
- langfun/env/base_environment.py +320 -128
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +92 -15
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +84 -361
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +1 -1
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +95 -98
- langfun/env/event_handlers/event_logger_test.py +21 -21
- langfun/env/event_handlers/metric_writer.py +225 -140
- langfun/env/event_handlers/metric_writer_test.py +23 -6
- langfun/env/interface.py +854 -40
- langfun/env/interface_test.py +112 -2
- langfun/env/load_balancers_test.py +23 -2
- langfun/env/test_utils.py +126 -84
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/METADATA +1 -1
- langfun-0.1.2.dev202511270805.dist-info/RECORD +215 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun/env/base_test.py +0 -1481
- langfun/env/event_handlers/base.py +0 -350
- langfun-0.1.2.dev202510230805.dist-info/RECORD +0 -195
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202510230805.dist-info → langfun-0.1.2.dev202511270805.dist-info}/top_level.txt +0 -0
langfun/core/language_model.py
CHANGED
@@ -53,6 +53,10 @@ class RetryableLMError(LMError):
   """Base class for LLM errors that can be solved by retrying."""


+class EmptyGenerationError(RetryableLMError):
+  """Error for empty generaition."""
+
+
 class RateLimitError(RetryableLMError):
   """Error for rate limit reached."""

@@ -478,7 +482,7 @@ class UsageNotAvailable(LMSamplingUsage):


 class LMSamplingResult(pg.Object):
-  """
+  """The result from a language model sampling."""

   samples: Annotated[
       list[LMSample],
@@ -575,6 +579,14 @@ class LMSamplingOptions(component.Component):
       int | None, 'Number of max thinking tokens.'
   ] = None

+  thinking_level: Annotated[
+      Literal['low', 'high'] | None,
+      (
+          'Thinking level for Gemini models. High is for complex tasks, '
+          'while low is for faster responses.'
+      ),
+  ] = None
+
   reasoning_effort: Annotated[
       Literal['low', 'medium', 'high'] | None,
       (
@@ -584,6 +596,15 @@ class LMSamplingOptions(component.Component):
       ),
   ] = None

+  extras: Annotated[
+      dict[str, Any],
+      (
+          'Extra arguments (e.g. configuration for tool calls) to pass to '
+          'the model. This is model-specific, please check model '
+          'implementation to see how to use this.'
+      ),
+  ] = {}
+
   def cache_key(self) -> tuple[Any, ...]:
     """Returns a tuple of current values as cache key."""
     return (
@@ -672,13 +693,91 @@ class LMDebugMode(enum.IntFlag):


 class LanguageModel(component.Component):
-  """Interface
-
-
-
-
-
-
+  """Interface for language model.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` method and `sample` method allow generating
+    text completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options like `temperature`, `max_tokens`, etc., can be customized
+  at model creation, or overridden at call time or via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
   """

   sampling_options: LMSamplingOptions = LMSamplingOptions()
@@ -989,10 +1088,32 @@ class LanguageModel(component.Component):
     prompts = [message_lib.UserMessage.from_value(p) for p in prompts]

     with component.context(override_attrs=True, **kwargs):
-
-
-
-
+
+      def _sample_with_retry():
+        if self.cache is None:
+          results = self._sample(prompts)
+        else:
+          results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+        for i, result in enumerate(results):
+          for sample in result.samples:
+            if not sample.response.text:
+              if self.cache is not None:
+                self.cache.delete(self, prompts[i], seed=cache_seed)
+              raise EmptyGenerationError(
+                  f'Empty generation encountered from model {self.model_id}.'
+              )
+        return results
+
+      retry_fn = concurrent.with_retry(
+          _sample_with_retry,
+          retry_on_errors=EmptyGenerationError,
+          max_attempts=self.max_attempts,
+          retry_interval=self.retry_interval,
+          exponential_backoff=self.exponential_backoff,
+          max_retry_interval=self.max_retry_interval,
+      )
+      results = retry_fn()

     for prompt, result in zip(prompts, results):

@@ -1001,7 +1122,6 @@ class LanguageModel(component.Component):

       for sample in result.samples:
         # Update metadata for response message.
-
         response = sample.response
         response.metadata.score = sample.score
         response.metadata.logprobs = sample.logprobs
@@ -1244,11 +1364,11 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
           color='green',
       )
-
-      if referred_modalities:
+      if prompt.referred_modalities:
         console.write(
             pg.object_utils.kvlist_str(
-                [(k, repr(v), None)
+                [(k, repr(v), None)
+                 for k, v in prompt.referred_modalities.items()]
             ),
             title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
             color='green',
@@ -1334,9 +1454,9 @@ class LanguageModel(component.Component):
           color='green',
       )
       if isinstance(prompt, list):
-        referred_modalities_lst = [p.referred_modalities
+        referred_modalities_lst = [p.referred_modalities for p in prompt]
       else:
-        referred_modalities_lst = [prompt.referred_modalities
+        referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
@@ -1411,7 +1531,7 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
           color='green',
       )
-      referred_modalities_lst = [prompt.referred_modalities
+      referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(
@@ -1439,7 +1559,7 @@ class LanguageModel(component.Component):
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.
@@ -1512,7 +1632,7 @@ class _ConcurrencyControl:


 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage
+  """Usage summary."""

   class AggregatedUsage(pg.Object):
     """Aggregated usage."""
langfun/core/language_model_test.py
CHANGED

@@ -591,6 +591,51 @@ class LanguageModelTest(unittest.TestCase):
     lm = MockModel(cache=cache, top_k=1)
     self.assertEqual(lm('a'), 'a')

+  def test_empty_generation_error(self):
+    class MockModelWithEmptyResponse(MockModel):
+      def _sample(self,
+                  prompts: list[message_lib.Message]
+                  ) -> list[lm_lib.LMSamplingResult]:
+        return [lm_lib.LMSamplingResult(
+            [lm_lib.LMSample(response='')],
+            usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0)
+        )]
+    lm = MockModelWithEmptyResponse(max_attempts=1, retry_interval=0)
+    with self.assertRaisesRegex(
+        concurrent.RetryError, 'Empty generation encountered'
+    ):
+      lm('a')
+
+  def test_empty_generation_retry(self):
+    class MockModelWithEmptyThenValid(MockModel):
+      attempt_count: int = 0
+
+      def _sample(
+          self, prompts: list[message_lib.Message]
+      ) -> list[lm_lib.LMSamplingResult]:
+        self.rebind(attempt_count=self.attempt_count + 1)
+        if self.attempt_count == 1:
+          # First attempt returns empty
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='')],
+                  usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0),
+              )
+          ]
+        else:
+          # Subsequent attempts return valid response
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='valid response')],
+                  usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
+              )
+          ]
+
+    lm = MockModelWithEmptyThenValid(max_attempts=3, retry_interval=0)
+    result = lm('a')
+    self.assertEqual(result.text, 'valid response')
+    self.assertEqual(lm.attempt_count, 2)
+
   def test_estimate_max_concurrency(self):
     self.assertIsNone(lm_lib.LanguageModel.estimate_max_concurrency(None, None))
     self.assertEqual(
@@ -656,11 +701,17 @@ class LanguageModelTest(unittest.TestCase):

     string_io = io.StringIO()
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
+    image = Image()
     with contextlib.redirect_stdout(string_io):
       self.assertEqual(
-          lm(
-
-
+          lm(
+              message_lib.UserMessage(
+                  f'hi <<[[{image.id}]]>>',
+                  referred_modalities=[image],
+              ),
+              debug=True
+          ),
+          f'hi <<[[{image.id}]]>>'
       )

     debug_info = string_io.getvalue()
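The new `thinking_level` and `extras` sampling options behave like the existing ones: they can be set at model creation or overridden per call, as described in the new `LanguageModel` docstring. A minimal sketch (not part of the diff; the model class and the `extras` keys below are illustrative, and which fields a given model honors is model-specific):

```python
import langfun as lf

# `thinking_level` is declared for Gemini models; Gemini3ProPreview is one of
# the newly exported classes (see llms/__init__.py below).
lm = lf.llms.Gemini3ProPreview(thinking_level='low')

# `extras` forwards model-specific arguments to the model implementation
# (for example, Anthropic.request merges `options.extras` into the request
# args). The key below is a placeholder, not a documented field.
r = lm('1 + 1 =', extras={'provider_specific_flag': True})
print(r.text)
```

Note that with this change an empty model response raises `EmptyGenerationError` (and evicts the cached entry, if any); the call is retried up to `max_attempts` before surfacing as a `concurrent.RetryError`, as exercised by the tests above.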
langfun/core/llms/__init__.py
CHANGED
@@ -30,7 +30,8 @@ from langfun.core.llms.compositional import RandomChoice

 # Base models by request/response protocol.
 from langfun.core.llms.rest import REST
-from langfun.core.llms.openai_compatible import
+from langfun.core.llms.openai_compatible import OpenAIChatCompletionAPI
+from langfun.core.llms.openai_compatible import OpenAIResponsesAPI
 from langfun.core.llms.gemini import Gemini
 from langfun.core.llms.anthropic import Anthropic

@@ -41,6 +42,7 @@ from langfun.core.llms.azure_openai import AzureOpenAI

 # Gemini models.
 from langfun.core.llms.google_genai import GenAI
+from langfun.core.llms.google_genai import Gemini3ProPreview
 from langfun.core.llms.google_genai import Gemini25Pro
 from langfun.core.llms.google_genai import Gemini25Flash
 from langfun.core.llms.google_genai import Gemini25ProPreview_20250605
@@ -89,6 +91,7 @@ from langfun.core.llms.vertexai import VertexAIGemini25ProPreview_20250605
 from langfun.core.llms.vertexai import VertexAIGemini25Pro
 from langfun.core.llms.vertexai import VertexAIGemini25Flash
 from langfun.core.llms.vertexai import VertexAIGemini25FlashImagePreview
+from langfun.core.llms.vertexai import VertexAIGemini3ProPreview

 # For backward compatibility.
 GeminiPro1_5 = Gemini15Pro
@@ -151,6 +154,9 @@ from langfun.core.llms.openai import Gpt35

 # Anthropic models.

+from langfun.core.llms.anthropic import Claude45
+from langfun.core.llms.anthropic import Claude45Haiku_20251001
+from langfun.core.llms.anthropic import Claude45Sonnet_20250929
 from langfun.core.llms.anthropic import Claude4
 from langfun.core.llms.anthropic import Claude4Sonnet_20250514
 from langfun.core.llms.anthropic import Claude4Opus_20250514
@@ -168,6 +174,8 @@ from langfun.core.llms.anthropic import Claude3Haiku
 from langfun.core.llms.anthropic import Claude3Haiku_20240307

 from langfun.core.llms.vertexai import VertexAIAnthropic
+from langfun.core.llms.vertexai import VertexAIClaude45Haiku_20251001
+from langfun.core.llms.vertexai import VertexAIClaude45Sonnet_20250929
 from langfun.core.llms.vertexai import VertexAIClaude4Opus_20250514
 from langfun.core.llms.vertexai import VertexAIClaude4Sonnet_20250514
 from langfun.core.llms.vertexai import VertexAIClaude37Sonnet_20250219
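The newly exported symbols are ordinary langfun models and can be instantiated like any other. A hedged sketch (not part of the diff; credentials are assumed to come from the usual environment variables or application-default credentials):

```python
import langfun as lf

gemini3 = lf.llms.Gemini3ProPreview()          # Google GenAI API key from env.
claude45 = lf.llms.Claude45Sonnet_20250929()   # ANTHROPIC_API_KEY from env.
vertex_gemini3 = lf.llms.VertexAIGemini3ProPreview()  # Vertex AI credentials.

print(claude45('Say hi in one word.').text)
```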
langfun/core/llms/anthropic.py
CHANGED
@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):


 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            # This rate limit is a total limit that applies to combined traffic
+            # across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}

 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+
+  2. via environment variable `ANTHROPIC_API_KEY`.

-
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """

   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
     args.pop('temperature', None)
     args.pop('top_k', None)
     args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args

   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -679,6 +816,24 @@ class Anthropic(rest.REST):
     return super()._error(status_code, content)


+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""

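Since `Anthropic.request` now merges `options.extras` into the request args, model-specific request fields can be threaded through the sampling options. A sketch under the assumption that call-time overrides apply to `extras` the same way they apply to `temperature` (the payload below is a placeholder, not a vetted Messages API configuration):

```python
import langfun as lf

# One of the Claude 4.5 classes added above.
lm = lf.llms.Claude45Haiku_20251001()

# Keys placed in `extras` end up in the JSON body sent to the Messages API,
# alongside the standard fields computed by Anthropic.request.
r = lm(
    'List three colors.',
    extras={'metadata': {'user_id': 'example-user'}},  # placeholder field
)
print(r.text)
```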
langfun/core/llms/azure_openai.py
CHANGED

@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+  ```
+  2. via environment variable `AZURE_OPENAI_API_KEY`.
   """

   deployment_name: Annotated[
langfun/core/llms/cache/base.py
CHANGED
@@ -22,13 +22,33 @@ import langfun.core as lf

 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None


 class LMCacheBase(lf.LMCache):
-  """
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """

   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
@@ -121,4 +141,6 @@ class LMCacheBase(lf.LMCache):

 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-
+  # prompt text already contains the modality id for referenced modality
+  # objects, so no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)
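For reference, a minimal sketch of how the cache pieces fit together (assuming `lf.llms.cache.InMemory` and a `ttl` attribute on the cache, per the TTL support described in the `LMCacheBase` docstring; check langfun/core/llms/cache/in_memory.py for the exact signature):

```python
import langfun as lf

cache = lf.llms.cache.InMemory(ttl=3600)  # Assumed: entries expire after an hour.
lm = lf.llms.Echo(cache=cache)            # Fake model that echoes the prompt.

lm('hello')         # Miss: sampled and stored under default_key(...).
lm('hello')         # Hit: same prompt text, sampling options and seed.
print(cache.stats)  # Hits/misses/updates tracked by LMCacheBase.
```

Per the new comment in `default_key`, the prompt text already embeds the modality ids of referred modality objects, so the default key of (prompt text, sampling options cache key, seed) distinguishes prompts that reference different images or files.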