langfun 0.1.2.dev202508250805__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- langfun/__init__.py +1 -1
- langfun/core/__init__.py +6 -1
- langfun/core/agentic/__init__.py +4 -0
- langfun/core/agentic/action.py +412 -103
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +68 -6
- langfun/core/async_support.py +104 -5
- langfun/core/async_support_test.py +23 -0
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +9 -2
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/gemini_test.py +12 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +47 -43
- langfun/core/eval/base_test.py +4 -4
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +1 -0
- langfun/core/eval/v2/checkpointing.py +30 -4
- langfun/core/eval/v2/eval_test_helper.py +1 -1
- langfun/core/eval/v2/evaluation.py +60 -14
- langfun/core/eval/v2/example.py +22 -11
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +39 -4
- langfun/core/eval/v2/metrics_test.py +14 -0
- langfun/core/eval/v2/progress.py +30 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking_test.py +6 -0
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +20 -6
- langfun/core/eval/v2/runners.py +27 -7
- langfun/core/eval/v2/runners_test.py +3 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +6 -4
- langfun/core/language_model.py +151 -31
- langfun/core/language_model_test.py +9 -3
- langfun/core/llms/__init__.py +12 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +39 -1
- langfun/core/llms/fake_test.py +9 -0
- langfun/core/llms/gemini.py +43 -7
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +93 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +16 -1
- langfun/core/llms/vertexai.py +59 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/__init__.py +10 -0
- langfun/core/mcp/client.py +177 -0
- langfun/core/mcp/client_test.py +71 -0
- langfun/core/mcp/session.py +241 -0
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/testing/simple_mcp_client.py +33 -0
- langfun/core/mcp/testing/simple_mcp_server.py +33 -0
- langfun/core/mcp/tool.py +256 -0
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +62 -3
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +74 -28
- langfun/core/structured/parsing.py +90 -74
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +242 -156
- langfun/core/structured/querying_test.py +95 -64
- langfun/core/structured/schema.py +70 -10
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +45 -34
- langfun/core/structured/tokenization.py +24 -9
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +43 -0
- langfun/env/base_environment.py +827 -0
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +304 -0
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +842 -0
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +14 -0
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +472 -0
- langfun/env/event_handlers/event_logger_test.py +304 -0
- langfun/env/event_handlers/metric_writer.py +726 -0
- langfun/env/event_handlers/metric_writer_test.py +214 -0
- langfun/env/interface.py +1640 -0
- langfun/env/interface_test.py +151 -0
- langfun/env/load_balancers.py +59 -0
- langfun/env/load_balancers_test.py +139 -0
- langfun/env/test_utils.py +497 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
- langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
- langfun-0.1.2.dev202508250805.dist-info/RECORD +0 -172
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202508250805.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
langfun/core/language_model.py
CHANGED
@@ -478,7 +478,7 @@ class UsageNotAvailable(LMSamplingUsage):


 class LMSamplingResult(pg.Object):
-  """
+  """The result from a language model sampling."""

   samples: Annotated[
       list[LMSample],
@@ -584,6 +584,15 @@ class LMSamplingOptions(component.Component):
       ),
   ] = None

+  extras: Annotated[
+      dict[str, Any],
+      (
+          'Extra arguments (e.g. configuration for tool calls) to pass to '
+          'the model. This is model-specific, please check model '
+          'implementation to see how to use this.'
+      ),
+  ] = {}
+
   def cache_key(self) -> tuple[Any, ...]:
     """Returns a tuple of current values as cache key."""
     return (
@@ -672,13 +681,91 @@ class LMDebugMode(enum.IntFlag):


 class LanguageModel(component.Component):
-  """Interface
-  … (6 old lines not shown in the source view)
+  """Interface for language model.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` method and `sample` method allow generating
+    text completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options like `temperature`, `max_tokens`, etc., can be customized
+  at model creation, or overridden at call time or via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
   """

   sampling_options: LMSamplingOptions = LMSamplingOptions()
@@ -1159,21 +1246,35 @@ class LanguageModel(component.Component):
   ) -> message_lib.Message:
     """Returns the first candidate."""
     prompt = message_lib.UserMessage.from_value(prompt)
-    … (15 old lines not shown in the source view)
+    start_time = time.time()
+    error_tag = ''
+    try:
+      with component.context(override_attrs=True, **kwargs):
+        sampling_options = self.sampling_options
+        if sampling_options.n != 1:
+          sampling_options = sampling_options.clone(override=dict(n=1))
+
+        call_counter = self._call_counter
+        self._call_counter += 1
+        request_start = time.time()
+        result = self.sample(
+            [prompt], sampling_options=sampling_options, cache_seed=cache_seed
+        )[0]
+        elapse = time.time() - request_start
+        response = result.samples[0].response
+        self._debug(prompt, response, call_counter, result.usage, elapse)
+        return response
+    except BaseException as e:
+      error_tag = pg.ErrorInfo.from_exception(e).tag
+      raise e
+    finally:
+      _METRICS.language_model_calls.increment(
+          model=self.model_id, error=error_tag
+      )
+      _METRICS.language_model_call_duration_ms.record(
+          int((time.time() - start_time) * 1000),
+          model=self.model_id, error=error_tag,
+      )

   def _debug(
       self,
@@ -1230,11 +1331,11 @@ class LanguageModel(component.Component):
         title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
         color='green',
     )
-
-    if referred_modalities:
+    if prompt.referred_modalities:
       console.write(
           pg.object_utils.kvlist_str(
-              [(k, repr(v), None)
+              [(k, repr(v), None)
+               for k, v in prompt.referred_modalities.items()]
           ),
           title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
           color='green',
@@ -1320,9 +1421,9 @@ class LanguageModel(component.Component):
         color='green',
     )
     if isinstance(prompt, list):
-      referred_modalities_lst = [p.referred_modalities
+      referred_modalities_lst = [p.referred_modalities for p in prompt]
     else:
-      referred_modalities_lst = [prompt.referred_modalities
+      referred_modalities_lst = [prompt.referred_modalities,]
     if referred_modalities_lst:
       for referred_modalities in referred_modalities_lst:
         console.write(
@@ -1397,7 +1498,7 @@ class LanguageModel(component.Component):
         title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
         color='green',
     )
-    referred_modalities_lst = [prompt.referred_modalities
+    referred_modalities_lst = [prompt.referred_modalities,]
     if referred_modalities_lst:
       for referred_modalities in referred_modalities_lst:
         console.write(
@@ -1425,7 +1526,7 @@ class LanguageModel(component.Component):
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.
@@ -1438,6 +1539,25 @@ class LanguageModel(component.Component):
     return None


+class _Metrics:
+  """Metrics for Langfun."""
+
+  def __init__(self):
+    self._metrics = pg.monitoring.metric_collection('/third_party/langfun')
+    self.language_model_calls = self._metrics.get_counter(
+        'language_model_calls',
+        'Number of calls to the language model.',
+        parameters={'model': str, 'error': str},
+    )
+    self.language_model_call_duration_ms = self._metrics.get_distribution(
+        'language_model_call_duration_ms',
+        'Duration of calls to the language model in milliseconds.',
+        parameters={'model': str, 'error': str},
+    )
+
+_METRICS = _Metrics()
+
+
 class _ConcurrencyControl:
   """Controls the max concurrent LLM calls for a given model."""

@@ -1479,7 +1599,7 @@ class _ConcurrencyControl:


 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage
+  """Usage summary."""

   class AggregatedUsage(pg.Object):
     """Aggregated usage."""
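The new `extras` field on `LMSamplingOptions` is a pass-through for provider-specific request arguments; the anthropic.py change later in this diff merges it into the request payload via `args.update(options.extras)`. A minimal sketch of how a caller might use it; the `tools` payload is an illustrative Anthropic-style declaration, not taken from this diff:

```python
import langfun as lf

# `extras` keys are model-specific; the tool declaration below is an
# assumed Anthropic Messages API shape, shown only for illustration.
lm = lf.llms.Claude45Sonnet_20250929(
    sampling_options=lf.LMSamplingOptions(
        extras={
            'tools': [{
                'name': 'get_weather',
                'description': 'Returns the weather for a city.',
                'input_schema': {
                    'type': 'object',
                    'properties': {'city': {'type': 'string'}},
                    'required': ['city'],
                },
            }],
        },
    ),
)
```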
langfun/core/language_model_test.py
CHANGED

@@ -656,11 +656,17 @@ class LanguageModelTest(unittest.TestCase):

     string_io = io.StringIO()
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
+    image = Image()
     with contextlib.redirect_stdout(string_io):
       self.assertEqual(
-          lm(
-          … (2 old lines not shown in the source view)
+          lm(
+              message_lib.UserMessage(
+                  f'hi <<[[{image.id}]]>>',
+                  referred_modalities=[image],
+              ),
+              debug=True
+          ),
+          f'hi <<[[{image.id}]]>>'
       )

     debug_info = string_io.getvalue()
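The updated test constructs the prompt with `referred_modalities` as an explicit field, matching the debug changes above that read `prompt.referred_modalities` as a property. A hedged sketch of the same pattern from user code; only `UserMessage(..., referred_modalities=[...])` and the `<<[[id]]>>` reference come from the test, while `lf.Image.from_bytes` and the file read are assumptions:

```python
import langfun as lf

# Assumption: lf.Image.from_bytes constructs an image modality from raw bytes.
with open('cat.png', 'rb') as f:
  image = lf.Image.from_bytes(f.read())

# Mirrors the test above: the modality object is attached explicitly and
# referenced inside the text by its id.
prompt = lf.UserMessage(
    f'What is in this image? <<[[{image.id}]]>>',
    referred_modalities=[image],
)
```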
langfun/core/llms/__init__.py
CHANGED
@@ -20,6 +20,7 @@
 # LMs for testing.
 from langfun.core.llms.fake import Fake
 from langfun.core.llms.fake import Echo
+from langfun.core.llms.fake import Pseudo
 from langfun.core.llms.fake import StaticMapping
 from langfun.core.llms.fake import StaticResponse
 from langfun.core.llms.fake import StaticSequence
@@ -29,7 +30,8 @@ from langfun.core.llms.compositional import RandomChoice

 # Base models by request/response protocol.
 from langfun.core.llms.rest import REST
-from langfun.core.llms.openai_compatible import
+from langfun.core.llms.openai_compatible import OpenAIChatCompletionAPI
+from langfun.core.llms.openai_compatible import OpenAIResponsesAPI
 from langfun.core.llms.gemini import Gemini
 from langfun.core.llms.anthropic import Anthropic

@@ -61,6 +63,7 @@ from langfun.core.llms.google_genai import Gemini15Flash8B_001
 from langfun.core.llms.google_genai import Gemini2ProExp_20250205
 from langfun.core.llms.google_genai import Gemini2FlashThinkingExp_20250121
 from langfun.core.llms.google_genai import GeminiExp_20241206
+from langfun.core.llms.google_genai import Gemini25FlashImagePreview

 from langfun.core.llms.vertexai import VertexAIGemini
 from langfun.core.llms.vertexai import VertexAIGemini2Flash
@@ -86,6 +89,7 @@ from langfun.core.llms.vertexai import VertexAIGemini25FlashPreview_20250520
 from langfun.core.llms.vertexai import VertexAIGemini25ProPreview_20250605
 from langfun.core.llms.vertexai import VertexAIGemini25Pro
 from langfun.core.llms.vertexai import VertexAIGemini25Flash
+from langfun.core.llms.vertexai import VertexAIGemini25FlashImagePreview

 # For backward compatibility.
 GeminiPro1_5 = Gemini15Pro
@@ -96,6 +100,8 @@ VertexAIGeminiFlash1_5 = VertexAIGemini15Flash
 # OpenAI models.
 from langfun.core.llms.openai import OpenAI

+from langfun.core.llms.openai import Gpt5
+from langfun.core.llms.openai import Gpt5Mini
 from langfun.core.llms.openai import Gpt41
 from langfun.core.llms.openai import GptO3
 from langfun.core.llms.openai import GptO4Mini
@@ -146,6 +152,9 @@ from langfun.core.llms.openai import Gpt35

 # Anthropic models.

+from langfun.core.llms.anthropic import Claude45
+from langfun.core.llms.anthropic import Claude45Haiku_20251001
+from langfun.core.llms.anthropic import Claude45Sonnet_20250929
 from langfun.core.llms.anthropic import Claude4
 from langfun.core.llms.anthropic import Claude4Sonnet_20250514
 from langfun.core.llms.anthropic import Claude4Opus_20250514
@@ -163,6 +172,8 @@ from langfun.core.llms.anthropic import Claude3Haiku
 from langfun.core.llms.anthropic import Claude3Haiku_20240307

 from langfun.core.llms.vertexai import VertexAIAnthropic
+from langfun.core.llms.vertexai import VertexAIClaude45Haiku_20251001
+from langfun.core.llms.vertexai import VertexAIClaude45Sonnet_20250929
 from langfun.core.llms.vertexai import VertexAIClaude4Opus_20250514
 from langfun.core.llms.vertexai import VertexAIClaude4Sonnet_20250514
 from langfun.core.llms.vertexai import VertexAIClaude37Sonnet_20250219
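Each newly exported name above becomes available under `lf.llms`. A quick sketch using a few of them; the API keys and the call itself are illustrative:

```python
import langfun as lf

# New model classes exported in this release.
gpt5 = lf.llms.Gpt5(api_key='...')
claude = lf.llms.Claude45Sonnet_20250929(api_key='...')

# Pseudo joins Echo and Fake under the "LMs for testing" group above.
print(claude('Hello!'))
```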
langfun/core/llms/anthropic.py
CHANGED
@@ -59,6 +59,60 @@ class AnthropicModelInfo(lf.ModelInfo):


 SUPPORTED_MODELS = [
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5-20251001',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Haiku model (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=4_000_000,
+            max_output_tokens_per_minute=800_000,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5-20250929',
+        provider='Anthropic',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # Tier 4 rate limits
+            # This rate limit is a total limit that applies to combined traffic
+            # across both Sonnet 4 and Sonnet 4.5.
+            max_requests_per_minute=4000,
+            max_input_tokens_per_minute=2_000_000,
+            max_output_tokens_per_minute=400_000,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-4-opus-20250514',
         provider='Anthropic',
@@ -190,6 +244,62 @@ SUPPORTED_MODELS = [
             max_output_tokens_per_minute=80_000,
         ),
     ),
+    AnthropicModelInfo(
+        model_id='claude-haiku-4-5@20251001',
+        alias_for='claude-haiku-4-5-20251001',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Haiku model served on VertexAI (10/15/2025).',
+        release_date=datetime.datetime(2025, 10, 15),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.1,
+            cost_per_1m_input_tokens=1,
+            cost_per_1m_output_tokens=5,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=2500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
+    AnthropicModelInfo(
+        model_id='claude-sonnet-4-5@20250929',
+        alias_for='claude-sonnet-4-5-20250929',
+        provider='VertexAI',
+        in_service=True,
+        description='Claude 4.5 Sonnet model (9/29/2025).',
+        release_date=datetime.datetime(2025, 9, 29),
+        input_modalities=(
+            AnthropicModelInfo.INPUT_IMAGE_TYPES
+            + AnthropicModelInfo.INPUT_DOC_TYPES
+        ),
+        context_length=lf.ModelInfo.ContextLength(
+            max_input_tokens=200_000,
+            max_output_tokens=64_000,
+        ),
+        pricing=lf.ModelInfo.Pricing(
+            # For global endpoint
+            cost_per_1m_cached_input_tokens=0.3,
+            cost_per_1m_input_tokens=3,
+            cost_per_1m_output_tokens=15,
+        ),
+        rate_limits=AnthropicModelInfo.RateLimits(
+            # For global endpoint
+            max_requests_per_minute=1500,
+            max_input_tokens_per_minute=200_000,
+            max_output_tokens_per_minute=0,
+        ),
+    ),
     AnthropicModelInfo(
         model_id='claude-opus-4@20250514',
         alias_for='claude-opus-4-20250514',
@@ -540,9 +650,34 @@ _SUPPORTED_MODELS_BY_MODEL_ID = {m.model_id: m for m in SUPPORTED_MODELS}

 @lf.use_init_args(['model'])
 class Anthropic(rest.REST):
-  """Anthropic
+  """Anthropic Claude models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call Claude 3.5 Sonnet using API key from environment variable
+  # 'ANTHROPIC_API_KEY'.
+  lm = lf.llms.Claude35Sonnet()
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Anthropic API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.Claude35Sonnet(api_key='MY_API_KEY')
+
+  2. via environment variable `ANTHROPIC_API_KEY`.

-
+  **References:**
+
+  * https://docs.anthropic.com/claude/reference/messages_post
   """

   model: pg.typing.Annotated[
@@ -658,6 +793,8 @@ class Anthropic(rest.REST):
     args.pop('temperature', None)
     args.pop('top_k', None)
     args.pop('top_p', None)
+    if options.extras:
+      args.update(options.extras)
     return args

   def result(self, json: dict[str, Any]) -> lf.LMSamplingResult:
@@ -679,6 +816,24 @@ class Anthropic(rest.REST):
     return super()._error(status_code, content)


+class Claude45(Anthropic):
+  """Base class for Claude 4.5 models."""
+
+
+# pylint: disable=invalid-name
+class Claude45Haiku_20251001(Claude45):
+  """Claude 4.5 Haiku model 20251001."""
+
+  model = 'claude-haiku-4-5-20251001'
+
+
+# pylint: disable=invalid-name
+class Claude45Sonnet_20250929(Claude45):
+  """Claude 4.5 Sonnet model 20250929."""
+
+  model = 'claude-sonnet-4-5-20250929'
+
+
 class Claude4(Anthropic):
   """Base class for Claude 4 models."""

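A short usage sketch for the new classes above. Per the `SUPPORTED_MODELS` entries, the Anthropic endpoint uses `-`-dated model ids while the Vertex AI aliases use `@` (e.g. `claude-haiku-4-5@20251001` aliasing `claude-haiku-4-5-20251001`); API-key handling follows the class docstring added above:

```python
import langfun as lf

# Uses the ANTHROPIC_API_KEY environment variable, per the class docstring.
haiku = lf.llms.Claude45Haiku_20251001()
sonnet = lf.llms.Claude45Sonnet_20250929()

print(haiku.model)   # 'claude-haiku-4-5-20251001'
print(sonnet.model)  # 'claude-sonnet-4-5-20250929'

r = sonnet('Who are you?')
print(r)
```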
langfun/core/llms/azure_openai.py
CHANGED

@@ -23,23 +23,35 @@ import pyglove as pg
 @lf.use_init_args(['model', 'deployment_name'])
 @pg.members([('api_endpoint', pg.typing.Str().freeze(''))])
 class AzureOpenAI(openai.OpenAI):
-  """Azure OpenAI
-  … (16 old lines not shown in the source view)
+  """Azure OpenAI models.
+
+  **Quick Start:**
+
+  ```python
+  import langfun as lf
+
+  # Call GPT-4o on Azure using API key from environment variable
+  # 'AZURE_OPENAI_API_KEY'.
+  lm = lf.llms.AzureOpenAI(
+      model='gpt-4o',
+      deployment_name='my-gpt4o-deployment',
+      api_version='2024-08-01-preview',
+      azure_endpoint='https://my-resource.openai.azure.com/',
+  )
+  r = lm('Who are you?')
+  print(r)
+  ```
+
+  **Setting up API key:**
+
+  The Azure OpenAI API key can be specified in following ways:
+
+  1. At model instantiation:
+
+  ```python
+  lm = lf.llms.AzureOpenAI(..., api_key='MY_API_KEY')
+  ```
+  2. via environment variable `AZURE_OPENAI_API_KEY`.
   """

   deployment_name: Annotated[
langfun/core/llms/cache/base.py
CHANGED
@@ -22,13 +22,33 @@ import langfun.core as lf

 @dataclasses.dataclass(frozen=True)
 class LMCacheEntry:
-  """
+  """Represents a single entry in the language model cache.
+
+  An `LMCacheEntry` stores the result of a language model sampling operation
+  and an optional expiration timestamp.
+  """
   result: lf.LMSamplingResult
   expire: datetime.datetime | None = None


 class LMCacheBase(lf.LMCache):
-  """
+  """Base class for language model cache implementations.
+
+  `LMCacheBase` provides the core logic for a key-value based cache,
+  handling key generation, expiration (TTL), and statistics tracking.
+  Subclasses must implement the abstract methods `_get`, `_put`, and `_delete`
+  to provide the specific storage mechanism (e.g., in-memory, file-based).
+
+  **Key Features:**
+
+  * **Customizable Keying**: Allows specifying a custom function to generate
+    cache keys based on the language model, prompt, and seed. If not provided,
+    a default key based on prompt text, sampling options, and seed is used.
+  * **Time-to-Live (TTL)**: Supports setting an expiration time for cache
+    entries, after which they are considered invalid and removed upon access.
+  * **Cache Statistics**: Tracks metrics like hits, misses, updates,
+    deletions, and expired hits through the `stats` property.
+  """

   key: Annotated[
       Callable[[lf.LanguageModel, lf.Message, int], Any] | None,
@@ -121,4 +141,6 @@ class LMCacheBase(lf.LMCache):

 def default_key(lm: lf.LanguageModel, prompt: lf.Message, seed: int) -> Any:
   """Default key for LM cache."""
-  … (1 old line not shown in the source view)
+  # prompt text already contains the modality id for referenced modality
+  # objects, so no need to include them in the key.
+  return (prompt.text, lm.sampling_options.cache_key(), seed)
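A small sketch of the new default cache key; `Echo` comes from the test-model exports shown earlier, and the middle element is whatever `LMSamplingOptions.cache_key()` returns:

```python
import langfun as lf
from langfun.core.llms.cache import base

lm = lf.llms.Echo()
prompt = lf.UserMessage('1 + 1 =')

# (prompt text, sampling-options cache key, seed). Modalities need no special
# handling because their ids are already embedded in the prompt text.
key = base.default_key(lm, prompt, seed=0)
print(key)
```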