langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +1 -1
- langfun/core/__init__.py +7 -1
- langfun/core/agentic/__init__.py +8 -1
- langfun/core/agentic/action.py +740 -112
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +189 -24
- langfun/core/async_support.py +104 -5
- langfun/core/async_support_test.py +23 -0
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +9 -2
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +11 -2
- langfun/core/data/conversion/gemini_test.py +48 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +48 -44
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +3 -0
- langfun/core/eval/v2/checkpointing.py +148 -46
- langfun/core/eval/v2/checkpointing_test.py +9 -2
- langfun/core/eval/v2/config_saver.py +37 -0
- langfun/core/eval/v2/config_saver_test.py +36 -0
- langfun/core/eval/v2/eval_test_helper.py +104 -3
- langfun/core/eval/v2/evaluation.py +102 -19
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +50 -40
- langfun/core/eval/v2/example_test.py +16 -8
- langfun/core/eval/v2/experiment.py +95 -20
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +31 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking.py +13 -5
- langfun/core/eval/v2/progress_tracking_test.py +9 -1
- langfun/core/eval/v2/reporting.py +88 -71
- langfun/core/eval/v2/reporting_test.py +24 -6
- langfun/core/eval/v2/runners/__init__.py +30 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
- langfun/core/eval/v2/runners/beam.py +354 -0
- langfun/core/eval/v2/runners/beam_test.py +153 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +76 -0
- langfun/core/eval/v2/runners/parallel.py +243 -0
- langfun/core/eval/v2/runners/parallel_test.py +182 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +169 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +7 -5
- langfun/core/language_model.py +189 -36
- langfun/core/language_model_test.py +54 -3
- langfun/core/llms/__init__.py +14 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +90 -12
- langfun/core/llms/gemini_test.py +110 -0
- langfun/core/llms/google_genai.py +52 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +120 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +16 -1
- langfun/core/llms/vertexai.py +78 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/__init__.py +10 -0
- langfun/core/mcp/client.py +177 -0
- langfun/core/mcp/client_test.py +71 -0
- langfun/core/mcp/session.py +241 -0
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/testing/simple_mcp_client.py +33 -0
- langfun/core/mcp/testing/simple_mcp_server.py +33 -0
- langfun/core/mcp/tool.py +254 -0
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +73 -3
- langfun/core/modalities/image_test.py +116 -0
- langfun/core/modalities/mime.py +78 -4
- langfun/core/modalities/mime_test.py +59 -0
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +230 -154
- langfun/core/structured/querying_test.py +69 -33
- langfun/core/structured/schema/__init__.py +49 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +43 -0
- langfun/env/base_environment.py +827 -0
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +304 -0
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +842 -0
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +14 -0
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +472 -0
- langfun/env/event_handlers/event_logger_test.py +304 -0
- langfun/env/event_handlers/metric_writer.py +726 -0
- langfun/env/event_handlers/metric_writer_test.py +214 -0
- langfun/env/interface.py +1640 -0
- langfun/env/interface_test.py +153 -0
- langfun/env/load_balancers.py +59 -0
- langfun/env/load_balancers_test.py +141 -0
- langfun/env/test_utils.py +507 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
- langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0
langfun/core/langfunc.py
CHANGED

@@ -32,146 +32,43 @@ _TLS_LFUN_CALL_STACK = '_langfunc_callstack'
 # NOTE(daiyip): Only the template string belongs to the positional arguments,
 # all others are keyword-only for clarity.
 @pg.use_init_args(['template_str'])
-class LangFunc(
-
-):
-  r"""Base class for natural-language driven component.
-
-  ``LangFunc`` is a language-driven component that enables users to
-  seamlessly interact with Language Models (LLMs) using a blend of natural
-  language and code. It empowers users to easily modularize prompt/execution
-  logics, compose them, and simplify the creation of Language Model (LLM)-based
-  components and applications.
-
-  LangFunc can be conceptualized as a string template with embeddable code,
-  but it distinguishes itself from traditional template systems in four key
-  ways.
-
-  Firstly, it enables easy modularization of templates along with the required
-  values with OO principles, providing a reusable way for LLM-based content
-  generation. For example:
-
-    ```
-    class FewshotExamples(lf.LangFunc):
-      '''Base for fewshot prompt.
-
-      {% for example in examples %}
-      {{ example }}
-      {% endfor %}
-      '''
-
-    # Usage 1: __init__ time binding.
-    assert FewshotPrompt(examples=['foo', 'bar'])() == 'foo\nbar'
-
-    # Usage 2: __call__ time binding.
-    assert FewshotPrompt()(examples=['foo', 'bar']) == 'foo\nbar'
-
-    class ToolDescription(lf.LangFunc):
-      '''Tool descriptions.
-
-      {% for tool in tools %}
-      {{ tool.description }}
-      {% endfor %}
-      '''
-      # We want to constrain tools to be a list of `Tool` objects.
-      tools: list[Tool]
-
-    # Raises: runtime type checking will fail on [1, 2, 3].
-    ToolDescription(tools=[1, 2, 3])
-    ```
-
-  Secondly, it has the capability to compose multiple LangFuncs together,
-  enabling the accomplishment of complex language tasks with maximum reuse.
-  It allows users to provide program inputs to all the LangFuncs within a
-  composition at the top level, significantly simplifying the process of
-  providing context for users. For example:
-
-    ```
-    class ReAct(lf.LangFunc):
-      '''ReAct prompt for tool-use.
-
-      {{ preamble }}
-      {{ tool_description }}
-      {{ tool_examples }}
-      {{ user_input }}
-      '''
-      # Default preamble, which could be overriden from subclass
-      # or parsed from the `__init__` argument.
-      preamble = 'Please help me on my task based on the following tools.',
-
-    react = ReAct(
-        tool_description=ToolDescription()
-        tool_examples=FewshotExamples(),
-        # Partially bind `tools` and `examples`.
-        tools=my_tools,
-        examples=[t.examples for t in my_tools]
-        )
-
-    # Late bind `user_input` at __call__ time.
-    react(user_input='Help me get a lunch to go, veggie please.' )
-    ```
-
-  Thirdly, it allows the flexibility to encapsulate complex compositions to
-  reusable classes and modify them. For example:
-
-    ```
-    # The compound decorator converts a function into a LangFunc.
-    @lf.compound
-    def react_with_tools(preamble, tools: list[Tool]):
-      return ReAct(
-          preamble=preamble,
-          tool_description=ToolDescription()
-          tool_examples=FewshotExamples(),
-          # Partially bind `tools` and `examples`.
-          tools=my_tools,
-          examples=[t.examples for t in my_tools]
-      )
-
-    '''
-
-    prompt=react_with_tools(
-        preamble=(
-            f'Please help me solve my problem using tools. '
-            f'Current time is {{datetime.datetime.now()}}'),
-        tools=my_tools))
-
-  it could be manipulated programmatically, turned into a space for data
-  sampling, or even tuned by AutoML. For example:
-
-      'Help me solve my problem using the following tools:',
-      'Help me with the tools below:',
-      ...
-    ])
-    # Choose any two of the tools for generating data.
-    tools=pg.manyof(2, [
-      google_search(...),
-      doordash(...),
-      ...
-    ])
+class LangFunc(template_lib.Template):
+  r"""Base class for Language-based functions.
+
+  LangFunc represents a function powered by a language model. It is a subclass
+  of `lf.Template` and can be thought of as a `lf.Template` augmented with an LM
+  and an output transformation. Calling a `lf.LangFunc` is equivalent to calling
+  the LM with the rendered prompt and transforming the output.
+
+  LangFunc can be directly constructed and used.
+
+  ```python
+  import langfun as lf
+
+  func = lf.LangFunc("Hello, {{name}}!")
+  print(func(name="Gemini", lm=lf.llms.Gemini25Flash()))
+  # Output: Hello, how are you today?
+  ```
+
+  Or it can be subclassed:
+
+  ```python
+  import langfun as lf
+
+  class Compute(lf.LangFunc):
+    '''Compute a simple arithmetic expression.
+
+    {{expression}} = ?
+    '''
+    expression: str
+
+    def transform_output(self, lm_output: lf.Message) -> lf.Message:
+      lm_output.metadata.result = float(lm_output.text)
+      return lm_output
+
+  r = Compute(expression="1 + 1")(lm=lf.llms.Gemini25Flash())
+  print(r.result)
+  # Output: 2.0

   Final note: always include these capitalized words if you don't want to treat
   the docstr as the template str: THIS IS NOT A TEMPLATE. So as a result, this

@@ -305,6 +202,24 @@ class LangFunc(
     message_cls: Type[message_lib.Message] = message_lib.UserMessage,
     **kwargs,
   ) -> message_lib.Message:
+    """Renders the template and transforms it as LM input message.
+
+    Args:
+      allow_partial: If True, allows partial rendering, which leaves unresolved
+        variables in place in the output text. Otherwise, raises error when
+        there are unresolved variables.
+      implicit: If True, reuse the rendering output if a parent `lf.Template`
+        is rendering current `lf.Template` multiple times. This is important
+        for making sure all references to the same `lf.Template` within a single
+        top-level rendering would return the same result. If False, every call
+        to `render` will trigger the actual rendering process.
+      message_cls: The message class used for creating the return value.
+      **kwargs: Values for template variables, which override values from
+        member attributes or context.
+
+    Returns:
+      A Message object containing the rendered result.
+    """
     lm_input = super().render(
         allow_partial=allow_partial,
         implicit=implicit,
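A minimal sketch of the `render()` contract documented above (assuming only the public `lf.LangFunc` API; this snippet is not part of the diff): `allow_partial=True` keeps unresolved variables in the rendered text, while keyword arguments passed to `render` bind template variables, overriding member attributes and context.

```python
import langfun as lf

greeting = lf.LangFunc('Hello, {{name}}!')

# Partial rendering: `name` is unbound, so it remains unresolved in the
# rendered output text instead of raising an error.
print(greeting.render(allow_partial=True))

# Full rendering: bind `name` via **kwargs.
print(greeting.render(name='Gemini'))  # -> Hello, Gemini!
```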
langfun/core/langfunc_test.py
CHANGED

@@ -82,7 +82,7 @@ class LangFuncCallTest(unittest.TestCase):
 
     i = l.render()
     self.assertEqual(i, 'Hello')
-    self.assertEqual(i, message.UserMessage('Hello'))
+    self.assertEqual(i, message.UserMessage('Hello', __template_input__={}))
     self.assertEqual(i.tags, ['rendered'])
 
     r = l()

@@ -96,7 +96,9 @@ class LangFuncCallTest(unittest.TestCase):
     self.assertEqual(r.tags, ['lm-response', 'lm-output'])
     self.assertEqual(
         r.source,
-        message.UserMessage(
+        message.UserMessage(
+            'Hello', metadata=dict(cache_seed=0, __template_input__={})
+        )
     )
     self.assertEqual(r.source.tags, ['rendered', 'lm-input'])
 

@@ -107,9 +109,9 @@ class LangFuncCallTest(unittest.TestCase):
         ' lm=ExcitedEchoer(sampling_options=LMSamplingOptions(temperature=None,'
         ' max_tokens=None, n=1, top_k=40, top_p=None, stop=None,'
         ' random_seed=None, logprobs=False, top_logprobs=None,'
-        ' max_thinking_tokens=None,
-        ' max_concurrency=None, timeout=120.0,
-        ' retry_interval=(5, 60), exponential_backoff=True,'
+        ' max_thinking_tokens=None, thinking_level=None, reasoning_effort=None,'
+        ' extras={}), cache=None, max_concurrency=None, timeout=120.0,'
+        ' max_attempts=5, retry_interval=(5, 60), exponential_backoff=True,'
         ' max_retry_interval=300, debug=False))',
     )
 
langfun/core/language_model.py
CHANGED

@@ -53,6 +53,10 @@ class RetryableLMError(LMError):
   """Base class for LLM errors that can be solved by retrying."""
 
 
+class EmptyGenerationError(RetryableLMError):
+  """Error for empty generaition."""
+
+
 class RateLimitError(RetryableLMError):
   """Error for rate limit reached."""
 

@@ -478,7 +482,7 @@ class UsageNotAvailable(LMSamplingUsage):
 
 
 class LMSamplingResult(pg.Object):
-  """
+  """The result from a language model sampling."""
 
   samples: Annotated[
       list[LMSample],

@@ -575,6 +579,14 @@ class LMSamplingOptions(component.Component):
       int | None, 'Number of max thinking tokens.'
   ] = None
 
+  thinking_level: Annotated[
+      Literal['low', 'high'] | None,
+      (
+          'Thinking level for Gemini models. High is for complex tasks, '
+          'while low is for faster responses.'
+      ),
+  ] = None
+
   reasoning_effort: Annotated[
       Literal['low', 'medium', 'high'] | None,
       (

@@ -584,6 +596,15 @@ class LMSamplingOptions(component.Component):
       ),
   ] = None
 
+  extras: Annotated[
+      dict[str, Any],
+      (
+          'Extra arguments (e.g. configuration for tool calls) to pass to '
+          'the model. This is model-specific, please check model '
+          'implementation to see how to use this.'
+      ),
+  ] = {}
+
   def cache_key(self) -> tuple[Any, ...]:
     """Returns a tuple of current values as cache key."""
     return (

@@ -672,13 +693,91 @@ class LMDebugMode(enum.IntFlag):
 
 
 class LanguageModel(component.Component):
-  """Interface
-  (removed lines not shown)
+  """Interface for language model.
+
+  `lf.LanguageModel` is the cornerstone of Langfun, providing a consistent
+  interface for interacting with various language models, such as those from
+  Google, OpenAI, Anthropic, and more. It abstracts away provider-specific
+  details, allowing users to switch between models seamlessly.
+
+  All language models in Langfun can be accessed via `lf.llms`. For example,
+  `lf.llms.Gpt4()` creates an instance for OpenAI's GPT-4, and
+  `lf.llms.GeminiPro()` creates an instance for Google's Gemini Pro.
+
+  **Key Features:**
+
+  * **Unified API**: Provides `sample`, `score`, and `tokenize` methods
+    across all supported models.
+  * **Sampling**: The `__call__` method and `sample` method allow generating
+    text completions or chat responses.
+  * **Scoring**: The `score` method computes the likelihood of completions
+    given a prompt.
+  * **Tokenization**: The `tokenize` method breaks text into tokens
+    according to the model's tokenizer.
+  * **Caching**: Built-in support for caching LLM requests to save cost and
+    time via the `cache` attribute.
+  * **Concurrency**: Manages concurrency to respect API rate limits via
+    `max_concurrency`.
+  * **Retries**: Automatic retries with exponential backoff for transient
+    errors via `max_attempts` and `retry_interval`.
+
+  **1. Creating a Language Model:**
+  You can create a language model by instantiating its class or by using
+  `lf.LanguageModel.get`:
+
+  ```python
+  # Direct instantiation
+  gpt4 = lf.llms.Gpt4()
+  gemini = lf.llms.GeminiPro()
+
+  # Creation via lf.LanguageModel.get()
+  gpt4 = lf.LanguageModel.get('gpt-4')
+  ```
+
+  **2. Customizing Sampling Options:**
+  Sampling options like `temperature`, `max_tokens`, etc., can be customized
+  at model creation, or overridden at call time or via `lf.context`.
+
+  ```python
+  # Set temperature to 0 at model creation
+  lm = lf.llms.Gpt4(temperature=0.0)
+
+  # Override temperature to 0.5 for a single call
+  response = lm('1 + 1 =', temperature=0.5)
+
+  # Override temperature to 1.0 using lf.context
+  with lf.context(temperature=1.0):
+    response = lm('1 + 1 =')
+  ```
+
+  **3. Sampling:**
+  Use `lm()`, `lm.sample()`, or `lf.query()` to generate text:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  response = lm('1 + 1 =')
+  print(response.text)
+  # Output: 2
+  ```
+
+  **4. Scoring:**
+  Use `lm.score()` to score completions:
+
+  ```python
+  lm = lf.llms.Gpt4()
+  results = lm.score('Weather in SF is', completions=['sunny', 'cloudy'])
+  print(results[0].score)
+  # Output: -1.0
+  ```
+
+  **5. Tokenization:**
+  Use `lm.tokenize()` to get tokens:
+  ```python
+  lm = lf.llms.Gpt4()
+  tokens = lm.tokenize('hello world')
+  print(tokens)
+  # Output: [('hello', 15339), (' world', 1917)]
+  ```
   """
 
   sampling_options: LMSamplingOptions = LMSamplingOptions()

@@ -989,10 +1088,32 @@ class LanguageModel(component.Component):
     prompts = [message_lib.UserMessage.from_value(p) for p in prompts]
 
     with component.context(override_attrs=True, **kwargs):
-      (removed lines not shown)
+
+      def _sample_with_retry():
+        if self.cache is None:
+          results = self._sample(prompts)
+        else:
+          results = self._sample_with_cache_lookup(prompts, cache_seed)
+
+        for i, result in enumerate(results):
+          for sample in result.samples:
+            if not sample.response.text:
+              if self.cache is not None:
+                self.cache.delete(self, prompts[i], seed=cache_seed)
+              raise EmptyGenerationError(
+                  f'Empty generation encountered from model {self.model_id}.'
+              )
+        return results
+
+      retry_fn = concurrent.with_retry(
+          _sample_with_retry,
+          retry_on_errors=EmptyGenerationError,
+          max_attempts=self.max_attempts,
+          retry_interval=self.retry_interval,
+          exponential_backoff=self.exponential_backoff,
+          max_retry_interval=self.max_retry_interval,
+      )
+      results = retry_fn()
 
     for prompt, result in zip(prompts, results):
 

@@ -1001,7 +1122,6 @@ class LanguageModel(component.Component):
 
       for sample in result.samples:
         # Update metadata for response message.
-
         response = sample.response
         response.metadata.score = sample.score
         response.metadata.logprobs = sample.logprobs

@@ -1159,21 +1279,35 @@ class LanguageModel(component.Component):
   ) -> message_lib.Message:
     """Returns the first candidate."""
     prompt = message_lib.UserMessage.from_value(prompt)
-    (removed lines not shown)
+    start_time = time.time()
+    error_tag = ''
+    try:
+      with component.context(override_attrs=True, **kwargs):
+        sampling_options = self.sampling_options
+        if sampling_options.n != 1:
+          sampling_options = sampling_options.clone(override=dict(n=1))
+
+        call_counter = self._call_counter
+        self._call_counter += 1
+        request_start = time.time()
+        result = self.sample(
+            [prompt], sampling_options=sampling_options, cache_seed=cache_seed
+        )[0]
+        elapse = time.time() - request_start
+        response = result.samples[0].response
+        self._debug(prompt, response, call_counter, result.usage, elapse)
+        return response
+    except BaseException as e:
+      error_tag = pg.ErrorInfo.from_exception(e).tag
+      raise e
+    finally:
+      _METRICS.language_model_calls.increment(
+          model=self.model_id, error=error_tag
+      )
+      _METRICS.language_model_call_duration_ms.record(
+          int((time.time() - start_time) * 1000),
+          model=self.model_id, error=error_tag,
+      )
 
   def _debug(
       self,

@@ -1230,11 +1364,11 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT SENT TO LM{title_suffix}:',
           color='green',
       )
-
-      if referred_modalities:
+      if prompt.referred_modalities:
         console.write(
             pg.object_utils.kvlist_str(
-                [(k, repr(v), None)
+                [(k, repr(v), None)
+                 for k, v in prompt.referred_modalities.items()]
             ),
             title=f'\n[{call_counter}] MODALITY OBJECTS SENT TO LM:',
             color='green',

@@ -1320,9 +1454,9 @@ class LanguageModel(component.Component):
           color='green',
       )
       if isinstance(prompt, list):
-        referred_modalities_lst = [p.referred_modalities
+        referred_modalities_lst = [p.referred_modalities for p in prompt]
       else:
-        referred_modalities_lst = [prompt.referred_modalities
+        referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(

@@ -1397,7 +1531,7 @@ class LanguageModel(component.Component):
           title=f'\n[{call_counter}] PROMPT TO TOKENIZE:',
           color='green',
       )
-      referred_modalities_lst = [prompt.referred_modalities
+      referred_modalities_lst = [prompt.referred_modalities,]
       if referred_modalities_lst:
         for referred_modalities in referred_modalities_lst:
           console.write(

@@ -1425,7 +1559,7 @@ class LanguageModel(component.Component):
       max_requests_per_minute: int | None,
       average_tokens_per_request: int = 250
   ) -> int | None:
-    """Estimates max concurrency
+    """Estimates max concurrency based on the rate limits."""
     # NOTE(daiyip): max concurrency is estimated based on the rate limit.
     # We assume each request has approximately 250 tokens, and each request
     # takes 1 second to complete. This might not be accurate for all models.

@@ -1438,6 +1572,25 @@ class LanguageModel(component.Component):
       return None
 
 
+class _Metrics:
+  """Metrics for Langfun."""
+
+  def __init__(self):
+    self._metrics = pg.monitoring.metric_collection('/third_party/langfun')
+    self.language_model_calls = self._metrics.get_counter(
+        'language_model_calls',
+        'Number of calls to the language model.',
+        parameters={'model': str, 'error': str},
+    )
+    self.language_model_call_duration_ms = self._metrics.get_distribution(
+        'language_model_call_duration_ms',
+        'Duration of calls to the language model in milliseconds.',
+        parameters={'model': str, 'error': str},
+    )
+
+
+_METRICS = _Metrics()
+
+
 class _ConcurrencyControl:
   """Controls the max concurrent LLM calls for a given model."""
 

@@ -1479,7 +1632,7 @@ class _ConcurrencyControl:
 
 
 class UsageSummary(pg.Object, pg.views.HtmlTreeView.Extension):
-  """Usage
+  """Usage summary."""
 
   class AggregatedUsage(pg.Object):
     """Aggregated usage."""
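The `LMSamplingOptions` hunks above add two fields: `thinking_level` (for Gemini models) and `extras` (a model-specific pass-through dict). A minimal usage sketch, assuming these are set like the pre-existing options; the model class and the `extras` key below are illustrative placeholders, not taken from the diff:

```python
import langfun as lf

options = lf.LMSamplingOptions(
    temperature=0.0,
    thinking_level='low',           # new in this release: 'low' | 'high' | None
    extras={'example_flag': True},  # new: model-specific extra arguments
)

# Sampling options attach to a model as before; per the new LanguageModel
# docstring they can also be overridden at call time or via lf.context.
lm = lf.llms.Gemini25Flash(sampling_options=options)  # model class assumed
```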
langfun/core/language_model_test.py
CHANGED

@@ -591,6 +591,51 @@ class LanguageModelTest(unittest.TestCase):
     lm = MockModel(cache=cache, top_k=1)
     self.assertEqual(lm('a'), 'a')
 
+  def test_empty_generation_error(self):
+    class MockModelWithEmptyResponse(MockModel):
+      def _sample(self,
+                  prompts: list[message_lib.Message]
+                  ) -> list[lm_lib.LMSamplingResult]:
+        return [lm_lib.LMSamplingResult(
+            [lm_lib.LMSample(response='')],
+            usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0)
+        )]
+    lm = MockModelWithEmptyResponse(max_attempts=1, retry_interval=0)
+    with self.assertRaisesRegex(
+        concurrent.RetryError, 'Empty generation encountered'
+    ):
+      lm('a')
+
+  def test_empty_generation_retry(self):
+    class MockModelWithEmptyThenValid(MockModel):
+      attempt_count: int = 0
+
+      def _sample(
+          self, prompts: list[message_lib.Message]
+      ) -> list[lm_lib.LMSamplingResult]:
+        self.rebind(attempt_count=self.attempt_count + 1)
+        if self.attempt_count == 1:
+          # First attempt returns empty
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='')],
+                  usage=lm_lib.LMSamplingUsage(100, 0, 100, 1, 1.0),
+              )
+          ]
+        else:
+          # Subsequent attempts return valid response
+          return [
+              lm_lib.LMSamplingResult(
+                  [lm_lib.LMSample(response='valid response')],
+                  usage=lm_lib.LMSamplingUsage(100, 100, 200, 1, 1.0),
+              )
+          ]
+
+    lm = MockModelWithEmptyThenValid(max_attempts=3, retry_interval=0)
+    result = lm('a')
+    self.assertEqual(result.text, 'valid response')
+    self.assertEqual(lm.attempt_count, 2)
+
   def test_estimate_max_concurrency(self):
     self.assertIsNone(lm_lib.LanguageModel.estimate_max_concurrency(None, None))
     self.assertEqual(

@@ -656,11 +701,17 @@ class LanguageModelTest(unittest.TestCase):
 
     string_io = io.StringIO()
     lm = MockModel(sampling_options=lm_lib.LMSamplingOptions(top_k=1))
+    image = Image()
     with contextlib.redirect_stdout(string_io):
       self.assertEqual(
-          lm(
-          (removed lines not shown)
+          lm(
+              message_lib.UserMessage(
+                  f'hi <<[[{image.id}]]>>',
+                  referred_modalities=[image],
+              ),
+              debug=True
+          ),
+          f'hi <<[[{image.id}]]>>'
       )
 
     debug_info = string_io.getvalue()
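The tests above exercise the new empty-generation handling in `LanguageModel.sample`: an empty response raises `EmptyGenerationError`, which is retried up to `max_attempts` and surfaces as `concurrent.RetryError` once retries are exhausted. A minimal sketch of that behavior using the public API, assuming the `lf.llms.StaticResponse` fake model behaves like the mock in the test (always returning the configured text):

```python
import langfun as lf
from langfun.core import concurrent

# A fake model that always returns an empty string triggers the new
# empty-generation retry path; with a single attempt it gives up immediately.
empty_lm = lf.llms.StaticResponse('', max_attempts=1, retry_interval=0)

try:
  empty_lm('hello')
except concurrent.RetryError as e:
  print('Gave up after retries:', e)
```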