langfun 0.1.2.dev202511030805__py3-none-any.whl → 0.1.2.dev202511050805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langfun might be problematic. Click here for more details.
- langfun/core/agentic/action.py +76 -9
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +10 -3
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/openai.py +17 -7
- langfun/core/eval/base.py +46 -42
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/checkpointing.py +30 -4
- langfun/core/eval/v2/evaluation.py +59 -13
- langfun/core/eval/v2/example.py +22 -11
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +23 -3
- langfun/core/eval/v2/metrics.py +33 -4
- langfun/core/eval/v2/progress.py +9 -1
- langfun/core/eval/v2/reporting.py +15 -1
- langfun/core/eval/v2/runners.py +27 -7
- langfun/core/langfunc.py +45 -130
- langfun/core/language_model.py +88 -10
- langfun/core/llms/anthropic.py +27 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +22 -2
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +29 -1
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +9 -1
- langfun/core/llms/google_genai.py +29 -1
- langfun/core/llms/groq.py +27 -2
- langfun/core/llms/llama_cpp.py +22 -3
- langfun/core/llms/openai.py +29 -1
- langfun/core/llms/openai_compatible.py +18 -6
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +39 -6
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/session.py +90 -10
- langfun/core/mcp/tool.py +83 -23
- langfun/core/memory.py +1 -0
- langfun/core/message.py +75 -11
- langfun/core/message_test.py +9 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +54 -4
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +66 -5
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/structured/completion.py +32 -37
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +70 -15
- langfun/core/structured/parsing.py +90 -74
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +201 -130
- langfun/core/structured/schema.py +70 -10
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +45 -34
- langfun/core/structured/tokenization.py +24 -9
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +139 -40
- langfun/core/template_test.py +40 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/RECORD +77 -77
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/top_level.txt +0 -0
|
@@ -23,7 +23,7 @@ import pyglove as pg
|
|
|
23
23
|
|
|
24
24
|
@pg.use_init_args(['examples'])
|
|
25
25
|
class _DescribeStructure(mapping.Mapping):
|
|
26
|
-
"""
|
|
26
|
+
"""Describes a structured value in natural language."""
|
|
27
27
|
|
|
28
28
|
input_title = 'PYTHON_OBJECT'
|
|
29
29
|
context_title = 'CONTEXT_FOR_DESCRIPTION'
|
|
@@ -47,64 +47,68 @@ def describe(
|
|
|
47
47
|
cache_seed: int | None = 0,
|
|
48
48
|
**kwargs,
|
|
49
49
|
) -> str:
|
|
50
|
-
"""Describes a structured value
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
50
|
+
"""Describes a structured value in natural language using an LLM.
|
|
51
|
+
|
|
52
|
+
`lf.describe` takes a Python object, often a `pg.Object` instance,
|
|
53
|
+
and uses a language model to generate a human-readable, natural language
|
|
54
|
+
description of its content. It is the inverse of `lf.parse`.
|
|
55
|
+
|
|
56
|
+
**Example:**
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import langfun as lf
|
|
60
|
+
import pyglove as pg
|
|
61
|
+
|
|
62
|
+
class FlightDuration(pg.Object):
|
|
63
|
+
hours: int
|
|
64
|
+
minutes: int
|
|
65
|
+
|
|
66
|
+
class Flight(pg.Object):
|
|
67
|
+
airline: str
|
|
68
|
+
flight_number: str
|
|
69
|
+
departure_airport: str
|
|
70
|
+
arrival_airport: str
|
|
71
|
+
departure_time: str
|
|
72
|
+
arrival_time: str
|
|
73
|
+
duration: FlightDuration
|
|
74
|
+
stops: int
|
|
75
|
+
price: float
|
|
76
|
+
|
|
77
|
+
flight_info = Flight(
|
|
78
|
+
airline='United Airlines',
|
|
79
|
+
flight_number='UA2631',
|
|
80
|
+
departure_airport='SFO',
|
|
81
|
+
arrival_airport='JFK',
|
|
82
|
+
departure_time='2023-09-07T05:15:00',
|
|
83
|
+
arrival_time='2023-09-07T12:12:00',
|
|
84
|
+
duration=FlightDuration(hours=7, minutes=57),
|
|
85
|
+
stops=1,
|
|
86
|
+
price=227,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
description = lf.describe(flight_info, lm=lf.llms.Gemini25Flash())
|
|
90
|
+
print(description)
|
|
91
|
+
# Possible output:
|
|
92
|
+
# The flight is operated by United Airlines, with the flight number UA2631,
|
|
93
|
+
# departing from SFO at 2023-09-07T05:15:00 and arriving at JFK at
|
|
94
|
+
# 2023-09-07T12:12:00. The flight duration is 7 hours and 57 minutes,
|
|
95
|
+
# with 1 stop, and costs $227.
|
|
96
|
+
```
|
|
93
97
|
|
|
94
98
|
Args:
|
|
95
99
|
value: A structured value to be mapped.
|
|
96
100
|
context: The context information for describing the structured value.
|
|
97
101
|
lm: The language model to use. If not specified, the language model from
|
|
98
102
|
`lf.context` context manager will be used.
|
|
99
|
-
examples: An optional list of fewshot examples for
|
|
100
|
-
|
|
103
|
+
examples: An optional list of few-shot examples for guiding description.
|
|
104
|
+
If None, default examples will be used.
|
|
101
105
|
cache_seed: Seed for computing cache key. The cache key is determined by a
|
|
102
106
|
tuple of (lm, prompt, cache seed). If None, cache will be disabled for
|
|
103
107
|
the query even if cache is configured by the LM.
|
|
104
|
-
**kwargs: Keyword arguments passed to the `
|
|
108
|
+
**kwargs: Keyword arguments passed to the `_DescribeStructure`.
|
|
105
109
|
|
|
106
110
|
Returns:
|
|
107
|
-
|
|
111
|
+
A natural language description of the input value.
|
|
108
112
|
"""
|
|
109
113
|
return _DescribeStructure(
|
|
110
114
|
input=value,
|
|
@@ -115,10 +119,10 @@ def describe(
|
|
|
115
119
|
|
|
116
120
|
|
|
117
121
|
def default_describe_examples() -> list[mapping.MappingExample]:
|
|
118
|
-
"""
|
|
122
|
+
"""Returns default examples for `lf.describe`."""
|
|
119
123
|
|
|
120
124
|
class Country(pg.Object):
|
|
121
|
-
"""
|
|
125
|
+
"""An example dataclass for structured mapping."""
|
|
122
126
|
|
|
123
127
|
name: str
|
|
124
128
|
continents: list[
|
|
@@ -26,10 +26,10 @@ import pyglove as pg
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def unittest_gen(signature, lm, num_retries=1):
|
|
29
|
-
"""Generates unit tests for a
|
|
29
|
+
"""Generates unit tests for a Python function signature."""
|
|
30
30
|
|
|
31
31
|
class UnitTest(pg.Object):
|
|
32
|
-
"""A valid unit test for a
|
|
32
|
+
"""A valid unit test for a Python function."""
|
|
33
33
|
|
|
34
34
|
input: dict[str, Any]
|
|
35
35
|
expected_output: Any
|
|
@@ -55,7 +55,7 @@ def unittest_gen(signature, lm, num_retries=1):
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
def unittest_with_test_cases(f, unittests):
|
|
58
|
-
"""Applies unit tests to a
|
|
58
|
+
"""Applies unit tests to a Python function to be tested."""
|
|
59
59
|
if not unittests:
|
|
60
60
|
raise ValueError(f"No unit tests provided: {unittests}")
|
|
61
61
|
|
|
@@ -87,10 +87,10 @@ def _function_gen(
|
|
|
87
87
|
] = None,
|
|
88
88
|
unittest_num_retries: int = 1,
|
|
89
89
|
):
|
|
90
|
-
"""Generates a
|
|
90
|
+
"""Generates a Python function with LLM and verifies it with unit testing."""
|
|
91
91
|
|
|
92
92
|
class PythonFunctionPrompt(template.Template):
|
|
93
|
-
r"""A template for a
|
|
93
|
+
r"""A template for a Python function generation.
|
|
94
94
|
|
|
95
95
|
Please reply to the last PYTHON_FUNCTION_SIGNATURE with a self-sufficient,
|
|
96
96
|
error-free, and efficiently coded PYTHON_FUNCTION, crafted to the standards
|
|
@@ -195,11 +195,28 @@ def function_gen(
|
|
|
195
195
|
] = None,
|
|
196
196
|
unittest_num_retries: int = 1,
|
|
197
197
|
):
|
|
198
|
-
"""
|
|
198
|
+
r"""Decorator for generating function implementations using an LLM.
|
|
199
199
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
200
|
+
`lf.function_gen` is a decorator that automatically generates the
|
|
201
|
+
implementation of a Python function based on its signature and docstring,
|
|
202
|
+
using the specified language model. This is useful for quickly prototyping
|
|
203
|
+
functions or generating boilerplate code.
|
|
204
|
+
|
|
205
|
+
The decorator can also automatically generate and run unit tests to verify
|
|
206
|
+
the correctness of the generated implementation.
|
|
207
|
+
|
|
208
|
+
**Example:**
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
import langfun as lf
|
|
212
|
+
|
|
213
|
+
@lf.function_gen(lm=lf.llms.Gemini25Flash())
|
|
214
|
+
def product(a: int, b: int) -> int:
|
|
215
|
+
\"\"\"Returns product of a and b.\"\"\"
|
|
216
|
+
|
|
217
|
+
print(product(2, 3))
|
|
218
|
+
# Output: 6
|
|
219
|
+
```
|
|
203
220
|
|
|
204
221
|
Args:
|
|
205
222
|
lm (lf.LanguageModel): The language model used for generating function
|
|
@@ -212,10 +229,10 @@ def function_gen(
|
|
|
212
229
|
tests. You can either provide a list of test cases as tuples of inputs
|
|
213
230
|
and outputs, or a function that throws an error if a test fails, or let
|
|
214
231
|
LLM automatically create the unit test cases. If a generated function is
|
|
215
|
-
|
|
232
|
+
returned, it should pass all the unit tests.
|
|
216
233
|
unittest_num_retries: If unittest is set to "auto", this parameter
|
|
217
|
-
specifies the number of times the LLM
|
|
218
|
-
cases.
|
|
234
|
+
specifies the number of times the LLM should attempt to generate unit
|
|
235
|
+
test cases.
|
|
219
236
|
|
|
220
237
|
Returns:
|
|
221
238
|
The implemented function object.
|
|
@@ -22,7 +22,16 @@ import pyglove as pg
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class MappingError(Exception): # pylint: disable=g-bad-exception-name
|
|
25
|
-
"""
|
|
25
|
+
"""Error raised during a structured mapping task.
|
|
26
|
+
|
|
27
|
+
`MappingError` is raised when a language model's response cannot be
|
|
28
|
+
successfully parsed or transformed into the target structure defined by
|
|
29
|
+
the schema in structured mapping operations like `lf.query` and `lf.parse`.
|
|
30
|
+
|
|
31
|
+
This error encapsulates both the original exception that occurred during
|
|
32
|
+
parsing (`cause`) and the language model response (`lm_response`) that led
|
|
33
|
+
to the failure, allowing for easier debugging of mapping issues.
|
|
34
|
+
"""
|
|
26
35
|
|
|
27
36
|
def __init__(self, lm_response: lf.Message, cause: Exception):
|
|
28
37
|
self._lm_response = lm_response
|
|
@@ -62,7 +71,53 @@ class MappingError(Exception): # pylint: disable=g-bad-exception-name
|
|
|
62
71
|
class MappingExample(lf.NaturalLanguageFormattable,
|
|
63
72
|
lf.Component,
|
|
64
73
|
pg.views.HtmlTreeView.Extension):
|
|
65
|
-
"""
|
|
74
|
+
"""Represents an example for a structured mapping task.
|
|
75
|
+
|
|
76
|
+
A `MappingExample` defines a single instance of a mapping between an input
|
|
77
|
+
value and an output value, optionally guided by a schema and/or a natural
|
|
78
|
+
language context. It is primarily used to provide few-shot examples to
|
|
79
|
+
structured mapping operations (e.g., `lf.query`, `lf.complete`,
|
|
80
|
+
and `lf.describe`), helping to guide the LLM in performing the desired mapping
|
|
81
|
+
task. If `output` is not provided, the example represents a request to perform
|
|
82
|
+
mapping on the `input`.
|
|
83
|
+
|
|
84
|
+
**Key Attributes:**
|
|
85
|
+
|
|
86
|
+
* `input`: The source value for the mapping (e.g., text, an object).
|
|
87
|
+
* `output`: The target value for the mapping (e.g., a structured object,
|
|
88
|
+
text). If not provided, this example represents a request to perform
|
|
89
|
+
the mapping.
|
|
90
|
+
* `schema`: An optional `lf.structured.Schema` that defines or constrains
|
|
91
|
+
the structure of the `output`. If provided, the LLM will be instructed
|
|
92
|
+
to produce an output conforming to this schema.
|
|
93
|
+
* `context`: Optional natural language context that provides additional
|
|
94
|
+
information relevant to the mapping task.
|
|
95
|
+
* `metadata`: Optional dictionary for additional metadata.
|
|
96
|
+
|
|
97
|
+
**Example:**
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import langfun as lf
|
|
101
|
+
import pyglove as pg
|
|
102
|
+
|
|
103
|
+
# Example for translating English to French
|
|
104
|
+
lf.MappingExample(
|
|
105
|
+
input="Hello",
|
|
106
|
+
output="Bonjour"
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# Example for extracting structured data
|
|
110
|
+
class Flight(pg.Object):
|
|
111
|
+
airline: str
|
|
112
|
+
flight_number: str
|
|
113
|
+
|
|
114
|
+
lf.MappingExample(
|
|
115
|
+
input="I want to book flight AA123.",
|
|
116
|
+
output=Flight(airline="AA", flight_number="123"),
|
|
117
|
+
schema=Flight
|
|
118
|
+
)
|
|
119
|
+
```
|
|
120
|
+
"""
|
|
66
121
|
|
|
67
122
|
input: pg.typing.Annotated[
|
|
68
123
|
pg.typing.Any(transform=schema_lib.mark_missing),
|
|
@@ -84,7 +139,7 @@ class MappingExample(lf.NaturalLanguageFormattable,
|
|
|
84
139
|
# Automatic conversion from annotation to schema.
|
|
85
140
|
schema_lib.schema_spec(noneable=True),
|
|
86
141
|
(
|
|
87
|
-
'A `lf.structured.Schema` object that constrains target value '
|
|
142
|
+
'A `lf.structured.Schema` object that constrains target value. '
|
|
88
143
|
'If None, the target is expected to be a natural language-based '
|
|
89
144
|
'response returned from LMs.'
|
|
90
145
|
),
|
|
@@ -99,7 +154,7 @@ class MappingExample(lf.NaturalLanguageFormattable,
|
|
|
99
154
|
dict[str, Any],
|
|
100
155
|
(
|
|
101
156
|
'The metadata associated with the mapping example, '
|
|
102
|
-
'which
|
|
157
|
+
'which could carry structured data, such as tool function input. '
|
|
103
158
|
'It is a `pg.Dict` object whose keys can be accessed by attributes.'
|
|
104
159
|
),
|
|
105
160
|
] = pg.Dict()
|
|
@@ -242,7 +297,7 @@ class MappingExample(lf.NaturalLanguageFormattable,
|
|
|
242
297
|
|
|
243
298
|
|
|
244
299
|
class Mapping(lf.LangFunc):
|
|
245
|
-
"""Base class for mapping.
|
|
300
|
+
"""Base class for LLM-based mapping operations.
|
|
246
301
|
|
|
247
302
|
{{ preamble }}
|
|
248
303
|
|
|
@@ -263,19 +318,19 @@ class Mapping(lf.LangFunc):
|
|
|
263
318
|
pg.Symbolic,
|
|
264
319
|
(
|
|
265
320
|
'The mapping input. It could be `lf.Message` (a pg.Symbolic '
|
|
266
|
-
'subclass) as natural language input, or other symbolic
|
|
321
|
+
'subclass) as natural language input, or other symbolic objects '
|
|
267
322
|
'as structured input.'
|
|
268
323
|
),
|
|
269
324
|
]
|
|
270
325
|
|
|
271
326
|
context: Annotated[
|
|
272
|
-
str | None, 'The mapping context
|
|
327
|
+
str | None, 'The mapping context as a natural language string.'
|
|
273
328
|
] = None
|
|
274
329
|
|
|
275
330
|
schema: pg.typing.Annotated[
|
|
276
331
|
# Automatic conversion from annotation to schema.
|
|
277
332
|
schema_lib.schema_spec(noneable=True),
|
|
278
|
-
'A `lf.structured.Schema` object that constrains mapping output
|
|
333
|
+
'A `lf.structured.Schema` object that constrains mapping output.',
|
|
279
334
|
] = None
|
|
280
335
|
|
|
281
336
|
permission: Annotated[
|
|
@@ -378,16 +433,16 @@ class Mapping(lf.LangFunc):
|
|
|
378
433
|
default: Annotated[
|
|
379
434
|
Any,
|
|
380
435
|
(
|
|
381
|
-
'The default value to use if
|
|
382
|
-
'
|
|
383
|
-
'
|
|
436
|
+
'The default value to use if parsing fails (after autofix). '
|
|
437
|
+
'If `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be '
|
|
438
|
+
'raised instead.'
|
|
384
439
|
),
|
|
385
440
|
] = lf.RAISE_IF_HAS_ERROR
|
|
386
441
|
|
|
387
442
|
response_postprocess: Annotated[
|
|
388
443
|
Callable[[str], str] | None,
|
|
389
444
|
(
|
|
390
|
-
'A callable object that post
|
|
445
|
+
'A callable object that post-processes the raw LLM response before '
|
|
391
446
|
'parsing it into the output Python object.'
|
|
392
447
|
)
|
|
393
448
|
] = None
|
|
@@ -420,7 +475,7 @@ class Mapping(lf.LangFunc):
|
|
|
420
475
|
return lm_output
|
|
421
476
|
|
|
422
477
|
def parse_result(self, lm_output: lf.Message) -> Any:
|
|
423
|
-
"""
|
|
478
|
+
"""Parses result from LLM response."""
|
|
424
479
|
schema = self.mapping_request.schema
|
|
425
480
|
if schema is None:
|
|
426
481
|
return None
|
|
@@ -444,7 +499,7 @@ class Mapping(lf.LangFunc):
|
|
|
444
499
|
)
|
|
445
500
|
|
|
446
501
|
def postprocess_response(self, response: lf.Message) -> lf.Message:
|
|
447
|
-
"""Post
|
|
502
|
+
"""Post-processes LLM response."""
|
|
448
503
|
if self.response_postprocess is not None:
|
|
449
504
|
postprocessed_text = self.response_postprocess(response.text)
|
|
450
505
|
if postprocessed_text != response.text:
|
|
@@ -452,7 +507,7 @@ class Mapping(lf.LangFunc):
|
|
|
452
507
|
return response
|
|
453
508
|
|
|
454
509
|
def postprocess_result(self, result: Any) -> Any:
|
|
455
|
-
"""Post
|
|
510
|
+
"""Post-processes structured output."""
|
|
456
511
|
return result
|
|
457
512
|
|
|
458
513
|
def globals(self) -> dict[str, Any]:
|
|
@@ -24,7 +24,7 @@ import pyglove as pg
|
|
|
24
24
|
|
|
25
25
|
@lf.use_init_args(['schema', 'default', 'examples'])
|
|
26
26
|
class _ParseStructure(mapping.Mapping):
|
|
27
|
-
"""
|
|
27
|
+
"""Parses an object out from a natural language text."""
|
|
28
28
|
|
|
29
29
|
context_title = 'USER_REQUEST'
|
|
30
30
|
input_title = 'LM_RESPONSE'
|
|
@@ -39,7 +39,7 @@ class _ParseStructure(mapping.Mapping):
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class _ParseStructureJson(_ParseStructure):
|
|
42
|
-
"""
|
|
42
|
+
"""Parses an object out from a NL text using JSON as the protocol."""
|
|
43
43
|
|
|
44
44
|
preamble = """
|
|
45
45
|
Please help translate the last LM response into JSON based on the request and the schema:
|
|
@@ -55,7 +55,7 @@ class _ParseStructureJson(_ParseStructure):
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
class _ParseStructurePython(_ParseStructure):
|
|
58
|
-
"""
|
|
58
|
+
"""Parses an object out from a NL text using Python as the protocol."""
|
|
59
59
|
|
|
60
60
|
preamble = """
|
|
61
61
|
Please help translate the last {{ input_title }} into {{ output_title}} based on {{ schema_title }}.
|
|
@@ -88,55 +88,55 @@ def parse(
|
|
|
88
88
|
returns_message: bool = False,
|
|
89
89
|
**kwargs,
|
|
90
90
|
) -> Any:
|
|
91
|
-
"""
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
```
|
|
91
|
+
"""Parses a natural language message into a structured object using an LLM.
|
|
92
|
+
|
|
93
|
+
`lf.parse` extracts structured information from a natural language string
|
|
94
|
+
or message according to a provided schema. It is the inverse of
|
|
95
|
+
`lf.describe`.
|
|
96
|
+
|
|
97
|
+
**Example:**
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
import langfun as lf
|
|
101
|
+
import pyglove as pg
|
|
102
|
+
|
|
103
|
+
class FlightDuration(pg.Object):
|
|
104
|
+
hours: int
|
|
105
|
+
minutes: int
|
|
106
|
+
|
|
107
|
+
class Flight(pg.Object):
|
|
108
|
+
airline: str
|
|
109
|
+
flight_number: str
|
|
110
|
+
departure_airport_code: str
|
|
111
|
+
arrival_airport_code: str
|
|
112
|
+
duration: FlightDuration
|
|
113
|
+
price: float
|
|
114
|
+
|
|
115
|
+
text = '''
|
|
116
|
+
The flight is UA2631 of United Airlines, from SFO to JFK,
|
|
117
|
+
duration is 7 hours and 57 minutes, costing $227.
|
|
118
|
+
'''
|
|
119
|
+
|
|
120
|
+
flight = lf.parse(text, Flight, lm=lf.llms.Gemini25Flash())
|
|
121
|
+
assert flight.airline == 'United Airlines'
|
|
122
|
+
assert flight.duration.hours == 7
|
|
123
|
+
```
|
|
125
124
|
|
|
126
125
|
Args:
|
|
127
126
|
message: A `lf.Message` object or a string as the natural language input.
|
|
128
127
|
It provides the complete context for the parsing.
|
|
129
|
-
schema: A `lf.
|
|
130
|
-
default: The default value if parsing
|
|
131
|
-
be raised
|
|
128
|
+
schema: A `lf.Schema` object or equivalent annotations.
|
|
129
|
+
default: The default value to return if parsing fails. If
|
|
130
|
+
`lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
|
|
131
|
+
instead.
|
|
132
132
|
user_prompt: An optional user prompt as the description or ask for the
|
|
133
|
-
message, which
|
|
133
|
+
message, which provides more context for parsing.
|
|
134
134
|
lm: The language model to use. If not specified, the language model from
|
|
135
135
|
`lf.context` context manager will be used.
|
|
136
|
-
examples: An optional list of fewshot examples for
|
|
137
|
-
|
|
136
|
+
examples: An optional list of few-shot examples for guiding parsing. If None,
|
|
137
|
+
default examples will be used.
|
|
138
138
|
include_context: If True, include the request sent to LLM for obtaining the
|
|
139
|
-
response to
|
|
139
|
+
response to parse. Otherwise include only the response.
|
|
140
140
|
cache_seed: Seed for computing cache key. The cache key is determined by a
|
|
141
141
|
tuple of (lm, prompt, cache seed). If None, cache will be disabled for
|
|
142
142
|
the query even if cache is configured by the LM.
|
|
@@ -146,10 +146,10 @@ def parse(
|
|
|
146
146
|
`autofix_lm` from `lf.context` context manager will be used. Otherwise it
|
|
147
147
|
will use `lm`.
|
|
148
148
|
protocol: The protocol for schema/value representation. Applicable values
|
|
149
|
-
are 'json' and 'python'. By default 'python' will be used
|
|
149
|
+
are 'json' and 'python'. By default 'python' will be used.
|
|
150
150
|
returns_message: If True, returns `lf.Message` as the output, instead of
|
|
151
151
|
returning the structured `message.result`.
|
|
152
|
-
**kwargs: Keyword arguments passed to the `
|
|
152
|
+
**kwargs: Keyword arguments passed to the `_ParseStructure`
|
|
153
153
|
transform.
|
|
154
154
|
|
|
155
155
|
Returns:
|
|
@@ -223,7 +223,7 @@ async def aparse(
|
|
|
223
223
|
|
|
224
224
|
|
|
225
225
|
def call(
|
|
226
|
-
prompt: str
|
|
226
|
+
prompt: Union[str, lf.Template, lf.Message],
|
|
227
227
|
schema: Union[
|
|
228
228
|
None, schema_lib.Schema, Type[Any], list[Type[Any]], dict[str, Any]
|
|
229
229
|
] = None,
|
|
@@ -240,27 +240,43 @@ def call(
|
|
|
240
240
|
returns_message: bool = False,
|
|
241
241
|
**kwargs,
|
|
242
242
|
) -> Any:
|
|
243
|
-
"""
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
243
|
+
"""Calls a language model and parses the response according to a schema.
|
|
244
|
+
|
|
245
|
+
`lf.call` first calls a language model with a prompt to obtain a natural
|
|
246
|
+
language response, then calls the language model again to parse this
|
|
247
|
+
response into a structured format defined by `schema`. If `schema` is not
|
|
248
|
+
provided, it returns the raw natural language response.
|
|
249
|
+
|
|
250
|
+
**Example:**
|
|
251
|
+
|
|
252
|
+
1. **Call with a Natural Language Prompt**:
|
|
253
|
+
By default, `lf.call` with a string prompt returns a natural language
|
|
254
|
+
response:
|
|
255
|
+
```python
|
|
256
|
+
r = lf.call('Compute one plus one', lm=lf.llms.Gpt4())
|
|
257
|
+
print(r)
|
|
258
|
+
# Output: 2
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
2. **Call with Structured Output**:
|
|
262
|
+
If `schema` is provided, `lf.call` parses the LLM response into the
|
|
263
|
+
specified schema using a second LM call:
|
|
264
|
+
```python
|
|
265
|
+
r = lf.call('Compute one plus one', int, lm=lf.llms.Gpt4())
|
|
266
|
+
print(r)
|
|
267
|
+
# Output: 2
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
3. **Call with Templated Prompt**:
|
|
271
|
+
The prompt can be a template string with placeholders (e.g., `{{x}}`,
|
|
272
|
+
`{{y}}`), whose values are provided as keyword arguments:
|
|
273
|
+
```python
|
|
274
|
+
r = lf.call(
|
|
275
|
+
'Compute {{x}} plus {{y}}',
|
|
276
|
+
int, x='one', y='one', lm=lf.llms.Gpt4())
|
|
277
|
+
print(r)
|
|
278
|
+
# Output: 2
|
|
279
|
+
```
|
|
264
280
|
|
|
265
281
|
Args:
|
|
266
282
|
prompt: User prompt that will be sent to LM, which could be a string or a
|
|
@@ -272,10 +288,10 @@ def call(
|
|
|
272
288
|
If not specified, `lm` from `lf.context` context manager will be used.
|
|
273
289
|
parsing_lm: Language model that will be used for parsing. If None, the `lm`
|
|
274
290
|
for prompting the LM will be used.
|
|
275
|
-
parsing_examples: Examples for parsing the output. If None,
|
|
276
|
-
|
|
291
|
+
parsing_examples: Examples for parsing the output. If None, no examples
|
|
292
|
+
will be used for parsing.
|
|
277
293
|
parsing_include_context: If True, include the request sent to LLM for
|
|
278
|
-
obtaining the response to
|
|
294
|
+
obtaining the response to parse. Otherwise include only the response.
|
|
279
295
|
cache_seed: Seed for computing cache key. The cache key is determined by a
|
|
280
296
|
tuple of (lm, prompt, cache seed). If None, cache will be disabled for
|
|
281
297
|
the query even if cache is configured by the LM.
|
|
@@ -284,10 +300,10 @@ def call(
|
|
|
284
300
|
autofix_lm: The language model to use for autofix. If not specified, the
|
|
285
301
|
`autofix_lm` from `lf.context` context manager will be used. Otherwise it
|
|
286
302
|
will use `parsing_lm`.
|
|
287
|
-
response_postprocess: A callback function to post
|
|
303
|
+
response_postprocess: A callback function to post-process the text response
|
|
288
304
|
before sending for parsing.
|
|
289
305
|
protocol: The protocol for schema/value representation. Applicable values
|
|
290
|
-
are 'json' and 'python'. By default 'python' will be used
|
|
306
|
+
are 'json' and 'python'. By default 'python' will be used.
|
|
291
307
|
returns_message: If True, return a `lf.Message` object instead of its text
|
|
292
308
|
or result.
|
|
293
309
|
**kwargs: Keyword arguments. Including options that control the calling
|
|
@@ -387,7 +403,7 @@ def _parse_structure_cls(
|
|
|
387
403
|
|
|
388
404
|
|
|
389
405
|
def default_parse_examples() -> list[mapping.MappingExample]:
|
|
390
|
-
"""
|
|
406
|
+
"""Returns default parsing examples."""
|
|
391
407
|
|
|
392
408
|
class AdditionResults(pg.Object):
|
|
393
409
|
one_plus_one_equals: int | None
|
|
@@ -745,9 +745,6 @@ class CallTest(unittest.TestCase):
|
|
|
745
745
|
parsing.call('what is one plus two?', int, lm=lm, autofix=3), 3
|
|
746
746
|
)
|
|
747
747
|
|
|
748
|
-
def test_call_with_structured_input(self):
|
|
749
|
-
self.assertEqual(parsing.call(1, lm=fake.StaticResponse('2')), '2')
|
|
750
|
-
|
|
751
748
|
def test_call_with_response_postprocess(self):
|
|
752
749
|
target_str = '@TARGET_STR@'
|
|
753
750
|
random_str = '!RANDOM_STR!'
|