langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512040805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +1 -1
- langfun/core/__init__.py +7 -1
- langfun/core/agentic/__init__.py +8 -1
- langfun/core/agentic/action.py +740 -112
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/agentic/action_test.py +189 -24
- langfun/core/async_support.py +104 -5
- langfun/core/async_support_test.py +23 -0
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/concurrent_test.py +9 -2
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +12 -3
- langfun/core/data/conversion/anthropic_test.py +8 -6
- langfun/core/data/conversion/gemini.py +11 -2
- langfun/core/data/conversion/gemini_test.py +48 -9
- langfun/core/data/conversion/openai.py +145 -31
- langfun/core/data/conversion/openai_test.py +161 -17
- langfun/core/eval/base.py +48 -44
- langfun/core/eval/base_test.py +5 -5
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/__init__.py +2 -0
- langfun/core/eval/v2/checkpointing.py +76 -7
- langfun/core/eval/v2/checkpointing_test.py +9 -2
- langfun/core/eval/v2/config_saver.py +37 -0
- langfun/core/eval/v2/config_saver_test.py +36 -0
- langfun/core/eval/v2/eval_test_helper.py +104 -3
- langfun/core/eval/v2/evaluation.py +92 -17
- langfun/core/eval/v2/evaluation_test.py +9 -3
- langfun/core/eval/v2/example.py +50 -40
- langfun/core/eval/v2/example_test.py +16 -8
- langfun/core/eval/v2/experiment.py +84 -15
- langfun/core/eval/v2/experiment_test.py +19 -0
- langfun/core/eval/v2/metric_values.py +31 -3
- langfun/core/eval/v2/metric_values_test.py +32 -0
- langfun/core/eval/v2/metrics.py +157 -44
- langfun/core/eval/v2/metrics_test.py +39 -18
- langfun/core/eval/v2/progress.py +31 -1
- langfun/core/eval/v2/progress_test.py +27 -0
- langfun/core/eval/v2/progress_tracking.py +13 -5
- langfun/core/eval/v2/progress_tracking_test.py +9 -1
- langfun/core/eval/v2/reporting.py +90 -71
- langfun/core/eval/v2/reporting_test.py +24 -6
- langfun/core/eval/v2/runners/__init__.py +30 -0
- langfun/core/eval/v2/{runners.py → runners/base.py} +72 -180
- langfun/core/eval/v2/runners/beam.py +354 -0
- langfun/core/eval/v2/runners/beam_test.py +153 -0
- langfun/core/eval/v2/runners/ckpt_monitor.py +294 -0
- langfun/core/eval/v2/runners/ckpt_monitor_test.py +162 -0
- langfun/core/eval/v2/runners/debug.py +40 -0
- langfun/core/eval/v2/runners/debug_test.py +76 -0
- langfun/core/eval/v2/runners/parallel.py +243 -0
- langfun/core/eval/v2/runners/parallel_test.py +182 -0
- langfun/core/eval/v2/runners/sequential.py +47 -0
- langfun/core/eval/v2/runners/sequential_test.py +169 -0
- langfun/core/langfunc.py +45 -130
- langfun/core/langfunc_test.py +7 -5
- langfun/core/language_model.py +189 -36
- langfun/core/language_model_test.py +54 -3
- langfun/core/llms/__init__.py +12 -1
- langfun/core/llms/anthropic.py +157 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +25 -3
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/cache/in_memory_test.py +14 -4
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +30 -2
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +64 -12
- langfun/core/llms/gemini_test.py +110 -0
- langfun/core/llms/google_genai.py +34 -1
- langfun/core/llms/groq.py +28 -3
- langfun/core/llms/llama_cpp.py +23 -4
- langfun/core/llms/openai.py +120 -3
- langfun/core/llms/openai_compatible.py +148 -27
- langfun/core/llms/openai_compatible_test.py +207 -20
- langfun/core/llms/openai_test.py +0 -2
- langfun/core/llms/rest.py +16 -1
- langfun/core/llms/vertexai.py +58 -8
- langfun/core/logging.py +1 -1
- langfun/core/mcp/__init__.py +10 -0
- langfun/core/mcp/client.py +177 -0
- langfun/core/mcp/client_test.py +71 -0
- langfun/core/mcp/session.py +241 -0
- langfun/core/mcp/session_test.py +54 -0
- langfun/core/mcp/testing/simple_mcp_client.py +33 -0
- langfun/core/mcp/testing/simple_mcp_server.py +33 -0
- langfun/core/mcp/tool.py +254 -0
- langfun/core/mcp/tool_test.py +197 -0
- langfun/core/memory.py +1 -0
- langfun/core/message.py +160 -55
- langfun/core/message_test.py +65 -81
- langfun/core/modalities/__init__.py +8 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +73 -3
- langfun/core/modalities/image_test.py +116 -0
- langfun/core/modalities/mime.py +64 -3
- langfun/core/modalities/mime_test.py +11 -0
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +167 -29
- langfun/core/modality_test.py +42 -12
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/sampling_test.py +20 -4
- langfun/core/structured/__init__.py +2 -24
- langfun/core/structured/completion.py +34 -44
- langfun/core/structured/completion_test.py +23 -43
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +81 -37
- langfun/core/structured/parsing.py +95 -79
- langfun/core/structured/parsing_test.py +0 -3
- langfun/core/structured/querying.py +230 -154
- langfun/core/structured/querying_test.py +69 -33
- langfun/core/structured/schema/__init__.py +49 -0
- langfun/core/structured/schema/base.py +664 -0
- langfun/core/structured/schema/base_test.py +531 -0
- langfun/core/structured/schema/json.py +174 -0
- langfun/core/structured/schema/json_test.py +121 -0
- langfun/core/structured/schema/python.py +316 -0
- langfun/core/structured/schema/python_test.py +410 -0
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +47 -36
- langfun/core/structured/tokenization.py +26 -11
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +175 -50
- langfun/core/template_test.py +123 -17
- langfun/env/__init__.py +43 -0
- langfun/env/base_environment.py +827 -0
- langfun/env/base_environment_test.py +473 -0
- langfun/env/base_feature.py +304 -0
- langfun/env/base_feature_test.py +228 -0
- langfun/env/base_sandbox.py +842 -0
- langfun/env/base_sandbox_test.py +1235 -0
- langfun/env/event_handlers/__init__.py +14 -0
- langfun/env/event_handlers/chain.py +233 -0
- langfun/env/event_handlers/chain_test.py +253 -0
- langfun/env/event_handlers/event_logger.py +472 -0
- langfun/env/event_handlers/event_logger_test.py +304 -0
- langfun/env/event_handlers/metric_writer.py +726 -0
- langfun/env/event_handlers/metric_writer_test.py +214 -0
- langfun/env/interface.py +1640 -0
- langfun/env/interface_test.py +153 -0
- langfun/env/load_balancers.py +59 -0
- langfun/env/load_balancers_test.py +141 -0
- langfun/env/test_utils.py +507 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/METADATA +7 -3
- langfun-0.1.2.dev202512040805.dist-info/RECORD +217 -0
- langfun/core/eval/v2/runners_test.py +0 -343
- langfun/core/structured/schema.py +0 -987
- langfun/core/structured/schema_test.py +0 -982
- langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512040805.dist-info}/top_level.txt +0 -0
langfun/core/structured/mapping.py

@@ -22,7 +22,16 @@ import pyglove as pg
 
 
 class MappingError(Exception):  # pylint: disable=g-bad-exception-name
-  """
+  """Error raised during a structured mapping task.
+
+  `MappingError` is raised when a language model's response cannot be
+  successfully parsed or transformed into the target structure defined by
+  the schema in structured mapping operations like `lf.query` and `lf.parse`.
+
+  This error encapsulates both the original exception that occurred during
+  parsing (`cause`) and the language model response (`lm_response`) that led
+  to the failure, allowing for easier debugging of mapping issues.
+  """
 
   def __init__(self, lm_response: lf.Message, cause: Exception):
     self._lm_response = lm_response
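The docstring added above spells out when `MappingError` is raised and what it carries. As an illustrative sketch outside the diff, handling it around `lf.query` could look like the following; it assumes `MappingError` is exported under `lf.structured` and exposes `lm_response` and `cause` accessors matching the constructor shown above:

```python
import langfun as lf

try:
  # Ask for a structured float; parsing may fail if the LM response is malformed.
  price = lf.query(
      'Extract the total price from: "two apples, $3 each"',
      float,
      lm=lf.llms.Gpt4(),  # Placeholder model for illustration.
  )
except lf.structured.MappingError as e:
  # Assumed accessors mirroring __init__(lm_response, cause) above.
  print('LM response that failed to parse:', e.lm_response.text)
  print('Underlying cause:', e.cause)
```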
@@ -62,7 +71,53 @@ class MappingError(Exception):  # pylint: disable=g-bad-exception-name
 class MappingExample(lf.NaturalLanguageFormattable,
                      lf.Component,
                      pg.views.HtmlTreeView.Extension):
-  """
+  """Represents an example for a structured mapping task.
+
+  A `MappingExample` defines a single instance of a mapping between an input
+  value and an output value, optionally guided by a schema and/or a natural
+  language context. It is primarily used to provide few-shot examples to
+  structured mapping operations (e.g., `lf.query`, `lf.complete`,
+  and `lf.describe`), helping to guide the LLM in performing the desired mapping
+  task. If `output` is not provided, the example represents a request to perform
+  mapping on the `input`.
+
+  **Key Attributes:**
+
+  * `input`: The source value for the mapping (e.g., text, an object).
+  * `output`: The target value for the mapping (e.g., a structured object,
+    text). If not provided, this example represents a request to perform
+    the mapping.
+  * `schema`: An optional `lf.structured.Schema` that defines or constrains
+    the structure of the `output`. If provided, the LLM will be instructed
+    to produce an output conforming to this schema.
+  * `context`: Optional natural language context that provides additional
+    information relevant to the mapping task.
+  * `metadata`: Optional dictionary for additional metadata.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+  import pyglove as pg
+
+  # Example for translating English to French
+  lf.MappingExample(
+      input="Hello",
+      output="Bonjour"
+  )
+
+  # Example for extracting structured data
+  class Flight(pg.Object):
+    airline: str
+    flight_number: str
+
+  lf.MappingExample(
+      input="I want to book flight AA123.",
+      output=Flight(airline="AA", flight_number="123"),
+      schema=Flight
+  )
+  ```
+  """
 
   input: pg.typing.Annotated[
       pg.typing.Any(transform=schema_lib.mark_missing),
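The new `MappingExample` docstring includes inline examples. As a further hedged sketch (not part of the diff), such examples are typically passed as few-shot guidance to structured operations like `lf.query`:

```python
import langfun as lf
import pyglove as pg


class Flight(pg.Object):
  airline: str
  flight_number: str


# A few-shot example steering the extraction; the model is only a placeholder.
flight = lf.query(
    'I want to book flight AA123.',
    Flight,
    lm=lf.llms.Gpt4(),
    examples=[
        lf.MappingExample(
            input='Book UA2631 for me.',
            output=Flight(airline='UA', flight_number='2631'),
            schema=Flight,
        ),
    ],
)
```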
@@ -84,7 +139,7 @@ class MappingExample(lf.NaturalLanguageFormattable,
       # Automatic conversion from annotation to schema.
       schema_lib.schema_spec(noneable=True),
       (
-          'A `lf.structured.Schema` object that constrains target value '
+          'A `lf.structured.Schema` object that constrains target value. '
          'If None, the target is expected to be a natural language-based '
           'response returned from LMs.'
       ),

@@ -99,18 +154,16 @@ class MappingExample(lf.NaturalLanguageFormattable,
       dict[str, Any],
       (
           'The metadata associated with the mapping example, '
-          'which
+          'which could carry structured data, such as tool function input. '
           'It is a `pg.Dict` object whose keys can be accessed by attributes.'
       ),
   ] = pg.Dict()
 
-  def schema_repr(
-      self, protocol: schema_lib.SchemaProtocol = 'python', **kwargs
-  ) -> str:
+  def schema_repr(self, protocol: str = 'python', **kwargs) -> str:
     """Returns the string representation of schema based on protocol."""
     if self.schema is None:
       return ''
-    return self.schema
+    return schema_lib.schema_repr(self.schema, protocol=protocol, **kwargs)
 
   @property
   def has_output(self) -> bool:
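The `schema_repr` change above switches `protocol` to a plain string and delegates to the new `schema` package. A minimal sketch of the resulting behavior, with the printed outputs only indicative:

```python
import langfun as lf

example = lf.MappingExample(input='Compute 1 + 1', schema=int)
# `protocol` is now a plain string: 'python' or 'json'.
print(example.schema_repr(protocol='python'))  # e.g. 'int'
print(example.schema_repr(protocol='json'))    # e.g. a JSON-style schema string
```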
@@ -121,12 +174,14 @@ class MappingExample(lf.NaturalLanguageFormattable,
   def value_repr(
       cls,
       value: Any,
-      protocol:
+      protocol: str = 'python',
       use_modality_ref: bool = False,
       **kwargs
   ) -> str:
     if isinstance(value, str):
       return value
+    if isinstance(value, lf.Message):
+      return str(value)
     if isinstance(value, lf.Modality):
       with lf.modality.format_modality_as_ref():
         return str(value)

@@ -134,11 +189,11 @@ class MappingExample(lf.NaturalLanguageFormattable,
     # Placehold modalities if they are present.
     if use_modality_ref and pg.contains(value, type=lf.Modality):
       value = lf.ModalityRef.placehold(value)
-    return schema_lib.value_repr(
+    return schema_lib.value_repr(value, protocol=protocol, **kwargs)
 
   def input_repr(
       self,
-      protocol:
+      protocol: str = 'python',
       compact: bool = False,
       verbose: bool = True,
       **kwargs

@@ -150,7 +205,7 @@ class MappingExample(lf.NaturalLanguageFormattable,
 
   def output_repr(
       self,
-      protocol:
+      protocol: str = 'python',
       compact: bool = False,
       verbose: bool = True,
       **kwargs

@@ -192,9 +247,7 @@ class MappingExample(lf.NaturalLanguageFormattable,
 
     def render_value(view, *, value, **kwargs):
       if isinstance(value, lf.Template):
-
-        # the input.
-        value = value.clone().render()
+        value = value.render()
       if value is None:
         return None
       return view.render(value, **kwargs)
@@ -242,7 +295,7 @@ class MappingExample(lf.NaturalLanguageFormattable,
 
 
 class Mapping(lf.LangFunc):
-  """Base class for mapping.
+  """Base class for LLM-based mapping operations.
 
   {{ preamble }}
 

@@ -263,19 +316,19 @@ class Mapping(lf.LangFunc):
       pg.Symbolic,
       (
           'The mapping input. It could be `lf.Message` (a pg.Symbolic '
-          'subclass) as natural language input, or other symbolic
+          'subclass) as natural language input, or other symbolic objects '
           'as structured input.'
       ),
   ]
 
   context: Annotated[
-      str | None, 'The mapping context
+      str | None, 'The mapping context as a natural language string.'
   ] = None
 
   schema: pg.typing.Annotated[
       # Automatic conversion from annotation to schema.
       schema_lib.schema_spec(noneable=True),
-      'A `lf.structured.Schema` object that constrains mapping output
+      'A `lf.structured.Schema` object that constrains mapping output.',
   ] = None
 
   permission: Annotated[
@@ -286,12 +339,8 @@ class Mapping(lf.LangFunc):
   @property
   def mapping_request(self) -> MappingExample:
     """Returns a MappingExample as the mapping request."""
-    if isinstance(self.input, lf.Message):
-      input_value = self.input.text
-    else:
-      input_value = pg.Ref(self.input)
     return MappingExample(
-        input=
+        input=pg.Ref(self.input),
         schema=pg.Ref(self.schema),
         context=self.context,
     )

@@ -382,16 +431,16 @@ class Mapping(lf.LangFunc):
   default: Annotated[
       Any,
       (
-          'The default value to use if
-          '
-          '
+          'The default value to use if parsing fails (after autofix). '
+          'If `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be '
+          'raised instead.'
       ),
   ] = lf.RAISE_IF_HAS_ERROR
 
   response_postprocess: Annotated[
       Callable[[str], str] | None,
       (
-          'A callable object that post
+          'A callable object that post-processes the raw LLM response before '
           'parsing it into the output Python object.'
       )
   ] = None
@@ -402,11 +451,6 @@ class Mapping(lf.LangFunc):
 
   def transform_input(self, lm_input: lf.Message) -> lf.Message:
     # Find modalities to fill the input message.
-    lm_input.metadata.update(
-        examples=pg.Ref(self.examples),
-        input=pg.Ref(self.input),
-        schema=pg.Ref(self.schema) if self.schema is not None else None,
-    )
     if isinstance(self.input, lf.Message):
       lm_input.source = self.input
     return lm_input

@@ -429,7 +473,7 @@ class Mapping(lf.LangFunc):
     return lm_output
 
   def parse_result(self, lm_output: lf.Message) -> Any:
-    """
+    """Parses result from LLM response."""
     schema = self.mapping_request.schema
     if schema is None:
       return None

@@ -443,7 +487,7 @@ class Mapping(lf.LangFunc):
       response_text = '\n'.join(
           tc.text for tc in lm_output.metadata['tool_calls']
       )
-    return schema.
+    return schema.parse_value(
         response_text,
         protocol=self.protocol,
         additional_context=self.globals(),

@@ -453,7 +497,7 @@ class Mapping(lf.LangFunc):
     )
 
   def postprocess_response(self, response: lf.Message) -> lf.Message:
-    """Post
+    """Post-processes LLM response."""
     if self.response_postprocess is not None:
       postprocessed_text = self.response_postprocess(response.text)
       if postprocessed_text != response.text:

@@ -461,7 +505,7 @@ class Mapping(lf.LangFunc):
     return response
 
   def postprocess_result(self, result: Any) -> Any:
-    """Post
+    """Post-processes structured output."""
     return result
 
   def globals(self) -> dict[str, Any]:
langfun/core/structured/parsing.py

@@ -24,7 +24,7 @@ import pyglove as pg
 
 @lf.use_init_args(['schema', 'default', 'examples'])
 class _ParseStructure(mapping.Mapping):
-  """
+  """Parses an object out from a natural language text."""
 
   context_title = 'USER_REQUEST'
   input_title = 'LM_RESPONSE'

@@ -39,7 +39,7 @@ class _ParseStructure(mapping.Mapping):
 
 
 class _ParseStructureJson(_ParseStructure):
-  """
+  """Parses an object out from a NL text using JSON as the protocol."""
 
   preamble = """
 Please help translate the last LM response into JSON based on the request and the schema:

@@ -55,7 +55,7 @@ class _ParseStructureJson(_ParseStructure):
 
 
 class _ParseStructurePython(_ParseStructure):
-  """
+  """Parses an object out from a NL text using Python as the protocol."""
 
   preamble = """
 Please help translate the last {{ input_title }} into {{ output_title}} based on {{ schema_title }}.
@@ -84,59 +84,59 @@ def parse(
     cache_seed: int | None = 0,
     autofix: int = 0,
     autofix_lm: lf.LanguageModel | None = None,
-    protocol:
+    protocol: str = 'python',
     returns_message: bool = False,
     **kwargs,
 ) -> Any:
-  """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  ```
+  """Parses a natural language message into a structured object using an LLM.
+
+  `lf.parse` extracts structured information from a natural language string
+  or message according to a provided schema. It is the inverse of
+  `lf.describe`.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+  import pyglove as pg
+
+  class FlightDuration(pg.Object):
+    hours: int
+    minutes: int
+
+  class Flight(pg.Object):
+    airline: str
+    flight_number: str
+    departure_airport_code: str
+    arrival_airport_code: str
+    duration: FlightDuration
+    price: float
+
+  text = '''
+  The flight is UA2631 of United Airlines, from SFO to JFK,
+  duration is 7 hours and 57 minutes, costing $227.
+  '''
+
+  flight = lf.parse(text, Flight, lm=lf.llms.Gemini25Flash())
+  assert flight.airline == 'United Airlines'
+  assert flight.duration.hours == 7
+  ```
 
   Args:
     message: A `lf.Message` object or a string as the natural language input.
       It provides the complete context for the parsing.
-    schema: A `lf.
-    default: The default value if parsing
-      be raised
+    schema: A `lf.Schema` object or equivalent annotations.
+    default: The default value to return if parsing fails. If
+      `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
+      instead.
     user_prompt: An optional user prompt as the description or ask for the
-      message, which
+      message, which provides more context for parsing.
     lm: The language model to use. If not specified, the language model from
       `lf.context` context manager will be used.
-    examples: An optional list of fewshot examples for
-
+    examples: An optional list of fewshot examples for guiding parsing. If None,
+      default examples will be used.
     include_context: If True, include the request sent to LLM for obtaining the
-      response to
+      response to parse. Otherwise include only the response.
     cache_seed: Seed for computing cache key. The cache key is determined by a
       tuple of (lm, prompt, cache seed). If None, cache will be disabled for
       the query even cache is configured by the LM.

@@ -146,10 +146,10 @@ def parse(
       `autofix_lm` from `lf.context` context manager will be used. Otherwise it
       will use `lm`.
     protocol: The protocol for schema/value representation. Applicable values
-      are 'json' and 'python'. By default 'python' will be used
+      are 'json' and 'python'. By default 'python' will be used.
     returns_message: If True, returns `lf.Message` as the output, instead of
       returning the structured `message.result`.
-    **kwargs: Keyword arguments passed to the `
+    **kwargs: Keyword arguments passed to the `_ParseStructure`
       transform.
 
   Returns:
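A hedged sketch of the `default` behavior documented in the new `lf.parse` docstring (not part of the diff): with the default `lf.RAISE_IF_HAS_ERROR`, an unparsable response raises `MappingError`; passing an explicit default returns it instead.

```python
import langfun as lf

price = lf.parse(
    'The flight costs around two hundred dollars.',
    float,
    default=None,       # Fall back to None instead of raising on a parse failure.
    lm=lf.llms.Gpt4(),  # Placeholder model for illustration.
)
```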
@@ -198,7 +198,7 @@ async def aparse(
     cache_seed: int | None = 0,
     autofix: int = 0,
     autofix_lm: lf.LanguageModel | None = None,
-    protocol:
+    protocol: str = 'python',
     returns_message: bool = False,
     **kwargs,
 ) -> Any:

@@ -223,7 +223,7 @@ async def aparse(
 
 
 def call(
-    prompt: str
+    prompt: Union[str, lf.Template, lf.Message],
     schema: Union[
         None, schema_lib.Schema, Type[Any], list[Type[Any]], dict[str, Any]
     ] = None,
@@ -236,31 +236,47 @@ def call(
     autofix: int = 0,
     autofix_lm: lf.LanguageModel | None = None,
     response_postprocess: Callable[[str], str] | None = None,
-    protocol:
+    protocol: str = 'python',
     returns_message: bool = False,
     **kwargs,
 ) -> Any:
-  """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  """Calls a language model and parses the response according to a schema.
+
+  `lf.call` first calls a language model with a prompt to obtain a natural
+  language response, then calls the language model again to parse this
+  response into a structured format defined by `schema`. If `schema` is not
+  provided, it returns the raw natural language response.
+
+  **Example:**
+
+  1. **Call with a Natural Language Prompt**:
+     By default, `lf.call` with a string prompt returns a natural language
+     response:
+     ```python
+     r = lf.call('Compute one plus one', lm=lf.llms.Gpt4())
+     print(r)
+     # Output: 2
+     ```
+
+  2. **Call with Structured Output**:
+     If `schema` is provided, `lf.call` parses the LLM response into the
+     specified schema using a second LM call:
+     ```python
+     r = lf.call('Compute one plus one', int, lm=lf.llms.Gpt4())
+     print(r)
+     # Output: 2
+     ```
+
+  3. **Call with Templated Prompt**:
+     The prompt can be a template string with placeholders (e.g., `{{x}}`,
+     `{{y}}`), whose values are provided as keyword arguments:
+     ```python
+     r = lf.call(
+         'Compute {{x}} plus {{y}}',
+         int, x='one', y='one', lm=lf.llms.Gpt4())
+     print(r)
+     # Output: 2
+     ```
 
   Args:
     prompt: User prompt that will be sent to LM, which could be a string or a

@@ -272,10 +288,10 @@ def call(
       If not specified, `lm` from `lf.context` context manager will be used.
     parsing_lm: Language model that will be used for parsing. If None, the `lm`
       for prompting the LM will be used.
-    parsing_examples: Examples for parsing the output. If None,
-
+    parsing_examples: Examples for parsing the output. If None, no examples
+      will be used for parsing.
     parsing_include_context: If True, include the request sent to LLM for
-      obtaining the response to
+      obtaining the response to parse. Otherwise include only the response.
     cache_seed: Seed for computing cache key. The cache key is determined by a
       tuple of (lm, prompt, cache seed). If None, cache will be disabled for
       the query even cache is configured by the LM.

@@ -284,10 +300,10 @@ def call(
     autofix_lm: The language model to use for autofix. If not specified, the
       `autofix_lm` from `lf.context` context manager will be used. Otherwise it
       will use `parsing_lm`.
-    response_postprocess: A callback function to post
+    response_postprocess: A callback function to post-process the text response
       before sending for parsing.
     protocol: The protocol for schema/value representation. Applicable values
-      are 'json' and 'python'. By default 'python' will be used
+      are 'json' and 'python'. By default 'python' will be used.
     returns_message: If True, return a `lf.Message` object instead of its text
       or result.
     **kwargs: Keyword arguments. Including options that control the calling
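As an illustrative note on `returns_message` (outside the diff): when it is True, `lf.call` returns the full `lf.Message` rather than the parsed result, and the structured value is available on `message.result`:

```python
import langfun as lf

message = lf.call(
    'Compute one plus one', int,
    lm=lf.llms.Gpt4(),  # Placeholder model for illustration.
    returns_message=True,
)
print(message.text)    # Raw LM response text.
print(message.result)  # Parsed integer, e.g. 2.
```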
@@ -351,7 +367,7 @@ async def acall(
     autofix: int = 0,
     autofix_lm: lf.LanguageModel | None = None,
     response_postprocess: Callable[[str], str] | None = None,
-    protocol:
+    protocol: str = 'python',
     returns_message: bool = False,
     **kwargs,
 ) -> Any:

@@ -376,7 +392,7 @@ async def acall(
 
 
 def _parse_structure_cls(
-    protocol:
+    protocol: str,
 ) -> Type[_ParseStructure]:
   if protocol == 'json':
     return _ParseStructureJson

@@ -387,7 +403,7 @@ def _parse_structure_cls(
 
 
 def default_parse_examples() -> list[mapping.MappingExample]:
-  """
+  """Returns default parsing examples."""
 
   class AdditionResults(pg.Object):
     one_plus_one_equals: int | None
langfun/core/structured/parsing_test.py

@@ -745,9 +745,6 @@ class CallTest(unittest.TestCase):
         parsing.call('what is one plus two?', int, lm=lm, autofix=3), 3
     )
 
-  def test_call_with_structured_input(self):
-    self.assertEqual(parsing.call(1, lm=fake.StaticResponse('2')), '2')
-
   def test_call_with_response_postprocess(self):
     target_str = '@TARGET_STR@'
     random_str = '!RANDOM_STR!'