langfun 0.1.2.dev202511040805__py3-none-any.whl → 0.1.2.dev202511050805__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langfun might be problematic. Click here for more details.
- langfun/core/agentic/action.py +76 -9
- langfun/core/agentic/action_eval.py +9 -2
- langfun/core/async_support.py +32 -3
- langfun/core/coding/python/correction.py +19 -9
- langfun/core/coding/python/execution.py +14 -12
- langfun/core/coding/python/generation.py +21 -16
- langfun/core/coding/python/sandboxing.py +23 -3
- langfun/core/component.py +42 -3
- langfun/core/concurrent.py +70 -6
- langfun/core/console.py +1 -1
- langfun/core/data/conversion/anthropic.py +10 -3
- langfun/core/data/conversion/gemini.py +9 -2
- langfun/core/data/conversion/openai.py +17 -7
- langfun/core/eval/base.py +46 -42
- langfun/core/eval/matching.py +5 -2
- langfun/core/eval/patching.py +3 -3
- langfun/core/eval/scoring.py +4 -3
- langfun/core/eval/v2/checkpointing.py +30 -4
- langfun/core/eval/v2/evaluation.py +59 -13
- langfun/core/eval/v2/example.py +22 -11
- langfun/core/eval/v2/experiment.py +51 -8
- langfun/core/eval/v2/metric_values.py +23 -3
- langfun/core/eval/v2/metrics.py +33 -4
- langfun/core/eval/v2/progress.py +9 -1
- langfun/core/eval/v2/reporting.py +15 -1
- langfun/core/eval/v2/runners.py +27 -7
- langfun/core/langfunc.py +45 -130
- langfun/core/language_model.py +88 -10
- langfun/core/llms/anthropic.py +27 -2
- langfun/core/llms/azure_openai.py +29 -17
- langfun/core/llms/cache/base.py +22 -2
- langfun/core/llms/cache/in_memory.py +48 -7
- langfun/core/llms/compositional.py +25 -1
- langfun/core/llms/deepseek.py +29 -1
- langfun/core/llms/fake.py +32 -1
- langfun/core/llms/gemini.py +9 -1
- langfun/core/llms/google_genai.py +29 -1
- langfun/core/llms/groq.py +27 -2
- langfun/core/llms/llama_cpp.py +22 -3
- langfun/core/llms/openai.py +29 -1
- langfun/core/llms/openai_compatible.py +18 -6
- langfun/core/llms/rest.py +12 -1
- langfun/core/llms/vertexai.py +39 -6
- langfun/core/logging.py +1 -1
- langfun/core/mcp/client.py +77 -22
- langfun/core/mcp/session.py +90 -10
- langfun/core/mcp/tool.py +83 -23
- langfun/core/memory.py +1 -0
- langfun/core/message.py +59 -12
- langfun/core/message_test.py +3 -0
- langfun/core/modalities/audio.py +21 -1
- langfun/core/modalities/image.py +19 -1
- langfun/core/modalities/mime.py +45 -2
- langfun/core/modalities/pdf.py +19 -1
- langfun/core/modalities/video.py +21 -1
- langfun/core/modality.py +66 -5
- langfun/core/natural_language.py +1 -1
- langfun/core/sampling.py +4 -4
- langfun/core/structured/completion.py +32 -37
- langfun/core/structured/description.py +54 -50
- langfun/core/structured/function_generation.py +29 -12
- langfun/core/structured/mapping.py +70 -15
- langfun/core/structured/parsing.py +90 -74
- langfun/core/structured/querying.py +201 -130
- langfun/core/structured/schema.py +70 -10
- langfun/core/structured/schema_generation.py +33 -14
- langfun/core/structured/scoring.py +45 -34
- langfun/core/structured/tokenization.py +24 -9
- langfun/core/subscription.py +2 -2
- langfun/core/template.py +132 -35
- langfun/core/template_test.py +22 -0
- {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/METADATA +1 -1
- {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/RECORD +76 -76
- {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/WHEEL +0 -0
- {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/licenses/LICENSE +0 -0
- {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/top_level.txt +0 -0
|
@@ -274,7 +274,7 @@ class _LfQueryPythonV2(LfQuery):
|
|
|
274
274
|
|
|
275
275
|
|
|
276
276
|
def query(
|
|
277
|
-
prompt: Union[str, lf.Template, Any],
|
|
277
|
+
prompt: Union[str, lf.Template, lf.Message, Any],
|
|
278
278
|
schema: schema_lib.SchemaType | None = None,
|
|
279
279
|
default: Any = lf.RAISE_IF_HAS_ERROR,
|
|
280
280
|
*,
|
|
@@ -298,119 +298,124 @@ def query(
|
|
|
298
298
|
supporting natural language prompts, structured inputs, and multiple advanced
|
|
299
299
|
features.
|
|
300
300
|
|
|
301
|
-
Key Features
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
301
|
+
**Key Features:**
|
|
302
|
+
|
|
303
|
+
* **Input**: Accepts natural language strings, structured inputs (e.g.,
|
|
304
|
+
`pg.Object`), templates (`lf.Template`) with modality objects, messages (
|
|
305
|
+
`lf.Message`) with modality objects, or objects that can be converted to
|
|
306
|
+
`lf.Message` (see `lf.Message.from_value` for details).
|
|
307
|
+
* **Output**: Returns structured outputs when `schema` is specified;
|
|
308
|
+
otherwise, outputs raw natural language (as a string).
|
|
309
|
+
* **Few-shot examples**: Supports structured few-shot examples with the
|
|
310
|
+
`examples` argument.
|
|
311
|
+
* **Multi-LM fan-out**: Sends queries to multiple language models for
|
|
312
|
+
multiple samples in parallel, returning a list of outputs.
|
|
313
|
+
|
|
314
|
+
**Basic Usage:**
|
|
315
|
+
|
|
316
|
+
1. **Natural Language Query**:
|
|
317
|
+
If `schema` is not provided, `lf.query` returns a natural language
|
|
318
|
+
response:
|
|
319
|
+
```python
|
|
320
|
+
r = lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
|
|
321
|
+
print(r)
|
|
322
|
+
# Output: 2
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
2. **Structured Output**:
|
|
326
|
+
If `schema` is provided, `lf.query` guides the LLM to directly generate a
|
|
327
|
+
response according to the specified schema, and then parses the response
|
|
328
|
+
into a Python object:
|
|
329
|
+
```python
|
|
330
|
+
r = lf.query('1 + 1 = ?', int, lm=lf.llms.Gemini25Flash())
|
|
331
|
+
print(r)
|
|
332
|
+
# Output: 2
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
**Advanced Usage:**
|
|
336
|
+
|
|
337
|
+
1. **Structured Input**:
|
|
338
|
+
Besides natural language, `prompt` can be a `pg.Object`, whose symbolic
|
|
339
|
+
representation will be sent to the LLM:
|
|
340
|
+
```python
|
|
341
|
+
class Sum(pg.Object):
|
|
342
|
+
a: int
|
|
343
|
+
b: int
|
|
344
|
+
r = lf.query(Sum(1, 1), int, lm=lf.llms.Gemini25Flash())
|
|
345
|
+
print(r)
|
|
346
|
+
# Output: 2
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
2. **Multi-Modal Input**:
|
|
350
|
+
`lf.query` supports prompts containing multi-modal inputs, such as images
|
|
351
|
+
or audio, by embedding modality objects within a template string:
|
|
352
|
+
```python
|
|
353
|
+
image = lf.Image.from_path('/path/to/image.png')
|
|
354
|
+
r = lf.query(
|
|
355
|
+
'what is in the {{image}}?',
|
|
356
|
+
str,
|
|
357
|
+
image=image,
|
|
358
|
+
lm=lf.llms.Gemini25Flash()
|
|
359
|
+
)
|
|
360
|
+
print(r)
|
|
361
|
+
# Output: A cat sitting on a sofa.
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
3. **Few-Shot Examples**:
|
|
365
|
+
You can provide few-shot examples to guide model behavior using the
|
|
366
|
+
`examples` argument. Each example is an `lf.MappingExample` containing
|
|
367
|
+
`input`, `output`, and, if needed, `schema`.
|
|
368
|
+
```python
|
|
369
|
+
class Sentiment(pg.Object):
|
|
370
|
+
sentiment: Literal['positive', 'negative', 'neutral']
|
|
371
|
+
reason: str
|
|
372
|
+
|
|
373
|
+
r = lf.query(
|
|
374
|
+
'I love this movie!',
|
|
375
|
+
Sentiment,
|
|
376
|
+
examples=[
|
|
377
|
+
lf.MappingExample(
|
|
378
|
+
'This movie is terrible.',
|
|
379
|
+
Sentiment(sentiment='negative', reason='The plot is boring.')
|
|
380
|
+
),
|
|
381
|
+
lf.MappingExample(
|
|
382
|
+
'It is okay.',
|
|
383
|
+
Sentiment(sentiment='neutral', reason='The movie is average.')
|
|
384
|
+
),
|
|
385
|
+
],
|
|
386
|
+
lm=lf.llms.Gemini25Flash())
|
|
387
|
+
print(r)
|
|
388
|
+
# Output:
|
|
389
|
+
# Sentiment(
|
|
390
|
+
# sentiment='positive',
|
|
391
|
+
#   reason='The user expresses positive feedback.'
|
|
392
|
+
# )
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
4. **Multi-LM Fan-Out**:
|
|
396
|
+
`lf.query` can concurrently query multiple language models by providing
|
|
397
|
+
a list of LMs to the `lm` argument and specifying the number of samples
|
|
398
|
+
for each with `num_samples`.
|
|
399
|
+
```python
|
|
400
|
+
r = lf.query(
|
|
401
|
+
'1 + 1 = ?',
|
|
402
|
+
int,
|
|
403
|
+
lm=[lf.llms.Gemini25Flash(), lf.llms.Gemini()],
|
|
404
|
+
num_samples=[1, 2])
|
|
405
|
+
print(r)
|
|
406
|
+
# Output: [2, 2, 2]
|
|
407
|
+
```
|
|
404
408
|
|
|
405
409
|
Args:
|
|
406
410
|
prompt: The input query. Can be:
|
|
407
411
|
- A natural language string (supports templating with `{{}}`),
|
|
408
|
-
- A `pg.Object`
|
|
412
|
+
- A `pg.Object` for structured input,
|
|
409
413
|
- An `lf.Template` for mixed or template-based inputs.
|
|
410
|
-
schema: Type annotation or `lf.Schema` object for the expected output.
|
|
414
|
+
schema: Type annotation or `lf.Schema` object for the expected output.
|
|
411
415
|
If `None` (default), the response will be a natural language string.
|
|
412
|
-
default:
|
|
413
|
-
error will be raised
|
|
416
|
+
default: The default value to return if parsing fails. If
|
|
417
|
+
`lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
|
|
418
|
+
instead.
|
|
414
419
|
lm: The language model(s) to query. Can be:
|
|
415
420
|
- A single `LanguageModel`,
|
|
416
421
|
- A list of `LanguageModel`s for multi-model fan-out.
|
|
@@ -430,18 +435,18 @@ def query(
|
|
|
430
435
|
from `lf.context` or the main `lm`.
|
|
431
436
|
protocol: Format for schema representation. Builtin choices are `'json'` or
|
|
432
437
|
`'python'`, users could extend with their own protocols by subclassing
|
|
433
|
-
`lf.structured.LfQuery
|
|
438
|
+
`lf.structured.LfQuery`. Also protocol could be specified with a version
|
|
434
439
|
in the format of 'protocol:version', e.g., 'python:1.0', so users could
|
|
435
440
|
use a specific version of the prompt based on the protocol. Please see the
|
|
436
441
|
documentation of `LfQuery` for more details. If None, the protocol from
|
|
437
442
|
context manager `lf.query_protocol` will be used, or 'python' if not
|
|
438
443
|
specified.
|
|
439
|
-
returns_message:
|
|
444
|
+
returns_message: If `True`, returns an `lf.Message` object instead of
|
|
440
445
|
the final parsed result.
|
|
441
|
-
skip_lm: If `True`, skips the LLM call and returns the rendered
|
|
446
|
+
skip_lm: If `True`, skips the LLM call and returns the rendered
|
|
442
447
|
prompt as a `UserMessage` object.
|
|
443
448
|
invocation_id: The ID of the query invocation, which will be passed to
|
|
444
|
-
`lf.QueryInvocation
|
|
449
|
+
`lf.QueryInvocation`. If `None`, a unique ID will
|
|
445
450
|
be generated.
|
|
446
451
|
**kwargs: Additional keyword arguments for:
|
|
447
452
|
- Rendering templates (e.g., `template_str`, `preamble`),
|
|
@@ -453,7 +458,7 @@ def query(
|
|
|
453
458
|
Returns:
|
|
454
459
|
The result of the query:
|
|
455
460
|
- A single output or a list of outputs if multiple models/samples are used.
|
|
456
|
-
- Each output is a parsed object matching `schema`, an `lf.Message` (if
|
|
461
|
+
- Each output is a parsed object matching `schema`, an `lf.Message` (if
|
|
457
462
|
`returns_message=True`), or a natural language string (default).
|
|
458
463
|
"""
|
|
459
464
|
# Internal usage logging.
|
|
@@ -671,7 +676,7 @@ def query(
|
|
|
671
676
|
|
|
672
677
|
|
|
673
678
|
async def aquery(
|
|
674
|
-
prompt: Union[str, lf.Template, Any],
|
|
679
|
+
prompt: Union[str, lf.Template, lf.Message, Any],
|
|
675
680
|
schema: schema_lib.SchemaType | None = None,
|
|
676
681
|
default: Any = lf.RAISE_IF_HAS_ERROR,
|
|
677
682
|
*,
|
|
@@ -727,7 +732,7 @@ def query_protocol(protocol: str) -> Iterator[None]:
|
|
|
727
732
|
|
|
728
733
|
|
|
729
734
|
def query_and_reduce(
|
|
730
|
-
prompt: Union[str, lf.Template, Any],
|
|
735
|
+
prompt: Union[str, lf.Template, lf.Message, Any],
|
|
731
736
|
schema: schema_lib.SchemaType | None = None,
|
|
732
737
|
*,
|
|
733
738
|
reduce: Callable[[list[Any]], Any],
|
|
@@ -736,12 +741,12 @@ def query_and_reduce(
|
|
|
736
741
|
**kwargs,
|
|
737
742
|
) -> Any:
|
|
738
743
|
"""Issues multiple `lf.query` calls in parallel and reduce the outputs.
|
|
739
|
-
|
|
744
|
+
|
|
740
745
|
Args:
|
|
741
746
|
prompt: A str (may contain {{}} as template) as natural language input, or a
|
|
742
747
|
`pg.Symbolic` object as structured input as prompt to LLM.
|
|
743
|
-
schema: A type annotation as the schema for output object. If
|
|
744
|
-
the response will be a str in natural language.
|
|
748
|
+
schema: A type annotation as the schema for output object. If None
|
|
749
|
+
(default), the response will be a str in natural language.
|
|
745
750
|
reduce: A function to reduce the outputs of multiple `lf.query` calls. It
|
|
746
751
|
takes a list of outputs and returns the final object.
|
|
747
752
|
lm: The language model to use. If not specified, the language model from
|
|
@@ -765,11 +770,34 @@ def query_and_reduce(
|
|
|
765
770
|
|
|
766
771
|
|
|
767
772
|
def query_prompt(
|
|
768
|
-
prompt: Union[str, lf.Template, Any],
|
|
773
|
+
prompt: Union[str, lf.Template, lf.Message, Any],
|
|
769
774
|
schema: schema_lib.SchemaType | None = None,
|
|
770
775
|
**kwargs,
|
|
771
776
|
) -> lf.Message:
|
|
772
|
-
"""
|
|
777
|
+
"""Renders the prompt message for `lf.query` without calling the LLM.
|
|
778
|
+
|
|
779
|
+
This function simulates the prompt generation step of `lf.query`,
|
|
780
|
+
producing the `lf.Message` object that would be sent to the language model.
|
|
781
|
+
It is useful for debugging prompts or inspecting how inputs are formatted.
|
|
782
|
+
|
|
783
|
+
**Example:**
|
|
784
|
+
|
|
785
|
+
```python
|
|
786
|
+
import langfun as lf
|
|
787
|
+
|
|
788
|
+
prompt_message = lf.query_prompt('1 + 1 = ?', schema=int)
|
|
789
|
+
print(prompt_message.text)
|
|
790
|
+
```
|
|
791
|
+
|
|
792
|
+
Args:
|
|
793
|
+
prompt: The user prompt, which can be a string, `lf.Template`, or any
|
|
794
|
+
serializable object.
|
|
795
|
+
schema: The target schema for the query, used for prompt formatting.
|
|
796
|
+
**kwargs: Additional keyword arguments to pass to `lf.query`.
|
|
797
|
+
|
|
798
|
+
Returns:
|
|
799
|
+
The rendered `lf.Message` object.
|
|
800
|
+
"""
|
|
773
801
|
# Delay import to avoid circular dependency in Colab.
|
|
774
802
|
# llms > data/conversion > structured > querying
|
|
775
803
|
from langfun.core.llms import fake # pylint: disable=g-import-not-at-top
|
|
@@ -791,7 +819,39 @@ def query_output(
|
|
|
791
819
|
schema: schema_lib.SchemaType | None = None,
|
|
792
820
|
**kwargs,
|
|
793
821
|
) -> Any:
|
|
794
|
-
"""
|
|
822
|
+
"""Parses a raw LLM response based on a schema, as `lf.query` would.
|
|
823
|
+
|
|
824
|
+
This function simulates the output processing part of `lf.query`, taking
|
|
825
|
+
a raw response from a language model and parsing it into the desired schema.
|
|
826
|
+
It is useful for reprocessing LLM responses or for testing parsing and
|
|
827
|
+
auto-fixing logic independently of LLM calls.
|
|
828
|
+
|
|
829
|
+
**Example:**
|
|
830
|
+
|
|
831
|
+
```python
|
|
832
|
+
import langfun as lf
|
|
833
|
+
|
|
834
|
+
# Output when schema is provided.
|
|
835
|
+
structured_output = lf.query_output('2', schema=int)
|
|
836
|
+
print(structured_output)
|
|
837
|
+
# Output: 2
|
|
838
|
+
|
|
839
|
+
# Output when no schema is provided.
|
|
840
|
+
raw_output = lf.query_output('The answer is 2.')
|
|
841
|
+
print(raw_output)
|
|
842
|
+
# Output: The answer is 2.
|
|
843
|
+
```
|
|
844
|
+
|
|
845
|
+
Args:
|
|
846
|
+
response: The raw response from an LLM, as a string or `lf.Message`.
|
|
847
|
+
schema: The target schema to parse the response into. If `None`, the
|
|
848
|
+
response text is returned.
|
|
849
|
+
**kwargs: Additional keyword arguments to pass to `lf.query` for parsing
|
|
850
|
+
(e.g., `autofix`, `default`).
|
|
851
|
+
|
|
852
|
+
Returns:
|
|
853
|
+
The parsed object if schema is provided, or the response text otherwise.
|
|
854
|
+
"""
|
|
795
855
|
# Delay import to avoid circular dependency in Colab.
|
|
796
856
|
# llms > data/conversion > structured > querying
|
|
797
857
|
from langfun.core.llms import fake # pylint: disable=g-import-not-at-top
|
|
@@ -812,7 +872,7 @@ def query_reward(
|
|
|
812
872
|
mapping_example: Union[str, mapping.MappingExample],
|
|
813
873
|
response: Union[str, lf.Message],
|
|
814
874
|
) -> float | None:
|
|
815
|
-
"""Returns the reward of an LLM response based on
|
|
875
|
+
"""Returns the reward of an LLM response based on a mapping example."""
|
|
816
876
|
if isinstance(mapping_example, str):
|
|
817
877
|
mapping_example = pg.from_json_str(mapping_example)
|
|
818
878
|
assert isinstance(mapping_example, mapping.MappingExample), mapping_example
|
|
@@ -1250,17 +1310,28 @@ def track_queries(
|
|
|
1250
1310
|
start_callback: Callable[[QueryInvocation], None] | None = None,
|
|
1251
1311
|
end_callback: Callable[[QueryInvocation], None] | None = None,
|
|
1252
1312
|
) -> Iterator[list[QueryInvocation]]:
|
|
1253
|
-
"""
|
|
1313
|
+
"""Tracks all `lf.query` calls made within a `with` block.
|
|
1254
1314
|
|
|
1255
|
-
|
|
1315
|
+
`lf.track_queries` is useful for inspecting LLM inputs and outputs,
|
|
1316
|
+
debugging, and analyzing model behavior. It returns a list of
|
|
1317
|
+
`lf.QueryInvocation` objects, each containing detailed information about
|
|
1318
|
+
a query, such as the input prompt, schema, LLM request/response,
|
|
1319
|
+
and any errors encountered.
|
|
1256
1320
|
|
|
1257
|
-
|
|
1258
|
-
with lf.track_queries() as queries:
|
|
1259
|
-
lf.query('hi', lm=lm)
|
|
1260
|
-
lf.query('What is this {{image}}?', lm=lm, image=image)
|
|
1321
|
+
**Example:**
|
|
1261
1322
|
|
|
1262
|
-
|
|
1263
|
-
|
|
1323
|
+
```python
|
|
1324
|
+
import langfun as lf
|
|
1325
|
+
|
|
1326
|
+
with lf.track_queries() as queries:
|
|
1327
|
+
lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
|
|
1328
|
+
lf.query('Hello!', lm=lf.llms.Gemini25Flash())
|
|
1329
|
+
|
|
1330
|
+
# Print recorded queries
|
|
1331
|
+
for query in queries:
|
|
1332
|
+
print(query.lm_request)
|
|
1333
|
+
print(query.lm_response)
|
|
1334
|
+
```
|
|
1264
1335
|
|
|
1265
1336
|
Args:
|
|
1266
1337
|
include_child_scopes: If True, the queries made in child scopes will be
|
|
@@ -33,12 +33,12 @@ def include_method_in_prompt(method):
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def should_include_method_in_prompt(method):
|
|
36
|
-
"""Returns
|
|
36
|
+
"""Returns True if the method should be shown in the prompt."""
|
|
37
37
|
return getattr(method, '__show_in_prompt__', False)
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
def parse_value_spec(value) -> pg.typing.ValueSpec:
|
|
41
|
-
"""Parses a PyGlove ValueSpec
|
|
41
|
+
"""Parses a PyGlove ValueSpec equivalent into a ValueSpec."""
|
|
42
42
|
if isinstance(value, pg.typing.ValueSpec):
|
|
43
43
|
return value
|
|
44
44
|
|
|
@@ -121,7 +121,67 @@ class Schema(
|
|
|
121
121
|
pg.Object,
|
|
122
122
|
pg.views.HtmlTreeView.Extension
|
|
123
123
|
):
|
|
124
|
-
"""
|
|
124
|
+
"""Schema for structured inputs and outputs.
|
|
125
|
+
|
|
126
|
+
`lf.Schema` provides a unified representation for defining the output schema
|
|
127
|
+
used in Langfun's structured operations like `lf.query`, `lf.parse`,
|
|
128
|
+
`lf.complete`, and `lf.describe`. It acts as an abstraction layer,
|
|
129
|
+
allowing schemas to be defined using Python type annotations, `pg.Object`
|
|
130
|
+
classes, or dictionaries, and then converting them into a format that
|
|
131
|
+
language models can understand.
|
|
132
|
+
|
|
133
|
+
`lf.Schema` can be created from various types using `lf.Schema.from_value`:
|
|
134
|
+
* Built-in types: `int`, `str`, `bool`, `float`
|
|
135
|
+
* Typing constructs: `list`, `dict`, `typing.Union`, `typing.Literal`,
|
|
136
|
+
`typing.Optional`
|
|
137
|
+
* PyGlove classes: `pg.Object` subclasses
|
|
138
|
+
|
|
139
|
+
**1. Creating a Schema:**
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
import langfun as lf
|
|
143
|
+
import pyglove as pg
|
|
144
|
+
from typing import Literal, Union
|
|
145
|
+
|
|
146
|
+
# From a basic type
|
|
147
|
+
int_schema = lf.Schema.from_value(int)
|
|
148
|
+
|
|
149
|
+
# From a list type
|
|
150
|
+
list_schema = lf.Schema.from_value(list[int])
|
|
151
|
+
|
|
152
|
+
# From a dictionary
|
|
153
|
+
dict_schema = lf.Schema.from_value(dict(a=int, b=str))
|
|
154
|
+
|
|
155
|
+
# From pg.Object
|
|
156
|
+
class Point(pg.Object):
|
|
157
|
+
x: int
|
|
158
|
+
y: int
|
|
159
|
+
point_schema = lf.Schema.from_value(Point)
|
|
160
|
+
|
|
161
|
+
# From Union or Literal
|
|
162
|
+
union_schema = lf.Schema.from_value(Union[int, str])
|
|
163
|
+
literal_schema = lf.Schema.from_value(Literal['A', 'B'])
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**2. Schema Representation:**
|
|
167
|
+
Once created, a schema object can represent itself in different formats,
|
|
168
|
+
such as Python-like syntax or JSON, which is used in prompts to LLMs.
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
print(point_schema.repr('python'))
|
|
172
|
+
# Output:
|
|
173
|
+
# class Point:
|
|
174
|
+
# x: int
|
|
175
|
+
# y: int
|
|
176
|
+
|
|
177
|
+
print(dict_schema.repr('json'))
|
|
178
|
+
# Output:
|
|
179
|
+
# {
|
|
180
|
+
# "a": "int",
|
|
181
|
+
# "b": "str"
|
|
182
|
+
# }
|
|
183
|
+
```
|
|
184
|
+
"""
|
|
125
185
|
|
|
126
186
|
spec: pg.typing.Annotated[
|
|
127
187
|
pg.typing.Object(pg.typing.ValueSpec, transform=parse_value_spec),
|
|
@@ -144,7 +204,7 @@ class Schema(
|
|
|
144
204
|
def parse(
|
|
145
205
|
self, text: str, protocol: SchemaProtocol = 'json', **kwargs
|
|
146
206
|
) -> Any:
|
|
147
|
-
"""
|
|
207
|
+
"""Parses a LM generated text into a structured value."""
|
|
148
208
|
value = value_repr(protocol).parse(text, self, **kwargs)
|
|
149
209
|
|
|
150
210
|
# TODO(daiyip): support autofix for schema error.
|
|
@@ -157,7 +217,7 @@ class Schema(
|
|
|
157
217
|
return self.schema_str()
|
|
158
218
|
|
|
159
219
|
def schema_dict(self) -> dict[str, Any]:
|
|
160
|
-
"""Returns the
|
|
220
|
+
"""Returns the dictionary representation of the schema."""
|
|
161
221
|
|
|
162
222
|
def _node(vs: pg.typing.ValueSpec) -> Any:
|
|
163
223
|
if isinstance(vs, pg.typing.PrimitiveType):
|
|
@@ -406,7 +466,7 @@ def class_definitions(
|
|
|
406
466
|
strict: bool = False,
|
|
407
467
|
markdown: bool = False,
|
|
408
468
|
) -> str | None:
|
|
409
|
-
"""Returns a
|
|
469
|
+
"""Returns a string for class definitions."""
|
|
410
470
|
if not classes:
|
|
411
471
|
return None
|
|
412
472
|
def_str = io.StringIO()
|
|
@@ -683,7 +743,7 @@ class ValueRepr(metaclass=abc.ABCMeta):
|
|
|
683
743
|
|
|
684
744
|
@abc.abstractmethod
|
|
685
745
|
def parse(self, text: str, schema: Schema | None = None, **kwargs) -> Any:
|
|
686
|
-
"""
|
|
746
|
+
"""Parses a LM generated text into a structured value."""
|
|
687
747
|
|
|
688
748
|
|
|
689
749
|
class ValuePythonRepr(ValueRepr):
|
|
@@ -739,7 +799,7 @@ class ValuePythonRepr(ValueRepr):
|
|
|
739
799
|
autofix_lm: lf.LanguageModel = lf.contextual(),
|
|
740
800
|
**kwargs,
|
|
741
801
|
) -> Any:
|
|
742
|
-
"""
|
|
802
|
+
"""Parses a Python string into a structured object."""
|
|
743
803
|
del kwargs
|
|
744
804
|
global_vars = additional_context or {}
|
|
745
805
|
if schema is not None:
|
|
@@ -820,7 +880,7 @@ class ValueJsonRepr(ValueRepr):
|
|
|
820
880
|
return pg.to_json_str(dict(result=value))
|
|
821
881
|
|
|
822
882
|
def parse(self, text: str, schema: Schema | None = None, **kwargs) -> Any:
|
|
823
|
-
"""
|
|
883
|
+
"""Parses a JSON string into a structured object."""
|
|
824
884
|
del schema
|
|
825
885
|
try:
|
|
826
886
|
text = cleanup_json(text)
|
|
@@ -837,7 +897,7 @@ class ValueJsonRepr(ValueRepr):
|
|
|
837
897
|
|
|
838
898
|
|
|
839
899
|
def cleanup_json(json_str: str) -> str:
|
|
840
|
-
"""
|
|
900
|
+
"""Cleans up the LM responded JSON string."""
|
|
841
901
|
# Treatments:
|
|
842
902
|
# 1. Extract the JSON string with a top-level dict from the response.
|
|
843
903
|
# This prevents the leading and trailing texts in the response to
|
|
@@ -90,16 +90,35 @@ def generate_class(
|
|
|
90
90
|
skip_lm: bool = False,
|
|
91
91
|
**kwargs,
|
|
92
92
|
) -> Type[Any] | lf.Message:
|
|
93
|
-
"""
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
93
|
+
"""Generates a Python class dynamically from a prompt using an LLM.
|
|
94
|
+
|
|
95
|
+
`lf.structured.generate_class` takes a class name and a natural language
|
|
96
|
+
description (prompt) and uses a language model to generate a Python class
|
|
97
|
+
(inheriting from `pg.Object`) that matches the description.
|
|
98
|
+
This is useful for creating structured data types on-the-fly based on
|
|
99
|
+
dynamic requirements.
|
|
100
|
+
|
|
101
|
+
**Example:**
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
import langfun as lf
|
|
105
|
+
import pyglove as pg
|
|
106
|
+
|
|
107
|
+
trip_plan_cls = lf.structured.generate_class(
|
|
108
|
+
'TripPlan',
|
|
109
|
+
'A trip plan to visit San Francisco, including a list of destinations, '
|
|
110
|
+
'start date, end date, and total budget.',
|
|
111
|
+
lm=lf.llms.Gemini25Flash())
|
|
112
|
+
|
|
113
|
+
# This might generate a class like:
|
|
114
|
+
# class TripPlan(pg.Object):
|
|
115
|
+
# destinations: list[str]
|
|
116
|
+
# start_date: str
|
|
117
|
+
# end_date: str
|
|
118
|
+
# total_budget: float
|
|
119
|
+
|
|
120
|
+
print(lf.Schema.from_value(trip_plan_cls).schema_str('python'))
|
|
121
|
+
```
|
|
103
122
|
|
|
104
123
|
Args:
|
|
105
124
|
name: Class name to be generated.
|
|
@@ -108,17 +127,17 @@ def generate_class(
|
|
|
108
127
|
lm: The language model to use. If not specified, the language model from
|
|
109
128
|
`lf.context` context manager will be used.
|
|
110
129
|
examples: An optional list of fewshot examples for helping class generation.
|
|
111
|
-
If None, a default single
|
|
112
|
-
`lf.structured.classgen_example` to generate
|
|
130
|
+
If None, a default single-shot example will be used. Use
|
|
131
|
+
`lf.structured.classgen_example` to generate examples.
|
|
113
132
|
returns_message: If True, returns `lf.Message` as the output, instead of
|
|
114
133
|
returning the structured `message.result`.
|
|
115
134
|
skip_lm: If True, returns the rendered prompt as a UserMessage object.
|
|
116
|
-
otherwise
|
|
135
|
+
Otherwise, returns the LLM response based on the rendered prompt.
|
|
117
136
|
**kwargs: Template variables passed to `prompt` and keyword arguments passed
|
|
118
137
|
to `lf.structured.GenerateClass`.
|
|
119
138
|
|
|
120
139
|
Returns:
|
|
121
|
-
|
|
140
|
+
The generated Python class, or `lf.Message` if `returns_message` is True.
|
|
122
141
|
|
|
123
142
|
Raises:
|
|
124
143
|
CodeError: if generation failed.
|