langfun 0.1.2.dev202509020804__py3-none-any.whl → 0.1.2.dev202511110805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic.

Files changed (133)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +6 -1
  3. langfun/core/agentic/__init__.py +4 -0
  4. langfun/core/agentic/action.py +412 -103
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +68 -6
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +9 -2
  20. langfun/core/data/conversion/gemini_test.py +12 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +47 -43
  24. langfun/core/eval/base_test.py +4 -4
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +1 -0
  29. langfun/core/eval/v2/checkpointing.py +30 -4
  30. langfun/core/eval/v2/eval_test_helper.py +1 -1
  31. langfun/core/eval/v2/evaluation.py +60 -14
  32. langfun/core/eval/v2/example.py +22 -11
  33. langfun/core/eval/v2/experiment.py +51 -8
  34. langfun/core/eval/v2/metric_values.py +31 -3
  35. langfun/core/eval/v2/metric_values_test.py +32 -0
  36. langfun/core/eval/v2/metrics.py +39 -4
  37. langfun/core/eval/v2/metrics_test.py +14 -0
  38. langfun/core/eval/v2/progress.py +30 -1
  39. langfun/core/eval/v2/progress_test.py +27 -0
  40. langfun/core/eval/v2/progress_tracking_test.py +6 -0
  41. langfun/core/eval/v2/reporting.py +90 -71
  42. langfun/core/eval/v2/reporting_test.py +20 -6
  43. langfun/core/eval/v2/runners.py +27 -7
  44. langfun/core/eval/v2/runners_test.py +3 -0
  45. langfun/core/langfunc.py +45 -130
  46. langfun/core/langfunc_test.py +6 -4
  47. langfun/core/language_model.py +151 -31
  48. langfun/core/language_model_test.py +9 -3
  49. langfun/core/llms/__init__.py +12 -1
  50. langfun/core/llms/anthropic.py +157 -2
  51. langfun/core/llms/azure_openai.py +29 -17
  52. langfun/core/llms/cache/base.py +25 -3
  53. langfun/core/llms/cache/in_memory.py +48 -7
  54. langfun/core/llms/cache/in_memory_test.py +14 -4
  55. langfun/core/llms/compositional.py +25 -1
  56. langfun/core/llms/deepseek.py +30 -2
  57. langfun/core/llms/fake.py +39 -1
  58. langfun/core/llms/fake_test.py +9 -0
  59. langfun/core/llms/gemini.py +43 -7
  60. langfun/core/llms/google_genai.py +34 -1
  61. langfun/core/llms/groq.py +28 -3
  62. langfun/core/llms/llama_cpp.py +23 -4
  63. langfun/core/llms/openai.py +93 -3
  64. langfun/core/llms/openai_compatible.py +148 -27
  65. langfun/core/llms/openai_compatible_test.py +207 -20
  66. langfun/core/llms/openai_test.py +0 -2
  67. langfun/core/llms/rest.py +16 -1
  68. langfun/core/llms/vertexai.py +59 -8
  69. langfun/core/logging.py +1 -1
  70. langfun/core/mcp/__init__.py +10 -0
  71. langfun/core/mcp/client.py +177 -0
  72. langfun/core/mcp/client_test.py +71 -0
  73. langfun/core/mcp/session.py +241 -0
  74. langfun/core/mcp/session_test.py +54 -0
  75. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  76. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  77. langfun/core/mcp/tool.py +256 -0
  78. langfun/core/mcp/tool_test.py +197 -0
  79. langfun/core/memory.py +1 -0
  80. langfun/core/message.py +160 -55
  81. langfun/core/message_test.py +65 -81
  82. langfun/core/modalities/__init__.py +8 -0
  83. langfun/core/modalities/audio.py +21 -1
  84. langfun/core/modalities/image.py +19 -1
  85. langfun/core/modalities/mime.py +62 -3
  86. langfun/core/modalities/pdf.py +19 -1
  87. langfun/core/modalities/video.py +21 -1
  88. langfun/core/modality.py +167 -29
  89. langfun/core/modality_test.py +42 -12
  90. langfun/core/natural_language.py +1 -1
  91. langfun/core/sampling.py +4 -4
  92. langfun/core/sampling_test.py +20 -4
  93. langfun/core/structured/completion.py +34 -44
  94. langfun/core/structured/completion_test.py +23 -43
  95. langfun/core/structured/description.py +54 -50
  96. langfun/core/structured/function_generation.py +29 -12
  97. langfun/core/structured/mapping.py +74 -28
  98. langfun/core/structured/parsing.py +90 -74
  99. langfun/core/structured/parsing_test.py +0 -3
  100. langfun/core/structured/querying.py +242 -156
  101. langfun/core/structured/querying_test.py +95 -64
  102. langfun/core/structured/schema.py +70 -10
  103. langfun/core/structured/schema_generation.py +33 -14
  104. langfun/core/structured/scoring.py +45 -34
  105. langfun/core/structured/tokenization.py +24 -9
  106. langfun/core/subscription.py +2 -2
  107. langfun/core/template.py +175 -50
  108. langfun/core/template_test.py +123 -17
  109. langfun/env/__init__.py +43 -0
  110. langfun/env/base_environment.py +827 -0
  111. langfun/env/base_environment_test.py +473 -0
  112. langfun/env/base_feature.py +304 -0
  113. langfun/env/base_feature_test.py +228 -0
  114. langfun/env/base_sandbox.py +842 -0
  115. langfun/env/base_sandbox_test.py +1235 -0
  116. langfun/env/event_handlers/__init__.py +14 -0
  117. langfun/env/event_handlers/chain.py +233 -0
  118. langfun/env/event_handlers/chain_test.py +253 -0
  119. langfun/env/event_handlers/event_logger.py +472 -0
  120. langfun/env/event_handlers/event_logger_test.py +304 -0
  121. langfun/env/event_handlers/metric_writer.py +726 -0
  122. langfun/env/event_handlers/metric_writer_test.py +214 -0
  123. langfun/env/interface.py +1640 -0
  124. langfun/env/interface_test.py +151 -0
  125. langfun/env/load_balancers.py +59 -0
  126. langfun/env/load_balancers_test.py +139 -0
  127. langfun/env/test_utils.py +497 -0
  128. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/METADATA +7 -3
  129. langfun-0.1.2.dev202511110805.dist-info/RECORD +200 -0
  130. langfun-0.1.2.dev202509020804.dist-info/RECORD +0 -172
  131. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/WHEEL +0 -0
  132. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/licenses/LICENSE +0 -0
  133. {langfun-0.1.2.dev202509020804.dist-info → langfun-0.1.2.dev202511110805.dist-info}/top_level.txt +0 -0
@@ -274,11 +274,11 @@ class _LfQueryPythonV2(LfQuery):
 
 
 def query(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
     schema: schema_lib.SchemaType | None = None,
     default: Any = lf.RAISE_IF_HAS_ERROR,
     *,
-    lm: lf.LanguageModel | list[lf.LanguageModel] | None = None,
+    lm: lf.LanguageModel | list[lf.LanguageModel],
     num_samples: int | list[int] = 1,
     system_message: str | lf.Template | None = None,
     examples: list[mapping.MappingExample] | None = None,
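
The hunk above makes two API changes to `lf.query`: `prompt` now also accepts an `lf.Message`, and `lm` loses its `None` default, so a model must be passed explicitly (the fallback to `lf.context` is also removed from the docs further down). A minimal calling sketch of the new signature; the model choice is an assumption mirroring the `lf.llms.Gemini25Flash()` used in the new docstring examples:

```python
import langfun as lf

lm = lf.llms.Gemini25Flash()  # assumed; any concrete LanguageModel works

# `lm` is now a required keyword argument.
r1 = lf.query('1 + 1 = ?', int, lm=lm)

# New in this release: a pre-built message can serve as the prompt.
r2 = lf.query(lf.UserMessage('1 + 1 = ?'), int, lm=lm)
```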
@@ -298,123 +298,127 @@ def query(
   supporting natural language prompts, structured inputs, and multiple advanced
   features.
 
-  Key Features:
-
-  - **Input**: Accepts natural language strings, structured inputs (e.g.,
-    `pg.Object`), and templates (`lf.Template`) with modality objects.
-
-  - **Output**: Returns structured outputs when `schema` is specified;
-    otherwise, outputs raw natural language (as a string).
-
-  - **Few-shot examples**: Supports structured few-shot examples with the
-    `examples` argument.
-
-  - **Multi-LM fan-out**: Sends queries to multiple language models with in
-    multiple samples in parallel, returning a list of outputs.
-
-  Examples:
-
-    Case 1: Regular natural language-based LLM query:
-
-    ```
-    lf.query('1 + 1 = ?', lm=lf.llms.Gpt4Turbo())
-
-    # Outptut: '2'
-    ```
-
-    Case 2: Query with structured output.
-
-    ```
-    lf.query('1 + 1 = ?', int, lm=lf.llms.Gpt4Turbo())
-
-    # Output: 2
-    ```
-
-    Case 3: Query with structured input.
-
-    ```
-    class Sum(pg.Object):
-      a: int
-      b: int
-
-    lf.query(Sum(1, 1), int, lm=lf.llms.Gpt4Turbo())
-
-    # Output: 2
-    ```
-
-    Case 4: Query with input of mixed modalities.
-
-    ```
-    class Animal(pg.Object):
-      pass
-
-    class Dog(Animal):
-      pass
-
-    class Entity(pg.Object):
-      name: str
-
-    lf.query(
-        'What is in this {{image}} and {{objects}}?'
-        list[Entity],
-        lm=lf.llms.Gpt4Turbo()
-        image=lf.Image(path='/path/to/a/airplane.png'),
-        objects=[Dog()],
-    )
-
-    # Output: [Entity(name='airplane'), Entity(name='dog')]
-    ```
-
-    Case 5: Query with structured few-shot examples.
-    ```
-    lf.query(
-        'What is in this {{image}} and {{objects}}?'
-        list[Entity],
-        lm=lf.llms.Gpt4Turbo()
-        image=lf.Image(path='/path/to/a/dinasaur.png'),
-        objects=[Dog()],
-        examples=[
-            lf.MappingExample(
-                input=lf.Template(
-                    'What is the object near the house in this {{image}}?',
-                    image=lf.Image(path='/path/to/image.png'),
-                ),
-                schema=Entity,
-                output=Entity('cat'),
-            ),
-        ],
-    )
-
-    # Output: [Entity(name='dinasaur'), Entity(name='dog')]
-    ```
-
-    Case 6: Multiple queries to multiple models.
-    ```
-    lf.query(
-        '1 + 1 = ?',
-        int,
-        lm=[
-            lf.llms.Gpt4Turbo(),
-            lf.llms.Gemini1_5Pro(),
-        ],
-        num_samples=[1, 2],
-    )
-    # Output: [2, 2, 2]
-    ```
+  **Key Features:**
+
+  * **Input**: Accepts natural language strings, structured inputs (e.g.,
+    `pg.Object`), templates (`lf.Template`) with modality objects, messages (
+    `lf.Message`) with modality objects, or objects that can be converted to
+    `lf.Message` (see `lf.Message.from_value` for details).
+  * **Output**: Returns structured outputs when `schema` is specified;
+    otherwise, outputs raw natural language (as a string).
+  * **Few-shot examples**: Supports structured few-shot examples with the
+    `examples` argument.
+  * **Multi-LM fan-out**: Sends queries to multiple language models for
+    multiple samples in parallel, returning a list of outputs.
+
+  **Basic Usage:**
+
+  1. **Natural Language Query**:
+     If `schema` is not provided, `lf.query` returns a natural language
+     response:
+     ```python
+     r = lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output: 2
+     ```
+
+  2. **Structured Output**:
+     If `schema` is provided, `lf.query` guides LLM to directly generate
+     response according to the specified schema, it then parses the response
+     into a Python object:
+     ```python
+     r = lf.query('1 + 1 = ?', int, lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output: 2
+     ```
+
+  **Advanced Usage:**
+
+  1. **Structured Input**:
+     Besides natural language, `prompt` can be a `pg.Object`, whose symbolic
+     representation will be sent to the LLM:
+     ```python
+     class Sum(pg.Object):
+       a: int
+       b: int
+     r = lf.query(Sum(1, 1), int, lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output: 2
+     ```
+
+  2. **Multi-Modal Input**:
+     `lf.query` supports prompts containing multi-modal inputs, such as images
+     or audio, by embedding modality objects within a template string:
+     ```python
+     image = lf.Image.from_path('/path/to/image.png')
+     r = lf.query(
+         'what is in the {{image}}?',
+         str,
+         image=image,
+         lm=lf.llms.Gemini25Flash()
+     )
+     print(r)
+     # Output: A cat sitting on a sofa.
+     ```
+
+  3. **Few-Shot Examples**:
+     You can provide few-shot examples to guide model behavior using the
+     `examples` argument. Each example is an `lf.MappingExample` containing
+     `input`, `output`, and, if needed, `schema`.
+     ```python
+     class Sentiment(pg.Object):
+       sentiment: Literal['positive', 'negative', 'neutral']
+       reason: str
+
+     r = lf.query(
+         'I love this movie!',
+         Sentiment,
+         examples=[
+             lf.MappingExample(
+                 'This movie is terrible.',
+                 Sentiment(sentiment='negative', reason='The plot is boring.')
+             ),
+             lf.MappingExample(
+                 'It is okay.',
+                 Sentiment(sentiment='neutral', reason='The movie is average.')
+             ),
+         ],
+         lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output:
+     # Sentiment(
+     #   sentiment='positive',
+     #   reason='The user expresses positive feedback.')
+     # )
+     ```
+
+  4. **Multi-LM Fan-Out**:
+     `lf.query` can concurrently query multiple language models by providing
+     a list of LMs to the `lm` argument and specifying the number of samples
+     for each with `num_samples`.
+     ```python
+     r = lf.query(
+         '1 + 1 = ?',
+         int,
+         lm=[lf.llms.Gemini25Flash(), lf.llms.Gemini()],
+         num_samples=[1, 2])
+     print(r)
+     # Output: [2, 2, 2]
+     ```
 
   Args:
     prompt: The input query. Can be:
       - A natural language string (supports templating with `{{}}`),
-      - A `pg.Object` object for structured input,
+      - A `pg.Object` for structured input,
       - An `lf.Template` for mixed or template-based inputs.
-    schema: Type annotation or `lf.Schema` object for the expected output. 
+    schema: Type annotation or `lf.Schema` object for the expected output.
      If `None` (default), the response will be a natural language string.
-    default: Default value to return if parsing fails. If not specified, an
-      error will be raised.
+    default: The default value to return if parsing fails. If
+      `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
+      instead.
    lm: The language model(s) to query. Can be:
      - A single `LanguageModel`,
      - A list of `LanguageModel`s for multi-model fan-out.
-      If `None`, the LM from `lf.context` will be used.
    num_samples: Number of samples to generate. If a list is provided, its
      length must match the number of models in `lm`.
    system_message: System instructions to guide the model output. If None,
@@ -431,27 +435,30 @@ def query(
      from `lf.context` or the main `lm`.
    protocol: Format for schema representation. Builtin choices are `'json'` or
      `'python'`, users could extend with their own protocols by subclassing
-      `lf.structured.LfQuery'. Also protocol could be specified with a version
+      `lf.structured.LfQuery`. Also protocol could be specified with a version
      in the format of 'protocol:version', e.g., 'python:1.0', so users could
      use a specific version of the prompt based on the protocol. Please see the
      documentation of `LfQuery` for more details. If None, the protocol from
      context manager `lf.query_protocol` will be used, or 'python' if not
      specified.
-    returns_message: If `True`, returns an `lf.Message` object instead of 
+    returns_message: If `True`, returns an `lf.Message` object instead of
      the final parsed result.
-    skip_lm: If `True`, skips the LLM call and returns the rendered 
+    skip_lm: If `True`, skips the LLM call and returns the rendered
      prompt as a `UserMessage` object.
    invocation_id: The ID of the query invocation, which will be passed to
-      `lf.QueryInvocation` when `lf.trackIf `None`, a unique ID will
+      `lf.QueryInvocation`. If `None`, a unique ID will
      be generated.
    **kwargs: Additional keyword arguments for:
      - Rendering templates (e.g., `template_str`, `preamble`),
      - Configuring `lf.structured.Mapping`.
+      - metadata_xxx, which will be passed through to the rendered message
+        metadata under key `xxx`. This allows LLM behavior customization based
+        on metadata `xxx` from the prompt.
 
  Returns:
    The result of the query:
    - A single output or a list of outputs if multiple models/samples are used.
-    - Each output is a parsed object matching `schema`, an `lf.Message` (if 
+    - Each output is a parsed object matching `schema`, an `lf.Message` (if
      `returns_message=True`), or a natural language string (default).
  """
  # Internal usage logging.
@@ -527,24 +534,22 @@ def query(
  ).render(message_cls=lf.SystemMessage)
 
  # Normalize query input.
-  if isinstance(prompt, (lf.Message, str)):
+  if isinstance(prompt, str):
    # Query with structured output.
    prompt_kwargs = kwargs.copy()
    prompt_kwargs.pop('template_str', None)
    query_input = lf.Template.from_value(prompt, **prompt_kwargs)
+  elif isinstance(prompt, lf.Message):
+    query_input = prompt
  elif isinstance(prompt, lf.Template):
-    # Create a copy of the prompt if it has a parent object, so all child
-    # modality objects could be referred by path relative to the prompt.
-    query_input = prompt.clone() if prompt.sym_parent is not None else prompt
-
    # Attach template metadata from kwargs. This is used to pass through fields
    # from kwargs to the rendered message.
-    template_metadata = {
-        k: v for k, v in kwargs.items() if k.startswith('metadata_')
-    }
-    query_input.rebind(
-        template_metadata, skip_notification=True, raise_on_no_change=False
+    prompt.rebind(
+        {k: v for k, v in kwargs.items() if k.startswith('metadata_')},
+        skip_notification=True,
+        raise_on_no_change=False
    )
+    query_input = prompt
  elif pg.MISSING_VALUE == prompt:
    query_input = lf.UserMessage('')
  else:
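
The refactored branch above no longer clones templates; it rebinds any `metadata_*` kwargs directly onto the prompt, and `lf.Message` prompts now pass through untouched. A sketch of the `metadata_*` pass-through described in the updated `**kwargs` docs; the key name `metadata_source` is a hypothetical illustration:

```python
import langfun as lf

# Per the new docs, `metadata_xxx` kwargs surface on the rendered message's
# metadata under key `xxx`. `metadata_source` here is a hypothetical key.
prompt = lf.query_prompt(
    'Summarize: {{text}}',
    text='LLMs are fun.',
    metadata_source='unit-test',
)
# Expected per the new docs: prompt.metadata['source'] == 'unit-test'
```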
@@ -663,11 +668,15 @@ def query(
 
  if returns_message:
    return output_message
-  return output_message.text if schema in (None, str) else output_message.result
+  if schema not in (None, str):
+    return output_message.result
+  if returns_message or output_message.referred_modalities:
+    return output_message
+  return output_message.text
 
 
 async def aquery(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
    schema: schema_lib.SchemaType | None = None,
    default: Any = lf.RAISE_IF_HAS_ERROR,
    *,
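
The rewritten return logic above changes one observable behavior: when no `schema` (or `str`) is given but the response refers to modality objects (e.g. generated images), the whole `lf.Message` is returned rather than just its text. A condensed sketch of the branches, not the actual implementation:

```python
# Sketch of the new return normalization in lf.query:
def normalize_output(m, schema, returns_message):
  if returns_message:
    return m             # caller asked for the raw lf.Message
  if schema not in (None, str):
    return m.result      # parsed structured result
  if m.referred_modalities:
    return m             # keep the message so modalities survive
  return m.text          # plain natural-language answer
```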
@@ -723,7 +732,7 @@ def query_protocol(protocol: str) -> Iterator[None]:
 
 
 def query_and_reduce(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
    schema: schema_lib.SchemaType | None = None,
    *,
    reduce: Callable[[list[Any]], Any],
@@ -732,12 +741,12 @@ def query_and_reduce(
    **kwargs,
 ) -> Any:
  """Issues multiple `lf.query` calls in parallel and reduce the outputs.
-  
+
  Args:
    prompt: A str (may contain {{}} as template) as natural language input, or a
      `pg.Symbolic` object as structured input as prompt to LLM.
-    schema: A type annotation as the schema for output object. If str (default),
-      the response will be a str in natural language.
+    schema: A type annotation as the schema for output object. If None
+      (default), the response will be a str in natural language.
    reduce: A function to reduce the outputs of multiple `lf.query` calls. It
      takes a list of outputs and returns the final object.
    lm: The language model to use. If not specified, the language model from
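
A hedged usage sketch of `query_and_reduce`, consistent with the corrected docstring above; the reducer, model, and forwarding of `num_samples` through `**kwargs` are illustrative assumptions:

```python
import langfun as lf

def majority_vote(outputs):
  # Pick the most frequent answer among all samples.
  return max(set(outputs), key=outputs.count)

r = lf.query_and_reduce(
    '1 + 1 = ?',
    int,
    reduce=majority_vote,
    lm=[lf.llms.Gemini25Flash()],  # assumed model
    num_samples=3,                 # assumed; forwarded to lf.query
)
```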
@@ -761,14 +770,48 @@ def query_prompt(
 
 
 def query_prompt(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
    schema: schema_lib.SchemaType | None = None,
    **kwargs,
 ) -> lf.Message:
-  """Returns the final prompt sent to LLM for `lf.query`."""
+  """Renders the prompt message for `lf.query` without calling the LLM.
+
+  This function simulates the prompt generation step of `lf.query`,
+  producing the `lf.Message` object that would be sent to the language model.
+  It is useful for debugging prompts or inspecting how inputs are formatted.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  prompt_message = lf.query_prompt('1 + 1 = ?', schema=int)
+  print(prompt_message.text)
+  ```
+
+  Args:
+    prompt: The user prompt, which can be a string, `lf.Template`, or any
+      serializable object.
+    schema: The target schema for the query, used for prompt formatting.
+    **kwargs: Additional keyword arguments to pass to `lf.query`.
+
+  Returns:
+    The rendered `lf.Message` object.
+  """
+  # Delay import to avoid circular dependency in Colab.
+  # llms > data/conversion > structured > querying
+  from langfun.core.llms import fake  # pylint: disable=g-import-not-at-top
+
  kwargs.pop('returns_message', None)
  kwargs.pop('skip_lm', None)
-  return query(prompt, schema, skip_lm=True, returns_message=True, **kwargs)
+  return query(
+      prompt, schema,
+      # The LLM will never be used, it's just a placeholder.
+      lm=fake.Pseudo(),
+      skip_lm=True,
+      returns_message=True,
+      **kwargs
+  )
 
 
 def query_output(
@@ -776,7 +819,39 @@ def query_output(
    schema: schema_lib.SchemaType | None = None,
    **kwargs,
 ) -> Any:
-  """Returns the final output of `lf.query` from a provided LLM response."""
+  """Parses a raw LLM response based on a schema, as `lf.query` would.
+
+  This function simulates the output processing part of `lf.query`, taking
+  a raw response from a language model and parsing it into the desired schema.
+  It is useful for reprocessing LLM responses or for testing parsing and
+  auto-fixing logic independently of LLM calls.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Output when schema is provided.
+  structured_output = lf.query_output('2', schema=int)
+  print(structured_output)
+  # Output: 2
+
+  # Output when no schema is provided.
+  raw_output = lf.query_output('The answer is 2.')
+  print(raw_output)
+  # Output: The answer is 2.
+  ```
+
+  Args:
+    response: The raw response from an LLM, as a string or `lf.Message`.
+    schema: The target schema to parse the response into. If `None`, the
+      response text is returned.
+    **kwargs: Additional keyword arguments to pass to `lf.query` for parsing
+      (e.g., `autofix`, `default`).
+
+  Returns:
+    The parsed object if schema is provided, or the response text otherwise.
+  """
  # Delay import to avoid circular dependency in Colab.
  # llms > data/conversion > structured > querying
  from langfun.core.llms import fake  # pylint: disable=g-import-not-at-top
@@ -797,7 +872,7 @@ def query_reward(
    mapping_example: Union[str, mapping.MappingExample],
    response: Union[str, lf.Message],
 ) -> float | None:
-  """Returns the reward of an LLM response based on an mapping example."""
+  """Returns the reward of an LLM response based on a mapping example."""
  if isinstance(mapping_example, str):
    mapping_example = pg.from_json_str(mapping_example)
  assert isinstance(mapping_example, mapping.MappingExample), mapping_example
@@ -1196,14 +1271,14 @@ class _QueryTracker:
      )
  ] = True
 
-  start_callabck: Annotated[
+  start_callback: Annotated[
      Callable[[QueryInvocation], None] | None,
      (
          'A callback function to be called when a query is started.'
      )
  ] = None
 
-  end_callabck: Annotated[
+  end_callback: Annotated[
      Callable[[QueryInvocation], None] | None,
      (
          'A callback function to be called when a query is completed.'
@@ -1219,40 +1294,51 @@ class _QueryTracker:
 
  def track(self, invocation: QueryInvocation) -> None:
    self.tracked_queries.append(invocation)
-    if self.start_callabck is not None:
-      self.start_callabck(invocation)
+    if self.start_callback is not None:
+      self.start_callback(invocation)
 
  def mark_completed(self, invocation: QueryInvocation) -> None:
    assert invocation in self.tracked_queries, invocation
-    if self.end_callabck is not None:
-      self.end_callabck(invocation)
+    if self.end_callback is not None:
+      self.end_callback(invocation)
 
 
 @contextlib.contextmanager
 def track_queries(
    include_child_scopes: bool = True,
    *,
-    start_callabck: Callable[[QueryInvocation], None] | None = None,
-    end_callabck: Callable[[QueryInvocation], None] | None = None,
+    start_callback: Callable[[QueryInvocation], None] | None = None,
+    end_callback: Callable[[QueryInvocation], None] | None = None,
 ) -> Iterator[list[QueryInvocation]]:
-  """Track all queries made during the context.
+  """Tracks all `lf.query` calls made within a `with` block.
+
+  `lf.track_queries` is useful for inspecting LLM inputs and outputs,
+  debugging, and analyzing model behavior. It returns a list of
+  `lf.QueryInvocation` objects, each containing detailed information about
+  a query, such as the input prompt, schema, LLM request/response,
+  and any errors encountered.
 
-  Example:
+  **Example:**
 
-    ```
-    with lf.track_queries() as queries:
-      lf.query('hi', lm=lm)
-      lf.query('What is this {{image}}?', lm=lm, image=image)
+  ```python
+  import langfun as lf
 
-      print(queries)
-    ```
+  with lf.track_queries() as queries:
+    lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
+    lf.query('Hello!', lm=lf.llms.Gemini25Flash())
+
+  # Print recorded queries
+  for query in queries:
+    print(query.lm_request)
+    print(query.lm_response)
+  ```
 
  Args:
    include_child_scopes: If True, the queries made in child scopes will be
      included in the returned list. Otherwise, only the queries made in the
      current scope will be included.
-    start_callabck: A callback function to be called when a query is started.
-    end_callabck: A callback function to be called when a query is completed.
+    start_callback: A callback function to be called when a query is started.
+    end_callback: A callback function to be called when a query is completed.
 
  Yields:
    A list of `QueryInvocation` objects representing the queries made during
@@ -1261,8 +1347,8 @@ def track_queries(
  trackers = lf.context_value('__query_trackers__', [])
  tracker = _QueryTracker(
      include_child_scopes=include_child_scopes,
-      start_callabck=start_callabck,
-      end_callabck=end_callabck
+      start_callback=start_callback,
+      end_callback=end_callback
  )
 
  with lf.context(
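
For reference, a short sketch using the renamed `start_callback`/`end_callback` parameters (previously misspelled `start_callabck`/`end_callabck`); the model choice is an assumption as before:

```python
import langfun as lf

def on_start(invocation):
  # Called as soon as a query is issued.
  print('query started')

def on_end(invocation):
  # Called once the query completes.
  print('query completed')

with lf.track_queries(
    start_callback=on_start,
    end_callback=on_end,
) as queries:
  lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())  # assumed model

for q in queries:
  print(q.lm_request, q.lm_response)
```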