langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. langfun/__init__.py +1 -1
  2. langfun/core/__init__.py +7 -1
  3. langfun/core/agentic/__init__.py +8 -1
  4. langfun/core/agentic/action.py +740 -112
  5. langfun/core/agentic/action_eval.py +9 -2
  6. langfun/core/agentic/action_test.py +189 -24
  7. langfun/core/async_support.py +104 -5
  8. langfun/core/async_support_test.py +23 -0
  9. langfun/core/coding/python/correction.py +19 -9
  10. langfun/core/coding/python/execution.py +14 -12
  11. langfun/core/coding/python/generation.py +21 -16
  12. langfun/core/coding/python/sandboxing.py +23 -3
  13. langfun/core/component.py +42 -3
  14. langfun/core/concurrent.py +70 -6
  15. langfun/core/concurrent_test.py +9 -2
  16. langfun/core/console.py +1 -1
  17. langfun/core/data/conversion/anthropic.py +12 -3
  18. langfun/core/data/conversion/anthropic_test.py +8 -6
  19. langfun/core/data/conversion/gemini.py +11 -2
  20. langfun/core/data/conversion/gemini_test.py +48 -9
  21. langfun/core/data/conversion/openai.py +145 -31
  22. langfun/core/data/conversion/openai_test.py +161 -17
  23. langfun/core/eval/base.py +48 -44
  24. langfun/core/eval/base_test.py +5 -5
  25. langfun/core/eval/matching.py +5 -2
  26. langfun/core/eval/patching.py +3 -3
  27. langfun/core/eval/scoring.py +4 -3
  28. langfun/core/eval/v2/__init__.py +3 -0
  29. langfun/core/eval/v2/checkpointing.py +148 -46
  30. langfun/core/eval/v2/checkpointing_test.py +9 -2
  31. langfun/core/eval/v2/config_saver.py +37 -0
  32. langfun/core/eval/v2/config_saver_test.py +36 -0
  33. langfun/core/eval/v2/eval_test_helper.py +104 -3
  34. langfun/core/eval/v2/evaluation.py +102 -19
  35. langfun/core/eval/v2/evaluation_test.py +9 -3
  36. langfun/core/eval/v2/example.py +50 -40
  37. langfun/core/eval/v2/example_test.py +16 -8
  38. langfun/core/eval/v2/experiment.py +95 -20
  39. langfun/core/eval/v2/experiment_test.py +19 -0
  40. langfun/core/eval/v2/metric_values.py +31 -3
  41. langfun/core/eval/v2/metric_values_test.py +32 -0
  42. langfun/core/eval/v2/metrics.py +157 -44
  43. langfun/core/eval/v2/metrics_test.py +39 -18
  44. langfun/core/eval/v2/progress.py +31 -1
  45. langfun/core/eval/v2/progress_test.py +27 -0
  46. langfun/core/eval/v2/progress_tracking.py +13 -5
  47. langfun/core/eval/v2/progress_tracking_test.py +9 -1
  48. langfun/core/eval/v2/reporting.py +88 -71
  49. langfun/core/eval/v2/reporting_test.py +24 -6
  50. langfun/core/eval/v2/runners/__init__.py +30 -0
  51. langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
  52. langfun/core/eval/v2/runners/beam.py +354 -0
  53. langfun/core/eval/v2/runners/beam_test.py +153 -0
  54. langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
  55. langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
  56. langfun/core/eval/v2/runners/debug.py +40 -0
  57. langfun/core/eval/v2/runners/debug_test.py +76 -0
  58. langfun/core/eval/v2/runners/parallel.py +243 -0
  59. langfun/core/eval/v2/runners/parallel_test.py +182 -0
  60. langfun/core/eval/v2/runners/sequential.py +47 -0
  61. langfun/core/eval/v2/runners/sequential_test.py +169 -0
  62. langfun/core/langfunc.py +45 -130
  63. langfun/core/langfunc_test.py +7 -5
  64. langfun/core/language_model.py +189 -36
  65. langfun/core/language_model_test.py +54 -3
  66. langfun/core/llms/__init__.py +14 -1
  67. langfun/core/llms/anthropic.py +157 -2
  68. langfun/core/llms/azure_openai.py +29 -17
  69. langfun/core/llms/cache/base.py +25 -3
  70. langfun/core/llms/cache/in_memory.py +48 -7
  71. langfun/core/llms/cache/in_memory_test.py +14 -4
  72. langfun/core/llms/compositional.py +25 -1
  73. langfun/core/llms/deepseek.py +30 -2
  74. langfun/core/llms/fake.py +32 -1
  75. langfun/core/llms/gemini.py +90 -12
  76. langfun/core/llms/gemini_test.py +110 -0
  77. langfun/core/llms/google_genai.py +52 -1
  78. langfun/core/llms/groq.py +28 -3
  79. langfun/core/llms/llama_cpp.py +23 -4
  80. langfun/core/llms/openai.py +120 -3
  81. langfun/core/llms/openai_compatible.py +148 -27
  82. langfun/core/llms/openai_compatible_test.py +207 -20
  83. langfun/core/llms/openai_test.py +0 -2
  84. langfun/core/llms/rest.py +16 -1
  85. langfun/core/llms/vertexai.py +78 -8
  86. langfun/core/logging.py +1 -1
  87. langfun/core/mcp/__init__.py +10 -0
  88. langfun/core/mcp/client.py +177 -0
  89. langfun/core/mcp/client_test.py +71 -0
  90. langfun/core/mcp/session.py +241 -0
  91. langfun/core/mcp/session_test.py +54 -0
  92. langfun/core/mcp/testing/simple_mcp_client.py +33 -0
  93. langfun/core/mcp/testing/simple_mcp_server.py +33 -0
  94. langfun/core/mcp/tool.py +254 -0
  95. langfun/core/mcp/tool_test.py +197 -0
  96. langfun/core/memory.py +1 -0
  97. langfun/core/message.py +160 -55
  98. langfun/core/message_test.py +65 -81
  99. langfun/core/modalities/__init__.py +8 -0
  100. langfun/core/modalities/audio.py +21 -1
  101. langfun/core/modalities/image.py +73 -3
  102. langfun/core/modalities/image_test.py +116 -0
  103. langfun/core/modalities/mime.py +78 -4
  104. langfun/core/modalities/mime_test.py +59 -0
  105. langfun/core/modalities/pdf.py +19 -1
  106. langfun/core/modalities/video.py +21 -1
  107. langfun/core/modality.py +167 -29
  108. langfun/core/modality_test.py +42 -12
  109. langfun/core/natural_language.py +1 -1
  110. langfun/core/sampling.py +4 -4
  111. langfun/core/sampling_test.py +20 -4
  112. langfun/core/structured/__init__.py +2 -24
  113. langfun/core/structured/completion.py +34 -44
  114. langfun/core/structured/completion_test.py +23 -43
  115. langfun/core/structured/description.py +54 -50
  116. langfun/core/structured/function_generation.py +29 -12
  117. langfun/core/structured/mapping.py +81 -37
  118. langfun/core/structured/parsing.py +95 -79
  119. langfun/core/structured/parsing_test.py +0 -3
  120. langfun/core/structured/querying.py +230 -154
  121. langfun/core/structured/querying_test.py +69 -33
  122. langfun/core/structured/schema/__init__.py +49 -0
  123. langfun/core/structured/schema/base.py +664 -0
  124. langfun/core/structured/schema/base_test.py +531 -0
  125. langfun/core/structured/schema/json.py +174 -0
  126. langfun/core/structured/schema/json_test.py +121 -0
  127. langfun/core/structured/schema/python.py +316 -0
  128. langfun/core/structured/schema/python_test.py +410 -0
  129. langfun/core/structured/schema_generation.py +33 -14
  130. langfun/core/structured/scoring.py +47 -36
  131. langfun/core/structured/tokenization.py +26 -11
  132. langfun/core/subscription.py +2 -2
  133. langfun/core/template.py +175 -50
  134. langfun/core/template_test.py +123 -17
  135. langfun/env/__init__.py +43 -0
  136. langfun/env/base_environment.py +827 -0
  137. langfun/env/base_environment_test.py +473 -0
  138. langfun/env/base_feature.py +304 -0
  139. langfun/env/base_feature_test.py +228 -0
  140. langfun/env/base_sandbox.py +842 -0
  141. langfun/env/base_sandbox_test.py +1235 -0
  142. langfun/env/event_handlers/__init__.py +14 -0
  143. langfun/env/event_handlers/chain.py +233 -0
  144. langfun/env/event_handlers/chain_test.py +253 -0
  145. langfun/env/event_handlers/event_logger.py +472 -0
  146. langfun/env/event_handlers/event_logger_test.py +304 -0
  147. langfun/env/event_handlers/metric_writer.py +726 -0
  148. langfun/env/event_handlers/metric_writer_test.py +214 -0
  149. langfun/env/interface.py +1640 -0
  150. langfun/env/interface_test.py +153 -0
  151. langfun/env/load_balancers.py +59 -0
  152. langfun/env/load_balancers_test.py +141 -0
  153. langfun/env/test_utils.py +507 -0
  154. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
  155. langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
  156. langfun/core/eval/v2/runners_test.py +0 -343
  157. langfun/core/structured/schema.py +0 -987
  158. langfun/core/structured/schema_test.py +0 -982
  159. langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
  160. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
  161. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
  162. {langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0
@@ -23,7 +23,7 @@ import uuid
23
23
 
24
24
  import langfun.core as lf
25
25
  from langfun.core.structured import mapping
26
- from langfun.core.structured import schema as schema_lib
26
+ from langfun.core.structured.schema import base as schema_lib
27
27
  import pyglove as pg
28
28
 
29
29
 
@@ -274,7 +274,7 @@ class _LfQueryPythonV2(LfQuery):
274
274
 
275
275
 
276
276
  def query(
277
- prompt: Union[str, lf.Template, Any],
277
+ prompt: Union[str, lf.Template, lf.Message, Any],
278
278
  schema: schema_lib.SchemaType | None = None,
279
279
  default: Any = lf.RAISE_IF_HAS_ERROR,
280
280
  *,
@@ -298,119 +298,124 @@ def query(
298
298
  supporting natural language prompts, structured inputs, and multiple advanced
299
299
  features.
300
300
 
301
- Key Features:
302
-
303
- - **Input**: Accepts natural language strings, structured inputs (e.g.,
304
- `pg.Object`), and templates (`lf.Template`) with modality objects.
305
-
306
- - **Output**: Returns structured outputs when `schema` is specified;
307
- otherwise, outputs raw natural language (as a string).
308
-
309
- - **Few-shot examples**: Supports structured few-shot examples with the
310
- `examples` argument.
311
-
312
- - **Multi-LM fan-out**: Sends queries to multiple language models with in
313
- multiple samples in parallel, returning a list of outputs.
314
-
315
- Examples:
316
-
317
- Case 1: Regular natural language-based LLM query:
318
-
319
- ```
320
- lf.query('1 + 1 = ?', lm=lf.llms.Gpt4Turbo())
321
-
322
- # Outptut: '2'
323
- ```
324
-
325
- Case 2: Query with structured output.
326
-
327
- ```
328
- lf.query('1 + 1 = ?', int, lm=lf.llms.Gpt4Turbo())
329
-
330
- # Output: 2
331
- ```
332
-
333
- Case 3: Query with structured input.
334
-
335
- ```
336
- class Sum(pg.Object):
337
- a: int
338
- b: int
339
-
340
- lf.query(Sum(1, 1), int, lm=lf.llms.Gpt4Turbo())
341
-
342
- # Output: 2
343
- ```
344
-
345
- Case 4: Query with input of mixed modalities.
346
-
347
- ```
348
- class Animal(pg.Object):
349
- pass
350
-
351
- class Dog(Animal):
352
- pass
353
-
354
- class Entity(pg.Object):
355
- name: str
356
-
357
- lf.query(
358
- 'What is in this {{image}} and {{objects}}?'
359
- list[Entity],
360
- lm=lf.llms.Gpt4Turbo()
361
- image=lf.Image(path='/path/to/a/airplane.png'),
362
- objects=[Dog()],
363
- )
364
-
365
- # Output: [Entity(name='airplane'), Entity(name='dog')]
366
- ```
367
-
368
- Case 5: Query with structured few-shot examples.
369
- ```
370
- lf.query(
371
- 'What is in this {{image}} and {{objects}}?'
372
- list[Entity],
373
- lm=lf.llms.Gpt4Turbo()
374
- image=lf.Image(path='/path/to/a/dinasaur.png'),
375
- objects=[Dog()],
376
- examples=[
377
- lf.MappingExample(
378
- input=lf.Template(
379
- 'What is the object near the house in this {{image}}?',
380
- image=lf.Image(path='/path/to/image.png'),
381
- ),
382
- schema=Entity,
383
- output=Entity('cat'),
384
- ),
385
- ],
386
- )
387
-
388
- # Output: [Entity(name='dinasaur'), Entity(name='dog')]
389
- ```
390
-
391
- Case 6: Multiple queries to multiple models.
392
- ```
393
- lf.query(
394
- '1 + 1 = ?',
395
- int,
396
- lm=[
397
- lf.llms.Gpt4Turbo(),
398
- lf.llms.Gemini1_5Pro(),
399
- ],
400
- num_samples=[1, 2],
401
- )
402
- # Output: [2, 2, 2]
403
- ```
301
+ **Key Features:**
302
+
303
+ * **Input**: Accepts natural language strings, structured inputs (e.g.,
304
+ `pg.Object`), templates (`lf.Template`) with modality objects, messages (
305
+ `lf.Message`) with modality objects, or objects that can be converted to
306
+ `lf.Message` (see `lf.Message.from_value` for details).
307
+ * **Output**: Returns structured outputs when `schema` is specified;
308
+ otherwise, outputs raw natural language (as a string).
309
+ * **Few-shot examples**: Supports structured few-shot examples with the
310
+ `examples` argument.
311
+ * **Multi-LM fan-out**: Sends queries to multiple language models for
312
+ multiple samples in parallel, returning a list of outputs.
313
+
314
+ **Basic Usage:**
315
+
316
+ 1. **Natural Language Query**:
317
+ If `schema` is not provided, `lf.query` returns a natural language
318
+ response:
319
+ ```python
320
+ r = lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
321
+ print(r)
322
+ # Output: 2
323
+ ```
324
+
325
+ 2. **Structured Output**:
326
+ If `schema` is provided, `lf.query` guides LLM to directly generate
327
+ response according to the specified schema; it then parses the response
328
+ into a Python object:
329
+ ```python
330
+ r = lf.query('1 + 1 = ?', int, lm=lf.llms.Gemini25Flash())
331
+ print(r)
332
+ # Output: 2
333
+ ```
334
+
335
+ **Advanced Usage:**
336
+
337
+ 1. **Structured Input**:
338
+ Besides natural language, `prompt` can be a `pg.Object`, whose symbolic
339
+ representation will be sent to the LLM:
340
+ ```python
341
+ class Sum(pg.Object):
342
+ a: int
343
+ b: int
344
+ r = lf.query(Sum(1, 1), int, lm=lf.llms.Gemini25Flash())
345
+ print(r)
346
+ # Output: 2
347
+ ```
348
+
349
+ 2. **Multi-Modal Input**:
350
+ `lf.query` supports prompts containing multi-modal inputs, such as images
351
+ or audio, by embedding modality objects within a template string:
352
+ ```python
353
+ image = lf.Image.from_path('/path/to/image.png')
354
+ r = lf.query(
355
+ 'what is in the {{image}}?',
356
+ str,
357
+ image=image,
358
+ lm=lf.llms.Gemini25Flash()
359
+ )
360
+ print(r)
361
+ # Output: A cat sitting on a sofa.
362
+ ```
363
+
364
+ 3. **Few-Shot Examples**:
365
+ You can provide few-shot examples to guide model behavior using the
366
+ `examples` argument. Each example is an `lf.MappingExample` containing
367
+ `input`, `output`, and, if needed, `schema`.
368
+ ```python
369
+ class Sentiment(pg.Object):
370
+ sentiment: Literal['positive', 'negative', 'neutral']
371
+ reason: str
372
+
373
+ r = lf.query(
374
+ 'I love this movie!',
375
+ Sentiment,
376
+ examples=[
377
+ lf.MappingExample(
378
+ 'This movie is terrible.',
379
+ Sentiment(sentiment='negative', reason='The plot is boring.')
380
+ ),
381
+ lf.MappingExample(
382
+ 'It is okay.',
383
+ Sentiment(sentiment='neutral', reason='The movie is average.')
384
+ ),
385
+ ],
386
+ lm=lf.llms.Gemini25Flash())
387
+ print(r)
388
+ # Output:
389
+ # Sentiment(
390
+ # sentiment='positive',
391
+ # reason='The user expresses positive feedback.'
392
+ # )
393
+ ```
394
+
395
+ 4. **Multi-LM Fan-Out**:
396
+ `lf.query` can concurrently query multiple language models by providing
397
+ a list of LMs to the `lm` argument and specifying the number of samples
398
+ for each with `num_samples`.
399
+ ```python
400
+ r = lf.query(
401
+ '1 + 1 = ?',
402
+ int,
403
+ lm=[lf.llms.Gemini25Flash(), lf.llms.Gemini()],
404
+ num_samples=[1, 2])
405
+ print(r)
406
+ # Output: [2, 2, 2]
407
+ ```
404
408
 
405
409
  Args:
406
410
  prompt: The input query. Can be:
407
411
  - A natural language string (supports templating with `{{}}`),
408
- - A `pg.Object` object for structured input,
412
+ - A `pg.Object` for structured input,
409
413
  - An `lf.Template` for mixed or template-based inputs.
410
- schema: Type annotation or `lf.Schema` object for the expected output.
414
+ schema: Type annotation or `lf.Schema` object for the expected output.
411
415
  If `None` (default), the response will be a natural language string.
412
- default: Default value to return if parsing fails. If not specified, an
413
- error will be raised.
416
+ default: The default value to return if parsing fails. If
417
+ `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
418
+ instead.
414
419
  lm: The language model(s) to query. Can be:
415
420
  - A single `LanguageModel`,
416
421
  - A list of `LanguageModel`s for multi-model fan-out.
@@ -430,27 +435,30 @@ def query(
430
435
  from `lf.context` or the main `lm`.
431
436
  protocol: Format for schema representation. Builtin choices are `'json'` or
432
437
  `'python'`, users could extend with their own protocols by subclassing
433
- `lf.structured.LfQuery'. Also protocol could be specified with a version
438
+ `lf.structured.LfQuery`. Also protocol could be specified with a version
434
439
  in the format of 'protocol:version', e.g., 'python:1.0', so users could
435
440
  use a specific version of the prompt based on the protocol. Please see the
436
441
  documentation of `LfQuery` for more details. If None, the protocol from
437
442
  context manager `lf.query_protocol` will be used, or 'python' if not
438
443
  specified.
439
- returns_message: If `True`, returns an `lf.Message` object instead of
444
+ returns_message: If `True`, returns an `lf.Message` object instead of
440
445
  the final parsed result.
441
- skip_lm: If `True`, skips the LLM call and returns the rendered
446
+ skip_lm: If `True`, skips the LLM call and returns the rendered
442
447
  prompt as a `UserMessage` object.
443
448
  invocation_id: The ID of the query invocation, which will be passed to
444
- `lf.QueryInvocation` when `lf.trackIf `None`, a unique ID will
449
+ `lf.QueryInvocation`. If `None`, a unique ID will
445
450
  be generated.
446
451
  **kwargs: Additional keyword arguments for:
447
452
  - Rendering templates (e.g., `template_str`, `preamble`),
448
453
  - Configuring `lf.structured.Mapping`.
454
+ - metadata_xxx, which will be passed through to the rendered message
455
+ metadata under key `xxx`. This allows LLM behavior customization based
456
+ on metadata `xxx` from the prompt.
449
457
 
450
458
  Returns:
451
459
  The result of the query:
452
460
  - A single output or a list of outputs if multiple models/samples are used.
453
- - Each output is a parsed object matching `schema`, an `lf.Message` (if
461
+ - Each output is a parsed object matching `schema`, an `lf.Message` (if
454
462
  `returns_message=True`), or a natural language string (default).
455
463
  """
456
464
  # Internal usage logging.
@@ -526,24 +534,22 @@ def query(
526
534
  ).render(message_cls=lf.SystemMessage)
527
535
 
528
536
  # Normalize query input.
529
- if isinstance(prompt, (lf.Message, str)):
537
+ if isinstance(prompt, str):
530
538
  # Query with structured output.
531
539
  prompt_kwargs = kwargs.copy()
532
540
  prompt_kwargs.pop('template_str', None)
533
541
  query_input = lf.Template.from_value(prompt, **prompt_kwargs)
542
+ elif isinstance(prompt, lf.Message):
543
+ query_input = prompt
534
544
  elif isinstance(prompt, lf.Template):
535
- # Create a copy of the prompt if it has a parent object, so all child
536
- # modality objects could be referred by path relative to the prompt.
537
- query_input = prompt.clone() if prompt.sym_parent is not None else prompt
538
-
539
545
  # Attach template metadata from kwargs. This is used to pass through fields
540
546
  # from kwargs to the rendered message.
541
- template_metadata = {
542
- k: v for k, v in kwargs.items() if k.startswith('metadata_')
543
- }
544
- query_input.rebind(
545
- template_metadata, skip_notification=True, raise_on_no_change=False
547
+ prompt.rebind(
548
+ {k: v for k, v in kwargs.items() if k.startswith('metadata_')},
549
+ skip_notification=True,
550
+ raise_on_no_change=False
546
551
  )
552
+ query_input = prompt
547
553
  elif pg.MISSING_VALUE == prompt:
548
554
  query_input = lf.UserMessage('')
549
555
  else:
@@ -662,11 +668,15 @@ def query(
662
668
 
663
669
  if returns_message:
664
670
  return output_message
665
- return output_message.text if schema in (None, str) else output_message.result
671
+ if schema not in (None, str):
672
+ return output_message.result
673
+ if returns_message or output_message.referred_modalities:
674
+ return output_message
675
+ return output_message.text
666
676
 
667
677
 
668
678
  async def aquery(
669
- prompt: Union[str, lf.Template, Any],
679
+ prompt: Union[str, lf.Template, lf.Message, Any],
670
680
  schema: schema_lib.SchemaType | None = None,
671
681
  default: Any = lf.RAISE_IF_HAS_ERROR,
672
682
  *,
@@ -722,7 +732,7 @@ def query_protocol(protocol: str) -> Iterator[None]:
722
732
 
723
733
 
724
734
  def query_and_reduce(
725
- prompt: Union[str, lf.Template, Any],
735
+ prompt: Union[str, lf.Template, lf.Message, Any],
726
736
  schema: schema_lib.SchemaType | None = None,
727
737
  *,
728
738
  reduce: Callable[[list[Any]], Any],
@@ -731,12 +741,12 @@ def query_and_reduce(
731
741
  **kwargs,
732
742
  ) -> Any:
733
743
  """Issues multiple `lf.query` calls in parallel and reduce the outputs.
734
-
744
+
735
745
  Args:
736
746
  prompt: A str (may contain {{}} as template) as natural language input, or a
737
747
  `pg.Symbolic` object as structured input as prompt to LLM.
738
- schema: A type annotation as the schema for output object. If str (default),
739
- the response will be a str in natural language.
748
+ schema: A type annotation as the schema for output object. If None
749
+ (default), the response will be a str in natural language.
740
750
  reduce: A function to reduce the outputs of multiple `lf.query` calls. It
741
751
  takes a list of outputs and returns the final object.
742
752
  lm: The language model to use. If not specified, the language model from
@@ -760,11 +770,34 @@ def query_and_reduce(
760
770
 
761
771
 
762
772
  def query_prompt(
763
- prompt: Union[str, lf.Template, Any],
773
+ prompt: Union[str, lf.Template, lf.Message, Any],
764
774
  schema: schema_lib.SchemaType | None = None,
765
775
  **kwargs,
766
776
  ) -> lf.Message:
767
- """Returns the final prompt sent to LLM for `lf.query`."""
777
+ """Renders the prompt message for `lf.query` without calling the LLM.
778
+
779
+ This function simulates the prompt generation step of `lf.query`,
780
+ producing the `lf.Message` object that would be sent to the language model.
781
+ It is useful for debugging prompts or inspecting how inputs are formatted.
782
+
783
+ **Example:**
784
+
785
+ ```python
786
+ import langfun as lf
787
+
788
+ prompt_message = lf.query_prompt('1 + 1 = ?', schema=int)
789
+ print(prompt_message.text)
790
+ ```
791
+
792
+ Args:
793
+ prompt: The user prompt, which can be a string, `lf.Template`, or any
794
+ serializable object.
795
+ schema: The target schema for the query, used for prompt formatting.
796
+ **kwargs: Additional keyword arguments to pass to `lf.query`.
797
+
798
+ Returns:
799
+ The rendered `lf.Message` object.
800
+ """
768
801
  # Delay import to avoid circular dependency in Colab.
769
802
  # llms > data/conversion > structured > querying
770
803
  from langfun.core.llms import fake # pylint: disable=g-import-not-at-top
@@ -786,7 +819,39 @@ def query_output(
786
819
  schema: schema_lib.SchemaType | None = None,
787
820
  **kwargs,
788
821
  ) -> Any:
789
- """Returns the final output of `lf.query` from a provided LLM response."""
822
+ """Parses a raw LLM response based on a schema, as `lf.query` would.
823
+
824
+ This function simulates the output processing part of `lf.query`, taking
825
+ a raw response from a language model and parsing it into the desired schema.
826
+ It is useful for reprocessing LLM responses or for testing parsing and
827
+ auto-fixing logic independently of LLM calls.
828
+
829
+ **Example:**
830
+
831
+ ```python
832
+ import langfun as lf
833
+
834
+ # Output when schema is provided.
835
+ structured_output = lf.query_output('2', schema=int)
836
+ print(structured_output)
837
+ # Output: 2
838
+
839
+ # Output when no schema is provided.
840
+ raw_output = lf.query_output('The answer is 2.')
841
+ print(raw_output)
842
+ # Output: The answer is 2.
843
+ ```
844
+
845
+ Args:
846
+ response: The raw response from an LLM, as a string or `lf.Message`.
847
+ schema: The target schema to parse the response into. If `None`, the
848
+ response text is returned.
849
+ **kwargs: Additional keyword arguments to pass to `lf.query` for parsing
850
+ (e.g., `autofix`, `default`).
851
+
852
+ Returns:
853
+ The parsed object if schema is provided, or the response text otherwise.
854
+ """
790
855
  # Delay import to avoid circular dependency in Colab.
791
856
  # llms > data/conversion > structured > querying
792
857
  from langfun.core.llms import fake # pylint: disable=g-import-not-at-top
@@ -807,7 +872,7 @@ def query_reward(
807
872
  mapping_example: Union[str, mapping.MappingExample],
808
873
  response: Union[str, lf.Message],
809
874
  ) -> float | None:
810
- """Returns the reward of an LLM response based on an mapping example."""
875
+ """Returns the reward of an LLM response based on a mapping example."""
811
876
  if isinstance(mapping_example, str):
812
877
  mapping_example = pg.from_json_str(mapping_example)
813
878
  assert isinstance(mapping_example, mapping.MappingExample), mapping_example
@@ -1206,14 +1271,14 @@ class _QueryTracker:
1206
1271
  )
1207
1272
  ] = True
1208
1273
 
1209
- start_callabck: Annotated[
1274
+ start_callback: Annotated[
1210
1275
  Callable[[QueryInvocation], None] | None,
1211
1276
  (
1212
1277
  'A callback function to be called when a query is started.'
1213
1278
  )
1214
1279
  ] = None
1215
1280
 
1216
- end_callabck: Annotated[
1281
+ end_callback: Annotated[
1217
1282
  Callable[[QueryInvocation], None] | None,
1218
1283
  (
1219
1284
  'A callback function to be called when a query is completed.'
@@ -1229,40 +1294,51 @@ class _QueryTracker:
1229
1294
 
1230
1295
  def track(self, invocation: QueryInvocation) -> None:
1231
1296
  self.tracked_queries.append(invocation)
1232
- if self.start_callabck is not None:
1233
- self.start_callabck(invocation)
1297
+ if self.start_callback is not None:
1298
+ self.start_callback(invocation)
1234
1299
 
1235
1300
  def mark_completed(self, invocation: QueryInvocation) -> None:
1236
1301
  assert invocation in self.tracked_queries, invocation
1237
- if self.end_callabck is not None:
1238
- self.end_callabck(invocation)
1302
+ if self.end_callback is not None:
1303
+ self.end_callback(invocation)
1239
1304
 
1240
1305
 
1241
1306
  @contextlib.contextmanager
1242
1307
  def track_queries(
1243
1308
  include_child_scopes: bool = True,
1244
1309
  *,
1245
- start_callabck: Callable[[QueryInvocation], None] | None = None,
1246
- end_callabck: Callable[[QueryInvocation], None] | None = None,
1310
+ start_callback: Callable[[QueryInvocation], None] | None = None,
1311
+ end_callback: Callable[[QueryInvocation], None] | None = None,
1247
1312
  ) -> Iterator[list[QueryInvocation]]:
1248
- """Track all queries made during the context.
1313
+ """Tracks all `lf.query` calls made within a `with` block.
1249
1314
 
1250
- Example:
1315
+ `lf.track_queries` is useful for inspecting LLM inputs and outputs,
1316
+ debugging, and analyzing model behavior. It returns a list of
1317
+ `lf.QueryInvocation` objects, each containing detailed information about
1318
+ a query, such as the input prompt, schema, LLM request/response,
1319
+ and any errors encountered.
1251
1320
 
1252
- ```
1253
- with lf.track_queries() as queries:
1254
- lf.query('hi', lm=lm)
1255
- lf.query('What is this {{image}}?', lm=lm, image=image)
1321
+ **Example:**
1256
1322
 
1257
- print(queries)
1258
- ```
1323
+ ```python
1324
+ import langfun as lf
1325
+
1326
+ with lf.track_queries() as queries:
1327
+ lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
1328
+ lf.query('Hello!', lm=lf.llms.Gemini25Flash())
1329
+
1330
+ # Print recorded queries
1331
+ for query in queries:
1332
+ print(query.lm_request)
1333
+ print(query.lm_response)
1334
+ ```
1259
1335
 
1260
1336
  Args:
1261
1337
  include_child_scopes: If True, the queries made in child scopes will be
1262
1338
  included in the returned list. Otherwise, only the queries made in the
1263
1339
  current scope will be included.
1264
- start_callabck: A callback function to be called when a query is started.
1265
- end_callabck: A callback function to be called when a query is completed.
1340
+ start_callback: A callback function to be called when a query is started.
1341
+ end_callback: A callback function to be called when a query is completed.
1266
1342
 
1267
1343
  Yields:
1268
1344
  A list of `QueryInvocation` objects representing the queries made during
@@ -1271,8 +1347,8 @@ def track_queries(
1271
1347
  trackers = lf.context_value('__query_trackers__', [])
1272
1348
  tracker = _QueryTracker(
1273
1349
  include_child_scopes=include_child_scopes,
1274
- start_callabck=start_callabck,
1275
- end_callabck=end_callabck
1350
+ start_callback=start_callback,
1351
+ end_callback=end_callback
1276
1352
  )
1277
1353
 
1278
1354
  with lf.context(