langfun 0.1.2.dev202510200805__py3-none-any.whl → 0.1.2.dev202511160804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (146)
  1. langfun/core/__init__.py +1 -0
  2. langfun/core/agentic/action.py +107 -12
  3. langfun/core/agentic/action_eval.py +9 -2
  4. langfun/core/agentic/action_test.py +25 -0
  5. langfun/core/async_support.py +32 -3
  6. langfun/core/coding/python/correction.py +19 -9
  7. langfun/core/coding/python/execution.py +14 -12
  8. langfun/core/coding/python/generation.py +21 -16
  9. langfun/core/coding/python/sandboxing.py +23 -3
  10. langfun/core/component.py +42 -3
  11. langfun/core/concurrent.py +70 -6
  12. langfun/core/concurrent_test.py +1 -0
  13. langfun/core/console.py +1 -1
  14. langfun/core/data/conversion/anthropic.py +12 -3
  15. langfun/core/data/conversion/anthropic_test.py +8 -6
  16. langfun/core/data/conversion/gemini.py +9 -2
  17. langfun/core/data/conversion/gemini_test.py +12 -9
  18. langfun/core/data/conversion/openai.py +145 -31
  19. langfun/core/data/conversion/openai_test.py +161 -17
  20. langfun/core/eval/base.py +48 -44
  21. langfun/core/eval/base_test.py +4 -4
  22. langfun/core/eval/matching.py +5 -2
  23. langfun/core/eval/patching.py +3 -3
  24. langfun/core/eval/scoring.py +4 -3
  25. langfun/core/eval/v2/__init__.py +1 -0
  26. langfun/core/eval/v2/checkpointing.py +39 -5
  27. langfun/core/eval/v2/checkpointing_test.py +1 -1
  28. langfun/core/eval/v2/eval_test_helper.py +97 -1
  29. langfun/core/eval/v2/evaluation.py +88 -16
  30. langfun/core/eval/v2/evaluation_test.py +9 -3
  31. langfun/core/eval/v2/example.py +45 -39
  32. langfun/core/eval/v2/example_test.py +3 -3
  33. langfun/core/eval/v2/experiment.py +51 -8
  34. langfun/core/eval/v2/metric_values.py +31 -3
  35. langfun/core/eval/v2/metric_values_test.py +32 -0
  36. langfun/core/eval/v2/metrics.py +157 -44
  37. langfun/core/eval/v2/metrics_test.py +39 -18
  38. langfun/core/eval/v2/progress.py +30 -1
  39. langfun/core/eval/v2/progress_test.py +27 -0
  40. langfun/core/eval/v2/progress_tracking_test.py +3 -0
  41. langfun/core/eval/v2/reporting.py +90 -71
  42. langfun/core/eval/v2/reporting_test.py +20 -6
  43. langfun/core/eval/v2/runners/__init__.py +26 -0
  44. langfun/core/eval/v2/{runners.py → runners/base.py} +22 -124
  45. langfun/core/eval/v2/runners/debug.py +40 -0
  46. langfun/core/eval/v2/runners/debug_test.py +79 -0
  47. langfun/core/eval/v2/runners/parallel.py +100 -0
  48. langfun/core/eval/v2/runners/parallel_test.py +98 -0
  49. langfun/core/eval/v2/runners/sequential.py +47 -0
  50. langfun/core/eval/v2/runners/sequential_test.py +175 -0
  51. langfun/core/langfunc.py +45 -130
  52. langfun/core/langfunc_test.py +6 -4
  53. langfun/core/language_model.py +103 -16
  54. langfun/core/language_model_test.py +9 -3
  55. langfun/core/llms/__init__.py +7 -1
  56. langfun/core/llms/anthropic.py +157 -2
  57. langfun/core/llms/azure_openai.py +29 -17
  58. langfun/core/llms/cache/base.py +25 -3
  59. langfun/core/llms/cache/in_memory.py +48 -7
  60. langfun/core/llms/cache/in_memory_test.py +14 -4
  61. langfun/core/llms/compositional.py +25 -1
  62. langfun/core/llms/deepseek.py +30 -2
  63. langfun/core/llms/fake.py +32 -1
  64. langfun/core/llms/gemini.py +14 -9
  65. langfun/core/llms/google_genai.py +29 -1
  66. langfun/core/llms/groq.py +28 -3
  67. langfun/core/llms/llama_cpp.py +23 -4
  68. langfun/core/llms/openai.py +36 -3
  69. langfun/core/llms/openai_compatible.py +148 -27
  70. langfun/core/llms/openai_compatible_test.py +207 -20
  71. langfun/core/llms/openai_test.py +0 -2
  72. langfun/core/llms/rest.py +12 -1
  73. langfun/core/llms/vertexai.py +51 -8
  74. langfun/core/logging.py +1 -1
  75. langfun/core/mcp/client.py +77 -22
  76. langfun/core/mcp/client_test.py +8 -35
  77. langfun/core/mcp/session.py +94 -29
  78. langfun/core/mcp/session_test.py +54 -0
  79. langfun/core/mcp/tool.py +151 -22
  80. langfun/core/mcp/tool_test.py +197 -0
  81. langfun/core/memory.py +1 -0
  82. langfun/core/message.py +160 -55
  83. langfun/core/message_test.py +65 -81
  84. langfun/core/modalities/__init__.py +8 -0
  85. langfun/core/modalities/audio.py +21 -1
  86. langfun/core/modalities/image.py +19 -1
  87. langfun/core/modalities/mime.py +62 -3
  88. langfun/core/modalities/pdf.py +19 -1
  89. langfun/core/modalities/video.py +21 -1
  90. langfun/core/modality.py +167 -29
  91. langfun/core/modality_test.py +42 -12
  92. langfun/core/natural_language.py +1 -1
  93. langfun/core/sampling.py +4 -4
  94. langfun/core/sampling_test.py +20 -4
  95. langfun/core/structured/__init__.py +2 -24
  96. langfun/core/structured/completion.py +34 -44
  97. langfun/core/structured/completion_test.py +23 -43
  98. langfun/core/structured/description.py +54 -50
  99. langfun/core/structured/function_generation.py +29 -12
  100. langfun/core/structured/mapping.py +81 -37
  101. langfun/core/structured/parsing.py +95 -79
  102. langfun/core/structured/parsing_test.py +0 -3
  103. langfun/core/structured/querying.py +215 -142
  104. langfun/core/structured/querying_test.py +65 -29
  105. langfun/core/structured/schema/__init__.py +48 -0
  106. langfun/core/structured/schema/base.py +664 -0
  107. langfun/core/structured/schema/base_test.py +531 -0
  108. langfun/core/structured/schema/json.py +174 -0
  109. langfun/core/structured/schema/json_test.py +121 -0
  110. langfun/core/structured/schema/python.py +316 -0
  111. langfun/core/structured/schema/python_test.py +410 -0
  112. langfun/core/structured/schema_generation.py +33 -14
  113. langfun/core/structured/scoring.py +47 -36
  114. langfun/core/structured/tokenization.py +26 -11
  115. langfun/core/subscription.py +2 -2
  116. langfun/core/template.py +175 -50
  117. langfun/core/template_test.py +123 -17
  118. langfun/env/__init__.py +8 -2
  119. langfun/env/base_environment.py +320 -128
  120. langfun/env/base_environment_test.py +473 -0
  121. langfun/env/base_feature.py +92 -15
  122. langfun/env/base_feature_test.py +228 -0
  123. langfun/env/base_sandbox.py +84 -361
  124. langfun/env/base_sandbox_test.py +1235 -0
  125. langfun/env/event_handlers/__init__.py +1 -1
  126. langfun/env/event_handlers/chain.py +233 -0
  127. langfun/env/event_handlers/chain_test.py +253 -0
  128. langfun/env/event_handlers/event_logger.py +95 -98
  129. langfun/env/event_handlers/event_logger_test.py +21 -21
  130. langfun/env/event_handlers/metric_writer.py +225 -140
  131. langfun/env/event_handlers/metric_writer_test.py +23 -6
  132. langfun/env/interface.py +854 -40
  133. langfun/env/interface_test.py +112 -2
  134. langfun/env/load_balancers_test.py +23 -2
  135. langfun/env/test_utils.py +126 -84
  136. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/METADATA +1 -1
  137. langfun-0.1.2.dev202511160804.dist-info/RECORD +211 -0
  138. langfun/core/eval/v2/runners_test.py +0 -343
  139. langfun/core/structured/schema.py +0 -987
  140. langfun/core/structured/schema_test.py +0 -982
  141. langfun/env/base_test.py +0 -1481
  142. langfun/env/event_handlers/base.py +0 -350
  143. langfun-0.1.2.dev202510200805.dist-info/RECORD +0 -195
  144. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/WHEEL +0 -0
  145. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/licenses/LICENSE +0 -0
  146. {langfun-0.1.2.dev202510200805.dist-info → langfun-0.1.2.dev202511160804.dist-info}/top_level.txt +0 -0
langfun/core/structured/querying.py

@@ -23,7 +23,7 @@ import uuid
 
 import langfun.core as lf
 from langfun.core.structured import mapping
-from langfun.core.structured import schema as schema_lib
+from langfun.core.structured.schema import base as schema_lib
 import pyglove as pg
 
 
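This import change tracks the refactor of `langfun/core/structured/schema.py` (removed as file 139 above) into the new `langfun/core/structured/schema/` package (files 105-111). A minimal migration sketch for downstream code, assuming `SchemaType` lives in `schema/base.py` as this hunk and the `query()` signature below indicate:

```python
# Old layout: schema was a single module.
# from langfun.core.structured import schema as schema_lib

# New layout: core schema definitions live in the `base` submodule.
from langfun.core.structured.schema import base as schema_lib


# `SchemaType` is the alias referenced by `query()` later in this diff;
# annotations that imported it from the old module resolve the same way here.
def describe(schema: schema_lib.SchemaType | None = None) -> str:
  return repr(schema)
```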
@@ -274,7 +274,7 @@ class _LfQueryPythonV2(LfQuery):
 
 
 def query(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
     schema: schema_lib.SchemaType | None = None,
     default: Any = lf.RAISE_IF_HAS_ERROR,
     *,
@@ -298,119 +298,124 @@ def query(
   supporting natural language prompts, structured inputs, and multiple advanced
   features.
 
-  Key Features:
-
-  - **Input**: Accepts natural language strings, structured inputs (e.g.,
-    `pg.Object`), and templates (`lf.Template`) with modality objects.
-
-  - **Output**: Returns structured outputs when `schema` is specified;
-    otherwise, outputs raw natural language (as a string).
-
-  - **Few-shot examples**: Supports structured few-shot examples with the
-    `examples` argument.
-
-  - **Multi-LM fan-out**: Sends queries to multiple language models with in
-    multiple samples in parallel, returning a list of outputs.
-
-  Examples:
-
-    Case 1: Regular natural language-based LLM query:
-
-    ```
-    lf.query('1 + 1 = ?', lm=lf.llms.Gpt4Turbo())
-
-    # Outptut: '2'
-    ```
-
-    Case 2: Query with structured output.
-
-    ```
-    lf.query('1 + 1 = ?', int, lm=lf.llms.Gpt4Turbo())
-
-    # Output: 2
-    ```
-
-    Case 3: Query with structured input.
-
-    ```
-    class Sum(pg.Object):
-      a: int
-      b: int
-
-    lf.query(Sum(1, 1), int, lm=lf.llms.Gpt4Turbo())
-
-    # Output: 2
-    ```
-
-    Case 4: Query with input of mixed modalities.
-
-    ```
-    class Animal(pg.Object):
-      pass
-
-    class Dog(Animal):
-      pass
-
-    class Entity(pg.Object):
-      name: str
-
-    lf.query(
-        'What is in this {{image}} and {{objects}}?'
-        list[Entity],
-        lm=lf.llms.Gpt4Turbo()
-        image=lf.Image(path='/path/to/a/airplane.png'),
-        objects=[Dog()],
-    )
-
-    # Output: [Entity(name='airplane'), Entity(name='dog')]
-    ```
-
-    Case 5: Query with structured few-shot examples.
-    ```
-    lf.query(
-        'What is in this {{image}} and {{objects}}?'
-        list[Entity],
-        lm=lf.llms.Gpt4Turbo()
-        image=lf.Image(path='/path/to/a/dinasaur.png'),
-        objects=[Dog()],
-        examples=[
-            lf.MappingExample(
-                input=lf.Template(
-                    'What is the object near the house in this {{image}}?',
-                    image=lf.Image(path='/path/to/image.png'),
-                ),
-                schema=Entity,
-                output=Entity('cat'),
-            ),
-        ],
-    )
-
-    # Output: [Entity(name='dinasaur'), Entity(name='dog')]
-    ```
-
-    Case 6: Multiple queries to multiple models.
-    ```
-    lf.query(
-        '1 + 1 = ?',
-        int,
-        lm=[
-            lf.llms.Gpt4Turbo(),
-            lf.llms.Gemini1_5Pro(),
-        ],
-        num_samples=[1, 2],
-    )
-    # Output: [2, 2, 2]
-    ```
+  **Key Features:**
+
+  * **Input**: Accepts natural language strings, structured inputs (e.g.,
+    `pg.Object`), templates (`lf.Template`) with modality objects, messages
+    (`lf.Message`) with modality objects, or objects that can be converted to
+    `lf.Message` (see `lf.Message.from_value` for details).
+  * **Output**: Returns structured outputs when `schema` is specified;
+    otherwise, outputs raw natural language (as a string).
+  * **Few-shot examples**: Supports structured few-shot examples with the
+    `examples` argument.
+  * **Multi-LM fan-out**: Sends queries to multiple language models for
+    multiple samples in parallel, returning a list of outputs.
+
+  **Basic Usage:**
+
+  1. **Natural Language Query**:
+     If `schema` is not provided, `lf.query` returns a natural language
+     response:
+     ```python
+     r = lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output: 2
+     ```
+
+  2. **Structured Output**:
+     If `schema` is provided, `lf.query` guides the LLM to directly generate
+     a response according to the specified schema, then parses the response
+     into a Python object:
+     ```python
+     r = lf.query('1 + 1 = ?', int, lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output: 2
+     ```
+
+  **Advanced Usage:**
+
+  1. **Structured Input**:
+     Besides natural language, `prompt` can be a `pg.Object`, whose symbolic
+     representation will be sent to the LLM:
+     ```python
+     class Sum(pg.Object):
+       a: int
+       b: int
+
+     r = lf.query(Sum(1, 1), int, lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output: 2
+     ```
+
+  2. **Multi-Modal Input**:
+     `lf.query` supports prompts containing multi-modal inputs, such as images
+     or audio, by embedding modality objects within a template string:
+     ```python
+     image = lf.Image.from_path('/path/to/image.png')
+     r = lf.query(
+         'what is in the {{image}}?',
+         str,
+         image=image,
+         lm=lf.llms.Gemini25Flash()
+     )
+     print(r)
+     # Output: A cat sitting on a sofa.
+     ```
+
+  3. **Few-Shot Examples**:
+     You can provide few-shot examples to guide model behavior using the
+     `examples` argument. Each example is an `lf.MappingExample` containing
+     `input`, `output`, and, if needed, `schema`.
+     ```python
+     class Sentiment(pg.Object):
+       sentiment: Literal['positive', 'negative', 'neutral']
+       reason: str
+
+     r = lf.query(
+         'I love this movie!',
+         Sentiment,
+         examples=[
+             lf.MappingExample(
+                 'This movie is terrible.',
+                 Sentiment(sentiment='negative', reason='The plot is boring.')
+             ),
+             lf.MappingExample(
+                 'It is okay.',
+                 Sentiment(sentiment='neutral', reason='The movie is average.')
+             ),
+         ],
+         lm=lf.llms.Gemini25Flash())
+     print(r)
+     # Output:
+     # Sentiment(
+     #   sentiment='positive',
+     #   reason='The user expresses positive feedback.'
+     # )
+     ```
+
+  4. **Multi-LM Fan-Out**:
+     `lf.query` can concurrently query multiple language models by providing
+     a list of LMs to the `lm` argument and specifying the number of samples
+     for each with `num_samples`.
+     ```python
+     r = lf.query(
+         '1 + 1 = ?',
+         int,
+         lm=[lf.llms.Gemini25Flash(), lf.llms.Gemini()],
+         num_samples=[1, 2])
+     print(r)
+     # Output: [2, 2, 2]
+     ```
 
   Args:
     prompt: The input query. Can be:
       - A natural language string (supports templating with `{{}}`),
-      - A `pg.Object` object for structured input,
+      - A `pg.Object` for structured input,
       - An `lf.Template` for mixed or template-based inputs.
-    schema: Type annotation or `lf.Schema` object for the expected output.
+    schema: Type annotation or `lf.Schema` object for the expected output.
       If `None` (default), the response will be a natural language string.
-    default: Default value to return if parsing fails. If not specified, an
-      error will be raised.
+    default: The default value to return if parsing fails. If
+      `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
+      instead.
     lm: The language model(s) to query. Can be:
       - A single `LanguageModel`,
      - A list of `LanguageModel`s for multi-model fan-out.
@@ -430,18 +435,18 @@ def query(
       from `lf.context` or the main `lm`.
     protocol: Format for schema representation. Builtin choices are `'json'` or
       `'python'`, users could extend with their own protocols by subclassing
-      `lf.structured.LfQuery'. Also protocol could be specified with a version
+      `lf.structured.LfQuery`. Also protocol could be specified with a version
       in the format of 'protocol:version', e.g., 'python:1.0', so users could
       use a specific version of the prompt based on the protocol. Please see the
       documentation of `LfQuery` for more details. If None, the protocol from
       context manager `lf.query_protocol` will be used, or 'python' if not
       specified.
-    returns_message: If `True`, returns an `lf.Message` object instead of
+    returns_message: If `True`, returns an `lf.Message` object instead of
       the final parsed result.
-    skip_lm: If `True`, skips the LLM call and returns the rendered
+    skip_lm: If `True`, skips the LLM call and returns the rendered
       prompt as a `UserMessage` object.
     invocation_id: The ID of the query invocation, which will be passed to
-      `lf.QueryInvocation` when `lf.trackIf `None`, a unique ID will
+      `lf.QueryInvocation`. If `None`, a unique ID will
       be generated.
     **kwargs: Additional keyword arguments for:
       - Rendering templates (e.g., `template_str`, `preamble`),
@@ -453,7 +458,7 @@ def query(
   Returns:
     The result of the query:
     - A single output or a list of outputs if multiple models/samples are used.
-    - Each output is a parsed object matching `schema`, an `lf.Message` (if
+    - Each output is a parsed object matching `schema`, an `lf.Message` (if
       `returns_message=True`), or a natural language string (default).
   """
   # Internal usage logging.
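Since the `protocol` argument documented above interacts with the `lf.query_protocol` context manager (whose signature appears in a later hunk), here is a hedged sketch of both selection styles; the model is a stand-in taken from the docstring examples:

```python
import langfun as lf

lm = lf.llms.Gemini25Flash()  # stand-in model, as used in the docstring above

# Per-call: pin a protocol (optionally versioned) for one query.
r1 = lf.query('1 + 1 = ?', int, lm=lm, protocol='python:1.0')

# Scoped: queries inside the block default to JSON schema prompting.
with lf.query_protocol('json'):
  r2 = lf.query('1 + 1 = ?', int, lm=lm)
```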
@@ -529,24 +534,22 @@ def query(
   ).render(message_cls=lf.SystemMessage)
 
   # Normalize query input.
-  if isinstance(prompt, (lf.Message, str)):
+  if isinstance(prompt, str):
     # Query with structured output.
     prompt_kwargs = kwargs.copy()
     prompt_kwargs.pop('template_str', None)
     query_input = lf.Template.from_value(prompt, **prompt_kwargs)
+  elif isinstance(prompt, lf.Message):
+    query_input = prompt
   elif isinstance(prompt, lf.Template):
-    # Create a copy of the prompt if it has a parent object, so all child
-    # modality objects could be referred by path relative to the prompt.
-    query_input = prompt.clone() if prompt.sym_parent is not None else prompt
-
     # Attach template metadata from kwargs. This is used to pass through fields
     # from kwargs to the rendered message.
-    template_metadata = {
-        k: v for k, v in kwargs.items() if k.startswith('metadata_')
-    }
-    query_input.rebind(
-        template_metadata, skip_notification=True, raise_on_no_change=False
+    prompt.rebind(
+        {k: v for k, v in kwargs.items() if k.startswith('metadata_')},
+        skip_notification=True,
+        raise_on_no_change=False
     )
+    query_input = prompt
   elif pg.MISSING_VALUE == prompt:
     query_input = lf.UserMessage('')
   else:
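The rewritten normalization above gives `lf.Message` prompts their own branch instead of folding them into the string path. A hedged sketch of what this enables; the image bytes and model are placeholders:

```python
import langfun as lf

# A rendered message keeps its modality references, so it can now be passed
# to lf.query as-is instead of being re-wrapped in a template.
msg = lf.Template(
    'What is in this {{image}}?',
    image=lf.Image.from_bytes(b'...'),  # placeholder image bytes
).render()

r = lf.query(msg, str, lm=lf.llms.Gemini25Flash())
```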
@@ -665,11 +668,15 @@ def query(
 
   if returns_message:
     return output_message
-  return output_message.text if schema in (None, str) else output_message.result
+  if schema not in (None, str):
+    return output_message.result
+  if returns_message or output_message.referred_modalities:
+    return output_message
+  return output_message.text
 
 
 async def aquery(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
     schema: schema_lib.SchemaType | None = None,
     default: Any = lf.RAISE_IF_HAS_ERROR,
     *,
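Per the new return path above, a schema-less query whose response refers to modality objects now yields the whole `lf.Message` rather than its text; this matches `test_query_with_modality_output` added near the end of this diff. A hedged sketch, with `image_lm` standing in for any model whose responses embed images:

```python
import langfun as lf

r = lf.query('Generate a cat image', lm=image_lm)  # image_lm is a stand-in

if isinstance(r, lf.Message):
  # Responses carrying modalities come back as the full message.
  images = r.modalities()
else:
  # Plain-text responses still come back as str.
  print(r)
```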
@@ -725,7 +732,7 @@ def query_protocol(protocol: str) -> Iterator[None]:
 
 
 def query_and_reduce(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
     schema: schema_lib.SchemaType | None = None,
     *,
     reduce: Callable[[list[Any]], Any],
@@ -734,12 +741,12 @@ def query_and_reduce(
     **kwargs,
 ) -> Any:
   """Issues multiple `lf.query` calls in parallel and reduce the outputs.
-
+
   Args:
     prompt: A str (may contain {{}} as template) as natural language input, or a
       `pg.Symbolic` object as structured input as prompt to LLM.
-    schema: A type annotation as the schema for output object. If str (default),
-      the response will be a str in natural language.
+    schema: A type annotation as the schema for output object. If None
+      (default), the response will be a str in natural language.
     reduce: A function to reduce the outputs of multiple `lf.query` calls. It
       takes a list of outputs and returns the final object.
     lm: The language model to use. If not specified, the language model from
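The docstring above stops at the argument list, so a hedged usage sketch of `query_and_reduce` with a majority-vote reducer may help; forwarding `num_samples` through `**kwargs` to `lf.query` is an assumption:

```python
import collections
import langfun as lf


def majority_vote(outputs: list[int]) -> int:
  # Collapse the fanned-out samples into the most common answer.
  return collections.Counter(outputs).most_common(1)[0][0]


r = lf.query_and_reduce(
    '1 + 1 = ?',
    int,
    reduce=majority_vote,
    lm=lf.llms.Gemini25Flash(),  # stand-in model
    num_samples=3,  # assumed to forward to lf.query via **kwargs
)
```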
@@ -763,11 +770,34 @@ def query_and_reduce(
 
 
 def query_prompt(
-    prompt: Union[str, lf.Template, Any],
+    prompt: Union[str, lf.Template, lf.Message, Any],
     schema: schema_lib.SchemaType | None = None,
     **kwargs,
 ) -> lf.Message:
-  """Returns the final prompt sent to LLM for `lf.query`."""
+  """Renders the prompt message for `lf.query` without calling the LLM.
+
+  This function simulates the prompt generation step of `lf.query`,
+  producing the `lf.Message` object that would be sent to the language model.
+  It is useful for debugging prompts or inspecting how inputs are formatted.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  prompt_message = lf.query_prompt('1 + 1 = ?', schema=int)
+  print(prompt_message.text)
+  ```
+
+  Args:
+    prompt: The user prompt, which can be a string, `lf.Template`, or any
+      serializable object.
+    schema: The target schema for the query, used for prompt formatting.
+    **kwargs: Additional keyword arguments to pass to `lf.query`.
+
+  Returns:
+    The rendered `lf.Message` object.
+  """
   # Delay import to avoid circular dependency in Colab.
   # llms > data/conversion > structured > querying
   from langfun.core.llms import fake  # pylint: disable=g-import-not-at-top
@@ -789,7 +819,39 @@ def query_output(
     schema: schema_lib.SchemaType | None = None,
     **kwargs,
 ) -> Any:
-  """Returns the final output of `lf.query` from a provided LLM response."""
+  """Parses a raw LLM response based on a schema, as `lf.query` would.
+
+  This function simulates the output processing part of `lf.query`, taking
+  a raw response from a language model and parsing it into the desired schema.
+  It is useful for reprocessing LLM responses or for testing parsing and
+  auto-fixing logic independently of LLM calls.
+
+  **Example:**
+
+  ```python
+  import langfun as lf
+
+  # Output when schema is provided.
+  structured_output = lf.query_output('2', schema=int)
+  print(structured_output)
+  # Output: 2
+
+  # Output when no schema is provided.
+  raw_output = lf.query_output('The answer is 2.')
+  print(raw_output)
+  # Output: The answer is 2.
+  ```
+
+  Args:
+    response: The raw response from an LLM, as a string or `lf.Message`.
+    schema: The target schema to parse the response into. If `None`, the
+      response text is returned.
+    **kwargs: Additional keyword arguments to pass to `lf.query` for parsing
+      (e.g., `autofix`, `default`).
+
+  Returns:
+    The parsed object if schema is provided, or the response text otherwise.
+  """
   # Delay import to avoid circular dependency in Colab.
   # llms > data/conversion > structured > querying
   from langfun.core.llms import fake  # pylint: disable=g-import-not-at-top
@@ -810,7 +872,7 @@ def query_reward(
     mapping_example: Union[str, mapping.MappingExample],
     response: Union[str, lf.Message],
 ) -> float | None:
-  """Returns the reward of an LLM response based on an mapping example."""
+  """Returns the reward of an LLM response based on a mapping example."""
   if isinstance(mapping_example, str):
     mapping_example = pg.from_json_str(mapping_example)
   assert isinstance(mapping_example, mapping.MappingExample), mapping_example
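For context on the corrected docstring above, a hedged sketch of calling `query_reward`; the constructor keywords mirror the `MappingExample` usage elsewhere in this diff, while the exact reward semantics are defined further down in querying.py:

```python
from langfun.core.structured import mapping
from langfun.core.structured import querying

# A mapping example pairing an input with its expected structured output.
example = mapping.MappingExample(
    input='1 + 1 = ?',
    schema=int,
    output=2,
)

# Scores a raw LLM response against the example; returns a float, or None
# if no reward can be computed.
reward = querying.query_reward(example, '2')
```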
@@ -1248,17 +1310,28 @@ def track_queries(
     start_callback: Callable[[QueryInvocation], None] | None = None,
     end_callback: Callable[[QueryInvocation], None] | None = None,
 ) -> Iterator[list[QueryInvocation]]:
-  """Track all queries made during the context.
+  """Tracks all `lf.query` calls made within a `with` block.
 
-  Example:
+  `lf.track_queries` is useful for inspecting LLM inputs and outputs,
+  debugging, and analyzing model behavior. It returns a list of
+  `lf.QueryInvocation` objects, each containing detailed information about
+  a query, such as the input prompt, schema, LLM request/response,
+  and any errors encountered.
 
-  ```
-  with lf.track_queries() as queries:
-    lf.query('hi', lm=lm)
-    lf.query('What is this {{image}}?', lm=lm, image=image)
+  **Example:**
 
-    print(queries)
-  ```
+  ```python
+  import langfun as lf
+
+  with lf.track_queries() as queries:
+    lf.query('1 + 1 = ?', lm=lf.llms.Gemini25Flash())
+    lf.query('Hello!', lm=lf.llms.Gemini25Flash())
+
+  # Print recorded queries
+  for query in queries:
+    print(query.lm_request)
+    print(query.lm_response)
+  ```
 
   Args:
     include_child_scopes: If True, the queries made in child scopes will be
langfun/core/structured/querying_test.py

@@ -249,35 +249,60 @@ class QueryTest(unittest.TestCase):
 
   def test_root_modality_to_structure_render(self):
     lm = fake.StaticResponse('1')
+    image = modalities.Image.from_bytes(b'mock_image')
     self.assert_render(
-        modalities.Image.from_bytes(b'mock_image'),
+        image,
         int,
         lm=lm,
-        expected_snippet='\n\nREQUEST:\n  <<[[input]]>>\n\n',
+        expected_snippet=f'\n\nREQUEST:\n  <<[[{image.id}]]>>\n\n',
         expected_modalities=1,
     )
 
   def test_root_modality_to_str_render(self):
     lm = fake.StaticResponse('1')
+    modality = modalities.Image.from_bytes(b'mock_image')
     self.assert_render(
-        modalities.Image.from_bytes(b'mock_image'),
+        modality,
         None,
         lm=lm,
-        expected_snippet='<<[[input]]>>',
+        expected_snippet=f'<<[[{modality.id}]]>>',
         exact_match=True,
         expected_modalities=1,
     )
 
   def test_str_with_modality_to_str_render(self):
     lm = fake.StaticResponse('A cat and a mouse.')
+    cat_image = modalities.Image.from_bytes(b'cat_image')
+    mouse_image = modalities.Image.from_bytes(b'mouse_image')
     self.assert_render(
         'What are these? {{this_image}} and {{that_image}}',
         None,
-        this_image=modalities.Image.from_bytes(b'cat_image'),
-        that_image=modalities.Image.from_bytes(b'mouse_image'),
+        this_image=cat_image,
+        that_image=mouse_image,
         lm=lm,
         expected_snippet=(
-            'What are these? <<[[this_image]]>> and <<[[that_image]]>>'
+            f'What are these? <<[[{cat_image.id}]]>> and '
+            f'<<[[{mouse_image.id}]]>>'
+        ),
+        exact_match=True,
+        expected_modalities=2,
+    )
+
+  def test_message_with_modality_to_str_render(self):
+    lm = fake.StaticResponse('A cat and a mouse.')
+    cat_image = modalities.Image.from_bytes(b'cat_image')
+    mouse_image = modalities.Image.from_bytes(b'mouse_image')
+    self.assert_render(
+        lf.Template(
+            'What are these? {{this_image}} and {{that_image}}',
+            this_image=cat_image,
+            that_image=mouse_image,
+        ).render(),
+        None,
+        lm=lm,
+        expected_snippet=(
+            f'What are these? <<[[{cat_image.id}]]>> and '
+            f'<<[[{mouse_image.id}]]>>'
         ),
         exact_match=True,
         expected_modalities=2,
@@ -285,33 +310,33 @@ class QueryTest(unittest.TestCase):
 
   def test_structure_with_modality_to_str_render(self):
     lm = fake.StaticResponse('A cat and a mouse.')
+    cat_image = modalities.Image.from_bytes(b'cat_image')
+    mouse_image = modalities.Image.from_bytes(b'mouse_image')
     self.assert_render(
-        [
-            modalities.Image.from_bytes(b'cat_image'),
-            modalities.Image.from_bytes(b'mouse_image'),
-        ],
+        [cat_image, mouse_image],
         None,
         lm=lm,
-        expected_snippet='`[<<[[input[0]]]>>, <<[[input[1]]]>>]`',
+        expected_snippet=(
+            f'`[<<[[{cat_image.id}]]>>, <<[[{mouse_image.id}]]>>]`'
+        ),
         exact_match=True,
         expected_modalities=2,
     )
 
   def test_structure_with_modality_to_structure_render(self):
     lm = fake.StaticResponse('["cat", "mouse"]')
+    cat_image = modalities.Image.from_bytes(b'cat_image')
+    mouse_image = modalities.Image.from_bytes(b'mouse_image')
     self.assert_render(
-        [
-            modalities.Image.from_bytes(b'cat_image'),
-            modalities.Image.from_bytes(b'mouse_image'),
-        ],
+        [cat_image, mouse_image],
         list[str],
         lm=lm,
-        expected_snippet=inspect.cleandoc("""
+        expected_snippet=inspect.cleandoc(f"""
            REQUEST:
            ```python
            [
-              <<[[input[0]]]>>,
-              <<[[input[1]]]>>
+              <<[[{cat_image.id}]]>>,
+              <<[[{mouse_image.id}]]>>
            ]
            ```
            """),
320
345
 
321
346
  def test_structure_with_modality_and_examples_to_structure_render(self):
322
347
  lm = fake.StaticResponse('["cat", "mouse"]')
348
+ cat_image = modalities.Image.from_bytes(b'cat_image')
349
+ mouse_image = modalities.Image.from_bytes(b'mouse_image')
350
+ dog_image = modalities.Image.from_bytes(b'dog_image')
323
351
  self.assert_render(
324
- [
325
- modalities.Image.from_bytes(b'cat_image'),
326
- modalities.Image.from_bytes(b'mouse_image'),
327
- ],
352
+ [cat_image, mouse_image],
328
353
  list[str],
329
354
  examples=[
330
355
  mapping.MappingExample(
331
- input=[modalities.Image.from_bytes(b'dog_image')],
356
+ input=[dog_image],
332
357
  schema=list[str],
333
358
  output=['dog'],
334
359
  ),
335
360
  ],
336
361
  lm=lm,
337
- expected_snippet=inspect.cleandoc("""
362
+ expected_snippet=inspect.cleandoc(f"""
338
363
  REQUEST:
339
364
  ```python
340
365
  [
341
- <<[[examples[0].input[0]]]>>
366
+ <<[[{dog_image.id}]]>>
342
367
  ]
343
368
  ```
344
369
 
@@ -356,8 +381,8 @@ class QueryTest(unittest.TestCase):
356
381
  REQUEST:
357
382
  ```python
358
383
  [
359
- <<[[input[0]]]>>,
360
- <<[[input[1]]]>>
384
+ <<[[{cat_image.id}]]>>,
385
+ <<[[{mouse_image.id}]]>>
361
386
  ]
362
387
  ```
363
388
 
@@ -369,6 +394,17 @@ class QueryTest(unittest.TestCase):
369
394
  expected_modalities=3,
370
395
  )
371
396
 
397
+ def test_query_with_modality_output(self):
398
+ cat_image = modalities.Image.from_bytes(b'cat_image')
399
+ lm = fake.StaticResponse(
400
+ lf.Template('Here you go: {{image}}', image=cat_image).render(
401
+ message_cls=lf.AIMessage
402
+ )
403
+ )
404
+ response = querying.query('Generate a cat image', lm=lm)
405
+ self.assertIsInstance(response, lf.AIMessage)
406
+ self.assertEqual(response.modalities(), [cat_image])
407
+
372
408
  def test_multiple_queries(self):
373
409
  self.assertEqual(
374
410
  querying.query(
@@ -545,7 +581,7 @@ class QueryTest(unittest.TestCase):
            )
        ).input,
    )
-    self.assertIsNotNone(output.get_modality('image'))
+    self.assertEqual(len(output.referred_modalities), 1)
 
   def test_query_and_reduce(self):
     self.assertEqual(