langfun 0.1.2.dev202511040805__py3-none-any.whl → 0.1.2.dev202511050805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic. Click here for more details.

Files changed (76)
  1. langfun/core/agentic/action.py +76 -9
  2. langfun/core/agentic/action_eval.py +9 -2
  3. langfun/core/async_support.py +32 -3
  4. langfun/core/coding/python/correction.py +19 -9
  5. langfun/core/coding/python/execution.py +14 -12
  6. langfun/core/coding/python/generation.py +21 -16
  7. langfun/core/coding/python/sandboxing.py +23 -3
  8. langfun/core/component.py +42 -3
  9. langfun/core/concurrent.py +70 -6
  10. langfun/core/console.py +1 -1
  11. langfun/core/data/conversion/anthropic.py +10 -3
  12. langfun/core/data/conversion/gemini.py +9 -2
  13. langfun/core/data/conversion/openai.py +17 -7
  14. langfun/core/eval/base.py +46 -42
  15. langfun/core/eval/matching.py +5 -2
  16. langfun/core/eval/patching.py +3 -3
  17. langfun/core/eval/scoring.py +4 -3
  18. langfun/core/eval/v2/checkpointing.py +30 -4
  19. langfun/core/eval/v2/evaluation.py +59 -13
  20. langfun/core/eval/v2/example.py +22 -11
  21. langfun/core/eval/v2/experiment.py +51 -8
  22. langfun/core/eval/v2/metric_values.py +23 -3
  23. langfun/core/eval/v2/metrics.py +33 -4
  24. langfun/core/eval/v2/progress.py +9 -1
  25. langfun/core/eval/v2/reporting.py +15 -1
  26. langfun/core/eval/v2/runners.py +27 -7
  27. langfun/core/langfunc.py +45 -130
  28. langfun/core/language_model.py +88 -10
  29. langfun/core/llms/anthropic.py +27 -2
  30. langfun/core/llms/azure_openai.py +29 -17
  31. langfun/core/llms/cache/base.py +22 -2
  32. langfun/core/llms/cache/in_memory.py +48 -7
  33. langfun/core/llms/compositional.py +25 -1
  34. langfun/core/llms/deepseek.py +29 -1
  35. langfun/core/llms/fake.py +32 -1
  36. langfun/core/llms/gemini.py +9 -1
  37. langfun/core/llms/google_genai.py +29 -1
  38. langfun/core/llms/groq.py +27 -2
  39. langfun/core/llms/llama_cpp.py +22 -3
  40. langfun/core/llms/openai.py +29 -1
  41. langfun/core/llms/openai_compatible.py +18 -6
  42. langfun/core/llms/rest.py +12 -1
  43. langfun/core/llms/vertexai.py +39 -6
  44. langfun/core/logging.py +1 -1
  45. langfun/core/mcp/client.py +77 -22
  46. langfun/core/mcp/session.py +90 -10
  47. langfun/core/mcp/tool.py +83 -23
  48. langfun/core/memory.py +1 -0
  49. langfun/core/message.py +59 -12
  50. langfun/core/message_test.py +3 -0
  51. langfun/core/modalities/audio.py +21 -1
  52. langfun/core/modalities/image.py +19 -1
  53. langfun/core/modalities/mime.py +45 -2
  54. langfun/core/modalities/pdf.py +19 -1
  55. langfun/core/modalities/video.py +21 -1
  56. langfun/core/modality.py +66 -5
  57. langfun/core/natural_language.py +1 -1
  58. langfun/core/sampling.py +4 -4
  59. langfun/core/structured/completion.py +32 -37
  60. langfun/core/structured/description.py +54 -50
  61. langfun/core/structured/function_generation.py +29 -12
  62. langfun/core/structured/mapping.py +70 -15
  63. langfun/core/structured/parsing.py +90 -74
  64. langfun/core/structured/querying.py +201 -130
  65. langfun/core/structured/schema.py +70 -10
  66. langfun/core/structured/schema_generation.py +33 -14
  67. langfun/core/structured/scoring.py +45 -34
  68. langfun/core/structured/tokenization.py +24 -9
  69. langfun/core/subscription.py +2 -2
  70. langfun/core/template.py +132 -35
  71. langfun/core/template_test.py +22 -0
  72. {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/METADATA +1 -1
  73. {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/RECORD +76 -76
  74. {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/WHEEL +0 -0
  75. {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/licenses/LICENSE +0 -0
  76. {langfun-0.1.2.dev202511040805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/top_level.txt +0 -0
langfun/core/message.py CHANGED
@@ -32,15 +32,49 @@ class Message(
32
32
  pg.Object,
33
33
  pg.views.HtmlTreeView.Extension
34
34
  ):
35
- """Message.
35
+ """Message between users, LLMs and tools.
36
36
 
37
- ``Message`` is the protocol for users and the system to interact with
38
- LLMs. It consists of a text in the form of natural language,
39
- an identifier of the sender, and a dictionary of Python values as structured
40
- meta-data.
37
+ `lf.Message` is the fundamental unit of communication in Langfun. It
38
+ standardizes interactions with LLMs by encapsulating not only text but also
39
+ multi-modal content, as well as the sender's role and structured metadata.
41
40
 
42
- The subclasses of ``Message`` represent messages sent from different roles.
43
- Agents may use the roles to decide the orchastration logic.
41
+ **Key Components:**
42
+
43
+ * **`text`**: The natural language content of the message.
44
+ * **`sender`**: An identifier for the message originator (e.g., 'User',
45
+ 'AI', 'System').
46
+ * **`metadata`**: A dictionary for structured data, such as tool inputs/
47
+ outputs, scores, or other contextual information.
48
+ * **`referred_modalities`**: A dictionary of modality objects (e.g.,
49
+ `lf.Image`, `lf.Audio`) referenced within the message text via placeholders
50
+ like `<<[[image_id]]>>`.
51
+
52
+ Subclasses like `lf.UserMessage`, `lf.AIMessage`, and `lf.ToolMessage`
53
+ represent messages from specific roles, enabling more complex conversational
54
+ flows and agentic behaviors.
55
+
56
+ **Example:**
57
+
58
+ ```python
59
+ import langfun as lf
60
+
61
+ # Creating a user message with an image
62
+ image = lf.Image.from_path('/path/to/image.png')
63
+ user_message = lf.UserMessage(
64
+ f'What is in this image <<[[{image.id}]]>>?',
65
+ referred_modalities=[image])
66
+
67
+ # Creating an AI message with structured results
68
+ ai_message = lf.AIMessage(
69
+ 'It is a cat.',
70
+ metadata=dict(result=dict(label='cat', confidence=0.9)))
71
+
72
+ print(user_message.chunk())
73
+ # Output: ['What is in this image', <lf.Image object>, '?']
74
+
75
+ print(ai_message.result)
76
+ # Output: {'label': 'cat', 'confidence': 0.9}
77
+ ```
44
78
  """
45
79
 
46
80
  #
@@ -417,7 +451,7 @@ class Message(
417
451
  var_name: str,
418
452
  default: Any = None
419
453
  ) -> modality.Modality | None:
420
- """Gets the modality object referred in the message.
454
+ """Returns modality object referred in the message by its variable name.
421
455
 
422
456
  Args:
423
457
  var_name: The referred variable name for the modality object.
@@ -429,7 +463,14 @@ class Message(
429
463
  return self.referred_modalities.get(var_name, default)
430
464
 
431
465
  def chunk(self, text: str | None = None) -> list[str | modality.Modality]:
432
- """Chunk a message into a list of str or modality objects."""
466
+ """Chunks message into a list of text and modality chunks.
467
+
468
+ Args:
469
+ text: The text to chunk. If None, use `self.text`.
470
+
471
+ Returns:
472
+ A list of text and modality chunks.
473
+ """
433
474
  chunks = []
434
475
 
435
476
  def add_text_chunk(text_piece: str) -> None:
@@ -469,7 +510,7 @@ class Message(
469
510
  def from_chunks(
470
511
  cls, chunks: list[str | modality.Modality], separator: str = ' '
471
512
  ) -> 'Message':
472
- """Assembly a message from a list of string or modality objects."""
513
+ """Assembles a message from a list of string or modality objects."""
473
514
  fused_text = io.StringIO()
474
515
  metadata = dict()
475
516
  referred_modalities = dict()
@@ -559,7 +600,7 @@ class Message(
559
600
  return self.trace(Message.TAG_LM_OUTPUT)
560
601
 
561
602
  def last(self, tag: str) -> Optional['Message']:
562
- """Return the last message wih certain tag."""
603
+ """Returns the last message with a given tag."""
563
604
  current = self
564
605
  while current is not None:
565
606
  if tag in current.tags:
@@ -913,6 +954,12 @@ class _MessageConverterRegistry:
913
954
  if converter.OUTPUT_TYPE is not None:
914
955
  self._type_to_converters[converter.OUTPUT_TYPE].append(converter)
915
956
 
957
+ def unregister(self, converter: Type['MessageConverter']) -> None:
958
+ """Unregisters a message converter."""
959
+ self._name_to_converter.pop(converter.FORMAT_ID, None)
960
+ if converter.OUTPUT_TYPE is not None:
961
+ self._type_to_converters[converter.OUTPUT_TYPE].remove(converter)
962
+
916
963
  def get_by_type(self, t: Type[Any], **kwargs) -> 'MessageConverter':
917
964
  """Returns a message converter for the given type."""
918
965
  t = self._type_to_converters[t]
@@ -948,7 +995,7 @@ class _MessageConverterRegistry:
948
995
  if isinstance(format_or_type, str):
949
996
  return format_or_type in self._name_to_converter
950
997
  assert isinstance(format_or_type, type), format_or_type
951
- return format_or_type in self._type_to_converters
998
+ return bool(self._type_to_converters.get(format_or_type))
952
999
 
953
1000
  def convertible_formats(self) -> list[str]:
954
1001
  """Returns a list of converter names."""
@@ -546,6 +546,9 @@ class MessageConverterTest(unittest.TestCase):
546
546
  message.Message.from_value((1, 2, 3)),
547
547
  message.UserMessage('1,2,3')
548
548
  )
549
+ message.MessageConverter._REGISTRY.unregister(TestConverter)
550
+ message.MessageConverter._REGISTRY.unregister(TestConverter2)
551
+ message.MessageConverter._REGISTRY.unregister(TestConverter3)
549
552
 
550
553
  def test_get_role(self):
551
554
  self.assertEqual(
@@ -18,7 +18,27 @@ from langfun.core.modalities import mime
18
18
 
19
19
 
20
20
  class Audio(mime.Mime):
21
- """Audio."""
21
+ """Represents audio for communicating with language models.
22
+
23
+ `lf.Audio` can be initialized from a URI (HTTP/HTTPS URL or local path)
24
+ using `lf.Audio.from_uri()` or from raw bytes using `lf.Audio.from_bytes()`.
25
+
26
+ **Example:**
27
+
28
+ ```python
29
+ import langfun as lf
30
+
31
+ # Load audio from path
32
+ audio = lf.Audio.from_path('/path/to/audio.mp3')
33
+
34
+ # Use audio in a prompt
35
+ prompt = lf.Template(
36
+ 'What is being said in this audio? {{audio}}', audio=audio
37
+ )
38
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
39
+ print(response)
40
+ ```
41
+ """
22
42
 
23
43
  MIME_PREFIX = 'audio'
24
44
 
@@ -33,7 +33,25 @@ except ImportError:
33
33
 
34
34
 
35
35
  class Image(mime.Mime):
36
- """Image."""
36
+ """Represents an image for communicating with language models.
37
+
38
+ `lf.Image` can be initialized from a URI (HTTP/HTTPS URL or local path)
39
+ using `lf.Image.from_uri()` or from raw bytes using `lf.Image.from_bytes()`.
40
+
41
+ **Example:**
42
+
43
+ ```python
44
+ import langfun as lf
45
+
46
+ # Load image from path
47
+ image = lf.Image.from_path('/path/to/image.png')
48
+
49
+ # Use image in a prompt
50
+ prompt = lf.Template('Describe this image: {{image}}', image=image)
51
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
52
+ print(response)
53
+ ```
54
+ """
37
55
 
38
56
  MIME_PREFIX = 'image'
39
57
 
@@ -37,7 +37,33 @@ def _detect_mime_type(content: bytes) -> str:
37
37
 
38
38
 
39
39
  class Mime(lf.Modality):
40
- """Base for MIME data."""
40
+ """Base class for representing modality data based on MIME types.
41
+
42
+ `lf.Mime` is a subclass of `lf.Modality` that serves as a base for
43
+ handling various data types like images, audio, video, and PDFs,
44
+ identified by their MIME types. It provides unified methods for
45
+ loading data from URIs or bytes (`.from_uri()`, `.from_bytes()`) and
46
+ for accessing content (`.to_bytes()`).
47
+
48
+ Subclasses like `lf.Image`, `lf.Audio`, `lf.Video`, and `lf.PDF`
49
+ specialize in handling specific MIME type prefixes (e.g., 'image/', 'audio/').
50
+
51
+ **Example:**
52
+
53
+ ```python
54
+ import langfun as lf
55
+
56
+ # Load an image from a path
57
+ image = lf.Image.from_path('/path/to/image.png')
58
+ print(image.mime_type)
59
+ # Output: image/png
60
+
61
+ # Create a text document
62
+ text = lf.Custom.from_bytes(b'hello world', mime='text/plain')
63
+ print(text.mime_type)
64
+ # Output: text/plain
65
+ ```
66
+ """
41
67
 
42
68
  # The regular expression that describes the MIME type str.
43
69
  # If None, the MIME type is dynamic. Subclass could override.
@@ -288,7 +314,24 @@ class Mime(lf.Modality):
288
314
 
289
315
  @pg.use_init_args(['mime', 'content', 'uri'])
290
316
  class Custom(Mime):
291
- """Custom MIME data."""
317
+ """Represents content of a custom MIME type.
318
+
319
+ `lf.modalities.Custom` is useful for representing data with MIME types
320
+ that do not have dedicated classes like `lf.Image` or `lf.Audio`.
321
+
322
+ **Example:**
323
+
324
+ ```python
325
+ import langfun as lf
326
+
327
+ # Create a custom MIME object for plain text
328
+ text_data = lf.Custom.from_bytes(
329
+ b'This is a text document.', mime='text/plain'
330
+ )
331
+ print(text_data.mime_type)
332
+ # Output: text/plain
333
+ ```
334
+ """
292
335
 
293
336
  mime: Annotated[
294
337
  str, 'The MIME type of the data. E.g. text/plain, or image/png. '
@@ -17,6 +17,24 @@ from langfun.core.modalities import mime
17
17
 
18
18
 
19
19
  class PDF(mime.Mime):
20
- """PDF document."""
20
+ """Represents a PDF document for communicating with language models.
21
+
22
+ `lf.PDF` can be initialized from a URI (HTTP/HTTPS URL or local path)
23
+ using `lf.PDF.from_uri()` or from raw bytes using `lf.PDF.from_bytes()`.
24
+
25
+ **Example:**
26
+
27
+ ```python
28
+ import langfun as lf
29
+
30
+ # Load PDF from path
31
+ pdf = lf.PDF.from_path('/path/to/document.pdf')
32
+
33
+ # Use PDF in a prompt
34
+ prompt = lf.Template('Summarize this document: {{pdf}}', pdf=pdf)
35
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
36
+ print(response)
37
+ ```
38
+ """
21
39
 
22
40
  MIME_PREFIX = 'application/pdf'
@@ -18,7 +18,27 @@ from langfun.core.modalities import mime
18
18
 
19
19
 
20
20
  class Video(mime.Mime):
21
- """Video."""
21
+ """Represents a video for communicating with language models.
22
+
23
+ `lf.Video` can be initialized from a URI (HTTP/HTTPS URL or local path)
24
+ using `lf.Video.from_uri()` or from raw bytes using `lf.Video.from_bytes()`.
25
+
26
+ **Example:**
27
+
28
+ ```python
29
+ import langfun as lf
30
+
31
+ # Load video from path
32
+ video = lf.Video.from_path('/path/to/video.mp4')
33
+
34
+ # Use video in a prompt
35
+ prompt = lf.Template(
36
+ 'What is happening in this video? {{video}}', video=video
37
+ )
38
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
39
+ print(response)
40
+ ```
41
+ """
22
42
 
23
43
  MIME_PREFIX = 'video'
24
44
 
langfun/core/modality.py CHANGED
@@ -24,7 +24,35 @@ import pyglove as pg
24
24
 
25
25
 
26
26
  class Modality(component.Component, pg.views.HtmlTreeView.Extension):
27
- """Base class for multimodal object."""
27
+ """Base class for representing non-text content in prompts.
28
+
29
+ `lf.Modality` is the base class for multimodal objects such as `lf.Image`,
30
+ `lf.Audio`, and `lf.Video`. It allows these non-text inputs to be
31
+ seamlessly embedded within text prompts for processing by multimodal
32
+ language models.
33
+
34
+ When a `Modality` object is rendered within an `lf.Template`, it is
35
+ replaced by a text marker (e.g., `<<[[image:b10a8db1]]>>`), and the
36
+ modality object itself is stored in the `referred_modalities` field of
37
+ the resulting `lf.Message`. This allows language models to associate
38
+ the placeholder with its content during processing.
39
+
40
+ **Example:**
41
+
42
+ ```python
43
+ import langfun as lf
44
+
45
+ image = lf.Image.from_path('/path/to/image.png')
46
+ prompt = lf.Template('What is in this image? {{image}}', image=image)
47
+
48
+ message = prompt.render()
49
+ print(message.text)
50
+ # Output: What is in this image? <<[[image:b10a8db1]]>>
51
+
52
+ print(message.modalities())
53
+ # Output: [<Image object>]
54
+ ```
55
+ """
28
56
 
29
57
  REF_START = '<<[['
30
58
  REF_END = ']]>>'
@@ -87,11 +115,44 @@ class Modality(component.Component, pg.views.HtmlTreeView.Extension):
87
115
 
88
116
 
89
117
  class ModalityRef(pg.Object, pg.typing.CustomTyping):
90
- """References of modality objects in a symbolic tree.
118
+ """Lightweight placeholder for a `lf.Modality` object in a symbolic tree.
119
+
120
+ `ModalityRef` acts as a reference to a `Modality` object (like `lf.Image`
121
+ or `lf.Audio`) within a structured object hierarchy (e.g., a `pg.Object`).
122
+ Instead of embedding potentially large modality data directly, `ModalityRef`
123
+ stores only the ID of the modality object.
124
+
125
+ This is useful in scenarios where structured objects are serialized or
126
+ manipulated, and it's more efficient to refer to modalities by ID rather
127
+ than copying their content. The `lf.ModalityRef.placehold()` class method
128
+ can be used to replace `Modality` instances in a symbolic object with
129
+ `ModalityRef` placeholders, while `lf.ModalityRef.restore()` can reinstate
130
+ the original `Modality` objects using a lookup table.
131
+
132
+ **Example:**
133
+
134
+ ```python
135
+ import langfun as lf
136
+ import pyglove as pg
137
+
138
+ class ImagePair(pg.Object):
139
+ image1: lf.Image
140
+ image2: lf.Image
141
+
142
+ pair = ImagePair(
143
+ image1=lf.Image(content=b'abc'), image2=lf.Image(content=b'def')
144
+ )
145
+ modalities = lf.Modality.from_value(pair)
146
+
147
+ # Replace Image objects with ModalityRef placeholders
148
+ pair_with_refs = lf.ModalityRef.placehold(pair)
149
+ print(pair_with_refs.image1)
150
+ # Output: ModalityRef(id='image:d81e5a68')
91
151
 
92
- `ModalityRef` was introduced to placehold modality objects in a symbolic
93
- tree, to prevent message from being chunked in the middle of a Python
94
- structure.
152
+ # Restore Image objects from ModalityRef placeholders
153
+ pair_restored = lf.ModalityRef.restore(pair_with_refs, modalities)
154
+ assert pair_restored.image1.content == b'abc'
155
+ ```
95
156
  """
96
157
 
97
158
  id: str
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- """Natural language utilities."""
14
+ """Natural language formatting."""
15
15
 
16
16
  import abc
17
17
  import pyglove as pg
langfun/core/sampling.py CHANGED
@@ -38,10 +38,10 @@ def sweep(
38
38
  Union[message_lib.Message, BaseException, None], # LM output.
39
39
  ],
40
40
  ]:
41
- """Sweeps the input/output of this LangFunc concurrently.
41
+ """Sweeps the input/output of a LangFunc search space concurrently.
42
42
 
43
43
  Args:
44
- lfun: An LangFunc object that contains `pg.oneof` as the search space
44
+ lfun: A LangFunc object that contains `pg.oneof` as the search space
45
45
  for sampling.
46
46
  num_examples: Number of examples to sample.
47
47
  max_workers: Max number of concurrent workers to do sampling.
@@ -84,10 +84,10 @@ def random_sample(
84
84
  Union[message_lib.Message, BaseException, None], # LM output.
85
85
  ],
86
86
  ]:
87
- """Random samples the input/output of this LangFunc concurrently.
87
+ """Randomly samples the input/output of a LangFunc search space concurrently.
88
88
 
89
89
  Args:
90
- lfun: An LangFunc object that contains `pg.oneof` as the search space
90
+ lfun: A LangFunc object that contains `pg.oneof` as the search space
91
91
  for sampling.
92
92
  num_examples: Number of examples to sample.
93
93
  max_workers: Max number of concurrent workers to do sampling.
@@ -116,7 +116,7 @@ class _CompleteStructure(mapping.Mapping):
116
116
  )
117
117
 
118
118
  def postprocess_result(self, result: Any) -> Any:
119
- """Postprocess result."""
119
+ """Postprocesses result."""
120
120
  # Try restore modality objects from the input value to output value.
121
121
  if modalities := self.modalities(self.input):
122
122
  result = lf.ModalityRef.restore(result, modalities)
@@ -151,7 +151,7 @@ class _CompleteStructure(mapping.Mapping):
151
151
  #
152
152
 
153
153
  def has_modality_refs(self, value: Any) -> bool:
154
- """Returns true if the value has modalities."""
154
+ """Returns True if the value has modalities."""
155
155
  return not isinstance(value, lf.Modality) and pg.contains(
156
156
  value, type=lf.Modality
157
157
  )
@@ -181,41 +181,36 @@ def complete(
181
181
  returns_message: bool = False,
182
182
  **kwargs,
183
183
  ) -> Any:
184
- """Complete a symbolic value by filling its missing fields.
185
-
186
- Examples:
187
-
188
- ```
189
- class FlightDuration:
190
- hours: int
191
- minutes: int
192
-
193
- class Flight(pg.Object):
194
- airline: str
195
- flight_number: str
196
- departure_airport_code: str
197
- arrival_airport_code: str
198
- departure_time: str
199
- arrival_time: str
200
- duration: FlightDuration
201
- stops: int
202
- price: float
203
-
204
- prompt = '''
205
- Information about flight UA2631.
206
- '''
207
-
208
- r = lf.query(prompt, Flight)
209
- assert isinstance(r, Flight)
210
- assert r.airline == 'United Airlines'
211
- assert r.departure_airport_code == 'SFO'
212
- assert r.duration.hour = 7
213
- ```
184
+ """Completes a symbolic value by filling its missing fields using an LLM.
185
+
186
+ `lf.complete` is used to fill in missing information in structured
187
+ data. It takes a partially defined `pg.Object` instance where some fields
188
+ are marked as `lf.MISSING`, and uses a language model to infer and
189
+ populate those fields based on the provided values.
190
+
191
+ **Example:**
192
+
193
+ ```python
194
+ import langfun as lf
195
+ import pyglove as pg
196
+
197
+ class Country(pg.Object):
198
+ name: str
199
+ capital: str = lf.MISSING
200
+ population: int = lf.MISSING
201
+
202
+ # Filling missing fields of Country(name='France')
203
+ country = lf.complete(Country(name='France'), lm=lf.llms.Gemini25Flash())
204
+ print(country)
205
+ # Output: Country(name='France', capital='Paris', population=67000000)
206
+ ```
214
207
 
215
208
  Args:
216
- input_value: A symbolic value that may contain missing values.
217
- default: The default value if parsing failed. If not specified, error will
218
- be raised.
209
+ input_value: A symbolic value that may contain missing values marked
210
+ by `lf.MISSING`.
211
+ default: The default value to return if parsing fails. If
212
+ `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
213
+ instead.
219
214
  lm: The language model to use. If not specified, the language model from
220
215
  `lf.context` context manager will be used.
221
216
  examples: An optional list of fewshot examples for helping parsing. If None,
@@ -231,10 +226,10 @@ def complete(
231
226
  returns_message: If True, returns `lf.Message` as the output, instead of
232
227
  returning the structured `message.result`.
233
228
  **kwargs: Keyword arguments passed to the
234
- `lf.structured.NaturalLanguageToStructureed` transform.
229
+ `lf.structured.Mapping` transform.
235
230
 
236
231
  Returns:
237
- The result based on the schema.
232
+ The input object with missing fields completed by LLM.
238
233
  """
239
234
  t = _CompleteStructure(
240
235
  input=schema_lib.mark_missing(input_value),
@@ -23,7 +23,7 @@ import pyglove as pg
23
23
 
24
24
  @pg.use_init_args(['examples'])
25
25
  class _DescribeStructure(mapping.Mapping):
26
- """Describe a structured value in natural language."""
26
+ """Describes a structured value in natural language."""
27
27
 
28
28
  input_title = 'PYTHON_OBJECT'
29
29
  context_title = 'CONTEXT_FOR_DESCRIPTION'
@@ -47,64 +47,68 @@ def describe(
47
47
  cache_seed: int | None = 0,
48
48
  **kwargs,
49
49
  ) -> str:
50
- """Describes a structured value using natural language.
51
-
52
- Examples:
53
-
54
- ```
55
- class FlightDuration(pg.Object):
56
- hours: int
57
- minutes: int
58
-
59
- class Flight(pg.Object):
60
- airline: str
61
- flight_number: str
62
- departure_airport: str
63
- arrival_airport: str
64
- departure_time: str
65
- arrival_time: str
66
- duration: FlightDuration
67
- stops: int
68
- price: float
69
-
70
- text = lf.describe(
71
- Flight(
72
- airline='United Airlines',
73
- flight_number='UA2631',
74
- depature_airport: 'SFO',
75
- arrival_airport: 'JFK',
76
- depature_time: '2023-09-07T05:15:00',
77
- arrival_time: '2023-09-07T12:12:00',
78
- duration: FlightDuration(
79
- hours=7,
80
- minutes=57
81
- ),
82
- stops=1,
83
- price=227,
84
- ))
85
- print(text)
86
-
87
- >> The flight is operated by United Airlines, has the flight number UA2631,
88
- >> departs from San Francisco International Airport (SFO), arrives at John
89
- >> F. Kennedy International Airport (JFK), It departs at
90
- >> 2023-09-07T05:15:00, arrives at 2023-09-07T12:12:00, has a duration of 7
91
- >> hours and 57 minutes, makes 1 stop, and costs $227.
92
- ```
50
+ """Describes a structured value in natural language using an LLM.
51
+
52
+ `lf.describe` takes a Python object, often a `pg.Object` instance,
53
+ and uses a language model to generate a human-readable, natural language
54
+ description of its content. It is the inverse of `lf.parse`.
55
+
56
+ **Example:**
57
+
58
+ ```python
59
+ import langfun as lf
60
+ import pyglove as pg
61
+
62
+ class FlightDuration(pg.Object):
63
+ hours: int
64
+ minutes: int
65
+
66
+ class Flight(pg.Object):
67
+ airline: str
68
+ flight_number: str
69
+ departure_airport: str
70
+ arrival_airport: str
71
+ departure_time: str
72
+ arrival_time: str
73
+ duration: FlightDuration
74
+ stops: int
75
+ price: float
76
+
77
+ flight_info = Flight(
78
+ airline='United Airlines',
79
+ flight_number='UA2631',
80
+ departure_airport='SFO',
81
+ arrival_airport='JFK',
82
+ departure_time='2023-09-07T05:15:00',
83
+ arrival_time='2023-09-07T12:12:00',
84
+ duration=FlightDuration(hours=7, minutes=57),
85
+ stops=1,
86
+ price=227,
87
+ )
88
+
89
+ description = lf.describe(flight_info, lm=lf.llms.Gemini25Flash())
90
+ print(description)
91
+ # Possible output:
92
+ # The flight is operated by United Airlines, with the flight number UA2631,
93
+ # departing from SFO at 2023-09-07T05:15:00 and arriving at JFK at
94
+ # 2023-09-07T12:12:00. The flight duration is 7 hours and 57 minutes,
95
+ # with 1 stop, and costs $227.
96
+ ```
93
97
 
94
98
  Args:
95
99
  value: A structured value to be mapped.
96
100
  context: The context information for describing the structured value.
97
101
  lm: The language model to use. If not specified, the language model from
98
102
  `lf.context` context manager will be used.
99
- examples: An optional list of fewshot examples for helping parsing. If None,
100
- the default one-shot example will be added.
103
+ examples: An optional list of fewshot examples for guiding description.
104
+ If None, default examples will be used.
101
105
  cache_seed: Seed for computing cache key. The cache key is determined by a
102
106
  tuple of (lm, prompt, cache seed). If None, cache will be disabled for
103
107
  the query even cache is configured by the LM.
104
- **kwargs: Keyword arguments passed to the `lf.structured.DescribeStructure`.
108
+ **kwargs: Keyword arguments passed to the `_DescribeStructure`.
105
109
 
106
110
  Returns:
107
- The parsed result based on the schema.
111
+ A natural language description of the input value.
108
112
  """
109
113
  return _DescribeStructure(
110
114
  input=value,
@@ -115,10 +119,10 @@ def describe(
115
119
 
116
120
 
117
121
  def default_describe_examples() -> list[mapping.MappingExample]:
118
- """Default describe examples."""
122
+ """Returns default examples for `lf.describe`."""
119
123
 
120
124
  class Country(pg.Object):
121
- """A example dataclass for structured mapping."""
125
+ """An example dataclass for structured mapping."""
122
126
 
123
127
  name: str
124
128
  continents: list[