langfun 0.1.2.dev202511030805__py3-none-any.whl → 0.1.2.dev202511050805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic. Click here for more details.

Files changed (77)
  1. langfun/core/agentic/action.py +76 -9
  2. langfun/core/agentic/action_eval.py +9 -2
  3. langfun/core/async_support.py +32 -3
  4. langfun/core/coding/python/correction.py +19 -9
  5. langfun/core/coding/python/execution.py +14 -12
  6. langfun/core/coding/python/generation.py +21 -16
  7. langfun/core/coding/python/sandboxing.py +23 -3
  8. langfun/core/component.py +42 -3
  9. langfun/core/concurrent.py +70 -6
  10. langfun/core/console.py +1 -1
  11. langfun/core/data/conversion/anthropic.py +10 -3
  12. langfun/core/data/conversion/gemini.py +9 -2
  13. langfun/core/data/conversion/openai.py +17 -7
  14. langfun/core/eval/base.py +46 -42
  15. langfun/core/eval/matching.py +5 -2
  16. langfun/core/eval/patching.py +3 -3
  17. langfun/core/eval/scoring.py +4 -3
  18. langfun/core/eval/v2/checkpointing.py +30 -4
  19. langfun/core/eval/v2/evaluation.py +59 -13
  20. langfun/core/eval/v2/example.py +22 -11
  21. langfun/core/eval/v2/experiment.py +51 -8
  22. langfun/core/eval/v2/metric_values.py +23 -3
  23. langfun/core/eval/v2/metrics.py +33 -4
  24. langfun/core/eval/v2/progress.py +9 -1
  25. langfun/core/eval/v2/reporting.py +15 -1
  26. langfun/core/eval/v2/runners.py +27 -7
  27. langfun/core/langfunc.py +45 -130
  28. langfun/core/language_model.py +88 -10
  29. langfun/core/llms/anthropic.py +27 -2
  30. langfun/core/llms/azure_openai.py +29 -17
  31. langfun/core/llms/cache/base.py +22 -2
  32. langfun/core/llms/cache/in_memory.py +48 -7
  33. langfun/core/llms/compositional.py +25 -1
  34. langfun/core/llms/deepseek.py +29 -1
  35. langfun/core/llms/fake.py +32 -1
  36. langfun/core/llms/gemini.py +9 -1
  37. langfun/core/llms/google_genai.py +29 -1
  38. langfun/core/llms/groq.py +27 -2
  39. langfun/core/llms/llama_cpp.py +22 -3
  40. langfun/core/llms/openai.py +29 -1
  41. langfun/core/llms/openai_compatible.py +18 -6
  42. langfun/core/llms/rest.py +12 -1
  43. langfun/core/llms/vertexai.py +39 -6
  44. langfun/core/logging.py +1 -1
  45. langfun/core/mcp/client.py +77 -22
  46. langfun/core/mcp/session.py +90 -10
  47. langfun/core/mcp/tool.py +83 -23
  48. langfun/core/memory.py +1 -0
  49. langfun/core/message.py +75 -11
  50. langfun/core/message_test.py +9 -0
  51. langfun/core/modalities/audio.py +21 -1
  52. langfun/core/modalities/image.py +19 -1
  53. langfun/core/modalities/mime.py +54 -4
  54. langfun/core/modalities/pdf.py +19 -1
  55. langfun/core/modalities/video.py +21 -1
  56. langfun/core/modality.py +66 -5
  57. langfun/core/natural_language.py +1 -1
  58. langfun/core/sampling.py +4 -4
  59. langfun/core/structured/completion.py +32 -37
  60. langfun/core/structured/description.py +54 -50
  61. langfun/core/structured/function_generation.py +29 -12
  62. langfun/core/structured/mapping.py +70 -15
  63. langfun/core/structured/parsing.py +90 -74
  64. langfun/core/structured/parsing_test.py +0 -3
  65. langfun/core/structured/querying.py +201 -130
  66. langfun/core/structured/schema.py +70 -10
  67. langfun/core/structured/schema_generation.py +33 -14
  68. langfun/core/structured/scoring.py +45 -34
  69. langfun/core/structured/tokenization.py +24 -9
  70. langfun/core/subscription.py +2 -2
  71. langfun/core/template.py +139 -40
  72. langfun/core/template_test.py +40 -0
  73. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/METADATA +1 -1
  74. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/RECORD +77 -77
  75. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/WHEEL +0 -0
  76. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/licenses/LICENSE +0 -0
  77. {langfun-0.1.2.dev202511030805.dist-info → langfun-0.1.2.dev202511050805.dist-info}/top_level.txt +0 -0
langfun/core/message.py CHANGED
@@ -32,15 +32,49 @@ class Message(
32
32
  pg.Object,
33
33
  pg.views.HtmlTreeView.Extension
34
34
  ):
35
- """Message.
35
+ """Message between users, LLMs and tools.
36
36
 
37
- ``Message`` is the protocol for users and the system to interact with
38
- LLMs. It consists of a text in the form of natural language,
39
- an identifier of the sender, and a dictionary of Python values as structured
40
- meta-data.
37
+ `lf.Message` is the fundamental unit of communication in Langfun. It
38
+ standardizes interactions with LLMs by encapsulating not only text but also
39
+ multi-modal content, as well as the sender's role and structured metadata.
41
40
 
42
- The subclasses of ``Message`` represent messages sent from different roles.
43
- Agents may use the roles to decide the orchastration logic.
41
+ **Key Components:**
42
+
43
+ * **`text`**: The natural language content of the message.
44
+ * **`sender`**: An identifier for the message originator (e.g., 'User',
45
+ 'AI', 'System').
46
+ * **`metadata`**: A dictionary for structured data, such as tool inputs/
47
+ outputs, scores, or other contextual information.
48
+ * **`referred_modalities`**: A dictionary of modality objects (e.g.,
49
+ `lf.Image`, `lf.Audio`) referenced within the message text via placeholders
50
+ like `<<[[image_id]]>>`.
51
+
52
+ Subclasses like `lf.UserMessage`, `lf.AIMessage`, and `lf.ToolMessage`
53
+ represent messages from specific roles, enabling more complex conversational
54
+ flows and agentic behaviors.
55
+
56
+ **Example:**
57
+
58
+ ```python
59
+ import langfun as lf
60
+
61
+ # Creating a user message with an image
62
+ image = lf.Image.from_path('/path/to/image.png')
63
+ user_message = lf.UserMessage(
64
+ f'What is in this image <<[[{image.id}]]>>?',
65
+ referred_modalities=[image])
66
+
67
+ # Creating an AI message with structured results
68
+ ai_message = lf.AIMessage(
69
+ 'It is a cat.',
70
+ metadata=dict(result=dict(label='cat', confidence=0.9)))
71
+
72
+ print(user_message.chunk())
73
+ # Output: ['What is in this image', <lf.Image object>, '?']
74
+
75
+ print(ai_message.result)
76
+ # Output: {'label': 'cat', 'confidence': 0.9}
77
+ ```
44
78
  """
45
79
 
46
80
  #
@@ -239,6 +273,11 @@ class Message(
239
273
  """
240
274
  return MessageConverter.get(format_or_type, **kwargs).to_value(self)
241
275
 
276
+ @classmethod
277
+ def is_convertible(cls, format_or_type: str | Type[Any]) -> bool:
278
+ """Returns True if the value can be converted to a message."""
279
+ return MessageConverter.is_convertible(format_or_type)
280
+
242
281
  @classmethod
243
282
  def convertible_formats(cls) -> list[str]:
244
283
  """Returns supported format for message conversion."""
@@ -412,7 +451,7 @@ class Message(
412
451
  var_name: str,
413
452
  default: Any = None
414
453
  ) -> modality.Modality | None:
415
- """Gets the modality object referred in the message.
454
+ """Returns modality object referred in the message by its variable name.
416
455
 
417
456
  Args:
418
457
  var_name: The referred variable name for the modality object.
@@ -424,7 +463,14 @@ class Message(
424
463
  return self.referred_modalities.get(var_name, default)
425
464
 
426
465
  def chunk(self, text: str | None = None) -> list[str | modality.Modality]:
427
- """Chunk a message into a list of str or modality objects."""
466
+ """Chunks message into a list of text and modality chunks.
467
+
468
+ Args:
469
+ text: The text to chunk. If None, use `self.text`.
470
+
471
+ Returns:
472
+ A list of text and modality chunks.
473
+ """
428
474
  chunks = []
429
475
 
430
476
  def add_text_chunk(text_piece: str) -> None:
@@ -464,7 +510,7 @@ class Message(
464
510
  def from_chunks(
465
511
  cls, chunks: list[str | modality.Modality], separator: str = ' '
466
512
  ) -> 'Message':
467
- """Assembly a message from a list of string or modality objects."""
513
+ """Assembles a message from a list of string or modality objects."""
468
514
  fused_text = io.StringIO()
469
515
  metadata = dict()
470
516
  referred_modalities = dict()
@@ -554,7 +600,7 @@ class Message(
554
600
  return self.trace(Message.TAG_LM_OUTPUT)
555
601
 
556
602
  def last(self, tag: str) -> Optional['Message']:
557
- """Return the last message wih certain tag."""
603
+ """Returns the last message with a given tag."""
558
604
  current = self
559
605
  while current is not None:
560
606
  if tag in current.tags:
@@ -908,6 +954,12 @@ class _MessageConverterRegistry:
908
954
  if converter.OUTPUT_TYPE is not None:
909
955
  self._type_to_converters[converter.OUTPUT_TYPE].append(converter)
910
956
 
957
+ def unregister(self, converter: Type['MessageConverter']) -> None:
958
+ """Unregisters a message converter."""
959
+ self._name_to_converter.pop(converter.FORMAT_ID, None)
960
+ if converter.OUTPUT_TYPE is not None:
961
+ self._type_to_converters[converter.OUTPUT_TYPE].remove(converter)
962
+
911
963
  def get_by_type(self, t: Type[Any], **kwargs) -> 'MessageConverter':
912
964
  """Returns a message converter for the given type."""
913
965
  t = self._type_to_converters[t]
@@ -938,6 +990,13 @@ class _MessageConverterRegistry:
938
990
  assert isinstance(format_or_type, type), format_or_type
939
991
  return self.get_by_type(format_or_type, **kwargs)
940
992
 
993
+ def is_convertible(self, format_or_type: str | Type[Any]) -> bool:
994
+ """Returns whether the message is convertible to the given format or type."""
995
+ if isinstance(format_or_type, str):
996
+ return format_or_type in self._name_to_converter
997
+ assert isinstance(format_or_type, type), format_or_type
998
+ return bool(self._type_to_converters.get(format_or_type))
999
+
941
1000
  def convertible_formats(self) -> list[str]:
942
1001
  """Returns a list of converter names."""
943
1002
  return sorted(list(self._name_to_converter.keys()))
@@ -1029,6 +1088,11 @@ class MessageConverter(pg.Object):
1029
1088
  """Returns a message converter for the given type."""
1030
1089
  return cls._REGISTRY.get_by_type(t, **kwargs)
1031
1090
 
1091
+ @classmethod
1092
+ def is_convertible(cls, format_or_type: str | Type[Any]) -> bool:
1093
+ """Returns whether the message is convertible to the given format or type."""
1094
+ return cls._REGISTRY.is_convertible(format_or_type)
1095
+
1032
1096
  @classmethod
1033
1097
  def convertible_formats(cls) -> list[str]:
1034
1098
  """Returns a list of converter names."""
langfun/core/message_test.py CHANGED
@@ -500,6 +500,12 @@ class MessageConverterTest(unittest.TestCase):
500
500
  self.assertIn('test_format2', message.Message.convertible_formats())
501
501
  self.assertIn('test_format3', message.Message.convertible_formats())
502
502
 
503
+ self.assertTrue(message.Message.is_convertible(int))
504
+ self.assertFalse(message.Message.is_convertible(dict))
505
+ self.assertTrue(message.Message.is_convertible('test_format1'))
506
+ self.assertTrue(message.Message.is_convertible('test_format2'))
507
+ self.assertTrue(message.Message.is_convertible('test_format3'))
508
+ self.assertFalse(message.Message.is_convertible('test_format4'))
503
509
  self.assertIn(int, message.Message.convertible_types())
504
510
  self.assertIn(tuple, message.Message.convertible_types())
505
511
  self.assertEqual(
@@ -540,6 +546,9 @@ class MessageConverterTest(unittest.TestCase):
540
546
  message.Message.from_value((1, 2, 3)),
541
547
  message.UserMessage('1,2,3')
542
548
  )
549
+ message.MessageConverter._REGISTRY.unregister(TestConverter)
550
+ message.MessageConverter._REGISTRY.unregister(TestConverter2)
551
+ message.MessageConverter._REGISTRY.unregister(TestConverter3)
543
552
 
544
553
  def test_get_role(self):
545
554
  self.assertEqual(
langfun/core/modalities/audio.py CHANGED
@@ -18,7 +18,27 @@ from langfun.core.modalities import mime
18
18
 
19
19
 
20
20
  class Audio(mime.Mime):
21
- """Audio."""
21
+ """Represents audio for communicating with language models.
22
+
23
+ `lf.Audio` can be initialized from a URI (HTTP/HTTPS URL or local path)
24
+ using `lf.Audio.from_uri()` or from raw bytes using `lf.Audio.from_bytes()`.
25
+
26
+ **Example:**
27
+
28
+ ```python
29
+ import langfun as lf
30
+
31
+ # Load audio from path
32
+ audio = lf.Audio.from_path('/path/to/audio.mp3')
33
+
34
+ # Use audio in a prompt
35
+ prompt = lf.Template(
36
+ 'What is being said in this audio? {{audio}}', audio=audio
37
+ )
38
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
39
+ print(response)
40
+ ```
41
+ """
22
42
 
23
43
  MIME_PREFIX = 'audio'
24
44
 
langfun/core/modalities/image.py CHANGED
@@ -33,7 +33,25 @@ except ImportError:
33
33
 
34
34
 
35
35
  class Image(mime.Mime):
36
- """Image."""
36
+ """Represents an image for communicating with language models.
37
+
38
+ `lf.Image` can be initialized from a URI (HTTP/HTTPS URL or local path)
39
+ using `lf.Image.from_uri()` or from raw bytes using `lf.Image.from_bytes()`.
40
+
41
+ **Example:**
42
+
43
+ ```python
44
+ import langfun as lf
45
+
46
+ # Load image from path
47
+ image = lf.Image.from_path('/path/to/image.png')
48
+
49
+ # Use image in a prompt
50
+ prompt = lf.Template('Describe this image: {{image}}', image=image)
51
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
52
+ print(response)
53
+ ```
54
+ """
37
55
 
38
56
  MIME_PREFIX = 'image'
39
57
 
langfun/core/modalities/mime.py CHANGED
@@ -37,7 +37,33 @@ def _detect_mime_type(content: bytes) -> str:
37
37
 
38
38
 
39
39
  class Mime(lf.Modality):
40
- """Base for MIME data."""
40
+ """Base class for representing modality data based on MIME types.
41
+
42
+ `lf.Mime` is a subclass of `lf.Modality` that serves as a base for
43
+ handling various data types like images, audio, video, and PDFs,
44
+ identified by their MIME types. It provides unified methods for
45
+ loading data from URIs or bytes (`.from_uri()`, `.from_bytes()`) and
46
+ for accessing content (`.to_bytes()`).
47
+
48
+ Subclasses like `lf.Image`, `lf.Audio`, `lf.Video`, and `lf.PDF`
49
+ specialize in handling specific MIME type prefixes (e.g., 'image/', 'audio/').
50
+
51
+ **Example:**
52
+
53
+ ```python
54
+ import langfun as lf
55
+
56
+ # Load an image from a path
57
+ image = lf.Image.from_path('/path/to/image.png')
58
+ print(image.mime_type)
59
+ # Output: image/png
60
+
61
+ # Create a text document
62
+ text = lf.Custom.from_bytes(b'hello world', mime='text/plain')
63
+ print(text.mime_type)
64
+ # Output: text/plain
65
+ ```
66
+ """
41
67
 
42
68
  # The regular expression that describes the MIME type str.
43
69
  # If None, the MIME type is dynamic. Subclass could override.
@@ -49,6 +75,10 @@ class Mime(lf.Modality):
49
75
  Union[str, bytes, None], 'The raw content of the MIME type.'
50
76
  ] = None
51
77
 
78
+ metadata: Annotated[
79
+ dict[str, Any], 'Additional metadata attached to this object.'
80
+ ] = {}
81
+
52
82
  @functools.cached_property
53
83
  def mime_type(self) -> str:
54
84
  """Returns the MIME type."""
@@ -94,7 +124,10 @@ class Mime(lf.Modality):
94
124
  # Hash the URI to avoid downloading the content.
95
125
  if self.uri is not None:
96
126
  return hashlib.md5(self.uri.encode()).hexdigest()[:8]
97
- return super().hash
127
+ if self.content is not None:
128
+ return super().hash
129
+ assert self.metadata
130
+ return hashlib.md5(str(self.metadata).encode()).hexdigest()[:8]
98
131
 
99
132
  def to_text(self) -> str:
100
133
  """Returns the text content of the MIME type."""
@@ -141,7 +174,7 @@ class Mime(lf.Modality):
141
174
 
142
175
  def _on_bound(self):
143
176
  super()._on_bound()
144
- if self.uri is None and self.content is None:
177
+ if self.uri is None and self.content is None and not self.metadata:
145
178
  raise ValueError('Either uri or content must be provided.')
146
179
 
147
180
  def to_bytes(self) -> bytes:
@@ -281,7 +314,24 @@ class Mime(lf.Modality):
281
314
 
282
315
  @pg.use_init_args(['mime', 'content', 'uri'])
283
316
  class Custom(Mime):
284
- """Custom MIME data."""
317
+ """Represents content of a custom MIME type.
318
+
319
+ `lf.modalities.Custom` is useful for representing data with MIME types
320
+ that do not have dedicated classes like `lf.Image` or `lf.Audio`.
321
+
322
+ **Example:**
323
+
324
+ ```python
325
+ import langfun as lf
326
+
327
+ # Create a custom MIME object for plain text
328
+ text_data = lf.Custom.from_bytes(
329
+ b'This is a text document.', mime='text/plain'
330
+ )
331
+ print(text_data.mime_type)
332
+ # Output: text/plain
333
+ ```
334
+ """
285
335
 
286
336
  mime: Annotated[
287
337
  str, 'The MIME type of the data. E.g. text/plain, or image/png. '
langfun/core/modalities/pdf.py CHANGED
@@ -17,6 +17,24 @@ from langfun.core.modalities import mime
17
17
 
18
18
 
19
19
  class PDF(mime.Mime):
20
- """PDF document."""
20
+ """Represents a PDF document for communicating with language models.
21
+
22
+ `lf.PDF` can be initialized from a URI (HTTP/HTTPS URL or local path)
23
+ using `lf.PDF.from_uri()` or from raw bytes using `lf.PDF.from_bytes()`.
24
+
25
+ **Example:**
26
+
27
+ ```python
28
+ import langfun as lf
29
+
30
+ # Load PDF from path
31
+ pdf = lf.PDF.from_path('/path/to/document.pdf')
32
+
33
+ # Use PDF in a prompt
34
+ prompt = lf.Template('Summarize this document: {{pdf}}', pdf=pdf)
35
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
36
+ print(response)
37
+ ```
38
+ """
21
39
 
22
40
  MIME_PREFIX = 'application/pdf'
langfun/core/modalities/video.py CHANGED
@@ -18,7 +18,27 @@ from langfun.core.modalities import mime
18
18
 
19
19
 
20
20
  class Video(mime.Mime):
21
- """Video."""
21
+ """Represents a video for communicating with language models.
22
+
23
+ `lf.Video` can be initialized from a URI (HTTP/HTTPS URL or local path)
24
+ using `lf.Video.from_uri()` or from raw bytes using `lf.Video.from_bytes()`.
25
+
26
+ **Example:**
27
+
28
+ ```python
29
+ import langfun as lf
30
+
31
+ # Load video from path
32
+ video = lf.Video.from_path('/path/to/video.mp4')
33
+
34
+ # Use video in a prompt
35
+ prompt = lf.Template(
36
+ 'What is happening in this video? {{video}}', video=video
37
+ )
38
+ response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
39
+ print(response)
40
+ ```
41
+ """
22
42
 
23
43
  MIME_PREFIX = 'video'
24
44
 
langfun/core/modality.py CHANGED
@@ -24,7 +24,35 @@ import pyglove as pg
24
24
 
25
25
 
26
26
  class Modality(component.Component, pg.views.HtmlTreeView.Extension):
27
- """Base class for multimodal object."""
27
+ """Base class for representing non-text content in prompts.
28
+
29
+ `lf.Modality` is the base class for multimodal objects such as `lf.Image`,
30
+ `lf.Audio`, and `lf.Video`. It allows these non-text inputs to be
31
+ seamlessly embedded within text prompts for processing by multimodal
32
+ language models.
33
+
34
+ When a `Modality` object is rendered within an `lf.Template`, it is
35
+ replaced by a text marker (e.g., `<<[[image:b10a8db1]]>>`), and the
36
+ modality object itself is stored in the `referred_modalities` field of
37
+ the resulting `lf.Message`. This allows language models to associate
38
+ the placeholder with its content during processing.
39
+
40
+ **Example:**
41
+
42
+ ```python
43
+ import langfun as lf
44
+
45
+ image = lf.Image.from_path('/path/to/image.png')
46
+ prompt = lf.Template('What is in this image? {{image}}', image=image)
47
+
48
+ message = prompt.render()
49
+ print(message.text)
50
+ # Output: What is in this image? <<[[image:b10a8db1]]>>
51
+
52
+ print(message.modalities())
53
+ # Output: [<Image object>]
54
+ ```
55
+ """
28
56
 
29
57
  REF_START = '<<[['
30
58
  REF_END = ']]>>'
@@ -87,11 +115,44 @@ class Modality(component.Component, pg.views.HtmlTreeView.Extension):
87
115
 
88
116
 
89
117
  class ModalityRef(pg.Object, pg.typing.CustomTyping):
90
- """References of modality objects in a symbolic tree.
118
+ """Lightweight placeholder for a `lf.Modality` object in a symbolic tree.
119
+
120
+ `ModalityRef` acts as a reference to a `Modality` object (like `lf.Image`
121
+ or `lf.Audio`) within a structured object hierarchy (e.g., a `pg.Object`).
122
+ Instead of embedding potentially large modality data directly, `ModalityRef`
123
+ stores only the ID of the modality object.
124
+
125
+ This is useful in scenarios where structured objects are serialized or
126
+ manipulated, and it's more efficient to refer to modalities by ID rather
127
+ than copying their content. The `lf.ModalityRef.placehold()` class method
128
+ can be used to replace `Modality` instances in a symbolic object with
129
+ `ModalityRef` placeholders, while `lf.ModalityRef.restore()` can reinstate
130
+ the original `Modality` objects using a lookup table.
131
+
132
+ **Example:**
133
+
134
+ ```python
135
+ import langfun as lf
136
+ import pyglove as pg
137
+
138
+ class ImagePair(pg.Object):
139
+ image1: lf.Image
140
+ image2: lf.Image
141
+
142
+ pair = ImagePair(
143
+ image1=lf.Image(content=b'abc'), image2=lf.Image(content=b'def')
144
+ )
145
+ modalities = lf.Modality.from_value(pair)
146
+
147
+ # Replace Image objects with ModalityRef placeholders
148
+ pair_with_refs = lf.ModalityRef.placehold(pair)
149
+ print(pair_with_refs.image1)
150
+ # Output: ModalityRef(id='image:d81e5a68')
91
151
 
92
- `ModalityRef` was introduced to placehold modality objects in a symbolic
93
- tree, to prevent message from being chunked in the middle of a Python
94
- structure.
152
+ # Restore Image objects from ModalityRef placeholders
153
+ pair_restored = lf.ModalityRef.restore(pair_with_refs, modalities)
154
+ assert pair_restored.image1.content == b'abc'
155
+ ```
95
156
  """
96
157
 
97
158
  id: str
langfun/core/natural_language.py CHANGED
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- """Natural language utilities."""
14
+ """Natural language formatting."""
15
15
 
16
16
  import abc
17
17
  import pyglove as pg
langfun/core/sampling.py CHANGED
@@ -38,10 +38,10 @@ def sweep(
38
38
  Union[message_lib.Message, BaseException, None], # LM output.
39
39
  ],
40
40
  ]:
41
- """Sweeps the input/output of this LangFunc concurrently.
41
+ """Sweeps the input/output of a LangFunc search space concurrently.
42
42
 
43
43
  Args:
44
- lfun: An LangFunc object that contains `pg.oneof` as the search space
44
+ lfun: An LangFunc object that contains `pg.oneof` as the search space
45
45
  for sampling.
46
46
  num_examples: Number of examples to sample.
47
47
  max_workers: Max number of concurrent workers to do sampling.
@@ -84,10 +84,10 @@ def random_sample(
84
84
  Union[message_lib.Message, BaseException, None], # LM output.
85
85
  ],
86
86
  ]:
87
- """Random samples the input/output of this LangFunc concurrently.
87
+ """Random samples the input/output of a LangFunc search space concurrently.
88
88
 
89
89
  Args:
90
- lfun: An LangFunc object that contains `pg.oneof` as the search space
90
+ lfun: An LangFunc object that contains `pg.oneof` as the search space
91
91
  for sampling.
92
92
  num_examples: Number of examples to sample.
93
93
  max_workers: Max number of concurrent workers to do sampling.
langfun/core/structured/completion.py CHANGED
@@ -116,7 +116,7 @@ class _CompleteStructure(mapping.Mapping):
116
116
  )
117
117
 
118
118
  def postprocess_result(self, result: Any) -> Any:
119
- """Postprocess result."""
119
+ """Postprocesses result."""
120
120
  # Try restore modality objects from the input value to output value.
121
121
  if modalities := self.modalities(self.input):
122
122
  result = lf.ModalityRef.restore(result, modalities)
@@ -151,7 +151,7 @@ class _CompleteStructure(mapping.Mapping):
151
151
  #
152
152
 
153
153
  def has_modality_refs(self, value: Any) -> bool:
154
- """Returns true if the value has modalities."""
154
+ """Returns True if the value has modalities."""
155
155
  return not isinstance(value, lf.Modality) and pg.contains(
156
156
  value, type=lf.Modality
157
157
  )
@@ -181,41 +181,36 @@ def complete(
181
181
  returns_message: bool = False,
182
182
  **kwargs,
183
183
  ) -> Any:
184
- """Complete a symbolic value by filling its missing fields.
185
-
186
- Examples:
187
-
188
- ```
189
- class FlightDuration:
190
- hours: int
191
- minutes: int
192
-
193
- class Flight(pg.Object):
194
- airline: str
195
- flight_number: str
196
- departure_airport_code: str
197
- arrival_airport_code: str
198
- departure_time: str
199
- arrival_time: str
200
- duration: FlightDuration
201
- stops: int
202
- price: float
203
-
204
- prompt = '''
205
- Information about flight UA2631.
206
- '''
207
-
208
- r = lf.query(prompt, Flight)
209
- assert isinstance(r, Flight)
210
- assert r.airline == 'United Airlines'
211
- assert r.departure_airport_code == 'SFO'
212
- assert r.duration.hour = 7
213
- ```
184
+ """Completes a symbolic value by filling its missing fields using an LLM.
185
+
186
+ `lf.complete` is used to fill in missing information in structured
187
+ data. It takes a partially defined `pg.Object` instance where some fields
188
+ are marked as `lf.MISSING`, and uses a language model to infer and
189
+ populate those fields based on the provided values.
190
+
191
+ **Example:**
192
+
193
+ ```python
194
+ import langfun as lf
195
+ import pyglove as pg
196
+
197
+ class Country(pg.Object):
198
+ name: str
199
+ capital: str = lf.MISSING
200
+ population: int = lf.MISSING
201
+
202
+ # Filling missing fields of Country(name='France')
203
+ country = lf.complete(Country(name='France'), lm=lf.llms.Gemini25Flash())
204
+ print(country)
205
+ # Output: Country(name='France', capital='Paris', population=67000000)
206
+ ```
214
207
 
215
208
  Args:
216
- input_value: A symbolic value that may contain missing values.
217
- default: The default value if parsing failed. If not specified, error will
218
- be raised.
209
+ input_value: A symbolic value that may contain missing values marked
210
+ by `lf.MISSING`.
211
+ default: The default value to return if parsing fails. If
212
+ `lf.RAISE_IF_HAS_ERROR` is used (default), an error will be raised
213
+ instead.
219
214
  lm: The language model to use. If not specified, the language model from
220
215
  `lf.context` context manager will be used.
221
216
  examples: An optional list of fewshot examples for helping parsing. If None,
@@ -231,10 +226,10 @@ def complete(
231
226
  returns_message: If True, returns `lf.Message` as the output, instead of
232
227
  returning the structured `message.result`.
233
228
  **kwargs: Keyword arguments passed to the
234
- `lf.structured.NaturalLanguageToStructureed` transform.
229
+ `lf.structured.Mapping` transform.
235
230
 
236
231
  Returns:
237
- The result based on the schema.
232
+ The input object with missing fields completed by LLM.
238
233
  """
239
234
  t = _CompleteStructure(
240
235
  input=schema_lib.mark_missing(input_value),