PyPI - langfun - Versions diffs - 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl - Mend

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (162)

langfun/__init__.py +1 -1
langfun/core/__init__.py +7 -1
langfun/core/agentic/__init__.py +8 -1
langfun/core/agentic/action.py +740 -112
langfun/core/agentic/action_eval.py +9 -2
langfun/core/agentic/action_test.py +189 -24
langfun/core/async_support.py +104 -5
langfun/core/async_support_test.py +23 -0
langfun/core/coding/python/correction.py +19 -9
langfun/core/coding/python/execution.py +14 -12
langfun/core/coding/python/generation.py +21 -16
langfun/core/coding/python/sandboxing.py +23 -3
langfun/core/component.py +42 -3
langfun/core/concurrent.py +70 -6
langfun/core/concurrent_test.py +9 -2
langfun/core/console.py +1 -1
langfun/core/data/conversion/anthropic.py +12 -3
langfun/core/data/conversion/anthropic_test.py +8 -6
langfun/core/data/conversion/gemini.py +11 -2
langfun/core/data/conversion/gemini_test.py +48 -9
langfun/core/data/conversion/openai.py +145 -31
langfun/core/data/conversion/openai_test.py +161 -17
langfun/core/eval/base.py +48 -44
langfun/core/eval/base_test.py +5 -5
langfun/core/eval/matching.py +5 -2
langfun/core/eval/patching.py +3 -3
langfun/core/eval/scoring.py +4 -3
langfun/core/eval/v2/__init__.py +3 -0
langfun/core/eval/v2/checkpointing.py +148 -46
langfun/core/eval/v2/checkpointing_test.py +9 -2
langfun/core/eval/v2/config_saver.py +37 -0
langfun/core/eval/v2/config_saver_test.py +36 -0
langfun/core/eval/v2/eval_test_helper.py +104 -3
langfun/core/eval/v2/evaluation.py +102 -19
langfun/core/eval/v2/evaluation_test.py +9 -3
langfun/core/eval/v2/example.py +50 -40
langfun/core/eval/v2/example_test.py +16 -8
langfun/core/eval/v2/experiment.py +95 -20
langfun/core/eval/v2/experiment_test.py +19 -0
langfun/core/eval/v2/metric_values.py +31 -3
langfun/core/eval/v2/metric_values_test.py +32 -0
langfun/core/eval/v2/metrics.py +157 -44
langfun/core/eval/v2/metrics_test.py +39 -18
langfun/core/eval/v2/progress.py +31 -1
langfun/core/eval/v2/progress_test.py +27 -0
langfun/core/eval/v2/progress_tracking.py +13 -5
langfun/core/eval/v2/progress_tracking_test.py +9 -1
langfun/core/eval/v2/reporting.py +88 -71
langfun/core/eval/v2/reporting_test.py +24 -6
langfun/core/eval/v2/runners/__init__.py +30 -0
langfun/core/eval/v2/{runners.py → runners/base.py} +73 -180
langfun/core/eval/v2/runners/beam.py +354 -0
langfun/core/eval/v2/runners/beam_test.py +153 -0
langfun/core/eval/v2/runners/ckpt_monitor.py +350 -0
langfun/core/eval/v2/runners/ckpt_monitor_test.py +213 -0
langfun/core/eval/v2/runners/debug.py +40 -0
langfun/core/eval/v2/runners/debug_test.py +76 -0
langfun/core/eval/v2/runners/parallel.py +243 -0
langfun/core/eval/v2/runners/parallel_test.py +182 -0
langfun/core/eval/v2/runners/sequential.py +47 -0
langfun/core/eval/v2/runners/sequential_test.py +169 -0
langfun/core/langfunc.py +45 -130
langfun/core/langfunc_test.py +7 -5
langfun/core/language_model.py +189 -36
langfun/core/language_model_test.py +54 -3
langfun/core/llms/__init__.py +14 -1
langfun/core/llms/anthropic.py +157 -2
langfun/core/llms/azure_openai.py +29 -17
langfun/core/llms/cache/base.py +25 -3
langfun/core/llms/cache/in_memory.py +48 -7
langfun/core/llms/cache/in_memory_test.py +14 -4
langfun/core/llms/compositional.py +25 -1
langfun/core/llms/deepseek.py +30 -2
langfun/core/llms/fake.py +32 -1
langfun/core/llms/gemini.py +90 -12
langfun/core/llms/gemini_test.py +110 -0
langfun/core/llms/google_genai.py +52 -1
langfun/core/llms/groq.py +28 -3
langfun/core/llms/llama_cpp.py +23 -4
langfun/core/llms/openai.py +120 -3
langfun/core/llms/openai_compatible.py +148 -27
langfun/core/llms/openai_compatible_test.py +207 -20
langfun/core/llms/openai_test.py +0 -2
langfun/core/llms/rest.py +16 -1
langfun/core/llms/vertexai.py +78 -8
langfun/core/logging.py +1 -1
langfun/core/mcp/__init__.py +10 -0
langfun/core/mcp/client.py +177 -0
langfun/core/mcp/client_test.py +71 -0
langfun/core/mcp/session.py +241 -0
langfun/core/mcp/session_test.py +54 -0
langfun/core/mcp/testing/simple_mcp_client.py +33 -0
langfun/core/mcp/testing/simple_mcp_server.py +33 -0
langfun/core/mcp/tool.py +254 -0
langfun/core/mcp/tool_test.py +197 -0
langfun/core/memory.py +1 -0
langfun/core/message.py +160 -55
langfun/core/message_test.py +65 -81
langfun/core/modalities/__init__.py +8 -0
langfun/core/modalities/audio.py +21 -1
langfun/core/modalities/image.py +73 -3
langfun/core/modalities/image_test.py +116 -0
langfun/core/modalities/mime.py +78 -4
langfun/core/modalities/mime_test.py +59 -0
langfun/core/modalities/pdf.py +19 -1
langfun/core/modalities/video.py +21 -1
langfun/core/modality.py +167 -29
langfun/core/modality_test.py +42 -12
langfun/core/natural_language.py +1 -1
langfun/core/sampling.py +4 -4
langfun/core/sampling_test.py +20 -4
langfun/core/structured/__init__.py +2 -24
langfun/core/structured/completion.py +34 -44
langfun/core/structured/completion_test.py +23 -43
langfun/core/structured/description.py +54 -50
langfun/core/structured/function_generation.py +29 -12
langfun/core/structured/mapping.py +81 -37
langfun/core/structured/parsing.py +95 -79
langfun/core/structured/parsing_test.py +0 -3
langfun/core/structured/querying.py +230 -154
langfun/core/structured/querying_test.py +69 -33
langfun/core/structured/schema/__init__.py +49 -0
langfun/core/structured/schema/base.py +664 -0
langfun/core/structured/schema/base_test.py +531 -0
langfun/core/structured/schema/json.py +174 -0
langfun/core/structured/schema/json_test.py +121 -0
langfun/core/structured/schema/python.py +316 -0
langfun/core/structured/schema/python_test.py +410 -0
langfun/core/structured/schema_generation.py +33 -14
langfun/core/structured/scoring.py +47 -36
langfun/core/structured/tokenization.py +26 -11
langfun/core/subscription.py +2 -2
langfun/core/template.py +175 -50
langfun/core/template_test.py +123 -17
langfun/env/__init__.py +43 -0
langfun/env/base_environment.py +827 -0
langfun/env/base_environment_test.py +473 -0
langfun/env/base_feature.py +304 -0
langfun/env/base_feature_test.py +228 -0
langfun/env/base_sandbox.py +842 -0
langfun/env/base_sandbox_test.py +1235 -0
langfun/env/event_handlers/__init__.py +14 -0
langfun/env/event_handlers/chain.py +233 -0
langfun/env/event_handlers/chain_test.py +253 -0
langfun/env/event_handlers/event_logger.py +472 -0
langfun/env/event_handlers/event_logger_test.py +304 -0
langfun/env/event_handlers/metric_writer.py +726 -0
langfun/env/event_handlers/metric_writer_test.py +214 -0
langfun/env/interface.py +1640 -0
langfun/env/interface_test.py +153 -0
langfun/env/load_balancers.py +59 -0
langfun/env/load_balancers_test.py +141 -0
langfun/env/test_utils.py +507 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/METADATA +7 -3
langfun-0.1.2.dev202512150805.dist-info/RECORD +217 -0
langfun/core/eval/v2/runners_test.py +0 -343
langfun/core/structured/schema.py +0 -987
langfun/core/structured/schema_test.py +0 -982
langfun-0.1.2.dev202509120804.dist-info/RECORD +0 -172
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/WHEEL +0 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/licenses/LICENSE +0 -0
{langfun-0.1.2.dev202509120804.dist-info → langfun-0.1.2.dev202512150805.dist-info}/top_level.txt +0 -0

langfun/core/modalities/image_test.py CHANGED Viewed

@@ -103,6 +103,122 @@ class ImageTest(unittest.TestCase):
         image_lib.Image.from_pil_image(image), image_lib.Image
     )
+  def test_from_pil_image_os_error(self):
+    img = pil_image.open(io.BytesIO(image_content))
+    with mock.patch.object(img, 'save') as mock_save:
+      mock_save.side_effect = [OSError, None]
+      with mock.patch('os.chdir') as mock_chdir:
+        with mock.patch('os.getcwd') as mock_getcwd:
+          mock_getcwd.return_value = '/curr/dir'
+          image = image_lib.Image.from_pil_image(img)
+          self.assertIsInstance(image, image_lib.Image)
+          self.assertEqual(mock_save.call_count, 2)
+          mock_save.assert_has_calls([
+              mock.call(mock.ANY, format='PNG'),
+              mock.call(mock.ANY, format='PNG'),
+          ])
+          mock_chdir.assert_has_calls([
+              mock.call('/tmp'),
+              mock.call('/curr/dir'),
+          ])
+  def test_gif_is_compatible(self):
+    # Create a simple 1x1 GIF image using PIL
+    buf = io.BytesIO()
+    img = pil_image.new('P', (1, 1))
+    img.save(buf, format='GIF')
+    gif_bytes = buf.getvalue()
+    gif_image = image_lib.Image.from_bytes(gif_bytes)
+    self.assertEqual(gif_image.mime_type, 'image/gif')
+    # GIF should be compatible if PNG is in supported types
+    self.assertTrue(gif_image._is_compatible(['image/png']))
+    self.assertTrue(gif_image._is_compatible(['image/jpeg', 'image/webp']))
+    self.assertTrue(gif_image._is_compatible(['image/png', 'image/jpeg']))
+    # GIF should not be compatible if only unsupported types
+    self.assertFalse(gif_image._is_compatible(['video/mp4']))
+    self.assertFalse(gif_image._is_compatible(['application/pdf']))
+  def test_gif_make_compatible(self):
+    # Create a simple 1x1 GIF image using PIL
+    buf = io.BytesIO()
+    img = pil_image.new('P', (1, 1))
+    img.save(buf, format='GIF')
+    gif_bytes = buf.getvalue()
+    gif_image = image_lib.Image.from_bytes(gif_bytes)
+    self.assertEqual(gif_image.mime_type, 'image/gif')
+    # Test 1: Convert to PNG (first priority when available)
+    converted = gif_image.make_compatible(['image/png', 'image/jpeg'])
+    self.assertEqual(converted.mime_type, 'image/png')
+    self.assertIsInstance(converted, image_lib.Image)
+    # Test 2: Convert to JPEG when PNG not supported
+    converted = gif_image.make_compatible(['image/jpeg', 'image/webp'])
+    self.assertEqual(converted.mime_type, 'image/jpeg')
+    # Test 3: Convert to WEBP when PNG and JPEG not supported
+    converted = gif_image.make_compatible(['image/webp'])
+    self.assertEqual(converted.mime_type, 'image/webp')
+    # Test 4: Should raise error when no compatible format
+    with self.assertRaises(lf.ModalityError):
+      gif_image.make_compatible(['video/mp4'])
+  def test_is_compatible_direct_match(self):
+    image = image_lib.Image.from_bytes(image_content)  # image/png
+    self.assertTrue(image._is_compatible(['image/png', 'image/jpeg']))
+    self.assertTrue(image._is_compatible(['image/png']))
+    self.assertFalse(image._is_compatible(['image/jpeg']))
+  def test_make_compatible_no_conversion(self):
+    image = image_lib.Image.from_bytes(image_content)  # image/png
+    converted_image = image.make_compatible(['image/png', 'image/jpeg'])
+    self.assertIs(image, converted_image)
+  def test_convert_to_format_jpeg_transparency(self):
+    # Create a simple RGBA PNG image
+    buf = io.BytesIO()
+    img = pil_image.new('RGBA', (1, 1), (255, 0, 0, 128))
+    img.save(buf, format='PNG')
+    rgba_png_bytes = buf.getvalue()
+    rgba_image = image_lib.Image.from_bytes(rgba_png_bytes)
+    self.assertEqual(rgba_image.mime_type, 'image/png')
+    # Convert to JPEG, should trigger transparency handling
+    converted_image = rgba_image._convert_to_format('JPEG')
+    self.assertEqual(converted_image.mime_type, 'image/jpeg')
+    pil_img = converted_image.to_pil_image()
+    self.assertEqual(pil_img.mode, 'RGB')
+  def test_convert_to_format_os_error(self):
+    image = image_lib.Image.from_bytes(image_content)
+    mock_pil_image = mock.MagicMock()
+    mock_save = mock_pil_image.save
+    mock_save.side_effect = [OSError, None]
+    with mock.patch.object(
+        image, 'to_pil_image', return_value=mock_pil_image
+    ), mock.patch('os.chdir') as mock_chdir, mock.patch(
+        'os.getcwd'
+    ) as mock_getcwd:
+      mock_getcwd.return_value = '/curr/dir'
+      converted_image = image._convert_to_format('PNG')
+      self.assertIsInstance(converted_image, image_lib.Image)
+      self.assertEqual(mock_save.call_count, 2)
+      mock_save.assert_has_calls([
+          mock.call(mock.ANY, format='PNG'),
+          mock.call(mock.ANY, format='PNG'),
+      ])
+      mock_chdir.assert_has_calls([
+          mock.call('/tmp'),
+          mock.call('/curr/dir'),
+      ])
 if __name__ == '__main__':
   unittest.main()

langfun/core/modalities/mime.py CHANGED Viewed

@@ -15,6 +15,7 @@
 import base64
 import functools
+import hashlib
 from typing import Annotated, Any, Iterable, Type, Union
 import langfun.core as lf
 # Placeholder for Google-internal internet access import.
@@ -36,7 +37,33 @@ def _detect_mime_type(content: bytes) -> str:
 class Mime(lf.Modality):
-  """Base for MIME data."""
+  """Base class for representing modality data based on MIME types.
+  `lf.Mime` is a subclass of `lf.Modality` that serves as a base for
+  handling various data types like images, audio, video, and PDFs,
+  identified by their MIME types. It provides unified methods for
+  loading data from URIs or bytes (`.from_uri()`, `.from_bytes()`) and
+  for accessing content (`.to_bytes()`).
+  Subclasses like `lf.Image`, `lf.Audio`, `lf.Video`, and `lf.PDF`
+  specialize in handling specific MIME type prefixes (e.g., 'image/', 'audio/').
+  **Example:**
+  ```python
+  import langfun as lf
+  # Load an image from a path
+  image = lf.Image.from_path('/path/to/image.png')
+  print(image.mime_type)
+  # Output: image/png
+  # Create a text document
+  text = lf.Custom.from_bytes(b'hello world', mime='text/plain')
+  print(text.mime_type)
+  # Output: text/plain
+  ```
+  """
   # The regular expression that describes the MIME type str.
   # If None, the MIME type is dynamic. Subclass could override.
@@ -48,6 +75,10 @@ class Mime(lf.Modality):
       Union[str, bytes, None], 'The raw content of the MIME type.'
   ] = None
+  metadata: Annotated[
+      dict[str, Any], 'Additional metadata attached to this object.'
+  ] = {}
   @functools.cached_property
   def mime_type(self) -> str:
     """Returns the MIME type."""
@@ -87,13 +118,37 @@ class Mime(lf.Modality):
     """Returns True if the MIME type is a binary type."""
     return not self.is_text
+  @property
+  def hash(self) -> str:
+    """Returns the hash of the MIME content."""
+    # Hash the URI to avoid downloading the content.
+    if self.uri is not None:
+      return hashlib.md5(self.uri.encode()).hexdigest()[:8]
+    if self.content is not None:
+      return super().hash
+    assert self.metadata
+    return hashlib.md5(str(self.metadata).encode()).hexdigest()[:8]
   def to_text(self) -> str:
     """Returns the text content of the MIME type."""
     if not self.is_text:
       raise lf.ModalityError(
           f'MIME type {self.mime_type!r} cannot be converted to text.'
       )
-    return self.to_bytes().decode()
+    content = self.to_bytes()
+    # Try UTF-8 first (most common encoding).
+    try:
+      return content.decode('utf-8')
+    except UnicodeDecodeError:
+      pass
+    # Check for UTF-16 BOM (0xff 0xfe or 0xfe 0xff).
+    if content[:2] in (b'\xff\xfe', b'\xfe\xff'):
+      try:
+        return content.decode('utf-16')
+      except UnicodeDecodeError:
+        pass
+    # Fallback: decode with error replacement to avoid crashing.
+    return content.decode('utf-8', errors='replace')
   def is_compatible(
       self, mime_types: str | Iterable[str]
@@ -132,7 +187,7 @@ class Mime(lf.Modality):
   def _on_bound(self):
     super()._on_bound()
-    if self.uri is None and self.content is None:
+    if self.uri is None and self.content is None and not self.metadata:
       raise ValueError('Either uri or content must be provided.')
   def to_bytes(self) -> bytes:
@@ -162,6 +217,8 @@ class Mime(lf.Modality):
       return cls.class_from_mime_type(mime_type).from_bytes(content, **kwargs)
     if cls is Mime:
+      if 'youtube.com/watch' in uri:
+        return Custom(mime='text/html', uri=uri, **kwargs)
       content = cls.download(uri)
       mime = _detect_mime_type(content)
       return cls.class_from_mime_type(mime)(uri=uri, content=content, **kwargs)
@@ -272,7 +329,24 @@ class Mime(lf.Modality):
 @pg.use_init_args(['mime', 'content', 'uri'])
 class Custom(Mime):
-  """Custom MIME data."""
+  """Represents content of a custom MIME type.
+  `lf.modalities.Custom` is useful for representing data with MIME types
+  that do not have dedicated classes like `lf.Image` or `lf.Audio`.
+  **Example:**
+  ```python
+  import langfun as lf
+  # Create a custom MIME object for plain text
+  text_data = lf.Custom.from_bytes(
+      b'This is a text document.', mime='text/plain'
+  )
+  print(text_data.mime_type)
+  # Output: text/plain
+  ```
+  """
   mime: Annotated[
       str, 'The MIME type of the data. E.g. text/plain, or image/png. '

langfun/core/modalities/mime_test.py CHANGED Viewed

@@ -109,6 +109,17 @@ class CustomMimeTest(unittest.TestCase):
     with self.assertRaisesRegex(ValueError, 'Unsupported encoding'):
       mime.Mime.from_uri('data:text/plain;base16,abcd')
+    # Test YouTube URI
+    yt_uri = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
+    with mock.patch(
+        'langfun.core.modalities.mime.Mime.download'
+    ) as mock_download:
+      content = mime.Mime.from_uri(yt_uri)
+      self.assertIsInstance(content, mime.Custom)
+      self.assertEqual(content.mime_type, 'text/html')
+      self.assertEqual(content.uri, yt_uri)
+      mock_download.assert_not_called()
   def assert_html_content(self, html, expected):
     expected = inspect.cleandoc(expected).strip()
     actual = html.content.strip()
@@ -152,5 +163,53 @@ class CustomMimeTest(unittest.TestCase):
     )
+class ToTextEncodingTest(unittest.TestCase):
+  """Tests for to_text() encoding handling."""
+  def test_utf8_decoding(self):
+    """Test that valid UTF-8 content is decoded correctly."""
+    content = mime.Custom('text/plain', b'Hello, World!')
+    self.assertEqual(content.to_text(), 'Hello, World!')
+    # UTF-8 with multi-byte characters.
+    utf8_content = 'こんにちは'.encode('utf-8')
+    content = mime.Custom('text/plain', utf8_content)
+    self.assertEqual(content.to_text(), 'こんにちは')
+  def test_utf16_le_bom_decoding(self):
+    """Test that UTF-16 Little Endian with BOM is decoded correctly."""
+    # UTF-16 LE BOM: 0xff 0xfe
+    utf16_le_content = 'Hello'.encode('utf-16-le')
+    content_with_bom = b'\xff\xfe' + utf16_le_content
+    content = mime.Custom('text/plain', content_with_bom)
+    self.assertEqual(content.to_text(), 'Hello')
+  def test_utf16_be_bom_decoding(self):
+    """Test that UTF-16 Big Endian with BOM is decoded correctly."""
+    # UTF-16 BE BOM: 0xfe 0xff
+    utf16_be_content = 'Hello'.encode('utf-16-be')
+    content_with_bom = b'\xfe\xff' + utf16_be_content
+    content = mime.Custom('text/plain', content_with_bom)
+    self.assertEqual(content.to_text(), 'Hello')
+  def test_invalid_bytes_fallback_with_replacement(self):
+    """Test that invalid bytes are replaced with replacement character."""
+    # 0xff alone is invalid in UTF-8 and doesn't have UTF-16 BOM pattern.
+    invalid_content = b'\xff\xfdHello'
+    content = mime.Custom('text/plain', invalid_content)
+    result = content.to_text()
+    # Invalid bytes should be replaced with U+FFFD (replacement character).
+    self.assertIn('\ufffd', result)
+    self.assertIn('Hello', result)
+  def test_binary_mime_type_raises_error(self):
+    """Test that binary MIME types raise ModalityError."""
+    content = mime.Custom('application/octet-stream', b'\x00\x01\x02')
+    with self.assertRaisesRegex(
+        lf.ModalityError, 'cannot be converted to text'
+    ):
+      content.to_text()
 if __name__ == '__main__':
   unittest.main()

langfun/core/modalities/pdf.py CHANGED Viewed

@@ -17,6 +17,24 @@ from langfun.core.modalities import mime
 class PDF(mime.Mime):
-  """PDF document."""
+  """Represents a PDF document for communicating with language models.
+  `lf.PDF` can be initialized from a URI (HTTP/HTTPS URL or local path)
+  using `lf.PDF.from_uri()` or from raw bytes using `lf.PDF.from_bytes()`.
+  **Example:**
+  ```python
+  import langfun as lf
+  # Load PDF from path
+  pdf = lf.PDF.from_path('/path/to/document.pdf')
+  # Use PDF in a prompt
+  prompt = lf.Template('Summarize this document: {{pdf}}', pdf=pdf)
+  response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
+  print(response)
+  ```
+  """
   MIME_PREFIX = 'application/pdf'

langfun/core/modalities/video.py CHANGED Viewed

@@ -18,7 +18,27 @@ from langfun.core.modalities import mime
 class Video(mime.Mime):
-  """Video."""
+  """Represents a video for communicating with language models.
+  `lf.Video` can be initialized from a URI (HTTP/HTTPS URL or local path)
+  using `lf.Video.from_uri()` or from raw bytes using `lf.Video.from_bytes()`.
+  **Example:**
+  ```python
+  import langfun as lf
+  # Load video from path
+  video = lf.Video.from_path('/path/to/video.mp4')
+  # Use video in a prompt
+  prompt = lf.Template(
+      'What is happening in this video? {{video}}', video=video
+  )
+  response = lf.query(prompt, lm=lf.llms.Gemini25Flash())
+  print(response)
+  ```
+  """
   MIME_PREFIX = 'video'

langfun/core/modality.py CHANGED Viewed

@@ -14,40 +14,63 @@
 """Interface for modality (e.g. Image, Video, etc.)."""
 import abc
+import contextlib
 import functools
 import hashlib
-from typing import Any, ContextManager
+import re
+from typing import Any, ContextManager, Iterator
 from langfun.core import component
 import pyglove as pg
-_TLS_MODALITY_AS_REF = '__format_modality_as_ref__'
+class Modality(component.Component, pg.views.HtmlTreeView.Extension):
+  """Base class for representing non-text content in prompts.
+  `lf.Modality` is the base class for multimodal objects such as `lf.Image`,
+  `lf.Audio`, and `lf.Video`. It allows these non-text inputs to be
+  seamlessly embedded within text prompts for processing by multimodal
+  language models.
-def format_modality_as_ref(enabled: bool = True) -> ContextManager[None]:
-  """A context manager that formats modality objects as references."""
-  return pg.object_utils.thread_local_value_scope(
-      _TLS_MODALITY_AS_REF, enabled, False
-  )
+  When a `Modality` object is rendered within an `lf.Template`, it is
+  replaced by a text marker (e.g., `<<[[image:b10a8db1]]>>`), and the
+  modality object itself is stored in the `referred_modalities` field of
+  the resulting `lf.Message`. This allows language models to associate
+  the placeholder with its content during processing.
+  **Example:**
-class Modality(component.Component, pg.views.HtmlTreeView.Extension):
-  """Base class for multimodal object."""
+  ```python
+  import langfun as lf
+  image = lf.Image.from_path('/path/to/image.png')
+  prompt = lf.Template('What is in this image? {{image}}', image=image)
+  message = prompt.render()
+  print(message.text)
+  # Output: What is in this image? <<[[image:b10a8db1]]>>
+  print(message.modalities())
+  # Output: [<Image object>]
+  ```
+  """
   REF_START = '<<[['
   REF_END = ']]>>'
   def _on_bound(self):
     super()._on_bound()
-    # Invalidate cached hash if modality member is changed.
+    # Invalidate cached hash and id if modality member is changed.
     self.__dict__.pop('hash', None)
+    self.__dict__.pop('id', None)
   def format(self, *args, **kwargs) -> str:
-    if self.referred_name is None or not pg.object_utils.thread_local_get(
-        _TLS_MODALITY_AS_REF, False
-    ):
+    if not pg.object_utils.thread_local_get(_TLS_MODALITY_AS_REF, False):
       return super().format(*args, **kwargs)
-    return Modality.text_marker(self.referred_name)
+    capture_scope = get_modality_capture_context()
+    if capture_scope is not None:
+      capture_scope.capture(self)
+    return Modality.text_marker(self.id)
   def __str_kwargs__(self) -> dict[str, Any]:
     # For modality objects, we don't want to use markdown format when they
@@ -70,14 +93,11 @@ class Modality(component.Component, pg.views.HtmlTreeView.Extension):
     """Returns a marker in the text for this object."""
     return Modality.REF_START + var_name + Modality.REF_END
-  @property
-  def referred_name(self) -> str | None:
+  @functools.cached_property
+  def id(self) -> str | None:
     """Returns the referred name of this object in its template."""
-    if not self.sym_path:
-      return None
-    # Strip the metadata prefix under message.
-    path = str(self.sym_path)
-    return path[9:] if path.startswith('metadata.') else path
+    modality_type = _camel_to_snake(self.__class__.__name__)
+    return f'{modality_type}:{self.hash}'
   @classmethod
   def from_value(cls, value: pg.Symbolic) -> dict[str, 'Modality']:
@@ -86,7 +106,7 @@ class Modality(component.Component, pg.views.HtmlTreeView.Extension):
     def _visit(k, v, p):
       del k, p
       if isinstance(v, Modality):
-        modalities[v.referred_name] = v
+        modalities[v.id] = v
         return pg.TraverseAction.CONTINUE
       return pg.TraverseAction.ENTER
@@ -95,14 +115,47 @@ class Modality(component.Component, pg.views.HtmlTreeView.Extension):
 class ModalityRef(pg.Object, pg.typing.CustomTyping):
-  """References of modality objects in a symbolic tree.
+  """Lightweight placeholder for a `lf.Modality` object in a symbolic tree.
-  `ModalityRef` was introduced to placehold modality objects in a symbolic
-  tree, to prevent message from being chunked in the middle of a Python
-  structure.
+  `ModalityRef` acts as a reference to a `Modality` object (like `lf.Image`
+  or `lf.Audio`) within a structured object hierarchy (e.g., a `pg.Object`).
+  Instead of embedding potentially large modality data directly, `ModalityRef`
+  stores only the ID of the modality object.
+  This is useful in scenarios where structured objects are serialized or
+  manipulated, and it's more efficient to refer to modalities by ID rather
+  than copying their content. The `lf.ModalityRef.placehold()` class method
+  can be used to replace `Modality` instances in a symbolic object with
+  `ModalityRef` placeholders, while `lf.ModalityRef.restore()` can reinstate
+  the original `Modality` objects using a lookup table.
+  **Example:**
+  ```python
+  import langfun as lf
+  import pyglove as pg
+  class ImagePair(pg.Object):
+    image1: lf.Image
+    image2: lf.Image
+  pair = ImagePair(
+      image1=lf.Image(content=b'abc'), image2=lf.Image(content=b'def')
+  )
+  modalities = lf.Modality.from_value(pair)
+  # Replace Image objects with ModalityRef placeholders
+  pair_with_refs = lf.ModalityRef.placehold(pair)
+  print(pair_with_refs.image1)
+  # Output: ModalityRef(id='image:d81e5a68')
+  # Restore Image objects from ModalityRef placeholders
+  pair_restored = lf.ModalityRef.restore(pair_with_refs, modalities)
+  assert pair_restored.image1.content == b'abc'
+  ```
   """
-  name: str
+  id: str
   def custom_apply(
       self, path: pg.KeyPath, value_spec: pg.ValueSpec, *args, **kwargs
@@ -122,12 +175,97 @@ class ModalityRef(pg.Object, pg.typing.CustomTyping):
     """
     def _placehold(k, v, p):
-      del p
+      del k, p
       if isinstance(v, Modality):
-        return ModalityRef(name=value.sym_path + k)
+        return ModalityRef(id=v.id)
       return v
     return value.clone().rebind(_placehold, raise_on_no_change=False)
+  @classmethod
+  def restore(cls, value: pg.Symbolic, modalities: dict[str, Modality]) -> Any:
+    """Returns a copy of value by replacing refs with modality objects."""
+    def _restore(k, v, p):
+      del k, p
+      if isinstance(v, ModalityRef):
+        modality_object = modalities.get(v.id)
+        if modality_object is None:
+          raise ValueError(
+              f'Modality {v.id} not found in modalities {modalities.keys()}'
+          )
+        return modality_object
+      return v
+    return value.rebind(_restore, raise_on_no_change=False)
 class ModalityError(RuntimeError):  # pylint: disable=g-bad-exception-name
   """Exception raised when modality is not supported."""
+#
+# Context managers to deal with modality objects.
+#
+_TLS_MODALITY_CAPTURE_SCOPE = '__modality_capture_scope__'
+_TLS_MODALITY_AS_REF = '__format_modality_as_ref__'
+def format_modality_as_ref(enabled: bool = True) -> ContextManager[None]:
+  """A context manager that formats modality objects as references."""
+  return pg.object_utils.thread_local_value_scope(
+      _TLS_MODALITY_AS_REF, enabled, False
+  )
+class _ModalityCaptureContext:
+  """A context to capture modality objects when being rendered."""
+  def __init__(self):
+    self._references: dict[str, pg.Ref[Modality]] = {}
+  def capture(self, modality: Modality) -> None:
+    """Captures the modality object."""
+    self._references[modality.id] = pg.Ref(modality)
+  @property
+  def references(self) -> dict[str, pg.Ref[Modality]]:
+    """Returns the modality references captured in this context."""
+    return self._references
+@contextlib.contextmanager
+def capture_rendered_modalities() -> Iterator[dict[str, pg.Ref[Modality]]]:
+  """Capture modality objects whose references is being rendered.
+  Example:
+    ```
+    image = lf.Image.from_url(...)
+    with lf.modality.capture_rendered_modalities() as rendered_modalities:
+      with lf.modality.format_modality_as_ref():
+        print(f'Hello {image}')
+    self.assertEqual(rendered_modalities, {'image:<hash>': pg.Ref(image)})
+    ```
+  """
+  context = get_modality_capture_context()
+  top_level = context is None
+  if top_level:
+    context = _ModalityCaptureContext()
+    pg.object_utils.thread_local_set(_TLS_MODALITY_CAPTURE_SCOPE, context)
+  try:
+    yield context.references  # pylint: disable=attribute-error
+  finally:
+    if top_level:
+      pg.object_utils.thread_local_del(_TLS_MODALITY_CAPTURE_SCOPE)
+def get_modality_capture_context() -> _ModalityCaptureContext | None:
+  """Returns the current modality capture context."""
+  return pg.object_utils.thread_local_get(_TLS_MODALITY_CAPTURE_SCOPE, None)
+def _camel_to_snake(name: str) -> str:
+  """Converts a camelCase name to snake_case."""
+  return re.sub(
+      pattern=r'([A-Z]+)', repl=r'_\1', string=name
+  ).lower().lstrip('_')

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl

langfun 0.1.2.dev202509120804__py3-none-any.whl → 0.1.2.dev202512150805__py3-none-any.whl