langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. langfun/__init__.py +20 -2
  2. langfun/core/__init__.py +16 -5
  3. langfun/core/agentic/__init__.py +30 -0
  4. langfun/core/agentic/action.py +854 -0
  5. langfun/core/agentic/action_eval.py +150 -0
  6. langfun/core/agentic/action_eval_test.py +109 -0
  7. langfun/core/agentic/action_test.py +136 -0
  8. langfun/core/coding/python/__init__.py +5 -11
  9. langfun/core/coding/python/correction.py +37 -21
  10. langfun/core/coding/python/correction_test.py +29 -3
  11. langfun/core/coding/python/execution.py +40 -216
  12. langfun/core/coding/python/execution_test.py +29 -89
  13. langfun/core/coding/python/generation.py +21 -11
  14. langfun/core/coding/python/generation_test.py +2 -2
  15. langfun/core/coding/python/parsing.py +108 -193
  16. langfun/core/coding/python/parsing_test.py +2 -105
  17. langfun/core/component.py +63 -2
  18. langfun/core/component_test.py +53 -0
  19. langfun/core/concurrent.py +414 -117
  20. langfun/core/concurrent_test.py +111 -24
  21. langfun/core/console.py +18 -5
  22. langfun/core/console_test.py +17 -0
  23. langfun/core/eval/__init__.py +16 -1
  24. langfun/core/eval/base.py +622 -174
  25. langfun/core/eval/base_test.py +200 -54
  26. langfun/core/eval/matching.py +63 -76
  27. langfun/core/eval/matching_test.py +17 -8
  28. langfun/core/eval/patching.py +130 -0
  29. langfun/core/eval/patching_test.py +170 -0
  30. langfun/core/eval/scoring.py +26 -26
  31. langfun/core/eval/scoring_test.py +19 -2
  32. langfun/core/eval/v2/__init__.py +42 -0
  33. langfun/core/eval/v2/checkpointing.py +380 -0
  34. langfun/core/eval/v2/checkpointing_test.py +228 -0
  35. langfun/core/eval/v2/eval_test_helper.py +136 -0
  36. langfun/core/eval/v2/evaluation.py +725 -0
  37. langfun/core/eval/v2/evaluation_test.py +180 -0
  38. langfun/core/eval/v2/example.py +305 -0
  39. langfun/core/eval/v2/example_test.py +128 -0
  40. langfun/core/eval/v2/experiment.py +1048 -0
  41. langfun/core/eval/v2/experiment_test.py +433 -0
  42. langfun/core/eval/v2/metric_values.py +156 -0
  43. langfun/core/eval/v2/metric_values_test.py +80 -0
  44. langfun/core/eval/v2/metrics.py +357 -0
  45. langfun/core/eval/v2/metrics_test.py +203 -0
  46. langfun/core/eval/v2/progress.py +348 -0
  47. langfun/core/eval/v2/progress_test.py +82 -0
  48. langfun/core/eval/v2/progress_tracking.py +210 -0
  49. langfun/core/eval/v2/progress_tracking_test.py +66 -0
  50. langfun/core/eval/v2/reporting.py +270 -0
  51. langfun/core/eval/v2/reporting_test.py +158 -0
  52. langfun/core/eval/v2/runners.py +488 -0
  53. langfun/core/eval/v2/runners_test.py +334 -0
  54. langfun/core/langfunc.py +4 -17
  55. langfun/core/langfunc_test.py +22 -6
  56. langfun/core/language_model.py +577 -39
  57. langfun/core/language_model_test.py +470 -56
  58. langfun/core/llms/__init__.py +87 -16
  59. langfun/core/llms/anthropic.py +312 -87
  60. langfun/core/llms/anthropic_test.py +71 -3
  61. langfun/core/llms/cache/base.py +21 -2
  62. langfun/core/llms/cache/in_memory.py +13 -0
  63. langfun/core/llms/cache/in_memory_test.py +53 -2
  64. langfun/core/llms/compositional.py +101 -0
  65. langfun/core/llms/compositional_test.py +73 -0
  66. langfun/core/llms/deepseek.py +117 -0
  67. langfun/core/llms/deepseek_test.py +61 -0
  68. langfun/core/llms/fake.py +11 -7
  69. langfun/core/llms/fake_test.py +14 -0
  70. langfun/core/llms/gemini.py +507 -0
  71. langfun/core/llms/gemini_test.py +195 -0
  72. langfun/core/llms/google_genai.py +62 -218
  73. langfun/core/llms/google_genai_test.py +9 -202
  74. langfun/core/llms/groq.py +160 -144
  75. langfun/core/llms/groq_test.py +31 -137
  76. langfun/core/llms/llama_cpp.py +15 -42
  77. langfun/core/llms/llama_cpp_test.py +4 -30
  78. langfun/core/llms/openai.py +395 -203
  79. langfun/core/llms/openai_compatible.py +179 -0
  80. langfun/core/llms/openai_compatible_test.py +495 -0
  81. langfun/core/llms/openai_test.py +30 -395
  82. langfun/core/llms/rest.py +113 -0
  83. langfun/core/llms/rest_test.py +111 -0
  84. langfun/core/llms/vertexai.py +192 -0
  85. langfun/core/llms/vertexai_test.py +52 -0
  86. langfun/core/logging.py +284 -0
  87. langfun/core/logging_test.py +125 -0
  88. langfun/core/message.py +319 -9
  89. langfun/core/message_test.py +190 -13
  90. langfun/core/modalities/__init__.py +6 -2
  91. langfun/core/modalities/audio.py +30 -0
  92. langfun/core/modalities/audio_test.py +63 -0
  93. langfun/core/modalities/image.py +39 -20
  94. langfun/core/modalities/image_test.py +52 -9
  95. langfun/core/modalities/mime.py +206 -29
  96. langfun/core/modalities/mime_test.py +90 -9
  97. langfun/core/modalities/ms_office.py +117 -0
  98. langfun/core/modalities/ms_office_test.py +389 -0
  99. langfun/core/modalities/pdf.py +22 -0
  100. langfun/core/modalities/pdf_test.py +57 -0
  101. langfun/core/modalities/video.py +9 -26
  102. langfun/core/modalities/video_test.py +3 -3
  103. langfun/core/modality.py +26 -3
  104. langfun/core/modality_test.py +2 -2
  105. langfun/core/sampling.py +11 -11
  106. langfun/core/structured/__init__.py +12 -16
  107. langfun/core/structured/completion.py +32 -5
  108. langfun/core/structured/completion_test.py +7 -6
  109. langfun/core/structured/description.py +2 -2
  110. langfun/core/structured/description_test.py +3 -3
  111. langfun/core/structured/function_generation.py +60 -27
  112. langfun/core/structured/function_generation_test.py +72 -2
  113. langfun/core/structured/mapping.py +97 -47
  114. langfun/core/structured/mapping_test.py +90 -2
  115. langfun/core/structured/parsing.py +33 -21
  116. langfun/core/structured/parsing_test.py +53 -9
  117. langfun/core/structured/querying.py +746 -0
  118. langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
  119. langfun/core/structured/schema.py +204 -97
  120. langfun/core/structured/schema_generation.py +1 -1
  121. langfun/core/structured/schema_test.py +130 -29
  122. langfun/core/structured/scoring.py +125 -19
  123. langfun/core/structured/scoring_test.py +30 -0
  124. langfun/core/structured/tokenization.py +64 -0
  125. langfun/core/structured/tokenization_test.py +48 -0
  126. langfun/core/template.py +115 -1
  127. langfun/core/template_test.py +71 -1
  128. langfun/core/templates/conversation.py +9 -0
  129. langfun/core/templates/conversation_test.py +4 -3
  130. langfun/core/templates/selfplay_test.py +10 -2
  131. langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
  132. langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
  133. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
  134. langfun/core/coding/python/errors.py +0 -108
  135. langfun/core/coding/python/errors_test.py +0 -99
  136. langfun/core/coding/python/permissions.py +0 -90
  137. langfun/core/coding/python/permissions_test.py +0 -86
  138. langfun/core/structured/prompting.py +0 -238
  139. langfun/core/text_formatting.py +0 -162
  140. langfun/core/text_formatting_test.py +0 -47
  141. langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
  142. langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
  143. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
  144. {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2023 The Langfun Authors
1
+ # Copyright 2024 The Langfun Authors
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -13,20 +13,30 @@
13
13
  # limitations under the License.
14
14
  """MIME type data."""
15
15
 
16
- import abc
17
- from typing import Annotated, Union
16
+ import base64
17
+ import functools
18
+ from typing import Annotated, Any, Iterable, Type, Union
18
19
  import langfun.core as lf
19
20
  import pyglove as pg
20
21
  import requests
21
22
 
22
23
 
23
- class MimeType(lf.Modality):
24
- """Base for MIME type data."""
24
+ try:
25
+ import magic # pylint: disable=g-import-not-at-top
26
+ from_buffer = magic.from_buffer
27
+ except ImportError:
28
+ def from_buffer(*unused_args, **unused_kwargs):
29
+ raise RuntimeError(
30
+ 'Please install "langfun[mime-auto]" to enable automatic MIME support.'
31
+ )
25
32
 
26
- @property
27
- @abc.abstractmethod
28
- def mime_type(self) -> str:
29
- """Returns the MIME type."""
33
+
34
+ class Mime(lf.Modality):
35
+ """Base for MIME data."""
36
+
37
+ # The regular expression that describes the MIME type str.
38
+ # If None, the MIME type is dynamic. Subclass could override.
39
+ MIME_PREFIX = None
30
40
 
31
41
  uri: Annotated[str | None, 'The URI for locating the MIME data. '] = None
32
42
 
@@ -34,6 +44,86 @@ class MimeType(lf.Modality):
34
44
  Union[str, bytes, None], 'The raw content of the MIME type.'
35
45
  ] = None
36
46
 
47
+ @functools.cached_property
48
+ def mime_type(self) -> str:
49
+ """Returns the MIME type."""
50
+ mime = from_buffer((self.to_bytes()), mime=True)
51
+ if (
52
+ self.MIME_PREFIX
53
+ and not mime.lower().startswith(self.MIME_PREFIX)
54
+ # NOTE(daiyip): libmagic fails to detect the MIME type of some binary
55
+ # files.
56
+ and mime != 'application/octet-stream'
57
+ ):
58
+ raise ValueError(
59
+ f'Expected MIME type: {self.MIME_PREFIX}, Encountered: {mime}'
60
+ )
61
+ return mime
62
+
63
+ @functools.cached_property
64
+ def is_text(self) -> bool:
65
+ return self.mime_type.startswith((
66
+ 'text/',
67
+ 'application/javascript',
68
+ 'application/json',
69
+ 'application/ld+json',
70
+ 'application/plain',
71
+ 'application/rtf',
72
+ 'application/xhtml+xml',
73
+ 'application/xml',
74
+ 'application/x-javascript',
75
+ 'application/x-python-code',
76
+ 'application/x-tex',
77
+ 'application/x-typescript',
78
+ 'application/x-yaml',
79
+ ))
80
+
81
+ @property
82
+ def is_binary(self) -> bool:
83
+ """Returns True if the MIME type is a binary type."""
84
+ return not self.is_text
85
+
86
+ def to_text(self) -> str:
87
+ """Returns the text content of the MIME type."""
88
+ if not self.is_text:
89
+ raise lf.ModalityError(
90
+ f'MIME type {self.mime_type!r} cannot be converted to text.'
91
+ )
92
+ return self.to_bytes().decode()
93
+
94
+ def is_compatible(
95
+ self, mime_types: str | Iterable[str]
96
+ ) -> bool:
97
+ """Returns True if this object is compatible to any of the MIME types."""
98
+ if isinstance(mime_types, str):
99
+ mime_types = {mime_types}
100
+ return self._is_compatible(mime_types)
101
+
102
+ def _is_compatible(self, mime_types: Iterable[str]):
103
+ return self.mime_type in mime_types
104
+
105
+ def make_compatible(
106
+ self,
107
+ mime_types: str | Iterable[str]
108
+ ) -> Union['Mime', list['Mime']]:
109
+ """Makes compatible MIME objects from this object."""
110
+ if isinstance(mime_types, str):
111
+ mime_types = {mime_types}
112
+ if not self._is_compatible(mime_types):
113
+ raise lf.ModalityError(
114
+ f'MIME type {self.mime_type!r} cannot be converted to supported '
115
+ f'types: {mime_types!r}.'
116
+ )
117
+ return self._make_compatible(mime_types)
118
+
119
+ def _make_compatible(
120
+ self,
121
+ mime_types: Iterable[str]
122
+ ) -> Union['Mime', list['Mime']]:
123
+ """Makes compatbile MIME objects from this object."""
124
+ del mime_types
125
+ return self
126
+
37
127
  def _on_bound(self):
38
128
  super()._on_bound()
39
129
  if self.uri is None and self.content is None:
@@ -43,39 +133,126 @@ class MimeType(lf.Modality):
43
133
  if self.content is not None:
44
134
  return self.content
45
135
 
46
- assert self.uri is not None
47
- if self.uri.lower().startswith(('http:', 'https:', 'ftp:')):
48
- content = requests.get(
49
- self.uri,
50
- headers={'User-Agent': 'Langfun'},
51
- ).content
52
- else:
53
- content = pg.io.readfile(self.uri, mode='rb')
54
- self.rebind(content=content, skip_notification=True)
136
+ self.rebind(content=self.download(self.uri), skip_notification=True)
55
137
  return self.content
56
138
 
139
+ @property
140
+ def content_uri(self) -> str:
141
+ """Returns the URI with encoded content."""
142
+ base64_content = base64.b64encode(self.to_bytes()).decode()
143
+ return f'data:{self.mime_type};base64,{base64_content}'
144
+
145
+ @property
146
+ def embeddable_uri(self) -> str:
147
+ """Returns the URI that can be embedded in HTML."""
148
+ if self.uri and self.uri.lower().startswith(('http:', 'https:', 'ftp:')):
149
+ return self.uri
150
+ return self.content_uri
151
+
57
152
  @classmethod
58
- def from_uri(cls, uri: str, **kwargs) -> 'MimeType':
153
+ def from_uri(cls, uri: str, **kwargs) -> 'Mime':
154
+ if cls is Mime:
155
+ content = cls.download(uri)
156
+ mime = from_buffer(content, mime=True).lower()
157
+ return cls.class_from_mime_type(mime)(uri=uri, content=content, **kwargs)
59
158
  return cls(uri=uri, content=None, **kwargs)
60
159
 
61
160
  @classmethod
62
- def from_bytes(cls, content: bytes | str, **kwargs) -> 'MimeType':
161
+ def from_bytes(cls, content: bytes | str, **kwargs) -> 'Mime':
162
+ if cls is Mime:
163
+ mime = from_buffer(content, mime=True).lower()
164
+ return cls.class_from_mime_type(mime)(content=content, **kwargs)
63
165
  return cls(content=content, **kwargs)
64
166
 
167
+ @classmethod
168
+ def class_from_mime_type(cls, mime_type: str) -> Type['Mime']:
169
+ """Subclass from the given MIME type."""
170
+ for subcls in cls.__subclasses__():
171
+ if subcls.MIME_PREFIX is not None and mime_type.startswith(
172
+ subcls.MIME_PREFIX):
173
+ return subcls
174
+ return cls
175
+
176
+ @classmethod
177
+ def download(cls, uri: str) -> bytes | str:
178
+ """Downloads the content of the given URI."""
179
+ if uri.lower().startswith(('http:', 'https:', 'ftp:')):
180
+ return requests.get(
181
+ uri,
182
+ headers={'User-Agent': 'Mozilla/5.0'},
183
+ ).content
184
+ else:
185
+ content = pg.io.readfile(uri, mode='rb')
186
+ assert content is not None
187
+ return content
188
+
189
+ def _html_tree_view_content(
190
+ self,
191
+ **kwargs) -> str:
192
+ return self._raw_html()
65
193
 
66
- @pg.use_init_args(['type', 'content', 'uri'])
67
- class Custom(MimeType):
194
+ def _html_tree_view(
195
+ self,
196
+ view: pg.views.HtmlTreeView,
197
+ extra_flags: dict[str, Any] | None = None,
198
+ **kwargs
199
+ ):
200
+ extra_flags = extra_flags if extra_flags is not None else {}
201
+ raw_mime_content = extra_flags.get('raw_mime_content', False)
202
+ display_modality_when_hover = extra_flags.get(
203
+ 'display_modality_when_hover', False
204
+ )
205
+ if raw_mime_content:
206
+ kwargs['enable_summary'] = False
207
+ elif display_modality_when_hover:
208
+ kwargs.update(
209
+ enable_summary=True,
210
+ enable_summary_tooltip=True,
211
+ )
212
+ return super()._html_tree_view(
213
+ view=view, extra_flags=extra_flags, **kwargs
214
+ )
215
+
216
+ def _html_tree_view_summary(
217
+ self,
218
+ *,
219
+ view: pg.views.HtmlTreeView,
220
+ extra_flags: dict[str, Any] | None = None,
221
+ **kwargs
222
+ ):
223
+ extra_flags = extra_flags or {}
224
+ if extra_flags.get('display_modality_when_hover', False):
225
+ def summary_tooltip(*args, content: str | None = None, **kwargs):
226
+ del content
227
+ return view.tooltip(*args, content=self._raw_html(), **kwargs)
228
+ else:
229
+ summary_tooltip = None
230
+ return super()._html_tree_view_summary(
231
+ view=view,
232
+ summary_tooltip_fn=summary_tooltip,
233
+ extra_flags=extra_flags,
234
+ **kwargs
235
+ )
236
+
237
+ def _raw_html(self) -> str:
238
+ if self.uri and self.uri.lower().startswith(('http:', 'https:', 'ftp:')):
239
+ uri = self.uri
240
+ else:
241
+ uri = self.content_uri
242
+ return self._mime_control_for(uri)
243
+
244
+ def _mime_control_for(self, uri) -> str:
245
+ return f'<embed type="{self.mime_type}" src="{uri}"/>'
246
+
247
+
248
+ @pg.use_init_args(['mime', 'content', 'uri'])
249
+ class Custom(Mime):
68
250
  """Custom MIME data."""
69
251
 
70
- type: Annotated[
252
+ mime: Annotated[
71
253
  str, 'The MIME type of the data. E.g. text/plain, or image/png. '
72
254
  ]
73
255
 
74
256
  @property
75
257
  def mime_type(self) -> str:
76
- return self.type
77
-
78
-
79
- class PDF(Custom):
80
- """PDF document."""
81
- type = 'application/pdf'
258
+ return self.mime
@@ -12,48 +12,129 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  """MIME tests."""
15
+ import inspect
15
16
  import unittest
16
17
  from unittest import mock
17
18
 
19
+ import langfun.core as lf
18
20
  from langfun.core.modalities import mime
19
21
  import pyglove as pg
20
22
 
21
23
 
22
24
  def mock_request(*args, **kwargs):
23
25
  del args, kwargs
24
- return pg.Dict(content='foo')
26
+ return pg.Dict(content=b'foo')
25
27
 
26
28
 
27
29
  def mock_readfile(*args, **kwargs):
28
30
  del args, kwargs
29
- return 'bar'
31
+ return b'bar'
30
32
 
31
33
 
32
34
  class CustomMimeTest(unittest.TestCase):
33
35
 
34
- def test_content(self):
35
- content = mime.Custom('text/plain', 'foo')
36
- self.assertEqual(content.to_bytes(), 'foo')
36
+ def test_is_text(self):
37
+ self.assertTrue(mime.Custom('text/plain', b'foo').is_text)
38
+ self.assertTrue(mime.Custom('text/xml', b'foo').is_text)
39
+ self.assertTrue(mime.Custom('application/json', b'foo').is_text)
40
+ self.assertTrue(mime.Custom('application/x-python-code', b'foo').is_text)
41
+ self.assertFalse(mime.Custom('application/pdf', b'foo').is_text)
42
+ self.assertFalse(mime.Custom('application/octet-stream', b'foo').is_text)
43
+
44
+ def test_from_byes(self):
45
+ content = mime.Mime.from_bytes(b'hello')
46
+ self.assertIs(content.__class__, mime.Mime)
47
+
48
+ content = mime.Custom('text/plain', b'foo')
49
+ self.assertEqual(content.to_bytes(), b'foo')
37
50
  self.assertEqual(content.mime_type, 'text/plain')
51
+ self.assertTrue(content.is_text)
52
+ self.assertFalse(content.is_binary)
53
+ self.assertEqual(content.to_text(), 'foo')
54
+ self.assertTrue(content.is_compatible('text/plain'))
55
+ self.assertFalse(content.is_compatible('text/xml'))
56
+ self.assertIs(content.make_compatible('text/plain'), content)
57
+
58
+ with self.assertRaisesRegex(
59
+ lf.ModalityError, '.* cannot be converted to supported types'
60
+ ):
61
+ content.make_compatible('application/pdf')
38
62
 
39
63
  with self.assertRaisesRegex(
40
64
  ValueError, 'Either uri or content must be provided.'
41
65
  ):
42
66
  mime.Custom('text/plain')
43
67
 
68
+ def test_uri(self):
69
+ content = mime.Custom.from_uri('http://mock/web/a.txt', mime='text/plain')
70
+ with mock.patch('requests.get') as mock_requests_stub:
71
+ mock_requests_stub.side_effect = mock_request
72
+ self.assertEqual(content.uri, 'http://mock/web/a.txt')
73
+ self.assertEqual(content.content_uri, 'data:text/plain;base64,Zm9v')
74
+ self.assertEqual(content.embeddable_uri, 'http://mock/web/a.txt')
75
+
76
+ content = mime.Custom.from_uri('a.txt', mime='text/plain')
77
+ with mock.patch('pyglove.io.readfile') as mock_readfile_stub:
78
+ mock_readfile_stub.side_effect = mock_readfile
79
+ self.assertEqual(content.uri, 'a.txt')
80
+ self.assertEqual(content.content_uri, 'data:text/plain;base64,YmFy')
81
+ self.assertEqual(content.embeddable_uri, 'data:text/plain;base64,YmFy')
82
+
44
83
  def test_from_uri(self):
45
- content = mime.Custom.from_uri('http://mock/web/a.txt', type='text/plain')
84
+ content = mime.Custom.from_uri('http://mock/web/a.txt', mime='text/plain')
46
85
  with mock.patch('requests.get') as mock_requests_stub:
47
86
  mock_requests_stub.side_effect = mock_request
48
- self.assertEqual(content.to_bytes(), 'foo')
87
+ self.assertEqual(content.to_bytes(), b'foo')
49
88
  self.assertEqual(content.mime_type, 'text/plain')
50
89
 
51
- content = mime.Custom.from_uri('a.txt', type='text/plain')
90
+ content = mime.Custom.from_uri('a.txt', mime='text/plain')
52
91
  with mock.patch('pyglove.io.readfile') as mock_readfile_stub:
53
92
  mock_readfile_stub.side_effect = mock_readfile
54
- self.assertEqual(content.to_bytes(), 'bar')
93
+ self.assertEqual(content.to_bytes(), b'bar')
55
94
  self.assertEqual(content.mime_type, 'text/plain')
56
95
 
96
+ def assert_html_content(self, html, expected):
97
+ expected = inspect.cleandoc(expected).strip()
98
+ actual = html.content.strip()
99
+ if actual != expected:
100
+ print(actual)
101
+ self.assertEqual(actual, expected)
102
+
103
+ def test_html(self):
104
+ self.assert_html_content(
105
+ mime.Custom('text/plain', b'foo').to_html(
106
+ enable_summary_tooltip=False,
107
+ enable_key_tooltip=False,
108
+ ),
109
+ """
110
+ <details open class="pyglove custom"><summary><div class="summary-title">Custom(...)</div></summary><embed type="text/plain" src="data:text/plain;base64,Zm9v"/></details>
111
+ """
112
+ )
113
+ self.assert_html_content(
114
+ mime.Custom('text/plain', b'foo').to_html(
115
+ enable_summary_tooltip=False,
116
+ enable_key_tooltip=False,
117
+ extra_flags=dict(
118
+ raw_mime_content=True,
119
+ )
120
+ ),
121
+ """
122
+ <embed type="text/plain" src="data:text/plain;base64,Zm9v"/>
123
+ """
124
+ )
125
+ self.assert_html_content(
126
+ mime.Custom('text/plain', b'foo').to_html(
127
+ enable_summary_tooltip=False,
128
+ enable_key_tooltip=False,
129
+ extra_flags=dict(
130
+ display_modality_when_hover=True,
131
+ )
132
+ ),
133
+ """
134
+ <details open class="pyglove custom"><summary><div class="summary-title">Custom(...)</div><span class="tooltip"><embed type="text/plain" src="data:text/plain;base64,Zm9v"/></span></summary><embed type="text/plain" src="data:text/plain;base64,Zm9v"/></details>
135
+ """
136
+ )
137
+
57
138
 
58
139
  if __name__ == '__main__':
59
140
  unittest.main()
@@ -0,0 +1,117 @@
1
+ # Copyright 2023 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Microsoft Office file types."""
15
+
16
+ import base64
17
+ import io
18
+ import os
19
+ from typing import Iterable
20
+ from langfun.core.modalities import mime
21
+ from langfun.core.modalities import pdf
22
+ import requests
23
+
24
+
25
+ class Xlsx(mime.Mime):
26
+ """Xlsx file type."""
27
+
28
+ MIME_PREFIX = (
29
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
30
+ )
31
+
32
+ def _raw_html(self) -> str:
33
+ try:
34
+ import pandas as pd # pylint: disable=g-import-not-at-top
35
+ import openpyxl # pylint: disable=g-import-not-at-top, unused-import
36
+ df = pd.read_excel(io.BytesIO(self.to_bytes()))
37
+ return df.to_html()
38
+ except ImportError as e:
39
+ raise RuntimeError(
40
+ 'Please install "langfun[mime-xlsx]" to enable XLSX support.'
41
+ ) from e
42
+
43
+ def _is_compatible(self, mime_types: Iterable[str]) -> bool:
44
+ return bool(set(mime_types).intersection([
45
+ 'text/html',
46
+ 'text/plain',
47
+ ]))
48
+
49
+ def _make_compatible(self, mime_types: Iterable[str]) -> mime.Mime:
50
+ """Returns the MimeType of the converted file."""
51
+ del mime_types
52
+ return mime.Mime(uri=self.uri, content=self._raw_html())
53
+
54
+
55
+ class Docx(mime.Mime):
56
+ """Docx file type."""
57
+
58
+ MIME_PREFIX = (
59
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
60
+ )
61
+
62
+ def to_xml(self) -> str:
63
+ try:
64
+ import docx # pylint: disable=g-import-not-at-top
65
+ doc = docx.Document(io.BytesIO(self.to_bytes()))
66
+ return str(doc.element.xml)
67
+ except ImportError as e:
68
+ raise RuntimeError(
69
+ 'Please install "langfun[mime-docx]" to enable Docx support.'
70
+ ) from e
71
+
72
+ def _repr_html_(self) -> str:
73
+ return self.to_xml()
74
+
75
+ def _is_compatible(self, mime_types: Iterable[str]) -> bool:
76
+ return bool(set(mime_types).intersection([
77
+ 'application/xml',
78
+ 'text/xml',
79
+ 'text/plain',
80
+ ]))
81
+
82
+ def _make_compatible(self, mime_types: Iterable[str]) -> mime.Mime:
83
+ """Returns the MimeType of the converted file."""
84
+ del mime_types
85
+ return mime.Mime(uri=self.uri, content=self.to_xml())
86
+
87
+
88
+ class Pptx(mime.Mime):
89
+ """Pptx file type."""
90
+
91
+ MIME_PREFIX = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
92
+ API_URL = 'https://v2.convertapi.com/convert/pptx/to/pdf'
93
+
94
+ def to_pdf(self, convert_api_key: str | None = None) -> pdf.PDF:
95
+ api_key = convert_api_key or os.environ.get('CONVERT_API_KEY')
96
+ url = f'{self.API_URL}?Secret={api_key}'
97
+
98
+ json = {
99
+ 'Parameters': [{
100
+ 'Name': 'File',
101
+ 'FileValue': {
102
+ 'Name': os.path.basename(self.uri) if self.uri else 'tmp.pptx',
103
+ 'Data': base64.b64encode(self.to_bytes()).decode('utf-8'),
104
+ },
105
+ }]
106
+ }
107
+ response = requests.post(url, json=json).json()
108
+ base64_pdf = response['Files'][0]['FileData']
109
+ return pdf.PDF.from_bytes(base64.b64decode(base64_pdf))
110
+
111
+ def _is_compatible(self, mime_types: Iterable[str]) -> bool:
112
+ return 'application/pdf' in mime_types
113
+
114
+ def _make_compatible(self, mime_types: Iterable[str]) -> mime.Mime:
115
+ """Returns the MimeType of the converted file."""
116
+ del mime_types
117
+ return self.to_pdf()