langfun 0.0.2.dev20240429__py3-none-any.whl → 0.1.2.dev202501140804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langfun/__init__.py +20 -2
- langfun/core/__init__.py +16 -5
- langfun/core/agentic/__init__.py +30 -0
- langfun/core/agentic/action.py +854 -0
- langfun/core/agentic/action_eval.py +150 -0
- langfun/core/agentic/action_eval_test.py +109 -0
- langfun/core/agentic/action_test.py +136 -0
- langfun/core/coding/python/__init__.py +5 -11
- langfun/core/coding/python/correction.py +37 -21
- langfun/core/coding/python/correction_test.py +29 -3
- langfun/core/coding/python/execution.py +40 -216
- langfun/core/coding/python/execution_test.py +29 -89
- langfun/core/coding/python/generation.py +21 -11
- langfun/core/coding/python/generation_test.py +2 -2
- langfun/core/coding/python/parsing.py +108 -193
- langfun/core/coding/python/parsing_test.py +2 -105
- langfun/core/component.py +63 -2
- langfun/core/component_test.py +53 -0
- langfun/core/concurrent.py +414 -117
- langfun/core/concurrent_test.py +111 -24
- langfun/core/console.py +18 -5
- langfun/core/console_test.py +17 -0
- langfun/core/eval/__init__.py +16 -1
- langfun/core/eval/base.py +622 -174
- langfun/core/eval/base_test.py +200 -54
- langfun/core/eval/matching.py +63 -76
- langfun/core/eval/matching_test.py +17 -8
- langfun/core/eval/patching.py +130 -0
- langfun/core/eval/patching_test.py +170 -0
- langfun/core/eval/scoring.py +26 -26
- langfun/core/eval/scoring_test.py +19 -2
- langfun/core/eval/v2/__init__.py +42 -0
- langfun/core/eval/v2/checkpointing.py +380 -0
- langfun/core/eval/v2/checkpointing_test.py +228 -0
- langfun/core/eval/v2/eval_test_helper.py +136 -0
- langfun/core/eval/v2/evaluation.py +725 -0
- langfun/core/eval/v2/evaluation_test.py +180 -0
- langfun/core/eval/v2/example.py +305 -0
- langfun/core/eval/v2/example_test.py +128 -0
- langfun/core/eval/v2/experiment.py +1048 -0
- langfun/core/eval/v2/experiment_test.py +433 -0
- langfun/core/eval/v2/metric_values.py +156 -0
- langfun/core/eval/v2/metric_values_test.py +80 -0
- langfun/core/eval/v2/metrics.py +357 -0
- langfun/core/eval/v2/metrics_test.py +203 -0
- langfun/core/eval/v2/progress.py +348 -0
- langfun/core/eval/v2/progress_test.py +82 -0
- langfun/core/eval/v2/progress_tracking.py +210 -0
- langfun/core/eval/v2/progress_tracking_test.py +66 -0
- langfun/core/eval/v2/reporting.py +270 -0
- langfun/core/eval/v2/reporting_test.py +158 -0
- langfun/core/eval/v2/runners.py +488 -0
- langfun/core/eval/v2/runners_test.py +334 -0
- langfun/core/langfunc.py +4 -17
- langfun/core/langfunc_test.py +22 -6
- langfun/core/language_model.py +577 -39
- langfun/core/language_model_test.py +470 -56
- langfun/core/llms/__init__.py +87 -16
- langfun/core/llms/anthropic.py +312 -87
- langfun/core/llms/anthropic_test.py +71 -3
- langfun/core/llms/cache/base.py +21 -2
- langfun/core/llms/cache/in_memory.py +13 -0
- langfun/core/llms/cache/in_memory_test.py +53 -2
- langfun/core/llms/compositional.py +101 -0
- langfun/core/llms/compositional_test.py +73 -0
- langfun/core/llms/deepseek.py +117 -0
- langfun/core/llms/deepseek_test.py +61 -0
- langfun/core/llms/fake.py +11 -7
- langfun/core/llms/fake_test.py +14 -0
- langfun/core/llms/gemini.py +507 -0
- langfun/core/llms/gemini_test.py +195 -0
- langfun/core/llms/google_genai.py +62 -218
- langfun/core/llms/google_genai_test.py +9 -202
- langfun/core/llms/groq.py +160 -144
- langfun/core/llms/groq_test.py +31 -137
- langfun/core/llms/llama_cpp.py +15 -42
- langfun/core/llms/llama_cpp_test.py +4 -30
- langfun/core/llms/openai.py +395 -203
- langfun/core/llms/openai_compatible.py +179 -0
- langfun/core/llms/openai_compatible_test.py +495 -0
- langfun/core/llms/openai_test.py +30 -395
- langfun/core/llms/rest.py +113 -0
- langfun/core/llms/rest_test.py +111 -0
- langfun/core/llms/vertexai.py +192 -0
- langfun/core/llms/vertexai_test.py +52 -0
- langfun/core/logging.py +284 -0
- langfun/core/logging_test.py +125 -0
- langfun/core/message.py +319 -9
- langfun/core/message_test.py +190 -13
- langfun/core/modalities/__init__.py +6 -2
- langfun/core/modalities/audio.py +30 -0
- langfun/core/modalities/audio_test.py +63 -0
- langfun/core/modalities/image.py +39 -20
- langfun/core/modalities/image_test.py +52 -9
- langfun/core/modalities/mime.py +206 -29
- langfun/core/modalities/mime_test.py +90 -9
- langfun/core/modalities/ms_office.py +117 -0
- langfun/core/modalities/ms_office_test.py +389 -0
- langfun/core/modalities/pdf.py +22 -0
- langfun/core/modalities/pdf_test.py +57 -0
- langfun/core/modalities/video.py +9 -26
- langfun/core/modalities/video_test.py +3 -3
- langfun/core/modality.py +26 -3
- langfun/core/modality_test.py +2 -2
- langfun/core/sampling.py +11 -11
- langfun/core/structured/__init__.py +12 -16
- langfun/core/structured/completion.py +32 -5
- langfun/core/structured/completion_test.py +7 -6
- langfun/core/structured/description.py +2 -2
- langfun/core/structured/description_test.py +3 -3
- langfun/core/structured/function_generation.py +60 -27
- langfun/core/structured/function_generation_test.py +72 -2
- langfun/core/structured/mapping.py +97 -47
- langfun/core/structured/mapping_test.py +90 -2
- langfun/core/structured/parsing.py +33 -21
- langfun/core/structured/parsing_test.py +53 -9
- langfun/core/structured/querying.py +746 -0
- langfun/core/structured/{prompting_test.py → querying_test.py} +469 -51
- langfun/core/structured/schema.py +204 -97
- langfun/core/structured/schema_generation.py +1 -1
- langfun/core/structured/schema_test.py +130 -29
- langfun/core/structured/scoring.py +125 -19
- langfun/core/structured/scoring_test.py +30 -0
- langfun/core/structured/tokenization.py +64 -0
- langfun/core/structured/tokenization_test.py +48 -0
- langfun/core/template.py +115 -1
- langfun/core/template_test.py +71 -1
- langfun/core/templates/conversation.py +9 -0
- langfun/core/templates/conversation_test.py +4 -3
- langfun/core/templates/selfplay_test.py +10 -2
- langfun-0.1.2.dev202501140804.dist-info/METADATA +225 -0
- langfun-0.1.2.dev202501140804.dist-info/RECORD +153 -0
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/WHEEL +1 -1
- langfun/core/coding/python/errors.py +0 -108
- langfun/core/coding/python/errors_test.py +0 -99
- langfun/core/coding/python/permissions.py +0 -90
- langfun/core/coding/python/permissions_test.py +0 -86
- langfun/core/structured/prompting.py +0 -238
- langfun/core/text_formatting.py +0 -162
- langfun/core/text_formatting_test.py +0 -47
- langfun-0.0.2.dev20240429.dist-info/METADATA +0 -100
- langfun-0.0.2.dev20240429.dist-info/RECORD +0 -108
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240429.dist-info → langfun-0.1.2.dev202501140804.dist-info}/top_level.txt +0 -0
langfun/core/modalities/mime.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2024 The Langfun Authors
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,20 +13,30 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
"""MIME type data."""
|
15
15
|
|
16
|
-
import
|
17
|
-
|
16
|
+
import base64
|
17
|
+
import functools
|
18
|
+
from typing import Annotated, Any, Iterable, Type, Union
|
18
19
|
import langfun.core as lf
|
19
20
|
import pyglove as pg
|
20
21
|
import requests
|
21
22
|
|
22
23
|
|
23
|
-
|
24
|
-
|
24
|
+
try:
|
25
|
+
import magic # pylint: disable=g-import-not-at-top
|
26
|
+
from_buffer = magic.from_buffer
|
27
|
+
except ImportError:
|
28
|
+
def from_buffer(*unused_args, **unused_kwargs):
|
29
|
+
raise RuntimeError(
|
30
|
+
'Please install "langfun[mime-auto]" to enable automatic MIME support.'
|
31
|
+
)
|
25
32
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
33
|
+
|
34
|
+
class Mime(lf.Modality):
|
35
|
+
"""Base for MIME data."""
|
36
|
+
|
37
|
+
# The regular expression that describes the MIME type str.
|
38
|
+
# If None, the MIME type is dynamic. Subclass could override.
|
39
|
+
MIME_PREFIX = None
|
30
40
|
|
31
41
|
uri: Annotated[str | None, 'The URI for locating the MIME data. '] = None
|
32
42
|
|
@@ -34,6 +44,86 @@ class MimeType(lf.Modality):
|
|
34
44
|
Union[str, bytes, None], 'The raw content of the MIME type.'
|
35
45
|
] = None
|
36
46
|
|
47
|
+
@functools.cached_property
|
48
|
+
def mime_type(self) -> str:
|
49
|
+
"""Returns the MIME type."""
|
50
|
+
mime = from_buffer((self.to_bytes()), mime=True)
|
51
|
+
if (
|
52
|
+
self.MIME_PREFIX
|
53
|
+
and not mime.lower().startswith(self.MIME_PREFIX)
|
54
|
+
# NOTE(daiyip): libmagic fails to detect the MIME type of some binary
|
55
|
+
# files.
|
56
|
+
and mime != 'application/octet-stream'
|
57
|
+
):
|
58
|
+
raise ValueError(
|
59
|
+
f'Expected MIME type: {self.MIME_PREFIX}, Encountered: {mime}'
|
60
|
+
)
|
61
|
+
return mime
|
62
|
+
|
63
|
+
@functools.cached_property
|
64
|
+
def is_text(self) -> bool:
|
65
|
+
return self.mime_type.startswith((
|
66
|
+
'text/',
|
67
|
+
'application/javascript',
|
68
|
+
'application/json',
|
69
|
+
'application/ld+json',
|
70
|
+
'application/plain',
|
71
|
+
'application/rtf',
|
72
|
+
'application/xhtml+xml',
|
73
|
+
'application/xml',
|
74
|
+
'application/x-javascript',
|
75
|
+
'application/x-python-code',
|
76
|
+
'application/x-tex',
|
77
|
+
'application/x-typescript',
|
78
|
+
'application/x-yaml',
|
79
|
+
))
|
80
|
+
|
81
|
+
@property
|
82
|
+
def is_binary(self) -> bool:
|
83
|
+
"""Returns True if the MIME type is a binary type."""
|
84
|
+
return not self.is_text
|
85
|
+
|
86
|
+
def to_text(self) -> str:
|
87
|
+
"""Returns the text content of the MIME type."""
|
88
|
+
if not self.is_text:
|
89
|
+
raise lf.ModalityError(
|
90
|
+
f'MIME type {self.mime_type!r} cannot be converted to text.'
|
91
|
+
)
|
92
|
+
return self.to_bytes().decode()
|
93
|
+
|
94
|
+
def is_compatible(
|
95
|
+
self, mime_types: str | Iterable[str]
|
96
|
+
) -> bool:
|
97
|
+
"""Returns True if this object is compatible with any of the MIME types."""
|
98
|
+
if isinstance(mime_types, str):
|
99
|
+
mime_types = {mime_types}
|
100
|
+
return self._is_compatible(mime_types)
|
101
|
+
|
102
|
+
def _is_compatible(self, mime_types: Iterable[str]):
|
103
|
+
return self.mime_type in mime_types
|
104
|
+
|
105
|
+
def make_compatible(
|
106
|
+
self,
|
107
|
+
mime_types: str | Iterable[str]
|
108
|
+
) -> Union['Mime', list['Mime']]:
|
109
|
+
"""Makes compatible MIME objects from this object."""
|
110
|
+
if isinstance(mime_types, str):
|
111
|
+
mime_types = {mime_types}
|
112
|
+
if not self._is_compatible(mime_types):
|
113
|
+
raise lf.ModalityError(
|
114
|
+
f'MIME type {self.mime_type!r} cannot be converted to supported '
|
115
|
+
f'types: {mime_types!r}.'
|
116
|
+
)
|
117
|
+
return self._make_compatible(mime_types)
|
118
|
+
|
119
|
+
def _make_compatible(
|
120
|
+
self,
|
121
|
+
mime_types: Iterable[str]
|
122
|
+
) -> Union['Mime', list['Mime']]:
|
123
|
+
"""Makes compatible MIME objects from this object."""
|
124
|
+
del mime_types
|
125
|
+
return self
|
126
|
+
|
37
127
|
def _on_bound(self):
|
38
128
|
super()._on_bound()
|
39
129
|
if self.uri is None and self.content is None:
|
@@ -43,39 +133,126 @@ class MimeType(lf.Modality):
|
|
43
133
|
if self.content is not None:
|
44
134
|
return self.content
|
45
135
|
|
46
|
-
|
47
|
-
if self.uri.lower().startswith(('http:', 'https:', 'ftp:')):
|
48
|
-
content = requests.get(
|
49
|
-
self.uri,
|
50
|
-
headers={'User-Agent': 'Langfun'},
|
51
|
-
).content
|
52
|
-
else:
|
53
|
-
content = pg.io.readfile(self.uri, mode='rb')
|
54
|
-
self.rebind(content=content, skip_notification=True)
|
136
|
+
self.rebind(content=self.download(self.uri), skip_notification=True)
|
55
137
|
return self.content
|
56
138
|
|
139
|
+
@property
|
140
|
+
def content_uri(self) -> str:
|
141
|
+
"""Returns the URI with encoded content."""
|
142
|
+
base64_content = base64.b64encode(self.to_bytes()).decode()
|
143
|
+
return f'data:{self.mime_type};base64,{base64_content}'
|
144
|
+
|
145
|
+
@property
|
146
|
+
def embeddable_uri(self) -> str:
|
147
|
+
"""Returns the URI that can be embedded in HTML."""
|
148
|
+
if self.uri and self.uri.lower().startswith(('http:', 'https:', 'ftp:')):
|
149
|
+
return self.uri
|
150
|
+
return self.content_uri
|
151
|
+
|
57
152
|
@classmethod
|
58
|
-
def from_uri(cls, uri: str, **kwargs) -> '
|
153
|
+
def from_uri(cls, uri: str, **kwargs) -> 'Mime':
|
154
|
+
if cls is Mime:
|
155
|
+
content = cls.download(uri)
|
156
|
+
mime = from_buffer(content, mime=True).lower()
|
157
|
+
return cls.class_from_mime_type(mime)(uri=uri, content=content, **kwargs)
|
59
158
|
return cls(uri=uri, content=None, **kwargs)
|
60
159
|
|
61
160
|
@classmethod
|
62
|
-
def from_bytes(cls, content: bytes | str, **kwargs) -> '
|
161
|
+
def from_bytes(cls, content: bytes | str, **kwargs) -> 'Mime':
|
162
|
+
if cls is Mime:
|
163
|
+
mime = from_buffer(content, mime=True).lower()
|
164
|
+
return cls.class_from_mime_type(mime)(content=content, **kwargs)
|
63
165
|
return cls(content=content, **kwargs)
|
64
166
|
|
167
|
+
@classmethod
|
168
|
+
def class_from_mime_type(cls, mime_type: str) -> Type['Mime']:
|
169
|
+
"""Returns the subclass whose MIME_PREFIX matches the MIME type, else `cls`."""
|
170
|
+
for subcls in cls.__subclasses__():
|
171
|
+
if subcls.MIME_PREFIX is not None and mime_type.startswith(
|
172
|
+
subcls.MIME_PREFIX):
|
173
|
+
return subcls
|
174
|
+
return cls
|
175
|
+
|
176
|
+
@classmethod
|
177
|
+
def download(cls, uri: str) -> bytes | str:
|
178
|
+
"""Downloads the content of the given URI."""
|
179
|
+
if uri.lower().startswith(('http:', 'https:', 'ftp:')):
|
180
|
+
return requests.get(
|
181
|
+
uri,
|
182
|
+
headers={'User-Agent': 'Mozilla/5.0'},
|
183
|
+
).content
|
184
|
+
else:
|
185
|
+
content = pg.io.readfile(uri, mode='rb')
|
186
|
+
assert content is not None
|
187
|
+
return content
|
188
|
+
|
189
|
+
def _html_tree_view_content(
|
190
|
+
self,
|
191
|
+
**kwargs) -> str:
|
192
|
+
return self._raw_html()
|
65
193
|
|
66
|
-
|
67
|
-
|
194
|
+
def _html_tree_view(
|
195
|
+
self,
|
196
|
+
view: pg.views.HtmlTreeView,
|
197
|
+
extra_flags: dict[str, Any] | None = None,
|
198
|
+
**kwargs
|
199
|
+
):
|
200
|
+
extra_flags = extra_flags if extra_flags is not None else {}
|
201
|
+
raw_mime_content = extra_flags.get('raw_mime_content', False)
|
202
|
+
display_modality_when_hover = extra_flags.get(
|
203
|
+
'display_modality_when_hover', False
|
204
|
+
)
|
205
|
+
if raw_mime_content:
|
206
|
+
kwargs['enable_summary'] = False
|
207
|
+
elif display_modality_when_hover:
|
208
|
+
kwargs.update(
|
209
|
+
enable_summary=True,
|
210
|
+
enable_summary_tooltip=True,
|
211
|
+
)
|
212
|
+
return super()._html_tree_view(
|
213
|
+
view=view, extra_flags=extra_flags, **kwargs
|
214
|
+
)
|
215
|
+
|
216
|
+
def _html_tree_view_summary(
|
217
|
+
self,
|
218
|
+
*,
|
219
|
+
view: pg.views.HtmlTreeView,
|
220
|
+
extra_flags: dict[str, Any] | None = None,
|
221
|
+
**kwargs
|
222
|
+
):
|
223
|
+
extra_flags = extra_flags or {}
|
224
|
+
if extra_flags.get('display_modality_when_hover', False):
|
225
|
+
def summary_tooltip(*args, content: str | None = None, **kwargs):
|
226
|
+
del content
|
227
|
+
return view.tooltip(*args, content=self._raw_html(), **kwargs)
|
228
|
+
else:
|
229
|
+
summary_tooltip = None
|
230
|
+
return super()._html_tree_view_summary(
|
231
|
+
view=view,
|
232
|
+
summary_tooltip_fn=summary_tooltip,
|
233
|
+
extra_flags=extra_flags,
|
234
|
+
**kwargs
|
235
|
+
)
|
236
|
+
|
237
|
+
def _raw_html(self) -> str:
|
238
|
+
if self.uri and self.uri.lower().startswith(('http:', 'https:', 'ftp:')):
|
239
|
+
uri = self.uri
|
240
|
+
else:
|
241
|
+
uri = self.content_uri
|
242
|
+
return self._mime_control_for(uri)
|
243
|
+
|
244
|
+
def _mime_control_for(self, uri) -> str:
|
245
|
+
return f'<embed type="{self.mime_type}" src="{uri}"/>'
|
246
|
+
|
247
|
+
|
248
|
+
@pg.use_init_args(['mime', 'content', 'uri'])
|
249
|
+
class Custom(Mime):
|
68
250
|
"""Custom MIME data."""
|
69
251
|
|
70
|
-
|
252
|
+
mime: Annotated[
|
71
253
|
str, 'The MIME type of the data. E.g. text/plain, or image/png. '
|
72
254
|
]
|
73
255
|
|
74
256
|
@property
|
75
257
|
def mime_type(self) -> str:
|
76
|
-
return self.
|
77
|
-
|
78
|
-
|
79
|
-
class PDF(Custom):
|
80
|
-
"""PDF document."""
|
81
|
-
type = 'application/pdf'
|
258
|
+
return self.mime
|
@@ -12,48 +12,129 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
"""MIME tests."""
|
15
|
+
import inspect
|
15
16
|
import unittest
|
16
17
|
from unittest import mock
|
17
18
|
|
19
|
+
import langfun.core as lf
|
18
20
|
from langfun.core.modalities import mime
|
19
21
|
import pyglove as pg
|
20
22
|
|
21
23
|
|
22
24
|
def mock_request(*args, **kwargs):
|
23
25
|
del args, kwargs
|
24
|
-
return pg.Dict(content='foo')
|
26
|
+
return pg.Dict(content=b'foo')
|
25
27
|
|
26
28
|
|
27
29
|
def mock_readfile(*args, **kwargs):
|
28
30
|
del args, kwargs
|
29
|
-
return 'bar'
|
31
|
+
return b'bar'
|
30
32
|
|
31
33
|
|
32
34
|
class CustomMimeTest(unittest.TestCase):
|
33
35
|
|
34
|
-
def
|
35
|
-
|
36
|
-
self.
|
36
|
+
def test_is_text(self):
|
37
|
+
self.assertTrue(mime.Custom('text/plain', b'foo').is_text)
|
38
|
+
self.assertTrue(mime.Custom('text/xml', b'foo').is_text)
|
39
|
+
self.assertTrue(mime.Custom('application/json', b'foo').is_text)
|
40
|
+
self.assertTrue(mime.Custom('application/x-python-code', b'foo').is_text)
|
41
|
+
self.assertFalse(mime.Custom('application/pdf', b'foo').is_text)
|
42
|
+
self.assertFalse(mime.Custom('application/octet-stream', b'foo').is_text)
|
43
|
+
|
44
|
+
def test_from_byes(self):
|
45
|
+
content = mime.Mime.from_bytes(b'hello')
|
46
|
+
self.assertIs(content.__class__, mime.Mime)
|
47
|
+
|
48
|
+
content = mime.Custom('text/plain', b'foo')
|
49
|
+
self.assertEqual(content.to_bytes(), b'foo')
|
37
50
|
self.assertEqual(content.mime_type, 'text/plain')
|
51
|
+
self.assertTrue(content.is_text)
|
52
|
+
self.assertFalse(content.is_binary)
|
53
|
+
self.assertEqual(content.to_text(), 'foo')
|
54
|
+
self.assertTrue(content.is_compatible('text/plain'))
|
55
|
+
self.assertFalse(content.is_compatible('text/xml'))
|
56
|
+
self.assertIs(content.make_compatible('text/plain'), content)
|
57
|
+
|
58
|
+
with self.assertRaisesRegex(
|
59
|
+
lf.ModalityError, '.* cannot be converted to supported types'
|
60
|
+
):
|
61
|
+
content.make_compatible('application/pdf')
|
38
62
|
|
39
63
|
with self.assertRaisesRegex(
|
40
64
|
ValueError, 'Either uri or content must be provided.'
|
41
65
|
):
|
42
66
|
mime.Custom('text/plain')
|
43
67
|
|
68
|
+
def test_uri(self):
|
69
|
+
content = mime.Custom.from_uri('http://mock/web/a.txt', mime='text/plain')
|
70
|
+
with mock.patch('requests.get') as mock_requests_stub:
|
71
|
+
mock_requests_stub.side_effect = mock_request
|
72
|
+
self.assertEqual(content.uri, 'http://mock/web/a.txt')
|
73
|
+
self.assertEqual(content.content_uri, 'data:text/plain;base64,Zm9v')
|
74
|
+
self.assertEqual(content.embeddable_uri, 'http://mock/web/a.txt')
|
75
|
+
|
76
|
+
content = mime.Custom.from_uri('a.txt', mime='text/plain')
|
77
|
+
with mock.patch('pyglove.io.readfile') as mock_readfile_stub:
|
78
|
+
mock_readfile_stub.side_effect = mock_readfile
|
79
|
+
self.assertEqual(content.uri, 'a.txt')
|
80
|
+
self.assertEqual(content.content_uri, 'data:text/plain;base64,YmFy')
|
81
|
+
self.assertEqual(content.embeddable_uri, 'data:text/plain;base64,YmFy')
|
82
|
+
|
44
83
|
def test_from_uri(self):
|
45
|
-
content = mime.Custom.from_uri('http://mock/web/a.txt',
|
84
|
+
content = mime.Custom.from_uri('http://mock/web/a.txt', mime='text/plain')
|
46
85
|
with mock.patch('requests.get') as mock_requests_stub:
|
47
86
|
mock_requests_stub.side_effect = mock_request
|
48
|
-
self.assertEqual(content.to_bytes(), 'foo')
|
87
|
+
self.assertEqual(content.to_bytes(), b'foo')
|
49
88
|
self.assertEqual(content.mime_type, 'text/plain')
|
50
89
|
|
51
|
-
content = mime.Custom.from_uri('a.txt',
|
90
|
+
content = mime.Custom.from_uri('a.txt', mime='text/plain')
|
52
91
|
with mock.patch('pyglove.io.readfile') as mock_readfile_stub:
|
53
92
|
mock_readfile_stub.side_effect = mock_readfile
|
54
|
-
self.assertEqual(content.to_bytes(), 'bar')
|
93
|
+
self.assertEqual(content.to_bytes(), b'bar')
|
55
94
|
self.assertEqual(content.mime_type, 'text/plain')
|
56
95
|
|
96
|
+
def assert_html_content(self, html, expected):
|
97
|
+
expected = inspect.cleandoc(expected).strip()
|
98
|
+
actual = html.content.strip()
|
99
|
+
if actual != expected:
|
100
|
+
print(actual)
|
101
|
+
self.assertEqual(actual, expected)
|
102
|
+
|
103
|
+
def test_html(self):
|
104
|
+
self.assert_html_content(
|
105
|
+
mime.Custom('text/plain', b'foo').to_html(
|
106
|
+
enable_summary_tooltip=False,
|
107
|
+
enable_key_tooltip=False,
|
108
|
+
),
|
109
|
+
"""
|
110
|
+
<details open class="pyglove custom"><summary><div class="summary-title">Custom(...)</div></summary><embed type="text/plain" src="data:text/plain;base64,Zm9v"/></details>
|
111
|
+
"""
|
112
|
+
)
|
113
|
+
self.assert_html_content(
|
114
|
+
mime.Custom('text/plain', b'foo').to_html(
|
115
|
+
enable_summary_tooltip=False,
|
116
|
+
enable_key_tooltip=False,
|
117
|
+
extra_flags=dict(
|
118
|
+
raw_mime_content=True,
|
119
|
+
)
|
120
|
+
),
|
121
|
+
"""
|
122
|
+
<embed type="text/plain" src="data:text/plain;base64,Zm9v"/>
|
123
|
+
"""
|
124
|
+
)
|
125
|
+
self.assert_html_content(
|
126
|
+
mime.Custom('text/plain', b'foo').to_html(
|
127
|
+
enable_summary_tooltip=False,
|
128
|
+
enable_key_tooltip=False,
|
129
|
+
extra_flags=dict(
|
130
|
+
display_modality_when_hover=True,
|
131
|
+
)
|
132
|
+
),
|
133
|
+
"""
|
134
|
+
<details open class="pyglove custom"><summary><div class="summary-title">Custom(...)</div><span class="tooltip"><embed type="text/plain" src="data:text/plain;base64,Zm9v"/></span></summary><embed type="text/plain" src="data:text/plain;base64,Zm9v"/></details>
|
135
|
+
"""
|
136
|
+
)
|
137
|
+
|
57
138
|
|
58
139
|
if __name__ == '__main__':
|
59
140
|
unittest.main()
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# Copyright 2023 The Langfun Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Microsoft Office file types."""
|
15
|
+
|
16
|
+
import base64
|
17
|
+
import io
|
18
|
+
import os
|
19
|
+
from typing import Iterable
|
20
|
+
from langfun.core.modalities import mime
|
21
|
+
from langfun.core.modalities import pdf
|
22
|
+
import requests
|
23
|
+
|
24
|
+
|
25
|
+
class Xlsx(mime.Mime):
|
26
|
+
"""Xlsx file type."""
|
27
|
+
|
28
|
+
MIME_PREFIX = (
|
29
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
30
|
+
)
|
31
|
+
|
32
|
+
def _raw_html(self) -> str:
|
33
|
+
try:
|
34
|
+
import pandas as pd # pylint: disable=g-import-not-at-top
|
35
|
+
import openpyxl # pylint: disable=g-import-not-at-top, unused-import
|
36
|
+
df = pd.read_excel(io.BytesIO(self.to_bytes()))
|
37
|
+
return df.to_html()
|
38
|
+
except ImportError as e:
|
39
|
+
raise RuntimeError(
|
40
|
+
'Please install "langfun[mime-xlsx]" to enable XLSX support.'
|
41
|
+
) from e
|
42
|
+
|
43
|
+
def _is_compatible(self, mime_types: Iterable[str]) -> bool:
|
44
|
+
return bool(set(mime_types).intersection([
|
45
|
+
'text/html',
|
46
|
+
'text/plain',
|
47
|
+
]))
|
48
|
+
|
49
|
+
def _make_compatible(self, mime_types: Iterable[str]) -> mime.Mime:
|
50
|
+
"""Returns the MimeType of the converted file."""
|
51
|
+
del mime_types
|
52
|
+
return mime.Mime(uri=self.uri, content=self._raw_html())
|
53
|
+
|
54
|
+
|
55
|
+
class Docx(mime.Mime):
|
56
|
+
"""Docx file type."""
|
57
|
+
|
58
|
+
MIME_PREFIX = (
|
59
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
60
|
+
)
|
61
|
+
|
62
|
+
def to_xml(self) -> str:
|
63
|
+
try:
|
64
|
+
import docx # pylint: disable=g-import-not-at-top
|
65
|
+
doc = docx.Document(io.BytesIO(self.to_bytes()))
|
66
|
+
return str(doc.element.xml)
|
67
|
+
except ImportError as e:
|
68
|
+
raise RuntimeError(
|
69
|
+
'Please install "langfun[mime-docx]" to enable Docx support.'
|
70
|
+
) from e
|
71
|
+
|
72
|
+
def _repr_html_(self) -> str:
|
73
|
+
return self.to_xml()
|
74
|
+
|
75
|
+
def _is_compatible(self, mime_types: Iterable[str]) -> bool:
|
76
|
+
return bool(set(mime_types).intersection([
|
77
|
+
'application/xml',
|
78
|
+
'text/xml',
|
79
|
+
'text/plain',
|
80
|
+
]))
|
81
|
+
|
82
|
+
def _make_compatible(self, mime_types: Iterable[str]) -> mime.Mime:
|
83
|
+
"""Returns the MimeType of the converted file."""
|
84
|
+
del mime_types
|
85
|
+
return mime.Mime(uri=self.uri, content=self.to_xml())
|
86
|
+
|
87
|
+
|
88
|
+
class Pptx(mime.Mime):
|
89
|
+
"""Pptx file type."""
|
90
|
+
|
91
|
+
MIME_PREFIX = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
92
|
+
API_URL = 'https://v2.convertapi.com/convert/pptx/to/pdf'
|
93
|
+
|
94
|
+
def to_pdf(self, convert_api_key: str | None = None) -> pdf.PDF:
|
95
|
+
api_key = convert_api_key or os.environ.get('CONVERT_API_KEY')
|
96
|
+
url = f'{self.API_URL}?Secret={api_key}'
|
97
|
+
|
98
|
+
json = {
|
99
|
+
'Parameters': [{
|
100
|
+
'Name': 'File',
|
101
|
+
'FileValue': {
|
102
|
+
'Name': os.path.basename(self.uri) if self.uri else 'tmp.pptx',
|
103
|
+
'Data': base64.b64encode(self.to_bytes()).decode('utf-8'),
|
104
|
+
},
|
105
|
+
}]
|
106
|
+
}
|
107
|
+
response = requests.post(url, json=json).json()
|
108
|
+
base64_pdf = response['Files'][0]['FileData']
|
109
|
+
return pdf.PDF.from_bytes(base64.b64decode(base64_pdf))
|
110
|
+
|
111
|
+
def _is_compatible(self, mime_types: Iterable[str]) -> bool:
|
112
|
+
return 'application/pdf' in mime_types
|
113
|
+
|
114
|
+
def _make_compatible(self, mime_types: Iterable[str]) -> mime.Mime:
|
115
|
+
"""Returns the MimeType of the converted file."""
|
116
|
+
del mime_types
|
117
|
+
return self.to_pdf()
|