langfun 0.0.2.dev20240527__py3-none-any.whl → 0.0.2.dev20240531__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langfun might be problematic. Click here for more details.
- langfun/__init__.py +2 -0
- langfun/core/language_model_test.py +3 -2
- langfun/core/llms/google_genai_test.py +1 -1
- langfun/core/llms/openai_test.py +1 -1
- langfun/core/llms/vertexai_test.py +1 -1
- langfun/core/message.py +1 -1
- langfun/core/message_test.py +12 -9
- langfun/core/modalities/__init__.py +4 -0
- langfun/core/modalities/audio.py +30 -0
- langfun/core/modalities/audio_test.py +63 -0
- langfun/core/modalities/ms_office.py +83 -0
- langfun/core/modalities/ms_office_test.py +312 -0
- langfun/core/modality.py +2 -2
- langfun/core/modality_test.py +1 -1
- langfun/core/structured/__init__.py +2 -0
- langfun/core/structured/completion.py +3 -1
- langfun/core/structured/completion_test.py +2 -2
- langfun/core/structured/mapping.py +1 -5
- langfun/core/structured/prompting.py +0 -4
- langfun/core/structured/prompting_test.py +8 -6
- langfun/core/structured/schema.py +88 -42
- langfun/core/structured/schema_test.py +87 -34
- langfun/core/template_test.py +1 -1
- {langfun-0.0.2.dev20240527.dist-info → langfun-0.0.2.dev20240531.dist-info}/METADATA +3 -1
- {langfun-0.0.2.dev20240527.dist-info → langfun-0.0.2.dev20240531.dist-info}/RECORD +28 -24
- {langfun-0.0.2.dev20240527.dist-info → langfun-0.0.2.dev20240531.dist-info}/LICENSE +0 -0
- {langfun-0.0.2.dev20240527.dist-info → langfun-0.0.2.dev20240531.dist-info}/WHEEL +0 -0
- {langfun-0.0.2.dev20240527.dist-info → langfun-0.0.2.dev20240531.dist-info}/top_level.txt +0 -0
langfun/__init__.py
CHANGED
(hunk missing from this extract)
langfun/core/language_model_test.py
CHANGED
@@ -418,8 +418,9 @@ class LanguageModelTest(unittest.TestCase):
|
|
418
418
|
with contextlib.redirect_stdout(string_io):
|
419
419
|
self.assertEqual(
|
420
420
|
lm(message_lib.UserMessage(
|
421
|
-
'hi
|
422
|
-
'hi
|
421
|
+
'hi <<[[image]]>>', image=Image()), debug=True),
|
422
|
+
'hi <<[[image]]>>'
|
423
|
+
)
|
423
424
|
|
424
425
|
debug_info = string_io.getvalue()
|
425
426
|
self.assertIn('[0] LM INFO', debug_info)
|
langfun/core/llms/google_genai_test.py
CHANGED
@@ -102,7 +102,7 @@ class GenAITest(unittest.TestCase):
|
|
102
102
|
|
103
103
|
def test_content_from_message_mm(self):
|
104
104
|
message = lf.UserMessage(
|
105
|
-
'This is an
|
105
|
+
'This is an <<[[image]]>>, what is it?',
|
106
106
|
image=lf_modalities.Image.from_bytes(example_image),
|
107
107
|
)
|
108
108
|
|
langfun/core/llms/openai_test.py
CHANGED
(hunk missing from this extract)
langfun/core/llms/vertexai_test.py
CHANGED
@@ -74,7 +74,7 @@ class VertexAITest(unittest.TestCase):
|
|
74
74
|
|
75
75
|
def test_content_from_message_mm(self):
|
76
76
|
message = lf.UserMessage(
|
77
|
-
'This is an
|
77
|
+
'This is an <<[[image]]>>, what is it?',
|
78
78
|
image=lf_modalities.Image.from_bytes(example_image),
|
79
79
|
)
|
80
80
|
|
langfun/core/message.py
CHANGED
@@ -144,7 +144,7 @@ class Message(natural_language.NaturalLanguageFormattable, pg.Object):
|
|
144
144
|
def from_value(cls, value: Union[str, 'Message']) -> 'Message':
|
145
145
|
"""Creates a message from a value or return value itself if a Message."""
|
146
146
|
if isinstance(value, modality.Modality):
|
147
|
-
return cls('
|
147
|
+
return cls('<<[[object]]>>', object=value)
|
148
148
|
if isinstance(value, Message):
|
149
149
|
return value
|
150
150
|
return cls(value)
|
langfun/core/message_test.py
CHANGED
@@ -54,7 +54,7 @@ class MessageTest(unittest.TestCase):
|
|
54
54
|
self.assertTrue(
|
55
55
|
pg.eq(
|
56
56
|
message.UserMessage.from_value(CustomModality('foo')),
|
57
|
-
message.UserMessage('
|
57
|
+
message.UserMessage('<<[[object]]>>', object=CustomModality('foo')),
|
58
58
|
)
|
59
59
|
)
|
60
60
|
m = message.UserMessage('hi')
|
@@ -258,13 +258,16 @@ class MessageTest(unittest.TestCase):
|
|
258
258
|
|
259
259
|
def test_referred_modalities(self):
|
260
260
|
m1 = message.UserMessage(
|
261
|
-
'hi, this is a
|
261
|
+
'hi, this is a <<[[img1]]>> and <<[[x.img2]]>>',
|
262
262
|
img1=CustomModality('foo'),
|
263
263
|
x=dict(img2=CustomModality('bar')),
|
264
264
|
)
|
265
265
|
m2 = message.SystemMessage('class Question:\n image={{img1}}', source=m1)
|
266
266
|
m3 = message.AIMessage(
|
267
|
-
|
267
|
+
(
|
268
|
+
'This is the <<[[output_image]]>> based on <<[[x.img2]]>>, '
|
269
|
+
'{{unknown_var}}'
|
270
|
+
),
|
268
271
|
output_image=CustomModality('bar'),
|
269
272
|
source=m2,
|
270
273
|
)
|
@@ -279,8 +282,8 @@ class MessageTest(unittest.TestCase):
|
|
279
282
|
def test_chunking(self):
|
280
283
|
m = message.UserMessage(
|
281
284
|
inspect.cleandoc("""
|
282
|
-
Hi, this is
|
283
|
-
|
285
|
+
Hi, this is <<[[a]]>> and this is {{b}}.
|
286
|
+
<<[[x.c]]>> {{something else
|
284
287
|
"""),
|
285
288
|
a=CustomModality('foo'),
|
286
289
|
x=dict(c=CustomModality('bar')),
|
@@ -294,7 +297,7 @@ class MessageTest(unittest.TestCase):
|
|
294
297
|
CustomModality('foo'),
|
295
298
|
'and this is {{b}}.',
|
296
299
|
CustomModality('bar'),
|
297
|
-
'
|
300
|
+
'{{something else',
|
298
301
|
],
|
299
302
|
)
|
300
303
|
)
|
@@ -304,10 +307,10 @@ class MessageTest(unittest.TestCase):
|
|
304
307
|
message.AIMessage(
|
305
308
|
inspect.cleandoc("""
|
306
309
|
Hi, this is
|
307
|
-
|
310
|
+
<<[[obj0]]>>
|
308
311
|
and this is {{b}}.
|
309
|
-
|
310
|
-
|
312
|
+
<<[[obj1]]>>
|
313
|
+
{{something else
|
311
314
|
"""),
|
312
315
|
obj0=pg.Ref(m.a),
|
313
316
|
obj1=pg.Ref(m.x.c),
|
langfun/core/modalities/__init__.py
CHANGED
@@ -17,8 +17,12 @@
|
|
17
17
|
# pylint: disable=g-bad-import-order
|
18
18
|
# pylint: disable=g-import-not-at-top
|
19
19
|
|
20
|
+
from langfun.core.modalities.audio import Audio
|
20
21
|
from langfun.core.modalities.mime import MimeType
|
21
22
|
from langfun.core.modalities.mime import Custom
|
23
|
+
from langfun.core.modalities.ms_office import Docx
|
24
|
+
from langfun.core.modalities.ms_office import Pptx
|
25
|
+
from langfun.core.modalities.ms_office import Xlsx
|
22
26
|
from langfun.core.modalities.image import Image
|
23
27
|
from langfun.core.modalities.pdf import PDF
|
24
28
|
from langfun.core.modalities.video import Video
|
langfun/core/modalities/audio.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Copyright 2024 The Langfun Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Audio types."""
|
15
|
+
|
16
|
+
import functools
|
17
|
+
from langfun.core.modalities import mime
|
18
|
+
|
19
|
+
|
20
|
+
class Audio(mime.MimeType):
  """Audio content, i.e. a MIME object whose type falls under `audio`."""

  MIME_PREFIX = 'audio'

  @functools.cached_property
  def audio_format(self) -> str:
    """Returns `mime_type` with a leading `audio/` stripped (e.g. `x-wav`)."""
    prefix = f'{self.MIME_PREFIX}/'
    return self.mime_type.removeprefix(prefix)

  def _html(self, uri: str) -> str:
    """Returns an HTML `<audio>` element whose source points at `uri`."""
    source_tag = f'<source src="{uri}">'
    return f'<audio controls> {source_tag} </audio>'
|
langfun/core/modalities/audio_test.py
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright 2024 The Langfun Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Audio tests."""
|
15
|
+
import unittest
|
16
|
+
from unittest import mock
|
17
|
+
|
18
|
+
from langfun.core.modalities import audio as audio_lib
|
19
|
+
import pyglove as pg
|
20
|
+
|
21
|
+
|
22
|
+
# A minimal 44-byte WAV payload with an empty data chunk, split by header field.
content_bytes = (
    b'RIFF$\x00\x00\x00'      # RIFF chunk id + chunk size (36).
    b'WAVE'                   # Form type.
    b'fmt \x10\x00\x00\x00'   # fmt sub-chunk id + its size (16).
    b'\x01\x00\x01\x00'       # Format tag 1, 1 channel.
    b'\x00\x04\x00\x00'       # Sample rate field (1024).
    b'\x00\x04\x00\x00'       # Byte rate field (1024).
    b'\x01\x00\x08\x00'       # Block align 1, 8 bits per sample.
    b'data\x00\x00\x00\x00'   # data sub-chunk id + size (0).
)
|
26
|
+
|
27
|
+
|
28
|
+
def mock_request(*args, **kwargs):
  """Stand-in for `requests.get`: ignores its arguments, serves `content_bytes`."""
  del args, kwargs  # Unused; accepted only to match any call signature.
  fake_response = pg.Dict(content=content_bytes)
  return fake_response
|
31
|
+
|
32
|
+
|
33
|
+
class AudioTest(unittest.TestCase):
  """Tests for `Audio` objects created from in-memory bytes."""

  def test_audio_content(self):
    """A WAV payload reports its MIME type, format and original bytes."""
    wav = audio_lib.Audio.from_bytes(content_bytes)
    self.assertEqual(wav.mime_type, 'audio/x-wav')
    self.assertEqual(wav.audio_format, 'x-wav')
    self.assertEqual(wav.to_bytes(), content_bytes)

  def test_bad_audio(self):
    """Reading the format of non-audio bytes raises `ValueError`."""
    not_audio = audio_lib.Audio.from_bytes(b'bad')
    with self.assertRaisesRegex(ValueError, 'Expected MIME type'):
      _ = not_audio.audio_format
|
45
|
+
|
46
|
+
|
47
|
+
class AudioFileTest(unittest.TestCase):
  """Tests for `Audio` objects created from a URI."""

  def test_audio_file(self):
    """A URI-backed audio is fetched through the patched `requests.get`."""
    remote_wav = audio_lib.Audio.from_uri('http://mock/web/a.wav')
    # Keyword arguments to `mock.patch` configure the created mock, so this
    # installs `mock_request` as its side effect.
    with mock.patch('requests.get', side_effect=mock_request):
      self.assertEqual(remote_wav.audio_format, 'x-wav')
      self.assertEqual(remote_wav.mime_type, 'audio/x-wav')
      expected_html = (
          '<audio controls> <source src="http://mock/web/a.wav"> </audio>'
      )
      self.assertEqual(remote_wav._repr_html_(), expected_html)
      self.assertEqual(remote_wav.to_bytes(), content_bytes)
|
60
|
+
|
61
|
+
|
62
|
+
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
  unittest.main()
|
langfun/core/modalities/ms_office.py
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# Copyright 2023 The Langfun Authors
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Microsoft Office file types."""
|
15
|
+
|
16
|
+
import base64
|
17
|
+
import io
|
18
|
+
import os
|
19
|
+
from langfun.core.modalities import mime
|
20
|
+
from langfun.core.modalities import pdf
|
21
|
+
import requests
|
22
|
+
|
23
|
+
|
24
|
+
class Xlsx(mime.MimeType):
  """Excel workbook (`.xlsx`) content."""

  MIME_PREFIX = (
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  )

  def to_html(self) -> str:
    """Returns an HTML table produced by loading the bytes with pandas."""
    # Imported lazily so pandas is only needed when this method is called.
    import pandas as pd  # pylint: disable=g-import-not-at-top

    workbook = io.BytesIO(self.to_bytes())
    return pd.read_excel(workbook).to_html()

  def _repr_html_(self) -> str:
    """Returns the same markup as `to_html()`."""
    return self.to_html()
|
39
|
+
|
40
|
+
|
41
|
+
class Docx(mime.MimeType):
  """Word document (`.docx`) content."""

  MIME_PREFIX = (
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
  )

  def to_xml(self) -> str:
    """Returns the XML of the document element as parsed by python-docx."""
    # Imported lazily so python-docx is only needed when this method is called.
    import docx  # pylint: disable=g-import-not-at-top

    stream = io.BytesIO(self.to_bytes())
    document = docx.Document(stream)
    return str(document.element.xml)

  def _repr_html_(self) -> str:
    """Returns the same text as `to_xml()`."""
    return self.to_xml()
|
56
|
+
|
57
|
+
|
58
|
+
class Pptx(mime.MimeType):
  """PowerPoint presentation (`.pptx`) content, convertible to PDF."""

  MIME_PREFIX = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
  API_URL = 'https://v2.convertapi.com/convert/pptx/to/pdf'

  def to_pdf(self, convert_api_key: str | None = None) -> pdf.PDF:
    """Converts the presentation to PDF by posting it to `API_URL`.

    Args:
      convert_api_key: Secret sent to the conversion service. When omitted or
        empty, the `CONVERT_API_KEY` environment variable is used instead.

    Returns:
      A `pdf.PDF` built from the base64-decoded `FileData` of the first entry
      in the response's `Files` list.
    """
    # NOTE(review): `uri` is presumably unset for objects built from raw
    # bytes - confirm against `mime.MimeType`. `os.path.basename(None)` raises
    # TypeError, so fall back to a placeholder file name in that case.
    filename = os.path.basename(self.uri) if self.uri else 'tmp.pptx'
    file_bytes = self.to_bytes()

    api_key = convert_api_key or os.environ.get('CONVERT_API_KEY')
    url = f'{self.API_URL}?Secret={api_key}'

    payload = {
        'Parameters': [{
            'Name': 'File',
            'FileValue': {
                'Name': filename,
                # `b64encode` returns bytes, which the JSON encoder used by
                # `requests` cannot serialize; send the ASCII text instead.
                'Data': base64.b64encode(file_bytes).decode('ascii'),
            },
        }]
    }
    response = requests.post(url, json=payload).json()
    base64_pdf = response['Files'][0]['FileData']
    pdf_bytes = base64.b64decode(base64_pdf)
    return pdf.PDF.from_bytes(content=pdf_bytes)
|