langfun 0.0.2.dev20240527__py3-none-any.whl → 0.0.2.dev20240531__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langfun might be problematic. Click here for more details.

langfun/__init__.py CHANGED
@@ -23,6 +23,8 @@ Schema = structured.Schema
23
23
  MISSING = structured.MISSING
24
24
  UNKNOWN = structured.UNKNOWN
25
25
 
26
+ include_method_in_prompt = structured.include_method_in_prompt
27
+
26
28
  MappingExample = structured.MappingExample
27
29
 
28
30
  call = structured.call
@@ -418,8 +418,9 @@ class LanguageModelTest(unittest.TestCase):
418
418
  with contextlib.redirect_stdout(string_io):
419
419
  self.assertEqual(
420
420
  lm(message_lib.UserMessage(
421
- 'hi {{image}}', image=Image()), debug=True),
422
- 'hi {{image}}')
421
+ 'hi <<[[image]]>>', image=Image()), debug=True),
422
+ 'hi <<[[image]]>>'
423
+ )
423
424
 
424
425
  debug_info = string_io.getvalue()
425
426
  self.assertIn('[0] LM INFO', debug_info)
@@ -102,7 +102,7 @@ class GenAITest(unittest.TestCase):
102
102
 
103
103
  def test_content_from_message_mm(self):
104
104
  message = lf.UserMessage(
105
- 'This is an {{image}}, what is it?',
105
+ 'This is an <<[[image]]>>, what is it?',
106
106
  image=lf_modalities.Image.from_bytes(example_image),
107
107
  )
108
108
 
@@ -164,7 +164,7 @@ class OpenAITest(unittest.TestCase):
164
164
  self.assertEqual(
165
165
  lm(
166
166
  lf.UserMessage(
167
- 'hello {{image}}',
167
+ 'hello <<[[image]]>>',
168
168
  image=lf_modalities.Image.from_uri('https://fake/image')
169
169
  ),
170
170
  sampling_options=lf.LMSamplingOptions(n=2)
@@ -74,7 +74,7 @@ class VertexAITest(unittest.TestCase):
74
74
 
75
75
  def test_content_from_message_mm(self):
76
76
  message = lf.UserMessage(
77
- 'This is an {{image}}, what is it?',
77
+ 'This is an <<[[image]]>>, what is it?',
78
78
  image=lf_modalities.Image.from_bytes(example_image),
79
79
  )
80
80
 
langfun/core/message.py CHANGED
@@ -144,7 +144,7 @@ class Message(natural_language.NaturalLanguageFormattable, pg.Object):
144
144
  def from_value(cls, value: Union[str, 'Message']) -> 'Message':
145
145
  """Creates a message from a value or return value itself if a Message."""
146
146
  if isinstance(value, modality.Modality):
147
- return cls('{{object}}', object=value)
147
+ return cls('<<[[object]]>>', object=value)
148
148
  if isinstance(value, Message):
149
149
  return value
150
150
  return cls(value)
@@ -54,7 +54,7 @@ class MessageTest(unittest.TestCase):
54
54
  self.assertTrue(
55
55
  pg.eq(
56
56
  message.UserMessage.from_value(CustomModality('foo')),
57
- message.UserMessage('{{object}}', object=CustomModality('foo')),
57
+ message.UserMessage('<<[[object]]>>', object=CustomModality('foo')),
58
58
  )
59
59
  )
60
60
  m = message.UserMessage('hi')
@@ -258,13 +258,16 @@ class MessageTest(unittest.TestCase):
258
258
 
259
259
  def test_referred_modalities(self):
260
260
  m1 = message.UserMessage(
261
- 'hi, this is a {{img1}} and {{x.img2}}',
261
+ 'hi, this is a <<[[img1]]>> and <<[[x.img2]]>>',
262
262
  img1=CustomModality('foo'),
263
263
  x=dict(img2=CustomModality('bar')),
264
264
  )
265
265
  m2 = message.SystemMessage('class Question:\n image={{img1}}', source=m1)
266
266
  m3 = message.AIMessage(
267
- 'This is the {{output_image}} based on {{x.img2}}, {{unknown_var}}',
267
+ (
268
+ 'This is the <<[[output_image]]>> based on <<[[x.img2]]>>, '
269
+ '{{unknown_var}}'
270
+ ),
268
271
  output_image=CustomModality('bar'),
269
272
  source=m2,
270
273
  )
@@ -279,8 +282,8 @@ class MessageTest(unittest.TestCase):
279
282
  def test_chunking(self):
280
283
  m = message.UserMessage(
281
284
  inspect.cleandoc("""
282
- Hi, this is {{a}} and this is {{b}}.
283
- {{x.c}} {{something else
285
+ Hi, this is <<[[a]]>> and this is {{b}}.
286
+ <<[[x.c]]>> {{something else
284
287
  """),
285
288
  a=CustomModality('foo'),
286
289
  x=dict(c=CustomModality('bar')),
@@ -294,7 +297,7 @@ class MessageTest(unittest.TestCase):
294
297
  CustomModality('foo'),
295
298
  'and this is {{b}}.',
296
299
  CustomModality('bar'),
297
- ' {{something else',
300
+ '{{something else',
298
301
  ],
299
302
  )
300
303
  )
@@ -304,10 +307,10 @@ class MessageTest(unittest.TestCase):
304
307
  message.AIMessage(
305
308
  inspect.cleandoc("""
306
309
  Hi, this is
307
- {{obj0}}
310
+ <<[[obj0]]>>
308
311
  and this is {{b}}.
309
- {{obj1}}
310
- {{something else
312
+ <<[[obj1]]>>
313
+ {{something else
311
314
  """),
312
315
  obj0=pg.Ref(m.a),
313
316
  obj1=pg.Ref(m.x.c),
@@ -17,8 +17,12 @@
17
17
  # pylint: disable=g-bad-import-order
18
18
  # pylint: disable=g-import-not-at-top
19
19
 
20
+ from langfun.core.modalities.audio import Audio
20
21
  from langfun.core.modalities.mime import MimeType
21
22
  from langfun.core.modalities.mime import Custom
23
+ from langfun.core.modalities.ms_office import Docx
24
+ from langfun.core.modalities.ms_office import Pptx
25
+ from langfun.core.modalities.ms_office import Xlsx
22
26
  from langfun.core.modalities.image import Image
23
27
  from langfun.core.modalities.pdf import PDF
24
28
  from langfun.core.modalities.video import Video
@@ -0,0 +1,30 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Audio types."""
15
+
16
+ import functools
17
+ from langfun.core.modalities import mime
18
+
19
+
20
class Audio(mime.MimeType):
  """Audio content, i.e. a MIME type under the `audio` prefix."""

  MIME_PREFIX = 'audio'

  @functools.cached_property
  def audio_format(self) -> str:
    """Returns `mime_type` with the leading `audio/` stripped."""
    prefix = self.MIME_PREFIX + '/'
    detected = self.mime_type
    return detected.removeprefix(prefix)

  def _html(self, uri: str) -> str:
    """Returns an HTML `<audio>` element whose source is `uri`."""
    source_tag = f'<source src="{uri}">'
    return f'<audio controls> {source_tag} </audio>'
@@ -0,0 +1,63 @@
1
+ # Copyright 2024 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Audio tests."""
15
+ import unittest
16
+ from unittest import mock
17
+
18
+ from langfun.core.modalities import audio as audio_lib
19
+ import pyglove as pg
20
+
21
+
22
+ content_bytes = (
23
+ b'RIFF$\x00\x00\x00WAVEfmt'
24
+ b' \x10\x00\x00\x00\x01\x00\x01\x00\x00\x04\x00\x00\x00\x04\x00\x00\x01\x00\x08\x00data\x00\x00\x00\x00'
25
+ )
26
+
27
+
28
def mock_request(*args, **kwargs):
  """Fake request function: ignores its arguments, returns the canned bytes."""
  del args, kwargs  # Unused.
  response = pg.Dict(content=content_bytes)
  return response
31
+
32
+
33
class AudioTest(unittest.TestCase):
  """Tests for `Audio` objects created from in-memory bytes."""

  def test_audio_content(self):
    wav = audio_lib.Audio.from_bytes(content_bytes)
    self.assertEqual(wav.mime_type, 'audio/x-wav')
    self.assertEqual(wav.audio_format, 'x-wav')
    # The bytes handed in must come back unchanged.
    self.assertEqual(wav.to_bytes(), content_bytes)

  def test_bad_audio(self):
    not_audio = audio_lib.Audio.from_bytes(b'bad')
    # Accessing the format of non-audio content must be rejected.
    with self.assertRaisesRegex(ValueError, 'Expected MIME type'):
      _ = not_audio.audio_format
45
+
46
+
47
class AudioFileTest(unittest.TestCase):
  """Tests for `Audio` objects created from a URI."""

  def test_audio_file(self):
    uri = 'http://mock/web/a.wav'
    wav = audio_lib.Audio.from_uri(uri)
    # `requests.get` is replaced so that no network access happens.
    with mock.patch('requests.get') as fake_get:
      fake_get.side_effect = mock_request
      self.assertEqual(wav.audio_format, 'x-wav')
      self.assertEqual(wav.mime_type, 'audio/x-wav')
      self.assertEqual(
          wav._repr_html_(),
          '<audio controls> <source src="http://mock/web/a.wav"> </audio>',
      )
      self.assertEqual(wav.to_bytes(), content_bytes)
60
+
61
+
62
+ if __name__ == '__main__':
63
+ unittest.main()
@@ -0,0 +1,83 @@
1
+ # Copyright 2023 The Langfun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Microsoft Office file types."""
15
+
16
+ import base64
17
+ import io
18
+ import os
19
+ from langfun.core.modalities import mime
20
+ from langfun.core.modalities import pdf
21
+ import requests
22
+
23
+
24
class Xlsx(mime.MimeType):
  """Xlsx file type."""

  MIME_PREFIX = (
      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  )

  def to_html(self) -> str:
    """Returns the spreadsheet rendered as HTML via a pandas DataFrame."""
    # pandas is imported lazily so it is only needed when this is called.
    import pandas as pd  # pylint: disable=g-import-not-at-top

    workbook = io.BytesIO(self.to_bytes())
    return pd.read_excel(workbook).to_html()

  def _repr_html_(self) -> str:
    """Returns the same HTML as `to_html`."""
    return self.to_html()
39
+
40
+
41
class Docx(mime.MimeType):
  """Docx file type."""

  MIME_PREFIX = (
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
  )

  def to_xml(self) -> str:
    """Returns the XML of the document's root element as a string."""
    # python-docx is imported lazily so it is only needed when this is called.
    import docx  # pylint: disable=g-import-not-at-top

    stream = io.BytesIO(self.to_bytes())
    document = docx.Document(stream)
    return str(document.element.xml)

  def _repr_html_(self) -> str:
    """Returns the same XML text as `to_xml`."""
    return self.to_xml()
56
+
57
+
58
class Pptx(mime.MimeType):
  """Pptx file type."""

  MIME_PREFIX = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
  API_URL = 'https://v2.convertapi.com/convert/pptx/to/pdf'

  def to_pdf(self, convert_api_key: str | None = None) -> pdf.PDF:
    """Converts this presentation to PDF by posting it to `API_URL`.

    Args:
      convert_api_key: Secret sent to the conversion endpoint. When omitted,
        the `CONVERT_API_KEY` environment variable is used instead.

    Returns:
      A `pdf.PDF` built from the first file in the endpoint's response.

    Raises:
      ValueError: If no API key is passed and `CONVERT_API_KEY` is not set.
    """
    api_key = convert_api_key or os.environ.get('CONVERT_API_KEY')
    if not api_key:
      # Fail before fetching or uploading anything instead of sending the
      # literal string 'None' as the secret.
      raise ValueError(
          'An API key is required: pass `convert_api_key` or set the '
          '`CONVERT_API_KEY` environment variable.'
      )

    # NOTE(review): `uri` is presumably unset for objects created from raw
    # bytes; fall back to a generic name rather than crash in `basename`.
    filename = os.path.basename(self.uri) if self.uri else 'presentation.pptx'

    # `b64encode` returns bytes, which cannot be JSON-serialized; decode to
    # text (base64 output is always ASCII).
    encoded_file = base64.b64encode(self.to_bytes()).decode('ascii')

    url = f'{self.API_URL}?Secret={api_key}'
    payload = {
        'Parameters': [{
            'Name': 'File',
            'FileValue': {
                'Name': filename,
                'Data': encoded_file,
            },
        }]
    }
    response = requests.post(url, json=payload).json()
    base64_pdf = response['Files'][0]['FileData']
    pdf_bytes = base64.b64decode(base64_pdf)
    return pdf.PDF.from_bytes(content=pdf_bytes)