pyconverters-openai_vision 0.5.3__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyconverters_openai_vision/__init__.py +1 -1
- pyconverters_openai_vision/openai_vision.py +36 -15
- {pyconverters_openai_vision-0.5.3.dist-info → pyconverters_openai_vision-0.5.7.dist-info}/METADATA +2 -2
- pyconverters_openai_vision-0.5.7.dist-info/RECORD +7 -0
- {pyconverters_openai_vision-0.5.3.dist-info → pyconverters_openai_vision-0.5.7.dist-info}/WHEEL +1 -1
- pyconverters_openai_vision-0.5.3.dist-info/RECORD +0 -7
- {pyconverters_openai_vision-0.5.3.dist-info → pyconverters_openai_vision-0.5.7.dist-info}/entry_points.txt +0 -0
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""OpenAIVision converter"""
|
|
2
|
-
__version__ = "0.5.3"
|
|
2
|
+
__version__ = "0.5.7"
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import os
|
|
3
|
+
import re
|
|
3
4
|
from enum import Enum
|
|
4
5
|
from logging import Logger
|
|
6
|
+
from re import Pattern
|
|
5
7
|
from typing import List, cast, Type, Dict, Any
|
|
6
8
|
|
|
7
9
|
import filetype as filetype
|
|
@@ -24,12 +26,13 @@ class OpenAIVisionBaseParameters(ConverterParameters):
|
|
|
24
26
|
None, extra="internal"
|
|
25
27
|
)
|
|
26
28
|
prompt: str = Field(
|
|
27
|
-
"
|
|
29
|
+
"""If the attached file is an image: describe the image with a lot of details.",
|
|
30
|
+
If the attached file is a PDF document: convert the PDF document into Markdown format. The output must be just the markdown result without any explanation or introductory prefix.""",
|
|
28
31
|
description="""Contains the prompt as a string""",
|
|
29
32
|
extra="multiline",
|
|
30
33
|
)
|
|
31
34
|
max_tokens: int = Field(
|
|
32
|
-
|
|
35
|
+
16384,
|
|
33
36
|
description="""The maximum number of tokens to generate in the completion.
|
|
34
37
|
The token count of your prompt plus max_tokens cannot exceed the model's context length.
|
|
35
38
|
Most models have a context length of 2048 tokens (except for the newest models, which support 4096).""",
|
|
@@ -85,13 +88,14 @@ class OpenAIVisionBaseParameters(ConverterParameters):
|
|
|
85
88
|
class OpenAIVisionModel(str, Enum):
|
|
86
89
|
gpt_4o_mini = "gpt-4o-mini"
|
|
87
90
|
gpt_4o = "gpt-4o"
|
|
91
|
+
o3_mini = "o3-mini"
|
|
88
92
|
|
|
89
93
|
|
|
90
94
|
class OpenAIVisionParameters(OpenAIVisionBaseParameters):
|
|
91
95
|
model: OpenAIVisionModel = Field(
|
|
92
96
|
OpenAIVisionModel.gpt_4o_mini,
|
|
93
|
-
description="""The [OpenAI model](https://platform.openai.com/docs/models) used for
|
|
94
|
-
|
|
97
|
+
description="""The [OpenAI model](https://platform.openai.com/docs/models) used for vision. Options currently available:</br>
|
|
98
|
+
|
|
95
99
|
""", extra="pipeline-naming-hint"
|
|
96
100
|
)
|
|
97
101
|
|
|
@@ -126,10 +130,24 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
126
130
|
You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
|
|
127
131
|
PREFIX: str = ""
|
|
128
132
|
|
|
129
|
-
def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile
|
|
133
|
+
def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile, kind
|
|
130
134
|
) -> Dict[str, Any]:
|
|
131
135
|
data = source.file.read()
|
|
132
136
|
rv = base64.b64encode(data)
|
|
137
|
+
if kind.mime.startswith("image"):
|
|
138
|
+
binary_block = {
|
|
139
|
+
"type": "image_url",
|
|
140
|
+
"image_url": {
|
|
141
|
+
"url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
else:
|
|
145
|
+
binary_block = {
|
|
146
|
+
"type": "file",
|
|
147
|
+
"file": {
|
|
148
|
+
"filename": source.filename,
|
|
149
|
+
"file_data": f"data:application/pdf;base64,{rv.decode('utf-8')}"}
|
|
150
|
+
}
|
|
133
151
|
messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
|
|
134
152
|
messages.append({"role": "user",
|
|
135
153
|
"content": [
|
|
@@ -137,12 +155,8 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
137
155
|
"type": "text",
|
|
138
156
|
"text": params.prompt
|
|
139
157
|
},
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
"image_url": {
|
|
143
|
-
"url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
|
|
144
|
-
}
|
|
145
|
-
}]})
|
|
158
|
+
binary_block
|
|
159
|
+
]})
|
|
146
160
|
kwargs = {
|
|
147
161
|
'model': params.model_str,
|
|
148
162
|
'messages': messages,
|
|
@@ -156,11 +170,18 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
156
170
|
return kwargs
|
|
157
171
|
|
|
158
172
|
def compute_result(self, **kwargs):
|
|
173
|
+
pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
|
|
174
|
+
"""Regex pattern to parse the output."""
|
|
159
175
|
response = openai_chat_completion(self.PREFIX, **kwargs)
|
|
160
176
|
contents = []
|
|
161
177
|
for choice in response.choices:
|
|
162
178
|
if choice.message.content:
|
|
163
|
-
|
|
179
|
+
if "```" in choice.message.content:
|
|
180
|
+
action_match = pattern.search(choice.message.content)
|
|
181
|
+
if action_match is not None:
|
|
182
|
+
contents.append(action_match.group(1).strip())
|
|
183
|
+
else:
|
|
184
|
+
contents.append(choice.message.content)
|
|
164
185
|
if contents:
|
|
165
186
|
result = "\n".join(contents)
|
|
166
187
|
return result
|
|
@@ -178,9 +199,9 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
178
199
|
try:
|
|
179
200
|
kind = filetype.guess(source.file)
|
|
180
201
|
source.file.seek(0)
|
|
181
|
-
if kind.mime.startswith("image"):
|
|
202
|
+
if kind.mime.startswith("image") or kind.mime.endswith("pdf"):
|
|
182
203
|
result = None
|
|
183
|
-
kwargs = self.compute_args(params, source)
|
|
204
|
+
kwargs = self.compute_args(params, source, kind)
|
|
184
205
|
if kwargs['model'] != NO_DEPLOYED_MODELS:
|
|
185
206
|
result = self.compute_result(**kwargs)
|
|
186
207
|
if result:
|
|
@@ -189,7 +210,7 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
189
210
|
except BaseException as err:
|
|
190
211
|
raise err
|
|
191
212
|
if doc is None:
|
|
192
|
-
raise TypeError(f"Conversion of
|
|
213
|
+
raise TypeError(f"Conversion of file {source.filename} failed")
|
|
193
214
|
return [doc]
|
|
194
215
|
|
|
195
216
|
@classmethod
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pyconverters_openai_vision/__init__.py,sha256=hM4qyWHjZFRygolssS-jwicW8aMpHeKDgvSs9HNM6CI,51
|
|
2
|
+
pyconverters_openai_vision/openai_utils.py,sha256=HRJ6sJg88en66gkQbOpQKh7cbwtfoAwVLNh7JQSA9ps,5014
|
|
3
|
+
pyconverters_openai_vision/openai_vision.py,sha256=ACpeOEFNphqKceQqyWHwpP6PvuOOig8qTTHwImq34j8,10445
|
|
4
|
+
pyconverters_openai_vision-0.5.7.dist-info/entry_points.txt,sha256=-DS1gRUTf08Fjb79S_8sqCaqxBifC3q3EJZqXXdcf7Q,197
|
|
5
|
+
pyconverters_openai_vision-0.5.7.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
6
|
+
pyconverters_openai_vision-0.5.7.dist-info/METADATA,sha256=3b54AAJ1Z8Q5GU7Guzu7z8sybOW0_e5ajHY7UVn8urM,2635
|
|
7
|
+
pyconverters_openai_vision-0.5.7.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
pyconverters_openai_vision/__init__.py,sha256=3OKw8raUFiHJX-jh8L1RvKEFeD7DZW5-8hpnp21EK-A,51
|
|
2
|
-
pyconverters_openai_vision/openai_utils.py,sha256=HRJ6sJg88en66gkQbOpQKh7cbwtfoAwVLNh7JQSA9ps,5014
|
|
3
|
-
pyconverters_openai_vision/openai_vision.py,sha256=MoEyjYCZ75jWiMonaQdR1erpFBpWgYEiqabp9D8102Y,9562
|
|
4
|
-
pyconverters_openai_vision-0.5.3.dist-info/entry_points.txt,sha256=-DS1gRUTf08Fjb79S_8sqCaqxBifC3q3EJZqXXdcf7Q,197
|
|
5
|
-
pyconverters_openai_vision-0.5.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
|
6
|
-
pyconverters_openai_vision-0.5.3.dist-info/METADATA,sha256=PJWwNjeSChCjjJQhgWxR1RIn9jA7-HvcPvKHWT7sJjk,2635
|
|
7
|
-
pyconverters_openai_vision-0.5.3.dist-info/RECORD,,
|
|
File without changes
|