pyconverters-openai_vision 0.5.18__py3-none-any.whl → 0.5.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyconverters_openai_vision/__init__.py +1 -1
- pyconverters_openai_vision/openai_utils.py +86 -24
- pyconverters_openai_vision/openai_vision.py +259 -44
- {pyconverters_openai_vision-0.5.18.dist-info → pyconverters_openai_vision-0.5.22.dist-info}/METADATA +2 -1
- pyconverters_openai_vision-0.5.22.dist-info/RECORD +7 -0
- {pyconverters_openai_vision-0.5.18.dist-info → pyconverters_openai_vision-0.5.22.dist-info}/entry_points.txt +4 -0
- pyconverters_openai_vision-0.5.18.dist-info/RECORD +0 -7
- {pyconverters_openai_vision-0.5.18.dist-info → pyconverters_openai_vision-0.5.22.dist-info}/WHEEL +0 -0
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""OpenAIVision converter"""
|
|
2
|
-
__version__ = "0.5.18"
|
|
2
|
+
__version__ = "0.5.22"
|
|
@@ -6,31 +6,63 @@ from openai import OpenAI
|
|
|
6
6
|
from openai.lib.azure import AzureOpenAI
|
|
7
7
|
from pymultirole_plugins.util import comma_separated_to_list
|
|
8
8
|
from strenum import StrEnum
|
|
9
|
+
import time
|
|
10
|
+
from openai._base_client import SyncHttpxClientWrapper
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OAuthToken:
    """Mutable holder for a cached OAuth bearer token.

    ``get_api_key`` writes the token string to ``access_token`` and the
    absolute expiry, as a ``time.time()``-based timestamp, to
    ``token_expiry``.  Both attributes are ``None`` until a token is stored.
    """

    # Bearer token string; None until a token has been fetched.
    access_token: "str | None" = None
    # Expiry as a float timestamp (seconds), not a string: it is compared
    # against time.time() to decide whether the token must be refreshed.
    token_expiry: "float | None" = None
|
|
16
|
+
|
|
9
17
|
|
|
10
18
|
logger = Logger("pymultirole")
|
|
11
19
|
DEFAULT_CHAT_GPT_MODEL = "gpt-4o-mini"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
20
|
+
OPENAI_MAX_RETRIES = int(os.getenv("OPENAI_MAX_RETRIES", 2))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def check_litellm_defined():
    """Copy the LiteLLM-specific settings onto the generic OpenAI variables.

    For each of ``LITELLM_OPENAI_API_KEY`` and ``LITELLM_OPENAI_API_BASE``
    that holds a non-empty value, the corresponding ``OPENAI_API_KEY`` /
    ``OPENAI_API_BASE`` environment variable is overwritten with it.  An
    unset or empty LiteLLM variable leaves its counterpart untouched.
    """
    overrides = (
        ("LITELLM_OPENAI_API_KEY", "OPENAI_API_KEY"),
        ("LITELLM_OPENAI_API_BASE", "OPENAI_API_BASE"),
    )
    for source_var, target_var in overrides:
        value = os.getenv(source_var, None)
        if value:
            os.environ[target_var] = value
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_api_key(prefix, oauth_token):
    """Return the credential to use for the endpoint selected by *prefix*.

    For any prefix that does not start with ``"APOLLO"`` the key is read from
    the ``<prefix>OPENAI_API_KEY`` environment variable (``None`` when unset).
    For APOLLO prefixes an OAuth client-credentials token is used: the token
    cached on *oauth_token* is returned while it still has more than 100
    seconds before expiry; otherwise a new token is requested from the URL in
    ``APOLLO_OAUTH`` and stored back on *oauth_token*.

    Args:
        prefix: Environment-variable prefix, e.g. ``""`` or ``"APOLLO_"``.
        oauth_token: Object exposing mutable ``access_token`` and
            ``token_expiry`` attributes; only touched for APOLLO prefixes.

    Returns:
        The API key or bearer token, or ``None`` if the key variable is unset.

    Raises:
        ValueError: A token must be fetched but ``APOLLO_CLIENT_ID``,
            ``APOLLO_CLIENT_SECRET`` or ``APOLLO_OAUTH`` is missing.
        requests.HTTPError: The token endpoint answered with an error status.
        KeyError: The token response carries no ``access_token`` field.
    """
    if not prefix.startswith("APOLLO"):
        return os.getenv(prefix + "OPENAI_API_KEY")

    # A cached token is reusable only if it exists, has a known expiry and
    # keeps a 100 s safety margin; a missing expiry forces a refresh instead
    # of failing on a None comparison.
    expiry = oauth_token.token_expiry
    token_is_fresh = (
        oauth_token.access_token is not None
        and expiry is not None
        and time.time() + 100 <= expiry
    )
    if token_is_fresh:
        return oauth_token.access_token

    client_id = os.getenv("APOLLO_CLIENT_ID")
    client_secret = os.getenv("APOLLO_CLIENT_SECRET")
    token_url = os.getenv("APOLLO_OAUTH")
    if not client_id or not client_secret or not token_url:
        raise ValueError("Environment variables for OAuth are not set properly.")
    token_data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
    }
    # SECURITY NOTE(review): TLS certificate verification is disabled for the
    # token request.  The original computed this as
    # `not prefix.startswith("APOLLO")`, which is always False on this path;
    # the value is kept but made explicit.  Confirm whether a CA bundle can be
    # supplied instead.
    # A timeout is set because requests waits indefinitely by default.
    request_timeout_seconds = 60
    response = requests.post(token_url, data=token_data, verify=False,
                             timeout=request_timeout_seconds)
    response.raise_for_status()
    json_response = response.json()
    oauth_token.access_token = json_response['access_token']
    # float() tolerates servers that send expires_in as a numeric string.
    oauth_token.token_expiry = time.time() + float(json_response.get('expires_in', 3600))
    return oauth_token.access_token
|
|
17
56
|
|
|
18
57
|
|
|
19
58
|
# Now use default retry with backoff of openai api
|
|
20
|
-
def openai_chat_completion(prefix, **kwargs):
|
|
21
|
-
client = set_openai(prefix)
|
|
59
|
+
def openai_chat_completion(prefix, oauth_token, base_url, **kwargs):
|
|
60
|
+
client = set_openai(prefix, oauth_token, base_url)
|
|
22
61
|
response = client.chat.completions.create(**kwargs)
|
|
23
62
|
return response
|
|
24
63
|
|
|
25
64
|
|
|
26
|
-
def
|
|
27
|
-
for m in DEEPINFRA_VISION_MODELS:
|
|
28
|
-
if m in model.lower():
|
|
29
|
-
return True
|
|
30
|
-
return False
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def openai_list_models(prefix, **kwargs):
|
|
65
|
+
def openai_list_models(prefix, oauth_token, base_url, **kwargs):
|
|
34
66
|
def sort_by_created(x):
|
|
35
67
|
if 'created' in x:
|
|
36
68
|
return x['created']
|
|
@@ -42,7 +74,7 @@ def openai_list_models(prefix, **kwargs):
|
|
|
42
74
|
return x.id
|
|
43
75
|
|
|
44
76
|
models = []
|
|
45
|
-
client = set_openai(prefix)
|
|
77
|
+
client = set_openai(prefix, oauth_token, base_url, max_retries=10)
|
|
46
78
|
if prefix.startswith("DEEPINFRA"):
|
|
47
79
|
deepinfra_url = client.base_url
|
|
48
80
|
deepinfra_models = {}
|
|
@@ -75,9 +107,19 @@ def openai_list_models(prefix, **kwargs):
|
|
|
75
107
|
mods = list(
|
|
76
108
|
{m['model_name'] for m in mods if m['task'] == 'text-generation' and m['status'] == 'running'})
|
|
77
109
|
deepinfra_models.update({m: m for m in mods})
|
|
78
|
-
models =
|
|
110
|
+
models = list(deepinfra_models.keys())
|
|
79
111
|
elif prefix.startswith("AZURE"):
|
|
80
112
|
models = comma_separated_to_list(os.getenv(prefix + "OPENAI_DEPLOYMENT_ID", None))
|
|
113
|
+
elif prefix.startswith("APOLLO"):
|
|
114
|
+
apollo_url = client.base_url
|
|
115
|
+
public_models_list_url = f"{apollo_url}models"
|
|
116
|
+
response = requests.get(public_models_list_url, verify=False,
|
|
117
|
+
headers={'Accept': "application/json", 'Authorization': f"Bearer {client.api_key}"})
|
|
118
|
+
if response.ok:
|
|
119
|
+
resp = response.json()
|
|
120
|
+
mods = sorted(resp["data"], key=sort_by_created, reverse=True)
|
|
121
|
+
models = list(
|
|
122
|
+
{m['id'] for m in mods})
|
|
81
123
|
else:
|
|
82
124
|
response = client.models.list(**kwargs)
|
|
83
125
|
models = sorted(response.data, key=sort_by_created, reverse=True)
|
|
@@ -85,20 +127,31 @@ def openai_list_models(prefix, **kwargs):
|
|
|
85
127
|
return models
|
|
86
128
|
|
|
87
129
|
|
|
88
|
-
def set_openai(prefix):
|
|
130
|
+
def set_openai(prefix, oauth_token, base_url, max_retries=OPENAI_MAX_RETRIES):
|
|
131
|
+
api_key = get_api_key(prefix, oauth_token)
|
|
89
132
|
if prefix.startswith("AZURE"):
|
|
90
133
|
client = AzureOpenAI(
|
|
91
134
|
# This is the default and can be omitted
|
|
92
|
-
api_key=
|
|
93
|
-
azure_endpoint=
|
|
135
|
+
api_key=api_key,
|
|
136
|
+
azure_endpoint=base_url,
|
|
94
137
|
api_version=os.getenv(prefix + "OPENAI_API_VERSION", None),
|
|
95
138
|
# azure_deployment=os.getenv(prefix + "OPENAI_DEPLOYMENT_ID", None)
|
|
96
139
|
)
|
|
97
140
|
else:
|
|
141
|
+
# hack to support verify=None for Apollo
|
|
142
|
+
if prefix.startswith("APOLLO"):
|
|
143
|
+
http_client = SyncHttpxClientWrapper(
|
|
144
|
+
base_url="https://api.openai.com/v1" if base_url is None else base_url,
|
|
145
|
+
verify=False,
|
|
146
|
+
)
|
|
147
|
+
else:
|
|
148
|
+
http_client = None
|
|
98
149
|
client = OpenAI(
|
|
99
150
|
# This is the default and can be omitted
|
|
100
|
-
api_key=
|
|
101
|
-
base_url=
|
|
151
|
+
api_key=api_key,
|
|
152
|
+
base_url=base_url,
|
|
153
|
+
http_client=http_client,
|
|
154
|
+
max_retries=max_retries
|
|
102
155
|
)
|
|
103
156
|
return client
|
|
104
157
|
|
|
@@ -107,14 +160,23 @@ def gpt_filter(m: str):
|
|
|
107
160
|
return m.startswith('gpt') and not m.startswith('gpt-3.5-turbo-instruct') and 'vision' not in m
|
|
108
161
|
|
|
109
162
|
|
|
163
|
+
def all_filter(m: str):
    """Accept every model name.

    Pass-through predicate; it is the default ``key`` of
    ``create_openai_model_enum``, so no listed model is filtered out.
    """
    return True
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def apollo_filter(m: str):
    """Return True unless model name *m* contains an excluded fragment.

    A name is rejected when it contains ``embed``, ``vision``, ``mock`` or
    ``tts`` anywhere (case-sensitive substring match).
    """
    # The original tested 'mock' twice; each fragment is now listed once.
    excluded = ('embed', 'vision', 'mock', 'tts')
    return not any(fragment in m for fragment in excluded)
|
|
169
|
+
|
|
170
|
+
|
|
110
171
|
NO_DEPLOYED_MODELS = 'no deployed models - check API key'
|
|
111
172
|
|
|
112
173
|
|
|
113
|
-
|
|
174
|
+
# @lru_cache(maxsize=None)
|
|
175
|
+
def create_openai_model_enum(name, prefix="", base_url=None, key=all_filter):
|
|
114
176
|
chat_gpt_models = []
|
|
115
177
|
default_chat_gpt_model = None
|
|
116
178
|
try:
|
|
117
|
-
chat_gpt_models = [m for m in openai_list_models(prefix) if key(m)]
|
|
179
|
+
chat_gpt_models = [m for m in openai_list_models(prefix, OAuthToken(), base_url) if key(m)]
|
|
118
180
|
if chat_gpt_models:
|
|
119
181
|
default_chat_gpt_model = DEFAULT_CHAT_GPT_MODEL if DEFAULT_CHAT_GPT_MODEL in chat_gpt_models else \
|
|
120
182
|
chat_gpt_models[0]
|
|
@@ -4,30 +4,36 @@ import re
|
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from logging import Logger
|
|
6
6
|
from re import Pattern
|
|
7
|
-
from typing import List, cast, Type, Dict, Any
|
|
7
|
+
from typing import List, cast, Type, Dict, Any, Optional
|
|
8
8
|
|
|
9
9
|
import filetype as filetype
|
|
10
|
+
from log_with_context import add_logging_context
|
|
10
11
|
from pydantic import Field, BaseModel
|
|
11
12
|
from pymultirole_plugins.v1.converter import ConverterParameters, ConverterBase
|
|
12
|
-
from pymultirole_plugins.v1.
|
|
13
|
+
from pymultirole_plugins.v1.processor import ProcessorParameters, ProcessorBase
|
|
14
|
+
from pymultirole_plugins.v1.schema import Document, AltText
|
|
13
15
|
from starlette.datastructures import UploadFile
|
|
14
16
|
|
|
15
|
-
from .openai_utils import
|
|
16
|
-
|
|
17
|
+
from .openai_utils import create_openai_model_enum, openai_chat_completion, gpt_filter, \
|
|
18
|
+
NO_DEPLOYED_MODELS, OAuthToken, all_filter, check_litellm_defined
|
|
17
19
|
|
|
18
20
|
logger = Logger("pymultirole")
|
|
21
|
+
# bool() of any non-empty string is True, so bool(os.getenv(..., "false"))
# could never be switched off with "false".  Parse the flag explicitly: only
# "1", "true", "yes" or "on" (case-insensitive) enable it.
SHOW_INTERNAL = os.getenv("SHOW_INTERNAL", "false").strip().lower() in ("1", "true", "yes", "on")
|
|
19
22
|
|
|
20
23
|
|
|
21
24
|
class OpenAIVisionBaseParameters(ConverterParameters):
|
|
25
|
+
base_url: str = Field(
|
|
26
|
+
None,
|
|
27
|
+
description="""OpenAI endpoint base url""", extra="advanced"
|
|
28
|
+
)
|
|
22
29
|
model_str: str = Field(
|
|
23
|
-
None, extra="
|
|
30
|
+
None, extra="advanced"
|
|
24
31
|
)
|
|
25
32
|
model: str = Field(
|
|
26
33
|
None, extra="internal"
|
|
27
34
|
)
|
|
28
35
|
prompt: str = Field(
|
|
29
|
-
"""If the attached file is an image: describe the image
|
|
30
|
-
If the attached file is a PDF document: convert the PDF document into Markdown format. The output must be just the markdown result without any explanation or introductory prefix.""",
|
|
36
|
+
"""If the attached file is an image: describe the image.""",
|
|
31
37
|
description="""Contains the prompt as a string""",
|
|
32
38
|
extra="multiline",
|
|
33
39
|
)
|
|
@@ -43,7 +49,7 @@ class OpenAIVisionBaseParameters(ConverterParameters):
|
|
|
43
49
|
extra="multiline,advanced",
|
|
44
50
|
)
|
|
45
51
|
temperature: float = Field(
|
|
46
|
-
1
|
|
52
|
+
0.1,
|
|
47
53
|
description="""What sampling temperature to use, between 0 and 2.
|
|
48
54
|
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
|
49
55
|
We generally recommend altering this or `top_p` but not both.""",
|
|
@@ -88,47 +94,58 @@ class OpenAIVisionBaseParameters(ConverterParameters):
|
|
|
88
94
|
class OpenAIVisionModel(str, Enum):
|
|
89
95
|
gpt_4o_mini = "gpt-4o-mini"
|
|
90
96
|
gpt_4o = "gpt-4o"
|
|
91
|
-
|
|
97
|
+
gpt_4_1 = "gpt-4.1"
|
|
98
|
+
gpt_4_1_mini = "gpt-4.1-mini"
|
|
99
|
+
gpt_4_1_nano = "gpt-4.1-nano"
|
|
100
|
+
gpt_5 = "gpt-5"
|
|
101
|
+
gpt_5_mini = "gpt-5-mini"
|
|
102
|
+
gpt_5_nano = "gpt-5-nano"
|
|
92
103
|
|
|
93
104
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
105
|
+
check_litellm_defined()
|
|
106
|
+
OPENAI_PREFIX = ""
|
|
107
|
+
OPENAI_API_BASE = os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None)
|
|
108
|
+
CHAT_GPT_MODEL_ENUM, DEFAULT_CHAT_GPT_MODEL = create_openai_model_enum('OpenAIModel2', prefix=OPENAI_PREFIX,
|
|
109
|
+
base_url=OPENAI_API_BASE,
|
|
110
|
+
key=gpt_filter if OPENAI_API_BASE is None else all_filter)
|
|
98
111
|
|
|
99
|
-
|
|
112
|
+
|
|
113
|
+
class OpenAIVisionParameters(OpenAIVisionBaseParameters):
|
|
114
|
+
base_url: Optional[str] = Field(
|
|
115
|
+
os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None),
|
|
116
|
+
description="""OpenAI endpoint base url""", extra="advanced"
|
|
117
|
+
)
|
|
118
|
+
model: CHAT_GPT_MODEL_ENUM = Field(
|
|
119
|
+
DEFAULT_CHAT_GPT_MODEL,
|
|
120
|
+
description="""The [OpenAI model](https://platform.openai.com/docs/models) used for completion.""",
|
|
121
|
+
extra="pipeline-naming-hint"
|
|
100
122
|
)
|
|
101
123
|
|
|
102
124
|
|
|
103
125
|
DEEPINFRA_PREFIX = "DEEPINFRA_"
|
|
104
|
-
|
|
105
|
-
|
|
126
|
+
DEEPINFRA_OPENAI_API_BASE = os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None)
|
|
127
|
+
DEEPINFRA_CHAT_GPT_MODEL_ENUM, DEEPINFRA_DEFAULT_CHAT_GPT_MODEL = create_openai_model_enum('DeepInfraOpenAIModel',
|
|
128
|
+
prefix=DEEPINFRA_PREFIX,
|
|
129
|
+
base_url=DEEPINFRA_OPENAI_API_BASE)
|
|
106
130
|
|
|
107
131
|
|
|
108
132
|
class DeepInfraOpenAIVisionParameters(OpenAIVisionBaseParameters):
|
|
109
|
-
|
|
133
|
+
base_url: str = Field(
|
|
134
|
+
os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None),
|
|
135
|
+
description="""OpenAI endpoint base url""", extra="advanced"
|
|
136
|
+
)
|
|
137
|
+
model: DEEPINFRA_CHAT_GPT_MODEL_ENUM = Field(
|
|
110
138
|
None,
|
|
111
|
-
description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=
|
|
112
|
-
|
|
139
|
+
description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=text-generation) used for completion. It must be deployed on your [DeepInfra dashboard](https://deepinfra.com/dash).""",
|
|
140
|
+
extra="pipeline-naming-hint"
|
|
113
141
|
)
|
|
114
142
|
|
|
115
143
|
|
|
116
|
-
# AZURE_PREFIX = "AZURE_"
|
|
117
|
-
#
|
|
118
|
-
#
|
|
119
|
-
# class AzureOpenAIVisionParameters(OpenAIVisionBaseParameters):
|
|
120
|
-
# model: OpenAIVisionModel = Field(
|
|
121
|
-
# OpenAIVisionModel.whisper_1,
|
|
122
|
-
# description="""The [Azure OpenAI model](https://platform.openai.com/docs/models) used for speech to text transcription. Options currently available:</br>
|
|
123
|
-
# <li>`whisper-1` - state-of-the-art open source large-v2 Whisper model.
|
|
124
|
-
# """, extra="pipeline-naming-hint"
|
|
125
|
-
# )
|
|
126
|
-
|
|
127
|
-
|
|
128
144
|
class OpenAIVisionConverterBase(ConverterBase):
|
|
129
145
|
__doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
|
|
130
146
|
You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
|
|
131
147
|
PREFIX: str = ""
|
|
148
|
+
oauth_token: OAuthToken = OAuthToken()
|
|
132
149
|
|
|
133
150
|
def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile, kind
|
|
134
151
|
) -> Dict[str, Any]:
|
|
@@ -141,13 +158,6 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
141
158
|
"url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
|
|
142
159
|
}
|
|
143
160
|
}
|
|
144
|
-
else:
|
|
145
|
-
binary_block = {
|
|
146
|
-
"type": "file",
|
|
147
|
-
"file": {
|
|
148
|
-
"filename": source.filename,
|
|
149
|
-
"file_data": f"data:application/pdf;base64,{rv.decode('utf-8')}"}
|
|
150
|
-
}
|
|
151
161
|
messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
|
|
152
162
|
messages.append({"role": "user",
|
|
153
163
|
"content": [
|
|
@@ -169,11 +179,12 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
169
179
|
}
|
|
170
180
|
return kwargs
|
|
171
181
|
|
|
172
|
-
def compute_result(self, **kwargs):
|
|
182
|
+
def compute_result(self, base_url, **kwargs):
|
|
173
183
|
pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
|
|
174
184
|
"""Regex pattern to parse the output."""
|
|
175
|
-
response = openai_chat_completion(self.PREFIX, **kwargs)
|
|
185
|
+
response = openai_chat_completion(self.PREFIX, self.oauth_token, base_url, **kwargs)
|
|
176
186
|
contents = []
|
|
187
|
+
result = None
|
|
177
188
|
for choice in response.choices:
|
|
178
189
|
if choice.message.content:
|
|
179
190
|
if "```" in choice.message.content:
|
|
@@ -199,11 +210,11 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
199
210
|
try:
|
|
200
211
|
kind = filetype.guess(source.file)
|
|
201
212
|
source.file.seek(0)
|
|
202
|
-
if kind.mime.startswith("image")
|
|
213
|
+
if kind.mime.startswith("image"):
|
|
203
214
|
result = None
|
|
204
215
|
kwargs = self.compute_args(params, source, kind)
|
|
205
216
|
if kwargs['model'] != NO_DEPLOYED_MODELS:
|
|
206
|
-
result = self.compute_result(**kwargs)
|
|
217
|
+
result = self.compute_result(params.base_url, **kwargs)
|
|
207
218
|
if result:
|
|
208
219
|
doc = Document(identifier=source.filename, text=result)
|
|
209
220
|
doc.properties = {"fileName": source.filename}
|
|
@@ -226,7 +237,9 @@ class OpenAIVisionConverter(OpenAIVisionConverterBase):
|
|
|
226
237
|
params: OpenAIVisionParameters = cast(
|
|
227
238
|
OpenAIVisionParameters, parameters
|
|
228
239
|
)
|
|
229
|
-
params.model_str
|
|
240
|
+
model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
|
|
241
|
+
model = params.model.value if params.model is not None else None
|
|
242
|
+
params.model_str = model_str or model
|
|
230
243
|
return super().convert(source, params)
|
|
231
244
|
|
|
232
245
|
@classmethod
|
|
@@ -243,9 +256,211 @@ class DeepInfraOpenAIVisionConverter(OpenAIVisionConverterBase):
|
|
|
243
256
|
params: DeepInfraOpenAIVisionParameters = cast(
|
|
244
257
|
DeepInfraOpenAIVisionParameters, parameters
|
|
245
258
|
)
|
|
246
|
-
params.model_str
|
|
259
|
+
model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
|
|
260
|
+
model = params.model.value if params.model is not None else None
|
|
261
|
+
params.model_str = model_str or model
|
|
247
262
|
return super().convert(source, params)
|
|
248
263
|
|
|
249
264
|
@classmethod
|
|
250
265
|
def get_model(cls) -> Type[BaseModel]:
|
|
251
266
|
return DeepInfraOpenAIVisionParameters
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def guess_kind(base64_src):
    """Infer the image type declared by a base64 data URI.

    Searches *base64_src* for a ``data:image/<subtype>;base64`` marker and,
    when one is found, passes the captured MIME type to
    ``filetype.get_type``.

    Returns:
        The result of ``filetype.get_type`` for the captured MIME type, or
        ``None`` when the string contains no such marker.
    """
    found = re.search(r"data:(image/[^;]+);base64", base64_src)
    if not found:
        return None
    return filetype.get_type(found.group(1))
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
class OpenAIVisionProcessorBaseParameters(OpenAIVisionBaseParameters):
    # Read by OpenAIVisionProcessorBase.process: when true, each generated
    # description is inserted into the document text right after the matching
    # image reference; when false, it replaces the text of the matching alt
    # text instead.
    replace_refs_altTexts_by_descriptions: bool = Field(
        default=False,
        extra="advanced",
    )
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class OpenAIVisionProcessorBase(ProcessorBase):
    __doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
    You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
    # Prefix prepended to the environment-variable names read by this class
    # and passed on to the openai_utils helpers.
    PREFIX: str = ""
    # NOTE: class-level default, so a single OAuthToken object is shared by
    # every instance (and subclass) that does not assign its own.
    oauth_token: OAuthToken = OAuthToken()

    def compute_args(self, params: OpenAIVisionBaseParameters, source: str, kind
                     ) -> Dict[str, Any]:
        """Build the chat-completion keyword arguments for one image.

        Args:
            params: Supplies ``system_prompt``, ``prompt``, ``model_str`` and
                the sampling settings copied into the result.
            source: Image reference placed verbatim in the ``image_url``
                block; ``process`` passes the alt text's data URI here.
            kind: Object whose ``mime`` attribute names the media type.

        Returns:
            A dict with ``model``, ``messages``, ``max_tokens``,
            ``temperature``, ``top_p``, ``n``, ``frequency_penalty`` and
            ``presence_penalty``.

        Raises:
            ValueError: *kind* is not an image type.  The original failed
                here with an unbound local variable instead.
        """
        if not kind.mime.startswith("image"):
            raise ValueError(f"Unsupported media type for a vision prompt: {kind.mime}")
        binary_block = {
            "type": "image_url",
            "image_url": {
                "url": source
            }
        }
        messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
        messages.append({"role": "user",
                         "content": [
                             {
                                 "type": "text",
                                 "text": params.prompt
                             },
                             binary_block
                         ]})
        return {
            'model': params.model_str,
            'messages': messages,
            'max_tokens': params.max_tokens,
            'temperature': params.temperature,
            'top_p': params.top_p,
            'n': params.n,
            'frequency_penalty': params.frequency_penalty,
            'presence_penalty': params.presence_penalty,
        }

    def compute_result(self, base_url, **kwargs):
        """Run the chat completion and merge the choices into one string.

        Args:
            base_url: Endpoint base URL forwarded to ``openai_chat_completion``.
            **kwargs: Chat-completion arguments, as built by ``compute_args``.

        Returns:
            The non-empty choice contents joined with newlines, or ``None``
            when no choice carries content.  For a reply containing a fenced
            block that the pattern matches, only the stripped inner text of
            the first match is kept.
        """
        # Captures the inside of a ``` fence, optionally tagged "markdown".
        pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
        response = openai_chat_completion(self.PREFIX, self.oauth_token, base_url, **kwargs)
        contents = []
        for choice in response.choices:
            content = choice.message.content
            if not content:
                continue
            action_match = pattern.search(content) if "```" in content else None
            if action_match is not None:
                contents.append(action_match.group(1).strip())
            else:
                # Plain reply, or a fence the pattern cannot parse: keep the
                # raw text rather than silently dropping the answer.
                contents.append(content)
        return "\n".join(contents) if contents else None

    def _describe_alt_texts(self, params, alt_texts) -> Dict[str, str]:
        """Return ``{alt text name: description}`` for the describable images."""
        descriptions: Dict[str, str] = {}
        # Later alt texts with the same name override earlier ones, exactly
        # as the name-to-text dict comprehension did in the original.
        sources = {alt_text.name: alt_text.text for alt_text in alt_texts}
        for name, source in sources.items():
            kind = guess_kind(source)
            if kind is None or not kind.mime.startswith("image"):
                continue
            kwargs = self.compute_args(params, source, kind)
            if kwargs['model'] == NO_DEPLOYED_MODELS:
                continue
            result = self.compute_result(params.base_url, **kwargs)
            if isinstance(result, str):
                descriptions[name] = result
        return descriptions

    @staticmethod
    def _insert_descriptions(text: str, descriptions: Dict[str, str]) -> str:
        """Append each description after the Markdown image link naming it."""
        link_regex = r"!\[([^]]+)\]\(([^]]+)\)"

        def convert_links(matchobj):
            m = matchobj.group(0)
            m_id = matchobj.group(1)
            if m_id in descriptions:
                m_desc = descriptions[m_id]
                return f"{m}\n___\n{m_desc}\n___\n"
            return m

        # count/flags are passed by keyword: passing them positionally is
        # deprecated in recent Python versions.
        return re.sub(link_regex, convert_links, text, count=0, flags=re.MULTILINE)

    def process(
            self, documents: List[Document], parameters: ProcessorParameters
    ) -> List[Document]:
        """Describe the images stored as alt texts of each document.

        The model named by the ``<PREFIX>OPENAI_MODEL`` environment variable,
        when set, overrides ``params.model_str``.  A document is modified only
        if at least one of its alt texts produced a description:

        * with ``replace_refs_altTexts_by_descriptions`` set, the descriptions
          are inserted into ``document.text`` after the matching image links
          and the described alt texts are removed;
        * otherwise each described alt text has its text replaced by the
          description.

        Alt texts without a description are always kept unchanged.

        Returns:
            The same *documents* list, modified in place.
        """
        params: OpenAIVisionProcessorBaseParameters = cast(
            OpenAIVisionProcessorBaseParameters, parameters
        )
        env_model = os.getenv(self.PREFIX + "OPENAI_MODEL", None)
        if env_model:
            params.model_str = env_model
        # The original wrapped this loop in `except BaseException: raise err`,
        # a no-op that is dropped; exceptions propagate exactly as before.
        for document in documents:
            with add_logging_context(docid=document.identifier):
                if not document.altTexts:
                    continue
                alt_texts = document.altTexts
                descriptions = self._describe_alt_texts(params, alt_texts)
                if not descriptions:
                    continue
                if params.replace_refs_altTexts_by_descriptions:
                    if document.text:
                        document.text = self._insert_descriptions(document.text, descriptions)
                    document.altTexts = [alt_text for alt_text in alt_texts
                                         if alt_text.name not in descriptions]
                else:
                    document.altTexts = [
                        AltText(name=alt_text.name, text=descriptions[alt_text.name])
                        if alt_text.name in descriptions else alt_text
                        for alt_text in alt_texts
                    ]
        return documents

    @classmethod
    def get_model(cls) -> Type[BaseModel]:
        """Return the parameter model class accepted by ``process``."""
        return OpenAIVisionProcessorBaseParameters
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
class OpenAIVisionProcessorParameters(OpenAIVisionProcessorBaseParameters):
    # Endpoint base URL; the default is read from the environment once, when
    # this class body is executed.
    base_url: Optional[str] = Field(
        default=os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None),
        description="""OpenAI endpoint base url""",
        extra="advanced",
    )
    # Model choice, restricted to the enum built at import time.
    model: CHAT_GPT_MODEL_ENUM = Field(
        default=DEFAULT_CHAT_GPT_MODEL,
        description="""The [OpenAI model](https://platform.openai.com/docs/models) used for completion.""",
        extra="pipeline-naming-hint",
    )
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
class OpenAIVisionProcessor(OpenAIVisionProcessorBase):
    # The original text described the OpenAI Audio speech-to-text API, which
    # this image-description processor does not use.
    __doc__ = """Describe the images attached to documents using an OpenAI vision-capable chat model"""

    def process(
            self, documents: List[Document], parameters: ProcessorParameters
    ) -> List[Document]:
        """Resolve the model name, then delegate to the base implementation.

        A non-blank ``model_str`` wins; otherwise the value of the ``model``
        enum member is used (``None`` when neither is set).  The resolved name
        is written back to ``params.model_str``.
        """
        # cast() is a runtime no-op; it now names the processor parameter
        # class (the one get_model returns) instead of the converter's.
        params: OpenAIVisionProcessorParameters = cast(
            OpenAIVisionProcessorParameters, parameters
        )
        model_str = params.model_str if params.model_str and params.model_str.strip() else None
        model = params.model.value if params.model is not None else None
        params.model_str = model_str or model
        return super().process(documents, params)

    @classmethod
    def get_model(cls) -> Type[BaseModel]:
        """Return the parameter model class accepted by ``process``."""
        return OpenAIVisionProcessorParameters
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
class DeepInfraOpenAIVisionProcessorParameters(OpenAIVisionProcessorBaseParameters):
    # Endpoint base URL, read from the environment when this class body is
    # executed.  Annotated Optional because the default is None whenever
    # DEEPINFRA_OPENAI_API_BASE is unset, matching the OpenAI counterpart.
    base_url: Optional[str] = Field(
        os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None),
        description="""OpenAI endpoint base url""", extra="advanced"
    )
    # Model choice, restricted to the enum built at import time; no default.
    model: DEEPINFRA_CHAT_GPT_MODEL_ENUM = Field(
        None,
        description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=text-generation) used for completion. It must be deployed on your [DeepInfra dashboard](https://deepinfra.com/dash).""",
        extra="pipeline-naming-hint"
    )
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
class DeepInfraOpenAIVisionProcessor(OpenAIVisionProcessorBase):
    # The original text linked the DeepInfra Whisper (speech) tutorial, which
    # is unrelated to describing images.
    __doc__ = """Describe the images attached to documents using the [DeepInfra](https://deepinfra.com/) 'OpenAI compatible' API"""
    PREFIX = DEEPINFRA_PREFIX

    def process(
            self, documents: List[Document], parameters: ProcessorParameters
    ) -> List[Document]:
        """Resolve the model name, then delegate to the base implementation.

        A non-blank ``model_str`` wins; otherwise the value of the ``model``
        enum member is used (``None`` when neither is set).  The resolved name
        is written back to ``params.model_str``.
        """
        # cast() is a runtime no-op; it now names the processor parameter
        # class (the one get_model returns) instead of the converter's.
        params: DeepInfraOpenAIVisionProcessorParameters = cast(
            DeepInfraOpenAIVisionProcessorParameters, parameters
        )
        model_str = params.model_str if params.model_str and params.model_str.strip() else None
        model = params.model.value if params.model is not None else None
        params.model_str = model_str or model
        return super().process(documents, params)

    @classmethod
    def get_model(cls) -> Type[BaseModel]:
        """Return the parameter model class accepted by ``process``."""
        return DeepInfraOpenAIVisionProcessorParameters
|
{pyconverters_openai_vision-0.5.18.dist-info → pyconverters_openai_vision-0.5.22.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyconverters-openai_vision
|
|
3
|
-
Version: 0.5.18
|
|
3
|
+
Version: 0.5.22
|
|
4
4
|
Summary: OpenAIVision converter
|
|
5
5
|
Home-page: https://kairntech.com/
|
|
6
6
|
Author: Olivier Terrier
|
|
@@ -27,6 +27,7 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
27
27
|
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
28
28
|
Classifier: Topic :: Internet :: WWW/HTTP
|
|
29
29
|
Requires-Dist: pymultirole-plugins>=0.5.0,<0.6.0
|
|
30
|
+
Requires-Dist: httpx<0.28
|
|
30
31
|
Requires-Dist: openai==1.9.0
|
|
31
32
|
Requires-Dist: Jinja2
|
|
32
33
|
Requires-Dist: tenacity
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pyconverters_openai_vision/__init__.py,sha256=DVZWqJHpQcrIA6rtKEN3C-qJc4k9cH846SAacAXLaYY,52
|
|
2
|
+
pyconverters_openai_vision/openai_utils.py,sha256=XI4WYZ-EAVG0Vxd5yUDuZNDgEzqHJeriScxTUusi1oo,7740
|
|
3
|
+
pyconverters_openai_vision/openai_vision.py,sha256=PW_JnekYyE4_XVba6tRs0jwFF1wXbf5zfy1yF2p-BzQ,20014
|
|
4
|
+
pyconverters_openai_vision-0.5.22.dist-info/entry_points.txt,sha256=KLlvDTMJjHy0fk6mvTXFNpn0pC8UKsTJLd9wre9SOHw,394
|
|
5
|
+
pyconverters_openai_vision-0.5.22.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
6
|
+
pyconverters_openai_vision-0.5.22.dist-info/METADATA,sha256=C5srCJu7yQnlmRRORntBdmAcOhHdjRGAPWBrUiCKlY0,2662
|
|
7
|
+
pyconverters_openai_vision-0.5.22.dist-info/RECORD,,
|
|
@@ -2,3 +2,7 @@
|
|
|
2
2
|
deepinfra_openai_vision=pyconverters_openai_vision.openai_vision:DeepInfraOpenAIVisionConverter
|
|
3
3
|
openai_vision=pyconverters_openai_vision.openai_vision:OpenAIVisionConverter
|
|
4
4
|
|
|
5
|
+
[pyprocessors.plugins]
|
|
6
|
+
deepinfra_openai_vision=pyconverters_openai_vision.openai_vision:DeepInfraOpenAIVisionProcessor
|
|
7
|
+
openai_vision=pyconverters_openai_vision.openai_vision:OpenAIVisionProcessor
|
|
8
|
+
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
pyconverters_openai_vision/__init__.py,sha256=SiWXJv3xF2cmMTYoqoQ-IG7e9b1IwDKstqyF0_DhwKY,52
|
|
2
|
-
pyconverters_openai_vision/openai_utils.py,sha256=HRJ6sJg88en66gkQbOpQKh7cbwtfoAwVLNh7JQSA9ps,5014
|
|
3
|
-
pyconverters_openai_vision/openai_vision.py,sha256=ACpeOEFNphqKceQqyWHwpP6PvuOOig8qTTHwImq34j8,10445
|
|
4
|
-
pyconverters_openai_vision-0.5.18.dist-info/entry_points.txt,sha256=-DS1gRUTf08Fjb79S_8sqCaqxBifC3q3EJZqXXdcf7Q,197
|
|
5
|
-
pyconverters_openai_vision-0.5.18.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
6
|
-
pyconverters_openai_vision-0.5.18.dist-info/METADATA,sha256=eEkj7GS21qyJulma5ScRH8EWau6Vw5kErZMW__LT0eM,2636
|
|
7
|
-
pyconverters_openai_vision-0.5.18.dist-info/RECORD,,
|
{pyconverters_openai_vision-0.5.18.dist-info → pyconverters_openai_vision-0.5.22.dist-info}/WHEEL
RENAMED
|
File without changes
|