pyconverters-openai_vision 0.5.18__tar.gz → 0.5.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/PKG-INFO +2 -1
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/pyconverters_openai_vision/__init__.py +1 -1
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/pyconverters_openai_vision/openai_utils.py +86 -24
- pyconverters_openai_vision-0.5.22/pyconverters_openai_vision/openai_vision.py +466 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/pyproject.toml +4 -1
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/setup.py +7 -2
- pyconverters_openai_vision-0.5.22/tests/data/ENG product fact files_general offer_2025_30pages.json +274 -0
- pyconverters_openai_vision-0.5.22/tests/data/ENG product fact files_general offer_2025_30pages_alts.json +214 -0
- pyconverters_openai_vision-0.5.22/tests/data/ENG product fact files_general offer_2025_30pages_descs.json +133 -0
- pyconverters_openai_vision-0.5.22/tests/data/PC_Kairntech_LLM_v1.md.json +16 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/tests/test_openai_vision.py +25 -10
- pyconverters_openai_vision-0.5.18/pyconverters_openai_vision/openai_vision.py +0 -251
- pyconverters_openai_vision-0.5.18/tests/data/Sodexo_URD_2023_FR - 4p.pdf +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/.dockerignore +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/.gitignore +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/Dockerfile +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/Jenkinsfile +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/README.md +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/bumpversion.py +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/tests/__init__.py +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/tests/data/colducoq.jpg +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/tests/data/webinar.png +0 -0
- {pyconverters_openai_vision-0.5.18 → pyconverters_openai_vision-0.5.22}/tox.ini +0 -0
```diff
--- pyconverters_openai_vision-0.5.18/PKG-INFO
+++ pyconverters_openai_vision-0.5.22/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyconverters-openai_vision
-Version: 0.5.18
+Version: 0.5.22
 Summary: OpenAIVision converter
 Home-page: https://kairntech.com/
 Author: Olivier Terrier
@@ -27,6 +27,7 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
 Classifier: Topic :: Internet :: WWW/HTTP
 Requires-Dist: pymultirole-plugins>=0.5.0,<0.6.0
+Requires-Dist: httpx<0.28
 Requires-Dist: openai==1.9.0
 Requires-Dist: Jinja2
 Requires-Dist: tenacity
```
```diff
--- pyconverters_openai_vision-0.5.18/pyconverters_openai_vision/__init__.py
+++ pyconverters_openai_vision-0.5.22/pyconverters_openai_vision/__init__.py
@@ -1,2 +1,2 @@
 """OpenAIVision converter"""
-__version__ = "0.5.18"
+__version__ = "0.5.22"
```
```diff
--- pyconverters_openai_vision-0.5.18/pyconverters_openai_vision/openai_utils.py
+++ pyconverters_openai_vision-0.5.22/pyconverters_openai_vision/openai_utils.py
@@ -6,31 +6,63 @@ from openai import OpenAI
 from openai.lib.azure import AzureOpenAI
 from pymultirole_plugins.util import comma_separated_to_list
 from strenum import StrEnum
+import time
+from openai._base_client import SyncHttpxClientWrapper
+
+
+class OAuthToken:
+    access_token: str = None
+    token_expiry: str = None
+
 
 logger = Logger("pymultirole")
 DEFAULT_CHAT_GPT_MODEL = "gpt-4o-mini"
-
-
-
-
-
+OPENAI_MAX_RETRIES = int(os.getenv("OPENAI_MAX_RETRIES", 2))
+
+
+def check_litellm_defined():
+    LITELLM_OPENAI_API_KEY = os.getenv("LITELLM_OPENAI_API_KEY", None)
+    if LITELLM_OPENAI_API_KEY:
+        os.environ["OPENAI_API_KEY"] = LITELLM_OPENAI_API_KEY
+    LITELLM_OPENAI_API_BASE = os.getenv("LITELLM_OPENAI_API_BASE", None)
+    if LITELLM_OPENAI_API_BASE:
+        os.environ["OPENAI_API_BASE"] = LITELLM_OPENAI_API_BASE
+
+
+def get_api_key(prefix, oauth_token):
+    if not prefix.startswith("APOLLO"):
+        api_key = os.getenv(prefix + "OPENAI_API_KEY")
+    elif oauth_token.access_token is None or time.time() + 100 > oauth_token.token_expiry:
+        client_id = os.getenv("APOLLO_CLIENT_ID")
+        client_secret = os.getenv("APOLLO_CLIENT_SECRET")
+        token_url = os.getenv("APOLLO_OAUTH")
+        if not client_id or not client_secret or not token_url:
+            raise ValueError("Environment variables for OAuth are not set properly.")
+        token_data = {
+            "grant_type": "client_credentials",
+            "client_id": client_id,
+            "client_secret": client_secret,
+        }
+        verify = not prefix.startswith("APOLLO")
+        response = requests.post(token_url, data=token_data, verify=verify)
+        response.raise_for_status()
+        json_response = response.json()
+        oauth_token.access_token = json_response['access_token']
+        oauth_token.token_expiry = time.time() + json_response.get('expires_in', 3600)
+        api_key = oauth_token.access_token
+    else:
+        api_key = oauth_token.access_token
+    return api_key
 
 
 # Now use default retry with backoff of openai api
-def openai_chat_completion(prefix, **kwargs):
-    client = set_openai(prefix)
+def openai_chat_completion(prefix, oauth_token, base_url, **kwargs):
+    client = set_openai(prefix, oauth_token, base_url)
     response = client.chat.completions.create(**kwargs)
     return response
 
 
-def
-    for m in DEEPINFRA_VISION_MODELS:
-        if m in model.lower():
-            return True
-    return False
-
-
-def openai_list_models(prefix, **kwargs):
+def openai_list_models(prefix, oauth_token, base_url, **kwargs):
     def sort_by_created(x):
         if 'created' in x:
             return x['created']
```
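The new `get_api_key` helper above fetches an OAuth2 token with the client-credentials grant for `APOLLO`-prefixed endpoints, caches it on the shared `OAuthToken` holder, and refreshes it 100 seconds before expiry; every other prefix still reads a static `*OPENAI_API_KEY` environment variable. A minimal usage sketch, assuming the three `APOLLO_*` variables point at a reachable token endpoint (all values below are placeholders):

```python
import os

from pyconverters_openai_vision.openai_utils import OAuthToken, get_api_key

# Placeholder values -- a real deployment points these at its SSO server.
os.environ["APOLLO_CLIENT_ID"] = "my-client-id"
os.environ["APOLLO_CLIENT_SECRET"] = "my-client-secret"
os.environ["APOLLO_OAUTH"] = "https://sso.example.com/oauth2/token"

token = OAuthToken()
# First call POSTs the client-credentials grant and caches the access token.
key1 = get_api_key("APOLLO_", token)
# Later calls reuse the cached token until ~100s before token_expiry.
key2 = get_api_key("APOLLO_", token)
assert key1 == key2
```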
```diff
@@ -42,7 +74,7 @@ def openai_list_models(prefix, **kwargs):
         return x.id
 
     models = []
-    client = set_openai(prefix)
+    client = set_openai(prefix, oauth_token, base_url, max_retries=10)
     if prefix.startswith("DEEPINFRA"):
         deepinfra_url = client.base_url
         deepinfra_models = {}
@@ -75,9 +107,19 @@ def openai_list_models(prefix, **kwargs):
         mods = list(
             {m['model_name'] for m in mods if m['task'] == 'text-generation' and m['status'] == 'running'})
         deepinfra_models.update({m: m for m in mods})
-        models =
+        models = list(deepinfra_models.keys())
     elif prefix.startswith("AZURE"):
         models = comma_separated_to_list(os.getenv(prefix + "OPENAI_DEPLOYMENT_ID", None))
+    elif prefix.startswith("APOLLO"):
+        apollo_url = client.base_url
+        public_models_list_url = f"{apollo_url}models"
+        response = requests.get(public_models_list_url, verify=False,
+                                headers={'Accept': "application/json", 'Authorization': f"Bearer {client.api_key}"})
+        if response.ok:
+            resp = response.json()
+            mods = sorted(resp["data"], key=sort_by_created, reverse=True)
+            models = list(
+                {m['id'] for m in mods})
     else:
         response = client.models.list(**kwargs)
         models = sorted(response.data, key=sort_by_created, reverse=True)
```
```diff
@@ -85,20 +127,31 @@ def openai_list_models(prefix, **kwargs):
     return models
 
 
-def set_openai(prefix):
+def set_openai(prefix, oauth_token, base_url, max_retries=OPENAI_MAX_RETRIES):
+    api_key = get_api_key(prefix, oauth_token)
     if prefix.startswith("AZURE"):
         client = AzureOpenAI(
             # This is the default and can be omitted
-            api_key=
-            azure_endpoint=
+            api_key=api_key,
+            azure_endpoint=base_url,
             api_version=os.getenv(prefix + "OPENAI_API_VERSION", None),
             # azure_deployment=os.getenv(prefix + "OPENAI_DEPLOYMENT_ID", None)
         )
     else:
+        # hack to support verify=None for Apollo
+        if prefix.startswith("APOLLO"):
+            http_client = SyncHttpxClientWrapper(
+                base_url="https://api.openai.com/v1" if base_url is None else base_url,
+                verify=False,
+            )
+        else:
+            http_client = None
         client = OpenAI(
             # This is the default and can be omitted
-            api_key=
-            base_url=
+            api_key=api_key,
+            base_url=base_url,
+            http_client=http_client,
+            max_retries=max_retries
         )
     return client
 
```
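`SyncHttpxClientWrapper` comes from `openai._base_client`, a private module, which is presumably also why this release pins `httpx<0.28`: openai 1.9.0 constructs its internal httpx client with keyword arguments (notably `proxies`) that httpx 0.28 removed. A rough public-API equivalent of the Apollo branch above, with a placeholder endpoint:

```python
import httpx
from openai import OpenAI

base_url = "https://apollo.example.com/v1"  # placeholder endpoint

# Disable TLS verification for the gateway, as the wrapper's verify=False does,
# and hand the preconfigured httpx client to the OpenAI SDK.
http_client = httpx.Client(base_url=base_url, verify=False)
client = OpenAI(api_key="token-from-oauth", base_url=base_url,
                http_client=http_client, max_retries=2)
```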
```diff
@@ -107,14 +160,23 @@ def gpt_filter(m: str):
     return m.startswith('gpt') and not m.startswith('gpt-3.5-turbo-instruct') and 'vision' not in m
 
 
+def all_filter(m: str):
+    return True
+
+
+def apollo_filter(m: str):
+    return 'embed' not in m and 'vision' not in m and 'mock' not in m and 'tts' not in m and 'mock' not in m
+
+
 NO_DEPLOYED_MODELS = 'no deployed models - check API key'
 
 
-
+# @lru_cache(maxsize=None)
+def create_openai_model_enum(name, prefix="", base_url=None, key=all_filter):
     chat_gpt_models = []
     default_chat_gpt_model = None
     try:
-        chat_gpt_models = [m for m in openai_list_models(prefix) if key(m)]
+        chat_gpt_models = [m for m in openai_list_models(prefix, OAuthToken(), base_url) if key(m)]
         if chat_gpt_models:
             default_chat_gpt_model = DEFAULT_CHAT_GPT_MODEL if DEFAULT_CHAT_GPT_MODEL in chat_gpt_models else \
                 chat_gpt_models[0]
```
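`create_openai_model_enum` turns whatever the configured endpoint reports into a Python `Enum` used to populate the model dropdown, and the `key` callbacks decide which model ids survive. A small sketch of the filter behaviour (the model names are illustrative, not a claim about any particular deployment):

```python
from pyconverters_openai_vision.openai_utils import all_filter, apollo_filter, gpt_filter

assert gpt_filter("gpt-4o-mini")                     # plain gpt chat models pass
assert not gpt_filter("gpt-4-vision-preview")        # 'vision' models are excluded
assert not apollo_filter("text-embedding-3-small")   # embedding models filtered out
assert apollo_filter("llama-3-70b-instruct")         # ordinary chat models pass
assert all_filter("anything-at-all")                 # all_filter accepts everything
```

(Note that `apollo_filter` tests `'mock' not in m` twice; the duplicate is redundant but harmless.)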
```diff
--- /dev/null
+++ pyconverters_openai_vision-0.5.22/pyconverters_openai_vision/openai_vision.py
@@ -0,0 +1,466 @@
+import base64
+import os
+import re
+from enum import Enum
+from logging import Logger
+from re import Pattern
+from typing import List, cast, Type, Dict, Any, Optional
+
+import filetype as filetype
+from log_with_context import add_logging_context
+from pydantic import Field, BaseModel
+from pymultirole_plugins.v1.converter import ConverterParameters, ConverterBase
+from pymultirole_plugins.v1.processor import ProcessorParameters, ProcessorBase
+from pymultirole_plugins.v1.schema import Document, AltText
+from starlette.datastructures import UploadFile
+
+from .openai_utils import create_openai_model_enum, openai_chat_completion, gpt_filter, \
+    NO_DEPLOYED_MODELS, OAuthToken, all_filter, check_litellm_defined
+
+logger = Logger("pymultirole")
+SHOW_INTERNAL = bool(os.getenv("SHOW_INTERNAL", "false"))
+
+
+class OpenAIVisionBaseParameters(ConverterParameters):
+    base_url: str = Field(
+        None,
+        description="""OpenAI endpoint base url""", extra="advanced"
+    )
+    model_str: str = Field(
+        None, extra="advanced"
+    )
+    model: str = Field(
+        None, extra="internal"
+    )
+    prompt: str = Field(
+        """If the attached file is an image: describe the image.""",
+        description="""Contains the prompt as a string""",
+        extra="multiline",
+    )
+    max_tokens: int = Field(
+        16384,
+        description="""The maximum number of tokens to generate in the completion.
+    The token count of your prompt plus max_tokens cannot exceed the model's context length.
+    Most models have a context length of 2048 tokens (except for the newest models, which support 4096).""",
+    )
+    system_prompt: str = Field(
+        None,
+        description="""Contains the system prompt""",
+        extra="multiline,advanced",
+    )
+    temperature: float = Field(
+        0.1,
+        description="""What sampling temperature to use, between 0 and 2.
+    Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+    We generally recommend altering this or `top_p` but not both.""",
+        extra="advanced",
+    )
+    top_p: int = Field(
+        1,
+        description="""An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+    So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    We generally recommend altering this or `temperature` but not both.""",
+        extra="advanced",
+    )
+    n: int = Field(
+        1,
+        description="""How many completions to generate for each prompt.
+    Note: Because this parameter generates many completions, it can quickly consume your token quota.
+    Use carefully and ensure that you have reasonable settings for `max_tokens`.""",
+        extra="advanced",
+    )
+    best_of: int = Field(
+        1,
+        description="""Generates best_of completions server-side and returns the "best" (the one with the highest log probability per token).
+    Results cannot be streamed.
+    When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`.
+    Use carefully and ensure that you have reasonable settings for `max_tokens`.""",
+        extra="advanced",
+    )
+    presence_penalty: float = Field(
+        0.0,
+        description="""Number between -2.0 and 2.0.
+    Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.""",
+        extra="advanced",
+    )
+    frequency_penalty: float = Field(
+        0.0,
+        description="""Number between -2.0 and 2.0.
+    Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.""",
+        extra="advanced",
+    )
+
+
+class OpenAIVisionModel(str, Enum):
+    gpt_4o_mini = "gpt-4o-mini"
+    gpt_4o = "gpt-4o"
+    gpt_4_1 = "gpt-4.1"
+    gpt_4_1_mini = "gpt-4.1-mini"
+    gpt_4_1_nano = "gpt-4.1-nano"
+    gpt_5 = "gpt-5"
+    gpt_5_mini = "gpt-5-mini"
+    gpt_5_nano = "gpt-5-nano"
+
+
+check_litellm_defined()
+OPENAI_PREFIX = ""
+OPENAI_API_BASE = os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None)
+CHAT_GPT_MODEL_ENUM, DEFAULT_CHAT_GPT_MODEL = create_openai_model_enum('OpenAIModel2', prefix=OPENAI_PREFIX,
+                                                                       base_url=OPENAI_API_BASE,
+                                                                       key=gpt_filter if OPENAI_API_BASE is None else all_filter)
+
+
+class OpenAIVisionParameters(OpenAIVisionBaseParameters):
+    base_url: Optional[str] = Field(
+        os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None),
+        description="""OpenAI endpoint base url""", extra="advanced"
+    )
+    model: CHAT_GPT_MODEL_ENUM = Field(
+        DEFAULT_CHAT_GPT_MODEL,
+        description="""The [OpenAI model](https://platform.openai.com/docs/models) used for completion.""",
+        extra="pipeline-naming-hint"
+    )
+
+
+DEEPINFRA_PREFIX = "DEEPINFRA_"
+DEEPINFRA_OPENAI_API_BASE = os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None)
+DEEPINFRA_CHAT_GPT_MODEL_ENUM, DEEPINFRA_DEFAULT_CHAT_GPT_MODEL = create_openai_model_enum('DeepInfraOpenAIModel',
+                                                                                           prefix=DEEPINFRA_PREFIX,
+                                                                                           base_url=DEEPINFRA_OPENAI_API_BASE)
+
+
+class DeepInfraOpenAIVisionParameters(OpenAIVisionBaseParameters):
+    base_url: str = Field(
+        os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None),
+        description="""OpenAI endpoint base url""", extra="advanced"
+    )
+    model: DEEPINFRA_CHAT_GPT_MODEL_ENUM = Field(
+        None,
+        description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=text-generation) used for completion. It must be deployed on your [DeepInfra dashboard](https://deepinfra.com/dash).""",
+        extra="pipeline-naming-hint"
+    )
+
+
+class OpenAIVisionConverterBase(ConverterBase):
+    __doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
+    You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
+    PREFIX: str = ""
+    oauth_token: OAuthToken = OAuthToken()
+
+    def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile, kind
+                     ) -> Dict[str, Any]:
+        data = source.file.read()
+        rv = base64.b64encode(data)
+        if kind.mime.startswith("image"):
+            binary_block = {
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
+                }
+            }
+        messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
+        messages.append({"role": "user",
+                         "content": [
+                             {
+                                 "type": "text",
+                                 "text": params.prompt
+                             },
+                             binary_block
+                         ]})
+        kwargs = {
+            'model': params.model_str,
+            'messages': messages,
+            'max_tokens': params.max_tokens,
+            'temperature': params.temperature,
+            'top_p': params.top_p,
+            'n': params.n,
+            'frequency_penalty': params.frequency_penalty,
+            'presence_penalty': params.presence_penalty,
+        }
+        return kwargs
+
+    def compute_result(self, base_url, **kwargs):
+        pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
+        """Regex pattern to parse the output."""
+        response = openai_chat_completion(self.PREFIX, self.oauth_token, base_url, **kwargs)
+        contents = []
+        result = None
+        for choice in response.choices:
+            if choice.message.content:
+                if "```" in choice.message.content:
+                    action_match = pattern.search(choice.message.content)
+                    if action_match is not None:
+                        contents.append(action_match.group(1).strip())
+                else:
+                    contents.append(choice.message.content)
+        if contents:
+            result = "\n".join(contents)
+        return result
+
+    def convert(self, source: UploadFile, parameters: ConverterParameters) \
+            -> List[Document]:
+
+        params: OpenAIVisionBaseParameters = cast(
+            OpenAIVisionBaseParameters, parameters
+        )
+        OPENAI_MODEL = os.getenv(self.PREFIX + "OPENAI_MODEL", None)
+        if OPENAI_MODEL:
+            params.model_str = OPENAI_MODEL
+        doc = None
+        try:
+            kind = filetype.guess(source.file)
+            source.file.seek(0)
+            if kind.mime.startswith("image"):
+                result = None
+                kwargs = self.compute_args(params, source, kind)
+                if kwargs['model'] != NO_DEPLOYED_MODELS:
+                    result = self.compute_result(params.base_url, **kwargs)
+                if result:
+                    doc = Document(identifier=source.filename, text=result)
+                    doc.properties = {"fileName": source.filename}
+        except BaseException as err:
+            raise err
+        if doc is None:
+            raise TypeError(f"Conversion of file {source.filename} failed")
+        return [doc]
+
+    @classmethod
+    def get_model(cls) -> Type[BaseModel]:
+        return OpenAIVisionBaseParameters
+
+
+class OpenAIVisionConverter(OpenAIVisionConverterBase):
+    __doc__ = """Convert audio using [OpenAI Audio](https://platform.openai.com/docs/guides/speech-to-text) API"""
+
+    def convert(self, source: UploadFile, parameters: ConverterParameters) \
+            -> List[Document]:
+        params: OpenAIVisionParameters = cast(
+            OpenAIVisionParameters, parameters
+        )
+        model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+        model = params.model.value if params.model is not None else None
+        params.model_str = model_str or model
+        return super().convert(source, params)
+
+    @classmethod
+    def get_model(cls) -> Type[BaseModel]:
+        return OpenAIVisionParameters
+
+
+class DeepInfraOpenAIVisionConverter(OpenAIVisionConverterBase):
+    __doc__ = """Convert images using [DeepInfra Vision](https://deepinfra.com/docs/tutorials/whisper) API"""
+    PREFIX = DEEPINFRA_PREFIX
+
+    def convert(self, source: UploadFile, parameters: ConverterParameters) \
+            -> List[Document]:
+        params: DeepInfraOpenAIVisionParameters = cast(
+            DeepInfraOpenAIVisionParameters, parameters
+        )
+        model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+        model = params.model.value if params.model is not None else None
+        params.model_str = model_str or model
+        return super().convert(source, params)
+
+    @classmethod
+    def get_model(cls) -> Type[BaseModel]:
+        return DeepInfraOpenAIVisionParameters
+
+
+def guess_kind(base64_src):
+    kind = None
+    img_regex = r"data:(image/[^;]+);base64"
+    matches = re.search(img_regex, base64_src)
+    if matches:
+        mime = matches.group(1)
+        kind = filetype.get_type(mime)
+    return kind
+
+
+class OpenAIVisionProcessorBaseParameters(OpenAIVisionBaseParameters):
+    replace_refs_altTexts_by_descriptions: bool = Field(
+        False, extra="advanced"
+    )
+
+
+class OpenAIVisionProcessorBase(ProcessorBase):
+    __doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
+    You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
+    PREFIX: str = ""
+    oauth_token: OAuthToken = OAuthToken()
+
+    def compute_args(self, params: OpenAIVisionBaseParameters, source: str, kind
+                     ) -> Dict[str, Any]:
+        if kind.mime.startswith("image"):
+            binary_block = {
+                "type": "image_url",
+                "image_url": {
+                    "url": source
+                }
+            }
+        messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
+        messages.append({"role": "user",
+                         "content": [
+                             {
+                                 "type": "text",
+                                 "text": params.prompt
+                             },
+                             binary_block
+                         ]})
+        kwargs = {
+            'model': params.model_str,
+            'messages': messages,
+            'max_tokens': params.max_tokens,
+            'temperature': params.temperature,
+            'top_p': params.top_p,
+            'n': params.n,
+            'frequency_penalty': params.frequency_penalty,
+            'presence_penalty': params.presence_penalty,
+        }
+        return kwargs
+
+    def compute_result(self, base_url, **kwargs):
+        pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
+        """Regex pattern to parse the output."""
+        response = openai_chat_completion(self.PREFIX, self.oauth_token, base_url, **kwargs)
+        contents = []
+        result = None
+        for choice in response.choices:
+            if choice.message.content:
+                if "```" in choice.message.content:
+                    action_match = pattern.search(choice.message.content)
+                    if action_match is not None:
+                        contents.append(action_match.group(1).strip())
+                else:
+                    contents.append(choice.message.content)
+        if contents:
+            result = "\n".join(contents)
+        return result
+
+    def process(
+            self, documents: List[Document], parameters: ProcessorParameters
+    ) -> List[Document]:
+        # supported_languages = comma_separated_to_list(SUPPORTED_LANGUAGES)
+
+        params: OpenAIVisionProcessorBaseParameters = cast(
+            OpenAIVisionProcessorBaseParameters, parameters
+        )
+        OPENAI_MODEL = os.getenv(self.PREFIX + "OPENAI_MODEL", None)
+        if OPENAI_MODEL:
+            params.model_str = OPENAI_MODEL
+        try:
+            for document in documents:
+                with add_logging_context(docid=document.identifier):
+                    if document.altTexts:
+                        altTexts = document.altTexts
+                        alts = {altText.name: altText.text for altText in document.altTexts}
+                        anames = list(alts.keys())
+                        for aname in anames:
+                            atext = alts[aname]
+                            result = None
+                            kind = guess_kind(atext)
+                            if kind is not None and kind.mime.startswith("image"):
+                                kwargs = self.compute_args(params, atext, kind)
+                                if kwargs['model'] != NO_DEPLOYED_MODELS:
+                                    result = self.compute_result(params.base_url, **kwargs)
+                            if result is not None and isinstance(result, str):
+                                alts[aname] = result
+                            else:
+                                del alts[aname]
+                        if alts:
+                            document.altTexts = []
+
+                            if params.replace_refs_altTexts_by_descriptions:
+                                text = document.text
+                                link_regex = r"!\[([^]]+)\]\(([^]]+)\)"
+
+                                def convert_links(matchobj):
+                                    m = matchobj.group(0)
+                                    m_id = matchobj.group(1)
+                                    if m_id in alts:
+                                        m_desc = alts[m_id]
+                                        return f"{m}\n___\n{m_desc}\n___\n"
+                                    return m
+
+                                ptext = re.sub(link_regex, convert_links, text, 0,
+                                               re.MULTILINE)
+                                document.text = ptext
+                                for altText in altTexts:
+                                    if altText.name not in alts:
+                                        document.altTexts.append(altText)
+                            else:
+                                for altText in altTexts:
+                                    if altText.name in alts:
+                                        document.altTexts.append(AltText(name=altText.name, text=alts[altText.name]))
+                                    else:
+                                        document.altTexts.append(altText)
+
+        except BaseException as err:
+            raise err
+        return documents
+
+    @classmethod
+    def get_model(cls) -> Type[BaseModel]:
+        return OpenAIVisionProcessorBaseParameters
+
+
+class OpenAIVisionProcessorParameters(OpenAIVisionProcessorBaseParameters):
+    base_url: Optional[str] = Field(
+        os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None),
+        description="""OpenAI endpoint base url""", extra="advanced"
+    )
+    model: CHAT_GPT_MODEL_ENUM = Field(
+        DEFAULT_CHAT_GPT_MODEL,
+        description="""The [OpenAI model](https://platform.openai.com/docs/models) used for completion.""",
+        extra="pipeline-naming-hint"
+    )
+
+
+class OpenAIVisionProcessor(OpenAIVisionProcessorBase):
+    __doc__ = """Convert audio using [OpenAI Audio](https://platform.openai.com/docs/guides/speech-to-text) API"""
+
+    def process(
+            self, documents: List[Document], parameters: ProcessorParameters
+    ) -> List[Document]:
+        params: OpenAIVisionParameters = cast(
+            OpenAIVisionParameters, parameters
+        )
+        model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+        model = params.model.value if params.model is not None else None
+        params.model_str = model_str or model
+        return super().process(documents, params)
+
+    @classmethod
+    def get_model(cls) -> Type[BaseModel]:
+        return OpenAIVisionProcessorParameters
+
+
+class DeepInfraOpenAIVisionProcessorParameters(OpenAIVisionProcessorBaseParameters):
+    base_url: str = Field(
+        os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None),
+        description="""OpenAI endpoint base url""", extra="advanced"
+    )
+    model: DEEPINFRA_CHAT_GPT_MODEL_ENUM = Field(
+        None,
+        description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=text-generation) used for completion. It must be deployed on your [DeepInfra dashboard](https://deepinfra.com/dash).""",
+        extra="pipeline-naming-hint"
+    )
+
+
+class DeepInfraOpenAIVisionProcessor(OpenAIVisionProcessorBase):
+    __doc__ = """Convert images using [DeepInfra Vision](https://deepinfra.com/docs/tutorials/whisper) API"""
+    PREFIX = DEEPINFRA_PREFIX
+
+    def process(
+            self, documents: List[Document], parameters: ProcessorParameters
+    ) -> List[Document]:
+        params: DeepInfraOpenAIVisionParameters = cast(
+            DeepInfraOpenAIVisionParameters, parameters
+        )
+        model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+        model = params.model.value if params.model is not None else None
+        params.model_str = model_str or model
+        return super().process(documents, params)
+
+    @classmethod
+    def get_model(cls) -> Type[BaseModel]:
+        return DeepInfraOpenAIVisionProcessorParameters
```
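End to end, the converter reads an uploaded image, base64-encodes it into an `image_url` content block, and returns the model's answer as a `Document`; the processor variant does the same for base64 `altTexts` already attached to a document. A hedged driving sketch (it assumes `OPENAI_API_KEY` is set and uses the `tests/data/colducoq.jpg` fixture from the file list above; the exact `UploadFile` constructor form depends on the installed Starlette version):

```python
from starlette.datastructures import UploadFile

from pyconverters_openai_vision.openai_vision import (
    OpenAIVisionConverter,
    OpenAIVisionParameters,
)

converter = OpenAIVisionConverter()
parameters = OpenAIVisionParameters()  # defaults: gpt-4o-mini, "describe the image" prompt

with open("tests/data/colducoq.jpg", "rb") as fin:
    docs = converter.convert(UploadFile(file=fin, filename="colducoq.jpg"), parameters)

print(docs[0].properties)  # {'fileName': 'colducoq.jpg'}
print(docs[0].text)        # the generated description
```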
```diff
--- pyconverters_openai_vision-0.5.18/pyproject.toml
+++ pyconverters_openai_vision-0.5.22/pyproject.toml
@@ -30,6 +30,7 @@ classifiers = [
 ]
 requires = [
     "pymultirole-plugins>=0.5.0,<0.6.0",
+    "httpx<0.28",
     "openai==1.9.0",
     "Jinja2",
     "tenacity",
@@ -46,7 +47,9 @@ requires-python = ">=3.8"
 openai_vision = "pyconverters_openai_vision.openai_vision:OpenAIVisionConverter"
 deepinfra_openai_vision = "pyconverters_openai_vision.openai_vision:DeepInfraOpenAIVisionConverter"
 # azure_openai_vision = "pyconverters_openai_vision.openai_vision:AzureOpenAIVisionConverter"
-
+[tool.flit.entrypoints."pyprocessors.plugins"]
+openai_vision = "pyconverters_openai_vision.openai_vision:OpenAIVisionProcessor"
+deepinfra_openai_vision = "pyconverters_openai_vision.openai_vision:DeepInfraOpenAIVisionProcessor"
 
 [tool.flit.metadata.requires-extra]
 test = [
```