pyconverters-openai_vision 0.5.18__py3-none-any.whl → 0.5.22__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,2 +1,2 @@
  """OpenAIVision converter"""
- __version__ = "0.5.18"
+ __version__ = "0.5.22"
@@ -6,31 +6,63 @@ from openai import OpenAI
  from openai.lib.azure import AzureOpenAI
  from pymultirole_plugins.util import comma_separated_to_list
  from strenum import StrEnum
+ import time
+ from openai._base_client import SyncHttpxClientWrapper
+
+
+ class OAuthToken:
+     access_token: str = None
+     token_expiry: str = None
+

  logger = Logger("pymultirole")
  DEFAULT_CHAT_GPT_MODEL = "gpt-4o-mini"
- DEEPINFRA_VISION_MODELS = [
-     "vision",
-     "llava",
-     "pixtral"
- ]
+ OPENAI_MAX_RETRIES = int(os.getenv("OPENAI_MAX_RETRIES", 2))
+
+
+ def check_litellm_defined():
+     LITELLM_OPENAI_API_KEY = os.getenv("LITELLM_OPENAI_API_KEY", None)
+     if LITELLM_OPENAI_API_KEY:
+         os.environ["OPENAI_API_KEY"] = LITELLM_OPENAI_API_KEY
+     LITELLM_OPENAI_API_BASE = os.getenv("LITELLM_OPENAI_API_BASE", None)
+     if LITELLM_OPENAI_API_BASE:
+         os.environ["OPENAI_API_BASE"] = LITELLM_OPENAI_API_BASE
+
+
+ def get_api_key(prefix, oauth_token):
+     if not prefix.startswith("APOLLO"):
+         api_key = os.getenv(prefix + "OPENAI_API_KEY")
+     elif oauth_token.access_token is None or time.time() + 100 > oauth_token.token_expiry:
+         client_id = os.getenv("APOLLO_CLIENT_ID")
+         client_secret = os.getenv("APOLLO_CLIENT_SECRET")
+         token_url = os.getenv("APOLLO_OAUTH")
+         if not client_id or not client_secret or not token_url:
+             raise ValueError("Environment variables for OAuth are not set properly.")
+         token_data = {
+             "grant_type": "client_credentials",
+             "client_id": client_id,
+             "client_secret": client_secret,
+         }
+         verify = not prefix.startswith("APOLLO")
+         response = requests.post(token_url, data=token_data, verify=verify)
+         response.raise_for_status()
+         json_response = response.json()
+         oauth_token.access_token = json_response['access_token']
+         oauth_token.token_expiry = time.time() + json_response.get('expires_in', 3600)
+         api_key = oauth_token.access_token
+     else:
+         api_key = oauth_token.access_token
+     return api_key


  # Now use default retry with backoff of openai api
- def openai_chat_completion(prefix, **kwargs):
-     client = set_openai(prefix)
+ def openai_chat_completion(prefix, oauth_token, base_url, **kwargs):
+     client = set_openai(prefix, oauth_token, base_url)
      response = client.chat.completions.create(**kwargs)
      return response


- def is_vision_model(model):
-     for m in DEEPINFRA_VISION_MODELS:
-         if m in model.lower():
-             return True
-     return False
-
-
- def openai_list_models(prefix, **kwargs):
+ def openai_list_models(prefix, oauth_token, base_url, **kwargs):
      def sort_by_created(x):
          if 'created' in x:
              return x['created']
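The OAuthToken/get_api_key pair added above implements a standard OAuth2 client-credentials flow with in-place caching: the token is reused until 100 seconds before its expiry, then re-fetched. A minimal usage sketch, assuming the APOLLO_CLIENT_ID, APOLLO_CLIENT_SECRET and APOLLO_OAUTH environment variables are set (illustrative only, not part of the package):

    token = OAuthToken()
    key1 = get_api_key("APOLLO_", token)  # cache miss: POSTs grant_type=client_credentials to APOLLO_OAUTH
    key2 = get_api_key("APOLLO_", token)  # cache hit: token valid for more than 100 s, no HTTP call
    assert key1 == key2
    # a non-Apollo prefix bypasses OAuth entirely:
    openai_key = get_api_key("", token)   # just os.getenv("OPENAI_API_KEY")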
@@ -42,7 +74,7 @@ def openai_list_models(prefix, **kwargs):
          return x.id

      models = []
-     client = set_openai(prefix)
+     client = set_openai(prefix, oauth_token, base_url, max_retries=10)
      if prefix.startswith("DEEPINFRA"):
          deepinfra_url = client.base_url
          deepinfra_models = {}
@@ -75,9 +107,19 @@ def openai_list_models(prefix, **kwargs):
              mods = list(
                  {m['model_name'] for m in mods if m['task'] == 'text-generation' and m['status'] == 'running'})
              deepinfra_models.update({m: m for m in mods})
-         models = [m for m in deepinfra_models.keys() if is_vision_model(m)]
+         models = list(deepinfra_models.keys())
      elif prefix.startswith("AZURE"):
          models = comma_separated_to_list(os.getenv(prefix + "OPENAI_DEPLOYMENT_ID", None))
+     elif prefix.startswith("APOLLO"):
+         apollo_url = client.base_url
+         public_models_list_url = f"{apollo_url}models"
+         response = requests.get(public_models_list_url, verify=False,
+                                 headers={'Accept': "application/json", 'Authorization': f"Bearer {client.api_key}"})
+         if response.ok:
+             resp = response.json()
+             mods = sorted(resp["data"], key=sort_by_created, reverse=True)
+             models = list(
+                 {m['id'] for m in mods})
      else:
          response = client.models.list(**kwargs)
          models = sorted(response.data, key=sort_by_created, reverse=True)
@@ -85,20 +127,31 @@ def openai_list_models(prefix, **kwargs):
      return models


- def set_openai(prefix):
+ def set_openai(prefix, oauth_token, base_url, max_retries=OPENAI_MAX_RETRIES):
+     api_key = get_api_key(prefix, oauth_token)
      if prefix.startswith("AZURE"):
          client = AzureOpenAI(
              # This is the default and can be omitted
-             api_key=os.getenv(prefix + "OPENAI_API_KEY"),
-             azure_endpoint=os.getenv(prefix + "OPENAI_API_BASE", None),
+             api_key=api_key,
+             azure_endpoint=base_url,
              api_version=os.getenv(prefix + "OPENAI_API_VERSION", None),
              # azure_deployment=os.getenv(prefix + "OPENAI_DEPLOYMENT_ID", None)
          )
      else:
+         # hack to support verify=None for Apollo
+         if prefix.startswith("APOLLO"):
+             http_client = SyncHttpxClientWrapper(
+                 base_url="https://api.openai.com/v1" if base_url is None else base_url,
+                 verify=False,
+             )
+         else:
+             http_client = None
          client = OpenAI(
              # This is the default and can be omitted
-             api_key=os.getenv(prefix + "OPENAI_API_KEY"),
-             base_url=os.getenv(prefix + "OPENAI_API_BASE", None)
+             api_key=api_key,
+             base_url=base_url,
+             http_client=http_client,
+             max_retries=max_retries
          )
      return client
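set_openai is now the single client factory: Azure prefixes get an AzureOpenAI client, Apollo prefixes get a plain OpenAI client over an httpx wrapper with TLS verification disabled (a deliberate security tradeoff, flagged by the inline "hack" comment), and everything else gets a default OpenAI client. A hedged sketch of the call pattern, assuming OPENAI_API_KEY is set (illustrative only):

    token = OAuthToken()
    client = set_openai("", token, None)                  # plain OpenAI client, default retries (OPENAI_MAX_RETRIES)
    client = set_openai("", token, None, max_retries=10)  # as used by openai_list_models above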
@@ -107,14 +160,23 @@ def gpt_filter(m: str):
      return m.startswith('gpt') and not m.startswith('gpt-3.5-turbo-instruct') and 'vision' not in m


+ def all_filter(m: str):
+     return True
+
+
+ def apollo_filter(m: str):
+     return 'embed' not in m and 'vision' not in m and 'mock' not in m and 'tts' not in m
+
+
  NO_DEPLOYED_MODELS = 'no deployed models - check API key'


- def create_openai_model_enum(name, prefix="", key=lambda m: m):
+ # @lru_cache(maxsize=None)
+ def create_openai_model_enum(name, prefix="", base_url=None, key=all_filter):
      chat_gpt_models = []
      default_chat_gpt_model = None
      try:
-         chat_gpt_models = [m for m in openai_list_models(prefix) if key(m)]
+         chat_gpt_models = [m for m in openai_list_models(prefix, OAuthToken(), base_url) if key(m)]
          if chat_gpt_models:
              default_chat_gpt_model = DEFAULT_CHAT_GPT_MODEL if DEFAULT_CHAT_GPT_MODEL in chat_gpt_models else \
                  chat_gpt_models[0]
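create_openai_model_enum queries the endpoint and turns the filtered model list into a dynamic Enum plus a default model; on failure it presumably falls back to the NO_DEPLOYED_MODELS sentinel. A hedged sketch of the contract, matching the call sites later in this diff (the enum internals are an assumption):

    import os

    MODEL_ENUM, DEFAULT_MODEL = create_openai_model_enum(
        'OpenAIModel2',
        prefix="",                          # environment variable prefix, e.g. "DEEPINFRA_"
        base_url=os.getenv("OPENAI_API_BASE", None),
        key=gpt_filter,                     # predicate keeping only gpt-* chat models
    )
    # MODEL_ENUM then types the pydantic `model` field and DEFAULT_MODEL is its default.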
@@ -4,30 +4,36 @@ import re
  from enum import Enum
  from logging import Logger
  from re import Pattern
- from typing import List, cast, Type, Dict, Any
+ from typing import List, cast, Type, Dict, Any, Optional

  import filetype as filetype
+ from log_with_context import add_logging_context
  from pydantic import Field, BaseModel
  from pymultirole_plugins.v1.converter import ConverterParameters, ConverterBase
- from pymultirole_plugins.v1.schema import Document
+ from pymultirole_plugins.v1.processor import ProcessorParameters, ProcessorBase
+ from pymultirole_plugins.v1.schema import Document, AltText
  from starlette.datastructures import UploadFile

- from .openai_utils import NO_DEPLOYED_MODELS, \
-     openai_chat_completion, create_openai_model_enum
+ from .openai_utils import create_openai_model_enum, openai_chat_completion, gpt_filter, \
+     NO_DEPLOYED_MODELS, OAuthToken, all_filter, check_litellm_defined

  logger = Logger("pymultirole")
+ SHOW_INTERNAL = bool(os.getenv("SHOW_INTERNAL", "false"))


  class OpenAIVisionBaseParameters(ConverterParameters):
+     base_url: str = Field(
+         None,
+         description="""OpenAI endpoint base url""", extra="advanced"
+     )
      model_str: str = Field(
-         None, extra="internal"
+         None, extra="advanced"
      )
      model: str = Field(
          None, extra="internal"
      )
      prompt: str = Field(
-         """If the attached file is an image: describe the image with a lot of details.",
-         If the attached file is a PDF document: convert the PDF document into Markdown format. The output must be just the markdown result without any explanation or introductory prefix.""",
+         """If the attached file is an image: describe the image.""",
          description="""Contains the prompt as a string""",
          extra="multiline",
      )
@@ -43,7 +49,7 @@ class OpenAIVisionBaseParameters(ConverterParameters):
          extra="multiline,advanced",
      )
      temperature: float = Field(
-         1.0,
+         0.1,
          description="""What sampling temperature to use, between 0 and 2.
  Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
  We generally recommend altering this or `top_p` but not both.""",
@@ -88,47 +94,58 @@ class OpenAIVisionBaseParameters(ConverterParameters):
  class OpenAIVisionModel(str, Enum):
      gpt_4o_mini = "gpt-4o-mini"
      gpt_4o = "gpt-4o"
-     o3_mini = "o3-mini"
+     gpt_4_1 = "gpt-4.1"
+     gpt_4_1_mini = "gpt-4.1-mini"
+     gpt_4_1_nano = "gpt-4.1-nano"
+     gpt_5 = "gpt-5"
+     gpt_5_mini = "gpt-5-mini"
+     gpt_5_nano = "gpt-5-nano"


- class OpenAIVisionParameters(OpenAIVisionBaseParameters):
-     model: OpenAIVisionModel = Field(
-         OpenAIVisionModel.gpt_4o_mini,
-         description="""The [OpenAI model](https://platform.openai.com/docs/models) used for vision. Options currently available:</br>
+ check_litellm_defined()
+ OPENAI_PREFIX = ""
+ OPENAI_API_BASE = os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None)
+ CHAT_GPT_MODEL_ENUM, DEFAULT_CHAT_GPT_MODEL = create_openai_model_enum('OpenAIModel2', prefix=OPENAI_PREFIX,
+                                                                        base_url=OPENAI_API_BASE,
+                                                                        key=gpt_filter if OPENAI_API_BASE is None else all_filter)

-     """, extra="pipeline-naming-hint"
+
+ class OpenAIVisionParameters(OpenAIVisionBaseParameters):
+     base_url: Optional[str] = Field(
+         os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None),
+         description="""OpenAI endpoint base url""", extra="advanced"
+     )
+     model: CHAT_GPT_MODEL_ENUM = Field(
+         DEFAULT_CHAT_GPT_MODEL,
+         description="""The [OpenAI model](https://platform.openai.com/docs/models) used for completion.""",
+         extra="pipeline-naming-hint"
      )


  DEEPINFRA_PREFIX = "DEEPINFRA_"
- DEEPINFRA_VISION_MODEL_ENUM, DEEPINFRA_DEFAULT_VISION_MODEL = create_openai_model_enum('DeepInfraVisionModel',
-                                                                                        prefix=DEEPINFRA_PREFIX)
+ DEEPINFRA_OPENAI_API_BASE = os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None)
+ DEEPINFRA_CHAT_GPT_MODEL_ENUM, DEEPINFRA_DEFAULT_CHAT_GPT_MODEL = create_openai_model_enum('DeepInfraOpenAIModel',
+                                                                                            prefix=DEEPINFRA_PREFIX,
+                                                                                            base_url=DEEPINFRA_OPENAI_API_BASE)


  class DeepInfraOpenAIVisionParameters(OpenAIVisionBaseParameters):
-     model: DEEPINFRA_VISION_MODEL_ENUM = Field(
+     base_url: str = Field(
+         os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None),
+         description="""OpenAI endpoint base url""", extra="advanced"
+     )
+     model: DEEPINFRA_CHAT_GPT_MODEL_ENUM = Field(
          None,
-         description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=automatic-speech-recognition) used for speech to text transcription. It must be deployed on your [DeepInfra dashboard](https://deepinfra.com/dash).
-         """, extra="pipeline-naming-hint"
+         description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=text-generation) used for completion. It must be deployed on your [DeepInfra dashboard](https://deepinfra.com/dash).""",
+         extra="pipeline-naming-hint"
      )


- # AZURE_PREFIX = "AZURE_"
- #
- #
- # class AzureOpenAIVisionParameters(OpenAIVisionBaseParameters):
- #     model: OpenAIVisionModel = Field(
- #         OpenAIVisionModel.whisper_1,
- #         description="""The [Azure OpenAI model](https://platform.openai.com/docs/models) used for speech to text transcription. Options currently available:</br>
- #         <li>`whisper-1` - state-of-the-art open source large-v2 Whisper model.
- #         """, extra="pipeline-naming-hint"
- #     )
-
-
  class OpenAIVisionConverterBase(ConverterBase):
      __doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
  You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
      PREFIX: str = ""
+     oauth_token: OAuthToken = OAuthToken()

      def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile, kind
                       ) -> Dict[str, Any]:
@@ -141,13 +158,6 @@ class OpenAIVisionConverterBase(ConverterBase):
                      "url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
                  }
              }
-         else:
-             binary_block = {
-                 "type": "file",
-                 "file": {
-                     "filename": source.filename,
-                     "file_data": f"data:application/pdf;base64,{rv.decode('utf-8')}"}
-             }
          messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
          messages.append({"role": "user",
                           "content": [
@@ -169,11 +179,12 @@ class OpenAIVisionConverterBase(ConverterBase):
          }
          return kwargs

-     def compute_result(self, **kwargs):
+     def compute_result(self, base_url, **kwargs):
          pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
          """Regex pattern to parse the output."""
-         response = openai_chat_completion(self.PREFIX, **kwargs)
+         response = openai_chat_completion(self.PREFIX, self.oauth_token, base_url, **kwargs)
          contents = []
+         result = None
          for choice in response.choices:
              if choice.message.content:
                  if "```" in choice.message.content:
@@ -199,11 +210,11 @@ class OpenAIVisionConverterBase(ConverterBase):
          try:
              kind = filetype.guess(source.file)
              source.file.seek(0)
-             if kind.mime.startswith("image") or kind.mime.endswith("pdf"):
+             if kind.mime.startswith("image"):
                  result = None
                  kwargs = self.compute_args(params, source, kind)
                  if kwargs['model'] != NO_DEPLOYED_MODELS:
-                     result = self.compute_result(**kwargs)
+                     result = self.compute_result(params.base_url, **kwargs)
                  if result:
                      doc = Document(identifier=source.filename, text=result)
                      doc.properties = {"fileName": source.filename}
@@ -226,7 +237,9 @@ class OpenAIVisionConverter(OpenAIVisionConverterBase):
          params: OpenAIVisionParameters = cast(
              OpenAIVisionParameters, parameters
          )
-         params.model_str = params.model.value
+         model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+         model = params.model.value if params.model is not None else None
+         params.model_str = model_str or model
          return super().convert(source, params)

      @classmethod
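This block (repeated verbatim in the DeepInfra converter and both processors below) lets a free-text model_str override the enum-valued model, treating blank strings as unset. A hedged distillation of the rule (illustrative only):

    def resolve_model(model_str, enum_value):
        # explicit model_str wins, unless empty or whitespace-only
        explicit = model_str if bool(model_str and model_str.strip()) else None
        return explicit or enum_value

    assert resolve_model("   ", "gpt-4o-mini") == "gpt-4o-mini"           # blank -> fall back to enum
    assert resolve_model("my-custom-model", "gpt-4o-mini") == "my-custom-model"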
@@ -243,9 +256,211 @@ class DeepInfraOpenAIVisionConverter(OpenAIVisionConverterBase):
          params: DeepInfraOpenAIVisionParameters = cast(
              DeepInfraOpenAIVisionParameters, parameters
          )
-         params.model_str = params.model.value
+         model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+         model = params.model.value if params.model is not None else None
+         params.model_str = model_str or model
          return super().convert(source, params)

      @classmethod
      def get_model(cls) -> Type[BaseModel]:
          return DeepInfraOpenAIVisionParameters
+
+
+ def guess_kind(base64_src):
+     kind = None
+     img_regex = r"data:(image/[^;]+);base64"
+     matches = re.search(img_regex, base64_src)
+     if matches:
+         mime = matches.group(1)
+         kind = filetype.get_type(mime)
+     return kind
+
+
+ class OpenAIVisionProcessorBaseParameters(OpenAIVisionBaseParameters):
+     replace_refs_altTexts_by_descriptions: bool = Field(
+         False, extra="advanced"
+     )
+
+
+ class OpenAIVisionProcessorBase(ProcessorBase):
+     __doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
+ You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
+     PREFIX: str = ""
+     oauth_token: OAuthToken = OAuthToken()
+
+     def compute_args(self, params: OpenAIVisionBaseParameters, source: str, kind
+                      ) -> Dict[str, Any]:
+         if kind.mime.startswith("image"):
+             binary_block = {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": source
+                 }
+             }
+         messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
+         messages.append({"role": "user",
+                          "content": [
+                              {
+                                  "type": "text",
+                                  "text": params.prompt
+                              },
+                              binary_block
+                          ]})
+         kwargs = {
+             'model': params.model_str,
+             'messages': messages,
+             'max_tokens': params.max_tokens,
+             'temperature': params.temperature,
+             'top_p': params.top_p,
+             'n': params.n,
+             'frequency_penalty': params.frequency_penalty,
+             'presence_penalty': params.presence_penalty,
+         }
+         return kwargs
+
+     def compute_result(self, base_url, **kwargs):
+         pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
+         """Regex pattern to parse the output."""
+         response = openai_chat_completion(self.PREFIX, self.oauth_token, base_url, **kwargs)
+         contents = []
+         result = None
+         for choice in response.choices:
+             if choice.message.content:
+                 if "```" in choice.message.content:
+                     action_match = pattern.search(choice.message.content)
+                     if action_match is not None:
+                         contents.append(action_match.group(1).strip())
+                 else:
+                     contents.append(choice.message.content)
+         if contents:
+             result = "\n".join(contents)
+         return result
+
+     def process(
+             self, documents: List[Document], parameters: ProcessorParameters
+     ) -> List[Document]:
+         # supported_languages = comma_separated_to_list(SUPPORTED_LANGUAGES)
+
+         params: OpenAIVisionProcessorBaseParameters = cast(
+             OpenAIVisionProcessorBaseParameters, parameters
+         )
+         OPENAI_MODEL = os.getenv(self.PREFIX + "OPENAI_MODEL", None)
+         if OPENAI_MODEL:
+             params.model_str = OPENAI_MODEL
+         try:
+             for document in documents:
+                 with add_logging_context(docid=document.identifier):
+                     if document.altTexts:
+                         altTexts = document.altTexts
+                         alts = {altText.name: altText.text for altText in document.altTexts}
+                         anames = list(alts.keys())
+                         for aname in anames:
+                             atext = alts[aname]
+                             result = None
+                             kind = guess_kind(atext)
+                             if kind is not None and kind.mime.startswith("image"):
+                                 kwargs = self.compute_args(params, atext, kind)
+                                 if kwargs['model'] != NO_DEPLOYED_MODELS:
+                                     result = self.compute_result(params.base_url, **kwargs)
+                             if result is not None and isinstance(result, str):
+                                 alts[aname] = result
+                             else:
+                                 del alts[aname]
+                         if alts:
+                             document.altTexts = []
+
+                             if params.replace_refs_altTexts_by_descriptions:
+                                 text = document.text
+                                 link_regex = r"!\[([^]]+)\]\(([^]]+)\)"
+
+                                 def convert_links(matchobj):
+                                     m = matchobj.group(0)
+                                     m_id = matchobj.group(1)
+                                     if m_id in alts:
+                                         m_desc = alts[m_id]
+                                         return f"{m}\n___\n{m_desc}\n___\n"
+                                     return m
+
+                                 ptext = re.sub(link_regex, convert_links, text, 0,
+                                                re.MULTILINE)
+                                 document.text = ptext
+                                 for altText in altTexts:
+                                     if altText.name not in alts:
+                                         document.altTexts.append(altText)
+                             else:
+                                 for altText in altTexts:
+                                     if altText.name in alts:
+                                         document.altTexts.append(AltText(name=altText.name, text=alts[altText.name]))
+                                     else:
+                                         document.altTexts.append(altText)
+
+         except BaseException as err:
+             raise err
+         return documents
+
+     @classmethod
+     def get_model(cls) -> Type[BaseModel]:
+         return OpenAIVisionProcessorBaseParameters
+
+
+ class OpenAIVisionProcessorParameters(OpenAIVisionProcessorBaseParameters):
+     base_url: Optional[str] = Field(
+         os.getenv(OPENAI_PREFIX + "OPENAI_API_BASE", None),
+         description="""OpenAI endpoint base url""", extra="advanced"
+     )
+     model: CHAT_GPT_MODEL_ENUM = Field(
+         DEFAULT_CHAT_GPT_MODEL,
+         description="""The [OpenAI model](https://platform.openai.com/docs/models) used for completion.""",
+         extra="pipeline-naming-hint"
+     )
+
+
+ class OpenAIVisionProcessor(OpenAIVisionProcessorBase):
+     __doc__ = """Describe images using [OpenAI Vision](https://platform.openai.com/docs/guides/vision) API"""
+
+     def process(
+             self, documents: List[Document], parameters: ProcessorParameters
+     ) -> List[Document]:
+         params: OpenAIVisionParameters = cast(
+             OpenAIVisionParameters, parameters
+         )
+         model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+         model = params.model.value if params.model is not None else None
+         params.model_str = model_str or model
+         return super().process(documents, params)
+
+     @classmethod
+     def get_model(cls) -> Type[BaseModel]:
+         return OpenAIVisionProcessorParameters
+
+
+ class DeepInfraOpenAIVisionProcessorParameters(OpenAIVisionProcessorBaseParameters):
+     base_url: str = Field(
+         os.getenv(DEEPINFRA_PREFIX + "OPENAI_API_BASE", None),
+         description="""OpenAI endpoint base url""", extra="advanced"
+     )
+     model: DEEPINFRA_CHAT_GPT_MODEL_ENUM = Field(
+         None,
+         description="""The [DeepInfra 'OpenAI compatible' model](https://deepinfra.com/models?type=text-generation) used for completion. It must be deployed on your [DeepInfra dashboard](https://deepinfra.com/dash).""",
+         extra="pipeline-naming-hint"
+     )
+
+
+ class DeepInfraOpenAIVisionProcessor(OpenAIVisionProcessorBase):
+     __doc__ = """Convert images using [DeepInfra Vision](https://deepinfra.com/docs/tutorials/whisper) API"""
+     PREFIX = DEEPINFRA_PREFIX
+
+     def process(
+             self, documents: List[Document], parameters: ProcessorParameters
+     ) -> List[Document]:
+         params: DeepInfraOpenAIVisionParameters = cast(
+             DeepInfraOpenAIVisionParameters, parameters
+         )
+         model_str = params.model_str if bool(params.model_str and params.model_str.strip()) else None
+         model = params.model.value if params.model is not None else None
+         params.model_str = model_str or model
+         return super().process(documents, params)
+
+     @classmethod
+     def get_model(cls) -> Type[BaseModel]:
+         return DeepInfraOpenAIVisionProcessorParameters
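The new processors operate on altTexts whose text is a base64 data URL; guess_kind trusts the mime type declared in the URL header rather than sniffing the bytes. A standalone sketch with a hypothetical, truncated input (illustrative only):

    import re

    atext = "data:image/png;base64,iVBORw0KGgoAAA..."   # hypothetical data URL
    m = re.search(r"data:(image/[^;]+);base64", atext)
    print(m.group(1))  # -> 'image/png'; filetype.get_type('image/png') then resolves the type object

When replace_refs_altTexts_by_descriptions is enabled, convert_links keeps a markdown image reference such as ![figure-1](figure-1.png) in place and appends the generated description for "figure-1" directly after it, between ___ horizontal rules.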
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pyconverters-openai_vision
- Version: 0.5.18
+ Version: 0.5.22
  Summary: OpenAIVision converter
  Home-page: https://kairntech.com/
  Author: Olivier Terrier
@@ -27,6 +27,7 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
  Classifier: Topic :: Internet :: WWW/HTTP
  Requires-Dist: pymultirole-plugins>=0.5.0,<0.6.0
+ Requires-Dist: httpx<0.28
  Requires-Dist: openai==1.9.0
  Requires-Dist: Jinja2
  Requires-Dist: tenacity
@@ -0,0 +1,7 @@
+ pyconverters_openai_vision/__init__.py,sha256=DVZWqJHpQcrIA6rtKEN3C-qJc4k9cH846SAacAXLaYY,52
+ pyconverters_openai_vision/openai_utils.py,sha256=XI4WYZ-EAVG0Vxd5yUDuZNDgEzqHJeriScxTUusi1oo,7740
+ pyconverters_openai_vision/openai_vision.py,sha256=PW_JnekYyE4_XVba6tRs0jwFF1wXbf5zfy1yF2p-BzQ,20014
+ pyconverters_openai_vision-0.5.22.dist-info/entry_points.txt,sha256=KLlvDTMJjHy0fk6mvTXFNpn0pC8UKsTJLd9wre9SOHw,394
+ pyconverters_openai_vision-0.5.22.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+ pyconverters_openai_vision-0.5.22.dist-info/METADATA,sha256=C5srCJu7yQnlmRRORntBdmAcOhHdjRGAPWBrUiCKlY0,2662
+ pyconverters_openai_vision-0.5.22.dist-info/RECORD,,
@@ -2,3 +2,7 @@
  deepinfra_openai_vision=pyconverters_openai_vision.openai_vision:DeepInfraOpenAIVisionConverter
  openai_vision=pyconverters_openai_vision.openai_vision:OpenAIVisionConverter

+ [pyprocessors.plugins]
+ deepinfra_openai_vision=pyconverters_openai_vision.openai_vision:DeepInfraOpenAIVisionProcessor
+ openai_vision=pyconverters_openai_vision.openai_vision:OpenAIVisionProcessor
+
@@ -1,7 +0,0 @@
- pyconverters_openai_vision/__init__.py,sha256=SiWXJv3xF2cmMTYoqoQ-IG7e9b1IwDKstqyF0_DhwKY,52
- pyconverters_openai_vision/openai_utils.py,sha256=HRJ6sJg88en66gkQbOpQKh7cbwtfoAwVLNh7JQSA9ps,5014
- pyconverters_openai_vision/openai_vision.py,sha256=ACpeOEFNphqKceQqyWHwpP6PvuOOig8qTTHwImq34j8,10445
- pyconverters_openai_vision-0.5.18.dist-info/entry_points.txt,sha256=-DS1gRUTf08Fjb79S_8sqCaqxBifC3q3EJZqXXdcf7Q,197
- pyconverters_openai_vision-0.5.18.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
- pyconverters_openai_vision-0.5.18.dist-info/METADATA,sha256=eEkj7GS21qyJulma5ScRH8EWau6Vw5kErZMW__LT0eM,2636
- pyconverters_openai_vision-0.5.18.dist-info/RECORD,,