pyconverters-openai_vision 0.5.3__tar.gz → 0.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/Dockerfile +1 -1
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/Jenkinsfile +7 -7
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/PKG-INFO +2 -2
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/pyconverters_openai_vision/__init__.py +1 -1
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/pyconverters_openai_vision/openai_vision.py +36 -15
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/setup.py +1 -1
- pyconverters_openai_vision-0.5.7/tests/data/Sodexo_URD_2023_FR - 4p.pdf +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/test_openai_vision.py +38 -1
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/.dockerignore +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/.gitignore +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/README.md +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/bumpversion.py +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/pyconverters_openai_vision/openai_utils.py +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/pyproject.toml +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/__init__.py +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/data/colducoq.jpg +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/data/webinar.png +0 -0
- {pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tox.ini +0 -0
|
@@ -176,7 +176,7 @@ pipeline {
|
|
|
176
176
|
}
|
|
177
177
|
if (sendEmailNotif("${PATH_HOME}/${JOB_NAME}", "${BUILD_NUMBER}")) {
|
|
178
178
|
println 'sending Success Build notification'
|
|
179
|
-
CUSTOM_SUBJECT = '[CI - Jenkinzz SUCCESS] ' + CUSTOM_SUBJECT
|
|
179
|
+
def CUSTOM_SUBJECT = '[CI - Jenkinzz SUCCESS] ' + CUSTOM_SUBJECT
|
|
180
180
|
emailext(
|
|
181
181
|
mimeType: 'text/html',
|
|
182
182
|
subject: CUSTOM_SUBJECT,
|
|
@@ -211,7 +211,7 @@ pipeline {
|
|
|
211
211
|
println 'Exception occurred: ' + e.toString()
|
|
212
212
|
}
|
|
213
213
|
println 'sending Failure Build notification'
|
|
214
|
-
CUSTOM_SUBJECT = '[CI - Jenkinzz FAILURE] ' + CUSTOM_SUBJECT
|
|
214
|
+
def CUSTOM_SUBJECT = '[CI - Jenkinzz FAILURE] ' + CUSTOM_SUBJECT
|
|
215
215
|
emailext(
|
|
216
216
|
mimeType: 'text/html',
|
|
217
217
|
subject: CUSTOM_SUBJECT,
|
|
@@ -235,7 +235,7 @@ pipeline {
|
|
|
235
235
|
|
|
236
236
|
// return FLIT_USERNAME from given file
|
|
237
237
|
def getUserName(path) {
|
|
238
|
-
USERNAME = sh(
|
|
238
|
+
def USERNAME = sh(
|
|
239
239
|
script: "grep FLIT_USERNAME ${path}|cut -d '=' -f2",
|
|
240
240
|
returnStdout: true
|
|
241
241
|
).trim()
|
|
@@ -244,7 +244,7 @@ def getUserName(path) {
|
|
|
244
244
|
|
|
245
245
|
// return FLIT_PASSWORD from given file
|
|
246
246
|
def getUserPass(path) {
|
|
247
|
-
USERPASS = sh(
|
|
247
|
+
def USERPASS = sh(
|
|
248
248
|
script: "grep FLIT_PASSWORD ${path}|cut -d '=' -f2",
|
|
249
249
|
returnStdout: true
|
|
250
250
|
).trim()
|
|
@@ -264,11 +264,11 @@ def switchEmailNotif(toggle, build) {
|
|
|
264
264
|
|
|
265
265
|
// return true if emailNotif file present
|
|
266
266
|
boolean sendEmailNotif(path, build) {
|
|
267
|
-
emailNotif = sh(
|
|
267
|
+
def emailNotif = sh(
|
|
268
268
|
script: "find ${path} -name '.emailNotif'|wc -l",
|
|
269
269
|
returnStdout: true
|
|
270
270
|
).trim()
|
|
271
|
-
emailContent = ''
|
|
271
|
+
def emailContent = ''
|
|
272
272
|
if (emailNotif == '1') {
|
|
273
273
|
emailContent = sh(
|
|
274
274
|
script: "cat ${path}/.emailNotif",
|
|
@@ -279,7 +279,7 @@ boolean sendEmailNotif(path, build) {
|
|
|
279
279
|
}
|
|
280
280
|
|
|
281
281
|
def analyseBuildCause() {
|
|
282
|
-
upstreamProjects = ['pymultirole_plugins']
|
|
282
|
+
String[] upstreamProjects = ['pymultirole_plugins']
|
|
283
283
|
boolean upstreamRunning = false
|
|
284
284
|
String jobName
|
|
285
285
|
// iterate over upstreamProjects
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""OpenAIVision converter"""
|
|
2
|
-
__version__ = "0.5.3"
|
|
2
|
+
__version__ = "0.5.7"
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import os
|
|
3
|
+
import re
|
|
3
4
|
from enum import Enum
|
|
4
5
|
from logging import Logger
|
|
6
|
+
from re import Pattern
|
|
5
7
|
from typing import List, cast, Type, Dict, Any
|
|
6
8
|
|
|
7
9
|
import filetype as filetype
|
|
@@ -24,12 +26,13 @@ class OpenAIVisionBaseParameters(ConverterParameters):
|
|
|
24
26
|
None, extra="internal"
|
|
25
27
|
)
|
|
26
28
|
prompt: str = Field(
|
|
27
|
-
"
|
|
29
|
+
"""If the attached file is an image: describe the image with a lot of details.",
|
|
30
|
+
If the attached file is a PDF document: convert the PDF document into Markdown format. The output must be just the markdown result without any explanation or introductory prefix.""",
|
|
28
31
|
description="""Contains the prompt as a string""",
|
|
29
32
|
extra="multiline",
|
|
30
33
|
)
|
|
31
34
|
max_tokens: int = Field(
|
|
32
|
-
|
|
35
|
+
16384,
|
|
33
36
|
description="""The maximum number of tokens to generate in the completion.
|
|
34
37
|
The token count of your prompt plus max_tokens cannot exceed the model's context length.
|
|
35
38
|
Most models have a context length of 2048 tokens (except for the newest models, which support 4096).""",
|
|
@@ -85,13 +88,14 @@ class OpenAIVisionBaseParameters(ConverterParameters):
|
|
|
85
88
|
class OpenAIVisionModel(str, Enum):
|
|
86
89
|
gpt_4o_mini = "gpt-4o-mini"
|
|
87
90
|
gpt_4o = "gpt-4o"
|
|
91
|
+
o3_mini = "o3-mini"
|
|
88
92
|
|
|
89
93
|
|
|
90
94
|
class OpenAIVisionParameters(OpenAIVisionBaseParameters):
|
|
91
95
|
model: OpenAIVisionModel = Field(
|
|
92
96
|
OpenAIVisionModel.gpt_4o_mini,
|
|
93
|
-
description="""The [OpenAI model](https://platform.openai.com/docs/models) used for
|
|
94
|
-
|
|
97
|
+
description="""The [OpenAI model](https://platform.openai.com/docs/models) used for vision. Options currently available:</br>
|
|
98
|
+
|
|
95
99
|
""", extra="pipeline-naming-hint"
|
|
96
100
|
)
|
|
97
101
|
|
|
@@ -126,10 +130,24 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
126
130
|
You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
|
|
127
131
|
PREFIX: str = ""
|
|
128
132
|
|
|
129
|
-
def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile
|
|
133
|
+
def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile, kind
|
|
130
134
|
) -> Dict[str, Any]:
|
|
131
135
|
data = source.file.read()
|
|
132
136
|
rv = base64.b64encode(data)
|
|
137
|
+
if kind.mime.startswith("image"):
|
|
138
|
+
binary_block = {
|
|
139
|
+
"type": "image_url",
|
|
140
|
+
"image_url": {
|
|
141
|
+
"url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
else:
|
|
145
|
+
binary_block = {
|
|
146
|
+
"type": "file",
|
|
147
|
+
"file": {
|
|
148
|
+
"filename": source.filename,
|
|
149
|
+
"file_data": f"data:application/pdf;base64,{rv.decode('utf-8')}"}
|
|
150
|
+
}
|
|
133
151
|
messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
|
|
134
152
|
messages.append({"role": "user",
|
|
135
153
|
"content": [
|
|
@@ -137,12 +155,8 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
137
155
|
"type": "text",
|
|
138
156
|
"text": params.prompt
|
|
139
157
|
},
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
"image_url": {
|
|
143
|
-
"url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
|
|
144
|
-
}
|
|
145
|
-
}]})
|
|
158
|
+
binary_block
|
|
159
|
+
]})
|
|
146
160
|
kwargs = {
|
|
147
161
|
'model': params.model_str,
|
|
148
162
|
'messages': messages,
|
|
@@ -156,11 +170,18 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
156
170
|
return kwargs
|
|
157
171
|
|
|
158
172
|
def compute_result(self, **kwargs):
|
|
173
|
+
pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
|
|
174
|
+
"""Regex pattern to parse the output."""
|
|
159
175
|
response = openai_chat_completion(self.PREFIX, **kwargs)
|
|
160
176
|
contents = []
|
|
161
177
|
for choice in response.choices:
|
|
162
178
|
if choice.message.content:
|
|
163
|
-
|
|
179
|
+
if "```" in choice.message.content:
|
|
180
|
+
action_match = pattern.search(choice.message.content)
|
|
181
|
+
if action_match is not None:
|
|
182
|
+
contents.append(action_match.group(1).strip())
|
|
183
|
+
else:
|
|
184
|
+
contents.append(choice.message.content)
|
|
164
185
|
if contents:
|
|
165
186
|
result = "\n".join(contents)
|
|
166
187
|
return result
|
|
@@ -178,9 +199,9 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
178
199
|
try:
|
|
179
200
|
kind = filetype.guess(source.file)
|
|
180
201
|
source.file.seek(0)
|
|
181
|
-
if kind.mime.startswith("image"):
|
|
202
|
+
if kind.mime.startswith("image") or kind.mime.endswith("pdf"):
|
|
182
203
|
result = None
|
|
183
|
-
kwargs = self.compute_args(params, source)
|
|
204
|
+
kwargs = self.compute_args(params, source, kind)
|
|
184
205
|
if kwargs['model'] != NO_DEPLOYED_MODELS:
|
|
185
206
|
result = self.compute_result(**kwargs)
|
|
186
207
|
if result:
|
|
@@ -189,7 +210,7 @@ class OpenAIVisionConverterBase(ConverterBase):
|
|
|
189
210
|
except BaseException as err:
|
|
190
211
|
raise err
|
|
191
212
|
if doc is None:
|
|
192
|
-
raise TypeError(f"Conversion of
|
|
213
|
+
raise TypeError(f"Conversion of file {source.filename} failed")
|
|
193
214
|
return [doc]
|
|
194
215
|
|
|
195
216
|
@classmethod
|
|
@@ -43,7 +43,7 @@ entry_points = \
|
|
|
43
43
|
'pyconverters_openai_vision.openai_vision:OpenAIVisionConverter']}
|
|
44
44
|
|
|
45
45
|
setup(name='pyconverters-openai_vision',
|
|
46
|
-
version='0.5.3',
|
|
46
|
+
version='0.5.7',
|
|
47
47
|
description='OpenAIVision converter',
|
|
48
48
|
author='Olivier Terrier',
|
|
49
49
|
author_email='olivier.terrier@kairntech.com',
|
|
Binary file
|
{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/test_openai_vision.py
RENAMED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import List
|
|
3
4
|
|
|
@@ -7,7 +8,7 @@ from starlette.datastructures import UploadFile
|
|
|
7
8
|
|
|
8
9
|
from pyconverters_openai_vision.openai_vision import (
|
|
9
10
|
OpenAIVisionConverter,
|
|
10
|
-
OpenAIVisionParameters, DeepInfraOpenAIVisionParameters, DeepInfraOpenAIVisionConverter
|
|
11
|
+
OpenAIVisionParameters, DeepInfraOpenAIVisionParameters, DeepInfraOpenAIVisionConverter, OpenAIVisionModel
|
|
11
12
|
)
|
|
12
13
|
|
|
13
14
|
|
|
@@ -37,6 +38,19 @@ def test_openai():
|
|
|
37
38
|
assert 'kairntech' in doc0.text.lower()
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
@pytest.mark.skip(reason="Not a test")
|
|
42
|
+
def test_openai_pdf():
|
|
43
|
+
converter = OpenAIVisionConverter()
|
|
44
|
+
parameters = OpenAIVisionParameters(model=OpenAIVisionModel.gpt_4o_mini)
|
|
45
|
+
testdir = Path(__file__).parent
|
|
46
|
+
source = Path(testdir, 'data/Sodexo_URD_2023_FR - 4p.pdf')
|
|
47
|
+
with source.open("rb") as fin:
|
|
48
|
+
docs: List[Document] = converter.convert(UploadFile(source.name, fin, 'application/pdf'), parameters)
|
|
49
|
+
assert len(docs) == 1
|
|
50
|
+
doc0 = docs[0]
|
|
51
|
+
assert 'une performance solide au cours' in doc0.text.lower()
|
|
52
|
+
|
|
53
|
+
|
|
40
54
|
@pytest.mark.skip(reason="Not a test")
|
|
41
55
|
def test_deepinfra():
|
|
42
56
|
converter = DeepInfraOpenAIVisionConverter()
|
|
@@ -55,3 +69,26 @@ def test_deepinfra():
|
|
|
55
69
|
assert len(docs) == 1
|
|
56
70
|
doc0 = docs[0]
|
|
57
71
|
assert 'generative ai' in doc0.text.lower()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@pytest.mark.skip(reason="Not a test")
|
|
75
|
+
def test_runpod():
|
|
76
|
+
os.environ["OPENAI_API_BASE"] = "https://api.runpod.ai/v2/vllm-9jnu8ajtktj5ay/openai/v1"
|
|
77
|
+
os.environ["OPENAI_MODEL"] = "mistralai/Pixtral-12B-2409"
|
|
78
|
+
os.environ["OPENAI_API_KEY"] = os.getenv("RUNPOD_API_KEY")
|
|
79
|
+
converter = OpenAIVisionConverter()
|
|
80
|
+
parameters = OpenAIVisionParameters()
|
|
81
|
+
testdir = Path(__file__).parent
|
|
82
|
+
source = Path(testdir, 'data/colducoq.jpg')
|
|
83
|
+
with source.open("rb") as fin:
|
|
84
|
+
docs: List[Document] = converter.convert(UploadFile(source.name, fin, 'image/jpeg'), parameters)
|
|
85
|
+
assert len(docs) == 1
|
|
86
|
+
doc0 = docs[0]
|
|
87
|
+
assert 'dent de crolles' in doc0.text.lower()
|
|
88
|
+
|
|
89
|
+
source = Path(testdir, 'data/webinar.png')
|
|
90
|
+
with source.open("rb") as fin:
|
|
91
|
+
docs: List[Document] = converter.convert(UploadFile(source.name, fin, 'image/png'), parameters)
|
|
92
|
+
assert len(docs) == 1
|
|
93
|
+
doc0 = docs[0]
|
|
94
|
+
assert 'generative ai' in doc0.text.lower()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/data/colducoq.jpg
RENAMED
|
File without changes
|
{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/data/webinar.png
RENAMED
|
File without changes
|
|
File without changes
|