PyPI - pyconverters-openai_vision - Versions diffs - 0.5.3__tar.gz → 0.5.7__tar.gz - Mend

pyconverters-openai_vision 0.5.3.tar.gz → 0.5.7.tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/Dockerfile RENAMED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.8-slim-buster
+FROM python:3.8-slim-bookworm
 # Install prerequisites
 RUN apt-get update -y && \
     apt-get install -y \

{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/Jenkinsfile RENAMED Viewed

@@ -176,7 +176,7 @@ pipeline {
             }
             if (sendEmailNotif("${PATH_HOME}/${JOB_NAME}", "${BUILD_NUMBER}")) {
               println 'sending Success Build notification'
-              CUSTOM_SUBJECT = '[CI - Jenkinzz SUCCESS] ' + CUSTOM_SUBJECT
+              def CUSTOM_SUBJECT = '[CI - Jenkinzz SUCCESS] ' + CUSTOM_SUBJECT
               emailext(
                   mimeType: 'text/html',
                   subject: CUSTOM_SUBJECT,
@@ -211,7 +211,7 @@ pipeline {
               println 'Exception occurred: ' + e.toString()
             }
             println 'sending Failure Build notification'
-            CUSTOM_SUBJECT = '[CI - Jenkinzz FAILURE] ' + CUSTOM_SUBJECT
+            def CUSTOM_SUBJECT = '[CI - Jenkinzz FAILURE] ' + CUSTOM_SUBJECT
             emailext(
                 mimeType: 'text/html',
                 subject: CUSTOM_SUBJECT,
@@ -235,7 +235,7 @@ pipeline {
 // return FLIT_USERNAME from given file
 def getUserName(path) {
-  USERNAME = sh(
+  def USERNAME = sh(
                  script: "grep FLIT_USERNAME ${path}|cut -d '=' -f2",
                  returnStdout: true
                ).trim()
@@ -244,7 +244,7 @@ def getUserName(path) {
 // return FLIT_PASSWORD from given file
 def getUserPass(path) {
-  USERPASS = sh(
+  def USERPASS = sh(
                  script: "grep FLIT_PASSWORD ${path}|cut -d '=' -f2",
                  returnStdout: true
                ).trim()
@@ -264,11 +264,11 @@ def switchEmailNotif(toggle, build) {
 // return true if emailNotif file present
 boolean sendEmailNotif(path, build) {
-  emailNotif = sh(
+  def emailNotif = sh(
                  script: "find ${path} -name '.emailNotif'|wc -l",
                  returnStdout: true
                ).trim()
-  emailContent = ''
+  def emailContent = ''
   if (emailNotif == '1') {
     emailContent = sh(
                      script: "cat ${path}/.emailNotif",
@@ -279,7 +279,7 @@ boolean sendEmailNotif(path, build) {
 }
 def analyseBuildCause() {
-  upstreamProjects = ['pymultirole_plugins']
+  String[] upstreamProjects = ['pymultirole_plugins']
   boolean upstreamRunning = false
   String jobName
   // iterate over upstreamProjects

{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: pyconverters-openai_vision
-Version: 0.5.3
+Version: 0.5.7
 Summary: OpenAIVision converter
 Home-page: https://kairntech.com/
 Author: Olivier Terrier

{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/pyconverters_openai_vision/__init__.py RENAMED Viewed

@@ -1,2 +1,2 @@
 """OpenAIVision converter"""
-__version__ = "0.5.3"
+__version__ = "0.5.7"

{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/pyconverters_openai_vision/openai_vision.py RENAMED Viewed

@@ -1,7 +1,9 @@
 import base64
 import os
+import re
 from enum import Enum
 from logging import Logger
+from re import Pattern
 from typing import List, cast, Type, Dict, Any
 import filetype as filetype
@@ -24,12 +26,13 @@ class OpenAIVisionBaseParameters(ConverterParameters):
         None, extra="internal"
     )
     prompt: str = Field(
-        "Describe the image with a lot of details",
+        """If the attached file is an image: describe the image with a lot of details.",
+        If the attached file is a PDF document: convert the PDF document into Markdown format. The output must be just the markdown result without any explanation or introductory prefix.""",
         description="""Contains the prompt as a string""",
         extra="multiline",
     )
     max_tokens: int = Field(
-        256,
+        16384,
         description="""The maximum number of tokens to generate in the completion.
     The token count of your prompt plus max_tokens cannot exceed the model's context length.
     Most models have a context length of 2048 tokens (except for the newest models, which support 4096).""",
@@ -85,13 +88,14 @@ class OpenAIVisionBaseParameters(ConverterParameters):
 class OpenAIVisionModel(str, Enum):
     gpt_4o_mini = "gpt-4o-mini"
     gpt_4o = "gpt-4o"
+    o3_mini = "o3-mini"
 class OpenAIVisionParameters(OpenAIVisionBaseParameters):
     model: OpenAIVisionModel = Field(
         OpenAIVisionModel.gpt_4o_mini,
-        description="""The [OpenAI model](https://platform.openai.com/docs/models) used for speech to text transcription. Options currently available:</br>
-                        <li>`whisper-1` - state-of-the-art open source large-v2 Whisper model.
+        description="""The [OpenAI model](https://platform.openai.com/docs/models) used for vision. Options currently available:</br>
                         """, extra="pipeline-naming-hint"
     )
@@ -126,10 +130,24 @@ class OpenAIVisionConverterBase(ConverterBase):
     You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
     PREFIX: str = ""
-    def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile
+    def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile, kind
                      ) -> Dict[str, Any]:
         data = source.file.read()
         rv = base64.b64encode(data)
+        if kind.mime.startswith("image"):
+            binary_block = {
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
+                }
+            }
+        else:
+            binary_block = {
+                "type": "file",
+                "file": {
+                    "filename": source.filename,
+                    "file_data": f"data:application/pdf;base64,{rv.decode('utf-8')}"}
+            }
         messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
         messages.append({"role": "user",
                          "content": [
@@ -137,12 +155,8 @@ class OpenAIVisionConverterBase(ConverterBase):
                                  "type": "text",
                                  "text": params.prompt
                              },
-                             {
-                                 "type": "image_url",
-                                 "image_url": {
-                                     "url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
-                                 }
-                             }]})
+                             binary_block
+                         ]})
         kwargs = {
             'model': params.model_str,
             'messages': messages,
@@ -156,11 +170,18 @@ class OpenAIVisionConverterBase(ConverterBase):
         return kwargs
     def compute_result(self, **kwargs):
+        pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
+        """Regex pattern to parse the output."""
         response = openai_chat_completion(self.PREFIX, **kwargs)
         contents = []
         for choice in response.choices:
             if choice.message.content:
-                contents.append(choice.message.content)
+                if "```" in choice.message.content:
+                    action_match = pattern.search(choice.message.content)
+                    if action_match is not None:
+                        contents.append(action_match.group(1).strip())
+                else:
+                    contents.append(choice.message.content)
         if contents:
             result = "\n".join(contents)
         return result
@@ -178,9 +199,9 @@ class OpenAIVisionConverterBase(ConverterBase):
         try:
             kind = filetype.guess(source.file)
             source.file.seek(0)
-            if kind.mime.startswith("image"):
+            if kind.mime.startswith("image") or kind.mime.endswith("pdf"):
                 result = None
-                kwargs = self.compute_args(params, source)
+                kwargs = self.compute_args(params, source, kind)
                 if kwargs['model'] != NO_DEPLOYED_MODELS:
                     result = self.compute_result(**kwargs)
                 if result:
@@ -189,7 +210,7 @@ class OpenAIVisionConverterBase(ConverterBase):
         except BaseException as err:
             raise err
         if doc is None:
-            raise TypeError(f"Conversion of audio file {source.filename} failed")
+            raise TypeError(f"Conversion of file {source.filename} failed")
         return [doc]
     @classmethod

{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/setup.py RENAMED Viewed

@@ -43,7 +43,7 @@ entry_points = \
                           'pyconverters_openai_vision.openai_vision:OpenAIVisionConverter']}
 setup(name='pyconverters-openai_vision',
-      version='0.5.3',
+      version='0.5.7',
       description='OpenAIVision converter',
       author='Olivier Terrier',
       author_email='olivier.terrier@kairntech.com',

pyconverters_openai_vision-0.5.7/tests/data/Sodexo_URD_2023_FR - 4p.pdf ADDED Viewed

Binary file

{pyconverters_openai_vision-0.5.3 → pyconverters_openai_vision-0.5.7}/tests/test_openai_vision.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import List
@@ -7,7 +8,7 @@ from starlette.datastructures import UploadFile
 from pyconverters_openai_vision.openai_vision import (
     OpenAIVisionConverter,
-    OpenAIVisionParameters, DeepInfraOpenAIVisionParameters, DeepInfraOpenAIVisionConverter
+    OpenAIVisionParameters, DeepInfraOpenAIVisionParameters, DeepInfraOpenAIVisionConverter, OpenAIVisionModel
 )
@@ -37,6 +38,19 @@ def test_openai():
         assert 'kairntech' in doc0.text.lower()
+@pytest.mark.skip(reason="Not a test")
+def test_openai_pdf():
+    converter = OpenAIVisionConverter()
+    parameters = OpenAIVisionParameters(model=OpenAIVisionModel.gpt_4o_mini)
+    testdir = Path(__file__).parent
+    source = Path(testdir, 'data/Sodexo_URD_2023_FR - 4p.pdf')
+    with source.open("rb") as fin:
+        docs: List[Document] = converter.convert(UploadFile(source.name, fin, 'application/pdf'), parameters)
+        assert len(docs) == 1
+        doc0 = docs[0]
+        assert 'une performance solide au cours' in doc0.text.lower()
 @pytest.mark.skip(reason="Not a test")
 def test_deepinfra():
     converter = DeepInfraOpenAIVisionConverter()
@@ -55,3 +69,26 @@ def test_deepinfra():
         assert len(docs) == 1
         doc0 = docs[0]
         assert 'generative ai' in doc0.text.lower()
+@pytest.mark.skip(reason="Not a test")
+def test_runpod():
+    os.environ["OPENAI_API_BASE"] = "https://api.runpod.ai/v2/vllm-9jnu8ajtktj5ay/openai/v1"
+    os.environ["OPENAI_MODEL"] = "mistralai/Pixtral-12B-2409"
+    os.environ["OPENAI_API_KEY"] = os.getenv("RUNPOD_API_KEY")
+    converter = OpenAIVisionConverter()
+    parameters = OpenAIVisionParameters()
+    testdir = Path(__file__).parent
+    source = Path(testdir, 'data/colducoq.jpg')
+    with source.open("rb") as fin:
+        docs: List[Document] = converter.convert(UploadFile(source.name, fin, 'image/jpeg'), parameters)
+        assert len(docs) == 1
+        doc0 = docs[0]
+        assert 'dent de crolles' in doc0.text.lower()
+    source = Path(testdir, 'data/webinar.png')
+    with source.open("rb") as fin:
+        docs: List[Document] = converter.convert(UploadFile(source.name, fin, 'image/png'), parameters)
+        assert len(docs) == 1
+        doc0 = docs[0]
+        assert 'generative ai' in doc0.text.lower()