PyPI - pyconverters-openai_vision - Versions diffs - 0.5.3__py3-none-any.whl → 0.5.4__py3-none-any.whl - Mend

pyconverters-openai_vision 0.5.3__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

pyconverters_openai_vision/__init__.py CHANGED Viewed

@@ -1,2 +1,2 @@
 """OpenAIVision converter"""
-__version__ = "0.5.3"
+__version__ = "0.5.4"

pyconverters_openai_vision/openai_vision.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import base64
 import os
+import re
 from enum import Enum
 from logging import Logger
+from re import Pattern
 from typing import List, cast, Type, Dict, Any
 import filetype as filetype
@@ -24,12 +26,13 @@ class OpenAIVisionBaseParameters(ConverterParameters):
         None, extra="internal"
     )
     prompt: str = Field(
-        "Describe the image with a lot of details",
+        """If the attached file is an image: describe the image with a lot of details.",
+        If the attached file is a PDF document: convert the PDF document into Markdown format. The output must be just the markdown result without any explanation or introductory prefix.""",
         description="""Contains the prompt as a string""",
         extra="multiline",
     )
     max_tokens: int = Field(
-        256,
+        16384,
         description="""The maximum number of tokens to generate in the completion.
     The token count of your prompt plus max_tokens cannot exceed the model's context length.
     Most models have a context length of 2048 tokens (except for the newest models, which support 4096).""",
@@ -85,13 +88,14 @@ class OpenAIVisionBaseParameters(ConverterParameters):
 class OpenAIVisionModel(str, Enum):
     gpt_4o_mini = "gpt-4o-mini"
     gpt_4o = "gpt-4o"
+    o3_mini = "o3-mini"
 class OpenAIVisionParameters(OpenAIVisionBaseParameters):
     model: OpenAIVisionModel = Field(
         OpenAIVisionModel.gpt_4o_mini,
-        description="""The [OpenAI model](https://platform.openai.com/docs/models) used for speech to text transcription. Options currently available:</br>
-                        <li>`whisper-1` - state-of-the-art open source large-v2 Whisper model.
+        description="""The [OpenAI model](https://platform.openai.com/docs/models) used for vision. Options currently available:</br>
                         """, extra="pipeline-naming-hint"
     )
@@ -126,10 +130,24 @@ class OpenAIVisionConverterBase(ConverterBase):
     You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
     PREFIX: str = ""
-    def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile
+    def compute_args(self, params: OpenAIVisionBaseParameters, source: UploadFile, kind
                      ) -> Dict[str, Any]:
         data = source.file.read()
         rv = base64.b64encode(data)
+        if kind.mime.startswith("image"):
+            binary_block = {
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
+                }
+            }
+        else:
+            binary_block = {
+                "type": "file",
+                "file": {
+                    "filename": source.filename,
+                    "file_data": f"data:application/pdf;base64,{rv.decode('utf-8')}"}
+            }
         messages = [{"role": "system", "content": params.system_prompt}] if params.system_prompt is not None else []
         messages.append({"role": "user",
                          "content": [
@@ -137,12 +155,8 @@ class OpenAIVisionConverterBase(ConverterBase):
                                  "type": "text",
                                  "text": params.prompt
                              },
-                             {
-                                 "type": "image_url",
-                                 "image_url": {
-                                     "url": f"data:image/jpeg;base64,{rv.decode('utf-8')}"
-                                 }
-                             }]})
+                             binary_block
+                         ]})
         kwargs = {
             'model': params.model_str,
             'messages': messages,
@@ -156,11 +170,18 @@ class OpenAIVisionConverterBase(ConverterBase):
         return kwargs
     def compute_result(self, **kwargs):
+        pattern: Pattern = re.compile(r"```(?:markdown\s+)?(\W.*?)```", re.DOTALL)
+        """Regex pattern to parse the output."""
         response = openai_chat_completion(self.PREFIX, **kwargs)
         contents = []
         for choice in response.choices:
             if choice.message.content:
-                contents.append(choice.message.content)
+                if "```" in choice.message.content:
+                    action_match = pattern.search(choice.message.content)
+                    if action_match is not None:
+                        contents.append(action_match.group(1).strip())
+                else:
+                    contents.append(choice.message.content)
         if contents:
             result = "\n".join(contents)
         return result
@@ -178,9 +199,9 @@ class OpenAIVisionConverterBase(ConverterBase):
         try:
             kind = filetype.guess(source.file)
             source.file.seek(0)
-            if kind.mime.startswith("image"):
+            if kind.mime.startswith("image") or kind.mime.endswith("pdf"):
                 result = None
-                kwargs = self.compute_args(params, source)
+                kwargs = self.compute_args(params, source, kind)
                 if kwargs['model'] != NO_DEPLOYED_MODELS:
                     result = self.compute_result(**kwargs)
                 if result:
@@ -189,7 +210,7 @@ class OpenAIVisionConverterBase(ConverterBase):
         except BaseException as err:
             raise err
         if doc is None:
-            raise TypeError(f"Conversion of audio file {source.filename} failed")
+            raise TypeError(f"Conversion of file {source.filename} failed")
         return [doc]
     @classmethod

{pyconverters_openai_vision-0.5.3.dist-info → pyconverters_openai_vision-0.5.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: pyconverters-openai_vision
-Version: 0.5.3
+Version: 0.5.4
 Summary: OpenAIVision converter
 Home-page: https://kairntech.com/
 Author: Olivier Terrier

pyconverters_openai_vision-0.5.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+pyconverters_openai_vision/__init__.py,sha256=gg9uPdn4gUxA8hQ1hvO_tebG_dq6EzNtTSggy4MA9R0,51
+pyconverters_openai_vision/openai_utils.py,sha256=HRJ6sJg88en66gkQbOpQKh7cbwtfoAwVLNh7JQSA9ps,5014
+pyconverters_openai_vision/openai_vision.py,sha256=ACpeOEFNphqKceQqyWHwpP6PvuOOig8qTTHwImq34j8,10445
+pyconverters_openai_vision-0.5.4.dist-info/entry_points.txt,sha256=-DS1gRUTf08Fjb79S_8sqCaqxBifC3q3EJZqXXdcf7Q,197
+pyconverters_openai_vision-0.5.4.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+pyconverters_openai_vision-0.5.4.dist-info/METADATA,sha256=FMB-_MGpLRxKMKyjr5InU-P6VJRtB-4hixc12PF2Xyg,2635
+pyconverters_openai_vision-0.5.4.dist-info/RECORD,,

{pyconverters_openai_vision-0.5.3.dist-info → pyconverters_openai_vision-0.5.4.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: flit 3.9.0
+Generator: flit 3.12.0
 Root-Is-Purelib: true
 Tag: py3-none-any

pyconverters_openai_vision-0.5.3.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-pyconverters_openai_vision/__init__.py,sha256=3OKw8raUFiHJX-jh8L1RvKEFeD7DZW5-8hpnp21EK-A,51
-pyconverters_openai_vision/openai_utils.py,sha256=HRJ6sJg88en66gkQbOpQKh7cbwtfoAwVLNh7JQSA9ps,5014
-pyconverters_openai_vision/openai_vision.py,sha256=MoEyjYCZ75jWiMonaQdR1erpFBpWgYEiqabp9D8102Y,9562
-pyconverters_openai_vision-0.5.3.dist-info/entry_points.txt,sha256=-DS1gRUTf08Fjb79S_8sqCaqxBifC3q3EJZqXXdcf7Q,197
-pyconverters_openai_vision-0.5.3.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-pyconverters_openai_vision-0.5.3.dist-info/METADATA,sha256=PJWwNjeSChCjjJQhgWxR1RIn9jA7-HvcPvKHWT7sJjk,2635
-pyconverters_openai_vision-0.5.3.dist-info/RECORD,,

{pyconverters_openai_vision-0.5.3.dist-info → pyconverters_openai_vision-0.5.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

pyconverters-openai_vision 0.5.3__py3-none-any.whl → 0.5.4__py3-none-any.whl

pyconverters-openai_vision 0.5.3__py3-none-any.whl → 0.5.4__py3-none-any.whl