PyPI - pyconverters-openai_vision - Versions diffs - 0.5.50__py3-none-any.whl → 0.5.54__py3-none-any.whl - Mend

pyconverters-openai_vision 0.5.50__py3-none-any.whl → 0.5.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

pyconverters_openai_vision/__init__.py CHANGED Viewed

@@ -1,2 +1,2 @@
 """OpenAIVision converter"""
-__version__ = "0.5.50"
+__version__ = "0.5.54"

pyconverters_openai_vision/openai_vision.py CHANGED Viewed

@@ -4,14 +4,14 @@ import re
 from enum import Enum
 from logging import Logger
 from re import Pattern
-from typing import List, cast, Type, Dict, Any, Optional
+from typing import List, cast, Type, Dict, Any, Optional, Callable
 import filetype as filetype
 from log_with_context import add_logging_context
 from pydantic import Field, BaseModel
 from pymultirole_plugins.v1.converter import ConverterParameters, ConverterBase
 from pymultirole_plugins.v1.processor import ProcessorParameters, ProcessorBase
-from pymultirole_plugins.v1.schema import Document, AltText
+from pymultirole_plugins.v1.schema import Document, AltText, Sentence
 from starlette.datastructures import UploadFile
 from .openai_utils import create_openai_model_enum, openai_chat_completion, gpt_filter, \
@@ -357,6 +357,73 @@ class OpenAIVisionProcessorBaseParameters(ProcessorParameters):
     )
+def regex_sub_preserve_spans(
+    text: str,
+    regex: str,
+    repl: Callable[[re.Match], str],
+    spans: List[Sentence],
+    flags=0,
+):
+    new_text_parts = []
+    char_map = {}  # old_char_offset -> new_char_offset
+    last_pos = 0
+    new_pos = 0
+    for match in re.finditer(regex, text, flags):
+        start, end = match.start(), match.end()
+        replacement = repl(match)
+        # Copier le texte inchangé
+        unchanged = text[last_pos:start]
+        new_text_parts.append(unchanged)
+        for i in range(last_pos, start):
+            char_map[i] = new_pos
+            new_pos += 1
+        # Insérer le remplacement
+        new_text_parts.append(replacement)
+        for i in range(start, end):
+            char_map[i] = new_pos
+        new_pos += len(replacement)
+        last_pos = end
+    # Reste du texte
+    tail = text[last_pos:]
+    new_text_parts.append(tail)
+    for i in range(last_pos, len(text)):
+        char_map[i] = new_pos
+        new_pos += 1
+    new_text = "".join(new_text_parts)
+    # Créer le nouveau Doc
+    # Recréer les spans
+    new_spans = None
+    if spans is not None:
+        new_spans = []
+        for span in spans:
+            if span.start not in char_map or span.end - 1 not in char_map:
+                continue
+            new_start = char_map[span.start]
+            new_end = char_map[span.end - 1] + 1
+            new_span = Sentence(
+                start=new_start,
+                end=new_end,
+                metadata=span.metadata)
+            if new_span is not None:
+                new_spans.append(new_span)
+    return new_text, new_spans
 class OpenAIVisionProcessorBase(ProcessorBase):
     __doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
     You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
@@ -449,20 +516,27 @@ class OpenAIVisionProcessorBase(ProcessorBase):
                             if params.replace_refs_altTexts_by_descriptions:
                                 text = document.text
-                                link_regex = r"!\[([^]]+)\]\(([^]]+)\)"
+                                link_regex = r"!\[([^]]+)\]\(([^)]+)\)"
                                 def convert_links(matchobj):
                                     m = matchobj.group(0)
                                     m_id = matchobj.group(1)
                                     if m_id in alts:
                                         # markdown blockquote
-                                        m_desc = "\n".join(["> " + li for li in alts[m_id].splitlines()])
+                                        # m_desc = "\n".join(["> " + li for li in alts[m_id].splitlines()])
+                                        m_desc = alts[m_id]
                                         return f"{m}\n{m_desc}\n"
                                     return m
-                                ptext = re.sub(link_regex, convert_links, text, 0,
-                                               re.MULTILINE)
-                                document.text = ptext
+                                new_text, new_sentences = regex_sub_preserve_spans(
+                                    text,
+                                    link_regex,
+                                    convert_links,
+                                    document.sentences,
+                                    flags=re.MULTILINE
+                                )
+                                document.text = new_text
+                                document.sentences = new_sentences
                                 for altText in altTexts:
                                     if altText.name not in alts:
                                         document.altTexts.append(altText)

{pyconverters_openai_vision-0.5.50.dist-info → pyconverters_openai_vision-0.5.54.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyconverters-openai_vision
-Version: 0.5.50
+Version: 0.5.54
 Summary: OpenAIVision converter
 Home-page: https://kairntech.com/
 Author: Olivier Terrier

pyconverters_openai_vision-0.5.54.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+pyconverters_openai_vision/__init__.py,sha256=7ClUk4wE7QRUGKj6xr8-2iGGhMXl4YKmtuY0IeYJq8I,52
+pyconverters_openai_vision/openai_utils.py,sha256=XI4WYZ-EAVG0Vxd5yUDuZNDgEzqHJeriScxTUusi1oo,7740
+pyconverters_openai_vision/openai_vision.py,sha256=d2qlPgD8vfMelZVH-6fvdXWns9nkVxCIAwx_UenOvRc,25862
+pyconverters_openai_vision-0.5.54.dist-info/entry_points.txt,sha256=NR0re-yebKKyhApky1I6nDQzjJQfEyfOkJlJju0Ngzo,404
+pyconverters_openai_vision-0.5.54.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+pyconverters_openai_vision-0.5.54.dist-info/METADATA,sha256=DpVk7gRhFPN0WYWyS1Pzhy-BKHVWtC2dWeLhRI70yX4,2662
+pyconverters_openai_vision-0.5.54.dist-info/RECORD,,

pyconverters_openai_vision-0.5.50.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-pyconverters_openai_vision/__init__.py,sha256=-heGZjBiUu49bLQ0Gb4MHhW_ihPiy84h3g1zPxlhNqE,52
-pyconverters_openai_vision/openai_utils.py,sha256=XI4WYZ-EAVG0Vxd5yUDuZNDgEzqHJeriScxTUusi1oo,7740
-pyconverters_openai_vision/openai_vision.py,sha256=A5TRj0q-Ojzi4LcKxiP9qdTXGglR_v2YgGApmrJWEeE,23855
-pyconverters_openai_vision-0.5.50.dist-info/entry_points.txt,sha256=NR0re-yebKKyhApky1I6nDQzjJQfEyfOkJlJju0Ngzo,404
-pyconverters_openai_vision-0.5.50.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-pyconverters_openai_vision-0.5.50.dist-info/METADATA,sha256=OO-aX0p5D-h2HB4efsGMo84ZP5U08ua02K1eXKOw3pU,2662
-pyconverters_openai_vision-0.5.50.dist-info/RECORD,,

{pyconverters_openai_vision-0.5.50.dist-info → pyconverters_openai_vision-0.5.54.dist-info}/WHEEL RENAMED Viewed

File without changes

{pyconverters_openai_vision-0.5.50.dist-info → pyconverters_openai_vision-0.5.54.dist-info}/entry_points.txt RENAMED Viewed

File without changes

pyconverters-openai_vision 0.5.50__py3-none-any.whl → 0.5.54__py3-none-any.whl

pyconverters-openai_vision 0.5.50__py3-none-any.whl → 0.5.54__py3-none-any.whl