pyconverters-openai_vision 0.5.50__py3-none-any.whl → 0.5.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyconverters_openai_vision/__init__.py +1 -1
- pyconverters_openai_vision/openai_vision.py +81 -7
- {pyconverters_openai_vision-0.5.50.dist-info → pyconverters_openai_vision-0.5.54.dist-info}/METADATA +1 -1
- pyconverters_openai_vision-0.5.54.dist-info/RECORD +7 -0
- pyconverters_openai_vision-0.5.50.dist-info/RECORD +0 -7
- {pyconverters_openai_vision-0.5.50.dist-info → pyconverters_openai_vision-0.5.54.dist-info}/WHEEL +0 -0
- {pyconverters_openai_vision-0.5.50.dist-info → pyconverters_openai_vision-0.5.54.dist-info}/entry_points.txt +0 -0
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""OpenAIVision converter"""
|
|
2
|
-
__version__ = "0.5.50"
|
|
2
|
+
__version__ = "0.5.54"
|
|
@@ -4,14 +4,14 @@ import re
|
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from logging import Logger
|
|
6
6
|
from re import Pattern
|
|
7
|
-
from typing import List, cast, Type, Dict, Any, Optional
|
|
7
|
+
from typing import List, cast, Type, Dict, Any, Optional, Callable
|
|
8
8
|
|
|
9
9
|
import filetype as filetype
|
|
10
10
|
from log_with_context import add_logging_context
|
|
11
11
|
from pydantic import Field, BaseModel
|
|
12
12
|
from pymultirole_plugins.v1.converter import ConverterParameters, ConverterBase
|
|
13
13
|
from pymultirole_plugins.v1.processor import ProcessorParameters, ProcessorBase
|
|
14
|
-
from pymultirole_plugins.v1.schema import Document, AltText
|
|
14
|
+
from pymultirole_plugins.v1.schema import Document, AltText, Sentence
|
|
15
15
|
from starlette.datastructures import UploadFile
|
|
16
16
|
|
|
17
17
|
from .openai_utils import create_openai_model_enum, openai_chat_completion, gpt_filter, \
|
|
@@ -357,6 +357,73 @@ class OpenAIVisionProcessorBaseParameters(ProcessorParameters):
|
|
|
357
357
|
)
|
|
358
358
|
|
|
359
359
|
|
|
360
|
+
def regex_sub_preserve_spans(
    text: str,
    regex: str,
    repl: Callable[[re.Match], str],
    spans: Optional[List["Sentence"]],
    flags: int = 0,
):
    """Substitute every ``regex`` match in ``text`` and re-anchor ``spans``.

    Works like ``re.sub(regex, repl, text, flags=flags)`` with a callable
    ``repl``, but additionally translates the character offsets of ``spans``
    so that they keep pointing at the same content in the rewritten text.

    Offset translation rules:

    * a character outside any match keeps its position, shifted by the size
      difference of the replacements located before it;
    * a span boundary falling inside a match is widened to the boundary of
      the whole replacement (a start snaps to the replacement start, an end
      snaps to the replacement end), so a span never stops in the middle of
      a replacement;
    * an empty span (``start == end``) stays empty at the translated offset;
    * a span that is inverted or lies outside ``text`` is dropped.

    Args:
        text: The original text.
        regex: Pattern handed to ``re.finditer``.
        repl: Called once per match; returns the replacement string.
        spans: Spans expressed in offsets of ``text``, or ``None``.
        flags: ``re`` flags forwarded to ``re.finditer``.

    Returns:
        A ``(new_text, new_spans)`` tuple. ``new_spans`` is ``None`` when
        ``spans`` is ``None``; otherwise it is a list of fresh ``Sentence``
        objects carrying the original ``metadata``.
    """
    text_len = len(text)
    pieces = []
    # start_map[i]: offset in the new text where old character i begins.
    # end_map[i]: exclusive offset in the new text where old character i ends.
    # Two tables are needed because all characters of a match share the same
    # start (replacement start) but must also share the same end
    # (replacement end); deriving the end as "start + 1" truncates spans.
    start_map = [0] * text_len
    end_map = [0] * text_len

    old_pos = 0
    new_pos = 0

    for match in re.finditer(regex, text, flags):
        start, end = match.span()
        replacement = repl(match)

        # Copy the unchanged text preceding the match, one-to-one.
        pieces.append(text[old_pos:start])
        for i in range(old_pos, start):
            start_map[i] = new_pos
            new_pos += 1
            end_map[i] = new_pos

        # Insert the replacement; every matched character maps onto it.
        pieces.append(replacement)
        replacement_end = new_pos + len(replacement)
        for i in range(start, end):
            start_map[i] = new_pos
            end_map[i] = replacement_end

        new_pos = replacement_end
        old_pos = end

    # Copy whatever follows the last match.
    pieces.append(text[old_pos:])
    for i in range(old_pos, text_len):
        start_map[i] = new_pos
        new_pos += 1
        end_map[i] = new_pos

    new_text = "".join(pieces)

    if spans is None:
        return new_text, None

    new_spans = []
    for span in spans:
        # Explicit bounds check: list indexing would silently wrap around on
        # negative offsets, so invalid spans are rejected up front.
        if not 0 <= span.start <= span.end <= text_len:
            continue

        if span.start == span.end:
            # Zero-width span: keep it zero-width. At this point new_pos is
            # the length of the new text, used for a span sitting at the
            # very end of the old text.
            anchor = start_map[span.start] if span.start < text_len else new_pos
            new_start = new_end = anchor
        else:
            new_start = start_map[span.start]
            new_end = end_map[span.end - 1]

        new_spans.append(
            Sentence(start=new_start, end=new_end, metadata=span.metadata)
        )

    return new_text, new_spans
|
|
425
|
+
|
|
426
|
+
|
|
360
427
|
class OpenAIVisionProcessorBase(ProcessorBase):
|
|
361
428
|
__doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
|
|
362
429
|
You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
|
|
@@ -449,20 +516,27 @@ class OpenAIVisionProcessorBase(ProcessorBase):
|
|
|
449
516
|
|
|
450
517
|
if params.replace_refs_altTexts_by_descriptions:
|
|
451
518
|
text = document.text
|
|
452
|
-
link_regex = r"!\[([^]]+)\]\(([^]
|
|
519
|
+
link_regex = r"!\[([^]]+)\]\(([^)]+)\)"
|
|
453
520
|
|
|
454
521
|
def convert_links(matchobj):
|
|
455
522
|
m = matchobj.group(0)
|
|
456
523
|
m_id = matchobj.group(1)
|
|
457
524
|
if m_id in alts:
|
|
458
525
|
# markdown blockquote
|
|
459
|
-
m_desc = "\n".join(["> " + li for li in alts[m_id].splitlines()])
|
|
526
|
+
# m_desc = "\n".join(["> " + li for li in alts[m_id].splitlines()])
|
|
527
|
+
m_desc = alts[m_id]
|
|
460
528
|
return f"{m}\n{m_desc}\n"
|
|
461
529
|
return m
|
|
462
530
|
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
531
|
+
new_text, new_sentences = regex_sub_preserve_spans(
|
|
532
|
+
text,
|
|
533
|
+
link_regex,
|
|
534
|
+
convert_links,
|
|
535
|
+
document.sentences,
|
|
536
|
+
flags=re.MULTILINE
|
|
537
|
+
)
|
|
538
|
+
document.text = new_text
|
|
539
|
+
document.sentences = new_sentences
|
|
466
540
|
for altText in altTexts:
|
|
467
541
|
if altText.name not in alts:
|
|
468
542
|
document.altTexts.append(altText)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pyconverters_openai_vision/__init__.py,sha256=7ClUk4wE7QRUGKj6xr8-2iGGhMXl4YKmtuY0IeYJq8I,52
|
|
2
|
+
pyconverters_openai_vision/openai_utils.py,sha256=XI4WYZ-EAVG0Vxd5yUDuZNDgEzqHJeriScxTUusi1oo,7740
|
|
3
|
+
pyconverters_openai_vision/openai_vision.py,sha256=d2qlPgD8vfMelZVH-6fvdXWns9nkVxCIAwx_UenOvRc,25862
|
|
4
|
+
pyconverters_openai_vision-0.5.54.dist-info/entry_points.txt,sha256=NR0re-yebKKyhApky1I6nDQzjJQfEyfOkJlJju0Ngzo,404
|
|
5
|
+
pyconverters_openai_vision-0.5.54.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
6
|
+
pyconverters_openai_vision-0.5.54.dist-info/METADATA,sha256=DpVk7gRhFPN0WYWyS1Pzhy-BKHVWtC2dWeLhRI70yX4,2662
|
|
7
|
+
pyconverters_openai_vision-0.5.54.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
pyconverters_openai_vision/__init__.py,sha256=-heGZjBiUu49bLQ0Gb4MHhW_ihPiy84h3g1zPxlhNqE,52
|
|
2
|
-
pyconverters_openai_vision/openai_utils.py,sha256=XI4WYZ-EAVG0Vxd5yUDuZNDgEzqHJeriScxTUusi1oo,7740
|
|
3
|
-
pyconverters_openai_vision/openai_vision.py,sha256=A5TRj0q-Ojzi4LcKxiP9qdTXGglR_v2YgGApmrJWEeE,23855
|
|
4
|
-
pyconverters_openai_vision-0.5.50.dist-info/entry_points.txt,sha256=NR0re-yebKKyhApky1I6nDQzjJQfEyfOkJlJju0Ngzo,404
|
|
5
|
-
pyconverters_openai_vision-0.5.50.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
6
|
-
pyconverters_openai_vision-0.5.50.dist-info/METADATA,sha256=OO-aX0p5D-h2HB4efsGMo84ZP5U08ua02K1eXKOw3pU,2662
|
|
7
|
-
pyconverters_openai_vision-0.5.50.dist-info/RECORD,,
|
{pyconverters_openai_vision-0.5.50.dist-info → pyconverters_openai_vision-0.5.54.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|