pyconverters-openai_vision 0.5.52__tar.gz → 0.5.54__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/PKG-INFO +1 -1
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/pyconverters_openai_vision/__init__.py +1 -1
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/pyconverters_openai_vision/openai_vision.py +81 -7
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/setup.py +1 -1
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/ENG product fact files_general offer_2025_30pages_alts.json +20 -20
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/test_openai_vision.py +26 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/.dockerignore +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/.gitignore +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/Dockerfile +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/Jenkinsfile +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/README.md +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/bumpversion.py +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/pyconverters_openai_vision/openai_utils.py +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/pyproject.toml +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/__init__.py +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/ENG product fact files_general offer_2025_30pages.json +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/ENG product fact files_general offer_2025_30pages_descs.json +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/ENG product fact files_general offer_2025_30pages_gpt-4.1-mini.json +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/ENG product fact files_general offer_2025_30pages_gpt-4.1-nano.json +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/ENG product fact files_general offer_2025_30pages_gpt-4o-mini.json +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/ENG product fact files_general offer_2025_30pages_gpt-4o.json +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/PC_Kairntech_LLM_v1.md.json +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/colducoq.jpg +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/webinar.png +0 -0
- {pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tox.ini +0 -0
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""OpenAIVision converter"""
|
|
2
|
-
__version__ = "0.5.52"
|
|
2
|
+
__version__ = "0.5.54"
|
|
@@ -4,14 +4,14 @@ import re
|
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from logging import Logger
|
|
6
6
|
from re import Pattern
|
|
7
|
-
from typing import List, cast, Type, Dict, Any, Optional
|
|
7
|
+
from typing import List, cast, Type, Dict, Any, Optional, Callable
|
|
8
8
|
|
|
9
9
|
import filetype as filetype
|
|
10
10
|
from log_with_context import add_logging_context
|
|
11
11
|
from pydantic import Field, BaseModel
|
|
12
12
|
from pymultirole_plugins.v1.converter import ConverterParameters, ConverterBase
|
|
13
13
|
from pymultirole_plugins.v1.processor import ProcessorParameters, ProcessorBase
|
|
14
|
-
from pymultirole_plugins.v1.schema import Document, AltText
|
|
14
|
+
from pymultirole_plugins.v1.schema import Document, AltText, Sentence
|
|
15
15
|
from starlette.datastructures import UploadFile
|
|
16
16
|
|
|
17
17
|
from .openai_utils import create_openai_model_enum, openai_chat_completion, gpt_filter, \
|
|
@@ -357,6 +357,73 @@ class OpenAIVisionProcessorBaseParameters(ProcessorParameters):
|
|
|
357
357
|
)
|
|
358
358
|
|
|
359
359
|
|
|
360
|
+
def regex_sub_preserve_spans(
        text: str,
        regex: str,
        repl: Callable[[re.Match], str],
        spans: "Optional[List[Sentence]]",
        flags=0,
):
    """Replace every match of ``regex`` in ``text`` and re-anchor ``spans``.

    Works like ``re.sub`` with a callable replacement, but additionally
    translates the ``start``/``end`` offsets of each span so that they keep
    delimiting the same content in the rewritten text.

    Args:
        text: The text to rewrite.
        regex: Pattern handed to ``re.finditer``.
        repl: Called once per match; returns the replacement string.
        spans: Spans (``start``/``end``/``metadata``) expressed as offsets
            into ``text``, ``end`` being exclusive. May be ``None``.
        flags: Regex flags forwarded to ``re.finditer``.

    Returns:
        A ``(new_text, new_spans)`` tuple. ``new_spans`` is ``None`` when
        ``spans`` is ``None``; otherwise it is a list of freshly built
        ``Sentence`` objects. Spans whose offsets do not fit inside ``text``
        are dropped.

    Offset translation rules:
        * an offset in unchanged text is shifted by the length difference of
          all replacements located before it;
        * a span starting inside a match starts at the beginning of that
          match's replacement;
        * a span ending inside (or exactly at the end of) a match ends at the
          end of that match's replacement, so the substituted text is never
          cut off after its first character.
    """
    from bisect import bisect_left, bisect_right

    pieces = []
    # Parallel per-match records, naturally sorted because finditer scans
    # left to right and yields non-overlapping matches.
    old_starts, old_ends, new_starts, new_ends = [], [], [], []

    cursor = 0  # offset in `text` just past the previous match
    shift = 0   # (new offset - old offset) for text following the previous match

    for match in re.finditer(regex, text, flags):
        start, end = match.span()
        replacement = repl(match)

        # Unchanged text between the previous match and this one.
        pieces.append(text[cursor:start])
        pieces.append(replacement)

        old_starts.append(start)
        old_ends.append(end)
        new_starts.append(start + shift)
        new_ends.append(start + shift + len(replacement))

        shift += len(replacement) - (end - start)
        cursor = end

    # Remainder of the text after the last match.
    pieces.append(text[cursor:])
    new_text = "".join(pieces)

    if spans is None:
        return new_text, None

    def map_start(pos):
        """Translate an inclusive start offset."""
        k = bisect_right(old_starts, pos) - 1  # last match starting at/before pos
        if k < 0:
            return pos
        if pos < old_ends[k]:
            # Inside the match: snap to the beginning of its replacement.
            return new_starts[k]
        return new_ends[k] + (pos - old_ends[k])

    def map_end(pos):
        """Translate an exclusive end offset."""
        k = bisect_left(old_starts, pos) - 1  # last match starting strictly before pos
        if k < 0:
            return pos
        if pos <= old_ends[k]:
            # Last covered character belongs to the match: keep the whole
            # replacement inside the span.
            return new_ends[k]
        return new_ends[k] + (pos - old_ends[k])

    text_len = len(text)
    new_spans = []
    for span in spans:
        if not (0 <= span.start <= span.end <= text_len):
            # Offsets do not address `text`; nothing sensible to map them to.
            continue
        new_start = map_start(span.start)
        # A zero-length span sitting on an insertion point would otherwise
        # come out inverted; clamp it to stay empty.
        new_end = max(map_end(span.end), new_start)
        new_spans.append(
            Sentence(start=new_start, end=new_end, metadata=span.metadata)
        )

    return new_text, new_spans
|
|
425
|
+
|
|
426
|
+
|
|
360
427
|
class OpenAIVisionProcessorBase(ProcessorBase):
|
|
361
428
|
__doc__ = """Generate text using [OpenAI Text Completion](https://platform.openai.com/docs/guides/completion) API
|
|
362
429
|
You input some text as a prompt, and the model will generate a text completion that attempts to match whatever context or pattern you gave it."""
|
|
@@ -449,20 +516,27 @@ class OpenAIVisionProcessorBase(ProcessorBase):
|
|
|
449
516
|
|
|
450
517
|
if params.replace_refs_altTexts_by_descriptions:
|
|
451
518
|
text = document.text
|
|
452
|
-
link_regex = r"!\[([^]]+)\]\(([^]
|
|
519
|
+
link_regex = r"!\[([^]]+)\]\(([^)]+)\)"
|
|
453
520
|
|
|
454
521
|
def convert_links(matchobj):
|
|
455
522
|
m = matchobj.group(0)
|
|
456
523
|
m_id = matchobj.group(1)
|
|
457
524
|
if m_id in alts:
|
|
458
525
|
# markdown blockquote
|
|
459
|
-
m_desc = "\n".join(["> " + li for li in alts[m_id].splitlines()])
|
|
526
|
+
# m_desc = "\n".join(["> " + li for li in alts[m_id].splitlines()])
|
|
527
|
+
m_desc = alts[m_id]
|
|
460
528
|
return f"{m}\n{m_desc}\n"
|
|
461
529
|
return m
|
|
462
530
|
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
531
|
+
new_text, new_sentences = regex_sub_preserve_spans(
|
|
532
|
+
text,
|
|
533
|
+
link_regex,
|
|
534
|
+
convert_links,
|
|
535
|
+
document.sentences,
|
|
536
|
+
flags=re.MULTILINE
|
|
537
|
+
)
|
|
538
|
+
document.text = new_text
|
|
539
|
+
document.sentences = new_sentences
|
|
466
540
|
for altText in altTexts:
|
|
467
541
|
if altText.name not in alts:
|
|
468
542
|
document.altTexts.append(altText)
|
|
@@ -48,7 +48,7 @@ entry_points = \
|
|
|
48
48
|
'pyconverters_openai_vision.openai_vision:OpenAIVisionProcessor']}
|
|
49
49
|
|
|
50
50
|
setup(name='pyconverters-openai_vision',
|
|
51
|
-
version='0.5.52',
|
|
51
|
+
version='0.5.54',
|
|
52
52
|
description='OpenAIVision converter',
|
|
53
53
|
author='Olivier Terrier',
|
|
54
54
|
author_email='olivier.terrier@kairntech.com',
|
|
@@ -132,83 +132,83 @@
|
|
|
132
132
|
"altTexts": [
|
|
133
133
|
{
|
|
134
134
|
"name": "img-0.jpeg",
|
|
135
|
-
"text": "The image
|
|
135
|
+
"text": "The image depicts a rhythmic gymnast in mid-performance, showcasing a dynamic pose. The gymnast is wearing a sparkling, intricately designed leotard that features a blend of pink and nude colors, adorned with shimmering sequins and embellishments. The outfit highlights the athlete's form and movement. Instead of a head, there is a large, shiny pink ball positioned where the head would typically be, creating a surreal and whimsical effect. The background is a soft, neutral color, emphasizing the gymnast's graceful posture and the vibrant details of the costume. The overall composition captures the elegance and artistry of rhythmic gymnastics."
|
|
136
136
|
},
|
|
137
137
|
{
|
|
138
138
|
"name": "img-1.jpeg",
|
|
139
|
-
"text": "The image features a textual layout with a
|
|
139
|
+
"text": "The image features a textual layout with a black background at the top displaying the year \"2025\" in large, bold, white font. Below that, in a slightly smaller font, the words \"GENERAL S\" are presented, also in white. The bottom section of the image contains the phrase \"PRODUCT FACT FIL\" in a clear, bold font, likely indicating that it is part of a product information document. The overall design is clean and modern, with a light blue section visible at the bottom."
|
|
140
140
|
},
|
|
141
141
|
{
|
|
142
142
|
"name": "img-2.jpeg",
|
|
143
|
-
"text": "The image presents a pie chart illustrating the distribution of stories published per day in various languages, totaling 292 stories (excluding updates). \n\n- The largest segment, colored blue, represents Arabic, accounting for 35% of the total.\n- The second largest segment, in red, indicates English stories, making up 20%.\n- The orange segment represents Spanish, contributing 14%.\n- The purple segment shows French stories at 16%.\n- The light blue segment indicates German,
|
|
143
|
+
"text": "The image presents a pie chart illustrating the distribution of stories published per day in various languages, totaling 292 stories (excluding updates). \n\n- The largest segment, colored blue, represents Arabic, accounting for 35% of the total.\n- The second largest segment, in red, indicates English stories, making up 20%.\n- The orange segment represents Spanish, contributing 14%.\n- The purple segment shows French stories at 16%.\n- The light blue segment indicates German, comprising 10%.\n- Finally, the smallest segment, colored yellow, represents Portuguese, which accounts for 5%.\n\nThe chart visually emphasizes the varying proportions of stories across these languages."
|
|
144
144
|
},
|
|
145
145
|
{
|
|
146
146
|
"name": "img-3.jpeg",
|
|
147
|
-
"text": "The image is a pie chart displaying the distribution of various sports. The largest segment, colored blue, represents \"Football,\" accounting for 57% of the total. Other segments include:\n\n- \"Athletics\" (light blue) at
|
|
147
|
+
"text": "The image is a pie chart displaying the distribution of various sports. The largest segment, colored blue, represents \"Football,\" accounting for 57% of the total. Other segments include:\n\n- \"Athletics\" (light blue) at 2%\n- \"Basketball\" (black) at 7%\n- \"Cycling\" (gray) at 3%\n- \"Handball\" (yellow) at 6%\n- \"Motor sports\" (purple) at 6%\n- \"Ski\" (light purple) at 6%\n- \"Tennis\" (green) at 6%\n- \"Olympics\" (orange) at 8%\n\nEach segment is labeled with its corresponding percentage, and the chart uses a variety of colors to differentiate between the sports."
|
|
148
148
|
},
|
|
149
149
|
{
|
|
150
150
|
"name": "img-4.jpeg",
|
|
151
|
-
"text": "The image presents a pie chart alongside a textual description. The chart illustrates the distribution of daily reports (
|
|
151
|
+
"text": "The image presents a pie chart alongside a textual description. The chart illustrates the distribution of daily reports (excluding updates) across various sports, totaling over 30 reports per day. \n\n- The largest segment, colored blue, represents Football, accounting for 67% of the reports.\n- The next largest segment, in orange, represents Motor Sports at 9%.\n- Following that, the purple segment for Athletics makes up 8%.\n- The light blue segment for Basketball comprises 7%.\n- The red segment for Tennis also accounts for 7%.\n- Finally, the yellow segment for the Olympics represents 2%.\n\nEach segment is labeled with its respective percentage, and a legend identifies the colors corresponding to each sport."
|
|
152
152
|
},
|
|
153
153
|
{
|
|
154
154
|
"name": "img-5.jpeg",
|
|
155
|
-
"text": "The image presents a visual representation of the distribution of photos taken per day across various sports. The central pie chart is divided into segments, each representing a different sport, with percentages indicating the proportion of total photos attributed to each category. \n\n- The largest segment, colored
|
|
155
|
+
"text": "The image presents a visual representation of the distribution of photos taken per day across various sports. The central pie chart is divided into segments, each representing a different sport, with percentages indicating the proportion of total photos attributed to each category. \n\n- The largest segment, colored blue, represents Football, accounting for 39% of the total.\n- The second largest, in gray, is Basketball, making up 22%.\n- Other segments include Athletics (orange, 7%), Cricket (red, 4%), Cycling (pink, 2%), Motor Sports (purple, 11%), Rugby (green, 3%), Tennis (light blue, 9%), and the Olympics (light purple, 3%).\n\nTo the left of the pie chart, the text states \"1,490* photos per day,\" emphasizing the total number of photos captured daily across these sports."
|
|
156
156
|
},
|
|
157
157
|
{
|
|
158
158
|
"name": "img-6.jpeg",
|
|
159
|
-
"text": "The image features a simple design with a bright yellow background. In the
|
|
159
|
+
"text": "The image features a simple design with a bright yellow background. In the center, the text \"AFP\" is displayed in bold blue letters, followed by a blue circle. Next to it, the word \"Sports\" is written in a bold black font. The overall layout is clean and straightforward, emphasizing the \"AFP\" branding alongside the sports theme."
|
|
160
160
|
},
|
|
161
161
|
{
|
|
162
162
|
"name": "img-7.jpeg",
|
|
163
|
-
"text": "The image
|
|
163
|
+
"text": "The image presents a pie chart illustrating the distribution of infographics created per day in different languages. The chart is divided into five segments, each representing a language:\n\n- **German**: 24% (yellow segment)\n- **English**: 24% (purple segment)\n- **Spanish**: 21% (light blue segment)\n- **French**: 16% (orange segment)\n- **Portuguese**: 15% (blue segment)\n\nTo the left of the pie chart, there is a bold text stating \"6* Infographics per day,\" indicating the total number of infographics produced daily. The colors of the segments correspond to the languages listed in the legend next to the chart."
|
|
164
164
|
},
|
|
165
165
|
{
|
|
166
166
|
"name": "img-8.jpeg",
|
|
167
|
-
"text": "The image features a pie chart illustrating the distribution of videographics produced per year, totaling 75.
|
|
167
|
+
"text": "The image features a pie chart illustrating the distribution of videographics produced per year, totaling 75. Each segment of the pie chart is color-coded to represent different languages:\n\n- **German**: Light blue segment, accounting for 4% of the total.\n- **English**: Orange segment, representing 27%.\n- **Spanish**: Dark blue segment, making up 28%.\n- **French**: Yellow segment, which comprises 16%.\n- **Portuguese**: Red segment, contributing 25%.\n\nThe chart visually conveys the proportions of videographics created in each language, highlighting Spanish and English as the most significant contributors. The title above the chart states \"75* videographics per year\" in bold yellow text."
|
|
168
168
|
},
|
|
169
169
|
{
|
|
170
170
|
"name": "img-9.jpeg",
|
|
171
|
-
"text": "The image features a series of text boxes on a black background, each highlighting
|
|
171
|
+
"text": "The image features a series of text boxes on a black background, each highlighting different service offerings. \n\n1. The first box states \"Coverage\" followed by \"24 hours a day\" and \"7 days a week,\" indicating continuous availability.\n2. The second box mentions \"+ 300* editable videos per month,\" suggesting a high volume of video content that can be customized.\n3. The third box lists \"130 video production points worldwide,\" emphasizing a global reach for video production services.\n\nThe text is presented in a clean, modern font, enhancing readability against the dark background."
|
|
172
172
|
},
|
|
173
173
|
{
|
|
174
174
|
"name": "img-10.jpeg",
|
|
175
|
-
"text": "The image features a sleek, modern design with a black background. It presents three key pieces of information in bold, white text, each contained within separate
|
|
175
|
+
"text": "The image features a sleek, modern design with a black background. It presents three key pieces of information in bold, white text, each contained within separate sections. \n\n1. The first section highlights \"Coverage\" with the details \"24 hours a day\" and \"7 days a week,\" emphasizing continuous availability.\n2. The second section states \"3 Thematic sections,\" indicating a structured approach to content organization.\n3. The final section notes \"+ 15* articles illustrated per day,\" suggesting a high volume of visually supported content produced daily.\n\nThe overall layout is clean and visually appealing, with a focus on clarity and impact."
|
|
176
176
|
},
|
|
177
177
|
{
|
|
178
178
|
"name": "img-11.jpeg",
|
|
179
|
-
"text": "The image
|
|
179
|
+
"text": "The image depicts a pie chart divided into eight segments, each representing a different percentage of a whole. The segments are colored in various shades: \n\n- The largest segment, colored light blue, occupies 32% of the chart.\n- Next is a yellow segment at 17%.\n- A red segment follows, representing 14%.\n- The blue segment accounts for 12%.\n- There are two segments, both orange and green, each representing 7%.\n- Lastly, a purple segment makes up 11%.\n\nEach segment is labeled with its corresponding percentage, providing a clear visual representation of the data distribution."
|
|
180
180
|
},
|
|
181
181
|
{
|
|
182
182
|
"name": "img-12.jpeg",
|
|
183
|
-
"text": "The image features a sleek, modern design with a black background. It contains four distinct sections, each highlighted with
|
|
183
|
+
"text": "The image features a sleek, modern design with a black background. It contains four distinct sections, each highlighted with white text. \n\n1. The first section states \"Coverage\" followed by \"24 hours a day\" and \"7 days a week,\" emphasizing continuous availability.\n2. The second section simply displays the number \"5\" alongside the phrase \"Thematic sections,\" indicating a structured approach to content organization.\n3. The third section notes \"+ 60*\" with the phrase \"articles illustrated per day,\" suggesting a high volume of daily content production.\n\nThe overall layout is clean and visually appealing, with a focus on clarity and impact."
|
|
184
184
|
},
|
|
185
185
|
{
|
|
186
186
|
"name": "img-13.jpeg",
|
|
187
|
-
"text": "The image is a pie chart divided into several segments, each representing a different percentage of a whole. The
|
|
187
|
+
"text": "The image is a pie chart divided into several segments, each representing a different percentage of a whole. The largest segment, colored blue, occupies 35% of the chart. Other segments include:\n\n- A purple segment at 15%\n- Two segments at 10% each, one in dark gray and the other in light blue\n- A segment in orange at 8%\n- A yellow segment at 5%\n- Two smaller segments, each at 3%, one in green and the other in black\n- A final segment at 9%, colored gray\n\nThe percentages are displayed prominently within each segment, providing a clear visual representation of the data distribution."
|
|
188
188
|
},
|
|
189
189
|
{
|
|
190
190
|
"name": "img-14.jpeg",
|
|
191
|
-
"text": "The image features a clean and modern design with three
|
|
191
|
+
"text": "The image features a clean and modern design with a black background. It presents three key pieces of information in bold, white text, each enclosed in a rectangular box. \n\n1. The first box states \"Coverage\" followed by \"6am to midnight,\" indicating the hours of operation or service.\n2. The second box highlights \"6 Thematic sections,\" suggesting a structured approach to content organization.\n3. The third box notes \"+ 45* articles illustrated per day,\" implying a high volume of daily content production, with an asterisk possibly indicating additional context or a footnote.\n\nThe overall layout is simple and visually appealing, emphasizing clarity and accessibility of information."
|
|
192
192
|
},
|
|
193
193
|
{
|
|
194
194
|
"name": "img-15.jpeg",
|
|
195
|
-
"text": "The image is a pie chart displaying the distribution of various sports. Each segment
|
|
195
|
+
"text": "The image is a pie chart displaying the distribution of various sports. Each segment is color-coded and labeled with a percentage representing its share. \n\n- The largest segment, colored blue, represents \"Football\" at 31%.\n- The second largest, in purple, is \"Athletics\" at 21%.\n- Next, \"Motor sports\" is shown in yellow at 14%.\n- \"Basketball\" is represented in pink at 6%.\n- \"Cycling\" and \"Tennis\" each occupy 6% and are shown in gray and light blue, respectively.\n- Finally, the \"Olympics\" segment is colored orange and accounts for 5%.\n\nThe chart effectively visualizes the popularity or participation rates of these sports."
|
|
196
196
|
},
|
|
197
197
|
{
|
|
198
198
|
"name": "img-16.jpeg",
|
|
199
|
-
"text": "The image features a
|
|
199
|
+
"text": "The image features a minimalist design with a black background and white text. It presents three key points, each enclosed in a rectangular box. \n\n1. The first box states \"Coverage 6am to midnight,\" indicating the operational hours.\n2. The second box highlights \"2 Thematic sections,\" suggesting a structured approach to content organization.\n3. The third box notes \"+ 15* articles illustrated per day,\" emphasizing the volume of illustrated articles produced daily.\n\nThe overall layout is clean and straightforward, focusing on clarity and easy readability."
|
|
200
200
|
},
|
|
201
201
|
{
|
|
202
202
|
"name": "img-17.jpeg",
|
|
203
|
-
"text": "The image is a pie chart divided into several segments, each representing a percentage of a whole. The largest segment, colored blue, occupies 62% of the chart. The next largest segment is red, representing 15%. Following that, there is a pink segment at 12%. Smaller segments include an orange section at 9%, and three tiny segments colored yellow, light blue, and purple, each representing 1%. The percentages are labeled within
|
|
203
|
+
"text": "The image is a pie chart divided into several segments, each representing a percentage of a whole. The largest segment, colored blue, occupies 62% of the chart. The next largest segment is red, representing 15%. Following that, there is a pink segment at 12%. Smaller segments include an orange section at 9%, and three tiny segments colored yellow, light blue, and purple, each representing 1%. The percentages are clearly labeled within each segment, providing a visual representation of the data distribution."
|
|
204
204
|
},
|
|
205
205
|
{
|
|
206
206
|
"name": "img-18.jpeg",
|
|
207
|
-
"text": "The image features a minimalist design with a
|
|
207
|
+
"text": "The image features a minimalist design with a black background and white text. It presents three key points related to coverage and content:\n\n1. **Coverage**: The text states \"6am to midnight,\" indicating the hours of operation or availability.\n2. **Thematic Sections**: The number \"1\" is prominently displayed, suggesting there is one main thematic section.\n3. **Articles**: The phrase \"+ 10* articles illustrated per day\" implies that more than ten articles are illustrated daily, emphasizing the volume of content produced.\n\nOverall, the layout is clean and straightforward, focusing on conveying essential information efficiently."
|
|
208
208
|
},
|
|
209
209
|
{
|
|
210
210
|
"name": "img-19.jpeg",
|
|
211
|
-
"text": "The image is a pie chart
|
|
211
|
+
"text": "The image is a pie chart displaying the distribution of various sports. The largest segment, colored blue, represents \"Football,\" accounting for 56% of the total. Other segments include:\n\n- \"Athletics\" (purple) at 3%\n- \"Basketball\" (orange) at 12%\n- \"Motor sports\" (yellow) at 10%\n- \"Tennis\" (red) at 11%\n- \"Olympics\" (green) at 8%\n\nEach segment is labeled with its corresponding percentage, providing a clear visual representation of the proportions of interest in these sports."
|
|
212
212
|
}
|
|
213
213
|
]
|
|
214
214
|
}
|
{pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/test_openai_vision.py
RENAMED
|
@@ -21,6 +21,32 @@ def test_openai_vision_basic():
|
|
|
21
21
|
assert model_class == OpenAIVisionParameters
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
@pytest.mark.skip(reason="Not a test")
def test_openai_horrible():
    """Manual run of the vision processor on a sample document.

    Loads a JSON document from the test data directory, prints its sentences
    before and after processing with ``replace_refs_altTexts_by_descriptions``
    enabled, writes the processed document next to the input and prints the
    elapsed time. Skipped by default because it is not an automated test.
    """

    def dump_sentences(doc):
        # Print the text covered by each sentence, followed by a separator.
        for sent in doc.sentences:
            print(doc.text[sent.start:sent.end])
            print("-------")

    started_at = time.time()
    here = Path(__file__).parent
    params = OpenAIVisionProcessorParameters(
        model_str="gpt-4o",
        replace_refs_altTexts_by_descriptions=True,
        prompt="""If the image is a graph (line graph, pie chart, bar chart, etc.), provide a detailed description, possibly a markdown table with figures and headers. Otherwise, just provide a brief one-line description.""",
    )
    vision_processor = OpenAIVisionProcessor()

    input_path = here / 'data/template_conversion_en_gp-document-IF10244.59.pdf.json'
    with input_path.open("r") as fin:
        raw_doc = json.load(fin)
        documents = [Document(**raw_doc)]
        dump_sentences(documents[0])

        documents = processor_output = vision_processor.process(documents, params)
        assert len(processor_output) == 1
        print("======================")
        dump_sentences(documents[0])

        output_path = here / 'data/template_conversion_en_gp-document-IF10244.59_gpt-4o.json'
        with output_path.open("w") as fout:
            serialized = documents[0].json(exclude_none=True, exclude_unset=True, indent=2)
            print(serialized, file=fout)
    print("--- gpt-4o: %s seconds ---" % (time.time() - started_at))
|
|
48
|
+
|
|
49
|
+
|
|
24
50
|
@pytest.mark.skip(reason="Not a test")
|
|
25
51
|
def test_openai():
|
|
26
52
|
converter = OpenAIVisionConverter()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/colducoq.jpg
RENAMED
|
File without changes
|
{pyconverters_openai_vision-0.5.52 → pyconverters_openai_vision-0.5.54}/tests/data/webinar.png
RENAMED
|
File without changes
|
|
File without changes
|