amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/rag/pdf_utils.py
CHANGED
|
@@ -1,235 +1,235 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
-
# SPDX-License-Identifier: MIT
|
|
4
|
-
|
|
5
|
-
"""
|
|
6
|
-
PDF image extraction utilities for multi-modal RAG.
|
|
7
|
-
|
|
8
|
-
Extracts individual images from PDF pages (not whole page conversion).
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
import io
|
|
12
|
-
import logging
|
|
13
|
-
from typing import List, Tuple
|
|
14
|
-
|
|
15
|
-
logger = logging.getLogger(__name__)
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def extract_images_from_page_pymupdf(pdf_path: str, page_num: int) -> List[dict]:
    """
    Extract the embedded images of one PDF page as size-capped PNGs.

    Uses PyMuPDF (more reliable than pypdf for images). Each image is
    decoded with Pillow, converted to RGB unless it is already RGB/RGBA,
    scaled so that neither side exceeds 1600 px, and re-encoded as PNG.
    While the PNG is larger than 300 KB it is halved in both dimensions,
    at most 5 times.

    Errors never propagate to the caller: a missing dependency, an
    unreadable file or an out-of-range page is logged and yields an empty
    list; a single image that fails to process is logged and skipped.

    Args:
        pdf_path: Path to PDF file
        page_num: Page number (1-indexed)

    Returns:
        List of dicts, one per successfully processed image, with keys
        "image_bytes" (PNG data), "width", "height", "format" (always
        "png") and "size_kb".
    """
    max_dimension = 1600  # longest allowed side, in pixels
    max_size_kb = 300  # target upper bound for the encoded PNG
    max_iterations = 5  # cap on the number of halving passes

    images: List[dict] = []

    try:
        import fitz  # PyMuPDF
        from PIL import Image
    except ImportError:
        logger.error("PyMuPDF not installed. Install: uv pip install pymupdf")
        return images

    def encode_png(image, compress_level):
        """Serialize *image* to PNG bytes in memory."""
        buffer = io.BytesIO()
        # NOTE(review): Pillow documents that optimize=True forces the
        # maximum zlib level, so compress_level is presumably ignored here;
        # the arguments are kept exactly as the original code passed them.
        image.save(
            buffer, format="PNG", optimize=True, compress_level=compress_level
        )
        return buffer.getvalue()

    doc = None
    try:
        doc = fitz.open(pdf_path)

        # Reject out-of-range pages explicitly: page_num <= 0 would become a
        # negative index, which PyMuPDF resolves from the END of the
        # document and would silently return images of the wrong page.
        page_total = len(doc)
        if not 1 <= page_num <= page_total:
            logger.error(
                f"Error extracting images from page {page_num}: "
                f"page number out of range (document has {page_total} pages)"
            )
            return images

        page = doc[page_num - 1]  # PyMuPDF uses 0-indexed

        for img_index, img_info in enumerate(page.get_images()):
            try:
                xref = img_info[0]

                # Extract image bytes (PyMuPDF handles decoding)
                base_image = doc.extract_image(xref)

                # Open with PIL for processing
                img = Image.open(io.BytesIO(base_image["image"]))
                width, height = img.size

                # Convert to RGB if needed
                if img.mode not in ("RGB", "RGBA"):
                    logger.debug(f"Converting {img.mode} to RGB")
                    img = img.convert("RGB")

                # Resize if too large, preserving the aspect ratio
                if width > max_dimension or height > max_dimension:
                    scale = min(max_dimension / width, max_dimension / height)
                    # Clamp to 1 px: for extreme aspect ratios the short
                    # side would truncate to 0 and resize() would raise.
                    new_width = max(1, int(width * scale))
                    new_height = max(1, int(height * scale))

                    logger.info(
                        f" Resizing: {width}x{height} → {new_width}x{new_height}"
                    )
                    img = img.resize(
                        (new_width, new_height), Image.Resampling.LANCZOS
                    )

                # Save as optimized PNG
                png_bytes = encode_png(img, 6)
                size_kb = len(png_bytes) / 1024

                # Iteratively compress until target size is reached
                compression_iterations = 0
                while size_kb > max_size_kb and compression_iterations < max_iterations:
                    compression_iterations += 1
                    logger.info(
                        f" Compressing (iteration {compression_iterations}): {size_kb:.0f}KB → <{max_size_kb}KB"
                    )

                    # Reduce size by 50% each iteration (never below 1 px)
                    img = img.resize(
                        (max(1, img.width // 2), max(1, img.height // 2)),
                        Image.Resampling.LANCZOS,
                    )
                    png_bytes = encode_png(img, 9)
                    size_kb = len(png_bytes) / 1024

                # Report the outcome once, and only if compression ran.
                if compression_iterations:
                    if size_kb <= max_size_kb:
                        logger.info(
                            f" ✅ Compressed to {size_kb:.0f}KB ({img.width}x{img.height}) in {compression_iterations} iteration(s)"
                        )
                    else:
                        logger.warning(
                            f" ⚠️ Could not compress below {max_size_kb}KB after {max_iterations} iterations (final: {size_kb:.0f}KB)"
                        )

                images.append(
                    {
                        "image_bytes": png_bytes,
                        "width": img.width,
                        "height": img.height,
                        "format": "png",
                        "size_kb": size_kb,
                    }
                )

                logger.debug(
                    f"Extracted image {img_index + 1}: {img.width}x{img.height}, {size_kb:.1f}KB"
                )

            except Exception as e:
                # Best effort: one broken image must not lose the others.
                logger.warning(
                    f"Failed to extract image {img_index + 1} from page {page_num}: {e}"
                )
                continue

    except Exception as e:
        logger.error(f"Error extracting images from page {page_num}: {e}")
    finally:
        # Always release the document, including on the error paths above.
        if doc is not None:
            doc.close()

    return images
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def extract_images_from_page(
    page, page_num: int  # pylint: disable=unused-argument
) -> List[dict]:
    """
    DEPRECATED: Use extract_images_from_page_pymupdf instead.

    Compatibility stub for the old pypdf-based extraction. It ignores both
    arguments, logs a deprecation warning and always returns an empty list.

    Args:
        page: Unused.
        page_num: Unused.

    Returns:
        An empty list.
    """
    no_images: List[dict] = []
    logger.warning("Using deprecated pypdf image extraction - switch to PyMuPDF")
    return no_images
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def count_images_in_page(page) -> Tuple[bool, int]:
    """
    Fast check for image presence without extraction.

    Counts the entries of the page's "/Resources" -> "/XObject" dictionary
    whose "/Subtype" is "/Image". The check is best effort: if the page
    structure cannot be read, the failure is logged at debug level and the
    images counted so far (possibly zero) are reported.

    Args:
        page: pypdf page object

    Returns:
        (has_images: bool, count: int)
    """
    count = 0

    try:
        if "/XObject" in page.get("/Resources", {}):
            xobject = page["/Resources"]["/XObject"].get_object()
            for obj_name in xobject:
                if xobject[obj_name].get("/Subtype") == "/Image":
                    count += 1
    except Exception as e:  # pylint: disable=broad-except
        # Deliberately non-fatal, but leave a trace instead of hiding it.
        logger.debug(f"Could not count images on page: {e}")

    return (count > 0, count)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def get_image_positions_on_page(pdf_path: str, page_num: int) -> List[dict]:
    """
    Get positions of images on PDF page using PyMuPDF.

    Only the first placement of each image is reported; images for which
    PyMuPDF returns no rectangle are skipped. Errors never propagate: a
    missing PyMuPDF, an unreadable file or an out-of-range page is logged
    and yields an empty list.

    Args:
        pdf_path: Path to PDF file
        page_num: Page number (0-indexed). Note that this differs from
            extract_images_from_page_pymupdf, which is 1-indexed.

    Returns:
        [
            {
                "image_index": int,  # index in page.get_images()
                "bbox": [x0, y0, x1, y1],
                "position_y": float,  # Y-coordinate for sorting
                "width": int,
                "height": int
            },
            ...
        ]
    """
    positions: List[dict] = []

    try:
        import fitz  # PyMuPDF
    except ImportError:
        logger.debug("PyMuPDF not available for position detection")
        return positions

    doc = None
    try:
        doc = fitz.open(pdf_path)

        # A negative index would be resolved from the END of the document
        # by PyMuPDF and silently report positions for the wrong page.
        if not 0 <= page_num < len(doc):
            logger.warning(
                f"Could not get image positions for page {page_num}: "
                f"page index out of range (document has {len(doc)} pages)"
            )
            return positions

        page = doc[page_num]

        for img_index, img_info in enumerate(page.get_images()):
            # Get image bounding box
            xref = img_info[0]
            image_rects = page.get_image_rects(xref)
            if not image_rects:
                continue

            rect = image_rects[0]  # First occurrence
            positions.append(
                {
                    "image_index": img_index,
                    "bbox": [rect.x0, rect.y0, rect.x1, rect.y1],
                    "position_y": rect.y0,  # Top Y coordinate
                    "width": int(rect.width),
                    "height": int(rect.height),
                }
            )

    except Exception as e:
        logger.warning(f"Could not get image positions for page {page_num}: {e}")
    finally:
        # Always release the document, including on the error paths above.
        if doc is not None:
            doc.close()

    return positions
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
PDF image extraction utilities for multi-modal RAG.
|
|
7
|
+
|
|
8
|
+
Extracts individual images from PDF pages (not whole page conversion).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import io
|
|
12
|
+
import logging
|
|
13
|
+
from typing import List, Tuple
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_images_from_page_pymupdf(pdf_path: str, page_num: int) -> List[dict]:
    """
    Extract the embedded images of one PDF page as size-capped PNGs.

    Uses PyMuPDF (more reliable than pypdf for images). Each image is
    decoded with Pillow, converted to RGB unless it is already RGB/RGBA,
    scaled so that neither side exceeds 1600 px, and re-encoded as PNG.
    While the PNG is larger than 300 KB it is halved in both dimensions,
    at most 5 times.

    Errors never propagate to the caller: a missing dependency, an
    unreadable file or an out-of-range page is logged and yields an empty
    list; a single image that fails to process is logged and skipped.

    Args:
        pdf_path: Path to PDF file
        page_num: Page number (1-indexed)

    Returns:
        List of dicts, one per successfully processed image, with keys
        "image_bytes" (PNG data), "width", "height", "format" (always
        "png") and "size_kb".
    """
    max_dimension = 1600  # longest allowed side, in pixels
    max_size_kb = 300  # target upper bound for the encoded PNG
    max_iterations = 5  # cap on the number of halving passes

    images: List[dict] = []

    try:
        import fitz  # PyMuPDF
        from PIL import Image
    except ImportError:
        logger.error("PyMuPDF not installed. Install: uv pip install pymupdf")
        return images

    def encode_png(image, compress_level):
        """Serialize *image* to PNG bytes in memory."""
        buffer = io.BytesIO()
        # NOTE(review): Pillow documents that optimize=True forces the
        # maximum zlib level, so compress_level is presumably ignored here;
        # the arguments are kept exactly as the original code passed them.
        image.save(
            buffer, format="PNG", optimize=True, compress_level=compress_level
        )
        return buffer.getvalue()

    doc = None
    try:
        doc = fitz.open(pdf_path)

        # Reject out-of-range pages explicitly: page_num <= 0 would become a
        # negative index, which PyMuPDF resolves from the END of the
        # document and would silently return images of the wrong page.
        page_total = len(doc)
        if not 1 <= page_num <= page_total:
            logger.error(
                f"Error extracting images from page {page_num}: "
                f"page number out of range (document has {page_total} pages)"
            )
            return images

        page = doc[page_num - 1]  # PyMuPDF uses 0-indexed

        for img_index, img_info in enumerate(page.get_images()):
            try:
                xref = img_info[0]

                # Extract image bytes (PyMuPDF handles decoding)
                base_image = doc.extract_image(xref)

                # Open with PIL for processing
                img = Image.open(io.BytesIO(base_image["image"]))
                width, height = img.size

                # Convert to RGB if needed
                if img.mode not in ("RGB", "RGBA"):
                    logger.debug(f"Converting {img.mode} to RGB")
                    img = img.convert("RGB")

                # Resize if too large, preserving the aspect ratio
                if width > max_dimension or height > max_dimension:
                    scale = min(max_dimension / width, max_dimension / height)
                    # Clamp to 1 px: for extreme aspect ratios the short
                    # side would truncate to 0 and resize() would raise.
                    new_width = max(1, int(width * scale))
                    new_height = max(1, int(height * scale))

                    logger.info(
                        f" Resizing: {width}x{height} → {new_width}x{new_height}"
                    )
                    img = img.resize(
                        (new_width, new_height), Image.Resampling.LANCZOS
                    )

                # Save as optimized PNG
                png_bytes = encode_png(img, 6)
                size_kb = len(png_bytes) / 1024

                # Iteratively compress until target size is reached
                compression_iterations = 0
                while size_kb > max_size_kb and compression_iterations < max_iterations:
                    compression_iterations += 1
                    logger.info(
                        f" Compressing (iteration {compression_iterations}): {size_kb:.0f}KB → <{max_size_kb}KB"
                    )

                    # Reduce size by 50% each iteration (never below 1 px)
                    img = img.resize(
                        (max(1, img.width // 2), max(1, img.height // 2)),
                        Image.Resampling.LANCZOS,
                    )
                    png_bytes = encode_png(img, 9)
                    size_kb = len(png_bytes) / 1024

                # Report the outcome once, and only if compression ran.
                if compression_iterations:
                    if size_kb <= max_size_kb:
                        logger.info(
                            f" ✅ Compressed to {size_kb:.0f}KB ({img.width}x{img.height}) in {compression_iterations} iteration(s)"
                        )
                    else:
                        logger.warning(
                            f" ⚠️ Could not compress below {max_size_kb}KB after {max_iterations} iterations (final: {size_kb:.0f}KB)"
                        )

                images.append(
                    {
                        "image_bytes": png_bytes,
                        "width": img.width,
                        "height": img.height,
                        "format": "png",
                        "size_kb": size_kb,
                    }
                )

                logger.debug(
                    f"Extracted image {img_index + 1}: {img.width}x{img.height}, {size_kb:.1f}KB"
                )

            except Exception as e:
                # Best effort: one broken image must not lose the others.
                logger.warning(
                    f"Failed to extract image {img_index + 1} from page {page_num}: {e}"
                )
                continue

    except Exception as e:
        logger.error(f"Error extracting images from page {page_num}: {e}")
    finally:
        # Always release the document, including on the error paths above.
        if doc is not None:
            doc.close()

    return images
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def extract_images_from_page(
    page, page_num: int  # pylint: disable=unused-argument
) -> List[dict]:
    """
    DEPRECATED: Use extract_images_from_page_pymupdf instead.

    Compatibility stub for the old pypdf-based extraction. It ignores both
    arguments, logs a deprecation warning and always returns an empty list.

    Args:
        page: Unused.
        page_num: Unused.

    Returns:
        An empty list.
    """
    no_images: List[dict] = []
    logger.warning("Using deprecated pypdf image extraction - switch to PyMuPDF")
    return no_images
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def count_images_in_page(page) -> Tuple[bool, int]:
    """
    Fast check for image presence without extraction.

    Counts the entries of the page's "/Resources" -> "/XObject" dictionary
    whose "/Subtype" is "/Image". The check is best effort: if the page
    structure cannot be read, the failure is logged at debug level and the
    images counted so far (possibly zero) are reported.

    Args:
        page: pypdf page object

    Returns:
        (has_images: bool, count: int)
    """
    count = 0

    try:
        if "/XObject" in page.get("/Resources", {}):
            xobject = page["/Resources"]["/XObject"].get_object()
            for obj_name in xobject:
                if xobject[obj_name].get("/Subtype") == "/Image":
                    count += 1
    except Exception as e:  # pylint: disable=broad-except
        # Deliberately non-fatal, but leave a trace instead of hiding it.
        logger.debug(f"Could not count images on page: {e}")

    return (count > 0, count)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def get_image_positions_on_page(pdf_path: str, page_num: int) -> List[dict]:
    """
    Get positions of images on PDF page using PyMuPDF.

    Only the first placement of each image is reported; images for which
    PyMuPDF returns no rectangle are skipped. Errors never propagate: a
    missing PyMuPDF, an unreadable file or an out-of-range page is logged
    and yields an empty list.

    Args:
        pdf_path: Path to PDF file
        page_num: Page number (0-indexed). Note that this differs from
            extract_images_from_page_pymupdf, which is 1-indexed.

    Returns:
        [
            {
                "image_index": int,  # index in page.get_images()
                "bbox": [x0, y0, x1, y1],
                "position_y": float,  # Y-coordinate for sorting
                "width": int,
                "height": int
            },
            ...
        ]
    """
    positions: List[dict] = []

    try:
        import fitz  # PyMuPDF
    except ImportError:
        logger.debug("PyMuPDF not available for position detection")
        return positions

    doc = None
    try:
        doc = fitz.open(pdf_path)

        # A negative index would be resolved from the END of the document
        # by PyMuPDF and silently report positions for the wrong page.
        if not 0 <= page_num < len(doc):
            logger.warning(
                f"Could not get image positions for page {page_num}: "
                f"page index out of range (document has {len(doc)} pages)"
            )
            return positions

        page = doc[page_num]

        for img_index, img_info in enumerate(page.get_images()):
            # Get image bounding box
            xref = img_info[0]
            image_rects = page.get_image_rects(xref)
            if not image_rects:
                continue

            rect = image_rects[0]  # First occurrence
            positions.append(
                {
                    "image_index": img_index,
                    "bbox": [rect.x0, rect.y0, rect.x1, rect.y1],
                    "position_y": rect.y0,  # Top Y coordinate
                    "width": int(rect.width),
                    "height": int(rect.height),
                }
            )

    except Exception as e:
        logger.warning(f"Could not get image positions for page {page_num}: {e}")
    finally:
        # Always release the document, including on the error paths above.
        if doc is not None:
            doc.close()

    return positions
|