deepagents-printshop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/content_editor/__init__.py +1 -0
- agents/content_editor/agent.py +279 -0
- agents/content_editor/content_reviewer.py +327 -0
- agents/content_editor/versioned_agent.py +455 -0
- agents/latex_specialist/__init__.py +1 -0
- agents/latex_specialist/agent.py +531 -0
- agents/latex_specialist/latex_analyzer.py +510 -0
- agents/latex_specialist/latex_optimizer.py +1192 -0
- agents/qa_orchestrator/__init__.py +1 -0
- agents/qa_orchestrator/agent.py +603 -0
- agents/qa_orchestrator/langgraph_workflow.py +733 -0
- agents/qa_orchestrator/pipeline_types.py +72 -0
- agents/qa_orchestrator/quality_gates.py +495 -0
- agents/qa_orchestrator/workflow_coordinator.py +139 -0
- agents/research_agent/__init__.py +1 -0
- agents/research_agent/agent.py +258 -0
- agents/research_agent/llm_report_generator.py +1023 -0
- agents/research_agent/report_generator.py +536 -0
- agents/visual_qa/__init__.py +1 -0
- agents/visual_qa/agent.py +410 -0
- deepagents_printshop-0.1.0.dist-info/METADATA +744 -0
- deepagents_printshop-0.1.0.dist-info/RECORD +37 -0
- deepagents_printshop-0.1.0.dist-info/WHEEL +4 -0
- deepagents_printshop-0.1.0.dist-info/entry_points.txt +2 -0
- deepagents_printshop-0.1.0.dist-info/licenses/LICENSE +86 -0
- tools/__init__.py +1 -0
- tools/change_tracker.py +419 -0
- tools/content_type_loader.py +171 -0
- tools/graph_generator.py +281 -0
- tools/latex_generator.py +374 -0
- tools/llm_latex_generator.py +678 -0
- tools/magazine_layout.py +462 -0
- tools/pattern_injector.py +250 -0
- tools/pattern_learner.py +477 -0
- tools/pdf_compiler.py +386 -0
- tools/version_manager.py +346 -0
- tools/visual_qa.py +799 -0
tools/visual_qa.py
ADDED
|
@@ -0,0 +1,799 @@
|
|
|
1
|
+
"""Visual Quality Assurance for PDF documents."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import base64
|
|
5
|
+
import io
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Dict, Optional, Tuple
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
|
|
11
|
+
from pdf2image import convert_from_path
|
|
12
|
+
from PIL import Image
|
|
13
|
+
|
|
14
|
+
# Make anthropic import optional
|
|
15
|
+
try:
|
|
16
|
+
import anthropic
|
|
17
|
+
ANTHROPIC_AVAILABLE = True
|
|
18
|
+
except ImportError:
|
|
19
|
+
ANTHROPIC_AVAILABLE = False
|
|
20
|
+
print("⚠️ Anthropic not available - visual analysis will be limited")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class VisualValidationResult:
    """Result of visual validation for a single page.

    Built in VisualQAAgent.validate_pdf_visual_quality from the dict returned
    by MultimodalLLMAnalyzer.analyze_page (or its heuristic fallback).
    """
    # 1-based index of the page within the PDF
    page_number: int
    # Page category from VisualValidator.detect_page_type:
    # 'title_page', 'toc_page' or 'content_page'
    page_type: str  # 'title', 'toc', 'content'
    # NOTE(review): declared as 0-100 below, but the values actually stored
    # here come from the per-page analyzer and are on a 0-10 scale — confirm
    # the intended scale (document-level DocumentVisualQA.overall_score is 0-100).
    overall_score: float  # 0-100
    # Issue descriptions reported for this page
    issues_found: List[str]
    # Positive findings reported for this page
    strengths_found: List[str]
    # Free-form narrative assessment of the page
    detailed_feedback: str
    element_scores: Dict[str, float]  # Specific element scores (one entry per evaluated aspect)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DocumentVisualQA:
    """Complete visual QA results for a document."""
    # Path of the analyzed PDF, echoed back for reporting
    pdf_path: str
    # Number of pages converted and analyzed (0 on conversion failure)
    total_pages: int
    # Document-level score on a 0-100 scale (mean page score * 10)
    overall_score: float
    # One VisualValidationResult per page, in page order
    page_results: List[VisualValidationResult]
    # Human-readable summary text of the assessment
    summary: str
    # Actionable follow-up suggestions derived from recurring issues
    recommendations: List[str]
    # ISO-8601 creation time of this result
    timestamp: str
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class PDFToImageConverter:
    """Convert PDF pages to images for visual analysis."""

    def __init__(self, dpi: int = 300):
        """
        Initialize PDF converter.

        Args:
            dpi: Resolution for image conversion (higher = better quality)
        """
        self.dpi = dpi

    def convert_pdf_to_images(self, pdf_path: str) -> List[Image.Image]:
        """
        Convert PDF to list of PIL Images.

        Args:
            pdf_path: Path to PDF file

        Returns:
            List of PIL Image objects, one per page
        """
        try:
            images = convert_from_path(pdf_path, dpi=self.dpi)
        except Exception as e:
            # Conversion can fail for corrupt PDFs or a missing poppler
            # install; report and return an empty list so callers degrade.
            print(f"❌ Error converting PDF to images: {e}")
            return []
        else:
            print(f"✅ Converted PDF to {len(images)} page images")
            return images

    def save_images(self, images: List[Image.Image], output_dir: str, prefix: str = "page") -> List[str]:
        """Save images to disk and return file paths."""
        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        saved_paths = []
        # 1-based numbering keeps filenames aligned with PDF page numbers.
        for i, page_image in enumerate(images, 1):
            destination = target_dir / f"{prefix}_{i:02d}.png"
            page_image.save(destination, 'PNG')
            saved_paths.append(str(destination))

        return saved_paths
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class VisualValidator:
    """Basic visual validation using image analysis."""

    def __init__(self):
        self.validation_rules = self._init_validation_rules()

    def _init_validation_rules(self) -> Dict:
        """Initialize validation rules for different page types."""
        rules: Dict = {}
        rules['title_page'] = {
            'required_elements': ['title', 'author', 'date'],
            'layout_checks': ['centered', 'proper_spacing'],
            'typography_checks': ['title_size', 'font_consistency'],
        }
        rules['toc_page'] = {
            'required_elements': ['toc_header', 'section_list', 'page_numbers'],
            'layout_checks': ['alignment', 'indentation', 'spacing'],
            'typography_checks': ['consistent_fonts', 'number_alignment'],
        }
        rules['content_page'] = {
            'required_elements': ['header', 'footer', 'page_number'],
            'layout_checks': ['margins', 'line_spacing', 'paragraph_structure'],
            'typography_checks': ['font_consistency', 'heading_hierarchy'],
        }
        return rules

    def detect_page_type(self, page_number: int, total_pages: int) -> str:
        """Detect the type of page based on position.

        Page 1 is assumed to be the title page and page 2 the table of
        contents; every later page is treated as regular content.
        """
        positional_types = {1: 'title_page', 2: 'toc_page'}
        return positional_types.get(page_number, 'content_page')

    def validate_basic_structure(self, image: Image.Image, page_type: str) -> Dict:
        """Perform basic structural validation of an image."""
        # Simplified programmatic checks only - in production you'd use
        # computer vision here.
        width, height = image.size
        return {
            'image_dimensions': (width, height),
            'aspect_ratio': width / height,
            'is_portrait': height > width,
            'resolution_adequate': width >= 1000 and height >= 1000,
            'file_size_reasonable': True,  # Could check actual file size
        }
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class MultimodalLLMAnalyzer:
    """Use Claude's vision capabilities for detailed visual analysis.

    Degrades to a size-based heuristic (_fallback_analysis) whenever the
    anthropic package or an API key is unavailable, or an API call fails.
    """

    def __init__(self, api_key: Optional[str] = None, rendering_instructions: str = "", model: str = ""):
        """
        Initialize Claude analyzer.

        Args:
            api_key: Anthropic API key (will use environment variable if None)
            rendering_instructions: Content type rendering instructions to append to prompts
            model: Vision model to use (defaults to VISUAL_QA_MODEL env var or claude-sonnet-4-20250514)
        """
        self.rendering_instructions = rendering_instructions
        self.model = model or os.getenv("VISUAL_QA_MODEL", "claude-sonnet-4-20250514")
        if not ANTHROPIC_AVAILABLE:
            # Library missing entirely: analyze_page() will take the fallback path.
            self.client = None
            self.api_key = None
            print("⚠️ Anthropic not available - using fallback analysis")
        else:
            self.api_key = api_key or os.getenv('ANTHROPIC_API_KEY')
            if not self.api_key:
                print("⚠️ ANTHROPIC_API_KEY not found - using fallback analysis")
                self.client = None
            else:
                self.client = anthropic.Anthropic(api_key=self.api_key)

        self.validation_prompts = self._init_validation_prompts()

    def _init_validation_prompts(self) -> Dict[str, str]:
        """Initialize validation prompts for different page types.

        Each prompt asks the vision model for a fixed JSON schema so that
        analyze_page() can parse scores/issues/strengths uniformly.
        """
        return {
            'title_page': """
Analyze this title page image for a research document. Evaluate the following aspects and provide scores (1-10) for each:

1. **Title Visibility** (1-10): Is the document title clearly visible, properly sized, and well-positioned?
2. **Author Information** (1-10): Is the author name present and appropriately placed?
3. **Date Information** (1-10): Is the date shown and properly formatted?
4. **Layout Quality** (1-10): Is the content centered and professionally arranged? Are borders or diagrams overlapping?
5. **Typography** (1-10): Are fonts appropriate, consistent, and readable?

**CRITICAL CHECK - LaTeX Syntax Detection:**
⚠️ **RED FLAG**: Check if any LaTeX code or commands are visible in the rendered PDF (e.g., \\textbf{}, \\section{}, \\begin{}, \\usepackage{}, etc.).
- If ANY LaTeX syntax is visible in the output, this is a CRITICAL FAILURE
- The PDF should show formatted text, not raw LaTeX commands
- Score must be reduced to 1/10 if LaTeX syntax is detected
- Add "CRITICAL: Visible LaTeX syntax detected" to issues_found

**Disclaimer/Notice Check:**
- Is there a disclaimer or notice visible on the title/cover page (e.g., an AI-generation disclaimer or similar notice)?
- If the rendering instructions require a disclaimer but none is visible, flag it as an issue.

Also identify:
- Any missing elements that should be present
- Formatting issues or visual problems
- Suggestions for improvement

Provide your response in this JSON format:
{
    "scores": {
        "title_visibility": <score>,
        "author_information": <score>,
        "date_information": <score>,
        "layout_quality": <score>,
        "typography": <score>
    },
    "overall_score": <average_score>,
    "issues_found": ["list", "of", "issues"],
    "strengths_found": ["list", "of", "strengths"],
    "detailed_feedback": "Comprehensive analysis of the page quality and specific recommendations"
}
""",

            'toc_page': """
Examine this table of contents page. Evaluate these aspects with scores (1-10):

1. **Header Presence** (1-10): Is there a clear "Table of Contents" or similar header?
2. **Content Listing** (1-10): Are sections/chapters properly listed?
3. **Page Numbers** (1-10): Are page numbers present and aligned correctly?
4. **Hierarchy** (1-10): Is the section hierarchy clear with proper indentation?
5. **Formatting** (1-10): Is the overall formatting clean and professional?

**CRITICAL CHECK - LaTeX Syntax Detection:**
⚠️ **RED FLAG**: Check if any LaTeX code or commands are visible in the rendered PDF (e.g., \\textbf{}, \\section{}, \\begin{}, \\usepackage{}, etc.).
- If ANY LaTeX syntax is visible in the output, this is a CRITICAL FAILURE
- The PDF should show formatted text, not raw LaTeX commands
- Score must be reduced to 1/10 if LaTeX syntax is detected
- Add "CRITICAL: Visible LaTeX syntax detected" to issues_found

Identify:
- Missing or malformed elements
- Alignment and spacing issues
- Typography and readability concerns

Respond in JSON format:
{
    "scores": {
        "header_presence": <score>,
        "content_listing": <score>,
        "page_numbers": <score>,
        "hierarchy": <score>,
        "formatting": <score>
    },
    "overall_score": <average_score>,
    "issues_found": ["list", "of", "issues"],
    "strengths_found": ["list", "of", "strengths"],
    "detailed_feedback": "Detailed analysis and recommendations"
}
""",

            'content_page': """
Analyze this content page for visual quality. Score these elements (1-10):

1. **Headers/Footers** (1-10): Are headers and footers present, consistent, and well-formatted?
2. **Page Numbers** (1-10): Is the page number visible and properly positioned?
3. **Text Layout** (1-10): Are margins, spacing, and text flow appropriate?
4. **Typography** (1-10): Are fonts consistent, readable, and properly sized?
5. **Content Elements** (1-10): Are tables, figures, or other elements well-formatted?

**CRITICAL CHECK - LaTeX Syntax Detection:**
⚠️ **RED FLAG**: Check if any LaTeX code or commands are visible in the rendered PDF (e.g., \\textbf{}, \\section{}, \\begin{}, \\usepackage{}, \\cite{}, \\ref{}, etc.).
- If ANY LaTeX syntax is visible in the output, this is a CRITICAL FAILURE
- The PDF should show formatted text, not raw LaTeX commands
- Score must be reduced to 1/10 if LaTeX syntax is detected
- Add "CRITICAL: Visible LaTeX syntax detected" to issues_found

**Production Credit / Citation Check (especially on the last pages):**
- Is there a production credit or citation at the end of the document (e.g., "Typeset by..." or similar)?
- If the rendering instructions require a PrintShop citation but none is visible, flag it as an issue.

Look for:
- Inconsistent formatting
- Poor spacing or alignment
- Missing page elements
- Typography issues

JSON response format:
{
    "scores": {
        "headers_footers": <score>,
        "page_numbers": <score>,
        "text_layout": <score>,
        "typography": <score>,
        "content_elements": <score>
    },
    "overall_score": <average_score>,
    "issues_found": ["list", "of", "issues"],
    "strengths_found": ["list", "of", "strengths"],
    "detailed_feedback": "Comprehensive quality assessment and improvement suggestions"
}
"""
        }

    def image_to_base64(self, image: Image.Image, max_size_bytes: int = 5 * 1024 * 1024) -> Tuple[str, str]:
        """
        Convert PIL Image to base64 string, compressing if needed.

        Tries PNG first, then progressively lossier JPEG, then progressively
        smaller resized JPEGs, so the payload fits under the API size cap.

        Args:
            image: PIL Image to convert
            max_size_bytes: Maximum size in bytes (default 5MB for Claude API)

        Returns:
            Tuple of (base64_string, media_type)
        """
        # First try PNG (lossless)
        buffer = io.BytesIO()
        image.save(buffer, format='PNG')
        image_data = buffer.getvalue()

        # If under limit, return as-is
        if len(image_data) <= max_size_bytes:
            return base64.b64encode(image_data).decode('utf-8'), "image/png"

        # Need to compress - try JPEG with decreasing quality
        print(f"   ⚠️ Image too large ({len(image_data) / 1024 / 1024:.1f}MB), compressing...")

        # Convert to RGB if necessary (JPEG doesn't support alpha)
        if image.mode in ('RGBA', 'P'):
            # Flatten onto a white background; RGBA uses its own alpha as mask.
            rgb_image = Image.new('RGB', image.size, (255, 255, 255))
            rgb_image.paste(image, mask=image.split()[-1] if image.mode == 'RGBA' else None)
            image = rgb_image

        # Try decreasing quality levels
        for quality in [85, 70, 50, 30]:
            buffer = io.BytesIO()
            image.save(buffer, format='JPEG', quality=quality, optimize=True)
            image_data = buffer.getvalue()

            if len(image_data) <= max_size_bytes:
                print(f"   ✅ Compressed to {len(image_data) / 1024 / 1024:.1f}MB (JPEG quality={quality})")
                return base64.b64encode(image_data).decode('utf-8'), "image/jpeg"

        # If still too large, resize the image (always from the original
        # full-size image, at fixed quality=50, shrinking 10% per attempt)
        scale = 0.75
        while len(image_data) > max_size_bytes and scale > 0.25:
            new_size = (int(image.width * scale), int(image.height * scale))
            resized = image.resize(new_size, Image.Resampling.LANCZOS)

            buffer = io.BytesIO()
            resized.save(buffer, format='JPEG', quality=50, optimize=True)
            image_data = buffer.getvalue()

            if len(image_data) <= max_size_bytes:
                print(f"   ✅ Compressed to {len(image_data) / 1024 / 1024:.1f}MB (resized to {scale:.0%})")
                return base64.b64encode(image_data).decode('utf-8'), "image/jpeg"

            scale -= 0.1

        # Final fallback - aggressive resize to 25% at low quality.
        # NOTE(review): this result is returned even if it still exceeds
        # max_size_bytes - confirm the API call path tolerates that.
        print(f"   ⚠️ Using aggressive compression")
        new_size = (int(image.width * 0.25), int(image.height * 0.25))
        resized = image.resize(new_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        resized.save(buffer, format='JPEG', quality=30, optimize=True)
        image_data = buffer.getvalue()

        return base64.b64encode(image_data).decode('utf-8'), "image/jpeg"

    def analyze_page(self, image: Image.Image, page_type: str) -> Dict:
        """
        Analyze a page image using Claude's vision capabilities.

        Args:
            image: PIL Image of the page
            page_type: Type of page ('title_page', 'toc_page', 'content_page')

        Returns:
            Analysis results as dictionary
        """
        # Use fallback analysis if Claude is not available
        if not self.client:
            return self._fallback_analysis(image, page_type)

        try:
            # Convert image to base64 (with compression if needed)
            image_b64, media_type = self.image_to_base64(image)

            # Get appropriate prompt (unknown page types use the content prompt)
            prompt = self.validation_prompts.get(page_type, self.validation_prompts['content_page'])

            if self.rendering_instructions:
                # Append content-type-specific rendering rules so deviations
                # from the spec are flagged as issues by the model.
                prompt += (
                    "\n\n## Document Type Rendering Instructions\n"
                    "The following are the rendering instructions for this document type. "
                    "Evaluate whether the page conforms to these specifications and flag "
                    "any deviations as issues:\n\n"
                    + self.rendering_instructions
                )

            # Analyze with Claude: one user message containing the image
            # followed by the text prompt.
            response = self.client.messages.create(
                model=self.model,
                max_tokens=2500,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": media_type,
                                    "data": image_b64
                                }
                            },
                            {
                                "type": "text",
                                "text": prompt
                            }
                        ]
                    }
                ]
            )

            # Parse JSON response
            import json
            import re
            response_text = response.content[0].text

            # Extract JSON from response (handle cases where there's extra text)
            json_start = response_text.find('{')
            json_end = response_text.rfind('}') + 1
            if json_start == -1 or json_end == 0:
                print("❌ No JSON found in response")
                return self._fallback_analysis(image, page_type)

            json_content = response_text[json_start:json_end]

            # Aggressive sanitization - encode the string to ASCII, ignoring errors
            # Then decode back, which removes any problematic characters
            json_content = json_content.encode('ascii', errors='ignore').decode('ascii')

            # Also remove any remaining control characters (0x00-0x1F except \n \r \t)
            json_content = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', json_content)

            # Try to parse with strict=False to be more lenient
            # (strict=False permits control characters inside strings)
            try:
                analysis_result = json.loads(json_content, strict=False)
            except json.JSONDecodeError as e:
                # Last resort: try to extract just the key values we need
                # from the malformed JSON via regex.
                try:
                    # Extract scores using regex
                    overall_match = re.search(r'"overall_score"\s*:\s*([\d.]+)', json_content)
                    overall_score = float(overall_match.group(1)) if overall_match else 7.0

                    # Extract issues
                    issues_match = re.search(r'"issues_found"\s*:\s*\[(.*?)\]', json_content, re.DOTALL)
                    issues = []
                    if issues_match:
                        issues = re.findall(r'"([^"]+)"', issues_match.group(1))

                    analysis_result = {
                        "scores": {},
                        "overall_score": overall_score,
                        "issues_found": issues[:3],
                        "strengths_found": [],
                        "detailed_feedback": "Partial parse from malformed JSON"
                    }
                except Exception:
                    # Even the regex salvage failed; report the original
                    # decode error and fall back to heuristics.
                    print(f"❌ JSON parse failed completely: {e}")
                    return self._fallback_analysis(image, page_type)

            return analysis_result

        except Exception as e:
            # Any API/conversion failure degrades to the heuristic analysis.
            print(f"❌ Error analyzing page with Claude: {e}")
            return self._fallback_analysis(image, page_type)

    def _fallback_analysis(self, image: Image.Image, page_type: str) -> Dict:
        """Provide basic fallback analysis when Claude is not available.

        Scores are fixed heuristics keyed only on image resolution; the
        per-page-type score keys mirror those requested in the prompts.
        """
        width, height = image.size

        # Basic heuristic analysis: same flat score for every element,
        # slightly higher for adequately-sized renders.
        if page_type == 'title_page':
            # For title page, assume it's decent if image is reasonable size
            score = 7.5 if width > 1000 and height > 1000 else 6.0
            issues = ["Visual analysis limited - install Anthropic API for detailed analysis"]
            strengths = ["Page successfully rendered"] if width > 1000 else []
            scores = {
                "title_visibility": score,
                "author_information": score,
                "date_information": score,
                "layout_quality": score,
                "typography": score
            }
        elif page_type == 'toc_page':
            score = 7.0 if width > 1000 and height > 1000 else 6.0
            issues = ["Visual analysis limited - install Anthropic API for detailed analysis"]
            strengths = ["Page successfully rendered"] if width > 1000 else []
            scores = {
                "header_presence": score,
                "content_listing": score,
                "page_numbers": score,
                "hierarchy": score,
                "formatting": score
            }
        else:  # content_page
            score = 7.0 if width > 1000 and height > 1000 else 6.0
            issues = ["Visual analysis limited - install Anthropic API for detailed analysis"]
            strengths = ["Page successfully rendered"] if width > 1000 else []
            scores = {
                "headers_footers": score,
                "page_numbers": score,
                "text_layout": score,
                "typography": score,
                "content_elements": score
            }

        return {
            "scores": scores,
            "overall_score": score,
            "issues_found": issues,
            "strengths_found": strengths,
            "detailed_feedback": f"Basic visual check completed for {page_type}. Page dimensions: {width}x{height}. For detailed analysis, configure Anthropic API key."
        }
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
class VisualQAAgent:
|
|
522
|
+
"""Main Visual QA Agent that orchestrates the entire process."""
|
|
523
|
+
|
|
524
|
+
def __init__(self, api_key: Optional[str] = None, content_source: str = ""):
|
|
525
|
+
"""
|
|
526
|
+
Initialize Visual QA Agent.
|
|
527
|
+
|
|
528
|
+
Args:
|
|
529
|
+
api_key: Anthropic API key (optional, will use environment variable)
|
|
530
|
+
content_source: Content type identifier (e.g. 'research_report') used
|
|
531
|
+
to load rendering instructions from content_types/{id}/type.md
|
|
532
|
+
"""
|
|
533
|
+
self.pdf_converter = PDFToImageConverter()
|
|
534
|
+
self.validator = VisualValidator()
|
|
535
|
+
|
|
536
|
+
rendering_instructions = ""
|
|
537
|
+
if content_source:
|
|
538
|
+
try:
|
|
539
|
+
from tools.content_type_loader import ContentTypeLoader
|
|
540
|
+
loader = ContentTypeLoader()
|
|
541
|
+
type_def = loader.load_type(content_source)
|
|
542
|
+
rendering_instructions = type_def.type_md_content
|
|
543
|
+
except Exception as e:
|
|
544
|
+
print(f"⚠️ Could not load content type '{content_source}': {e}")
|
|
545
|
+
|
|
546
|
+
self.llm_analyzer = MultimodalLLMAnalyzer(api_key, rendering_instructions=rendering_instructions)
|
|
547
|
+
|
|
548
|
+
# Create output directory for images
|
|
549
|
+
self.output_dir = Path("artifacts/reviewed_content/v3_visual_qa")
|
|
550
|
+
self.output_dir.mkdir(parents=True, exist_ok=True)
|
|
551
|
+
|
|
552
|
+
    def validate_pdf_visual_quality(self, pdf_path: str) -> DocumentVisualQA:
        """
        Perform complete visual quality assessment of a PDF.

        Pipeline: render pages to images, save them for reference, analyze
        each page (LLM or heuristic fallback), then aggregate a document
        score, summary and recommendations.

        Args:
            pdf_path: Path to PDF file

        Returns:
            Complete visual QA results
        """
        print(f"🔍 Starting Visual QA for: {pdf_path}")
        print("=" * 60)

        # Convert PDF to images; an empty list means conversion failed.
        images = self.pdf_converter.convert_pdf_to_images(pdf_path)
        if not images:
            return self._create_error_result(pdf_path, "Failed to convert PDF to images")

        # Save images for reference
        # NOTE(review): image_paths is never used afterwards - the save is
        # purely a side effect for manual inspection.
        image_paths = self.pdf_converter.save_images(
            images,
            str(self.output_dir / "page_images"),
            "page"
        )

        # Analyze each page
        page_results = []
        total_score = 0

        for i, image in enumerate(images, 1):
            print(f"\n📄 Analyzing page {i}/{len(images)}...")

            # Detect page type (positional heuristic: 1=title, 2=toc)
            page_type = self.validator.detect_page_type(i, len(images))
            print(f"   Detected page type: {page_type}")

            # Basic validation
            # NOTE(review): basic_checks is computed but not folded into the
            # result below - confirm whether it should influence the score.
            basic_checks = self.validator.validate_basic_structure(image, page_type)

            # LLM analysis (or heuristic fallback when no API is configured)
            llm_analysis = self.llm_analyzer.analyze_page(image, page_type)

            # Combine results; missing keys default to empty/zero values.
            page_result = VisualValidationResult(
                page_number=i,
                page_type=page_type,
                overall_score=llm_analysis.get('overall_score', 0),
                issues_found=llm_analysis.get('issues_found', []),
                strengths_found=llm_analysis.get('strengths_found', []),
                detailed_feedback=llm_analysis.get('detailed_feedback', ''),
                element_scores=llm_analysis.get('scores', {})
            )

            page_results.append(page_result)
            total_score += page_result.overall_score

            print(f"   Score: {page_result.overall_score:.1f}/10")
            if page_result.issues_found:
                print(f"   Issues: {len(page_result.issues_found)} found")

        # Calculate overall score: pages are scored 0-10, so the mean is
        # scaled by 10 to yield a 0-100 document score.
        overall_score = (total_score / len(images)) * 10 if images else 0  # Convert to 0-100 scale

        # Generate summary and recommendations
        summary, recommendations = self._generate_summary(page_results, overall_score)

        # Create final result
        result = DocumentVisualQA(
            pdf_path=pdf_path,
            total_pages=len(images),
            overall_score=overall_score,
            page_results=page_results,
            summary=summary,
            recommendations=recommendations,
            timestamp=datetime.now().isoformat()
        )

        print("\n" + "=" * 60)
        print(f"🎯 Visual QA Complete!")
        print(f"   Overall Score: {overall_score:.1f}/100")
        print(f"   Pages Analyzed: {len(images)}")
        print(f"   Issues Found: {sum(len(p.issues_found) for p in page_results)}")

        return result
|
|
636
|
+
|
|
637
|
+
def _create_error_result(self, pdf_path: str, error_message: str) -> DocumentVisualQA:
|
|
638
|
+
"""Create error result when analysis fails."""
|
|
639
|
+
return DocumentVisualQA(
|
|
640
|
+
pdf_path=pdf_path,
|
|
641
|
+
total_pages=0,
|
|
642
|
+
overall_score=0,
|
|
643
|
+
page_results=[],
|
|
644
|
+
summary=f"Visual QA failed: {error_message}",
|
|
645
|
+
recommendations=["Fix PDF conversion issues and retry"],
|
|
646
|
+
timestamp=datetime.now().isoformat()
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
def _generate_summary(self, page_results: List[VisualValidationResult], overall_score: float) -> Tuple[str, List[str]]:
|
|
650
|
+
"""Generate summary and recommendations based on page results."""
|
|
651
|
+
total_issues = sum(len(p.issues_found) for p in page_results)
|
|
652
|
+
total_strengths = sum(len(p.strengths_found) for p in page_results)
|
|
653
|
+
|
|
654
|
+
# Generate summary
|
|
655
|
+
if overall_score >= 85:
|
|
656
|
+
quality_level = "Excellent"
|
|
657
|
+
elif overall_score >= 75:
|
|
658
|
+
quality_level = "Good"
|
|
659
|
+
elif overall_score >= 60:
|
|
660
|
+
quality_level = "Acceptable"
|
|
661
|
+
else:
|
|
662
|
+
quality_level = "Needs Improvement"
|
|
663
|
+
|
|
664
|
+
summary = f"""Visual Quality Assessment: {quality_level} ({overall_score:.1f}/100)
|
|
665
|
+
|
|
666
|
+
Analyzed {len(page_results)} pages with {total_issues} issues identified and {total_strengths} strengths noted.
|
|
667
|
+
Pages include: {', '.join(set(p.page_type.replace('_', ' ').title() for p in page_results))}"""
|
|
668
|
+
|
|
669
|
+
# Generate recommendations
|
|
670
|
+
recommendations = []
|
|
671
|
+
|
|
672
|
+
# Collect common issues
|
|
673
|
+
all_issues = []
|
|
674
|
+
for page in page_results:
|
|
675
|
+
all_issues.extend(page.issues_found)
|
|
676
|
+
|
|
677
|
+
# Group similar issues
|
|
678
|
+
if any('title' in issue.lower() for issue in all_issues):
|
|
679
|
+
recommendations.append("Review title page formatting and ensure all elements are visible")
|
|
680
|
+
|
|
681
|
+
if any('table of contents' in issue.lower() or 'toc' in issue.lower() for issue in all_issues):
|
|
682
|
+
recommendations.append("Fix table of contents formatting and alignment issues")
|
|
683
|
+
|
|
684
|
+
if any('header' in issue.lower() or 'footer' in issue.lower() for issue in all_issues):
|
|
685
|
+
recommendations.append("Ensure consistent headers and footers across all pages")
|
|
686
|
+
|
|
687
|
+
if any('spacing' in issue.lower() or 'margin' in issue.lower() for issue in all_issues):
|
|
688
|
+
recommendations.append("Adjust spacing and margins for better visual consistency")
|
|
689
|
+
|
|
690
|
+
if any('font' in issue.lower() or 'typography' in issue.lower() for issue in all_issues):
|
|
691
|
+
recommendations.append("Review typography choices for consistency and readability")
|
|
692
|
+
|
|
693
|
+
if not recommendations:
|
|
694
|
+
if overall_score >= 85:
|
|
695
|
+
recommendations.append("Document visual quality is excellent - ready for publication")
|
|
696
|
+
else:
|
|
697
|
+
recommendations.append("Review identified issues and consider LaTeX template improvements")
|
|
698
|
+
|
|
699
|
+
return summary, recommendations
|
|
700
|
+
|
|
701
|
+
def save_report(self, result: DocumentVisualQA, output_path: Optional[str] = None) -> str:
    """Render *result* as a Markdown report and write it to disk.

    Args:
        result: Aggregated visual QA outcome for one PDF (summary, per-page
            results, recommendations).
        output_path: Destination file. When ``None``, a timestamped
            ``visual_qa_report_<ts>.md`` is created under ``self.output_dir``.

    Returns:
        The path the report was written to (same as ``output_path`` when given).
    """
    if output_path is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = str(self.output_dir / f"visual_qa_report_{timestamp}.md")

    # Assemble the report as a list of fragments and join once at the end;
    # repeated `str +=` in loops is potentially quadratic.
    parts = [f"""# Visual Quality Assessment Report

**Document:** {result.pdf_path}
**Generated:** {result.timestamp}
**Overall Score:** {result.overall_score:.1f}/100

## Summary

{result.summary}

## Page-by-Page Analysis

"""]

    parts.extend(self._render_page_section(page) for page in result.page_results)

    # Recommendations: section header followed by one bullet per item.
    parts.append("## Recommendations\n\n")
    parts.extend(f"- {rec}\n" for rec in result.recommendations)

    # 85/100 is the publication-readiness threshold used throughout this tool.
    verdict = (
        '✅ Document ready for publication'
        if result.overall_score >= 85
        else '⚠️ Address identified issues before final publication'
    )
    parts.append(f"""
## Next Steps

{verdict}

**Generated by DeepAgents PrintShop Visual QA System**
""")

    report_content = "".join(parts)

    # Save report
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(report_content)

    print(f"📄 Visual QA report saved: {output_path}")
    return output_path

def _render_page_section(self, page) -> str:
    """Return the Markdown fragment for a single page's analysis."""
    lines = [f"""### Page {page.page_number} ({page.page_type.replace('_', ' ').title()})

**Score:** {page.overall_score:.1f}/10

**Element Scores:**
"""]
    for element, score in page.element_scores.items():
        lines.append(f"- {element.replace('_', ' ').title()}: {score}/10\n")

    if page.issues_found:
        lines.append("\n**Issues Found:**\n")
        lines.extend(f"- {issue}\n" for issue in page.issues_found)

    if page.strengths_found:
        lines.append("\n**Strengths:**\n")
        lines.extend(f"- {strength}\n" for strength in page.strengths_found)

    lines.append(f"\n**Detailed Feedback:**\n{page.detailed_feedback}\n\n")
    return "".join(lines)
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
def main(pdf_path: str = "artifacts/output/research_report.pdf") -> None:
    """Smoke-test the Visual QA system against a generated PDF.

    Args:
        pdf_path: PDF to assess. Defaults to the standard research-report
            output location so the script still runs with no arguments.
            (Generalized from a hard-coded constant; default preserves the
            original behavior.)
    """
    if not os.path.exists(pdf_path):
        print(f"❌ PDF not found: {pdf_path}")
        return

    print("🔍 Testing Visual QA System")
    print("=" * 50)

    # Initialize Visual QA Agent
    try:
        agent = VisualQAAgent()

        # Run visual quality assessment
        result = agent.validate_pdf_visual_quality(pdf_path)

        # Save report
        report_path = agent.save_report(result)

        print("\n✅ Visual QA Complete!")
        print(f"📊 Overall Score: {result.overall_score:.1f}/100")
        print(f"📄 Report: {report_path}")

    except Exception as e:
        # Best-effort smoke test: report the failure with a traceback
        # instead of propagating and crashing the caller.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
# Allow running this module directly as a standalone QA smoke test.
if __name__ == "__main__":
    main()
|