deepagents-printshop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. agents/content_editor/__init__.py +1 -0
  2. agents/content_editor/agent.py +279 -0
  3. agents/content_editor/content_reviewer.py +327 -0
  4. agents/content_editor/versioned_agent.py +455 -0
  5. agents/latex_specialist/__init__.py +1 -0
  6. agents/latex_specialist/agent.py +531 -0
  7. agents/latex_specialist/latex_analyzer.py +510 -0
  8. agents/latex_specialist/latex_optimizer.py +1192 -0
  9. agents/qa_orchestrator/__init__.py +1 -0
  10. agents/qa_orchestrator/agent.py +603 -0
  11. agents/qa_orchestrator/langgraph_workflow.py +733 -0
  12. agents/qa_orchestrator/pipeline_types.py +72 -0
  13. agents/qa_orchestrator/quality_gates.py +495 -0
  14. agents/qa_orchestrator/workflow_coordinator.py +139 -0
  15. agents/research_agent/__init__.py +1 -0
  16. agents/research_agent/agent.py +258 -0
  17. agents/research_agent/llm_report_generator.py +1023 -0
  18. agents/research_agent/report_generator.py +536 -0
  19. agents/visual_qa/__init__.py +1 -0
  20. agents/visual_qa/agent.py +410 -0
  21. deepagents_printshop-0.1.0.dist-info/METADATA +744 -0
  22. deepagents_printshop-0.1.0.dist-info/RECORD +37 -0
  23. deepagents_printshop-0.1.0.dist-info/WHEEL +4 -0
  24. deepagents_printshop-0.1.0.dist-info/entry_points.txt +2 -0
  25. deepagents_printshop-0.1.0.dist-info/licenses/LICENSE +86 -0
  26. tools/__init__.py +1 -0
  27. tools/change_tracker.py +419 -0
  28. tools/content_type_loader.py +171 -0
  29. tools/graph_generator.py +281 -0
  30. tools/latex_generator.py +374 -0
  31. tools/llm_latex_generator.py +678 -0
  32. tools/magazine_layout.py +462 -0
  33. tools/pattern_injector.py +250 -0
  34. tools/pattern_learner.py +477 -0
  35. tools/pdf_compiler.py +386 -0
  36. tools/version_manager.py +346 -0
  37. tools/visual_qa.py +799 -0
tools/visual_qa.py ADDED
@@ -0,0 +1,799 @@
1
+ """Visual Quality Assurance for PDF documents."""
2
+
3
+ import os
4
+ import base64
5
+ import io
6
+ from pathlib import Path
7
+ from typing import List, Dict, Optional, Tuple
8
+ from dataclasses import dataclass
9
+ from datetime import datetime
10
+
11
+ from pdf2image import convert_from_path
12
+ from PIL import Image
13
+
14
+ # Make anthropic import optional
15
+ try:
16
+ import anthropic
17
+ ANTHROPIC_AVAILABLE = True
18
+ except ImportError:
19
+ ANTHROPIC_AVAILABLE = False
20
+ print("⚠️ Anthropic not available - visual analysis will be limited")
21
+
22
+
23
@dataclass
class VisualValidationResult:
    """Result of visual validation for a single page."""

    page_number: int  # 1-based position of the page in the PDF
    page_type: str  # 'title_page', 'toc_page' or 'content_page'
    overall_score: float  # page score on the analyzer's 10-point scale
    issues_found: List[str]
    strengths_found: List[str]
    detailed_feedback: str
    element_scores: Dict[str, float]  # per-element scores keyed by element name
33
+
34
+
35
@dataclass
class DocumentVisualQA:
    """Aggregated visual QA outcome for one PDF document."""

    pdf_path: str  # path of the PDF that was assessed
    total_pages: int  # number of page images analysed
    overall_score: float  # document-level score
    page_results: List[VisualValidationResult]  # one entry per analysed page
    summary: str
    recommendations: List[str]
    timestamp: str
45
+
46
+
47
class PDFToImageConverter:
    """Render the pages of a PDF as PIL images for visual inspection."""

    def __init__(self, dpi: int = 300):
        """
        Remember the rendering resolution.

        Args:
            dpi: Resolution handed to pdf2image when rasterising pages
        """
        self.dpi = dpi

    def convert_pdf_to_images(self, pdf_path: str) -> List[Image.Image]:
        """
        Rasterise every page of a PDF.

        Args:
            pdf_path: Path to PDF file

        Returns:
            One PIL Image per page, or an empty list when conversion raised
        """
        try:
            pages = convert_from_path(pdf_path, dpi=self.dpi)
            print(f"✅ Converted PDF to {len(pages)} page images")
            return pages
        except Exception as exc:
            # Best-effort: the error is reported and an empty list returned.
            print(f"❌ Error converting PDF to images: {exc}")
            return []

    def save_images(self, images: List[Image.Image], output_dir: str, prefix: str = "page") -> List[str]:
        """Write each image as a numbered PNG under output_dir and return the paths."""
        target_dir = Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

        written: List[str] = []
        for number, page in enumerate(images, start=1):
            destination = target_dir / f"{prefix}_{number:02d}.png"
            page.save(destination, 'PNG')
            written.append(str(destination))

        return written
90
+
91
+
92
class VisualValidator:
    """Rule tables and cheap programmatic checks for rendered pages."""

    def __init__(self):
        self.validation_rules = self._init_validation_rules()

    def _init_validation_rules(self) -> Dict:
        """Build the per-page-type table of required elements and checks."""
        title_rules = {
            'required_elements': ['title', 'author', 'date'],
            'layout_checks': ['centered', 'proper_spacing'],
            'typography_checks': ['title_size', 'font_consistency'],
        }
        toc_rules = {
            'required_elements': ['toc_header', 'section_list', 'page_numbers'],
            'layout_checks': ['alignment', 'indentation', 'spacing'],
            'typography_checks': ['consistent_fonts', 'number_alignment'],
        }
        content_rules = {
            'required_elements': ['header', 'footer', 'page_number'],
            'layout_checks': ['margins', 'line_spacing', 'paragraph_structure'],
            'typography_checks': ['font_consistency', 'heading_hierarchy'],
        }
        return {
            'title_page': title_rules,
            'toc_page': toc_rules,
            'content_page': content_rules,
        }

    def detect_page_type(self, page_number: int, total_pages: int) -> str:
        """Classify a page purely by its 1-based position in the document."""
        # total_pages is accepted but not consulted by this positional heuristic.
        if page_number == 1:
            return 'title_page'
        if page_number == 2:
            return 'toc_page'
        return 'content_page'

    def validate_basic_structure(self, image: Image.Image, page_type: str) -> Dict:
        """Return size-derived facts about a page image (page_type is not used)."""
        width, height = image.size

        return {
            'image_dimensions': (width, height),
            'aspect_ratio': width / height,
            'is_portrait': height > width,
            'resolution_adequate': width >= 1000 and height >= 1000,
            'file_size_reasonable': True,  # placeholder; no file size is measured
        }
143
+
144
+
145
class MultimodalLLMAnalyzer:
    """Use Claude's vision capabilities for detailed visual analysis."""

    # Heuristic used when no API client is available: for each page type, the
    # score awarded to an adequately sized render and the element names that
    # score is reported under. Unknown page types use the content-page entry.
    _FALLBACK_PROFILES = {
        'title_page': (7.5, (
            "title_visibility",
            "author_information",
            "date_information",
            "layout_quality",
            "typography",
        )),
        'toc_page': (7.0, (
            "header_presence",
            "content_listing",
            "page_numbers",
            "hierarchy",
            "formatting",
        )),
        'content_page': (7.0, (
            "headers_footers",
            "page_numbers",
            "text_layout",
            "typography",
            "content_elements",
        )),
    }

    def __init__(self, api_key: Optional[str] = None, rendering_instructions: str = "", model: str = ""):
        """
        Initialize Claude analyzer.

        Args:
            api_key: Anthropic API key (will use environment variable if None)
            rendering_instructions: Content type rendering instructions to append to prompts
            model: Vision model to use (defaults to VISUAL_QA_MODEL env var or claude-sonnet-4-20250514)
        """
        self.rendering_instructions = rendering_instructions
        self.model = model or os.getenv("VISUAL_QA_MODEL", "claude-sonnet-4-20250514")

        # Without a client every analysis goes through _fallback_analysis.
        self.client = None
        self.api_key = None
        if not ANTHROPIC_AVAILABLE:
            print("⚠️ Anthropic not available - using fallback analysis")
        else:
            self.api_key = api_key or os.getenv('ANTHROPIC_API_KEY')
            if not self.api_key:
                print("⚠️ ANTHROPIC_API_KEY not found - using fallback analysis")
            else:
                self.client = anthropic.Anthropic(api_key=self.api_key)

        self.validation_prompts = self._init_validation_prompts()

    def _init_validation_prompts(self) -> Dict[str, str]:
        """Initialize validation prompts for different page types."""
        return {
            'title_page': """
Analyze this title page image for a research document. Evaluate the following aspects and provide scores (1-10) for each:

1. **Title Visibility** (1-10): Is the document title clearly visible, properly sized, and well-positioned?
2. **Author Information** (1-10): Is the author name present and appropriately placed?
3. **Date Information** (1-10): Is the date shown and properly formatted?
4. **Layout Quality** (1-10): Is the content centered and professionally arranged? Are borders or diagrams overlapping?
5. **Typography** (1-10): Are fonts appropriate, consistent, and readable?

**CRITICAL CHECK - LaTeX Syntax Detection:**
⚠️ **RED FLAG**: Check if any LaTeX code or commands are visible in the rendered PDF (e.g., \\textbf{}, \\section{}, \\begin{}, \\usepackage{}, etc.).
- If ANY LaTeX syntax is visible in the output, this is a CRITICAL FAILURE
- The PDF should show formatted text, not raw LaTeX commands
- Score must be reduced to 1/10 if LaTeX syntax is detected
- Add "CRITICAL: Visible LaTeX syntax detected" to issues_found

**Disclaimer/Notice Check:**
- Is there a disclaimer or notice visible on the title/cover page (e.g., an AI-generation disclaimer or similar notice)?
- If the rendering instructions require a disclaimer but none is visible, flag it as an issue.

Also identify:
- Any missing elements that should be present
- Formatting issues or visual problems
- Suggestions for improvement

Provide your response in this JSON format:
{
"scores": {
"title_visibility": <score>,
"author_information": <score>,
"date_information": <score>,
"layout_quality": <score>,
"typography": <score>
},
"overall_score": <average_score>,
"issues_found": ["list", "of", "issues"],
"strengths_found": ["list", "of", "strengths"],
"detailed_feedback": "Comprehensive analysis of the page quality and specific recommendations"
}
""",

            'toc_page': """
Examine this table of contents page. Evaluate these aspects with scores (1-10):

1. **Header Presence** (1-10): Is there a clear "Table of Contents" or similar header?
2. **Content Listing** (1-10): Are sections/chapters properly listed?
3. **Page Numbers** (1-10): Are page numbers present and aligned correctly?
4. **Hierarchy** (1-10): Is the section hierarchy clear with proper indentation?
5. **Formatting** (1-10): Is the overall formatting clean and professional?

**CRITICAL CHECK - LaTeX Syntax Detection:**
⚠️ **RED FLAG**: Check if any LaTeX code or commands are visible in the rendered PDF (e.g., \\textbf{}, \\section{}, \\begin{}, \\usepackage{}, etc.).
- If ANY LaTeX syntax is visible in the output, this is a CRITICAL FAILURE
- The PDF should show formatted text, not raw LaTeX commands
- Score must be reduced to 1/10 if LaTeX syntax is detected
- Add "CRITICAL: Visible LaTeX syntax detected" to issues_found

Identify:
- Missing or malformed elements
- Alignment and spacing issues
- Typography and readability concerns

Respond in JSON format:
{
"scores": {
"header_presence": <score>,
"content_listing": <score>,
"page_numbers": <score>,
"hierarchy": <score>,
"formatting": <score>
},
"overall_score": <average_score>,
"issues_found": ["list", "of", "issues"],
"strengths_found": ["list", "of", "strengths"],
"detailed_feedback": "Detailed analysis and recommendations"
}
""",

            'content_page': """
Analyze this content page for visual quality. Score these elements (1-10):

1. **Headers/Footers** (1-10): Are headers and footers present, consistent, and well-formatted?
2. **Page Numbers** (1-10): Is the page number visible and properly positioned?
3. **Text Layout** (1-10): Are margins, spacing, and text flow appropriate?
4. **Typography** (1-10): Are fonts consistent, readable, and properly sized?
5. **Content Elements** (1-10): Are tables, figures, or other elements well-formatted?

**CRITICAL CHECK - LaTeX Syntax Detection:**
⚠️ **RED FLAG**: Check if any LaTeX code or commands are visible in the rendered PDF (e.g., \\textbf{}, \\section{}, \\begin{}, \\usepackage{}, \\cite{}, \\ref{}, etc.).
- If ANY LaTeX syntax is visible in the output, this is a CRITICAL FAILURE
- The PDF should show formatted text, not raw LaTeX commands
- Score must be reduced to 1/10 if LaTeX syntax is detected
- Add "CRITICAL: Visible LaTeX syntax detected" to issues_found

**Production Credit / Citation Check (especially on the last pages):**
- Is there a production credit or citation at the end of the document (e.g., "Typeset by..." or similar)?
- If the rendering instructions require a PrintShop citation but none is visible, flag it as an issue.

Look for:
- Inconsistent formatting
- Poor spacing or alignment
- Missing page elements
- Typography issues

JSON response format:
{
"scores": {
"headers_footers": <score>,
"page_numbers": <score>,
"text_layout": <score>,
"typography": <score>,
"content_elements": <score>
},
"overall_score": <average_score>,
"issues_found": ["list", "of", "issues"],
"strengths_found": ["list", "of", "strengths"],
"detailed_feedback": "Comprehensive quality assessment and improvement suggestions"
}
"""
        }

    @staticmethod
    def _prepare_for_jpeg(image: Image.Image) -> Image.Image:
        """Return an image in a mode the JPEG encoder accepts, flattening any alpha onto white."""
        if image.mode in ('RGB', 'L'):
            return image

        has_alpha = image.mode in ('RGBA', 'LA', 'PA') or (
            image.mode == 'P' and 'transparency' in image.info
        )
        if has_alpha:
            rgba = image.convert('RGBA')
            canvas = Image.new('RGB', rgba.size, (255, 255, 255))
            canvas.paste(rgba, mask=rgba.split()[-1])
            return canvas

        return image.convert('RGB')

    def image_to_base64(self, image: Image.Image, max_size_bytes: int = 5 * 1024 * 1024) -> Tuple[str, str]:
        """
        Convert PIL Image to base64 string, compressing if needed.

        PNG is used when it fits. Otherwise the image is re-encoded as JPEG
        at decreasing quality, then at decreasing scale, until the encoded
        bytes fit within max_size_bytes.

        Args:
            image: PIL Image to convert
            max_size_bytes: Maximum size in bytes (default 5MB for Claude API)

        Returns:
            Tuple of (base64_string, media_type)
        """
        def as_b64(raw: bytes) -> str:
            return base64.b64encode(raw).decode('utf-8')

        def encode_jpeg(img: Image.Image, quality: int) -> bytes:
            out = io.BytesIO()
            img.save(out, format='JPEG', quality=quality, optimize=True)
            return out.getvalue()

        # First try PNG
        buffer = io.BytesIO()
        image.save(buffer, format='PNG')
        image_data = buffer.getvalue()
        if len(image_data) <= max_size_bytes:
            return as_b64(image_data), "image/png"

        print(f" ⚠️ Image too large ({len(image_data) / 1024 / 1024:.1f}MB), compressing...")

        # JPEG cannot store alpha or palette data; the original only handled
        # RGBA and P, so modes such as LA failed at save time.
        image = self._prepare_for_jpeg(image)

        # Try decreasing quality levels at full resolution
        for quality in (85, 70, 50, 30):
            image_data = encode_jpeg(image, quality)
            if len(image_data) <= max_size_bytes:
                print(f" ✅ Compressed to {len(image_data) / 1024 / 1024:.1f}MB (JPEG quality={quality})")
                return as_b64(image_data), "image/jpeg"

        # Still too large: shrink the page. Integer percentages avoid the
        # float drift of repeatedly subtracting 0.1.
        for percent in (75, 65, 55, 45, 35, 25):
            scale = percent / 100
            new_size = (max(1, int(image.width * scale)), max(1, int(image.height * scale)))
            resized = image.resize(new_size, Image.Resampling.LANCZOS)
            image_data = encode_jpeg(resized, 50)
            if len(image_data) <= max_size_bytes:
                print(f" ✅ Compressed to {len(image_data) / 1024 / 1024:.1f}MB (resized to {scale:.0%})")
                return as_b64(image_data), "image/jpeg"

        # Final fallback: keep halving until the payload fits. The original
        # returned the first attempt without checking the limit.
        print(f" ⚠️ Using aggressive compression")
        scale = 0.25
        while True:
            new_size = (max(1, int(image.width * scale)), max(1, int(image.height * scale)))
            resized = image.resize(new_size, Image.Resampling.LANCZOS)
            image_data = encode_jpeg(resized, 30)
            if len(image_data) <= max_size_bytes or new_size == (1, 1):
                break
            scale /= 2

        return as_b64(image_data), "image/jpeg"

    def analyze_page(self, image: Image.Image, page_type: str) -> Dict:
        """
        Analyze a page image using Claude's vision capabilities.

        Falls back to the size-based heuristic when no client is configured,
        when the request raises, or when no usable JSON can be recovered.

        Args:
            image: PIL Image of the page
            page_type: Type of page ('title_page', 'toc_page', 'content_page')

        Returns:
            Analysis results as dictionary
        """
        if not self.client:
            return self._fallback_analysis(image, page_type)

        try:
            image_b64, media_type = self.image_to_base64(image)

            # Unknown page types are judged with the content-page prompt.
            prompt = self.validation_prompts.get(page_type, self.validation_prompts['content_page'])

            if self.rendering_instructions:
                prompt += (
                    "\n\n## Document Type Rendering Instructions\n"
                    "The following are the rendering instructions for this document type. "
                    "Evaluate whether the page conforms to these specifications and flag "
                    "any deviations as issues:\n\n"
                    + self.rendering_instructions
                )

            response = self.client.messages.create(
                model=self.model,
                max_tokens=2500,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": media_type,
                                    "data": image_b64
                                }
                            },
                            {
                                "type": "text",
                                "text": prompt
                            }
                        ]
                    }
                ]
            )

            analysis_result = self._parse_analysis_response(response.content[0].text)

        except Exception as e:
            print(f"❌ Error analyzing page with Claude: {e}")
            return self._fallback_analysis(image, page_type)

        if analysis_result is None:
            return self._fallback_analysis(image, page_type)
        return analysis_result

    def _parse_analysis_response(self, response_text: str) -> Optional[Dict]:
        """
        Extract the analysis dictionary from the model's reply.

        Args:
            response_text: Raw text of the reply, possibly with prose around the JSON

        Returns:
            Parsed analysis, or None when nothing usable could be recovered
        """
        import json
        import re

        # Extract JSON from response (handle cases where there's extra text)
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        # "<=" also rejects a closing brace before the first opening one,
        # which previously yielded an empty slice and a made-up score.
        if json_start == -1 or json_end <= json_start:
            print("❌ No JSON found in response")
            return None

        json_content = response_text[json_start:json_end]

        # Parse the untouched text first so valid non-ASCII feedback survives;
        # strict=False already tolerates raw control characters in strings.
        try:
            return self._normalise_overall_score(json.loads(json_content, strict=False))
        except json.JSONDecodeError:
            pass

        # Aggressive sanitization, now only for text that failed to parse:
        # drop non-ASCII and control characters (except \n \r \t).
        sanitized = json_content.encode('ascii', errors='ignore').decode('ascii')
        sanitized = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', sanitized)

        try:
            return self._normalise_overall_score(json.loads(sanitized, strict=False))
        except json.JSONDecodeError as parse_error:
            # Last resort: pull out just the key values we need with regexes.
            try:
                overall_match = re.search(r'"overall_score"\s*:\s*([\d.]+)', sanitized)
                overall_score = float(overall_match.group(1)) if overall_match else 7.0

                issues_match = re.search(r'"issues_found"\s*:\s*\[(.*?)\]', sanitized, re.DOTALL)
                issues = re.findall(r'"([^"]+)"', issues_match.group(1)) if issues_match else []
            except ValueError:
                # e.g. "1.2.3" matched the numeric pattern but is not a float
                print(f"❌ JSON parse failed completely: {parse_error}")
                return None

            return {
                "scores": {},
                "overall_score": overall_score,
                "issues_found": issues[:3],
                "strengths_found": [],
                "detailed_feedback": "Partial parse from malformed JSON"
            }

    @staticmethod
    def _normalise_overall_score(result: Dict) -> Dict:
        """Ensure 'overall_score' is numeric, deriving it from element scores when it is not."""
        overall = result.get('overall_score')
        if isinstance(overall, (int, float)):
            return result

        try:
            result['overall_score'] = float(overall)
        except (TypeError, ValueError):
            scores = result.get('scores')
            numeric = [
                value for value in (scores.values() if isinstance(scores, dict) else ())
                if isinstance(value, (int, float)) and not isinstance(value, bool)
            ]
            if numeric:
                result['overall_score'] = sum(numeric) / len(numeric)
            else:
                # Leave the key absent so callers apply their own default.
                result.pop('overall_score', None)
        return result

    def _fallback_analysis(self, image: Image.Image, page_type: str) -> Dict:
        """Provide basic fallback analysis when Claude is not available."""
        width, height = image.size

        good_score, element_names = self._FALLBACK_PROFILES.get(
            page_type, self._FALLBACK_PROFILES['content_page']
        )
        # Every element receives the same size-based score.
        score = good_score if width > 1000 and height > 1000 else 6.0

        return {
            "scores": {name: score for name in element_names},
            "overall_score": score,
            "issues_found": ["Visual analysis limited - install Anthropic API for detailed analysis"],
            "strengths_found": ["Page successfully rendered"] if width > 1000 else [],
            "detailed_feedback": f"Basic visual check completed for {page_type}. Page dimensions: {width}x{height}. For detailed analysis, configure Anthropic API key."
        }
519
+
520
+
521
class VisualQAAgent:
    """Main Visual QA Agent that orchestrates the entire process."""

    def __init__(self, api_key: Optional[str] = None, content_source: str = ""):
        """
        Initialize Visual QA Agent.

        Args:
            api_key: Anthropic API key (optional, will use environment variable)
            content_source: Content type identifier (e.g. 'research_report') used
                to load rendering instructions from content_types/{id}/type.md
        """
        self.pdf_converter = PDFToImageConverter()
        self.validator = VisualValidator()

        rendering_instructions = ""
        if content_source:
            try:
                from tools.content_type_loader import ContentTypeLoader
                loader = ContentTypeLoader()
                type_def = loader.load_type(content_source)
                rendering_instructions = type_def.type_md_content
            except Exception as e:
                # Rendering instructions are optional; analysis proceeds without them.
                print(f"⚠️ Could not load content type '{content_source}': {e}")

        self.llm_analyzer = MultimodalLLMAnalyzer(api_key, rendering_instructions=rendering_instructions)

        # Create output directory for images
        self.output_dir = Path("artifacts/reviewed_content/v3_visual_qa")
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _to_score(value) -> float:
        """Coerce an analyzer-supplied score to float, using 0.0 for unusable values."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def validate_pdf_visual_quality(self, pdf_path: str) -> DocumentVisualQA:
        """
        Perform complete visual quality assessment of a PDF.

        Args:
            pdf_path: Path to PDF file

        Returns:
            Complete visual QA results
        """
        print(f"🔍 Starting Visual QA for: {pdf_path}")
        print("=" * 60)

        # Convert PDF to images
        images = self.pdf_converter.convert_pdf_to_images(pdf_path)
        if not images:
            return self._create_error_result(pdf_path, "Failed to convert PDF to images")

        # Save images for reference; the returned paths are not needed here.
        self.pdf_converter.save_images(
            images,
            str(self.output_dir / "page_images"),
            "page"
        )

        page_count = len(images)
        page_results = []

        for i, image in enumerate(images, 1):
            print(f"\n📄 Analyzing page {i}/{page_count}...")

            page_type = self.validator.detect_page_type(i, page_count)
            print(f" Detected page type: {page_type}")

            # Basic validation still runs, but its result is not folded into the score.
            self.validator.validate_basic_structure(image, page_type)

            llm_analysis = self.llm_analyzer.analyze_page(image, page_type)

            page_result = VisualValidationResult(
                page_number=i,
                page_type=page_type,
                # The model's JSON is untrusted: a string or null score must
                # not crash the aggregation below.
                overall_score=self._to_score(llm_analysis.get('overall_score', 0)),
                issues_found=llm_analysis.get('issues_found', []),
                strengths_found=llm_analysis.get('strengths_found', []),
                detailed_feedback=llm_analysis.get('detailed_feedback', ''),
                element_scores=llm_analysis.get('scores', {})
            )
            page_results.append(page_result)

            print(f" Score: {page_result.overall_score:.1f}/10")
            if page_result.issues_found:
                print(f" Issues: {len(page_result.issues_found)} found")

        # Mean of the 10-point page scores, converted to a 0-100 scale
        total_score = sum(p.overall_score for p in page_results)
        overall_score = (total_score / page_count) * 10

        summary, recommendations = self._generate_summary(page_results, overall_score)

        result = DocumentVisualQA(
            pdf_path=pdf_path,
            total_pages=page_count,
            overall_score=overall_score,
            page_results=page_results,
            summary=summary,
            recommendations=recommendations,
            timestamp=datetime.now().isoformat()
        )

        print("\n" + "=" * 60)
        print("🎯 Visual QA Complete!")
        print(f" Overall Score: {overall_score:.1f}/100")
        print(f" Pages Analyzed: {page_count}")
        print(f" Issues Found: {sum(len(p.issues_found) for p in page_results)}")

        return result

    def _create_error_result(self, pdf_path: str, error_message: str) -> DocumentVisualQA:
        """Create error result when analysis fails."""
        return DocumentVisualQA(
            pdf_path=pdf_path,
            total_pages=0,
            overall_score=0,
            page_results=[],
            summary=f"Visual QA failed: {error_message}",
            recommendations=["Fix PDF conversion issues and retry"],
            timestamp=datetime.now().isoformat()
        )

    def _generate_summary(self, page_results: List[VisualValidationResult], overall_score: float) -> Tuple[str, List[str]]:
        """
        Generate summary and recommendations based on page results.

        Args:
            page_results: Per-page validation results
            overall_score: Document score on the 0-100 scale

        Returns:
            Tuple of (summary text, list of recommendations)
        """
        total_issues = sum(len(p.issues_found) for p in page_results)
        total_strengths = sum(len(p.strengths_found) for p in page_results)

        if overall_score >= 85:
            quality_level = "Excellent"
        elif overall_score >= 75:
            quality_level = "Good"
        elif overall_score >= 60:
            quality_level = "Acceptable"
        else:
            quality_level = "Needs Improvement"

        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # made the listed order vary from run to run.
        page_kinds = dict.fromkeys(
            p.page_type.replace('_', ' ').title() for p in page_results
        )

        summary = (
            f"Visual Quality Assessment: {quality_level} ({overall_score:.1f}/100)\n"
            "\n"
            f"Analyzed {len(page_results)} pages with {total_issues} issues identified "
            f"and {total_strengths} strengths noted.\n"
            f"Pages include: {', '.join(page_kinds)}"
        )

        # Lower-case every issue once; str() guards against non-string entries
        # coming back from the model.
        lowered_issues = [
            str(issue).lower() for page in page_results for issue in page.issues_found
        ]

        def mentions(*keywords: str) -> bool:
            return any(keyword in issue for issue in lowered_issues for keyword in keywords)

        keyword_recommendations = (
            (('title',), "Review title page formatting and ensure all elements are visible"),
            (('table of contents', 'toc'), "Fix table of contents formatting and alignment issues"),
            (('header', 'footer'), "Ensure consistent headers and footers across all pages"),
            (('spacing', 'margin'), "Adjust spacing and margins for better visual consistency"),
            (('font', 'typography'), "Review typography choices for consistency and readability"),
        )
        recommendations = [
            advice for keywords, advice in keyword_recommendations if mentions(*keywords)
        ]

        if not recommendations:
            if overall_score >= 85:
                recommendations.append("Document visual quality is excellent - ready for publication")
            else:
                recommendations.append("Review identified issues and consider LaTeX template improvements")

        return summary, recommendations

    def save_report(self, result: DocumentVisualQA, output_path: Optional[str] = None) -> str:
        """
        Save visual QA report to file.

        Args:
            result: Visual QA results to render as markdown
            output_path: Destination file; defaults to a timestamped file in
                the agent's output directory

        Returns:
            Path the report was written to
        """
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = str(self.output_dir / f"visual_qa_report_{timestamp}.md")

        # Collected as parts and joined once instead of repeated string +=.
        parts = [
            "# Visual Quality Assessment Report\n\n",
            f"**Document:** {result.pdf_path}\n",
            f"**Generated:** {result.timestamp}\n",
            f"**Overall Score:** {result.overall_score:.1f}/100\n\n",
            "## Summary\n\n",
            f"{result.summary}\n\n",
            "## Page-by-Page Analysis\n\n",
        ]

        for page in result.page_results:
            parts.append(f"### Page {page.page_number} ({page.page_type.replace('_', ' ').title()})\n\n")
            parts.append(f"**Score:** {page.overall_score:.1f}/10\n\n")
            parts.append("**Element Scores:**\n")
            parts.extend(
                f"- {element.replace('_', ' ').title()}: {score}/10\n"
                for element, score in page.element_scores.items()
            )

            if page.issues_found:
                parts.append("\n**Issues Found:**\n")
                parts.extend(f"- {issue}\n" for issue in page.issues_found)

            if page.strengths_found:
                parts.append("\n**Strengths:**\n")
                parts.extend(f"- {strength}\n" for strength in page.strengths_found)

            parts.append(f"\n**Detailed Feedback:**\n{page.detailed_feedback}\n\n")

        parts.append("## Recommendations\n\n")
        parts.extend(f"- {rec}\n" for rec in result.recommendations)

        next_step = (
            '✅ Document ready for publication'
            if result.overall_score >= 85
            else '⚠️ Address identified issues before final publication'
        )
        parts.append(
            "\n## Next Steps\n\n"
            f"{next_step}\n\n"
            "**Generated by DeepAgents PrintShop Visual QA System**\n"
        )

        # A caller-supplied path may point into a directory that does not exist yet.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("".join(parts))

        print(f"📄 Visual QA report saved: {output_path}")
        return output_path
764
+
765
+
766
def main():
    """Run the Visual QA pipeline once against the default research report PDF."""
    pdf_path = "artifacts/output/research_report.pdf"

    # Nothing to analyse without the sample report.
    if not Path(pdf_path).exists():
        print(f"❌ PDF not found: {pdf_path}")
        return

    print("🔍 Testing Visual QA System")
    print("=" * 50)

    try:
        qa_agent = VisualQAAgent()
        assessment = qa_agent.validate_pdf_visual_quality(pdf_path)
        saved_to = qa_agent.save_report(assessment)

        print(f"\n✅ Visual QA Complete!")
        print(f"📊 Overall Score: {assessment.overall_score:.1f}/100")
        print(f"📄 Report: {saved_to}")

    except Exception as exc:
        # Manual test entry point: report the failure with its traceback.
        print(f"❌ Error: {exc}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()