deepagents-printshop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. agents/content_editor/__init__.py +1 -0
  2. agents/content_editor/agent.py +279 -0
  3. agents/content_editor/content_reviewer.py +327 -0
  4. agents/content_editor/versioned_agent.py +455 -0
  5. agents/latex_specialist/__init__.py +1 -0
  6. agents/latex_specialist/agent.py +531 -0
  7. agents/latex_specialist/latex_analyzer.py +510 -0
  8. agents/latex_specialist/latex_optimizer.py +1192 -0
  9. agents/qa_orchestrator/__init__.py +1 -0
  10. agents/qa_orchestrator/agent.py +603 -0
  11. agents/qa_orchestrator/langgraph_workflow.py +733 -0
  12. agents/qa_orchestrator/pipeline_types.py +72 -0
  13. agents/qa_orchestrator/quality_gates.py +495 -0
  14. agents/qa_orchestrator/workflow_coordinator.py +139 -0
  15. agents/research_agent/__init__.py +1 -0
  16. agents/research_agent/agent.py +258 -0
  17. agents/research_agent/llm_report_generator.py +1023 -0
  18. agents/research_agent/report_generator.py +536 -0
  19. agents/visual_qa/__init__.py +1 -0
  20. agents/visual_qa/agent.py +410 -0
  21. deepagents_printshop-0.1.0.dist-info/METADATA +744 -0
  22. deepagents_printshop-0.1.0.dist-info/RECORD +37 -0
  23. deepagents_printshop-0.1.0.dist-info/WHEEL +4 -0
  24. deepagents_printshop-0.1.0.dist-info/entry_points.txt +2 -0
  25. deepagents_printshop-0.1.0.dist-info/licenses/LICENSE +86 -0
  26. tools/__init__.py +1 -0
  27. tools/change_tracker.py +419 -0
  28. tools/content_type_loader.py +171 -0
  29. tools/graph_generator.py +281 -0
  30. tools/latex_generator.py +374 -0
  31. tools/llm_latex_generator.py +678 -0
  32. tools/magazine_layout.py +462 -0
  33. tools/pattern_injector.py +250 -0
  34. tools/pattern_learner.py +477 -0
  35. tools/pdf_compiler.py +386 -0
  36. tools/version_manager.py +346 -0
  37. tools/visual_qa.py +799 -0
@@ -0,0 +1,1023 @@
1
+ """
2
+ LLM-Enhanced Research Report Generator - Milestone 3
3
+
4
+ Uses Claude with pattern learning to generate intelligent LaTeX documents.
5
+ Applies learned patterns from historical document generation.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional
12
+ import csv
13
+
14
+ # Add tools to path
15
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
16
+
17
+ from tools.llm_latex_generator import (
18
+ LLMLaTeXGenerator, LaTeXGenerationRequest, LaTeXGenerationResult
19
+ )
20
+ from tools.pattern_injector import PatternInjector
21
+ from tools.pdf_compiler import PDFCompiler
22
+ from tools.magazine_layout import MagazineLayoutGenerator, get_magazine_preamble
23
+ from tools.content_type_loader import ContentTypeLoader
24
+
25
+
26
+ class LLMResearchReportGenerator:
27
+ """
28
+ LLM-powered LaTeX report generator with pattern learning integration.
29
+
30
+ Features:
31
+ - Uses Claude to generate intelligent LaTeX
32
+ - Applies learned patterns from historical documents
33
+ - Self-correcting LaTeX generation
34
+ - Context-aware optimization
35
+ - Supports multiple content sources (research_report, magazine)
36
+ """
37
+
38
def __init__(self, output_dir: str = "artifacts/output", document_type: str = "research_report",
             content_source: Optional[str] = None) -> None:
    """
    Initialize the LLM report generator.

    Creates ``output_dir`` (with parents) when it is missing, derives the
    content/data/image directories under ``artifacts/sample_content``, parses
    ``config.md`` through ``_load_config`` and constructs the LLM generator,
    pattern injector and PDF compiler.

    Args:
        output_dir: Directory to save generated files
        document_type: Type of document (e.g., 'research_report', 'article', 'magazine')
        content_source: Source content folder (e.g., 'research_report', 'magazine').
            If None, defaults to document_type.
    """
    self.output_dir = Path(output_dir)
    self.output_dir.mkdir(parents=True, exist_ok=True)
    # Relative path: resolved against the process working directory.
    self.artifacts_dir = Path("artifacts")
    self.document_type = document_type

    # Content source determines which sample_content subdirectory to use
    self.content_source = content_source or document_type
    self.content_dir = self.artifacts_dir / "sample_content" / self.content_source
    self.data_dir = self.content_dir / "data"
    self.images_dir = self.content_dir / "images"

    # Load configuration from config.md if available
    # (must run after content_dir/document_type/content_source are set: _load_config reads them)
    self.config = self._load_config()

    # Initialize LLM generator and pattern injector
    self.llm_generator = LLMLaTeXGenerator()
    # Patterns are keyed by document_type, not by content_source.
    self.pattern_injector = PatternInjector(document_type=document_type)
    self.pdf_compiler = PDFCompiler()
67
+
68
def _load_config(self) -> Dict:
    """Load document configuration from config.md.

    Starts from built-in defaults and, when ``<content_dir>/config.md`` exists,
    overlays what the file declares:

    * ``## Content Type``  - last non-empty, non-rule line is the content type id
    * ``## Disclaimer``    - non-empty, non-rule lines joined with spaces
    * ``## Rendering Notes`` - non-empty lines joined with newlines
    * ``- key: value`` bullets - well-known keys (title, subtitle, author/publisher,
      date, issue, price, barcode_text) go to the top level; other keys go to
      ``options`` (section "Document Options") or ``style`` (section
      "Headers and Footers")
    * numbered entries under ``## Sections`` / ``## Content Manifest`` such as
      ``1. Editor's Letter (introduction.md)`` become ``sections`` items

    Finally the content type definition is resolved with ContentTypeLoader and
    stored under ``_content_type``.

    Returns:
        The configuration dictionary. When config.md is missing the defaults are
        returned as-is (no ``_content_type`` key is added).
    """
    import re

    config = {
        "title": "Research Report",
        "subtitle": "",
        "author": "Research Team",
        "date": "",
        "document_type": self.document_type,
        "sections": [],
        "style": {},
        "options": {}
    }

    config_path = self.content_dir / "config.md"
    if not config_path.exists():
        return config

    with open(config_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Bullet keys that map onto a top-level config entry regardless of section.
    top_level_keys = {
        'title': 'title',
        'subtitle': 'subtitle',
        'author': 'author',
        'publisher': 'author',
        'date': 'date',
        'issue': 'issue',
        'price': 'price',
        'barcode_text': 'barcode_text',
    }
    # Sections whose remaining bullet keys are collected into a nested dict.
    section_targets = {
        'document options': 'options',
        'headers and footers': 'style',
    }
    # Leading list number such as "1. ", "2) " or "3 ".
    list_number = re.compile(r'^\d+\s*[.)]?\s*')

    current_section = None
    disclaimer_lines = []
    rendering_notes_lines = []
    content_type_id = None

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        # Track sections
        if line.startswith('## '):
            current_section = line[3:].strip().lower()
            continue

        is_rule = line.startswith('---')

        # Capture content type
        if current_section == 'content type' and line and not is_rule:
            content_type_id = line
            continue

        # Capture disclaimer content (multi-line)
        if current_section == 'disclaimer' and line and not is_rule:
            disclaimer_lines.append(line)
            continue

        # Capture rendering notes (multi-line, horizontal rules included)
        if current_section == 'rendering notes' and line:
            rendering_notes_lines.append(line)
            continue

        # Parse "- key: value" bullets
        if line.startswith('- ') and ':' in line:
            raw_key, _, raw_value = line[2:].partition(':')
            key = raw_key.strip().strip('*').strip().lower().replace(' ', '_')
            value = raw_value.strip()
            # "- **Title:** Foo" puts the closing bold marker after the colon;
            # drop it so the value is "Foo" rather than "** Foo".
            if value.startswith('**'):
                value = value[2:].lstrip()

            if key in top_level_keys:
                config[top_level_keys[key]] = value
            elif current_section in section_targets:
                config[section_targets[current_section]][key] = value

        # Parse numbered section list (supports both "Sections" and "Content Manifest")
        if current_section in ('sections', 'content manifest') and line and line[0].isdigit():
            # The file name is the LAST parenthesised group, so titles may
            # themselves contain parentheses or dots.
            open_pos = line.rfind('(')
            close_pos = line.rfind(')')
            if open_pos != -1 and close_pos > open_pos:
                filename = line[open_pos + 1:close_pos].strip()
                title = list_number.sub('', line[:open_pos], count=1).strip()
                config['sections'].append({'file': filename, 'title': title})

    # Store disclaimer if found
    if disclaimer_lines:
        config['disclaimer'] = ' '.join(disclaimer_lines)

    # Store rendering notes
    if rendering_notes_lines:
        config['rendering_notes'] = '\n'.join(rendering_notes_lines)

    # Load content type definition (explicit id wins over the content source name)
    type_id = content_type_id or self.content_source
    loader = ContentTypeLoader()
    config['_content_type'] = loader.load_type(type_id)

    return config
181
+
182
def load_markdown_content(self, filename: str) -> str:
    """Load markdown content from the sample_content directory.

    Args:
        filename: Name of the file, relative to ``self.content_dir``.

    Returns:
        The file's text decoded as UTF-8, or an empty string when the path
        does not point at a regular file.
    """
    file_path = self.content_dir / filename
    # is_file() instead of exists(): an empty or directory-like name (e.g. a
    # manifest entry written as "Title ()") resolves to a directory, which
    # exists but cannot be opened for reading.
    if not file_path.is_file():
        return ""
    return file_path.read_text(encoding='utf-8')
189
+
190
def load_all_markdown_sections(self) -> List[Dict]:
    """
    Collect the document's markdown sections.

    The section list declared in config.md takes precedence. Without one,
    every ``*.md`` file in the content directory (except config.md and
    README.md, case-insensitively) is picked up in sorted order and titled
    after its file name.

    Returns:
        One dict per non-empty file with keys ``title``, ``content`` and
        ``type`` (always ``"markdown"``).
    """
    declared = self.config.get('sections')
    if declared:
        candidates = [(entry['file'], entry['title']) for entry in declared]
    elif self.content_dir.exists():
        # Auto-discovery: skip the configuration and documentation files
        skipped = {'config.md', 'readme.md'}
        candidates = [
            (path.name, path.stem.replace('_', ' ').replace('-', ' ').title())
            for path in sorted(self.content_dir.glob('*.md'))
            if path.name.lower() not in skipped
        ]
    else:
        candidates = []

    loaded = []
    for name, heading in candidates:
        body = self.load_markdown_content(name)
        if not body:
            # Missing or empty files contribute no section
            continue
        loaded.append({
            "title": heading,
            "content": body,
            "type": "markdown"
        })
    return loaded
224
+
225
def load_csv_tables(self) -> List[Dict]:
    """
    Load CSV data files as table specifications.

    Two files are looked up in ``self.data_dir``:

    * ``model_performance.csv`` - included in full when it has any rows
    * ``training_metrics.csv``  - header plus the first five data rows,
      included only when at least one data row exists

    Returns:
        List of table dictionaries with ``caption``, ``data`` (list of rows,
        each a list of strings) and ``format`` (always ``"booktabs"``).
    """
    def read_rows(csv_path):
        """Return all rows of csv_path, or [] when the file is absent."""
        if not csv_path.is_file():
            return []
        # newline='' is required by the csv module so that line breaks inside
        # quoted fields are preserved; UTF-8 matches the other content loaders.
        with open(csv_path, 'r', newline='', encoding='utf-8') as f:
            return list(csv.reader(f))

    tables = []

    # Model performance table
    rows = read_rows(self.data_dir / "model_performance.csv")
    if rows:
        tables.append({
            "caption": "Model Performance Comparison",
            "data": rows,
            "format": "booktabs"
        })

    # Training metrics table
    rows = read_rows(self.data_dir / "training_metrics.csv")
    if len(rows) > 1:
        # Header + only the first 5 data rows for conciseness
        tables.append({
            "caption": "Training Progression (First 5 Epochs)",
            "data": rows[:6],
            "format": "booktabs"
        })

    return tables
263
+
264
def load_figures(self) -> List[Dict]:
    """
    Discover figure files in images directory and load placement guidance from README.

    Files are grouped by extension (png, jpg, jpeg, pdf - in that order) and
    sorted by name inside each group, so the result does not depend on
    filesystem enumeration order. Callers assign figures to placeholders by
    position, which makes a stable order matter.

    Returns:
        List of figure dictionaries with keys ``path`` (relative to the output
        directory), ``caption``, ``width``, ``description`` and ``placement``.
        Values come from the images README.md when it describes the file,
        otherwise from defaults derived from the file name.
    """
    figures = []

    if not self.images_dir.exists():
        return figures

    # Load image descriptions from README.md if it exists
    image_guidance = {}
    readme_path = self.images_dir / "README.md"
    if readme_path.exists():
        image_guidance = self._parse_image_readme(readme_path.read_text(encoding='utf-8'))

    # Path relative to output directory (pdflatex runs from artifacts/output/)
    path_prefix = f"../sample_content/{self.content_source}/images/"

    # Look for common image extensions
    for ext in ['*.png', '*.jpg', '*.jpeg', '*.pdf']:
        for img_path in sorted(self.images_dir.glob(ext)):
            filename = img_path.name

            # Get guidance from README if available
            guidance = image_guidance.get(filename, {})
            default_caption = img_path.stem.replace('_', ' ').replace('-', ' ').title()

            figures.append({
                "path": path_prefix + filename,
                "caption": guidance.get("caption", default_caption),
                "width": guidance.get("width", "0.8\\textwidth"),
                "description": guidance.get("description", ""),
                "placement": guidance.get("placement", "")
            })

    return figures
304
+
305
def _fix_common_latex_issues(self, latex_content: str) -> str:
    """
    Clean up syntactic LaTeX problems the LLM tends to produce.

    Removes a fixed set of ``name=value`` options that are treated as invalid
    TikZ options (together with a preceding comma), then swaps placeholder
    figure boxes for real images via ``_replace_placeholder_figures``.

    Returns:
        The corrected LaTeX source.
    """
    import re

    # Option prefixes to remove; checked in this order
    bad_option_prefixes = ('letter spacing=', 'word spacing=', 'tracking=', 'stretch=')

    applied = []
    for prefix in bad_option_prefixes:
        if prefix not in latex_content:
            continue
        # Optional comma + whitespace, the option name, then its value up to
        # the next comma or closing bracket
        option_pattern = rf',?\s*{re.escape(prefix)}[^,\]]+'
        latex_content = re.sub(option_pattern, '', latex_content)
        applied.append(f"Removed invalid '{prefix[:-1]}' TikZ option")

    # Replace placeholder figures with actual images
    latex_content = self._replace_placeholder_figures(latex_content)

    if applied:
        print(f"šŸ”§ Fixed LaTeX issues: {', '.join(applied)}")

    return latex_content
334
+
335
def _replace_placeholder_figures(self, latex_content: str) -> str:
    """
    Swap LLM-written placeholder boxes for real images.

    A line is treated as a placeholder when it contains an fbox/parbox pair,
    square brackets and one of a handful of indicator phrases. Each such line
    is replaced, whole, by an includegraphics command for the next available
    content image (cover and barcode images are never used). Once the images
    run out, remaining placeholders are left untouched.

    Returns:
        The LaTeX source with placeholders replaced, or the input unchanged
        when nothing was replaced.
    """
    discovered = self.load_figures()
    if not discovered:
        return latex_content

    def is_content_image(fig):
        location = fig.get('path', '')
        name = location.split('/')[-1].lower() if location else ''
        return 'cover' not in name and 'barcode' not in name

    usable = [fig for fig in discovered if is_content_image(fig)]
    if not usable:
        return latex_content

    indicators = (
        'placeholder', 'would be displayed', 'image here',
        'chart]', 'graph]', 'figure]', 'comparison]',
    )

    rebuilt = []
    used = 0  # number of replacements made == index of the next image to use
    for text_line in latex_content.split('\n'):
        boxed = '\\fbox{\\parbox' in text_line and '[' in text_line and ']' in text_line
        if boxed and used < len(usable):
            lowered = text_line.lower()
            if any(marker in lowered for marker in indicators):
                chosen = usable[used]
                used += 1
                rebuilt.append(f"\\includegraphics[width=0.8\\textwidth]{{{chosen['path']}}}")
                continue
        rebuilt.append(text_line)

    if used == 0:
        return latex_content

    print(f"šŸ”§ Replaced {used} placeholder figure(s) with actual images")
    return '\n'.join(rebuilt)
393
+
394
def _fix_image_paths(self, latex_content: str) -> str:
    """
    Fix image paths in LaTeX content.

    The LLM often generates incorrect relative paths like:
    - sample_content/magazine/images/image.jpg
    - artifacts/sample_content/magazine/images/image.jpg
    - images/image.jpg
    - example-image (placeholder)

    The correct path (relative to artifacts/output/) is:
    - ../sample_content/{content_source}/images/image.jpg

    Rules applied to every includegraphics command, in order:
    1. If its options mention paperwidth/paperheight and a cover image
       exists, point it at the cover image.
    2. If the path is a placeholder ("example-image*" or "placeholder") and
       content images exist, substitute the next content image (cycling).
    3. If the path already starts with the correct prefix, leave it alone.
    4. Otherwise keep the file name and replace the directory part.

    Only the braced path argument is rewritten; the command name and its
    options are never touched, even when they contain the same text as the
    path.

    Returns:
        The LaTeX source with corrected image paths.
    """
    import re

    correct_prefix = f"../sample_content/{self.content_source}/images/"

    # Real image files available for substitution (sorted for a stable order)
    actual_images = []
    if self.images_dir.exists():
        for ext in ['*.png', '*.jpg', '*.jpeg']:
            actual_images.extend(sorted(f.name for f in self.images_dir.glob(ext)))

    # Separate special images from content images; barcode images are never
    # used as substitutes.
    cover_image = None
    content_images = []
    for img in actual_images:
        lowered = img.lower()
        if 'cover' in lowered:
            cover_image = img
        elif 'barcode' not in lowered:
            content_images.append(img)

    next_content = 0  # index of the next content image to hand out
    changes = 0       # number of paths actually rewritten

    def fix_path(match):
        nonlocal next_content, changes
        full_match = match.group(0)
        path = match.group(1)

        # Extract just the filename from any path
        filename = path.split('/')[-1]
        is_placeholder = filename.startswith('example-image') or filename == 'placeholder'
        is_page_sized = 'paperwidth' in full_match or 'paperheight' in full_match

        if is_page_sized and cover_image:
            # Page-sized graphics are assumed to be the cover/background
            new_path = correct_prefix + cover_image
        elif is_placeholder and content_images:
            new_path = correct_prefix + content_images[next_content % len(content_images)]
            next_content += 1
        elif path.startswith(correct_prefix) and not is_placeholder:
            return full_match
        else:
            new_path = correct_prefix + filename

        if new_path == path:
            return full_match
        changes += 1

        # Splice by position: str.replace() would also rewrite any other
        # occurrence of the path text, e.g. "graph" inside "\includegraphics".
        start = match.start(1) - match.start(0)
        end = match.end(1) - match.start(0)
        return full_match[:start] + new_path + full_match[end:]

    # Match \includegraphics[...]{path} or \includegraphics{path}
    pattern = r'\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}'
    new_content = re.sub(pattern, fix_path, latex_content)

    if changes > 0:
        print(f"šŸ”§ Fixed {changes} image path(s)")

    return new_content
475
+
476
def _ensure_printshop_attribution(self, latex_content: str, figures: list) -> str:
    """
    Ensure the document has PrintShop attribution and barcode (if available).

    Inserts a centered credit line just before the first ``\\end{document}``.
    When one of ``figures`` has "barcode" in its file name, the barcode image
    and the optional ``barcode_text`` from the config are placed above the
    credit line.

    Args:
        latex_content: Complete LaTeX source.
        figures: Figure dictionaries; only the ``path`` key is read.

    Returns:
        The LaTeX source with the attribution block added. The input is
        returned unchanged when the credit line is already present or when
        the document has no ``\\end{document}``.
    """
    import re

    # Check if PrintShop attribution already exists (the specific footer text, not disclaimer)
    if 'Generated by DeepAgents PrintShop' in latex_content:
        return latex_content

    # Nothing can be inserted without an insertion point
    end_doc_pos = latex_content.find('\\end{document}')
    if end_doc_pos == -1:
        return latex_content

    # Find barcode image path if available
    barcode_path = None
    for fig in figures or []:
        fig_path = fig.get('path', '')
        filename = fig_path.split('/')[-1].lower() if fig_path else ''
        if 'barcode' in filename:
            barcode_path = fig_path
            break

    # Get barcode text from config (e.g., "ISSUE 01 | $9.99 US")
    barcode_text = self.config.get('barcode_text', '')

    print("šŸ“„ Adding PrintShop attribution...")

    # Minimal attribution block - just the tool credit and barcode
    parts = ["\n% PrintShop Attribution\n", "\\vfill\n", "\\begin{center}\n"]
    if barcode_path:
        parts.append(f"\\includegraphics[width=1in]{{{barcode_path}}}\n")
        if barcode_text:
            # Escape LaTeX special characters that are not already escaped;
            # an unescaped % would comment out the closing brace.
            barcode_text_escaped = re.sub(r'(?<!\\)([$%&#_])', r'\\\1', barcode_text)
            parts.append(f"\n\\vspace{{0.3em}}\n{{\\tiny {barcode_text_escaped}}}\n")
        parts.append("\n\\vspace{1em}\n")
    parts.append("{\\footnotesize\\itshape Generated by DeepAgents PrintShop}\n")
    parts.append("\\end{center}\n")
    attribution_code = ''.join(parts)

    # Insert before \end{document}
    return latex_content[:end_doc_pos] + attribution_code + latex_content[end_doc_pos:]
534
+
535
def _inject_missing_figures(self, latex_content: str) -> str:
    """
    Post-process LaTeX to inject missing figures if the LLM didn't include them.

    This is a safety net for when the LLM generation doesn't include images:
    if any includegraphics command appears on a line that is not a full-line
    comment, the document is returned untouched.

    For the 'magazine' content source:
    - the cover image becomes a full-page background, inserted after the
      ``\\begin{document}`` line;
    - the barcode image gets its own final page, captioned with the config's
      ``barcode_text`` (falling back to "ISSUE 01 | $9.99 US");
    - images whose name contains chart/graph/comparison are inserted before
      the first ``\\end{multicols}`` that follows a known section marker.

    For every other content source each figure is appended, in order, just
    before ``\\end{document}``.

    Note: PrintShop attribution is handled separately in generate_and_compile().

    Returns:
        The LaTeX source, with figures injected where possible.
    """
    import re

    # Look for \includegraphics that's NOT on a line starting with %
    for line in latex_content.split('\n'):
        stripped = line.strip()
        if '\\includegraphics' in stripped and not stripped.startswith('%'):
            return latex_content  # Figures already present

    figures = self.load_figures()
    if not figures:
        return latex_content  # No figures to inject

    print(f"šŸ–¼ļø Injecting {len(figures)} missing figures into LaTeX...")

    def basename(fig):
        """Lower-cased file name of a figure's path ('' when it has none)."""
        return (fig.get('path') or '').split('/')[-1].lower()

    def figure_block(fig, width):
        """LaTeX figure environment for fig at the given width."""
        return (
            "\n\\begin{figure}[H]\n"
            "\\centering\n"
            f"\\includegraphics[width={width}]{{{fig['path']}}}\n"
            f"\\caption{{{fig.get('caption', 'Figure')}}}\n"
            "\\end{figure}\n"
        )

    def insert_before_end(document, snippet):
        """Insert snippet before the first \\end{document}, if there is one."""
        end_pos = document.find('\\end{document}')
        if end_pos == -1:
            return document
        return document[:end_pos] + snippet + document[end_pos:]

    if self.content_source != 'magazine':
        # For other document types, append all figures before \end{document}.
        # The fallback width uses a single backslash so it is a valid length.
        blocks = ''.join(
            figure_block(fig, fig.get('width', '0.8\\textwidth')) for fig in figures
        )
        return insert_before_end(latex_content, blocks)

    # For magazine content, handle special images
    cover_image = None
    barcode_image = None
    other_images = []
    for fig in figures:
        name = basename(fig)
        if 'cover' in name:
            cover_image = fig
        elif 'barcode' in name:
            barcode_image = fig
        else:
            other_images.append(fig)

    # Inject cover image as background on first page
    if cover_image:
        # Anchor at the page's lower-left corner: an image box extends up and
        # to the right of its reference point, so anchoring it at the upper
        # left would draw it above the visible page.
        cover_code = (
            "\n% Cover page background\n"
            "\\AddToShipoutPictureBG*{%\n"
            "\\AtPageLowerLeft{%\n"
            f"\\includegraphics[width=\\paperwidth,height=\\paperheight]{{{cover_image['path']}}}%\n"
            "}%\n"
            "}\n"
        )
        begin_marker = '\\begin{document}'
        begin_doc_pos = latex_content.find(begin_marker)
        if begin_doc_pos != -1:
            newline_pos = latex_content.find('\n', begin_doc_pos)
            if newline_pos != -1:
                insert_pos = newline_pos + 1
            else:
                # No line break after \begin{document}: insert right after the
                # marker instead of at the start of the file.
                insert_pos = begin_doc_pos + len(begin_marker)
            latex_content = latex_content[:insert_pos] + cover_code + latex_content[insert_pos:]

    # Inject barcode before \end{document}
    if barcode_image:
        raw_text = self.config.get('barcode_text') or 'ISSUE 01 | $9.99 US'
        # Escape LaTeX special characters that are not already escaped
        barcode_text = re.sub(r'(?<!\\)([$%&#_])', r'\\\1', raw_text)
        barcode_code = (
            "\n% Back cover barcode\n"
            "\\newpage\n"
            "\\thispagestyle{empty}\n"
            "\\vspace*{\\fill}\n"
            "\\begin{center}\n"
            f"\\includegraphics[width=1in]{{{barcode_image['path']}}}\n"
            "\n"
            "\\vspace{0.3em}\n"
            f"{{\\tiny {barcode_text}}}\n"
            "\\end{center}\n"
        )
        latex_content = insert_before_end(latex_content, barcode_code)

    # Inject chart images into content sections
    for fig in other_images:
        name = basename(fig)
        # Skip certain images that are decorative
        if any(skip in name for skip in ('cover', 'logo', 'icon')):
            continue
        # Only charts and data visualizations are injected
        if not any(kind in name for kind in ('chart', 'graph', 'comparison')):
            continue

        snippet = figure_block(fig, '0.9\\textwidth')
        # Try each marker until one is followed by an \end{multicols} to
        # insert in front of.
        for section_marker in ['State of AI Agents', 'methodology', 'results', 'data']:
            section_pos = latex_content.lower().find(section_marker.lower())
            if section_pos == -1:
                continue
            column_end = latex_content.find('\\end{multicols}', section_pos)
            if column_end == -1:
                continue
            latex_content = latex_content[:column_end] + snippet + latex_content[column_end:]
            break

    return latex_content
656
+
657
def _parse_image_readme(self, readme_content: str) -> Dict:
    """
    Extract per-image guidance from the images README.md.

    An entry starts at a line that is wrapped in ``**`` and contains a dot
    (e.g. ``**cover-image.jpg**``). The bullet lines that follow it may set:

    - ``- Description:``        -> ``description``
    - ``- Placement:``          -> ``placement``
    - ``- Caption suggestion:`` -> ``caption`` (surrounding double quotes removed)
    - ``- Style:``              -> ``width`` of 0.4 or 0.3 textwidth when the
      line mentions "40%" or "30%" (40% is checked first)

    Returns:
        Dictionary mapping filename to guidance dict
    """
    parsed = {}
    active_name = None
    active_entry = {}

    for raw in readme_content.split('\n'):
        entry_line = raw.strip()

        is_heading = (
            entry_line.startswith('**')
            and entry_line.endswith('**')
            and '.' in entry_line
        )
        if is_heading:
            # Close the entry collected so far before starting the next one
            if active_name:
                parsed[active_name] = active_entry
            active_name = entry_line.strip('*')
            active_entry = {}
            continue

        if not active_name or not entry_line.startswith('- '):
            continue

        if entry_line.startswith('- Description:'):
            active_entry['description'] = entry_line.replace('- Description:', '').strip()
        elif entry_line.startswith('- Placement:'):
            active_entry['placement'] = entry_line.replace('- Placement:', '').strip()
        elif entry_line.startswith('- Caption suggestion:'):
            caption = entry_line.replace('- Caption suggestion:', '').strip()
            active_entry['caption'] = caption.strip('"')
        elif entry_line.startswith('- Style:'):
            # Width hints embedded in the style description
            for percent, width in (('40%', "0.4\\textwidth"), ('30%', "0.3\\textwidth")):
                if percent in entry_line:
                    active_entry['width'] = width
                    break

    # The final entry has no following heading to flush it
    if active_name:
        parsed[active_name] = active_entry

    return parsed
700
+
701
def _get_magazine_requirements(self) -> List[str]:
    """Assemble the magazine-specific LaTeX requirements.

    The list contains, in order: the full preamble from
    MagazineLayoutGenerator wrapped in an instruction to include it verbatim,
    the generator's own layout requirements, the content type definition text
    (when one was loaded) and the rendering notes from config.md (when present).
    """
    layout = MagazineLayoutGenerator()

    # Concrete preamble code the LLM is told to copy as-is
    preamble_block = "\n".join([
        "MAGAZINE PREAMBLE - INCLUDE THIS EXACT CODE IN YOUR DOCUMENT PREAMBLE:",
        "```latex",
        f"{layout.get_full_preamble()}",
        "```",
        "You MUST include all these package imports and macro definitions in your document preamble.",
    ])

    collected = [preamble_block]
    collected.extend(layout.get_magazine_requirements())

    # Content type definition as rendering context
    type_definition = self.config.get('_content_type')
    if type_definition and type_definition.type_md_content:
        collected.append(
            "CONTENT TYPE RENDERING INSTRUCTIONS:\n" + type_definition.type_md_content
        )

    # Content-specific instructions from config.md
    notes = self.config.get('rendering_notes', '')
    if notes:
        collected.append(
            "ADDITIONAL RENDERING NOTES FROM CONTENT CONFIG:\n" + notes
        )

    return collected
737
+
738
def _get_research_report_requirements(self) -> List[str]:
    """Assemble the research-report-specific LaTeX requirements.

    Uses the loaded content type definition as rendering instructions; when
    none is available (or its text is empty) a fixed set of generic academic
    formatting rules is used instead. Rendering notes from config.md are
    appended last when present.
    """
    type_definition = self.config.get('_content_type')
    if type_definition and type_definition.type_md_content:
        collected = [
            "CONTENT TYPE RENDERING INSTRUCTIONS:\n" + type_definition.type_md_content
        ]
    else:
        # Fallback if type definition not available
        collected = [
            "Use standard academic article format",
            "Include abstract if content has one",
            "Use numbered sections and subsections",
            "Format references properly if bibliography exists",
            "Use single-column layout throughout",
        ]

    notes = self.config.get('rendering_notes', '')
    if notes:
        collected.append(
            "ADDITIONAL RENDERING NOTES FROM CONTENT CONFIG:\n" + notes
        )

    return collected
769
+
770
def generate_with_patterns(self) -> LaTeXGenerationResult:
    """
    Generate the LaTeX document through the LLM, steering it with learned patterns.

    Gathers sections, tables and figures from the content directory, builds
    the requirement list (base rules, optional disclaimer, document-type
    rules, learned patterns), submits one generation request with validation
    enabled and prints a progress report along the way.

    Returns:
        Generation result with LaTeX content
    """
    cfg = self.config

    print("šŸš€ LLM-Enhanced LaTeX Generation")
    print("=" * 60)
    print(f"šŸ“ Content Source: {self.content_source}")
    print(f"šŸ“„ Document Type: {self.document_type}")
    print(f"šŸ“ Title: {cfg.get('title', 'Untitled')}")
    print()

    # Pattern context for the Author agent
    learned = self.pattern_injector.get_context_for_author()
    if not learned:
        print(f"ā„¹ļø No learned patterns available yet for '{self.document_type}'")
    else:
        print("āœ… Loaded learned patterns from historical documents")
        print(self.pattern_injector.get_summary())
    print()

    # Document components
    sections = self.load_all_markdown_sections()
    tables = self.load_csv_tables()
    figures = self.load_figures()

    print(f"šŸ“„ Loaded {len(sections)} content sections")
    print(f"šŸ“Š Loaded {len(tables)} data tables")
    print(f"šŸ–¼ļø Found {len(figures)} figures")
    print()

    # Base requirements that apply to every document
    requirements = [
        "Use professional typography packages (lmodern)",
        "Format tables with booktabs package",
        "Include proper hyperref setup for navigation",
        "Use appropriate section hierarchy",
        "Add proper spacing and layout"
    ]

    # Disclaimer from config, if any
    if cfg.get('disclaimer'):
        disclaimer_text = cfg['disclaimer']
        requirements.append("\n".join([
            "IMPORTANT - DISCLAIMER SECTION:",
            "Include a prominently styled disclaimer box/section at the VERY BEGINNING of the document (right after the title/maketitle).",
            "Use a framed box or shaded environment to make it stand out.",
            "The disclaimer text is:",
            f"\"{disclaimer_text}\"",
            "This disclaimer MUST appear before any content sections.",
        ]))
        print("šŸ“‹ Adding disclaimer requirement")

    # Document-type-specific requirements
    is_magazine = self.content_source == 'magazine' or self.document_type == 'magazine'
    if is_magazine:
        print("šŸ“° Adding magazine-specific styling requirements")
        type_requirements = self._get_magazine_requirements()
    else:
        type_requirements = self._get_research_report_requirements()
    requirements.extend(type_requirements)

    # Pattern-based requirements
    if learned:
        requirements.append(
            "IMPORTANT: Apply the following learned patterns from historical documents:\n" +
            learned
        )

    # Title, with the subtitle appended when there is one
    title = cfg.get('title', 'Document')
    if cfg.get('subtitle'):
        title = f"{title}: {cfg['subtitle']}"

    request = LaTeXGenerationRequest(
        title=title,
        author=cfg.get('author', 'Author'),
        content_sections=sections,
        tables=tables,
        figures=figures,
        requirements=requirements
    )

    # Generate using LLM
    print("šŸ¤– Generating LaTeX with Claude Sonnet 4.5...")
    result = self.llm_generator.generate_document(request, validate=True)

    if not result.success:
        print(f"āŒ Generation failed: {result.error_message}")
        return result

    print("āœ… Generation successful!")
    if result.improvements_made:
        print(f"šŸ’” Applied {len(result.improvements_made)} improvements:")
        # Only the first five improvements are listed
        for improvement in result.improvements_made[:5]:
            print(f" • {improvement}")
    if result.warnings:
        print(f"āš ļø {len(result.warnings)} warnings:")
        # Only the first three warnings are listed
        for warning in result.warnings[:3]:
            print(f" • {warning}")

    return result
872
+
873
def generate_and_compile(self, max_llm_corrections: int = 3) -> Dict:
    """
    Generate LaTeX and compile to PDF with LLM self-correction loop.

    The document is generated via ``generate_with_patterns()``, repaired if
    it is missing ``\\end{document}``, post-processed, written to
    ``<output_dir>/<content_source>.tex`` and then compiled. After each
    failed compilation the LLM is asked to correct the source, up to
    ``max_llm_corrections`` times.

    Args:
        max_llm_corrections: Maximum LLM self-correction attempts. Values
            below zero are treated as zero (compile once, no corrections).

    Returns:
        Dictionary with paths and status. When generation itself fails it
        contains only ``success`` and ``error``. Otherwise it contains
        ``success``, ``tex_path``, ``pdf_path`` (``None`` on failure),
        ``latex_result`` and ``compilation_result``.
    """
    # A negative budget would make the compile loop below run zero times and
    # leave `message` unbound at the final return; clamp so that we always
    # compile at least once.
    max_llm_corrections = max(0, max_llm_corrections)

    # Generate LaTeX
    result = self.generate_with_patterns()
    if not result.success:
        return {
            "success": False,
            "error": result.error_message,
        }

    latex_content = result.latex_content
    tex_path = self.output_dir / f"{self.content_source}.tex"

    # Pre-validation: check for truncated output FIRST (before figure
    # injection) so that \end{document} exists for subsequent processing.
    if '\\end{document}' not in latex_content:
        print("āš ļø Generated LaTeX appears truncated (missing \\end{document})")
        print("šŸ”§ Attempting to complete the document...")
        # Use specialized truncation completion (not full document regeneration)
        latex_content, fixed = self.llm_generator.complete_truncated_document(latex_content)
        if fixed:
            print("āœ… Document completion successful")
        else:
            print("āš ļø Document completion failed - will try to compile anyway")

    # Post-process: inject figures if missing (after the truncation repair
    # above, so \end{document} should be present).
    latex_content = self._inject_missing_figures(latex_content)

    # Fix image paths: LLM often generates wrong relative paths
    # Correct path is ../sample_content/{content_source}/images/ (relative to artifacts/output/)
    latex_content = self._fix_image_paths(latex_content)

    # Fix common LaTeX issues that LLM generates
    latex_content = self._fix_common_latex_issues(latex_content)

    # Ensure PrintShop attribution is present (runs after figure injection)
    figures = self.load_figures()
    latex_content = self._ensure_printshop_attribution(latex_content, figures)

    # Save LaTeX file
    tex_path.write_text(latex_content, encoding='utf-8')
    print(f"\nšŸ’¾ Saved LaTeX to: {tex_path}")

    # Compile with LLM self-correction loop
    print("\nšŸ“„ Compiling to PDF...")
    pdf_path = tex_path.with_suffix('.pdf')

    message = ""
    for attempt in range(max_llm_corrections + 1):
        success, message = self.pdf_compiler.compile(str(tex_path))

        if success:
            print(f"āœ… PDF generated: {pdf_path}")
            return {
                "success": True,
                "tex_path": str(tex_path),
                "pdf_path": str(pdf_path),
                "latex_result": result,
                "compilation_result": {"success": True, "message": message},
            }

        # Last attempt - give up
        if attempt == max_llm_corrections:
            print(f"āŒ PDF compilation failed after {max_llm_corrections} LLM correction attempts")
            print(f"Error: {message}")
            break

        # Try LLM self-correction (one correction round per compile failure)
        print(f"\nšŸ¤– LLM Self-Correction Attempt {attempt + 1}/{max_llm_corrections}...")
        corrected_latex, fixed, corrections = self.llm_generator.self_correct_compilation_errors(
            latex_content, message, max_attempts=1
        )

        if not fixed:
            print(" āŒ LLM could not fix the issue")
            break

        latex_content = corrected_latex
        # Save corrected version so the next compile picks it up
        tex_path.write_text(latex_content, encoding='utf-8')
        print(f" āœ… Applied corrections: {corrections}")

    return {
        "success": False,
        "tex_path": str(tex_path),
        "pdf_path": None,
        "latex_result": result,
        "compilation_result": {"success": False, "message": message},
    }
973
+
974
+
975
def main(argv=None):
    """Demonstration of LLM-enhanced report generation.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        Process exit status: ``0`` when the document was generated and
        compiled, ``1`` otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description='LLM-Enhanced Document Generator')
    parser.add_argument(
        '--content', '-c',
        default='research_report',
        help='Content source folder (e.g., research_report, magazine)'
    )
    parser.add_argument(
        '--type', '-t',
        default=None,
        help='Document type for pattern learning (defaults to content source)'
    )
    args = parser.parse_args(argv)

    content_source = args.content
    # The document type falls back to the content source when not given.
    document_type = args.type or content_source

    print("\n" + "=" * 60)
    print("🧠 LLM-Enhanced Document Generator with Pattern Learning")
    print("=" * 60)
    print(f"šŸ“ Content source: {content_source}")
    print(f"šŸ“„ Document type: {document_type}")
    print()

    generator = LLMResearchReportGenerator(
        content_source=content_source,
        document_type=document_type
    )
    result = generator.generate_and_compile()

    print("\n" + "=" * 60)
    if result["success"]:
        print("āœ… Document generation complete!")
        print("=" * 60)
        print(f"\nšŸ“„ LaTeX: {result['tex_path']}")
        print(f"šŸ“‘ PDF: {result['pdf_path']}")
    else:
        print("āŒ Document generation failed")
        print("=" * 60)
        # Generation failures carry "error"; compilation failures only carry
        # a message under "compilation_result", so fall back to that.
        error = result.get("error") or (result.get("compilation_result") or {}).get("message")
        if error:
            print(f"\nError: {error}")
    print()

    return 0 if result["success"] else 1


if __name__ == "__main__":
    # Propagate failure to the shell instead of always exiting with status 0.
    raise SystemExit(main())