deepagents-printshop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/content_editor/__init__.py +1 -0
- agents/content_editor/agent.py +279 -0
- agents/content_editor/content_reviewer.py +327 -0
- agents/content_editor/versioned_agent.py +455 -0
- agents/latex_specialist/__init__.py +1 -0
- agents/latex_specialist/agent.py +531 -0
- agents/latex_specialist/latex_analyzer.py +510 -0
- agents/latex_specialist/latex_optimizer.py +1192 -0
- agents/qa_orchestrator/__init__.py +1 -0
- agents/qa_orchestrator/agent.py +603 -0
- agents/qa_orchestrator/langgraph_workflow.py +733 -0
- agents/qa_orchestrator/pipeline_types.py +72 -0
- agents/qa_orchestrator/quality_gates.py +495 -0
- agents/qa_orchestrator/workflow_coordinator.py +139 -0
- agents/research_agent/__init__.py +1 -0
- agents/research_agent/agent.py +258 -0
- agents/research_agent/llm_report_generator.py +1023 -0
- agents/research_agent/report_generator.py +536 -0
- agents/visual_qa/__init__.py +1 -0
- agents/visual_qa/agent.py +410 -0
- deepagents_printshop-0.1.0.dist-info/METADATA +744 -0
- deepagents_printshop-0.1.0.dist-info/RECORD +37 -0
- deepagents_printshop-0.1.0.dist-info/WHEEL +4 -0
- deepagents_printshop-0.1.0.dist-info/entry_points.txt +2 -0
- deepagents_printshop-0.1.0.dist-info/licenses/LICENSE +86 -0
- tools/__init__.py +1 -0
- tools/change_tracker.py +419 -0
- tools/content_type_loader.py +171 -0
- tools/graph_generator.py +281 -0
- tools/latex_generator.py +374 -0
- tools/llm_latex_generator.py +678 -0
- tools/magazine_layout.py +462 -0
- tools/pattern_injector.py +250 -0
- tools/pattern_learner.py +477 -0
- tools/pdf_compiler.py +386 -0
- tools/version_manager.py +346 -0
- tools/visual_qa.py +799 -0
|
@@ -0,0 +1,1023 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM-Enhanced Research Report Generator - Milestone 3
|
|
3
|
+
|
|
4
|
+
Uses Claude with pattern learning to generate intelligent LaTeX documents.
|
|
5
|
+
Applies learned patterns from historical document generation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Optional
|
|
12
|
+
import csv
|
|
13
|
+
|
|
14
|
+
# Add tools to path
|
|
15
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
16
|
+
|
|
17
|
+
from tools.llm_latex_generator import (
|
|
18
|
+
LLMLaTeXGenerator, LaTeXGenerationRequest, LaTeXGenerationResult
|
|
19
|
+
)
|
|
20
|
+
from tools.pattern_injector import PatternInjector
|
|
21
|
+
from tools.pdf_compiler import PDFCompiler
|
|
22
|
+
from tools.magazine_layout import MagazineLayoutGenerator, get_magazine_preamble
|
|
23
|
+
from tools.content_type_loader import ContentTypeLoader
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class LLMResearchReportGenerator:
|
|
27
|
+
"""
|
|
28
|
+
LLM-powered LaTeX report generator with pattern learning integration.
|
|
29
|
+
|
|
30
|
+
Features:
|
|
31
|
+
- Uses Claude to generate intelligent LaTeX
|
|
32
|
+
- Applies learned patterns from historical documents
|
|
33
|
+
- Self-correcting LaTeX generation
|
|
34
|
+
- Context-aware optimization
|
|
35
|
+
- Supports multiple content sources (research_report, magazine)
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(self, output_dir: str = "artifacts/output", document_type: str = "research_report",
|
|
39
|
+
content_source: str = None):
|
|
40
|
+
"""
|
|
41
|
+
Initialize the LLM report generator.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
output_dir: Directory to save generated files
|
|
45
|
+
document_type: Type of document (e.g., 'research_report', 'article', 'magazine')
|
|
46
|
+
content_source: Source content folder (e.g., 'research_report', 'magazine').
|
|
47
|
+
If None, defaults to document_type.
|
|
48
|
+
"""
|
|
49
|
+
self.output_dir = Path(output_dir)
|
|
50
|
+
self.output_dir.mkdir(parents=True, exist_ok=True)
|
|
51
|
+
self.artifacts_dir = Path("artifacts")
|
|
52
|
+
self.document_type = document_type
|
|
53
|
+
|
|
54
|
+
# Content source determines which sample_content subdirectory to use
|
|
55
|
+
self.content_source = content_source or document_type
|
|
56
|
+
self.content_dir = self.artifacts_dir / "sample_content" / self.content_source
|
|
57
|
+
self.data_dir = self.content_dir / "data"
|
|
58
|
+
self.images_dir = self.content_dir / "images"
|
|
59
|
+
|
|
60
|
+
# Load configuration from config.md if available
|
|
61
|
+
self.config = self._load_config()
|
|
62
|
+
|
|
63
|
+
# Initialize LLM generator and pattern injector
|
|
64
|
+
self.llm_generator = LLMLaTeXGenerator()
|
|
65
|
+
self.pattern_injector = PatternInjector(document_type=document_type)
|
|
66
|
+
self.pdf_compiler = PDFCompiler()
|
|
67
|
+
|
|
68
|
+
    def _load_config(self) -> Dict:
        """Load document configuration from config.md.

        Uses ContentTypeLoader to resolve the content type definition.
        Parses remaining config sections (metadata, manifest, options) from config.md.

        Returns:
            Config dict with title/subtitle/author/date/document_type/sections/
            style/options keys; may also gain 'issue', 'price', 'barcode_text',
            'disclaimer', 'rendering_notes', and '_content_type'. NOTE: when
            config.md is missing, this returns early with defaults only —
            '_content_type' is NOT set in that case.
        """
        # Defaults used when config.md is absent or omits a key.
        config = {
            "title": "Research Report",
            "subtitle": "",
            "author": "Research Team",
            "date": "",
            "document_type": self.document_type,
            "sections": [],
            "style": {},
            "options": {}
        }

        config_path = self.content_dir / "config.md"
        if not config_path.exists():
            # Early return: no content-type resolution happens in this path.
            return config

        with open(config_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Parse key-value pairs from config.md
        # Line-oriented state machine: '## <name>' headings switch the current
        # section; disclaimer / rendering-notes sections accumulate raw lines.
        current_section = None
        disclaimer_lines = []
        in_disclaimer = False
        rendering_notes_lines = []
        in_rendering_notes = False
        content_type_id = None

        for line in content.split('\n'):
            line_stripped = line.strip()

            # Track sections
            if line_stripped.startswith('## '):
                current_section = line_stripped[3:].strip().lower()
                in_disclaimer = (current_section == 'disclaimer')
                in_rendering_notes = (current_section == 'rendering notes')
                continue

            # Capture content type
            # First non-empty, non-'---' line of the 'Content Type' section is
            # taken as the type id (later lines overwrite earlier ones).
            if current_section == 'content type' and line_stripped and not line_stripped.startswith('---'):
                content_type_id = line_stripped
                continue

            # Capture disclaimer content (multi-line); '---' separators skipped.
            if in_disclaimer and line_stripped and not line_stripped.startswith('---'):
                disclaimer_lines.append(line_stripped)
                continue

            # Capture rendering notes (multi-line)
            if in_rendering_notes and line_stripped:
                rendering_notes_lines.append(line_stripped)
                continue

            line = line_stripped  # Use stripped version for rest of parsing

            # Parse key-value pairs of the form "- **Key**: value" / "- key: value"
            if line.startswith('- ') and ':' in line:
                key_value = line[2:].split(':', 1)
                if len(key_value) == 2:
                    # Strip bold markers (**) from key
                    key = key_value[0].strip().strip('*').lower().replace(' ', '_')
                    value = key_value[1].strip()

                    # Map to config
                    # Known metadata keys are stored at the top level regardless
                    # of section; unknown keys only stick inside the two
                    # sections handled below.
                    if key == 'title':
                        config['title'] = value
                    elif key == 'subtitle':
                        config['subtitle'] = value
                    elif key == 'author' or key == 'publisher':
                        # 'publisher' is treated as an alias for 'author'.
                        config['author'] = value
                    elif key == 'date':
                        config['date'] = value
                    elif key == 'issue':
                        config['issue'] = value
                    elif key == 'price':
                        config['price'] = value
                    elif key == 'barcode_text':
                        config['barcode_text'] = value
                    elif current_section == 'document options':
                        config['options'][key] = value
                    elif current_section == 'headers and footers':
                        config['style'][key] = value

            # Parse numbered section list (supports both "Sections" and "Content Manifest")
            if current_section in ('sections', 'content manifest') and line and line[0].isdigit():
                # e.g., "1. Editor's Letter (introduction.md)"
                # Filename is whatever sits in the first (...) pair; title is
                # the text between the first '.' and the '('.
                if '(' in line and ')' in line:
                    start = line.index('(') + 1
                    end = line.index(')')
                    filename = line[start:end]
                    title_part = line.split('.', 1)[1] if '.' in line else line
                    title = title_part.split('(')[0].strip()
                    config['sections'].append({'file': filename, 'title': title})

        # Store disclaimer if found (joined into a single paragraph)
        if disclaimer_lines:
            config['disclaimer'] = ' '.join(disclaimer_lines)

        # Store rendering notes (newline-preserved for prompt injection)
        if rendering_notes_lines:
            config['rendering_notes'] = '\n'.join(rendering_notes_lines)

        # Load content type definition; fall back to the content_source folder
        # name when config.md names no explicit type.
        type_id = content_type_id or self.content_source
        loader = ContentTypeLoader()
        content_type = loader.load_type(type_id)
        config['_content_type'] = content_type

        return config
|
|
181
|
+
|
|
182
|
+
def load_markdown_content(self, filename: str) -> str:
|
|
183
|
+
"""Load markdown content from the sample_content directory."""
|
|
184
|
+
file_path = self.content_dir / filename
|
|
185
|
+
if file_path.exists():
|
|
186
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
187
|
+
return f.read()
|
|
188
|
+
return ""
|
|
189
|
+
|
|
190
|
+
def load_all_markdown_sections(self) -> List[Dict]:
|
|
191
|
+
"""
|
|
192
|
+
Load all markdown content files and organize into sections.
|
|
193
|
+
Uses sections from config.md if available, otherwise auto-discovers .md files.
|
|
194
|
+
|
|
195
|
+
Returns:
|
|
196
|
+
List of section dictionaries with title and content
|
|
197
|
+
"""
|
|
198
|
+
sections = []
|
|
199
|
+
|
|
200
|
+
# Use sections from config if available
|
|
201
|
+
if self.config.get('sections'):
|
|
202
|
+
markdown_files = [(s['file'], s['title']) for s in self.config['sections']]
|
|
203
|
+
else:
|
|
204
|
+
# Auto-discover .md files (excluding config.md and README.md)
|
|
205
|
+
exclude_files = {'config.md', 'readme.md'}
|
|
206
|
+
markdown_files = []
|
|
207
|
+
if self.content_dir.exists():
|
|
208
|
+
for md_file in sorted(self.content_dir.glob('*.md')):
|
|
209
|
+
if md_file.name.lower() not in exclude_files:
|
|
210
|
+
# Generate title from filename
|
|
211
|
+
title = md_file.stem.replace('_', ' ').replace('-', ' ').title()
|
|
212
|
+
markdown_files.append((md_file.name, title))
|
|
213
|
+
|
|
214
|
+
for filename, title in markdown_files:
|
|
215
|
+
content = self.load_markdown_content(filename)
|
|
216
|
+
if content:
|
|
217
|
+
sections.append({
|
|
218
|
+
"title": title,
|
|
219
|
+
"content": content,
|
|
220
|
+
"type": "markdown"
|
|
221
|
+
})
|
|
222
|
+
|
|
223
|
+
return sections
|
|
224
|
+
|
|
225
|
+
def load_csv_tables(self) -> List[Dict]:
|
|
226
|
+
"""
|
|
227
|
+
Load CSV data files as table specifications.
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
List of table dictionaries
|
|
231
|
+
"""
|
|
232
|
+
tables = []
|
|
233
|
+
|
|
234
|
+
# Model performance table
|
|
235
|
+
csv_file = self.data_dir / "model_performance.csv"
|
|
236
|
+
if csv_file.exists():
|
|
237
|
+
with open(csv_file, 'r') as f:
|
|
238
|
+
reader = csv.reader(f)
|
|
239
|
+
rows = list(reader)
|
|
240
|
+
if rows:
|
|
241
|
+
tables.append({
|
|
242
|
+
"caption": "Model Performance Comparison",
|
|
243
|
+
"data": rows,
|
|
244
|
+
"format": "booktabs"
|
|
245
|
+
})
|
|
246
|
+
|
|
247
|
+
# Training metrics table
|
|
248
|
+
csv_file2 = self.data_dir / "training_metrics.csv"
|
|
249
|
+
if csv_file2.exists():
|
|
250
|
+
with open(csv_file2, 'r') as f:
|
|
251
|
+
reader = csv.reader(f)
|
|
252
|
+
rows = list(reader)
|
|
253
|
+
if rows and len(rows) > 1:
|
|
254
|
+
# Only first 5 data rows for conciseness
|
|
255
|
+
limited_rows = [rows[0]] + rows[1:6]
|
|
256
|
+
tables.append({
|
|
257
|
+
"caption": "Training Progression (First 5 Epochs)",
|
|
258
|
+
"data": limited_rows,
|
|
259
|
+
"format": "booktabs"
|
|
260
|
+
})
|
|
261
|
+
|
|
262
|
+
return tables
|
|
263
|
+
|
|
264
|
+
def load_figures(self) -> List[Dict]:
|
|
265
|
+
"""
|
|
266
|
+
Discover figure files in images directory and load placement guidance from README.
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
List of figure dictionaries with descriptions and placement guidance
|
|
270
|
+
"""
|
|
271
|
+
figures = []
|
|
272
|
+
|
|
273
|
+
if not self.images_dir.exists():
|
|
274
|
+
return figures
|
|
275
|
+
|
|
276
|
+
# Load image descriptions from README.md if it exists
|
|
277
|
+
image_guidance = {}
|
|
278
|
+
readme_path = self.images_dir / "README.md"
|
|
279
|
+
if readme_path.exists():
|
|
280
|
+
with open(readme_path, 'r', encoding='utf-8') as f:
|
|
281
|
+
readme_content = f.read()
|
|
282
|
+
image_guidance = self._parse_image_readme(readme_content)
|
|
283
|
+
|
|
284
|
+
# Look for common image extensions
|
|
285
|
+
for ext in ['*.png', '*.jpg', '*.jpeg', '*.pdf']:
|
|
286
|
+
for img_path in self.images_dir.glob(ext):
|
|
287
|
+
filename = img_path.name
|
|
288
|
+
|
|
289
|
+
# Get guidance from README if available
|
|
290
|
+
guidance = image_guidance.get(filename, {})
|
|
291
|
+
|
|
292
|
+
# Calculate path relative to output directory (pdflatex runs from artifacts/output/)
|
|
293
|
+
relative_path = f"../sample_content/{self.content_source}/images/" + filename
|
|
294
|
+
|
|
295
|
+
figures.append({
|
|
296
|
+
"path": relative_path,
|
|
297
|
+
"caption": guidance.get("caption", img_path.stem.replace('_', ' ').replace('-', ' ').title()),
|
|
298
|
+
"width": guidance.get("width", "0.8\\textwidth"),
|
|
299
|
+
"description": guidance.get("description", ""),
|
|
300
|
+
"placement": guidance.get("placement", "")
|
|
301
|
+
})
|
|
302
|
+
|
|
303
|
+
return figures
|
|
304
|
+
|
|
305
|
+
def _fix_common_latex_issues(self, latex_content: str) -> str:
|
|
306
|
+
"""
|
|
307
|
+
Fix common LaTeX issues that the LLM often generates.
|
|
308
|
+
|
|
309
|
+
These are syntactic issues that prevent compilation.
|
|
310
|
+
"""
|
|
311
|
+
import re
|
|
312
|
+
fixes_applied = []
|
|
313
|
+
|
|
314
|
+
# Fix invalid TikZ options
|
|
315
|
+
# "letter spacing=X" is not a valid TikZ option, remove it
|
|
316
|
+
if 'letter spacing=' in latex_content:
|
|
317
|
+
latex_content = re.sub(r',?\s*letter spacing=[^,\]]+', '', latex_content)
|
|
318
|
+
fixes_applied.append("Removed invalid 'letter spacing' TikZ option")
|
|
319
|
+
|
|
320
|
+
# Fix other common invalid TikZ options
|
|
321
|
+
invalid_tikz_opts = ['word spacing=', 'tracking=', 'stretch=']
|
|
322
|
+
for opt in invalid_tikz_opts:
|
|
323
|
+
if opt in latex_content:
|
|
324
|
+
latex_content = re.sub(rf',?\s*{re.escape(opt)}[^,\]]+', '', latex_content)
|
|
325
|
+
fixes_applied.append(f"Removed invalid '{opt[:-1]}' TikZ option")
|
|
326
|
+
|
|
327
|
+
# Replace placeholder figures with actual images
|
|
328
|
+
latex_content = self._replace_placeholder_figures(latex_content)
|
|
329
|
+
|
|
330
|
+
if fixes_applied:
|
|
331
|
+
print(f"š§ Fixed LaTeX issues: {', '.join(fixes_applied)}")
|
|
332
|
+
|
|
333
|
+
return latex_content
|
|
334
|
+
|
|
335
|
+
def _replace_placeholder_figures(self, latex_content: str) -> str:
|
|
336
|
+
"""
|
|
337
|
+
Replace LLM-generated placeholder figures with actual images.
|
|
338
|
+
|
|
339
|
+
The LLM sometimes generates text placeholders like:
|
|
340
|
+
\\fbox{\\parbox{...}{[Chart Name]}}
|
|
341
|
+
|
|
342
|
+
This replaces them with actual \\includegraphics commands.
|
|
343
|
+
"""
|
|
344
|
+
# Get available images
|
|
345
|
+
figures = self.load_figures()
|
|
346
|
+
if not figures:
|
|
347
|
+
return latex_content
|
|
348
|
+
|
|
349
|
+
# Get content images (exclude cover, barcode)
|
|
350
|
+
content_images = []
|
|
351
|
+
for fig in figures:
|
|
352
|
+
fig_path = fig.get('path', '')
|
|
353
|
+
filename = fig_path.split('/')[-1].lower() if fig_path else ''
|
|
354
|
+
if not any(skip in filename for skip in ['cover', 'barcode']):
|
|
355
|
+
content_images.append(fig)
|
|
356
|
+
|
|
357
|
+
if not content_images:
|
|
358
|
+
return latex_content
|
|
359
|
+
|
|
360
|
+
# Find and replace placeholder patterns using string operations
|
|
361
|
+
# Look for \fbox{\parbox patterns that contain bracketed placeholder text
|
|
362
|
+
lines = latex_content.split('\n')
|
|
363
|
+
new_lines = []
|
|
364
|
+
replacements = 0
|
|
365
|
+
image_idx = 0
|
|
366
|
+
|
|
367
|
+
for line in lines:
|
|
368
|
+
# Check if this line contains a placeholder figure
|
|
369
|
+
if '\\fbox{\\parbox' in line and '[' in line and ']' in line:
|
|
370
|
+
# This looks like a placeholder - check for common indicators
|
|
371
|
+
line_lower = line.lower()
|
|
372
|
+
is_placeholder = any(indicator in line_lower for indicator in [
|
|
373
|
+
'placeholder', 'would be displayed', 'image here',
|
|
374
|
+
'chart]', 'graph]', 'figure]', 'comparison]'
|
|
375
|
+
])
|
|
376
|
+
|
|
377
|
+
if is_placeholder and image_idx < len(content_images):
|
|
378
|
+
# Replace with actual image
|
|
379
|
+
img = content_images[image_idx]
|
|
380
|
+
image_idx += 1
|
|
381
|
+
new_line = f"\\includegraphics[width=0.8\\textwidth]{{{img['path']}}}"
|
|
382
|
+
new_lines.append(new_line)
|
|
383
|
+
replacements += 1
|
|
384
|
+
continue
|
|
385
|
+
|
|
386
|
+
new_lines.append(line)
|
|
387
|
+
|
|
388
|
+
if replacements > 0:
|
|
389
|
+
print(f"š§ Replaced {replacements} placeholder figure(s) with actual images")
|
|
390
|
+
return '\n'.join(new_lines)
|
|
391
|
+
|
|
392
|
+
return latex_content
|
|
393
|
+
|
|
394
|
+
    def _fix_image_paths(self, latex_content: str) -> str:
        """
        Fix image paths in LaTeX content.

        The LLM often generates incorrect relative paths like:
        - sample_content/magazine/images/image.jpg
        - artifacts/sample_content/magazine/images/image.jpg
        - images/image.jpg
        - example-image (placeholder)

        The correct path (relative to artifacts/output/) is:
        - ../sample_content/{content_source}/images/image.jpg

        Returns:
            The LaTeX source with every \\includegraphics argument rewritten
            to the correct prefix (placeholders mapped onto real images).
        """
        import re

        correct_prefix = f"../sample_content/{self.content_source}/images/"

        # Get list of actual image files to use for replacements
        actual_images = []
        if self.images_dir.exists():
            for ext in ['*.png', '*.jpg', '*.jpeg']:
                actual_images.extend([f.name for f in self.images_dir.glob(ext)])

        # Separate special images from content images
        # NOTE(review): barcode_image is collected but never read below —
        # confirm whether barcode references should also be rewritten here.
        cover_image = None
        barcode_image = None
        content_images = []
        for img in actual_images:
            if 'cover' in img.lower():
                cover_image = img
            elif 'barcode' in img.lower():
                barcode_image = img
            else:
                content_images.append(img)

        # Track which content images have been used
        image_index = [0]  # Use list to allow mutation in nested function

        def fix_path(match):
            # match.group(0) is the whole \includegraphics[...]{path} command;
            # group(1) is just the brace argument. Substitutions below use
            # str.replace on the full command text.
            full_match = match.group(0)
            path = match.group(1)

            # Extract just the filename from any path
            filename = path.split('/')[-1]

            # Check if this is a placeholder image (LaTeX's built-in demo
            # images, e.g. "example-image-a", or a literal "placeholder").
            is_placeholder = filename.startswith('example-image') or filename == 'placeholder'

            # Check for special images by context: full-page sizing implies
            # a cover/background image.
            if 'paperwidth' in full_match or 'paperheight' in full_match:
                # This is likely a cover/background image
                if cover_image:
                    return full_match.replace(path, correct_prefix + cover_image)

            if is_placeholder and content_images:
                # Replace placeholder with actual content image, cycling
                # round-robin through the available files.
                actual_file = content_images[image_index[0] % len(content_images)]
                image_index[0] += 1
                return full_match.replace(path, correct_prefix + actual_file)

            # Skip if already has correct prefix with a real filename
            if path.startswith(correct_prefix) and not is_placeholder:
                return full_match

            # Reconstruct with correct prefix
            new_path = correct_prefix + filename
            return full_match.replace(path, new_path)

        # Match \includegraphics[...]{path} or \includegraphics{path}
        pattern = r'\\includegraphics(?:\[[^\]]*\])?\{([^}]+)\}'
        new_content, count = re.subn(pattern, fix_path, latex_content)

        if count > 0:
            # Count how many were actually changed (re.subn counts every
            # match, including no-op replacements).
            original_paths = re.findall(pattern, latex_content)
            new_paths = re.findall(pattern, new_content)
            changes = sum(1 for o, n in zip(original_paths, new_paths) if o != n)
            if changes > 0:
                print(f"š§ Fixed {changes} image path(s)")

        return new_content
|
|
475
|
+
|
|
476
|
+
def _ensure_printshop_attribution(self, latex_content: str, figures: list) -> str:
|
|
477
|
+
"""
|
|
478
|
+
Ensure the document has PrintShop attribution and barcode (if available).
|
|
479
|
+
|
|
480
|
+
Reads barcode text from config if available.
|
|
481
|
+
"""
|
|
482
|
+
# Check if PrintShop attribution already exists (the specific footer text, not disclaimer)
|
|
483
|
+
if 'Generated by DeepAgents PrintShop' in latex_content:
|
|
484
|
+
return latex_content
|
|
485
|
+
|
|
486
|
+
# Find barcode image path if available
|
|
487
|
+
barcode_path = None
|
|
488
|
+
for fig in figures:
|
|
489
|
+
# Extract filename from path to check for barcode
|
|
490
|
+
fig_path = fig.get('path', '')
|
|
491
|
+
filename = fig_path.split('/')[-1].lower() if fig_path else ''
|
|
492
|
+
if 'barcode' in filename:
|
|
493
|
+
barcode_path = fig_path
|
|
494
|
+
break
|
|
495
|
+
|
|
496
|
+
# Get barcode text from config (e.g., "ISSUE 01 | $9.99 US")
|
|
497
|
+
barcode_text = self.config.get('barcode_text', '')
|
|
498
|
+
|
|
499
|
+
print(f"š Adding PrintShop attribution...")
|
|
500
|
+
|
|
501
|
+
# Minimal attribution block - just the tool credit and barcode
|
|
502
|
+
attribution_code = "\n% PrintShop Attribution\n"
|
|
503
|
+
|
|
504
|
+
if barcode_path:
|
|
505
|
+
attribution_code += f"""\\vfill
|
|
506
|
+
\\begin{{center}}
|
|
507
|
+
\\includegraphics[width=1in]{{{barcode_path}}}
|
|
508
|
+
"""
|
|
509
|
+
if barcode_text:
|
|
510
|
+
# Escape $ signs for LaTeX
|
|
511
|
+
barcode_text_escaped = barcode_text.replace('$', '\\$')
|
|
512
|
+
attribution_code += f"""
|
|
513
|
+
\\vspace{{0.3em}}
|
|
514
|
+
{{\\tiny {barcode_text_escaped}}}
|
|
515
|
+
"""
|
|
516
|
+
attribution_code += f"""
|
|
517
|
+
\\vspace{{1em}}
|
|
518
|
+
{{\\footnotesize\\itshape Generated by DeepAgents PrintShop}}
|
|
519
|
+
\\end{{center}}
|
|
520
|
+
"""
|
|
521
|
+
else:
|
|
522
|
+
attribution_code += """\\vfill
|
|
523
|
+
\\begin{center}
|
|
524
|
+
{\\footnotesize\\itshape Generated by DeepAgents PrintShop}
|
|
525
|
+
\\end{center}
|
|
526
|
+
"""
|
|
527
|
+
|
|
528
|
+
# Insert before \end{document}
|
|
529
|
+
end_doc_pos = latex_content.find('\\end{document}')
|
|
530
|
+
if end_doc_pos != -1:
|
|
531
|
+
latex_content = latex_content[:end_doc_pos] + attribution_code + latex_content[end_doc_pos:]
|
|
532
|
+
|
|
533
|
+
return latex_content
|
|
534
|
+
|
|
535
|
+
def _inject_missing_figures(self, latex_content: str) -> str:
|
|
536
|
+
"""
|
|
537
|
+
Post-process LaTeX to inject missing figures if the LLM didn't include them.
|
|
538
|
+
|
|
539
|
+
This is a safety net for when the LLM generation doesn't include images.
|
|
540
|
+
Note: PrintShop attribution is handled separately in generate_and_compile().
|
|
541
|
+
"""
|
|
542
|
+
# Check if figures are already included (uncommented)
|
|
543
|
+
# Look for \includegraphics that's NOT on a line starting with %
|
|
544
|
+
has_uncommented_figures = False
|
|
545
|
+
for line in latex_content.split('\n'):
|
|
546
|
+
stripped = line.strip()
|
|
547
|
+
if '\\includegraphics' in stripped and not stripped.startswith('%'):
|
|
548
|
+
has_uncommented_figures = True
|
|
549
|
+
break
|
|
550
|
+
|
|
551
|
+
if has_uncommented_figures:
|
|
552
|
+
return latex_content # Figures already present
|
|
553
|
+
|
|
554
|
+
figures = self.load_figures()
|
|
555
|
+
if not figures:
|
|
556
|
+
return latex_content # No figures to inject
|
|
557
|
+
|
|
558
|
+
print(f"š¼ļø Injecting {len(figures)} missing figures into LaTeX...")
|
|
559
|
+
|
|
560
|
+
# For magazine content, handle special images
|
|
561
|
+
if self.content_source == 'magazine':
|
|
562
|
+
# Find the cover image and barcode
|
|
563
|
+
cover_image = None
|
|
564
|
+
barcode_image = None
|
|
565
|
+
other_images = []
|
|
566
|
+
|
|
567
|
+
for fig in figures:
|
|
568
|
+
# Extract filename from path
|
|
569
|
+
fig_path = fig.get('path', '')
|
|
570
|
+
filename = fig_path.split('/')[-1].lower() if fig_path else ''
|
|
571
|
+
if 'cover' in filename:
|
|
572
|
+
cover_image = fig
|
|
573
|
+
elif 'barcode' in filename:
|
|
574
|
+
barcode_image = fig
|
|
575
|
+
else:
|
|
576
|
+
other_images.append(fig)
|
|
577
|
+
|
|
578
|
+
# Inject cover image as background on first page
|
|
579
|
+
if cover_image:
|
|
580
|
+
cover_code = f"""
|
|
581
|
+
% Cover page background
|
|
582
|
+
\\AddToShipoutPictureBG*{{%
|
|
583
|
+
\\AtPageUpperLeft{{%
|
|
584
|
+
\\includegraphics[width=\\paperwidth,height=\\paperheight]{{{cover_image['path']}}}%
|
|
585
|
+
}}%
|
|
586
|
+
}}
|
|
587
|
+
"""
|
|
588
|
+
# Insert after \begin{document}
|
|
589
|
+
begin_doc_pos = latex_content.find('\\begin{document}')
|
|
590
|
+
if begin_doc_pos != -1:
|
|
591
|
+
insert_pos = latex_content.find('\n', begin_doc_pos) + 1
|
|
592
|
+
latex_content = latex_content[:insert_pos] + cover_code + latex_content[insert_pos:]
|
|
593
|
+
|
|
594
|
+
# Inject barcode before \end{document}
|
|
595
|
+
if barcode_image:
|
|
596
|
+
barcode_code = f"""
|
|
597
|
+
% Back cover barcode
|
|
598
|
+
\\newpage
|
|
599
|
+
\\thispagestyle{{empty}}
|
|
600
|
+
\\vspace*{{\\fill}}
|
|
601
|
+
\\begin{{center}}
|
|
602
|
+
\\includegraphics[width=1in]{{{barcode_image['path']}}}
|
|
603
|
+
|
|
604
|
+
\\vspace{{0.3em}}
|
|
605
|
+
{{\\tiny ISSUE 01 | \\$9.99 US}}
|
|
606
|
+
\\end{{center}}
|
|
607
|
+
"""
|
|
608
|
+
end_doc_pos = latex_content.find('\\end{document}')
|
|
609
|
+
if end_doc_pos != -1:
|
|
610
|
+
latex_content = latex_content[:end_doc_pos] + barcode_code + latex_content[end_doc_pos:]
|
|
611
|
+
|
|
612
|
+
# Inject chart images into content sections
|
|
613
|
+
for fig in other_images:
|
|
614
|
+
# Extract filename from path
|
|
615
|
+
fig_path = fig.get('path', '')
|
|
616
|
+
filename = fig_path.split('/')[-1].lower() if fig_path else ''
|
|
617
|
+
# Skip certain images that are decorative
|
|
618
|
+
if any(skip in filename for skip in ['cover', 'logo', 'icon']):
|
|
619
|
+
continue
|
|
620
|
+
|
|
621
|
+
# For charts and data visualizations, inject after methodology or results sections
|
|
622
|
+
if 'chart' in filename or 'graph' in filename or 'comparison' in filename:
|
|
623
|
+
figure_code = f"""
|
|
624
|
+
\\begin{{figure}}[H]
|
|
625
|
+
\\centering
|
|
626
|
+
\\includegraphics[width=0.9\\textwidth]{{{fig['path']}}}
|
|
627
|
+
\\caption{{{fig.get('caption', 'Figure')}}}
|
|
628
|
+
\\end{{figure}}
|
|
629
|
+
"""
|
|
630
|
+
# Try to insert after a results or data section
|
|
631
|
+
for section_marker in ['State of AI Agents', 'methodology', 'results', 'data']:
|
|
632
|
+
section_pos = latex_content.lower().find(section_marker.lower())
|
|
633
|
+
if section_pos != -1:
|
|
634
|
+
# Find end of paragraph after section
|
|
635
|
+
next_para = latex_content.find('\\end{multicols}', section_pos)
|
|
636
|
+
if next_para != -1:
|
|
637
|
+
latex_content = latex_content[:next_para] + figure_code + latex_content[next_para:]
|
|
638
|
+
break
|
|
639
|
+
else:
|
|
640
|
+
# For other document types, inject figures at appropriate locations
|
|
641
|
+
for fig in figures:
|
|
642
|
+
fig_width = fig.get('width', '0.8\\\\textwidth')
|
|
643
|
+
figure_code = f"""
|
|
644
|
+
\\begin{{figure}}[H]
|
|
645
|
+
\\centering
|
|
646
|
+
\\includegraphics[width={fig_width}]{{{fig['path']}}}
|
|
647
|
+
\\caption{{{fig.get('caption', 'Figure')}}}
|
|
648
|
+
\\end{{figure}}
|
|
649
|
+
"""
|
|
650
|
+
# Insert before \end{document}
|
|
651
|
+
end_doc_pos = latex_content.find('\\end{document}')
|
|
652
|
+
if end_doc_pos != -1:
|
|
653
|
+
latex_content = latex_content[:end_doc_pos] + figure_code + latex_content[end_doc_pos:]
|
|
654
|
+
|
|
655
|
+
return latex_content
|
|
656
|
+
|
|
657
|
+
def _parse_image_readme(self, readme_content: str) -> Dict:
|
|
658
|
+
"""
|
|
659
|
+
Parse the images README.md to extract image descriptions and placement guidance.
|
|
660
|
+
|
|
661
|
+
Returns:
|
|
662
|
+
Dictionary mapping filename to guidance dict
|
|
663
|
+
"""
|
|
664
|
+
guidance = {}
|
|
665
|
+
current_file = None
|
|
666
|
+
current_data = {}
|
|
667
|
+
|
|
668
|
+
for line in readme_content.split('\n'):
|
|
669
|
+
line = line.strip()
|
|
670
|
+
|
|
671
|
+
# Detect image filename (e.g., **cover-image.jpg**)
|
|
672
|
+
if line.startswith('**') and line.endswith('**') and ('.' in line):
|
|
673
|
+
# Save previous entry
|
|
674
|
+
if current_file:
|
|
675
|
+
guidance[current_file] = current_data
|
|
676
|
+
|
|
677
|
+
current_file = line.strip('*')
|
|
678
|
+
current_data = {}
|
|
679
|
+
|
|
680
|
+
# Parse description, placement, caption
|
|
681
|
+
elif current_file and line.startswith('- '):
|
|
682
|
+
if line.startswith('- Description:'):
|
|
683
|
+
current_data['description'] = line.replace('- Description:', '').strip()
|
|
684
|
+
elif line.startswith('- Placement:'):
|
|
685
|
+
current_data['placement'] = line.replace('- Placement:', '').strip()
|
|
686
|
+
elif line.startswith('- Caption suggestion:'):
|
|
687
|
+
current_data['caption'] = line.replace('- Caption suggestion:', '').strip().strip('"')
|
|
688
|
+
elif line.startswith('- Style:'):
|
|
689
|
+
# Check for width hints in style
|
|
690
|
+
if '40%' in line:
|
|
691
|
+
current_data['width'] = "0.4\\textwidth"
|
|
692
|
+
elif '30%' in line:
|
|
693
|
+
current_data['width'] = "0.3\\textwidth"
|
|
694
|
+
|
|
695
|
+
# Save last entry
|
|
696
|
+
if current_file:
|
|
697
|
+
guidance[current_file] = current_data
|
|
698
|
+
|
|
699
|
+
return guidance
|
|
700
|
+
|
|
701
|
+
def _get_magazine_requirements(self) -> List[str]:
    """Assemble magazine-specific LaTeX requirements.

    Combines the concrete preamble emitted by MagazineLayoutGenerator with
    that generator's layout rules, then layers on the content type
    definition's rendering instructions and any rendering notes from the
    content config.
    """
    layout_gen = MagazineLayoutGenerator()
    preamble_code = layout_gen.get_full_preamble()

    # Concrete preamble first: the LLM must copy this code verbatim.
    reqs = [
        "MAGAZINE PREAMBLE - INCLUDE THIS EXACT CODE IN YOUR DOCUMENT PREAMBLE:\n"
        "```latex\n"
        + preamble_code +
        "\n```\n"
        "You MUST include all these package imports and macro definitions in your document preamble."
    ]

    # Layout rules supplied by the same generator.
    reqs.extend(layout_gen.get_magazine_requirements())

    # Content type definition, when one was loaded into the config.
    ctype = self.config.get('_content_type')
    if ctype and ctype.type_md_content:
        reqs.append(
            "CONTENT TYPE RENDERING INSTRUCTIONS:\n" + ctype.type_md_content
        )

    # Content-specific rendering notes from config.md.
    notes = self.config.get('rendering_notes', '')
    if notes:
        reqs.append(
            "ADDITIONAL RENDERING NOTES FROM CONTENT CONFIG:\n" + notes
        )

    return reqs
|
+
def _get_research_report_requirements(self) -> List[str]:
|
|
739
|
+
"""Get research report-specific LaTeX requirements.
|
|
740
|
+
|
|
741
|
+
Uses the content type definition for rendering instructions.
|
|
742
|
+
"""
|
|
743
|
+
requirements = []
|
|
744
|
+
|
|
745
|
+
# Inject the content type definition as rendering context
|
|
746
|
+
content_type = self.config.get('_content_type')
|
|
747
|
+
if content_type and content_type.type_md_content:
|
|
748
|
+
requirements.append(
|
|
749
|
+
"CONTENT TYPE RENDERING INSTRUCTIONS:\n" + content_type.type_md_content
|
|
750
|
+
)
|
|
751
|
+
else:
|
|
752
|
+
# Fallback if type definition not available
|
|
753
|
+
requirements.extend([
|
|
754
|
+
"Use standard academic article format",
|
|
755
|
+
"Include abstract if content has one",
|
|
756
|
+
"Use numbered sections and subsections",
|
|
757
|
+
"Format references properly if bibliography exists",
|
|
758
|
+
"Use single-column layout throughout"
|
|
759
|
+
])
|
|
760
|
+
|
|
761
|
+
# Inject rendering notes from config.md
|
|
762
|
+
rendering_notes = self.config.get('rendering_notes', '')
|
|
763
|
+
if rendering_notes:
|
|
764
|
+
requirements.append(
|
|
765
|
+
"ADDITIONAL RENDERING NOTES FROM CONTENT CONFIG:\n" + rendering_notes
|
|
766
|
+
)
|
|
767
|
+
|
|
768
|
+
return requirements
|
|
769
|
+
|
|
770
|
+
def generate_with_patterns(self) -> LaTeXGenerationResult:
    """
    Generate LaTeX document using LLM with learned patterns.

    Pipeline: pull pattern context from the pattern injector, load markdown
    sections / CSV tables / figures, assemble a requirements list (base
    typography rules, optional disclaimer, document-type-specific rules,
    learned patterns), then delegate generation to the LLM generator.

    Returns:
        Generation result with LaTeX content
    """
    print("š LLM-Enhanced LaTeX Generation")
    print("=" * 60)
    print(f"š Content Source: {self.content_source}")
    print(f"š Document Type: {self.document_type}")
    print(f"š Title: {self.config.get('title', 'Untitled')}")
    print()

    # Get pattern context for Author agent
    # (empty/None when no historical documents have been learned yet)
    pattern_context = self.pattern_injector.get_context_for_author()

    if pattern_context:
        print("ā Loaded learned patterns from historical documents")
        print(self.pattern_injector.get_summary())
    else:
        print(f"ā¹ļø No learned patterns available yet for '{self.document_type}'")

    print()

    # Load document components
    sections = self.load_all_markdown_sections()
    tables = self.load_csv_tables()
    figures = self.load_figures()

    print(f"š Loaded {len(sections)} content sections")
    print(f"š Loaded {len(tables)} data tables")
    print(f"š¼ļø Found {len(figures)} figures")
    print()

    # Build base requirements (always applied, regardless of document type)
    requirements = [
        "Use professional typography packages (lmodern)",
        "Format tables with booktabs package",
        "Include proper hyperref setup for navigation",
        "Use appropriate section hierarchy",
        "Add proper spacing and layout"
    ]

    # Add disclaimer if present in config
    if self.config.get('disclaimer'):
        disclaimer_text = self.config['disclaimer']
        requirements.append(f"""IMPORTANT - DISCLAIMER SECTION:
Include a prominently styled disclaimer box/section at the VERY BEGINNING of the document (right after the title/maketitle).
Use a framed box or shaded environment to make it stand out.
The disclaimer text is:
"{disclaimer_text}"
This disclaimer MUST appear before any content sections.""")
        print("š Adding disclaimer requirement")

    # Add document-type-specific requirements
    # (magazine layout vs. research-report defaults)
    if self.content_source == 'magazine' or self.document_type == 'magazine':
        print("š° Adding magazine-specific styling requirements")
        requirements.extend(self._get_magazine_requirements())
    else:
        requirements.extend(self._get_research_report_requirements())

    # Add pattern-based requirements
    if pattern_context:
        requirements.append(
            "IMPORTANT: Apply the following learned patterns from historical documents:\n" +
            pattern_context
        )

    # Build title from config (subtitle, when present, is joined with ": ")
    title = self.config.get('title', 'Document')
    if self.config.get('subtitle'):
        title = f"{title}: {self.config['subtitle']}"

    # Create generation request
    request = LaTeXGenerationRequest(
        title=title,
        author=self.config.get('author', 'Author'),
        content_sections=sections,
        tables=tables,
        figures=figures,
        requirements=requirements
    )

    # Generate using LLM (validate=True asks the generator to self-check)
    print("š¤ Generating LaTeX with Claude Sonnet 4.5...")
    result = self.llm_generator.generate_document(request, validate=True)

    if result.success:
        print(f"ā Generation successful!")
        if result.improvements_made:
            # Only the first five improvements are echoed to the console.
            print(f"š” Applied {len(result.improvements_made)} improvements:")
            for improvement in result.improvements_made[:5]:
                print(f" ⢠{improvement}")
        if result.warnings:
            # Only the first three warnings are echoed to the console.
            print(f"ā ļø {len(result.warnings)} warnings:")
            for warning in result.warnings[:3]:
                print(f" ⢠{warning}")
    else:
        print(f"ā Generation failed: {result.error_message}")

    return result
873
|
+
def generate_and_compile(self, max_llm_corrections: int = 3) -> Dict:
    """
    Generate LaTeX and compile to PDF with LLM self-correction loop.

    Order of post-processing matters: truncation repair runs first so
    \\end{document} exists, then figure injection, path fixing, common-issue
    fixing, and attribution; only then is the file written and compiled.

    Args:
        max_llm_corrections: Maximum LLM self-correction attempts

    Returns:
        Dictionary with paths and status
    """
    # Generate LaTeX
    result = self.generate_with_patterns()

    if not result.success:
        return {
            "success": False,
            "error": result.error_message
        }

    latex_content = result.latex_content
    output_filename = f"{self.content_source}.tex"
    tex_path = self.output_dir / output_filename

    # Pre-validation: Check for truncated output FIRST (before figure injection)
    # This ensures \end{document} exists for subsequent processing
    if '\\end{document}' not in latex_content:
        print("ā ļø Generated LaTeX appears truncated (missing \\end{document})")
        print("š§ Attempting to complete the document...")
        # Use specialized truncation completion (not full document regeneration)
        latex_content, fixed = self.llm_generator.complete_truncated_document(latex_content)
        if fixed:
            print("ā Document completion successful")
        else:
            # Deliberately best-effort: compilation below may still succeed.
            print("ā ļø Document completion failed - will try to compile anyway")

    # Post-process: Inject figures if missing (AFTER self-correction so \end{document} exists)
    latex_content = self._inject_missing_figures(latex_content)

    # Fix image paths: LLM often generates wrong relative paths
    # Correct path is ../sample_content/{content_source}/images/ (relative to artifacts/output/)
    latex_content = self._fix_image_paths(latex_content)

    # Fix common LaTeX issues that LLM generates
    latex_content = self._fix_common_latex_issues(latex_content)

    # Ensure PrintShop attribution is present (runs after figure injection)
    figures = self.load_figures()
    latex_content = self._ensure_printshop_attribution(latex_content, figures)

    # Save LaTeX file
    with open(tex_path, 'w', encoding='utf-8') as f:
        f.write(latex_content)
    print(f"\nš¾ Saved LaTeX to: {tex_path}")

    # Compile with LLM self-correction loop
    print("\nš Compiling to PDF...")
    pdf_path = tex_path.with_suffix('.pdf')

    # One initial attempt plus up to max_llm_corrections corrected retries.
    for attempt in range(max_llm_corrections + 1):
        success, message = self.pdf_compiler.compile(str(tex_path))

        if success:
            print(f"ā PDF generated: {pdf_path}")
            return {
                "success": True,
                "tex_path": str(tex_path),
                "pdf_path": str(pdf_path),
                "latex_result": result,
                "compilation_result": {"success": True, "message": message}
            }

        # Last attempt - give up
        if attempt == max_llm_corrections:
            print(f"ā PDF compilation failed after {max_llm_corrections} LLM correction attempts")
            print(f"Error: {message}")
            break

        # Try LLM self-correction (one correction pass per loop iteration)
        print(f"\nš¤ LLM Self-Correction Attempt {attempt + 1}/{max_llm_corrections}...")
        corrected_latex, fixed, corrections = self.llm_generator.self_correct_compilation_errors(
            latex_content, message, max_attempts=1
        )

        if fixed:
            latex_content = corrected_latex
            # Save corrected version so the next compile attempt sees it
            with open(tex_path, 'w', encoding='utf-8') as f:
                f.write(latex_content)
            print(f" ā Applied corrections: {corrections}")
        else:
            print(f" ā LLM could not fix the issue")
            break

    # Fell out of the loop without a successful compile.
    return {
        "success": False,
        "tex_path": str(tex_path),
        "pdf_path": None,
        "latex_result": result,
        "compilation_result": {"success": False, "message": message}
    }
|
+
def main():
    """CLI entry point demonstrating LLM-enhanced report generation."""
    import argparse

    arg_parser = argparse.ArgumentParser(description='LLM-Enhanced Document Generator')
    arg_parser.add_argument(
        '--content', '-c',
        default='research_report',
        help='Content source folder (e.g., research_report, magazine)'
    )
    arg_parser.add_argument(
        '--type', '-t',
        default=None,
        help='Document type for pattern learning (defaults to content source)'
    )
    opts = arg_parser.parse_args()

    source_name = opts.content
    # Document type falls back to the content source when not given.
    doc_type = opts.type if opts.type else source_name

    banner = "=" * 60
    print("\n" + banner)
    print("š§ LLM-Enhanced Document Generator with Pattern Learning")
    print(banner)
    print(f"š Content source: {source_name}")
    print(f"š Document type: {doc_type}")
    print()

    outcome = LLMResearchReportGenerator(
        content_source=source_name,
        document_type=doc_type
    ).generate_and_compile()

    print("\n" + banner)
    if outcome["success"]:
        print("ā Document generation complete!")
        print(banner)
        print(f"\nš LaTeX: {outcome['tex_path']}")
        print(f"š PDF: {outcome['pdf_path']}")
    else:
        print("ā Document generation failed")
        print(banner)
        if outcome.get("error"):
            print(f"\nError: {outcome['error']}")
    print()
|
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()