deepagents-printshop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/content_editor/__init__.py +1 -0
- agents/content_editor/agent.py +279 -0
- agents/content_editor/content_reviewer.py +327 -0
- agents/content_editor/versioned_agent.py +455 -0
- agents/latex_specialist/__init__.py +1 -0
- agents/latex_specialist/agent.py +531 -0
- agents/latex_specialist/latex_analyzer.py +510 -0
- agents/latex_specialist/latex_optimizer.py +1192 -0
- agents/qa_orchestrator/__init__.py +1 -0
- agents/qa_orchestrator/agent.py +603 -0
- agents/qa_orchestrator/langgraph_workflow.py +733 -0
- agents/qa_orchestrator/pipeline_types.py +72 -0
- agents/qa_orchestrator/quality_gates.py +495 -0
- agents/qa_orchestrator/workflow_coordinator.py +139 -0
- agents/research_agent/__init__.py +1 -0
- agents/research_agent/agent.py +258 -0
- agents/research_agent/llm_report_generator.py +1023 -0
- agents/research_agent/report_generator.py +536 -0
- agents/visual_qa/__init__.py +1 -0
- agents/visual_qa/agent.py +410 -0
- deepagents_printshop-0.1.0.dist-info/METADATA +744 -0
- deepagents_printshop-0.1.0.dist-info/RECORD +37 -0
- deepagents_printshop-0.1.0.dist-info/WHEEL +4 -0
- deepagents_printshop-0.1.0.dist-info/entry_points.txt +2 -0
- deepagents_printshop-0.1.0.dist-info/licenses/LICENSE +86 -0
- tools/__init__.py +1 -0
- tools/change_tracker.py +419 -0
- tools/content_type_loader.py +171 -0
- tools/graph_generator.py +281 -0
- tools/latex_generator.py +374 -0
- tools/llm_latex_generator.py +678 -0
- tools/magazine_layout.py +462 -0
- tools/pattern_injector.py +250 -0
- tools/pattern_learner.py +477 -0
- tools/pdf_compiler.py +386 -0
- tools/version_manager.py +346 -0
- tools/visual_qa.py +799 -0
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LangGraph QA Pipeline Workflow
|
|
3
|
+
|
|
4
|
+
Declarative StateGraph that orchestrates the QA pipeline. Node functions invoke
|
|
5
|
+
downstream agents directly -- no WorkflowExecution object, no glue code.
|
|
6
|
+
PipelineState is the single source of truth.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import operator
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Annotated, Any, Dict, List, Literal, Optional, TypedDict
|
|
15
|
+
|
|
16
|
+
from dotenv import load_dotenv
|
|
17
|
+
|
|
18
|
+
load_dotenv()
|
|
19
|
+
|
|
20
|
+
from langgraph.checkpoint.memory import MemorySaver
|
|
21
|
+
from langgraph.graph import END, START, StateGraph
|
|
22
|
+
|
|
23
|
+
# Add project root to path
|
|
24
|
+
project_root = Path(__file__).parent.parent.parent
|
|
25
|
+
sys.path.insert(0, str(project_root))
|
|
26
|
+
|
|
27
|
+
from agents.qa_orchestrator.quality_gates import ( # noqa: E402, I001
|
|
28
|
+
QualityAssessment,
|
|
29
|
+
QualityGateEvaluation,
|
|
30
|
+
QualityGateResult,
|
|
31
|
+
)
|
|
32
|
+
from agents.qa_orchestrator.pipeline_types import AgentResult, AgentType # noqa: E402, I001
|
|
33
|
+
from agents.qa_orchestrator.workflow_coordinator import WorkflowCoordinator # noqa: E402, I001
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# LLM-based LaTeX error fixer (second-tier, after PDFCompiler regex fixes)
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
def _llm_fix_latex(tex_content: str, error_log: str, attempt: int) -> Optional[str]:
|
|
41
|
+
"""Ask Claude to fix LaTeX compilation errors.
|
|
42
|
+
|
|
43
|
+
Preserves the original preamble by sending only the document body to the
|
|
44
|
+
LLM for fixing, then reassembling preamble + fixed body. This prevents
|
|
45
|
+
the LLM from stripping custom macros, colors, and document class options.
|
|
46
|
+
|
|
47
|
+
Returns corrected .tex content, or None on failure.
|
|
48
|
+
"""
|
|
49
|
+
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
|
50
|
+
if not api_key:
|
|
51
|
+
return None
|
|
52
|
+
|
|
53
|
+
# Split into preamble and body at \begin{document}
|
|
54
|
+
split_marker = "\\begin{document}"
|
|
55
|
+
marker_pos = tex_content.find(split_marker)
|
|
56
|
+
if marker_pos == -1:
|
|
57
|
+
# No \begin{document} — send full content (legacy path)
|
|
58
|
+
original_preamble = None
|
|
59
|
+
body_to_fix = tex_content
|
|
60
|
+
else:
|
|
61
|
+
original_preamble = tex_content[:marker_pos + len(split_marker)]
|
|
62
|
+
body_to_fix = tex_content[marker_pos + len(split_marker):]
|
|
63
|
+
|
|
64
|
+
# Extract macro/environment names from preamble so the LLM knows what's available
|
|
65
|
+
available_macros = ""
|
|
66
|
+
if original_preamble:
|
|
67
|
+
import re
|
|
68
|
+
macro_names = re.findall(r'\\(?:newcommand|renewcommand)\{(\\[^}]+)\}', original_preamble)
|
|
69
|
+
env_names = re.findall(r'\\newenvironment\{([^}]+)\}', original_preamble)
|
|
70
|
+
color_names = re.findall(r'\\definecolor\{([^}]+)\}', original_preamble)
|
|
71
|
+
parts = []
|
|
72
|
+
if macro_names:
|
|
73
|
+
parts.append(f"Available macros: {', '.join(macro_names)}")
|
|
74
|
+
if env_names:
|
|
75
|
+
parts.append(f"Available environments: {', '.join(env_names)}")
|
|
76
|
+
if color_names:
|
|
77
|
+
parts.append(f"Available colors: {', '.join(color_names)}")
|
|
78
|
+
if parts:
|
|
79
|
+
available_macros = "\n".join(parts) + "\n\n"
|
|
80
|
+
|
|
81
|
+
try:
|
|
82
|
+
from anthropic import Anthropic
|
|
83
|
+
|
|
84
|
+
client = Anthropic(api_key=api_key)
|
|
85
|
+
|
|
86
|
+
if original_preamble:
|
|
87
|
+
prompt = (
|
|
88
|
+
f"The following LaTeX document BODY failed to compile (attempt {attempt}). "
|
|
89
|
+
"Fix ONLY the body errors and return ONLY the corrected body content "
|
|
90
|
+
"(everything AFTER \\begin{{document}} and BEFORE \\end{{document}}). "
|
|
91
|
+
"Do NOT include \\documentclass, preamble, \\begin{{document}}, or \\end{{document}}.\n"
|
|
92
|
+
"IMPORTANT: Preserve ALL \\includegraphics commands, \\begin{{figure}} environments, "
|
|
93
|
+
"and disclaimer sections exactly as they appear. Do not remove any images or figures.\n"
|
|
94
|
+
f"{available_macros}"
|
|
95
|
+
f"=== ERRORS ===\n{error_log[:3000]}\n\n"
|
|
96
|
+
f"=== DOCUMENT BODY ===\n{body_to_fix}"
|
|
97
|
+
)
|
|
98
|
+
else:
|
|
99
|
+
prompt = (
|
|
100
|
+
f"The following LaTeX document failed to compile (attempt {attempt}). "
|
|
101
|
+
"Fix the errors and return ONLY the corrected .tex content with no explanation.\n"
|
|
102
|
+
"IMPORTANT: Preserve ALL \\includegraphics commands, \\begin{{figure}} environments, "
|
|
103
|
+
"and disclaimer sections exactly as they appear. Do not remove any images or figures.\n\n"
|
|
104
|
+
f"=== ERRORS ===\n{error_log[:3000]}\n\n"
|
|
105
|
+
f"=== DOCUMENT ===\n{tex_content}"
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
response = client.messages.create(
|
|
109
|
+
model="claude-sonnet-4-20250514",
|
|
110
|
+
max_tokens=8000,
|
|
111
|
+
messages=[{
|
|
112
|
+
"role": "user",
|
|
113
|
+
"content": prompt,
|
|
114
|
+
}],
|
|
115
|
+
)
|
|
116
|
+
fixed = response.content[0].text.strip()
|
|
117
|
+
# Strip code fences if the LLM wrapped the output
|
|
118
|
+
import re
|
|
119
|
+
fixed = re.sub(r'^```(?:latex)?\s*\n', '', fixed)
|
|
120
|
+
fixed = re.sub(r'\n```\s*$', '', fixed)
|
|
121
|
+
|
|
122
|
+
if original_preamble:
|
|
123
|
+
# Reassemble: original preamble + fixed body + \end{document}
|
|
124
|
+
# Strip any preamble/documentclass the LLM may have included anyway
|
|
125
|
+
if "\\documentclass" in fixed:
|
|
126
|
+
doc_start = fixed.find(split_marker)
|
|
127
|
+
if doc_start != -1:
|
|
128
|
+
fixed = fixed[doc_start + len(split_marker):]
|
|
129
|
+
# Strip \end{document} if present (we'll add it)
|
|
130
|
+
fixed = re.sub(r'\\end\{document\}\s*$', '', fixed)
|
|
131
|
+
result = original_preamble + "\n" + fixed + "\n\\end{document}\n"
|
|
132
|
+
return result
|
|
133
|
+
else:
|
|
134
|
+
if fixed and "\\begin{document}" in fixed:
|
|
135
|
+
return fixed
|
|
136
|
+
return None
|
|
137
|
+
except Exception as e:
|
|
138
|
+
print(f" [LangGraph] LLM LaTeX fix attempt {attempt} failed: {e}")
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ---------------------------------------------------------------------------
|
|
143
|
+
# Custom reducer for merging dicts (used by agent_context)
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
def merge_dicts(left: Dict[str, Any], right: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new dict combining *left* and *right*; *right* wins on conflicts.

    Neither input is mutated. Used as the LangGraph reducer for
    ``agent_context`` so each node can contribute keys.
    """
    return {**left, **right}
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# ---------------------------------------------------------------------------
|
|
154
|
+
# Pipeline State
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
class PipelineState(TypedDict, total=False):
    """LangGraph state for the QA pipeline.

    List fields use ``operator.add`` (append semantics).
    ``agent_context`` uses ``merge_dicts`` so each node can contribute keys.
    Scalar fields use default replace semantics.

    ``total=False`` makes every key optional, so node functions may return
    partial updates containing only the keys they changed.
    """

    # --- workflow identity ---
    workflow_id: str        # identifier for this pipeline run
    content_source: str     # logical content name (e.g. "research_report")
    starting_version: str   # immutable input version (e.g. "v0_original")

    # --- output directory (set by orchestrator) ---
    output_dir: str         # where .tex/.pdf artifacts are written

    # --- mutable scalars (replace semantics) ---
    current_version: str        # latest version produced by any node
    current_stage: str          # name of the node that last updated state
    iterations_completed: int   # full pipeline cycles finished so far
    success: bool               # set True by completion_node
    human_handoff: bool         # set True by completion_node
    escalated: bool             # set True by escalation_node
    start_time: str             # ISO timestamp of pipeline start
    end_time: Optional[str]     # ISO timestamp, set by terminal nodes
    total_processing_time: Optional[float]  # seconds, filled at completion

    # --- append-only lists ---
    agent_results: Annotated[List[Dict[str, Any]], operator.add]
    quality_assessments: Annotated[List[Dict[str, Any]], operator.add]
    quality_evaluations: Annotated[List[Dict[str, Any]], operator.add]

    # --- inter-agent communication ---
    agent_context: Annotated[Dict[str, Any], merge_dicts]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
# Node functions
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
def content_review_node(state: PipelineState) -> Dict[str, Any]:
    """Run the content editor agent directly.

    Reuses an already-recorded ``v1_content_edited*`` version when one exists
    (e.g. on a resumed run); otherwise invokes the versioned content editor.
    On failure the current version is left unchanged and the error is
    captured in the AgentResult appended to ``agent_results``.
    """
    content_source = state.get("content_source", "research_report")
    iteration = state.get("iterations_completed", 0)
    # Iteration 0 keeps the plain name; later passes get a suffix so each
    # cycle produces a distinct version id.
    iteration_suffix = f"_iter{iteration + 1}" if iteration > 0 else ""
    target_version = f"v1_content_edited{iteration_suffix}"
    # Always use the starting version (v0_original) as input for content editing.
    # The starting version has all markdown files + config.md. Using
    # current_version here would feed the LaTeX .tex output back into the
    # content editor on iteration 2+, losing config.md and the content type.
    input_version = state.get("starting_version", "v0_original")

    start_time = datetime.now()

    try:
        from agents.content_editor.versioned_agent import VersionedContentEditorAgent
        from tools.version_manager import VersionManager

        version_manager = VersionManager()
        existing_version = version_manager.get_version(target_version)

        if existing_version:
            # Target version already exists — report it instead of re-running
            # the editor. 85 is the fallback score when metadata is missing.
            quality_score = existing_version.get("metadata", {}).get("improved_avg_quality", 85)
            processing_time = (datetime.now() - start_time).total_seconds()
            result = AgentResult(
                agent_type=AgentType.CONTENT_EDITOR,
                success=True,
                version_created=target_version,
                quality_score=quality_score,
                processing_time=processing_time,
                issues_found=[],
                optimizations_applied=["Using existing content editor version"],
                metadata=existing_version.get("metadata", {}),
            )
        else:
            agent = VersionedContentEditorAgent(content_source=content_source)
            results = agent.process_content_with_versioning(
                target_version=target_version,
                parent_version=input_version,
            )

            processing_time = (datetime.now() - start_time).total_seconds()
            quality_improvement = results["quality_progression"]["overall_improvement"]
            final_quality = results["quality_progression"]["improved_avg_quality"]

            result = AgentResult(
                agent_type=AgentType.CONTENT_EDITOR,
                success=True,
                version_created=target_version,
                quality_score=final_quality,
                processing_time=processing_time,
                issues_found=[],
                optimizations_applied=[f"Quality improvement: +{quality_improvement} points"],
                metadata=results,
            )

    except Exception as e:
        # Any failure (import, version lookup, editing) is recorded on the
        # result rather than raised, so the graph can route on it.
        processing_time = (datetime.now() - start_time).total_seconds()
        print(f"Content Editor failed: {e}")
        result = AgentResult(
            agent_type=AgentType.CONTENT_EDITOR,
            success=False,
            version_created=target_version,
            quality_score=None,
            processing_time=processing_time,
            issues_found=[],
            optimizations_applied=[],
            error_message=str(e),
        )

    # On failure keep whatever version we already had (falling back to the
    # starting version on the very first pass).
    new_version = target_version if result.success else state.get("current_version", state.get("starting_version"))

    # Populate inter-agent context with actionable notes
    context_update: Dict[str, Any] = {}
    if result.success:
        # NOTE(review): these keys are read from the editor's result metadata —
        # confirm the content editor actually emits them; both default safely
        # when absent.
        has_complex_tables = bool(result.metadata and result.metadata.get("has_complex_tables"))
        readability_concerns = result.metadata.get("readability_concerns", []) if result.metadata else []
        context_update["content_editor_notes"] = {
            "quality_score": result.quality_score,
            "issues_found": result.issues_found,
            "has_complex_tables": has_complex_tables,
            "readability_concerns": readability_concerns,
        }

    return {
        "current_version": new_version,
        "current_stage": "content_review",
        "agent_results": [result.to_dict()],
        "agent_context": context_update,
    }
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def latex_optimization_node(state: PipelineState) -> Dict[str, Any]:
    """Run the LaTeX specialist agent directly.

    After the specialist produces a new .tex version, this node also compiles
    it to PDF so the downstream Visual QA node has something to analyze.
    Compilation uses a two-tier fix strategy: PDFCompiler's own fixes first,
    then up to two LLM-based repair attempts via ``_llm_fix_latex``.
    """
    content_source = state.get("content_source", "research_report")
    iteration = state.get("iterations_completed", 0)
    iteration_suffix = f"_iter{iteration + 1}" if iteration > 0 else ""
    target_version = f"v2_latex_optimized{iteration_suffix}"
    input_version = state.get("current_version", "v1_content_edited")

    # Read upstream context to adjust optimization
    agent_ctx = state.get("agent_context", {})
    content_notes = agent_ctx.get("content_editor_notes", {})

    optimization_level = "moderate"
    if content_notes.get("has_complex_tables"):
        # Complex tables are fragile — avoid aggressive layout rewrites.
        optimization_level = "conservative"
        print(" [LangGraph] LaTeX node: upstream flagged complex tables — using conservative optimization")

    start_time = datetime.now()

    try:
        from agents.latex_specialist.agent import LaTeXSpecialistAgent
        from tools.version_manager import VersionManager

        version_manager = VersionManager()
        existing_version = version_manager.get_version(target_version)

        if existing_version:
            # Target version already exists (e.g. resumed run) — reuse it.
            # 90 is the fallback score when metadata lacks one.
            metadata = existing_version.get("metadata", {})
            quality_score = metadata.get("latex_quality_score", 90)
            processing_time = (datetime.now() - start_time).total_seconds()
            result = AgentResult(
                agent_type=AgentType.LATEX_SPECIALIST,
                success=True,
                version_created=target_version,
                quality_score=quality_score,
                processing_time=processing_time,
                issues_found=[],
                optimizations_applied=["Using existing LaTeX specialist version"],
                metadata=metadata,
            )
        else:
            agent = LaTeXSpecialistAgent(content_source=content_source)
            results = agent.process_with_versioning(
                parent_version=input_version,
                target_version=target_version,
                optimization_level=optimization_level,
            )

            processing_time = (datetime.now() - start_time).total_seconds()
            latex_analysis = results["latex_analysis"]
            optimizations = results["optimizations_applied"]

            result = AgentResult(
                agent_type=AgentType.LATEX_SPECIALIST,
                success=True,
                version_created=target_version,
                quality_score=latex_analysis["overall_score"],
                processing_time=processing_time,
                issues_found=[f"Found {latex_analysis['issues_found']} LaTeX issues"],
                optimizations_applied=optimizations,
                metadata=results,
            )

    except Exception as e:
        # Failure is recorded on the result, not raised, so the graph routes.
        processing_time = (datetime.now() - start_time).total_seconds()
        print(f"LaTeX Specialist failed: {e}")
        result = AgentResult(
            agent_type=AgentType.LATEX_SPECIALIST,
            success=False,
            version_created=target_version,
            quality_score=None,
            processing_time=processing_time,
            issues_found=[],
            optimizations_applied=[],
            error_message=str(e),
        )

    new_version = target_version if result.success else state.get("current_version")

    # Compile .tex to PDF so Visual QA can analyze it
    compilation_success = False
    compilation_error: Optional[str] = None

    if result.success:
        try:
            from tools.version_manager import VersionManager as _VM
            from tools.pdf_compiler import PDFCompiler

            vm = _VM()
            version_content = vm.get_version_content(target_version)
            output_dir = Path(state.get("output_dir", "artifacts/output"))
            output_dir.mkdir(parents=True, exist_ok=True)

            tex_filename = f"{content_source}.tex"
            # Find the .tex file in the version (may be named research_report.tex)
            tex_content = None
            for fname, content in version_content.items():
                if fname.endswith(".tex"):
                    tex_content = content
                    break  # first .tex found wins

            if tex_content:
                tex_path = output_dir / tex_filename
                with open(tex_path, "w", encoding="utf-8") as f:
                    f.write(tex_content)

                compiler = PDFCompiler(output_dir=str(output_dir))
                success, msg = compiler.compile(str(tex_path))

                if success:
                    compilation_success = True
                    print(f" [LangGraph] PDF compiled: {output_dir / content_source}.pdf")
                else:
                    # PDFCompiler regex fixes failed — try LLM-based fixes
                    print(f" [LangGraph] PDF compilation failed, attempting LLM fix: {msg}")
                    compilation_error = msg
                    for llm_attempt in range(1, 3):  # up to 2 LLM fix attempts
                        fixed_tex = _llm_fix_latex(tex_content, msg, llm_attempt)
                        if fixed_tex:
                            # Write candidate fix to a temp path so we don't destroy the original
                            candidate_path = tex_path.with_suffix(f".fix{llm_attempt}.tex")
                            with open(candidate_path, "w", encoding="utf-8") as f:
                                f.write(fixed_tex)
                            success, msg = compiler.compile(str(candidate_path))
                            if success:
                                # Fix compiled — adopt it as the canonical .tex
                                tex_content = fixed_tex
                                with open(tex_path, "w", encoding="utf-8") as f:
                                    f.write(tex_content)
                                # Move compiled PDF to expected location
                                candidate_pdf = candidate_path.with_suffix(".pdf")
                                expected_pdf = tex_path.with_suffix(".pdf")
                                if candidate_pdf.exists() and candidate_pdf != expected_pdf:
                                    if expected_pdf.exists():
                                        expected_pdf.unlink()
                                    candidate_pdf.rename(expected_pdf)
                                compilation_success = True
                                compilation_error = None
                                print(f" [LangGraph] PDF compiled after LLM fix attempt {llm_attempt}")
                                break
                            else:
                                # Candidate still broken — keep latest error,
                                # remove the candidate, and let the loop retry.
                                compilation_error = msg
                                print(f" [LangGraph] LLM fix attempt {llm_attempt} did not resolve compilation: {msg}")
                                # Clean up failed candidate
                                if candidate_path.exists():
                                    candidate_path.unlink()
                        else:
                            # LLM returned nothing (no API key / API failure) —
                            # further attempts won't help.
                            print(f" [LangGraph] LLM fix attempt {llm_attempt} returned no result")
                            break
            else:
                compilation_error = "No .tex file found in version content"
                print(f" [LangGraph] {compilation_error}")
        except Exception as e:
            compilation_error = str(e)
            print(f" [LangGraph] PDF compilation error: {e}")

    # If compilation failed, record it as an issue so the quality gate can react
    if result.success and not compilation_success and compilation_error:
        truncated = compilation_error[:300]
        result.issues_found.append(f"PDF_COMPILATION_FAILED: {truncated}")

    # Write downstream context for Visual QA
    context_update: Dict[str, Any] = {}
    if result.success and result.metadata:
        latex_analysis = result.metadata.get("latex_analysis", {})
        context_update["latex_specialist_notes"] = {
            "structure_score": latex_analysis.get("structure_score"),
            "typography_score": latex_analysis.get("typography_score"),
            "typography_issues": latex_analysis.get("typography_issues", []),
            "packages_used": latex_analysis.get("packages_used", []),
            "compilation_success": compilation_success,
        }
    if compilation_error:
        context_update["compilation_errors"] = {
            "message": compilation_error[:500],
            "iteration": iteration,
        }

    return {
        "current_version": new_version,
        "current_stage": "latex_optimization",
        "agent_results": [result.to_dict()],
        "agent_context": context_update,
    }
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def visual_qa_node(state: PipelineState) -> Dict[str, Any]:
    """Run visual QA analysis on the generated PDF.

    Feeds the compiled PDF through the Visual QA feedback agent (which may
    iteratively improve it), copies improved artifacts back to the canonical
    output paths, then scores the final PDF. Missing PDF and runtime errors
    both produce ``success=True`` with an ``error_message`` — Visual QA is
    deliberately best-effort and never blocks the pipeline.
    """
    content_source = state.get("content_source", "research_report")
    current_version = state.get("current_version")
    output_dir = state.get("output_dir", "artifacts/output")
    pdf_path = f"{output_dir}/{content_source}.pdf"

    # Read upstream context to decide iteration budget
    agent_ctx = state.get("agent_context", {})
    latex_notes = agent_ctx.get("latex_specialist_notes", {})

    max_iterations = 2
    typography_score = latex_notes.get("typography_score")
    if typography_score is not None and typography_score < 20:
        max_iterations = 3
        print(" [LangGraph] Visual QA node: weak typography score — allowing extra iteration")

    try:
        # NOTE(review): sys.path hack so the sibling visual_qa package's
        # `agent` module imports by bare name — confirm this cannot shadow
        # another `agent` module already on sys.path.
        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "visual_qa"))
        from agent import VisualQAFeedbackAgent

        visual_qa_feedback = VisualQAFeedbackAgent(content_source=content_source)

        if os.path.exists(pdf_path):
            final_pdf, improvements, final_version = visual_qa_feedback.analyze_and_improve(
                pdf_path, max_iterations=max_iterations
            )

            # Copy the final improved PDF and .tex back to the canonical output dir
            if final_pdf and final_pdf != pdf_path and os.path.exists(final_pdf):
                # NOTE(review): shutil is imported inside this branch; if the
                # .tex copy below is ever moved outside it, hoist the import.
                import shutil
                shutil.copy(final_pdf, pdf_path)
                print(f" [LangGraph] Copied final Visual QA PDF to {pdf_path}")

                # Also copy the improved .tex if it exists
                improved_tex = os.path.join(output_dir, f"{content_source}_improved.tex")
                canonical_tex = os.path.join(output_dir, f"{content_source}.tex")
                if os.path.exists(improved_tex):
                    shutil.copy(improved_tex, canonical_tex)
                    print(f" [LangGraph] Copied improved .tex to {canonical_tex}")

            from tools.visual_qa import VisualQAAgent
            visual_qa = VisualQAAgent(content_source=content_source)
            qa_results = visual_qa.validate_pdf_visual_quality(final_pdf)

            print(f"Visual QA Score: {qa_results.overall_score:.1f}/100")
            if improvements:
                print(f"Applied {len(improvements)} improvements")

            # Flatten per-page issues; only the first five are surfaced below.
            all_issues = []
            for page_result in qa_results.page_results:
                all_issues.extend(page_result.issues_found)

            new_version = final_version if final_version else current_version

            result = AgentResult(
                agent_type=AgentType.VISUAL_QA,
                success=True,
                version_created=new_version,
                quality_score=qa_results.overall_score,
                processing_time=0.0,
                issues_found=all_issues[:5],
                optimizations_applied=improvements,
            )
        else:
            print(f"PDF not found at {pdf_path}, skipping Visual QA")
            new_version = current_version
            result = AgentResult(
                agent_type=AgentType.VISUAL_QA,
                success=True,
                version_created=current_version,
                quality_score=None,
                processing_time=0.0,
                issues_found=[],
                optimizations_applied=[],
                error_message="PDF not found",
            )
    except Exception as e:
        print(f"Visual QA error: {e}")
        new_version = current_version
        # success=True is deliberate: a Visual QA failure must not fail the run.
        result = AgentResult(
            agent_type=AgentType.VISUAL_QA,
            success=True,
            version_created=current_version,
            quality_score=None,
            processing_time=0.0,
            issues_found=[],
            optimizations_applied=[],
            error_message=str(e),
        )

    return {
        "current_version": new_version,
        "current_stage": "visual_qa",
        "agent_results": [result.to_dict()],
    }
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def quality_assessment_node(state: PipelineState) -> Dict[str, Any]:
    """Assess overall workflow quality from accumulated agent results.

    Builds a workflow-level assessment from every AgentResult gathered so
    far, evaluates it against the overall quality gate, and appends both to
    the state's history lists.
    """
    source = state.get("content_source", "research_report")
    wf_coordinator = WorkflowCoordinator(content_source=source)

    workflow_assessment = wf_coordinator.assess_workflow_quality(
        state.get("agent_results", [])
    )
    gate_eval = wf_coordinator.quality_gate_manager.evaluate_overall_quality_gate(
        assessment=workflow_assessment,
        iteration_count=state.get("iterations_completed", 0),
    )

    update: Dict[str, Any] = {
        "current_stage": "quality_assessment",
        "quality_assessments": [workflow_assessment.__dict__],
        "quality_evaluations": [gate_eval.__dict__],
    }
    return update
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def iteration_node(state: PipelineState) -> Dict[str, Any]:
    """Advance the iteration counter and tag the stage for the next cycle."""
    next_iteration = 1 + state.get("iterations_completed", 0)
    print(f" [LangGraph] Starting iteration {next_iteration}")
    update = {
        "iterations_completed": next_iteration,
        "current_stage": "iteration",
    }
    return update
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def completion_node(state: PipelineState) -> Dict[str, Any]:
    """Mark pipeline as successfully complete."""
    finished_at = datetime.now().isoformat()
    update: Dict[str, Any] = {"current_stage": "completion", "end_time": finished_at}
    update["success"] = True
    update["human_handoff"] = True
    return update
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def escalation_node(state: PipelineState) -> Dict[str, Any]:
    """Mark pipeline as escalated to human review."""
    finished_at = datetime.now().isoformat()
    update: Dict[str, Any] = {"current_stage": "escalation", "end_time": finished_at}
    update["escalated"] = True
    return update
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
# ---------------------------------------------------------------------------
|
|
622
|
+
# Conditional edge functions
|
|
623
|
+
# ---------------------------------------------------------------------------
|
|
624
|
+
|
|
625
|
+
def route_after_content_review(state: PipelineState) -> Literal["latex_optimization", "iteration", "escalation"]:
    """Decide next step after content review using quality gate.

    PASS moves on to LaTeX optimization; ITERATE loops back unless the
    iteration budget is spent; anything else escalates to human review.
    """
    source = state.get("content_source", "research_report")
    coordinator = WorkflowCoordinator(content_source=source)
    gate_mgr = coordinator.quality_gate_manager

    assessment = coordinator.assess_workflow_quality(state.get("agent_results", []))
    evaluation = gate_mgr.evaluate_content_quality_gate(assessment)

    print(f" [LangGraph] Content quality gate: {evaluation.result.value} (score={evaluation.score})")

    if evaluation.result == QualityGateResult.PASS:
        return "latex_optimization"
    if evaluation.result != QualityGateResult.ITERATE:
        return "escalation"
    # ITERATE: honor the configured iteration budget.
    if state.get("iterations_completed", 0) >= gate_mgr.thresholds.max_iterations:
        return "escalation"
    return "iteration"
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def route_after_latex_optimization(state: PipelineState) -> Literal["visual_qa", "iteration", "escalation"]:
    """Decide next step after LaTeX optimization using quality gate.

    PASS proceeds to Visual QA; ITERATE loops back unless the iteration
    budget is spent; anything else escalates to human review.
    """
    source = state.get("content_source", "research_report")
    coordinator = WorkflowCoordinator(content_source=source)
    gate_mgr = coordinator.quality_gate_manager

    assessment = coordinator.assess_workflow_quality(state.get("agent_results", []))
    evaluation = gate_mgr.evaluate_latex_quality_gate(assessment)

    print(f" [LangGraph] LaTeX quality gate: {evaluation.result.value} (score={evaluation.score})")

    if evaluation.result == QualityGateResult.PASS:
        return "visual_qa"
    if evaluation.result != QualityGateResult.ITERATE:
        return "escalation"
    # ITERATE: honor the configured iteration budget.
    if state.get("iterations_completed", 0) >= gate_mgr.thresholds.max_iterations:
        return "escalation"
    return "iteration"
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def route_after_quality_assessment(state: PipelineState) -> Literal["completion", "iteration", "escalation"]:
    """Decide final outcome after quality assessment.

    Routes on the most recent overall quality-gate evaluation: PASS
    completes, ITERATE loops (capped at 3 iterations), anything else —
    including a missing evaluation — escalates.
    """
    history = state.get("quality_evaluations", [])
    if not history:
        return "escalation"

    outcome = history[-1].get("result", "fail")

    if outcome == QualityGateResult.PASS.value:
        return "completion"
    if outcome != QualityGateResult.ITERATE.value:
        # ESCALATE or FAIL
        return "escalation"
    if state.get("iterations_completed", 0) >= 3:
        return "escalation"
    return "iteration"
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
# ---------------------------------------------------------------------------
|
|
685
|
+
# Graph construction
|
|
686
|
+
# ---------------------------------------------------------------------------
|
|
687
|
+
|
|
688
|
+
def build_qa_graph() -> StateGraph:
    """Build the QA pipeline StateGraph (uncompiled).

    Topology: content_review → latex_optimization → visual_qa →
    quality_assessment, with quality-gate branches to iteration (which loops
    back to content_review) or escalation, and terminal completion/escalation
    nodes.
    """
    graph = StateGraph(PipelineState)

    # Register nodes (insertion order matches the pipeline's logical flow)
    node_registry = {
        "content_review": content_review_node,
        "latex_optimization": latex_optimization_node,
        "visual_qa": visual_qa_node,
        "quality_assessment": quality_assessment_node,
        "iteration": iteration_node,
        "completion": completion_node,
        "escalation": escalation_node,
    }
    for node_name, node_fn in node_registry.items():
        graph.add_node(node_name, node_fn)

    # Wire edges: linear spine, quality-gate branches, and the iterate loop
    graph.add_edge(START, "content_review")
    graph.add_conditional_edges("content_review", route_after_content_review)
    graph.add_conditional_edges("latex_optimization", route_after_latex_optimization)
    graph.add_edge("visual_qa", "quality_assessment")
    graph.add_conditional_edges("quality_assessment", route_after_quality_assessment)
    graph.add_edge("iteration", "content_review")
    graph.add_edge("completion", END)
    graph.add_edge("escalation", END)

    return graph
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def compile_qa_pipeline(checkpointer=None):
    """Compile and return the QA pipeline graph.

    Args:
        checkpointer: LangGraph checkpointer instance. Defaults to MemorySaver.

    Returns:
        Compiled LangGraph application.
    """
    saver = MemorySaver() if checkpointer is None else checkpointer
    return build_qa_graph().compile(checkpointer=saver)
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def export_mermaid_diagram() -> str:
    """Export the pipeline graph as a Mermaid diagram string."""
    compiled_app = build_qa_graph().compile()
    return compiled_app.get_graph().draw_mermaid()
|