deepagents-printshop 0.1.0 (deepagents_printshop-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/content_editor/__init__.py +1 -0
- agents/content_editor/agent.py +279 -0
- agents/content_editor/content_reviewer.py +327 -0
- agents/content_editor/versioned_agent.py +455 -0
- agents/latex_specialist/__init__.py +1 -0
- agents/latex_specialist/agent.py +531 -0
- agents/latex_specialist/latex_analyzer.py +510 -0
- agents/latex_specialist/latex_optimizer.py +1192 -0
- agents/qa_orchestrator/__init__.py +1 -0
- agents/qa_orchestrator/agent.py +603 -0
- agents/qa_orchestrator/langgraph_workflow.py +733 -0
- agents/qa_orchestrator/pipeline_types.py +72 -0
- agents/qa_orchestrator/quality_gates.py +495 -0
- agents/qa_orchestrator/workflow_coordinator.py +139 -0
- agents/research_agent/__init__.py +1 -0
- agents/research_agent/agent.py +258 -0
- agents/research_agent/llm_report_generator.py +1023 -0
- agents/research_agent/report_generator.py +536 -0
- agents/visual_qa/__init__.py +1 -0
- agents/visual_qa/agent.py +410 -0
- deepagents_printshop-0.1.0.dist-info/METADATA +744 -0
- deepagents_printshop-0.1.0.dist-info/RECORD +37 -0
- deepagents_printshop-0.1.0.dist-info/WHEEL +4 -0
- deepagents_printshop-0.1.0.dist-info/entry_points.txt +2 -0
- deepagents_printshop-0.1.0.dist-info/licenses/LICENSE +86 -0
- tools/__init__.py +1 -0
- tools/change_tracker.py +419 -0
- tools/content_type_loader.py +171 -0
- tools/graph_generator.py +281 -0
- tools/latex_generator.py +374 -0
- tools/llm_latex_generator.py +678 -0
- tools/magazine_layout.py +462 -0
- tools/pattern_injector.py +250 -0
- tools/pattern_learner.py +477 -0
- tools/pdf_compiler.py +386 -0
- tools/version_manager.py +346 -0
- tools/visual_qa.py +799 -0
@@ -0,0 +1,72 @@
+"""
+Shared types for the QA orchestrator pipeline.
+
+Extracted from workflow_coordinator.py so that both the LangGraph workflow
+and the coordinator can import them without circular dependencies.
+"""
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+
+class AgentType(Enum):
+    """Available agent types."""
+    CONTENT_EDITOR = "content_editor"
+    LATEX_SPECIALIST = "latex_specialist"
+    VISUAL_QA = "visual_qa"
+
+
+class WorkflowStage(Enum):
+    """Workflow execution stages."""
+    INITIALIZATION = "initialization"
+    CONTENT_REVIEW = "content_review"
+    LATEX_OPTIMIZATION = "latex_optimization"
+    VISUAL_QA = "visual_qa"
+    QUALITY_ASSESSMENT = "quality_assessment"
+    ITERATION = "iteration"
+    COMPLETION = "completion"
+    ESCALATION = "escalation"
+
+
+@dataclass
+class AgentResult:
+    """Result from agent execution."""
+    agent_type: AgentType
+    success: bool
+    version_created: str
+    quality_score: Optional[float]
+    processing_time: float
+    issues_found: List[str]
+    optimizations_applied: List[str]
+    error_message: Optional[str] = None
+    metadata: Optional[Dict] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize to a plain dict for state storage."""
+        return {
+            "agent_type": self.agent_type.value,
+            "success": self.success,
+            "version_created": self.version_created,
+            "quality_score": self.quality_score,
+            "processing_time": self.processing_time,
+            "issues_found": self.issues_found,
+            "optimizations_applied": self.optimizations_applied,
+            "error_message": self.error_message,
+            "metadata": self.metadata,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "AgentResult":
+        """Reconstruct an AgentResult from a dict."""
+        return cls(
+            agent_type=AgentType(data["agent_type"]),
+            success=data["success"],
+            version_created=data["version_created"],
+            quality_score=data.get("quality_score"),
+            processing_time=data.get("processing_time", 0.0),
+            issues_found=data.get("issues_found", []),
+            optimizations_applied=data.get("optimizations_applied", []),
+            error_message=data.get("error_message"),
+            metadata=data.get("metadata"),
+        )
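
The to_dict/from_dict pair above is the serialization contract that lets an AgentResult cross LangGraph state boundaries as a plain dict. A minimal round-trip sketch (not part of the package): the import path is inferred from the RECORD listing above, and the version label and issue strings are invented for illustration.

    from agents.qa_orchestrator.pipeline_types import AgentResult, AgentType

    result = AgentResult(
        agent_type=AgentType.LATEX_SPECIALIST,
        success=True,
        version_created="v3",  # hypothetical version label
        quality_score=91.5,
        processing_time=12.4,
        issues_found=["overfull hbox on page 2"],           # hypothetical issue
        optimizations_applied=["normalized table widths"],  # hypothetical fix
    )

    state = result.to_dict()                 # plain dict, safe to store in graph state
    restored = AgentResult.from_dict(state)  # enum rehydrated from its string value
    assert restored.agent_type is AgentType.LATEX_SPECIALIST
    assert restored.metadata is None         # missing optional keys fall back to defaults

Note that from_dict reads every field except agent_type, success, and version_created with .get() and a default, so those three are effectively the required keys of the serialized form.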
@@ -0,0 +1,495 @@
+"""
+Quality Gates System
+
+Manages quality thresholds, decision logic, and escalation rules for the QA pipeline.
+"""
+
+from typing import Dict, List, Optional, Tuple
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+
+
+class QualityGateResult(Enum):
+    """Quality gate evaluation results."""
+    PASS = "pass"
+    FAIL = "fail"
+    ITERATE = "iterate"
+    ESCALATE = "escalate"
+
+
+@dataclass
+class QualityThresholds:
+    """Quality threshold configuration."""
+    # Content quality thresholds
+    content_minimum: int = 80
+    content_good: int = 85
+    content_excellent: int = 90
+
+    # LaTeX quality thresholds
+    latex_minimum: int = 85
+    latex_good: int = 90
+    latex_excellent: int = 95
+
+    # Component thresholds (out of 25 each)
+    latex_structure_minimum: int = 22
+    latex_typography_minimum: int = 18  # Lowered to avoid iteration loops on acceptable typography
+    latex_tables_figures_minimum: int = 20
+    latex_best_practices_minimum: int = 20
+
+    # Overall pipeline thresholds
+    overall_target: int = 80  # Lowered to avoid iteration issues during development
+    human_handoff_threshold: int = 90
+
+    # Iteration control
+    improvement_minimum: int = 5
+    convergence_threshold: int = 2
+    max_iterations: int = 3
+
+    # Error thresholds
+    max_content_issues: int = 5
+    max_latex_issues: int = 3
+
+
+@dataclass
+class QualityAssessment:
+    """Complete quality assessment result."""
+    content_score: Optional[int] = None
+    latex_score: Optional[int] = None
+    latex_structure: Optional[int] = None
+    latex_typography: Optional[int] = None
+    latex_tables_figures: Optional[int] = None
+    latex_best_practices: Optional[int] = None
+    visual_qa_score: Optional[float] = None
+
+    content_issues: List[str] = None
+    latex_issues: List[str] = None
+    visual_qa_issues: List[str] = None
+
+    overall_score: Optional[float] = None
+    assessment_timestamp: str = None
+
+    def __post_init__(self):
+        if self.content_issues is None:
+            self.content_issues = []
+        if self.latex_issues is None:
+            self.latex_issues = []
+        if self.visual_qa_issues is None:
+            self.visual_qa_issues = []
+        if self.assessment_timestamp is None:
+            self.assessment_timestamp = datetime.now().isoformat()
+
+
+@dataclass
+class QualityGateEvaluation:
+    """Result of quality gate evaluation."""
+    gate_name: str
+    result: QualityGateResult
+    score: Optional[float]
+    threshold: Optional[float]
+    reasons: List[str]
+    recommendations: List[str]
+    next_action: str
+    evaluation_timestamp: str = None
+
+    def __post_init__(self):
+        if self.evaluation_timestamp is None:
+            self.evaluation_timestamp = datetime.now().isoformat()
+
+
+class QualityGateManager:
+    """
+    Manages quality gates and decision logic for the QA pipeline.
+
+    Features:
+    - Quality threshold evaluation
+    - Iteration decision logic
+    - Human escalation triggers
+    - Quality progression tracking
+    """
+
+    def __init__(self, thresholds: Optional[QualityThresholds] = None):
+        """
+        Initialize quality gate manager.
+
+        Args:
+            thresholds: Custom quality thresholds (uses defaults if None)
+        """
+        self.thresholds = thresholds or QualityThresholds()
+        self.evaluation_history: List[QualityGateEvaluation] = []
+
+    def evaluate_content_quality_gate(self, assessment: QualityAssessment) -> QualityGateEvaluation:
+        """
+        Evaluate content quality gate.
+
+        Args:
+            assessment: Quality assessment data
+
+        Returns:
+            Quality gate evaluation result
+        """
+        reasons = []
+        recommendations = []
+
+        if assessment.content_score is None:
+            return QualityGateEvaluation(
+                gate_name="content_quality",
+                result=QualityGateResult.FAIL,
+                score=None,
+                threshold=self.thresholds.content_minimum,
+                reasons=["Content score not available"],
+                recommendations=["Run content analysis"],
+                next_action="run_content_editor"
+            )
+
+        score = assessment.content_score
+
+        # Check minimum threshold
+        if score < self.thresholds.content_minimum:
+            reasons.append(f"Content score {score} below minimum {self.thresholds.content_minimum}")
+            recommendations.append("Run content editor to improve grammar, readability, and structure")
+            return QualityGateEvaluation(
+                gate_name="content_quality",
+                result=QualityGateResult.ITERATE,
+                score=score,
+                threshold=self.thresholds.content_minimum,
+                reasons=reasons,
+                recommendations=recommendations,
+                next_action="run_content_editor"
+            )
+
+        # Check issue count
+        issue_count = len(assessment.content_issues)
+        if issue_count > self.thresholds.max_content_issues:
+            reasons.append(f"Too many content issues: {issue_count} > {self.thresholds.max_content_issues}")
+            recommendations.append("Address remaining content issues")
+            return QualityGateEvaluation(
+                gate_name="content_quality",
+                result=QualityGateResult.ITERATE,
+                score=score,
+                threshold=self.thresholds.content_minimum,
+                reasons=reasons,
+                recommendations=recommendations,
+                next_action="run_content_editor"
+            )
+
+        # Determine pass level
+        if score >= self.thresholds.content_excellent:
+            reasons.append(f"Excellent content quality: {score}")
+            next_action = "proceed_to_latex"
+        elif score >= self.thresholds.content_good:
+            reasons.append(f"Good content quality: {score}")
+            next_action = "proceed_to_latex"
+        else:
+            reasons.append(f"Acceptable content quality: {score}")
+            next_action = "proceed_to_latex"
+
+        return QualityGateEvaluation(
+            gate_name="content_quality",
+            result=QualityGateResult.PASS,
+            score=score,
+            threshold=self.thresholds.content_minimum,
+            reasons=reasons,
+            recommendations=["Content quality meets standards"],
+            next_action=next_action
+        )
+
+    def evaluate_latex_quality_gate(self, assessment: QualityAssessment) -> QualityGateEvaluation:
+        """
+        Evaluate LaTeX quality gate.
+
+        Args:
+            assessment: Quality assessment data
+
+        Returns:
+            Quality gate evaluation result
+        """
+        reasons = []
+        recommendations = []
+
+        if assessment.latex_score is None:
+            return QualityGateEvaluation(
+                gate_name="latex_quality",
+                result=QualityGateResult.FAIL,
+                score=None,
+                threshold=self.thresholds.latex_minimum,
+                reasons=["LaTeX score not available"],
+                recommendations=["Run LaTeX analysis"],
+                next_action="run_latex_specialist"
+            )
+
+        score = assessment.latex_score
+
+        # Hard gate: compilation failure always forces iteration
+        for issue in assessment.latex_issues:
+            if issue.startswith("PDF_COMPILATION_FAILED"):
+                return QualityGateEvaluation(
+                    gate_name="latex_quality",
+                    result=QualityGateResult.ITERATE,
+                    score=score,
+                    threshold=self.thresholds.latex_minimum,
+                    reasons=["PDF compilation failed — must iterate"],
+                    recommendations=["Fix LaTeX compilation errors before proceeding"],
+                    next_action="run_latex_specialist",
+                )
+
+        # Check overall LaTeX score
+        if score < self.thresholds.latex_minimum:
+            reasons.append(f"LaTeX score {score} below minimum {self.thresholds.latex_minimum}")
+            recommendations.append("Run LaTeX specialist to improve formatting and structure")
+            return QualityGateEvaluation(
+                gate_name="latex_quality",
+                result=QualityGateResult.ITERATE,
+                score=score,
+                threshold=self.thresholds.latex_minimum,
+                reasons=reasons,
+                recommendations=recommendations,
+                next_action="run_latex_specialist"
+            )
+
+        # Check component scores
+        component_issues = []
+
+        if assessment.latex_structure and assessment.latex_structure < self.thresholds.latex_structure_minimum:
+            component_issues.append(f"Structure: {assessment.latex_structure}/{self.thresholds.latex_structure_minimum}")
+
+        if assessment.latex_typography and assessment.latex_typography < self.thresholds.latex_typography_minimum:
+            component_issues.append(f"Typography: {assessment.latex_typography}/{self.thresholds.latex_typography_minimum}")
+
+        if assessment.latex_tables_figures and assessment.latex_tables_figures < self.thresholds.latex_tables_figures_minimum:
+            component_issues.append(f"Tables/Figures: {assessment.latex_tables_figures}/{self.thresholds.latex_tables_figures_minimum}")
+
+        if assessment.latex_best_practices and assessment.latex_best_practices < self.thresholds.latex_best_practices_minimum:
+            component_issues.append(f"Best Practices: {assessment.latex_best_practices}/{self.thresholds.latex_best_practices_minimum}")
+
+        if component_issues:
+            reasons.extend([f"Component scores below minimum: {', '.join(component_issues)}"])
+            recommendations.append("Improve LaTeX component scores")
+            return QualityGateEvaluation(
+                gate_name="latex_quality",
+                result=QualityGateResult.ITERATE,
+                score=score,
+                threshold=self.thresholds.latex_minimum,
+                reasons=reasons,
+                recommendations=recommendations,
+                next_action="run_latex_specialist"
+            )
+
+        # Check issue count
+        issue_count = len(assessment.latex_issues)
+        if issue_count > self.thresholds.max_latex_issues:
+            reasons.append(f"Too many LaTeX issues: {issue_count} > {self.thresholds.max_latex_issues}")
+            recommendations.append("Address remaining LaTeX issues")
+            return QualityGateEvaluation(
+                gate_name="latex_quality",
+                result=QualityGateResult.ITERATE,
+                score=score,
+                threshold=self.thresholds.latex_minimum,
+                reasons=reasons,
+                recommendations=recommendations,
+                next_action="run_latex_specialist"
+            )
+
+        # Determine pass level
+        if score >= self.thresholds.latex_excellent:
+            reasons.append(f"Excellent LaTeX quality: {score}")
+            next_action = "proceed_to_visual_qa"
+        elif score >= self.thresholds.latex_good:
+            reasons.append(f"Good LaTeX quality: {score}")
+            next_action = "proceed_to_visual_qa"
+        else:
+            reasons.append(f"Acceptable LaTeX quality: {score}")
+            next_action = "proceed_to_visual_qa"
+
+        return QualityGateEvaluation(
+            gate_name="latex_quality",
+            result=QualityGateResult.PASS,
+            score=score,
+            threshold=self.thresholds.latex_minimum,
+            reasons=reasons,
+            recommendations=["LaTeX quality meets standards"],
+            next_action=next_action
+        )
+
+    def evaluate_overall_quality_gate(self, assessment: QualityAssessment, iteration_count: int = 0) -> QualityGateEvaluation:
+        """
+        Evaluate overall quality gate for final decision.
+
+        Args:
+            assessment: Complete quality assessment
+            iteration_count: Number of iterations completed
+
+        Returns:
+            Final quality gate evaluation
+        """
+        reasons = []
+        recommendations = []
+
+        # Calculate overall score
+        scores = []
+        if assessment.content_score is not None:
+            scores.append(assessment.content_score)
+        if assessment.latex_score is not None:
+            scores.append(assessment.latex_score)
+        if assessment.visual_qa_score is not None:
+            scores.append(assessment.visual_qa_score)
+
+        if not scores:
+            return QualityGateEvaluation(
+                gate_name="overall_quality",
+                result=QualityGateResult.FAIL,
+                score=None,
+                threshold=self.thresholds.overall_target,
+                reasons=["No quality scores available"],
+                recommendations=["Run complete QA pipeline"],
+                next_action="start_pipeline"
+            )
+
+        overall_score = sum(scores) / len(scores)
+        assessment.overall_score = overall_score
+
+        # Check maximum iterations
+        if iteration_count >= self.thresholds.max_iterations:
+            reasons.append(f"Maximum iterations reached: {iteration_count}")
+            if overall_score >= self.thresholds.overall_target:
+                recommendations.append("Quality acceptable despite max iterations - proceed with human review")
+                return QualityGateEvaluation(
+                    gate_name="overall_quality",
+                    result=QualityGateResult.ESCALATE,
+                    score=overall_score,
+                    threshold=self.thresholds.overall_target,
+                    reasons=reasons,
+                    recommendations=recommendations,
+                    next_action="human_review"
+                )
+            else:
+                recommendations.append("Quality below target at max iterations - escalate to human")
+                return QualityGateEvaluation(
+                    gate_name="overall_quality",
+                    result=QualityGateResult.ESCALATE,
+                    score=overall_score,
+                    threshold=self.thresholds.overall_target,
+                    reasons=reasons,
+                    recommendations=recommendations,
+                    next_action="human_escalation"
+                )
+
+        # Check for human handoff threshold
+        if overall_score >= self.thresholds.human_handoff_threshold:
+            reasons.append(f"Excellent overall quality: {overall_score}")
+            recommendations.append("Quality exceeds handoff threshold - ready for human review")
+            return QualityGateEvaluation(
+                gate_name="overall_quality",
+                result=QualityGateResult.PASS,
+                score=overall_score,
+                threshold=self.thresholds.human_handoff_threshold,
+                reasons=reasons,
+                recommendations=recommendations,
+                next_action="human_handoff"
+            )
+
+        # Check for target threshold
+        if overall_score >= self.thresholds.overall_target:
+            reasons.append(f"Good overall quality: {overall_score}")
+            recommendations.append("Quality meets target - ready for human review")
+            return QualityGateEvaluation(
+                gate_name="overall_quality",
+                result=QualityGateResult.PASS,
+                score=overall_score,
+                threshold=self.thresholds.overall_target,
+                reasons=reasons,
+                recommendations=recommendations,
+                next_action="human_handoff"
+            )
+
+        # Below target - determine iteration strategy
+        reasons.append(f"Overall quality {overall_score} below target {self.thresholds.overall_target}")
+
+        # Analyze which components need improvement
+        if assessment.content_score and assessment.content_score < self.thresholds.content_good:
+            recommendations.append("Improve content quality")
+        if assessment.latex_score and assessment.latex_score < self.thresholds.latex_good:
+            recommendations.append("Improve LaTeX quality")
+
+        return QualityGateEvaluation(
+            gate_name="overall_quality",
+            result=QualityGateResult.ITERATE,
+            score=overall_score,
+            threshold=self.thresholds.overall_target,
+            reasons=reasons,
+            recommendations=recommendations,
+            next_action="iterate_pipeline"
+        )
+
+    def check_improvement_convergence(self,
+                                      current_assessment: QualityAssessment,
+                                      previous_assessment: Optional[QualityAssessment]) -> Tuple[bool, float]:
+        """
+        Check if quality improvement has converged (plateaued).
+
+        Args:
+            current_assessment: Current quality assessment
+            previous_assessment: Previous iteration assessment
+
+        Returns:
+            Tuple of (has_converged, improvement_amount)
+        """
+        if not previous_assessment or not current_assessment.overall_score or not previous_assessment.overall_score:
+            return False, 0.0
+
+        improvement = current_assessment.overall_score - previous_assessment.overall_score
+
+        # Check if improvement is below convergence threshold
+        has_converged = improvement < self.thresholds.convergence_threshold
+
+        return has_converged, improvement
+
+    def generate_quality_summary(self, assessment: QualityAssessment) -> Dict:
+        """
+        Generate a comprehensive quality summary.
+
+        Args:
+            assessment: Quality assessment to summarize
+
+        Returns:
+            Quality summary dictionary
+        """
+        summary = {
+            "overall_score": assessment.overall_score,
+            "assessment_timestamp": assessment.assessment_timestamp,
+            "content_analysis": {
+                "score": assessment.content_score,
+                "issues_count": len(assessment.content_issues),
+                "issues": assessment.content_issues[:5]  # Limit to first 5
+            },
+            "latex_analysis": {
+                "overall_score": assessment.latex_score,
+                "structure": assessment.latex_structure,
+                "typography": assessment.latex_typography,
+                "tables_figures": assessment.latex_tables_figures,
+                "best_practices": assessment.latex_best_practices,
+                "issues_count": len(assessment.latex_issues),
+                "issues": assessment.latex_issues[:5]  # Limit to first 5
+            },
+            "visual_qa_analysis": {
+                "score": assessment.visual_qa_score,
+                "issues_count": len(assessment.visual_qa_issues),
+                "issues": assessment.visual_qa_issues[:5]  # Limit to first 5
+            },
+            "quality_gates": {
+                "content_passes": assessment.content_score >= self.thresholds.content_minimum if assessment.content_score else False,
+                "latex_passes": assessment.latex_score >= self.thresholds.latex_minimum if assessment.latex_score else False,
+                "overall_passes": assessment.overall_score >= self.thresholds.overall_target if assessment.overall_score else False,
+                "ready_for_handoff": assessment.overall_score >= self.thresholds.human_handoff_threshold if assessment.overall_score else False
+            }
+        }
+
+        return summary
+
+    def log_evaluation(self, evaluation: QualityGateEvaluation):
+        """Log quality gate evaluation for history tracking."""
+        self.evaluation_history.append(evaluation)
+
+    def get_evaluation_history(self) -> List[QualityGateEvaluation]:
+        """Get complete evaluation history."""
+        return self.evaluation_history.copy()
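
Taken together, the three evaluators implement a staged decision: the content gate feeds the LaTeX gate, the LaTeX gate feeds visual QA, and the overall gate averages whichever scores exist before choosing between PASS, ITERATE, and ESCALATE. A hedged usage sketch (not part of the package): the import path is inferred from the RECORD listing, and every score and issue string below is invented.

    from agents.qa_orchestrator.quality_gates import (
        QualityAssessment,
        QualityGateManager,
        QualityGateResult,
    )

    manager = QualityGateManager()  # default QualityThresholds

    assessment = QualityAssessment(
        content_score=87,  # hypothetical scores throughout
        latex_score=84,
        latex_issues=["PDF_COMPILATION_FAILED: missing \\end{table}"],
    )

    content_gate = manager.evaluate_content_quality_gate(assessment)
    print(content_gate.result)     # QualityGateResult.PASS (87 >= content_minimum of 80)

    latex_gate = manager.evaluate_latex_quality_gate(assessment)
    print(latex_gate.result)       # QualityGateResult.ITERATE: the compilation-failure
                                   # hard gate fires before any score is compared
    print(latex_gate.next_action)  # run_latex_specialist

    overall = manager.evaluate_overall_quality_gate(assessment, iteration_count=1)
    print(overall.score)           # 85.5, the mean of the two available scores
    print(overall.result)          # QualityGateResult.PASS (85.5 >= overall_target of 80)
    manager.log_evaluation(overall)

    # Convergence check against a hypothetical prior iteration: a 2.5-point gain
    # is not below the convergence_threshold of 2, so improvement has not plateaued.
    previous = QualityAssessment(overall_score=83.0)
    converged, delta = manager.check_improvement_convergence(assessment, previous)
    print(converged, delta)        # False 2.5

One design note worth flagging: evaluate_overall_quality_gate mutates the assessment it is given (it writes assessment.overall_score back), which is what lets the convergence check above run on the same object.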