deepagents-printshop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/content_editor/__init__.py +1 -0
- agents/content_editor/agent.py +279 -0
- agents/content_editor/content_reviewer.py +327 -0
- agents/content_editor/versioned_agent.py +455 -0
- agents/latex_specialist/__init__.py +1 -0
- agents/latex_specialist/agent.py +531 -0
- agents/latex_specialist/latex_analyzer.py +510 -0
- agents/latex_specialist/latex_optimizer.py +1192 -0
- agents/qa_orchestrator/__init__.py +1 -0
- agents/qa_orchestrator/agent.py +603 -0
- agents/qa_orchestrator/langgraph_workflow.py +733 -0
- agents/qa_orchestrator/pipeline_types.py +72 -0
- agents/qa_orchestrator/quality_gates.py +495 -0
- agents/qa_orchestrator/workflow_coordinator.py +139 -0
- agents/research_agent/__init__.py +1 -0
- agents/research_agent/agent.py +258 -0
- agents/research_agent/llm_report_generator.py +1023 -0
- agents/research_agent/report_generator.py +536 -0
- agents/visual_qa/__init__.py +1 -0
- agents/visual_qa/agent.py +410 -0
- deepagents_printshop-0.1.0.dist-info/METADATA +744 -0
- deepagents_printshop-0.1.0.dist-info/RECORD +37 -0
- deepagents_printshop-0.1.0.dist-info/WHEEL +4 -0
- deepagents_printshop-0.1.0.dist-info/entry_points.txt +2 -0
- deepagents_printshop-0.1.0.dist-info/licenses/LICENSE +86 -0
- tools/__init__.py +1 -0
- tools/change_tracker.py +419 -0
- tools/content_type_loader.py +171 -0
- tools/graph_generator.py +281 -0
- tools/latex_generator.py +374 -0
- tools/llm_latex_generator.py +678 -0
- tools/magazine_layout.py +462 -0
- tools/pattern_injector.py +250 -0
- tools/pattern_learner.py +477 -0
- tools/pdf_compiler.py +386 -0
- tools/version_manager.py +346 -0
- tools/visual_qa.py +799 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Content Editor Agent for DeepAgents PrintShop."""
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Content Editor Agent - Milestone 1
|
|
3
|
+
|
|
4
|
+
A specialized agent for improving grammar, readability, and overall content quality.
|
|
5
|
+
Part of the DeepAgents PrintShop quality assurance system.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Tuple
|
|
12
|
+
import json
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
|
|
15
|
+
# Add project root to path
|
|
16
|
+
project_root = Path(__file__).parent.parent.parent
|
|
17
|
+
sys.path.insert(0, str(project_root))
|
|
18
|
+
|
|
19
|
+
from agents.content_editor.content_reviewer import ContentReviewer
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ContentEditorAgent:
    """
    Content Editor Agent with memory for improving research content quality.

    This agent focuses on:
    1. Grammar and spelling corrections
    2. Readability improvements
    3. Sentence structure optimization
    4. Content flow and coherence

    Text analysis and improvement is delegated to ContentReviewer; this class
    handles file I/O, memory seeding, and report generation. All paths are
    relative to the current working directory.
    """

    def __init__(self, memory_dir: str = ".deepagents/content_editor/memories", document_type: str = "research_report") -> None:
        """
        Initialize the content editor agent.

        Args:
            memory_dir: Directory for storing agent memories (created if absent)
            document_type: Type of document for pattern learning, forwarded to
                ContentReviewer
        """
        self.memory_dir = Path(memory_dir)
        self.memory_dir.mkdir(parents=True, exist_ok=True)
        self.document_type = document_type
        # The reviewer does the actual LLM-based text improvement.
        self.content_reviewer = ContentReviewer(document_type=document_type)

        # Fixed pipeline locations (relative to the working directory).
        self.input_dir = Path("artifacts/sample_content")
        self.output_dir = Path("artifacts/reviewed_content/v1_content_edited")
        self.reports_dir = Path("artifacts/agent_reports/quality")

        # Ensure output directories exist; input_dir is only ever read from.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.reports_dir.mkdir(parents=True, exist_ok=True)

        # Seed default memory files on first run (existing files are kept).
        self.init_memory()

    def init_memory(self) -> None:
        """Initialize agent memory files; never overwrites an existing file."""
        memory_files = {
            "grammar_rules.md": """# Grammar and Style Rules

## Common Grammar Issues to Fix
- Subject-verb agreement errors
- Comma splices and run-on sentences
- Misplaced modifiers
- Inconsistent tense usage
- Passive voice overuse

## Style Guidelines
- Use active voice when possible
- Vary sentence length for readability
- Avoid redundant phrases
- Use precise, technical vocabulary
- Maintain consistent terminology throughout

## Academic Writing Standards
- Third person perspective for research
- Present tense for established facts
- Past tense for specific studies conducted
- Clear, concise sentences (15-20 words average)
- Logical paragraph structure with topic sentences
""",
            "readability_patterns.md": """# Readability Improvement Patterns

## Sentence Structure
- Break long sentences (>25 words) into shorter ones
- Use transitional phrases between ideas
- Vary sentence beginnings to avoid monotony
- Replace complex constructions with simpler alternatives

## Word Choice
- Replace vague terms with specific ones
- Eliminate unnecessary jargon
- Use strong verbs instead of weak verb + adverb combinations
- Choose concrete nouns over abstract ones when possible

## Flow and Coherence
- Ensure each paragraph has a clear main idea
- Use connecting words and phrases
- Maintain logical progression of ideas
- Eliminate redundant information
""",
            "quality_metrics.md": """# Content Quality Metrics

## Grammar Score (0-100)
- 0-20 errors: 90-100 points
- 21-40 errors: 70-89 points
- 41-60 errors: 50-69 points
- 60+ errors: 0-49 points

## Readability Score (0-100)
- Based on Flesch Reading Ease
- 90-100: Very Easy (Graduate level appropriate: 30-50)
- 60-70: Standard (Target for academic writing)
- 30-50: Difficult (Acceptable for technical content)
- 0-30: Very Difficult (May need simplification)

## Content Quality Indicators
- Average sentence length: 15-20 words
- Passive voice usage: <20%
- Paragraph length: 3-5 sentences
- Transition word usage: Present in 60%+ of sentences
"""
        }

        # Only create files that do not already exist, so user edits to the
        # memory files survive re-initialization.
        for filename, content in memory_files.items():
            file_path = self.memory_dir / filename
            if not file_path.exists():
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)

    def load_content(self, filename: str) -> str:
        """Load content from the sample_content directory.

        Returns an empty string (rather than raising) when the file is missing.
        """
        file_path = self.input_dir / filename
        if file_path.exists():
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        return ""

    def save_content(self, filename: str, content: str) -> None:
        """Save reviewed content to the output directory (overwrites)."""
        file_path = self.output_dir / filename
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

    def review_content(self, content: str) -> Dict:
        """
        Review and improve content quality.

        Args:
            content: Original content text

        Returns:
            Dict with improved content and quality metrics (see
            ContentReviewer.review_text for the exact keys)
        """
        return self.content_reviewer.review_text(content)

    def process_all_content(self) -> Dict:
        """
        Process all content files in the sample_content directory.

        Each ``*.md`` file is loaded, reviewed, and its improved version is
        written to the output directory. A summary report is saved as a side
        effect (see save_report).

        Returns:
            Dict with processing results and overall quality improvements
        """
        print(f"📄 Document Type: {self.document_type}")
        print(f"🧠 Pattern Learning: {'Enabled' if self.content_reviewer.pattern_injector else 'Disabled'}")
        print()

        results = {
            "files_processed": [],
            "overall_quality_improvement": 0,
            "total_issues_fixed": 0,
            "timestamp": datetime.now().isoformat()
        }

        # Process each markdown file found in the input directory.
        for file_path in self.input_dir.glob("*.md"):
            print(f"Processing {file_path.name}...")

            # Load original content (empty string if unreadable/missing).
            original_content = self.load_content(file_path.name)

            # Review and improve content via the LLM-backed reviewer.
            review_result = self.review_content(original_content)

            # Save improved content under the same filename.
            self.save_content(file_path.name, review_result["improved_content"])

            # Track per-file results. Note: "issues_fixed" here counts the
            # number of changes reported by the reviewer, not the reviewer's
            # own "issues_fixed" metric.
            file_result = {
                "filename": file_path.name,
                "original_quality_score": review_result["original_quality_score"],
                "improved_quality_score": review_result["improved_quality_score"],
                "quality_improvement": review_result["quality_improvement"],
                "issues_fixed": len(review_result["changes_made"]),
                "changes_summary": review_result["changes_summary"]
            }

            results["files_processed"].append(file_result)
            results["total_issues_fixed"] += file_result["issues_fixed"]

        # Overall improvement = mean of per-file improvements (0 if no files).
        if results["files_processed"]:
            avg_improvement = sum(f["quality_improvement"] for f in results["files_processed"]) / len(results["files_processed"])
            results["overall_quality_improvement"] = round(avg_improvement, 2)

        # Persist JSON + markdown reports.
        self.save_report(results)

        return results

    def save_report(self, results: Dict) -> None:
        """Save the content review report (JSON plus a markdown twin)."""
        report_path = self.reports_dir / "content_review_report.json"

        with open(report_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)

        # Also create a human-readable markdown report alongside the JSON.
        md_report_path = self.reports_dir / "content_review_report.md"
        self.create_markdown_report(results, md_report_path)

    def create_markdown_report(self, results: Dict, output_path: Path) -> None:
        """Create a human-readable markdown report at output_path."""
        report_content = f"""# Content Review Report

**Generated:** {results['timestamp']}

## Summary
- **Files Processed:** {len(results['files_processed'])}
- **Overall Quality Improvement:** +{results['overall_quality_improvement']} points
- **Total Issues Fixed:** {results['total_issues_fixed']}

## File-by-File Results

"""

        # One section per processed file, separated by horizontal rules.
        for file_result in results["files_processed"]:
            report_content += f"""### {file_result['filename']}
- **Original Quality Score:** {file_result['original_quality_score']}/100
- **Improved Quality Score:** {file_result['improved_quality_score']}/100
- **Quality Improvement:** +{file_result['quality_improvement']} points
- **Issues Fixed:** {file_result['issues_fixed']}

**Changes Summary:** {file_result['changes_summary']}

---

"""

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(report_content)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def main():
    """Run the content editor agent end-to-end and print a summary."""
    banner = "=" * 50
    print("Starting Content Editor Agent - Milestone 1")
    print(banner)

    # Build the agent with defaults and run it over every sample file.
    results = ContentEditorAgent().process_all_content()

    # Summarize the run for the operator.
    print("\n" + banner)
    print("CONTENT REVIEW COMPLETE")
    print(banner)
    print(f"Files processed: {len(results['files_processed'])}")
    print(f"Overall quality improvement: +{results['overall_quality_improvement']} points")
    print(f"Total issues fixed: {results['total_issues_fixed']}")
    print("\nReviewed content saved to: artifacts/reviewed_content/v1_content_edited/")
    print("Quality report saved to: artifacts/agent_reports/quality/")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Content Reviewer Tool
|
|
3
|
+
|
|
4
|
+
Provides content analysis and improvement capabilities using LLM-based review.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Tuple, Optional
|
|
12
|
+
from anthropic import Anthropic
|
|
13
|
+
|
|
14
|
+
# Add project root to path for pattern injector
|
|
15
|
+
project_root = Path(__file__).parent.parent.parent
|
|
16
|
+
sys.path.insert(0, str(project_root))
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from tools.pattern_injector import PatternInjector
|
|
20
|
+
except ImportError:
|
|
21
|
+
PatternInjector = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ContentReviewer:
    """Content review and improvement tool using Anthropic Claude with pattern learning.

    Combines local heuristic readability metrics (Flesch-style scoring,
    passive-voice counting) with an LLM call that rewrites the text and
    reports its changes in a structured plain-text format.
    """

    def __init__(self, document_type: str = "research_report") -> None:
        """
        Initialize the content reviewer with API client and pattern learning.

        Args:
            document_type: Type of document for loading type-specific patterns
        """
        # Requires ANTHROPIC_API_KEY in the environment; no validation here —
        # a missing key surfaces as an API error inside review_text's try.
        self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
        self.document_type = document_type

        # Pattern injection is strictly optional: any import or construction
        # failure degrades to pattern_injector = None rather than raising.
        if PatternInjector:
            try:
                self.pattern_injector = PatternInjector(document_type=document_type)
            except Exception as e:
                print(f"⚠️ Could not load pattern injector: {e}")
                self.pattern_injector = None
        else:
            self.pattern_injector = None

    def analyze_readability(self, text: str) -> Dict:
        """
        Analyze text readability metrics.

        Args:
            text: Input text to analyze

        Returns:
            Dict with readability metrics: total_words, total_sentences,
            avg_sentence_length, flesch_reading_ease, passive_voice_percentage,
            syllable_count
        """
        # Sentence segmentation on terminal punctuation runs; empty fragments
        # (e.g. from "..." ) are dropped.
        sentences = re.split(r'[.!?]+', text)
        sentences = [s.strip() for s in sentences if s.strip()]

        words = text.split()
        total_words = len(words)
        total_sentences = len(sentences)

        # Average sentence length in words; 0 when there are no sentences.
        avg_sentence_length = total_words / total_sentences if total_sentences > 0 else 0

        # Syllables via the heuristic below — approximate, not dictionary-based.
        syllable_count = sum(self._count_syllables(word) for word in words)

        # Flesch Reading Ease: 206.835 - 1.015*(words/sentence) - 84.6*(syllables/word),
        # clamped to [0, 100]; 0 for empty input.
        if total_sentences > 0 and total_words > 0:
            flesch_score = 206.835 - (1.015 * avg_sentence_length) - (84.6 * (syllable_count / total_words))
            flesch_score = max(0, min(100, flesch_score))
        else:
            flesch_score = 0

        # Passive voice proxy: share of be-verb tokens. This over-counts,
        # since "is"/"are" also appear in active constructions.
        passive_indicators = ['was', 'were', 'been', 'being', 'is', 'are', 'am']
        passive_count = sum(1 for word in words if word.lower() in passive_indicators)
        passive_percentage = (passive_count / total_words * 100) if total_words > 0 else 0

        return {
            "total_words": total_words,
            "total_sentences": total_sentences,
            "avg_sentence_length": round(avg_sentence_length, 1),
            "flesch_reading_ease": round(flesch_score, 1),
            "passive_voice_percentage": round(passive_percentage, 1),
            "syllable_count": syllable_count
        }

    def _count_syllables(self, word: str) -> int:
        """Rough syllable counting approximation: count vowel groups,
        subtract one for a trailing silent 'e', floor at 1."""
        word = word.lower()
        vowels = "aeiouy"
        syllables = 0
        prev_was_vowel = False

        # Each maximal run of vowels counts as one syllable.
        for char in word:
            if char in vowels:
                if not prev_was_vowel:
                    syllables += 1
                prev_was_vowel = True
            else:
                prev_was_vowel = False

        # Handle silent e (only when it wouldn't drop the count to zero).
        if word.endswith('e') and syllables > 1:
            syllables -= 1

        return max(1, syllables)

    def calculate_quality_score(self, metrics: Dict, issues: List[str]) -> int:
        """
        Calculate overall content quality score (0-100).

        Args:
            metrics: Readability metrics (as produced by analyze_readability)
            issues: List of identified issues (as produced by _identify_issues)

        Returns:
            Quality score from 0-100 (starts at 100, penalties subtracted)
        """
        score = 100

        # Penalize for grammar/style issues: 2 points each, capped at 40.
        score -= min(len(issues) * 2, 40)  # Up to 40 points for issues

        # Readability: penalize both extremes for academic writing.
        flesch = metrics.get("flesch_reading_ease", 0)
        if flesch < 30:  # Too difficult
            score -= 15
        elif flesch > 70:  # Too easy for academic content
            score -= 10

        # Sentence length: long sentences penalized more than short ones.
        avg_length = metrics.get("avg_sentence_length", 0)
        if avg_length > 25:  # Too long
            score -= 10
        elif avg_length < 10:  # Too short
            score -= 5

        # Passive voice penalty above 30% be-verb share.
        passive_pct = metrics.get("passive_voice_percentage", 0)
        if passive_pct > 30:
            score -= 10

        return max(0, min(100, score))

    def review_text(self, text: str) -> Dict:
        """
        Review and improve text content with pattern learning.

        Makes one Anthropic API call. On any failure, falls back to returning
        the original text with a zero improvement and the error in
        "changes_summary" — it never raises.

        Args:
            text: Original text content

        Returns:
            Dict with original/improved content, metrics, quality scores,
            changes_made, changes_summary, and issues_fixed
        """
        # Analyze original content (also used by the error fallback below).
        original_metrics = self.analyze_readability(text)

        # Get historical pattern context if the injector is available.
        pattern_context = ""
        if self.pattern_injector:
            pattern_context = self.pattern_injector.get_context_for_content_editor()
            if pattern_context:
                print(f"✅ Applying learned patterns for '{self.document_type}' documents")

        # Build prompt in stages: base instructions, optional pattern context,
        # then the text and the required response format.
        prompt = f"""You are a professional editor specializing in academic and technical writing.

Please review and improve the following text for:
1. Grammar and spelling errors
2. Sentence structure and clarity
3. Word choice and precision
4. Flow and readability
5. Academic writing style"""

        # Add pattern learning context if available.
        if pattern_context:
            prompt += f"""

{pattern_context}

IMPORTANT: Apply the historical patterns above to improve this document. Look specifically for the common issues identified in previous documents of this type."""

        prompt += f"""

Original text:
{text}

Please provide:
1. The improved version of the text
2. A list of specific changes you made
3. A brief summary of the improvements

Format your response as:

IMPROVED TEXT:
[improved version here]

CHANGES MADE:
- [change 1]
- [change 2]
- [etc.]

SUMMARY:
[brief summary of improvements]
"""

        try:
            response = self.client.messages.create(
                model="claude-sonnet-4-20250514",  # pinned Claude Sonnet 4 model ID
                max_tokens=2000,
                messages=[{"role": "user", "content": prompt}]
            )

            response_text = response.content[0].text

            # Parse the structured sections out of the LLM response.
            improved_text, changes_made, summary = self._parse_review_response(response_text)

            # Re-run the same heuristics on the improved content.
            improved_metrics = self.analyze_readability(improved_text)

            # Score before/after using heuristic issue detection.
            original_issues = self._identify_issues(text, original_metrics)
            improved_issues = self._identify_issues(improved_text, improved_metrics)

            original_score = self.calculate_quality_score(original_metrics, original_issues)
            improved_score = self.calculate_quality_score(improved_metrics, improved_issues)

            return {
                "original_content": text,
                "improved_content": improved_text,
                "original_metrics": original_metrics,
                "improved_metrics": improved_metrics,
                "original_quality_score": original_score,
                "improved_quality_score": improved_score,
                "quality_improvement": improved_score - original_score,
                "changes_made": changes_made,
                "changes_summary": summary,
                "issues_fixed": len(original_issues) - len(improved_issues)
            }

        except Exception as e:
            # Best-effort fallback: surface the error in the summary but keep
            # the pipeline running with the unmodified text.
            print(f"Error during content review: {e}")
            return {
                "original_content": text,
                "improved_content": text,  # Fallback to original
                "original_metrics": original_metrics,
                "improved_metrics": original_metrics,
                "original_quality_score": self.calculate_quality_score(original_metrics, []),
                "improved_quality_score": self.calculate_quality_score(original_metrics, []),
                "quality_improvement": 0,
                "changes_made": [],
                "changes_summary": f"Review failed: {str(e)}",
                "issues_fixed": 0
            }

    def _parse_review_response(self, response: str) -> Tuple[str, List[str], str]:
        """Parse Claude's review response into (improved_text, changes, summary).

        Splits on blank lines and tracks which labeled section ("IMPROVED
        TEXT:", "CHANGES MADE:", "SUMMARY:") we are inside, so multi-paragraph
        sections are reassembled in order.
        """
        improved_text = ""
        changes_made = []
        summary = ""

        # Split response into paragraph-sized sections.
        sections = response.split('\n\n')
        current_section = ""

        for section in sections:
            if section.startswith("IMPROVED TEXT:"):
                current_section = "improved"
                improved_text = section.replace("IMPROVED TEXT:", "").strip()
            elif section.startswith("CHANGES MADE:"):
                current_section = "changes"
                changes_text = section.replace("CHANGES MADE:", "").strip()
                changes_made = [line.strip().lstrip("- ") for line in changes_text.split('\n') if line.strip()]
            elif section.startswith("SUMMARY:"):
                current_section = "summary"
                summary = section.replace("SUMMARY:", "").strip()
            elif current_section == "improved":
                # Continuation paragraph of the improved text.
                improved_text += "\n\n" + section
            elif current_section == "changes":
                # Continuation bullet lines of the changes list.
                changes_made.extend([line.strip().lstrip("- ") for line in section.split('\n') if line.strip()])
            elif current_section == "summary":
                summary += "\n\n" + section

        return improved_text.strip(), changes_made, summary.strip()

    def _identify_issues(self, text: str, metrics: Dict) -> List[str]:
        """Identify potential issues in the text via cheap heuristics."""
        issues = []

        # Check sentence length against the 25-word threshold.
        if metrics.get("avg_sentence_length", 0) > 25:
            issues.append("Long sentences detected")

        # Check passive voice share (be-verb proxy, see analyze_readability).
        if metrics.get("passive_voice_percentage", 0) > 30:
            issues.append("High passive voice usage")

        # Check readability floor.
        flesch = metrics.get("flesch_reading_ease", 0)
        if flesch < 30:
            issues.append("Text may be too complex")

        # Simple mechanical checks.
        if "....." in text:
            issues.append("Multiple consecutive periods")

        if "  " in text:
            issues.append("Multiple consecutive spaces")

        # Pattern-based checks. NOTE(review): the "it's" pattern flags every
        # correct contraction too — it reports a *possible* issue only.
        common_errors = [
            (r'\bit\'s\b', "Possible incorrect apostrophe usage"),
            (r'\byour\b.*\byour\b', "Repeated 'your'"),
            (r'\bthe the\b', "Repeated 'the'"),
        ]

        for pattern, description in common_errors:
            if re.search(pattern, text, re.IGNORECASE):
                issues.append(description)

        return issues
|