bioguider 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/generation/change_planner.py +176 -14
- bioguider/generation/document_renderer.py +5 -0
- bioguider/generation/llm_cleaner.py +16 -8
- bioguider/generation/llm_content_generator.py +59 -8
- bioguider/generation/models.py +4 -0
- bioguider/generation/output_manager.py +3 -3
- bioguider/generation/report_loader.py +6 -0
- bioguider/generation/suggestion_extractor.py +345 -21
- bioguider/managers/generation_manager.py +356 -51
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/METADATA +1 -1
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/RECORD +13 -13
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/LICENSE +0 -0
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/WHEEL +0 -0
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Tuple, Dict, List
|
|
5
|
+
from typing import Optional, Tuple, Dict, List
|
|
6
6
|
|
|
7
7
|
from bioguider.generation import (
|
|
8
8
|
EvaluationReportLoader,
|
|
@@ -20,17 +20,18 @@ from bioguider.utils.file_utils import parse_repo_url
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class DocumentationGenerationManager:
|
|
23
|
-
def __init__(self, llm, step_callback):
|
|
23
|
+
def __init__(self, llm, step_callback, output_dir: Optional[str] = None):
|
|
24
24
|
self.llm = llm
|
|
25
25
|
self.step_callback = step_callback
|
|
26
26
|
self.repo_url_or_path: str | None = None
|
|
27
|
+
self.start_time = None
|
|
27
28
|
|
|
28
29
|
self.loader = EvaluationReportLoader()
|
|
29
30
|
self.extractor = SuggestionExtractor()
|
|
30
31
|
self.style_analyzer = StyleAnalyzer()
|
|
31
32
|
self.planner = ChangePlanner()
|
|
32
33
|
self.renderer = DocumentRenderer()
|
|
33
|
-
self.output = OutputManager()
|
|
34
|
+
self.output = OutputManager(base_outputs_dir=output_dir)
|
|
34
35
|
self.llm_gen = LLMContentGenerator(llm)
|
|
35
36
|
self.llm_cleaner = LLMCleaner(llm)
|
|
36
37
|
|
|
@@ -42,12 +43,36 @@ class DocumentationGenerationManager:
|
|
|
42
43
|
def prepare_repo(self, repo_url_or_path: str):
|
|
43
44
|
self.repo_url_or_path = repo_url_or_path
|
|
44
45
|
|
|
46
|
+
def _get_generation_time(self) -> str:
|
|
47
|
+
"""Get formatted generation time with start, end, and duration"""
|
|
48
|
+
if self.start_time is None:
|
|
49
|
+
return "Not tracked"
|
|
50
|
+
import time
|
|
51
|
+
import datetime
|
|
52
|
+
end_time = time.time()
|
|
53
|
+
duration = end_time - self.start_time
|
|
54
|
+
|
|
55
|
+
start_str = datetime.datetime.fromtimestamp(self.start_time).strftime("%H:%M:%S")
|
|
56
|
+
end_str = datetime.datetime.fromtimestamp(end_time).strftime("%H:%M:%S")
|
|
57
|
+
|
|
58
|
+
if duration < 60:
|
|
59
|
+
duration_str = f"{duration:.1f}s"
|
|
60
|
+
elif duration < 3600:
|
|
61
|
+
duration_str = f"{duration/60:.1f}m"
|
|
62
|
+
else:
|
|
63
|
+
duration_str = f"{duration/3600:.1f}h"
|
|
64
|
+
|
|
65
|
+
return f"{start_str} → {end_str} ({duration_str})"
|
|
66
|
+
|
|
45
67
|
def run(self, report_path: str, repo_path: str | None = None) -> str:
|
|
68
|
+
import time
|
|
69
|
+
self.start_time = time.time()
|
|
46
70
|
repo_path = repo_path or self.repo_url_or_path or ""
|
|
47
|
-
self.print_step(step_name="LoadReport", step_output=f"
|
|
71
|
+
self.print_step(step_name="LoadReport", step_output=f"Loading evaluation report from {report_path}...")
|
|
48
72
|
report, report_abs = self.loader.load(report_path)
|
|
73
|
+
self.print_step(step_name="LoadReport", step_output="✓ Evaluation report loaded successfully")
|
|
49
74
|
|
|
50
|
-
self.print_step(step_name="ReadRepoFiles", step_output=f"
|
|
75
|
+
self.print_step(step_name="ReadRepoFiles", step_output=f"Reading repository files from {repo_path}...")
|
|
51
76
|
reader = RepoReader(repo_path)
|
|
52
77
|
# Prefer report-listed files if available; include all report-declared file lists
|
|
53
78
|
target_files = []
|
|
@@ -64,54 +89,117 @@ class DocumentationGenerationManager:
|
|
|
64
89
|
if isinstance(key, str) and key.strip():
|
|
65
90
|
userguide_files.append(key)
|
|
66
91
|
target_files.extend(userguide_files)
|
|
92
|
+
|
|
93
|
+
# Add tutorial files from tutorial_evaluation keys
|
|
94
|
+
tutorial_files: list[str] = []
|
|
95
|
+
if getattr(report, "tutorial_files", None):
|
|
96
|
+
tutorial_files.extend([p for p in report.tutorial_files if isinstance(p, str)])
|
|
97
|
+
elif getattr(report, "tutorial_evaluation", None) and isinstance(report.tutorial_evaluation, dict):
|
|
98
|
+
for key in report.tutorial_evaluation.keys():
|
|
99
|
+
if isinstance(key, str) and key.strip():
|
|
100
|
+
tutorial_files.append(key)
|
|
101
|
+
target_files.extend(tutorial_files)
|
|
102
|
+
|
|
67
103
|
if getattr(report, "submission_requirements_files", None):
|
|
68
104
|
target_files.extend(report.submission_requirements_files)
|
|
69
105
|
target_files = [p for p in target_files if isinstance(p, str) and p.strip()]
|
|
70
106
|
target_files = list(dict.fromkeys(target_files)) # de-dup
|
|
71
107
|
files, missing = reader.read_files(target_files) if target_files else reader.read_default_targets()
|
|
108
|
+
self.print_step(step_name="ReadRepoFiles", step_output=f"✓ Read {len(files)} files from repository")
|
|
72
109
|
|
|
73
|
-
self.print_step(step_name="AnalyzeStyle", step_output=
|
|
110
|
+
self.print_step(step_name="AnalyzeStyle", step_output="Analyzing document style and formatting...")
|
|
74
111
|
style = self.style_analyzer.analyze(files)
|
|
112
|
+
self.print_step(step_name="AnalyzeStyle", step_output="✓ Document style analysis completed")
|
|
75
113
|
|
|
76
|
-
self.print_step(step_name="ExtractSuggestions")
|
|
114
|
+
self.print_step(step_name="ExtractSuggestions", step_output="Extracting suggestions from evaluation report...")
|
|
77
115
|
suggestions = self.extractor.extract(report)
|
|
78
|
-
self.print_step(step_name="Suggestions", step_output=f"
|
|
116
|
+
self.print_step(step_name="Suggestions", step_output=f"✓ Extracted {len(suggestions)} suggestions from evaluation report")
|
|
79
117
|
|
|
80
|
-
self.print_step(step_name="PlanChanges")
|
|
118
|
+
self.print_step(step_name="PlanChanges", step_output="Planning changes based on suggestions...")
|
|
81
119
|
plan = self.planner.build_plan(repo_path=repo_path, style=style, suggestions=suggestions, available_files=files)
|
|
82
|
-
self.print_step(step_name="PlannedEdits", step_output=f"
|
|
120
|
+
self.print_step(step_name="PlannedEdits", step_output=f"✓ Planned {len(plan.planned_edits)} edits across {len(set(e.file_path for e in plan.planned_edits))} files")
|
|
83
121
|
|
|
84
|
-
self.print_step(step_name="RenderDocuments")
|
|
85
|
-
# Apply edits
|
|
122
|
+
self.print_step(step_name="RenderDocuments", step_output=f"Rendering documents with LLM (processing {len(plan.planned_edits)} edits)...")
|
|
123
|
+
# Apply edits; support full-file regeneration using the evaluation report as the sole authority
|
|
86
124
|
revised: Dict[str, str] = {}
|
|
87
125
|
diff_stats: Dict[str, dict] = {}
|
|
88
126
|
edits_by_file: Dict[str, list] = {}
|
|
89
127
|
for e in plan.planned_edits:
|
|
90
128
|
edits_by_file.setdefault(e.file_path, []).append(e)
|
|
129
|
+
|
|
130
|
+
total_files = len(edits_by_file)
|
|
131
|
+
processed_files = 0
|
|
132
|
+
|
|
133
|
+
# Prepare evaluation data subset to drive LLM full document generation
|
|
134
|
+
evaluation_data = {
|
|
135
|
+
"readme_evaluation": getattr(report, "readme_evaluation", None),
|
|
136
|
+
"installation_evaluation": getattr(report, "installation_evaluation", None),
|
|
137
|
+
"userguide_evaluation": getattr(report, "userguide_evaluation", None),
|
|
138
|
+
"tutorial_evaluation": getattr(report, "tutorial_evaluation", None),
|
|
139
|
+
}
|
|
140
|
+
|
|
91
141
|
for fpath, edits in edits_by_file.items():
|
|
92
|
-
|
|
142
|
+
processed_files += 1
|
|
143
|
+
self.print_step(step_name="ProcessingFile", step_output=f"Processing {fpath} ({processed_files}/{total_files}) - {len(edits)} edits")
|
|
144
|
+
|
|
145
|
+
original_content = files.get(fpath, "")
|
|
146
|
+
content = original_content
|
|
93
147
|
total_stats = {"added_lines": 0}
|
|
94
148
|
for e in edits:
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
suggestion=next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
149
|
+
context = original_content
|
|
150
|
+
if not e.content_template or e.content_template.strip() == "":
|
|
151
|
+
# Generate LLM content - use full document generation for full_replace, section generation for others
|
|
152
|
+
suggestion = next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
153
|
+
if suggestion:
|
|
154
|
+
if e.edit_type == "full_replace":
|
|
155
|
+
self.print_step(step_name="GeneratingContent", step_output=f"Generating full document for {e.suggestion_id} using LLM...")
|
|
156
|
+
gen_content, gen_usage = self.llm_gen.generate_full_document(
|
|
157
|
+
target_file=e.file_path,
|
|
158
|
+
evaluation_report={"suggestion": suggestion.content_guidance, "evidence": suggestion.source.get("evidence", "") if suggestion.source else ""},
|
|
159
|
+
context=context,
|
|
160
|
+
)
|
|
161
|
+
if isinstance(gen_content, str) and gen_content:
|
|
162
|
+
self.print_step(step_name="LLMFullDoc", step_output=f"✓ Generated full document for {e.suggestion_id} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
163
|
+
e.content_template = gen_content
|
|
164
|
+
else:
|
|
165
|
+
self.print_step(step_name="GeneratingContent", step_output=f"Generating section for {e.suggestion_id} using LLM...")
|
|
166
|
+
gen_section, gen_usage = self.llm_gen.generate_section(
|
|
167
|
+
suggestion=suggestion,
|
|
168
|
+
style=plan.style_profile,
|
|
169
|
+
context=context,
|
|
170
|
+
)
|
|
171
|
+
if isinstance(gen_section, str) and gen_section:
|
|
172
|
+
self.print_step(step_name="LLMSection", step_output=f"✓ Generated section for {e.suggestion_id} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
173
|
+
# Ensure header present
|
|
174
|
+
if gen_section.lstrip().startswith("#"):
|
|
175
|
+
e.content_template = gen_section
|
|
176
|
+
else:
|
|
177
|
+
title = e.anchor.get('value', '').strip() or ''
|
|
178
|
+
e.content_template = f"## {title}\n\n{gen_section}" if title else gen_section
|
|
110
179
|
content, stats = self.renderer.apply_edit(content, e)
|
|
180
|
+
# After applying full document or section changes, run a general cleaner pass for all text files
|
|
181
|
+
# to fix markdown/formatting issues without changing meaning.
|
|
182
|
+
try:
|
|
183
|
+
if fpath.endswith((".md", ".rst", ".Rmd", ".Rd")) and content:
|
|
184
|
+
self.print_step(step_name="CleaningContent", step_output=f"Cleaning formatting for {fpath}...")
|
|
185
|
+
cleaned, _usage = self.llm_cleaner.clean_readme(content)
|
|
186
|
+
if isinstance(cleaned, str) and cleaned.strip():
|
|
187
|
+
content = cleaned
|
|
188
|
+
|
|
189
|
+
# Additional post-processing: remove markdown code fences if present
|
|
190
|
+
if content.startswith("```markdown") and content.endswith("```"):
|
|
191
|
+
# Remove the opening and closing fences
|
|
192
|
+
content = content[11:] # Remove ```markdown
|
|
193
|
+
if content.endswith("```"):
|
|
194
|
+
content = content[:-3] # Remove closing ```
|
|
195
|
+
content = content.strip()
|
|
196
|
+
|
|
197
|
+
except Exception:
|
|
198
|
+
pass
|
|
111
199
|
total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
|
|
112
200
|
revised[fpath] = content
|
|
113
201
|
diff_stats[fpath] = total_stats
|
|
114
|
-
self.print_step(step_name="RenderedFile", step_output=f"
|
|
202
|
+
self.print_step(step_name="RenderedFile", step_output=f"✓ Completed {fpath} - added {total_stats['added_lines']} lines")
|
|
115
203
|
|
|
116
204
|
# Removed cleaner: duplication and fixes handled in prompts and renderer
|
|
117
205
|
|
|
@@ -128,11 +216,23 @@ class DocumentationGenerationManager:
|
|
|
128
216
|
else:
|
|
129
217
|
out_repo_key = self.repo_url_or_path or "repo"
|
|
130
218
|
|
|
131
|
-
self.print_step(step_name="WriteOutputs", step_output=f"
|
|
219
|
+
self.print_step(step_name="WriteOutputs", step_output=f"Writing outputs to {out_repo_key}...")
|
|
132
220
|
out_dir = self.output.prepare_output_dir(out_repo_key)
|
|
133
221
|
# Ensure all files we read (even without edits) are written to outputs alongside revisions
|
|
134
222
|
all_files_to_write: Dict[str, str] = dict(files)
|
|
135
223
|
all_files_to_write.update(revised)
|
|
224
|
+
# Also copy originals next to the new files for side-by-side comparison
|
|
225
|
+
def original_copy_name(path: str) -> str:
|
|
226
|
+
# Handle all file extensions properly
|
|
227
|
+
if "." in path:
|
|
228
|
+
base, ext = path.rsplit(".", 1)
|
|
229
|
+
return f"{base}.original.{ext}"
|
|
230
|
+
return f"{path}.original"
|
|
231
|
+
|
|
232
|
+
for orig_path, orig_content in files.items():
|
|
233
|
+
all_files_to_write[original_copy_name(orig_path)] = orig_content
|
|
234
|
+
|
|
235
|
+
self.print_step(step_name="WritingFiles", step_output=f"Writing {len(all_files_to_write)} files to output directory...")
|
|
136
236
|
artifacts = self.output.write_files(out_dir, all_files_to_write, diff_stats_by_file=diff_stats)
|
|
137
237
|
|
|
138
238
|
manifest = GenerationManifest(
|
|
@@ -144,8 +244,11 @@ class DocumentationGenerationManager:
|
|
|
144
244
|
artifacts=artifacts,
|
|
145
245
|
skipped=missing,
|
|
146
246
|
)
|
|
247
|
+
self.print_step(step_name="WritingManifest", step_output="Writing generation manifest...")
|
|
147
248
|
self.output.write_manifest(out_dir, manifest)
|
|
249
|
+
|
|
148
250
|
# Write human-readable generation report
|
|
251
|
+
self.print_step(step_name="WritingReport", step_output="Writing generation report...")
|
|
149
252
|
gen_report_path = self._write_generation_report(
|
|
150
253
|
out_dir,
|
|
151
254
|
report.repo_url or str(self.repo_url_or_path or ""),
|
|
@@ -155,7 +258,7 @@ class DocumentationGenerationManager:
|
|
|
155
258
|
artifacts,
|
|
156
259
|
missing,
|
|
157
260
|
)
|
|
158
|
-
self.print_step(step_name="Done", step_output=f"
|
|
261
|
+
self.print_step(step_name="Done", step_output=f"✓ Generation completed! Output directory: {out_dir}")
|
|
159
262
|
return out_dir
|
|
160
263
|
|
|
161
264
|
def _write_generation_report(
|
|
@@ -168,35 +271,237 @@ class DocumentationGenerationManager:
|
|
|
168
271
|
artifacts,
|
|
169
272
|
skipped: List[str],
|
|
170
273
|
):
|
|
171
|
-
# Build a
|
|
274
|
+
# Build a user-friendly markdown report
|
|
172
275
|
lines: list[str] = []
|
|
173
|
-
lines.append(f"# Documentation
|
|
174
|
-
lines.append(f"
|
|
175
|
-
lines.append(f"
|
|
176
|
-
|
|
276
|
+
lines.append(f"# Documentation Generation Report\n")
|
|
277
|
+
lines.append(f"**Repository:** {repo_url}\n")
|
|
278
|
+
lines.append(f"**Generated:** {out_dir}\n")
|
|
279
|
+
|
|
280
|
+
# Processing timeline
|
|
281
|
+
total_improvements = len(plan.planned_edits)
|
|
282
|
+
start_time_str = self._get_generation_time().split(" → ")[0] if self.start_time else "Not tracked"
|
|
283
|
+
end_time_str = self._get_generation_time().split(" → ")[1].split(" (")[0] if self.start_time else "Not tracked"
|
|
284
|
+
duration_str = self._get_generation_time().split("(")[1].replace(")", "") if self.start_time else "Not tracked"
|
|
285
|
+
|
|
286
|
+
lines.append(f"**Processing Timeline:**\n")
|
|
287
|
+
lines.append(f"- **Start Time:** {start_time_str}\n")
|
|
288
|
+
lines.append(f"- **End Time:** {end_time_str}\n")
|
|
289
|
+
lines.append(f"- **Duration:** {duration_str}\n")
|
|
290
|
+
|
|
291
|
+
# Calculate statistics by category
|
|
292
|
+
category_stats = {}
|
|
293
|
+
file_stats = {}
|
|
177
294
|
for e in plan.planned_edits:
|
|
178
295
|
sug = next((s for s in suggestions if s.id == e.suggestion_id), None)
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
296
|
+
if sug and sug.category:
|
|
297
|
+
category = sug.category.split('.')[0] # e.g., "readme.dependencies" -> "readme"
|
|
298
|
+
category_stats[category] = category_stats.get(category, 0) + 1
|
|
299
|
+
|
|
300
|
+
file_stats[e.file_path] = file_stats.get(e.file_path, 0) + 1
|
|
301
|
+
|
|
302
|
+
# Calculate evaluation report statistics
|
|
303
|
+
score_stats = {"Excellent": 0, "Good": 0, "Fair": 0, "Poor": 0}
|
|
304
|
+
processed_suggestions = set()
|
|
184
305
|
for e in plan.planned_edits:
|
|
185
|
-
|
|
306
|
+
sug = next((s for s in suggestions if s.id == e.suggestion_id), None)
|
|
307
|
+
if sug and sug.source and sug.id not in processed_suggestions:
|
|
308
|
+
score = sug.source.get("score", "")
|
|
309
|
+
if score in score_stats:
|
|
310
|
+
score_stats[score] += 1
|
|
311
|
+
processed_suggestions.add(sug.id)
|
|
312
|
+
|
|
313
|
+
# Calculate success rate based on processed suggestions only
|
|
314
|
+
processed_suggestions_count = len([s for s in suggestions if s.source and s.source.get("score", "") in ("Fair", "Poor")])
|
|
315
|
+
fixed_suggestions = len(processed_suggestions)
|
|
316
|
+
|
|
317
|
+
# Add professional summary and key metrics
|
|
318
|
+
lines.append(f"\n## Summary\n")
|
|
319
|
+
|
|
320
|
+
# Concise summary for busy developers
|
|
321
|
+
lines.append(f"This is a report of automated documentation enhancements generated by BioGuider.\n")
|
|
322
|
+
lines.append(f"\nOur AI analyzed your existing documentation to identify areas for improvement based on standards for high-quality scientific software. It then automatically rewrote the files to be more accessible and useful for biomedical researchers.\n")
|
|
323
|
+
lines.append(f"\nThis changelog provides a transparent record of what was modified and why. We encourage you to review the changes before committing. Original file versions are backed up with a `.original` extension.\n")
|
|
324
|
+
|
|
325
|
+
# Core metrics
|
|
326
|
+
total_lines_added = sum(stats.get('added_lines', 0) for stats in diff_stats.values())
|
|
327
|
+
success_rate = (fixed_suggestions/processed_suggestions_count*100) if processed_suggestions_count > 0 else 0
|
|
328
|
+
|
|
329
|
+
# Lead with success rate - the most important outcome
|
|
330
|
+
lines.append(f"\n### Key Metrics\n")
|
|
331
|
+
lines.append(f"- **Success Rate:** {success_rate:.1f}% ({fixed_suggestions} of {processed_suggestions_count} processed suggestions addressed)\n")
|
|
332
|
+
lines.append(f"- **Total Impact:** {total_improvements} improvements across {len(file_stats)} files\n")
|
|
333
|
+
lines.append(f"- **Content Added:** {total_lines_added} lines of enhanced documentation\n")
|
|
334
|
+
|
|
335
|
+
# Explain why some suggestions were filtered out
|
|
336
|
+
total_suggestions = len(suggestions)
|
|
337
|
+
filtered_count = total_suggestions - processed_suggestions_count
|
|
338
|
+
if filtered_count > 0:
|
|
339
|
+
lines.append(f"\n### Processing Strategy\n")
|
|
340
|
+
lines.append(f"- **Suggestions filtered out:** {filtered_count} items\n")
|
|
341
|
+
lines.append(f"- **Reason:** Only 'Fair' and 'Poor' priority suggestions were processed\n")
|
|
342
|
+
lines.append(f"- **Rationale:** Focus on critical issues that need immediate attention\n")
|
|
343
|
+
lines.append(f"- **Quality threshold:** 'Excellent' and 'Good' suggestions already meet standards\n")
|
|
344
|
+
|
|
345
|
+
# Priority breakdown - answer "Was it important work?"
|
|
346
|
+
lines.append(f"\n### Priority Breakdown\n")
|
|
347
|
+
priority_fixed = 0
|
|
348
|
+
priority_total = 0
|
|
186
349
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
lines.append(f"- {art.dest_rel_path} | status: {status} | added_lines: {added}")
|
|
350
|
+
for score in ["Poor", "Fair"]:
|
|
351
|
+
count = score_stats[score]
|
|
352
|
+
if count > 0:
|
|
353
|
+
priority_total += count
|
|
354
|
+
priority_fixed += count
|
|
355
|
+
lines.append(f"- **{score} Priority:** {count} items → 100% addressed\n")
|
|
194
356
|
|
|
195
|
-
#
|
|
357
|
+
# Remove confusing "Critical Issues" bullet - success rate already shown above
|
|
358
|
+
|
|
359
|
+
# Quality assurance note
|
|
360
|
+
excellent_count = score_stats.get("Excellent", 0)
|
|
361
|
+
good_count = score_stats.get("Good", 0)
|
|
362
|
+
if excellent_count > 0 or good_count > 0:
|
|
363
|
+
lines.append(f"\n### Quality Assurance\n")
|
|
364
|
+
lines.append(f"- **High-Quality Items:** {excellent_count + good_count} suggestions already meeting standards (no changes needed)\n")
|
|
365
|
+
|
|
366
|
+
# Group improvements by file type for better readability
|
|
367
|
+
by_file = {}
|
|
368
|
+
for e in plan.planned_edits:
|
|
369
|
+
if e.file_path not in by_file:
|
|
370
|
+
by_file[e.file_path] = []
|
|
371
|
+
by_file[e.file_path].append(e)
|
|
372
|
+
|
|
373
|
+
lines.append(f"\n## Files Improved\n")
|
|
374
|
+
for file_path, edits in by_file.items():
|
|
375
|
+
added_lines = diff_stats.get(file_path, {}).get('added_lines', 0)
|
|
376
|
+
lines.append(f"\n### {file_path}\n")
|
|
377
|
+
lines.append(f"**Changes made:** {len(edits)} improvement(s), {added_lines} lines added\n")
|
|
378
|
+
|
|
379
|
+
for e in edits:
|
|
380
|
+
sug = next((s for s in suggestions if s.id == e.suggestion_id), None)
|
|
381
|
+
guidance = sug.content_guidance if sug else ""
|
|
382
|
+
evidence = sug.source.get("evidence", "") if sug and sug.source else ""
|
|
383
|
+
section = e.anchor.get('value', 'General improvements')
|
|
384
|
+
|
|
385
|
+
# Convert technical action names to user-friendly descriptions
|
|
386
|
+
action_desc = {
|
|
387
|
+
'append_section': f'Added "{section}" section',
|
|
388
|
+
'replace_intro_block': f'Improved "{section}" section',
|
|
389
|
+
'full_replace': 'Comprehensive rewrite',
|
|
390
|
+
'add_dependencies_section': 'Added dependencies information',
|
|
391
|
+
'add_system_requirements_section': 'Added system requirements',
|
|
392
|
+
'add_hardware_requirements': 'Added hardware requirements',
|
|
393
|
+
'clarify_mandatory_vs_optional': 'Clarified dependencies',
|
|
394
|
+
'improve_readability': f'Improved readability in "{section}"',
|
|
395
|
+
'improve_setup': f'Enhanced setup instructions in "{section}"',
|
|
396
|
+
'improve_reproducibility': f'Improved reproducibility in "{section}"',
|
|
397
|
+
'improve_structure': f'Enhanced structure in "{section}"',
|
|
398
|
+
'improve_code_quality': f'Improved code quality in "{section}"',
|
|
399
|
+
'improve_verification': f'Enhanced result verification in "{section}"',
|
|
400
|
+
'improve_performance': f'Added performance notes in "{section}"',
|
|
401
|
+
'improve_clarity_and_error_handling': f'Improved clarity and error handling in "{section}"',
|
|
402
|
+
'improve_consistency': f'Improved consistency in "{section}"',
|
|
403
|
+
'improve_context': f'Enhanced context in "{section}"',
|
|
404
|
+
'improve_error_handling': f'Improved error handling in "{section}"',
|
|
405
|
+
'add_overview_section': f'Added "{section}" section'
|
|
406
|
+
}.get(e.edit_type, f'Improved {e.edit_type}')
|
|
407
|
+
|
|
408
|
+
lines.append(f"- **{action_desc}**")
|
|
409
|
+
|
|
410
|
+
# Show evaluation reasoning that triggered this improvement
|
|
411
|
+
if sug and sug.source:
|
|
412
|
+
evidence = sug.source.get("evidence", "")
|
|
413
|
+
score = sug.source.get("score", "")
|
|
414
|
+
category = sug.category or ""
|
|
415
|
+
|
|
416
|
+
# Format category for display (e.g., "readme.dependencies" -> "Dependencies")
|
|
417
|
+
category_display = category.split('.')[-1].replace('_', ' ').title() if category else ""
|
|
418
|
+
|
|
419
|
+
if evidence:
|
|
420
|
+
# Handle different evidence types
|
|
421
|
+
if isinstance(evidence, dict):
|
|
422
|
+
# Extract key information from dict evidence
|
|
423
|
+
evidence_text = evidence.get("dependency_suggestions", "") or evidence.get("evidence", "")
|
|
424
|
+
if not evidence_text:
|
|
425
|
+
evidence_text = f"Installation evaluation: {evidence.get('overall_score', 'Unknown')} score"
|
|
426
|
+
else:
|
|
427
|
+
evidence_text = str(evidence)
|
|
428
|
+
# Handle Python dict string evidence (from full_replace actions)
|
|
429
|
+
if evidence_text.startswith("{") and evidence_text.endswith("}"):
|
|
430
|
+
try:
|
|
431
|
+
import ast
|
|
432
|
+
evidence_dict = ast.literal_eval(evidence_text)
|
|
433
|
+
# Extract specific suggestions from the evaluation report
|
|
434
|
+
dep_sugg = evidence_dict.get("dependency_suggestions", "")
|
|
435
|
+
hw_req = evidence_dict.get("hardware_requirements", False)
|
|
436
|
+
compat_os = evidence_dict.get("compatible_os", True)
|
|
437
|
+
overall_score = evidence_dict.get("overall_score", "")
|
|
438
|
+
|
|
439
|
+
# Build specific reason based on evaluation findings
|
|
440
|
+
reasons = []
|
|
441
|
+
if dep_sugg:
|
|
442
|
+
reasons.append(f"Dependencies: {dep_sugg}")
|
|
443
|
+
if hw_req is False:
|
|
444
|
+
reasons.append("Hardware requirements not specified")
|
|
445
|
+
if compat_os is False:
|
|
446
|
+
reasons.append("Operating system compatibility unclear")
|
|
447
|
+
if overall_score and overall_score not in ("Excellent", "Good"):
|
|
448
|
+
reasons.append(f"Overall score: {overall_score}")
|
|
449
|
+
|
|
450
|
+
if reasons:
|
|
451
|
+
evidence_text = "; ".join(reasons)
|
|
452
|
+
else:
|
|
453
|
+
evidence_text = f"Installation evaluation score: {overall_score}"
|
|
454
|
+
except:
|
|
455
|
+
evidence_text = "Installation documentation needs improvement"
|
|
456
|
+
|
|
457
|
+
if score and category_display:
|
|
458
|
+
lines.append(f" - *Reason:* [{category_display} - {score}] {evidence_text}")
|
|
459
|
+
elif score:
|
|
460
|
+
lines.append(f" - *Reason:* [{score}] {evidence_text}")
|
|
461
|
+
elif category_display:
|
|
462
|
+
lines.append(f" - *Reason:* [{category_display}] {evidence_text}")
|
|
463
|
+
else:
|
|
464
|
+
lines.append(f" - *Reason:* {evidence_text}")
|
|
465
|
+
elif score:
|
|
466
|
+
lines.append(f" - *Reason:* Evaluation score was '{score}' - needs improvement")
|
|
467
|
+
|
|
468
|
+
# Show what was actually implemented (different from reason)
|
|
469
|
+
if guidance:
|
|
470
|
+
# Extract key action from guidance to show what was implemented
|
|
471
|
+
if "dependencies" in guidance.lower():
|
|
472
|
+
lines.append(f" - *Implemented:* Added comprehensive dependency list with installation instructions")
|
|
473
|
+
elif "system requirements" in guidance.lower() or "hardware" in guidance.lower():
|
|
474
|
+
lines.append(f" - *Implemented:* Added system requirements and platform-specific installation details")
|
|
475
|
+
elif "comparative statement" in guidance.lower() or "beneficial" in guidance.lower():
|
|
476
|
+
lines.append(f" - *Implemented:* Added comparative analysis highlighting Seurat's advantages")
|
|
477
|
+
elif "readability" in guidance.lower() or "bullet" in guidance.lower():
|
|
478
|
+
lines.append(f" - *Implemented:* Enhanced readability with structured lists and examples")
|
|
479
|
+
elif "overview" in guidance.lower() or "summary" in guidance.lower():
|
|
480
|
+
lines.append(f" - *Implemented:* Improved overview section with clear, professional tone")
|
|
481
|
+
elif "accessible" in guidance.lower() or "non-experts" in guidance.lower():
|
|
482
|
+
lines.append(f" - *Implemented:* Simplified language for broader accessibility")
|
|
483
|
+
elif "examples" in guidance.lower() or "usage" in guidance.lower():
|
|
484
|
+
lines.append(f" - *Implemented:* Added practical examples and usage scenarios")
|
|
485
|
+
elif "error" in guidance.lower() or "debug" in guidance.lower():
|
|
486
|
+
lines.append(f" - *Implemented:* Added error handling guidance and troubleshooting tips")
|
|
487
|
+
elif "context" in guidance.lower() or "scenarios" in guidance.lower():
|
|
488
|
+
lines.append(f" - *Implemented:* Expanded context and real-world application examples")
|
|
489
|
+
elif "structure" in guidance.lower() or "organization" in guidance.lower():
|
|
490
|
+
lines.append(f" - *Implemented:* Improved document structure and organization")
|
|
491
|
+
else:
|
|
492
|
+
# Truncate long guidance to avoid repetition
|
|
493
|
+
short_guidance = guidance[:100] + "..." if len(guidance) > 100 else guidance
|
|
494
|
+
lines.append(f" - *Implemented:* {short_guidance}")
|
|
495
|
+
|
|
496
|
+
lines.append("")
|
|
497
|
+
|
|
498
|
+
# Note about skipped files
|
|
196
499
|
if skipped:
|
|
197
|
-
lines.append("\n##
|
|
500
|
+
lines.append(f"\n## Note\n")
|
|
501
|
+
lines.append(f"The following files were not modified as they were not found in the repository:")
|
|
198
502
|
for rel in skipped:
|
|
199
503
|
lines.append(f"- {rel}")
|
|
504
|
+
|
|
200
505
|
report_md = "\n".join(lines)
|
|
201
506
|
dest = os.path.join(out_dir, "GENERATION_REPORT.md")
|
|
202
507
|
with open(dest, "w", encoding="utf-8") as fobj:
|
|
@@ -43,20 +43,20 @@ bioguider/conversation.py,sha256=DIvk_d7pz_guuORByK1eaaF09FAK-8shcNTrbSUHz9Y,177
|
|
|
43
43
|
bioguider/database/code_structure_db.py,sha256=q9tGZLWrjPi7a3u1b2iUnMO30lNWKbeMOkpDRffev2M,16973
|
|
44
44
|
bioguider/database/summarized_file_db.py,sha256=U60c62e2Bx7PwsTAcCQgljNxD5u5awjpj5qpHEgJbac,4801
|
|
45
45
|
bioguider/generation/__init__.py,sha256=esV02QgCsY67-HBwSHDbA5AcbKzNRIT3wDwwh6N4OFM,945
|
|
46
|
-
bioguider/generation/change_planner.py,sha256=
|
|
47
|
-
bioguider/generation/document_renderer.py,sha256
|
|
48
|
-
bioguider/generation/llm_cleaner.py,sha256=
|
|
49
|
-
bioguider/generation/llm_content_generator.py,sha256=
|
|
46
|
+
bioguider/generation/change_planner.py,sha256=Oi-a-ijxelkQ0sFOVZ3R3arNuw77uCQQKozV_2tQ8fM,16040
|
|
47
|
+
bioguider/generation/document_renderer.py,sha256=-2mij2irjJPpFL5OuWq-iLnR-q9W_OSq1U-lKm3kis0,2004
|
|
48
|
+
bioguider/generation/llm_cleaner.py,sha256=xGQ6BFDjTpzVMPsTfxoMdmJ9et28WGbAJ3Mc0OCLr54,1971
|
|
49
|
+
bioguider/generation/llm_content_generator.py,sha256=09ChJbaDwn7nM0to_Wb1lBlULE3w8J4T1wGNwE8lR4c,6928
|
|
50
50
|
bioguider/generation/llm_injector.py,sha256=bVxP6Asv2em4MBOB5yFsS14AuaeT7NLKQQMcsEqXjPY,17352
|
|
51
|
-
bioguider/generation/models.py,sha256=
|
|
52
|
-
bioguider/generation/output_manager.py,sha256=
|
|
51
|
+
bioguider/generation/models.py,sha256=MlJOLjPHk8xs-UGW-TGN_M9cevTuxTG4tjm1d1L15go,2699
|
|
52
|
+
bioguider/generation/output_manager.py,sha256=uwLyavND4kXOHlsXB0Berab3y8u6bhaEmQOQLl7wDAM,1963
|
|
53
53
|
bioguider/generation/repo_reader.py,sha256=ivTURU61fR8er4ev7gSpOxER3FJv2d9GAx_X5JoVTvQ,1177
|
|
54
|
-
bioguider/generation/report_loader.py,sha256=
|
|
54
|
+
bioguider/generation/report_loader.py,sha256=bxajeTDxod36iFsbSZhXSQjotxqP7LuAg5MC9OqX_p0,5911
|
|
55
55
|
bioguider/generation/style_analyzer.py,sha256=Vn9FAK1qJBNLolLC1tz362k4UBaPl107BlvkQc8pV2I,983
|
|
56
|
-
bioguider/generation/suggestion_extractor.py,sha256=
|
|
56
|
+
bioguider/generation/suggestion_extractor.py,sha256=X-9iobOfS_rZ3WE0_VbpnZWVjDzDn6_dfdiRh6WakZs,31106
|
|
57
57
|
bioguider/generation/test_metrics.py,sha256=ACXmSZc2L_UkkmC5h2s4tG44MXW1d-hClFwPCD5_BFI,7505
|
|
58
58
|
bioguider/managers/evaluation_manager.py,sha256=EoZ8V4rmx16zk1J3N9cNjeo0aCa7i32fLEQ3b2UolEQ,5917
|
|
59
|
-
bioguider/managers/generation_manager.py,sha256=
|
|
59
|
+
bioguider/managers/generation_manager.py,sha256=o3Pc_G6DPhCdQKKA_6sk1ngQuE_QmTZFBhLHm-Cfic8,29449
|
|
60
60
|
bioguider/managers/generation_test_manager.py,sha256=3mOBzQVpsLo_LpSspJcofn3CNtvgagS1DMr9Zuwkzq4,5307
|
|
61
61
|
bioguider/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
62
|
bioguider/rag/config.py,sha256=5g4IqTzgyfZfax9Af9CTkXShgItPOt4_9TEMSekCPik,4602
|
|
@@ -74,7 +74,7 @@ bioguider/utils/pyphen_utils.py,sha256=cdZc3qphkvMDeL5NiZ8Xou13M_uVNP7ifJ-FwxO-0
|
|
|
74
74
|
bioguider/utils/python_file_handler.py,sha256=BERiE2RHxpu3gAzv26jr8ZQetkrtnMZOv9SjpQ7WIdg,2650
|
|
75
75
|
bioguider/utils/r_file_handler.py,sha256=8HpFaYKP8N1nItwr9tOx49m99pcLSt8EUtTNTJ7xNoE,19564
|
|
76
76
|
bioguider/utils/utils.py,sha256=h8OhCjzLpHkb3ndnjRBUOBHD7csbHdEVNXf75SRN8Zc,4413
|
|
77
|
-
bioguider-0.2.
|
|
78
|
-
bioguider-0.2.
|
|
79
|
-
bioguider-0.2.
|
|
80
|
-
bioguider-0.2.
|
|
77
|
+
bioguider-0.2.30.dist-info/LICENSE,sha256=qzkvZcKwwA5DuSuhXMOm2LcO6BdEr4V7jwFZVL2-jL4,1065
|
|
78
|
+
bioguider-0.2.30.dist-info/METADATA,sha256=3GXSzaWKZkRKQ84-zzTKNbZfEiW2NVYKyYxR-PztVMg,1962
|
|
79
|
+
bioguider-0.2.30.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
80
|
+
bioguider-0.2.30.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|