bioguider 0.2.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +92 -0
  4. bioguider/agents/agent_tools.py +176 -0
  5. bioguider/agents/agent_utils.py +504 -0
  6. bioguider/agents/collection_execute_step.py +182 -0
  7. bioguider/agents/collection_observe_step.py +125 -0
  8. bioguider/agents/collection_plan_step.py +156 -0
  9. bioguider/agents/collection_task.py +184 -0
  10. bioguider/agents/collection_task_utils.py +142 -0
  11. bioguider/agents/common_agent.py +137 -0
  12. bioguider/agents/common_agent_2step.py +215 -0
  13. bioguider/agents/common_conversation.py +61 -0
  14. bioguider/agents/common_step.py +85 -0
  15. bioguider/agents/consistency_collection_step.py +102 -0
  16. bioguider/agents/consistency_evaluation_task.py +57 -0
  17. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  18. bioguider/agents/consistency_observe_step.py +110 -0
  19. bioguider/agents/consistency_query_step.py +77 -0
  20. bioguider/agents/dockergeneration_execute_step.py +186 -0
  21. bioguider/agents/dockergeneration_observe_step.py +154 -0
  22. bioguider/agents/dockergeneration_plan_step.py +158 -0
  23. bioguider/agents/dockergeneration_task.py +158 -0
  24. bioguider/agents/dockergeneration_task_utils.py +220 -0
  25. bioguider/agents/evaluation_installation_task.py +270 -0
  26. bioguider/agents/evaluation_readme_task.py +767 -0
  27. bioguider/agents/evaluation_submission_requirements_task.py +172 -0
  28. bioguider/agents/evaluation_task.py +206 -0
  29. bioguider/agents/evaluation_tutorial_task.py +169 -0
  30. bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
  31. bioguider/agents/evaluation_userguide_prompts.py +179 -0
  32. bioguider/agents/evaluation_userguide_task.py +154 -0
  33. bioguider/agents/evaluation_utils.py +127 -0
  34. bioguider/agents/identification_execute_step.py +181 -0
  35. bioguider/agents/identification_observe_step.py +104 -0
  36. bioguider/agents/identification_plan_step.py +140 -0
  37. bioguider/agents/identification_task.py +270 -0
  38. bioguider/agents/identification_task_utils.py +22 -0
  39. bioguider/agents/peo_common_step.py +64 -0
  40. bioguider/agents/prompt_utils.py +253 -0
  41. bioguider/agents/python_ast_repl_tool.py +69 -0
  42. bioguider/agents/rag_collection_task.py +130 -0
  43. bioguider/conversation.py +67 -0
  44. bioguider/database/code_structure_db.py +500 -0
  45. bioguider/database/summarized_file_db.py +146 -0
  46. bioguider/generation/__init__.py +39 -0
  47. bioguider/generation/benchmark_metrics.py +610 -0
  48. bioguider/generation/change_planner.py +189 -0
  49. bioguider/generation/document_renderer.py +157 -0
  50. bioguider/generation/llm_cleaner.py +67 -0
  51. bioguider/generation/llm_content_generator.py +1128 -0
  52. bioguider/generation/llm_injector.py +809 -0
  53. bioguider/generation/models.py +85 -0
  54. bioguider/generation/output_manager.py +74 -0
  55. bioguider/generation/repo_reader.py +37 -0
  56. bioguider/generation/report_loader.py +166 -0
  57. bioguider/generation/style_analyzer.py +36 -0
  58. bioguider/generation/suggestion_extractor.py +436 -0
  59. bioguider/generation/test_metrics.py +189 -0
  60. bioguider/managers/benchmark_manager.py +785 -0
  61. bioguider/managers/evaluation_manager.py +215 -0
  62. bioguider/managers/generation_manager.py +686 -0
  63. bioguider/managers/generation_test_manager.py +107 -0
  64. bioguider/managers/generation_test_manager_v2.py +525 -0
  65. bioguider/rag/__init__.py +0 -0
  66. bioguider/rag/config.py +117 -0
  67. bioguider/rag/data_pipeline.py +651 -0
  68. bioguider/rag/embedder.py +24 -0
  69. bioguider/rag/rag.py +138 -0
  70. bioguider/settings.py +103 -0
  71. bioguider/utils/code_structure_builder.py +59 -0
  72. bioguider/utils/constants.py +135 -0
  73. bioguider/utils/default.gitignore +140 -0
  74. bioguider/utils/file_utils.py +215 -0
  75. bioguider/utils/gitignore_checker.py +175 -0
  76. bioguider/utils/notebook_utils.py +117 -0
  77. bioguider/utils/pyphen_utils.py +73 -0
  78. bioguider/utils/python_file_handler.py +65 -0
  79. bioguider/utils/r_file_handler.py +551 -0
  80. bioguider/utils/utils.py +163 -0
  81. bioguider-0.2.52.dist-info/LICENSE +21 -0
  82. bioguider-0.2.52.dist-info/METADATA +51 -0
  83. bioguider-0.2.52.dist-info/RECORD +84 -0
  84. bioguider-0.2.52.dist-info/WHEEL +4 -0
@@ -0,0 +1,107 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import json
5
+ from typing import Tuple
6
+
7
+ from bioguider.generation.llm_injector import LLMErrorInjector
8
+ from bioguider.generation.test_metrics import evaluate_fixes
9
+ from bioguider.managers.generation_manager import DocumentationGenerationManager
10
+ from bioguider.agents.agent_utils import read_file, write_file
11
+
12
+
13
+ class GenerationTestManager:
14
+ def __init__(self, llm, step_callback):
15
+ self.llm = llm
16
+ self.step_output = step_callback
17
+
18
+ def print_step(self, name: str, out: str | None = None):
19
+ if self.step_output:
20
+ self.step_output(step_name=name, step_output=out)
21
+
22
+ def run_quant_test(self, report_path: str, baseline_repo_path: str, tmp_repo_path: str, min_per_category: int = 3) -> str:
23
+ self.print_step("QuantTest:LoadBaseline", baseline_repo_path)
24
+ baseline_readme_path = os.path.join(baseline_repo_path, "README.md")
25
+ baseline = read_file(baseline_readme_path) or ""
26
+
27
+ self.print_step("QuantTest:Inject")
28
+ injector = LLMErrorInjector(self.llm)
29
+ corrupted, inj_manifest = injector.inject(baseline, min_per_category=min_per_category)
30
+
31
+ # write corrupted into tmp repo path
32
+ os.makedirs(tmp_repo_path, exist_ok=True)
33
+ corrupted_readme_path = os.path.join(tmp_repo_path, "README.md")
34
+ write_file(corrupted_readme_path, corrupted)
35
+ inj_path = os.path.join(tmp_repo_path, "INJECTION_MANIFEST.json")
36
+ with open(inj_path, "w", encoding="utf-8") as fobj:
37
+ json.dump(inj_manifest, fobj, indent=2)
38
+
39
+ self.print_step("QuantTest:Generate")
40
+ gen = DocumentationGenerationManager(self.llm, self.step_output)
41
+ out_dir = gen.run(report_path=report_path, repo_path=tmp_repo_path)
42
+
43
+ # read revised
44
+ revised_readme_path = os.path.join(out_dir, "README.md")
45
+ revised = read_file(revised_readme_path) or ""
46
+
47
+ self.print_step("QuantTest:Evaluate")
48
+ results = evaluate_fixes(baseline, corrupted, revised, inj_manifest)
49
+ # write results
50
+ with open(os.path.join(out_dir, "GEN_TEST_RESULTS.json"), "w", encoding="utf-8") as fobj:
51
+ json.dump(results, fobj, indent=2)
52
+ # slides-like markdown report
53
+ totals = results.get("summary", {}).get("totals", {})
54
+ success_rate = results.get("summary", {}).get("success_rate", 0.0)
55
+ lines = ["# 🔬 Quantifiable Testing Results\n",
56
+ "\n## BioGuider Error Correction Performance Analysis\n",
57
+ "\n---\n",
58
+ "\n## 📊 Slide 1: Testing Results Overview\n",
59
+ "\n### 🎯 Totals\n",
60
+ f"- Total Errors: {totals.get('total_errors', 0)}\n",
61
+ f"- Fixed to Baseline: {totals.get('fixed_to_baseline', 0)}\n",
62
+ f"- Fixed to Valid: {totals.get('fixed_to_valid', 0)}\n",
63
+ f"- Unchanged: {totals.get('unchanged', 0)}\n",
64
+ f"- Success Rate: {success_rate}%\n",
65
+ "\n### 📂 Per-Category Metrics\n"]
66
+ for cat, m in results["per_category"].items():
67
+ lines.append(f"- {cat}: total={m.get('total',0)}, fixed_to_baseline={m.get('fixed_to_baseline',0)}, fixed_to_valid={m.get('fixed_to_valid',0)}, unchanged={m.get('unchanged',0)}")
68
+ # Per-file change counts (simple heuristic from manifest artifacts)
69
+ try:
70
+ manifest_path = os.path.join(out_dir, "manifest.json")
71
+ with open(manifest_path, "r", encoding="utf-8") as mf:
72
+ mani = json.load(mf)
73
+ lines.append("\n### 🗂️ Per-File Changes\n")
74
+ for art in mani.get("artifacts", []):
75
+ rel = art.get("dest_rel_path")
76
+ stats = art.get("diff_stats", {})
77
+ added = stats.get("added_lines", 0)
78
+ status = "Revised" if added and added > 0 else "Copied"
79
+ lines.append(f"- {rel}: {status}, added_lines={added}")
80
+ except Exception:
81
+ pass
82
+ lines.append("\n---\n\n## 📝 Notes\n")
83
+ lines.append("- README versions saved: README.original.md, README.corrupted.md, README.md (fixed).\n")
84
+ with open(os.path.join(out_dir, "GEN_TEST_REPORT.md"), "w", encoding="utf-8") as fobj:
85
+ fobj.write("\n".join(lines))
86
+ # Save versioned files into output dir
87
+ write_file(os.path.join(out_dir, "README.original.md"), baseline)
88
+ write_file(os.path.join(out_dir, "README.corrupted.md"), corrupted)
89
+ # Copy injection manifest
90
+ try:
91
+ with open(inj_path, "r", encoding="utf-8") as fin:
92
+ with open(os.path.join(out_dir, "INJECTION_MANIFEST.json"), "w", encoding="utf-8") as fout:
93
+ fout.write(fin.read())
94
+ except Exception:
95
+ pass
96
+ self.print_step("QuantTest:Done", out_dir)
97
+ return out_dir
98
+
99
+ def run_quant_suite(self, report_path: str, baseline_repo_path: str, base_tmp_repo_path: str, levels: dict[str, int]) -> dict:
100
+ results = {}
101
+ for level, min_cnt in levels.items():
102
+ tmp_repo_path = f"{base_tmp_repo_path}_{level}"
103
+ out_dir = self.run_quant_test(report_path, baseline_repo_path, tmp_repo_path, min_per_category=min_cnt)
104
+ results[level] = out_dir
105
+ return results
106
+
107
+
@@ -0,0 +1,525 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import json
5
+ import shutil
6
+ from typing import Dict, List, Tuple
7
+ from pathlib import Path
8
+
9
+ from bioguider.generation.llm_injector import LLMErrorInjector
10
+ from bioguider.managers.generation_manager import DocumentationGenerationManager
11
+ from bioguider.agents.agent_utils import read_file, write_file
12
+
13
+
14
+ class GenerationTestManagerV2:
15
+ """
16
+ Enhanced version that:
17
+ 1. Injects errors into ALL files in multiple categories (README, ALL tutorials, ALL userguides, ALL installation docs)
18
+ 2. Tracks errors comprehensively across all files with detailed per-file and per-category statistics
19
+ 3. Provides detailed statistics on injected, detected, and fixed errors
20
+ 4. Simplifies reporting (fixed vs unchanged, no confusing dual-status)
21
+ 5. Saves corrupted, original, and fixed versions for each file for full audit trail
22
+ """
23
+
24
+ def __init__(self, llm, step_callback):
25
+ self.llm = llm
26
+ self.step_output = step_callback
27
+
28
+ def print_step(self, name: str, out: str | None = None):
29
+ if self.step_output:
30
+ self.step_output(step_name=name, step_output=out)
31
+
32
+ def _select_target_files(self, baseline_repo_path: str) -> Dict[str, List[str]]:
33
+ """
34
+ Select target files for error injection across multiple categories.
35
+
36
+ Returns:
37
+ Dict mapping category names to file paths
38
+ """
39
+ targets = {
40
+ "readme": [],
41
+ "tutorial": [],
42
+ "userguide": [],
43
+ "installation": []
44
+ }
45
+
46
+ # README files
47
+ readme_path = os.path.join(baseline_repo_path, "README.md")
48
+ if os.path.exists(readme_path):
49
+ targets["readme"].append(readme_path)
50
+
51
+ # Tutorial files (RMarkdown vignettes) - ALL FILES
52
+ vignettes_dir = os.path.join(baseline_repo_path, "vignettes")
53
+ if os.path.isdir(vignettes_dir):
54
+ tutorial_files = []
55
+ for f in os.listdir(vignettes_dir):
56
+ if f.endswith('.Rmd') and not f.startswith('.'):
57
+ tutorial_files.append(os.path.join(vignettes_dir, f))
58
+ # Inject into ALL tutorial files
59
+ targets["tutorial"] = sorted(tutorial_files)
60
+
61
+ # Installation files
62
+ install_files = []
63
+ for pattern in ["install", "INSTALL", "installation"]:
64
+ for ext in [".md", ".Rmd", ".rst"]:
65
+ fpath = os.path.join(baseline_repo_path, pattern + ext)
66
+ if os.path.exists(fpath):
67
+ install_files.append(fpath)
68
+
69
+ # Also check vignettes for installation guides
70
+ if os.path.isdir(vignettes_dir):
71
+ for f in os.listdir(vignettes_dir):
72
+ if "install" in f.lower() and (f.endswith('.Rmd') or f.endswith('.md')):
73
+ fpath = os.path.join(vignettes_dir, f)
74
+ if fpath not in install_files: # Avoid duplicates
75
+ install_files.append(fpath)
76
+
77
+ targets["installation"] = install_files # ALL installation docs
78
+
79
+ # Userguide files - ALL FILES
80
+ docs_dir = os.path.join(baseline_repo_path, "docs")
81
+ if os.path.isdir(docs_dir):
82
+ userguide_files = []
83
+ for f in os.listdir(docs_dir):
84
+ if f.endswith('.md') and not f.startswith('.'):
85
+ userguide_files.append(os.path.join(docs_dir, f))
86
+ targets["userguide"] = userguide_files # ALL userguide files
87
+
88
+ return targets
89
+
90
+ def _extract_project_terms(self, repo_path: str) -> List[str]:
91
+ """
92
+ Extract function names and key terms from the codebase to use as injection targets.
93
+ """
94
+ import re
95
+ from collections import Counter
96
+
97
+ terms = Counter()
98
+
99
+ # Walk through the repo
100
+ for root, _, files in os.walk(repo_path):
101
+ if ".git" in root or "__pycache__" in root:
102
+ continue
103
+
104
+ for file in files:
105
+ fpath = os.path.join(root, file)
106
+ try:
107
+ content = read_file(fpath)
108
+ if not content:
109
+ continue
110
+
111
+ if file.endswith(".py"):
112
+ # Python function definitions
113
+ funcs = re.findall(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(", content)
114
+ terms.update(funcs)
115
+ # Python class definitions
116
+ classes = re.findall(r"class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[:\(]", content)
117
+ terms.update(classes)
118
+
119
+ elif file.endswith(".R"):
120
+ # R function definitions
121
+ funcs = re.findall(r"([a-zA-Z_.][a-zA-Z0-9_.]*)\s*<-\s*function", content)
122
+ terms.update(funcs)
123
+
124
+ except Exception:
125
+ continue
126
+
127
+ # Filter out common/short terms
128
+ filtered_terms = [t for t, _ in terms.most_common(50) if len(t) > 4 and t not in ["init", "self", "setup", "test", "main"]]
129
+ return filtered_terms[:20]
130
+
131
+ def _inject_errors_into_files(
132
+ self,
133
+ target_files: Dict[str, List[str]],
134
+ tmp_repo_path: str,
135
+ min_per_category: int
136
+ ) -> Dict[str, Dict]:
137
+ """
138
+ Inject errors into selected files.
139
+
140
+ Returns:
141
+ Dict mapping file paths to injection manifests
142
+ """
143
+ injector = LLMErrorInjector(self.llm)
144
+ all_manifests = {}
145
+
146
+ # Extract project terms once
147
+ project_terms = self._extract_project_terms(tmp_repo_path)
148
+ self.print_step("ExtractTerms", f"Found {len(project_terms)} project terms: {', '.join(project_terms[:5])}...")
149
+
150
+ for category, file_list in target_files.items():
151
+ self.print_step(f"InjectErrors:{category.title()}", f"Injecting {min_per_category} errors per file into {len(file_list)} files")
152
+
153
+ for fpath in file_list:
154
+ if not os.path.exists(fpath):
155
+ continue
156
+
157
+ baseline_content = read_file(fpath) or ""
158
+ if not baseline_content.strip():
159
+ continue
160
+
161
+ try:
162
+ # Inject errors
163
+ corrupted, manifest = injector.inject(
164
+ baseline_content,
165
+ min_per_category=min_per_category,
166
+ project_terms=project_terms
167
+ )
168
+
169
+ # Save corrupted version to tmp repo
170
+ rel_path = os.path.relpath(fpath, os.path.dirname(os.path.dirname(fpath)))
171
+ if rel_path.startswith("../"):
172
+ rel_path = os.path.basename(fpath)
173
+
174
+ corrupted_path = os.path.join(tmp_repo_path, rel_path)
175
+ os.makedirs(os.path.dirname(corrupted_path), exist_ok=True)
176
+ write_file(corrupted_path, corrupted)
177
+
178
+ # Track manifest - add file_path to each error for tracking
179
+ errors_with_file = []
180
+ for error in manifest.get("errors", []):
181
+ error_with_file = error.copy()
182
+ error_with_file["file_path"] = rel_path
183
+ errors_with_file.append(error_with_file)
184
+
185
+ manifest_with_file = manifest.copy()
186
+ manifest_with_file["errors"] = errors_with_file
187
+
188
+ all_manifests[rel_path] = {
189
+ "category": category,
190
+ "original_path": fpath,
191
+ "corrupted_path": corrupted_path,
192
+ "manifest": manifest_with_file,
193
+ "baseline_content": baseline_content,
194
+ "corrupted_content": corrupted
195
+ }
196
+
197
+ self.print_step(
198
+ f"Injected:{os.path.basename(fpath)}",
199
+ f"{len(manifest.get('errors', []))} errors"
200
+ )
201
+ except Exception as e:
202
+ self.print_step(f"InjectionError:{os.path.basename(fpath)}", str(e))
203
+ continue
204
+
205
+ return all_manifests
206
+
207
+ def _evaluate_all_fixes(
208
+ self,
209
+ all_manifests: Dict[str, Dict],
210
+ output_dir: str
211
+ ) -> Dict:
212
+ """
213
+ Evaluate fixes across all injected files.
214
+
215
+ Returns comprehensive statistics.
216
+ """
217
+ from bioguider.generation.test_metrics import evaluate_fixes
218
+
219
+ all_results = {
220
+ "per_file": {},
221
+ "aggregate": {
222
+ "total_files_injected": len(all_manifests),
223
+ "total_errors_injected": 0,
224
+ "total_errors_fixed": 0,
225
+ "total_errors_unchanged": 0,
226
+ "by_category": {},
227
+ "by_file_type": {}
228
+ },
229
+ "detailed_errors": []
230
+ }
231
+
232
+ for rel_path, info in all_manifests.items():
233
+ # Read the fixed version
234
+ fixed_path = os.path.join(output_dir, rel_path)
235
+ if not os.path.exists(fixed_path):
236
+ # File wasn't processed - copy original
237
+ fixed_content = info["baseline_content"]
238
+ else:
239
+ fixed_content = read_file(fixed_path) or info["baseline_content"]
240
+
241
+ # Evaluate fixes for this file
242
+ results = evaluate_fixes(
243
+ info["baseline_content"],
244
+ info["corrupted_content"],
245
+ fixed_content,
246
+ info["manifest"]
247
+ )
248
+
249
+ # Store per-file results
250
+ all_results["per_file"][rel_path] = {
251
+ "category": info["category"],
252
+ "results": results
253
+ }
254
+
255
+ # Aggregate statistics
256
+ totals = results.get("summary", {}).get("totals", {})
257
+ file_total_errors = totals.get("total_errors", 0)
258
+ file_fixed = totals.get("fixed_to_baseline", 0) + totals.get("fixed_to_valid", 0)
259
+ file_unchanged = totals.get("unchanged", 0)
260
+
261
+ all_results["aggregate"]["total_errors_injected"] += file_total_errors
262
+ all_results["aggregate"]["total_errors_fixed"] += file_fixed
263
+ all_results["aggregate"]["total_errors_unchanged"] += file_unchanged
264
+
265
+ # By file type
266
+ file_cat = info["category"]
267
+ if file_cat not in all_results["aggregate"]["by_file_type"]:
268
+ all_results["aggregate"]["by_file_type"][file_cat] = {
269
+ "files": 0,
270
+ "errors_injected": 0,
271
+ "errors_fixed": 0,
272
+ "errors_unchanged": 0
273
+ }
274
+
275
+ all_results["aggregate"]["by_file_type"][file_cat]["files"] += 1
276
+ all_results["aggregate"]["by_file_type"][file_cat]["errors_injected"] += file_total_errors
277
+ all_results["aggregate"]["by_file_type"][file_cat]["errors_fixed"] += file_fixed
278
+ all_results["aggregate"]["by_file_type"][file_cat]["errors_unchanged"] += file_unchanged
279
+
280
+ # By error category
281
+ for err_cat, metrics in results.get("per_category", {}).items():
282
+ if err_cat not in all_results["aggregate"]["by_category"]:
283
+ all_results["aggregate"]["by_category"][err_cat] = {
284
+ "total": 0,
285
+ "fixed": 0,
286
+ "unchanged": 0
287
+ }
288
+
289
+ all_results["aggregate"]["by_category"][err_cat]["total"] += metrics.get("total", 0)
290
+ all_results["aggregate"]["by_category"][err_cat]["fixed"] += (
291
+ metrics.get("fixed_to_baseline", 0) + metrics.get("fixed_to_valid", 0)
292
+ )
293
+ all_results["aggregate"]["by_category"][err_cat]["unchanged"] += metrics.get("unchanged", 0)
294
+
295
+ # Collect detailed errors
296
+ for err in results.get("per_error", []):
297
+ err_detail = {
298
+ "file": rel_path,
299
+ "file_category": file_cat,
300
+ **err
301
+ }
302
+ # Simplify status
303
+ if err["status"] in ("fixed_to_baseline", "fixed_to_valid"):
304
+ err_detail["status"] = "fixed"
305
+ all_results["detailed_errors"].append(err_detail)
306
+
307
+ # Calculate aggregate success rate
308
+ total_errors = all_results["aggregate"]["total_errors_injected"]
309
+ fixed_errors = all_results["aggregate"]["total_errors_fixed"]
310
+ all_results["aggregate"]["success_rate"] = (
311
+ round((fixed_errors / total_errors * 100.0), 2) if total_errors > 0 else 0.0
312
+ )
313
+
314
+ return all_results
315
+
316
+ def _generate_comprehensive_report(
317
+ self,
318
+ results: Dict,
319
+ output_dir: str,
320
+ level: str
321
+ ):
322
+ """Generate a comprehensive markdown report"""
323
+ agg = results["aggregate"]
324
+
325
+ lines = [
326
+ "# 🔬 BioGuider Quantifiable Testing Results\n",
327
+ f"**Test Level**: {level.upper()}\n",
328
+ "\n---\n",
329
+ "\n## 📊 Executive Summary\n",
330
+ f"\n### Overall Performance\n",
331
+ f"- **Success Rate**: {agg['success_rate']}%\n",
332
+ f"- **Total Files Tested**: {agg['total_files_injected']}\n",
333
+ f"- **Total Errors Injected**: {agg['total_errors_injected']}\n",
334
+ f"- **Errors Fixed**: {agg['total_errors_fixed']} ({round(agg['total_errors_fixed']/agg['total_errors_injected']*100, 1) if agg['total_errors_injected'] > 0 else 0}%)\n",
335
+ f"- **Errors Unchanged**: {agg['total_errors_unchanged']} ({round(agg['total_errors_unchanged']/agg['total_errors_injected']*100, 1) if agg['total_errors_injected'] > 0 else 0}%)\n",
336
+ "\n---\n",
337
+ "\n## 📂 Performance by File Type\n",
338
+ ]
339
+
340
+ for file_type, metrics in sorted(agg["by_file_type"].items()):
341
+ fix_rate = (metrics["errors_fixed"] / metrics["errors_injected"] * 100) if metrics["errors_injected"] > 0 else 0
342
+ lines.append(f"\n### {file_type.title()}\n")
343
+ lines.append(f"- Files Tested: {metrics['files']}\n")
344
+ lines.append(f"- Errors Injected: {metrics['errors_injected']}\n")
345
+ lines.append(f"- Errors Fixed: {metrics['errors_fixed']} ({fix_rate:.1f}%)\n")
346
+ lines.append(f"- Errors Unchanged: {metrics['errors_unchanged']}\n")
347
+
348
+ lines.append("\n---\n")
349
+ lines.append("\n## 🏷️ Performance by Error Category\n")
350
+ lines.append("\n| Category | Total | Fixed | Unchanged | Fix Rate |\n")
351
+ lines.append("|----------|-------|-------|-----------|----------|\n")
352
+
353
+ for err_cat, metrics in sorted(agg["by_category"].items(), key=lambda x: -x[1]["total"]):
354
+ fix_rate = (metrics["fixed"] / metrics["total"] * 100) if metrics["total"] > 0 else 0
355
+ lines.append(
356
+ f"| {err_cat} | {metrics['total']} | {metrics['fixed']} | "
357
+ f"{metrics['unchanged']} | {fix_rate:.1f}% |\n"
358
+ )
359
+
360
+ lines.append("\n---\n")
361
+ lines.append("\n## 📝 Detailed Error Breakdown\n")
362
+
363
+ # Group by file
364
+ by_file = {}
365
+ for err in results["detailed_errors"]:
366
+ fpath = err["file"]
367
+ if fpath not in by_file:
368
+ by_file[fpath] = []
369
+ by_file[fpath].append(err)
370
+
371
+ for fpath, errors in sorted(by_file.items()):
372
+ fixed_count = sum(1 for e in errors if e["status"] == "fixed")
373
+ total_count = len(errors)
374
+ lines.append(f"\n### `{fpath}`\n")
375
+ lines.append(f"- **Total Errors**: {total_count}\n")
376
+ lines.append(f"- **Fixed**: {fixed_count}\n")
377
+ lines.append(f"- **Unchanged**: {total_count - fixed_count}\n")
378
+ lines.append("\n| ID | Category | Status |\n")
379
+ lines.append("|--------|----------|--------|\n")
380
+ for err in errors:
381
+ lines.append(f"| {err['id']} | {err['category']} | {err['status']} |\n")
382
+
383
+ lines.append("\n---\n")
384
+ lines.append("\n## 💡 Notes\n")
385
+ lines.append("- Original, corrupted, and fixed versions saved for each file\n")
386
+ lines.append("- Detailed injection manifests available in `INJECTION_MANIFEST.json`\n")
387
+ lines.append("- Complete results data in `GEN_TEST_RESULTS.json`\n")
388
+
389
+ with open(os.path.join(output_dir, "GEN_TEST_REPORT.md"), "w", encoding="utf-8") as f:
390
+ f.write("".join(lines))
391
+
392
+ def run_quant_test(
393
+ self,
394
+ report_path: str,
395
+ baseline_repo_path: str,
396
+ tmp_repo_path: str,
397
+ min_per_category: int = 3
398
+ ) -> str:
399
+ """
400
+ Run quantifiable testing with multi-file error injection.
401
+ """
402
+ # 1. Select target files across categories
403
+ self.print_step("SelectFiles", "Identifying target files...")
404
+ target_files = self._select_target_files(baseline_repo_path)
405
+
406
+ total_targets = sum(len(files) for files in target_files.values())
407
+ self.print_step("TargetsSelected", f"{total_targets} files selected across {len(target_files)} categories")
408
+
409
+ # 2. Copy baseline to tmp (for unmodified files)
410
+ if os.path.exists(tmp_repo_path):
411
+ shutil.rmtree(tmp_repo_path)
412
+ shutil.copytree(baseline_repo_path, tmp_repo_path, symlinks=False, ignore=shutil.ignore_patterns('.git'))
413
+
414
+ # 3. Inject errors into selected files
415
+ self.print_step("InjectErrors", f"Injecting {min_per_category} errors per category...")
416
+ all_manifests = self._inject_errors_into_files(target_files, tmp_repo_path, min_per_category)
417
+
418
+ total_errors = sum(len(info["manifest"].get("errors", [])) for info in all_manifests.values())
419
+ self.print_step("InjectionComplete", f"{total_errors} errors injected across {len(all_manifests)} files")
420
+
421
+ # Save combined injection manifest with proper structure
422
+ # Flatten all errors with file information for easy tracking
423
+ all_errors_flat = []
424
+ files_info = {}
425
+ for rel_path, info in all_manifests.items():
426
+ file_errors = info["manifest"].get("errors", [])
427
+ files_info[rel_path] = {
428
+ "category": info["category"],
429
+ "original_path": info["original_path"],
430
+ "corrupted_path": info["corrupted_path"],
431
+ "error_count": len(file_errors),
432
+ "errors": file_errors
433
+ }
434
+ all_errors_flat.extend(file_errors)
435
+
436
+ combined_manifest = {
437
+ "total_files": len(all_manifests),
438
+ "total_errors": total_errors,
439
+ "files": files_info,
440
+ "errors": all_errors_flat # Flat list for easy evaluation
441
+ }
442
+ inj_path = os.path.join(tmp_repo_path, "INJECTION_MANIFEST.json")
443
+ with open(inj_path, "w", encoding="utf-8") as f:
444
+ json.dump(combined_manifest, f, indent=2)
445
+
446
+ # 4. Run generation/fixing
447
+ self.print_step("RunGeneration", "Running BioGuider to fix errors...")
448
+ gen = DocumentationGenerationManager(self.llm, self.step_output)
449
+ out_dir = gen.run(report_path=report_path, repo_path=tmp_repo_path)
450
+
451
+ # 5. Evaluate fixes
452
+ self.print_step("EvaluateFixes", "Evaluating error corrections...")
453
+ results = self._evaluate_all_fixes(all_manifests, out_dir)
454
+
455
+ # 6. Save results
456
+ with open(os.path.join(out_dir, "GEN_TEST_RESULTS.json"), "w", encoding="utf-8") as f:
457
+ json.dump(results, f, indent=2)
458
+
459
+ # Copy injection manifest to output
460
+ shutil.copy(inj_path, os.path.join(out_dir, "INJECTION_MANIFEST.json"))
461
+
462
+ # 7. Generate report
463
+ level = "custom"
464
+ if min_per_category <= 3:
465
+ level = "low"
466
+ elif min_per_category <= 7:
467
+ level = "mid"
468
+ else:
469
+ level = "high"
470
+
471
+ self._generate_comprehensive_report(results, out_dir, level)
472
+
473
+ # 8. Save versioned baseline files (original and corrupted versions)
474
+ for rel_path, info in all_manifests.items():
475
+ base_name = os.path.basename(rel_path)
476
+ base_dir = os.path.dirname(rel_path)
477
+
478
+ # Extract file extension properly
479
+ if '.' in base_name:
480
+ name_parts = base_name.rsplit('.', 1)
481
+ base_name_no_ext = name_parts[0]
482
+ ext = '.' + name_parts[1]
483
+ else:
484
+ base_name_no_ext = base_name
485
+ ext = ''
486
+
487
+ # Create original and corrupted filenames
488
+ orig_name = f"{base_name_no_ext}.original{ext}"
489
+ corr_name = f"{base_name_no_ext}.corrupted{ext}"
490
+
491
+ # Determine save directory - preserve directory structure
492
+ if base_name == "README.md":
493
+ # Special handling for README - save at root level
494
+ save_dir = out_dir
495
+ else:
496
+ # Save in same directory structure as original
497
+ save_dir = os.path.join(out_dir, base_dir) if base_dir else out_dir
498
+
499
+ os.makedirs(save_dir, exist_ok=True)
500
+
501
+ # Save original and corrupted versions
502
+ write_file(os.path.join(save_dir, orig_name), info["baseline_content"])
503
+ write_file(os.path.join(save_dir, corr_name), info["corrupted_content"])
504
+
505
+ self.print_step("TestComplete", f"Results saved to {out_dir}")
506
+ return out_dir
507
+
508
+ def run_quant_suite(
509
+ self,
510
+ report_path: str,
511
+ baseline_repo_path: str,
512
+ base_tmp_repo_path: str,
513
+ levels: dict[str, int]
514
+ ) -> dict:
515
+ """
516
+ Run test suite across multiple levels.
517
+ """
518
+ results = {}
519
+ for level, min_cnt in levels.items():
520
+ self.print_step(f"RunLevel:{level.upper()}", f"Running with {min_cnt} errors per file")
521
+ tmp_repo_path = f"{base_tmp_repo_path}_{level}"
522
+ out_dir = self.run_quant_test(report_path, baseline_repo_path, tmp_repo_path, min_per_category=min_cnt)
523
+ results[level] = out_dir
524
+ return results
525
+
File without changes