bioguider 0.2.19__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic.

Files changed (35)
  1. bioguider/agents/agent_utils.py +5 -3
  2. bioguider/agents/collection_execute_step.py +1 -1
  3. bioguider/agents/common_conversation.py +20 -2
  4. bioguider/agents/consistency_collection_execute_step.py +152 -0
  5. bioguider/agents/consistency_collection_observe_step.py +128 -0
  6. bioguider/agents/consistency_collection_plan_step.py +128 -0
  7. bioguider/agents/consistency_collection_task.py +109 -0
  8. bioguider/agents/consistency_collection_task_utils.py +137 -0
  9. bioguider/agents/evaluation_task.py +2 -2
  10. bioguider/agents/evaluation_userguide_prompts.py +162 -0
  11. bioguider/agents/evaluation_userguide_task.py +164 -0
  12. bioguider/agents/prompt_utils.py +11 -8
  13. bioguider/database/code_structure_db.py +489 -0
  14. bioguider/generation/__init__.py +39 -0
  15. bioguider/generation/change_planner.py +140 -0
  16. bioguider/generation/document_renderer.py +47 -0
  17. bioguider/generation/llm_cleaner.py +43 -0
  18. bioguider/generation/llm_content_generator.py +69 -0
  19. bioguider/generation/llm_injector.py +270 -0
  20. bioguider/generation/models.py +77 -0
  21. bioguider/generation/output_manager.py +54 -0
  22. bioguider/generation/repo_reader.py +37 -0
  23. bioguider/generation/report_loader.py +151 -0
  24. bioguider/generation/style_analyzer.py +36 -0
  25. bioguider/generation/suggestion_extractor.py +136 -0
  26. bioguider/generation/test_metrics.py +104 -0
  27. bioguider/managers/evaluation_manager.py +24 -0
  28. bioguider/managers/generation_manager.py +160 -0
  29. bioguider/managers/generation_test_manager.py +74 -0
  30. bioguider/utils/code_structure_builder.py +42 -0
  31. bioguider/utils/file_handler.py +65 -0
  32. {bioguider-0.2.19.dist-info → bioguider-0.2.20.dist-info}/METADATA +1 -1
  33. {bioguider-0.2.19.dist-info → bioguider-0.2.20.dist-info}/RECORD +35 -10
  34. {bioguider-0.2.19.dist-info → bioguider-0.2.20.dist-info}/LICENSE +0 -0
  35. {bioguider-0.2.19.dist-info → bioguider-0.2.20.dist-info}/WHEEL +0 -0
bioguider/generation/report_loader.py
@@ -0,0 +1,151 @@
+ from __future__ import annotations
+
+ import json
+ from typing import Tuple, Dict, Any
+
+ from .models import EvaluationReport
+
+
+ class EvaluationReportLoader:
+     def _parse_bool(self, token: str) -> Any:
+         if token == "True":
+             return True
+         if token == "False":
+             return False
+         return token
+
+     def _split_args(self, s: str) -> Dict[str, Any]:
+         # Split a function-like argument list into a dict, respecting quotes
+         args: Dict[str, Any] = {}
+         current = ""
+         parts = []
+         in_single = False
+         in_double = False
+         for ch in s:
+             if ch == "'" and not in_double:
+                 in_single = not in_single
+                 current += ch
+                 continue
+             if ch == '"' and not in_single:
+                 in_double = not in_double
+                 current += ch
+                 continue
+             if ch == "," and not in_single and not in_double:
+                 parts.append(current.strip())
+                 current = ""
+             else:
+                 current += ch
+         if current.strip():
+             parts.append(current.strip())
+         for p in parts:
+             if not p:
+                 continue
+             if "=" not in p:
+                 continue
+             k, v = p.split("=", 1)
+             k = k.strip()
+             v = v.strip()
+             if (v.startswith("'") and v.endswith("'")) or (v.startswith('"') and v.endswith('"')):
+                 v = v[1:-1]
+             else:
+                 # try bool/int
+                 if v in ("True", "False"):
+                     v = self._parse_bool(v)
+                 else:
+                     try:
+                         v = int(v)
+                     except Exception:
+                         pass
+             args[k] = v
+         return args
+
+     def _parse_structured_block(self, text: str, key: str) -> Dict[str, Any] | None:
+         # Extract key=ClassName(arg1=val1, ...) and parse args
+         marker = f"{key}="
+         idx = text.find(marker)
+         if idx == -1:
+             return None
+         rest = text[idx + len(marker) :]
+         # find first '('
+         pidx = rest.find("(")
+         if pidx == -1:
+             return None
+         rest = rest[pidx + 1 :]
+         # find matching ')'
+         depth = 1
+         collected = ""
+         for ch in rest:
+             if ch == "(":
+                 depth += 1
+             elif ch == ")":
+                 depth -= 1
+                 if depth == 0:
+                     break
+             collected += ch
+         if not collected:
+             return None
+         return self._split_args(collected)
+
+     def _parse_submission_eval_str(self, text: str) -> Dict[str, Any]:
+         # Parse space-separated key=value pairs
+         out: Dict[str, Any] = {}
+         for token in text.strip().split():
+             if "=" not in token:
+                 continue
+             k, v = token.split("=", 1)
+             v = v.strip()
+             if v in ("True", "False"):
+                 out[k] = True if v == "True" else False
+             else:
+                 out[k] = v
+         return out
+
+     def load(self, report_path: str) -> Tuple[EvaluationReport, str]:
+         with open(report_path, "r", encoding="utf-8") as fobj:
+             raw = json.load(fobj)
+
+         # Normalize nested stringified fields if any
+         def normalize(obj):
+             if isinstance(obj, str):
+                 s = obj.strip()
+                 if (s.startswith("{") and s.endswith("}")) or (s.startswith("[") and s.endswith("]")):
+                     try:
+                         return json.loads(s)
+                     except Exception:
+                         return obj
+                 return obj
+             if isinstance(obj, dict):
+                 return {k: normalize(v) for k, v in obj.items()}
+             if isinstance(obj, list):
+                 return [normalize(v) for v in obj]
+             return obj
+
+         normalized = normalize(raw)
+
+         # Special handling for stringified evaluation fields
+         inst_eval = normalized.get("installation_evaluation")
+         if isinstance(inst_eval, str):
+             normalized["installation_evaluation"] = {
+                 "structured_evaluation": self._parse_structured_block(inst_eval, "structured_evaluation"),
+             }
+
+         readme_eval = normalized.get("readme_evaluation")
+         if isinstance(readme_eval, dict):
+             fixed: Dict[str, Any] = {}
+             for fname, val in readme_eval.items():
+                 if isinstance(val, str):
+                     fixed[fname] = {
+                         "structured_evaluation": self._parse_structured_block(val, "structured_evaluation"),
+                     }
+                 else:
+                     fixed[fname] = val
+             normalized["readme_evaluation"] = fixed
+
+         submit_eval = normalized.get("submission_requirements_evaluation")
+         if isinstance(submit_eval, str):
+             normalized["submission_requirements_evaluation"] = self._parse_submission_eval_str(submit_eval)
+
+         report = EvaluationReport(**normalized)
+         return report, report_path
+
+
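For orientation, a minimal usage sketch of the loader added above; the report path is illustrative, not shipped with the package, while the `EvaluationReportLoader` export and the `readme_evaluation` field are taken from the diff itself:

```python
from bioguider.generation import EvaluationReportLoader

loader = EvaluationReportLoader()
# load() reads the JSON report, normalizes stringified evaluation fields,
# and returns the parsed EvaluationReport together with the path it was read from.
report, report_path = loader.load("outputs/evaluation_report.json")  # illustrative path
print(report.readme_evaluation)
```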
bioguider/generation/style_analyzer.py
@@ -0,0 +1,36 @@
+ from __future__ import annotations
+
+ from typing import Dict
+
+ from .models import StyleProfile
+
+
+ class StyleAnalyzer:
+     def analyze(self, files: Dict[str, str]) -> StyleProfile:
+         profile = StyleProfile()
+
+         # Infer heading style: prefer README
+         readme = None
+         for name in ("README.md", "README.rst"):
+             if name in files:
+                 readme = files[name]
+                 break
+         sample = readme or next(iter(files.values()), "")
+         if "\n# " in sample or sample.startswith("# "):
+             profile.heading_style = "#"
+         elif "\n## " in sample:
+             profile.heading_style = "#"
+         else:
+             profile.heading_style = "#"
+
+         # List style
+         if "\n- " in sample:
+             profile.list_style = "-"
+         elif "\n* " in sample:
+             profile.list_style = "*"
+
+         # Tone markers (heuristic): keep minimal
+         profile.tone_markers = ["concise", "neutral"]
+         return profile
+
+
bioguider/generation/suggestion_extractor.py
@@ -0,0 +1,136 @@
+ from __future__ import annotations
+
+ from typing import List
+ from .models import EvaluationReport, SuggestionItem
+
+
+ class SuggestionExtractor:
+     def extract(self, report: EvaluationReport) -> List[SuggestionItem]:
+         suggestions: List[SuggestionItem] = []
+
+         # README-related suggestions
+         if report.readme_evaluation:
+             for file_name, evaluation in report.readme_evaluation.items():
+                 structured = evaluation.get("structured_evaluation") if isinstance(evaluation, dict) else None
+                 if structured:
+                     # Intro cleanup / overview enhancement beyond explicit suggestions
+                     suggestions.append(SuggestionItem(
+                         id=f"readme-intro-cleanup-{file_name}",
+                         category="readme.intro_cleanup",
+                         severity="should_fix",
+                         source={"section": "readme", "field": "overview", "evidence": "Improve top-level overview for clarity and tone."},
+                         target_files=[file_name],
+                         action="replace_intro",
+                         anchor_hint="Overview",
+                         content_guidance="Rewrite the opening summary to be clear, neutral, and typo-free.",
+                     ))
+                     # Dependency clarity
+                     dep_score = structured.get("dependency_score")
+                     dep_sugg = structured.get("dependency_suggestions")
+                     if dep_score in ("Poor", "Fair") or dep_sugg:
+                         suggestions.append(SuggestionItem(
+                             id=f"readme-dependencies-{file_name}",
+                             category="readme.dependencies",
+                             severity="should_fix",
+                             source={"section": "readme", "field": "dependencies", "evidence": str(dep_sugg or dep_score)},
+                             target_files=[file_name],
+                             action="add_dependencies_section",
+                             anchor_hint="Dependencies",
+                             content_guidance=str(dep_sugg or ""),
+                         ))
+
+                     # Hardware/Software specs
+                     hw_score = structured.get("hardware_and_software_spec_score")
+                     hw_sugg = structured.get("hardware_and_software_spec_suggestions")
+                     if hw_score in ("Poor", "Fair") or hw_sugg:
+                         suggestions.append(SuggestionItem(
+                             id=f"readme-sysreq-{file_name}",
+                             category="readme.system_requirements",
+                             severity="should_fix",
+                             source={"section": "readme", "field": "hardware_and_software", "evidence": str(hw_sugg or hw_score)},
+                             target_files=[file_name],
+                             action="add_system_requirements_section",
+                             anchor_hint="System Requirements",
+                             content_guidance=str(hw_sugg or ""),
+                         ))
+
+                     # License mention
+                     lic_sugg = structured.get("license_suggestions")
+                     lic_score = structured.get("license_score")
+                     if lic_sugg and lic_score:
+                         suggestions.append(SuggestionItem(
+                             id=f"readme-license-{file_name}",
+                             category="readme.license",
+                             severity="nice_to_have",
+                             source={"section": "readme", "field": "license", "evidence": str(lic_sugg)},
+                             target_files=[file_name],
+                             action="mention_license_section",
+                             anchor_hint="License",
+                             content_guidance=str(lic_sugg),
+                         ))
+
+                     # Readability structuring
+                     read_sugg = structured.get("readability_suggestions")
+                     if read_sugg:
+                         suggestions.append(SuggestionItem(
+                             id=f"readme-structure-{file_name}",
+                             category="readme.readability",
+                             severity="nice_to_have",
+                             source={"section": "readability", "field": "readability_suggestions", "evidence": str(read_sugg)},
+                             target_files=[file_name],
+                             action="normalize_headings_structure",
+                             anchor_hint="Installation",
+                             content_guidance=str(read_sugg),
+                         ))
+                         # If suggestions mention Usage, add a usage section
+                         if isinstance(read_sugg, str) and "Usage" in read_sugg:
+                             suggestions.append(SuggestionItem(
+                                 id=f"readme-usage-{file_name}",
+                                 category="readme.usage",
+                                 severity="nice_to_have",
+                                 source={"section": "readability", "field": "usage", "evidence": "Add Usage section as suggested."},
+                                 target_files=[file_name],
+                                 action="add_usage_section",
+                                 anchor_hint="Usage",
+                                 content_guidance="Provide a brief usage example and key commands.",
+                             ))
+
+         # Installation-related suggestions
+         if report.installation_evaluation:
+             structured = None
+             if isinstance(report.installation_evaluation, dict):
+                 structured = report.installation_evaluation.get("structured_evaluation")
+             if structured:
+                 dep_sugg = structured.get("dependency_suggestions")
+                 if dep_sugg:
+                     for target in report.installation_files or []:
+                         suggestions.append(SuggestionItem(
+                             id=f"install-dep-clarify-{target}",
+                             category="installation.dependencies",
+                             severity="should_fix",
+                             source={"section": "installation", "field": "dependency_suggestions", "evidence": str(dep_sugg)},
+                             target_files=[target],
+                             action="clarify_mandatory_vs_optional",
+                             anchor_hint="Dependencies",
+                             content_guidance=str(dep_sugg),
+                         ))
+                 hw = structured.get("hardware_requirements")
+                 if hw is False:
+                     for target in report.installation_files or []:
+                         suggestions.append(SuggestionItem(
+                             id=f"install-hw-req-{target}",
+                             category="installation.hardware",
+                             severity="should_fix",
+                             source={"section": "installation", "field": "hardware_requirements", "evidence": "not specified"},
+                             target_files=[target],
+                             action="add_hardware_requirements",
+                             anchor_hint="Hardware Requirements",
+                             content_guidance="Add concise RAM/CPU recommendation as per report guidance.",
+                         ))
+
+         # Submission requirements could drive expected output/dataset sections; use only if in files list
+         # Keep minimal to avoid speculative content
+
+         return suggestions
+
+
bioguider/generation/test_metrics.py
@@ -0,0 +1,104 @@
+ from __future__ import annotations
+
+ import json
+ import re
+ from difflib import SequenceMatcher
+ from typing import Dict, Any, List, Tuple
+
+
+ def _lev(a: str, b: str) -> float:
+     return 1.0 - SequenceMatcher(None, a, b).ratio()
+
+
+ def _count_markdown_issues(text: str) -> int:
+     issues = 0
+     # naive checks
+     issues += text.count("[![") - text.count("](")  # unbalanced badge syntax
+     issues += text.count("[ ")  # bad link spacing
+     issues += len(re.findall(r"^#[^#\s]", text, flags=re.M))  # malformed header
+     return max(0, issues)
+
+
+ def evaluate_fixes(baseline: str, corrupted: str, revised: str, injection_manifest: Dict[str, Any]) -> Dict[str, Any]:
+     per_error: List[Dict[str, Any]] = []
+     per_cat: Dict[str, Dict[str, int]] = {}
+
+     def mark(cat: str, key: str):
+         per_cat.setdefault(cat, {"total": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0})
+         per_cat[cat][key] += 1
+
+     for e in injection_manifest.get("errors", []):
+         cat = e.get("category", "unknown")
+         per_cat.setdefault(cat, {"total": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0})
+         per_cat[cat]["total"] += 1
+         orig = e.get("original_snippet", "")
+         mut = e.get("mutated_snippet", "")
+
+         # Determine the neighborhood and after-fix snippet
+         after = None
+         if mut and mut in corrupted:
+             # try to find replacement around mutated snippet in revised
+             idx = corrupted.find(mut)
+             window = corrupted[max(0, idx-200): idx+200]
+             # pick a few words from orig as hint
+             hint = orig[:50]
+             if hint and hint in revised:
+                 after = hint
+         if after is None:
+             # fallback: search original snippet directly
+             after = orig if orig in revised else None
+
+         status = "unchanged"
+         notes = ""
+         if cat == "typo":
+             if orig and orig in revised:
+                 status = "fixed_to_baseline"
+             elif mut and mut in revised:
+                 status = "unchanged"
+             else:
+                 status = "fixed_to_valid"
+         elif cat == "link":
+             # simple: link markdown well-formed
+             wellformed = re.search(r"\[[^\]]+\]\([^\s)]+\)", revised) is not None
+             status = "fixed_to_valid" if wellformed else "unchanged"
+         elif cat == "duplicate":
+             dup_before = corrupted.count(mut)
+             dup_after = revised.count(mut)
+             status = "fixed_to_valid" if dup_after < dup_before else "unchanged"
+         elif cat == "markdown_structure":
+             issues_before = _count_markdown_issues(corrupted)
+             issues_after = _count_markdown_issues(revised)
+             status = "fixed_to_valid" if issues_after < issues_before else "unchanged"
+         elif cat in ("bio_term", "function"):
+             if orig and orig in revised:
+                 status = "fixed_to_baseline"
+             elif mut and mut in revised:
+                 status = "unchanged"
+             else:
+                 status = "fixed_to_valid"
+         else:
+             status = "unchanged"
+
+         mark(cat, status)
+         per_error.append({
+             "id": e.get("id"),
+             "category": cat,
+             "status": status,
+             "before": mut,
+             "after_contains_original": bool(orig and orig in revised),
+             "notes": notes,
+         })
+
+     # global metrics
+     issues_before = _count_markdown_issues(corrupted)
+     issues_after = _count_markdown_issues(revised)
+     global_metrics = {
+         "markdown_validity_delta": issues_before - issues_after,
+     }
+     return {
+         "per_error": per_error,
+         "per_category": per_cat,
+         "global": global_metrics,
+     }
+
+
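A small sketch of how evaluate_fixes scores a single injected error; the manifest shape mirrors the keys the function reads above ("errors", "id", "category", "original_snippet", "mutated_snippet"), and the strings are invented test data:

```python
from bioguider.generation.test_metrics import evaluate_fixes

baseline = "BioGuider evaluates documentation quality."
corrupted = "BioGuider evaluats documentation quality."
revised = "BioGuider evaluates documentation quality."
manifest = {
    "errors": [
        {
            "id": "e1",
            "category": "typo",
            "original_snippet": "evaluates",
            "mutated_snippet": "evaluats",
        },
    ],
}

result = evaluate_fixes(baseline, corrupted, revised, manifest)
# "evaluates" reappears in `revised`, so e1 is counted as fixed_to_baseline
print(result["per_category"]["typo"])
print(result["global"]["markdown_validity_delta"])
```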
bioguider/managers/evaluation_manager.py
@@ -1,13 +1,16 @@
  import os
  from pathlib import Path
 
+ from bioguider.agents.evaluation_userguide_task import EvaluationUserGuideTask
  from bioguider.agents.prompt_utils import CollectionGoalItemEnum
+ from bioguider.database.code_structure_db import CodeStructureDb
  from bioguider.utils.constants import ProjectMetadata
  from bioguider.utils.gitignore_checker import GitignoreChecker
 
  from ..agents.identification_task import IdentificationTask
  from ..rag.rag import RAG
  from ..utils.file_utils import parse_repo_url
+ from ..utils.code_structure_builder import CodeStructureBuilder
  from ..database.summarized_file_db import SummarizedFilesDb
  from ..agents.evaluation_readme_task import EvaluationREADMETask
  from ..agents.evaluation_installation_task import EvaluationInstallationTask
@@ -30,6 +33,13 @@ class EvaluationManager:
 
          author, repo_name = parse_repo_url(repo_url)
          self.summary_file_db = SummarizedFilesDb(author, repo_name)
+         self.code_structure_db = CodeStructureDb(author, repo_name)
+         code_structure_builder = CodeStructureBuilder(
+             repo_path=repo_url,
+             gitignore_path=Path(repo_url, ".gitignore"),
+             code_structure_db=self.code_structure_db
+         )
+         code_structure_builder.build_code_structure()
 
      def identify_project(self) -> ProjectMetadata:
          repo_path = self.rag.repo_dir
@@ -119,6 +129,20 @@ class EvaluationManager:
          evaluation, files = evaluation_task.evaluate()
 
          return evaluation, files
+
+     def evaluate_userguide(self):
+         evaluation_task = EvaluationUserGuideTask(
+             llm=self.llm,
+             repo_path=self.rag.repo_dir,
+             gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
+             meta_data=self.project_metadata,
+             step_callback=self.step_callback,
+             summarized_files_db=self.summary_file_db,
+             code_structure_db=self.code_structure_db,
+         )
+         evaluation, files = evaluation_task.evaluate()
+         return evaluation, files
+
 
 
 
bioguider/managers/generation_manager.py
@@ -0,0 +1,160 @@
+ from __future__ import annotations
+
+ import os
+ from pathlib import Path
+ from typing import Tuple, Dict
+
+ from bioguider.generation import (
+     EvaluationReportLoader,
+     SuggestionExtractor,
+     RepoReader,
+     StyleAnalyzer,
+     ChangePlanner,
+     DocumentRenderer,
+     OutputManager,
+     LLMContentGenerator,
+     LLMCleaner,
+ )
+ from bioguider.generation.models import GenerationManifest, GenerationReport
+ from bioguider.utils.file_utils import parse_repo_url
+
+
+ class DocumentationGenerationManager:
+     def __init__(self, llm, step_callback):
+         self.llm = llm
+         self.step_callback = step_callback
+         self.repo_url_or_path: str | None = None
+
+         self.loader = EvaluationReportLoader()
+         self.extractor = SuggestionExtractor()
+         self.style_analyzer = StyleAnalyzer()
+         self.planner = ChangePlanner()
+         self.renderer = DocumentRenderer()
+         self.output = OutputManager()
+         self.llm_gen = LLMContentGenerator(llm)
+         self.llm_cleaner = LLMCleaner(llm)
+
+     def print_step(self, step_name: str | None = None, step_output: str | None = None):
+         if self.step_callback is None:
+             return
+         self.step_callback(step_name=step_name, step_output=step_output)
+
+     def prepare_repo(self, repo_url_or_path: str):
+         self.repo_url_or_path = repo_url_or_path
+
+     def run(self, report_path: str, repo_path: str | None = None) -> str:
+         repo_path = repo_path or self.repo_url_or_path or ""
+         self.print_step(step_name="LoadReport", step_output=f"report_path={report_path}")
+         report, report_abs = self.loader.load(report_path)
+
+         self.print_step(step_name="ReadRepoFiles", step_output=f"repo_path={repo_path}")
+         reader = RepoReader(repo_path)
+         # Prefer report-listed files if available
+         target_files = []
+         if report.readme_files:
+             target_files.extend(report.readme_files)
+         if report.installation_files:
+             target_files.extend(report.installation_files)
+         target_files = list(dict.fromkeys(target_files))  # de-dup
+         files, missing = reader.read_files(target_files) if target_files else reader.read_default_targets()
+
+         self.print_step(step_name="AnalyzeStyle", step_output=f"files={[p for p in files.keys()]}")
+         style = self.style_analyzer.analyze(files)
+
+         self.print_step(step_name="ExtractSuggestions")
+         suggestions = self.extractor.extract(report)
+         self.print_step(step_name="Suggestions", step_output=f"count={len(suggestions)} ids={[s.id for s in suggestions]}")
+
+         self.print_step(step_name="PlanChanges")
+         plan = self.planner.build_plan(repo_path=repo_path, style=style, suggestions=suggestions, available_files=files)
+         self.print_step(step_name="PlannedEdits", step_output=f"count={len(plan.planned_edits)} files={list(set(e.file_path for e in plan.planned_edits))}")
+
+         self.print_step(step_name="RenderDocuments")
+         # Apply edits cumulatively per file to ensure multiple suggestions are realized
+         revised: Dict[str, str] = {}
+         diff_stats: Dict[str, dict] = {}
+         edits_by_file: Dict[str, list] = {}
+         for e in plan.planned_edits:
+             edits_by_file.setdefault(e.file_path, []).append(e)
+         for fpath, edits in edits_by_file.items():
+             content = files.get(fpath, "")
+             total_stats = {"added_lines": 0}
+             for e in edits:
+                 # Generate LLM content for section if template is generic
+                 context = files.get(fpath, "")
+                 gen_section, gen_usage = self.llm_gen.generate_section(
+                     suggestion=next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None,
+                     style=plan.style_profile,
+                     context=context,
+                 ) if e.suggestion_id else ""
+                 if isinstance(gen_section, str) and gen_section:
+                     self.print_step(step_name="LLMSection", step_output=f"file={fpath} suggestion={e.suggestion_id} tokens={gen_usage.get('total_tokens', 0)}\n{gen_section}")
+                     # Ensure header present
+                     if gen_section.lstrip().startswith("#"):
+                         e.content_template = gen_section
+                     else:
+                         title = e.anchor.get('value', '').strip() or ''
+                         e.content_template = f"## {title}\n\n{gen_section}" if title else gen_section
+                 content, stats = self.renderer.apply_edit(content, e)
+                 total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
+             revised[fpath] = content
+             diff_stats[fpath] = total_stats
+             self.print_step(step_name="RenderedFile", step_output=f"file={fpath} added_lines={total_stats['added_lines']}")
+
+         # Removed cleaner: duplication and fixes handled in prompts and renderer
+
+         # Prefer local repo folder name for outputs; fallback to author_repo from URL
+         out_repo_key = None
+         if repo_path and os.path.isdir(repo_path):
+             out_repo_key = os.path.basename(os.path.normpath(repo_path))
+         elif report.repo_url:
+             try:
+                 author, name = parse_repo_url(report.repo_url)
+                 out_repo_key = f"{author}_{name}"
+             except Exception:
+                 out_repo_key = report.repo_url
+         else:
+             out_repo_key = self.repo_url_or_path or "repo"
+
+         self.print_step(step_name="WriteOutputs", step_output=f"repo_key={out_repo_key}")
+         out_dir = self.output.prepare_output_dir(out_repo_key)
+         artifacts = self.output.write_files(out_dir, revised, diff_stats_by_file=diff_stats)
+
+         manifest = GenerationManifest(
+             repo_url=report.repo_url,
+             report_path=report_abs,
+             output_dir=out_dir,
+             suggestions=suggestions,
+             planned_edits=plan.planned_edits,
+             artifacts=artifacts,
+             skipped=missing,
+         )
+         self.output.write_manifest(out_dir, manifest)
+         # Write human-readable generation report
+         gen_report_path = self._write_generation_report(out_dir, report.repo_url or str(self.repo_url_or_path or ""), plan, diff_stats, suggestions)
+         self.print_step(step_name="Done", step_output=f"output_dir={out_dir}")
+         return out_dir
+
+     def _write_generation_report(self, out_dir: str, repo_url: str, plan, diff_stats: Dict[str, dict], suggestions):
+         # Build a simple markdown report
+         lines: list[str] = []
+         lines.append(f"# Documentation Generation Report\n")
+         lines.append(f"Repo: {repo_url}\n")
+         lines.append(f"Output: {out_dir}\n")
+         lines.append("\n## Summary of Changes\n")
+         for e in plan.planned_edits:
+             sug = next((s for s in suggestions if s.id == e.suggestion_id), None)
+             why = sug.source.get("evidence", "") if sug and sug.source else ""
+             lines.append(f"- File: `{e.file_path}` | Action: {e.edit_type} | Section: {e.anchor.get('value','')} | Added lines: {diff_stats.get(e.file_path,{}).get('added_lines',0)}")
+             if why:
+                 lines.append(f"  - Why: {why}")
+         lines.append("\n## Planned Edits\n")
+         for e in plan.planned_edits:
+             lines.append(f"- `{e.file_path}` -> {e.edit_type} -> {e.anchor.get('value','')}")
+         report_md = "\n".join(lines)
+         dest = os.path.join(out_dir, "GENERATION_REPORT.md")
+         with open(dest, "w", encoding="utf-8") as fobj:
+             fobj.write(report_md)
+         return dest
+
+
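End to end, the new manager ties these pieces together. A minimal sketch, assuming a local clone of the target repo and an evaluation report already on disk; the paths are illustrative and llm=None is only a placeholder (a real LLM handle is required for section generation):

```python
from bioguider.managers.generation_manager import DocumentationGenerationManager

manager = DocumentationGenerationManager(llm=None, step_callback=None)  # placeholder llm
manager.prepare_repo("/path/to/local/repo")  # illustrative path
# run() loads the report, extracts suggestions, plans and renders edits,
# then writes the revised docs, a manifest, and GENERATION_REPORT.md
out_dir = manager.run(report_path="outputs/evaluation_report.json")
print(out_dir)
```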