bioguider 0.2.32__py3-none-any.whl → 0.2.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/agents/agent_utils.py +37 -13
- bioguider/generation/llm_content_generator.py +762 -32
- bioguider/generation/llm_injector.py +60 -7
- bioguider/generation/suggestion_extractor.py +26 -26
- bioguider/managers/generation_manager.py +168 -89
- {bioguider-0.2.32.dist-info → bioguider-0.2.34.dist-info}/METADATA +1 -1
- {bioguider-0.2.32.dist-info → bioguider-0.2.34.dist-info}/RECORD +9 -9
- {bioguider-0.2.32.dist-info → bioguider-0.2.34.dist-info}/LICENSE +0 -0
- {bioguider-0.2.32.dist-info → bioguider-0.2.34.dist-info}/WHEEL +0 -0
|
@@ -115,13 +115,9 @@ class LLMErrorInjector:
|
|
|
115
115
|
max_words=max_words,
|
|
116
116
|
)
|
|
117
117
|
output, _ = conv.generate(system_prompt=system_prompt, instruction_prompt="Return the JSON now.")
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
# try to locate JSON block
|
|
122
|
-
start = output.find("{")
|
|
123
|
-
end = output.rfind("}")
|
|
124
|
-
data = json.loads(output[start:end+1]) if start != -1 and end != -1 else {"corrupted_markdown": readme_text, "errors": []}
|
|
118
|
+
|
|
119
|
+
# Enhanced JSON parsing with better error handling
|
|
120
|
+
data = self._parse_json_output(output, readme_text)
|
|
125
121
|
corrupted = data.get("corrupted_markdown", readme_text)
|
|
126
122
|
# Validate output stays within original context; fallback to deterministic if invalid
|
|
127
123
|
if not self._validate_corrupted(readme_text, corrupted, preserve_keywords):
|
|
@@ -133,6 +129,63 @@ class LLMErrorInjector:
|
|
|
133
129
|
}
|
|
134
130
|
return corrupted, manifest
|
|
135
131
|
|
|
132
|
+
def _parse_json_output(self, output: str, fallback_text: str) -> Dict[str, Any]:
|
|
133
|
+
"""Enhanced JSON parsing with multiple fallback strategies."""
|
|
134
|
+
import re
|
|
135
|
+
|
|
136
|
+
# Strategy 1: Direct JSON parsing
|
|
137
|
+
try:
|
|
138
|
+
return json.loads(output)
|
|
139
|
+
except json.JSONDecodeError:
|
|
140
|
+
pass
|
|
141
|
+
|
|
142
|
+
# Strategy 2: Extract JSON block between ```json and ```
|
|
143
|
+
json_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
|
|
144
|
+
match = re.search(json_pattern, output, re.DOTALL)
|
|
145
|
+
if match:
|
|
146
|
+
try:
|
|
147
|
+
return json.loads(match.group(1))
|
|
148
|
+
except json.JSONDecodeError:
|
|
149
|
+
pass
|
|
150
|
+
|
|
151
|
+
# Strategy 3: Find first complete JSON object
|
|
152
|
+
start = output.find("{")
|
|
153
|
+
if start != -1:
|
|
154
|
+
# Find matching closing brace
|
|
155
|
+
brace_count = 0
|
|
156
|
+
end = start
|
|
157
|
+
for i, char in enumerate(output[start:], start):
|
|
158
|
+
if char == "{":
|
|
159
|
+
brace_count += 1
|
|
160
|
+
elif char == "}":
|
|
161
|
+
brace_count -= 1
|
|
162
|
+
if brace_count == 0:
|
|
163
|
+
end = i
|
|
164
|
+
break
|
|
165
|
+
|
|
166
|
+
if brace_count == 0: # Found complete JSON object
|
|
167
|
+
try:
|
|
168
|
+
json_str = output[start:end+1]
|
|
169
|
+
return json.loads(json_str)
|
|
170
|
+
except json.JSONDecodeError:
|
|
171
|
+
pass
|
|
172
|
+
|
|
173
|
+
# Strategy 4: Try to fix common JSON issues
|
|
174
|
+
try:
|
|
175
|
+
# Remove markdown code fences
|
|
176
|
+
cleaned = re.sub(r'```(?:json)?\s*', '', output)
|
|
177
|
+
cleaned = re.sub(r'```\s*$', '', cleaned)
|
|
178
|
+
# Remove leading/trailing whitespace
|
|
179
|
+
cleaned = cleaned.strip()
|
|
180
|
+
# Try parsing again
|
|
181
|
+
return json.loads(cleaned)
|
|
182
|
+
except json.JSONDecodeError:
|
|
183
|
+
pass
|
|
184
|
+
|
|
185
|
+
# Strategy 5: Fallback to deterministic injection
|
|
186
|
+
print(f"Warning: Failed to parse LLM JSON output, using fallback. Output preview: {output[:200]}...")
|
|
187
|
+
return {"corrupted_markdown": fallback_text, "errors": []}
|
|
188
|
+
|
|
136
189
|
def _extract_preserve_keywords(self, text: str) -> List[str]:
|
|
137
190
|
# Extract capitalized terms, domain hyphenations, and hostnames in links
|
|
138
191
|
kws: Set[str] = set()
|
|
@@ -21,7 +21,7 @@ class SuggestionExtractor:
|
|
|
21
21
|
id=f"readme-dependencies-{file_name}",
|
|
22
22
|
category="readme.dependencies",
|
|
23
23
|
severity="should_fix",
|
|
24
|
-
source={"section": "readme", "field": "dependency_suggestions", "
|
|
24
|
+
source={"section": "readme", "field": "dependency_suggestions", "score": dep_score},
|
|
25
25
|
target_files=[file_name],
|
|
26
26
|
action="add_dependencies_section",
|
|
27
27
|
anchor_hint="Dependencies",
|
|
@@ -36,7 +36,7 @@ class SuggestionExtractor:
|
|
|
36
36
|
id=f"readme-hardware-{file_name}",
|
|
37
37
|
category="readme.hardware",
|
|
38
38
|
severity="should_fix",
|
|
39
|
-
source={"section": "readme", "field": "hardware_and_software_spec_suggestions", "
|
|
39
|
+
source={"section": "readme", "field": "hardware_and_software_spec_suggestions", "score": hw_sw_score},
|
|
40
40
|
target_files=[file_name],
|
|
41
41
|
action="add_system_requirements_section",
|
|
42
42
|
anchor_hint="System Requirements",
|
|
@@ -51,7 +51,7 @@ class SuggestionExtractor:
|
|
|
51
51
|
id=f"readme-purpose-{file_name}",
|
|
52
52
|
category="readme.purpose",
|
|
53
53
|
severity="should_fix",
|
|
54
|
-
source={"section": "readme", "field": "project_purpose_suggestions", "
|
|
54
|
+
source={"section": "readme", "field": "project_purpose_suggestions", "score": purpose_score},
|
|
55
55
|
target_files=[file_name],
|
|
56
56
|
action="full_replace",
|
|
57
57
|
anchor_hint="Overview",
|
|
@@ -66,7 +66,7 @@ class SuggestionExtractor:
|
|
|
66
66
|
id=f"readme-readability-{file_name}",
|
|
67
67
|
category="readme.readability",
|
|
68
68
|
severity="should_fix",
|
|
69
|
-
source={"section": "readme", "field": "readability_suggestions", "
|
|
69
|
+
source={"section": "readme", "field": "readability_suggestions", "score": readability_score},
|
|
70
70
|
target_files=[file_name],
|
|
71
71
|
action="full_replace",
|
|
72
72
|
anchor_hint="Introduction",
|
|
@@ -78,7 +78,7 @@ class SuggestionExtractor:
|
|
|
78
78
|
id=f"readme-intro-cleanup-{file_name}",
|
|
79
79
|
category="readme.intro_cleanup",
|
|
80
80
|
severity="should_fix",
|
|
81
|
-
source={"section": "readme", "field": "overview", "
|
|
81
|
+
source={"section": "readme", "field": "overview", "score": "Fair"},
|
|
82
82
|
target_files=[file_name],
|
|
83
83
|
action="replace_intro",
|
|
84
84
|
anchor_hint="Overview",
|
|
@@ -92,7 +92,7 @@ class SuggestionExtractor:
|
|
|
92
92
|
id=f"readme-dependencies-clarify-{file_name}",
|
|
93
93
|
category="readme.dependencies",
|
|
94
94
|
severity="should_fix",
|
|
95
|
-
source={"section": "readme", "field": "dependencies", "
|
|
95
|
+
source={"section": "readme", "field": "dependencies", "score": dep_score},
|
|
96
96
|
target_files=[file_name],
|
|
97
97
|
action="add_dependencies_section",
|
|
98
98
|
anchor_hint="Dependencies",
|
|
@@ -103,7 +103,7 @@ class SuggestionExtractor:
|
|
|
103
103
|
id=f"readme-dependencies-fallback-{file_name}",
|
|
104
104
|
category="readme.dependencies",
|
|
105
105
|
severity="should_fix",
|
|
106
|
-
source={"section": "readme", "field": "dependencies", "
|
|
106
|
+
source={"section": "readme", "field": "dependencies", "score": dep_score},
|
|
107
107
|
target_files=[file_name],
|
|
108
108
|
action="add_dependencies_section",
|
|
109
109
|
anchor_hint="Dependencies",
|
|
@@ -118,7 +118,7 @@ class SuggestionExtractor:
|
|
|
118
118
|
id=f"readme-sysreq-clarify-{file_name}",
|
|
119
119
|
category="readme.system_requirements",
|
|
120
120
|
severity="should_fix",
|
|
121
|
-
source={"section": "readme", "field": "hardware_and_software", "
|
|
121
|
+
source={"section": "readme", "field": "hardware_and_software", "score": hw_score},
|
|
122
122
|
target_files=[file_name],
|
|
123
123
|
action="add_system_requirements_section",
|
|
124
124
|
anchor_hint="System Requirements",
|
|
@@ -129,7 +129,7 @@ class SuggestionExtractor:
|
|
|
129
129
|
id=f"readme-sysreq-fallback-{file_name}",
|
|
130
130
|
category="readme.system_requirements",
|
|
131
131
|
severity="should_fix",
|
|
132
|
-
source={"section": "readme", "field": "hardware_and_software", "
|
|
132
|
+
source={"section": "readme", "field": "hardware_and_software", "score": hw_score},
|
|
133
133
|
target_files=[file_name],
|
|
134
134
|
action="add_system_requirements_section",
|
|
135
135
|
anchor_hint="System Requirements",
|
|
@@ -144,7 +144,7 @@ class SuggestionExtractor:
|
|
|
144
144
|
id=f"readme-license-{file_name}",
|
|
145
145
|
category="readme.license",
|
|
146
146
|
severity="nice_to_have",
|
|
147
|
-
source={"section": "readme", "field": "license"
|
|
147
|
+
source={"section": "readme", "field": "license"},
|
|
148
148
|
target_files=[file_name],
|
|
149
149
|
action="mention_license_section",
|
|
150
150
|
anchor_hint="License",
|
|
@@ -159,7 +159,7 @@ class SuggestionExtractor:
|
|
|
159
159
|
id=f"readme-structure-clarify-{file_name}",
|
|
160
160
|
category="readme.readability",
|
|
161
161
|
severity="should_fix",
|
|
162
|
-
source={"section": "readability", "field": "readability_suggestions", "
|
|
162
|
+
source={"section": "readability", "field": "readability_suggestions", "score": read_score},
|
|
163
163
|
target_files=[file_name],
|
|
164
164
|
action="normalize_headings_structure",
|
|
165
165
|
anchor_hint="Installation",
|
|
@@ -170,7 +170,7 @@ class SuggestionExtractor:
|
|
|
170
170
|
id=f"readme-structure-fallback-{file_name}",
|
|
171
171
|
category="readme.readability",
|
|
172
172
|
severity="should_fix",
|
|
173
|
-
source={"section": "readability", "field": "readability_score", "
|
|
173
|
+
source={"section": "readability", "field": "readability_score", "score": read_score},
|
|
174
174
|
target_files=[file_name],
|
|
175
175
|
action="normalize_headings_structure",
|
|
176
176
|
anchor_hint="Installation",
|
|
@@ -182,7 +182,7 @@ class SuggestionExtractor:
|
|
|
182
182
|
id=f"readme-usage-{file_name}",
|
|
183
183
|
category="readme.usage",
|
|
184
184
|
severity="nice_to_have",
|
|
185
|
-
source={"section": "readability", "field": "usage"
|
|
185
|
+
source={"section": "readability", "field": "usage"},
|
|
186
186
|
target_files=[file_name],
|
|
187
187
|
action="add_usage_section",
|
|
188
188
|
anchor_hint="Usage",
|
|
@@ -208,7 +208,7 @@ class SuggestionExtractor:
|
|
|
208
208
|
id=f"install-full-replace-{target}",
|
|
209
209
|
category="installation.full_replace",
|
|
210
210
|
severity="should_fix",
|
|
211
|
-
source={"section": "installation", "field": "overall"
|
|
211
|
+
source={"section": "installation", "field": "overall"},
|
|
212
212
|
target_files=[target],
|
|
213
213
|
action="full_replace",
|
|
214
214
|
anchor_hint=None,
|
|
@@ -235,7 +235,7 @@ class SuggestionExtractor:
|
|
|
235
235
|
id=f"userguide-readability-{file_name}-{i}",
|
|
236
236
|
category="userguide.readability",
|
|
237
237
|
severity="should_fix",
|
|
238
|
-
source={"section": "userguide", "field": "readability_suggestions", "
|
|
238
|
+
source={"section": "userguide", "field": "readability_suggestions", "score": readability_score},
|
|
239
239
|
target_files=[file_name],
|
|
240
240
|
action="full_replace",
|
|
241
241
|
anchor_hint=f"Readability-{i+1}",
|
|
@@ -252,7 +252,7 @@ class SuggestionExtractor:
|
|
|
252
252
|
id=f"userguide-context-{file_name}-{i}",
|
|
253
253
|
category="userguide.context",
|
|
254
254
|
severity="should_fix",
|
|
255
|
-
source={"section": "userguide", "field": "context_and_purpose_suggestions", "
|
|
255
|
+
source={"section": "userguide", "field": "context_and_purpose_suggestions", "score": context_score},
|
|
256
256
|
target_files=[file_name],
|
|
257
257
|
action="full_replace",
|
|
258
258
|
anchor_hint=f"Context-{i+1}",
|
|
@@ -269,7 +269,7 @@ class SuggestionExtractor:
|
|
|
269
269
|
id=f"userguide-error-{file_name}-{i}",
|
|
270
270
|
category="userguide.error_handling",
|
|
271
271
|
severity="should_fix",
|
|
272
|
-
source={"section": "userguide", "field": "error_handling_suggestions", "
|
|
272
|
+
source={"section": "userguide", "field": "error_handling_suggestions", "score": error_score},
|
|
273
273
|
target_files=[file_name],
|
|
274
274
|
action="full_replace",
|
|
275
275
|
anchor_hint=f"Error-Handling-{i+1}",
|
|
@@ -284,7 +284,7 @@ class SuggestionExtractor:
|
|
|
284
284
|
id=f"userguide-consistency-{file_name}",
|
|
285
285
|
category="userguide.consistency",
|
|
286
286
|
severity="should_fix",
|
|
287
|
-
source={"section": "userguide", "field": "consistency", "
|
|
287
|
+
source={"section": "userguide", "field": "consistency", "score": score},
|
|
288
288
|
target_files=[file_name],
|
|
289
289
|
action="full_replace",
|
|
290
290
|
anchor_hint="Examples",
|
|
@@ -309,7 +309,7 @@ class SuggestionExtractor:
|
|
|
309
309
|
id=f"tutorial-readability-{file_name}-{i}",
|
|
310
310
|
category="tutorial.readability",
|
|
311
311
|
severity="should_fix",
|
|
312
|
-
source={"section": "tutorial", "field": "readability_suggestions", "
|
|
312
|
+
source={"section": "tutorial", "field": "readability_suggestions", "score": readability_score},
|
|
313
313
|
target_files=[file_name],
|
|
314
314
|
action="full_replace",
|
|
315
315
|
anchor_hint="Introduction",
|
|
@@ -326,7 +326,7 @@ class SuggestionExtractor:
|
|
|
326
326
|
id=f"tutorial-setup-{file_name}-{i}",
|
|
327
327
|
category="tutorial.setup",
|
|
328
328
|
severity="should_fix",
|
|
329
|
-
source={"section": "tutorial", "field": "setup_and_dependencies_suggestions", "
|
|
329
|
+
source={"section": "tutorial", "field": "setup_and_dependencies_suggestions", "score": setup_score},
|
|
330
330
|
target_files=[file_name],
|
|
331
331
|
action="full_replace",
|
|
332
332
|
anchor_hint="Setup",
|
|
@@ -343,7 +343,7 @@ class SuggestionExtractor:
|
|
|
343
343
|
id=f"tutorial-reproducibility-{file_name}-{i}",
|
|
344
344
|
category="tutorial.reproducibility",
|
|
345
345
|
severity="should_fix",
|
|
346
|
-
source={"section": "tutorial", "field": "reproducibility_suggestions", "
|
|
346
|
+
source={"section": "tutorial", "field": "reproducibility_suggestions", "score": reproducibility_score},
|
|
347
347
|
target_files=[file_name],
|
|
348
348
|
action="full_replace",
|
|
349
349
|
anchor_hint="Setup",
|
|
@@ -360,7 +360,7 @@ class SuggestionExtractor:
|
|
|
360
360
|
id=f"tutorial-structure-{file_name}-{i}",
|
|
361
361
|
category="tutorial.structure",
|
|
362
362
|
severity="should_fix",
|
|
363
|
-
source={"section": "tutorial", "field": "structure_and_navigation_suggestions", "
|
|
363
|
+
source={"section": "tutorial", "field": "structure_and_navigation_suggestions", "score": structure_score},
|
|
364
364
|
target_files=[file_name],
|
|
365
365
|
action="full_replace",
|
|
366
366
|
anchor_hint="Introduction",
|
|
@@ -377,7 +377,7 @@ class SuggestionExtractor:
|
|
|
377
377
|
id=f"tutorial-code-{file_name}-{i}",
|
|
378
378
|
category="tutorial.code_quality",
|
|
379
379
|
severity="should_fix",
|
|
380
|
-
source={"section": "tutorial", "field": "executable_code_quality_suggestions", "
|
|
380
|
+
source={"section": "tutorial", "field": "executable_code_quality_suggestions", "score": code_score},
|
|
381
381
|
target_files=[file_name],
|
|
382
382
|
action="full_replace",
|
|
383
383
|
anchor_hint="Code Examples",
|
|
@@ -394,7 +394,7 @@ class SuggestionExtractor:
|
|
|
394
394
|
id=f"tutorial-verification-{file_name}-{i}",
|
|
395
395
|
category="tutorial.verification",
|
|
396
396
|
severity="should_fix",
|
|
397
|
-
source={"section": "tutorial", "field": "result_verification_suggestions", "
|
|
397
|
+
source={"section": "tutorial", "field": "result_verification_suggestions", "score": verification_score},
|
|
398
398
|
target_files=[file_name],
|
|
399
399
|
action="full_replace",
|
|
400
400
|
anchor_hint="Results",
|
|
@@ -411,7 +411,7 @@ class SuggestionExtractor:
|
|
|
411
411
|
id=f"tutorial-performance-{file_name}-{i}",
|
|
412
412
|
category="tutorial.performance",
|
|
413
413
|
severity="should_fix",
|
|
414
|
-
source={"section": "tutorial", "field": "performance_and_resource_notes_suggestions", "
|
|
414
|
+
source={"section": "tutorial", "field": "performance_and_resource_notes_suggestions", "score": performance_score},
|
|
415
415
|
target_files=[file_name],
|
|
416
416
|
action="full_replace",
|
|
417
417
|
anchor_hint="Performance",
|
|
@@ -424,7 +424,7 @@ class SuggestionExtractor:
|
|
|
424
424
|
id=f"tutorial-consistency-{file_name}",
|
|
425
425
|
category="tutorial.consistency",
|
|
426
426
|
severity="should_fix",
|
|
427
|
-
source={"section": "tutorial", "field": "consistency", "
|
|
427
|
+
source={"section": "tutorial", "field": "consistency", "score": score},
|
|
428
428
|
target_files=[file_name],
|
|
429
429
|
action="full_replace",
|
|
430
430
|
anchor_hint=None,
|
|
@@ -34,6 +34,7 @@ class DocumentationGenerationManager:
|
|
|
34
34
|
self.output = OutputManager(base_outputs_dir=output_dir)
|
|
35
35
|
self.llm_gen = LLMContentGenerator(llm)
|
|
36
36
|
self.llm_cleaner = LLMCleaner(llm)
|
|
37
|
+
|
|
37
38
|
|
|
38
39
|
def print_step(self, step_name: str | None = None, step_output: str | None = None):
|
|
39
40
|
if self.step_callback is None:
|
|
@@ -143,54 +144,176 @@ class DocumentationGenerationManager:
|
|
|
143
144
|
self.print_step(step_name="ProcessingFile", step_output=f"Processing {fpath} ({processed_files}/{total_files}) - {len(edits)} edits")
|
|
144
145
|
|
|
145
146
|
original_content = files.get(fpath, "")
|
|
147
|
+
|
|
148
|
+
# Group suggestions by file to avoid duplicate generation
|
|
149
|
+
file_suggestions = []
|
|
150
|
+
full_replace_edits = []
|
|
151
|
+
section_edits = []
|
|
152
|
+
|
|
153
|
+
for e in edits:
|
|
154
|
+
suggestion = next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
155
|
+
if suggestion:
|
|
156
|
+
file_suggestions.append(suggestion)
|
|
157
|
+
if e.edit_type == "full_replace":
|
|
158
|
+
full_replace_edits.append(e)
|
|
159
|
+
else:
|
|
160
|
+
section_edits.append(e)
|
|
161
|
+
|
|
162
|
+
# Debug: Save suggestion grouping info
|
|
163
|
+
import json
|
|
164
|
+
import os
|
|
165
|
+
from datetime import datetime
|
|
166
|
+
|
|
167
|
+
debug_dir = "outputs/debug_generation"
|
|
168
|
+
os.makedirs(debug_dir, exist_ok=True)
|
|
169
|
+
safe_filename = fpath.replace("/", "_").replace(".", "_")
|
|
170
|
+
|
|
171
|
+
grouping_info = {
|
|
172
|
+
"file_path": fpath,
|
|
173
|
+
"total_edits": len(edits),
|
|
174
|
+
"file_suggestions_count": len(file_suggestions),
|
|
175
|
+
"full_replace_edits_count": len(full_replace_edits),
|
|
176
|
+
"section_edits_count": len(section_edits),
|
|
177
|
+
"suggestions": [
|
|
178
|
+
{
|
|
179
|
+
"id": s.id,
|
|
180
|
+
"category": s.category,
|
|
181
|
+
"content_guidance": s.content_guidance[:200] + "..." if len(s.content_guidance or "") > 200 else s.content_guidance,
|
|
182
|
+
"target_files": s.target_files
|
|
183
|
+
} for s in file_suggestions
|
|
184
|
+
],
|
|
185
|
+
"timestamp": datetime.now().isoformat()
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
grouping_file = os.path.join(debug_dir, f"{safe_filename}_grouping.json")
|
|
189
|
+
with open(grouping_file, 'w', encoding='utf-8') as f:
|
|
190
|
+
json.dump(grouping_info, f, indent=2, ensure_ascii=False)
|
|
191
|
+
|
|
146
192
|
content = original_content
|
|
147
193
|
total_stats = {"added_lines": 0}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
194
|
+
|
|
195
|
+
# CRITICAL: Generate content ONCE per file if there are full_replace edits
|
|
196
|
+
# All suggestions for this file are merged into a single evaluation report
|
|
197
|
+
# This prevents duplicate content generation
|
|
198
|
+
if full_replace_edits:
|
|
199
|
+
self.print_step(
|
|
200
|
+
step_name="GeneratingContent",
|
|
201
|
+
step_output=f"🔄 Generating full document for {fpath} with {len(file_suggestions)} suggestions using LLM (SINGLE CALL)..."
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
# Merge all suggestions for this file into a single evaluation report
|
|
205
|
+
# Format suggestions with clear numbering to help LLM understand they're separate improvements
|
|
206
|
+
suggestions_list = []
|
|
207
|
+
for idx, s in enumerate(file_suggestions, 1):
|
|
208
|
+
suggestions_list.append({
|
|
209
|
+
"suggestion_number": idx,
|
|
210
|
+
"category": s.category if hasattr(s, 'category') else "general",
|
|
211
|
+
"content_guidance": s.content_guidance
|
|
212
|
+
})
|
|
213
|
+
|
|
214
|
+
merged_evaluation_report = {
|
|
215
|
+
"total_suggestions": len(file_suggestions),
|
|
216
|
+
"integration_instruction": f"Integrate ALL {len(file_suggestions)} suggestions below into ONE cohesive document. Do NOT create {len(file_suggestions)} separate versions.",
|
|
217
|
+
"suggestions": suggestions_list
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
# Debug: Save merged evaluation report
|
|
221
|
+
merged_report_file = os.path.join(debug_dir, f"{safe_filename}_merged_report.json")
|
|
222
|
+
with open(merged_report_file, 'w', encoding='utf-8') as f:
|
|
223
|
+
json.dump(merged_evaluation_report, f, indent=2, ensure_ascii=False)
|
|
224
|
+
|
|
225
|
+
# Debug: Log that we're about to make a single generation call
|
|
226
|
+
debug_log_file = os.path.join(debug_dir, f"{safe_filename}_generation_log.txt")
|
|
227
|
+
with open(debug_log_file, 'a', encoding='utf-8') as f:
|
|
228
|
+
f.write(f"\n=== GENERATION CALL at {datetime.now().isoformat()} ===\n")
|
|
229
|
+
f.write(f"File: {fpath}\n")
|
|
230
|
+
f.write(f"Full replace edits: {len(full_replace_edits)}\n")
|
|
231
|
+
f.write(f"Total suggestions: {len(file_suggestions)}\n")
|
|
232
|
+
f.write(f"Merged into single call: YES\n")
|
|
233
|
+
f.write(f"Suggestion IDs: {[s.id for s in file_suggestions]}\n\n")
|
|
234
|
+
|
|
235
|
+
gen_content, gen_usage = self.llm_gen.generate_full_document(
|
|
236
|
+
target_file=fpath,
|
|
237
|
+
evaluation_report=merged_evaluation_report,
|
|
238
|
+
context=original_content,
|
|
239
|
+
original_content=original_content,
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
# Debug: Log completion
|
|
243
|
+
with open(debug_log_file, 'a', encoding='utf-8') as f:
|
|
244
|
+
f.write(f"Generation completed at {datetime.now().isoformat()}\n")
|
|
245
|
+
f.write(f"Content length: {len(gen_content) if isinstance(gen_content, str) else 0} characters\n")
|
|
246
|
+
f.write(f"Tokens used: {gen_usage.get('total_tokens', 0)}\n")
|
|
247
|
+
f.write(f"SUCCESS: {isinstance(gen_content, str) and gen_content}\n\n")
|
|
248
|
+
|
|
249
|
+
if isinstance(gen_content, str) and gen_content:
|
|
250
|
+
self.print_step(step_name="LLMFullDoc", step_output=f"✓ Generated full document for {fpath} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
251
|
+
# Apply the generated content to all full_replace edits
|
|
252
|
+
for e in full_replace_edits:
|
|
253
|
+
e.content_template = gen_content
|
|
254
|
+
content = gen_content
|
|
255
|
+
else:
|
|
256
|
+
# Fallback: try individual generation but only for the first edit to avoid duplicates
|
|
257
|
+
if full_replace_edits:
|
|
258
|
+
e = full_replace_edits[0] # Only process the first edit
|
|
259
|
+
suggestion = next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
260
|
+
if suggestion and (not e.content_template or e.content_template.strip() == ""):
|
|
261
|
+
self.print_step(step_name="GeneratingContent", step_output=f"Fallback: Generating full document for {e.suggestion_id} using LLM...")
|
|
156
262
|
gen_content, gen_usage = self.llm_gen.generate_full_document(
|
|
157
263
|
target_file=e.file_path,
|
|
158
|
-
evaluation_report={"suggestion": suggestion.content_guidance
|
|
159
|
-
context=
|
|
264
|
+
evaluation_report={"suggestion": suggestion.content_guidance},
|
|
265
|
+
context=original_content,
|
|
266
|
+
original_content=original_content,
|
|
160
267
|
)
|
|
161
268
|
if isinstance(gen_content, str) and gen_content:
|
|
162
269
|
self.print_step(step_name="LLMFullDoc", step_output=f"✓ Generated full document for {e.suggestion_id} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
270
|
+
# Apply the same content to all full_replace edits
|
|
271
|
+
for edit in full_replace_edits:
|
|
272
|
+
edit.content_template = gen_content
|
|
273
|
+
content = gen_content
|
|
274
|
+
else:
|
|
275
|
+
# Handle section edits individually
|
|
276
|
+
for e in section_edits:
|
|
277
|
+
suggestion = next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
278
|
+
if suggestion and (not e.content_template or e.content_template.strip() == ""):
|
|
279
|
+
self.print_step(step_name="GeneratingContent", step_output=f"Generating section for {e.suggestion_id} using LLM...")
|
|
280
|
+
gen_section, gen_usage = self.llm_gen.generate_section(
|
|
281
|
+
suggestion=suggestion,
|
|
282
|
+
style=plan.style_profile,
|
|
283
|
+
context=original_content,
|
|
284
|
+
)
|
|
285
|
+
if isinstance(gen_section, str) and gen_section:
|
|
286
|
+
self.print_step(step_name="LLMSection", step_output=f"✓ Generated section for {e.suggestion_id} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
287
|
+
# Ensure header present
|
|
288
|
+
if gen_section.lstrip().startswith("#"):
|
|
289
|
+
e.content_template = gen_section
|
|
290
|
+
else:
|
|
291
|
+
title = e.anchor.get('value', '').strip() or ''
|
|
292
|
+
e.content_template = f"## {title}\n\n{gen_section}" if title else gen_section
|
|
293
|
+
|
|
294
|
+
content, stats = self.renderer.apply_edit(content, e)
|
|
295
|
+
total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
|
|
296
|
+
|
|
297
|
+
# Apply remaining edits that weren't full_replace
|
|
298
|
+
for e in edits:
|
|
299
|
+
if e.edit_type != "full_replace":
|
|
300
|
+
content, stats = self.renderer.apply_edit(content, e)
|
|
301
|
+
total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
|
|
302
|
+
|
|
303
|
+
# After applying full document or section changes, run a general cleaner pass for all text files
|
|
304
|
+
# to fix markdown/formatting issues without changing meaning.
|
|
305
|
+
try:
|
|
306
|
+
if fpath.endswith((".md", ".rst", ".Rmd", ".Rd")) and content:
|
|
307
|
+
self.print_step(step_name="CleaningContent", step_output=f"Cleaning formatting for {fpath}...")
|
|
308
|
+
cleaned, _usage = self.llm_cleaner.clean_readme(content)
|
|
309
|
+
if isinstance(cleaned, str) and cleaned.strip():
|
|
310
|
+
content = cleaned
|
|
311
|
+
|
|
312
|
+
# LLM cleaner now handles markdown fences and unwanted summaries
|
|
188
313
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
pass
|
|
193
|
-
total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
|
|
314
|
+
except Exception:
|
|
315
|
+
pass
|
|
316
|
+
|
|
194
317
|
revised[fpath] = content
|
|
195
318
|
diff_stats[fpath] = total_stats
|
|
196
319
|
self.print_step(step_name="RenderedFile", step_output=f"✓ Completed {fpath} - added {total_stats['added_lines']} lines")
|
|
@@ -373,7 +496,6 @@ class DocumentationGenerationManager:
|
|
|
373
496
|
for e in edits:
|
|
374
497
|
sug = next((s for s in suggestions if s.id == e.suggestion_id), None)
|
|
375
498
|
guidance = sug.content_guidance if sug else ""
|
|
376
|
-
evidence = sug.source.get("evidence", "") if sug and sug.source else ""
|
|
377
499
|
section = e.anchor.get('value', 'General improvements')
|
|
378
500
|
|
|
379
501
|
# Convert technical action names to user-friendly descriptions
|
|
@@ -445,61 +567,18 @@ class DocumentationGenerationManager:
|
|
|
445
567
|
|
|
446
568
|
# Show evaluation reasoning that triggered this improvement
|
|
447
569
|
if sug and sug.source:
|
|
448
|
-
evidence = sug.source.get("evidence", "")
|
|
449
570
|
score = sug.source.get("score", "")
|
|
450
571
|
category = sug.category or ""
|
|
451
572
|
|
|
452
573
|
# Format category for display (e.g., "readme.dependencies" -> "Dependencies")
|
|
453
574
|
category_display = category.split('.')[-1].replace('_', ' ').title() if category else ""
|
|
454
575
|
|
|
455
|
-
if
|
|
456
|
-
|
|
457
|
-
if isinstance(evidence, dict):
|
|
458
|
-
# Extract key information from dict evidence
|
|
459
|
-
evidence_text = evidence.get("dependency_suggestions", "") or evidence.get("evidence", "")
|
|
460
|
-
if not evidence_text:
|
|
461
|
-
evidence_text = f"Installation evaluation: {evidence.get('overall_score', 'Unknown')} score"
|
|
462
|
-
else:
|
|
463
|
-
evidence_text = str(evidence)
|
|
464
|
-
# Handle Python dict string evidence (from full_replace actions)
|
|
465
|
-
if evidence_text.startswith("{") and evidence_text.endswith("}"):
|
|
466
|
-
try:
|
|
467
|
-
import ast
|
|
468
|
-
evidence_dict = ast.literal_eval(evidence_text)
|
|
469
|
-
# Extract specific suggestions from the evaluation report
|
|
470
|
-
dep_sugg = evidence_dict.get("dependency_suggestions", "")
|
|
471
|
-
hw_req = evidence_dict.get("hardware_requirements", False)
|
|
472
|
-
compat_os = evidence_dict.get("compatible_os", True)
|
|
473
|
-
overall_score = evidence_dict.get("overall_score", "")
|
|
474
|
-
|
|
475
|
-
# Build specific reason based on evaluation findings
|
|
476
|
-
reasons = []
|
|
477
|
-
if dep_sugg:
|
|
478
|
-
reasons.append(f"Dependencies: {dep_sugg}")
|
|
479
|
-
if hw_req is False:
|
|
480
|
-
reasons.append("Hardware requirements not specified")
|
|
481
|
-
if compat_os is False:
|
|
482
|
-
reasons.append("Operating system compatibility unclear")
|
|
483
|
-
if overall_score and overall_score not in ("Excellent", "Good"):
|
|
484
|
-
reasons.append(f"Overall score: {overall_score}")
|
|
485
|
-
|
|
486
|
-
if reasons:
|
|
487
|
-
evidence_text = "; ".join(reasons)
|
|
488
|
-
else:
|
|
489
|
-
evidence_text = f"Installation evaluation score: {overall_score}"
|
|
490
|
-
except:
|
|
491
|
-
evidence_text = "Installation documentation needs improvement"
|
|
492
|
-
|
|
493
|
-
if score and category_display:
|
|
494
|
-
lines.append(f" - *Reason:* [{category_display} - {score}] {evidence_text}")
|
|
495
|
-
elif score:
|
|
496
|
-
lines.append(f" - *Reason:* [{score}] {evidence_text}")
|
|
497
|
-
elif category_display:
|
|
498
|
-
lines.append(f" - *Reason:* [{category_display}] {evidence_text}")
|
|
499
|
-
else:
|
|
500
|
-
lines.append(f" - *Reason:* {evidence_text}")
|
|
576
|
+
if score and category_display:
|
|
577
|
+
lines.append(f" - *Reason:* [{category_display} - {score}]")
|
|
501
578
|
elif score:
|
|
502
|
-
lines.append(f" - *Reason:*
|
|
579
|
+
lines.append(f" - *Reason:* [{score}]")
|
|
580
|
+
elif category_display:
|
|
581
|
+
lines.append(f" - *Reason:* [{category_display}]")
|
|
503
582
|
|
|
504
583
|
# Show what was actually implemented (different from reason)
|
|
505
584
|
if guidance:
|