bioguider-0.2.52-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioguider/__init__.py +0 -0
- bioguider/agents/__init__.py +0 -0
- bioguider/agents/agent_task.py +92 -0
- bioguider/agents/agent_tools.py +176 -0
- bioguider/agents/agent_utils.py +504 -0
- bioguider/agents/collection_execute_step.py +182 -0
- bioguider/agents/collection_observe_step.py +125 -0
- bioguider/agents/collection_plan_step.py +156 -0
- bioguider/agents/collection_task.py +184 -0
- bioguider/agents/collection_task_utils.py +142 -0
- bioguider/agents/common_agent.py +137 -0
- bioguider/agents/common_agent_2step.py +215 -0
- bioguider/agents/common_conversation.py +61 -0
- bioguider/agents/common_step.py +85 -0
- bioguider/agents/consistency_collection_step.py +102 -0
- bioguider/agents/consistency_evaluation_task.py +57 -0
- bioguider/agents/consistency_evaluation_task_utils.py +14 -0
- bioguider/agents/consistency_observe_step.py +110 -0
- bioguider/agents/consistency_query_step.py +77 -0
- bioguider/agents/dockergeneration_execute_step.py +186 -0
- bioguider/agents/dockergeneration_observe_step.py +154 -0
- bioguider/agents/dockergeneration_plan_step.py +158 -0
- bioguider/agents/dockergeneration_task.py +158 -0
- bioguider/agents/dockergeneration_task_utils.py +220 -0
- bioguider/agents/evaluation_installation_task.py +270 -0
- bioguider/agents/evaluation_readme_task.py +767 -0
- bioguider/agents/evaluation_submission_requirements_task.py +172 -0
- bioguider/agents/evaluation_task.py +206 -0
- bioguider/agents/evaluation_tutorial_task.py +169 -0
- bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
- bioguider/agents/evaluation_userguide_prompts.py +179 -0
- bioguider/agents/evaluation_userguide_task.py +154 -0
- bioguider/agents/evaluation_utils.py +127 -0
- bioguider/agents/identification_execute_step.py +181 -0
- bioguider/agents/identification_observe_step.py +104 -0
- bioguider/agents/identification_plan_step.py +140 -0
- bioguider/agents/identification_task.py +270 -0
- bioguider/agents/identification_task_utils.py +22 -0
- bioguider/agents/peo_common_step.py +64 -0
- bioguider/agents/prompt_utils.py +253 -0
- bioguider/agents/python_ast_repl_tool.py +69 -0
- bioguider/agents/rag_collection_task.py +130 -0
- bioguider/conversation.py +67 -0
- bioguider/database/code_structure_db.py +500 -0
- bioguider/database/summarized_file_db.py +146 -0
- bioguider/generation/__init__.py +39 -0
- bioguider/generation/benchmark_metrics.py +610 -0
- bioguider/generation/change_planner.py +189 -0
- bioguider/generation/document_renderer.py +157 -0
- bioguider/generation/llm_cleaner.py +67 -0
- bioguider/generation/llm_content_generator.py +1128 -0
- bioguider/generation/llm_injector.py +809 -0
- bioguider/generation/models.py +85 -0
- bioguider/generation/output_manager.py +74 -0
- bioguider/generation/repo_reader.py +37 -0
- bioguider/generation/report_loader.py +166 -0
- bioguider/generation/style_analyzer.py +36 -0
- bioguider/generation/suggestion_extractor.py +436 -0
- bioguider/generation/test_metrics.py +189 -0
- bioguider/managers/benchmark_manager.py +785 -0
- bioguider/managers/evaluation_manager.py +215 -0
- bioguider/managers/generation_manager.py +686 -0
- bioguider/managers/generation_test_manager.py +107 -0
- bioguider/managers/generation_test_manager_v2.py +525 -0
- bioguider/rag/__init__.py +0 -0
- bioguider/rag/config.py +117 -0
- bioguider/rag/data_pipeline.py +651 -0
- bioguider/rag/embedder.py +24 -0
- bioguider/rag/rag.py +138 -0
- bioguider/settings.py +103 -0
- bioguider/utils/code_structure_builder.py +59 -0
- bioguider/utils/constants.py +135 -0
- bioguider/utils/default.gitignore +140 -0
- bioguider/utils/file_utils.py +215 -0
- bioguider/utils/gitignore_checker.py +175 -0
- bioguider/utils/notebook_utils.py +117 -0
- bioguider/utils/pyphen_utils.py +73 -0
- bioguider/utils/python_file_handler.py +65 -0
- bioguider/utils/r_file_handler.py +551 -0
- bioguider/utils/utils.py +163 -0
- bioguider-0.2.52.dist-info/LICENSE +21 -0
- bioguider-0.2.52.dist-info/METADATA +51 -0
- bioguider-0.2.52.dist-info/RECORD +84 -0
- bioguider-0.2.52.dist-info/WHEEL +4 -0
bioguider/generation/suggestion_extractor.py
@@ -0,0 +1,436 @@

```python
from __future__ import annotations

from typing import List
from .models import EvaluationReport, SuggestionItem


class SuggestionExtractor:
    def extract(self, report: EvaluationReport) -> List[SuggestionItem]:
        suggestions: List[SuggestionItem] = []

        # README-related suggestions - Extract specific suggestions
        if report.readme_evaluation:
            for file_name, evaluation in report.readme_evaluation.items():
                structured = evaluation.get("structured_evaluation") if isinstance(evaluation, dict) else None
                if structured:
                    # Extract specific dependency suggestions
                    dep_score = structured.get("dependency_score")
                    dep_suggestions = structured.get("dependency_suggestions")
                    if dep_score in ("Poor", "Fair") and dep_suggestions:
                        suggestions.append(SuggestionItem(
                            id=f"readme-dependencies-{file_name}",
                            category="readme.dependencies",
                            severity="should_fix",
                            source={"section": "readme", "field": "dependency_suggestions", "score": dep_score},
                            target_files=[file_name],
                            action="add_dependencies_section",
                            anchor_hint="Dependencies",
                            content_guidance=dep_suggestions,
                        ))

                    # Extract specific hardware/software suggestions
                    hw_sw_score = structured.get("hardware_and_software_spec_score")
                    hw_sw_suggestions = structured.get("hardware_and_software_spec_suggestions")
                    if hw_sw_score in ("Poor", "Fair") and hw_sw_suggestions:
                        suggestions.append(SuggestionItem(
                            id=f"readme-hardware-{file_name}",
                            category="readme.hardware",
                            severity="should_fix",
                            source={"section": "readme", "field": "hardware_and_software_spec_suggestions", "score": hw_sw_score},
                            target_files=[file_name],
                            action="add_system_requirements_section",
                            anchor_hint="System Requirements",
                            content_guidance=hw_sw_suggestions,
                        ))

                    # Extract specific project purpose suggestions
                    purpose_score = structured.get("project_purpose_score")
                    purpose_suggestions = structured.get("project_purpose_suggestions")
                    if purpose_score in ("Poor", "Fair") and purpose_suggestions:
                        suggestions.append(SuggestionItem(
                            id=f"readme-purpose-{file_name}",
                            category="readme.purpose",
                            severity="should_fix",
                            source={"section": "readme", "field": "project_purpose_suggestions", "score": purpose_score},
                            target_files=[file_name],
                            action="full_replace",
                            anchor_hint="Overview",
                            content_guidance=purpose_suggestions,
                        ))

                    # Extract specific readability suggestions
                    readability_score = structured.get("readability_score")
                    readability_suggestions = structured.get("readability_suggestions")
                    if readability_score in ("Poor", "Fair") and readability_suggestions:
                        suggestions.append(SuggestionItem(
                            id=f"readme-readability-{file_name}",
                            category="readme.readability",
                            severity="should_fix",
                            source={"section": "readme", "field": "readability_suggestions", "score": readability_score},
                            target_files=[file_name],
                            action="full_replace",
                            anchor_hint="Introduction",
                            content_guidance=readability_suggestions,
                        ))

                    # Intro cleanup / overview enhancement beyond explicit suggestions
                    suggestions.append(SuggestionItem(
                        id=f"readme-intro-cleanup-{file_name}",
                        category="readme.intro_cleanup",
                        severity="should_fix",
                        source={"section": "readme", "field": "overview", "score": "Fair"},
                        target_files=[file_name],
                        action="replace_intro",
                        anchor_hint="Overview",
                        content_guidance="Rewrite the opening summary to be clear, neutral, and typo-free.",
                    ))
                    # Dependency clarity - prioritize specific suggestions (avoid duplicates)
                    dep_score = structured.get("dependency_score")
                    dep_sugg = structured.get("dependency_suggestions")
                    if dep_sugg and dep_score in ("Poor", "Fair"):  # Only if not already added above
                        suggestions.append(SuggestionItem(
                            id=f"readme-dependencies-clarify-{file_name}",
                            category="readme.dependencies",
                            severity="should_fix",
                            source={"section": "readme", "field": "dependencies", "score": dep_score},
                            target_files=[file_name],
                            action="add_dependencies_section",
                            anchor_hint="Dependencies",
                            content_guidance=str(dep_sugg),
                        ))
                    elif dep_score in ("Poor", "Fair"):  # Fallback to score-based approach
                        suggestions.append(SuggestionItem(
                            id=f"readme-dependencies-fallback-{file_name}",
                            category="readme.dependencies",
                            severity="should_fix",
                            source={"section": "readme", "field": "dependencies", "score": dep_score},
                            target_files=[file_name],
                            action="add_dependencies_section",
                            anchor_hint="Dependencies",
                            content_guidance="List R library dependencies and provide installation guide.",
                        ))

                    # Hardware/Software specs - prioritize specific suggestions (avoid duplicates)
                    hw_score = structured.get("hardware_and_software_spec_score")
                    hw_sugg = structured.get("hardware_and_software_spec_suggestions")
                    if hw_sugg and hw_score in ("Poor", "Fair"):  # Only if not already added above
                        suggestions.append(SuggestionItem(
                            id=f"readme-sysreq-clarify-{file_name}",
                            category="readme.system_requirements",
                            severity="should_fix",
                            source={"section": "readme", "field": "hardware_and_software", "score": hw_score},
                            target_files=[file_name],
                            action="add_system_requirements_section",
                            anchor_hint="System Requirements",
                            content_guidance=str(hw_sugg),
                        ))
                    elif hw_score in ("Poor", "Fair"):  # Fallback to score-based approach
                        suggestions.append(SuggestionItem(
                            id=f"readme-sysreq-fallback-{file_name}",
                            category="readme.system_requirements",
                            severity="should_fix",
                            source={"section": "readme", "field": "hardware_and_software", "score": hw_score},
                            target_files=[file_name],
                            action="add_system_requirements_section",
                            anchor_hint="System Requirements",
                            content_guidance="Specify R version requirements, recommend RAM/CPU configurations, and tailor installation instructions for platforms.",
                        ))

                    # License mention
                    lic_sugg = structured.get("license_suggestions")
                    lic_score = structured.get("license_score")
                    if lic_sugg and lic_score:
                        suggestions.append(SuggestionItem(
                            id=f"readme-license-{file_name}",
                            category="readme.license",
                            severity="nice_to_have",
                            source={"section": "readme", "field": "license"},
                            target_files=[file_name],
                            action="mention_license_section",
                            anchor_hint="License",
                            content_guidance=str(lic_sugg),
                        ))

                    # Readability structuring - prioritize specific suggestions (avoid duplicates)
                    read_sugg = structured.get("readability_suggestions")
                    read_score = structured.get("readability_score")
                    if read_sugg and read_score in ("Poor", "Fair"):  # Only if not already added above
                        suggestions.append(SuggestionItem(
                            id=f"readme-structure-clarify-{file_name}",
                            category="readme.readability",
                            severity="should_fix",
                            source={"section": "readability", "field": "readability_suggestions", "score": read_score},
                            target_files=[file_name],
                            action="normalize_headings_structure",
                            anchor_hint="Installation",
                            content_guidance=str(read_sugg),
                        ))
                    elif read_score in ("Poor", "Fair"):  # Fallback to score-based approach
                        suggestions.append(SuggestionItem(
                            id=f"readme-structure-fallback-{file_name}",
                            category="readme.readability",
                            severity="should_fix",
                            source={"section": "readability", "field": "readability_score", "score": read_score},
                            target_files=[file_name],
                            action="normalize_headings_structure",
                            anchor_hint="Installation",
                            content_guidance="Improve readability with better structure and formatting.",
                        ))
                    # If suggestions mention Usage, add a usage section
                    if isinstance(read_sugg, str) and "Usage" in read_sugg:
                        suggestions.append(SuggestionItem(
                            id=f"readme-usage-{file_name}",
                            category="readme.usage",
                            severity="nice_to_have",
                            source={"section": "readability", "field": "usage"},
                            target_files=[file_name],
                            action="add_usage_section",
                            anchor_hint="Usage",
                            content_guidance="Provide a brief usage example and key commands.",
                        ))

        # Installation-related suggestions
        if report.installation_evaluation:
            structured = None
            if isinstance(report.installation_evaluation, dict):
                structured = report.installation_evaluation.get("structured_evaluation")
            if structured:
                # Use full_replace mode for all installation files
                dep_sugg = structured.get("dependency_suggestions")
                hw_req = structured.get("hardware_requirements")
                compat_os = structured.get("compatible_os")
                overall = structured.get("overall_score")

                # Trigger full_replace for all installation files when needed
                if overall in ("Poor", "Fair") or hw_req is False or compat_os is False or dep_sugg:
                    for target in report.installation_files or []:
                        suggestions.append(SuggestionItem(
                            id=f"install-full-replace-{target}",
                            category="installation.full_replace",
                            severity="should_fix",
                            source={"section": "installation", "field": "overall"},
                            target_files=[target],
                            action="full_replace",
                            anchor_hint=None,
                            content_guidance="Comprehensive rewrite preserving original structure while adding improved dependencies, hardware requirements, and installation instructions.",
                        ))

        # Submission requirements could drive expected output/dataset sections; use only if in files list
        # Keep minimal to avoid speculative content

        # Userguide/API docs suggestions (new interface) - Extract specific suggestions
        if getattr(report, "userguide_evaluation", None) and isinstance(report.userguide_evaluation, dict):
            for file_name, eval_block in report.userguide_evaluation.items():
                ug_eval = eval_block.get("user_guide_evaluation") if isinstance(eval_block, dict) else None
                consistency_eval = eval_block.get("consistency_evaluation") if isinstance(eval_block, dict) else None

                if isinstance(ug_eval, dict):
                    # Extract specific readability suggestions
                    readability_score = ug_eval.get("readability_score", "")
                    readability_suggestions = ug_eval.get("readability_suggestions", [])
                    if readability_score in ("Poor", "Fair") and readability_suggestions:
                        for i, suggestion in enumerate(readability_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"userguide-readability-{file_name}-{i}",
                                    category="userguide.readability",
                                    severity="should_fix",
                                    source={"section": "userguide", "field": "readability_suggestions", "score": readability_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint=f"Readability-{i+1}",
                                    content_guidance=suggestion,
                                ))

                    # Extract specific context and purpose suggestions
                    context_score = ug_eval.get("context_and_purpose_score", "")
                    context_suggestions = ug_eval.get("context_and_purpose_suggestions", [])
                    if context_score in ("Poor", "Fair") and context_suggestions:
                        for i, suggestion in enumerate(context_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"userguide-context-{file_name}-{i}",
                                    category="userguide.context",
                                    severity="should_fix",
                                    source={"section": "userguide", "field": "context_and_purpose_suggestions", "score": context_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint=f"Context-{i+1}",
                                    content_guidance=suggestion,
                                ))

                    # Extract specific error handling suggestions
                    error_score = ug_eval.get("error_handling_score", "")
                    error_suggestions = ug_eval.get("error_handling_suggestions", [])
                    if error_score in ("Poor", "Fair") and error_suggestions:
                        for i, suggestion in enumerate(error_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"userguide-error-{file_name}-{i}",
                                    category="userguide.error_handling",
                                    severity="should_fix",
                                    source={"section": "userguide", "field": "error_handling_suggestions", "score": error_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint=f"Error-Handling-{i+1}",
                                    content_guidance=suggestion,
                                ))

                # If consistency issues present, add targeted improvements
                if isinstance(consistency_eval, dict):
                    score = consistency_eval.get("score")
                    if score in ("Poor", "Fair"):
                        suggestions.append(SuggestionItem(
                            id=f"userguide-consistency-{file_name}",
                            category="userguide.consistency",
                            severity="should_fix",
                            source={"section": "userguide", "field": "consistency", "score": score},
                            target_files=[file_name],
                            action="full_replace",
                            anchor_hint="Examples",
                            content_guidance="Improve consistency in examples, terminology, and formatting based on evaluation report.",
                        ))

        # Tutorials/vignettes suggestions (new interface) - ONLY Poor/Fair scores
        if getattr(report, "tutorial_evaluation", None) and isinstance(report.tutorial_evaluation, dict):
            for file_name, eval_block in report.tutorial_evaluation.items():
                tut_eval = eval_block.get("tutorial_evaluation") if isinstance(eval_block, dict) else None
                consistency_eval = eval_block.get("consistency_evaluation") if isinstance(eval_block, dict) else None
                if isinstance(tut_eval, dict):
                    # Only extract suggestions for Poor/Fair scores

                    # Readability suggestions - only if score is Poor/Fair
                    readability_score = tut_eval.get("readability_score", "")
                    readability_suggestions = tut_eval.get("readability_suggestions", [])
                    if readability_score in ("Poor", "Fair") and readability_suggestions:
                        for i, suggestion in enumerate(readability_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"tutorial-readability-{file_name}-{i}",
                                    category="tutorial.readability",
                                    severity="should_fix",
                                    source={"section": "tutorial", "field": "readability_suggestions", "score": readability_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint="Introduction",
                                    content_guidance=suggestion,
                                ))

                    # Setup and dependencies suggestions - only if score is Poor/Fair
                    setup_score = tut_eval.get("setup_and_dependencies_score", "")
                    setup_suggestions = tut_eval.get("setup_and_dependencies_suggestions", [])
                    if setup_score in ("Poor", "Fair") and setup_suggestions:
                        for i, suggestion in enumerate(setup_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"tutorial-setup-{file_name}-{i}",
                                    category="tutorial.setup",
                                    severity="should_fix",
                                    source={"section": "tutorial", "field": "setup_and_dependencies_suggestions", "score": setup_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint="Setup",
                                    content_guidance=suggestion,
                                ))

                    # Reproducibility suggestions - only if score is Poor/Fair
                    reproducibility_score = tut_eval.get("reproducibility_score", "")
                    reproducibility_suggestions = tut_eval.get("reproducibility_suggestions", [])
                    if reproducibility_score in ("Poor", "Fair") and reproducibility_suggestions:
                        for i, suggestion in enumerate(reproducibility_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"tutorial-reproducibility-{file_name}-{i}",
                                    category="tutorial.reproducibility",
                                    severity="should_fix",
                                    source={"section": "tutorial", "field": "reproducibility_suggestions", "score": reproducibility_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint="Setup",
                                    content_guidance=suggestion,
                                ))

                    # Structure and navigation suggestions - only if score is Poor/Fair
                    structure_score = tut_eval.get("structure_and_navigation_score", "")
                    structure_suggestions = tut_eval.get("structure_and_navigation_suggestions", [])
                    if structure_score in ("Poor", "Fair") and structure_suggestions:
                        for i, suggestion in enumerate(structure_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"tutorial-structure-{file_name}-{i}",
                                    category="tutorial.structure",
                                    severity="should_fix",
                                    source={"section": "tutorial", "field": "structure_and_navigation_suggestions", "score": structure_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint="Introduction",
                                    content_guidance=suggestion,
                                ))

                    # Executable code quality suggestions - only if score is Poor/Fair
                    code_score = tut_eval.get("executable_code_quality_score", "")
                    code_suggestions = tut_eval.get("executable_code_quality_suggestions", [])
                    if code_score in ("Poor", "Fair") and code_suggestions:
                        for i, suggestion in enumerate(code_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"tutorial-code-{file_name}-{i}",
                                    category="tutorial.code_quality",
                                    severity="should_fix",
                                    source={"section": "tutorial", "field": "executable_code_quality_suggestions", "score": code_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint="Code Examples",
                                    content_guidance=suggestion,
                                ))

                    # Result verification suggestions - only if score is Poor/Fair
                    verification_score = tut_eval.get("result_verification_score", "")
                    verification_suggestions = tut_eval.get("result_verification_suggestions", [])
                    if verification_score in ("Poor", "Fair") and verification_suggestions:
                        for i, suggestion in enumerate(verification_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"tutorial-verification-{file_name}-{i}",
                                    category="tutorial.verification",
                                    severity="should_fix",
                                    source={"section": "tutorial", "field": "result_verification_suggestions", "score": verification_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint="Results",
                                    content_guidance=suggestion,
                                ))

                    # Performance and resource notes suggestions - only if score is Poor/Fair
                    performance_score = tut_eval.get("performance_and_resource_notes_score", "")
                    performance_suggestions = tut_eval.get("performance_and_resource_notes_suggestions", [])
                    if performance_score in ("Poor", "Fair") and performance_suggestions:
                        for i, suggestion in enumerate(performance_suggestions):
                            if isinstance(suggestion, str) and suggestion.strip():
                                suggestions.append(SuggestionItem(
                                    id=f"tutorial-performance-{file_name}-{i}",
                                    category="tutorial.performance",
                                    severity="should_fix",
                                    source={"section": "tutorial", "field": "performance_and_resource_notes_suggestions", "score": performance_score},
                                    target_files=[file_name],
                                    action="full_replace",
                                    anchor_hint="Performance",
                                    content_guidance=suggestion,
                                ))
                if isinstance(consistency_eval, dict):
                    score = consistency_eval.get("score")
                    if score in ("Poor", "Fair"):
                        suggestions.append(SuggestionItem(
                            id=f"tutorial-consistency-{file_name}",
                            category="tutorial.consistency",
                            severity="should_fix",
                            source={"section": "tutorial", "field": "consistency", "score": score},
                            target_files=[file_name],
                            action="full_replace",
                            anchor_hint=None,
                            content_guidance="Align tutorial with code definitions; fix inconsistencies as per report.",
                        ))

        return suggestions
```
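For a quick smoke test of the extractor above: `extract()` only reads plain attributes and dict keys off the report, so a `SimpleNamespace` stand-in is enough here. This is a minimal sketch; the real `EvaluationReport` and `SuggestionItem` live in `bioguider/generation/models.py`, which this diff does not display, so the payload shape below is inferred from the `.get(...)` lookups in `extract()` rather than from a documented schema.

```python
from types import SimpleNamespace

from bioguider.generation.suggestion_extractor import SuggestionExtractor

# Stand-in for EvaluationReport: extract() only touches these attributes.
# The nested payload mirrors the .get(...) lookups above; it is an assumed
# shape, not a documented schema.
report = SimpleNamespace(
    readme_evaluation={
        "README.md": {
            "structured_evaluation": {
                "dependency_score": "Poor",
                "dependency_suggestions": "List required R packages and how to install them.",
            }
        }
    },
    installation_evaluation=None,
    installation_files=[],
    userguide_evaluation=None,
    tutorial_evaluation=None,
)

for item in SuggestionExtractor().extract(report):
    # SuggestionItem fields per the constructor calls above
    print(item.id, item.action)
```

With this input the extractor emits three items: `readme-dependencies-README.md`, the unconditional `readme-intro-cleanup-README.md`, and `readme-dependencies-clarify-README.md`. Note that the first and last come from two branches gated on the same condition, so both fire on the same input despite the "avoid duplicates" comments.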
@@ -0,0 +1,189 @@

```python
from __future__ import annotations

import json
import re
from difflib import SequenceMatcher
from typing import Dict, Any, List, Tuple


def _lev(a: str, b: str) -> float:
    return 1.0 - SequenceMatcher(None, a, b).ratio()


def _count_markdown_issues(text: str) -> int:
    issues = 0
    # naive checks
    issues += text.count("[![")  # unbalanced badge syntax
    issues += text.count("[ ")  # bad link spacing
    issues += len(re.findall(r"^#[^#\s]", text, flags=re.M))  # malformed header
    return max(0, issues)


def evaluate_fixes(baseline: str, corrupted: str, revised: str, injection_manifest: Dict[str, Any]) -> Dict[str, Any]:
    per_error: List[Dict[str, Any]] = []
    per_cat: Dict[str, Dict[str, int]] = {}
    # aggregate counters
    totals = {"total_errors": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0}

    def mark(cat: str, key: str):
        per_cat.setdefault(cat, {"total": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0})
        per_cat[cat][key] += 1

    # Precompute some structural counts
    def count_malformed_bullets(text: str) -> int:
        return len(re.findall(r"^[-*]\S", text, flags=re.M))

    def count_bad_image_spacing(text: str) -> int:
        return len(re.findall(r"!\[[^\]]*\]\s+\(", text))

    def table_variance(text: str) -> int:
        rows = [ln for ln in text.splitlines() if '|' in ln]
        groups: List[List[str]] = []
        cur: List[str] = []
        for ln in rows:
            if '|' in ln:
                cur.append(ln)
            else:
                if len(cur) >= 2:
                    groups.append(cur)
                cur = []
        if len(cur) >= 2:
            groups.append(cur)
        vari = 0
        for g in groups:
            counts = [ln.count('|') for ln in g]
            vari += (max(counts) - min(counts))
        return vari

    malformed_bullets_before = count_malformed_bullets(corrupted)
    malformed_bullets_after = count_malformed_bullets(revised)
    bad_img_before = count_bad_image_spacing(corrupted)
    bad_img_after = count_bad_image_spacing(revised)
    table_var_before = table_variance(corrupted)
    table_var_after = table_variance(revised)

    canonical_titles = {
        "## What is it?",
        "## What can it do?",
        "## Requirements",
        "## Install",
        "## Quick example",
        "## Learn more",
        "## License & Contact",
    }

    for e in injection_manifest.get("errors", []):
        cat = e.get("category", "unknown")
        per_cat.setdefault(cat, {"total": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0})
        per_cat[cat]["total"] += 1
        orig = e.get("original_snippet", "")
        mut = e.get("mutated_snippet", "")

        # Determine the neighborhood and after-fix snippet
        after = None
        if mut and mut in corrupted:
            # try to find replacement around mutated snippet in revised
            idx = corrupted.find(mut)
            window = corrupted[max(0, idx-200): idx+200]
            # pick a few words from orig as hint
            hint = orig[:50]
            if hint and hint in revised:
                after = hint
        if after is None:
            # fallback: search original snippet directly
            after = orig if orig in revised else None

        status = "unchanged"
        notes = ""
        if cat == "typo":
            if orig and orig in revised:
                status = "fixed_to_baseline"
            elif mut and mut in revised:
                status = "unchanged"
            else:
                status = "fixed_to_valid"
        elif cat == "link":
            # simple: link markdown well-formed
            wellformed = re.search(r"\[[^\]]+\]\([^\s)]+\)", revised) is not None
            status = "fixed_to_valid" if wellformed else "unchanged"
        elif cat == "duplicate":
            dup_before = corrupted.count(mut)
            dup_after = revised.count(mut)
            status = "fixed_to_valid" if dup_after < dup_before else "unchanged"
        elif cat == "markdown_structure":
            issues_before = _count_markdown_issues(corrupted)
            issues_after = _count_markdown_issues(revised)
            status = "fixed_to_valid" if issues_after < issues_before else "unchanged"
        elif cat in ("bio_term", "function"):
            if orig and orig in revised:
                status = "fixed_to_baseline"
            elif mut and mut in revised:
                status = "unchanged"
            else:
                status = "fixed_to_valid"
        elif cat == "list_structure":
            status = "fixed_to_valid" if malformed_bullets_after < malformed_bullets_before else "unchanged"
        elif cat == "image_syntax":
            status = "fixed_to_valid" if bad_img_after < bad_img_before else "unchanged"
        elif cat == "section_title":
            # valid if mutated title removed and any canonical title present
            if mut and mut not in revised and any(t in revised for t in canonical_titles):
                status = "fixed_to_valid"
            else:
                status = "unchanged"
        elif cat == "inline_code":
            # check that the raw content regained backticks somewhere
            raw = mut.strip('`') if mut else ""
            rewrapped = f"`{raw}`" if raw else ""
            if raw and rewrapped and rewrapped in revised and mut not in revised:
                status = "fixed_to_valid"
            else:
                status = "unchanged"
        elif cat == "emphasis":
            status = "fixed_to_valid" if mut and mut not in revised else "unchanged"
        elif cat == "table_alignment":
            status = "fixed_to_valid" if table_var_after < table_var_before else "unchanged"
        elif cat == "code_lang_tag":
            status = "fixed_to_valid" if mut and mut not in revised else "unchanged"
        # Biology-specific and CLI/CONFIG categories: treat as fixed if mutated snippet removed
        elif cat in {
            "gene_symbol_case","species_swap","ref_genome_mismatch","modality_confusion","normalization_error",
            "umi_vs_read","batch_effect","qc_threshold","file_format","strandedness","coordinates","units_scale",
            "sample_type","contamination","param_name","default_value","path_hint"
        }:
            status = "fixed_to_valid" if mut and mut not in revised else "unchanged"
        else:
            status = "unchanged"

        mark(cat, status)
        totals["total_errors"] += 1
        totals[status] += 1
        per_error.append({
            "id": e.get("id"),
            "category": cat,
            "status": status,
            "before": mut,
            "after_contains_original": bool(orig and orig in revised),
            "notes": notes,
        })

    # global metrics
    issues_before = _count_markdown_issues(corrupted)
    issues_after = _count_markdown_issues(revised)
    global_metrics = {
        "markdown_validity_delta": issues_before - issues_after,
    }
    success = totals["fixed_to_baseline"] + totals["fixed_to_valid"]
    success_rate = (success / totals["total_errors"] * 100.0) if totals["total_errors"] else 0.0
    summary = {
        "totals": totals,
        "success_rate": round(success_rate, 2),
    }
    return {
        "per_error": per_error,
        "per_category": per_cat,
        "global": global_metrics,
        "summary": summary,
    }
```
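And a sanity-check call for `evaluate_fixes`. The hunk header does not name the file, but the content and +189 line count suggest `bioguider/generation/test_metrics.py` from the listing above; the import path below is that assumption. The manifest shape, an `"errors"` list whose entries carry `id`, `category`, `original_snippet`, and `mutated_snippet`, is likewise inferred from the `e.get(...)` lookups in the loop.

```python
from bioguider.generation.test_metrics import evaluate_fixes  # assumed module path

baseline = "Use the `align_reads` helper to map reads.\n"
corrupted = "Use the `algin_reads` helper to map reads.\n"  # injected typo
revised = "Use the `align_reads` helper to map reads.\n"    # after repair

manifest = {
    "errors": [
        {
            "id": "e1",
            "category": "typo",
            "original_snippet": "align_reads",
            "mutated_snippet": "algin_reads",
        }
    ]
}

result = evaluate_fixes(baseline, corrupted, revised, manifest)
print(result["summary"])
# The typo branch finds original_snippet in `revised`, so the error counts as
# fixed_to_baseline and success_rate comes out as 100.0.
```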