bioguider 0.2.28__tar.gz → 0.2.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

Files changed (84)
  1. {bioguider-0.2.28 → bioguider-0.2.30}/PKG-INFO +1 -1
  2. bioguider-0.2.30/bioguider/generation/change_planner.py +302 -0
  3. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/document_renderer.py +5 -0
  4. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/llm_cleaner.py +16 -8
  5. bioguider-0.2.30/bioguider/generation/llm_content_generator.py +123 -0
  6. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/models.py +4 -0
  7. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/output_manager.py +3 -3
  8. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/report_loader.py +6 -0
  9. bioguider-0.2.30/bioguider/generation/suggestion_extractor.py +460 -0
  10. bioguider-0.2.30/bioguider/managers/generation_manager.py +511 -0
  11. {bioguider-0.2.28 → bioguider-0.2.30}/pyproject.toml +1 -1
  12. bioguider-0.2.28/bioguider/generation/change_planner.py +0 -140
  13. bioguider-0.2.28/bioguider/generation/llm_content_generator.py +0 -72
  14. bioguider-0.2.28/bioguider/generation/suggestion_extractor.py +0 -136
  15. bioguider-0.2.28/bioguider/managers/generation_manager.py +0 -206
  16. {bioguider-0.2.28 → bioguider-0.2.30}/LICENSE +0 -0
  17. {bioguider-0.2.28 → bioguider-0.2.30}/README.md +0 -0
  18. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/__init__.py +0 -0
  19. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/__init__.py +0 -0
  20. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/agent_task.py +0 -0
  21. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/agent_tools.py +0 -0
  22. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/agent_utils.py +0 -0
  23. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/collection_execute_step.py +0 -0
  24. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/collection_observe_step.py +0 -0
  25. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/collection_plan_step.py +0 -0
  26. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/collection_task.py +0 -0
  27. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/collection_task_utils.py +0 -0
  28. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/common_agent.py +0 -0
  29. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/common_agent_2step.py +0 -0
  30. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/common_conversation.py +0 -0
  31. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/common_step.py +0 -0
  32. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/consistency_collection_step.py +0 -0
  33. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/consistency_evaluation_task.py +0 -0
  34. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/consistency_evaluation_task_utils.py +0 -0
  35. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/consistency_observe_step.py +0 -0
  36. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/consistency_query_step.py +0 -0
  37. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/dockergeneration_execute_step.py +0 -0
  38. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/dockergeneration_observe_step.py +0 -0
  39. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/dockergeneration_plan_step.py +0 -0
  40. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/dockergeneration_task.py +0 -0
  41. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/dockergeneration_task_utils.py +0 -0
  42. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_installation_task.py +0 -0
  43. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_readme_task.py +0 -0
  44. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
  45. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_task.py +0 -0
  46. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_tutorial_task.py +0 -0
  47. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_tutorial_task_prompts.py +0 -0
  48. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_userguide_prompts.py +0 -0
  49. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/evaluation_userguide_task.py +0 -0
  50. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/identification_execute_step.py +0 -0
  51. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/identification_observe_step.py +0 -0
  52. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/identification_plan_step.py +0 -0
  53. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/identification_task.py +0 -0
  54. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/identification_task_utils.py +0 -0
  55. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/peo_common_step.py +0 -0
  56. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/prompt_utils.py +0 -0
  57. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/python_ast_repl_tool.py +0 -0
  58. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/agents/rag_collection_task.py +0 -0
  59. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/conversation.py +0 -0
  60. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/database/code_structure_db.py +0 -0
  61. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/database/summarized_file_db.py +0 -0
  62. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/__init__.py +0 -0
  63. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/llm_injector.py +0 -0
  64. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/repo_reader.py +0 -0
  65. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/style_analyzer.py +0 -0
  66. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/generation/test_metrics.py +0 -0
  67. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/managers/evaluation_manager.py +0 -0
  68. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/managers/generation_test_manager.py +0 -0
  69. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/rag/__init__.py +0 -0
  70. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/rag/config.py +0 -0
  71. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/rag/data_pipeline.py +0 -0
  72. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/rag/embedder.py +0 -0
  73. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/rag/rag.py +0 -0
  74. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/settings.py +0 -0
  75. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/code_structure_builder.py +0 -0
  76. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/constants.py +0 -0
  77. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/default.gitignore +0 -0
  78. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/file_utils.py +0 -0
  79. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/gitignore_checker.py +0 -0
  80. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/notebook_utils.py +0 -0
  81. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/pyphen_utils.py +0 -0
  82. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/python_file_handler.py +0 -0
  83. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/r_file_handler.py +0 -0
  84. {bioguider-0.2.28 → bioguider-0.2.30}/bioguider/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: bioguider
3
- Version: 0.2.28
3
+ Version: 0.2.30
4
4
  Summary: An AI-Powered package to help biomedical developers to generate clear documentation
5
5
  License: MIT
6
6
  Author: Cankun Wang
@@ -0,0 +1,302 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Dict
4
+
5
+ from .models import SuggestionItem, StyleProfile, DocumentPlan, PlannedEdit
6
+
7
+
8
class ChangePlanner:
    """Translate evaluation suggestions into a ``DocumentPlan`` of edits.

    Each ``SuggestionItem`` names an ``action`` and a list of target files.
    For every (suggestion, target) pair the planner emits at most one
    ``PlannedEdit``.  Most edits carry an empty ``content_template``; the
    inline comments in the original mark those as "filled by LLM generation"
    later in the pipeline.
    """

    # Actions that append a section whose body is generated later (empty
    # ``content_template``).  Value is ``(default_anchor, dedupe)``:
    #   default_anchor - header used when the suggestion has no ``anchor_hint``
    #   dedupe         - if True, only the first edit per (file, header) is kept
    _LLM_SECTION_ACTIONS = {
        "add_dependencies_section": ("Dependencies", True),
        "add_system_requirements_section": ("System Requirements", True),
        "add_hardware_requirements": ("Hardware Requirements", True),
        "improve_clarity_and_error_handling": ("Introduction", False),
        "improve_consistency": ("Examples", False),
        "improve_tutorial_quality": ("Setup", False),
        "improve_readability": ("Introduction", True),
        "improve_setup": ("Setup", False),
        "improve_reproducibility": ("Setup", False),
        "improve_structure": ("Introduction", False),
        "improve_code_quality": ("Code Examples", False),
        "improve_verification": ("Results", False),
        "improve_performance": ("Performance", False),
        "improve_context": ("Introduction", True),
        "improve_error_handling": ("Examples", True),
        "add_overview_section": ("Overview", False),
    }

    @staticmethod
    def _evidence(suggestion: SuggestionItem) -> str:
        """Return the suggestion's evidence text, or "" when it has no source."""
        source = suggestion.source
        # Guard against a missing/empty source mapping instead of raising
        # AttributeError on ``None.get``.
        return source.get("evidence", "") if source else ""

    def build_plan(
        self,
        repo_path: str,
        style: StyleProfile,
        suggestions: List[SuggestionItem],
        available_files: Dict[str, str],
    ) -> DocumentPlan:
        """Build the edit plan for a repository.

        Args:
            repo_path: Stored unchanged on the returned plan.
            style: Style profile; ``heading_style`` seeds templated headers and
                the profile is stored unchanged on the returned plan.
            suggestions: Suggestions to convert.  Unknown actions and
                ``normalize_headings_structure`` produce no edit.
            available_files: Currently not consulted.  Targets missing from it
                are still planned (the original comment states the renderer
                skips missing files -- not verifiable from this module).

        Returns:
            A ``DocumentPlan`` whose ``planned_edits`` are grouped by file in
            first-seen order.  If a file has any ``full_replace`` edit, only
            its ``full_replace`` edits are kept.
        """
        planned: List[PlannedEdit] = []
        # (file, lower-cased header) pairs already claimed by a deduped edit.
        seen_headers: set[tuple[str, str]] = set()

        def section_header(title: str) -> str:
            # Inserted sections always use a level-2 heading.
            # NOTE(review): assumes heading_style is the marker character
            # (e.g. "#"); any other value would be doubled verbatim -- confirm.
            marker = style.heading_style or "#"
            return f"{marker * 2} {title}\n\n"

        def plan_edit(
            target: str,
            suggestion: SuggestionItem,
            title: str,
            *,
            content: str = "",
            edit_type: str = "append_section",
            anchor_type: str = "header",
            dedupe: bool = False,
        ) -> None:
            """Append one PlannedEdit unless a deduped header was already used."""
            if dedupe:
                key = (target, title.strip().lower())
                if key in seen_headers:
                    return
                seen_headers.add(key)
            planned.append(PlannedEdit(
                file_path=target,
                edit_type=edit_type,
                anchor={"type": anchor_type, "value": title},
                content_template=content,
                rationale=self._evidence(suggestion),
                suggestion_id=suggestion.id,
            ))

        for s in suggestions:
            action = s.action
            for target in s.target_files or []:
                if action in self._LLM_SECTION_ACTIONS:
                    default_title, dedupe = self._LLM_SECTION_ACTIONS[action]
                    plan_edit(target, s, s.anchor_hint or default_title, dedupe=dedupe)
                elif action == "mention_license_section":
                    # NOTE(review): the template hard-codes "MIT License"
                    # regardless of the target repository's actual license.
                    plan_edit(
                        target,
                        s,
                        s.anchor_hint or "License",
                        content=section_header("License")
                        + "This project is released under the MIT License. See LICENSE for details.\n",
                        dedupe=True,
                    )
                elif action == "add_usage_section":
                    plan_edit(
                        target,
                        s,
                        "Usage",
                        content=section_header("Usage") + "- Brief example of typical workflow.\n",
                        dedupe=True,
                    )
                elif action == "replace_intro":
                    # Replaces the intro block with an Overview section whose
                    # content is generated later.
                    plan_edit(
                        target,
                        s,
                        "Overview",
                        edit_type="replace_intro_block",
                        dedupe=True,
                    )
                elif action == "clarify_mandatory_vs_optional":
                    # Prefer the report's own guidance over the generic line.
                    guidance = s.content_guidance or (
                        "Specify compatibility details for dependencies across operating systems and architectures."
                    )
                    # Shares the "dependencies" key with
                    # add_dependencies_section, so only one of them lands.
                    plan_edit(
                        target,
                        s,
                        "Dependencies",
                        content=section_header("Dependencies") + f"- {guidance}\n",
                        dedupe=True,
                    )
                elif action == "full_replace":
                    plan_edit(
                        target,
                        s,
                        "full_document",
                        edit_type="full_replace",
                        anchor_type="document",
                    )
                # "normalize_headings_structure" and unknown actions are
                # intentionally skipped: no edit is planned for them.

        # A full_replace makes every other edit for the same file redundant.
        by_file: Dict[str, List[PlannedEdit]] = {}
        for edit in planned:
            by_file.setdefault(edit.file_path, []).append(edit)

        filtered: List[PlannedEdit] = []
        for edits in by_file.values():
            full_replacements = [e for e in edits if e.edit_type == "full_replace"]
            filtered.extend(full_replacements or edits)

        return DocumentPlan(repo_path=repo_path, style_profile=style, planned_edits=filtered)
301
+
302
+
@@ -40,6 +40,11 @@ class DocumentRenderer:
40
40
  added = len(edit.content_template.splitlines())
41
41
  content = new_content
42
42
 
43
+ elif edit.edit_type == "full_replace":
44
+ # Replace entire document content
45
+ content = edit.content_template
46
+ added = len(edit.content_template.splitlines())
47
+
43
48
  # Other edit types (insert_after_header, replace_block) can be added as needed
44
49
 
45
50
  return content, {"added_lines": added}
@@ -6,10 +6,10 @@ from bioguider.agents.common_conversation import CommonConversation
6
6
 
7
7
 
8
8
  CLEANUP_PROMPT = """
9
- You are BioGuider,” a precise editor for biomedical/bioinformatics documentation.
9
+ You are "BioGuider," a precise editor for biomedical/bioinformatics documentation.
10
10
 
11
11
  TASK
12
- Given a full README markdown, produce a corrected version that:
12
+ Given a documentation file (README, RMarkdown, or other), produce a corrected version that:
13
13
  - Fixes typos, grammar, capitalization, and spacing
14
14
  - Corrects malformed markdown (headers, lists, links, code fences)
15
15
  - Repairs or normalizes link formatting; keep URLs absolute if present
@@ -17,14 +17,22 @@ Given a full README markdown, produce a corrected version that:
17
17
  - Preserves technical accuracy and biomedical domain terminology (do not invent features)
18
18
  - Keeps tone neutral and professional; avoid marketing language
19
19
  - Preserves all valid information; do not delete content unless it is a duplicate or malformed
20
+ - For RMarkdown files (.Rmd): Preserve YAML frontmatter, R code chunks, and existing structure exactly
21
+
22
+ CRITICAL REQUIREMENTS:
23
+ - Do NOT wrap the entire document in markdown code fences (```markdown). Return pure content only.
24
+ - If the document starts with ```markdown and ends with ```, remove these fences completely.
25
+ - Do NOT modify YAML frontmatter in RMarkdown files
26
+ - Do NOT modify R code chunks (```{r} blocks) in RMarkdown files
27
+ - Do NOT change the overall structure or organization of the document
20
28
 
21
29
  INPUT
22
- <<README>>
23
- {readme}
24
- <</README>>
30
+ <<DOCUMENT>>
31
+ {doc}
32
+ <</DOCUMENT>>
25
33
 
26
34
  OUTPUT
27
- Return ONLY the revised markdown (no commentary, no explanations).
35
+ Return ONLY the revised content (no commentary, no explanations, no code fences).
28
36
  """
29
37
 
30
38
 
@@ -35,8 +43,8 @@ class LLMCleaner:
35
43
  def clean_readme(self, content: str) -> tuple[str, dict]:
36
44
  conv = CommonConversation(self.llm)
37
45
  output, token_usage = conv.generate(
38
- system_prompt=CLEANUP_PROMPT.format(readme=content[:30000]),
39
- instruction_prompt="Provide the corrected README markdown only.",
46
+ system_prompt=CLEANUP_PROMPT.format(doc=content[:30000]),
47
+ instruction_prompt="Provide the corrected documentation content only.",
40
48
  )
41
49
  return output.strip(), token_usage
42
50
 
@@ -0,0 +1,123 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict
4
+ import json
5
+ from langchain_openai.chat_models.base import BaseChatOpenAI
6
+
7
+ from bioguider.agents.common_conversation import CommonConversation
8
+ from .models import StyleProfile, SuggestionItem
9
+
10
+
11
+ LLM_SECTION_PROMPT = """
12
+ You are "BioGuider," a precise documentation generator for biomedical/bioinformatics software.
13
+
14
+ GOAL
15
+ Write or refine a single documentation section named "{section}". Follow the specific guidance from the evaluation report exactly.
16
+
17
+ INPUTS (use only what is provided; never invent)
18
+ - suggestion_category: {suggestion_category}
19
+ - anchor_title: {anchor_title}
20
+ - guidance: {guidance}
21
+ - evidence_from_evaluation: {evidence}
22
+ - repo_context_excerpt (analyze tone/formatting; do not paraphrase it blindly): <<{context}>>
23
+
24
+ CRITICAL REQUIREMENTS
25
+ - Follow the guidance EXACTLY as provided: {guidance}
26
+ - Address the specific suggestions from the evaluation report precisely
27
+ - Do not deviate from the guidance or add unrelated content
28
+ - If guidance mentions specific packages, requirements, or details, include them exactly
29
+ - For RMarkdown files (.Rmd), preserve the original structure including YAML frontmatter, code chunks, and existing headers
30
+ - NEVER generate generic placeholder content like "Clear 2–3 sentence summary" or "brief description"
31
+ - ALWAYS use the specific guidance provided above to create concrete, actionable content
32
+
33
+ STYLE & CONSTRAINTS
34
+ - Fix obvious errors in the content.
35
+ - Preserve the existing tone and style markers: {tone_markers}
36
+ - Use heading style "{heading_style}" and list style "{list_style}"; link style "{link_style}".
37
+ - Neutral, professional tone; avoid marketing claims.
38
+ - Omit details you cannot substantiate from inputs/context; do not invent.
39
+ - Prefer bullets; keep it short and skimmable.
40
+ - Biomedical examples must avoid PHI; assume de-identified data.
41
+ - Output must be plain markdown for this section only, with no commentary and no backticks.
42
+ - Avoid duplication: if similar content exists in the repo context, rewrite succinctly instead of repeating.
43
+ - Never remove, alter, or recreate top-of-file badges/shields/logos (e.g., CI, PyPI, Conda, Docs shields). Assume they remain unchanged; do not output replacements for them.
44
+ - When targeting README content, do not rewrite the document title or header area; generate only the requested section body to be inserted below existing headers/badges.
45
+
46
+ SECTION GUIDELINES (follow guidance exactly)
47
+ - Dependencies: Include specific packages mentioned in guidance (e.g., "ggplot2", "dplyr", etc.)
48
+ - System Requirements: Include R version requirements and platform-specific instructions as mentioned in guidance
49
+ - Hardware Requirements: Include RAM/CPU recommendations as specified in guidance
50
+ - License: one sentence referencing the license and pointing to the LICENSE file.
51
+ - Install (clarify dependencies): Include compatibility details across operating systems and architectures as mentioned in guidance
52
+ - Tutorial improvements: Add specific examples, error handling, and reproducibility notes as mentioned in guidance
53
+ - User guide improvements: Enhance clarity, add missing information, and improve error handling as mentioned in guidance
54
+ - If the section does not fit the above, produce content that directly addresses the guidance provided.
55
+
56
+ OUTPUT FORMAT
57
+ - Return only the section markdown (no code fences).
58
+ - Start with a level-2 header: "## {anchor_title}" unless the content already starts with a header.
59
+ - Ensure the content directly addresses: {guidance}
60
+ - DO NOT include generic instructions or placeholder text
61
+ - ONLY generate content that fulfills the specific guidance provided
62
+ """
63
+
64
+ LLM_FULLDOC_PROMPT = """
65
+ You are “BioGuider,” a documentation rewriter.
66
+
67
+ GOAL
68
+ Rewrite a complete target document using only the provided evaluation report signals and the repository context excerpts. Output a full, ready-to-publish markdown file that is more complete and directly usable.
69
+
70
+ INPUTS (authoritative)
71
+ - evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
72
+ - target_file: {target_file}
73
+ - repo_context_excerpt (do not copy blindly; use only to keep style/tone): <<{context}>>
74
+
75
+ STRICT CONSTRAINTS
76
+ - Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
77
+ - Prefer completeness and usability: produce the full file content, not just minimal "added" snippets.
78
+ - Preserve top-of-file badges/logos if they exist in the original; keep title and header area intact unless the report requires changes.
79
+ - Fix obvious errors; improve structure and readability per report suggestions.
80
+ - Include ONLY sections specifically requested by the evaluation report - do not add unnecessary sections.
81
+ - Avoid redundancy: do not duplicate information across multiple sections.
82
+ - Keep links well-formed; keep neutral, professional tone; concise, skimmable formatting.
83
+ - CRITICAL: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure markdown content suitable for copy/paste.
84
+ - For RMarkdown files (.Rmd), preserve YAML frontmatter exactly and do not wrap content in code fences.
85
+
86
+ OUTPUT
87
+ - Return only the full markdown content for {target_file}. No commentary, no fences.
88
+ """
89
+
90
+
91
class LLMContentGenerator:
    """Generate documentation text by prompting a chat model."""

    def __init__(self, llm: BaseChatOpenAI):
        """Keep a reference to the chat model used for every generation call."""
        self.llm = llm

    def generate_section(self, suggestion: SuggestionItem, style: StyleProfile, context: str = "") -> tuple[str, dict]:
        """Generate one documentation section for ``suggestion``.

        The section title is the suggestion's ``anchor_hint`` when set;
        otherwise it is derived from the last dotted component of
        ``category`` (underscores become spaces, title-cased).  At most the
        first 2500 characters of ``context`` are included in the prompt.

        Returns:
            ``(section_text, token_usage)`` with ``section_text`` stripped of
            surrounding whitespace.
        """
        title = suggestion.anchor_hint
        if not title:
            # Fall back to a human-readable form of the category's tail.
            tail = suggestion.category.rpartition(".")[2]
            title = tail.replace("_", " ").title()

        source = suggestion.source
        prompt_fields = {
            "tone_markers": ", ".join(style.tone_markers or []),
            "heading_style": style.heading_style,
            "list_style": style.list_style,
            "link_style": style.link_style,
            "section": title,
            "anchor_title": title,
            "suggestion_category": suggestion.category,
            "evidence": source.get("evidence", "") if source else "",
            "context": context[:2500],
            "guidance": (suggestion.content_guidance or "").strip(),
        }
        prompt = LLM_SECTION_PROMPT.format(**prompt_fields)

        conversation = CommonConversation(self.llm)
        text, usage = conversation.generate(
            system_prompt=prompt,
            instruction_prompt="Write the section content now.",
        )
        return text.strip(), usage

    def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "") -> tuple[str, dict]:
        """Generate a complete replacement document for ``target_file``.

        The evaluation report is serialized to JSON and cut to 6000
        characters; ``context`` is cut to 4000 characters.

        Returns:
            ``(document_text, token_usage)`` with ``document_text`` stripped of
            surrounding whitespace.
        """
        report_excerpt = json.dumps(evaluation_report)[:6000]
        context_excerpt = context[:4000]
        prompt = LLM_FULLDOC_PROMPT.format(
            target_file=target_file,
            evaluation_report=report_excerpt,
            context=context_excerpt,
        )

        conversation = CommonConversation(self.llm)
        text, usage = conversation.generate(
            system_prompt=prompt,
            instruction_prompt="Write the full document now.",
        )
        return text.strip(), usage
122
+
123
+
@@ -18,6 +18,10 @@ class EvaluationReport(BaseModel):
18
18
  userguide_evaluation: Optional[Dict[str, Any]] = None
19
19
  userguide_files: Optional[List[str]] = None
20
20
 
21
+ # Optional: tutorial evaluation content and any explicitly listed files
22
+ tutorial_evaluation: Optional[Dict[str, Any]] = None
23
+ tutorial_files: Optional[List[str]] = None
24
+
21
25
  submission_requirements_evaluation: Optional[Dict[str, Any]] = None
22
26
  submission_requirements_files: Optional[List[str]] = None
23
27
 
@@ -3,14 +3,14 @@ from __future__ import annotations
3
3
  import os
4
4
  import json
5
5
  from datetime import datetime
6
- from typing import Dict, List, Tuple
6
+ from typing import Dict, List, Optional, Tuple
7
7
 
8
8
  from .models import OutputArtifact, GenerationManifest, PlannedEdit
9
9
 
10
10
 
11
11
  class OutputManager:
12
- def __init__(self, base_outputs_dir: str = "outputs"):
13
- self.base_outputs_dir = base_outputs_dir
12
+ def __init__(self, base_outputs_dir: Optional[str] = None):
13
+ self.base_outputs_dir = base_outputs_dir or "outputs"
14
14
 
15
15
  def prepare_output_dir(self, repo_url_or_name: str) -> str:
16
16
  repo_name = self._extract_repo_name(repo_url_or_name)
@@ -150,6 +150,12 @@ class EvaluationReportLoader:
150
150
  normalized["userguide_evaluation"] = userguide_eval["evaluation"]
151
151
  normalized["userguide_files"] = userguide_eval["files"]
152
152
 
153
+ # Tutorial evaluation handling
154
+ tutorial_eval = normalized.get("tutorial")
155
+ if tutorial_eval and isinstance(tutorial_eval.get("evaluation"), dict):
156
+ normalized["tutorial_evaluation"] = tutorial_eval["evaluation"]
157
+ normalized["tutorial_files"] = tutorial_eval["files"]
158
+
153
159
  # userguide_eval = normalized.get("userguide")
154
160
  # if isinstance(userguide_eval, str):
155
161
  # normalized["userguide_evaluation"] = self._parse_structured_block(userguide_eval["evaluation"], "structured_evaluation")