bioguider-0.2.28-py3-none-any.whl → bioguider-0.2.30-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/generation/change_planner.py +176 -14
- bioguider/generation/document_renderer.py +5 -0
- bioguider/generation/llm_cleaner.py +16 -8
- bioguider/generation/llm_content_generator.py +59 -8
- bioguider/generation/models.py +4 -0
- bioguider/generation/output_manager.py +3 -3
- bioguider/generation/report_loader.py +6 -0
- bioguider/generation/suggestion_extractor.py +345 -21
- bioguider/managers/generation_manager.py +356 -51
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/METADATA +1 -1
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/RECORD +13 -13
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/LICENSE +0 -0
- {bioguider-0.2.28.dist-info → bioguider-0.2.30.dist-info}/WHEEL +0 -0
|
@@ -28,7 +28,7 @@ class ChangePlanner:
|
|
|
28
28
|
pass
|
|
29
29
|
|
|
30
30
|
if s.action == "add_dependencies_section":
|
|
31
|
-
|
|
31
|
+
# Use LLM generation instead of template
|
|
32
32
|
header_key = (target, (s.anchor_hint or "Dependencies").strip().lower())
|
|
33
33
|
if header_key in seen_headers:
|
|
34
34
|
continue
|
|
@@ -36,13 +36,13 @@ class ChangePlanner:
|
|
|
36
36
|
file_path=target,
|
|
37
37
|
edit_type="append_section",
|
|
38
38
|
anchor={"type": "header", "value": s.anchor_hint or "Dependencies"},
|
|
39
|
-
content_template=
|
|
39
|
+
content_template="", # Will be generated by LLM
|
|
40
40
|
rationale=s.source.get("evidence", ""),
|
|
41
41
|
suggestion_id=s.id,
|
|
42
42
|
))
|
|
43
43
|
seen_headers.add(header_key)
|
|
44
44
|
elif s.action == "add_system_requirements_section":
|
|
45
|
-
|
|
45
|
+
# Use LLM generation instead of template
|
|
46
46
|
header_key = (target, (s.anchor_hint or "System Requirements").strip().lower())
|
|
47
47
|
if header_key in seen_headers:
|
|
48
48
|
continue
|
|
@@ -50,7 +50,7 @@ class ChangePlanner:
|
|
|
50
50
|
file_path=target,
|
|
51
51
|
edit_type="append_section",
|
|
52
52
|
anchor={"type": "header", "value": s.anchor_hint or "System Requirements"},
|
|
53
|
-
content_template=
|
|
53
|
+
content_template="", # Will be generated by LLM
|
|
54
54
|
rationale=s.source.get("evidence", ""),
|
|
55
55
|
suggestion_id=s.id,
|
|
56
56
|
))
|
|
@@ -89,7 +89,7 @@ class ChangePlanner:
|
|
|
89
89
|
seen_headers.add(header_key)
|
|
90
90
|
elif s.action == "replace_intro":
|
|
91
91
|
# Replace intro block (between H1 and first H2) with a clean Overview section
|
|
92
|
-
|
|
92
|
+
# Use empty content_template so LLM can generate content based on guidance
|
|
93
93
|
header_key = (target, "overview")
|
|
94
94
|
if header_key in seen_headers:
|
|
95
95
|
continue
|
|
@@ -97,15 +97,15 @@ class ChangePlanner:
|
|
|
97
97
|
file_path=target,
|
|
98
98
|
edit_type="replace_intro_block",
|
|
99
99
|
anchor={"type": "header", "value": "Overview"},
|
|
100
|
-
content_template=
|
|
100
|
+
content_template="", # Will be filled by LLM generation
|
|
101
101
|
rationale=s.source.get("evidence", ""),
|
|
102
102
|
suggestion_id=s.id,
|
|
103
103
|
))
|
|
104
104
|
seen_headers.add(header_key)
|
|
105
105
|
elif s.action == "clarify_mandatory_vs_optional":
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
)
|
|
106
|
+
# Use specific guidance from evaluation report instead of generic template
|
|
107
|
+
guidance = s.content_guidance or "Specify compatibility details for dependencies across operating systems and architectures."
|
|
108
|
+
content = section_header("Dependencies") + f"- {guidance}\n"
|
|
109
109
|
header_key = (target, "dependencies")
|
|
110
110
|
if header_key in seen_headers:
|
|
111
111
|
continue
|
|
@@ -119,9 +119,7 @@ class ChangePlanner:
|
|
|
119
119
|
))
|
|
120
120
|
seen_headers.add(header_key)
|
|
121
121
|
elif s.action == "add_hardware_requirements":
|
|
122
|
-
|
|
123
|
-
"- Recommended: >=16 GB RAM, multi-core CPU for large datasets.\n"
|
|
124
|
-
)
|
|
122
|
+
# Use LLM generation instead of template
|
|
125
123
|
header_key = (target, (s.anchor_hint or "Hardware Requirements").strip().lower())
|
|
126
124
|
if header_key in seen_headers:
|
|
127
125
|
continue
|
|
@@ -129,12 +127,176 @@ class ChangePlanner:
|
|
|
129
127
|
file_path=target,
|
|
130
128
|
edit_type="append_section",
|
|
131
129
|
anchor={"type": "header", "value": s.anchor_hint or "Hardware Requirements"},
|
|
132
|
-
content_template=
|
|
130
|
+
content_template="", # Will be generated by LLM
|
|
133
131
|
rationale=s.source.get("evidence", ""),
|
|
134
132
|
suggestion_id=s.id,
|
|
135
133
|
))
|
|
136
134
|
seen_headers.add(header_key)
|
|
135
|
+
elif s.action == "improve_clarity_and_error_handling":
|
|
136
|
+
# Handle targeted improvements to user guides
|
|
137
|
+
planned.append(PlannedEdit(
|
|
138
|
+
file_path=target,
|
|
139
|
+
edit_type="append_section",
|
|
140
|
+
anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
|
|
141
|
+
content_template="", # Will be filled by LLM generation
|
|
142
|
+
rationale=s.source.get("evidence", ""),
|
|
143
|
+
suggestion_id=s.id,
|
|
144
|
+
))
|
|
145
|
+
elif s.action == "improve_consistency":
|
|
146
|
+
# Handle consistency improvements
|
|
147
|
+
planned.append(PlannedEdit(
|
|
148
|
+
file_path=target,
|
|
149
|
+
edit_type="append_section",
|
|
150
|
+
anchor={"type": "header", "value": s.anchor_hint or "Examples"},
|
|
151
|
+
content_template="", # Will be filled by LLM generation
|
|
152
|
+
rationale=s.source.get("evidence", ""),
|
|
153
|
+
suggestion_id=s.id,
|
|
154
|
+
))
|
|
155
|
+
elif s.action == "improve_tutorial_quality":
|
|
156
|
+
# Handle tutorial quality improvements
|
|
157
|
+
planned.append(PlannedEdit(
|
|
158
|
+
file_path=target,
|
|
159
|
+
edit_type="append_section",
|
|
160
|
+
anchor={"type": "header", "value": s.anchor_hint or "Setup"},
|
|
161
|
+
content_template="", # Will be filled by LLM generation
|
|
162
|
+
rationale=s.source.get("evidence", ""),
|
|
163
|
+
suggestion_id=s.id,
|
|
164
|
+
))
|
|
165
|
+
elif s.action == "improve_readability":
|
|
166
|
+
# Handle readability improvements
|
|
167
|
+
header_key = (target, (s.anchor_hint or "Introduction").strip().lower())
|
|
168
|
+
if header_key in seen_headers:
|
|
169
|
+
continue
|
|
170
|
+
planned.append(PlannedEdit(
|
|
171
|
+
file_path=target,
|
|
172
|
+
edit_type="append_section",
|
|
173
|
+
anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
|
|
174
|
+
content_template="", # Will be filled by LLM generation
|
|
175
|
+
rationale=s.source.get("evidence", ""),
|
|
176
|
+
suggestion_id=s.id,
|
|
177
|
+
))
|
|
178
|
+
seen_headers.add(header_key)
|
|
179
|
+
elif s.action == "improve_setup":
|
|
180
|
+
# Handle setup improvements
|
|
181
|
+
planned.append(PlannedEdit(
|
|
182
|
+
file_path=target,
|
|
183
|
+
edit_type="append_section",
|
|
184
|
+
anchor={"type": "header", "value": s.anchor_hint or "Setup"},
|
|
185
|
+
content_template="", # Will be filled by LLM generation
|
|
186
|
+
rationale=s.source.get("evidence", ""),
|
|
187
|
+
suggestion_id=s.id,
|
|
188
|
+
))
|
|
189
|
+
elif s.action == "improve_reproducibility":
|
|
190
|
+
# Handle reproducibility improvements
|
|
191
|
+
planned.append(PlannedEdit(
|
|
192
|
+
file_path=target,
|
|
193
|
+
edit_type="append_section",
|
|
194
|
+
anchor={"type": "header", "value": s.anchor_hint or "Setup"},
|
|
195
|
+
content_template="", # Will be filled by LLM generation
|
|
196
|
+
rationale=s.source.get("evidence", ""),
|
|
197
|
+
suggestion_id=s.id,
|
|
198
|
+
))
|
|
199
|
+
elif s.action == "improve_structure":
|
|
200
|
+
# Handle structure improvements
|
|
201
|
+
planned.append(PlannedEdit(
|
|
202
|
+
file_path=target,
|
|
203
|
+
edit_type="append_section",
|
|
204
|
+
anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
|
|
205
|
+
content_template="", # Will be filled by LLM generation
|
|
206
|
+
rationale=s.source.get("evidence", ""),
|
|
207
|
+
suggestion_id=s.id,
|
|
208
|
+
))
|
|
209
|
+
elif s.action == "improve_code_quality":
|
|
210
|
+
# Handle code quality improvements
|
|
211
|
+
planned.append(PlannedEdit(
|
|
212
|
+
file_path=target,
|
|
213
|
+
edit_type="append_section",
|
|
214
|
+
anchor={"type": "header", "value": s.anchor_hint or "Code Examples"},
|
|
215
|
+
content_template="", # Will be filled by LLM generation
|
|
216
|
+
rationale=s.source.get("evidence", ""),
|
|
217
|
+
suggestion_id=s.id,
|
|
218
|
+
))
|
|
219
|
+
elif s.action == "improve_verification":
|
|
220
|
+
# Handle verification improvements
|
|
221
|
+
planned.append(PlannedEdit(
|
|
222
|
+
file_path=target,
|
|
223
|
+
edit_type="append_section",
|
|
224
|
+
anchor={"type": "header", "value": s.anchor_hint or "Results"},
|
|
225
|
+
content_template="", # Will be filled by LLM generation
|
|
226
|
+
rationale=s.source.get("evidence", ""),
|
|
227
|
+
suggestion_id=s.id,
|
|
228
|
+
))
|
|
229
|
+
elif s.action == "improve_performance":
|
|
230
|
+
# Handle performance improvements
|
|
231
|
+
planned.append(PlannedEdit(
|
|
232
|
+
file_path=target,
|
|
233
|
+
edit_type="append_section",
|
|
234
|
+
anchor={"type": "header", "value": s.anchor_hint or "Performance"},
|
|
235
|
+
content_template="", # Will be filled by LLM generation
|
|
236
|
+
rationale=s.source.get("evidence", ""),
|
|
237
|
+
suggestion_id=s.id,
|
|
238
|
+
))
|
|
239
|
+
elif s.action == "improve_context":
|
|
240
|
+
# Handle context improvements for userguides
|
|
241
|
+
header_key = (target, (s.anchor_hint or "Introduction").strip().lower())
|
|
242
|
+
if header_key in seen_headers:
|
|
243
|
+
continue
|
|
244
|
+
planned.append(PlannedEdit(
|
|
245
|
+
file_path=target,
|
|
246
|
+
edit_type="append_section",
|
|
247
|
+
anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
|
|
248
|
+
content_template="", # Will be filled by LLM generation
|
|
249
|
+
rationale=s.source.get("evidence", ""),
|
|
250
|
+
suggestion_id=s.id,
|
|
251
|
+
))
|
|
252
|
+
seen_headers.add(header_key)
|
|
253
|
+
elif s.action == "improve_error_handling":
|
|
254
|
+
# Handle error handling improvements for userguides
|
|
255
|
+
header_key = (target, (s.anchor_hint or "Examples").strip().lower())
|
|
256
|
+
if header_key in seen_headers:
|
|
257
|
+
continue
|
|
258
|
+
planned.append(PlannedEdit(
|
|
259
|
+
file_path=target,
|
|
260
|
+
edit_type="append_section",
|
|
261
|
+
anchor={"type": "header", "value": s.anchor_hint or "Examples"},
|
|
262
|
+
content_template="", # Will be filled by LLM generation
|
|
263
|
+
rationale=s.source.get("evidence", ""),
|
|
264
|
+
suggestion_id=s.id,
|
|
265
|
+
))
|
|
266
|
+
seen_headers.add(header_key)
|
|
267
|
+
elif s.action == "add_overview_section":
|
|
268
|
+
# Handle overview section for README
|
|
269
|
+
planned.append(PlannedEdit(
|
|
270
|
+
file_path=target,
|
|
271
|
+
edit_type="append_section",
|
|
272
|
+
anchor={"type": "header", "value": s.anchor_hint or "Overview"},
|
|
273
|
+
content_template="", # Will be filled by LLM generation
|
|
274
|
+
rationale=s.source.get("evidence", ""),
|
|
275
|
+
suggestion_id=s.id,
|
|
276
|
+
))
|
|
277
|
+
elif s.action == "full_replace":
|
|
278
|
+
# Handle full document replacement
|
|
279
|
+
planned.append(PlannedEdit(
|
|
280
|
+
file_path=target,
|
|
281
|
+
edit_type="full_replace",
|
|
282
|
+
anchor={"type": "document", "value": "full_document"},
|
|
283
|
+
content_template="", # Will be filled by LLM generation
|
|
284
|
+
rationale=s.source.get("evidence", ""),
|
|
285
|
+
suggestion_id=s.id,
|
|
286
|
+
))
|
|
287
|
+
|
|
288
|
+
# If a file is planned for full_replace, suppress other edits for that file to avoid redundancy
|
|
289
|
+
by_file: Dict[str, List[PlannedEdit]] = {}
|
|
290
|
+
for e in planned:
|
|
291
|
+
by_file.setdefault(e.file_path, []).append(e)
|
|
292
|
+
filtered: List[PlannedEdit] = []
|
|
293
|
+
for fpath, edits in by_file.items():
|
|
294
|
+
has_full = any(e.edit_type == "full_replace" for e in edits)
|
|
295
|
+
if has_full:
|
|
296
|
+
filtered.extend([e for e in edits if e.edit_type == "full_replace"])
|
|
297
|
+
else:
|
|
298
|
+
filtered.extend(edits)
|
|
137
299
|
|
|
138
|
-
return DocumentPlan(repo_path=repo_path, style_profile=style, planned_edits=
|
|
300
|
+
return DocumentPlan(repo_path=repo_path, style_profile=style, planned_edits=filtered)
|
|
139
301
|
|
|
140
302
|
|
|
@@ -40,6 +40,11 @@ class DocumentRenderer:
|
|
|
40
40
|
added = len(edit.content_template.splitlines())
|
|
41
41
|
content = new_content
|
|
42
42
|
|
|
43
|
+
elif edit.edit_type == "full_replace":
|
|
44
|
+
# Replace entire document content
|
|
45
|
+
content = edit.content_template
|
|
46
|
+
added = len(edit.content_template.splitlines())
|
|
47
|
+
|
|
43
48
|
# Other edit types (insert_after_header, replace_block) can be added as needed
|
|
44
49
|
|
|
45
50
|
return content, {"added_lines": added}
|
|
@@ -6,10 +6,10 @@ from bioguider.agents.common_conversation import CommonConversation
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
CLEANUP_PROMPT = """
|
|
9
|
-
You are
|
|
9
|
+
You are "BioGuider," a precise editor for biomedical/bioinformatics documentation.
|
|
10
10
|
|
|
11
11
|
TASK
|
|
12
|
-
Given a
|
|
12
|
+
Given a documentation file (README, RMarkdown, or other), produce a corrected version that:
|
|
13
13
|
- Fixes typos, grammar, capitalization, and spacing
|
|
14
14
|
- Corrects malformed markdown (headers, lists, links, code fences)
|
|
15
15
|
- Repairs or normalizes link formatting; keep URLs absolute if present
|
|
@@ -17,14 +17,22 @@ Given a full README markdown, produce a corrected version that:
|
|
|
17
17
|
- Preserves technical accuracy and biomedical domain terminology (do not invent features)
|
|
18
18
|
- Keeps tone neutral and professional; avoid marketing language
|
|
19
19
|
- Preserves all valid information; do not delete content unless it is a duplicate or malformed
|
|
20
|
+
- For RMarkdown files (.Rmd): Preserve YAML frontmatter, R code chunks, and existing structure exactly
|
|
21
|
+
|
|
22
|
+
CRITICAL REQUIREMENTS:
|
|
23
|
+
- Do NOT wrap the entire document in markdown code fences (```markdown). Return pure content only.
|
|
24
|
+
- If the document starts with ```markdown and ends with ```, remove these fences completely.
|
|
25
|
+
- Do NOT modify YAML frontmatter in RMarkdown files
|
|
26
|
+
- Do NOT modify R code chunks (```{r} blocks) in RMarkdown files
|
|
27
|
+
- Do NOT change the overall structure or organization of the document
|
|
20
28
|
|
|
21
29
|
INPUT
|
|
22
|
-
<<
|
|
23
|
-
{
|
|
24
|
-
<</
|
|
30
|
+
<<DOCUMENT>>
|
|
31
|
+
{doc}
|
|
32
|
+
<</DOCUMENT>>
|
|
25
33
|
|
|
26
34
|
OUTPUT
|
|
27
|
-
Return ONLY the revised
|
|
35
|
+
Return ONLY the revised content (no commentary, no explanations, no code fences).
|
|
28
36
|
"""
|
|
29
37
|
|
|
30
38
|
|
|
@@ -35,8 +43,8 @@ class LLMCleaner:
|
|
|
35
43
|
def clean_readme(self, content: str) -> tuple[str, dict]:
|
|
36
44
|
conv = CommonConversation(self.llm)
|
|
37
45
|
output, token_usage = conv.generate(
|
|
38
|
-
system_prompt=CLEANUP_PROMPT.format(
|
|
39
|
-
instruction_prompt="Provide the corrected
|
|
46
|
+
system_prompt=CLEANUP_PROMPT.format(doc=content[:30000]),
|
|
47
|
+
instruction_prompt="Provide the corrected documentation content only.",
|
|
40
48
|
)
|
|
41
49
|
return output.strip(), token_usage
|
|
42
50
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from typing import Dict
|
|
4
|
+
import json
|
|
4
5
|
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
5
6
|
|
|
6
7
|
from bioguider.agents.common_conversation import CommonConversation
|
|
@@ -8,10 +9,10 @@ from .models import StyleProfile, SuggestionItem
|
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
LLM_SECTION_PROMPT = """
|
|
11
|
-
You are
|
|
12
|
+
You are "BioGuider," a precise documentation generator for biomedical/bioinformatics software.
|
|
12
13
|
|
|
13
14
|
GOAL
|
|
14
|
-
Write or refine a single documentation section named "{section}".
|
|
15
|
+
Write or refine a single documentation section named "{section}". Follow the specific guidance from the evaluation report exactly.
|
|
15
16
|
|
|
16
17
|
INPUTS (use only what is provided; never invent)
|
|
17
18
|
- suggestion_category: {suggestion_category}
|
|
@@ -20,6 +21,15 @@ INPUTS (use only what is provided; never invent)
|
|
|
20
21
|
- evidence_from_evaluation: {evidence}
|
|
21
22
|
- repo_context_excerpt (analyze tone/formatting; do not paraphrase it blindly): <<{context}>>
|
|
22
23
|
|
|
24
|
+
CRITICAL REQUIREMENTS
|
|
25
|
+
- Follow the guidance EXACTLY as provided: {guidance}
|
|
26
|
+
- Address the specific suggestions from the evaluation report precisely
|
|
27
|
+
- Do not deviate from the guidance or add unrelated content
|
|
28
|
+
- If guidance mentions specific packages, requirements, or details, include them exactly
|
|
29
|
+
- For RMarkdown files (.Rmd), preserve the original structure including YAML frontmatter, code chunks, and existing headers
|
|
30
|
+
- NEVER generate generic placeholder content like "Clear 2–3 sentence summary" or "brief description"
|
|
31
|
+
- ALWAYS use the specific guidance provided above to create concrete, actionable content
|
|
32
|
+
|
|
23
33
|
STYLE & CONSTRAINTS
|
|
24
34
|
- Fix obvious errors in the content.
|
|
25
35
|
- Preserve the existing tone and style markers: {tone_markers}
|
|
@@ -33,17 +43,48 @@ STYLE & CONSTRAINTS
|
|
|
33
43
|
- Never remove, alter, or recreate top-of-file badges/shields/logos (e.g., CI, PyPI, Conda, Docs shields). Assume they remain unchanged; do not output replacements for them.
|
|
34
44
|
- When targeting README content, do not rewrite the document title or header area; generate only the requested section body to be inserted below existing headers/badges.
|
|
35
45
|
|
|
36
|
-
SECTION GUIDELINES
|
|
37
|
-
- Dependencies:
|
|
38
|
-
- System Requirements:
|
|
39
|
-
- Hardware Requirements:
|
|
46
|
+
SECTION GUIDELINES (follow guidance exactly)
|
|
47
|
+
- Dependencies: Include specific packages mentioned in guidance (e.g., "ggplot2", "dplyr", etc.)
|
|
48
|
+
- System Requirements: Include R version requirements and platform-specific instructions as mentioned in guidance
|
|
49
|
+
- Hardware Requirements: Include RAM/CPU recommendations as specified in guidance
|
|
40
50
|
- License: one sentence referencing the license and pointing to the LICENSE file.
|
|
41
|
-
- Install (clarify dependencies):
|
|
42
|
-
-
|
|
51
|
+
- Install (clarify dependencies): Include compatibility details across operating systems and architectures as mentioned in guidance
|
|
52
|
+
- Tutorial improvements: Add specific examples, error handling, and reproducibility notes as mentioned in guidance
|
|
53
|
+
- User guide improvements: Enhance clarity, add missing information, and improve error handling as mentioned in guidance
|
|
54
|
+
- If the section does not fit the above, produce content that directly addresses the guidance provided.
|
|
43
55
|
|
|
44
56
|
OUTPUT FORMAT
|
|
45
57
|
- Return only the section markdown (no code fences).
|
|
46
58
|
- Start with a level-2 header: "## {anchor_title}" unless the content already starts with a header.
|
|
59
|
+
- Ensure the content directly addresses: {guidance}
|
|
60
|
+
- DO NOT include generic instructions or placeholder text
|
|
61
|
+
- ONLY generate content that fulfills the specific guidance provided
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
LLM_FULLDOC_PROMPT = """
|
|
65
|
+
You are “BioGuider,” a documentation rewriter.
|
|
66
|
+
|
|
67
|
+
GOAL
|
|
68
|
+
Rewrite a complete target document using only the provided evaluation report signals and the repository context excerpts. Output a full, ready-to-publish markdown file that is more complete and directly usable.
|
|
69
|
+
|
|
70
|
+
INPUTS (authoritative)
|
|
71
|
+
- evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
|
|
72
|
+
- target_file: {target_file}
|
|
73
|
+
- repo_context_excerpt (do not copy blindly; use only to keep style/tone): <<{context}>>
|
|
74
|
+
|
|
75
|
+
STRICT CONSTRAINTS
|
|
76
|
+
- Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
|
|
77
|
+
- Prefer completeness and usability: produce the full file content, not just minimal "added" snippets.
|
|
78
|
+
- Preserve top-of-file badges/logos if they exist in the original; keep title and header area intact unless the report requires changes.
|
|
79
|
+
- Fix obvious errors; improve structure and readability per report suggestions.
|
|
80
|
+
- Include ONLY sections specifically requested by the evaluation report - do not add unnecessary sections.
|
|
81
|
+
- Avoid redundancy: do not duplicate information across multiple sections.
|
|
82
|
+
- Keep links well-formed; keep neutral, professional tone; concise, skimmable formatting.
|
|
83
|
+
- CRITICAL: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure markdown content suitable for copy/paste.
|
|
84
|
+
- For RMarkdown files (.Rmd), preserve YAML frontmatter exactly and do not wrap content in code fences.
|
|
85
|
+
|
|
86
|
+
OUTPUT
|
|
87
|
+
- Return only the full markdown content for {target_file}. No commentary, no fences.
|
|
47
88
|
"""
|
|
48
89
|
|
|
49
90
|
|
|
@@ -69,4 +110,14 @@ class LLMContentGenerator:
|
|
|
69
110
|
content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the section content now.")
|
|
70
111
|
return content.strip(), token_usage
|
|
71
112
|
|
|
113
|
+
def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "") -> tuple[str, dict]:
|
|
114
|
+
conv = CommonConversation(self.llm)
|
|
115
|
+
system_prompt = LLM_FULLDOC_PROMPT.format(
|
|
116
|
+
target_file=target_file,
|
|
117
|
+
evaluation_report=json.dumps(evaluation_report)[:6000],
|
|
118
|
+
context=context[:4000],
|
|
119
|
+
)
|
|
120
|
+
content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the full document now.")
|
|
121
|
+
return content.strip(), token_usage
|
|
122
|
+
|
|
72
123
|
|
bioguider/generation/models.py
CHANGED
|
@@ -18,6 +18,10 @@ class EvaluationReport(BaseModel):
|
|
|
18
18
|
userguide_evaluation: Optional[Dict[str, Any]] = None
|
|
19
19
|
userguide_files: Optional[List[str]] = None
|
|
20
20
|
|
|
21
|
+
# Optional: tutorial evaluation content and any explicitly listed files
|
|
22
|
+
tutorial_evaluation: Optional[Dict[str, Any]] = None
|
|
23
|
+
tutorial_files: Optional[List[str]] = None
|
|
24
|
+
|
|
21
25
|
submission_requirements_evaluation: Optional[Dict[str, Any]] = None
|
|
22
26
|
submission_requirements_files: Optional[List[str]] = None
|
|
23
27
|
|
|
@@ -3,14 +3,14 @@ from __future__ import annotations
|
|
|
3
3
|
import os
|
|
4
4
|
import json
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
from typing import Dict, List, Tuple
|
|
6
|
+
from typing import Dict, List, Optional, Tuple
|
|
7
7
|
|
|
8
8
|
from .models import OutputArtifact, GenerationManifest, PlannedEdit
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class OutputManager:
|
|
12
|
-
def __init__(self, base_outputs_dir: str =
|
|
13
|
-
self.base_outputs_dir = base_outputs_dir
|
|
12
|
+
def __init__(self, base_outputs_dir: Optional[str] = None):
|
|
13
|
+
self.base_outputs_dir = base_outputs_dir or "outputs"
|
|
14
14
|
|
|
15
15
|
def prepare_output_dir(self, repo_url_or_name: str) -> str:
|
|
16
16
|
repo_name = self._extract_repo_name(repo_url_or_name)
|
|
@@ -150,6 +150,12 @@ class EvaluationReportLoader:
|
|
|
150
150
|
normalized["userguide_evaluation"] = userguide_eval["evaluation"]
|
|
151
151
|
normalized["userguide_files"] = userguide_eval["files"]
|
|
152
152
|
|
|
153
|
+
# Tutorial evaluation handling
|
|
154
|
+
tutorial_eval = normalized.get("tutorial")
|
|
155
|
+
if tutorial_eval and isinstance(tutorial_eval.get("evaluation"), dict):
|
|
156
|
+
normalized["tutorial_evaluation"] = tutorial_eval["evaluation"]
|
|
157
|
+
normalized["tutorial_files"] = tutorial_eval["files"]
|
|
158
|
+
|
|
153
159
|
# userguide_eval = normalized.get("userguide")
|
|
154
160
|
# if isinstance(userguide_eval, str):
|
|
155
161
|
# normalized["userguide_evaluation"] = self._parse_structured_block(userguide_eval["evaluation"], "structured_evaluation")
|