bioguider 0.2.29__tar.gz → 0.2.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

Files changed (85)
  1. {bioguider-0.2.29 → bioguider-0.2.31}/PKG-INFO +1 -1
  2. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/change_planner.py +63 -14
  3. bioguider-0.2.31/bioguider/generation/document_renderer.py +157 -0
  4. bioguider-0.2.31/bioguider/generation/llm_cleaner.py +67 -0
  5. bioguider-0.2.31/bioguider/generation/llm_content_generator.py +180 -0
  6. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/models.py +4 -0
  7. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/report_loader.py +6 -0
  8. bioguider-0.2.31/bioguider/generation/suggestion_extractor.py +436 -0
  9. bioguider-0.2.31/bioguider/managers/generation_manager.py +547 -0
  10. {bioguider-0.2.29 → bioguider-0.2.31}/pyproject.toml +1 -1
  11. bioguider-0.2.29/bioguider/generation/document_renderer.py +0 -47
  12. bioguider-0.2.29/bioguider/generation/llm_cleaner.py +0 -43
  13. bioguider-0.2.29/bioguider/generation/llm_content_generator.py +0 -72
  14. bioguider-0.2.29/bioguider/generation/suggestion_extractor.py +0 -136
  15. bioguider-0.2.29/bioguider/managers/generation_manager.py +0 -206
  16. {bioguider-0.2.29 → bioguider-0.2.31}/LICENSE +0 -0
  17. {bioguider-0.2.29 → bioguider-0.2.31}/README.md +0 -0
  18. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/__init__.py +0 -0
  19. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/__init__.py +0 -0
  20. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/agent_task.py +0 -0
  21. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/agent_tools.py +0 -0
  22. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/agent_utils.py +0 -0
  23. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/collection_execute_step.py +0 -0
  24. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/collection_observe_step.py +0 -0
  25. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/collection_plan_step.py +0 -0
  26. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/collection_task.py +0 -0
  27. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/collection_task_utils.py +0 -0
  28. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/common_agent.py +0 -0
  29. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/common_agent_2step.py +0 -0
  30. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/common_conversation.py +0 -0
  31. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/common_step.py +0 -0
  32. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/consistency_collection_step.py +0 -0
  33. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/consistency_evaluation_task.py +0 -0
  34. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/consistency_evaluation_task_utils.py +0 -0
  35. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/consistency_observe_step.py +0 -0
  36. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/consistency_query_step.py +0 -0
  37. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/dockergeneration_execute_step.py +0 -0
  38. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/dockergeneration_observe_step.py +0 -0
  39. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/dockergeneration_plan_step.py +0 -0
  40. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/dockergeneration_task.py +0 -0
  41. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/dockergeneration_task_utils.py +0 -0
  42. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_installation_task.py +0 -0
  43. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_readme_task.py +0 -0
  44. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
  45. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_task.py +0 -0
  46. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_tutorial_task.py +0 -0
  47. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_tutorial_task_prompts.py +0 -0
  48. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_userguide_prompts.py +0 -0
  49. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/evaluation_userguide_task.py +0 -0
  50. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/identification_execute_step.py +0 -0
  51. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/identification_observe_step.py +0 -0
  52. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/identification_plan_step.py +0 -0
  53. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/identification_task.py +0 -0
  54. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/identification_task_utils.py +0 -0
  55. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/peo_common_step.py +0 -0
  56. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/prompt_utils.py +0 -0
  57. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/python_ast_repl_tool.py +0 -0
  58. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/agents/rag_collection_task.py +0 -0
  59. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/conversation.py +0 -0
  60. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/database/code_structure_db.py +0 -0
  61. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/database/summarized_file_db.py +0 -0
  62. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/__init__.py +0 -0
  63. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/llm_injector.py +0 -0
  64. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/output_manager.py +0 -0
  65. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/repo_reader.py +0 -0
  66. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/style_analyzer.py +0 -0
  67. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/generation/test_metrics.py +0 -0
  68. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/managers/evaluation_manager.py +0 -0
  69. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/managers/generation_test_manager.py +0 -0
  70. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/rag/__init__.py +0 -0
  71. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/rag/config.py +0 -0
  72. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/rag/data_pipeline.py +0 -0
  73. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/rag/embedder.py +0 -0
  74. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/rag/rag.py +0 -0
  75. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/settings.py +0 -0
  76. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/code_structure_builder.py +0 -0
  77. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/constants.py +0 -0
  78. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/default.gitignore +0 -0
  79. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/file_utils.py +0 -0
  80. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/gitignore_checker.py +0 -0
  81. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/notebook_utils.py +0 -0
  82. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/pyphen_utils.py +0 -0
  83. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/python_file_handler.py +0 -0
  84. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/r_file_handler.py +0 -0
  85. {bioguider-0.2.29 → bioguider-0.2.31}/bioguider/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: bioguider
3
- Version: 0.2.29
3
+ Version: 0.2.31
4
4
  Summary: An AI-Powered package to help biomedical developers to generate clear documentation
5
5
  License: MIT
6
6
  Author: Cankun Wang
@@ -28,7 +28,7 @@ class ChangePlanner:
28
28
  pass
29
29
 
30
30
  if s.action == "add_dependencies_section":
31
- content = section_header("Dependencies") + "- List required packages and versions.\n"
31
+ # Use LLM generation instead of template
32
32
  header_key = (target, (s.anchor_hint or "Dependencies").strip().lower())
33
33
  if header_key in seen_headers:
34
34
  continue
@@ -36,13 +36,13 @@ class ChangePlanner:
36
36
  file_path=target,
37
37
  edit_type="append_section",
38
38
  anchor={"type": "header", "value": s.anchor_hint or "Dependencies"},
39
- content_template=content,
39
+ content_template="", # Will be generated by LLM
40
40
  rationale=s.source.get("evidence", ""),
41
41
  suggestion_id=s.id,
42
42
  ))
43
43
  seen_headers.add(header_key)
44
44
  elif s.action == "add_system_requirements_section":
45
- content = section_header("System Requirements") + "- OS and R version requirements.\n"
45
+ # Use LLM generation instead of template
46
46
  header_key = (target, (s.anchor_hint or "System Requirements").strip().lower())
47
47
  if header_key in seen_headers:
48
48
  continue
@@ -50,7 +50,7 @@ class ChangePlanner:
50
50
  file_path=target,
51
51
  edit_type="append_section",
52
52
  anchor={"type": "header", "value": s.anchor_hint or "System Requirements"},
53
- content_template=content,
53
+ content_template="", # Will be generated by LLM
54
54
  rationale=s.source.get("evidence", ""),
55
55
  suggestion_id=s.id,
56
56
  ))
@@ -89,7 +89,7 @@ class ChangePlanner:
89
89
  seen_headers.add(header_key)
90
90
  elif s.action == "replace_intro":
91
91
  # Replace intro block (between H1 and first H2) with a clean Overview section
92
- content = section_header("Overview") + "- Clear 2–3 sentence summary of purpose and audience.\n"
92
+ # Use empty content_template so LLM can generate content based on guidance
93
93
  header_key = (target, "overview")
94
94
  if header_key in seen_headers:
95
95
  continue
@@ -97,15 +97,15 @@ class ChangePlanner:
97
97
  file_path=target,
98
98
  edit_type="replace_intro_block",
99
99
  anchor={"type": "header", "value": "Overview"},
100
- content_template=content,
100
+ content_template="", # Will be filled by LLM generation
101
101
  rationale=s.source.get("evidence", ""),
102
102
  suggestion_id=s.id,
103
103
  ))
104
104
  seen_headers.add(header_key)
105
105
  elif s.action == "clarify_mandatory_vs_optional":
106
- content = section_header("Dependencies") + (
107
- "- Mandatory: ...\n- Optional: ...\n"
108
- )
106
+ # Use specific guidance from evaluation report instead of generic template
107
+ guidance = s.content_guidance or "Specify compatibility details for dependencies across operating systems and architectures."
108
+ content = section_header("Dependencies") + f"- {guidance}\n"
109
109
  header_key = (target, "dependencies")
110
110
  if header_key in seen_headers:
111
111
  continue
@@ -119,9 +119,7 @@ class ChangePlanner:
119
119
  ))
120
120
  seen_headers.add(header_key)
121
121
  elif s.action == "add_hardware_requirements":
122
- content = section_header("Hardware Requirements") + (
123
- "- Recommended: >=16 GB RAM, multi-core CPU for large datasets.\n"
124
- )
122
+ # Use LLM generation instead of template
125
123
  header_key = (target, (s.anchor_hint or "Hardware Requirements").strip().lower())
126
124
  if header_key in seen_headers:
127
125
  continue
@@ -129,12 +127,63 @@ class ChangePlanner:
129
127
  file_path=target,
130
128
  edit_type="append_section",
131
129
  anchor={"type": "header", "value": s.anchor_hint or "Hardware Requirements"},
132
- content_template=content,
130
+ content_template="", # Will be generated by LLM
133
131
  rationale=s.source.get("evidence", ""),
134
132
  suggestion_id=s.id,
135
133
  ))
136
134
  seen_headers.add(header_key)
135
+ elif s.action == "improve_clarity_and_error_handling":
136
+ # Handle targeted improvements to user guides
137
+ planned.append(PlannedEdit(
138
+ file_path=target,
139
+ edit_type="append_section",
140
+ anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
141
+ content_template="", # Will be filled by LLM generation
142
+ rationale=s.source.get("evidence", ""),
143
+ suggestion_id=s.id,
144
+ ))
145
+ elif s.action == "improve_consistency":
146
+ # Handle consistency improvements
147
+ planned.append(PlannedEdit(
148
+ file_path=target,
149
+ edit_type="append_section",
150
+ anchor={"type": "header", "value": s.anchor_hint or "Examples"},
151
+ content_template="", # Will be filled by LLM generation
152
+ rationale=s.source.get("evidence", ""),
153
+ suggestion_id=s.id,
154
+ ))
155
+ elif s.action == "improve_tutorial_quality":
156
+ # Handle tutorial quality improvements
157
+ planned.append(PlannedEdit(
158
+ file_path=target,
159
+ edit_type="append_section",
160
+ anchor={"type": "header", "value": s.anchor_hint or "Setup"},
161
+ content_template="", # Will be filled by LLM generation
162
+ rationale=s.source.get("evidence", ""),
163
+ suggestion_id=s.id,
164
+ ))
165
+ # All actions now use full_replace mode
166
+ planned.append(PlannedEdit(
167
+ file_path=target,
168
+ edit_type="full_replace",
169
+ anchor={"type": "document", "value": "full_document"},
170
+ content_template="", # Will be filled by LLM generation
171
+ rationale=s.source.get("evidence", ""),
172
+ suggestion_id=s.id,
173
+ ))
174
+
175
+ # If a file is planned for full_replace, suppress other edits for that file to avoid redundancy
176
+ by_file: Dict[str, List[PlannedEdit]] = {}
177
+ for e in planned:
178
+ by_file.setdefault(e.file_path, []).append(e)
179
+ filtered: List[PlannedEdit] = []
180
+ for fpath, edits in by_file.items():
181
+ has_full = any(e.edit_type == "full_replace" for e in edits)
182
+ if has_full:
183
+ filtered.extend([e for e in edits if e.edit_type == "full_replace"])
184
+ else:
185
+ filtered.extend(edits)
137
186
 
138
- return DocumentPlan(repo_path=repo_path, style_profile=style, planned_edits=planned)
187
+ return DocumentPlan(repo_path=repo_path, style_profile=style, planned_edits=filtered)
139
188
 
140
189
 
@@ -0,0 +1,157 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Tuple
4
+
5
+ from .models import PlannedEdit
6
+
7
+
8
class DocumentRenderer:
    """Apply a single planned edit to the text of a document.

    Supported ``edit_type`` values are ``append_section``,
    ``replace_intro_block``, ``insert_after_header``,
    ``rmarkdown_integration`` and ``full_replace``. Any other edit type
    leaves the document unchanged.
    """

    def apply_edit(self, original: str, edit: PlannedEdit) -> Tuple[str, dict]:
        """Return the edited document and a stats dict.

        Args:
            original: Current text of the document.
            edit: The edit to apply. Only ``edit_type``, ``content_template``
                and (for header-anchored edits) ``anchor["value"]`` are read.

        Returns:
            ``(new_text, {"added_lines": n})`` where ``n`` is the number of
            lines contributed by the edit (0 when nothing was changed).
        """
        template = edit.content_template
        kind = edit.edit_type
        content = original
        added = 0

        if kind == "append_section":
            # Skip the edit when the section's header line is already present,
            # so the same section is not appended twice.
            header_line = None
            if template.lstrip().startswith("#"):
                header_line = template.strip().splitlines()[0].strip()
            if header_line and header_line in original:
                return original, {"added_lines": 0}
            content, added = self._append(original, template)

        elif kind == "replace_intro_block":
            # Everything before the first level-2 header is replaced by the
            # template; with no level-2 header the whole document is replaced.
            lines = original.splitlines()
            end_idx = next(
                (i for i, ln in enumerate(lines) if ln.strip().startswith("## ")),
                None,
            )
            if end_idx is None:
                content = template
            else:
                content = template.rstrip() + "\n\n" + "\n".join(lines[end_idx:])
            added = len(template.splitlines())

        elif kind in ("insert_after_header", "rmarkdown_integration"):
            header_value = edit.anchor.get("value", "")
            lines = original.splitlines()
            insert_idx = (
                self._find_insert_index(lines, header_value) if header_value else None
            )
            if insert_idx is None:
                # No anchor given or header not found: fall back to appending.
                content, added = self._append(original, template)
            else:
                if kind == "insert_after_header":
                    new_lines = self._demote_headers(template)
                else:
                    new_lines = self._contextualize_headers(template)
                content = "\n".join(
                    lines[:insert_idx] + [""] + new_lines + lines[insert_idx:]
                )
                added = len(new_lines)

        elif kind == "full_replace":
            # Replace entire document content
            content = template
            added = len(template.splitlines())

        # Other edit types (replace_block) can be added as needed

        return content, {"added_lines": added}

    @staticmethod
    def _append(content: str, template: str) -> Tuple[str, int]:
        """Append *template* to *content*, separated by a blank line."""
        sep = "\n\n" if not content.endswith("\n\n") else ""
        return f"{content}{sep}{template}", len(template.splitlines())

    @staticmethod
    def _find_insert_index(lines: list[str], header_value: str):
        """Locate where to insert content inside the section named *header_value*.

        The first header line containing *header_value* (case-insensitive)
        selects the section. The returned index is, in order of precedence:
        just after the section's first fenced code block, just before the next
        level-1/level-2 header, or the end of the document. Returns ``None``
        when no matching header exists.
        """
        needle = header_value.lower()
        total = len(lines)
        for i, line in enumerate(lines):
            if not (line.strip().startswith("#") and needle in line.lower()):
                continue
            idx = i + 1
            while idx < total:
                stripped = lines[idx].strip()
                if stripped.startswith("#") and not stripped.startswith("###"):
                    # Next major section: insert before it.
                    return idx
                if stripped.startswith("```"):
                    # Opening fence: walk to the matching closing fence so the
                    # new content lands after the block, never inside it.
                    idx += 1
                    while idx < total and not lines[idx].strip().startswith("```"):
                        idx += 1
                    # An unterminated fence runs to the end of the document.
                    return min(idx + 1, total)
                idx += 1
            return idx
        return None

    @staticmethod
    def _demote_headers(template: str) -> list[str]:
        """Turn short level-2 headers into bold notes to avoid new major sections."""
        result = []
        for line in template.splitlines():
            stripped = line.strip()
            if stripped.startswith("## ") and len(stripped) < 50:
                result.append(f"\n**Note:** {stripped[3:].strip().lower()}")
            else:
                result.append(line)
        return result

    @staticmethod
    def _contextualize_headers(template: str) -> list[str]:
        """Rewrite level-1/level-2 headers as inline notes for RMarkdown tutorials."""
        result = []
        for line in template.splitlines():
            stripped = line.strip()
            if stripped.startswith("## "):
                result.append(
                    f"\n**Note:** For this tutorial, {stripped[3:].strip().lower()}"
                )
            elif stripped.startswith("# "):
                result.append(f"\n**Important:** {stripped[2:].strip().lower()}")
            else:
                result.append(line)
        return result
156
+
157
+
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from langchain_openai.chat_models.base import BaseChatOpenAI
4
+
5
+ from bioguider.agents.common_conversation import CommonConversation
6
+
7
+
8
# Template for the cleanup system prompt. It is rendered with str.format(), and
# {doc} is the only placeholder. Any other literal brace must be doubled
# (e.g. the RMarkdown chunk marker below), otherwise format() raises KeyError.
CLEANUP_PROMPT = """
You are "BioGuider," a precise editor for biomedical/bioinformatics documentation.

TASK
Given a documentation file (README, RMarkdown, or other), produce a corrected version that:
- Fixes typos, grammar, capitalization, and spacing
- Corrects malformed markdown (headers, lists, links, code fences)
- Repairs or normalizes link formatting; keep URLs absolute if present
- Removes duplicated sections or repeated content; consolidate if needed
- Preserves technical accuracy and biomedical domain terminology (do not invent features)
- Keeps tone neutral and professional; avoid marketing language
- Preserves all valid information; do not delete content unless it is a duplicate or malformed
- For RMarkdown files (.Rmd): Preserve YAML frontmatter, R code chunks, and existing structure exactly

CRITICAL REQUIREMENTS:
- Do NOT wrap the entire document in markdown code fences (```markdown). Return pure content only.
- If the document starts with ```markdown and ends with ```, remove these fences completely.
- Do NOT modify YAML frontmatter in RMarkdown files
- Do NOT modify R code chunks (```{{r}} blocks) in RMarkdown files
- Do NOT change the overall structure or organization of the document

ABSOLUTELY FORBIDDEN - REMOVE THESE COMPLETELY:
- Any summary sections, concluding statements, or notes at the end of documents
- Phrases like "Happy analyzing!", "Ensure all dependencies are up-to-date", "This concludes", "For more information"
- Any text that appears to be AI-generated summaries or conclusions
- Sentences starting with "Note:", "Remember:", "Important:", "Tip:", "Warning:" at the end
- Any text after the last substantive content section
- Phrases like "Happy coding!", "Good luck!", "Enjoy!", "Have fun!"
- Any concluding remarks, final thoughts, or wrap-up statements
- Text that sounds like AI-generated advice or encouragement

DOCUMENT ENDING RULES:
- The document must end naturally with the last substantive content section
- Do NOT add any concluding statements, summaries, or notes
- If the original document had a natural ending, preserve it exactly
- If AI-added content appears at the end, remove it completely

INPUT
<<DOCUMENT>>
{doc}
<</DOCUMENT>>

OUTPUT
Return ONLY the revised content (no commentary, no explanations, no code fences).
"""


class LLMCleaner:
    """Run a documentation file through the LLM using ``CLEANUP_PROMPT``."""

    def __init__(self, llm: BaseChatOpenAI):
        self.llm = llm

    def clean_readme(self, content: str) -> tuple[str, dict]:
        """Return the cleaned document text and the token usage of the call.

        Only the first 30000 characters of *content* are placed in the prompt.
        The model output is returned with surrounding whitespace stripped.
        """
        conv = CommonConversation(self.llm)
        output, token_usage = conv.generate(
            system_prompt=CLEANUP_PROMPT.format(doc=content[:30000]),
            instruction_prompt="Provide the corrected documentation content only.",
        )
        return output.strip(), token_usage
66
+
67
+
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict
4
+ import json
5
+ from langchain_openai.chat_models.base import BaseChatOpenAI
6
+
7
+ from bioguider.agents.common_conversation import CommonConversation
8
+ from .models import StyleProfile, SuggestionItem
9
+
10
+
11
# System-prompt template for generating a single documentation section.
# Rendered with str.format(); placeholders: section, suggestion_category,
# anchor_title, guidance, evidence, context, tone_markers, heading_style,
# list_style, link_style. Doubled braces are emitted as literal braces.
# NOTE(review): the OUTPUT FORMAT lines use doubled braces for
# {{anchor_title}} and {{guidance}}, so the rendered prompt shows the literal
# text "{anchor_title}" / "{guidance}" there rather than the values - confirm
# this is intended.
LLM_SECTION_PROMPT = """
You are "BioGuider," a precise documentation generator for biomedical/bioinformatics software.

GOAL
Write or refine a single documentation section named "{section}". Follow the specific guidance from the evaluation report exactly.

INPUTS (use only what is provided; never invent)
- suggestion_category: {suggestion_category}
- anchor_title: {anchor_title}
- guidance: {guidance}
- evidence_from_evaluation: {evidence}
- repo_context_excerpt (analyze tone/formatting; do not paraphrase it blindly): <<{context}>>

CRITICAL REQUIREMENTS
- Follow the guidance EXACTLY as provided: {guidance}
- Address the specific suggestions from the evaluation report precisely
- Do not deviate from the guidance or add unrelated content
- If guidance mentions specific packages, requirements, or details, include them exactly
- For RMarkdown files (.Rmd), preserve the original structure including YAML frontmatter, code chunks, and existing headers
- NEVER generate generic placeholder content like "Clear 2–3 sentence summary" or "brief description"
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
- ABSOLUTELY FORBIDDEN: Do NOT wrap content in markdown code fences (```markdown). Return pure content only.
- ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!", "Ensure all dependencies are up-to-date", or any concluding statements
- ALWAYS use the specific guidance provided above to create concrete, actionable content

STYLE & CONSTRAINTS
- Fix obvious errors in the content.
- Preserve the existing tone and style markers: {tone_markers}
- Use heading style "{heading_style}" and list style "{list_style}"; link style "{link_style}".
- Neutral, professional tone; avoid marketing claims.
- Omit details you cannot substantiate from inputs/context; do not invent.
- Prefer bullets; keep it short and skimmable.
- Biomedical examples must avoid PHI; assume de-identified data.
- Output must be plain markdown for this section only, with no commentary and no backticks.
- Avoid duplication: if similar content exists in the repo context, rewrite succinctly instead of repeating.
- Never remove, alter, or recreate top-of-file badges/shields/logos (e.g., CI, PyPI, Conda, Docs shields). Assume they remain unchanged; do not output replacements for them.
- When targeting README content, do not rewrite the document title or header area; generate only the requested section body to be inserted below existing headers/badges.

SECTION GUIDELINES (follow guidance exactly)
- Dependencies: Include specific packages mentioned in guidance (e.g., "ggplot2", "dplyr", etc.)
- System Requirements: Include R version requirements and platform-specific instructions as mentioned in guidance
- Hardware Requirements: Include RAM/CPU recommendations as specified in guidance
- License: one sentence referencing the license and pointing to the LICENSE file.
- Install (clarify dependencies): Include compatibility details across operating systems and architectures as mentioned in guidance
- Tutorial improvements: Add specific examples, error handling, and reproducibility notes as mentioned in guidance
- User guide improvements: Enhance clarity, add missing information, and improve error handling as mentioned in guidance
- Conservative injection: For tutorial files (.Rmd), make minimal, targeted additions that preserve the original structure and flow. Add brief notes, small subsections, or contextual comments that enhance existing content without disrupting the tutorial's narrative.
- RMarkdown integration: When inserting content into existing RMarkdown tutorials, integrate naturally into the flow rather than creating standalone sections. Add brief explanatory text, code comments, or small subsections that enhance the existing content.
- RMarkdown format compliance: For .Rmd files, ensure content follows RMarkdown conventions:
* Use proper R code chunks with ```{{r chunk_name}} and ``` when adding code examples
* Maintain the tutorial's existing tone and context - content should feel like a natural continuation
* Avoid creating new major sections unless absolutely necessary
* Use inline R code with `{{r code_here}}` when appropriate
* Keep explanations concise and contextual to the tutorial's purpose
- Context awareness: Content should feel like a natural part of the existing tutorial, not a standalone addition. Reference the tutorial's specific context, datasets, and examples.
- If the section does not fit the above, produce content that directly addresses the guidance provided.

OUTPUT FORMAT
- Return only the section markdown (no code fences).
- Start with a level-2 header: "## {{anchor_title}}" unless the content already starts with a header.
- Ensure the content directly addresses: {{guidance}}
- DO NOT include generic instructions or placeholder text
- ONLY generate content that fulfills the specific guidance provided
"""

# System-prompt template for rewriting a whole (non-README) document.
# Rendered with str.format(); placeholders: evaluation_report, target_file,
# context.
LLM_FULLDOC_PROMPT = """
You are "BioGuider," a documentation rewriter.

GOAL
Rewrite a complete target document using only the provided evaluation report signals and the repository context excerpts. Output a full, ready-to-publish markdown file that is more complete and directly usable.

INPUTS (authoritative)
- evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
- target_file: {target_file}
- repo_context_excerpt (do not copy blindly; use only to keep style/tone): <<{context}>>

STRICT CONSTRAINTS
- Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
- Prefer completeness and usability: produce the full file content, not just minimal "added" snippets.
- Preserve top-of-file badges/logos if they exist in the original; keep title and header area intact unless the report requires changes.
- CRITICAL: Preserve the original document structure, sections, and flow. Only enhance existing content and add missing information.
- For tutorial files (.Rmd), maintain all original sections (Docker, installation methods, etc.) while improving clarity and adding missing details.
- Fix obvious errors; improve structure and readability per report suggestions.
- Include ONLY sections specifically requested by the evaluation report - do not add unnecessary sections.
- Avoid redundancy: do not duplicate information across multiple sections.
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
- ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure markdown content suitable for copy/paste.
- ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!" or any concluding statements
- Keep links well-formed; keep neutral, professional tone; concise, skimmable formatting.
- For RMarkdown files (.Rmd), preserve YAML frontmatter exactly and do not wrap content in code fences.

OUTPUT
- Return only the full markdown content for {target_file}. No commentary, no fences.
"""

# System-prompt template for rewriting a README.md in full.
# Rendered with str.format(); placeholders: evaluation_report, target_file,
# context.
LLM_README_COMPREHENSIVE_PROMPT = """
You are "BioGuider," a comprehensive documentation rewriter specializing in README files.

GOAL
Create a complete, professional README.md that addresses all evaluation suggestions comprehensively. This is the main project documentation that users will see first.

INPUTS (authoritative)
- evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
- target_file: {target_file}
- repo_context_excerpt (do not copy blindly; use only to keep style/tone): <<{context}>>

COMPREHENSIVE README REQUIREMENTS
- Create a complete README with all essential sections: Overview, Installation, Usage, Examples, Contributing, License
- Address ALL evaluation suggestions thoroughly and comprehensively
- Include detailed dependency information with installation commands
- Provide clear system requirements and compatibility information
- Add practical usage examples and code snippets
- Include troubleshooting section if needed
- Make it copy-paste ready for users
- Use professional, clear language suitable for biomedical researchers

STRICT CONSTRAINTS
- Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
- ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Return pure markdown content.
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
- Keep links well-formed; use neutral, professional tone; concise, skimmable formatting.

OUTPUT
- Return only the full README.md content. No commentary, no fences.
"""
136
+
137
+
138
class LLMContentGenerator:
    """Produce documentation text by prompting the wrapped chat model."""

    def __init__(self, llm: BaseChatOpenAI):
        self.llm = llm

    def generate_section(self, suggestion: SuggestionItem, style: StyleProfile, context: str = "") -> tuple[str, dict]:
        """Generate one section for *suggestion*.

        The section title is the suggestion's anchor hint or, when that is
        empty, the last dotted component of its category in title case.
        Only the first 2500 characters of *context* go into the prompt.
        Returns the stripped model output and the token usage.
        """
        conversation = CommonConversation(self.llm)

        title = suggestion.anchor_hint
        if not title:
            title = suggestion.category.split(".")[-1].replace("_", " ").title()

        if suggestion.source:
            evidence_text = suggestion.source.get("evidence", "")
        else:
            evidence_text = ""

        prompt_fields = {
            "tone_markers": ", ".join(style.tone_markers or []),
            "heading_style": style.heading_style,
            "list_style": style.list_style,
            "link_style": style.link_style,
            "section": title,
            "anchor_title": title,
            "suggestion_category": suggestion.category,
            "evidence": evidence_text,
            "context": context[:2500],
            "guidance": (suggestion.content_guidance or "").strip(),
        }
        rendered_prompt = LLM_SECTION_PROMPT.format(**prompt_fields)

        text, usage = conversation.generate(
            system_prompt=rendered_prompt,
            instruction_prompt="Write the section content now.",
        )
        return text.strip(), usage

    def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "") -> tuple[str, dict]:
        """Generate the complete content of *target_file*.

        Files whose path ends with ``README.md`` use the comprehensive README
        prompt; all others use the generic full-document prompt. The report is
        serialized to JSON and cut to 6000 characters; *context* is cut to
        4000 characters. Returns the stripped model output and the token usage.
        """
        conversation = CommonConversation(self.llm)

        # Pick the template first; both take the same three fields.
        if target_file.endswith("README.md"):
            template = LLM_README_COMPREHENSIVE_PROMPT
        else:
            template = LLM_FULLDOC_PROMPT

        rendered_prompt = template.format(
            target_file=target_file,
            evaluation_report=json.dumps(evaluation_report)[:6000],
            context=context[:4000],
        )

        text, usage = conversation.generate(
            system_prompt=rendered_prompt,
            instruction_prompt="Write the full document now.",
        )
        return text.strip(), usage
179
+
180
+
@@ -18,6 +18,10 @@ class EvaluationReport(BaseModel):
18
18
  userguide_evaluation: Optional[Dict[str, Any]] = None
19
19
  userguide_files: Optional[List[str]] = None
20
20
 
21
+ # Optional: tutorial evaluation content and any explicitly listed files
22
+ tutorial_evaluation: Optional[Dict[str, Any]] = None
23
+ tutorial_files: Optional[List[str]] = None
24
+
21
25
  submission_requirements_evaluation: Optional[Dict[str, Any]] = None
22
26
  submission_requirements_files: Optional[List[str]] = None
23
27
 
@@ -150,6 +150,12 @@ class EvaluationReportLoader:
150
150
  normalized["userguide_evaluation"] = userguide_eval["evaluation"]
151
151
  normalized["userguide_files"] = userguide_eval["files"]
152
152
 
153
+ # Tutorial evaluation handling
154
+ tutorial_eval = normalized.get("tutorial")
155
+ if tutorial_eval and isinstance(tutorial_eval.get("evaluation"), dict):
156
+ normalized["tutorial_evaluation"] = tutorial_eval["evaluation"]
157
+ normalized["tutorial_files"] = tutorial_eval["files"]
158
+
153
159
  # userguide_eval = normalized.get("userguide")
154
160
  # if isinstance(userguide_eval, str):
155
161
  # normalized["userguide_evaluation"] = self._parse_structured_block(userguide_eval["evaluation"], "structured_evaluation")