bioguider 0.2.30__tar.gz → 0.2.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic.

Files changed (81):
  1. {bioguider-0.2.30 → bioguider-0.2.32}/PKG-INFO +1 -1
  2. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/agent_utils.py +4 -1
  3. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_readme_task.py +4 -1
  4. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/change_planner.py +9 -122
  5. bioguider-0.2.32/bioguider/generation/document_renderer.py +157 -0
  6. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/llm_cleaner.py +16 -0
  7. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/llm_content_generator.py +66 -9
  8. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/suggestion_extractor.py +21 -45
  9. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/managers/evaluation_manager.py +0 -2
  10. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/managers/generation_manager.py +65 -29
  11. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/code_structure_builder.py +7 -1
  12. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/r_file_handler.py +6 -4
  13. {bioguider-0.2.30 → bioguider-0.2.32}/pyproject.toml +1 -1
  14. bioguider-0.2.30/bioguider/generation/document_renderer.py +0 -52
  15. {bioguider-0.2.30 → bioguider-0.2.32}/LICENSE +0 -0
  16. {bioguider-0.2.30 → bioguider-0.2.32}/README.md +0 -0
  17. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/__init__.py +0 -0
  18. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/__init__.py +0 -0
  19. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/agent_task.py +0 -0
  20. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/agent_tools.py +0 -0
  21. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/collection_execute_step.py +0 -0
  22. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/collection_observe_step.py +0 -0
  23. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/collection_plan_step.py +0 -0
  24. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/collection_task.py +0 -0
  25. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/collection_task_utils.py +0 -0
  26. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/common_agent.py +0 -0
  27. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/common_agent_2step.py +0 -0
  28. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/common_conversation.py +0 -0
  29. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/common_step.py +0 -0
  30. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/consistency_collection_step.py +0 -0
  31. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/consistency_evaluation_task.py +0 -0
  32. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/consistency_evaluation_task_utils.py +0 -0
  33. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/consistency_observe_step.py +0 -0
  34. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/consistency_query_step.py +0 -0
  35. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/dockergeneration_execute_step.py +0 -0
  36. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/dockergeneration_observe_step.py +0 -0
  37. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/dockergeneration_plan_step.py +0 -0
  38. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/dockergeneration_task.py +0 -0
  39. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/dockergeneration_task_utils.py +0 -0
  40. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_installation_task.py +0 -0
  41. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
  42. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_task.py +0 -0
  43. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_tutorial_task.py +0 -0
  44. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_tutorial_task_prompts.py +0 -0
  45. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_userguide_prompts.py +0 -0
  46. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_userguide_task.py +0 -0
  47. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/identification_execute_step.py +0 -0
  48. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/identification_observe_step.py +0 -0
  49. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/identification_plan_step.py +0 -0
  50. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/identification_task.py +0 -0
  51. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/identification_task_utils.py +0 -0
  52. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/peo_common_step.py +0 -0
  53. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/prompt_utils.py +0 -0
  54. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/python_ast_repl_tool.py +0 -0
  55. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/rag_collection_task.py +0 -0
  56. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/conversation.py +0 -0
  57. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/database/code_structure_db.py +0 -0
  58. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/database/summarized_file_db.py +0 -0
  59. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/__init__.py +0 -0
  60. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/llm_injector.py +0 -0
  61. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/models.py +0 -0
  62. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/output_manager.py +0 -0
  63. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/repo_reader.py +0 -0
  64. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/report_loader.py +0 -0
  65. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/style_analyzer.py +0 -0
  66. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/test_metrics.py +0 -0
  67. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/managers/generation_test_manager.py +0 -0
  68. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/rag/__init__.py +0 -0
  69. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/rag/config.py +0 -0
  70. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/rag/data_pipeline.py +0 -0
  71. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/rag/embedder.py +0 -0
  72. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/rag/rag.py +0 -0
  73. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/settings.py +0 -0
  74. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/constants.py +0 -0
  75. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/default.gitignore +0 -0
  76. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/file_utils.py +0 -0
  77. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/gitignore_checker.py +0 -0
  78. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/notebook_utils.py +0 -0
  79. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/pyphen_utils.py +0 -0
  80. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/python_file_handler.py +0 -0
  81. {bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: bioguider
3
- Version: 0.2.30
3
+ Version: 0.2.32
4
4
  Summary: An AI-Powered package to help biomedical developers to generate clear documentation
5
5
  License: MIT
6
6
  Author: Cankun Wang
@@ -27,6 +27,7 @@ from bioguider.utils.utils import clean_action_input
27
27
  from ..utils.gitignore_checker import GitignoreChecker
28
28
  from ..database.summarized_file_db import SummarizedFilesDb
29
29
  from bioguider.agents.common_conversation import CommonConversation
30
+ from bioguider.rag.config import configs
30
31
 
31
32
  logger = logging.getLogger(__name__)
32
33
 
@@ -153,7 +154,9 @@ def read_directory(
153
154
  return None
154
155
  gitignore_checker = GitignoreChecker(
155
156
  directory=dir_path,
156
- gitignore_path=gitignore_path
157
+ gitignore_path=gitignore_path,
158
+ exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
159
+ exclude_file_patterns=configs["file_filters"]["excluded_files"],
157
160
  )
158
161
  files = gitignore_checker.check_files_and_folders(level=level)
159
162
  return files
@@ -28,6 +28,7 @@ from bioguider.utils.constants import (
28
28
  EvaluationREADMEResult,
29
29
  )
30
30
  from bioguider.utils.utils import increase_token_usage
31
+ from bioguider.rag.config import configs
31
32
 
32
33
  logger = logging.getLogger(__name__)
33
34
 
@@ -638,7 +639,9 @@ class EvaluationREADMETask(EvaluationTask):
638
639
  repo_path = self.repo_path
639
640
  gitignore_path = Path(repo_path, ".gitignore")
640
641
  gitignore_checker = GitignoreChecker(
641
- directory=repo_path, gitignore_path=gitignore_path
642
+ directory=repo_path, gitignore_path=gitignore_path,
643
+ exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
644
+ exclude_file_patterns=configs["file_filters"]["excluded_files"],
642
645
  )
643
646
  found_readme_files = gitignore_checker.check_files_and_folders(
644
647
  check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,
@@ -162,128 +162,15 @@ class ChangePlanner:
162
162
  rationale=s.source.get("evidence", ""),
163
163
  suggestion_id=s.id,
164
164
  ))
165
- elif s.action == "improve_readability":
166
- # Handle readability improvements
167
- header_key = (target, (s.anchor_hint or "Introduction").strip().lower())
168
- if header_key in seen_headers:
169
- continue
170
- planned.append(PlannedEdit(
171
- file_path=target,
172
- edit_type="append_section",
173
- anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
174
- content_template="", # Will be filled by LLM generation
175
- rationale=s.source.get("evidence", ""),
176
- suggestion_id=s.id,
177
- ))
178
- seen_headers.add(header_key)
179
- elif s.action == "improve_setup":
180
- # Handle setup improvements
181
- planned.append(PlannedEdit(
182
- file_path=target,
183
- edit_type="append_section",
184
- anchor={"type": "header", "value": s.anchor_hint or "Setup"},
185
- content_template="", # Will be filled by LLM generation
186
- rationale=s.source.get("evidence", ""),
187
- suggestion_id=s.id,
188
- ))
189
- elif s.action == "improve_reproducibility":
190
- # Handle reproducibility improvements
191
- planned.append(PlannedEdit(
192
- file_path=target,
193
- edit_type="append_section",
194
- anchor={"type": "header", "value": s.anchor_hint or "Setup"},
195
- content_template="", # Will be filled by LLM generation
196
- rationale=s.source.get("evidence", ""),
197
- suggestion_id=s.id,
198
- ))
199
- elif s.action == "improve_structure":
200
- # Handle structure improvements
201
- planned.append(PlannedEdit(
202
- file_path=target,
203
- edit_type="append_section",
204
- anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
205
- content_template="", # Will be filled by LLM generation
206
- rationale=s.source.get("evidence", ""),
207
- suggestion_id=s.id,
208
- ))
209
- elif s.action == "improve_code_quality":
210
- # Handle code quality improvements
211
- planned.append(PlannedEdit(
212
- file_path=target,
213
- edit_type="append_section",
214
- anchor={"type": "header", "value": s.anchor_hint or "Code Examples"},
215
- content_template="", # Will be filled by LLM generation
216
- rationale=s.source.get("evidence", ""),
217
- suggestion_id=s.id,
218
- ))
219
- elif s.action == "improve_verification":
220
- # Handle verification improvements
221
- planned.append(PlannedEdit(
222
- file_path=target,
223
- edit_type="append_section",
224
- anchor={"type": "header", "value": s.anchor_hint or "Results"},
225
- content_template="", # Will be filled by LLM generation
226
- rationale=s.source.get("evidence", ""),
227
- suggestion_id=s.id,
228
- ))
229
- elif s.action == "improve_performance":
230
- # Handle performance improvements
231
- planned.append(PlannedEdit(
232
- file_path=target,
233
- edit_type="append_section",
234
- anchor={"type": "header", "value": s.anchor_hint or "Performance"},
235
- content_template="", # Will be filled by LLM generation
236
- rationale=s.source.get("evidence", ""),
237
- suggestion_id=s.id,
238
- ))
239
- elif s.action == "improve_context":
240
- # Handle context improvements for userguides
241
- header_key = (target, (s.anchor_hint or "Introduction").strip().lower())
242
- if header_key in seen_headers:
243
- continue
244
- planned.append(PlannedEdit(
245
- file_path=target,
246
- edit_type="append_section",
247
- anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
248
- content_template="", # Will be filled by LLM generation
249
- rationale=s.source.get("evidence", ""),
250
- suggestion_id=s.id,
251
- ))
252
- seen_headers.add(header_key)
253
- elif s.action == "improve_error_handling":
254
- # Handle error handling improvements for userguides
255
- header_key = (target, (s.anchor_hint or "Examples").strip().lower())
256
- if header_key in seen_headers:
257
- continue
258
- planned.append(PlannedEdit(
259
- file_path=target,
260
- edit_type="append_section",
261
- anchor={"type": "header", "value": s.anchor_hint or "Examples"},
262
- content_template="", # Will be filled by LLM generation
263
- rationale=s.source.get("evidence", ""),
264
- suggestion_id=s.id,
265
- ))
266
- seen_headers.add(header_key)
267
- elif s.action == "add_overview_section":
268
- # Handle overview section for README
269
- planned.append(PlannedEdit(
270
- file_path=target,
271
- edit_type="append_section",
272
- anchor={"type": "header", "value": s.anchor_hint or "Overview"},
273
- content_template="", # Will be filled by LLM generation
274
- rationale=s.source.get("evidence", ""),
275
- suggestion_id=s.id,
276
- ))
277
- elif s.action == "full_replace":
278
- # Handle full document replacement
279
- planned.append(PlannedEdit(
280
- file_path=target,
281
- edit_type="full_replace",
282
- anchor={"type": "document", "value": "full_document"},
283
- content_template="", # Will be filled by LLM generation
284
- rationale=s.source.get("evidence", ""),
285
- suggestion_id=s.id,
286
- ))
165
+ # All actions now use full_replace mode
166
+ planned.append(PlannedEdit(
167
+ file_path=target,
168
+ edit_type="full_replace",
169
+ anchor={"type": "document", "value": "full_document"},
170
+ content_template="", # Will be filled by LLM generation
171
+ rationale=s.source.get("evidence", ""),
172
+ suggestion_id=s.id,
173
+ ))
287
174
 
288
175
  # If a file is planned for full_replace, suppress other edits for that file to avoid redundancy
289
176
  by_file: Dict[str, List[PlannedEdit]] = {}
@@ -0,0 +1,157 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Tuple
4
+
5
+ from .models import PlannedEdit
6
+
7
+
8
+ class DocumentRenderer:
9
+ def apply_edit(self, original: str, edit: PlannedEdit) -> Tuple[str, dict]:
10
+ content = original
11
+ added = 0
12
+
13
+ if edit.edit_type == "append_section":
14
+ # Avoid duplicate header if the same header already exists
15
+ header_line = None
16
+ if edit.content_template.lstrip().startswith("#"):
17
+ header_line = edit.content_template.strip().splitlines()[0].strip()
18
+ if header_line and header_line in content:
19
+ return content, {"added_lines": 0}
20
+ # Append with two leading newlines if needed
21
+ sep = "\n\n" if not content.endswith("\n\n") else ""
22
+ content = f"{content}{sep}{edit.content_template}"
23
+ added = len(edit.content_template.splitlines())
24
+
25
+ elif edit.edit_type == "replace_intro_block":
26
+ # Replace content from start to first level-2 header (##) with new intro
27
+ lines = content.splitlines()
28
+ end_idx = None
29
+ for i, ln in enumerate(lines):
30
+ if ln.strip().startswith("## "):
31
+ end_idx = i
32
+ break
33
+ if end_idx is None:
34
+ # No H2 header found; replace entire content
35
+ new_content = edit.content_template
36
+ else:
37
+ head = lines[:0]
38
+ tail = lines[end_idx:]
39
+ new_content = edit.content_template.rstrip() + "\n\n" + "\n".join(tail)
40
+ added = len(edit.content_template.splitlines())
41
+ content = new_content
42
+
43
+ elif edit.edit_type == "insert_after_header":
44
+ # Insert content after a specific header, but integrate naturally
45
+ header_value = edit.anchor.get("value", "")
46
+ if header_value:
47
+ lines = content.splitlines()
48
+ insert_idx = None
49
+ for i, line in enumerate(lines):
50
+ if line.strip().startswith("#") and header_value.lower() in line.lower():
51
+ # Find a good insertion point after the header and its immediate content
52
+ insert_idx = i + 1
53
+ # Skip empty lines and find the first substantial content
54
+ while insert_idx < len(lines) and lines[insert_idx].strip() == "":
55
+ insert_idx += 1
56
+ # Insert after the first code block or paragraph, but before next major section
57
+ while insert_idx < len(lines):
58
+ line_content = lines[insert_idx].strip()
59
+ if line_content.startswith("#") and not line_content.startswith("###"):
60
+ break
61
+ if line_content.startswith("```") and insert_idx > 0:
62
+ # Found end of code block, insert after it
63
+ insert_idx += 1
64
+ break
65
+ insert_idx += 1
66
+ break
67
+
68
+ if insert_idx is not None:
69
+ # Insert the new content with minimal formatting
70
+ new_content_lines = edit.content_template.splitlines()
71
+ # Remove standalone headers to avoid creating new major sections
72
+ filtered_lines = []
73
+ for line in new_content_lines:
74
+ if line.strip().startswith("## ") and len(line.strip()) < 50:
75
+ # Convert major headers to minor explanations
76
+ header_text = line.strip()[3:].strip()
77
+ filtered_lines.append(f"\n**Note:** {header_text.lower()}")
78
+ else:
79
+ filtered_lines.append(line)
80
+
81
+ # Insert with minimal spacing
82
+ new_lines = lines[:insert_idx] + [""] + filtered_lines + lines[insert_idx:]
83
+ content = "\n".join(new_lines)
84
+ added = len(filtered_lines)
85
+ else:
86
+ # Header not found, append at end
87
+ sep = "\n\n" if not content.endswith("\n\n") else ""
88
+ content = f"{content}{sep}{edit.content_template}"
89
+ added = len(edit.content_template.splitlines())
90
+ else:
91
+ # No header specified, append at end
92
+ sep = "\n\n" if not content.endswith("\n\n") else ""
93
+ content = f"{content}{sep}{edit.content_template}"
94
+ added = len(edit.content_template.splitlines())
95
+
96
+ elif edit.edit_type == "rmarkdown_integration":
97
+ # Special handling for RMarkdown files - integrate content naturally
98
+ header_value = edit.anchor.get("value", "")
99
+ if header_value:
100
+ lines = content.splitlines()
101
+ insert_idx = None
102
+ for i, line in enumerate(lines):
103
+ if line.strip().startswith("#") and header_value.lower() in line.lower():
104
+ # Find insertion point after the first code block in this section
105
+ insert_idx = i + 1
106
+ while insert_idx < len(lines):
107
+ line_content = lines[insert_idx].strip()
108
+ if line_content.startswith("```") and insert_idx > 0:
109
+ # Found code block, insert after it
110
+ insert_idx += 1
111
+ break
112
+ if line_content.startswith("#") and not line_content.startswith("###"):
113
+ # Next major section, insert before it
114
+ break
115
+ insert_idx += 1
116
+ break
117
+
118
+ if insert_idx is not None:
119
+ # Process content to be more contextual
120
+ new_content_lines = edit.content_template.splitlines()
121
+ contextual_lines = []
122
+
123
+ for line in new_content_lines:
124
+ # Convert standalone sections to contextual notes
125
+ if line.strip().startswith("## "):
126
+ header_text = line.strip()[3:].strip()
127
+ contextual_lines.append(f"\n**Note:** For this tutorial, {header_text.lower()}")
128
+ elif line.strip().startswith("# "):
129
+ header_text = line.strip()[2:].strip()
130
+ contextual_lines.append(f"\n**Important:** {header_text.lower()}")
131
+ else:
132
+ contextual_lines.append(line)
133
+
134
+ # Insert with minimal disruption
135
+ new_lines = lines[:insert_idx] + [""] + contextual_lines + lines[insert_idx:]
136
+ content = "\n".join(new_lines)
137
+ added = len(contextual_lines)
138
+ else:
139
+ # Fallback to append
140
+ sep = "\n\n" if not content.endswith("\n\n") else ""
141
+ content = f"{content}{sep}{edit.content_template}"
142
+ added = len(edit.content_template.splitlines())
143
+ else:
144
+ sep = "\n\n" if not content.endswith("\n\n") else ""
145
+ content = f"{content}{sep}{edit.content_template}"
146
+ added = len(edit.content_template.splitlines())
147
+
148
+ elif edit.edit_type == "full_replace":
149
+ # Replace entire document content
150
+ content = edit.content_template
151
+ added = len(edit.content_template.splitlines())
152
+
153
+ # Other edit types (replace_block) can be added as needed
154
+
155
+ return content, {"added_lines": added}
156
+
157
+
@@ -26,6 +26,22 @@ CRITICAL REQUIREMENTS:
26
26
  - Do NOT modify R code chunks (```{r} blocks) in RMarkdown files
27
27
  - Do NOT change the overall structure or organization of the document
28
28
 
29
+ ABSOLUTELY FORBIDDEN - REMOVE THESE COMPLETELY:
30
+ - Any summary sections, concluding statements, or notes at the end of documents
31
+ - Phrases like "Happy analyzing!", "Ensure all dependencies are up-to-date", "This concludes", "For more information"
32
+ - Any text that appears to be AI-generated summaries or conclusions
33
+ - Sentences starting with "Note:", "Remember:", "Important:", "Tip:", "Warning:" at the end
34
+ - Any text after the last substantive content section
35
+ - Phrases like "Happy coding!", "Good luck!", "Enjoy!", "Have fun!"
36
+ - Any concluding remarks, final thoughts, or wrap-up statements
37
+ - Text that sounds like AI-generated advice or encouragement
38
+
39
+ DOCUMENT ENDING RULES:
40
+ - The document must end naturally with the last substantive content section
41
+ - Do NOT add any concluding statements, summaries, or notes
42
+ - If the original document had a natural ending, preserve it exactly
43
+ - If AI-added content appears at the end, remove it completely
44
+
29
45
  INPUT
30
46
  <<DOCUMENT>>
31
47
  {doc}
@@ -28,6 +28,9 @@ CRITICAL REQUIREMENTS
28
28
  - If guidance mentions specific packages, requirements, or details, include them exactly
29
29
  - For RMarkdown files (.Rmd), preserve the original structure including YAML frontmatter, code chunks, and existing headers
30
30
  - NEVER generate generic placeholder content like "Clear 2–3 sentence summary" or "brief description"
31
+ - ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
32
+ - ABSOLUTELY FORBIDDEN: Do NOT wrap content in markdown code fences (```markdown). Return pure content only.
33
+ - ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!", "Ensure all dependencies are up-to-date", or any concluding statements
31
34
  - ALWAYS use the specific guidance provided above to create concrete, actionable content
32
35
 
33
36
  STYLE & CONSTRAINTS
@@ -51,18 +54,27 @@ SECTION GUIDELINES (follow guidance exactly)
51
54
  - Install (clarify dependencies): Include compatibility details across operating systems and architectures as mentioned in guidance
52
55
  - Tutorial improvements: Add specific examples, error handling, and reproducibility notes as mentioned in guidance
53
56
  - User guide improvements: Enhance clarity, add missing information, and improve error handling as mentioned in guidance
57
+ - Conservative injection: For tutorial files (.Rmd), make minimal, targeted additions that preserve the original structure and flow. Add brief notes, small subsections, or contextual comments that enhance existing content without disrupting the tutorial's narrative.
58
+ - RMarkdown integration: When inserting content into existing RMarkdown tutorials, integrate naturally into the flow rather than creating standalone sections. Add brief explanatory text, code comments, or small subsections that enhance the existing content.
59
+ - RMarkdown format compliance: For .Rmd files, ensure content follows RMarkdown conventions:
60
+ * Use proper R code chunks with ```{{r chunk_name}} and ``` when adding code examples
61
+ * Maintain the tutorial's existing tone and context - content should feel like a natural continuation
62
+ * Avoid creating new major sections unless absolutely necessary
63
+ * Use inline R code with `{{r code_here}}` when appropriate
64
+ * Keep explanations concise and contextual to the tutorial's purpose
65
+ - Context awareness: Content should feel like a natural part of the existing tutorial, not a standalone addition. Reference the tutorial's specific context, datasets, and examples.
54
66
  - If the section does not fit the above, produce content that directly addresses the guidance provided.
55
67
 
56
68
  OUTPUT FORMAT
57
69
  - Return only the section markdown (no code fences).
58
- - Start with a level-2 header: "## {anchor_title}" unless the content already starts with a header.
59
- - Ensure the content directly addresses: {guidance}
70
+ - Start with a level-2 header: "## {{anchor_title}}" unless the content already starts with a header.
71
+ - Ensure the content directly addresses: {{guidance}}
60
72
  - DO NOT include generic instructions or placeholder text
61
73
  - ONLY generate content that fulfills the specific guidance provided
62
74
  """
63
75
 
64
76
  LLM_FULLDOC_PROMPT = """
65
- You are BioGuider,” a documentation rewriter.
77
+ You are "BioGuider," a documentation rewriter.
66
78
 
67
79
  GOAL
68
80
  Rewrite a complete target document using only the provided evaluation report signals and the repository context excerpts. Output a full, ready-to-publish markdown file that is more complete and directly usable.
@@ -76,17 +88,52 @@ STRICT CONSTRAINTS
76
88
  - Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
77
89
  - Prefer completeness and usability: produce the full file content, not just minimal "added" snippets.
78
90
  - Preserve top-of-file badges/logos if they exist in the original; keep title and header area intact unless the report requires changes.
91
+ - CRITICAL: Preserve the original document structure, sections, and flow. Only enhance existing content and add missing information.
92
+ - For tutorial files (.Rmd), maintain all original sections (Docker, installation methods, etc.) while improving clarity and adding missing details.
79
93
  - Fix obvious errors; improve structure and readability per report suggestions.
80
94
  - Include ONLY sections specifically requested by the evaluation report - do not add unnecessary sections.
81
95
  - Avoid redundancy: do not duplicate information across multiple sections.
96
+ - ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
97
+ - ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure markdown content suitable for copy/paste.
98
+ - ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!" or any concluding statements
82
99
  - Keep links well-formed; keep neutral, professional tone; concise, skimmable formatting.
83
- - CRITICAL: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure markdown content suitable for copy/paste.
84
100
  - For RMarkdown files (.Rmd), preserve YAML frontmatter exactly and do not wrap content in code fences.
85
101
 
86
102
  OUTPUT
87
103
  - Return only the full markdown content for {target_file}. No commentary, no fences.
88
104
  """
89
105
 
106
+ LLM_README_COMPREHENSIVE_PROMPT = """
107
+ You are "BioGuider," a comprehensive documentation rewriter specializing in README files.
108
+
109
+ GOAL
110
+ Create a complete, professional README.md that addresses all evaluation suggestions comprehensively. This is the main project documentation that users will see first.
111
+
112
+ INPUTS (authoritative)
113
+ - evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
114
+ - target_file: {target_file}
115
+ - repo_context_excerpt (do not copy blindly; use only to keep style/tone): <<{context}>>
116
+
117
+ COMPREHENSIVE README REQUIREMENTS
118
+ - Create a complete README with all essential sections: Overview, Installation, Usage, Examples, Contributing, License
119
+ - Address ALL evaluation suggestions thoroughly and comprehensively
120
+ - Include detailed dependency information with installation commands
121
+ - Provide clear system requirements and compatibility information
122
+ - Add practical usage examples and code snippets
123
+ - Include troubleshooting section if needed
124
+ - Make it copy-paste ready for users
125
+ - Use professional, clear language suitable for biomedical researchers
126
+
127
+ STRICT CONSTRAINTS
128
+ - Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
129
+ - ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Return pure markdown content.
130
+ - ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
131
+ - Keep links well-formed; use neutral, professional tone; concise, skimmable formatting.
132
+
133
+ OUTPUT
134
+ - Return only the full README.md content. No commentary, no fences.
135
+ """
136
+
90
137
 
91
138
  class LLMContentGenerator:
92
139
  def __init__(self, llm: BaseChatOpenAI):
@@ -112,11 +159,21 @@ class LLMContentGenerator:
112
159
 
113
160
  def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "") -> tuple[str, dict]:
114
161
  conv = CommonConversation(self.llm)
115
- system_prompt = LLM_FULLDOC_PROMPT.format(
116
- target_file=target_file,
117
- evaluation_report=json.dumps(evaluation_report)[:6000],
118
- context=context[:4000],
119
- )
162
+
163
+ # Use comprehensive README prompt for README.md files
164
+ if target_file.endswith("README.md"):
165
+ system_prompt = LLM_README_COMPREHENSIVE_PROMPT.format(
166
+ target_file=target_file,
167
+ evaluation_report=json.dumps(evaluation_report)[:6000],
168
+ context=context[:4000],
169
+ )
170
+ else:
171
+ system_prompt = LLM_FULLDOC_PROMPT.format(
172
+ target_file=target_file,
173
+ evaluation_report=json.dumps(evaluation_report)[:6000],
174
+ context=context[:4000],
175
+ )
176
+
120
177
  content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the full document now.")
121
178
  return content.strip(), token_usage
122
179
 
@@ -53,7 +53,7 @@ class SuggestionExtractor:
53
53
  severity="should_fix",
54
54
  source={"section": "readme", "field": "project_purpose_suggestions", "evidence": purpose_suggestions, "score": purpose_score},
55
55
  target_files=[file_name],
56
- action="add_overview_section",
56
+ action="full_replace",
57
57
  anchor_hint="Overview",
58
58
  content_guidance=purpose_suggestions,
59
59
  ))
@@ -68,7 +68,7 @@ class SuggestionExtractor:
68
68
  severity="should_fix",
69
69
  source={"section": "readme", "field": "readability_suggestions", "evidence": readability_suggestions, "score": readability_score},
70
70
  target_files=[file_name],
71
- action="improve_readability",
71
+ action="full_replace",
72
72
  anchor_hint="Introduction",
73
73
  content_guidance=readability_suggestions,
74
74
  ))
@@ -195,12 +195,14 @@ class SuggestionExtractor:
195
195
  if isinstance(report.installation_evaluation, dict):
196
196
  structured = report.installation_evaluation.get("structured_evaluation")
197
197
  if structured:
198
- # If installation has deficits, full replace installation docs listed in installation_files
199
- overall = structured.get("overall_score")
200
- hw = structured.get("hardware_requirements")
201
- compat = structured.get("compatible_os")
198
+ # Use full_replace mode for all installation files
202
199
  dep_sugg = structured.get("dependency_suggestions")
203
- if overall in ("Poor", "Fair") or hw is False or compat is False or dep_sugg:
200
+ hw_req = structured.get("hardware_requirements")
201
+ compat_os = structured.get("compatible_os")
202
+ overall = structured.get("overall_score")
203
+
204
+ # Trigger full_replace for all installation files when needed
205
+ if overall in ("Poor", "Fair") or hw_req is False or compat_os is False or dep_sugg:
204
206
  for target in report.installation_files or []:
205
207
  suggestions.append(SuggestionItem(
206
208
  id=f"install-full-replace-{target}",
@@ -210,33 +212,7 @@ class SuggestionExtractor:
210
212
  target_files=[target],
211
213
  action="full_replace",
212
214
  anchor_hint=None,
213
- content_guidance="Rewrite installation doc based on evaluation (dependencies, OS, hardware).",
214
- ))
215
- dep_sugg = structured.get("dependency_suggestions")
216
- if dep_sugg: # Prioritize specific suggestions
217
- for target in report.installation_files or []:
218
- suggestions.append(SuggestionItem(
219
- id=f"install-dep-clarify-{target}",
220
- category="installation.dependencies",
221
- severity="should_fix",
222
- source={"section": "installation", "field": "dependency_suggestions", "evidence": str(dep_sugg)},
223
- target_files=[target],
224
- action="clarify_mandatory_vs_optional",
225
- anchor_hint="Dependencies",
226
- content_guidance=str(dep_sugg),
227
- ))
228
- hw_score = structured.get("hardware_requirements")
229
- if hw_score is False:
230
- for target in report.installation_files or []:
231
- suggestions.append(SuggestionItem(
232
- id=f"install-hw-req-{target}",
233
- category="installation.hardware",
234
- severity="should_fix",
235
- source={"section": "installation", "field": "hardware_requirements", "score": "Poor", "evidence": "Hardware requirements not specified"},
236
- target_files=[target],
237
- action="add_hardware_requirements",
238
- anchor_hint="Hardware Requirements",
239
- content_guidance="Add concise RAM/CPU recommendation as per report guidance.",
215
+ content_guidance="Comprehensive rewrite preserving original structure while adding improved dependencies, hardware requirements, and installation instructions.",
240
216
  ))
241
217
 
242
218
  # Submission requirements could drive expected output/dataset sections; use only if in files list
@@ -261,7 +237,7 @@ class SuggestionExtractor:
261
237
  severity="should_fix",
262
238
  source={"section": "userguide", "field": "readability_suggestions", "evidence": suggestion, "score": readability_score},
263
239
  target_files=[file_name],
264
- action="improve_readability",
240
+ action="full_replace",
265
241
  anchor_hint=f"Readability-{i+1}",
266
242
  content_guidance=suggestion,
267
243
  ))
@@ -278,7 +254,7 @@ class SuggestionExtractor:
278
254
  severity="should_fix",
279
255
  source={"section": "userguide", "field": "context_and_purpose_suggestions", "evidence": suggestion, "score": context_score},
280
256
  target_files=[file_name],
281
- action="improve_context",
257
+ action="full_replace",
282
258
  anchor_hint=f"Context-{i+1}",
283
259
  content_guidance=suggestion,
284
260
  ))
@@ -295,7 +271,7 @@ class SuggestionExtractor:
295
271
  severity="should_fix",
296
272
  source={"section": "userguide", "field": "error_handling_suggestions", "evidence": suggestion, "score": error_score},
297
273
  target_files=[file_name],
298
- action="improve_error_handling",
274
+ action="full_replace",
299
275
  anchor_hint=f"Error-Handling-{i+1}",
300
276
  content_guidance=suggestion,
301
277
  ))
@@ -310,7 +286,7 @@ class SuggestionExtractor:
310
286
  severity="should_fix",
311
287
  source={"section": "userguide", "field": "consistency", "evidence": f"score={score}"},
312
288
  target_files=[file_name],
313
- action="improve_consistency",
289
+ action="full_replace",
314
290
  anchor_hint="Examples",
315
291
  content_guidance="Improve consistency in examples, terminology, and formatting based on evaluation report.",
316
292
  ))
@@ -335,7 +311,7 @@ class SuggestionExtractor:
335
311
  severity="should_fix",
336
312
  source={"section": "tutorial", "field": "readability_suggestions", "evidence": suggestion, "score": readability_score},
337
313
  target_files=[file_name],
338
- action="improve_readability",
314
+ action="full_replace",
339
315
  anchor_hint="Introduction",
340
316
  content_guidance=suggestion,
341
317
  ))
@@ -352,7 +328,7 @@ class SuggestionExtractor:
352
328
  severity="should_fix",
353
329
  source={"section": "tutorial", "field": "setup_and_dependencies_suggestions", "evidence": suggestion, "score": setup_score},
354
330
  target_files=[file_name],
355
- action="improve_setup",
331
+ action="full_replace",
356
332
  anchor_hint="Setup",
357
333
  content_guidance=suggestion,
358
334
  ))
@@ -369,7 +345,7 @@ class SuggestionExtractor:
369
345
  severity="should_fix",
370
346
  source={"section": "tutorial", "field": "reproducibility_suggestions", "evidence": suggestion, "score": reproducibility_score},
371
347
  target_files=[file_name],
372
- action="improve_reproducibility",
348
+ action="full_replace",
373
349
  anchor_hint="Setup",
374
350
  content_guidance=suggestion,
375
351
  ))
@@ -386,7 +362,7 @@ class SuggestionExtractor:
386
362
  severity="should_fix",
387
363
  source={"section": "tutorial", "field": "structure_and_navigation_suggestions", "evidence": suggestion, "score": structure_score},
388
364
  target_files=[file_name],
389
- action="improve_structure",
365
+ action="full_replace",
390
366
  anchor_hint="Introduction",
391
367
  content_guidance=suggestion,
392
368
  ))
@@ -403,7 +379,7 @@ class SuggestionExtractor:
403
379
  severity="should_fix",
404
380
  source={"section": "tutorial", "field": "executable_code_quality_suggestions", "evidence": suggestion, "score": code_score},
405
381
  target_files=[file_name],
406
- action="improve_code_quality",
382
+ action="full_replace",
407
383
  anchor_hint="Code Examples",
408
384
  content_guidance=suggestion,
409
385
  ))
@@ -420,7 +396,7 @@ class SuggestionExtractor:
420
396
  severity="should_fix",
421
397
  source={"section": "tutorial", "field": "result_verification_suggestions", "evidence": suggestion, "score": verification_score},
422
398
  target_files=[file_name],
423
- action="improve_verification",
399
+ action="full_replace",
424
400
  anchor_hint="Results",
425
401
  content_guidance=suggestion,
426
402
  ))
@@ -437,7 +413,7 @@ class SuggestionExtractor:
437
413
  severity="should_fix",
438
414
  source={"section": "tutorial", "field": "performance_and_resource_notes_suggestions", "evidence": suggestion, "score": performance_score},
439
415
  target_files=[file_name],
440
- action="improve_performance",
416
+ action="full_replace",
441
417
  anchor_hint="Performance",
442
418
  content_guidance=suggestion,
443
419
  ))
@@ -3,10 +3,8 @@ from pathlib import Path
3
3
 
4
4
  from bioguider.agents.evaluation_tutorial_task import EvaluationTutorialTask
5
5
  from bioguider.agents.evaluation_userguide_task import EvaluationUserGuideTask
6
- from bioguider.agents.prompt_utils import CollectionGoalItemEnum
7
6
  from bioguider.database.code_structure_db import CodeStructureDb
8
7
  from bioguider.utils.constants import ProjectMetadata
9
- from bioguider.utils.gitignore_checker import GitignoreChecker
10
8
 
11
9
  from ..agents.identification_task import IdentificationTask
12
10
  from ..rag.rag import RAG
@@ -186,13 +186,7 @@ class DocumentationGenerationManager:
186
186
  if isinstance(cleaned, str) and cleaned.strip():
187
187
  content = cleaned
188
188
 
189
- # Additional post-processing: remove markdown code fences if present
190
- if content.startswith("```markdown") and content.endswith("```"):
191
- # Remove the opening and closing fences
192
- content = content[11:] # Remove ```markdown
193
- if content.endswith("```"):
194
- content = content[:-3] # Remove closing ```
195
- content = content.strip()
189
+ # LLM cleaner now handles markdown fences and unwanted summaries
196
190
 
197
191
  except Exception:
198
192
  pass
@@ -312,7 +306,7 @@ class DocumentationGenerationManager:
312
306
 
313
307
  # Calculate success rate based on processed suggestions only
314
308
  processed_suggestions_count = len([s for s in suggestions if s.source and s.source.get("score", "") in ("Fair", "Poor")])
315
- fixed_suggestions = len(processed_suggestions)
309
+ fixed_suggestions = len([s for s in processed_suggestions if s in [sug.id for sug in suggestions if sug.source and sug.source.get("score", "") in ("Fair", "Poor")]])
316
310
 
317
311
  # Add professional summary and key metrics
318
312
  lines.append(f"\n## Summary\n")
@@ -383,27 +377,69 @@ class DocumentationGenerationManager:
383
377
  section = e.anchor.get('value', 'General improvements')
384
378
 
385
379
  # Convert technical action names to user-friendly descriptions
386
- action_desc = {
387
- 'append_section': f'Added "{section}" section',
388
- 'replace_intro_block': f'Improved "{section}" section',
389
- 'full_replace': 'Comprehensive rewrite',
390
- 'add_dependencies_section': 'Added dependencies information',
391
- 'add_system_requirements_section': 'Added system requirements',
392
- 'add_hardware_requirements': 'Added hardware requirements',
393
- 'clarify_mandatory_vs_optional': 'Clarified dependencies',
394
- 'improve_readability': f'Improved readability in "{section}"',
395
- 'improve_setup': f'Enhanced setup instructions in "{section}"',
396
- 'improve_reproducibility': f'Improved reproducibility in "{section}"',
397
- 'improve_structure': f'Enhanced structure in "{section}"',
398
- 'improve_code_quality': f'Improved code quality in "{section}"',
399
- 'improve_verification': f'Enhanced result verification in "{section}"',
400
- 'improve_performance': f'Added performance notes in "{section}"',
401
- 'improve_clarity_and_error_handling': f'Improved clarity and error handling in "{section}"',
402
- 'improve_consistency': f'Improved consistency in "{section}"',
403
- 'improve_context': f'Enhanced context in "{section}"',
404
- 'improve_error_handling': f'Improved error handling in "{section}"',
405
- 'add_overview_section': f'Added "{section}" section'
406
- }.get(e.edit_type, f'Improved {e.edit_type}')
380
+ # Use the suggestion action if available, otherwise fall back to edit type
381
+ action_key = sug.action if sug else e.edit_type
382
+
383
+ # Generate category-based description for full_replace actions
384
+ if action_key == 'full_replace' and sug:
385
+ category = sug.category or ""
386
+ category_display = category.split('.')[-1].replace('_', ' ').title() if category else ""
387
+
388
+ # Create specific descriptions based on category
389
+ if 'readme' in category.lower():
390
+ action_desc = 'Enhanced README documentation'
391
+ elif 'tutorial' in category.lower():
392
+ action_desc = 'Improved tutorial content'
393
+ elif 'userguide' in category.lower():
394
+ action_desc = 'Enhanced user guide documentation'
395
+ elif 'installation' in category.lower():
396
+ action_desc = 'Improved installation instructions'
397
+ elif 'dependencies' in category.lower():
398
+ action_desc = 'Enhanced dependency information'
399
+ elif 'readability' in category.lower():
400
+ action_desc = 'Improved readability and clarity'
401
+ elif 'setup' in category.lower():
402
+ action_desc = 'Enhanced setup and configuration'
403
+ elif 'reproducibility' in category.lower():
404
+ action_desc = 'Improved reproducibility'
405
+ elif 'structure' in category.lower():
406
+ action_desc = 'Enhanced document structure'
407
+ elif 'code_quality' in category.lower():
408
+ action_desc = 'Improved code quality'
409
+ elif 'verification' in category.lower():
410
+ action_desc = 'Enhanced result verification'
411
+ elif 'performance' in category.lower():
412
+ action_desc = 'Added performance considerations'
413
+ elif 'context' in category.lower():
414
+ action_desc = 'Enhanced context and purpose'
415
+ elif 'error_handling' in category.lower():
416
+ action_desc = 'Improved error handling'
417
+ else:
418
+ action_desc = f'Enhanced {category_display}' if category_display else 'Comprehensive rewrite'
419
+ else:
420
+ # Use existing action descriptions for non-full_replace actions
421
+ action_desc = {
422
+ 'append_section': f'Added "{section}" section',
423
+ 'insert_after_header': f'Enhanced content in "{section}"',
424
+ 'rmarkdown_integration': f'Integrated improvements in "{section}"',
425
+ 'replace_intro_block': f'Improved "{section}" section',
426
+ 'add_dependencies_section': 'Added dependencies information',
427
+ 'add_system_requirements_section': 'Added system requirements',
428
+ 'add_hardware_requirements': 'Added hardware requirements',
429
+ 'clarify_mandatory_vs_optional': 'Clarified dependencies',
430
+ 'improve_readability': f'Improved readability in "{section}"',
431
+ 'improve_setup': f'Enhanced setup instructions in "{section}"',
432
+ 'improve_reproducibility': f'Improved reproducibility in "{section}"',
433
+ 'improve_structure': f'Enhanced structure in "{section}"',
434
+ 'improve_code_quality': f'Improved code quality in "{section}"',
435
+ 'improve_verification': f'Enhanced result verification in "{section}"',
436
+ 'improve_performance': f'Added performance notes in "{section}"',
437
+ 'improve_clarity_and_error_handling': f'Improved clarity and error handling in "{section}"',
438
+ 'improve_consistency': f'Improved consistency in "{section}"',
439
+ 'improve_context': f'Enhanced context in "{section}"',
440
+ 'improve_error_handling': f'Improved error handling in "{section}"',
441
+ 'add_overview_section': f'Added "{section}" section'
442
+ }.get(action_key, f'Improved {action_key}')
407
443
 
408
444
  lines.append(f"- **{action_desc}**")
409
445
 
@@ -6,6 +6,7 @@ from bioguider.utils.r_file_handler import RFileHandler
6
6
  from .gitignore_checker import GitignoreChecker
7
7
  from .python_file_handler import PythonFileHandler
8
8
  from ..database.code_structure_db import CodeStructureDb
9
+ from ..rag.config import configs
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
@@ -17,7 +18,12 @@ class CodeStructureBuilder:
17
18
  code_structure_db: CodeStructureDb,
18
19
  ):
19
20
  self.repo_path = str(repo_path)
20
- self.gitignore_checker = GitignoreChecker(repo_path, str(gitignore_path))
21
+ self.gitignore_checker = GitignoreChecker(
22
+ directory=repo_path,
23
+ gitignore_path=str(gitignore_path),
24
+ exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
25
+ exclude_file_patterns=configs["file_filters"]["excluded_files"],
26
+ )
21
27
  self.file_handler = PythonFileHandler(repo_path)
22
28
  self.code_structure_db = code_structure_db
23
29
 
@@ -348,10 +348,12 @@ class RFileHandler:
348
348
  s = line.lstrip()
349
349
  if s.startswith("#'"):
350
350
  buf.append(s[2:].lstrip())
351
- line_idx -= 1
352
- continue
353
- # stop at first non-roxygen line (don’t cross blank + NULL padding blocks)
354
- break
351
+ elif s.strip() == "":
352
+ pass
353
+ else:
354
+ # stop at first non-roxygen line (don’t cross blank + NULL padding blocks)
355
+ break
356
+ line_idx -= 1
355
357
  if not buf:
356
358
  return None
357
359
  buf.reverse()
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "bioguider"
3
- version = "0.2.30"
3
+ version = "0.2.32"
4
4
  description = "An AI-Powered package to help biomedical developers to generate clear documentation"
5
5
  authors = [
6
6
  "Cankun Wang <Cankun.Wang@osumc.edu>",
@@ -1,52 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Tuple
4
-
5
- from .models import PlannedEdit
6
-
7
-
8
- class DocumentRenderer:
9
- def apply_edit(self, original: str, edit: PlannedEdit) -> Tuple[str, dict]:
10
- content = original
11
- added = 0
12
-
13
- if edit.edit_type == "append_section":
14
- # Avoid duplicate header if the same header already exists
15
- header_line = None
16
- if edit.content_template.lstrip().startswith("#"):
17
- header_line = edit.content_template.strip().splitlines()[0].strip()
18
- if header_line and header_line in content:
19
- return content, {"added_lines": 0}
20
- # Append with two leading newlines if needed
21
- sep = "\n\n" if not content.endswith("\n\n") else ""
22
- content = f"{content}{sep}{edit.content_template}"
23
- added = len(edit.content_template.splitlines())
24
-
25
- elif edit.edit_type == "replace_intro_block":
26
- # Replace content from start to first level-2 header (##) with new intro
27
- lines = content.splitlines()
28
- end_idx = None
29
- for i, ln in enumerate(lines):
30
- if ln.strip().startswith("## "):
31
- end_idx = i
32
- break
33
- if end_idx is None:
34
- # No H2 header found; replace entire content
35
- new_content = edit.content_template
36
- else:
37
- head = lines[:0]
38
- tail = lines[end_idx:]
39
- new_content = edit.content_template.rstrip() + "\n\n" + "\n".join(tail)
40
- added = len(edit.content_template.splitlines())
41
- content = new_content
42
-
43
- elif edit.edit_type == "full_replace":
44
- # Replace entire document content
45
- content = edit.content_template
46
- added = len(edit.content_template.splitlines())
47
-
48
- # Other edit types (insert_after_header, replace_block) can be added as needed
49
-
50
- return content, {"added_lines": added}
51
-
52
-
File without changes
File without changes