bioguider 0.2.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioguider/__init__.py +0 -0
- bioguider/agents/__init__.py +0 -0
- bioguider/agents/agent_task.py +92 -0
- bioguider/agents/agent_tools.py +176 -0
- bioguider/agents/agent_utils.py +504 -0
- bioguider/agents/collection_execute_step.py +182 -0
- bioguider/agents/collection_observe_step.py +125 -0
- bioguider/agents/collection_plan_step.py +156 -0
- bioguider/agents/collection_task.py +184 -0
- bioguider/agents/collection_task_utils.py +142 -0
- bioguider/agents/common_agent.py +137 -0
- bioguider/agents/common_agent_2step.py +215 -0
- bioguider/agents/common_conversation.py +61 -0
- bioguider/agents/common_step.py +85 -0
- bioguider/agents/consistency_collection_step.py +102 -0
- bioguider/agents/consistency_evaluation_task.py +57 -0
- bioguider/agents/consistency_evaluation_task_utils.py +14 -0
- bioguider/agents/consistency_observe_step.py +110 -0
- bioguider/agents/consistency_query_step.py +77 -0
- bioguider/agents/dockergeneration_execute_step.py +186 -0
- bioguider/agents/dockergeneration_observe_step.py +154 -0
- bioguider/agents/dockergeneration_plan_step.py +158 -0
- bioguider/agents/dockergeneration_task.py +158 -0
- bioguider/agents/dockergeneration_task_utils.py +220 -0
- bioguider/agents/evaluation_installation_task.py +270 -0
- bioguider/agents/evaluation_readme_task.py +767 -0
- bioguider/agents/evaluation_submission_requirements_task.py +172 -0
- bioguider/agents/evaluation_task.py +206 -0
- bioguider/agents/evaluation_tutorial_task.py +169 -0
- bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
- bioguider/agents/evaluation_userguide_prompts.py +179 -0
- bioguider/agents/evaluation_userguide_task.py +154 -0
- bioguider/agents/evaluation_utils.py +127 -0
- bioguider/agents/identification_execute_step.py +181 -0
- bioguider/agents/identification_observe_step.py +104 -0
- bioguider/agents/identification_plan_step.py +140 -0
- bioguider/agents/identification_task.py +270 -0
- bioguider/agents/identification_task_utils.py +22 -0
- bioguider/agents/peo_common_step.py +64 -0
- bioguider/agents/prompt_utils.py +253 -0
- bioguider/agents/python_ast_repl_tool.py +69 -0
- bioguider/agents/rag_collection_task.py +130 -0
- bioguider/conversation.py +67 -0
- bioguider/database/code_structure_db.py +500 -0
- bioguider/database/summarized_file_db.py +146 -0
- bioguider/generation/__init__.py +39 -0
- bioguider/generation/benchmark_metrics.py +610 -0
- bioguider/generation/change_planner.py +189 -0
- bioguider/generation/document_renderer.py +157 -0
- bioguider/generation/llm_cleaner.py +67 -0
- bioguider/generation/llm_content_generator.py +1128 -0
- bioguider/generation/llm_injector.py +809 -0
- bioguider/generation/models.py +85 -0
- bioguider/generation/output_manager.py +74 -0
- bioguider/generation/repo_reader.py +37 -0
- bioguider/generation/report_loader.py +166 -0
- bioguider/generation/style_analyzer.py +36 -0
- bioguider/generation/suggestion_extractor.py +436 -0
- bioguider/generation/test_metrics.py +189 -0
- bioguider/managers/benchmark_manager.py +785 -0
- bioguider/managers/evaluation_manager.py +215 -0
- bioguider/managers/generation_manager.py +686 -0
- bioguider/managers/generation_test_manager.py +107 -0
- bioguider/managers/generation_test_manager_v2.py +525 -0
- bioguider/rag/__init__.py +0 -0
- bioguider/rag/config.py +117 -0
- bioguider/rag/data_pipeline.py +651 -0
- bioguider/rag/embedder.py +24 -0
- bioguider/rag/rag.py +138 -0
- bioguider/settings.py +103 -0
- bioguider/utils/code_structure_builder.py +59 -0
- bioguider/utils/constants.py +135 -0
- bioguider/utils/default.gitignore +140 -0
- bioguider/utils/file_utils.py +215 -0
- bioguider/utils/gitignore_checker.py +175 -0
- bioguider/utils/notebook_utils.py +117 -0
- bioguider/utils/pyphen_utils.py +73 -0
- bioguider/utils/python_file_handler.py +65 -0
- bioguider/utils/r_file_handler.py +551 -0
- bioguider/utils/utils.py +163 -0
- bioguider-0.2.52.dist-info/LICENSE +21 -0
- bioguider-0.2.52.dist-info/METADATA +51 -0
- bioguider-0.2.52.dist-info/RECORD +84 -0
- bioguider-0.2.52.dist-info/WHEEL +4 -0
@@ -0,0 +1,1128 @@
from __future__ import annotations

from typing import Dict
import json
import re
import os
from langchain_openai.chat_models.base import BaseChatOpenAI

from bioguider.agents.common_conversation import CommonConversation
from .models import StyleProfile, SuggestionItem


LLM_SECTION_PROMPT = """
You are "BioGuider," a precise documentation generator for biomedical/bioinformatics software.

GOAL
Write or refine a single documentation section based on specific evaluation feedback.

INPUTS
- suggestion_category: {suggestion_category}
- anchor_title: {anchor_title}
- guidance: {guidance}
- original_text_snippet (if provided): {original_text}
- evaluation_score: {evaluation_score}
- repo_context_excerpt: <<{context}>>

CRITICAL RULES

1. SHOW, DON'T TELL
   - Provide SPECIFIC details (numbers, versions, commands)
   - NEVER write generic statements like "Ensure adequate resources"
   - If evaluation asks for hardware: provide actual RAM/CPU numbers
   - If evaluation asks for dependencies: list exact package versions
   - If evaluation asks for validation: show actual code with expected output

2. RESPECT ORIGINAL CONTEXT
   - Fix EXACTLY what evaluation identified, no more, no less
   - Enhance/replace the specific part mentioned in "Original text snippet"
   - Don't rewrite entire document or add unrelated content

3. CONTEXT-AWARE RESPONSES
   - TUTORIAL CONTEXT: Users already have software installed, focus on usage/analysis steps
   - README CONTEXT: Users need installation instructions, add setup sections
   - DOCUMENTATION CONTEXT: Users need comprehensive guides, add detailed sections
   - NEVER add installation guides in the middle of a tutorial
   - NEVER add basic setup in advanced tutorial sections

4. BIOLOGICAL CORRECTNESS & RELEVANCE
   - Use accurate biological terminology and concepts
   - Provide biologically meaningful examples and explanations
   - Ensure suggestions align with current biological knowledge
   - Use appropriate biological context for the software domain
   - Avoid generic or incorrect biological statements
   - Focus on biologically relevant use cases and applications

5. ONE LOCATION PER TOPIC
   - Group related suggestions into ONE section
   - Don't scatter same information across multiple locations
   - Performance suggestions → ONE "Performance Notes" section
   - Installation suggestions → ONE "Installation" section (only if appropriate context)

6. ALIGN WITH EVALUATION CRITERIA (CONTEXT-AWARE)
   - Readability: Simplify language, add definitions
   - Coverage: ADD missing sections ONLY if contextually appropriate
     * Tutorial context: Add usage examples, analysis steps, not installation
     * README context: Add prerequisites, setup, installation
     * Documentation context: Add comprehensive guides
   - Reproducibility: Add versions, data sources, expected outputs
   - Structure: Add headers, TOC, but DON'T reorganize existing structure
   - Code Quality: Fix hardcoded paths, add error handling
   - Result Verification: Add expected output examples
   - Performance: Add ONE section with specific hardware/runtime numbers

7. RESPECT EVALUATION SCORES
   - Excellent: Minimal changes only
   - Good: Add specific details where missing
   - Fair: Add missing sections, provide specifics (if contextually appropriate)
   - Poor: Major additions but NEVER delete existing content

STYLE & FORMATTING
- Preserve existing tone and style markers: {tone_markers}
- Use heading style "{heading_style}" and list style "{list_style}"
- Link style "{link_style}"
- Plain markdown only (no code fences around entire output)
- No meta-commentary, no concluding remarks
- No generic filler text or marketing language

OUTPUT
Return ONLY the improved section content that:
1. Addresses the specific evaluation feedback: {guidance}
2. Provides concrete, actionable information
3. Respects the original text context (if provided)
4. Fits in the document's existing structure
5. Is contextually appropriate for the document type
6. Stops immediately after content (no conclusions)
"""

LLM_FULLDOC_PROMPT = """
You are "BioGuider," enhancing complete documentation based on systematic evaluation.

GOAL
Enhance an EXISTING document by implementing ALL improvements from the evaluation report.
Output a complete, enhanced, ready-to-publish markdown file.

INPUTS
- evaluation_report (structured feedback): <<{evaluation_report}>>
- target_file: {target_file}
- repo_context_excerpt: <<{context}>>
- original_document: <<{original_content}>>
- total_suggestions: {total_suggestions}

CRITICAL RULES

1. PRESERVE EXISTING STRUCTURE & CONTENT
   - Keep EVERY existing section in original order
   - Keep EVERY code block (including <details>, <summary> tags)
   - Keep ALL existing explanations, examples, text
   - Keep ALL YAML frontmatter, metadata
   - NEVER delete ANY sections, paragraphs, or code
   - NEVER reorganize sections or change order
   - NEVER remove HTML tags (<details>, <summary>, etc.)

2. SYSTEMATIC INTEGRATION OF ALL {total_suggestions} SUGGESTIONS
   - Read ALL {total_suggestions} suggestions from evaluation
   - Group by evaluation category (Readability, Coverage, etc.)
   - Map each suggestion to WHERE it belongs in the ORIGINAL document
   - Group related suggestions into ONE section (don't scatter)
   - Make ONE pass through the document applying ALL enhancements

3. SHOW, DON'T TELL
   - Provide SPECIFIC details (numbers, versions, commands)
   - NEVER write generic statements like "Ensure adequate resources"
   - If evaluation asks for hardware: provide actual RAM/CPU numbers
   - If evaluation asks for dependencies: list exact package versions
   - If evaluation asks for validation: show actual code with expected output

4. BIOLOGICAL CORRECTNESS & RELEVANCE
   - Use accurate biological terminology and concepts
   - Provide biologically meaningful examples and explanations
   - Ensure suggestions align with current biological knowledge
   - Use appropriate biological context for the software domain
   - Avoid generic or incorrect biological statements
   - Focus on biologically relevant use cases and applications

5. RESPECT EVALUATION SCORES
   - Excellent: Minimal changes only
   - Good: Add specific details where missing
   - Fair: Add missing sections, provide specifics
   - Poor: Major additions but NEVER delete existing content

6. HANDLE SPECIFIC EVALUATION CATEGORIES (CONTEXT-AWARE)
   - Readability: Simplify language, add definitions
   - Coverage: ADD missing sections ONLY if contextually appropriate
     * Tutorial context: Add usage examples, analysis steps, not installation
     * README context: Add prerequisites, setup, installation
     * Documentation context: Add comprehensive guides
   - Reproducibility: Add versions, data sources, expected outputs
   - Structure: Add headers, TOC, but DON'T reorganize existing structure
   - Code Quality: Fix hardcoded paths, add error handling
   - Result Verification: Add expected output examples
   - Performance: Add ONE section with specific hardware/runtime numbers

STRICT CONSTRAINTS
- NEVER invent hardware specs, version numbers, performance metrics without source
- If evaluation requests but context lacks data: provide reasonable defaults with caveats
- ABSOLUTELY FORBIDDEN: Wrapping entire output in ```markdown fences
- ABSOLUTELY FORBIDDEN: Adding conclusions, summaries, or wrap-up paragraphs at end
- ABSOLUTELY FORBIDDEN: Deleting ANY existing content
- ABSOLUTELY FORBIDDEN: Reorganizing major sections
- REQUIRED: Stop immediately after the last section from the original
- REQUIRED: Preserve ALL metadata (YAML frontmatter, etc.)

OUTPUT
Return the complete enhanced document for {target_file}.
- Pure markdown content only
- No meta-commentary, no fences
- Ready to copy-paste and publish
- All {total_suggestions} improvements integrated
- Original structure and content preserved
"""

LLM_README_COMPREHENSIVE_PROMPT = """
You are "BioGuider," creating or enhancing README documentation.

GOAL
Create a comprehensive, professional README that addresses all evaluation feedback.

INPUTS
- evaluation_report (structured feedback): <<{evaluation_report}>>
- target_file: {target_file}
- repo_context_excerpt: <<{context}>>
- original_readme (if exists): <<{original_content}>>

CRITICAL RULES

1. SHOW, DON'T TELL
   - Actual commands, not descriptions
   - Specific versions, not "recent versions"
   - Working examples, not pseudo-code

2. ONE LOCATION PER TOPIC
   - Dependencies → ONE section
   - Installation → ONE section (with subsections if needed)
   - Performance → ONE section (if applicable)

3. SPECIFIC DATA ONLY
   - Don't invent version numbers
   - Don't invent system requirements
   - Use what's in context or provide reasonable defaults with caveats

4. PRESERVE EXISTING
   - If a README exists, enhance it
   - Don't delete working content
   - Keep existing structure if it's good

5. BIOLOGICAL CORRECTNESS & RELEVANCE
   - Use accurate biological terminology and concepts
   - Provide biologically meaningful examples and explanations
   - Ensure suggestions align with current biological knowledge
   - Use appropriate biological context for the software domain
   - Avoid generic or incorrect biological statements
   - Focus on biologically relevant use cases and applications

6. ADDRESS EVALUATION SUGGESTIONS
   - Available: Create README with all essential sections
   - Readability: Simplify complex sentences, add explanations
   - Project Purpose: Add clear goal statement and key applications
   - Hardware/Software Spec: Add specific system requirements
   - Dependencies: List exact package versions
   - License Information: State license type and link to LICENSE file
   - Author/Contributor Info: Add credits and contact information

STANDARD README STRUCTURE
- Project name and description
- Overview with key applications
- Installation (prerequisites, commands, verification)
- Quick Start with working example
- Usage (basic and advanced examples)
- System Requirements (if applicable)
- Dependencies with versions
- Contributing guidelines
- License information
- Contact/maintainer info

STRICT CONSTRAINTS
- Don't add excessive badges, emojis, or marketing hype
- Do add clear installation instructions, working code examples
- Balance: comprehensive but concise
- Professional, neutral tone
- Proper markdown formatting

OUTPUT
Return complete README.md content.
- Pure markdown only
- No meta-commentary, no fences
- Professional, clear, actionable
- Ready to publish
- All evaluation suggestions addressed
"""

# Continuation prompt template - used when document generation is truncated
LLM_CONTINUATION_PROMPT = """
You are "BioGuider," continuing a truncated document.

CRITICAL: This is STRICT CONTINUATION ONLY.
- You are NOT creating new content
- You are NOT adding conclusions
- You are ONLY completing missing sections from the original

PREVIOUS CONTENT (do not repeat):
```
{existing_content_tail}
```

CONTINUATION PROCESS:
1. Identify the last complete section above
2. Identify which sections are missing from the original document structure
3. Continue IMMEDIATELY from where the content stopped
4. Use the same style, tone, and format as the existing content
5. Add ONLY the missing sections from the original structure
6. Stop when the original structure is complete

FORBIDDEN ADDITIONS:
- "## Conclusion" section
- "## Summary" section
- "## Additional Resources" section
- "For further guidance..." text
- Any wrap-up or concluding content
- Any content not in the original document structure

OUTPUT:
Return ONLY continuation content that completes the original structure.
- No commentary
- No fences
- No conclusions
- Stop immediately when the structure is complete
"""


class LLMContentGenerator:
    def __init__(self, llm: BaseChatOpenAI):
        self.llm = llm

    def _detect_truncation(self, content: str, target_file: str, original_content: str = None) -> bool:
        """
        Detect if content appears to be truncated based on common patterns.
        Universal detection for all file types.

        Args:
            content: Generated content to check
            target_file: Target file path for context
            original_content: Original content for comparison (if available)

        Returns:
            True if content appears truncated, False otherwise
        """
        if not content or len(content.strip()) < 100:
            return True

        # 1. Compare to the original length if available (most reliable indicator)
        if original_content:
            original_len = len(original_content)
            generated_len = len(content)
            # If generated content is significantly shorter than the original (< 80%), likely truncated
            if generated_len < original_len * 0.8:
                return True

        # 2. Check for very short content (applies to all files)
        # Only flag as truncated if content is very short (< 500 chars)
        if len(content) < 500:
            return True

        # 3. Check for incomplete code blocks (any language)
        # Count opening and closing code fences
        code_fence_count = content.count('```')
        if code_fence_count > 0 and code_fence_count % 2 != 0:
            # Unbalanced code fences suggest truncation
            return True

        # 4. Check for language-specific code blocks
        if target_file.endswith('.Rmd'):
            # R chunks should be complete
            r_chunks_open = re.findall(r'```\{r[^}]*\}', content)
            if r_chunks_open and not content.rstrip().endswith('```'):
                # Has R chunks but doesn't end with a closing fence
                return True

        if target_file.endswith(('.py', '.js', '.ts', '.java', '.cpp', '.c')):
            # Check for incomplete class/function definitions
            lines = content.split('\n')
            last_lines = [line.strip() for line in lines[-5:] if line.strip()]
            if last_lines:
                last_line = last_lines[-1]
                if (last_line.endswith(':') or
                        last_line.endswith('{') or
                        last_line.endswith('(') or
                        'def ' in last_line or
                        'class ' in last_line or
                        'function ' in last_line):
                    return True

        # 5. Check for incomplete markdown sections (applies to all markdown-like files)
        if any(target_file.endswith(ext) for ext in ['.md', '.Rmd', '.rst', '.txt']):
            lines = content.split('\n')
            last_non_empty_line = None
            for line in reversed(lines):
                if line.strip():
                    last_non_empty_line = line.strip()
                    break

            if last_non_empty_line:
                # Check if the last line looks incomplete
                incomplete_endings = [
                    '##',    # Header without content
                    '###',   # Header without content
                    '####',  # Header without content
                    '-',     # List item
                    '*',     # List item or emphasis
                    ':',     # Definition or label
                    '|',     # Table row
                ]

                for ending in incomplete_endings:
                    if last_non_empty_line.endswith(ending):
                        return True

            # Check if the content ends with an incomplete pattern
            # (strip only trailing newlines so patterns ending in a space can match)
            content_end = content[-300:].lower().rstrip('\n')
            incomplete_patterns = [
                '## ',        # Section header without content
                '### ',       # Subsection without content
                '#### ',      # Sub-subsection without content
                '```{',       # Incomplete code chunk
                '```r',       # Incomplete R chunk
                '```python',  # Incomplete Python chunk
            ]

            for pattern in incomplete_patterns:
                if content_end.endswith(pattern.lower()):
                    return True

        return False
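
    # Illustrative sketch (hypothetical call, not part of the packaged source):
    #
    #   gen = LLMContentGenerator(llm)
    #   draft = "x" * 600 + "\n```python\nprint('hi')"
    #   gen._detect_truncation(draft, "demo.md")   # -> True (unbalanced ``` fence)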

    def _find_continuation_point(self, content: str, original_content: str = None) -> str | None:
        """
        Find a better continuation point than just the last 1000 characters.
        Looks for the last complete section or code block to continue from.

        Args:
            content: The generated content so far
            original_content: The original content for comparison

        Returns:
            A suitable continuation point, or None if not found
        """
        if not content:
            return None

        lines = content.split('\n')
        if len(lines) < 10:  # Too short to find a good continuation point
            return None

        # Strategy 1: Find the last complete section (header with content after it)
        for i in range(len(lines) - 1, -1, -1):
            line = lines[i].strip()
            if line.startswith('## ') and i + 1 < len(lines):
                # Check if there's content after this header
                next_lines = []
                for j in range(i + 1, min(i + 10, len(lines))):  # Look at the next 10 lines
                    if lines[j].strip() and not lines[j].strip().startswith('##'):
                        next_lines.append(lines[j])
                    else:
                        break

                if next_lines:  # Found a header with content after it
                    # Return from this header onwards
                    return '\n'.join(lines[i:])

        # Strategy 2: Find the last complete code block
        in_code_block = False
        code_block_start = -1

        for i in range(len(lines) - 1, -1, -1):
            line = lines[i].strip()
            if line.startswith('```') and not in_code_block:
                in_code_block = True
                code_block_start = i
            elif line.startswith('```') and in_code_block:
                # Found a complete code block
                return '\n'.join(lines[code_block_start:])

        # Strategy 3: Find the last complete paragraph (ends with a period)
        for i in range(len(lines) - 1, -1, -1):
            line = lines[i].strip()
            if line and line.endswith('.') and not line.startswith('#') and not line.startswith('```'):
                # Found a complete sentence, return from there
                return '\n'.join(lines[i:])

        # Strategy 4: If the original content is available, find where the generated content diverges
        if original_content:
            # Simple approach: find the longest common suffix
            min_len = min(len(content), len(original_content))
            common_length = 0

            for i in range(1, min_len + 1):
                if content[-i:] == original_content[-i:]:
                    common_length = i
                else:
                    break

            if common_length > 100:  # Found a significant common ending
                return content[-(common_length + 100):]  # Include some context

        return None
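
    # Illustrative note: for drafts of at least 10 lines, the four strategies
    # above are tried in order - resume from the last "## " header that has
    # prose after it, else from the last complete code block, else from the
    # last sentence ending in a period, else from a long common suffix shared
    # with the original document.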

    def _appears_complete(self, content: str, target_file: str, original_content: str = None) -> bool:
        """
        Check if content appears to be complete based on structure, patterns, AND original length.
        Universal completion check for all file types.

        CRITICAL: If original_content is provided, the generated content MUST be at least 90% of
        the original length to be considered complete, regardless of other heuristics. This
        prevents the LLM from fooling us with fake conclusions.

        Args:
            content: Generated content to check
            target_file: Target file path for context
            original_content: Original content for length comparison (optional but recommended)

        Returns:
            True if content appears complete, False if it needs continuation
        """
        if not content or len(content.strip()) < 100:
            return False

        # CRITICAL: If original content is provided, check the length ratio first
        # This prevents the LLM from fooling us with fake conclusions
        if original_content and isinstance(original_content, str):
            generated_len = len(content)
            original_len = len(original_content)
            if generated_len < original_len * 0.9:
                # Generated content is too short compared to the original - NOT complete
                return False

        # 1. Check for balanced code blocks (applies to all files)
        code_block_count = content.count('```')
        if code_block_count > 0 and code_block_count % 2 != 0:
            # Unbalanced code blocks suggest incomplete content
            return False

        # 2. File-type-specific checks

        # RMarkdown files
        if target_file.endswith('.Rmd'):
            # Check for proper YAML frontmatter
            if not content.startswith('---'):
                return False

            # Check for conclusion patterns
            conclusion_patterns = [
                'sessionInfo()',
                'session.info()',
                '## Conclusion',
                '## Summary',
                '## Session Info',
                '</details>',
                'knitr::knit(',
            ]

            content_lower = content.lower()
            has_conclusion = any(pattern.lower() in content_lower for pattern in conclusion_patterns)

            # If we have a conclusion and balanced code blocks, likely complete
            if has_conclusion and code_block_count > 0:
                return True

        # Markdown files
        if target_file.endswith('.md'):
            # Check for conclusion sections
            conclusion_patterns = [
                '## Conclusion',
                '## Summary',
                '## Next Steps',
                '## Further Reading',
                '## References',
                '## License',
            ]

            content_lower = content.lower()
            has_conclusion = any(pattern.lower() in content_lower for pattern in conclusion_patterns)

            if has_conclusion and len(content) > 2000:
                return True

        # Python files
        if target_file.endswith('.py'):
            # Check for balanced brackets/parentheses
            if content.count('(') != content.count(')'):
                return False
            if content.count('[') != content.count(']'):
                return False
            if content.count('{') != content.count('}'):
                return False

            # Check for complete structure (reasonable length + proper ending)
            lines = [line for line in content.split('\n') if line.strip()]
            if len(lines) > 20:  # Has reasonable content
                last_line = lines[-1].strip()
                # Should not end with incomplete statements
                if not (last_line.endswith(':') or
                        last_line.endswith('\\') or
                        last_line.endswith(',')):
                    return True

        # JavaScript/TypeScript files
        if target_file.endswith(('.js', '.ts', '.jsx', '.tsx')):
            # Check for balanced brackets
            if content.count('{') != content.count('}'):
                return False
            if content.count('(') != content.count(')'):
                return False

            lines = [line for line in content.split('\n') if line.strip()]
            if len(lines) > 20:
                last_line = lines[-1].strip()
                # Complete if it ends with proper syntax
                if (last_line.endswith('}') or
                        last_line.endswith(';') or
                        last_line.endswith('*/') or
                        last_line.startswith('//')):
                    return True

        # 3. Generic checks for all file types
        if len(content) > 3000:  # Reasonable length
            # Check if it ends with complete sentences/sections
            lines = content.split('\n')
            last_lines = [line.strip() for line in lines[-10:] if line.strip()]

            if last_lines:
                last_line = last_lines[-1]
                # Complete if it ends with proper punctuation or closing tags
                complete_endings = [
                    '.',           # Sentence
                    '```',         # Code block
                    '---',         # Section divider
                    '</details>',  # HTML details
                    '}',           # Closing brace
                    ';',           # Statement end
                    '*/',          # Comment end
                ]

                if any(last_line.endswith(ending) for ending in complete_endings):
                    return True

        return False
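
    # Descriptive note: the 0.8 length ratio in _detect_truncation and the 0.9
    # ratio here leave a band - a draft between 80% and 90% of the original
    # length is neither flagged as truncated by the ratio check nor accepted
    # as complete on length grounds alone.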

    def _generate_continuation(self, target_file: str, evaluation_report: dict,
                               context: str, existing_content: str) -> tuple[str, dict]:
        """
        Generate continuation content from where the previous generation left off.

        Args:
            target_file: Target file path
            evaluation_report: Evaluation report data
            context: Repository context
            existing_content: Previously generated content

        Returns:
            Tuple of (continuation_content, token_usage)
        """
        # Create an LLM for continuation (uses 16k tokens by default)
        from bioguider.agents.agent_utils import get_llm
        import os

        llm = get_llm(
            api_key=os.environ.get("OPENAI_API_KEY"),
            model_name=os.environ.get("OPENAI_MODEL", "gpt-4o"),
            azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
            api_version=os.environ.get("OPENAI_API_VERSION"),
            azure_deployment=os.environ.get("OPENAI_DEPLOYMENT_NAME"),
        )

        conv = CommonConversation(llm)

        # Calculate total suggestions for the prompt
        total_suggestions = 1
        if isinstance(evaluation_report, dict):
            if "total_suggestions" in evaluation_report:
                total_suggestions = evaluation_report["total_suggestions"]
            elif "suggestions" in evaluation_report and isinstance(evaluation_report["suggestions"], list):
                total_suggestions = len(evaluation_report["suggestions"])

        # Use the centralized continuation prompt template
        continuation_prompt = LLM_CONTINUATION_PROMPT.format(
            target_file=target_file,
            existing_content_tail=existing_content[-1000:],  # Last 1000 chars for context
            total_suggestions=total_suggestions,
            evaluation_report_excerpt=json.dumps(evaluation_report)[:4000],
            context_excerpt=context[:2000],
        )
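        # The template only references {existing_content_tail}; str.format
        # silently ignores the other keyword arguments passed here.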

        content, token_usage = conv.generate(
            system_prompt=continuation_prompt,
            instruction_prompt="Continue the document from where it left off."
        )
        return content.strip(), token_usage

    def generate_section(self, suggestion: SuggestionItem, style: StyleProfile, context: str = "") -> tuple[str, dict]:
        conv = CommonConversation(self.llm)
        section_name = suggestion.anchor_hint or suggestion.category.split(".")[-1].replace("_", " ").title()

        # Extract the original text snippet and evaluation score from the suggestion source
        original_text = ""
        evaluation_score = ""
        if hasattr(suggestion, 'source') and suggestion.source:
            original_text = suggestion.source.get('original_text', '')
            evaluation_score = suggestion.source.get('score', '')

        # Detect the document context to help with appropriate responses
        document_context = self._detect_document_context(context, suggestion.anchor_title or "")

        system_prompt = LLM_SECTION_PROMPT.format(
            tone_markers=", ".join(style.tone_markers or []),
            heading_style=style.heading_style,
            list_style=style.list_style,
            link_style=style.link_style,
            section=section_name,
            anchor_title=section_name,
            suggestion_category=suggestion.category,
            original_text=original_text,
            evaluation_score=evaluation_score,
            context=context[:2500],
            guidance=(suggestion.content_guidance or "").strip(),
        )

        # Add a context-aware instruction
        context_instruction = f"\n\nCONTEXT DETECTED: {document_context}\n"
        if document_context == "TUTORIAL":
            context_instruction += "Focus on usage/analysis steps, NOT installation. Users already have software installed.\n"
        elif document_context == "README":
            context_instruction += "Focus on installation, setup, and getting started. Users need to install software.\n"
        elif document_context == "BIOLOGICAL":
            context_instruction += "Use accurate biological terminology and provide biologically meaningful examples.\n"

        system_prompt += context_instruction
        content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the section content now.")
        return content.strip(), token_usage
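
    # Illustrative usage sketch (hypothetical objects; SuggestionItem and
    # StyleProfile are built elsewhere in bioguider.generation):
    #
    #   generator = LLMContentGenerator(llm)
    #   text, usage = generator.generate_section(suggestion, style, context=repo_excerpt)
    #   # `text` is the refined section body; `usage` is the token-usage dict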

    def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "", original_content: str = None) -> tuple[str, dict]:
        # Create the LLM (uses 16k tokens by default - enough for any document)
        from bioguider.agents.agent_utils import get_llm
        import os
        import json
        from datetime import datetime

        # Get the LLM with the default 16k token limit
        llm = get_llm(
            api_key=os.environ.get("OPENAI_API_KEY"),
            model_name=os.environ.get("OPENAI_MODEL", "gpt-4o"),
            azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
            api_version=os.environ.get("OPENAI_API_VERSION"),
            azure_deployment=os.environ.get("OPENAI_DEPLOYMENT_NAME"),
        )

        conv = CommonConversation(llm)

        # Debug: save generation settings and context
        debug_info = {
            "target_file": target_file,
            "timestamp": datetime.now().isoformat(),
            "evaluation_report": evaluation_report,
            "context_length": len(context),
            "llm_settings": {
                "model_name": os.environ.get("OPENAI_MODEL", "gpt-4o"),
                "azure_deployment": os.environ.get("OPENAI_DEPLOYMENT_NAME"),
                "max_tokens": getattr(llm, 'max_tokens', 16384)
            }
        }

        # Save debug info to a file
        debug_dir = "outputs/debug_generation"
        os.makedirs(debug_dir, exist_ok=True)
        safe_filename = target_file.replace("/", "_").replace(".", "_")
        debug_file = os.path.join(debug_dir, f"{safe_filename}_debug.json")
        with open(debug_file, 'w', encoding='utf-8') as f:
            json.dump(debug_info, f, indent=2, ensure_ascii=False)

        # Debug: save the raw evaluation_report to see what's being serialized
        eval_report_file = os.path.join(debug_dir, f"{safe_filename}_raw_eval_report.json")
        with open(eval_report_file, 'w', encoding='utf-8') as f:
            json.dump(evaluation_report, f, indent=2, ensure_ascii=False)

        # Use the comprehensive README prompt for README.md files
        if target_file.endswith("README.md"):
            system_prompt = LLM_README_COMPREHENSIVE_PROMPT.format(
                target_file=target_file,
                evaluation_report=json.dumps(evaluation_report)[:6000],
                context=context[:4000],
                original_content=original_content or "",
            )
        else:
            # Calculate total suggestions for the prompt
            total_suggestions = 1
            if isinstance(evaluation_report, dict):
                if "total_suggestions" in evaluation_report:
                    total_suggestions = evaluation_report["total_suggestions"]
                elif "suggestions" in evaluation_report and isinstance(evaluation_report["suggestions"], list):
                    total_suggestions = len(evaluation_report["suggestions"])

            system_prompt = LLM_FULLDOC_PROMPT.format(
                target_file=target_file,
                evaluation_report=json.dumps(evaluation_report)[:6000],
                context=context[:4000],
                original_content=original_content or "",
                total_suggestions=total_suggestions,
            )

        # Save the initial prompt for debugging
        prompt_file = os.path.join(debug_dir, f"{safe_filename}_prompt.txt")
        with open(prompt_file, 'w', encoding='utf-8') as f:
            f.write("=== SYSTEM PROMPT ===\n")
            f.write(system_prompt)
            f.write("\n\n=== INSTRUCTION PROMPT ===\n")
            f.write("Write the full document now.")
            # Context is already embedded in the system prompt; avoid duplicating it here

        # Initial generation
        # If the original document is long (RMarkdown > 8k chars), avoid truncation via a chunked rewrite
        # Threshold lowered from 12k to 8k to catch more documents that would otherwise truncate
        use_chunked = bool(target_file.endswith('.Rmd') and isinstance(original_content, str) and len(original_content) > 8000)
        if use_chunked:
            content, token_usage = self._generate_full_document_chunked(
                target_file=target_file,
                evaluation_report=evaluation_report,
                context=context,
                original_content=original_content or "",
                debug_dir=debug_dir,
                safe_filename=safe_filename,
            )
        else:
            content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the full document now.")
        content = content.strip()

        # Save the initial generation for debugging
        generation_file = os.path.join(debug_dir, f"{safe_filename}_generation_0.txt")
        with open(generation_file, 'w', encoding='utf-8') as f:
            f.write("=== INITIAL GENERATION ===\n")
            f.write(f"Tokens: {token_usage}\n")
            f.write(f"Length: {len(content)} characters\n")
            if original_content:
                f.write(f"Original length: {len(original_content)} characters\n")
            f.write(f"Truncation detected: {self._detect_truncation(content, target_file, original_content)}\n")
            f.write("\n=== CONTENT ===\n")
            f.write(content)

        # Check for truncation and continue if needed
        max_continuations = 3  # Limit to prevent infinite loops
        continuation_count = 0

        while (not use_chunked and self._detect_truncation(content, target_file, original_content) and
               continuation_count < max_continuations):

            # Additional check: if the content appears complete, don't continue
            # Pass original_content so the length ratio can be checked
            if self._appears_complete(content, target_file, original_content):
                break
            continuation_count += 1

            # Calculate total suggestions for debugging info
            total_suggestions = 1
            if isinstance(evaluation_report, dict):
                if "total_suggestions" in evaluation_report:
                    total_suggestions = evaluation_report["total_suggestions"]
                elif "suggestions" in evaluation_report and isinstance(evaluation_report["suggestions"], list):
                    total_suggestions = len(evaluation_report["suggestions"])

            # Find a better continuation point - look for the last complete section
            continuation_point = self._find_continuation_point(content, original_content)
            if not continuation_point:
                continuation_point = content[-1000:]  # Fall back to the last 1000 chars

            # Generate the continuation prompt using the centralized template
            continuation_prompt = LLM_CONTINUATION_PROMPT.format(
                target_file=target_file,
                existing_content_tail=continuation_point,
                total_suggestions=total_suggestions,
                evaluation_report_excerpt=json.dumps(evaluation_report)[:4000],
                context_excerpt=context[:2000],
            )

            # Save the continuation prompt for debugging
            continuation_prompt_file = os.path.join(debug_dir, f"{safe_filename}_continuation_{continuation_count}_prompt.txt")
            with open(continuation_prompt_file, 'w', encoding='utf-8') as f:
                f.write(continuation_prompt)

            # Generate the continuation
            continuation_content, continuation_usage = self._generate_continuation(
                target_file=target_file,
                evaluation_report=evaluation_report,
                context=context,
                existing_content=content
            )

            # Save the continuation generation for debugging
            continuation_file = os.path.join(debug_dir, f"{safe_filename}_continuation_{continuation_count}.txt")
            with open(continuation_file, 'w', encoding='utf-8') as f:
                f.write(f"=== CONTINUATION {continuation_count} ===\n")
                f.write(f"Tokens: {continuation_usage}\n")
                f.write(f"Length: {len(continuation_content)} characters\n")
                f.write(f"Truncation detected: {self._detect_truncation(continuation_content, target_file)}\n")
                f.write("\n=== CONTENT ===\n")
                f.write(continuation_content)

            # Merge the continuation with the existing content
            if continuation_content:
                content += "\n\n" + continuation_content
                # Update token usage
                token_usage = {
                    "total_tokens": token_usage.get("total_tokens", 0) + continuation_usage.get("total_tokens", 0),
                    "prompt_tokens": token_usage.get("prompt_tokens", 0) + continuation_usage.get("prompt_tokens", 0),
                    "completion_tokens": token_usage.get("completion_tokens", 0) + continuation_usage.get("completion_tokens", 0),
                }

                # Save the merged content for debugging
                merged_file = os.path.join(debug_dir, f"{safe_filename}_merged_{continuation_count}.txt")
                with open(merged_file, 'w', encoding='utf-8') as f:
                    f.write(f"=== MERGED CONTENT AFTER CONTINUATION {continuation_count} ===\n")
                    f.write(f"Total length: {len(content)} characters\n")
                    f.write(f"Truncation detected: {self._detect_truncation(content, target_file)}\n")
                    f.write("\n=== CONTENT ===\n")
                    f.write(content)
            else:
                # If the continuation is empty, break to avoid an infinite loop
                break

        # Clean up any markdown code fences that might have been added
        content = self._clean_markdown_fences(content)

        # Save the final cleaned content for debugging
        final_file = os.path.join(debug_dir, f"{safe_filename}_final.txt")
        with open(final_file, 'w', encoding='utf-8') as f:
            f.write("=== FINAL CLEANED CONTENT ===\n")
            f.write(f"Total tokens: {token_usage}\n")
            f.write(f"Final length: {len(content)} characters\n")
            f.write(f"Continuations used: {continuation_count}\n")
            f.write("\n=== CONTENT ===\n")
            f.write(content)

        return content, token_usage
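
    # Illustrative usage sketch (assumes the OPENAI_* / AZURE_* environment
    # variables read by get_llm above are set; the argument values are
    # hypothetical):
    #
    #   doc, usage = generator.generate_full_document(
    #       target_file="vignettes/analysis.Rmd",
    #       evaluation_report=report_dict,
    #       context=repo_excerpt,
    #       original_content=original_rmd_text,
    #   )
    #   # .Rmd originals longer than 8,000 characters take the chunked path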

    def _clean_markdown_fences(self, content: str) -> str:
        """
        Remove markdown code fences that shouldn't be in the final content.
        """
        # Remove ```markdown at the beginning
        if content.startswith('```markdown\n'):
            content = content[12:]  # Remove ```markdown\n

        # Remove ``` at the end
        if content.endswith('\n```'):
            content = content[:-4]  # Remove \n```
        elif content.endswith('```'):
            content = content[:-3]  # Remove ```

        # Remove any standalone ```markdown lines
        lines = content.split('\n')
        cleaned_lines = []
        for line in lines:
            if line.strip() == '```markdown':
                continue
            cleaned_lines.append(line)

        return '\n'.join(cleaned_lines)
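
    # Illustrative behaviour sketch:
    #   _clean_markdown_fences("```markdown\n# Title\nBody\n```")
    #   # -> "# Title\nBody"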

    def _split_rmd_into_chunks(self, content: str) -> list[dict]:
        """
        Split RMarkdown content into chunks for processing.

        CRITICAL: This function must correctly identify code blocks to preserve them.
        Code blocks in RMarkdown start with ```{r...} or ``` and end with ```.

        Returns a list of dicts with 'type' (yaml/code/text) and 'content'.
        """
        chunks = []
        if not content:
            return chunks
        lines = content.split('\n')
        n = len(lines)
        i = 0

        # Handle YAML frontmatter
        if n >= 3 and lines[0].strip() == '---':
            j = 1
            while j < n and lines[j].strip() != '---':
                j += 1
            if j < n and lines[j].strip() == '---':
                chunks.append({"type": "yaml", "content": '\n'.join(lines[0:j+1])})
                i = j + 1

        buffer = []
        in_code = False

        for k in range(i, n):
            line = lines[k]
            # Check for a code fence - must be at the start of the line (possibly with whitespace)
            stripped = line.strip()

            # Detect a code fence opening: ``` or ```{r...} or ```python etc.
            is_code_fence = stripped.startswith('```')

            if is_code_fence:
                if in_code:
                    # This is a closing fence
                    buffer.append(line)
                    chunks.append({"type": "code", "content": '\n'.join(buffer)})
                    buffer = []
                    in_code = False
                else:
                    # This is an opening fence
                    # Save any accumulated text first
                    if buffer and any(s.strip() for s in buffer):
                        chunks.append({"type": "text", "content": '\n'.join(buffer)})
                    # Start a new code block with the opening fence
                    buffer = [line]
                    in_code = True
            else:
                buffer.append(line)

        # Handle the remaining buffer
        if buffer and any(s.strip() for s in buffer):
            if in_code:
                # Unclosed code block - this is an error, but add it anyway
                print("WARNING: Unclosed code block detected in RMarkdown")
                chunks.append({"type": "code", "content": '\n'.join(buffer)})
            else:
                chunks.append({"type": "text", "content": '\n'.join(buffer)})

        # Validation: count code fences in the chunks vs the original
        original_fences = len(re.findall(r'^```', content, flags=re.M))
        chunk_fences = 0
        for ch in chunks:
            if ch["type"] == "code":
                chunk_fences += len(re.findall(r'^```', ch["content"], flags=re.M))

        if original_fences != chunk_fences:
            print(f"WARNING: Code fence count mismatch in chunking: original={original_fences}, chunks={chunk_fences}")

        return chunks
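
    # Illustrative behaviour sketch: an .Rmd whose lines are
    #   ---, title: demo, ---, Intro text, ```{r}, x <- 1, ```, More text
    # splits into four chunks, in order:
    #   yaml (the frontmatter), text ("Intro text"), code (the ```{r} block),
    #   text ("More text")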

    def _generate_text_chunk(self, conv: CommonConversation, evaluation_report: dict, context: str, chunk_text: str) -> tuple[str, dict]:
        LLM_CHUNK_PROMPT = (
            "You are BioGuider improving a single TEXT chunk of a larger RMarkdown document.\n\n"
            "GOAL\nRefine ONLY the given chunk's prose per evaluation suggestions while preserving structure.\n"
            "Do not add conclusions or new sections.\n\n"
            "INPUTS\n- evaluation_report: <<{evaluation_report}>>\n- repo_context_excerpt: <<{context}>>\n- original_chunk:\n<<<\n{chunk}\n>>>\n\n"
            "CRITICAL RULES\n"
            "- This is a TEXT-ONLY chunk - do NOT add any code blocks or code fences (```).\n"
            "- Preserve all headers and formatting in this chunk.\n"
            "- Do not invent technical specs.\n"
            "- Output ONLY the refined text (no code fences, no markdown code blocks).\n"
            "- NEVER add ``` anywhere in your output.\n"
            "- Keep the same approximate length as the original chunk."
        )
        system_prompt = LLM_CHUNK_PROMPT.format(
            evaluation_report=json.dumps(evaluation_report)[:4000],
            context=context[:1500],
            chunk=chunk_text[:6000],
        )
        content, usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Rewrite this text chunk now. Remember: NO code fences (```).")

        # Post-processing: remove any code fences that may have been added
        output = content.strip()

        # If the output contains code fences, the LLM didn't follow instructions.
        # Return the original to preserve the document structure.
        if '```' in output:
            print("WARNING: LLM added code fences to text chunk, using original")
            return chunk_text, usage

        return output, usage

    def _generate_full_document_chunked(self, target_file: str, evaluation_report: dict, context: str, original_content: str, debug_dir: str, safe_filename: str) -> tuple[str, dict]:
        conv = CommonConversation(self.llm)
        chunks = self._split_rmd_into_chunks(original_content)
        merged = []
        total_usage = {"total_tokens": 0, "prompt_tokens": 0, "completion_tokens": 0}
        from datetime import datetime

        # Save the chunk analysis for debugging
        chunk_analysis_file = os.path.join(debug_dir, f"{safe_filename}_chunk_analysis.txt")
        with open(chunk_analysis_file, 'w', encoding='utf-8') as f:
            f.write(f"Total chunks: {len(chunks)}\n")
            for idx, ch in enumerate(chunks):
                f.write(f"Chunk {idx}: type={ch['type']}, length={len(ch['content'])}\n")
                if ch['type'] == 'code':
                    f.write(f"  First line: {ch['content'].split(chr(10))[0][:80]}\n")

        for idx, ch in enumerate(chunks):
            if ch["type"] in ("yaml", "code"):
                # CRITICAL: pass through code/yaml chunks EXACTLY as-is
                merged.append(ch["content"])
                continue

            # For text chunks, try to improve but fall back to the original if needed
            out, usage = self._generate_text_chunk(conv, evaluation_report, context, ch["content"])

            # Validate that the output doesn't contain code fence fragments that could break structure
            if not out or '```' in out:
                # If the LLM added code fences in a text chunk, it could break the document.
                # Fall back to the original text.
                out = ch["content"]

            merged.append(out)
            try:
                total_usage["total_tokens"] += int(usage.get("total_tokens", 0))
                total_usage["prompt_tokens"] += int(usage.get("prompt_tokens", 0))
                total_usage["completion_tokens"] += int(usage.get("completion_tokens", 0))
            except Exception:
                pass
            chunk_file = os.path.join(debug_dir, f"{safe_filename}_chunk_{idx}.txt")
            with open(chunk_file, 'w', encoding='utf-8') as f:
                f.write(f"=== CHUNK {idx} ({ch['type']}) at {datetime.now().isoformat()} ===\n")
                f.write(out)

        content = '\n'.join(merged)

        # CRITICAL: validate that the code block structure is preserved
        original_fences = len(re.findall(r'^```', original_content, flags=re.M))
        generated_fences = len(re.findall(r'^```', content, flags=re.M))

        if original_fences != generated_fences:
            # The code block structure was broken - log an error and return the original
            error_file = os.path.join(debug_dir, f"{safe_filename}_ERROR_codeblock_mismatch.txt")
            with open(error_file, 'w', encoding='utf-8') as f:
                f.write("ERROR: Code block count mismatch!\n")
                f.write(f"Original: {original_fences} code fences\n")
                f.write(f"Generated: {generated_fences} code fences\n")
                f.write("\nReturning original content to preserve structure.\n")
            print(f"WARNING: Code block structure broken for {target_file}, returning original content")
            return original_content, total_usage

        return content, total_usage
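
    # Design note (descriptive): only "text" chunks are sent to the model; the
    # YAML frontmatter and every code chunk are reassembled verbatim, and the
    # fence-count check above rejects the whole rewrite rather than publish a
    # document with broken code blocks.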

    def _detect_document_context(self, context: str, anchor_title: str) -> str:
        """Detect the document context to help with appropriate responses."""
        context_lower = context.lower()
        anchor_lower = anchor_title.lower()

        # Check for a tutorial context
        if any(keyword in context_lower for keyword in ['tutorial', 'vignette', 'example', 'workflow', 'step-by-step']):
            return "TUTORIAL"

        # Check for a README context
        if any(keyword in context_lower for keyword in ['readme', 'installation', 'setup', 'prerequisites']):
            return "README"

        # Check for a documentation context
        if any(keyword in context_lower for keyword in ['documentation', 'guide', 'manual', 'reference']):
            return "DOCUMENTATION"

        # Check for a biological context
        if any(keyword in context_lower for keyword in ['cell', 'gene', 'protein', 'dna', 'rna', 'genome', 'transcriptome', 'proteome', 'metabolome']):
            return "BIOLOGICAL"

        # Default to a general context
        return "GENERAL"