bioguider-0.2.52-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +92 -0
  4. bioguider/agents/agent_tools.py +176 -0
  5. bioguider/agents/agent_utils.py +504 -0
  6. bioguider/agents/collection_execute_step.py +182 -0
  7. bioguider/agents/collection_observe_step.py +125 -0
  8. bioguider/agents/collection_plan_step.py +156 -0
  9. bioguider/agents/collection_task.py +184 -0
  10. bioguider/agents/collection_task_utils.py +142 -0
  11. bioguider/agents/common_agent.py +137 -0
  12. bioguider/agents/common_agent_2step.py +215 -0
  13. bioguider/agents/common_conversation.py +61 -0
  14. bioguider/agents/common_step.py +85 -0
  15. bioguider/agents/consistency_collection_step.py +102 -0
  16. bioguider/agents/consistency_evaluation_task.py +57 -0
  17. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  18. bioguider/agents/consistency_observe_step.py +110 -0
  19. bioguider/agents/consistency_query_step.py +77 -0
  20. bioguider/agents/dockergeneration_execute_step.py +186 -0
  21. bioguider/agents/dockergeneration_observe_step.py +154 -0
  22. bioguider/agents/dockergeneration_plan_step.py +158 -0
  23. bioguider/agents/dockergeneration_task.py +158 -0
  24. bioguider/agents/dockergeneration_task_utils.py +220 -0
  25. bioguider/agents/evaluation_installation_task.py +270 -0
  26. bioguider/agents/evaluation_readme_task.py +767 -0
  27. bioguider/agents/evaluation_submission_requirements_task.py +172 -0
  28. bioguider/agents/evaluation_task.py +206 -0
  29. bioguider/agents/evaluation_tutorial_task.py +169 -0
  30. bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
  31. bioguider/agents/evaluation_userguide_prompts.py +179 -0
  32. bioguider/agents/evaluation_userguide_task.py +154 -0
  33. bioguider/agents/evaluation_utils.py +127 -0
  34. bioguider/agents/identification_execute_step.py +181 -0
  35. bioguider/agents/identification_observe_step.py +104 -0
  36. bioguider/agents/identification_plan_step.py +140 -0
  37. bioguider/agents/identification_task.py +270 -0
  38. bioguider/agents/identification_task_utils.py +22 -0
  39. bioguider/agents/peo_common_step.py +64 -0
  40. bioguider/agents/prompt_utils.py +253 -0
  41. bioguider/agents/python_ast_repl_tool.py +69 -0
  42. bioguider/agents/rag_collection_task.py +130 -0
  43. bioguider/conversation.py +67 -0
  44. bioguider/database/code_structure_db.py +500 -0
  45. bioguider/database/summarized_file_db.py +146 -0
  46. bioguider/generation/__init__.py +39 -0
  47. bioguider/generation/benchmark_metrics.py +610 -0
  48. bioguider/generation/change_planner.py +189 -0
  49. bioguider/generation/document_renderer.py +157 -0
  50. bioguider/generation/llm_cleaner.py +67 -0
  51. bioguider/generation/llm_content_generator.py +1128 -0
  52. bioguider/generation/llm_injector.py +809 -0
  53. bioguider/generation/models.py +85 -0
  54. bioguider/generation/output_manager.py +74 -0
  55. bioguider/generation/repo_reader.py +37 -0
  56. bioguider/generation/report_loader.py +166 -0
  57. bioguider/generation/style_analyzer.py +36 -0
  58. bioguider/generation/suggestion_extractor.py +436 -0
  59. bioguider/generation/test_metrics.py +189 -0
  60. bioguider/managers/benchmark_manager.py +785 -0
  61. bioguider/managers/evaluation_manager.py +215 -0
  62. bioguider/managers/generation_manager.py +686 -0
  63. bioguider/managers/generation_test_manager.py +107 -0
  64. bioguider/managers/generation_test_manager_v2.py +525 -0
  65. bioguider/rag/__init__.py +0 -0
  66. bioguider/rag/config.py +117 -0
  67. bioguider/rag/data_pipeline.py +651 -0
  68. bioguider/rag/embedder.py +24 -0
  69. bioguider/rag/rag.py +138 -0
  70. bioguider/settings.py +103 -0
  71. bioguider/utils/code_structure_builder.py +59 -0
  72. bioguider/utils/constants.py +135 -0
  73. bioguider/utils/default.gitignore +140 -0
  74. bioguider/utils/file_utils.py +215 -0
  75. bioguider/utils/gitignore_checker.py +175 -0
  76. bioguider/utils/notebook_utils.py +117 -0
  77. bioguider/utils/pyphen_utils.py +73 -0
  78. bioguider/utils/python_file_handler.py +65 -0
  79. bioguider/utils/r_file_handler.py +551 -0
  80. bioguider/utils/utils.py +163 -0
  81. bioguider-0.2.52.dist-info/LICENSE +21 -0
  82. bioguider-0.2.52.dist-info/METADATA +51 -0
  83. bioguider-0.2.52.dist-info/RECORD +84 -0
  84. bioguider-0.2.52.dist-info/WHEEL +4 -0
bioguider/generation/llm_content_generator.py
@@ -0,0 +1,1128 @@
+from __future__ import annotations
+
+from typing import Dict
+import json
+import re
+import os
+from langchain_openai.chat_models.base import BaseChatOpenAI
+
+from bioguider.agents.common_conversation import CommonConversation
+from .models import StyleProfile, SuggestionItem
+
+
+LLM_SECTION_PROMPT = """
+You are "BioGuider," a precise documentation generator for biomedical/bioinformatics software.
+
+GOAL
+Write or refine a single documentation section based on specific evaluation feedback.
+
+INPUTS
+- suggestion_category: {suggestion_category}
+- anchor_title: {anchor_title}
+- guidance: {guidance}
+- original_text_snippet (if provided): {original_text}
+- evaluation_score: {evaluation_score}
+- repo_context_excerpt: <<{context}>>
+
+CRITICAL RULES
+
+1. SHOW, DON'T TELL
+- Provide SPECIFIC details (numbers, versions, commands)
+- NEVER write generic statements like "Ensure adequate resources"
+- If evaluation asks for hardware: provide actual RAM/CPU numbers
+- If evaluation asks for dependencies: list exact package versions
+- If evaluation asks for validation: show actual code with expected output
+
+2. RESPECT ORIGINAL CONTEXT
+- Fix EXACTLY what evaluation identified, no more, no less
+- Enhance/replace the specific part mentioned in "Original text snippet"
+- Don't rewrite entire document or add unrelated content
+
+3. CONTEXT-AWARE RESPONSES
+- TUTORIAL CONTEXT: Users already have software installed, focus on usage/analysis steps
+- README CONTEXT: Users need installation instructions, add setup sections
+- DOCUMENTATION CONTEXT: Users need comprehensive guides, add detailed sections
+- NEVER add installation guides in the middle of a tutorial
+- NEVER add basic setup in advanced tutorial sections
+
+4. BIOLOGICAL CORRECTNESS & RELEVANCE
+- Use accurate biological terminology and concepts
+- Provide biologically meaningful examples and explanations
+- Ensure suggestions align with current biological knowledge
+- Use appropriate biological context for the software domain
+- Avoid generic or incorrect biological statements
+- Focus on biologically relevant use cases and applications
+
+5. ONE LOCATION PER TOPIC
+- Group related suggestions into ONE section
+- Don't scatter same information across multiple locations
+- Performance suggestions → ONE "Performance Notes" section
+- Installation suggestions → ONE "Installation" section (only if appropriate context)
+
+6. ALIGN WITH EVALUATION CRITERIA (CONTEXT-AWARE)
+- Readability: Simplify language, add definitions
+- Coverage: ADD missing sections ONLY if contextually appropriate
+  * Tutorial context: Add usage examples, analysis steps, not installation
+  * README context: Add prerequisites, setup, installation
+  * Documentation context: Add comprehensive guides
+- Reproducibility: Add versions, data sources, expected outputs
+- Structure: Add headers, TOC, but DON'T reorganize existing structure
+- Code Quality: Fix hardcoded paths, add error handling
+- Result Verification: Add expected output examples
+- Performance: Add ONE section with specific hardware/runtime numbers
+
+7. RESPECT EVALUATION SCORES
+- Excellent: Minimal changes only
+- Good: Add specific details where missing
+- Fair: Add missing sections, provide specifics (if contextually appropriate)
+- Poor: Major additions but NEVER delete existing content
+
+STYLE & FORMATTING
+- Preserve existing tone and style markers: {tone_markers}
+- Use heading style "{heading_style}" and list style "{list_style}"
+- Link style "{link_style}"
+- Plain markdown only (no code fences around entire output)
+- No meta-commentary, no concluding remarks
+- No generic filler text or marketing language
+
+OUTPUT
+Return ONLY the improved section content that:
+1. Addresses the specific evaluation feedback: {guidance}
+2. Provides concrete, actionable information
+3. Respects the original text context (if provided)
+4. Fits in the document's existing structure
+5. Is contextually appropriate for the document type
+6. Stops immediately after content (no conclusions)
+"""
+
+LLM_FULLDOC_PROMPT = """
+You are "BioGuider," enhancing complete documentation based on systematic evaluation.
+
+GOAL
+Enhance an EXISTING document by implementing ALL improvements from evaluation report.
+Output a complete, enhanced, ready-to-publish markdown file.
+
+INPUTS
+- evaluation_report (structured feedback): <<{evaluation_report}>>
+- target_file: {target_file}
+- repo_context_excerpt: <<{context}>>
+- original_document: <<{original_content}>>
+- total_suggestions: {total_suggestions}
+
+CRITICAL RULES
+
+1. PRESERVE EXISTING STRUCTURE & CONTENT
+- Keep EVERY existing section in original order
+- Keep EVERY code block (including <details>, <summary> tags)
+- Keep ALL existing explanations, examples, text
+- Keep ALL YAML frontmatter, metadata
+- NEVER delete ANY sections, paragraphs, or code
+- NEVER reorganize sections or change order
+- NEVER remove HTML tags (<details>, <summary>, etc.)
+
+2. SYSTEMATIC INTEGRATION OF ALL {total_suggestions} SUGGESTIONS
+- Read ALL {total_suggestions} suggestions from evaluation
+- Group by evaluation category (Readability, Coverage, etc.)
+- Map each suggestion to WHERE it belongs in ORIGINAL document
+- Group related suggestions into ONE section (don't scatter)
+- Make ONE pass through document applying ALL enhancements
+
+3. SHOW, DON'T TELL
+- Provide SPECIFIC details (numbers, versions, commands)
+- NEVER write generic statements like "Ensure adequate resources"
+- If evaluation asks for hardware: provide actual RAM/CPU numbers
+- If evaluation asks for dependencies: list exact package versions
+- If evaluation asks for validation: show actual code with expected output
+
+4. BIOLOGICAL CORRECTNESS & RELEVANCE
+- Use accurate biological terminology and concepts
+- Provide biologically meaningful examples and explanations
+- Ensure suggestions align with current biological knowledge
+- Use appropriate biological context for the software domain
+- Avoid generic or incorrect biological statements
+- Focus on biologically relevant use cases and applications
+
+5. RESPECT EVALUATION SCORES
+- Excellent: Minimal changes only
+- Good: Add specific details where missing
+- Fair: Add missing sections, provide specifics
+- Poor: Major additions but NEVER delete existing content
+
+6. HANDLE SPECIFIC EVALUATION CATEGORIES (CONTEXT-AWARE)
+- Readability: Simplify language, add definitions
+- Coverage: ADD missing sections ONLY if contextually appropriate
+  * Tutorial context: Add usage examples, analysis steps, not installation
+  * README context: Add prerequisites, setup, installation
+  * Documentation context: Add comprehensive guides
+- Reproducibility: Add versions, data sources, expected outputs
+- Structure: Add headers, TOC, but DON'T reorganize existing structure
+- Code Quality: Fix hardcoded paths, add error handling
+- Result Verification: Add expected output examples
+- Performance: Add ONE section with specific hardware/runtime numbers
+
+STRICT CONSTRAINTS
+- NEVER invent hardware specs, version numbers, performance metrics without source
+- If evaluation requests but context lacks data: provide reasonable defaults with caveats
+- ABSOLUTELY FORBIDDEN: Wrapping entire output in ```markdown fences
+- ABSOLUTELY FORBIDDEN: Adding conclusions, summaries, or wrap-up paragraphs at end
+- ABSOLUTELY FORBIDDEN: Deleting ANY existing content
+- ABSOLUTELY FORBIDDEN: Reorganizing major sections
+- REQUIRED: Stop immediately after last section from original
+- REQUIRED: Preserve ALL metadata (YAML frontmatter, etc.)
+
+OUTPUT
+Return the complete enhanced document for {target_file}.
+- Pure markdown content only
+- No meta-commentary, no fences
+- Ready to copy-paste and publish
+- All {total_suggestions} improvements integrated
+- Original structure and content preserved
+"""
+
+LLM_README_COMPREHENSIVE_PROMPT = """
+You are "BioGuider," creating or enhancing README documentation.
+
+GOAL
+Create comprehensive, professional README that addresses all evaluation feedback.
+
+INPUTS
+- evaluation_report (structured feedback): <<{evaluation_report}>>
+- target_file: {target_file}
+- repo_context_excerpt: <<{context}>>
+- original_readme (if exists): <<{original_content}>>
+
+CRITICAL RULES
+
+1. SHOW, DON'T TELL
+- Actual commands, not descriptions
+- Specific versions, not "recent versions"
+- Working examples, not pseudo-code
+
+2. ONE LOCATION PER TOPIC
+- Dependencies → ONE section
+- Installation → ONE section (with subsections if needed)
+- Performance → ONE section (if applicable)
+
+3. SPECIFIC DATA ONLY
+- Don't invent version numbers
+- Don't invent system requirements
+- Use what's in context or provide reasonable defaults with caveats
+
+4. PRESERVE EXISTING
+- If README exists, enhance it
+- Don't delete working content
+- Keep existing structure if it's good
+
+5. BIOLOGICAL CORRECTNESS & RELEVANCE
+- Use accurate biological terminology and concepts
+- Provide biologically meaningful examples and explanations
+- Ensure suggestions align with current biological knowledge
+- Use appropriate biological context for the software domain
+- Avoid generic or incorrect biological statements
+- Focus on biologically relevant use cases and applications
+
+6. ADDRESS EVALUATION SUGGESTIONS
+- Available: Create README with all essential sections
+- Readability: Simplify complex sentences, add explanations
+- Project Purpose: Add clear goal statement and key applications
+- Hardware/Software Spec: Add specific system requirements
+- Dependencies: List exact package versions
+- License Information: State license type and link to LICENSE file
+- Author/Contributor Info: Add credits and contact information
+
+STANDARD README STRUCTURE
+- Project name and description
+- Overview with key applications
+- Installation (prerequisites, commands, verification)
+- Quick Start with working example
+- Usage (basic and advanced examples)
+- System Requirements (if applicable)
+- Dependencies with versions
+- Contributing guidelines
+- License information
+- Contact/maintainer info
+
+STRICT CONSTRAINTS
+- Don't add excessive badges, emojis, or marketing hype
+- Do add clear installation instructions, working code examples
+- Balance: comprehensive but concise
+- Professional, neutral tone
+- Proper markdown formatting
+
+OUTPUT
+Return complete README.md content.
+- Pure markdown only
+- No meta-commentary, no fences
+- Professional, clear, actionable
+- Ready to publish
+- All evaluation suggestions addressed
+"""
+
+# Continuation prompt template - used when document generation is truncated
+LLM_CONTINUATION_PROMPT = """
+You are "BioGuider," continuing a truncated document.
+
+CRITICAL: This is STRICT CONTINUATION ONLY.
+- You are NOT creating new content
+- You are NOT adding conclusions
+- You are ONLY completing missing sections from original
+
+PREVIOUS CONTENT (do not repeat):
+```
+{existing_content_tail}
+```
+
+CONTINUATION PROCESS:
+1. Identify what is the last complete section above
+2. Identify what sections are missing from the original document structure
+3. Continue IMMEDIATELY from where content stopped
+4. Use same style, tone, format as existing content
+5. Add ONLY the missing sections from original structure
+6. Stop when original structure is complete
+
+FORBIDDEN ADDITIONS:
+- "## Conclusion" section
+- "## Summary" section
+- "## Additional Resources" section
+- "For further guidance..." text
+- Any wrap-up or concluding content
+- Any content not in original document structure
+
+OUTPUT:
+Return ONLY continuation content that completes original structure.
+- No commentary
+- No fences
+- No conclusions
+- Stop immediately when structure is complete
+"""
+
+
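The four templates above are plain `str.format` templates. A minimal sketch of how they get filled (editorial illustration, not package code): `str.format` silently ignores extra keyword arguments, which is why the methods below can pass fields such as `evaluation_report_excerpt` to `LLM_CONTINUATION_PROMPT` even though that template never declares them.

```python
# Sketch: filling a format-style template the way the class below does.
# Assumption: this mirrors LLM_CONTINUATION_PROMPT's single
# {existing_content_tail} placeholder.
template = "PREVIOUS CONTENT (do not repeat):\n{existing_content_tail}"
prompt = template.format(
    existing_content_tail="## Usage\n...tail of the draft...",
    total_suggestions=3,  # not declared in the template; ignored, no error
)
print(prompt.splitlines()[0])  # PREVIOUS CONTENT (do not repeat):
```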
+class LLMContentGenerator:
+    def __init__(self, llm: BaseChatOpenAI):
+        self.llm = llm
+
+    def _detect_truncation(self, content: str, target_file: str, original_content: str = None) -> bool:
+        """
+        Detect if content appears to be truncated based on common patterns.
+        Universal detection for all file types.
+
+        Args:
+            content: Generated content to check
+            target_file: Target file path for context
+            original_content: Original content for comparison (if available)
+
+        Returns:
+            True if content appears truncated, False otherwise
+        """
+        if not content or len(content.strip()) < 100:
+            return True
+
+        # 1. Compare to original length if available (most reliable indicator)
+        if original_content:
+            original_len = len(original_content)
+            generated_len = len(content)
+            # If generated content is significantly shorter than original (< 80%), likely truncated
+            if generated_len < original_len * 0.8:
+                return True
+
+        # 2. Check for very short content (applies to all files)
+        # Only flag as truncated if content is very short (< 500 chars)
+        if len(content) < 500:
+            return True
+
+        # 3. Check for incomplete code blocks (any language)
+        # Count opening and closing code fences
+        code_fence_count = content.count('```')
+        if code_fence_count > 0 and code_fence_count % 2 != 0:
+            # Unbalanced code fences suggest truncation
+            return True
+
+        # 4. Check for specific language code blocks
+        if target_file.endswith('.Rmd'):
+            # R chunks should be complete
+            r_chunks_open = re.findall(r'```\{r[^}]*\}', content)
+            if r_chunks_open and not content.rstrip().endswith('```'):
+                # Has R chunks but doesn't end with closing fence
+                return True
+
+        if target_file.endswith(('.py', '.js', '.ts', '.java', '.cpp', '.c')):
+            # Check for incomplete class/function definitions
+            lines = content.split('\n')
+            last_lines = [line.strip() for line in lines[-5:] if line.strip()]
+            if last_lines:
+                last_line = last_lines[-1]
+                if (last_line.endswith(':') or
+                        last_line.endswith('{') or
+                        last_line.endswith('(') or
+                        'def ' in last_line or
+                        'class ' in last_line or
+                        'function ' in last_line):
+                    return True
+
+        # 5. Check for incomplete markdown sections (applies to all markdown-like files)
+        if any(target_file.endswith(ext) for ext in ['.md', '.Rmd', '.rst', '.txt']):
+            lines = content.split('\n')
+            last_non_empty_line = None
+            for line in reversed(lines):
+                if line.strip():
+                    last_non_empty_line = line.strip()
+                    break
+
+            if last_non_empty_line:
+                # Check if last line looks incomplete
+                incomplete_endings = [
+                    '##',    # Header without content
+                    '###',   # Header without content
+                    '####',  # Header without content
+                    '-',     # List item
+                    '*',     # List item or emphasis
+                    ':',     # Definition or label
+                    '|',     # Table row
+                ]
+
+                for ending in incomplete_endings:
+                    if last_non_empty_line.endswith(ending):
+                        return True
+
+            # Check if ends with incomplete patterns
+            content_end = content[-300:].strip().lower()
+            incomplete_patterns = [
+                '## ',        # Section header without content
+                '### ',       # Subsection without content
+                '#### ',      # Sub-subsection without content
+                '```{',       # Incomplete code chunk
+                '```r',       # Incomplete R chunk
+                '```python',  # Incomplete Python chunk
+            ]
+
+            for pattern in incomplete_patterns:
+                if content_end.endswith(pattern.lower()):
+                    return True
+
+        return False
+
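The fence-parity check above is the workhorse heuristic; a quick sketch of it in isolation (illustrative only): an odd number of ``` markers means some code block was opened but never closed.

```python
# Sketch: odd fence count flags a cut-off code block, as in _detect_truncation.
truncated = "# Demo\n\nSome prose.\n\n```python\nprint('hi')\n"  # no closing fence
fences = truncated.count("```")
print(fences, fences % 2 != 0)  # 1 True -> treated as truncated
```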
+    def _find_continuation_point(self, content: str, original_content: str = None) -> str | None:
+        """
+        Find a better continuation point than just the last 1000 characters.
+        Looks for the last complete section or code block to continue from.
+
+        Args:
+            content: The generated content so far
+            original_content: The original content for comparison
+
+        Returns:
+            A suitable continuation point, or None if not found
+        """
+        if not content:
+            return None
+
+        lines = content.split('\n')
+        if len(lines) < 10:  # Too short to find good continuation point
+            return None
+
+        # Strategy 1: Find the last complete section (header with content after it)
+        for i in range(len(lines) - 1, -1, -1):
+            line = lines[i].strip()
+            if line.startswith('## ') and i + 1 < len(lines):
+                # Check if there's content after this header
+                next_lines = []
+                for j in range(i + 1, min(i + 10, len(lines))):  # Look at next 10 lines
+                    if lines[j].strip() and not lines[j].strip().startswith('##'):
+                        next_lines.append(lines[j])
+                    else:
+                        break
+
+                if next_lines:  # Found header with content after it
+                    # Return from this header onwards
+                    return '\n'.join(lines[i:])
+
+        # Strategy 2: Find the last complete code block
+        in_code_block = False
+
+        for i in range(len(lines) - 1, -1, -1):
+            line = lines[i].strip()
+            if line.startswith('```') and not in_code_block:
+                # Scanning backwards, the first fence seen is the closing fence
+                in_code_block = True
+            elif line.startswith('```') and in_code_block:
+                # Found the matching opening fence: return the complete block onwards
+                return '\n'.join(lines[i:])
+
+        # Strategy 3: Find last complete paragraph (ends with period)
+        for i in range(len(lines) - 1, -1, -1):
+            line = lines[i].strip()
+            if line and line.endswith('.') and not line.startswith('#') and not line.startswith('```'):
+                # Found a complete sentence, return from there
+                return '\n'.join(lines[i:])
+
+        # Strategy 4: If original content is available, find where the generated content diverges
+        if original_content:
+            # Simple approach: find the longest common suffix
+            min_len = min(len(content), len(original_content))
+            common_length = 0
+
+            for i in range(1, min_len + 1):
+                if content[-i:] == original_content[-i:]:
+                    common_length = i
+                else:
+                    break
+
+            if common_length > 100:  # Found significant common ending
+                return content[-(common_length + 100):]  # Include some context
+
+        return None
+
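Strategy 1 in miniature (a sketch; the real method also refuses documents under ten lines): scan backwards for the last `## ` header that still has prose beneath it, and resume from that header.

```python
# Sketch of Strategy 1: resume from the last '## ' header that has content.
lines = ["## Setup", "Install it.", "", "## Analysis", "Run the workflow."]
idx = max(i for i, l in enumerate(lines)
          if l.startswith("## ") and i + 1 < len(lines) and lines[i + 1].strip())
print("\n".join(lines[idx:]))  # ## Analysis\nRun the workflow.
```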
+    def _appears_complete(self, content: str, target_file: str, original_content: str = None) -> bool:
+        """
+        Check if content appears to be complete based on structure, patterns, AND original length.
+        Universal completion check for all file types.
+
+        CRITICAL: If original_content is provided, generated content MUST be at least 90% of original length
+        to be considered complete, regardless of other heuristics. This prevents the LLM from fooling us
+        with fake conclusions.
+
+        Args:
+            content: Generated content to check
+            target_file: Target file path for context
+            original_content: Original content for length comparison (optional but recommended)
+
+        Returns:
+            True if content appears complete, False if it needs continuation
+        """
+        if not content or len(content.strip()) < 100:
+            return False
+
+        # CRITICAL: If original content is provided, check length ratio first
+        # This prevents the LLM from fooling us with fake conclusions
+        if original_content and isinstance(original_content, str):
+            generated_len = len(content)
+            original_len = len(original_content)
+            if generated_len < original_len * 0.9:
+                # Generated content is too short compared to original - NOT complete
+                return False
+
+        # 1. Check for balanced code blocks (applies to all files)
+        code_block_count = content.count('```')
+        if code_block_count > 0 and code_block_count % 2 != 0:
+            # Unbalanced code blocks suggest incomplete
+            return False
+
+        # 2. File type specific checks
+
+        # RMarkdown files
+        if target_file.endswith('.Rmd'):
+            # Check for proper YAML frontmatter
+            if not content.startswith('---'):
+                return False
+
+            # Check for conclusion patterns
+            conclusion_patterns = [
+                'sessionInfo()',
+                'session.info()',
+                '## Conclusion',
+                '## Summary',
+                '## Session Info',
+                '</details>',
+                'knitr::knit(',
+            ]
+
+            content_lower = content.lower()
+            has_conclusion = any(pattern.lower() in content_lower for pattern in conclusion_patterns)
+
+            # If we have a conclusion and balanced code blocks, likely complete
+            if has_conclusion and code_block_count > 0:
+                return True
+
+        # Markdown files
+        if target_file.endswith('.md'):
+            # Check for conclusion sections
+            conclusion_patterns = [
+                '## Conclusion',
+                '## Summary',
+                '## Next Steps',
+                '## Further Reading',
+                '## References',
+                '## License',
+            ]
+
+            content_lower = content.lower()
+            has_conclusion = any(pattern.lower() in content_lower for pattern in conclusion_patterns)
+
+            if has_conclusion and len(content) > 2000:
+                return True
+
+        # Python files
+        if target_file.endswith('.py'):
+            # Check for balanced brackets/parentheses
+            if content.count('(') != content.count(')'):
+                return False
+            if content.count('[') != content.count(']'):
+                return False
+            if content.count('{') != content.count('}'):
+                return False
+
+            # Check for complete structure (reasonable length + proper ending)
+            lines = [line for line in content.split('\n') if line.strip()]
+            if len(lines) > 20:  # Has reasonable content
+                last_line = lines[-1].strip()
+                # Should not end with incomplete statements
+                if not (last_line.endswith(':') or
+                        last_line.endswith('\\') or
+                        last_line.endswith(',')):
+                    return True
+
+        # JavaScript/TypeScript files
+        if target_file.endswith(('.js', '.ts', '.jsx', '.tsx')):
+            # Check for balanced brackets
+            if content.count('{') != content.count('}'):
+                return False
+            if content.count('(') != content.count(')'):
+                return False
+
+            lines = [line for line in content.split('\n') if line.strip()]
+            if len(lines) > 20:
+                last_line = lines[-1].strip()
+                # Complete if ends with proper syntax
+                if (last_line.endswith('}') or
+                        last_line.endswith(';') or
+                        last_line.endswith('*/') or
+                        last_line.startswith('//')):
+                    return True
+
+        # 3. Generic checks for all file types
+        if len(content) > 3000:  # Reasonable length
+            # Check if it ends with complete sentences/sections
+            lines = content.split('\n')
+            last_lines = [line.strip() for line in lines[-10:] if line.strip()]
+
+            if last_lines:
+                last_line = last_lines[-1]
+                # Complete if ends with proper punctuation or closing tags
+                complete_endings = [
+                    '.',           # Sentence
+                    '```',         # Code block
+                    '---',         # Section divider
+                    '</details>',  # HTML details
+                    '}',           # Closing brace
+                    ';',           # Statement end
+                    '*/',          # Comment end
+                ]
+
+                if any(last_line.endswith(ending) for ending in complete_endings):
+                    return True
+
+        return False
+
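The decisive guard here is the 90% length ratio; everything else is secondary. A two-line sketch:

```python
# Sketch: the 0.9 length-ratio guard from _appears_complete. A generated draft
# shorter than 90% of the original is never "complete", whatever its last line says.
original, generated = "x" * 10_000, "x" * 7_500
print(len(generated) < len(original) * 0.9)  # True -> forces a continuation round
```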
+    def _generate_continuation(self, target_file: str, evaluation_report: dict,
+                               context: str, existing_content: str) -> tuple[str, dict]:
+        """
+        Generate continuation content from where previous generation left off.
+
+        Args:
+            target_file: Target file path
+            evaluation_report: Evaluation report data
+            context: Repository context
+            existing_content: Previously generated content
+
+        Returns:
+            Tuple of (continuation_content, token_usage)
+        """
+        # Create LLM for continuation (uses 16k tokens by default)
+        from bioguider.agents.agent_utils import get_llm
+        import os
+
+        llm = get_llm(
+            api_key=os.environ.get("OPENAI_API_KEY"),
+            model_name=os.environ.get("OPENAI_MODEL", "gpt-4o"),
+            azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
+            api_version=os.environ.get("OPENAI_API_VERSION"),
+            azure_deployment=os.environ.get("OPENAI_DEPLOYMENT_NAME"),
+        )
+
+        conv = CommonConversation(llm)
+
+        # Calculate total suggestions for the prompt
+        total_suggestions = 1
+        if isinstance(evaluation_report, dict):
+            if "total_suggestions" in evaluation_report:
+                total_suggestions = evaluation_report["total_suggestions"]
+            elif "suggestions" in evaluation_report and isinstance(evaluation_report["suggestions"], list):
+                total_suggestions = len(evaluation_report["suggestions"])
+
+        # Use the centralized continuation prompt template
+        continuation_prompt = LLM_CONTINUATION_PROMPT.format(
+            target_file=target_file,
+            existing_content_tail=existing_content[-1000:],  # Last 1000 chars for context
+            total_suggestions=total_suggestions,
+            evaluation_report_excerpt=json.dumps(evaluation_report)[:4000],
+            context_excerpt=context[:2000],
+        )
+
+        content, token_usage = conv.generate(
+            system_prompt=continuation_prompt,
+            instruction_prompt="Continue the document from where it left off."
+        )
+        return content.strip(), token_usage
+
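Taken together, `_detect_truncation`, `_appears_complete`, and `_generate_continuation` form a detect-continue-merge loop. A condensed sketch of that control flow (hypothetical helper, not package code; the real loop lives in `generate_full_document` below and also accumulates token usage and debug files):

```python
# Hypothetical driver condensing the continuation loop.
def complete_document(gen, target, report, ctx, content, original, max_rounds=3):
    for _ in range(max_rounds):
        if not gen._detect_truncation(content, target, original):
            break  # looks finished
        if gen._appears_complete(content, target, original):
            break  # heuristics disagree; trust the completeness check
        tail, _usage = gen._generate_continuation(target, report, ctx, content)
        if not tail:
            break  # empty continuation; avoid an infinite loop
        content += "\n\n" + tail
    return content
```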
+    def generate_section(self, suggestion: SuggestionItem, style: StyleProfile, context: str = "") -> tuple[str, dict]:
+        conv = CommonConversation(self.llm)
+        section_name = suggestion.anchor_hint or suggestion.category.split(".")[-1].replace("_", " ").title()
+
+        # Extract original text snippet and evaluation score from suggestion source
+        original_text = ""
+        evaluation_score = ""
+        if hasattr(suggestion, 'source') and suggestion.source:
+            original_text = suggestion.source.get('original_text', '')
+            evaluation_score = suggestion.source.get('score', '')
+
+        # Detect document context to help with appropriate responses
+        document_context = self._detect_document_context(context, suggestion.anchor_title or "")
+
+        system_prompt = LLM_SECTION_PROMPT.format(
+            tone_markers=", ".join(style.tone_markers or []),
+            heading_style=style.heading_style,
+            list_style=style.list_style,
+            link_style=style.link_style,
+            section=section_name,
+            anchor_title=section_name,
+            suggestion_category=suggestion.category,
+            original_text=original_text,
+            evaluation_score=evaluation_score,
+            context=context[:2500],
+            guidance=(suggestion.content_guidance or "").strip(),
+        )
+
+        # Add context-aware instruction
+        context_instruction = f"\n\nCONTEXT DETECTED: {document_context}\n"
+        if document_context == "TUTORIAL":
+            context_instruction += "Focus on usage/analysis steps, NOT installation. Users already have software installed.\n"
+        elif document_context == "README":
+            context_instruction += "Focus on installation, setup, and getting started. Users need to install software.\n"
+        elif document_context == "BIOLOGICAL":
+            context_instruction += "Use accurate biological terminology and provide biologically meaningful examples.\n"
+
+        system_prompt += context_instruction
+        content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the section content now.")
+        return content.strip(), token_usage
+
+    def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "", original_content: str = None) -> tuple[str, dict]:
+        # Create LLM (uses 16k tokens by default - enough for any document)
+        from bioguider.agents.agent_utils import get_llm
+        import os
+        import json
+        from datetime import datetime
+
+        # Get LLM with default 16k token limit
+        llm = get_llm(
+            api_key=os.environ.get("OPENAI_API_KEY"),
+            model_name=os.environ.get("OPENAI_MODEL", "gpt-4o"),
+            azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
+            api_version=os.environ.get("OPENAI_API_VERSION"),
+            azure_deployment=os.environ.get("OPENAI_DEPLOYMENT_NAME"),
+        )
+
+        conv = CommonConversation(llm)
+
+        # Debug: Save generation settings and context
+        debug_info = {
+            "target_file": target_file,
+            "timestamp": datetime.now().isoformat(),
+            "evaluation_report": evaluation_report,
+            "context_length": len(context),
+            "llm_settings": {
+                "model_name": os.environ.get("OPENAI_MODEL", "gpt-4o"),
+                "azure_deployment": os.environ.get("OPENAI_DEPLOYMENT_NAME"),
+                "max_tokens": getattr(llm, 'max_tokens', 16384)
+            }
+        }
+
+        # Save debug info to file
+        debug_dir = "outputs/debug_generation"
+        os.makedirs(debug_dir, exist_ok=True)
+        safe_filename = target_file.replace("/", "_").replace(".", "_")
+        debug_file = os.path.join(debug_dir, f"{safe_filename}_debug.json")
+        with open(debug_file, 'w', encoding='utf-8') as f:
+            json.dump(debug_info, f, indent=2, ensure_ascii=False)
+
+        # Debug: Save raw evaluation_report to see what's being serialized
+        eval_report_file = os.path.join(debug_dir, f"{safe_filename}_raw_eval_report.json")
+        with open(eval_report_file, 'w', encoding='utf-8') as f:
+            json.dump(evaluation_report, f, indent=2, ensure_ascii=False)
+
+        # Use comprehensive README prompt for README.md files
+        if target_file.endswith("README.md"):
+            system_prompt = LLM_README_COMPREHENSIVE_PROMPT.format(
+                target_file=target_file,
+                evaluation_report=json.dumps(evaluation_report)[:6000],
+                context=context[:4000],
+                original_content=original_content or "",
+            )
+        else:
+            # Calculate total suggestions for the prompt
+            total_suggestions = 1
+            if isinstance(evaluation_report, dict):
+                if "total_suggestions" in evaluation_report:
+                    total_suggestions = evaluation_report["total_suggestions"]
+                elif "suggestions" in evaluation_report and isinstance(evaluation_report["suggestions"], list):
+                    total_suggestions = len(evaluation_report["suggestions"])
+
+            system_prompt = LLM_FULLDOC_PROMPT.format(
+                target_file=target_file,
+                evaluation_report=json.dumps(evaluation_report)[:6000],
+                context=context[:4000],
+                original_content=original_content or "",
+                total_suggestions=total_suggestions,
+            )
+
+        # Save initial prompt for debugging
+        prompt_file = os.path.join(debug_dir, f"{safe_filename}_prompt.txt")
+        with open(prompt_file, 'w', encoding='utf-8') as f:
+            f.write("=== SYSTEM PROMPT ===\n")
+            f.write(system_prompt)
+            f.write("\n\n=== INSTRUCTION PROMPT ===\n")
+            f.write("Write the full document now.")
+            # Context is already embedded in system prompt; avoid duplicating here
+
+        # Initial generation
+        # If the original document is long (RMarkdown > 8k chars), avoid truncation by chunked rewrite
+        # Lower threshold from 12k to 8k to catch more documents that would otherwise truncate
+        use_chunked = bool(target_file.endswith('.Rmd') and isinstance(original_content, str) and len(original_content) > 8000)
+        if use_chunked:
+            content, token_usage = self._generate_full_document_chunked(
+                target_file=target_file,
+                evaluation_report=evaluation_report,
+                context=context,
+                original_content=original_content or "",
+                debug_dir=debug_dir,
+                safe_filename=safe_filename,
+            )
+        else:
+            content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the full document now.")
+        content = content.strip()
+
+        # Save initial generation for debugging
+        generation_file = os.path.join(debug_dir, f"{safe_filename}_generation_0.txt")
+        with open(generation_file, 'w', encoding='utf-8') as f:
+            f.write("=== INITIAL GENERATION ===\n")
+            f.write(f"Tokens: {token_usage}\n")
+            f.write(f"Length: {len(content)} characters\n")
+            if original_content:
+                f.write(f"Original length: {len(original_content)} characters\n")
+            f.write(f"Truncation detected: {self._detect_truncation(content, target_file, original_content)}\n")
+            f.write("\n=== CONTENT ===\n")
+            f.write(content)
+
+        # Check for truncation and continue if needed
+        max_continuations = 3  # Limit to prevent infinite loops
+        continuation_count = 0
+
+        while (not use_chunked and self._detect_truncation(content, target_file, original_content) and
+               continuation_count < max_continuations):
+
+            # Additional check: if content appears complete, don't continue
+            # Pass original_content so we can check length ratio
+            if self._appears_complete(content, target_file, original_content):
+                break
+            continuation_count += 1
+
+            # Calculate total suggestions for debugging info
+            total_suggestions = 1
+            if isinstance(evaluation_report, dict):
+                if "total_suggestions" in evaluation_report:
+                    total_suggestions = evaluation_report["total_suggestions"]
+                elif "suggestions" in evaluation_report and isinstance(evaluation_report["suggestions"], list):
+                    total_suggestions = len(evaluation_report["suggestions"])
+
+            # Find better continuation point - look for last complete section
+            continuation_point = self._find_continuation_point(content, original_content)
+            if not continuation_point:
+                continuation_point = content[-1000:]  # Fallback to last 1000 chars
+
+            # Generate continuation prompt using centralized template
+            continuation_prompt = LLM_CONTINUATION_PROMPT.format(
+                target_file=target_file,
+                existing_content_tail=continuation_point,
+                total_suggestions=total_suggestions,
+                evaluation_report_excerpt=json.dumps(evaluation_report)[:4000],
+                context_excerpt=context[:2000],
+            )
+
+            # Save continuation prompt for debugging
+            continuation_prompt_file = os.path.join(debug_dir, f"{safe_filename}_continuation_{continuation_count}_prompt.txt")
+            with open(continuation_prompt_file, 'w', encoding='utf-8') as f:
+                f.write(continuation_prompt)
+
+            # Generate continuation
+            continuation_content, continuation_usage = self._generate_continuation(
+                target_file=target_file,
+                evaluation_report=evaluation_report,
+                context=context,
+                existing_content=content
+            )
+
+            # Save continuation generation for debugging
+            continuation_file = os.path.join(debug_dir, f"{safe_filename}_continuation_{continuation_count}.txt")
+            with open(continuation_file, 'w', encoding='utf-8') as f:
+                f.write(f"=== CONTINUATION {continuation_count} ===\n")
+                f.write(f"Tokens: {continuation_usage}\n")
+                f.write(f"Length: {len(continuation_content)} characters\n")
+                f.write(f"Truncation detected: {self._detect_truncation(continuation_content, target_file)}\n")
+                f.write("\n=== CONTENT ===\n")
+                f.write(continuation_content)
+
+            # Merge continuation with existing content
+            if continuation_content:
+                content += "\n\n" + continuation_content
+                # Update token usage
+                token_usage = {
+                    "total_tokens": token_usage.get("total_tokens", 0) + continuation_usage.get("total_tokens", 0),
+                    "prompt_tokens": token_usage.get("prompt_tokens", 0) + continuation_usage.get("prompt_tokens", 0),
+                    "completion_tokens": token_usage.get("completion_tokens", 0) + continuation_usage.get("completion_tokens", 0),
+                }
+
+                # Save merged content for debugging
+                merged_file = os.path.join(debug_dir, f"{safe_filename}_merged_{continuation_count}.txt")
+                with open(merged_file, 'w', encoding='utf-8') as f:
+                    f.write(f"=== MERGED CONTENT AFTER CONTINUATION {continuation_count} ===\n")
+                    f.write(f"Total length: {len(content)} characters\n")
+                    f.write(f"Truncation detected: {self._detect_truncation(content, target_file)}\n")
+                    f.write("\n=== CONTENT ===\n")
+                    f.write(content)
+            else:
+                # If continuation is empty, break to avoid infinite loop
+                break
+
+        # Clean up any markdown code fences that might have been added
+        content = self._clean_markdown_fences(content)
+
+        # Save final cleaned content for debugging
+        final_file = os.path.join(debug_dir, f"{safe_filename}_final.txt")
+        with open(final_file, 'w', encoding='utf-8') as f:
+            f.write("=== FINAL CLEANED CONTENT ===\n")
+            f.write(f"Total tokens: {token_usage}\n")
+            f.write(f"Final length: {len(content)} characters\n")
+            f.write(f"Continuations used: {continuation_count}\n")
+            f.write("\n=== CONTENT ===\n")
+            f.write(content)
+
+        return content, token_usage
+
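A hedged usage sketch for the method above. The import paths follow the file listing at the top of this diff, the environment variables mirror the ones the method reads, and the report dict is a made-up stand-in for a real evaluation report. Note the call also writes debug artifacts under outputs/debug_generation/.

```python
# Usage sketch (assumes OPENAI_* variables are configured as the method expects).
import os
from bioguider.agents.agent_utils import get_llm
from bioguider.generation.llm_content_generator import LLMContentGenerator

llm = get_llm(
    api_key=os.environ.get("OPENAI_API_KEY"),
    model_name=os.environ.get("OPENAI_MODEL", "gpt-4o"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_version=os.environ.get("OPENAI_API_VERSION"),
    azure_deployment=os.environ.get("OPENAI_DEPLOYMENT_NAME"),
)
generator = LLMContentGenerator(llm)
report = {"suggestions": [{"category": "readme.dependencies"}]}  # stand-in report
content, usage = generator.generate_full_document(
    target_file="README.md",
    evaluation_report=report,
    context="repo excerpt ...",
    original_content="# MyTool\n\nAn example README.\n",
)
```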
+    def _clean_markdown_fences(self, content: str) -> str:
+        """
+        Remove markdown code fences that shouldn't be in the final content.
+        """
+        # Remove ```markdown at the beginning
+        if content.startswith('```markdown\n'):
+            content = content[12:]  # Remove ```markdown\n
+
+        # Remove ``` at the end
+        if content.endswith('\n```'):
+            content = content[:-4]  # Remove \n```
+        elif content.endswith('```'):
+            content = content[:-3]  # Remove ```
+
+        # Remove any standalone ```markdown lines
+        lines = content.split('\n')
+        cleaned_lines = []
+        for line in lines:
+            if line.strip() == '```markdown':
+                continue
+            cleaned_lines.append(line)
+
+        return '\n'.join(cleaned_lines)
+
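What the cleaner does, in one assertion (sketch; the llm argument is unused by this helper, so None suffices to build an instance):

```python
# Sketch: a response wrapped in ```markdown fences is unwrapped in place.
from bioguider.generation.llm_content_generator import LLMContentGenerator

gen = LLMContentGenerator(llm=None)  # llm unused by this helper
wrapped = "```markdown\n# Title\n\nBody text.\n```"
assert gen._clean_markdown_fences(wrapped) == "# Title\n\nBody text."
```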
+    def _split_rmd_into_chunks(self, content: str) -> list[dict]:
+        """
+        Split RMarkdown content into chunks for processing.
+
+        CRITICAL: This function must correctly identify code blocks to preserve them.
+        Code blocks in RMarkdown start with ```{r...} or ``` and end with ```.
+
+        Returns list of dicts with 'type' (yaml/code/text) and 'content'.
+        """
+        chunks = []
+        if not content:
+            return chunks
+        lines = content.split('\n')
+        n = len(lines)
+        i = 0
+
+        # Handle YAML frontmatter
+        if n >= 3 and lines[0].strip() == '---':
+            j = 1
+            while j < n and lines[j].strip() != '---':
+                j += 1
+            if j < n and lines[j].strip() == '---':
+                chunks.append({"type": "yaml", "content": '\n'.join(lines[0:j+1])})
+                i = j + 1
+
+        buffer = []
+        in_code = False
+
+        for k in range(i, n):
+            line = lines[k]
+            # Check for code fence - must be at start of line (possibly with whitespace)
+            stripped = line.strip()
+
+            # Detect code fence opening: ``` or ```{r...} or ```python etc
+            is_code_fence = stripped.startswith('```')
+
+            if is_code_fence:
+                if in_code:
+                    # This is a closing fence
+                    buffer.append(line)
+                    chunks.append({"type": "code", "content": '\n'.join(buffer)})
+                    buffer = []
+                    in_code = False
+                else:
+                    # This is an opening fence
+                    # Save any accumulated text first
+                    if buffer and any(s.strip() for s in buffer):
+                        chunks.append({"type": "text", "content": '\n'.join(buffer)})
+                    # Start new code block with the opening fence
+                    buffer = [line]
+                    in_code = True
+            else:
+                buffer.append(line)
+
+        # Handle remaining buffer
+        if buffer and any(s.strip() for s in buffer):
+            if in_code:
+                # Unclosed code block - this is an error but add it anyway
+                print("WARNING: Unclosed code block detected in RMarkdown")
+                chunks.append({"type": "code", "content": '\n'.join(buffer)})
+            else:
+                chunks.append({"type": "text", "content": '\n'.join(buffer)})
+
+        # Validation: count code fences in chunks vs original
+        original_fences = len(re.findall(r'^```', content, flags=re.M))
+        chunk_fences = 0
+        for ch in chunks:
+            if ch["type"] == "code":
+                chunk_fences += len(re.findall(r'^```', ch["content"], flags=re.M))
+
+        if original_fences != chunk_fences:
+            print(f"WARNING: Code fence count mismatch in chunking: original={original_fences}, chunks={chunk_fences}")
+
+        return chunks
+
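A sketch of the splitter on a toy document: YAML frontmatter, one prose paragraph, one complete {r} chunk.

```python
# Sketch: chunking a tiny RMarkdown file; expect ['yaml', 'text', 'code'].
from bioguider.generation.llm_content_generator import LLMContentGenerator

gen = LLMContentGenerator(llm=None)  # llm unused by this helper
rmd = "\n".join([
    "---", "title: Demo", "---",
    "Some intro prose.",
    "```{r setup}", "library(Seurat)", "```",
])
print([c["type"] for c in gen._split_rmd_into_chunks(rmd)])  # ['yaml', 'text', 'code']
```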
+    def _generate_text_chunk(self, conv: CommonConversation, evaluation_report: dict, context: str, chunk_text: str) -> tuple[str, dict]:
+        LLM_CHUNK_PROMPT = (
+            "You are BioGuider improving a single TEXT chunk of a larger RMarkdown document.\n\n"
+            "GOAL\nRefine ONLY the given chunk's prose per evaluation suggestions while preserving structure.\n"
+            "Do not add conclusions or new sections.\n\n"
+            "INPUTS\n- evaluation_report: <<{evaluation_report}>>\n- repo_context_excerpt: <<{context}>>\n- original_chunk:\n<<<\n{chunk}\n>>>\n\n"
+            "CRITICAL RULES\n"
+            "- This is a TEXT-ONLY chunk - do NOT add any code blocks or code fences (```).\n"
+            "- Preserve all headers and formatting in this chunk.\n"
+            "- Do not invent technical specs.\n"
+            "- Output ONLY the refined text (no code fences, no markdown code blocks).\n"
+            "- NEVER add ``` anywhere in your output.\n"
+            "- Keep the same approximate length as the original chunk."
+        )
+        system_prompt = LLM_CHUNK_PROMPT.format(
+            evaluation_report=json.dumps(evaluation_report)[:4000],
+            context=context[:1500],
+            chunk=chunk_text[:6000],
+        )
+        content, usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Rewrite this text chunk now. Remember: NO code fences (```).")
+
+        # Post-processing: remove any code fences that may have been added
+        output = content.strip()
+
+        # If output contains code fences, the LLM didn't follow instructions
+        # Return original to preserve document structure
+        if '```' in output:
+            print("WARNING: LLM added code fences to text chunk, using original")
+            return chunk_text, usage
+
+        return output, usage
+
+    def _generate_full_document_chunked(self, target_file: str, evaluation_report: dict, context: str, original_content: str, debug_dir: str, safe_filename: str) -> tuple[str, dict]:
+        conv = CommonConversation(self.llm)
+        chunks = self._split_rmd_into_chunks(original_content)
+        merged = []
+        total_usage = {"total_tokens": 0, "prompt_tokens": 0, "completion_tokens": 0}
+        from datetime import datetime
+
+        # Save chunk analysis for debugging
+        chunk_analysis_file = os.path.join(debug_dir, f"{safe_filename}_chunk_analysis.txt")
+        with open(chunk_analysis_file, 'w', encoding='utf-8') as f:
+            f.write(f"Total chunks: {len(chunks)}\n")
+            for idx, ch in enumerate(chunks):
+                f.write(f"Chunk {idx}: type={ch['type']}, length={len(ch['content'])}\n")
+                if ch['type'] == 'code':
+                    f.write(f"  First line: {ch['content'].split(chr(10))[0][:80]}\n")
+
+        for idx, ch in enumerate(chunks):
+            if ch["type"] in ("yaml", "code"):
+                # CRITICAL: Pass through code/yaml chunks EXACTLY as-is
+                merged.append(ch["content"])
+                continue
+
+            # For text chunks, try to improve but fall back to original if needed
+            out, usage = self._generate_text_chunk(conv, evaluation_report, context, ch["content"])
+
+            # Validate the output doesn't contain code fence fragments that could break structure
+            if not out or '```' in out:
+                # If LLM added code fences in text chunk, it could break the document
+                # Fall back to original text
+                out = ch["content"]
+
+            merged.append(out)
+            try:
+                total_usage["total_tokens"] += int(usage.get("total_tokens", 0))
+                total_usage["prompt_tokens"] += int(usage.get("prompt_tokens", 0))
+                total_usage["completion_tokens"] += int(usage.get("completion_tokens", 0))
+            except Exception:
+                pass
+            chunk_file = os.path.join(debug_dir, f"{safe_filename}_chunk_{idx}.txt")
+            with open(chunk_file, 'w', encoding='utf-8') as f:
+                f.write(f"=== CHUNK {idx} ({ch['type']}) at {datetime.now().isoformat()} ===\n")
+                f.write(out)
+
+        content = '\n'.join(merged)
+
+        # CRITICAL: Validate code block structure is preserved
+        original_fences = len(re.findall(r'^```', original_content, flags=re.M))
+        generated_fences = len(re.findall(r'^```', content, flags=re.M))
+
+        if original_fences != generated_fences:
+            # Code block structure was broken - log error and return original
+            error_file = os.path.join(debug_dir, f"{safe_filename}_ERROR_codeblock_mismatch.txt")
+            with open(error_file, 'w', encoding='utf-8') as f:
+                f.write("ERROR: Code block count mismatch!\n")
+                f.write(f"Original: {original_fences} code fences\n")
+                f.write(f"Generated: {generated_fences} code fences\n")
+                f.write("\nReturning original content to preserve structure.\n")
+            print(f"WARNING: Code block structure broken for {target_file}, returning original content")
+            return original_content, total_usage
+
+        return content, total_usage
+
+    def _detect_document_context(self, context: str, anchor_title: str) -> str:
+        """Detect the document context to help with appropriate responses."""
+        context_lower = context.lower()
+        anchor_lower = anchor_title.lower()
+
+        # Check for tutorial context
+        if any(keyword in context_lower for keyword in ['tutorial', 'vignette', 'example', 'workflow', 'step-by-step']):
+            return "TUTORIAL"
+
+        # Check for README context
+        if any(keyword in context_lower for keyword in ['readme', 'installation', 'setup', 'prerequisites']):
+            return "README"
+
+        # Check for documentation context
+        if any(keyword in context_lower for keyword in ['documentation', 'guide', 'manual', 'reference']):
+            return "DOCUMENTATION"
+
+        # Check for biological context
+        if any(keyword in context_lower for keyword in ['cell', 'gene', 'protein', 'dna', 'rna', 'genome', 'transcriptome', 'proteome', 'metabolome']):
+            return "BIOLOGICAL"
+
+        # Default to general context
+        return "GENERAL"
+
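And the keyword heuristics above, exercised on two small context strings (sketch):

```python
# Sketch: context detection is a simple ordered keyword scan.
from bioguider.generation.llm_content_generator import LLMContentGenerator

gen = LLMContentGenerator(llm=None)  # llm unused by this helper
print(gen._detect_document_context("This vignette walks through a workflow", ""))
# TUTORIAL ('vignette' matches before any other group is checked)
print(gen._detect_document_context("an scRNA-seq gene expression matrix", ""))
# BIOLOGICAL ('gene' matches after the tutorial/readme/doc checks fail)
```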