bioguider 0.2.31__py3-none-any.whl → 0.2.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- bioguider/agents/agent_utils.py +41 -14
- bioguider/agents/evaluation_readme_task.py +4 -1
- bioguider/generation/llm_content_generator.py +614 -23
- bioguider/managers/evaluation_manager.py +0 -2
- bioguider/managers/generation_manager.py +161 -39
- bioguider/utils/code_structure_builder.py +7 -1
- bioguider/utils/r_file_handler.py +6 -4
- {bioguider-0.2.31.dist-info → bioguider-0.2.33.dist-info}/METADATA +1 -1
- {bioguider-0.2.31.dist-info → bioguider-0.2.33.dist-info}/RECORD +11 -11
- {bioguider-0.2.31.dist-info → bioguider-0.2.33.dist-info}/LICENSE +0 -0
- {bioguider-0.2.31.dist-info → bioguider-0.2.33.dist-info}/WHEEL +0 -0
bioguider/agents/agent_utils.py
CHANGED
|
@@ -27,6 +27,7 @@ from bioguider.utils.utils import clean_action_input
|
|
|
27
27
|
from ..utils.gitignore_checker import GitignoreChecker
|
|
28
28
|
from ..database.summarized_file_db import SummarizedFilesDb
|
|
29
29
|
from bioguider.agents.common_conversation import CommonConversation
|
|
30
|
+
from bioguider.rag.config import configs
|
|
30
31
|
|
|
31
32
|
logger = logging.getLogger(__name__)
|
|
32
33
|
|
|
@@ -66,8 +67,17 @@ def get_llm(
|
|
|
66
67
|
api_version: str=None,
|
|
67
68
|
azure_deployment: str=None,
|
|
68
69
|
temperature: float = 0.0,
|
|
69
|
-
max_tokens: int =
|
|
70
|
+
max_tokens: int = 16384, # Set high by default - enough for any document type
|
|
70
71
|
):
|
|
72
|
+
"""
|
|
73
|
+
Create an LLM instance with appropriate parameters based on model type and API version.
|
|
74
|
+
|
|
75
|
+
Handles parameter compatibility across different models and API versions:
|
|
76
|
+
- DeepSeek models: Use max_tokens parameter
|
|
77
|
+
- GPT models (newer): Use max_completion_tokens parameter
|
|
78
|
+
- GPT-5+: Don't support custom temperature (uses default)
|
|
79
|
+
"""
|
|
80
|
+
|
|
71
81
|
if model_name.startswith("deepseek"):
|
|
72
82
|
chat = ChatDeepSeek(
|
|
73
83
|
api_key=api_key,
|
|
@@ -76,23 +86,38 @@ def get_llm(
|
|
|
76
86
|
max_tokens=max_tokens,
|
|
77
87
|
)
|
|
78
88
|
elif model_name.startswith("gpt"):
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
89
|
+
# Base parameters common to all GPT models
|
|
90
|
+
llm_params = {
|
|
91
|
+
"api_key": api_key,
|
|
92
|
+
"azure_endpoint": azure_endpoint,
|
|
93
|
+
"api_version": api_version,
|
|
94
|
+
"azure_deployment": azure_deployment,
|
|
95
|
+
"model": model_name,
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
# Determine token limit parameter name based on API version
|
|
99
|
+
# Newer APIs (2024-08+) use max_completion_tokens instead of max_tokens
|
|
100
|
+
use_completion_tokens = api_version and api_version >= "2024-08-01-preview"
|
|
101
|
+
token_param = "max_completion_tokens" if use_completion_tokens else "max_tokens"
|
|
102
|
+
llm_params[token_param] = max_tokens
|
|
103
|
+
|
|
104
|
+
# Handle temperature parameter based on model capabilities
|
|
105
|
+
# GPT-5+ models don't support custom temperature values
|
|
106
|
+
supports_temperature = not any(restricted in model_name for restricted in ["gpt-5", "o1", "o3"])
|
|
107
|
+
if supports_temperature:
|
|
108
|
+
llm_params["temperature"] = temperature
|
|
109
|
+
|
|
110
|
+
chat = AzureChatOpenAI(**llm_params)
|
|
88
111
|
else:
|
|
89
|
-
raise ValueError("
|
|
90
|
-
|
|
112
|
+
raise ValueError(f"Unsupported model type: {model_name}")
|
|
113
|
+
|
|
114
|
+
# Validate the LLM instance with a simple test
|
|
91
115
|
try:
|
|
92
116
|
chat.invoke("Hi")
|
|
93
117
|
except Exception as e:
|
|
94
|
-
|
|
118
|
+
logger.error(f"Failed to initialize LLM {model_name}: {e}")
|
|
95
119
|
return None
|
|
120
|
+
|
|
96
121
|
return chat
|
|
97
122
|
|
|
98
123
|
def pretty_print(message, printout = True):
|
|
@@ -153,7 +178,9 @@ def read_directory(
|
|
|
153
178
|
return None
|
|
154
179
|
gitignore_checker = GitignoreChecker(
|
|
155
180
|
directory=dir_path,
|
|
156
|
-
gitignore_path=gitignore_path
|
|
181
|
+
gitignore_path=gitignore_path,
|
|
182
|
+
exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
|
|
183
|
+
exclude_file_patterns=configs["file_filters"]["excluded_files"],
|
|
157
184
|
)
|
|
158
185
|
files = gitignore_checker.check_files_and_folders(level=level)
|
|
159
186
|
return files
|
|
@@ -28,6 +28,7 @@ from bioguider.utils.constants import (
|
|
|
28
28
|
EvaluationREADMEResult,
|
|
29
29
|
)
|
|
30
30
|
from bioguider.utils.utils import increase_token_usage
|
|
31
|
+
from bioguider.rag.config import configs
|
|
31
32
|
|
|
32
33
|
logger = logging.getLogger(__name__)
|
|
33
34
|
|
|
@@ -638,7 +639,9 @@ class EvaluationREADMETask(EvaluationTask):
|
|
|
638
639
|
repo_path = self.repo_path
|
|
639
640
|
gitignore_path = Path(repo_path, ".gitignore")
|
|
640
641
|
gitignore_checker = GitignoreChecker(
|
|
641
|
-
directory=repo_path, gitignore_path=gitignore_path
|
|
642
|
+
directory=repo_path, gitignore_path=gitignore_path,
|
|
643
|
+
exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
|
|
644
|
+
exclude_file_patterns=configs["file_filters"]["excluded_files"],
|
|
642
645
|
)
|
|
643
646
|
found_readme_files = gitignore_checker.check_files_and_folders(
|
|
644
647
|
check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Dict
|
|
4
4
|
import json
|
|
5
|
+
import re
|
|
5
6
|
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
6
7
|
|
|
7
8
|
from bioguider.agents.common_conversation import CommonConversation
|
|
@@ -25,13 +26,14 @@ CRITICAL REQUIREMENTS
|
|
|
25
26
|
- Follow the guidance EXACTLY as provided: {guidance}
|
|
26
27
|
- Address the specific suggestions from the evaluation report precisely
|
|
27
28
|
- Do not deviate from the guidance or add unrelated content
|
|
28
|
-
- If guidance mentions specific packages, requirements, or details, include them
|
|
29
|
-
-
|
|
29
|
+
- If guidance mentions specific packages, requirements, or details, include them ONLY if they are explicitly stated - never invent or estimate
|
|
30
|
+
- Preserve the original file structure including frontmatter, code blocks, and existing headers
|
|
30
31
|
- NEVER generate generic placeholder content like "Clear 2–3 sentence summary" or "brief description"
|
|
32
|
+
- NEVER invent technical specifications (hardware requirements, version numbers, performance metrics) unless explicitly provided in guidance or context
|
|
31
33
|
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
|
|
32
34
|
- ABSOLUTELY FORBIDDEN: Do NOT wrap content in markdown code fences (```markdown). Return pure content only.
|
|
33
35
|
- ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!", "Ensure all dependencies are up-to-date", or any concluding statements
|
|
34
|
-
- ALWAYS use the specific guidance provided above to create concrete, actionable content
|
|
36
|
+
- ALWAYS use the specific guidance provided above to create concrete, actionable content based on evidence
|
|
35
37
|
|
|
36
38
|
STYLE & CONSTRAINTS
|
|
37
39
|
- Fix obvious errors in the content.
|
|
@@ -47,22 +49,22 @@ STYLE & CONSTRAINTS
|
|
|
47
49
|
- When targeting README content, do not rewrite the document title or header area; generate only the requested section body to be inserted below existing headers/badges.
|
|
48
50
|
|
|
49
51
|
SECTION GUIDELINES (follow guidance exactly)
|
|
50
|
-
- Dependencies: Include specific packages mentioned in guidance
|
|
51
|
-
- System Requirements: Include
|
|
52
|
-
- Hardware Requirements: Include RAM/CPU recommendations
|
|
52
|
+
- Dependencies: Include ONLY specific packages explicitly mentioned in guidance or found in repo context. Never invent package names or versions.
|
|
53
|
+
- System Requirements: Include ONLY language/runtime version requirements explicitly stated in guidance or found in repo context. Never invent version numbers.
|
|
54
|
+
- Hardware Requirements: Include ONLY specific RAM/CPU recommendations explicitly stated in guidance or found in repo context. NEVER estimate or invent hardware specifications - omit this section if not substantiated.
|
|
53
55
|
- License: one sentence referencing the license and pointing to the LICENSE file.
|
|
54
|
-
- Install (clarify dependencies): Include compatibility details
|
|
56
|
+
- Install (clarify dependencies): Include compatibility details ONLY if explicitly mentioned in guidance or found in repo context.
|
|
55
57
|
- Tutorial improvements: Add specific examples, error handling, and reproducibility notes as mentioned in guidance
|
|
56
58
|
- User guide improvements: Enhance clarity, add missing information, and improve error handling as mentioned in guidance
|
|
57
|
-
- Conservative injection: For tutorial files
|
|
58
|
-
-
|
|
59
|
-
-
|
|
60
|
-
*
|
|
59
|
+
- Conservative injection: For tutorial files, make minimal, targeted additions that preserve the original structure and flow. Add brief notes, small subsections, or contextual comments that enhance existing content without disrupting the tutorial's narrative.
|
|
60
|
+
- Natural integration: When inserting content into existing tutorials or guides, integrate naturally into the flow rather than creating standalone sections. Add brief explanatory text, code comments, or small subsections that enhance the existing content.
|
|
61
|
+
- Format compliance: Preserve the existing file format conventions (e.g., YAML frontmatter, code blocks, headers):
|
|
62
|
+
* For code examples, use the appropriate code fence syntax for the language (e.g., ```r, ```python, ```bash)
|
|
61
63
|
* Maintain the tutorial's existing tone and context - content should feel like a natural continuation
|
|
62
64
|
* Avoid creating new major sections unless absolutely necessary
|
|
63
|
-
* Use inline R code with `{{r code_here}}` when appropriate
|
|
64
65
|
* Keep explanations concise and contextual to the tutorial's purpose
|
|
65
|
-
- Context awareness: Content should feel like a natural part of the existing
|
|
66
|
+
- Context awareness: Content should feel like a natural part of the existing document, not a standalone addition. Reference the document's specific context, datasets, and examples when available.
|
|
67
|
+
- Biological accuracy: For biomedical/bioinformatics content, ensure technical accuracy. If unsure about biological or computational details, keep descriptions general rather than inventing specifics.
|
|
66
68
|
- If the section does not fit the above, produce content that directly addresses the guidance provided.
|
|
67
69
|
|
|
68
70
|
OUTPUT FORMAT
|
|
@@ -74,40 +76,72 @@ OUTPUT FORMAT
|
|
|
74
76
|
"""
|
|
75
77
|
|
|
76
78
|
LLM_FULLDOC_PROMPT = """
|
|
77
|
-
You are "BioGuider," a documentation rewriter.
|
|
79
|
+
You are "BioGuider," a documentation rewriter with enhanced capabilities for complex documents.
|
|
78
80
|
|
|
79
81
|
GOAL
|
|
80
|
-
Rewrite a complete target document using only the provided evaluation report signals and the repository context excerpts. Output a full, ready-to-publish markdown file that is more complete and directly usable.
|
|
82
|
+
Rewrite a complete target document using only the provided evaluation report signals and the repository context excerpts. Output a full, ready-to-publish markdown file that is more complete and directly usable. You now have increased token capacity to handle complex documents comprehensively.
|
|
81
83
|
|
|
82
84
|
INPUTS (authoritative)
|
|
83
85
|
- evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
|
|
84
86
|
- target_file: {target_file}
|
|
85
87
|
- repo_context_excerpt (do not copy blindly; use only to keep style/tone): <<{context}>>
|
|
86
88
|
|
|
89
|
+
CRITICAL: SINGLE DOCUMENT WITH MULTIPLE IMPROVEMENTS
|
|
90
|
+
This file requires improvements from {total_suggestions} separate evaluation suggestions. You must:
|
|
91
|
+
1. **Read ALL {total_suggestions} suggestions** in the evaluation_report before writing
|
|
92
|
+
2. **Integrate ALL suggestions into ONE cohesive document** - do NOT create {total_suggestions} separate versions
|
|
93
|
+
3. **Weave improvements together naturally** - related suggestions should enhance the same sections
|
|
94
|
+
4. **Write the document ONCE** with all improvements incorporated throughout
|
|
95
|
+
|
|
96
|
+
INTEGRATION STRATEGY
|
|
97
|
+
- Identify which suggestions target similar topics (e.g., setup, reproducibility, performance)
|
|
98
|
+
- Group related improvements and apply them to the same document sections
|
|
99
|
+
- For tutorial files: Enhance existing sections with all relevant suggestions, don't create duplicate sections
|
|
100
|
+
- For documentation files: Merge suggestions into existing structure, avoid redundant sections
|
|
101
|
+
- Result: ONE enhanced document that addresses all {total_suggestions} suggestions simultaneously
|
|
102
|
+
|
|
103
|
+
CAPACITY AND SCOPE
|
|
104
|
+
- You have enhanced token capacity to handle complex documents comprehensively
|
|
105
|
+
- Tutorial documents: Enhanced capacity for step-by-step content, code examples, and comprehensive explanations
|
|
106
|
+
- Complex documents: Increased capacity for multiple sections, detailed explanations, and extensive content
|
|
107
|
+
- Comprehensive documents: Full capacity for complete documentation with all necessary sections
|
|
108
|
+
|
|
87
109
|
STRICT CONSTRAINTS
|
|
88
|
-
- Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by
|
|
110
|
+
- Base the content solely on the evaluation report and repo context. Do not invent features, data, or claims not supported by these sources.
|
|
111
|
+
- CRITICAL: NEVER invent technical specifications including:
|
|
112
|
+
* Hardware requirements (RAM, CPU, disk space) unless explicitly stated in guidance/context
|
|
113
|
+
* Version numbers for dependencies unless explicitly stated in guidance/context
|
|
114
|
+
* Performance metrics, benchmarks, or timing estimates
|
|
115
|
+
* Biological/computational parameters or thresholds without evidence
|
|
116
|
+
* Installation commands or package names not found in the repo context
|
|
89
117
|
- Prefer completeness and usability: produce the full file content, not just minimal "added" snippets.
|
|
90
118
|
- Preserve top-of-file badges/logos if they exist in the original; keep title and header area intact unless the report requires changes.
|
|
91
119
|
- CRITICAL: Preserve the original document structure, sections, and flow. Only enhance existing content and add missing information.
|
|
92
|
-
- For tutorial files
|
|
120
|
+
- For tutorial files, maintain all original sections while improving clarity and adding missing details based on evaluation suggestions.
|
|
93
121
|
- Fix obvious errors; improve structure and readability per report suggestions.
|
|
94
122
|
- Include ONLY sections specifically requested by the evaluation report - do not add unnecessary sections.
|
|
95
123
|
- Avoid redundancy: do not duplicate information across multiple sections.
|
|
96
124
|
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
|
|
97
|
-
- ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure
|
|
125
|
+
- ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure content suitable for copy/paste.
|
|
98
126
|
- ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!" or any concluding statements
|
|
99
127
|
- Keep links well-formed; keep neutral, professional tone; concise, skimmable formatting.
|
|
100
|
-
-
|
|
128
|
+
- Preserve file-specific formatting (e.g., YAML frontmatter, code fence syntax) and do not wrap content in extra code fences.
|
|
129
|
+
|
|
130
|
+
COMPLETENESS REQUIREMENTS
|
|
131
|
+
- Generate complete, comprehensive content that addresses all evaluation suggestions
|
|
132
|
+
- For complex documents, ensure all sections are fully developed and detailed
|
|
133
|
+
- For tutorial documents, include complete step-by-step instructions with examples
|
|
134
|
+
- Use the increased token capacity to provide thorough, useful documentation
|
|
101
135
|
|
|
102
136
|
OUTPUT
|
|
103
137
|
- Return only the full markdown content for {target_file}. No commentary, no fences.
|
|
104
138
|
"""
|
|
105
139
|
|
|
106
140
|
LLM_README_COMPREHENSIVE_PROMPT = """
|
|
107
|
-
You are "BioGuider," a comprehensive documentation rewriter specializing in README files.
|
|
141
|
+
You are "BioGuider," a comprehensive documentation rewriter specializing in README files with enhanced capacity for complex documentation.
|
|
108
142
|
|
|
109
143
|
GOAL
|
|
110
|
-
Create a complete, professional README.md that addresses all evaluation suggestions comprehensively. This is the main project documentation that users will see first.
|
|
144
|
+
Create a complete, professional README.md that addresses all evaluation suggestions comprehensively. This is the main project documentation that users will see first. You now have increased token capacity to create thorough, comprehensive documentation.
|
|
111
145
|
|
|
112
146
|
INPUTS (authoritative)
|
|
113
147
|
- evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
|
|
@@ -124,12 +158,26 @@ COMPREHENSIVE README REQUIREMENTS
|
|
|
124
158
|
- Make it copy-paste ready for users
|
|
125
159
|
- Use professional, clear language suitable for biomedical researchers
|
|
126
160
|
|
|
161
|
+
ENHANCED CAPACITY FEATURES
|
|
162
|
+
- You have increased token capacity to create comprehensive documentation
|
|
163
|
+
- Include detailed explanations, multiple examples, and thorough coverage
|
|
164
|
+
- Provide extensive installation instructions with platform-specific details
|
|
165
|
+
- Add comprehensive usage examples with different scenarios
|
|
166
|
+
- Include detailed API documentation if applicable
|
|
167
|
+
- Provide troubleshooting guides with common issues and solutions
|
|
168
|
+
|
|
127
169
|
STRICT CONSTRAINTS
|
|
128
170
|
- Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
|
|
129
171
|
- ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Return pure markdown content.
|
|
130
172
|
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
|
|
131
173
|
- Keep links well-formed; use neutral, professional tone; concise, skimmable formatting.
|
|
132
174
|
|
|
175
|
+
COMPLETENESS REQUIREMENTS
|
|
176
|
+
- Generate complete, comprehensive content that addresses all evaluation suggestions
|
|
177
|
+
- Ensure all sections are fully developed and detailed
|
|
178
|
+
- Use the increased token capacity to provide thorough, useful documentation
|
|
179
|
+
- Include all necessary information for users to successfully use the software
|
|
180
|
+
|
|
133
181
|
OUTPUT
|
|
134
182
|
- Return only the full README.md content. No commentary, no fences.
|
|
135
183
|
"""
|
|
@@ -139,6 +187,322 @@ class LLMContentGenerator:
|
|
|
139
187
|
def __init__(self, llm: BaseChatOpenAI):
|
|
140
188
|
self.llm = llm
|
|
141
189
|
|
|
190
|
+
def _detect_truncation(self, content: str, target_file: str) -> bool:
|
|
191
|
+
"""
|
|
192
|
+
Detect if content appears to be truncated based on common patterns.
|
|
193
|
+
Universal detection for all file types.
|
|
194
|
+
|
|
195
|
+
Args:
|
|
196
|
+
content: Generated content to check
|
|
197
|
+
target_file: Target file path for context
|
|
198
|
+
|
|
199
|
+
Returns:
|
|
200
|
+
True if content appears truncated, False otherwise
|
|
201
|
+
"""
|
|
202
|
+
if not content or len(content.strip()) < 100:
|
|
203
|
+
return True
|
|
204
|
+
|
|
205
|
+
# 1. Check for very short content (applies to all files)
|
|
206
|
+
# Only flag as truncated if content is very short (< 1500 chars)
|
|
207
|
+
if len(content) < 1500:
|
|
208
|
+
return True
|
|
209
|
+
|
|
210
|
+
# 2. Check for incomplete code blocks (any language)
|
|
211
|
+
# Count opening and closing code fences
|
|
212
|
+
code_fence_count = content.count('```')
|
|
213
|
+
if code_fence_count > 0 and code_fence_count % 2 != 0:
|
|
214
|
+
# Unbalanced code fences suggest truncation
|
|
215
|
+
return True
|
|
216
|
+
|
|
217
|
+
# 3. Check for specific language code blocks
|
|
218
|
+
if target_file.endswith('.Rmd'):
|
|
219
|
+
# R chunks should be complete
|
|
220
|
+
r_chunks_open = re.findall(r'```\{r[^}]*\}', content)
|
|
221
|
+
if r_chunks_open and not content.rstrip().endswith('```'):
|
|
222
|
+
# Has R chunks but doesn't end with closing fence
|
|
223
|
+
return True
|
|
224
|
+
|
|
225
|
+
if target_file.endswith(('.py', '.js', '.ts', '.java', '.cpp', '.c')):
|
|
226
|
+
# Check for incomplete class/function definitions
|
|
227
|
+
lines = content.split('\n')
|
|
228
|
+
last_lines = [line.strip() for line in lines[-5:] if line.strip()]
|
|
229
|
+
if last_lines:
|
|
230
|
+
last_line = last_lines[-1]
|
|
231
|
+
if (last_line.endswith(':') or
|
|
232
|
+
last_line.endswith('{') or
|
|
233
|
+
last_line.endswith('(') or
|
|
234
|
+
'def ' in last_line or
|
|
235
|
+
'class ' in last_line or
|
|
236
|
+
'function ' in last_line):
|
|
237
|
+
return True
|
|
238
|
+
|
|
239
|
+
# 4. Check for incomplete markdown sections (applies to all markdown-like files)
|
|
240
|
+
if any(target_file.endswith(ext) for ext in ['.md', '.Rmd', '.rst', '.txt']):
|
|
241
|
+
lines = content.split('\n')
|
|
242
|
+
last_non_empty_line = None
|
|
243
|
+
for line in reversed(lines):
|
|
244
|
+
if line.strip():
|
|
245
|
+
last_non_empty_line = line.strip()
|
|
246
|
+
break
|
|
247
|
+
|
|
248
|
+
if last_non_empty_line:
|
|
249
|
+
# Check if last line looks incomplete
|
|
250
|
+
incomplete_endings = [
|
|
251
|
+
'##', # Header without content
|
|
252
|
+
'###', # Header without content
|
|
253
|
+
'####', # Header without content
|
|
254
|
+
'-', # List item
|
|
255
|
+
'*', # List item or emphasis
|
|
256
|
+
':', # Definition or label
|
|
257
|
+
'|', # Table row
|
|
258
|
+
]
|
|
259
|
+
|
|
260
|
+
for ending in incomplete_endings:
|
|
261
|
+
if last_non_empty_line.endswith(ending):
|
|
262
|
+
return True
|
|
263
|
+
|
|
264
|
+
# Check if ends with incomplete patterns
|
|
265
|
+
content_end = content[-300:].strip().lower()
|
|
266
|
+
incomplete_patterns = [
|
|
267
|
+
'## ', # Section header without content
|
|
268
|
+
'### ', # Subsection without content
|
|
269
|
+
'#### ', # Sub-subsection without content
|
|
270
|
+
'```{', # Incomplete code chunk
|
|
271
|
+
'```r', # Incomplete R chunk
|
|
272
|
+
'```python',# Incomplete Python chunk
|
|
273
|
+
]
|
|
274
|
+
|
|
275
|
+
for pattern in incomplete_patterns:
|
|
276
|
+
if content_end.endswith(pattern.lower()):
|
|
277
|
+
return True
|
|
278
|
+
|
|
279
|
+
return False
|
|
280
|
+
|
|
281
|
+
def _appears_complete(self, content: str, target_file: str) -> bool:
|
|
282
|
+
"""
|
|
283
|
+
Check if content appears to be complete based on structure and patterns.
|
|
284
|
+
Universal completion check for all file types.
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
content: Generated content to check
|
|
288
|
+
target_file: Target file path for context
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
True if content appears complete, False if it needs continuation
|
|
292
|
+
"""
|
|
293
|
+
if not content or len(content.strip()) < 100:
|
|
294
|
+
return False
|
|
295
|
+
|
|
296
|
+
# 1. Check for balanced code blocks (applies to all files)
|
|
297
|
+
code_block_count = content.count('```')
|
|
298
|
+
if code_block_count > 0 and code_block_count % 2 != 0:
|
|
299
|
+
# Unbalanced code blocks suggest incomplete
|
|
300
|
+
return False
|
|
301
|
+
|
|
302
|
+
# 2. File type specific checks
|
|
303
|
+
|
|
304
|
+
# RMarkdown files
|
|
305
|
+
if target_file.endswith('.Rmd'):
|
|
306
|
+
# Check for proper YAML frontmatter
|
|
307
|
+
if not content.startswith('---'):
|
|
308
|
+
return False
|
|
309
|
+
|
|
310
|
+
# Check for conclusion patterns
|
|
311
|
+
conclusion_patterns = [
|
|
312
|
+
'sessionInfo()',
|
|
313
|
+
'session.info()',
|
|
314
|
+
'## Conclusion',
|
|
315
|
+
'## Summary',
|
|
316
|
+
'## Session Info',
|
|
317
|
+
'</details>',
|
|
318
|
+
'knitr::knit(',
|
|
319
|
+
]
|
|
320
|
+
|
|
321
|
+
content_lower = content.lower()
|
|
322
|
+
has_conclusion = any(pattern.lower() in content_lower for pattern in conclusion_patterns)
|
|
323
|
+
|
|
324
|
+
# If we have a conclusion and balanced code blocks, likely complete
|
|
325
|
+
if has_conclusion and code_block_count > 0:
|
|
326
|
+
return True
|
|
327
|
+
|
|
328
|
+
# Markdown files
|
|
329
|
+
if target_file.endswith('.md'):
|
|
330
|
+
# Check for conclusion sections
|
|
331
|
+
conclusion_patterns = [
|
|
332
|
+
'## Conclusion',
|
|
333
|
+
'## Summary',
|
|
334
|
+
'## Next Steps',
|
|
335
|
+
'## Further Reading',
|
|
336
|
+
'## References',
|
|
337
|
+
'## License',
|
|
338
|
+
]
|
|
339
|
+
|
|
340
|
+
content_lower = content.lower()
|
|
341
|
+
has_conclusion = any(pattern.lower() in content_lower for pattern in conclusion_patterns)
|
|
342
|
+
|
|
343
|
+
if has_conclusion and len(content) > 2000:
|
|
344
|
+
return True
|
|
345
|
+
|
|
346
|
+
# Python files
|
|
347
|
+
if target_file.endswith('.py'):
|
|
348
|
+
# Check for balanced brackets/parentheses
|
|
349
|
+
if content.count('(') != content.count(')'):
|
|
350
|
+
return False
|
|
351
|
+
if content.count('[') != content.count(']'):
|
|
352
|
+
return False
|
|
353
|
+
if content.count('{') != content.count('}'):
|
|
354
|
+
return False
|
|
355
|
+
|
|
356
|
+
# Check for complete structure (reasonable length + proper ending)
|
|
357
|
+
lines = [line for line in content.split('\n') if line.strip()]
|
|
358
|
+
if len(lines) > 20: # Has reasonable content
|
|
359
|
+
last_line = lines[-1].strip()
|
|
360
|
+
# Should not end with incomplete statements
|
|
361
|
+
if not (last_line.endswith(':') or
|
|
362
|
+
last_line.endswith('\\') or
|
|
363
|
+
last_line.endswith(',')):
|
|
364
|
+
return True
|
|
365
|
+
|
|
366
|
+
# JavaScript/TypeScript files
|
|
367
|
+
if target_file.endswith(('.js', '.ts', '.jsx', '.tsx')):
|
|
368
|
+
# Check for balanced brackets
|
|
369
|
+
if content.count('{') != content.count('}'):
|
|
370
|
+
return False
|
|
371
|
+
if content.count('(') != content.count(')'):
|
|
372
|
+
return False
|
|
373
|
+
|
|
374
|
+
lines = [line for line in content.split('\n') if line.strip()]
|
|
375
|
+
if len(lines) > 20:
|
|
376
|
+
last_line = lines[-1].strip()
|
|
377
|
+
# Complete if ends with proper syntax
|
|
378
|
+
if (last_line.endswith('}') or
|
|
379
|
+
last_line.endswith(';') or
|
|
380
|
+
last_line.endswith('*/') or
|
|
381
|
+
last_line.startswith('//')):
|
|
382
|
+
return True
|
|
383
|
+
|
|
384
|
+
# 3. Generic checks for all file types
|
|
385
|
+
if len(content) > 3000: # Reasonable length
|
|
386
|
+
# Check if it ends with complete sentences/sections
|
|
387
|
+
lines = content.split('\n')
|
|
388
|
+
last_lines = [line.strip() for line in lines[-10:] if line.strip()]
|
|
389
|
+
|
|
390
|
+
if last_lines:
|
|
391
|
+
last_line = last_lines[-1]
|
|
392
|
+
# Complete if ends with proper punctuation or closing tags
|
|
393
|
+
complete_endings = [
|
|
394
|
+
'.', # Sentence
|
|
395
|
+
'```', # Code block
|
|
396
|
+
'---', # Section divider
|
|
397
|
+
'</details>', # HTML details
|
|
398
|
+
'}', # Closing brace
|
|
399
|
+
';', # Statement end
|
|
400
|
+
'*/', # Comment end
|
|
401
|
+
]
|
|
402
|
+
|
|
403
|
+
if any(last_line.endswith(ending) for ending in complete_endings):
|
|
404
|
+
return True
|
|
405
|
+
|
|
406
|
+
return False
|
|
407
|
+
|
|
408
|
+
def _generate_continuation(self, target_file: str, evaluation_report: dict,
                           context: str, existing_content: str) -> tuple[str, dict]:
    """
    Generate continuation content from where previous generation left off.

    A fresh LLM is built from the OPENAI_* / AZURE_OPENAI_ENDPOINT
    environment variables via ``get_llm``. If ``get_llm`` returns None,
    the generator's own ``self.llm`` is used instead.

    Only excerpts are sent to the model: the last 1000 characters of
    ``existing_content``, the first 4000 characters of the JSON-encoded
    ``evaluation_report`` and the first 2000 characters of ``context``.

    Args:
        target_file: Target file path (interpolated into the prompt).
        evaluation_report: Evaluation report data. ``total_suggestions`` or
            the length of a ``suggestions`` list is used as the suggestion
            count when present; otherwise the count defaults to 1.
        context: Repository context.
        existing_content: Previously generated content.

    Returns:
        Tuple of (continuation_content, token_usage).
    """
    # Local imports, as in the original implementation.
    from bioguider.agents.agent_utils import get_llm
    import os

    llm = get_llm(
        api_key=os.environ.get("OPENAI_API_KEY"),
        model_name=os.environ.get("OPENAI_MODEL", "gpt-4o"),
        azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
        api_version=os.environ.get("OPENAI_API_VERSION"),
        azure_deployment=os.environ.get("OPENAI_DEPLOYMENT_NAME"),
    )
    if llm is None:
        # get_llm can return None; do not hand that to the conversation
        # wrapper - reuse the model this generator was built with.
        llm = self.llm

    conv = CommonConversation(llm)

    # Number of suggestions quoted in the prompt.
    total_suggestions = 1
    if isinstance(evaluation_report, dict):
        if "total_suggestions" in evaluation_report:
            total_suggestions = evaluation_report["total_suggestions"]
        elif isinstance(evaluation_report.get("suggestions"), list):
            total_suggestions = len(evaluation_report["suggestions"])

    # Excerpts are computed up front so the prompt contains only the data.
    # (Previously a Python comment sat inside the f-string and was sent to
    # the model as if it were part of the earlier content.)
    previous_tail = (existing_content or "")[-1000:]
    # default=str: this text is only read by the model, so stringifying
    # values json cannot encode is preferable to raising TypeError.
    report_excerpt = json.dumps(evaluation_report, default=str)[:4000]
    context_excerpt = (context or "")[:2000]

    continuation_prompt = f"""
You are "BioGuider," continuing a documentation generation task with enhanced capacity for complex documents.

GOAL
Continue generating the document "{target_file}" from where the previous generation left off.
The previous content was truncated and needs to be completed. You now have increased token
capacity to handle complex documents comprehensively.

PREVIOUS CONTENT (do not repeat this):
```
{previous_tail}
```

TASK
Continue the document naturally from the last complete section. Maintain the same style,
structure, and flow as the previous content. Complete all remaining sections that should
be in this document.

CAPACITY AND SCOPE
- You have enhanced token capacity to handle complex documents comprehensively
- Tutorial documents: Enhanced capacity for step-by-step content, code examples, and comprehensive explanations
- Complex documents: Increased capacity for multiple sections, detailed explanations, and extensive content
- Comprehensive documents: Full capacity for complete documentation with all necessary sections

INPUTS
- evaluation_report (contains {total_suggestions} suggestions to integrate): {report_excerpt}
- context: {context_excerpt}

REMINDER: SINGLE DOCUMENT APPROACH
- The evaluation report contains {total_suggestions} SEPARATE suggestions
- These should be integrated into ONE cohesive continuation
- Do NOT create {total_suggestions} separate sections for each suggestion
- Group related suggestions (e.g., setup, reproducibility, performance) and integrate them naturally

REQUIREMENTS
- Continue seamlessly from the previous content
- Maintain the same tone and style
- Complete all sections that should be in this document
- Preserve file-specific formatting (e.g., YAML frontmatter, code block syntax appropriate to the language)
- Do not repeat content already generated
- Return only the continuation content, not the full document
- Use the increased token capacity to provide thorough, complete content
- NEVER invent technical specifications (hardware, versions, performance) unless explicitly in evaluation report or context
- ABSOLUTELY FORBIDDEN: Do NOT wrap content in markdown code fences (```markdown). Return pure content only.
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents

COMPLETENESS REQUIREMENTS
- Generate complete, comprehensive content that addresses all remaining evaluation suggestions
- For complex documents, ensure all sections are fully developed and detailed
- For tutorial documents, include complete step-by-step instructions with examples
- Use the increased token capacity to provide thorough, useful documentation

OUTPUT
Return only the continuation content that should be appended to the existing content.
"""

    content, token_usage = conv.generate(
        system_prompt=continuation_prompt,
        instruction_prompt="Continue the document from where it left off."
    )
    return content.strip(), token_usage
|
|
505
|
+
|
|
142
506
|
def generate_section(self, suggestion: SuggestionItem, style: StyleProfile, context: str = "") -> tuple[str, dict]:
|
|
143
507
|
conv = CommonConversation(self.llm)
|
|
144
508
|
section_name = suggestion.anchor_hint or suggestion.category.split(".")[-1].replace("_", " ").title()
|
|
@@ -158,7 +522,43 @@ class LLMContentGenerator:
|
|
|
158
522
|
return content.strip(), token_usage
|
|
159
523
|
|
|
160
524
|
def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "") -> tuple[str, dict]:
|
|
161
|
-
|
|
525
|
+
# Create LLM (uses 16k tokens by default - enough for any document)
|
|
526
|
+
from bioguider.agents.agent_utils import get_llm
|
|
527
|
+
import os
|
|
528
|
+
import json
|
|
529
|
+
from datetime import datetime
|
|
530
|
+
|
|
531
|
+
# Get LLM with default 16k token limit
|
|
532
|
+
llm = get_llm(
|
|
533
|
+
api_key=os.environ.get("OPENAI_API_KEY"),
|
|
534
|
+
model_name=os.environ.get("OPENAI_MODEL", "gpt-4o"),
|
|
535
|
+
azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
|
|
536
|
+
api_version=os.environ.get("OPENAI_API_VERSION"),
|
|
537
|
+
azure_deployment=os.environ.get("OPENAI_DEPLOYMENT_NAME"),
|
|
538
|
+
)
|
|
539
|
+
|
|
540
|
+
conv = CommonConversation(llm)
|
|
541
|
+
|
|
542
|
+
# Debug: Save generation settings and context
|
|
543
|
+
debug_info = {
|
|
544
|
+
"target_file": target_file,
|
|
545
|
+
"timestamp": datetime.now().isoformat(),
|
|
546
|
+
"evaluation_report": evaluation_report,
|
|
547
|
+
"context_length": len(context),
|
|
548
|
+
"llm_settings": {
|
|
549
|
+
"model_name": os.environ.get("OPENAI_MODEL", "gpt-4o"),
|
|
550
|
+
"azure_deployment": os.environ.get("OPENAI_DEPLOYMENT_NAME"),
|
|
551
|
+
"max_tokens": getattr(llm, 'max_tokens', 16384)
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
# Save debug info to file
|
|
556
|
+
debug_dir = "outputs/debug_generation"
|
|
557
|
+
os.makedirs(debug_dir, exist_ok=True)
|
|
558
|
+
safe_filename = target_file.replace("/", "_").replace(".", "_")
|
|
559
|
+
debug_file = os.path.join(debug_dir, f"{safe_filename}_debug.json")
|
|
560
|
+
with open(debug_file, 'w', encoding='utf-8') as f:
|
|
561
|
+
json.dump(debug_info, f, indent=2, ensure_ascii=False)
|
|
162
562
|
|
|
163
563
|
# Use comprehensive README prompt for README.md files
|
|
164
564
|
if target_file.endswith("README.md"):
|
|
@@ -168,13 +568,204 @@ class LLMContentGenerator:
|
|
|
168
568
|
context=context[:4000],
|
|
169
569
|
)
|
|
170
570
|
else:
|
|
571
|
+
# Calculate total suggestions for the prompt
|
|
572
|
+
total_suggestions = 1
|
|
573
|
+
if isinstance(evaluation_report, dict):
|
|
574
|
+
if "total_suggestions" in evaluation_report:
|
|
575
|
+
total_suggestions = evaluation_report["total_suggestions"]
|
|
576
|
+
elif "suggestions" in evaluation_report and isinstance(evaluation_report["suggestions"], list):
|
|
577
|
+
total_suggestions = len(evaluation_report["suggestions"])
|
|
578
|
+
|
|
171
579
|
system_prompt = LLM_FULLDOC_PROMPT.format(
|
|
172
580
|
target_file=target_file,
|
|
173
581
|
evaluation_report=json.dumps(evaluation_report)[:6000],
|
|
174
582
|
context=context[:4000],
|
|
583
|
+
total_suggestions=total_suggestions,
|
|
175
584
|
)
|
|
176
585
|
|
|
586
|
+
# Save initial prompt for debugging
|
|
587
|
+
prompt_file = os.path.join(debug_dir, f"{safe_filename}_prompt.txt")
|
|
588
|
+
with open(prompt_file, 'w', encoding='utf-8') as f:
|
|
589
|
+
f.write("=== SYSTEM PROMPT ===\n")
|
|
590
|
+
f.write(system_prompt)
|
|
591
|
+
f.write("\n\n=== INSTRUCTION PROMPT ===\n")
|
|
592
|
+
f.write("Write the full document now.")
|
|
593
|
+
f.write("\n\n=== EVALUATION REPORT ===\n")
|
|
594
|
+
f.write(json.dumps(evaluation_report, indent=2))
|
|
595
|
+
f.write("\n\n=== CONTEXT ===\n")
|
|
596
|
+
f.write(context[:2000] + "..." if len(context) > 2000 else context)
|
|
597
|
+
|
|
598
|
+
# Initial generation
|
|
177
599
|
content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the full document now.")
|
|
178
|
-
|
|
600
|
+
content = content.strip()
|
|
601
|
+
|
|
602
|
+
# Save initial generation for debugging
|
|
603
|
+
generation_file = os.path.join(debug_dir, f"{safe_filename}_generation_0.txt")
|
|
604
|
+
with open(generation_file, 'w', encoding='utf-8') as f:
|
|
605
|
+
f.write(f"=== INITIAL GENERATION ===\n")
|
|
606
|
+
f.write(f"Tokens: {token_usage}\n")
|
|
607
|
+
f.write(f"Length: {len(content)} characters\n")
|
|
608
|
+
f.write(f"Truncation detected: {self._detect_truncation(content, target_file)}\n")
|
|
609
|
+
f.write(f"\n=== CONTENT ===\n")
|
|
610
|
+
f.write(content)
|
|
611
|
+
|
|
612
|
+
# Check for truncation and continue if needed
|
|
613
|
+
max_continuations = 3 # Limit to prevent infinite loops
|
|
614
|
+
continuation_count = 0
|
|
615
|
+
|
|
616
|
+
while (self._detect_truncation(content, target_file) and
|
|
617
|
+
continuation_count < max_continuations):
|
|
618
|
+
|
|
619
|
+
# Additional check: if content appears complete, don't continue
|
|
620
|
+
if self._appears_complete(content, target_file):
|
|
621
|
+
break
|
|
622
|
+
continuation_count += 1
|
|
623
|
+
|
|
624
|
+
# Save continuation prompt for debugging
|
|
625
|
+
continuation_prompt_file = os.path.join(debug_dir, f"{safe_filename}_continuation_{continuation_count}_prompt.txt")
|
|
626
|
+
continuation_prompt = f"""
|
|
627
|
+
You are "BioGuider," continuing a documentation generation task with enhanced capacity for complex documents.
|
|
628
|
+
|
|
629
|
+
GOAL
|
|
630
|
+
Continue generating the document "{target_file}" from where the previous generation left off.
|
|
631
|
+
The previous content was truncated and needs to be completed. You now have increased token
|
|
632
|
+
capacity to handle complex documents comprehensively.
|
|
633
|
+
|
|
634
|
+
PREVIOUS CONTENT (do not repeat this):
|
|
635
|
+
```
|
|
636
|
+
{content[-1000:]} # Last 1000 chars for context
|
|
637
|
+
```
|
|
638
|
+
|
|
639
|
+
TASK
|
|
640
|
+
Continue the document naturally from the last complete section. Maintain the same style,
|
|
641
|
+
structure, and flow as the previous content. Complete all remaining sections that should
|
|
642
|
+
be in this document.
|
|
643
|
+
|
|
644
|
+
CRITICAL REQUIREMENTS:
|
|
645
|
+
- Do NOT repeat any content already generated above
|
|
646
|
+
- Do NOT duplicate sections, headers, or code blocks that already exist
|
|
647
|
+
- Generate ONLY new, unique content that continues from where the previous content ended
|
|
648
|
+
- If the previous content appears complete, add complementary sections that enhance the document
|
|
649
|
+
- Focus on adding missing sections, examples, or explanations that weren't covered
|
|
650
|
+
|
|
651
|
+
CAPACITY AND SCOPE
|
|
652
|
+
- You have enhanced token capacity to handle complex documents comprehensively
|
|
653
|
+
- Tutorial documents: Enhanced capacity for step-by-step content, code examples, and comprehensive explanations
|
|
654
|
+
- Complex documents: Increased capacity for multiple sections, detailed explanations, and extensive content
|
|
655
|
+
- Comprehensive documents: Full capacity for complete documentation with all necessary sections
|
|
656
|
+
|
|
657
|
+
INPUTS
|
|
658
|
+
- evaluation_report (contains {total_suggestions} suggestions to integrate): {json.dumps(evaluation_report)[:4000]}
|
|
659
|
+
- context: {context[:2000]}
|
|
660
|
+
|
|
661
|
+
REMINDER: SINGLE DOCUMENT APPROACH
|
|
662
|
+
- The evaluation report contains {total_suggestions} SEPARATE suggestions
|
|
663
|
+
- These should be integrated into ONE cohesive continuation
|
|
664
|
+
- Do NOT create {total_suggestions} separate sections for each suggestion
|
|
665
|
+
- Group related suggestions (e.g., setup, reproducibility, performance) and integrate them naturally
|
|
666
|
+
|
|
667
|
+
REQUIREMENTS
|
|
668
|
+
- Continue seamlessly from the previous content
|
|
669
|
+
- Maintain the same tone and style
|
|
670
|
+
- Complete all sections that should be in this document
|
|
671
|
+
- Preserve file-specific formatting (e.g., YAML frontmatter, code block syntax appropriate to the language)
|
|
672
|
+
- Do not repeat content already generated
|
|
673
|
+
- Return only the continuation content, not the full document
|
|
674
|
+
- Use the increased token capacity to provide thorough, complete content
|
|
675
|
+
- NEVER invent technical specifications (hardware, versions, performance) unless explicitly in evaluation report or context
|
|
676
|
+
- ABSOLUTELY FORBIDDEN: Do NOT wrap content in markdown code fences (```markdown). Return pure content only.
|
|
677
|
+
- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
|
|
678
|
+
|
|
679
|
+
COMPLETENESS REQUIREMENTS
|
|
680
|
+
- Generate complete, comprehensive content that addresses all remaining evaluation suggestions
|
|
681
|
+
- For complex documents, ensure all sections are fully developed and detailed
|
|
682
|
+
- For tutorial documents, include complete step-by-step instructions with examples
|
|
683
|
+
- Use the increased token capacity to provide thorough, useful documentation
|
|
684
|
+
|
|
685
|
+
OUTPUT
|
|
686
|
+
- Return only the continuation content. No commentary, no fences.
|
|
687
|
+
"""
|
|
688
|
+
|
|
689
|
+
with open(continuation_prompt_file, 'w', encoding='utf-8') as f:
|
|
690
|
+
f.write(continuation_prompt)
|
|
691
|
+
|
|
692
|
+
# Generate continuation
|
|
693
|
+
continuation_content, continuation_usage = self._generate_continuation(
|
|
694
|
+
target_file=target_file,
|
|
695
|
+
evaluation_report=evaluation_report,
|
|
696
|
+
context=context,
|
|
697
|
+
existing_content=content
|
|
698
|
+
)
|
|
699
|
+
|
|
700
|
+
# Save continuation generation for debugging
|
|
701
|
+
continuation_file = os.path.join(debug_dir, f"{safe_filename}_continuation_{continuation_count}.txt")
|
|
702
|
+
with open(continuation_file, 'w', encoding='utf-8') as f:
|
|
703
|
+
f.write(f"=== CONTINUATION {continuation_count} ===\n")
|
|
704
|
+
f.write(f"Tokens: {continuation_usage}\n")
|
|
705
|
+
f.write(f"Length: {len(continuation_content)} characters\n")
|
|
706
|
+
f.write(f"Truncation detected: {self._detect_truncation(continuation_content, target_file)}\n")
|
|
707
|
+
f.write(f"\n=== CONTENT ===\n")
|
|
708
|
+
f.write(continuation_content)
|
|
709
|
+
|
|
710
|
+
# Merge continuation with existing content
|
|
711
|
+
if continuation_content:
|
|
712
|
+
content += "\n\n" + continuation_content
|
|
713
|
+
# Update token usage
|
|
714
|
+
token_usage = {
|
|
715
|
+
"total_tokens": token_usage.get("total_tokens", 0) + continuation_usage.get("total_tokens", 0),
|
|
716
|
+
"prompt_tokens": token_usage.get("prompt_tokens", 0) + continuation_usage.get("prompt_tokens", 0),
|
|
717
|
+
"completion_tokens": token_usage.get("completion_tokens", 0) + continuation_usage.get("completion_tokens", 0),
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
# Save merged content for debugging
|
|
721
|
+
merged_file = os.path.join(debug_dir, f"{safe_filename}_merged_{continuation_count}.txt")
|
|
722
|
+
with open(merged_file, 'w', encoding='utf-8') as f:
|
|
723
|
+
f.write(f"=== MERGED CONTENT AFTER CONTINUATION {continuation_count} ===\n")
|
|
724
|
+
f.write(f"Total length: {len(content)} characters\n")
|
|
725
|
+
f.write(f"Truncation detected: {self._detect_truncation(content, target_file)}\n")
|
|
726
|
+
f.write(f"\n=== CONTENT ===\n")
|
|
727
|
+
f.write(content)
|
|
728
|
+
else:
|
|
729
|
+
# If continuation is empty, break to avoid infinite loop
|
|
730
|
+
break
|
|
731
|
+
|
|
732
|
+
# Clean up any markdown code fences that might have been added
|
|
733
|
+
content = self._clean_markdown_fences(content)
|
|
734
|
+
|
|
735
|
+
# Save final cleaned content for debugging
|
|
736
|
+
final_file = os.path.join(debug_dir, f"{safe_filename}_final.txt")
|
|
737
|
+
with open(final_file, 'w', encoding='utf-8') as f:
|
|
738
|
+
f.write(f"=== FINAL CLEANED CONTENT ===\n")
|
|
739
|
+
f.write(f"Total tokens: {token_usage}\n")
|
|
740
|
+
f.write(f"Final length: {len(content)} characters\n")
|
|
741
|
+
f.write(f"Continuations used: {continuation_count}\n")
|
|
742
|
+
f.write(f"\n=== CONTENT ===\n")
|
|
743
|
+
f.write(content)
|
|
744
|
+
|
|
745
|
+
return content, token_usage
|
|
746
|
+
|
|
747
|
+
def _clean_markdown_fences(self, content: str) -> str:
    """
    Strip ```markdown wrapper fences that the LLM added around the document.

    Every line that consists solely of ```markdown (a wrapper opener, whether
    on the first line or where a continuation chunk was appended) is dropped.
    A bare ``` on the last line is dropped only when the remaining fence
    lines are unbalanced, i.e. when it is the leftover closer of a removed
    wrapper. A document that simply ends with a regular fenced code block
    keeps its closing fence.

    Args:
        content: Generated document text, possibly wrapped in markdown fences.

    Returns:
        The text with wrapper fences removed and inner code blocks intact.
    """
    wrapper_open = '```markdown'
    fence = '```'

    # Drop wrapper openers wherever they occur; this also covers an opener
    # on the very first line, so no separate prefix check is needed.
    kept_lines = [
        line for line in content.split('\n')
        if line.strip() != wrapper_open
    ]

    # Fence lines that remain pair up as open/close when the document is
    # well formed. An odd count with a bare fence on the last line means
    # that fence closed a wrapper we just removed, so it is safe to drop.
    # With an even count the last fence closes a real code block and must
    # stay, otherwise the final block would be left unclosed.
    fence_count = sum(1 for line in kept_lines if line.lstrip().startswith(fence))
    if fence_count % 2 == 1 and kept_lines and kept_lines[-1].strip() == fence:
        kept_lines.pop()

    return '\n'.join(kept_lines)
|
|
179
770
|
|
|
180
771
|
|
|
@@ -3,10 +3,8 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
from bioguider.agents.evaluation_tutorial_task import EvaluationTutorialTask
|
|
5
5
|
from bioguider.agents.evaluation_userguide_task import EvaluationUserGuideTask
|
|
6
|
-
from bioguider.agents.prompt_utils import CollectionGoalItemEnum
|
|
7
6
|
from bioguider.database.code_structure_db import CodeStructureDb
|
|
8
7
|
from bioguider.utils.constants import ProjectMetadata
|
|
9
|
-
from bioguider.utils.gitignore_checker import GitignoreChecker
|
|
10
8
|
|
|
11
9
|
from ..agents.identification_task import IdentificationTask
|
|
12
10
|
from ..rag.rag import RAG
|
|
@@ -34,6 +34,7 @@ class DocumentationGenerationManager:
|
|
|
34
34
|
self.output = OutputManager(base_outputs_dir=output_dir)
|
|
35
35
|
self.llm_gen = LLMContentGenerator(llm)
|
|
36
36
|
self.llm_cleaner = LLMCleaner(llm)
|
|
37
|
+
|
|
37
38
|
|
|
38
39
|
def print_step(self, step_name: str | None = None, step_output: str | None = None):
|
|
39
40
|
if self.step_callback is None:
|
|
@@ -143,54 +144,175 @@ class DocumentationGenerationManager:
|
|
|
143
144
|
self.print_step(step_name="ProcessingFile", step_output=f"Processing {fpath} ({processed_files}/{total_files}) - {len(edits)} edits")
|
|
144
145
|
|
|
145
146
|
original_content = files.get(fpath, "")
|
|
147
|
+
|
|
148
|
+
# Group suggestions by file to avoid duplicate generation
|
|
149
|
+
file_suggestions = []
|
|
150
|
+
full_replace_edits = []
|
|
151
|
+
section_edits = []
|
|
152
|
+
|
|
153
|
+
for e in edits:
|
|
154
|
+
suggestion = next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
155
|
+
if suggestion:
|
|
156
|
+
file_suggestions.append(suggestion)
|
|
157
|
+
if e.edit_type == "full_replace":
|
|
158
|
+
full_replace_edits.append(e)
|
|
159
|
+
else:
|
|
160
|
+
section_edits.append(e)
|
|
161
|
+
|
|
162
|
+
# Debug: Save suggestion grouping info
|
|
163
|
+
import json
|
|
164
|
+
import os
|
|
165
|
+
from datetime import datetime
|
|
166
|
+
|
|
167
|
+
debug_dir = "outputs/debug_generation"
|
|
168
|
+
os.makedirs(debug_dir, exist_ok=True)
|
|
169
|
+
safe_filename = fpath.replace("/", "_").replace(".", "_")
|
|
170
|
+
|
|
171
|
+
grouping_info = {
|
|
172
|
+
"file_path": fpath,
|
|
173
|
+
"total_edits": len(edits),
|
|
174
|
+
"file_suggestions_count": len(file_suggestions),
|
|
175
|
+
"full_replace_edits_count": len(full_replace_edits),
|
|
176
|
+
"section_edits_count": len(section_edits),
|
|
177
|
+
"suggestions": [
|
|
178
|
+
{
|
|
179
|
+
"id": s.id,
|
|
180
|
+
"category": s.category,
|
|
181
|
+
"content_guidance": s.content_guidance[:200] + "..." if len(s.content_guidance or "") > 200 else s.content_guidance,
|
|
182
|
+
"target_files": s.target_files
|
|
183
|
+
} for s in file_suggestions
|
|
184
|
+
],
|
|
185
|
+
"timestamp": datetime.now().isoformat()
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
grouping_file = os.path.join(debug_dir, f"{safe_filename}_grouping.json")
|
|
189
|
+
with open(grouping_file, 'w', encoding='utf-8') as f:
|
|
190
|
+
json.dump(grouping_info, f, indent=2, ensure_ascii=False)
|
|
191
|
+
|
|
146
192
|
content = original_content
|
|
147
193
|
total_stats = {"added_lines": 0}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
194
|
+
|
|
195
|
+
# CRITICAL: Generate content ONCE per file if there are full_replace edits
|
|
196
|
+
# All suggestions for this file are merged into a single evaluation report
|
|
197
|
+
# This prevents duplicate content generation
|
|
198
|
+
if full_replace_edits:
|
|
199
|
+
self.print_step(
|
|
200
|
+
step_name="GeneratingContent",
|
|
201
|
+
step_output=f"🔄 Generating full document for {fpath} with {len(file_suggestions)} suggestions using LLM (SINGLE CALL)..."
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
# Merge all suggestions for this file into a single evaluation report
|
|
205
|
+
# Format suggestions with clear numbering to help LLM understand they're separate improvements
|
|
206
|
+
suggestions_list = []
|
|
207
|
+
for idx, s in enumerate(file_suggestions, 1):
|
|
208
|
+
suggestions_list.append({
|
|
209
|
+
"suggestion_number": idx,
|
|
210
|
+
"category": s.category if hasattr(s, 'category') else "general",
|
|
211
|
+
"content_guidance": s.content_guidance,
|
|
212
|
+
"evidence": s.source.get("evidence", "") if s.source else ""
|
|
213
|
+
})
|
|
214
|
+
|
|
215
|
+
merged_evaluation_report = {
|
|
216
|
+
"total_suggestions": len(file_suggestions),
|
|
217
|
+
"integration_instruction": f"Integrate ALL {len(file_suggestions)} suggestions below into ONE cohesive document. Do NOT create {len(file_suggestions)} separate versions.",
|
|
218
|
+
"suggestions": suggestions_list
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
# Debug: Save merged evaluation report
|
|
222
|
+
merged_report_file = os.path.join(debug_dir, f"{safe_filename}_merged_report.json")
|
|
223
|
+
with open(merged_report_file, 'w', encoding='utf-8') as f:
|
|
224
|
+
json.dump(merged_evaluation_report, f, indent=2, ensure_ascii=False)
|
|
225
|
+
|
|
226
|
+
# Debug: Log that we're about to make a single generation call
|
|
227
|
+
debug_log_file = os.path.join(debug_dir, f"{safe_filename}_generation_log.txt")
|
|
228
|
+
with open(debug_log_file, 'a', encoding='utf-8') as f:
|
|
229
|
+
f.write(f"\n=== GENERATION CALL at {datetime.now().isoformat()} ===\n")
|
|
230
|
+
f.write(f"File: {fpath}\n")
|
|
231
|
+
f.write(f"Full replace edits: {len(full_replace_edits)}\n")
|
|
232
|
+
f.write(f"Total suggestions: {len(file_suggestions)}\n")
|
|
233
|
+
f.write(f"Merged into single call: YES\n")
|
|
234
|
+
f.write(f"Suggestion IDs: {[s.id for s in file_suggestions]}\n\n")
|
|
235
|
+
|
|
236
|
+
gen_content, gen_usage = self.llm_gen.generate_full_document(
|
|
237
|
+
target_file=fpath,
|
|
238
|
+
evaluation_report=merged_evaluation_report,
|
|
239
|
+
context=original_content,
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
# Debug: Log completion
|
|
243
|
+
with open(debug_log_file, 'a', encoding='utf-8') as f:
|
|
244
|
+
f.write(f"Generation completed at {datetime.now().isoformat()}\n")
|
|
245
|
+
f.write(f"Content length: {len(gen_content) if isinstance(gen_content, str) else 0} characters\n")
|
|
246
|
+
f.write(f"Tokens used: {gen_usage.get('total_tokens', 0)}\n")
|
|
247
|
+
f.write(f"SUCCESS: {isinstance(gen_content, str) and gen_content}\n\n")
|
|
248
|
+
|
|
249
|
+
if isinstance(gen_content, str) and gen_content:
|
|
250
|
+
self.print_step(step_name="LLMFullDoc", step_output=f"✓ Generated full document for {fpath} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
251
|
+
# Apply the generated content to all full_replace edits
|
|
252
|
+
for e in full_replace_edits:
|
|
253
|
+
e.content_template = gen_content
|
|
254
|
+
content = gen_content
|
|
255
|
+
else:
|
|
256
|
+
# Fallback: try individual generation but only for the first edit to avoid duplicates
|
|
257
|
+
if full_replace_edits:
|
|
258
|
+
e = full_replace_edits[0] # Only process the first edit
|
|
259
|
+
suggestion = next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
260
|
+
if suggestion and (not e.content_template or e.content_template.strip() == ""):
|
|
261
|
+
self.print_step(step_name="GeneratingContent", step_output=f"Fallback: Generating full document for {e.suggestion_id} using LLM...")
|
|
156
262
|
gen_content, gen_usage = self.llm_gen.generate_full_document(
|
|
157
263
|
target_file=e.file_path,
|
|
158
264
|
evaluation_report={"suggestion": suggestion.content_guidance, "evidence": suggestion.source.get("evidence", "") if suggestion.source else ""},
|
|
159
|
-
context=
|
|
265
|
+
context=original_content,
|
|
160
266
|
)
|
|
161
267
|
if isinstance(gen_content, str) and gen_content:
|
|
162
268
|
self.print_step(step_name="LLMFullDoc", step_output=f"✓ Generated full document for {e.suggestion_id} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
269
|
+
# Apply the same content to all full_replace edits
|
|
270
|
+
for edit in full_replace_edits:
|
|
271
|
+
edit.content_template = gen_content
|
|
272
|
+
content = gen_content
|
|
273
|
+
else:
|
|
274
|
+
# Handle section edits individually
|
|
275
|
+
for e in section_edits:
|
|
276
|
+
suggestion = next((s for s in suggestions if s.id == e.suggestion_id), None) if e.suggestion_id else None
|
|
277
|
+
if suggestion and (not e.content_template or e.content_template.strip() == ""):
|
|
278
|
+
self.print_step(step_name="GeneratingContent", step_output=f"Generating section for {e.suggestion_id} using LLM...")
|
|
279
|
+
gen_section, gen_usage = self.llm_gen.generate_section(
|
|
280
|
+
suggestion=suggestion,
|
|
281
|
+
style=plan.style_profile,
|
|
282
|
+
context=original_content,
|
|
283
|
+
)
|
|
284
|
+
if isinstance(gen_section, str) and gen_section:
|
|
285
|
+
self.print_step(step_name="LLMSection", step_output=f"✓ Generated section for {e.suggestion_id} ({gen_usage.get('total_tokens', 0)} tokens)")
|
|
286
|
+
# Ensure header present
|
|
287
|
+
if gen_section.lstrip().startswith("#"):
|
|
288
|
+
e.content_template = gen_section
|
|
289
|
+
else:
|
|
290
|
+
title = e.anchor.get('value', '').strip() or ''
|
|
291
|
+
e.content_template = f"## {title}\n\n{gen_section}" if title else gen_section
|
|
292
|
+
|
|
293
|
+
content, stats = self.renderer.apply_edit(content, e)
|
|
294
|
+
total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
|
|
295
|
+
|
|
296
|
+
# Apply remaining edits that weren't full_replace
|
|
297
|
+
for e in edits:
|
|
298
|
+
if e.edit_type != "full_replace":
|
|
299
|
+
content, stats = self.renderer.apply_edit(content, e)
|
|
300
|
+
total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
|
|
301
|
+
|
|
302
|
+
# After applying full document or section changes, run a general cleaner pass for all text files
|
|
303
|
+
# to fix markdown/formatting issues without changing meaning.
|
|
304
|
+
try:
|
|
305
|
+
if fpath.endswith((".md", ".rst", ".Rmd", ".Rd")) and content:
|
|
306
|
+
self.print_step(step_name="CleaningContent", step_output=f"Cleaning formatting for {fpath}...")
|
|
307
|
+
cleaned, _usage = self.llm_cleaner.clean_readme(content)
|
|
308
|
+
if isinstance(cleaned, str) and cleaned.strip():
|
|
309
|
+
content = cleaned
|
|
310
|
+
|
|
311
|
+
# LLM cleaner now handles markdown fences and unwanted summaries
|
|
188
312
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
pass
|
|
193
|
-
total_stats["added_lines"] = total_stats.get("added_lines", 0) + stats.get("added_lines", 0)
|
|
313
|
+
except Exception:
|
|
314
|
+
pass
|
|
315
|
+
|
|
194
316
|
revised[fpath] = content
|
|
195
317
|
diff_stats[fpath] = total_stats
|
|
196
318
|
self.print_step(step_name="RenderedFile", step_output=f"✓ Completed {fpath} - added {total_stats['added_lines']} lines")
|
|
@@ -6,6 +6,7 @@ from bioguider.utils.r_file_handler import RFileHandler
|
|
|
6
6
|
from .gitignore_checker import GitignoreChecker
|
|
7
7
|
from .python_file_handler import PythonFileHandler
|
|
8
8
|
from ..database.code_structure_db import CodeStructureDb
|
|
9
|
+
from ..rag.config import configs
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
11
12
|
|
|
@@ -17,7 +18,12 @@ class CodeStructureBuilder:
|
|
|
17
18
|
code_structure_db: CodeStructureDb,
|
|
18
19
|
):
|
|
19
20
|
self.repo_path = str(repo_path)
|
|
20
|
-
self.gitignore_checker = GitignoreChecker(
|
|
21
|
+
self.gitignore_checker = GitignoreChecker(
|
|
22
|
+
directory=repo_path,
|
|
23
|
+
gitignore_path=str(gitignore_path),
|
|
24
|
+
exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
|
|
25
|
+
exclude_file_patterns=configs["file_filters"]["excluded_files"],
|
|
26
|
+
)
|
|
21
27
|
self.file_handler = PythonFileHandler(repo_path)
|
|
22
28
|
self.code_structure_db = code_structure_db
|
|
23
29
|
|
|
@@ -348,10 +348,12 @@ class RFileHandler:
|
|
|
348
348
|
s = line.lstrip()
|
|
349
349
|
if s.startswith("#'"):
|
|
350
350
|
buf.append(s[2:].lstrip())
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
351
|
+
elif s.strip() == "":
|
|
352
|
+
pass
|
|
353
|
+
else:
|
|
354
|
+
# stop at first non-roxygen line (don’t cross blank + NULL padding blocks)
|
|
355
|
+
break
|
|
356
|
+
line_idx -= 1
|
|
355
357
|
if not buf:
|
|
356
358
|
return None
|
|
357
359
|
buf.reverse()
|
|
@@ -2,7 +2,7 @@ bioguider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
2
2
|
bioguider/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
bioguider/agents/agent_task.py,sha256=TL0Zx8zOmiAVslmNbfMPQ38qTQ73QospY6Dwrwf8POg,2890
|
|
4
4
|
bioguider/agents/agent_tools.py,sha256=r21wHV6a-Ic2T0dk4YzA-_d7PodHPM3GzRxJqv-llSw,7286
|
|
5
|
-
bioguider/agents/agent_utils.py,sha256=
|
|
5
|
+
bioguider/agents/agent_utils.py,sha256=VL6ui13v0Fo6FGRvrak6Q04q1q7LWuKY6o0JP4fx8Os,16293
|
|
6
6
|
bioguider/agents/collection_execute_step.py,sha256=jE_oSQZI5WDaz0bJjUWoAfqWfVbGUqN--cvITSWCGiI,5614
|
|
7
7
|
bioguider/agents/collection_observe_step.py,sha256=1xOw6N3uIoyh4h4_vcULAc5x5KZ9G-zZo42AhRidyn8,5373
|
|
8
8
|
bioguider/agents/collection_plan_step.py,sha256=Nn0f8AOkEDCDtnhaqE7yCQoi7PVpsHmiUcsIqC0T0dQ,5956
|
|
@@ -23,7 +23,7 @@ bioguider/agents/dockergeneration_plan_step.py,sha256=SB8tQM9PkIKsD2o1DFD7bedcxz
|
|
|
23
23
|
bioguider/agents/dockergeneration_task.py,sha256=mYmorLKnJ-Jku3Qq_Y_kcSTsbYIo3RiVdD0puxqXY5Q,6221
|
|
24
24
|
bioguider/agents/dockergeneration_task_utils.py,sha256=v7emqrJlVW-A5ZdLmPSdiaMSKCR8uzy9UYzx_1cgzyo,9041
|
|
25
25
|
bioguider/agents/evaluation_installation_task.py,sha256=0RNH5NV7YKwn_we_d3IjnFf_ee9IPCEQ_olebq2y8Ng,12130
|
|
26
|
-
bioguider/agents/evaluation_readme_task.py,sha256=
|
|
26
|
+
bioguider/agents/evaluation_readme_task.py,sha256=GUrGZUK9Jvb82M0AooLRi7CeOpZ19t-YQfs-UxYKK2Q,30841
|
|
27
27
|
bioguider/agents/evaluation_submission_requirements_task.py,sha256=J_6C-M2AfYue2C-gWBHl7KqGrTBuFBn9zmMV5vSRk-U,7834
|
|
28
28
|
bioguider/agents/evaluation_task.py,sha256=uu0BjalctF9hQjGtT53whbeJHv2RVvs8_2woVUmOLRE,8132
|
|
29
29
|
bioguider/agents/evaluation_tutorial_task.py,sha256=cxzXf5cXdZtYd8yCpffTlDGuZFx5NZ_iz9kkNXLLtns,10138
|
|
@@ -46,7 +46,7 @@ bioguider/generation/__init__.py,sha256=esV02QgCsY67-HBwSHDbA5AcbKzNRIT3wDwwh6N4
|
|
|
46
46
|
bioguider/generation/change_planner.py,sha256=0N10jvkfn2J9b598FKOKPQecwmQv68yeuUvMZn81nOI,9715
|
|
47
47
|
bioguider/generation/document_renderer.py,sha256=Md8NMo0CXNIqatWOdKE-_4k02Y3T_BCLmEPLTEiYUCA,7984
|
|
48
48
|
bioguider/generation/llm_cleaner.py,sha256=qFgS5xi7bBO8HAJ9WFNzH3p9AhOsAkYjchKQHuAUWWM,2917
|
|
49
|
-
bioguider/generation/llm_content_generator.py,sha256=
|
|
49
|
+
bioguider/generation/llm_content_generator.py,sha256=DEgk4uAgZrxBTVEN3ZuhL7W-tBfXOyn2X4e9rM1Gfhc,39748
|
|
50
50
|
bioguider/generation/llm_injector.py,sha256=bVxP6Asv2em4MBOB5yFsS14AuaeT7NLKQQMcsEqXjPY,17352
|
|
51
51
|
bioguider/generation/models.py,sha256=MlJOLjPHk8xs-UGW-TGN_M9cevTuxTG4tjm1d1L15go,2699
|
|
52
52
|
bioguider/generation/output_manager.py,sha256=uwLyavND4kXOHlsXB0Berab3y8u6bhaEmQOQLl7wDAM,1963
|
|
@@ -55,8 +55,8 @@ bioguider/generation/report_loader.py,sha256=bxajeTDxod36iFsbSZhXSQjotxqP7LuAg5M
|
|
|
55
55
|
bioguider/generation/style_analyzer.py,sha256=Vn9FAK1qJBNLolLC1tz362k4UBaPl107BlvkQc8pV2I,983
|
|
56
56
|
bioguider/generation/suggestion_extractor.py,sha256=kkPOYE6FXRtYlogV0GQdBraZZJm08I6Oux5YKGUF1UU,29442
|
|
57
57
|
bioguider/generation/test_metrics.py,sha256=ACXmSZc2L_UkkmC5h2s4tG44MXW1d-hClFwPCD5_BFI,7505
|
|
58
|
-
bioguider/managers/evaluation_manager.py,sha256=
|
|
59
|
-
bioguider/managers/generation_manager.py,sha256=
|
|
58
|
+
bioguider/managers/evaluation_manager.py,sha256=7WlshfnqFkk34dDlf50qAINK5sFTaoCFE0f0vGYyRdc,5789
|
|
59
|
+
bioguider/managers/generation_manager.py,sha256=m6hGu9_1HcL1YS0PMoFfXfVgDqsps1ahcj6L9E4jtoo,38636
|
|
60
60
|
bioguider/managers/generation_test_manager.py,sha256=3mOBzQVpsLo_LpSspJcofn3CNtvgagS1DMr9Zuwkzq4,5307
|
|
61
61
|
bioguider/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
62
|
bioguider/rag/config.py,sha256=5g4IqTzgyfZfax9Af9CTkXShgItPOt4_9TEMSekCPik,4602
|
|
@@ -64,7 +64,7 @@ bioguider/rag/data_pipeline.py,sha256=hyBmjOpP1ka_y_4X0lUwlNKEBjmPNOmETEfQf-s86Z
|
|
|
64
64
|
bioguider/rag/embedder.py,sha256=jofR8hOj3Aj2IyBQ9y6FeAc84tgq5agbIfCGyFxYpJ8,650
|
|
65
65
|
bioguider/rag/rag.py,sha256=JFPwrJlKDSyd3U3Gce_NSxI5343eNUbqPG9Fs5Pfoq0,4696
|
|
66
66
|
bioguider/settings.py,sha256=BD_iz9aYarxmWUl0XaKl4-D4oTXMhFzljsXLNn2phis,3143
|
|
67
|
-
bioguider/utils/code_structure_builder.py,sha256=
|
|
67
|
+
bioguider/utils/code_structure_builder.py,sha256=t7Ao3r50v7c0JrS74J70mCOtaEFnKvam-bsA7OSvFQo,2192
|
|
68
68
|
bioguider/utils/constants.py,sha256=NGmqEgxNDL1fe-htJbtHGcU94EVUK28YAupxGYOJO_c,9012
|
|
69
69
|
bioguider/utils/default.gitignore,sha256=XjPdyO2KV8z8iyuqluaNR_70tBQftMpyKL8HboVNyeI,1605
|
|
70
70
|
bioguider/utils/file_utils.py,sha256=DOWRluneNpGQ4uVwwX9Tp2VzmZ56wIqXKMyjcMH9Bpc,6229
|
|
@@ -72,9 +72,9 @@ bioguider/utils/gitignore_checker.py,sha256=pOYUwsS9D5014LxcZb0cj3s2CAYaD2uF_pYJ
|
|
|
72
72
|
bioguider/utils/notebook_utils.py,sha256=SfU1iLuwgbDzNN-TUh_qbnfUSgn-PI6NrK6QfmdpMqQ,4009
|
|
73
73
|
bioguider/utils/pyphen_utils.py,sha256=cdZc3qphkvMDeL5NiZ8Xou13M_uVNP7ifJ-FwxO-0BE,2680
|
|
74
74
|
bioguider/utils/python_file_handler.py,sha256=BERiE2RHxpu3gAzv26jr8ZQetkrtnMZOv9SjpQ7WIdg,2650
|
|
75
|
-
bioguider/utils/r_file_handler.py,sha256=
|
|
75
|
+
bioguider/utils/r_file_handler.py,sha256=y57Y04wjgtFWve0lPg1EOrNNOccPfnNF0z2WnlFMX74,19616
|
|
76
76
|
bioguider/utils/utils.py,sha256=h8OhCjzLpHkb3ndnjRBUOBHD7csbHdEVNXf75SRN8Zc,4413
|
|
77
|
-
bioguider-0.2.
|
|
78
|
-
bioguider-0.2.
|
|
79
|
-
bioguider-0.2.
|
|
80
|
-
bioguider-0.2.
|
|
77
|
+
bioguider-0.2.33.dist-info/LICENSE,sha256=qzkvZcKwwA5DuSuhXMOm2LcO6BdEr4V7jwFZVL2-jL4,1065
|
|
78
|
+
bioguider-0.2.33.dist-info/METADATA,sha256=Yyqyvrm_CLNHy1fgDluqbCsePUqA_mYwkzRupFTHbRU,1962
|
|
79
|
+
bioguider-0.2.33.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
80
|
+
bioguider-0.2.33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|