PyPI - bioguider - Versions diffs - 0.2.30__tar.gz → 0.2.32__tar.gz - Mend

bioguider 0.2.30.tar.gz → 0.2.32.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of bioguider might be problematic. Click here for more details.

Files changed (81) hide show

{bioguider-0.2.30 → bioguider-0.2.32}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: bioguider
-Version: 0.2.30
+Version: 0.2.32
 Summary: An AI-Powered package to help biomedical developers to generate clear documentation
 License: MIT
 Author: Cankun Wang

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/agent_utils.py RENAMED Viewed

@@ -27,6 +27,7 @@ from bioguider.utils.utils import clean_action_input
 from ..utils.gitignore_checker import GitignoreChecker
 from ..database.summarized_file_db import SummarizedFilesDb
 from bioguider.agents.common_conversation import CommonConversation
+from bioguider.rag.config import configs
 logger = logging.getLogger(__name__)
@@ -153,7 +154,9 @@ def read_directory(
         return None
     gitignore_checker = GitignoreChecker(
         directory=dir_path,
-        gitignore_path=gitignore_path
+        gitignore_path=gitignore_path,
+        exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
+        exclude_file_patterns=configs["file_filters"]["excluded_files"],
     )
     files = gitignore_checker.check_files_and_folders(level=level)
     return files

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/agents/evaluation_readme_task.py RENAMED Viewed

@@ -28,6 +28,7 @@ from bioguider.utils.constants import (
     EvaluationREADMEResult,
 )
 from bioguider.utils.utils import increase_token_usage
+from bioguider.rag.config import configs
 logger = logging.getLogger(__name__)
@@ -638,7 +639,9 @@ class EvaluationREADMETask(EvaluationTask):
         repo_path = self.repo_path
         gitignore_path = Path(repo_path, ".gitignore")
         gitignore_checker = GitignoreChecker(
-            directory=repo_path, gitignore_path=gitignore_path
+            directory=repo_path, gitignore_path=gitignore_path,
+            exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
+            exclude_file_patterns=configs["file_filters"]["excluded_files"],
         )
         found_readme_files = gitignore_checker.check_files_and_folders(
             check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/change_planner.py RENAMED Viewed

@@ -162,128 +162,15 @@ class ChangePlanner:
                         rationale=s.source.get("evidence", ""),
                         suggestion_id=s.id,
                     ))
-                elif s.action == "improve_readability":
-                    # Handle readability improvements
-                    header_key = (target, (s.anchor_hint or "Introduction").strip().lower())
-                    if header_key in seen_headers:
-                        continue
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                    seen_headers.add(header_key)
-                elif s.action == "improve_setup":
-                    # Handle setup improvements
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Setup"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                elif s.action == "improve_reproducibility":
-                    # Handle reproducibility improvements
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Setup"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                elif s.action == "improve_structure":
-                    # Handle structure improvements
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                elif s.action == "improve_code_quality":
-                    # Handle code quality improvements
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Code Examples"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                elif s.action == "improve_verification":
-                    # Handle verification improvements
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Results"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                elif s.action == "improve_performance":
-                    # Handle performance improvements
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Performance"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                elif s.action == "improve_context":
-                    # Handle context improvements for userguides
-                    header_key = (target, (s.anchor_hint or "Introduction").strip().lower())
-                    if header_key in seen_headers:
-                        continue
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Introduction"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                    seen_headers.add(header_key)
-                elif s.action == "improve_error_handling":
-                    # Handle error handling improvements for userguides
-                    header_key = (target, (s.anchor_hint or "Examples").strip().lower())
-                    if header_key in seen_headers:
-                        continue
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Examples"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                    seen_headers.add(header_key)
-                elif s.action == "add_overview_section":
-                    # Handle overview section for README
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="append_section",
-                        anchor={"type": "header", "value": s.anchor_hint or "Overview"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
-                elif s.action == "full_replace":
-                    # Handle full document replacement
-                    planned.append(PlannedEdit(
-                        file_path=target,
-                        edit_type="full_replace",
-                        anchor={"type": "document", "value": "full_document"},
-                        content_template="",  # Will be filled by LLM generation
-                        rationale=s.source.get("evidence", ""),
-                        suggestion_id=s.id,
-                    ))
+                # All actions now use full_replace mode
+                planned.append(PlannedEdit(
+                    file_path=target,
+                    edit_type="full_replace",
+                    anchor={"type": "document", "value": "full_document"},
+                    content_template="",  # Will be filled by LLM generation
+                    rationale=s.source.get("evidence", ""),
+                    suggestion_id=s.id,
+                ))
         # If a file is planned for full_replace, suppress other edits for that file to avoid redundancy
         by_file: Dict[str, List[PlannedEdit]] = {}

bioguider-0.2.32/bioguider/generation/document_renderer.py ADDED Viewed

@@ -0,0 +1,157 @@
+from __future__ import annotations
+from typing import Tuple
+from .models import PlannedEdit
+class DocumentRenderer:  # Applies one PlannedEdit to a document's text and reports how many lines were added
+    def apply_edit(self, original: str, edit: PlannedEdit) -> Tuple[str, dict]:  # returns (new_text, {"added_lines": n})
+        content = original
+        added = 0
+        if edit.edit_type == "append_section":
+            # Avoid duplicate header if the same header already exists (plain substring test on the whole document)
+            header_line = None
+            if edit.content_template.lstrip().startswith("#"):
+                header_line = edit.content_template.strip().splitlines()[0].strip()
+            if header_line and header_line in content:  # NOTE(review): "# Foo" also matches inside "## Foo" or "# Foobar"
+                return content, {"added_lines": 0}
+            # Append with two leading newlines if needed
+            sep = "\n\n" if not content.endswith("\n\n") else ""
+            content = f"{content}{sep}{edit.content_template}"
+            added = len(edit.content_template.splitlines())
+        elif edit.edit_type == "replace_intro_block":
+            # Replace content from start to first level-2 header (##) with new intro
+            lines = content.splitlines()
+            end_idx = None
+            for i, ln in enumerate(lines):
+                if ln.strip().startswith("## "):
+                    end_idx = i
+                    break
+            if end_idx is None:
+                # No H2 header found; replace entire content
+                new_content = edit.content_template
+            else:
+                head = lines[:0]  # NOTE(review): always an empty list and never read below
+                tail = lines[end_idx:]
+                new_content = edit.content_template.rstrip() + "\n\n" + "\n".join(tail)
+            added = len(edit.content_template.splitlines())
+            content = new_content
+        elif edit.edit_type == "insert_after_header":
+            # Insert content after a specific header, but integrate naturally
+            header_value = edit.anchor.get("value", "")
+            if header_value:
+                lines = content.splitlines()
+                insert_idx = None
+                for i, line in enumerate(lines):
+                    if line.strip().startswith("#") and header_value.lower() in line.lower():
+                        # Find a good insertion point after the header and its immediate content
+                        insert_idx = i + 1
+                        # Skip empty lines and find the first substantial content
+                        while insert_idx < len(lines) and lines[insert_idx].strip() == "":
+                            insert_idx += 1
+                        # Advance to just past the first ``` line, or stop at the next line starting with # or ## (### and deeper do not stop the scan)
+                        while insert_idx < len(lines):
+                            line_content = lines[insert_idx].strip()
+                            if line_content.startswith("#") and not line_content.startswith("###"):
+                                break
+                            if line_content.startswith("```") and insert_idx > 0:  # insert_idx starts at i + 1, so "> 0" always holds
+                                # First fence line after the header. NOTE(review): this is normally an opening fence, so the insert appears to land inside the block, not after it - confirm intent
+                                insert_idx += 1
+                                break
+                            insert_idx += 1
+                        break
+                if insert_idx is not None:
+                    # Insert the new content with minimal formatting
+                    new_content_lines = edit.content_template.splitlines()
+                    # Remove standalone headers to avoid creating new major sections
+                    filtered_lines = []
+                    for line in new_content_lines:
+                        if line.strip().startswith("## ") and len(line.strip()) < 50:
+                            # Convert major headers to minor explanations (header text is lower-cased)
+                            header_text = line.strip()[3:].strip()
+                            filtered_lines.append(f"\n**Note:** {header_text.lower()}")
+                        else:
+                            filtered_lines.append(line)
+                    # Insert with minimal spacing
+                    new_lines = lines[:insert_idx] + [""] + filtered_lines + lines[insert_idx:]
+                    content = "\n".join(new_lines)
+                    added = len(filtered_lines)
+                else:
+                    # Header not found, append at end
+                    sep = "\n\n" if not content.endswith("\n\n") else ""
+                    content = f"{content}{sep}{edit.content_template}"
+                    added = len(edit.content_template.splitlines())
+            else:
+                # No header specified, append at end
+                sep = "\n\n" if not content.endswith("\n\n") else ""
+                content = f"{content}{sep}{edit.content_template}"
+                added = len(edit.content_template.splitlines())
+        elif edit.edit_type == "rmarkdown_integration":
+            # Special handling for RMarkdown files - integrate content naturally
+            header_value = edit.anchor.get("value", "")
+            if header_value:
+                lines = content.splitlines()
+                insert_idx = None
+                for i, line in enumerate(lines):
+                    if line.strip().startswith("#") and header_value.lower() in line.lower():
+                        # Scan from the line after the header for the first ``` line or the next # / ## header
+                        insert_idx = i + 1
+                        while insert_idx < len(lines):
+                            line_content = lines[insert_idx].strip()
+                            if line_content.startswith("```") and insert_idx > 0:  # insert_idx starts at i + 1, so "> 0" always holds
+                                # First fence line after the header. NOTE(review): normally an opening fence, so the insert appears to land inside the chunk - confirm intent
+                                insert_idx += 1
+                                break
+                            if line_content.startswith("#") and not line_content.startswith("###"):
+                                # Next major section, insert before it
+                                break
+                            insert_idx += 1
+                        break
+                if insert_idx is not None:
+                    # Process content to be more contextual
+                    new_content_lines = edit.content_template.splitlines()
+                    contextual_lines = []
+                    for line in new_content_lines:
+                        # Convert standalone sections to contextual notes (header text is lower-cased)
+                        if line.strip().startswith("## "):
+                            header_text = line.strip()[3:].strip()
+                            contextual_lines.append(f"\n**Note:** For this tutorial, {header_text.lower()}")
+                        elif line.strip().startswith("# "):
+                            header_text = line.strip()[2:].strip()
+                            contextual_lines.append(f"\n**Important:** {header_text.lower()}")
+                        else:
+                            contextual_lines.append(line)
+                    # Insert with minimal disruption
+                    new_lines = lines[:insert_idx] + [""] + contextual_lines + lines[insert_idx:]
+                    content = "\n".join(new_lines)
+                    added = len(contextual_lines)
+                else:
+                    # Fallback to append
+                    sep = "\n\n" if not content.endswith("\n\n") else ""
+                    content = f"{content}{sep}{edit.content_template}"
+                    added = len(edit.content_template.splitlines())
+            else:
+                sep = "\n\n" if not content.endswith("\n\n") else ""
+                content = f"{content}{sep}{edit.content_template}"
+                added = len(edit.content_template.splitlines())
+        elif edit.edit_type == "full_replace":
+            # Replace entire document content
+            content = edit.content_template
+            added = len(edit.content_template.splitlines())
+        # Other edit types (replace_block) can be added as needed; an unrecognized edit_type returns the original text with added_lines 0
+        return content, {"added_lines": added}

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/llm_cleaner.py RENAMED Viewed

@@ -26,6 +26,22 @@ CRITICAL REQUIREMENTS:
 - Do NOT modify R code chunks (```{r} blocks) in RMarkdown files
 - Do NOT change the overall structure or organization of the document
+ABSOLUTELY FORBIDDEN - REMOVE THESE COMPLETELY:
+- Any summary sections, concluding statements, or notes at the end of documents
+- Phrases like "Happy analyzing!", "Ensure all dependencies are up-to-date", "This concludes", "For more information"
+- Any text that appears to be AI-generated summaries or conclusions
+- Sentences starting with "Note:", "Remember:", "Important:", "Tip:", "Warning:" at the end
+- Any text after the last substantive content section
+- Phrases like "Happy coding!", "Good luck!", "Enjoy!", "Have fun!"
+- Any concluding remarks, final thoughts, or wrap-up statements
+- Text that sounds like AI-generated advice or encouragement
+DOCUMENT ENDING RULES:
+- The document must end naturally with the last substantive content section
+- Do NOT add any concluding statements, summaries, or notes
+- If the original document had a natural ending, preserve it exactly
+- If AI-added content appears at the end, remove it completely
 INPUT
 <<DOCUMENT>>
 {doc}

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/llm_content_generator.py RENAMED Viewed

@@ -28,6 +28,9 @@ CRITICAL REQUIREMENTS
 - If guidance mentions specific packages, requirements, or details, include them exactly
 - For RMarkdown files (.Rmd), preserve the original structure including YAML frontmatter, code chunks, and existing headers
 - NEVER generate generic placeholder content like "Clear 2–3 sentence summary" or "brief description"
+- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
+- ABSOLUTELY FORBIDDEN: Do NOT wrap content in markdown code fences (```markdown). Return pure content only.
+- ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!", "Ensure all dependencies are up-to-date", or any concluding statements
 - ALWAYS use the specific guidance provided above to create concrete, actionable content
 STYLE & CONSTRAINTS
@@ -51,18 +54,27 @@ SECTION GUIDELINES (follow guidance exactly)
 - Install (clarify dependencies): Include compatibility details across operating systems and architectures as mentioned in guidance
 - Tutorial improvements: Add specific examples, error handling, and reproducibility notes as mentioned in guidance
 - User guide improvements: Enhance clarity, add missing information, and improve error handling as mentioned in guidance
+- Conservative injection: For tutorial files (.Rmd), make minimal, targeted additions that preserve the original structure and flow. Add brief notes, small subsections, or contextual comments that enhance existing content without disrupting the tutorial's narrative.
+- RMarkdown integration: When inserting content into existing RMarkdown tutorials, integrate naturally into the flow rather than creating standalone sections. Add brief explanatory text, code comments, or small subsections that enhance the existing content.
+- RMarkdown format compliance: For .Rmd files, ensure content follows RMarkdown conventions:
+  * Use proper R code chunks with ```{{r chunk_name}} and ``` when adding code examples
+  * Maintain the tutorial's existing tone and context - content should feel like a natural continuation
+  * Avoid creating new major sections unless absolutely necessary
+  * Use inline R code with `{{r code_here}}` when appropriate
+  * Keep explanations concise and contextual to the tutorial's purpose
+- Context awareness: Content should feel like a natural part of the existing tutorial, not a standalone addition. Reference the tutorial's specific context, datasets, and examples.
 - If the section does not fit the above, produce content that directly addresses the guidance provided.
 OUTPUT FORMAT
 - Return only the section markdown (no code fences).
-- Start with a level-2 header: "## {anchor_title}" unless the content already starts with a header.
-- Ensure the content directly addresses: {guidance}
+- Start with a level-2 header: "## {{anchor_title}}" unless the content already starts with a header.
+- Ensure the content directly addresses: {{guidance}}
 - DO NOT include generic instructions or placeholder text
 - ONLY generate content that fulfills the specific guidance provided
 """
 LLM_FULLDOC_PROMPT = """
-You are “BioGuider,” a documentation rewriter.
+You are "BioGuider," a documentation rewriter.
 GOAL
 Rewrite a complete target document using only the provided evaluation report signals and the repository context excerpts. Output a full, ready-to-publish markdown file that is more complete and directly usable.
@@ -76,17 +88,52 @@ STRICT CONSTRAINTS
 - Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
 - Prefer completeness and usability: produce the full file content, not just minimal "added" snippets.
 - Preserve top-of-file badges/logos if they exist in the original; keep title and header area intact unless the report requires changes.
+- CRITICAL: Preserve the original document structure, sections, and flow. Only enhance existing content and add missing information.
+- For tutorial files (.Rmd), maintain all original sections (Docker, installation methods, etc.) while improving clarity and adding missing details.
 - Fix obvious errors; improve structure and readability per report suggestions.
 - Include ONLY sections specifically requested by the evaluation report - do not add unnecessary sections.
 - Avoid redundancy: do not duplicate information across multiple sections.
+- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
+- ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure markdown content suitable for copy/paste.
+- ABSOLUTELY FORBIDDEN: Do NOT add phrases like "Happy analyzing!" or any concluding statements
 - Keep links well-formed; keep neutral, professional tone; concise, skimmable formatting.
-- CRITICAL: Do NOT wrap the entire document inside markdown code fences (```markdown). Do NOT start with ```markdown or end with ```. Return pure markdown content suitable for copy/paste.
 - For RMarkdown files (.Rmd), preserve YAML frontmatter exactly and do not wrap content in code fences.
 OUTPUT
 - Return only the full markdown content for {target_file}. No commentary, no fences.
 """
+LLM_README_COMPREHENSIVE_PROMPT = """
+You are "BioGuider," a comprehensive documentation rewriter specializing in README files.
+GOAL
+Create a complete, professional README.md that addresses all evaluation suggestions comprehensively. This is the main project documentation that users will see first.
+INPUTS (authoritative)
+- evaluation_report (structured JSON excerpts): <<{evaluation_report}>>
+- target_file: {target_file}
+- repo_context_excerpt (do not copy blindly; use only to keep style/tone): <<{context}>>
+COMPREHENSIVE README REQUIREMENTS
+- Create a complete README with all essential sections: Overview, Installation, Usage, Examples, Contributing, License
+- Address ALL evaluation suggestions thoroughly and comprehensively
+- Include detailed dependency information with installation commands
+- Provide clear system requirements and compatibility information
+- Add practical usage examples and code snippets
+- Include troubleshooting section if needed
+- Make it copy-paste ready for users
+- Use professional, clear language suitable for biomedical researchers
+STRICT CONSTRAINTS
+- Base the content solely on the evaluation report. Do not invent features, data, or claims not supported by it.
+- ABSOLUTELY FORBIDDEN: Do NOT wrap the entire document inside markdown code fences (```markdown). Return pure markdown content.
+- ABSOLUTELY FORBIDDEN: Do NOT add summary sections, notes, conclusions, or any text at the end of documents
+- Keep links well-formed; use neutral, professional tone; concise, skimmable formatting.
+OUTPUT
+- Return only the full README.md content. No commentary, no fences.
+"""
 class LLMContentGenerator:
     def __init__(self, llm: BaseChatOpenAI):
@@ -112,11 +159,21 @@ class LLMContentGenerator:
     def generate_full_document(self, target_file: str, evaluation_report: dict, context: str = "") -> tuple[str, dict]:
         conv = CommonConversation(self.llm)
-        system_prompt = LLM_FULLDOC_PROMPT.format(
-            target_file=target_file,
-            evaluation_report=json.dumps(evaluation_report)[:6000],
-            context=context[:4000],
-        )
+        # Use comprehensive README prompt for README.md files
+        if target_file.endswith("README.md"):
+            system_prompt = LLM_README_COMPREHENSIVE_PROMPT.format(
+                target_file=target_file,
+                evaluation_report=json.dumps(evaluation_report)[:6000],
+                context=context[:4000],
+            )
+        else:
+            system_prompt = LLM_FULLDOC_PROMPT.format(
+                target_file=target_file,
+                evaluation_report=json.dumps(evaluation_report)[:6000],
+                context=context[:4000],
+            )
         content, token_usage = conv.generate(system_prompt=system_prompt, instruction_prompt="Write the full document now.")
         return content.strip(), token_usage

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/generation/suggestion_extractor.py RENAMED Viewed

@@ -53,7 +53,7 @@ class SuggestionExtractor:
                             severity="should_fix",
                             source={"section": "readme", "field": "project_purpose_suggestions", "evidence": purpose_suggestions, "score": purpose_score},
                             target_files=[file_name],
-                            action="add_overview_section",
+                            action="full_replace",
                             anchor_hint="Overview",
                             content_guidance=purpose_suggestions,
                         ))
@@ -68,7 +68,7 @@ class SuggestionExtractor:
                             severity="should_fix",
                             source={"section": "readme", "field": "readability_suggestions", "evidence": readability_suggestions, "score": readability_score},
                             target_files=[file_name],
-                            action="improve_readability",
+                            action="full_replace",
                             anchor_hint="Introduction",
                             content_guidance=readability_suggestions,
                         ))
@@ -195,12 +195,14 @@ class SuggestionExtractor:
             if isinstance(report.installation_evaluation, dict):
                 structured = report.installation_evaluation.get("structured_evaluation")
             if structured:
-                # If installation has deficits, full replace installation docs listed in installation_files
-                overall = structured.get("overall_score")
-                hw = structured.get("hardware_requirements")
-                compat = structured.get("compatible_os")
+                # Use full_replace mode for all installation files
                 dep_sugg = structured.get("dependency_suggestions")
-                if overall in ("Poor", "Fair") or hw is False or compat is False or dep_sugg:
+                hw_req = structured.get("hardware_requirements")
+                compat_os = structured.get("compatible_os")
+                overall = structured.get("overall_score")
+                # Trigger full_replace for all installation files when needed
+                if overall in ("Poor", "Fair") or hw_req is False or compat_os is False or dep_sugg:
                     for target in report.installation_files or []:
                         suggestions.append(SuggestionItem(
                             id=f"install-full-replace-{target}",
@@ -210,33 +212,7 @@ class SuggestionExtractor:
                             target_files=[target],
                             action="full_replace",
                             anchor_hint=None,
-                            content_guidance="Rewrite installation doc based on evaluation (dependencies, OS, hardware).",
-                        ))
-                dep_sugg = structured.get("dependency_suggestions")
-                if dep_sugg:  # Prioritize specific suggestions
-                    for target in report.installation_files or []:
-                        suggestions.append(SuggestionItem(
-                            id=f"install-dep-clarify-{target}",
-                            category="installation.dependencies",
-                            severity="should_fix",
-                            source={"section": "installation", "field": "dependency_suggestions", "evidence": str(dep_sugg)},
-                            target_files=[target],
-                            action="clarify_mandatory_vs_optional",
-                            anchor_hint="Dependencies",
-                            content_guidance=str(dep_sugg),
-                        ))
-                hw_score = structured.get("hardware_requirements")
-                if hw_score is False:
-                    for target in report.installation_files or []:
-                        suggestions.append(SuggestionItem(
-                            id=f"install-hw-req-{target}",
-                            category="installation.hardware",
-                            severity="should_fix",
-                            source={"section": "installation", "field": "hardware_requirements", "score": "Poor", "evidence": "Hardware requirements not specified"},
-                            target_files=[target],
-                            action="add_hardware_requirements",
-                            anchor_hint="Hardware Requirements",
-                            content_guidance="Add concise RAM/CPU recommendation as per report guidance.",
+                            content_guidance="Comprehensive rewrite preserving original structure while adding improved dependencies, hardware requirements, and installation instructions.",
                         ))
         # Submission requirements could drive expected output/dataset sections; use only if in files list
@@ -261,7 +237,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "userguide", "field": "readability_suggestions", "evidence": suggestion, "score": readability_score},
                                     target_files=[file_name],
-                                    action="improve_readability",
+                                    action="full_replace",
                                     anchor_hint=f"Readability-{i+1}",
                                     content_guidance=suggestion,
                                 ))
@@ -278,7 +254,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "userguide", "field": "context_and_purpose_suggestions", "evidence": suggestion, "score": context_score},
                                     target_files=[file_name],
-                                    action="improve_context",
+                                    action="full_replace",
                                     anchor_hint=f"Context-{i+1}",
                                     content_guidance=suggestion,
                                 ))
@@ -295,7 +271,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "userguide", "field": "error_handling_suggestions", "evidence": suggestion, "score": error_score},
                                     target_files=[file_name],
-                                    action="improve_error_handling",
+                                    action="full_replace",
                                     anchor_hint=f"Error-Handling-{i+1}",
                                     content_guidance=suggestion,
                                 ))
@@ -310,7 +286,7 @@ class SuggestionExtractor:
                             severity="should_fix",
                             source={"section": "userguide", "field": "consistency", "evidence": f"score={score}"},
                             target_files=[file_name],
-                            action="improve_consistency",
+                            action="full_replace",
                             anchor_hint="Examples",
                             content_guidance="Improve consistency in examples, terminology, and formatting based on evaluation report.",
                         ))
@@ -335,7 +311,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "tutorial", "field": "readability_suggestions", "evidence": suggestion, "score": readability_score},
                                     target_files=[file_name],
-                                    action="improve_readability",
+                                    action="full_replace",
                                     anchor_hint="Introduction",
                                     content_guidance=suggestion,
                                 ))
@@ -352,7 +328,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "tutorial", "field": "setup_and_dependencies_suggestions", "evidence": suggestion, "score": setup_score},
                                     target_files=[file_name],
-                                    action="improve_setup",
+                                    action="full_replace",
                                     anchor_hint="Setup",
                                     content_guidance=suggestion,
                                 ))
@@ -369,7 +345,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "tutorial", "field": "reproducibility_suggestions", "evidence": suggestion, "score": reproducibility_score},
                                     target_files=[file_name],
-                                    action="improve_reproducibility",
+                                    action="full_replace",
                                     anchor_hint="Setup",
                                     content_guidance=suggestion,
                                 ))
@@ -386,7 +362,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "tutorial", "field": "structure_and_navigation_suggestions", "evidence": suggestion, "score": structure_score},
                                     target_files=[file_name],
-                                    action="improve_structure",
+                                    action="full_replace",
                                     anchor_hint="Introduction",
                                     content_guidance=suggestion,
                                 ))
@@ -403,7 +379,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "tutorial", "field": "executable_code_quality_suggestions", "evidence": suggestion, "score": code_score},
                                     target_files=[file_name],
-                                    action="improve_code_quality",
+                                    action="full_replace",
                                     anchor_hint="Code Examples",
                                     content_guidance=suggestion,
                                 ))
@@ -420,7 +396,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "tutorial", "field": "result_verification_suggestions", "evidence": suggestion, "score": verification_score},
                                     target_files=[file_name],
-                                    action="improve_verification",
+                                    action="full_replace",
                                     anchor_hint="Results",
                                     content_guidance=suggestion,
                                 ))
@@ -437,7 +413,7 @@ class SuggestionExtractor:
                                     severity="should_fix",
                                     source={"section": "tutorial", "field": "performance_and_resource_notes_suggestions", "evidence": suggestion, "score": performance_score},
                                     target_files=[file_name],
-                                    action="improve_performance",
+                                    action="full_replace",
                                     anchor_hint="Performance",
                                     content_guidance=suggestion,
                                 ))

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/managers/evaluation_manager.py RENAMED Viewed

@@ -3,10 +3,8 @@ from pathlib import Path
 from bioguider.agents.evaluation_tutorial_task import EvaluationTutorialTask
 from bioguider.agents.evaluation_userguide_task import EvaluationUserGuideTask
-from bioguider.agents.prompt_utils import CollectionGoalItemEnum
 from bioguider.database.code_structure_db import CodeStructureDb
 from bioguider.utils.constants import ProjectMetadata
-from bioguider.utils.gitignore_checker import GitignoreChecker
 from ..agents.identification_task import IdentificationTask
 from ..rag.rag import RAG

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/managers/generation_manager.py RENAMED Viewed

@@ -186,13 +186,7 @@ class DocumentationGenerationManager:
                         if isinstance(cleaned, str) and cleaned.strip():
                             content = cleaned
-                        # Additional post-processing: remove markdown code fences if present
-                        if content.startswith("```markdown") and content.endswith("```"):
-                            # Remove the opening and closing fences
-                            content = content[11:]  # Remove ```markdown
-                            if content.endswith("```"):
-                                content = content[:-3]  # Remove closing ```
-                            content = content.strip()
+                        # LLM cleaner now handles markdown fences and unwanted summaries
                 except Exception:
                     pass
@@ -312,7 +306,7 @@ class DocumentationGenerationManager:
         # Calculate success rate based on processed suggestions only
         processed_suggestions_count = len([s for s in suggestions if s.source and s.source.get("score", "") in ("Fair", "Poor")])
-        fixed_suggestions = len(processed_suggestions)
+        fixed_suggestions = len([s for s in processed_suggestions if s in [sug.id for sug in suggestions if sug.source and sug.source.get("score", "") in ("Fair", "Poor")]])
         # Add professional summary and key metrics
         lines.append(f"\n## Summary\n")
@@ -383,27 +377,69 @@ class DocumentationGenerationManager:
                 section = e.anchor.get('value', 'General improvements')
                 # Convert technical action names to user-friendly descriptions
-                action_desc = {
-                    'append_section': f'Added "{section}" section',
-                    'replace_intro_block': f'Improved "{section}" section',
-                    'full_replace': 'Comprehensive rewrite',
-                    'add_dependencies_section': 'Added dependencies information',
-                    'add_system_requirements_section': 'Added system requirements',
-                    'add_hardware_requirements': 'Added hardware requirements',
-                    'clarify_mandatory_vs_optional': 'Clarified dependencies',
-                    'improve_readability': f'Improved readability in "{section}"',
-                    'improve_setup': f'Enhanced setup instructions in "{section}"',
-                    'improve_reproducibility': f'Improved reproducibility in "{section}"',
-                    'improve_structure': f'Enhanced structure in "{section}"',
-                    'improve_code_quality': f'Improved code quality in "{section}"',
-                    'improve_verification': f'Enhanced result verification in "{section}"',
-                    'improve_performance': f'Added performance notes in "{section}"',
-                    'improve_clarity_and_error_handling': f'Improved clarity and error handling in "{section}"',
-                    'improve_consistency': f'Improved consistency in "{section}"',
-                    'improve_context': f'Enhanced context in "{section}"',
-                    'improve_error_handling': f'Improved error handling in "{section}"',
-                    'add_overview_section': f'Added "{section}" section'
-                }.get(e.edit_type, f'Improved {e.edit_type}')
+                # Use the suggestion action if available, otherwise fall back to edit type
+                action_key = sug.action if sug else e.edit_type
+                # Generate category-based description for full_replace actions
+                if action_key == 'full_replace' and sug:
+                    category = sug.category or ""
+                    category_display = category.split('.')[-1].replace('_', ' ').title() if category else ""
+                    # Create specific descriptions based on category
+                    if 'readme' in category.lower():
+                        action_desc = 'Enhanced README documentation'
+                    elif 'tutorial' in category.lower():
+                        action_desc = 'Improved tutorial content'
+                    elif 'userguide' in category.lower():
+                        action_desc = 'Enhanced user guide documentation'
+                    elif 'installation' in category.lower():
+                        action_desc = 'Improved installation instructions'
+                    elif 'dependencies' in category.lower():
+                        action_desc = 'Enhanced dependency information'
+                    elif 'readability' in category.lower():
+                        action_desc = 'Improved readability and clarity'
+                    elif 'setup' in category.lower():
+                        action_desc = 'Enhanced setup and configuration'
+                    elif 'reproducibility' in category.lower():
+                        action_desc = 'Improved reproducibility'
+                    elif 'structure' in category.lower():
+                        action_desc = 'Enhanced document structure'
+                    elif 'code_quality' in category.lower():
+                        action_desc = 'Improved code quality'
+                    elif 'verification' in category.lower():
+                        action_desc = 'Enhanced result verification'
+                    elif 'performance' in category.lower():
+                        action_desc = 'Added performance considerations'
+                    elif 'context' in category.lower():
+                        action_desc = 'Enhanced context and purpose'
+                    elif 'error_handling' in category.lower():
+                        action_desc = 'Improved error handling'
+                    else:
+                        action_desc = f'Enhanced {category_display}' if category_display else 'Comprehensive rewrite'
+                else:
+                    # Use existing action descriptions for non-full_replace actions
+                    action_desc = {
+                        'append_section': f'Added "{section}" section',
+                        'insert_after_header': f'Enhanced content in "{section}"',
+                        'rmarkdown_integration': f'Integrated improvements in "{section}"',
+                        'replace_intro_block': f'Improved "{section}" section',
+                        'add_dependencies_section': 'Added dependencies information',
+                        'add_system_requirements_section': 'Added system requirements',
+                        'add_hardware_requirements': 'Added hardware requirements',
+                        'clarify_mandatory_vs_optional': 'Clarified dependencies',
+                        'improve_readability': f'Improved readability in "{section}"',
+                        'improve_setup': f'Enhanced setup instructions in "{section}"',
+                        'improve_reproducibility': f'Improved reproducibility in "{section}"',
+                        'improve_structure': f'Enhanced structure in "{section}"',
+                        'improve_code_quality': f'Improved code quality in "{section}"',
+                        'improve_verification': f'Enhanced result verification in "{section}"',
+                        'improve_performance': f'Added performance notes in "{section}"',
+                        'improve_clarity_and_error_handling': f'Improved clarity and error handling in "{section}"',
+                        'improve_consistency': f'Improved consistency in "{section}"',
+                        'improve_context': f'Enhanced context in "{section}"',
+                        'improve_error_handling': f'Improved error handling in "{section}"',
+                        'add_overview_section': f'Added "{section}" section'
+                    }.get(action_key, f'Improved {action_key}')
                 lines.append(f"- **{action_desc}**")

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/code_structure_builder.py RENAMED Viewed

@@ -6,6 +6,7 @@ from bioguider.utils.r_file_handler import RFileHandler
 from .gitignore_checker import GitignoreChecker
 from .python_file_handler import PythonFileHandler
 from ..database.code_structure_db import CodeStructureDb
+from ..rag.config import configs
 logger = logging.getLogger(__name__)
@@ -17,7 +18,12 @@ class CodeStructureBuilder:
         code_structure_db: CodeStructureDb,
     ):
         self.repo_path = str(repo_path)
-        self.gitignore_checker = GitignoreChecker(repo_path, str(gitignore_path))
+        self.gitignore_checker = GitignoreChecker(
+            directory=repo_path,
+            gitignore_path=str(gitignore_path),
+            exclude_dir_patterns=configs["file_filters"]["excluded_dirs"],
+            exclude_file_patterns=configs["file_filters"]["excluded_files"],
+        )
         self.file_handler = PythonFileHandler(repo_path)
         self.code_structure_db = code_structure_db

{bioguider-0.2.30 → bioguider-0.2.32}/bioguider/utils/r_file_handler.py RENAMED Viewed

@@ -348,10 +348,12 @@ class RFileHandler:
             s = line.lstrip()
             if s.startswith("#'"):
                 buf.append(s[2:].lstrip())
-                line_idx -= 1
-                continue
-            # stop at first non-roxygen line (don’t cross blank + NULL padding blocks)
-            break
+            elif s.strip() == "":
+                pass
+            else:
+                # stop at first non-roxygen line (don’t cross blank + NULL padding blocks)
+                break
+            line_idx -= 1
         if not buf:
             return None
         buf.reverse()

{bioguider-0.2.30 → bioguider-0.2.32}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bioguider"
-version = "0.2.30"
+version = "0.2.32"
 description = "An AI-Powered package to help biomedical developers to generate clear documentation"
 authors = [
     "Cankun Wang <Cankun.Wang@osumc.edu>",

bioguider-0.2.30/bioguider/generation/document_renderer.py DELETED Viewed

@@ -1,52 +0,0 @@
-from __future__ import annotations
-from typing import Tuple
-from .models import PlannedEdit
-class DocumentRenderer:
-    def apply_edit(self, original: str, edit: PlannedEdit) -> Tuple[str, dict]:
-        content = original
-        added = 0
-        if edit.edit_type == "append_section":
-            # Avoid duplicate header if the same header already exists
-            header_line = None
-            if edit.content_template.lstrip().startswith("#"):
-                header_line = edit.content_template.strip().splitlines()[0].strip()
-            if header_line and header_line in content:
-                return content, {"added_lines": 0}
-            # Append with two leading newlines if needed
-            sep = "\n\n" if not content.endswith("\n\n") else ""
-            content = f"{content}{sep}{edit.content_template}"
-            added = len(edit.content_template.splitlines())
-        elif edit.edit_type == "replace_intro_block":
-            # Replace content from start to first level-2 header (##) with new intro
-            lines = content.splitlines()
-            end_idx = None
-            for i, ln in enumerate(lines):
-                if ln.strip().startswith("## "):
-                    end_idx = i
-                    break
-            if end_idx is None:
-                # No H2 header found; replace entire content
-                new_content = edit.content_template
-            else:
-                head = lines[:0]
-                tail = lines[end_idx:]
-                new_content = edit.content_template.rstrip() + "\n\n" + "\n".join(tail)
-            added = len(edit.content_template.splitlines())
-            content = new_content
-        elif edit.edit_type == "full_replace":
-            # Replace entire document content
-            content = edit.content_template
-            added = len(edit.content_template.splitlines())
-        # Other edit types (insert_after_header, replace_block) can be added as needed
-        return content, {"added_lines": added}