bioguider 0.2.25__tar.gz → 0.2.27__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic.
- {bioguider-0.2.25 → bioguider-0.2.27}/PKG-INFO +1 -1
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/consistency_observe_step.py +2 -2
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_tutorial_task.py +8 -8
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_tutorial_task_prompts.py +14 -24
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_userguide_prompts.py +12 -70
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_userguide_task.py +4 -4
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/llm_content_generator.py +5 -2
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/llm_injector.py +74 -2
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/models.py +4 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/test_metrics.py +85 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/managers/generation_manager.py +54 -8
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/managers/generation_test_manager.py +41 -8
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/utils.py +9 -5
- {bioguider-0.2.25 → bioguider-0.2.27}/pyproject.toml +1 -1
- {bioguider-0.2.25 → bioguider-0.2.27}/LICENSE +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/README.md +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/__init__.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/__init__.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/agent_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/agent_tools.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/agent_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/collection_execute_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/collection_observe_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/collection_plan_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/collection_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/collection_task_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/common_agent.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/common_agent_2step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/common_conversation.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/common_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/consistency_collection_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/consistency_evaluation_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/consistency_evaluation_task_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/consistency_query_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/dockergeneration_execute_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/dockergeneration_observe_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/dockergeneration_plan_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/dockergeneration_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/dockergeneration_task_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_installation_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_readme_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/evaluation_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/identification_execute_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/identification_observe_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/identification_plan_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/identification_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/identification_task_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/peo_common_step.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/prompt_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/python_ast_repl_tool.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/agents/rag_collection_task.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/conversation.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/database/code_structure_db.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/database/summarized_file_db.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/__init__.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/change_planner.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/document_renderer.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/llm_cleaner.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/output_manager.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/repo_reader.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/report_loader.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/style_analyzer.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/generation/suggestion_extractor.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/managers/evaluation_manager.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/rag/__init__.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/rag/config.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/rag/data_pipeline.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/rag/embedder.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/rag/rag.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/settings.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/code_structure_builder.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/constants.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/default.gitignore +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/file_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/gitignore_checker.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/notebook_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/pyphen_utils.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/python_file_handler.py +0 -0
- {bioguider-0.2.25 → bioguider-0.2.27}/bioguider/utils/r_file_handler.py +0 -0
bioguider/agents/consistency_observe_step.py

@@ -21,7 +21,7 @@ and generate a structured consistency assessment based on the following criteria
 **Consistency**:
 * **Score**: [Poor / Fair / Good / Excellent]
 * **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
-* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
+* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent, please be as specific as possible]
 * **Strengths**: [A list of strengths of the {domain} documentation on consistency]

 ---

@@ -32,7 +32,7 @@ Your output **must exactly match** the following format:
 **Consistency**:
 * **Score**: [Poor / Fair / Good / Excellent]
 * **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
-* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
+* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent, please be as specific as possible]
 * **Strengths**: [A list of strengths of the {domain} documentation on consistency]
 ```

bioguider/agents/evaluation_tutorial_task.py

@@ -28,21 +28,21 @@ MAX_FILE_SIZE = 1024 * 100 # 100K
 class TutorialEvaluationResult(BaseModel):
     overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
     overall_key_strengths: str=Field(description="A string value, the key strengths of the tutorial")
-    overall_improvement_suggestions: str=Field(description="Suggestions to improve the overall score if necessary")
+    # overall_improvement_suggestions: str=Field(description="Suggestions to improve the overall score if necessary")
     readability_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
-    readability_suggestions: str=Field(description="
+    readability_suggestions: list[str]=Field(description="A list of string values, suggestions to improve readability if necessary")
     setup_and_dependencies_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
-    setup_and_dependencies_suggestions: str=Field(description="
+    setup_and_dependencies_suggestions: list[str]=Field(description="A list of string values, suggestions to improve setup and dependencies if necessary")
     reproducibility_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
-    reproducibility_suggestions: str=Field(description="
+    reproducibility_suggestions: list[str]=Field(description="A list of string values, suggestions to improve reproducibility if necessary")
     structure_and_navigation_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
-    structure_and_navigation_suggestions: str=Field(description="
+    structure_and_navigation_suggestions: list[str]=Field(description="A list of string values, suggestions to improve structure and navigation if necessary")
     executable_code_quality_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
-    executable_code_quality_suggestions: str=Field(description="
+    executable_code_quality_suggestions: list[str]=Field(description="A list of string values, suggestions to improve executable code quality if necessary")
     result_verification_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
-    result_verification_suggestions: str=Field(description="
+    result_verification_suggestions: list[str]=Field(description="A list of string values, suggestions to improve result verification if necessary")
     performance_and_resource_notes_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
-    performance_and_resource_notes_suggestions: str=Field(description="
+    performance_and_resource_notes_suggestions: list[str]=Field(description="A list of string values, suggestions to improve performance and resource notes if necessary")

 class IndividualTutorialEvaluationResult(BaseModel):
     tutorial_evaluation: TutorialEvaluationResult | None=Field(description="The evaluation result of the tutorial")
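The substantive schema change above is the switch from free-text `str` suggestion fields to `list[str]`. A minimal sketch of how the new field type validates, assuming pydantic v2 and using a trimmed two-field stand-in for the real model:

```python
from pydantic import BaseModel, Field

class TutorialEvaluationResultLite(BaseModel):
    # Two-field stand-in for TutorialEvaluationResult; field names match the diff.
    readability_score: str = Field(description="'Poor', 'Fair', 'Good', or 'Excellent'")
    readability_suggestions: list[str] = Field(
        default_factory=list,
        description="A list of string values, suggestions to improve readability if necessary",
    )

result = TutorialEvaluationResultLite(
    readability_score="Good",
    readability_suggestions=[
        "Define domain terms on first use.",
        "Split the setup walkthrough into numbered steps.",
    ],
)
print(result.readability_suggestions[0])  # individual suggestions stay addressable
```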
bioguider/agents/evaluation_tutorial_task_prompts.py

@@ -15,37 +15,37 @@ Your task is to analyze the provided tutorial file and generate a structured qua

 2. **Coverage**:
   * **Assessment**: [Your evaluation of whether it covers all major steps needed to get started, and dependencies, prerequisites, setup steps, and example usage.]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the tutorial.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 3. **Reproducibility**:
   * **Assessment**: [Your evaluation of whether it provides a clear **description** of reproducibility]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the tutorial.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 4. **Structure & Navigation**:
   * **Assessment**: [Your evaluation of whether it provides logical sections (e.g., intro -> setup -> steps -> results -> next), TOC/anchors, estimated time, etc.]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the tutorial.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 5. **Executable Code Quality**:
   * **Assessment**: [Your evaluation on whether the code snippets are executable and functional, idiomatic, no hard-coded paths, etc.]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the tutorial.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 6. **Result Verification**:
   * **Assessment**: [Your evaluation on expected outputs shown (figures/tables/metrics), acceptance criteria, etc.]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the tutorial.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 7. **Performance & Resource Notes**:
   * **Assessment**: [Your evaluation on performance and resource notes, e.g., CPU/GPU usage, memory usage, runtime estimates, small "lite" path provided.]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the tutorial.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

@@ -57,49 +57,39 @@ Your final report must **exactly match** the following format. Do not add or omi
 **FinalAnswer**
 * **Overall Score:** [Poor / Fair / Good / Excellent]
 * **Overall Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
-
-  - "Original text snippet 1" - Improving comment 1
-  - "Original text snippet 2" - Improving comment 2
-  - ...
+
 * **Readability Score:** [Poor / Fair / Good / Excellent]
-* **Readability
-* **Readability Improvement Suggestions:**
+* **Readability Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Coverage Score:** [Poor / Fair / Good / Excellent]
-* **Coverage
-* **Coverage Improvement Suggestions:**
+* **Coverage Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Reproducibility Score:** [Poor / Fair / Good / Excellent]
-* **Reproducibility
-* **Reproducibility Improvement Suggestions:**
+* **Reproducibility Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Structure & Navigation Score:** [Poor / Fair / Good / Excellent]
-* **Structure & Navigation
-* **Structure & Navigation Improvement Suggestions:**
+* **Structure & Navigation Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Executable Code Quality Score:** [Poor / Fair / Good / Excellent]
-* **Executable Code Quality
-* **Executable Code Quality Improvement Suggestions:**
+* **Executable Code Quality Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Result Verification Score:** [Poor / Fair / Good / Excellent]
-* **Result Verification
-* **Result Verification Improvement Suggestions:**
+* **Result Verification Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Performance & Resource Notes Score:** [Poor / Fair / Good / Excellent]
-* **Performance & Resource Notes
-* **Performance & Resource Notes Improvement Suggestions:**
+* **Performance & Resource Notes Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
bioguider/agents/evaluation_userguide_prompts.py

@@ -15,31 +15,31 @@ Your task is to analyze the provided files related to user guide and generate a

 2. **Arguments and Clarity**:
   * **Assessment**: [Your evaluation of whether it provides a clear **description** of arguments and their usage]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the user guide.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 3. **Return Value and Clarity**:
   * **Assessment**: [Your evaluation of whether it provides a clear **description** of return value and its meaning]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the user guide.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 4. **Context and Purpose**:
   * **Assessment**: [Your evaluation of whether it provides a clear **description** of the context and purpose of the module]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the user guide.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 5. **Error Handling**:
   * **Assessment**: [Your evaluation of whether it provides a clear **description** of error handling]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the user guide.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

 6. **Usage Examples**:
   * **Assessment**: [Your evaluation of whether it provides a clear **description** of usage examples]
-  * **Improvement Suggestions**:
+  * **Improvement Suggestions**: please be as specific as possible.
     * **Original text:** [Quote a specific line/section from the user guide.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]

@@ -54,43 +54,40 @@ Your final report must **exactly match** the following format. Do not add or omi
 **FinalAnswer**
 * **Overall Score:** [Poor / Fair / Good / Excellent]
 * **Overall Key Strengths**: <brief summary of the User Guide's strongest points in 2-3 sentences>
-
-  - "Original text snippet 1" - Improving comment 1
-  - "Original text snippet 2" - Improving comment 2
-  - ...
+
 * **Readability Analysis Score:** [Poor / Fair / Good / Excellent]
 * **Readability Analysis Key Strengths**: <brief summary of the User Guide's strongest points in 2-3 sentences>
-* **Readability Analysis Improvement Suggestions:**
+* **Readability Analysis Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Arguments and Clarity Score:** [Poor / Fair / Good / Excellent]
 * **Arguments and Clarity Key Strengths**: <brief summary of the User Guide's strongest points in 2-3 sentences>
-* **Arguments and Clarity Improvement Suggestions:**
+* **Arguments and Clarity Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Return Value and Clarity Score:** [Poor / Fair / Good / Excellent]
 * **Return Value and Clarity Key Strengths**: <brief summary of the User Guide's strongest points in 2-3 sentences>
-* **Return Value and Clarity Improvement Suggestions:**
+* **Return Value and Clarity Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Context and Purpose Score:** [Poor / Fair / Good / Excellent]
 * **Context and Purpose Key Strengths**: <brief summary of the User Guide's strongest points in 2-3 sentences>
-* **Context and Purpose Improvement Suggestions:**
+* **Context and Purpose Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Error Handling Score:** [Poor / Fair / Good / Excellent]
 * **Error Handling Key Strengths**: <brief summary of the User Guide's strongest points in 2-3 sentences>
-* **Error Handling Improvement Suggestions:**
+* **Error Handling Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...
 * **Usage Examples Score:** [Poor / Fair / Good / Excellent]
 * **Usage Examples Key Strengths**: <brief summary of the User Guide's strongest points in 2-3 sentences>
-* **Usage Examples Improvement Suggestions:**
+* **Usage Examples Improvement Suggestions:** please be as specific as possible.
   - "Original text snippet 1" - Improving comment 1
   - "Original text snippet 2" - Improving comment 2
   - ...

@@ -105,58 +102,3 @@ Your final report must **exactly match** the following format. Do not add or omi

 """

-CONSISTENCY_EVAL_SYSTEM_PROMPT = """
-You are an expert in evaluating the consistency of user guide in software repositories.
-Your task is to analyze both:
-1. the provided file related to user guide/API documentation,
-2. the code definitions related to the user guide/API documentation
-and generate a structured consistency assessment based on the following criteria.
-
----
-
-### **Evaluation Criteria**
-
-**Consistency**:
-* **Score**: [Poor / Fair / Good / Excellent]
-* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
-* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring]
-* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]
-
-### **Output Format**
-Your output **must exactly match** the following format:
-```
-**Consistency**:
-* **Score**: [Poor / Fair / Good / Excellent]
-* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
-* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring]
-* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]
-```
-
-### **Output Example**
-
-```
-**Consistency**:
-* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
-* **Development**:
-  - Inconsistent function/class/method name 1
-  - Inconsistent docstring 1
-  - Inconsistent function/class/method name 2
-  - Inconsistent docstring 2
-  - ...
-* **Strengths**:
-  - Strengths 1
-  - Strengths 2
-  - ...
-```
-
----
-
-### **Input User Guide/API Documentation**
-{user_guide_api_documentation}
-
-### **Code Definitions**
-{code_definitions}
-
----
-
-"""
bioguider/agents/evaluation_userguide_task.py

@@ -24,13 +24,13 @@ from .evaluation_userguide_prompts import INDIVIDUAL_USERGUIDE_EVALUATION_SYSTEM
 class UserGuideEvaluationResult(BaseModel):
     overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
     overall_key_strengths: str=Field(description="A string value, the key strengths of the user guide")
-
+
     readability_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
-    readability_suggestions: str=Field(description="
+    readability_suggestions: list[str]=Field(description="A list of string values, suggestions to improve readability if necessary")
     context_and_purpose_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
-    context_and_purpose_suggestions: str=Field(description="
+    context_and_purpose_suggestions: list[str]=Field(description="A list of string values, suggestions to improve context and purpose if necessary")
     error_handling_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
-    error_handling_suggestions: str=Field(description="
+    error_handling_suggestions: list[str]=Field(description="A list of string values, suggestions to improve error handling if necessary")

 class IndividualUserGuideEvaluationResult(BaseModel):
     user_guide_evaluation: UserGuideEvaluationResult | None=Field(description="The evaluation result of the user guide")
bioguider/generation/llm_content_generator.py

@@ -11,7 +11,7 @@ LLM_SECTION_PROMPT = """
 You are “BioGuider,” a concise documentation generator for biomedical/bioinformatics software.

 GOAL
-Write or refine a single documentation section named "{section}". Produce
+Write or refine a single documentation section named "{section}". Produce professional, comprehensive, style-consistent content that addresses only this section.

 INPUTS (use only what is provided; never invent)
 - suggestion_category: {suggestion_category}

@@ -21,6 +21,7 @@ INPUTS (use only what is provided; never invent)
 - repo_context_excerpt (analyze tone/formatting; do not paraphrase it blindly): <<{context}>>

 STYLE & CONSTRAINTS
+- Fix obvious errors in the content.
 - Preserve the existing tone and style markers: {tone_markers}
 - Use heading style "{heading_style}" and list style "{list_style}"; link style "{link_style}".
 - Neutral, professional tone; avoid marketing claims.

@@ -29,9 +30,11 @@ STYLE & CONSTRAINTS
 - Biomedical examples must avoid PHI; assume de-identified data.
 - Output must be plain markdown for this section only, with no commentary and no backticks.
 - Avoid duplication: if similar content exists in the repo context, rewrite succinctly instead of repeating.
+- Never remove, alter, or recreate top-of-file badges/shields/logos (e.g., CI, PyPI, Conda, Docs shields). Assume they remain unchanged; do not output replacements for them.
+- When targeting README content, do not rewrite the document title or header area; generate only the requested section body to be inserted below existing headers/badges.

 SECTION GUIDELINES
-- Dependencies: short bullet list; clearly separate Mandatory and Optional if applicable
+- Dependencies: short bullet list; clearly separate Mandatory and Optional if applicable.
 - System Requirements: runtime versions and supported OS; add hardware notes only if guidance provides specifics.
 - Hardware Requirements: brief bullets with RAM/CPU only if guidance includes numbers.
 - License: one sentence referencing the license and pointing to the LICENSE file.
bioguider/generation/llm_injector.py

@@ -26,6 +26,35 @@ ERROR CATEGORIES (inject all)
 - bio_term: slightly wrong domain term (e.g., “single sell” for “single cell”); do not invent new science
 - function: misspell a known function/API name **from the input README-lite only**
 - markdown_structure: break a header level, list indentation, or code fence (one-off)
+- list_structure: remove bullet space (e.g., “-item”), mix markers inconsistently
+- section_title: subtly change a section title casing or wording
+- image_syntax: break image markdown spacing (e.g., `![alt] (url)`)
+- inline_code: remove backticks around inline code
+- emphasis: break emphasis markers (e.g., missing closing `*`)
+- table_alignment: misalign or omit a `|` in a markdown table
+- code_lang_tag: use the wrong fenced code language (e.g., ```py for R)
+
+BIOLOGY-SPECIFIC ERROR CATEGORIES (inject all; keep realistic & subtle)
+- gene_symbol_case: change gene symbol casing or add suffix (e.g., “tp53”, “CD3e”), but **do not alter** protected keywords
+- species_swap: imply human vs mouse mix-up (e.g., “mm10” vs “GRCh38”) in a short phrase
+- ref_genome_mismatch: claim a reference genome that conflicts with the example file or text
+- modality_confusion: conflate RNA-seq with ATAC or proteomics in a brief phrase
+- normalization_error: misuse terms like CPM/TPM/CLR/log1p in a sentence
+- umi_vs_read: confuse UMI counts vs read counts in a short line
+- batch_effect: misstate “batch correction” vs “normalization” terminology
+- qc_threshold: use a common but slightly wrong QC gate (e.g., mito% 0.5 instead of 5)
+- file_format: mix up FASTQ/BAM/MTX/H5AD/RDS in a brief mention
+- strandedness: claim “stranded” when workflow is unstranded (or vice versa)
+- coordinates: confuse 0-based vs 1-based or chromosome naming style (chr1 vs 1)
+- units_scale: use the wrong scale/unit (e.g., μm vs mm; 10e6 instead of 1e6)
+- sample_type: conflate “primary tissue” with “cell line” in a single phrase
+- contamination: misuse “ambient RNA” vs “doublets” terminology
+
+CLI/CONFIG ERROR CATEGORIES (inject all)
+- param_name: slightly misspell a CLI flag or config key (e.g., `--min-cell` → `--min-cells`)
+- default_value: state a plausible but incorrect default value
+- path_hint: introduce a subtle path typo (e.g., `data/filtrd`)

 CONSTRAINTS
 - Keep edits minimal and local; **≥85% token overlap** with input.

@@ -43,6 +72,7 @@ CONSTRAINTS
 - Maintain a **concise length** (≤ {max_words} words).
 - Do **not** alter the protected keywords (exact casing/spelling): {keywords}
 - Keep at least **{min_per_category} errors per category** listed above.
+- Limit `duplicate` injections to at most **{min_per_category}**.
 - If the input contains runnable code, keep it mostly intact but introduce **one** realistic break
   (e.g., missing quote/paren or wrong function name) without adding new libraries.
 - Keep at least one **valid** URL so the fixer can compare.

@@ -204,8 +234,8 @@ class LLMErrorInjector:
             corrupted = corrupted.replace(orig, mut, 1)
             errors.append({"id": f"e_link_sup_{len(errors)}", "category": "link", "original_snippet": orig, "mutated_snippet": mut, "rationale": "scheme colon removed"})

-        # duplicate supplements
-        for _ in range(need("duplicate")):
+        # duplicate supplements (cap to min_per_category)
+        for _ in range(min(need("duplicate"), min_per_category)):
             lines = corrupted.splitlines()
             idx = next((i for i, ln in enumerate(lines) if ln.strip().startswith("- ") or ln.strip().startswith("## ")), None)
             if idx is None:

@@ -264,6 +294,48 @@ class LLMErrorInjector:
             else:
                 break

+        # list_structure supplements
+        for _ in range(need("list_structure")):
+            m = re.search(r"^\-\s+\S", corrupted, flags=re.M)
+            if not m:
+                break
+            orig = m.group(0)
+            mut = orig.replace("- ", "-", 1)
+            corrupted = corrupted.replace(orig, mut, 1)
+            errors.append({"id": f"e_list_sup_{len(errors)}", "category": "list_structure", "original_snippet": orig, "mutated_snippet": mut, "rationale": "bullet missing space"})
+
+        # section_title supplements
+        for _ in range(need("section_title")):
+            m = re.search(r"^##\s+(What is it\?|What can it do\?|Requirements|Install|Quick example|Learn more|License & Contact)$", corrupted, flags=re.M)
+            if not m:
+                break
+            orig = m.group(0)
+            mut = orig.replace("What is it?", "What is It?").replace("Install", "Installation")
+            if mut == orig:
+                break
+            corrupted = corrupted.replace(orig, mut, 1)
+            errors.append({"id": f"e_title_sup_{len(errors)}", "category": "section_title", "original_snippet": orig, "mutated_snippet": mut, "rationale": "subtle title change"})
+
+        # image_syntax supplements
+        for _ in range(need("image_syntax")):
+            m = re.search(r"!\[[^\]]*\]\([^\)]+\)", corrupted)
+            if not m:
+                break
+            orig = m.group(0)
+            mut = orig.replace("](", "] (")
+            corrupted = corrupted.replace(orig, mut, 1)
+            errors.append({"id": f"e_img_sup_{len(errors)}", "category": "image_syntax", "original_snippet": orig, "mutated_snippet": mut, "rationale": "broken image spacing"})
+
+        # inline_code supplements
+        for _ in range(need("inline_code")):
+            m = re.search(r"`[^`\n]+`", corrupted)
+            if not m:
+                break
+            orig = m.group(0)
+            mut = orig.strip("`")
+            corrupted = corrupted.replace(orig, mut, 1)
+            errors.append({"id": f"e_code_sup_{len(errors)}", "category": "inline_code", "original_snippet": orig, "mutated_snippet": mut, "rationale": "removed inline code backticks"})
+
         data["errors"] = errors
         return corrupted, data

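Each new supplement follows the same search-mutate-record loop. A standalone sketch of two of them (inline_code and list_structure) applied to a toy string, using the same regexes as the diff:

```python
import re

corrupted = "- `pip install bioguider` installs the package\n"
errors = []

# inline_code supplement: strip the backticks from the first inline code span.
m = re.search(r"`[^`\n]+`", corrupted)
if m:
    orig, mut = m.group(0), m.group(0).strip("`")
    corrupted = corrupted.replace(orig, mut, 1)
    errors.append({"category": "inline_code", "original_snippet": orig, "mutated_snippet": mut})

# list_structure supplement: drop the space after the first bullet marker.
m = re.search(r"^\-\s+\S", corrupted, flags=re.M)
if m:
    orig, mut = m.group(0), m.group(0).replace("- ", "-", 1)
    corrupted = corrupted.replace(orig, mut, 1)
    errors.append({"category": "list_structure", "original_snippet": orig, "mutated_snippet": mut})

print(corrupted)    # "-pip install bioguider installs the package"
print(len(errors))  # 2 manifest entries, mirroring the real injector
```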
bioguider/generation/models.py

@@ -14,6 +14,10 @@ class EvaluationReport(BaseModel):
     readme_evaluation: Optional[Dict[str, Any]] = None
     readme_files: Optional[List[str]] = None

+    # Optional: rich user guide evaluation content and any explicitly listed files
+    userguide_evaluation: Optional[Dict[str, Any]] = None
+    userguide_files: Optional[List[str]] = None
+
     submission_requirements_evaluation: Optional[Dict[str, Any]] = None
     submission_requirements_files: Optional[List[str]] = None

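Because both new fields default to `None`, reports produced before 0.2.27 still parse. A minimal sketch with a stand-in model, assuming pydantic v2:

```python
from typing import Any, Dict, List, Optional
from pydantic import BaseModel

class EvaluationReportLite(BaseModel):
    # Stand-in carrying only the two fields added in this hunk.
    userguide_evaluation: Optional[Dict[str, Any]] = None
    userguide_files: Optional[List[str]] = None

# A legacy report without user guide data still validates.
legacy = EvaluationReportLite.model_validate({})
assert legacy.userguide_files is None

# A newer report can key evaluation content by file path.
current = EvaluationReportLite.model_validate(
    {"userguide_evaluation": {"docs/usage.md": {"score": "Good"}}}
)
print(list(current.userguide_evaluation))  # ['docs/usage.md']
```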
bioguider/generation/test_metrics.py

@@ -22,11 +22,56 @@ def _count_markdown_issues(text: str) -> int:
 def evaluate_fixes(baseline: str, corrupted: str, revised: str, injection_manifest: Dict[str, Any]) -> Dict[str, Any]:
     per_error: List[Dict[str, Any]] = []
     per_cat: Dict[str, Dict[str, int]] = {}
+    # aggregate counters
+    totals = {"total_errors": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0}

     def mark(cat: str, key: str):
         per_cat.setdefault(cat, {"total": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0})
         per_cat[cat][key] += 1

+    # Precompute some structural counts
+    def count_malformed_bullets(text: str) -> int:
+        return len(re.findall(r"^[-*]\S", text, flags=re.M))
+
+    def count_bad_image_spacing(text: str) -> int:
+        return len(re.findall(r"!\[[^\]]*\]\s+\(", text))
+
+    def table_variance(text: str) -> int:
+        rows = [ln for ln in text.splitlines() if '|' in ln]
+        groups: List[List[str]] = []
+        cur: List[str] = []
+        for ln in rows:
+            if '|' in ln:
+                cur.append(ln)
+            else:
+                if len(cur) >= 2:
+                    groups.append(cur)
+                cur = []
+        if len(cur) >= 2:
+            groups.append(cur)
+        vari = 0
+        for g in groups:
+            counts = [ln.count('|') for ln in g]
+            vari += (max(counts) - min(counts))
+        return vari
+
+    malformed_bullets_before = count_malformed_bullets(corrupted)
+    malformed_bullets_after = count_malformed_bullets(revised)
+    bad_img_before = count_bad_image_spacing(corrupted)
+    bad_img_after = count_bad_image_spacing(revised)
+    table_var_before = table_variance(corrupted)
+    table_var_after = table_variance(revised)
+
+    canonical_titles = {
+        "## What is it?",
+        "## What can it do?",
+        "## Requirements",
+        "## Install",
+        "## Quick example",
+        "## Learn more",
+        "## License & Contact",
+    }
+
     for e in injection_manifest.get("errors", []):
         cat = e.get("category", "unknown")
         per_cat.setdefault(cat, {"total": 0, "fixed_to_baseline": 0, "fixed_to_valid": 0, "unchanged": 0, "worsened": 0})
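To make the new structural counters concrete, here is a runnable extract of `count_malformed_bullets` and `table_variance` on toy text. It follows the same logic as the diff; note the diff pre-filters rows to pipe-containing lines, so all table rows land in a single group, and the sketch below is simplified to that single-group case:

```python
import re

def count_malformed_bullets(text: str) -> int:
    # Bullets missing the space after the marker, e.g. "-item" or "*item".
    return len(re.findall(r"^[-*]\S", text, flags=re.M))

def table_variance(text: str) -> int:
    # Spread of '|' counts across table rows; 0 means every row is aligned.
    rows = [ln for ln in text.splitlines() if '|' in ln]
    if len(rows) < 2:
        return 0
    counts = [ln.count('|') for ln in rows]
    return max(counts) - min(counts)

text = "-broken bullet\n| a | b |\n| a | b\n"
print(count_malformed_bullets(text))  # 1
print(table_variance(text))           # 1: the second row is missing a '|'
```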
@@ -76,10 +121,43 @@ def evaluate_fixes(baseline: str, corrupted: str, revised: str, injection_manife
                 status = "unchanged"
             else:
                 status = "fixed_to_valid"
+        elif cat == "list_structure":
+            status = "fixed_to_valid" if malformed_bullets_after < malformed_bullets_before else "unchanged"
+        elif cat == "image_syntax":
+            status = "fixed_to_valid" if bad_img_after < bad_img_before else "unchanged"
+        elif cat == "section_title":
+            # valid if mutated title removed and any canonical title present
+            if mut and mut not in revised and any(t in revised for t in canonical_titles):
+                status = "fixed_to_valid"
+            else:
+                status = "unchanged"
+        elif cat == "inline_code":
+            # check that the raw content regained backticks somewhere
+            raw = mut.strip('`') if mut else ""
+            rewrapped = f"`{raw}`" if raw else ""
+            if raw and rewrapped and rewrapped in revised and mut not in revised:
+                status = "fixed_to_valid"
+            else:
+                status = "unchanged"
+        elif cat == "emphasis":
+            status = "fixed_to_valid" if mut and mut not in revised else "unchanged"
+        elif cat == "table_alignment":
+            status = "fixed_to_valid" if table_var_after < table_var_before else "unchanged"
+        elif cat == "code_lang_tag":
+            status = "fixed_to_valid" if mut and mut not in revised else "unchanged"
+        # Biology-specific and CLI/CONFIG categories: treat as fixed if mutated snippet removed
+        elif cat in {
+            "gene_symbol_case","species_swap","ref_genome_mismatch","modality_confusion","normalization_error",
+            "umi_vs_read","batch_effect","qc_threshold","file_format","strandedness","coordinates","units_scale",
+            "sample_type","contamination","param_name","default_value","path_hint"
+        }:
+            status = "fixed_to_valid" if mut and mut not in revised else "unchanged"
         else:
             status = "unchanged"

         mark(cat, status)
+        totals["total_errors"] += 1
+        totals[status] += 1
         per_error.append({
             "id": e.get("id"),
             "category": cat,
@@ -95,10 +173,17 @@ def evaluate_fixes(baseline: str, corrupted: str, revised: str, injection_manife
     global_metrics = {
         "markdown_validity_delta": issues_before - issues_after,
     }
+    success = totals["fixed_to_baseline"] + totals["fixed_to_valid"]
+    success_rate = (success / totals["total_errors"] * 100.0) if totals["total_errors"] else 0.0
+    summary = {
+        "totals": totals,
+        "success_rate": round(success_rate, 2),
+    }
     return {
         "per_error": per_error,
         "per_category": per_cat,
         "global": global_metrics,
+        "summary": summary,
     }

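The new `summary` block reduces the per-error statuses to one headline number. A standalone recap of the arithmetic with invented counts:

```python
# Invented counts for illustration; in practice these come from the
# per-error loop above.
totals = {"total_errors": 8, "fixed_to_baseline": 3, "fixed_to_valid": 4,
          "unchanged": 1, "worsened": 0}

success = totals["fixed_to_baseline"] + totals["fixed_to_valid"]
success_rate = (success / totals["total_errors"] * 100.0) if totals["total_errors"] else 0.0
summary = {"totals": totals, "success_rate": round(success_rate, 2)}
print(summary["success_rate"])  # 87.5
```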
bioguider/managers/generation_manager.py

@@ -2,7 +2,7 @@ from __future__ import annotations

 import os
 from pathlib import Path
-from typing import Tuple, Dict
+from typing import Tuple, Dict, List

 from bioguider.generation import (
     EvaluationReportLoader,

@@ -49,12 +49,24 @@ class DocumentationGenerationManager:

         self.print_step(step_name="ReadRepoFiles", step_output=f"repo_path={repo_path}")
         reader = RepoReader(repo_path)
-        # Prefer report-listed files if available
+        # Prefer report-listed files if available; include all report-declared file lists
         target_files = []
-        if report
+        if getattr(report, "readme_files", None):
             target_files.extend(report.readme_files)
-        if report
+        if getattr(report, "installation_files", None):
             target_files.extend(report.installation_files)
+        # If userguide_files not explicitly provided, derive from userguide_evaluation keys
+        userguide_files: list[str] = []
+        if getattr(report, "userguide_files", None):
+            userguide_files.extend([p for p in report.userguide_files if isinstance(p, str)])
+        elif getattr(report, "userguide_evaluation", None) and isinstance(report.userguide_evaluation, dict):
+            for key in report.userguide_evaluation.keys():
+                if isinstance(key, str) and key.strip():
+                    userguide_files.append(key)
+        target_files.extend(userguide_files)
+        if getattr(report, "submission_requirements_files", None):
+            target_files.extend(report.submission_requirements_files)
+        target_files = [p for p in target_files if isinstance(p, str) and p.strip()]
         target_files = list(dict.fromkeys(target_files))  # de-dup
         files, missing = reader.read_files(target_files) if target_files else reader.read_default_targets()

@@ -118,7 +130,10 @@ class DocumentationGenerationManager:

         self.print_step(step_name="WriteOutputs", step_output=f"repo_key={out_repo_key}")
         out_dir = self.output.prepare_output_dir(out_repo_key)
-
+        # Ensure all files we read (even without edits) are written to outputs alongside revisions
+        all_files_to_write: Dict[str, str] = dict(files)
+        all_files_to_write.update(revised)
+        artifacts = self.output.write_files(out_dir, all_files_to_write, diff_stats_by_file=diff_stats)

         manifest = GenerationManifest(
             repo_url=report.repo_url,

@@ -131,14 +146,31 @@ class DocumentationGenerationManager:
         )
         self.output.write_manifest(out_dir, manifest)
         # Write human-readable generation report
-        gen_report_path = self._write_generation_report(
+        gen_report_path = self._write_generation_report(
+            out_dir,
+            report.repo_url or str(self.repo_url_or_path or ""),
+            plan,
+            diff_stats,
+            suggestions,
+            artifacts,
+            missing,
+        )
         self.print_step(step_name="Done", step_output=f"output_dir={out_dir}")
         return out_dir

-    def _write_generation_report(
+    def _write_generation_report(
+        self,
+        out_dir: str,
+        repo_url: str,
+        plan,
+        diff_stats: Dict[str, dict],
+        suggestions,
+        artifacts,
+        skipped: List[str],
+    ):
         # Build a simple markdown report
         lines: list[str] = []
-        lines.append(f"# Documentation
+        lines.append(f"# Documentation Changelog\n")
         lines.append(f"Repo: {repo_url}\n")
         lines.append(f"Output: {out_dir}\n")
         lines.append("\n## Summary of Changes\n")

@@ -151,6 +183,20 @@ class DocumentationGenerationManager:
         lines.append("\n## Planned Edits\n")
         for e in plan.planned_edits:
             lines.append(f"- `{e.file_path}` -> {e.edit_type} -> {e.anchor.get('value','')}")
+
+        # Summarize all files written with basic status
+        lines.append("\n## Files Written\n")
+        for art in artifacts:
+            stats = art.diff_stats or {}
+            added = stats.get("added_lines", 0)
+            status = "Revised" if added and added > 0 else "Copied"
+            lines.append(f"- {art.dest_rel_path} | status: {status} | added_lines: {added}")
+
+        # Skipped or missing files
+        if skipped:
+            lines.append("\n## Skipped or Missing Files\n")
+            for rel in skipped:
+                lines.append(f"- {rel}")
         report_md = "\n".join(lines)
         dest = os.path.join(out_dir, "GENERATION_REPORT.md")
         with open(dest, "w", encoding="utf-8") as fobj:
bioguider/managers/generation_test_manager.py

@@ -19,14 +19,14 @@ class GenerationTestManager:
         if self.step_output:
             self.step_output(step_name=name, step_output=out)

-    def run_quant_test(self, report_path: str, baseline_repo_path: str, tmp_repo_path: str) -> str:
+    def run_quant_test(self, report_path: str, baseline_repo_path: str, tmp_repo_path: str, min_per_category: int = 3) -> str:
         self.print_step("QuantTest:LoadBaseline", baseline_repo_path)
         baseline_readme_path = os.path.join(baseline_repo_path, "README.md")
         baseline = read_file(baseline_readme_path) or ""

         self.print_step("QuantTest:Inject")
         injector = LLMErrorInjector(self.llm)
-        corrupted, inj_manifest = injector.inject(baseline, min_per_category=
+        corrupted, inj_manifest = injector.inject(baseline, min_per_category=min_per_category)

         # write corrupted into tmp repo path
         os.makedirs(tmp_repo_path, exist_ok=True)

@@ -49,13 +49,38 @@ class GenerationTestManager:
         # write results
         with open(os.path.join(out_dir, "GEN_TEST_RESULTS.json"), "w", encoding="utf-8") as fobj:
             json.dump(results, fobj, indent=2)
-        #
-
-
+        # slides-like markdown report
+        totals = results.get("summary", {}).get("totals", {})
+        success_rate = results.get("summary", {}).get("success_rate", 0.0)
+        lines = ["# 🔬 Quantifiable Testing Results\n",
+                 "\n## BioGuider Error Correction Performance Analysis\n",
+                 "\n---\n",
+                 "\n## 📊 Slide 1: Testing Results Overview\n",
+                 "\n### 🎯 Totals\n",
+                 f"- Total Errors: {totals.get('total_errors', 0)}\n",
+                 f"- Fixed to Baseline: {totals.get('fixed_to_baseline', 0)}\n",
+                 f"- Fixed to Valid: {totals.get('fixed_to_valid', 0)}\n",
+                 f"- Unchanged: {totals.get('unchanged', 0)}\n",
+                 f"- Success Rate: {success_rate}%\n",
+                 "\n### 📂 Per-Category Metrics\n"]
         for cat, m in results["per_category"].items():
-            lines.append(f"- {cat}: {m}")
-
-
+            lines.append(f"- {cat}: total={m.get('total',0)}, fixed_to_baseline={m.get('fixed_to_baseline',0)}, fixed_to_valid={m.get('fixed_to_valid',0)}, unchanged={m.get('unchanged',0)}")
+        # Per-file change counts (simple heuristic from manifest artifacts)
+        try:
+            manifest_path = os.path.join(out_dir, "manifest.json")
+            with open(manifest_path, "r", encoding="utf-8") as mf:
+                mani = json.load(mf)
+            lines.append("\n### 🗂️ Per-File Changes\n")
+            for art in mani.get("artifacts", []):
+                rel = art.get("dest_rel_path")
+                stats = art.get("diff_stats", {})
+                added = stats.get("added_lines", 0)
+                status = "Revised" if added and added > 0 else "Copied"
+                lines.append(f"- {rel}: {status}, added_lines={added}")
+        except Exception:
+            pass
+        lines.append("\n---\n\n## 📝 Notes\n")
+        lines.append("- README versions saved: README.original.md, README.corrupted.md, README.md (fixed).\n")
         with open(os.path.join(out_dir, "GEN_TEST_REPORT.md"), "w", encoding="utf-8") as fobj:
             fobj.write("\n".join(lines))
         # Save versioned files into output dir
@@ -71,4 +96,12 @@ class GenerationTestManager:
         self.print_step("QuantTest:Done", out_dir)
         return out_dir

+    def run_quant_suite(self, report_path: str, baseline_repo_path: str, base_tmp_repo_path: str, levels: dict[str, int]) -> dict:
+        results = {}
+        for level, min_cnt in levels.items():
+            tmp_repo_path = f"{base_tmp_repo_path}_{level}"
+            out_dir = self.run_quant_test(report_path, baseline_repo_path, tmp_repo_path, min_per_category=min_cnt)
+            results[level] = out_dir
+        return results
+
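A sketch of how the new `run_quant_suite` entry point might be driven; the level names, counts, and paths below are placeholders, not values shipped with the package:

```python
# Hypothetical severity levels: name -> min errors injected per category.
levels = {"light": 1, "medium": 3, "heavy": 5}
base_tmp_repo_path = "/tmp/bioguider_quant"  # placeholder path

# Each level gets its own scratch repo, exactly as run_quant_suite derives it:
for level, min_cnt in levels.items():
    tmp_repo_path = f"{base_tmp_repo_path}_{level}"
    print(f"{level}: tmp_repo={tmp_repo_path}, min_per_category={min_cnt}")
    # A real run would then call:
    # out_dir = manager.run_quant_test(report_path, baseline_repo_path,
    #                                  tmp_repo_path, min_per_category=min_cnt)
```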
bioguider/utils/utils.py

@@ -53,10 +53,14 @@ def run_command(command: list, cwd: str = None, timeout: int = None):
         return e.stdout or "", e.stderr or f"Command timed out after {timeout} seconds", -1

 def escape_braces(text: str) -> str:
-
-
-
-
+    def fix_braces(m):
+        s = m.group(0)
+        # If odd number of braces, double the last one
+        if len(s) % 2 == 1:
+            return s + s[-1]
+        return s
+    # Handle both { and } sequences
+    text = re.sub(r'{+|}+', fix_braces, text)
     return text

 def increase_token_usage(
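The restored `escape_braces` body doubles the last brace of any odd-length run so the text survives `str.format`-style templating. A quick check of the behavior:

```python
import re

def escape_braces(text: str) -> str:
    def fix_braces(m):
        s = m.group(0)
        # An odd-length run of braces would be treated as a format field
        # delimiter; doubling the last brace makes the run literal.
        if len(s) % 2 == 1:
            return s + s[-1]
        return s
    return re.sub(r'{+|}+', fix_braces, text)

print(escape_braces("gene set {placeholder}"))  # gene set {{placeholder}}
print(escape_braces("already {{safe}}"))        # already {{safe}} (unchanged)
print("{{safe}}".format())                      # prints {safe}
```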
@@ -105,7 +109,7 @@ def convert_to_serializable(obj):
     else:
         return obj

-def convert_html_to_text(html_path: str | Path, exclude_tags: list[str]
+def convert_html_to_text(html_path: str | Path, exclude_tags: list[str] = ["script", "style", "img", "svg", "meta", "link"]) -> str:
     """
     This function is used to convert html string to text, that is,
     extract text from html content, including tables.
All remaining files (listed above with +0 -0) are unchanged: they were only renamed under the new version prefix, bioguider-0.2.25 → bioguider-0.2.27.