bioguider 0.2.5__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic. Click here for more details.

Files changed (48) hide show
  1. {bioguider-0.2.5 → bioguider-0.2.7}/PKG-INFO +2 -1
  2. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/agent_tools.py +10 -1
  3. bioguider-0.2.7/bioguider/agents/evaluation_installation_task.py +160 -0
  4. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/evaluation_task.py +11 -4
  5. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/prompt_utils.py +1 -1
  6. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/managers/evaluation_manager.py +25 -0
  7. {bioguider-0.2.5 → bioguider-0.2.7}/pyproject.toml +2 -1
  8. {bioguider-0.2.5 → bioguider-0.2.7}/LICENSE +0 -0
  9. {bioguider-0.2.5 → bioguider-0.2.7}/README.md +0 -0
  10. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/__init__.py +0 -0
  11. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/__init__.py +0 -0
  12. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/agent_task.py +0 -0
  13. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/agent_utils.py +0 -0
  14. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_execute_step.py +0 -0
  15. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_observe_step.py +0 -0
  16. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_plan_step.py +0 -0
  17. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_task.py +0 -0
  18. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/collection_task_utils.py +0 -0
  19. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/common_agent.py +0 -0
  20. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/common_agent_2step.py +0 -0
  21. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/common_step.py +0 -0
  22. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_execute_step.py +0 -0
  23. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_observe_step.py +0 -0
  24. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_plan_step.py +0 -0
  25. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_task.py +0 -0
  26. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/dockergeneration_task_utils.py +0 -0
  27. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_execute_step.py +0 -0
  28. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_observe_step.py +0 -0
  29. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_plan_step.py +0 -0
  30. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_task.py +0 -0
  31. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/identification_task_utils.py +0 -0
  32. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/peo_common_step.py +0 -0
  33. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/python_ast_repl_tool.py +0 -0
  34. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/agents/rag_collection_task.py +0 -0
  35. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/conversation.py +0 -0
  36. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/database/summarized_file_db.py +0 -0
  37. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/__init__.py +0 -0
  38. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/config.py +0 -0
  39. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/data_pipeline.py +0 -0
  40. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/embedder.py +0 -0
  41. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/rag/rag.py +0 -0
  42. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/settings.py +0 -0
  43. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/constants.py +0 -0
  44. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/default.gitignore +0 -0
  45. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/file_utils.py +0 -0
  46. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/gitignore_checker.py +0 -0
  47. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/pyphen_utils.py +0 -0
  48. {bioguider-0.2.5 → bioguider-0.2.7}/bioguider/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: bioguider
3
- Version: 0.2.5
3
+ Version: 0.2.7
4
4
  Summary: An AI-Powered package to help biomedical developers to generate clear documentation
5
5
  License: MIT
6
6
  Author: Cankun Wang
@@ -28,6 +28,7 @@ Requires-Dist: langchain-experimental (>=0.3.4,<0.4.0)
28
28
  Requires-Dist: langchain-google-genai (>=2.1.4,<3.0.0)
29
29
  Requires-Dist: langchain-openai (>=0.3.8,<0.4.0)
30
30
  Requires-Dist: langgraph (>=0.3.11,<0.4.0)
31
+ Requires-Dist: markdownify (>=1.1.0,<2.0.0)
31
32
  Requires-Dist: nanoid (>=2.0.0,<3.0.0)
32
33
  Requires-Dist: pydantic (>=2.10.6,<3.0.0)
33
34
  Requires-Dist: pydantic-settings (>=2.8.1,<3.0.0)
@@ -1,9 +1,11 @@
1
1
  import os
2
2
  from typing import Callable
3
+ from markdownify import markdownify as md
3
4
  from langchain_openai.chat_models.base import BaseChatOpenAI
4
5
  from bioguider.database.summarized_file_db import SummarizedFilesDb
5
6
  from bioguider.utils.file_utils import get_file_type
6
7
  from bioguider.agents.agent_utils import read_directory, read_file, summarize_file
8
+ from bioguider.rag.data_pipeline import count_tokens
7
9
 
8
10
  class agent_tool:
9
11
  def __init__(
@@ -39,7 +41,14 @@ Returns:
39
41
  file_path = os.path.join(self.repo_path, file_path)
40
42
  if not os.path.isfile(file_path):
41
43
  return None
42
- return read_file(file_path)
44
+ content = read_file(file_path)
45
+ if file_path.endswith(".html") or file_path.endswith(".htm"):
46
+ content = md(content, escape_underscores=False)
47
+ tokens = count_tokens(content)
48
+ MAX_TOKENS = os.environ.get('OPENAI_MAX_INPUT_TOKENS', 102400)
49
+ if tokens > int(MAX_TOKENS):
50
+ content = content[:100000]
51
+ return content
43
52
 
44
53
  class summarize_file_tool(agent_tool):
45
54
  """ read and summarize the file
@@ -0,0 +1,160 @@
1
+ import os
2
+ from pathlib import Path
3
+ import logging
4
+ from typing import Callable, Optional
5
+ from abc import ABC, abstractmethod
6
+ from langchain.prompts import ChatPromptTemplate
7
+ from langchain_openai.chat_models.base import BaseChatOpenAI
8
+ from pydantic import BaseModel, Field
9
+ from markdownify import markdownify as md
10
+
11
+ from bioguider.agents.agent_utils import read_file
12
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
13
+ from bioguider.rag.data_pipeline import count_tokens
14
+ from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
15
+ from .common_agent import CommonConversation
16
+ from ..utils.pyphen_utils import PyphenReadability
17
+ from ..utils.gitignore_checker import GitignoreChecker
18
+ from .evaluation_task import EvaluationTask
19
+ from .agent_utils import read_file
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ EVALUATION_INSTALLATION_SYSTEM_PROMPT = """
25
+ You are an expert in evaluating the quality of **installation instructions** in software repositories.
26
+ Your task is to analyze the provided content of installation-related files and generate a **comprehensive, structured quality report**.
27
+
28
+ ---
29
+
30
+ ### Evaluation Criteria
31
+
32
+ Please assess the installation information using the following criteria. For each, provide a concise evaluation and specific feedback:
33
+
34
+ 1. **Ease of Access**
35
+ * Is the installation information clearly presented and easy to locate within the repository?
36
+ * Is it included in a top-level README, a dedicated INSTALL.md file, or other accessible locations?
37
+
38
+ 2. **Clarity of Dependency Specification**
39
+ * Are all software and library dependencies clearly listed?
40
+ * Are installation methods (e.g., `pip`, `conda`, `apt`) for those dependencies explicitly provided?
41
+
42
+ 3. **Hardware Requirements**
43
+ * Does the documentation specify hardware needs (e.g., GPU, memory, OS) if relevant?
44
+
45
+ 4. **Step-by-Step Installation Guide**
46
+ * Is there a clear, ordered set of instructions for installing the software?
47
+ * Are example commands or configuration steps provided to help users follow along?
48
+
49
+ ---
50
+
51
+ ### Output Format
52
+
53
+ Your response **must exactly follow** the structure below:
54
+
55
+ ```
56
+ **FinalAnswer**
57
+ **Overall Score:** [Poor / Fair / Good / Excellent]
58
+ **Ease of Access:** <your comments>
59
+ **Clarity of Dependency Specification:** <your comments>
60
+ **Hardware Requirements:** <your comments>
61
+ **Installation Guide:** <your comments>
62
+ ```
63
+
64
+ ---
65
+
66
+ ### Installation Files Provided:
67
+ {installation_file_contents}
68
+
69
+ """
70
+
71
+ class EvaluationInstallationResult(BaseModel):
72
+ ease_of_access: Optional[str]=Field(description="Is the installation information easy to access")
73
+ score: Optional[str]=Field(description="An overall score, could be Poor, Fair, Good or Excellent")
74
+ clarity_of_dependency: Optional[str]=Field(description="Are all dependencies clearly listed")
75
+ hardware_requirements: Optional[str]=Field(description="Are all hardware requirements clearly specified")
76
+ installation_guide: Optional[str]=Field(description="Is there a clear, ordered set of instructions for installing the software")
77
+
78
+ EvaluationInstallationResultSchema = {
79
+ "title": "EvaluationREADMEResult",
80
+ "type": "object",
81
+ "properties": {
82
+ "ease_of_access": {
83
+ "anyOf": [{"type": "string"}, {"type": "null"}],
84
+ "description": "Is the installation information easy to access",
85
+ "title": "Ease of Access"
86
+ },
87
+ "score": {
88
+ "anyOf": [{"type": "string"}, {"type": "null"}],
89
+ "description": "An overall score, could be Poor, Fair, Good or Excellent",
90
+ "title": "Score"
91
+ },
92
+ "clarity_of_dependency": {
93
+ "anyOf": [{"type": "string"}, {"type": "null"}],
94
+ "description": "Are all dependencies clearly listed",
95
+ "title": "Clarity of Dependency",
96
+ },
97
+ "hardware_requirements": {
98
+ "anyOf": [{"type": "string"}, {"type": "null"}],
99
+ "description": "Are all hardware requirements clearly specified",
100
+ "title": "Hardware Requirements"
101
+ },
102
+ "installation_guide": {
103
+ "anyOf": [{"type": "string"}, {"type": "null"}],
104
+ "description": "Is there a clear, ordered set of instructions for installing the software",
105
+ "title": "Installation Guide"
106
+ }
107
+ },
108
+ "required": ["ease_of_access", "score", "clarity_of_dependency", "hardware_requirements", "installation_guide"]
109
+ }
110
+
111
+ class EvaluationInstallationTask(EvaluationTask):
112
+ def __init__(
113
+ self,
114
+ llm,
115
+ repo_path,
116
+ gitignore_path,
117
+ meta_data = None,
118
+ step_callback = None,
119
+ ):
120
+ super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
121
+
122
+ def _evaluate(self, files: list[str] | None = None):
123
+ if files is None or len(files) == 0:
124
+ return None
125
+
126
+ files_content = ""
127
+ MAX_TOKENS = os.environ.get("OPENAI_MAX_INPUT_TOKENS", 102400)
128
+ for f in files:
129
+ if f.endswith(".html") or f.endswith(".htm"):
130
+ html = read_file(os.path.join(self.repo_path, f))
131
+ content = md(html, escape_underscores=False)
132
+ else:
133
+ content = read_file(os.path.join(self.repo_path, f))
134
+ if count_tokens(content) > int(MAX_TOKENS):
135
+ content = content[:100000]
136
+ files_content += f"""
137
+ {f} content:
138
+ {content}
139
+
140
+ """
141
+ system_prompt = ChatPromptTemplate.from_template(EVALUATION_INSTALLATION_SYSTEM_PROMPT).format(
142
+ installation_file_contents=files_content
143
+ )
144
+ agent = CommonAgentTwoChainSteps(llm=self.llm)
145
+ res, _, token_usage, reasoning_process = agent.go(
146
+ system_prompt=system_prompt,
147
+ instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation.",
148
+ schema=EvaluationInstallationResultSchema,
149
+ )
150
+ res = EvaluationInstallationResult(**res)
151
+ evaluation = {
152
+ "score": res.score,
153
+ "ease_of_access": res.ease_of_access,
154
+ "hardware_requirements": res.hardware_requirements,
155
+ "clarity_of_dependency": res.clarity_of_dependency,
156
+ "installation_guide": res.installation_guide,
157
+ "reasoning_process": reasoning_process,
158
+ }
159
+ return evaluation, token_usage
160
+
@@ -76,7 +76,7 @@ For each criterion below, provide a brief assessment followed by specific, actio
76
76
  * Project-Level README: Yes / No
77
77
  * **Score:** <number from 0 to 100>
78
78
  * **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
79
- * **Improvement Suggestions:**
79
+ * **Overall Improvement Suggestions:**
80
80
  - "Original text snippet 1" - Improving comment 1
81
81
  - "Original text snippet 2" - Improving comment 2
82
82
  - ...
@@ -124,7 +124,12 @@ For each criterion below, provide a brief assessment followed by specific, actio
124
124
  **Final Answer**
125
125
  The final answer **must exactly match** the following format:
126
126
  * Project-Level README: Yes / No
127
- * Overall Assessment: provide a final, overall assessment of the README file's quality, summarizing the key strengths and areas for improvement.
127
+ * **Score:** <number from 0 to 100>
128
+ * **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
129
+ * **Overall Improvement Suggestions:**
130
+ - "Original text snippet 1" - Improving comment 1
131
+ - "Original text snippet 2" - Improving comment 2
132
+ - ...
128
133
  ---
129
134
 
130
135
  ### **README path:**
@@ -243,7 +248,9 @@ class EvaluationREADMETask(EvaluationTask):
243
248
  readme_evaluations[readme_file] = {
244
249
  "evaluation": {
245
250
  "project_level": "/" in readme_file,
246
- "overall_assessment": f"{readme_file} is an empty file."
251
+ "score": 0,
252
+ "key_strengths": f"{readme_file} is an empty file.",
253
+ "overall_improvement_suggestions": f"{readme_file} is an empty file.",
247
254
  },
248
255
  "reasoning_process": f"{readme_file} is an empty file.",
249
256
  }
@@ -277,7 +284,7 @@ class EvaluationREADMETask(EvaluationTask):
277
284
  "project_level": response.project_level,
278
285
  "score": response.score,
279
286
  "key_strengths": response.key_strengths,
280
- "overall_assessment": response.overall_improvement_suggestions,
287
+ "overall_improvement_suggestions": response.overall_improvement_suggestions,
281
288
  },
282
289
  "reasoning_process": reasoning_process
283
290
  }
@@ -165,7 +165,7 @@ If **any one** of these is present, the document should be classified as Install
165
165
  "important_instructions": """- Give priority to analyzing README file that contain installation instructions and the files whose names include **"install"** or **"setup"**.
166
166
  - If multiple files are found, select the most comprehensive one that covers the installation process.
167
167
  - The total number of collected files should **not exceed 3**.
168
- - Make sure to include **only one installation instruction file**, selecting the most comprehensive and representative one.
168
+ - Identify and select **no more than three** installation instruction files; choose the most comprehensive and representative ones.
169
169
  """
170
170
  },
171
171
  "License": {
@@ -10,6 +10,7 @@ from ..rag.rag import RAG
10
10
  from ..utils.file_utils import parse_repo_url
11
11
  from ..database.summarized_file_db import SummarizedFilesDb
12
12
  from ..agents.evaluation_task import EvaluationREADMETask
13
+ from ..agents.evaluation_installation_task import EvaluationInstallationTask
13
14
  from ..agents.collection_task import CollectionTask
14
15
 
15
16
  class EvaluationManager:
@@ -84,6 +85,30 @@ class EvaluationManager:
84
85
  if s is None or 'final_answer' not in s:
85
86
  return None
86
87
 
88
+ def evaluate_installation(self):
89
+ task = CollectionTask(
90
+ llm=self.llm,
91
+ step_callback=self.step_callback,
92
+ )
93
+ task.compile(
94
+ repo_path=self.rag.repo_dir,
95
+ gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
96
+ db=self.summary_file_db,
97
+ goal_item=CollectionGoalItemEnum.Installation.name,
98
+ )
99
+ files = task.collect()
100
+ if files is None or len(files) == 0:
101
+ return None
102
+ evaluation_task = EvaluationInstallationTask(
103
+ llm=self.llm,
104
+ repo_path=self.rag.repo_dir,
105
+ gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
106
+ meta_data=self.project_metadata,
107
+ step_callback=self.step_callback,
108
+ )
109
+ evaluation = evaluation_task.evaluate(files)
110
+ return evaluation, files
111
+
87
112
  def _find_readme_files(self) -> list[str]:
88
113
  """
89
114
  Search for a README file in the repository directory.
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "bioguider"
3
- version = "0.2.5"
3
+ version = "0.2.7"
4
4
  description = "An AI-Powered package to help biomedical developers to generate clear documentation"
5
5
  authors = [
6
6
  "Cankun Wang <Cankun.Wang@osumc.edu>",
@@ -45,6 +45,7 @@ faiss-cpu = "^1.11.0"
45
45
  binaryornot = "^0.4.4"
46
46
  textstat = "^0.7.6"
47
47
  pyphen = "^0.17.2"
48
+ markdownify = "^1.1.0"
48
49
 
49
50
 
50
51
  [tool.poetry.group.dev.dependencies]
File without changes
File without changes