bioguider 0.2.12__tar.gz → 0.2.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of bioguider has been flagged as possibly problematic.

Files changed (50)
  1. {bioguider-0.2.12 → bioguider-0.2.14}/PKG-INFO +1 -1
  2. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/agent_task.py +8 -4
  3. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/agent_tools.py +17 -14
  4. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/agent_utils.py +40 -4
  5. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_observe_step.py +7 -5
  6. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_plan_step.py +9 -7
  7. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_task.py +15 -5
  8. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_task_utils.py +46 -15
  9. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/dockergeneration_task.py +1 -1
  10. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/evaluation_installation_task.py +31 -7
  11. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/evaluation_readme_task.py +26 -4
  12. bioguider-0.2.14/bioguider/agents/evaluation_submission_requirements_task.py +153 -0
  13. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/evaluation_task.py +19 -6
  14. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/identification_observe_step.py +7 -1
  15. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/identification_plan_step.py +6 -1
  16. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/identification_task.py +23 -4
  17. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/identification_task_utils.py +2 -0
  18. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/prompt_utils.py +44 -4
  19. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/database/summarized_file_db.py +1 -1
  20. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/managers/evaluation_manager.py +38 -46
  21. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/utils/constants.py +2 -0
  22. {bioguider-0.2.12 → bioguider-0.2.14}/pyproject.toml +1 -1
  23. {bioguider-0.2.12 → bioguider-0.2.14}/LICENSE +0 -0
  24. {bioguider-0.2.12 → bioguider-0.2.14}/README.md +0 -0
  25. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/__init__.py +0 -0
  26. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/__init__.py +0 -0
  27. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_execute_step.py +0 -0
  28. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/common_agent.py +0 -0
  29. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/common_agent_2step.py +0 -0
  30. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/common_step.py +0 -0
  31. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/dockergeneration_execute_step.py +0 -0
  32. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/dockergeneration_observe_step.py +0 -0
  33. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/dockergeneration_plan_step.py +0 -0
  34. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/dockergeneration_task_utils.py +0 -0
  35. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/identification_execute_step.py +0 -0
  36. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/peo_common_step.py +0 -0
  37. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/python_ast_repl_tool.py +0 -0
  38. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/rag_collection_task.py +0 -0
  39. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/conversation.py +0 -0
  40. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/rag/__init__.py +0 -0
  41. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/rag/config.py +0 -0
  42. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/rag/data_pipeline.py +0 -0
  43. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/rag/embedder.py +0 -0
  44. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/rag/rag.py +0 -0
  45. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/settings.py +0 -0
  46. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/utils/default.gitignore +0 -0
  47. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/utils/file_utils.py +0 -0
  48. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/utils/gitignore_checker.py +0 -0
  49. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/utils/pyphen_utils.py +0 -0
  50. {bioguider-0.2.12 → bioguider-0.2.14}/bioguider/utils/utils.py +0 -0
{bioguider-0.2.12 → bioguider-0.2.14}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: bioguider
-Version: 0.2.12
+Version: 0.2.14
 Summary: An AI-Powered package to help biomedical developers to generate clear documentation
 License: MIT
 Author: Cankun Wang
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/agent_task.py

@@ -13,7 +13,12 @@ class AgentTask(ABC):
     A class representing a step in an agent's process.
     """

-    def __init__(self, llm: BaseChatOpenAI, step_callback: Callable | None = None):
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        step_callback: Callable | None = None,
+        summarized_files_db: SummarizedFilesDb | None = None,
+    ):
         """
         Initialize the AgentStep with a language model and a callback function.

@@ -23,7 +28,7 @@ class AgentTask(ABC):
         """
         self.llm = llm
         self.step_callback = step_callback
-        self.summary_file_db = None
+        self.summarized_files_db = summarized_files_db
         self.graph: CompiledGraph | None = None

     def _print_step(
@@ -45,7 +50,7 @@ class AgentTask(ABC):
             token_usage=token_usage,
         )

-    def compile(self, repo_path: str, gitignore_path: str, db: SummarizedFilesDb | None = None, **kwargs):
+    def compile(self, repo_path: str, gitignore_path: str, **kwargs):
         """
         Compile the agent step with the given repository and gitignore paths.

@@ -55,7 +60,6 @@ class AgentTask(ABC):
         **kwargs: derived class may pass more arguments to implmented _compile(), that is,
         what **kwargs is depends on derived class
         """
-        self.summary_file_db = db
         self._compile(repo_path, gitignore_path, **kwargs)

     @abstractmethod
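The change above moves the summarized-files cache from `compile()` into the constructor. A rough sketch of what wiring a derived task might look like after this change; the task class name, model name, and `SummarizedFilesDb` constructor arguments below are assumptions, not taken from the diff:

```python
# Hedged sketch only: illustrates the new wiring, not the package's documented API.
from langchain_openai import ChatOpenAI
from bioguider.database.summarized_file_db import SummarizedFilesDb

llm = ChatOpenAI(model="gpt-4o")               # assumed model name
db = SummarizedFilesDb("summaries.sqlite")     # constructor arguments are assumed

# SomeDerivedAgentTask stands in for any AgentTask subclass (e.g. CollectionTask).
task = SomeDerivedAgentTask(llm=llm, step_callback=print, summarized_files_db=db)
# compile() no longer takes a `db` argument; the cache now travels with the task itself.
task.compile(repo_path="/path/to/repo", gitignore_path="/path/to/repo/.gitignore")
```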
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/agent_tools.py

@@ -1,4 +1,5 @@
 import os
+import logging
 from typing import Callable
 from markdownify import markdownify as md
 from langchain_openai.chat_models.base import BaseChatOpenAI
@@ -7,6 +8,8 @@ from bioguider.utils.file_utils import get_file_type
 from bioguider.agents.agent_utils import read_directory, read_file, summarize_file
 from bioguider.rag.data_pipeline import count_tokens

+logger = logging.getLogger(__name__)
+
 class agent_tool:
     def __init__(
         self,
@@ -53,19 +56,12 @@ Returns:
 class summarize_file_tool(agent_tool):
     """ Read a file and generate a summary according to a specified prompt.

-    Arguments
-    ----------
-    file_path : str, required
-        Path to the file to read.
-    summarize_prompt : str, optional
-        Instruction guiding the summarization focus (default is "N/A").
-        Use this to emphasize specific aspects of the content.
+    Args:
+        file_path str: required. The file path to read.
+        summarize_prompt str: optional. A string instruction guiding the summarization focus (default is "N/A"). Use this to emphasize specific aspects of the content.

-    Returns
-    -------
-    str or None
-        A summarized version of the file content.
-        Returns None if the file does not exist or cannot be read.
+    Returns:
+        str or None: A summarized version of the file content. Returns None if the file does not exist or cannot be read.
     """
     def __init__(
         self,
@@ -124,8 +120,15 @@ Returns
         if summarized_content is not None:
             return f"summarized content of file {file_path}: " + summarized_content

-        file_content = read_file(abs_file_path)
-        file_content = file_content.replace("{", "{{").replace("}", "}}")
+        try:
+            file_content = read_file(abs_file_path)
+            file_content = file_content.replace("{", "{{").replace("}", "}}")
+        except UnicodeDecodeError as e:
+            logger.error(str(e))
+            return f"{file_path} is a binary, can't be summarized."
+        except Exception as e:
+            logger.error(str(e))
+            return f"Failed to read {file_path}."
         summarized_content, token_usage = summarize_file(
             self.llm, abs_file_path, file_content, self.detailed_level,
             summary_instructions=self.summarize_instruction,
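The guard around `read_file` keeps a binary or otherwise unreadable file from crashing the agent loop: the tool now reports the problem as its return value. Below is a standalone sketch of the same pattern; it is not the package's `read_file`, just an illustration of the behavior the try/except above introduces:

```python
import logging

logger = logging.getLogger(__name__)

def read_text_or_report(path: str) -> str:
    """Return escaped text content, or a human-readable message on failure."""
    try:
        with open(path, encoding="utf-8") as fh:  # binary files raise UnicodeDecodeError here
            content = fh.read()
        # Escape braces so the content can be embedded safely in a prompt template.
        return content.replace("{", "{{").replace("}", "}}")
    except UnicodeDecodeError as exc:
        logger.error(str(exc))
        return f"{path} is a binary, can't be summarized."
    except Exception as exc:
        logger.error(str(exc))
        return f"Failed to read {path}."
```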
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/agent_utils.py

@@ -16,11 +16,12 @@ from langchain.tools import BaseTool
 from langchain.schema import AgentAction, AgentFinish
 from langchain.agents import AgentOutputParser
 from langgraph.prebuilt import create_react_agent
+from langchain_community.callbacks.openai_info import OpenAICallbackHandler
 import logging

 from pydantic import BaseModel, Field

-from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_SENTENCE_NUM
 from bioguider.utils.file_utils import get_file_type
 from ..utils.gitignore_checker import GitignoreChecker
 from ..database.summarized_file_db import SummarizedFilesDb
@@ -178,8 +179,7 @@ Here is the file content:
 Now, let's start to summarize.
 """)

-MAX_FILE_LENGTH=20 *1024 # 20K
-MAX_SENTENCE_NUM=20
+
 def summarize_file(
     llm: BaseChatOpenAI,
     name: str,
@@ -379,6 +379,20 @@ def escape_braces(text: str) -> str:
     text = re.sub(r'(?<!{){(?!{)', '{{', text)
     return text

+STRING_TO_OBJECT_SYSTEM_PROMPT = """
+You are an expert to understand data. You will be provided a text, and your task is to extracted structured data from the provided text.
+
+---
+
+### **Instructions**
+1. If no structured data can be extracted, return None
+
+---
+
+### **Input Text**
+{input_text}
+"""
+
 def try_parse_json_object(json_obj: str) -> dict | None:
     json_obj = json_obj.strip()

@@ -406,4 +420,26 @@ def try_parse_json_object(json_obj: str) -> dict | None:
             return None
     except Exception as e:
         logger.error(e)
-        return None
+        return None
+
+def try_parse_with_llm(llm: BaseChatOpenAI, input_text: str, schema: any):
+    system_prompt = ChatPromptTemplate.from_template(
+        STRING_TO_OBJECT_SYSTEM_PROMPT
+    ).format(input_text=input_text)
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_prompt)
+    ])
+    agent = prompt | llm.with_structured_output(schema)
+    callback_handler = OpenAICallbackHandler()
+
+    try:
+        res = agent.invoke(
+            input={},
+            config={
+                "callbacks": [callback_handler],
+            },
+        )
+        return res, vars(callback_handler)
+    except Exception as e:
+        logger.error(e)
+        return None
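The new `try_parse_with_llm` helper is a fallback for when plain JSON parsing fails: it asks the model to re-emit the text as a structured object matching a given schema and returns `(parsed, token_usage)` on success or `None` on failure. A hedged usage sketch; the schema and model name below are made up for illustration:

```python
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from bioguider.agents.agent_utils import try_parse_with_llm

class InstallInfo(BaseModel):
    # Hypothetical schema, only for this example.
    package_name: str = Field(description="Name of the package being installed")
    dependency_count: int = Field(description="Number of required dependencies")

llm = ChatOpenAI(model="gpt-4o")  # assumed model name
text = "Install bioguider with pip; it pulls in 12 required dependencies."

result = try_parse_with_llm(llm, text, InstallInfo)
if result is not None:            # on failure the helper returns None, not a tuple
    parsed, token_usage = result
    print(parsed.dependency_count)
```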
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_observe_step.py

@@ -5,7 +5,7 @@ from langchain_openai.chat_models.base import BaseChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 from bioguider.agents.agent_utils import ObservationResult
 from bioguider.agents.collection_task_utils import CollectionWorkflowState
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.peo_common_step import PEOCommonStep
 from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS

@@ -34,11 +34,13 @@ Here is the 2-level file structure of the repository (`f` = file, `d` = director

 * Provide your reasoning under **Analysis**
 * Then list all relevant files and folders under **FinalAnswer**
+* **FinalAnswer** format must exactly match this format:
+  **FinalAnswer**: {{"final_answer": [<file path>, <file path>, <file path>, ...]}}
 * Be sure to include the **full relative paths** with respect to the repository root.
-* Your answer **must follow this exact format** (note: no JSON code block, no additional comments):
+* Your answer **must exactly match the follwing format** (note: no JSON code block, no additional comments), **do not** make up anything:

 ```
-**Analysis**: your analysis here
+**Analysis**: your analysis here
 **FinalAnswer**: {{"final_answer": ["path/to/file1", "path/to/file2", ...]}}
 ```
 4. If you believe **more files still need to be collected**:
@@ -80,8 +82,8 @@ class CollectionObserveStep(PEOCommonStep):
         repo_structure = self.repo_structure
         intermediate_steps = self._build_intermediate_steps(state)
         prompt = ChatPromptTemplate.from_template(COLLECTION_OBSERVE_SYSTEM_PROMPT)
-        important_instructions = "N/A" if "important_instructions" not in collection_item or len(collection_item["important_instructions"]) == 0 \
-            else collection_item["important_instructions"]
+        important_instructions = "N/A" if "observe_important_instructions" not in collection_item or len(collection_item["observe_important_instructions"]) == 0 \
+            else collection_item["observe_important_instructions"]
         return prompt.format(
             goal_item_desc=goal_item_desc,
             related_file_description=collection_item["related_file_description"],
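The stricter instruction above pins the observe step's reply to a single `**FinalAnswer**` line carrying a JSON object (the doubled braces are template escapes). A small, illustrative sketch of a compliant reply and one way to pull the file list out of it; the parsing code here is not the package's own:

```python
import json
import re

reply = (
    "**Analysis**: README and INSTALL cover setup.\n"
    '**FinalAnswer**: {"final_answer": ["README.md", "docs/INSTALL.md"]}'
)

# Grab the JSON object that follows the FinalAnswer marker.
match = re.search(r"\*\*FinalAnswer\*\*:\s*(\{.*\})", reply, re.S)
final_files = json.loads(match.group(1))["final_answer"] if match else []
print(final_files)  # ['README.md', 'docs/INSTALL.md']
```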
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_plan_step.py

@@ -8,7 +8,7 @@ from bioguider.agents.agent_utils import (
     PlanAgentResultJsonSchema,
     PlanAgentResult,
 )
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.peo_common_step import PEOCommonStep
 from bioguider.agents.collection_task_utils import CollectionWorkflowState
 from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS
@@ -57,7 +57,9 @@ Here are the results from previous steps:

 3. You may use the `read_directory` tool to explore directory contents, but avoid using it in the first step unless necessary.

-4. You may use the `python_repl` tool to execute Python code, but this should **also be avoided in the first step**.
+4. Your plan can only use the above tools, **do not** make up any tools not in the above tools list.
+
+5. Your planned step input file or input directory must come from the above repository files structure, **do not** make up file name or directory name.

 ---

@@ -65,12 +67,12 @@ Here are the results from previous steps:
 {important_instructions}

 ### **Output Format**
-Your plan should be returned as a sequence of steps in the following format:
+Your plan **must exactly match** a sequence of steps in the following format, **do not** make up anything:

-Step: <tool name> # Tool name must be one of {tool_names}
+Step: <tool name> # Tool name **must be one** of {tool_names}
 Step Input: <file or directory name>

-Step: <tool name>
+Step: <tool name> # Tool name **must be one** of {tool_names}
 Step Input: <file or directory name>
 ...
 """)
@@ -105,8 +107,8 @@ class CollectionPlanStep(PEOCommonStep):
         step_analysis, step_thoughts = self._build_intermediate_analysis_and_thoughts(state)
         goal = ChatPromptTemplate.from_template(COLLECTION_GOAL).format(goal_item=collection_item["goal_item"])
         related_file_description = collection_item["related_file_description"]
-        important_instructions="N/A" if "important_instructions" not in collection_item or len(collection_item["important_instructions"]) == 0 \
-            else collection_item["important_instructions"]
+        important_instructions="N/A" if "plan_important_instructions" not in collection_item or len(collection_item["plan_important_instructions"]) == 0 \
+            else collection_item["plan_important_instructions"]
         tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
         system_prompt = COLLECTION_PLAN_SYSTEM_PROMPT.format(
             goal=goal,
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_task.py

@@ -50,9 +50,12 @@ class CollectionTask(AgentTask):
     def __init__(
         self,
         llm: BaseChatOpenAI,
-        step_callback: Callable | None = None
+        step_callback: Callable | None = None,
+        summarize_instruction: str | None = "N/A",
+        summarized_files_db: SummarizedFilesDb | None = None,
+        provided_files: list[str] | None = None,
     ):
-        super().__init__(llm, step_callback)
+        super().__init__(llm, step_callback, summarized_files_db=summarized_files_db)
         self.repo_path: str | None = None
         self.gitignore_path: str | None = None
         self.repo_structure: str | None = None
@@ -60,6 +63,8 @@ class CollectionTask(AgentTask):
         self.steps: list[PEOCommonStep] = []
         self.tools: list[any] | None = None
         self.custom_tools: list[Tool] | None = None
+        self.summarize_instruction = summarize_instruction
+        self.provided_files = provided_files

     def _prepare_tools(self, related_file_goal_item_desc):
         tool_rd = read_directory_tool(repo_path=self.repo_path)
@@ -67,7 +72,8 @@ class CollectionTask(AgentTask):
             llm=self.llm,
             repo_path=self.repo_path,
             output_callback=self.step_callback,
-            db=self.summary_file_db,
+            db=self.summarized_files_db,
+            summaize_instruction=self.summarize_instruction,
         )
         tool_rf = read_file_tool(repo_path=self.repo_path)
         tool_cf = check_file_related_tool(
@@ -75,6 +81,8 @@ class CollectionTask(AgentTask):
             repo_path=self.repo_path,
             goal_item_desc=related_file_goal_item_desc,
             output_callback=self.step_callback,
+            summarize_instruction=self.summarize_instruction,
+            summarized_files_db=self.summarized_files_db,
         )
         self.tools = [tool_rd, tool_sum, tool_rf, tool_cf]
         self.custom_tools = [
@@ -99,13 +107,15 @@ class CollectionTask(AgentTask):
                 description=tool_cf.__class__.__doc__,
             ),
         ]
-        self.custom_tools.append(CustomPythonAstREPLTool())
+        # self.custom_tools.append(CustomPythonAstREPLTool())

     def _initialize(self):
         # initialize the 2-level file structure of the repo
         if not os.path.exists(self.repo_path):
             raise ValueError(f"Repository path {self.repo_path} does not exist.")
-        files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
+        files = self.provided_files
+        if files is None:
+            files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
         file_pairs = [(f, get_file_type(os.path.join(self.repo_path, f)).value) for f in files]
         self.repo_structure = ""
         for f, f_type in file_pairs:
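Taken together, `CollectionTask` now threads the summary cache and instruction into its tools and can work from a pre-selected file list instead of scanning the repository. A hedged construction sketch; the `llm` and `db` objects and the `goal_item` keyword passed to `compile()` follow the usage visible later in this diff, everything else is illustrative:

```python
from bioguider.agents.collection_task import CollectionTask
from bioguider.agents.prompt_utils import CollectionGoalItemEnum

task = CollectionTask(
    llm=llm,                                    # BaseChatOpenAI instance, assumed to exist
    step_callback=print,
    summarize_instruction="Focus on installation steps.",
    summarized_files_db=db,                     # SummarizedFilesDb instance, assumed to exist
    provided_files=["README.md", "setup.py"],   # skips the gitignore-based scan in _initialize()
)
task.compile(
    repo_path="/path/to/repo",
    gitignore_path="/path/to/repo/.gitignore",
    goal_item=CollectionGoalItemEnum.Installation.name,  # kwarg as used by _collect_files() later in this diff
)
files = task.collect()                          # collect() is called this way in _collect_files()
```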
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/collection_task_utils.py

@@ -4,13 +4,17 @@ from langchain.prompts import ChatPromptTemplate
 from langchain_openai.chat_models.base import BaseChatOpenAI
 from langchain_core.messages import AIMessage
 from pydantic import BaseModel, Field
+import logging

 from bioguider.agents.agent_tools import agent_tool
 from bioguider.agents.agent_utils import read_file, summarize_file
 from bioguider.agents.peo_common_step import PEOWorkflowState
 from bioguider.agents.common_agent import CommonAgent
 from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.database.summarized_file_db import SummarizedFilesDb
+from bioguider.utils.constants import MAX_FILE_LENGTH

+logger = logging.getLogger(__name__)

 class CollectionWorkflowState(TypedDict):
     llm: Optional[BaseChatOpenAI]
@@ -46,20 +50,22 @@ Does this file appear to contain related information?

 ---

-### **Output Format:**
-Respond with a single word: "Yes" or "No" to indicate whether the file is related to the goal item.
-Do not include any additional text, explanation, or formatting.
+### **Output Format:**
+Respond with exactly two parts:
+1. A single word: Yes or No (indicating if the file meets the goal criteria)
+2. One brief explanatory sentence.
+For example: Yes. This file is a compiled binary file, so, it is related to the compiled standalone file (goal item).
 """)

 class CheckFileRelatedResult(BaseModel):
-    is_related: bool = Field(description="True if the file is related to the goal item, False otherwise.")
+    is_related: str = Field(description="A string conclusion specify if the provided file is related. The string value contains two parts:\n 1. A single word: Yes or No (indicating if the file meets the goal criteria).\n 2. One brief explanatory sentence.")

 class check_file_related_tool(agent_tool):
     """ Check if the file is related to the goal item
     Args:
         file_path str: file path
     Returns:
-        bool: True if the file is related to the goal item, False otherwise.
+        str: A string conclusion. The string conclusion contains two parts:\n 1. A single word: Yes or No (indicating if the file meets the goal criteria).\n 2. One brief explanatory sentence.
     """
     def __init__(
         self,
@@ -67,23 +73,51 @@ Returns:
         repo_path: str,
         goal_item_desc: str,
         output_callback: Callable | None = None,
+        summarize_instruction: str | None = None,
+        summarize_level: int | None = 6,
+        summarized_files_db: SummarizedFilesDb | None = None,
     ):
         super().__init__(llm=llm, output_callback=output_callback)
         self.repo_path = repo_path
         self.goal_item_desc = goal_item_desc
+        self.summarize_instruction = summarize_instruction \
+            if summarize_instruction is not None else "N/A"
+        self.summarize_level = summarize_level
+        self.summarized_files_db = summarized_files_db

     def run(self, file_path: str) -> str:
         if not self.repo_path in file_path:
             file_path = os.path.join(self.repo_path, file_path)
         if not os.path.isfile(file_path):
             return "Can't read file"
-        file_content = read_file(file_path)
-        if file_content is None:
+
+        check_prompts = None
+        try:
+            file_content = read_file(file_path)
+        except UnicodeDecodeError as e:
+            logger.error(str(e))
+            check_prompts = "Can't summarize binary file, please decide according to file name and extension."
+        except Exception as e:
+            logger.error(str(e))
+            check_prompts = "Failed to summarize file, please decide according to file name and extension."
+        if check_prompts is None and file_content is None:
             return "Failed to read file"
-        summarized_content, token_usage = summarize_file(self.llm, file_path, file_content, 6)
-        if summarized_content is None:
-            return "Failed to summarize file"
-        self._print_token_usage(token_usage)
+        if check_prompts is not None:
+            summarized_content = check_prompts
+        else:
+            if len(file_content) > MAX_FILE_LENGTH:
+                file_content = file_content[:MAX_FILE_LENGTH]
+            summarized_content, token_usage = summarize_file(
+                llm=self.llm,
+                name=file_path,
+                content=file_content,
+                level=self.summarize_level,
+                summary_instructions=self.summarize_instruction,
+                db=self.summarized_files_db,
+            )
+            if summarized_content is None:
+                return "Failed to summarize file"
+            self._print_token_usage(token_usage)

         prompt = CHECK_FILE_RELATED_USER_PROMPT.format(
             goal_item_desc=self.goal_item_desc,
@@ -102,8 +136,5 @@ Returns:

         self._print_step_output(step_output=reasoning)
         self._print_token_usage(token_usage)
-        if out:
-            return "Yes, the file is related to the goal item."
-        else:
-            return "No, the file **is not** related to the goal item."
+        return res.is_related

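Because `check_file_related_tool.run()` now returns a free-text verdict rather than a boolean, any caller that used to branch on `True`/`False` has to inspect the leading word instead. A small hedged sketch of that adjustment; the instance name and file path are illustrative:

```python
# check_tool is an assumed check_file_related_tool instance; the reply format follows
# the new prompt, e.g. "Yes. The file documents installation steps."
verdict = check_tool.run("docs/INSTALL.md")
is_related = verdict.strip().lower().startswith("yes")
explanation = verdict.split(".", 1)[1].strip() if "." in verdict else ""
```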
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/dockergeneration_task.py

@@ -47,7 +47,7 @@ class DockerGenerationTask(AgentTask):
     def __init__(
         self,
         llm,
-        step_callback = None
+        step_callback = None,
     ):
         super().__init__(llm, step_callback)
         self.repo_path: str | None = None
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/evaluation_installation_task.py

@@ -9,7 +9,8 @@ from pydantic import BaseModel, Field
 from markdownify import markdownify as md

 from bioguider.agents.agent_utils import read_file
-from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
+from bioguider.agents.collection_task import CollectionTask
+from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION, CollectionGoalItemEnum
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
 from bioguider.rag.data_pipeline import count_tokens
 from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
@@ -32,14 +33,17 @@ Your task is to analyze the provided files related to installation and generate
 1. **Installation Available**: Is the installation section in document (like README.md or INSTALLATION)?
    * Output: `Yes` or `No`

-2. **Installation Tutorial**: Is the installation tutorial provided?
+2. **Installation Tutorial**: Is the step-by-step installation tutorial provided?
    * Ouput: `Yes` or `No`

 3. **Number of required Dependencies Installation**: The number of dependencies that are required to install
    * Output: Number
    * Suggest specific improvements if necessary, such as missing dependencies

-4. **Overall Score**: Give an overall quality rating of the Installation information.
+4. **Compatible Operating System**: Is the compatible operating system described?
+   * Output: `Yes` or `No`
+
+5. **Overall Score**: Give an overall quality rating of the Installation information.
    * Output: `Poor`, `Fair`, `Good`, or `Excellent`

 ---
@@ -53,6 +57,7 @@ Your final report must **exactly match** the following format. Do not add or omi
 **Dependency:**
 * number: [Number]
 * suggestions: <suggestion to improve **dependency information** like missing dependencies
+**Compatible Operating System:** [Yes / No]
 **Overall Score:** [Poor / Fair / Good / Excellent]

 ---
@@ -113,6 +118,7 @@ class StructuredEvaluationInstallationResult(BaseModel):
     install_tutorial: Optional[bool]=Field(description="A boolean value. Is the installation tutorial provided?")
     dependency_number: Optional[int]=Field(description="A number. It is the number of dependencies that are required to install.")
     dependency_suggestions: Optional[str]=Field(description="A string value. It is the specific improvements if necessary, such as missing dependencies")
+    compatible_os: Optional[bool]=Field(description="A boolean value. Is compatible operating system described?")
     overall_score: Optional[str]=Field(description="A overall scroll for the installation quality, could be `Poor`, `Fair`, `Good`, or `Excellent`")

 class EvaluationInstallationResult(BaseModel):
@@ -163,8 +169,9 @@ class EvaluationInstallationTask(EvaluationTask):
         gitignore_path,
         meta_data = None,
         step_callback = None,
+        summarized_files_db = None,
     ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
         self.evaluation_name = "Installation Evaluation"


@@ -204,6 +211,8 @@ class EvaluationInstallationTask(EvaluationTask):
             instruction_prompt=EVALUATION_INSTRUCTION,
             schema=StructuredEvaluationInstallationResult,
         )
+        res: StructuredEvaluationInstallationResult = res
+        res.dependency_number = 0 if res.dependency_number is None else res.dependency_number
         self.print_step(step_output=reasoning_process)
         self.print_step(token_usage=token_usage)

@@ -235,7 +244,7 @@ class EvaluationInstallationTask(EvaluationTask):
         }
         return evaluation, token_usage

-    def _evaluate(self, files: list[str] | None = None) -> tuple[dict | None, dict]:
+    def _evaluate(self, files: list[str] | None = None) -> tuple[dict | None, dict, list[str]]:
         evaluation, token_usage = self._free_evaluate(files)
         structured_evaluation, structured_token_usage = self._structured_evaluate(files)

@@ -245,5 +254,20 @@ class EvaluationInstallationTask(EvaluationTask):
         }
         total_token_usage = increase_token_usage(token_usage, structured_token_usage)

-        return combined_evaluation, total_token_usage
-
+        return combined_evaluation, total_token_usage, files
+
+    def _collect_files(self):
+        task = CollectionTask(
+            llm=self.llm,
+            step_callback=self.step_callback,
+        )
+        task.compile(
+            repo_path=self.repo_path,
+            gitignore_path=Path(self.repo_path, ".gitignore"),
+            db=self.summarized_files_db,
+            goal_item=CollectionGoalItemEnum.Installation.name,
+        )
+        files = task.collect()
+        if files is None:
+            return []
+        return files
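With `_collect_files()` in place, the installation evaluation can discover its own input files through `CollectionTask` when the caller supplies none, and `_evaluate()` now returns that file list as a third element. A hedged sketch of the adjusted call pattern; how `EvaluationManager` actually drives these (protected) methods is outside the shown diff:

```python
from bioguider.agents.evaluation_installation_task import EvaluationInstallationTask

task = EvaluationInstallationTask(
    llm,                                      # BaseChatOpenAI instance, assumed to exist
    repo_path="/path/to/repo",
    gitignore_path="/path/to/repo/.gitignore",
    meta_data=None,
    step_callback=print,
    summarized_files_db=db,                   # SummarizedFilesDb instance, assumed to exist
)
files = task._collect_files()                 # CollectionTask-driven discovery of installation files
evaluation, token_usage, evaluated_files = task._evaluate(files)  # 0.2.12 returned a 2-tuple
```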
{bioguider-0.2.12 → bioguider-0.2.14}/bioguider/agents/evaluation_readme_task.py

@@ -7,6 +7,7 @@ from langchain_openai.chat_models.base import BaseChatOpenAI
 from pydantic import BaseModel, Field

 from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
+from bioguider.utils.gitignore_checker import GitignoreChecker

 from ..utils.pyphen_utils import PyphenReadability
 from bioguider.agents.agent_utils import increase_token_usage, read_file, summarize_file
@@ -303,9 +304,10 @@ class EvaluationREADMETask(EvaluationTask):
         repo_path: str,
         gitignore_path: str,
         meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None
+        step_callback: Callable | None = None,
+        summarized_files_db = None,
     ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
         self.evaluation_name = "README Evaluation"

     def _structured_evaluate(self, free_readme_evaluations: dict[str, dict]):
@@ -455,7 +457,7 @@ class EvaluationREADMETask(EvaluationTask):
         total_token_usage = increase_token_usage(total_token_usage, token_usage)
         return readme_evaluations, total_token_usage

-    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
+    def _evaluate(self, files: list[str]) -> tuple[dict, dict, list[str]]:
         free_readme_evaluations, free_token_usage = self._free_evaluate(files)
         structured_readme_evaluations, structured_token_usage = self._structured_evaluate(free_readme_evaluations)

@@ -472,6 +474,26 @@ class EvaluationREADMETask(EvaluationTask):

         total_token_usage = increase_token_usage(free_token_usage, structured_token_usage)

-        return combined_evaluations, total_token_usage
+        return combined_evaluations, total_token_usage, files

+    def _collect_files(self):
+        """
+        Search for a README file in the repository directory.
+        """
+        possible_readme_files = [
+            "readme.md",
+            "readme.rst",
+            "readme.txt",
+            "readme",
+        ]
+        repo_path = self.repo_path
+        gitignore_path = Path(repo_path, ".gitignore")
+        gitignore_checker = GitignoreChecker(
+            directory=repo_path, gitignore_path=gitignore_path
+        )
+        found_readme_files = gitignore_checker.check_files_and_folders(
+            check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,
+        )
+
+        return found_readme_files

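The new `_collect_files()` makes README discovery gitignore-aware and case-insensitive: only the file name is compared, lower-cased, against a short allow-list. A standalone sketch of that matching rule (illustrative; the package's own version runs inside `GitignoreChecker.check_files_and_folders`):

```python
from pathlib import Path

POSSIBLE_README_FILES = {"readme.md", "readme.rst", "readme.txt", "readme"}

def is_readme(relative_path: str) -> bool:
    # Compare only the file name, case-insensitively, against the allow-list.
    return Path(relative_path).name.lower() in POSSIBLE_README_FILES

assert is_readme("README.md")
assert is_readme("docs/Readme.rst")          # matches in subdirectories too
assert not is_readme("readme_template.md")   # near-misses are rejected
```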