bioguider 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release: this version of bioguider has been flagged as potentially problematic.
- bioguider/agents/agent_task.py +8 -4
- bioguider/agents/agent_tools.py +17 -14
- bioguider/agents/agent_utils.py +40 -4
- bioguider/agents/collection_observe_step.py +7 -5
- bioguider/agents/collection_plan_step.py +9 -7
- bioguider/agents/collection_task.py +15 -5
- bioguider/agents/collection_task_utils.py +46 -15
- bioguider/agents/dockergeneration_task.py +1 -1
- bioguider/agents/evaluation_installation_task.py +29 -7
- bioguider/agents/evaluation_readme_task.py +26 -4
- bioguider/agents/evaluation_submission_requirements_task.py +153 -0
- bioguider/agents/evaluation_task.py +19 -6
- bioguider/agents/identification_observe_step.py +7 -1
- bioguider/agents/identification_plan_step.py +6 -1
- bioguider/agents/identification_task.py +23 -4
- bioguider/agents/identification_task_utils.py +2 -0
- bioguider/agents/prompt_utils.py +44 -4
- bioguider/managers/evaluation_manager.py +38 -46
- bioguider/utils/constants.py +2 -0
- {bioguider-0.2.12.dist-info → bioguider-0.2.13.dist-info}/METADATA +1 -1
- {bioguider-0.2.12.dist-info → bioguider-0.2.13.dist-info}/RECORD +23 -22
- {bioguider-0.2.12.dist-info → bioguider-0.2.13.dist-info}/LICENSE +0 -0
- {bioguider-0.2.12.dist-info → bioguider-0.2.13.dist-info}/WHEEL +0 -0
bioguider/agents/agent_task.py
CHANGED
@@ -13,7 +13,12 @@ class AgentTask(ABC):
     A class representing a step in an agent's process.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        step_callback: Callable | None = None,
+        summarized_files_db: SummarizedFilesDb | None = None,
+    ):
         """
         Initialize the AgentStep with a language model and a callback function.
 
@@ -23,7 +28,7 @@ class AgentTask(ABC):
         """
         self.llm = llm
         self.step_callback = step_callback
-        self.
+        self.summarized_files_db = summarized_files_db
         self.graph: CompiledGraph | None = None
 
     def _print_step(
@@ -45,7 +50,7 @@ class AgentTask(ABC):
                 token_usage=token_usage,
             )
 
-    def compile(self, repo_path: str, gitignore_path: str,
+    def compile(self, repo_path: str, gitignore_path: str, **kwargs):
         """
         Compile the agent step with the given repository and gitignore paths.
 
@@ -55,7 +60,6 @@ class AgentTask(ABC):
         **kwargs: derived class may pass more arguments to implmented _compile(), that is,
             what **kwargs is depends on derived class
         """
-        self.summary_file_db = db
         self._compile(repo_path, gitignore_path, **kwargs)
 
     @abstractmethod
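The structural change here is that the summarized-files database moves from `compile(db=...)` into the constructor, so every task subclass receives it at construction time. A minimal sketch of the new contract, with `SummarizedFilesDb` and `BaseChatOpenAI` as stand-in classes rather than the real imports:

```python
from typing import Callable

class SummarizedFilesDb: ...   # stand-in for bioguider.database.summarized_file_db
class BaseChatOpenAI: ...      # stand-in for langchain_openai

class AgentTaskSketch:
    def __init__(
        self,
        llm: BaseChatOpenAI,
        step_callback: Callable | None = None,
        summarized_files_db: SummarizedFilesDb | None = None,
    ):
        # the DB now arrives at construction time instead of via compile(db=...)
        self.llm = llm
        self.step_callback = step_callback
        self.summarized_files_db = summarized_files_db

    def compile(self, repo_path: str, gitignore_path: str, **kwargs):
        # compile() no longer rebinds the DB; it only forwards **kwargs
        self._compile(repo_path, gitignore_path, **kwargs)

    def _compile(self, repo_path, gitignore_path, **kwargs): ...
```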
bioguider/agents/agent_tools.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import logging
 from typing import Callable
 from markdownify import markdownify as md
 from langchain_openai.chat_models.base import BaseChatOpenAI
@@ -7,6 +8,8 @@ from bioguider.utils.file_utils import get_file_type
 from bioguider.agents.agent_utils import read_directory, read_file, summarize_file
 from bioguider.rag.data_pipeline import count_tokens
 
+logger = logging.getLogger(__name__)
+
 class agent_tool:
     def __init__(
         self,
@@ -53,19 +56,12 @@ Returns:
 class summarize_file_tool(agent_tool):
     """ Read a file and generate a summary according to a specified prompt.
 
-
-
-
-        Path to the file to read.
-    summarize_prompt : str, optional
-        Instruction guiding the summarization focus (default is "N/A").
-        Use this to emphasize specific aspects of the content.
+    Args:
+        file_path str: required. The file path to read.
+        summarize_prompt str: optional. A string instruction guiding the summarization focus (default is "N/A"). Use this to emphasize specific aspects of the content.
 
-    Returns
-
-    str or None
-        A summarized version of the file content.
-        Returns None if the file does not exist or cannot be read.
+    Returns:
+        str or None: A summarized version of the file content. Returns None if the file does not exist or cannot be read.
     """
     def __init__(
         self,
@@ -124,8 +120,15 @@ Returns
         if summarized_content is not None:
             return f"summarized content of file {file_path}: " + summarized_content
 
-
-
+        try:
+            file_content = read_file(abs_file_path)
+            file_content = file_content.replace("{", "{{").replace("}", "}}")
+        except UnicodeDecodeError as e:
+            logger.error(str(e))
+            return f"{file_path} is a binary, can't be summarized."
+        except Exception as e:
+            logger.error(str(e))
+            return f"Failed to read {file_path}."
         summarized_content, token_usage = summarize_file(
             self.llm, abs_file_path, file_content, self.detailed_level,
             summary_instructions=self.summarize_instruction,
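The `{` to `{{` doubling in the new read path exists because the file content is later interpolated into a prompt template, where bare braces are treated as template fields. A self-contained illustration using plain `str.format` to stand in for the template engine:

```python
# File content with literal braces, e.g. JSON or code.
content = 'config = {"debug": true}'

# Without escaping, a later formatting pass over a prompt string that embeds
# this content would misread {"debug": true} as a template field and raise.
template = "Summarize this file:\n" + content.replace("{", "{{").replace("}", "}}")
prompt = template.format()  # doubled braces render back as literal { }
assert prompt.endswith('config = {"debug": true}')
```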
bioguider/agents/agent_utils.py
CHANGED
@@ -16,11 +16,12 @@ from langchain.tools import BaseTool
 from langchain.schema import AgentAction, AgentFinish
 from langchain.agents import AgentOutputParser
 from langgraph.prebuilt import create_react_agent
+from langchain_community.callbacks.openai_info import OpenAICallbackHandler
 import logging
 
 from pydantic import BaseModel, Field
 
-from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_SENTENCE_NUM
 from bioguider.utils.file_utils import get_file_type
 from ..utils.gitignore_checker import GitignoreChecker
 from ..database.summarized_file_db import SummarizedFilesDb
@@ -178,8 +179,7 @@ Here is the file content:
 Now, let's start to summarize.
 """)
 
-
-MAX_SENTENCE_NUM=20
+
 def summarize_file(
     llm: BaseChatOpenAI,
     name: str,
@@ -379,6 +379,20 @@ def escape_braces(text: str) -> str:
     text = re.sub(r'(?<!{){(?!{)', '{{', text)
     return text
 
+STRING_TO_OBJECT_SYSTEM_PROMPT = """
+You are an expert to understand data. You will be provided a text, and your task is to extracted structured data from the provided text.
+
+---
+
+### **Instructions**
+1. If no structured data can be extracted, return None
+
+---
+
+### **Input Text**
+{input_text}
+"""
+
 def try_parse_json_object(json_obj: str) -> dict | None:
     json_obj = json_obj.strip()
 
@@ -406,4 +420,26 @@ def try_parse_json_object(json_obj: str) -> dict | None:
         return None
     except Exception as e:
         logger.error(e)
-        return None
+        return None
+
+def try_parse_with_llm(llm: BaseChatOpenAI, input_text: str, schema: any):
+    system_prompt = ChatPromptTemplate.from_template(
+        STRING_TO_OBJECT_SYSTEM_PROMPT
+    ).format(input_text=input_text)
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_prompt)
+    ])
+    agent = prompt | llm.with_structured_output(schema)
+    callback_handler = OpenAICallbackHandler()
+
+    try:
+        res = agent.invoke(
+            input={},
+            config={
+                "callbacks": [callback_handler],
+            },
+        )
+        return res, vars(callback_handler)
+    except Exception as e:
+        logger.error(e)
+        return None
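The intended pairing of the two parsers is: try the cheap strict-JSON path first, then fall back to the new LLM structured-output helper. A hedged usage sketch, assuming `llm` is an already-configured `BaseChatOpenAI` instance and `MyResult` is a hypothetical schema (not part of the package):

```python
from typing import Optional
from pydantic import BaseModel, Field

class MyResult(BaseModel):
    final_answer: Optional[list[str]] = Field(default=None)

def parse_agent_reply(llm, reply: str) -> dict | None:
    obj = try_parse_json_object(reply)  # cheap path: strict JSON parse
    if obj is not None:
        return obj
    parsed = try_parse_with_llm(llm, reply, MyResult)  # LLM fallback
    if parsed is None:                  # the fallback call itself failed
        return None
    res, token_usage = parsed           # (schema instance, usage dict)
    return vars(res)                    # mirrors the diff's own vars(obj) usage
```

Note that `try_parse_with_llm` returns a `(result, usage)` tuple on success but a bare `None` on failure, so callers have to branch before unpacking.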
bioguider/agents/collection_observe_step.py
CHANGED

@@ -5,7 +5,7 @@ from langchain_openai.chat_models.base import BaseChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 from bioguider.agents.agent_utils import ObservationResult
 from bioguider.agents.collection_task_utils import CollectionWorkflowState
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.peo_common_step import PEOCommonStep
 from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS
 
@@ -34,11 +34,13 @@ Here is the 2-level file structure of the repository (`f` = file, `d` = directory)
 
 * Provide your reasoning under **Analysis**
 * Then list all relevant files and folders under **FinalAnswer**
+* **FinalAnswer** format must exactly match this format:
+  **FinalAnswer**: {{"final_answer": [<file path>, <file path>, <file path>, ...]}}
 * Be sure to include the **full relative paths** with respect to the repository root.
-* Your answer **must
+* Your answer **must exactly match the follwing format** (note: no JSON code block, no additional comments), **do not** make up anything:
 
 ```
-**Analysis**: your analysis here
+**Analysis**: your analysis here
 **FinalAnswer**: {{"final_answer": ["path/to/file1", "path/to/file2", ...]}}
 ```
 4. If you believe **more files still need to be collected**:
@@ -80,8 +82,8 @@ class CollectionObserveStep(PEOCommonStep):
         repo_structure = self.repo_structure
         intermediate_steps = self._build_intermediate_steps(state)
         prompt = ChatPromptTemplate.from_template(COLLECTION_OBSERVE_SYSTEM_PROMPT)
-        important_instructions = "N/A" if "
-            else collection_item["
+        important_instructions = "N/A" if "observe_important_instructions" not in collection_item or len(collection_item["observe_important_instructions"]) == 0 \
+            else collection_item["observe_important_instructions"]
         return prompt.format(
             goal_item_desc=goal_item_desc,
             related_file_description=collection_item["related_file_description"],
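Pinning an exact **FinalAnswer** shape lets the observe step recover the file list with a simple scan instead of free-form parsing. A rough self-contained sketch of such a scan (the regex and function name are illustrative, not the package's actual parser):

```python
import json
import re

def extract_final_answer(reply: str) -> list[str] | None:
    # Match the literal **FinalAnswer**: marker followed by a JSON object.
    m = re.search(r"\*\*FinalAnswer\*\*:\s*(\{.*\})", reply, re.DOTALL)
    if m is None:
        return None
    try:
        return json.loads(m.group(1)).get("final_answer")
    except json.JSONDecodeError:
        return None

reply = '**Analysis**: looks complete\n**FinalAnswer**: {"final_answer": ["README.md", "setup.py"]}'
assert extract_final_answer(reply) == ["README.md", "setup.py"]
```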
bioguider/agents/collection_plan_step.py
CHANGED

@@ -8,7 +8,7 @@ from bioguider.agents.agent_utils import (
     PlanAgentResultJsonSchema,
     PlanAgentResult,
 )
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.peo_common_step import PEOCommonStep
 from bioguider.agents.collection_task_utils import CollectionWorkflowState
 from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS
@@ -57,7 +57,9 @@ Here are the results from previous steps:
 
 3. You may use the `read_directory` tool to explore directory contents, but avoid using it in the first step unless necessary.
 
-4.
+4. Your plan can only use the above tools, **do not** make up any tools not in the above tools list.
+
+5. Your planned step input file or input directory must come from the above repository files structure, **do not** make up file name or directory name.
 
 ---
 
@@ -65,12 +67,12 @@ Here are the results from previous steps:
 {important_instructions}
 
 ### **Output Format**
-Your plan
+Your plan **must exactly match** a sequence of steps in the following format, **do not** make up anything:
 
-Step: <tool name> # Tool name must be one of {tool_names}
+Step: <tool name> # Tool name **must be one** of {tool_names}
 Step Input: <file or directory name>
 
-Step: <tool name>
+Step: <tool name> # Tool name **must be one** of {tool_names}
 Step Input: <file or directory name>
 ...
 """)
@@ -105,8 +107,8 @@ class CollectionPlanStep(PEOCommonStep):
         step_analysis, step_thoughts = self._build_intermediate_analysis_and_thoughts(state)
         goal = ChatPromptTemplate.from_template(COLLECTION_GOAL).format(goal_item=collection_item["goal_item"])
         related_file_description = collection_item["related_file_description"]
-        important_instructions="N/A" if "
-            else collection_item["
+        important_instructions="N/A" if "plan_important_instructions" not in collection_item or len(collection_item["plan_important_instructions"]) == 0 \
+            else collection_item["plan_important_instructions"]
         tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
         system_prompt = COLLECTION_PLAN_SYSTEM_PROMPT.format(
             goal=goal,
bioguider/agents/collection_task.py
CHANGED

@@ -50,9 +50,12 @@ class CollectionTask(AgentTask):
     def __init__(
         self,
         llm: BaseChatOpenAI,
-        step_callback: Callable | None = None
+        step_callback: Callable | None = None,
+        summarize_instruction: str | None = "N/A",
+        summarized_files_db: SummarizedFilesDb | None = None,
+        provided_files: list[str] | None = None,
     ):
-        super().__init__(llm, step_callback)
+        super().__init__(llm, step_callback, summarized_files_db=summarized_files_db)
         self.repo_path: str | None = None
         self.gitignore_path: str | None = None
         self.repo_structure: str | None = None
@@ -60,6 +63,8 @@ class CollectionTask(AgentTask):
         self.steps: list[PEOCommonStep] = []
         self.tools: list[any] | None = None
         self.custom_tools: list[Tool] | None = None
+        self.summarize_instruction = summarize_instruction
+        self.provided_files = provided_files
 
     def _prepare_tools(self, related_file_goal_item_desc):
         tool_rd = read_directory_tool(repo_path=self.repo_path)
@@ -67,7 +72,8 @@ class CollectionTask(AgentTask):
             llm=self.llm,
             repo_path=self.repo_path,
             output_callback=self.step_callback,
-            db=self.
+            db=self.summarized_files_db,
+            summaize_instruction=self.summarize_instruction,
         )
         tool_rf = read_file_tool(repo_path=self.repo_path)
         tool_cf = check_file_related_tool(
@@ -75,6 +81,8 @@ class CollectionTask(AgentTask):
             repo_path=self.repo_path,
             goal_item_desc=related_file_goal_item_desc,
             output_callback=self.step_callback,
+            summarize_instruction=self.summarize_instruction,
+            summarized_files_db=self.summarized_files_db,
         )
         self.tools = [tool_rd, tool_sum, tool_rf, tool_cf]
         self.custom_tools = [
@@ -99,13 +107,15 @@ class CollectionTask(AgentTask):
                 description=tool_cf.__class__.__doc__,
             ),
         ]
-        self.custom_tools.append(CustomPythonAstREPLTool())
+        # self.custom_tools.append(CustomPythonAstREPLTool())
 
     def _initialize(self):
         # initialize the 2-level file structure of the repo
         if not os.path.exists(self.repo_path):
             raise ValueError(f"Repository path {self.repo_path} does not exist.")
-        files =
+        files = self.provided_files
+        if files is None:
+            files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
        file_pairs = [(f, get_file_type(os.path.join(self.repo_path, f)).value) for f in files]
         self.repo_structure = ""
         for f, f_type in file_pairs:
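The new `provided_files` parameter lets a caller scope the collection to a known file set instead of walking the whole repository. A hedged call-site sketch, assuming `llm` and `db` are already-constructed `BaseChatOpenAI` and `SummarizedFilesDb` instances and the repo path is a placeholder:

```python
task = CollectionTask(
    llm=llm,
    step_callback=print,  # hypothetical callback; any callable works
    summarize_instruction="Focus on install steps.",
    summarized_files_db=db,
    provided_files=["README.md", "docs/install.md"],  # skips read_directory()
)
task.compile(
    repo_path="/path/to/repo",
    gitignore_path="/path/to/repo/.gitignore",
    db=db,
    goal_item=CollectionGoalItemEnum.Installation.name,
)
files = task.collect()
```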
bioguider/agents/collection_task_utils.py
CHANGED

@@ -4,13 +4,17 @@ from langchain.prompts import ChatPromptTemplate
 from langchain_openai.chat_models.base import BaseChatOpenAI
 from langchain_core.messages import AIMessage
 from pydantic import BaseModel, Field
+import logging
 
 from bioguider.agents.agent_tools import agent_tool
 from bioguider.agents.agent_utils import read_file, summarize_file
 from bioguider.agents.peo_common_step import PEOWorkflowState
 from bioguider.agents.common_agent import CommonAgent
 from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.database.summarized_file_db import SummarizedFilesDb
+from bioguider.utils.constants import MAX_FILE_LENGTH
 
+logger = logging.getLogger(__name__)
 
 class CollectionWorkflowState(TypedDict):
     llm: Optional[BaseChatOpenAI]
@@ -46,20 +50,22 @@ Does this file appear to contain related information?
 
 ---
 
-### **Output Format:**
-Respond with
-
+### **Output Format:**
+Respond with exactly two parts:
+1. A single word: Yes or No (indicating if the file meets the goal criteria)
+2. One brief explanatory sentence.
+For example: Yes. This file is a compiled binary file, so, it is related to the compiled standalone file (goal item).
 """)
 
 class CheckFileRelatedResult(BaseModel):
-    is_related:
+    is_related: str = Field(description="A string conclusion specify if the provided file is related. The string value contains two parts:\n 1. A single word: Yes or No (indicating if the file meets the goal criteria).\n 2. One brief explanatory sentence.")
 
 class check_file_related_tool(agent_tool):
     """ Check if the file is related to the goal item
     Args:
         file_path str: file path
     Returns:
-
+        str: A string conclusion. The string conclusion contains two parts:\n 1. A single word: Yes or No (indicating if the file meets the goal criteria).\n 2. One brief explanatory sentence.
     """
     def __init__(
         self,
@@ -67,23 +73,51 @@ Returns:
         repo_path: str,
         goal_item_desc: str,
         output_callback: Callable | None = None,
+        summarize_instruction: str | None = None,
+        summarize_level: int | None = 6,
+        summarized_files_db: SummarizedFilesDb | None = None,
     ):
         super().__init__(llm=llm, output_callback=output_callback)
         self.repo_path = repo_path
         self.goal_item_desc = goal_item_desc
+        self.summarize_instruction = summarize_instruction \
+            if summarize_instruction is not None else "N/A"
+        self.summarize_level = summarize_level
+        self.summarized_files_db = summarized_files_db
 
     def run(self, file_path: str) -> str:
         if not self.repo_path in file_path:
             file_path = os.path.join(self.repo_path, file_path)
         if not os.path.isfile(file_path):
             return "Can't read file"
-
-
+
+        check_prompts = None
+        try:
+            file_content = read_file(file_path)
+        except UnicodeDecodeError as e:
+            logger.error(str(e))
+            check_prompts = "Can't summarize binary file, please decide according to file name and extension."
+        except Exception as e:
+            logger.error(str(e))
+            check_prompts = "Failed to summarize file, please decide according to file name and extension."
+        if check_prompts is None and file_content is None:
             return "Failed to read file"
-
-
-
-
+        if check_prompts is not None:
+            summarized_content = check_prompts
+        else:
+            if len(file_content) > MAX_FILE_LENGTH:
+                file_content = file_content[:MAX_FILE_LENGTH]
+            summarized_content, token_usage = summarize_file(
+                llm=self.llm,
+                name=file_path,
+                content=file_content,
+                level=self.summarize_level,
+                summary_instructions=self.summarize_instruction,
+                db=self.summarized_files_db,
+            )
+            if summarized_content is None:
+                return "Failed to summarize file"
+            self._print_token_usage(token_usage)
 
         prompt = CHECK_FILE_RELATED_USER_PROMPT.format(
             goal_item_desc=self.goal_item_desc,
@@ -102,8 +136,5 @@ Returns:
 
         self._print_step_output(step_output=reasoning)
         self._print_token_usage(token_usage)
-
-        return "Yes, the file is related to the goal item."
-    else:
-        return "No, the file **is not** related to the goal item."
+        return res.is_related
 
bioguider/agents/evaluation_installation_task.py
CHANGED

@@ -9,7 +9,8 @@ from pydantic import BaseModel, Field
 from markdownify import markdownify as md
 
 from bioguider.agents.agent_utils import read_file
-from bioguider.agents.
+from bioguider.agents.collection_task import CollectionTask
+from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION, CollectionGoalItemEnum
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
 from bioguider.rag.data_pipeline import count_tokens
 from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
@@ -32,14 +33,17 @@ Your task is to analyze the provided files related to installation and generate
 1. **Installation Available**: Is the installation section in document (like README.md or INSTALLATION)?
    * Output: `Yes` or `No`
 
-2. **Installation Tutorial**: Is the installation tutorial provided?
+2. **Installation Tutorial**: Is the step-by-step installation tutorial provided?
    * Ouput: `Yes` or `No`
 
 3. **Number of required Dependencies Installation**: The number of dependencies that are required to install
    * Output: Number
    * Suggest specific improvements if necessary, such as missing dependencies
 
-4. **
+4. **Compatible Operating System**: Is the compatible operating system described?
+   * Output: `Yes` or `No`
+
+5. **Overall Score**: Give an overall quality rating of the Installation information.
    * Output: `Poor`, `Fair`, `Good`, or `Excellent`
 
 ---
@@ -53,6 +57,7 @@ Your final report must **exactly match** the following format. Do not add or omit
 **Dependency:**
 * number: [Number]
 * suggestions: <suggestion to improve **dependency information** like missing dependencies
+**Compatible Operating System:** [Yes / No]
 **Overall Score:** [Poor / Fair / Good / Excellent]
 
 ---
@@ -113,6 +118,7 @@ class StructuredEvaluationInstallationResult(BaseModel):
     install_tutorial: Optional[bool]=Field(description="A boolean value. Is the installation tutorial provided?")
     dependency_number: Optional[int]=Field(description="A number. It is the number of dependencies that are required to install.")
     dependency_suggestions: Optional[str]=Field(description="A string value. It is the specific improvements if necessary, such as missing dependencies")
+    compatible_os: Optional[bool]=Field(description="A boolean value. Is compatible operating system described?")
     overall_score: Optional[str]=Field(description="A overall scroll for the installation quality, could be `Poor`, `Fair`, `Good`, or `Excellent`")
 
 class EvaluationInstallationResult(BaseModel):
@@ -163,8 +169,9 @@ class EvaluationInstallationTask(EvaluationTask):
         gitignore_path,
         meta_data = None,
         step_callback = None,
+        summarized_files_db = None,
     ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
         self.evaluation_name = "Installation Evaluation"
 
 
@@ -235,7 +242,7 @@ class EvaluationInstallationTask(EvaluationTask):
         }
         return evaluation, token_usage
 
-    def _evaluate(self, files: list[str] | None = None) -> tuple[dict | None, dict]:
+    def _evaluate(self, files: list[str] | None = None) -> tuple[dict | None, dict, list[str]]:
         evaluation, token_usage = self._free_evaluate(files)
         structured_evaluation, structured_token_usage = self._structured_evaluate(files)
 
@@ -245,5 +252,20 @@ class EvaluationInstallationTask(EvaluationTask):
         }
         total_token_usage = increase_token_usage(token_usage, structured_token_usage)
 
-        return combined_evaluation, total_token_usage
-
+        return combined_evaluation, total_token_usage, files
+
+    def _collect_files(self):
+        task = CollectionTask(
+            llm=self.llm,
+            step_callback=self.step_callback,
+        )
+        task.compile(
+            repo_path=self.repo_path,
+            gitignore_path=Path(self.repo_path, ".gitignore"),
+            db=self.summarized_files_db,
+            goal_item=CollectionGoalItemEnum.Installation.name,
+        )
+        files = task.collect()
+        if files is None:
+            return []
+        return files
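The caller-visible effect of this change is that the task now gathers its own installation files and returns them next to the evaluation. A hedged call-site sketch, assuming `llm`, `repo_path`, and `db` already exist:

```python
from pathlib import Path

task = EvaluationInstallationTask(
    llm,
    repo_path,
    Path(repo_path, ".gitignore"),
    meta_data=None,
    step_callback=None,
    summarized_files_db=db,
)
# Previously: evaluation = task.evaluate(files). Now the task collects files itself.
evaluation, files = task.evaluate()
print(evaluation["structured_evaluation"].compatible_os)  # new field in 0.2.13
```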
bioguider/agents/evaluation_readme_task.py
CHANGED

@@ -7,6 +7,7 @@ from langchain_openai.chat_models.base import BaseChatOpenAI
 from pydantic import BaseModel, Field
 
 from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
+from bioguider.utils.gitignore_checker import GitignoreChecker
 
 from ..utils.pyphen_utils import PyphenReadability
 from bioguider.agents.agent_utils import increase_token_usage, read_file, summarize_file
@@ -303,9 +304,10 @@ class EvaluationREADMETask(EvaluationTask):
         repo_path: str,
         gitignore_path: str,
         meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None
+        step_callback: Callable | None = None,
+        summarized_files_db = None,
     ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
         self.evaluation_name = "README Evaluation"
 
     def _structured_evaluate(self, free_readme_evaluations: dict[str, dict]):
@@ -455,7 +457,7 @@ class EvaluationREADMETask(EvaluationTask):
         total_token_usage = increase_token_usage(total_token_usage, token_usage)
         return readme_evaluations, total_token_usage
 
-    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
+    def _evaluate(self, files: list[str]) -> tuple[dict, dict, list[str]]:
         free_readme_evaluations, free_token_usage = self._free_evaluate(files)
         structured_readme_evaluations, structured_token_usage = self._structured_evaluate(free_readme_evaluations)
 
@@ -472,6 +474,26 @@ class EvaluationREADMETask(EvaluationTask):
 
         total_token_usage = increase_token_usage(free_token_usage, structured_token_usage)
 
-        return combined_evaluations, total_token_usage
+        return combined_evaluations, total_token_usage, files
 
+    def _collect_files(self):
+        """
+        Search for a README file in the repository directory.
+        """
+        possible_readme_files = [
+            "readme.md",
+            "readme.rst",
+            "readme.txt",
+            "readme",
+        ]
+        repo_path = self.repo_path
+        gitignore_path = Path(repo_path, ".gitignore")
+        gitignore_checker = GitignoreChecker(
+            directory=repo_path, gitignore_path=gitignore_path
+        )
+        found_readme_files = gitignore_checker.check_files_and_folders(
+            check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,
+        )
+
+        return found_readme_files
 
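The README lookup that moved here from the manager is a plain case-insensitive basename match against four candidate names. A self-contained check mirroring the callback logic:

```python
from pathlib import Path

possible_readme_files = ["readme.md", "readme.rst", "readme.txt", "readme"]

def looks_like_readme(relative_path: str) -> bool:
    # Case-insensitive match on the basename only, so nested READMEs count too.
    return Path(relative_path).name.lower() in possible_readme_files

assert looks_like_readme("README.md")
assert looks_like_readme("docs/Readme.rst")
assert not looks_like_readme("docs/readme.html")
```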
bioguider/agents/evaluation_submission_requirements_task.py
ADDED

@@ -0,0 +1,153 @@
+
+from typing import Optional
+from pydantic import BaseModel, Field
+from bioguider.agents.agent_utils import try_parse_json_object, try_parse_with_llm
+from bioguider.agents.evaluation_task import EvaluationTask
+from bioguider.agents.collection_task import CollectionTask
+from bioguider.agents.identification_task import IdentificationTask
+from bioguider.agents.prompt_utils import CollectionGoalItemEnum
+from bioguider.agents.evaluation_installation_task import StructuredEvaluationInstallationResult
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+
+DEMO_INSTRUCTION_GOAL = """
+1. Identify if it provides the instructions to run on provided data
+2. Identify if it provides the instructions to run on custom data
+3. Identify if it provides the expected output
+"""
+
+DEMO_INSTRUCTION_FINAL_ANSWER = \
+    '{{"run_on_data_instruction": <True or False>, "run_on_custom_instruction": <True or False>, "expected_output_description": <True Or False>}}'
+
+class DemoInstructionsResult(BaseModel):
+    run_on_data_instruction: Optional[bool] = Field(description="A boolean value. Does it provide instructions on how to run on provided data?")
+    run_on_custom_instruction: Optional[bool] = Field(description="A boolean value. Does it provide instructions on how to run on custom data?")
+    expected_output_description: Optional[bool] = Field(description="A boolean value. Does it provide the description of expected output?")
+
+class EvaluationSubmissionRequirementsTask(EvaluationTask):
+    def __init__(
+        self,
+        llm,
+        repo_path,
+        gitignore_path,
+        meta_data = None,
+        step_callback = None,
+        summarized_files_db = None,
+        readme_files_evaluation: dict | None = None,
+        installation_evaluation: dict | None = None,
+        installation_files: list[str] | None = None
+    ):
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
+        self.evaluation_name = "Submission Requirements Evaluation"
+        self.readme_files_evaluation = readme_files_evaluation
+        self.installation_evaluation = installation_evaluation
+        self.installation_files = installation_files
+
+    def _collect_software_package_content(self):
+        collection_task = CollectionTask(
+            llm = self.llm,
+            step_callback=self.step_callback,
+            summarize_instruction="We are collecting compiled standalone files, source code files and example data files.",
+            summarized_files_db=self.summarized_files_db,
+        )
+        collection_task.compile(
+            repo_path=self.repo_path,
+            gitignore_path=self.gitignore_path,
+            db=self.summarized_files_db,
+            goal_item=CollectionGoalItemEnum.SoftwarePackageContent.name,
+        )
+        files = collection_task.collect()
+
+        return files
+
+    def _evaluate_software_package_content(self):
+        files = self._collect_software_package_content()
+        if len(files) == 3:
+            return {
+                "compiled_standalone_software": files[0].strip().lower() != "n/a",
+                "source_code": files[1].strip().lower() != "n/a",
+                "demo_dataset": files[2].strip().lower() != "n/a",
+            }, files
+        else:
+            return {
+                "compiled_standalone_software": False,
+                "source_code": False,
+                "demo_dataset": False,
+            }, files
+
+    def _evaluatie_demo_instructions(self):
+        readme_files = [f for f in self.readme_files_evaluation.keys() \
+            if self.readme_files_evaluation[f]["evaluation"]["project_level"]]
+        installation_files = self.installation_files if self.installation_files is not None else []
+        provided_files = readme_files + installation_files
+        provided_files = provided_files if len(provided_files) > 0 else None
+        identify_task = IdentificationTask(
+            llm=self.llm,
+            step_callback=self.step_callback,
+            summarized_files_db=self.summarized_files_db,
+            provided_files=provided_files
+        )
+        identify_task.compile(
+            repo_path=self.repo_path,
+            gitignore_path=self.gitignore_path,
+        )
+        final_answer = identify_task.identify_customize_goal(
+            goal="demo instructions",
+            final_answer_example=DEMO_INSTRUCTION_FINAL_ANSWER,
+        )
+        final_answer = final_answer["final_answer"] \
+            if final_answer is not None and "final_answer" in final_answer else final_answer
+        parsed_obj = self._parse_demo_instruction_result(final_answer)
+        return parsed_obj, provided_files
+
+    def _parse_demo_instruction_result(self, result: str | dict):
+        if isinstance(result, dict):
+            return result
+        obj = try_parse_json_object(result)
+        if obj is None:
+            obj, token_usage = try_parse_with_llm(
+                llm=self.llm,
+                input_text=result,
+                schema=DemoInstructionsResult,
+            )
+            obj = vars(obj) if obj is not None else obj
+            self.print_step(token_usage=token_usage)
+        self.print_step(step_output=str(obj))
+
+        return obj
+
+
+    def _combine_evaluation(
+        self,
+        software_evaluation: dict,
+        demo_evaluation: dict,
+    ):
+        readme_files = [f for f in self.readme_files_evaluation.keys() \
+            if self.readme_files_evaluation[f]["evaluation"]["project_level"]]
+        structured_install_evaluation: StructuredEvaluationInstallationResult = \
+            self.installation_evaluation["structured_evaluation"]
+        software_dependency = structured_install_evaluation.dependency_number > 0
+        install_tutorial = structured_install_evaluation.install_tutorial
+        license = any([self.readme_files_evaluation[f]["structured_evaluation"].license_score for f in readme_files])
+        return {
+            **software_evaluation,
+            **demo_evaluation,
+            "complete_readme": len(readme_files) > 0,
+            "software_dependency": software_dependency,
+            "install_tutorial": install_tutorial,
+            "license": license,
+        }
+
+    def _evaluate(self, files):
+
+        software_evaluation, software_files = self._evaluate_software_package_content()
+        demo_evaluation, demo_files = self._evaluatie_demo_instructions()
+        files = list(set(software_files + demo_files))
+
+        return self._combine_evaluation(software_evaluation, demo_evaluation), {**DEFAULT_TOKEN_USAGE}, files
+
+
+    def _collect_files(self):
+        return []
+
+
+
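The SoftwarePackageContent collector is expected to return exactly three slots in a fixed order (compiled binary, source file, example data), with `"N/A"` marking an absent category. A self-contained sketch of the boolean fold this new task performs:

```python
def fold_package_content(files: list[str]) -> dict[str, bool]:
    if len(files) != 3:  # malformed answer: treat every category as absent
        return {"compiled_standalone_software": False,
                "source_code": False,
                "demo_dataset": False}
    keys = ["compiled_standalone_software", "source_code", "demo_dataset"]
    # A slot counts as present unless it is the literal sentinel "N/A".
    return {k: f.strip().lower() != "n/a" for k, f in zip(keys, files)}

assert fold_package_content(["N/A", "app.py", "example.csv"]) == {
    "compiled_standalone_software": False,
    "source_code": True,
    "demo_dataset": True,
}
```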
bioguider/agents/evaluation_task.py
CHANGED

@@ -9,6 +9,7 @@ from langchain_openai.chat_models.base import BaseChatOpenAI
 
 from bioguider.agents.agent_utils import read_file
 from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
+from bioguider.database.summarized_file_db import SummarizedFilesDb
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
 from .common_agent import CommonConversation
 from ..utils.pyphen_utils import PyphenReadability
@@ -158,7 +159,8 @@ class EvaluationTask(ABC):
         repo_path: str,
         gitignore_path: str,
         meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None
+        step_callback: Callable | None = None,
+        summarized_files_db: SummarizedFilesDb | None=None,
     ):
         self.evaluation_name = ""
         self.llm = llm
@@ -166,6 +168,8 @@ class EvaluationTask(ABC):
         self.gitignore_path = gitignore_path
         self.step_callback = step_callback
         self.metadata = meta_data
+        self.summarized_files_db = summarized_files_db
+
     def print_step(
         self,
         step_name: str | None = None,
@@ -180,11 +184,12 @@ class EvaluationTask(ABC):
             token_usage=token_usage,
         )
 
-    def evaluate(self
+    def evaluate(self) -> dict:
         self._enter_evaluation()
-
+        files = self._collect_files()
+        evaluations, token_usage, files = self._evaluate(files)
         self._leave_evaluation(token_usage)
-        return evaluations
+        return evaluations, files
 
     def _enter_evaluation(self):
         self.print_step(step_name=self.evaluation_name)
@@ -196,6 +201,10 @@ class EvaluationTask(ABC):
     def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
         pass
 
+    @abstractmethod
+    def _collect_files(self) -> list[str]:
+        pass
+
 
 EVALUATION_TUTORIAL_SYSTEM_PROMPT="""
 You are an expert in software documentation and developer education.
@@ -262,9 +271,10 @@ class EvaluationTutorialTask(EvaluationTask):
         repo_path: str,
         gitignore_path: str,
         meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None
+        step_callback: Callable | None = None,
+        summarized_files_db = None,
     ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
        self.evaluation_name = "Tutorial Evaluation"
 
     def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
@@ -300,4 +310,7 @@ class EvaluationTutorialTask(EvaluationTask):
         self.print_step(step_output=response)
         evaluations[file] = response
         return evaluations, token_usage
+
+    def _collect_files(self):
+        return []
 
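With the new abstract `_collect_files` hook, `EvaluationTask` becomes a two-hook template method: the base `evaluate()` collects, evaluates, and returns both results. A minimal runnable sketch with the LLM plumbing and the enter/leave bookkeeping stubbed out:

```python
from abc import ABC, abstractmethod

class EvaluationTaskSketch(ABC):
    def evaluate(self):
        files = self._collect_files()                       # hook 1
        evaluations, token_usage, files = self._evaluate(files)  # hook 2
        return evaluations, files

    @abstractmethod
    def _collect_files(self) -> list[str]: ...

    @abstractmethod
    def _evaluate(self, files: list[str]) -> tuple[dict, dict, list[str]]: ...

class TrivialTask(EvaluationTaskSketch):
    def _collect_files(self):
        return ["README.md"]
    def _evaluate(self, files):
        return {f: "ok" for f in files}, {}, files

assert TrivialTask().evaluate() == ({"README.md": "ok"}, ["README.md"])
```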
bioguider/agents/identification_observe_step.py
CHANGED

@@ -34,6 +34,9 @@ Carefully review the **Goal**, **Repository File Structure**, and **Intermediate
 ```
 Be precise and support your reasoning with evidence from the input.
 
+### **Important Instructions**
+{important_instructions}
+
 ### Notes
 We are collecting information over multiple rounds, your thoughts and the output of this step will be persisted, so please **do not rush to provide a Final Answer**.
 If you find the current information insufficient, share your reasoning or thoughts instead—we’ll continue with the next round accordingly.
@@ -58,6 +61,8 @@ class IdentificationObserveStep(PEOCommonStep):
 
     def _prepare_system_prompt(self, state: IdentificationWorkflowState):
         goal = state["goal"]
+        important_instructions = "N/A" \
+            if not "observe_instructions" in state else state["observe_instructions"]
         final_answer_example = state["final_answer_example"]
         intermediate_output = self._build_intermediate_steps(state)
         prompt = ChatPromptTemplate.from_template(IDENTIFICATION_OBSERVATION_SYSTEM_PROMPT)
@@ -67,11 +72,12 @@ class IdentificationObserveStep(PEOCommonStep):
             repo_structure=self.repo_structure,
             intermediate_output=intermediate_output,
             final_answer_example=final_answer_example,
+            important_instructions=important_instructions,
         )
 
     def _execute_directly(self, state: IdentificationWorkflowState):
         system_prompt = self._prepare_system_prompt(state)
-        agent =
+        agent = CommonAgentTwoSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
             system_prompt=system_prompt,
             instruction_prompt="Now, let's begin.",
bioguider/agents/identification_plan_step.py
CHANGED

@@ -36,6 +36,9 @@ meaning that states and variables will persisted through multiple rounds of plan
 developing your collection plan incrementally and reflect on the intermediate observations at each round, instead of coding up
 everything in one go. Be sure to take only one or two actions in each step.
 
+### **Important Instructions**
+{important_instructions}
+
 ### **Output**
 You plan should follow this format:
 Step: tool name, should be one of {tool_names}
@@ -81,6 +84,7 @@ class IdentificationPlanStep(PEOCommonStep):
 
     def _prepare_system_prompt(self, state: IdentificationWorkflowState) -> str:
         goal = state["goal"]
+        important_instructions = "N/A" if not "plan_instructions" in state else state["plan_instructions"]
         repo_structure = self.repo_structure
         intermdediate_steps = self._build_intermediate_steps(state)
         step_analysis, step_thoughts = self._build_intermediate_analysis_and_thoughts(state)
@@ -101,6 +105,7 @@ class IdentificationPlanStep(PEOCommonStep):
             intermediate_analysis=step_analysis,
             intermediate_thoughts=step_thoughts,
             tool_names=tool_names,
+            important_instructions=important_instructions,
         )
 
     def _convert_to_plan_actions_text(self, actions: list[dict]) -> str:
@@ -113,7 +118,7 @@ class IdentificationPlanStep(PEOCommonStep):
 
     def _execute_directly(self, state: IdentificationWorkflowState):
         system_prompt = self._prepare_system_prompt(state)
-        agent =
+        agent = CommonAgentTwoSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
             system_prompt=system_prompt,
             instruction_prompt="Now, let's begin.",
bioguider/agents/identification_task.py
CHANGED

@@ -64,14 +64,17 @@ class IdentificationTask(AgentTask):
         self,
         llm: BaseChatOpenAI,
         step_callback: Callable | None=None,
+        summarized_files_db: SummarizedFilesDb | None = None,
+        provided_files: list[str] | None = None,
     ):
-        super().__init__(llm=llm, step_callback=step_callback)
+        super().__init__(llm=llm, step_callback=step_callback, summarized_files_db=summarized_files_db)
         self.repo_path: str | None = None
         self.gitignore_path: str | None = None
         self.repo_structure: str | None = None
         self.tools = []
         self.custom_tools = []
         self.steps: list[PEOCommonStep] = []
+        self.provided_files = provided_files
 
     def _prepare_tools(self):
         tool_rd = read_directory_tool(repo_path=self.repo_path)
@@ -79,7 +82,7 @@ class IdentificationTask(AgentTask):
             llm=self.llm,
             repo_path=self.repo_path,
             output_callback=self.step_callback,
-            db=self.
+            db=self.summarized_files_db,
         )
         tool_rf = read_file_tool(repo_path=self.repo_path)
 
@@ -106,7 +109,9 @@ class IdentificationTask(AgentTask):
     def _initialize(self):
         if not os.path.exists(self.repo_path):
             raise ValueError(f"Repository path {self.repo_path} does not exist.")
-        files =
+        files = self.provided_files
+        if files is None:
+            files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
         file_pairs = [(f, get_file_type(os.path.join(self.repo_path, f)).value) for f in files]
         self.repo_structure = ""
         for f, f_type in file_pairs:
@@ -188,7 +193,21 @@ class IdentificationTask(AgentTask):
         meta_data = s["final_answer"] if "final_answer" in s else "unknown type"
         return self._parse_meta_data(meta_data)
 
-
+    def identify_customize_goal(
+        self,
+        goal: str,
+        final_answer_example: str,
+        plan_instructions: str = "N/A",
+        observe_instructions: str = "N/A",
+    ):
+        s = self._go_graph({
+            "goal": goal,
+            "final_answer_example": final_answer_example,
+            "plan_instructions": plan_instructions,
+            "observe_instructions": observe_instructions,
+        })
+        return s["final_answer"] if "final_answer" in s else None
+
     def _parse_project_type(self, proj_type_obj: str) -> ProjectTypeEnum:
         proj_type_obj = proj_type_obj.strip()
         the_obj = try_parse_json_object(proj_type_obj)
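`identify_customize_goal` opens the identification graph to arbitrary caller-defined goals. A hedged usage sketch, assuming `llm` and `db` are already constructed; the goal string and `DEMO_INSTRUCTION_FINAL_ANSWER` come from the new submission-requirements task in this diff, while the file names and paths are placeholders:

```python
task = IdentificationTask(
    llm=llm,
    summarized_files_db=db,
    provided_files=["README.md", "INSTALL.md"],  # bypasses the directory scan
)
task.compile(
    repo_path="/path/to/repo",
    gitignore_path="/path/to/repo/.gitignore",
)
final_answer = task.identify_customize_goal(
    goal="demo instructions",
    final_answer_example=DEMO_INSTRUCTION_FINAL_ANSWER,
)
# final_answer is the raw "final_answer" payload, or None if the graph
# never produced one.
```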
bioguider/agents/identification_task_utils.py
CHANGED

@@ -10,6 +10,8 @@ class IdentificationWorkflowState(TypedDict):
 
     plan_actions: Optional[str]
     plan_reasoning: Optional[str]
+    plan_instructions: Optional[str]
+    observe_instructions: Optional[str]
     intermediate_steps: Optional[list[str]]
     final_answer: Optional[str]
     final_answer_example: Optional[str]
bioguider/agents/prompt_utils.py
CHANGED
@@ -91,6 +91,7 @@ class CollectionGoalItemEnum(Enum):
     Installation = "Installation"
     License = "License"
     Contributing = "Contributing"
+    SoftwarePackageContent = "SoftwarePackageContent"
 
 
 
@@ -147,12 +148,18 @@ If **any one** of these is present, the document should be classified as a Docke
        * Examples: `example.py`, `main.py`, `demo.R`, `notebooks/get_started.ipynb`, etc.
        * These should be runnable with minimal configuration.""",
 
-        "
+        "plan_important_instructions": """- Only include minimal code examples that demonstrate basic functionality.
   If multiple example files are found, select only the simplest and most lightweight one that is sufficient to verify the repository works.
 - Give priority to analyzing files whose names include **"install"** or **"Dockerfile"**, as these are most likely to be useful for generating our Dockerfile
 - The total number of collected files should **not exceed 5**.
 - Make sure to include **only one code example**, selecting the most minimal and representative one.
-"""
+""",
+        "observe_important_instructions": """- Only include minimal code examples that demonstrate basic functionality.
+  If multiple example files are found, select only the simplest and most lightweight one that is sufficient to verify the repository works.
+- Give priority to analyzing files whose names include **"install"** or **"Dockerfile"**, as these are most likely to be useful for generating our Dockerfile
+- The total number of collected files should **not exceed 5**.
+- Make sure to include **only one code example**, selecting the most minimal and representative one.
+""",
     },
     "Installation": {
         "goal_item": "Installation Instructions",
@@ -163,11 +170,16 @@ If **any one** of these is present, the document should be classified as Install
 - Configuration steps required to get the software running.
 - Troubleshooting tips related to installation issues.
 - You can include directory names if all files in the directory are relevant to the goal item.""",
-        "
+        "plan_important_instructions": """ - Give priority to analyzing README file that contain installation instructions and the files whose names include **"install"** or **"setup"**.
 - If multiple files are found, select the most comprehensive one that covers the installation process.
 - The total number of collected files should **not exceed 3**.
 - Identify and select **no more than three** installation instruction files — choose the most comprehensive and representative ones.
-"""
+""",
+        "observe_important_instructions": """ - Give priority to analyzing README file that contain installation instructions and the files whose names include **"install"** or **"setup"**.
+ - If multiple files are found, select the most comprehensive one that covers the installation process.
+ - The total number of collected files should **not exceed 3**.
+ - Identify and select **no more than three** installation instruction files — choose the most comprehensive and representative ones.
+""",
     },
     "License": {
         "goal_item": "License Information",
@@ -187,6 +199,34 @@ If **any one** of these is present, the document should be classified as Contrib
 - Any file that contains instructions for developers on how to contribute to the project, including coding standards, testing procedures, and submission processes.
 - You can include directory names if all files in the directory are relevant to the goal item.""",
     },
+    "SoftwarePackageContent": {
+        "goal_item": "Software Package Content",
+        "related_file_description": """A file qualifies as **Software Package Content** if it meets **at least one** of the following elements.
+- A compiled binary file that may be qualified as a compiled standalone software, please carefully analyze a binary file and its file name to identify if it is a compiled standalone software
+- A source code file, like a file whose extension is `.py`, `.R`, `.ipynb`, `.ts`, or `.js`.
+- An example data which is used to demonstrate usage or for tutorial. Image file should not be considered as example data.
+""",
+        "plan_important_instructions": """ - A comiled standalone software file is non-textual and appears to be in an executable format (e.g., `.exe`, `.dll`, `.so`, `.bin`, `.elf`).
+ - A comiled standalone software file **is not a script or compiled library**, that is, It is not a wrapper script (e.g., shell, Python, Python notebook or Rmd) nor a dynamic/shared library meant for linking.
+ - When identifying source code file, prioritize analyzing the file's **extension** and **file name** and try to avoid reading file, using check_file_related_tool or summarizing file content.
+ - When identifying example data, prioritize analyzing the file's **extension** (like .dat, .csv, .fastq, and so on) and **file name** (like example_data.txt, example.dat, and so on). If extension/name is ambiguous, use summarizing file content to decide.
+ - **Note**: You **only need to detect** whether at least **one** compiled standalone software file, **one** source code file and **one** example data file exist — no need to list all such files.
+""",
+        "observe_important_instructions": """ - A comiled standalone software file is non-textual and appears to be in an executable format (e.g., `.exe`, `.dll`, `.so`, `.bin`, `.elf`).
+ - A comiled standalone software file **is not a script or compiled library**, that is, It is not a wrapper script (e.g., shell, Python, Python notebook or Rmd) nor a dynamic/shared library meant for linking.
+ - When identifying source code file, prioritize analyzing the file's **extension** and **file name** and try to avoid reading file, using check_file_related_tool or summarizing file content.
+ - When identifying example data, prioritize analyzing the file's **extension** (like .dat, .csv, .fastq, and so on) and **file name** (like example_data.txt, example.dat, and so on). If extension/name is ambiguous, use summarizing file content to decide.
+ - **Note**: You **only need to detect** whether at least **one** compiled standalone software file, **one** source code file and **one** example data file exist — no need to list all such files.
+ - **Final answer format**: If you believe **all relevant files** have been collected:
+   Your final answer **must exactly match** the following format:
+   **FinalAnswer:** {{"final_answer": [<N/A or a compiled filename>, <N/A or a source file name>, <N/A or a example data file name>]}}
+   For each category, return a single file name or `"N/A"` if none found. And the return array must exactly follow this order: [<A comiled standalone software file name>, <A source code file name>, <A example data file name>]
+   For example, **FinalAnswer:** {{"final_answer": ["N/A", "app.py", "example.csv"]}} indicates:
+   * No compiled standalone software found
+   * `app.py` found as source code
+   * `example.csv` found as example data
+""",
+    },
 }
 
 
@@ -11,6 +11,7 @@ from ..utils.file_utils import parse_repo_url
|
|
|
11
11
|
from ..database.summarized_file_db import SummarizedFilesDb
|
|
12
12
|
from ..agents.evaluation_readme_task import EvaluationREADMETask
|
|
13
13
|
from ..agents.evaluation_installation_task import EvaluationInstallationTask
|
|
14
|
+
from ..agents.evaluation_submission_requirements_task import EvaluationSubmissionRequirementsTask
|
|
14
15
|
from ..agents.collection_task import CollectionTask
|
|
15
16
|
|
|
16
17
|
class EvaluationManager:
|
|
@@ -65,69 +66,60 @@ class EvaluationManager:
             gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
             meta_data=self.project_metadata,
             step_callback=self.step_callback,
+            summarized_files_db=self.summary_file_db,
         )
-        readme_files = self._find_readme_files()
-        results = task.evaluate(
+        # readme_files = self._find_readme_files()
+        results, readme_files = task.evaluate()
         return results, readme_files

     def evaluate_tutorial(self):
-        task = CollectionTask(
-            llm=self.llm,
-            step_callback=self.step_callback,
-        )
-        task.compile(
-            repo_path=self.rag.repo_dir,
-            gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
-            db=self.summary_file_db,
-            goal_item=CollectionGoalItemEnum.Tutorial.name,
-        )
-        s = task.collect()
-        if s is None or 'final_answer' not in s:
-            return None
+        pass
+        # task = CollectionTask(
+        #     llm=self.llm,
+        #     step_callback=self.step_callback,
+        # )
+        # task.compile(
+        #     repo_path=self.rag.repo_dir,
+        #     gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
+        #     db=self.summary_file_db,
+        #     goal_item=CollectionGoalItemEnum.Tutorial.name,
+        # )
+        # s = task.collect()
+        # if s is None or 'final_answer' not in s:
+        #     return None

     def evaluate_installation(self):
-        task = EvaluationInstallationTask(
+        evaluation_task = EvaluationInstallationTask(
             llm=self.llm,
-            step_callback=self.step_callback,
-        )
-        task.compile(
             repo_path=self.rag.repo_dir,
             gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
-
-
+            meta_data=self.project_metadata,
+            step_callback=self.step_callback,
         )
-        files =
-
-
-
+        evaluation, files = evaluation_task.evaluate()
+        return evaluation, files
+
+    def evaluate_submission_requirements(
+        self,
+        readme_files_evaluation: dict | None = None,
+        installation_files: list[str] | None = None,
+        installation_evaluation: dict[str] | None = None,
+    ):
+        evaluation_task = EvaluationSubmissionRequirementsTask(
             llm=self.llm,
             repo_path=self.rag.repo_dir,
             gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
             meta_data=self.project_metadata,
             step_callback=self.step_callback,
+            summarized_files_db=self.summary_file_db,
+            readme_files_evaluation=readme_files_evaluation,
+            installation_files=installation_files,
+            installation_evaluation=installation_evaluation,
         )
-        evaluation = evaluation_task.evaluate(
+        evaluation, files = evaluation_task.evaluate()
+
         return evaluation, files

-
-        """
-        Search for a README file in the repository directory.
-        """
-        possible_readme_files = [
-            "readme.md",
-            "readme.rst",
-            "readme.txt",
-            "readme",
-        ]
-        repo_path = self.rag.repo_dir
-        gitignore_path = Path(repo_path, ".gitignore")
-        gitignore_checker = GitignoreChecker(
-            directory=repo_path, gitignore_path=gitignore_path
-        )
-        found_readme_files = gitignore_checker.check_files_and_folders(
-            check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,
-        )
-
-        return found_readme_files
+

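The new evaluate_submission_requirements entry point accepts the outputs of the other two evaluations as optional inputs, so the three passes are evidently meant to be chained. A usage sketch under that assumption; `manager` stands in for an already-constructed EvaluationManager, whose constructor is not shown in this hunk.

# Hypothetical driver code; only the method names, signatures, and return
# tuples below appear in this diff, the surrounding setup is assumed.
readme_evaluation, readme_files = manager.evaluate_readme()
installation_evaluation, installation_files = manager.evaluate_installation()

# Feeding the earlier results forward spares the submission-requirements
# pass from re-deriving the README and installation findings.
submission_evaluation, files = manager.evaluate_submission_requirements(
    readme_files_evaluation=readme_evaluation,
    installation_files=installation_files,
    installation_evaluation=installation_evaluation,
)
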
bioguider/utils/constants.py
CHANGED

{bioguider-0.2.12.dist-info → bioguider-0.2.13.dist-info}/RECORD
CHANGED
@@ -1,49 +1,50 @@
 bioguider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bioguider/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bioguider/agents/agent_task.py,sha256=
-bioguider/agents/agent_tools.py,sha256=
-bioguider/agents/agent_utils.py,sha256=
+bioguider/agents/agent_task.py,sha256=B_QGkoA96GdrYSX29TQ_tct_y8z4zS4l5Cn0-eph88k,2863
+bioguider/agents/agent_tools.py,sha256=r21wHV6a-Ic2T0dk4YzA-_d7PodHPM3GzRxJqv-llSw,7286
+bioguider/agents/agent_utils.py,sha256=FFFhZvL_THvZ2Y7QwFmi3jsUd1i7PYi9tPJNMTVUsgY,14702
 bioguider/agents/collection_execute_step.py,sha256=Ev4BLjjmBdsc52M1zrq7QK8g7fsffDkSxu-jN2rvedw,5614
-bioguider/agents/collection_observe_step.py,sha256=
-bioguider/agents/collection_plan_step.py,sha256=
-bioguider/agents/collection_task.py,sha256=
-bioguider/agents/collection_task_utils.py,sha256=
+bioguider/agents/collection_observe_step.py,sha256=N_P5NadFa0usO0M9cXXJvKJoZofwcDW0cPSfnwvPEO4,4786
+bioguider/agents/collection_plan_step.py,sha256=Nn0f8AOkEDCDtnhaqE7yCQoi7PVpsHmiUcsIqC0T0dQ,5956
+bioguider/agents/collection_task.py,sha256=ZLUxDgh8OkY5INMfC283RdxEYCZJTksoLDNEMfKg-3s,7865
+bioguider/agents/collection_task_utils.py,sha256=CmOh3HZEocuLj5VZCkLbD6P8O5tzyuyFa8Ykd-1GPGE,5356
 bioguider/agents/common_agent.py,sha256=eGs8m8bjO0dmW6lDIen7DQNdWdHD7j8Udf3XhL1k6vI,5242
 bioguider/agents/common_agent_2step.py,sha256=Vton0RKtmMyEgIIFhnBk4CFU_hynX0LvwREcZ9kvMxQ,7918
 bioguider/agents/common_step.py,sha256=GdOCbmj1pwh4etg-futVFYVDQuoUG89DnIrw-B6QbzM,2594
 bioguider/agents/dockergeneration_execute_step.py,sha256=F92jDlkc6KjAvTkX7q1FsCYP8J15SCaNgmwh3YPqfDo,6500
 bioguider/agents/dockergeneration_observe_step.py,sha256=93PO_Y4YyUShVTKRt0nErcjb-xYTcwcZCj7TgniS9t4,6098
 bioguider/agents/dockergeneration_plan_step.py,sha256=SB8tQM9PkIKsD2o1DFD7bedcxz6r6hSy8n_EVK60Fz0,7235
-bioguider/agents/dockergeneration_task.py,sha256=
+bioguider/agents/dockergeneration_task.py,sha256=mYmorLKnJ-Jku3Qq_Y_kcSTsbYIo3RiVdD0puxqXY5Q,6221
 bioguider/agents/dockergeneration_task_utils.py,sha256=v7emqrJlVW-A5ZdLmPSdiaMSKCR8uzy9UYzx_1cgzyo,9041
-bioguider/agents/evaluation_installation_task.py,sha256=
-bioguider/agents/evaluation_readme_task.py,sha256=
-bioguider/agents/
+bioguider/agents/evaluation_installation_task.py,sha256=9AVE5PJB69aoDX0WMkye0UkYstSU2CsjzVB3eaPrLQo,11128
+bioguider/agents/evaluation_readme_task.py,sha256=_v7ESqMurOg4UXCGqc1zmaVscBx3QbznrUdAKQH9Zws,22597
+bioguider/agents/evaluation_submission_requirements_task.py,sha256=rFCI6bTl64kRiUkEwbh6Ef1LV-YqrgJhGkHaaqE_Pp8,6647
+bioguider/agents/evaluation_task.py,sha256=4VZ7l8oqcUsgJ2YY6s6mkcJu25DgA1qLXv2kVUm2SgI,12654
 bioguider/agents/identification_execute_step.py,sha256=w3IjL8f2WiHCyiLjVSoySnIAXpi1-hK1DLKCnXbAN2Y,5587
-bioguider/agents/identification_observe_step.py,sha256=
-bioguider/agents/identification_plan_step.py,sha256=
-bioguider/agents/identification_task.py,sha256=
-bioguider/agents/identification_task_utils.py,sha256=
+bioguider/agents/identification_observe_step.py,sha256=U-iWDR1AZIUpthEswtMbMkPK4YAbAv2SrvBJAqdKyZo,3988
+bioguider/agents/identification_plan_step.py,sha256=owsTK1NZIuiZL7QPVknJyp9TBRK-mhnuf2RwK4YzaxU,5442
+bioguider/agents/identification_task.py,sha256=hVhgExCv1OPMgOYOGRRJyR3-uiG-VR-OgrvWT6vLn9M,10031
+bioguider/agents/identification_task_utils.py,sha256=nWRK3kCyiglw7576idiDGXEzUBBInkz_w9OsK6OJv2E,599
 bioguider/agents/peo_common_step.py,sha256=iw2c1h7X11WJzSE2tSRg0UAoXH0QOlQDxW9CCzSVMOY,2677
-bioguider/agents/prompt_utils.py,sha256=
+bioguider/agents/prompt_utils.py,sha256=M-KUqGPhtOyFlDN1yNNZdOxTOPFjACF5VhBFW7gXgSc,17151
 bioguider/agents/python_ast_repl_tool.py,sha256=o7-4P1h8jS8ikhGSA4CI_OWQ2a0Eg5tEdmuAp_qrO-0,2519
 bioguider/agents/rag_collection_task.py,sha256=r_jPAMjQcC7dIydKxX77UuMqjJ3MiVKswNZ-yNw7yx8,5199
 bioguider/conversation.py,sha256=DIvk_d7pz_guuORByK1eaaF09FAK-8shcNTrbSUHz9Y,1779
 bioguider/database/summarized_file_db.py,sha256=tDSi2iCvm2-lrx0rBJo0C11gYl9FswsDZTG2-Yhu5cE,4646
-bioguider/managers/evaluation_manager.py,sha256=
+bioguider/managers/evaluation_manager.py,sha256=O8mxrAGllDIkcQVsrRrqxH0eyJHwtoSauWrPe_F7qqU,4778
 bioguider/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 bioguider/rag/config.py,sha256=5g4IqTzgyfZfax9Af9CTkXShgItPOt4_9TEMSekCPik,4602
 bioguider/rag/data_pipeline.py,sha256=bkJ2IUCgPx_OL2uZtPd6cIBor2VFZEIfGd5kVlmiPjw,27292
 bioguider/rag/embedder.py,sha256=jofR8hOj3Aj2IyBQ9y6FeAc84tgq5agbIfCGyFxYpJ8,650
 bioguider/rag/rag.py,sha256=JFPwrJlKDSyd3U3Gce_NSxI5343eNUbqPG9Fs5Pfoq0,4696
 bioguider/settings.py,sha256=BD_iz9aYarxmWUl0XaKl4-D4oTXMhFzljsXLNn2phis,3143
-bioguider/utils/constants.py,sha256=
+bioguider/utils/constants.py,sha256=4PbzF-s49M0nNtGsLjxQ9-APaJqNAjCQrE0wunSvPqw,982
 bioguider/utils/default.gitignore,sha256=XjPdyO2KV8z8iyuqluaNR_70tBQftMpyKL8HboVNyeI,1605
 bioguider/utils/file_utils.py,sha256=9VfAHsz1UkFPtzAmvWZvPl1TMaKIYNjNlLgsfB8tNjg,3683
 bioguider/utils/gitignore_checker.py,sha256=pOYUwsS9D5014LxcZb0cj3s2CAYaD2uF_pYJpaNKcho,6532
 bioguider/utils/pyphen_utils.py,sha256=cdZc3qphkvMDeL5NiZ8Xou13M_uVNP7ifJ-FwxO-0BE,2680
 bioguider/utils/utils.py,sha256=YP3HXgU_rvYDWkEcTzWGiYZw-mlfVrqGhUGSc0_4Pms,900
-bioguider-0.2.
-bioguider-0.2.
-bioguider-0.2.
-bioguider-0.2.
+bioguider-0.2.13.dist-info/LICENSE,sha256=qzkvZcKwwA5DuSuhXMOm2LcO6BdEr4V7jwFZVL2-jL4,1065
+bioguider-0.2.13.dist-info/METADATA,sha256=HpVwAdlrLxjQ5JVIm-prHV33vHyxPHi3IID4x8l-l_c,1868
+bioguider-0.2.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+bioguider-0.2.13.dist-info/RECORD,,

|
|
File without changes
|