bioguider-0.2.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of bioguider might be problematic.
- bioguider/__init__.py +0 -0
- bioguider/agents/__init__.py +0 -0
- bioguider/agents/agent_task.py +88 -0
- bioguider/agents/agent_tools.py +147 -0
- bioguider/agents/agent_utils.py +357 -0
- bioguider/agents/collection_execute_step.py +180 -0
- bioguider/agents/collection_observe_step.py +113 -0
- bioguider/agents/collection_plan_step.py +154 -0
- bioguider/agents/collection_task.py +179 -0
- bioguider/agents/collection_task_utils.py +109 -0
- bioguider/agents/common_agent.py +159 -0
- bioguider/agents/common_agent_2step.py +126 -0
- bioguider/agents/common_step.py +85 -0
- bioguider/agents/dockergeneration_execute_step.py +186 -0
- bioguider/agents/dockergeneration_observe_step.py +153 -0
- bioguider/agents/dockergeneration_plan_step.py +158 -0
- bioguider/agents/dockergeneration_task.py +158 -0
- bioguider/agents/dockergeneration_task_utils.py +220 -0
- bioguider/agents/evaluation_task.py +269 -0
- bioguider/agents/identification_execute_step.py +179 -0
- bioguider/agents/identification_observe_step.py +92 -0
- bioguider/agents/identification_plan_step.py +135 -0
- bioguider/agents/identification_task.py +220 -0
- bioguider/agents/identification_task_utils.py +18 -0
- bioguider/agents/peo_common_step.py +64 -0
- bioguider/agents/prompt_utils.py +190 -0
- bioguider/agents/python_ast_repl_tool.py +69 -0
- bioguider/agents/rag_collection_task.py +130 -0
- bioguider/conversation.py +67 -0
- bioguider/database/summarized_file_db.py +140 -0
- bioguider/managers/evaluation_manager.py +108 -0
- bioguider/rag/__init__.py +0 -0
- bioguider/rag/config.py +117 -0
- bioguider/rag/data_pipeline.py +648 -0
- bioguider/rag/embedder.py +24 -0
- bioguider/rag/rag.py +134 -0
- bioguider/settings.py +103 -0
- bioguider/utils/constants.py +40 -0
- bioguider/utils/default.gitignore +140 -0
- bioguider/utils/file_utils.py +126 -0
- bioguider/utils/gitignore_checker.py +175 -0
- bioguider/utils/pyphen_utils.py +73 -0
- bioguider/utils/utils.py +27 -0
- bioguider-0.2.3.dist-info/LICENSE +21 -0
- bioguider-0.2.3.dist-info/METADATA +44 -0
- bioguider-0.2.3.dist-info/RECORD +47 -0
- bioguider-0.2.3.dist-info/WHEEL +4 -0
bioguider/agents/collection_execute_step.py
@@ -0,0 +1,180 @@
import logging
from langchain_openai.chat_models.base import BaseChatOpenAI
from langchain.tools import BaseTool
from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
from langchain.agents import create_react_agent, AgentExecutor
from langchain_community.callbacks.openai_info import OpenAICallbackHandler

from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
from bioguider.agents.agent_utils import (
    CustomPromptTemplate,
    CustomOutputParser,
)
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
from bioguider.agents.peo_common_step import PEOCommonStep, PEOWorkflowState
from bioguider.agents.collection_task_utils import CollectionWorkflowState

logger = logging.getLogger(__name__)

COLLECTION_EXECUTION_SYSTEM_PROMPT = """---

You are an expert Python developer.
You are given a **plan** and must complete it strictly using Python code and the available tools.

---

### **Available Tools**
{tools}

---

### **Your Task**
Follow the given plan step by step using the exact format below:

```
Thought: Describe what you are thinking or planning to do next.
Action: The tool you are going to use (must be one of: {tool_names})
Action Input: The input to the selected action
Observation: The result returned by the action
```

You may repeat the **Thought → Action → Action Input → Observation** loop as needed.

Once all steps in the plan have been executed, output all the results using this format:

```
Thought: I have completed the plan.
Final Answer:
Action: {{tool_name}}
Action Input: {{input1}}
Action Observation: {{Observation1}}
---
Action: {{tool_name}}
Action Input: {{input2}}
Action Observation: {{Observation2}}
---
...
```

---

### **Example**
```
Action: summarize_file_tool
Action Input: README.md
Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
...
Final Answer:
Action: summarize_file_tool
Action Input: README.md
Action Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
---
Action: check_file_related_tool
Action Input: pyproject.toml
Action Observation: Yes, the file is related to the project.
---
...
```

---

### **Important Notes**

- You must strictly follow the provided plan.
- **Do not take any additional or alternative actions**, even if:
  - No relevant result is found
  - The file content is missing, empty, or irrelevant
- If no information is found in a step, simply proceed to the next action in the plan without improvising.
- Only use the tools specified in the plan actions. No independent decisions or extra steps are allowed.

---

### **Plan**
{plan_actions}

### **Actions Already Taken**
{agent_scratchpad}

---

{input}

---
"""

class CollectionExecuteStep(PEOCommonStep):
    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
        custom_tools: list[BaseTool] | None = None,
    ):
        super().__init__(llm)
        self.step_name = "Collection Execution Step"
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.custom_tools = custom_tools if custom_tools is not None else []

    def _execute_directly(self, state: PEOWorkflowState):
        plan_actions = state["plan_actions"]
        prompt = CustomPromptTemplate(
            template=COLLECTION_EXECUTION_SYSTEM_PROMPT,
            tools=self.custom_tools,
            plan_actions=plan_actions,
            input_variables=[
                "tools", "tool_names", "agent_scratchpad",
                "intermediate_steps", "plan_actions",
            ],
        )
        output_parser = CustomOutputParser()
        agent = create_react_agent(
            llm=self.llm,
            tools=self.custom_tools,
            prompt=prompt,
            output_parser=output_parser,
            stop_sequence=["\nObservation:"],
        )
        callback_handler = OpenAICallbackHandler()
        agent_executor = AgentExecutor(
            agent=agent,
            tools=self.custom_tools,
            max_iterations=10,
        )
        response = agent_executor.invoke(
            input={"plan_actions": plan_actions, "input": "Now, let's begin."},
            config={
                "callbacks": [callback_handler],
                "recursion_limit": 20,
            },
        )

        # parse the response
        if "output" in response:
            output = response["output"]
            if "**Final Answer**" in output:
                final_answer = output.split("**Final Answer**")[-1].strip().strip(":")
                step_output = final_answer
            elif "Final Answer" in output:
                final_answer = output.split("Final Answer")[-1].strip().strip(":")
                step_output = final_answer
            else:
                step_output = output
            self._print_step(state, step_output=step_output)
            state["step_output"] = step_output
        else:
            logger.error("No output found in the response.")
            self._print_step(
                state,
                step_output="Error: No output found in the response.",
            )
            state["step_output"] = "Error: No output found in the response."

        token_usage = vars(callback_handler)
        token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}

        return state, token_usage
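For reference, the final-answer handling at the end of `_execute_directly` can be exercised in isolation. The sketch below is illustrative only; the `extract_final_answer` helper is hypothetical and simply mirrors the branching above to show what ends up in `state["step_output"]` for a typical ReAct transcript.

```python
# Hypothetical helper mirroring the parsing branches in CollectionExecuteStep._execute_directly.
def extract_final_answer(output: str) -> str:
    if "**Final Answer**" in output:
        return output.split("**Final Answer**")[-1].strip().strip(":")
    if "Final Answer" in output:
        return output.split("Final Answer")[-1].strip().strip(":")
    return output  # no marker found: the raw agent output is kept as the step output


transcript = (
    "Thought: I have completed the plan.\n"
    "Final Answer:\n"
    "Action: summarize_file_tool\n"
    "Action Input: README.md\n"
    "Action Observation: # BioGuider ..."
)
# Prints everything after the last "Final Answer" marker, minus the leading colon.
print(extract_final_answer(transcript))
```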
bioguider/agents/collection_observe_step.py
@@ -0,0 +1,113 @@
from typing import Callable
from langchain.tools import BaseTool
from langchain_openai.chat_models.base import BaseChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from bioguider.agents.agent_utils import ObservationResult
from bioguider.agents.collection_task_utils import CollectionWorkflowState
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
from bioguider.agents.peo_common_step import PEOCommonStep
from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS


COLLECTION_OBSERVE_SYSTEM_PROMPT = """You are an expert software developer and technical documentation analyst.
{goal_item_desc}

{related_file_description}
---

### **Repository Structure**
Here is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
{repo_structure}
---

### **Intermediate Output**
{intermediate_output}
---

### **Instructions**
1. Your goal is to identify files that are relevant to the **goal item**.
2. Carefully review the **Goal**, **Repository Structure**, and **Intermediate Output**.
3. If you believe **all relevant files** have been collected:

   * Proceed with the following format:

     * Provide your reasoning under **Analysis**
     * Then list all relevant files and folders under **FinalAnswer**
     * Be sure to include the **full relative paths** with respect to the repository root.
     * Your answer **must follow this exact format** (note: no JSON code block, no additional comments):

```
**Analysis**: your analysis here
**FinalAnswer**: {{"final_answer": ["path/to/file1", "path/to/file2", ...]}}
```
4. If you believe **more files still need to be collected**:
   * Provide your reasoning under **Thoughts**:

```
**Thoughts**: your explanation here
```

5. Important instructions:
{important_instructions}
Be precise and support your reasoning with evidence from the input.
---

### Notes
- We are collecting information over multiple rounds, your thoughts and the output of this step will be persisted, so please **do not rush to provide a Final Answer**.
  If you find the current information insufficient, share your thoughts instead—we’ll continue with the next round accordingly.
"""

class CollectionObserveStep(PEOCommonStep):
    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
    ):
        super().__init__(llm=llm)
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.step_name = "Collection Observation Step"

    def _build_prompt(self, state):
        str_goal_item = state["goal_item"]
        collection_item = COLLECTION_PROMPTS[str_goal_item]
        goal_item_desc = \
            ChatPromptTemplate.from_template(COLLECTION_GOAL).format(goal_item=collection_item["goal_item"])
        repo_structure = self.repo_structure
        intermediate_steps = self._build_intermediate_steps(state)
        prompt = ChatPromptTemplate.from_template(COLLECTION_OBSERVE_SYSTEM_PROMPT)
        important_instructions = "N/A" if "important_instructions" not in collection_item or len(collection_item["important_instructions"]) == 0 \
            else collection_item["important_instructions"]
        return prompt.format(
            goal_item_desc=goal_item_desc,
            related_file_description=collection_item["related_file_description"],
            repo_structure=repo_structure,
            intermediate_output=intermediate_steps,
            important_instructions=important_instructions,
        )

    def _execute_directly(self, state: CollectionWorkflowState):
        system_prompt = self._build_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning_process = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Let's begin thinking.",
            schema=ObservationResult,
        )
        state["final_answer"] = res.FinalAnswer
        analysis = res.Analysis
        thoughts = res.Thoughts
        state["step_analysis"] = analysis
        state["step_thoughts"] = thoughts
        self._print_step(
            state,
            step_output=f"**Observation Reasoning Process**\n{reasoning_process}"
        )
        self._print_step(
            state,
            step_output=f"Final Answer: {res.FinalAnswer if res.FinalAnswer else None}\nAnalysis: {analysis}\nThoughts: {thoughts}",
        )
        return state, token_usage
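`ObservationResult`, the structured-output schema passed to `agent.go(...)` above, is defined in `bioguider/agents/agent_utils.py` and is not shown in this diff section. Judging only from the attribute accesses in `_execute_directly` (`res.Analysis`, `res.Thoughts`, `res.FinalAnswer`), it is roughly a Pydantic model of the following shape; this is an illustrative sketch, not the packaged definition.

```python
from typing import Optional
from pydantic import BaseModel, Field


class ObservationResult(BaseModel):
    # Field names are taken from the accesses in CollectionObserveStep._execute_directly;
    # the types and descriptions here are assumptions.
    Analysis: Optional[str] = Field(None, description="Reasoning given once all relevant files are collected")
    Thoughts: Optional[str] = Field(None, description="Reasoning given when more files still need to be collected")
    FinalAnswer: Optional[str] = Field(
        None, description='String such as {"final_answer": ["path/to/file1", "path/to/file2"]}'
    )
```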
bioguider/agents/collection_plan_step.py
@@ -0,0 +1,154 @@
from langchain_openai.chat_models.base import BaseChatOpenAI
from langchain.tools import BaseTool
from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
from bioguider.agents.agent_utils import (
    convert_plan_to_string,
    get_tool_names_and_descriptions,
    PlanAgentResultJsonSchema,
    PlanAgentResult,
)
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
from bioguider.agents.peo_common_step import PEOCommonStep
from bioguider.agents.collection_task_utils import CollectionWorkflowState
from bioguider.agents.prompt_utils import COLLECTION_GOAL, COLLECTION_PROMPTS

COLLECTION_PLAN_SYSTEM_PROMPT = ChatPromptTemplate.from_template("""### **Goal**
You are an expert developer specializing in the biomedical domain.
**{goal}**

{related_file_description}
---

### **Repository File Structure**
Below is the 2-level file structure of the repository (`f` = file, `d` = directory, `l` - symlink, `u` - unknown):
{repo_structure}

---

### **Function Tools**
You have access to the following function tools:
{tools}

---

### **Intermediate Steps**
Here are the results from previous steps:
{intermediate_steps}

---

### **Intermediate Thoughts**
- **Analysis**: {intermediate_analysis}
- **Thoughts**: {intermediate_thoughts}

---

### **Instructions**

1. We will iterate through multiple **Plan -> Execution -> Observation** loops as needed.
   - All variables and tool outputs are **persisted across rounds**, so you can build on prior results.
   - Develop your plan **incrementally**, and reflect on intermediate observations before proceeding.
   - Limit each step to **one or two actions** — avoid trying to complete everything in a single step.

2. Your task is to collect all files that are relevant to the goal.
   - Start by using the `summarize_file` tool to inspect file content quickly.
   - If needed, follow up with the `read_file` tool for full content extraction.

3. You may use the `read_directory` tool to explore directory contents, but avoid using it in the first step unless necessary.

4. You may use the `python_repl` tool to execute Python code, but this should **also be avoided in the first step**.

---

### **Important Instructions**
{important_instructions}

### **Output Format**
Your plan should be returned as a sequence of steps in the following format:

Step: <tool name> # Tool name must be one of {tool_names}
Step Input: <file or directory name>

Step: <tool name>
Step Input: <file or directory name>
...
""")

class CollectionPlanStep(PEOCommonStep):
    """
    CollectionPlanStep is a step in the collection plan process.
    It is responsible for initializing the tools and compiling the step.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str,
        repo_structure: str,
        gitignore_path: str,
        custom_tools: list[BaseTool] | None = None,
    ):
        super().__init__(llm)
        self.step_name = "Collection Plan Step"
        self.repo_path = repo_path
        self.repo_structure = repo_structure
        self.gitignore_path = gitignore_path
        self.custom_tools = custom_tools if custom_tools is not None else []

    def _prepare_system_prompt(self, state: CollectionWorkflowState) -> str:
        collection_state = state
        goal_item = collection_state["goal_item"]
        collection_item = COLLECTION_PROMPTS[goal_item]
        intermediate_steps = self._build_intermediate_steps(state)
        step_analysis, step_thoughts = self._build_intermediate_analysis_and_thoughts(state)
        goal = ChatPromptTemplate.from_template(COLLECTION_GOAL).format(goal_item=collection_item["goal_item"])
        related_file_description = collection_item["related_file_description"]
        important_instructions = "N/A" if "important_instructions" not in collection_item or len(collection_item["important_instructions"]) == 0 \
            else collection_item["important_instructions"]
        tool_names, tools_desc = get_tool_names_and_descriptions(self.custom_tools)
        system_prompt = COLLECTION_PLAN_SYSTEM_PROMPT.format(
            goal=goal,
            related_file_description=related_file_description,
            repo_structure=self.repo_structure,
            tools=tools_desc,
            intermediate_steps=intermediate_steps,
            intermediate_analysis=step_analysis,
            intermediate_thoughts=step_thoughts,
            tool_names=tool_names,
            important_instructions=important_instructions,
        )
        self._print_step(
            state,
            step_output="**Intermediate Step Output**\n" + intermediate_steps
        )
        self._print_step(
            state,
            step_output=f"**Intermediate Step Analysis**\n{step_analysis}\n**Intermediate Step Thoughts**\n{step_thoughts}",
        )
        return system_prompt

    def _execute_directly(self, state: CollectionWorkflowState):
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning_process = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin the collection plan step.",
            schema=PlanAgentResultJsonSchema,
        )
        PEOCommonStep._reset_step_state(state)
        res = PlanAgentResult(**res)
        self._print_step(state, step_output=f"**Reasoning Process**\n{reasoning_process}")
        self._print_step(state, step_output=f"**Plan**\n{str(res.actions)}")
        state["plan_actions"] = convert_plan_to_string(res)

        return state, token_usage
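`PlanAgentResult`, `PlanAgentResultJsonSchema`, and `convert_plan_to_string` also live in `agent_utils.py`, outside this section. Based on the `res.actions` access above and the "Step / Step Input" format the prompt demands, the round trip is roughly as follows; the field names `step` and `step_input` are hypothetical stand-ins, not the packaged definitions.

```python
from pydantic import BaseModel


class PlanAction(BaseModel):
    # Hypothetical shape of a single plan entry.
    step: str        # tool name, e.g. "summarize_file_tool"
    step_input: str  # file or directory name passed to the tool


class PlanAgentResult(BaseModel):
    # Mirrors the `res.actions` access in CollectionPlanStep._execute_directly.
    actions: list[PlanAction]


def convert_plan_to_string(res: PlanAgentResult) -> str:
    # Render the plan in the "Step / Step Input" format the execute step's prompt expects.
    return "\n\n".join(f"Step: {a.step}\nStep Input: {a.step_input}" for a in res.actions)
```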
bioguider/agents/collection_task.py
@@ -0,0 +1,179 @@
import os
import logging
import re
import json
from pydantic import BaseModel, Field
from typing import Callable, List, Optional, TypedDict, Union
from langchain_core.prompts import ChatPromptTemplate, StringPromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai.chat_models.base import BaseChatOpenAI
from langchain.tools import StructuredTool, Tool, tool, BaseTool
from langchain.agents import (
    initialize_agent,
    AgentType,
    AgentOutputParser,
    create_react_agent,
    AgentExecutor,
)
from langchain.schema import (
    AgentFinish,
    AgentAction,
)
from langgraph.graph import StateGraph, START, END

from bioguider.database.summarized_file_db import SummarizedFilesDb
from bioguider.utils.file_utils import get_file_type
from bioguider.agents.agent_utils import read_directory
from bioguider.agents.collection_task_utils import (
    RELATED_FILE_GOAL_ITEM,
    CollectionWorkflowState,
    check_file_related_tool,
)
from bioguider.agents.common_agent import CommonAgent
from bioguider.agents.agent_tools import (
    read_directory_tool,
    summarize_file_tool,
    read_file_tool,
)
from bioguider.agents.peo_common_step import PEOCommonStep
from bioguider.agents.prompt_utils import COLLECTION_PROMPTS
from bioguider.agents.python_ast_repl_tool import CustomPythonAstREPLTool
from bioguider.agents.agent_task import AgentTask
from bioguider.agents.collection_plan_step import CollectionPlanStep
from bioguider.agents.collection_execute_step import CollectionExecuteStep
from bioguider.agents.collection_observe_step import CollectionObserveStep

logger = logging.getLogger(__name__)

class CollectionTask(AgentTask):
    def __init__(
        self,
        llm: BaseChatOpenAI,
        step_callback: Callable | None = None
    ):
        super().__init__(llm, step_callback)
        self.repo_path: str | None = None
        self.gitignore_path: str | None = None
        self.repo_structure: str | None = None
        self.goal_item: str | None = None
        self.steps: list[PEOCommonStep] = []
        self.tools: list[any] | None = None
        self.custom_tools: list[Tool] | None = None

    def _initialize(self):
        # initialize the 2-level file structure of the repo
        if not os.path.exists(self.repo_path):
            raise ValueError(f"Repository path {self.repo_path} does not exist.")
        files = read_directory(self.repo_path, os.path.join(self.repo_path, ".gitignore"))
        file_pairs = [(f, get_file_type(os.path.join(self.repo_path, f)).value) for f in files]
        self.repo_structure = ""
        for f, f_type in file_pairs:
            self.repo_structure += f"{f} - {f_type}\n"

        collection_item = COLLECTION_PROMPTS[self.goal_item]
        related_file_goal_item_desc = ChatPromptTemplate.from_template(RELATED_FILE_GOAL_ITEM).format(
            goal_item=collection_item["goal_item"],
            related_file_description=collection_item["related_file_description"],
        )
        self.tools = [
            read_directory_tool(repo_path=self.repo_path),
            summarize_file_tool(
                llm=self.llm,
                repo_path=self.repo_path,
                output_callback=self.step_callback,
                db=self.summary_file_db,
            ),
            read_file_tool(repo_path=self.repo_path),
            check_file_related_tool(
                llm=self.llm,
                repo_path=self.repo_path,
                goal_item_desc=related_file_goal_item_desc,
                output_callback=self.step_callback,
            ),
        ]
        self.custom_tools = [Tool(
            name=tool.__class__.__name__,
            func=tool.run,
            description=tool.__class__.__doc__,
        ) for tool in self.tools]
        self.custom_tools.append(CustomPythonAstREPLTool())
        self.steps = [
            CollectionPlanStep(
                llm=self.llm,
                repo_path=self.repo_path,
                repo_structure=self.repo_structure,
                gitignore_path=self.gitignore_path,
                custom_tools=self.custom_tools,
            ),
            CollectionExecuteStep(
                llm=self.llm,
                repo_path=self.repo_path,
                repo_structure=self.repo_structure,
                gitignore_path=self.gitignore_path,
                custom_tools=self.custom_tools,
            ),
            CollectionObserveStep(
                llm=self.llm,
                repo_path=self.repo_path,
                repo_structure=self.repo_structure,
                gitignore_path=self.gitignore_path,
            ),
        ]

    def _compile(self, repo_path: str, gitignore_path: str, **kwargs):
        self.repo_path = repo_path
        self.gitignore_path = gitignore_path
        self.goal_item = kwargs.get("goal_item")
        self._initialize()

        def check_observe_step(state):
            if "final_answer" in state and state["final_answer"] is not None:
                self._print_step(step_name="Final Answer")
                self._print_step(step_output=state["final_answer"])
                return END
            return "plan_step"

        graph = StateGraph(CollectionWorkflowState)
        graph.add_node("plan_step", self.steps[0].execute)
        graph.add_node("execute_step", self.steps[1].execute)
        graph.add_node("observe_step", self.steps[2].execute)
        graph.add_edge(START, "plan_step")
        graph.add_edge("plan_step", "execute_step")
        graph.add_edge("execute_step", "observe_step")
        graph.add_conditional_edges("observe_step", check_observe_step, {"plan_step", END})

        self.graph = graph.compile()

    def collect(self) -> list[str] | None:
        s = self._go_graph({"goal_item": self.goal_item})
        if s is None or 'final_answer' not in s:
            return None
        if s["final_answer"] is None:
            return None
        result = s["final_answer"].strip()
        try:
            json_obj = json.loads(result)
            result = json_obj["final_answer"]
            if isinstance(result, str):
                result = result.strip()
                return [result]
            elif isinstance(result, list):
                return result
            else:
                logger.error(f"Final answer is not a valid JSON list or string: {result}")
                return None
        except json.JSONDecodeError:
            logger.error(f"Final answer is not a valid JSON: {result}")
            return None
        except Exception as e:
            logger.error(str(e))
            return s
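Taken together, `CollectionTask` wires the three steps above into a Plan -> Execute -> Observe loop. A minimal usage sketch follows, assuming the `AgentTask` base class (defined in `agent_task.py`, not shown in this section) exposes a public `compile(...)` wrapper around `_compile(...)`, and that `goal_item` must be one of the keys of `COLLECTION_PROMPTS`:

```python
from langchain_openai import ChatOpenAI

from bioguider.agents.collection_task import CollectionTask

llm = ChatOpenAI(model="gpt-4o", temperature=0)  # model name is illustrative

task = CollectionTask(llm=llm)
task.compile(                                    # assumed public wrapper around _compile(...)
    repo_path="/path/to/local/repo",
    gitignore_path="/path/to/local/repo/.gitignore",
    goal_item="installation",                    # placeholder; must be a key of COLLECTION_PROMPTS
)

files = task.collect()  # relative paths judged relevant to the goal item, or None on failure
print(files)
```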