bioguider-0.2.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic.
- bioguider/__init__.py +0 -0
- bioguider/agents/__init__.py +0 -0
- bioguider/agents/agent_task.py +88 -0
- bioguider/agents/agent_tools.py +147 -0
- bioguider/agents/agent_utils.py +357 -0
- bioguider/agents/collection_execute_step.py +180 -0
- bioguider/agents/collection_observe_step.py +113 -0
- bioguider/agents/collection_plan_step.py +154 -0
- bioguider/agents/collection_task.py +179 -0
- bioguider/agents/collection_task_utils.py +109 -0
- bioguider/agents/common_agent.py +159 -0
- bioguider/agents/common_agent_2step.py +126 -0
- bioguider/agents/common_step.py +85 -0
- bioguider/agents/dockergeneration_execute_step.py +186 -0
- bioguider/agents/dockergeneration_observe_step.py +153 -0
- bioguider/agents/dockergeneration_plan_step.py +158 -0
- bioguider/agents/dockergeneration_task.py +158 -0
- bioguider/agents/dockergeneration_task_utils.py +220 -0
- bioguider/agents/evaluation_task.py +269 -0
- bioguider/agents/identification_execute_step.py +179 -0
- bioguider/agents/identification_observe_step.py +92 -0
- bioguider/agents/identification_plan_step.py +135 -0
- bioguider/agents/identification_task.py +220 -0
- bioguider/agents/identification_task_utils.py +18 -0
- bioguider/agents/peo_common_step.py +64 -0
- bioguider/agents/prompt_utils.py +190 -0
- bioguider/agents/python_ast_repl_tool.py +69 -0
- bioguider/agents/rag_collection_task.py +130 -0
- bioguider/conversation.py +67 -0
- bioguider/database/summarized_file_db.py +140 -0
- bioguider/managers/evaluation_manager.py +108 -0
- bioguider/rag/__init__.py +0 -0
- bioguider/rag/config.py +117 -0
- bioguider/rag/data_pipeline.py +648 -0
- bioguider/rag/embedder.py +24 -0
- bioguider/rag/rag.py +134 -0
- bioguider/settings.py +103 -0
- bioguider/utils/constants.py +40 -0
- bioguider/utils/default.gitignore +140 -0
- bioguider/utils/file_utils.py +126 -0
- bioguider/utils/gitignore_checker.py +175 -0
- bioguider/utils/pyphen_utils.py +73 -0
- bioguider/utils/utils.py +27 -0
- bioguider-0.2.3.dist-info/LICENSE +21 -0
- bioguider-0.2.3.dist-info/METADATA +44 -0
- bioguider-0.2.3.dist-info/RECORD +47 -0
- bioguider-0.2.3.dist-info/WHEEL +4 -0
bioguider/agents/collection_task_utils.py
@@ -0,0 +1,109 @@
+import os
+from typing import Callable, Optional, TypedDict
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from langchain_core.messages import AIMessage
+from pydantic import BaseModel, Field
+
+from bioguider.agents.agent_tools import agent_tool
+from bioguider.agents.agent_utils import read_file, summarize_file
+from bioguider.agents.peo_common_step import PEOWorkflowState
+from bioguider.agents.common_agent import CommonAgent
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+
+
+class CollectionWorkflowState(TypedDict):
+    llm: Optional[BaseChatOpenAI]
+    step_output_callback: Optional[Callable]
+
+    intermediate_steps: Optional[str]
+    step_output: Optional[str]
+    step_analysis: Optional[str]
+    step_thoughts: Optional[str]
+    plan_actions: Optional[list[dict]]
+
+    goal_item: Optional[str]
+    final_answer: Optional[str]
+
+RELATED_FILE_GOAL_ITEM = """
+Your task is to determine whether the file is related to **{goal_item}**.
+
+{related_file_description}
+"""
+
+CHECK_FILE_RELATED_USER_PROMPT = ChatPromptTemplate.from_template("""
+You are given a summary of a file’s content.
+
+{goal_item_desc}
+
+Here is the file summary:
+```
+{summarized_file_content}
+```
+
+### **Question:**
+Does this file appear to contain related information?
+
+---
+
+### **Output Format:**
+Respond with a single word: "Yes" or "No" to indicate whether the file is related to the goal item.
+Do not include any additional text, explanation, or formatting.
+""")
+
+class CheckFileRelatedResult(BaseModel):
+    is_related: bool = Field(description="True if the file is related to the goal item, False otherwise.")
+
+class check_file_related_tool(agent_tool):
+    """ Check if the file is related to the goal item
+    Args:
+        file_path str: file path
+    Returns:
+        str: a "Yes"/"No" sentence stating whether the file is related to the goal item.
+    """
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        repo_path: str,
+        goal_item_desc: str,
+        output_callback: Callable | None = None,
+    ):
+        super().__init__(llm=llm, output_callback=output_callback)
+        self.repo_path = repo_path
+        self.goal_item_desc = goal_item_desc
+
+    def run(self, file_path: str) -> str:
+        if not self.repo_path in file_path:
+            file_path = os.path.join(self.repo_path, file_path)
+        if not os.path.isfile(file_path):
+            return "Can't read file"
+        file_content = read_file(file_path)
+        if file_content is None:
+            return "Failed to read file"
+        summarized_content, token_usage = summarize_file(self.llm, file_path, file_content, 6)
+        if summarized_content is None:
+            return "Failed to summarize file"
+        self._print_token_usage(token_usage)
+
+        prompt = CHECK_FILE_RELATED_USER_PROMPT.format(
+            goal_item_desc=self.goal_item_desc,
+            summarized_file_content=summarized_content,
+        )
+
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        res, _, token_usage, reasoning = agent.go(
+            system_prompt=prompt,
+            instruction_prompt="Now, please check if the file is related to the goal item.",
+            schema=CheckFileRelatedResult,
+        )
+        # res: AIMessage = self.llm.invoke([("human", prompt)])
+        res: CheckFileRelatedResult = res
+        out = res.is_related
+
+        self._print_step_output(step_output=reasoning)
+        self._print_token_usage(token_usage)
+        if out:
+            return "Yes, the file is related to the goal item."
+        else:
+            return "No, the file **is not** related to the goal item."
+
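For orientation, here is a minimal usage sketch of `check_file_related_tool` (not part of the package). It assumes an OpenAI key is configured, a repository checkout at `./my_repo`, and that the module path matches the `collection_task_utils.py` entry in the file listing above; the model name and goal item are invented for illustration.

```python
# Hypothetical usage sketch; model name, repo path, and goal item are invented.
from langchain_openai import ChatOpenAI

from bioguider.agents.collection_task_utils import (
    RELATED_FILE_GOAL_ITEM,
    check_file_related_tool,
)

llm = ChatOpenAI(model="gpt-4o")  # any BaseChatOpenAI-compatible model
goal_desc = RELATED_FILE_GOAL_ITEM.format(
    goal_item="installation instructions",
    related_file_description="Files that explain how to install the package.",
)
tool = check_file_related_tool(
    llm=llm,
    repo_path="./my_repo",
    goal_item_desc=goal_desc,
)
# Relative paths are joined onto repo_path inside run().
print(tool.run("README.md"))  # -> "Yes, ..." or "No, ..." sentence
```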
bioguider/agents/common_agent.py
@@ -0,0 +1,159 @@
+from typing import Any, Callable, Optional
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from langchain_community.callbacks.openai_info import OpenAICallbackHandler
+from langchain_core.messages import SystemMessage, HumanMessage
+from pydantic import BaseModel, Field
+from tenacity import retry, stop_after_attempt, wait_incrementing
+import logging
+
+from bioguider.agents.agent_utils import (
+    escape_braces,
+    increase_token_usage,
+)
+
+logger = logging.getLogger(__name__)
+
+class RetryException(Exception):
+    """Exception that signals a retry is needed"""
+
+    pass
+
+
+class CommonAgentResult(BaseModel):
+    reasoning_process: str = Field(
+        description="A detailed explanation of the thought process or reasoning steps taken to reach a conclusion."
+    )
+
+
+class CommonAgent:
+    def __init__(self, llm: BaseChatOpenAI):
+        self.llm = llm
+        self.exception: RetryException | None = None
+        self.token_usage: dict | None = None
+
+    def go(
+        self,
+        system_prompt: str,
+        instruction_prompt: str,
+        schema: any,
+        pre_process: Optional[Callable] = None,
+        post_process: Optional[Callable] = None,
+        **kwargs: Optional[Any],
+    ):
+        """
+        Execute the agent.
+
+        Args:
+            system_prompt str: system prompt
+            instruction_prompt str: user prompt guiding how the llm executes the agent
+            schema pydantic.BaseModel or json schema: llm output result schema
+            pre_process Callable or None: pre-processor executed before llm.invoke
+            post_process Callable or None: post-processor executed after llm.invoke
+            kwargs None or dict: args for pre_process and post_process
+
+        Return:
+            (output that complies with the input arg `schema`)
+        """
+        self._initialize()
+        if pre_process is not None:
+            is_OK = pre_process(**kwargs)
+            if not is_OK:  # skip
+                return
+
+        return self._invoke_agent(
+            system_prompt,
+            instruction_prompt,
+            schema,
+            post_process,
+            **kwargs,
+        )
+
+    def _initialize(self):
+        self.exception = None
+        self.token_usage = None
+
+    def _process_retryexception_message(
+        self, prompt: ChatPromptTemplate
+    ) -> ChatPromptTemplate:
+        if self.exception is None:
+            return prompt
+
+        existing_messages = prompt.messages
+        updated_messages = existing_messages + [("human", str(self.exception))]
+        self.exception = None
+        updated_prompt = ChatPromptTemplate.from_messages(updated_messages)
+        return updated_prompt
+
+    def _incre_token_usage(self, token_usage):
+        incremental_token_usage = token_usage
+        if not isinstance(token_usage, dict):
+            incremental_token_usage = vars(incremental_token_usage)
+        self.token_usage = increase_token_usage(
+            self.token_usage, incremental_token_usage
+        )
+
+    @retry(
+        stop=stop_after_attempt(5),
+        wait=wait_incrementing(start=1.0, increment=3, max=10),
+    )
+    def _invoke_agent(
+        self,
+        system_prompt: str,
+        instruction_prompt: str,
+        schema: any,
+        post_process: Optional[Callable] = None,
+        **kwargs: Optional[Any],
+    ) -> tuple[Any, Any, dict | None, Any | None]:
+        system_prompt = escape_braces(system_prompt)
+        prompt = ChatPromptTemplate.from_messages([
+            ("system", system_prompt),
+            ("human", instruction_prompt),
+        ])
+        # Initialize the callback handler
+        callback_handler = OpenAICallbackHandler()
+
+        updated_prompt = self._process_retryexception_message(prompt)
+        agent = updated_prompt | self.llm.with_structured_output(schema)
+        try:
+            res = agent.invoke(
+                input={},
+                config={
+                    "callbacks": [callback_handler],
+                },
+            )
+            self._incre_token_usage(callback_handler)
+        except Exception as e:
+            logger.error(str(e))
+            raise e
+        processed_res = res
+        if post_process is not None:
+            try:
+                processed_res = post_process(res, **kwargs)
+            except RetryException as e:
+                logger.error(str(e))
+                self.exception = e
+                raise e
+            except Exception as e:
+                logger.error(str(e))
+                raise e
+        return res, processed_res, self.token_usage, None
+
+class CommonConversation:
+    def __init__(self, llm: BaseChatOpenAI):
+        self.llm = llm
+
+    def generate(self, system_prompt: str, instruction_prompt: str):
+        msgs = [
+            SystemMessage(system_prompt),
+            HumanMessage(instruction_prompt),
+        ]
+        msgs_template = ChatPromptTemplate.from_messages(messages=msgs)
+        callback_handler = OpenAICallbackHandler()
+        result = self.llm.generate(
+            messages=[msgs],
+            callbacks=[callback_handler]
+        )
+        response = result.generations[0][0].text
+        token_usage = result.llm_output.get("token_usage")
+        return response, token_usage
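`CommonAgent.go` composes the system and instruction prompts, pipes them through `with_structured_output(schema)`, retries up to five times via tenacity, and accumulates token usage from the OpenAI callback handler. A minimal sketch of driving it with a pydantic schema; the model name, prompts, and schema below are invented, not from the package:

```python
# Hypothetical driver sketch; assumes OPENAI_API_KEY is set.
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

from bioguider.agents.common_agent import CommonAgent

class LicenseAnswer(BaseModel):
    has_license: bool = Field(description="True if the text mentions a software license.")

agent = CommonAgent(llm=ChatOpenAI(model="gpt-4o"))
# go() returns (raw result, post-processed result, token usage, reasoning=None).
res, processed, token_usage, _ = agent.go(
    system_prompt="You review repository files.",
    instruction_prompt="Does this text mention a license? Text: 'MIT License, 2024'",
    schema=LicenseAnswer,
)
print(res.has_license, token_usage)
```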
bioguider/agents/common_agent_2step.py
@@ -0,0 +1,126 @@
+from typing import Any, Callable, Optional
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from langchain_community.callbacks.openai_info import OpenAICallbackHandler
+from pydantic import BaseModel, Field
+from tenacity import retry, stop_after_attempt, wait_incrementing
+import logging
+
+from bioguider.agents.agent_utils import escape_braces
+from bioguider.agents.common_agent import (
+    CommonAgent,
+    RetryException,
+)
+from bioguider.agents.prompt_utils import COT_USER_INSTRUCTION
+
+logger = logging.getLogger()
+
+
+class CommonAgentTwoSteps(CommonAgent):
+    def __init__(self, llm: BaseChatOpenAI):
+        super().__init__(llm)
+
+    def _initialize(self):
+        self.exceptions = None
+        self.token_usage = None
+
+    def _get_retryexception_message(self) -> list[tuple[str, str]]:
+        if self.exceptions is None:
+            return None
+        return [("human", str(excp)) for excp in self.exceptions]
+
+    def _build_prompt_for_cot_step(
+        self,
+        system_prompt: str,
+        instruction_prompt: str,
+    ):
+        # system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
+        system_prompt = escape_braces(system_prompt)
+        instruction_prompt = instruction_prompt.replace("{", "{{").replace("}", "}}")
+        msgs = [("system", system_prompt)]
+        msgs = msgs + [("human", instruction_prompt)]
+        exception_msgs = self._get_retryexception_message()
+        if exception_msgs is not None:
+            msgs = msgs + exception_msgs
+        msgs = msgs + [("human", COT_USER_INSTRUCTION)]
+        return ChatPromptTemplate.from_messages(msgs)
+
+    def _build_prompt_for_final_step(
+        self,
+        system_prompt: str,
+        cot_msg: str,
+    ):
+        system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
+        msgs = [("system", system_prompt)]
+        cot_msg = cot_msg.replace("{", "{{").replace("}", "}}")
+        msgs = msgs + [(
+            "human",
+            f"Please review the following step-by-step reasoning and provide the answer based on it: ```{cot_msg}```"
+        )]
+        return ChatPromptTemplate.from_messages(msgs)
+
+    @retry(
+        stop=stop_after_attempt(5),
+        wait=wait_incrementing(start=1.0, increment=3, max=10),
+    )
+    def _invoke_agent(
+        self,
+        system_prompt: str,
+        instruction_prompt: str,
+        schema: any,
+        post_process: Optional[Callable] = None,
+        **kwargs: Optional[Any],
+    ):
+        # Initialize the callback handler
+        callback_handler = OpenAICallbackHandler()
+        cot_prompt = self._build_prompt_for_cot_step(
+            system_prompt=system_prompt,
+            instruction_prompt=instruction_prompt
+        )
+
+        try:
+            # First, use llm to do CoT
+            msgs = cot_prompt.invoke(input={}).to_messages()
+
+            cot_res = self.llm.generate(messages=[msgs])
+            reasoning_process = cot_res.generations[0][0].text
+            token_usage = cot_res.llm_output.get("token_usage")
+            cot_tokens = {
+                "total_tokens": token_usage.get("total_tokens", 0),
+                "prompt_tokens": token_usage.get("prompt_tokens", 0),
+                "completion_tokens": token_usage.get("completion_tokens", 0),
+            }
+            self._incre_token_usage(cot_tokens)
+        except Exception as e:
+            logger.error(str(e))
+            raise e
+
+        # Then use the reasoning process to do the structured output
+        updated_prompt = self._build_prompt_for_final_step(
+            system_prompt=system_prompt,
+            cot_msg=reasoning_process,
+        )
+        agent = updated_prompt | self.llm.with_structured_output(schema)
+        try:
+            res = agent.invoke(
+                input={},
+                config={
+                    "callbacks": [callback_handler],
+                },
+            )
+            self._incre_token_usage(callback_handler)
+        except Exception as e:
+            logger.error(str(e))
+            raise e
+        processed_res = None
+        if post_process is not None:
+            try:
+                processed_res = post_process(res, **kwargs)
+            except RetryException as e:
+                logger.error(str(e))
+                self.exceptions = [e] if self.exceptions is None else self.exceptions + [e]
+                raise e
+            except Exception as e:
+                logger.error(str(e))
+                raise e
+        return res, processed_res, self.token_usage, reasoning_process
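`CommonAgentTwoSteps` splits each call in two: a free-form chain-of-thought pass via `llm.generate`, then a second pass that converts that reasoning into structured output. A `RetryException` raised by `post_process` is replayed as an extra human message on the next tenacity attempt (up to five). A sketch of that retry loop with a validating post-processor; the schema and prompts are invented:

```python
# Hypothetical sketch of a validating post-processor; assumes OPENAI_API_KEY is set.
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

from bioguider.agents.common_agent import RetryException
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps

class FileAnswer(BaseModel):
    file_name: str = Field(description="A single file name from the listing.")

def require_python_file(res: FileAnswer, **kwargs) -> FileAnswer:
    # Raising RetryException feeds this message back into the next attempt.
    if not res.file_name.endswith(".py"):
        raise RetryException("Please answer with a Python (.py) file name.")
    return res

agent = CommonAgentTwoSteps(llm=ChatOpenAI(model="gpt-4o"))
res, processed, usage, reasoning = agent.go(
    system_prompt="You pick files from a repository listing.",
    instruction_prompt="Which file most likely holds the CLI entry point: cli.py or README.md?",
    schema=FileAnswer,
    post_process=require_python_file,
)
print(processed.file_name)
print(reasoning)  # the chain-of-thought text from the first pass
```

Note that once the five attempts are exhausted, tenacity re-raises the last exception, so callers should be prepared to handle it.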
bioguider/agents/common_step.py
@@ -0,0 +1,85 @@
+
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Dict, Optional, TypedDict
+import logging
+from langchain_openai.chat_models.base import BaseChatOpenAI
+
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+
+logger = logging.getLogger(__name__)
+
+class CommonState(TypedDict):
+    """
+    CommonState is a TypedDict that defines the structure of the state
+    used in the CommonStep class.
+    """
+    llm: Optional[BaseChatOpenAI]
+    step_output_callback: Optional[Callable]
+
+class CommonStep(ABC):
+    """
+    CommonStep is a base class for defining common steps in a workflow.
+    It provides methods to execute the step and handle exceptions.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.step_name = ""
+
+    def enter_step(self, state):
+        if state["step_output_callback"] is None:
+            return
+        state["step_output_callback"](
+            step_name=self.step_name,
+        )
+
+    def leave_step(self, state, token_usage: Optional[dict[str, int]] = None):
+        if state["step_output_callback"] is None:
+            return
+        if token_usage is not None:
+            state["step_output_callback"](token_usage=token_usage)
+
+    def execute(self, state):
+        """
+        Execute the step. This method should be overridden by subclasses.
+        """
+        self.enter_step(state)
+        state, token_usage = self._execute_directly(state)
+        self.leave_step(state, token_usage)
+        return state
+
+    def _print_step(
+        self,
+        state,
+        step_name: str | None = None,
+        step_output: str | None = None,
+        token_usage: dict | object | None = None,
+    ):
+        step_callback = state["step_output_callback"]
+        if step_callback is None:
+            return
+        # convert token_usage to dict
+        if token_usage is not None and not isinstance(token_usage, dict):
+            token_usage = vars(token_usage)
+        # In case token_usage.total_tokens is 0
+        token_usage = { **DEFAULT_TOKEN_USAGE, **token_usage }
+        step_callback(
+            step_name=step_name,
+            step_output=step_output,
+            token_usage=token_usage,
+        )
+
+    @abstractmethod
+    def _execute_directly(self, state) -> tuple[dict, dict[str, int]]:
+        """
+        Execute the step directly. This method should be overridden by subclasses.
+        Args:
+            state (CommonState): The state of the workflow.
+        Returns:
+            tuple[dict, dict[str, int]]: The updated state and token usage.
+        """
+        pass
+
+
+
+
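Concrete steps only need to implement `_execute_directly` and return the updated state plus a token-usage dict; `execute()` wraps it with the enter/leave callbacks. A minimal subclass sketch, with an invented step name and state values:

```python
# Hypothetical minimal step; uses only the CommonStep contract shown above.
from bioguider.agents.common_step import CommonStep
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE

class EchoStep(CommonStep):
    def __init__(self):
        super().__init__()
        self.step_name = "Echo Step"

    def _execute_directly(self, state):
        # Return the mutated state and a token-usage dict.
        state["step_output"] = "hello from EchoStep"
        return state, dict(DEFAULT_TOKEN_USAGE)

state = {"llm": None, "step_output_callback": None, "step_output": None}
state = EchoStep().execute(state)
print(state["step_output"])
```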
bioguider/agents/dockergeneration_execute_step.py
@@ -0,0 +1,186 @@
+
+import logging
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from langchain.tools import BaseTool
+from langchain.agents import create_react_agent, AgentExecutor
+from langchain_community.callbacks.openai_info import OpenAICallbackHandler
+
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+from bioguider.agents.agent_utils import (
+    CustomPromptTemplate,
+    CustomOutputParser,
+)
+from bioguider.agents.peo_common_step import PEOCommonStep
+from bioguider.agents.dockergeneration_task_utils import (
+    DockerGenerationWorkflowState,
+    generate_Dockerfile_tool,
+)
+
+logger = logging.getLogger(__name__)
+
+DOCKERGENERATION_EXECUTION_SYSTEM_PROMPT = """You are an expert in software containerization and reproducibility engineering.
+You are given a **plan** and must complete it strictly using Python code and the available tools.
+
+---
+### **Available Tools**
+{tools}
+
+---
+### **Your Task**
+Follow the given plan step by step using the exact format below:
+
+```
+Thought: Describe what you are thinking or planning to do next.
+Action: The tool you are going to use (must be one of: {tool_names})
+Action Input: The input to the selected action
+Observation: The result returned by the action
+```
+
+You may repeat the **Thought → Action → Action Input → Observation** loop as needed.
+
+Once all steps in the plan have been executed, end the loop and output all the results and generated Dockerfile using this format:
+
+```
+Thought: I have completed the plan.
+Final Answer:
+Action: {{tool_name}}
+Action Input: {{file_name1}}
+Action Observation: {{Observation1}}
+---
+Action: {{tool_name}}
+Action Input: {{file_name2}}
+Action Observation: {{Observation2}}
+---
+**Dockerfile file name**: {{docker file path}}
+...
+```
+
+---
+
+### **Important Notes**
+
+- You must strictly follow the provided plan.
+- **Do not take any additional or alternative actions**, even if:
+  - No relevant result is found
+  - The file content is missing, empty, or irrelevant
+- If no information is found in a step, simply proceed to the next action in the plan without improvising.
+- Only use the tools specified in the plan actions. No independent decisions or extra steps are allowed.
+---
+
+### **Plan**
+{plan_actions}
+
+### **Plan Thoughts**
+{plan_thoughts}
+
+### **Actions Already Taken**
+{agent_scratchpad}
+
+---
+
+{input}
+
+---
+"""
+
+class DockerGenerationExecuteStep(PEOCommonStep):
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        repo_path: str,
+        repo_structure: str,
+        gitignore_path: str,
+        custom_tools: list[BaseTool] | None = None,
+    ):
+        super().__init__(llm)
+        self.step_name = "Docker Generation Execute Step"
+        self.repo_path = repo_path
+        self.repo_structure = repo_structure
+        self.gitignore_path = gitignore_path
+        self.custom_tools = custom_tools if custom_tools is not None else []
+        self.generate_tool: generate_Dockerfile_tool | None = None
+
+    def set_generate_Dockerfile_tool(self, tool: generate_Dockerfile_tool):
+        self.generate_tool = tool
+
+    def _execute_directly(self, state: DockerGenerationWorkflowState):
+        plan_actions = state["plan_actions"]
+        plan_thoughts = state["plan_thoughts"]
+        step_output = state["step_output"] if "step_output" in state and \
+            state["step_output"] is not None else "N/A"
+        step_dockerfile_content = state["step_dockerfile_content"] if "step_dockerfile_content" in state and \
+            state["step_dockerfile_content"] is not None else "N/A"
+        self.generate_tool.set_intermediate_output(
+            plan_thoughts=plan_thoughts,
+            step_error=step_output,
+            step_dockerfile_content=step_dockerfile_content,
+        )
+        prompt = CustomPromptTemplate(
+            template=DOCKERGENERATION_EXECUTION_SYSTEM_PROMPT,
+            tools=self.custom_tools,
+            plan_actions=plan_actions,
+            input_variables=[
+                "tools", "tool_names", "agent_scratchpad",
+                "intermediate_steps", "plan_actions", "plan_thoughts",
+            ],
+        )
+        output_parser = CustomOutputParser()
+        agent = create_react_agent(
+            llm=self.llm,
+            tools=self.custom_tools,
+            prompt=prompt,
+            output_parser=output_parser,
+            stop_sequence=["\nObservation:"],
+        )
+        callback_handler = OpenAICallbackHandler()
+        agent_executor = AgentExecutor(
+            agent=agent,
+            tools=self.custom_tools,
+            max_iterations=10,
+        )
+        response = agent_executor.invoke(
+            input={
+                "plan_actions": plan_actions,
+                "plan_thoughts": plan_thoughts,
+                "input": "Now, let's begin."
+            },
+            config={
+                "callbacks": [callback_handler],
+                "recursion_limit": 20,
+            }
+        )
+        if "output" in response:
+            output = response["output"]
+            self._print_step(state, step_output=f"**Execute Output:** \n{output}")
+            if "**Final Answer**" in output:
+                final_answer = output.split("**Final Answer:**")[-1].strip().strip(":")
+                step_output = final_answer
+            elif "Final Answer" in output:
+                final_answer = output.split("Final Answer")[-1].strip().strip(":")
+                step_output = final_answer
+            else:
+                step_output = output
+            self._print_step(state, step_output=step_output)
+            state["step_output"] = step_output
+            if "**Dockerfile file name**" in step_output:
+                dockerfile: str = step_output.split("**Dockerfile file name**")[-1]
+                dockerfile = dockerfile.strip().strip(":")
+                dockerfile = dockerfile.strip("```").strip()
+                state["dockerfile"] = dockerfile
+            else:
+                state["dockerfile"] = None
+                # state["dockerfile"] = f"demo-bioguider-{docker_id}.Dockerfile"
+        else:
+            logger.error("No output found in the response.")
+            self._print_step(
                state,
+                step_output="Error: No output found in the response.",
+            )
+            state["step_output"] = "Error: No output found in the response."
+
+
+        token_usage = vars(callback_handler)
+        token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}
+
+        return state, token_usage
+
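The execute step recovers the Dockerfile path by string-splitting the ReAct agent's final answer. A standalone sketch of that parsing, mirroring the `split`/`strip` chain above; the agent output is invented for illustration:

```python
# Standalone parsing sketch; the agent output below is invented.
output = """Thought: I have completed the plan.
Final Answer:
Action: generate_Dockerfile_tool
Action Input: requirements.txt
Action Observation: Dockerfile written.
---
**Dockerfile file name**: demo.Dockerfile
"""

step_output = output.split("Final Answer")[-1].strip().strip(":")
if "**Dockerfile file name**" in step_output:
    dockerfile = step_output.split("**Dockerfile file name**")[-1]
    dockerfile = dockerfile.strip().strip(":")
    dockerfile = dockerfile.strip("```").strip()  # strip() takes a set of chars, so this removes backticks
    print(dockerfile)  # -> demo.Dockerfile
```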