bioguider 0.2.19__tar.gz → 0.2.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- {bioguider-0.2.19 → bioguider-0.2.21}/PKG-INFO +1 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/agent_utils.py +18 -10
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_execute_step.py +1 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_observe_step.py +7 -2
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_task_utils.py +1 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_conversation.py +20 -2
- bioguider-0.2.21/bioguider/agents/consistency_collection_step.py +100 -0
- bioguider-0.2.21/bioguider/agents/consistency_evaluation_task.py +56 -0
- bioguider-0.2.21/bioguider/agents/consistency_evaluation_task_utils.py +13 -0
- bioguider-0.2.21/bioguider/agents/consistency_observe_step.py +107 -0
- bioguider-0.2.21/bioguider/agents/consistency_query_step.py +74 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_task.py +2 -2
- bioguider-0.2.21/bioguider/agents/evaluation_userguide_prompts.py +162 -0
- bioguider-0.2.21/bioguider/agents/evaluation_userguide_task.py +131 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/prompt_utils.py +15 -8
- bioguider-0.2.21/bioguider/database/code_structure_db.py +489 -0
- bioguider-0.2.21/bioguider/generation/__init__.py +39 -0
- bioguider-0.2.21/bioguider/generation/change_planner.py +140 -0
- bioguider-0.2.21/bioguider/generation/document_renderer.py +47 -0
- bioguider-0.2.21/bioguider/generation/llm_cleaner.py +43 -0
- bioguider-0.2.21/bioguider/generation/llm_content_generator.py +69 -0
- bioguider-0.2.21/bioguider/generation/llm_injector.py +270 -0
- bioguider-0.2.21/bioguider/generation/models.py +77 -0
- bioguider-0.2.21/bioguider/generation/output_manager.py +54 -0
- bioguider-0.2.21/bioguider/generation/repo_reader.py +37 -0
- bioguider-0.2.21/bioguider/generation/report_loader.py +151 -0
- bioguider-0.2.21/bioguider/generation/style_analyzer.py +36 -0
- bioguider-0.2.21/bioguider/generation/suggestion_extractor.py +136 -0
- bioguider-0.2.21/bioguider/generation/test_metrics.py +104 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/managers/evaluation_manager.py +24 -0
- bioguider-0.2.21/bioguider/managers/generation_manager.py +160 -0
- bioguider-0.2.21/bioguider/managers/generation_test_manager.py +74 -0
- bioguider-0.2.21/bioguider/utils/code_structure_builder.py +47 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/constants.py +12 -12
- bioguider-0.2.21/bioguider/utils/python_file_handler.py +65 -0
- bioguider-0.2.21/bioguider/utils/r_file_handler.py +368 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/utils.py +34 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/pyproject.toml +1 -1
- {bioguider-0.2.19 → bioguider-0.2.21}/LICENSE +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/README.md +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/__init__.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/__init__.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/agent_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/agent_tools.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_plan_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/collection_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_agent.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_agent_2step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/common_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_execute_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_observe_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_plan_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/dockergeneration_task_utils.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_installation_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_readme_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_execute_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_observe_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_plan_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/identification_task_utils.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/peo_common_step.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/python_ast_repl_tool.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/agents/rag_collection_task.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/conversation.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/database/summarized_file_db.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/__init__.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/config.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/data_pipeline.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/embedder.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/rag/rag.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/settings.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/default.gitignore +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/file_utils.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/gitignore_checker.py +0 -0
- {bioguider-0.2.19 → bioguider-0.2.21}/bioguider/utils/pyphen_utils.py +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import json
|
|
3
3
|
from json import JSONDecodeError
|
|
4
4
|
import os
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
import re
|
|
6
7
|
from typing import List, Optional, Tuple, Union
|
|
7
8
|
from langchain_openai import AzureChatOpenAI
|
|
@@ -22,6 +23,7 @@ from pydantic import BaseModel, Field
|
|
|
22
23
|
|
|
23
24
|
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_SENTENCE_NUM
|
|
24
25
|
from bioguider.utils.file_utils import get_file_type
|
|
26
|
+
from bioguider.utils.utils import clean_action_input
|
|
25
27
|
from ..utils.gitignore_checker import GitignoreChecker
|
|
26
28
|
from ..database.summarized_file_db import SummarizedFilesDb
|
|
27
29
|
from bioguider.agents.common_conversation import CommonConversation
|
|
@@ -122,16 +124,18 @@ def pretty_print(message, printout = True):
|
|
|
122
124
|
HUGE_FILE_LENGTH = 10 * 1024 # 10K
|
|
123
125
|
|
|
124
126
|
def read_file(
|
|
125
|
-
file_path: str,
|
|
127
|
+
file_path: str | Path,
|
|
126
128
|
) -> str | None:
|
|
129
|
+
file_path = str(file_path).strip()
|
|
127
130
|
if not os.path.isfile(file_path):
|
|
128
131
|
return None
|
|
129
132
|
with open(file_path, 'r') as f:
|
|
130
133
|
content = f.read()
|
|
131
134
|
return content
|
|
132
135
|
|
|
133
|
-
def write_file(file_path: str, content: str):
|
|
136
|
+
def write_file(file_path: str | Path, content: str):
|
|
134
137
|
try:
|
|
138
|
+
file_path = str(file_path).strip()
|
|
135
139
|
with open(file_path, "w") as fobj:
|
|
136
140
|
fobj.write(content)
|
|
137
141
|
return True
|
|
@@ -140,10 +144,11 @@ def write_file(file_path: str, content: str):
|
|
|
140
144
|
return False
|
|
141
145
|
|
|
142
146
|
def read_directory(
|
|
143
|
-
dir_path: str,
|
|
147
|
+
dir_path: str | Path,
|
|
144
148
|
gitignore_path: str,
|
|
145
149
|
level: int=1,
|
|
146
150
|
) -> list[str] | None:
|
|
151
|
+
dir_path = str(dir_path).strip()
|
|
147
152
|
if not os.path.isdir(dir_path):
|
|
148
153
|
return None
|
|
149
154
|
gitignore_checker = GitignoreChecker(
|
|
@@ -182,15 +187,16 @@ Now, let's start to summarize.
|
|
|
182
187
|
|
|
183
188
|
def summarize_file(
|
|
184
189
|
llm: BaseChatOpenAI,
|
|
185
|
-
name: str,
|
|
190
|
+
name: str | Path,
|
|
186
191
|
content: str | None = None,
|
|
187
192
|
level: int = 3,
|
|
188
193
|
summary_instructions: str | None = None,
|
|
189
194
|
summarize_prompt: str = "N/A",
|
|
190
195
|
db: SummarizedFilesDb | None = None,
|
|
191
196
|
) -> Tuple[str, dict]:
|
|
197
|
+
name = str(name).strip()
|
|
192
198
|
if content is None:
|
|
193
|
-
try:
|
|
199
|
+
try:
|
|
194
200
|
with open(name, "r") as fobj:
|
|
195
201
|
content = fobj.read()
|
|
196
202
|
except Exception as e:
|
|
@@ -289,8 +295,7 @@ class CustomOutputParser(AgentOutputParser):
|
|
|
289
295
|
action_input = match.group(2)
|
|
290
296
|
# Return the action and action input
|
|
291
297
|
action_dict = None
|
|
292
|
-
|
|
293
|
-
action_input_replaced = action_input.replace("'", '"')
|
|
298
|
+
action_input_replaced = clean_action_input(action_input)
|
|
294
299
|
try:
|
|
295
300
|
action_dict = json.loads(action_input_replaced)
|
|
296
301
|
except json.JSONDecodeError:
|
|
@@ -299,10 +304,11 @@ class CustomOutputParser(AgentOutputParser):
|
|
|
299
304
|
# try using ast to parse input string
|
|
300
305
|
import ast
|
|
301
306
|
try:
|
|
302
|
-
action_dict = ast.literal_eval(
|
|
307
|
+
action_dict = ast.literal_eval(action_input_replaced)
|
|
303
308
|
if not isinstance(action_dict, dict):
|
|
304
309
|
action_dict = None
|
|
305
310
|
except Exception as e:
|
|
311
|
+
logger.error(f"Error parsing action input: {action_input} -> {action_input_replaced}\n{e}")
|
|
306
312
|
pass
|
|
307
313
|
return AgentAction(
|
|
308
314
|
tool=action,
|
|
@@ -408,8 +414,10 @@ def read_license_file(repo_path: str) -> tuple[str | None, str|None]:
|
|
|
408
414
|
]
|
|
409
415
|
license_files = []
|
|
410
416
|
for file in hardcoded_license_files:
|
|
411
|
-
|
|
412
|
-
|
|
417
|
+
file_path = os.path.join(str(repo_path), file)
|
|
418
|
+
file_path = file_path.strip()
|
|
419
|
+
if os.path.exists(file_path):
|
|
420
|
+
with open(file_path, "r") as f:
|
|
413
421
|
license_files.append((f.read(), os.path.join(repo_path, file)))
|
|
414
422
|
|
|
415
423
|
max_item = max(license_files, key=lambda x: len(x[0])) if len(license_files) > 0 else (None, None)
|
|
@@ -144,7 +144,7 @@ class CollectionExecuteStep(PEOCommonStep):
|
|
|
144
144
|
agent_executor = AgentExecutor(
|
|
145
145
|
agent=agent,
|
|
146
146
|
tools=self.custom_tools,
|
|
147
|
-
max_iterations=
|
|
147
|
+
max_iterations=30,
|
|
148
148
|
)
|
|
149
149
|
response = agent_executor.invoke(
|
|
150
150
|
input={"plan_actions": plan_actions, "input": "Now, let's begin."},
|
|
@@ -94,8 +94,13 @@ class CollectionObserveStep(PEOCommonStep):
|
|
|
94
94
|
)
|
|
95
95
|
def _execute_directly(self, state: CollectionWorkflowState):
|
|
96
96
|
step_count = state["step_count"]
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
plan = state["plan_actions"]
|
|
98
|
+
plan = plan.strip()
|
|
99
|
+
if len(plan) == 0:
|
|
100
|
+
instruction = "No plan provided, please let's generate the final answer based on the current information."
|
|
101
|
+
else:
|
|
102
|
+
instruction = "Now, we have reached max recursion limit, please give me the **final answer** based on the current information" \
|
|
103
|
+
if step_count == MAX_STEP_COUNT/3 - 2 else "Let's begin thinking."
|
|
99
104
|
system_prompt = self._build_prompt(state)
|
|
100
105
|
agent = CommonAgentTwoSteps(llm=self.llm)
|
|
101
106
|
res, _, token_usage, reasoning_process = agent.go(
|
|
@@ -19,8 +19,26 @@ class CommonConversation:
|
|
|
19
19
|
callbacks=[callback_handler]
|
|
20
20
|
)
|
|
21
21
|
response = result.generations[0][0].text
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
# Try to normalize token usage across providers
|
|
23
|
+
token_usage = {}
|
|
24
|
+
try:
|
|
25
|
+
if hasattr(result, "llm_output") and result.llm_output is not None:
|
|
26
|
+
raw = result.llm_output.get("token_usage") or result.llm_output.get("usage")
|
|
27
|
+
if isinstance(raw, dict):
|
|
28
|
+
token_usage = {
|
|
29
|
+
"total_tokens": raw.get("total_tokens") or raw.get("total"),
|
|
30
|
+
"prompt_tokens": raw.get("prompt_tokens") or raw.get("prompt"),
|
|
31
|
+
"completion_tokens": raw.get("completion_tokens") or raw.get("completion"),
|
|
32
|
+
}
|
|
33
|
+
except Exception:
|
|
34
|
+
pass
|
|
35
|
+
if not token_usage:
|
|
36
|
+
token_usage = {
|
|
37
|
+
"total_tokens": getattr(callback_handler, "total_tokens", 0),
|
|
38
|
+
"prompt_tokens": getattr(callback_handler, "prompt_tokens", 0),
|
|
39
|
+
"completion_tokens": getattr(callback_handler, "completion_tokens", 0),
|
|
40
|
+
}
|
|
41
|
+
return response, token_usage
|
|
24
42
|
|
|
25
43
|
def generate_with_schema(self, system_prompt: str, instruction_prompt: str, schema: any):
|
|
26
44
|
system_prompt = escape_braces(system_prompt)
|
|
from langchain.prompts import ChatPromptTemplate
from langchain_openai.chat_models.base import BaseChatOpenAI
from pydantic import BaseModel, Field

from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.agents.peo_common_step import PEOCommonStep


# NOTE(review): the "CONSISTANCY" spelling is kept as-is — it is a module-level
# name that may be imported elsewhere.
CONSISTANCY_COLLECTION_SYSTEM_PROMPT = """
### **Goal**
You are an expert developer specializing in the biomedical domain.
You will be given a user guide/API documentation. Your task is to collect all the functions, classes, and methods that the user guide/API documentation mentions.

---

### **Input User Guide/API Documentation**
{user_guide_api_documentation}

### **Output Format**
The collected functions, classes, and methods **must exactly match** the following format, **do not** make up anything:

```
name: <function/class/method name>
file_path: <file path, if not sure, just put "N/A">
parameters: <parameters, if not sure, just put "N/A">
parent: <parent name, if it is a class method, put the class name as the parent name, if not sure, just put "N/A">

...

```

---

### **Output Example**
```
name: __init__
file_path: src/agents/common_agent.py
parameters: llm, step_output_callback, summarized_files_db
parent: CommonAgent

name: _invoke_agent
file_path: src/agents/common_agent.py
parameters: system_prompt, instruction_prompt, schema, post_process
parent: CommonAgent

...
```

"""

class ConsistencyCollectionResult(BaseModel):
    """Structured output of the collection step."""

    functions_and_classes: list[dict] = Field(description="A list of functions and classes that the user guide/API documentation mentions")

# JSON-schema form of ConsistencyCollectionResult, handed to the agent as the
# response schema.
ConsistencyCollectionResultJsonSchema = {
    "properties": {
        "functions_and_classes": {
            "description": "A list of functions and classes that the user guide/API documentation mentions",
            "items": {
                "type": "object"
            },
            "title": "Functions And Classes",
            "type": "array"
        }
    },
    "required": [
        "functions_and_classes"
    ],
    "title": "ConsistencyCollectionResult",
    "type": "object"
}

class ConsistencyCollectionStep(PEOCommonStep):
    """Extract every function/class/method a user guide or API doc mentions."""

    def __init__(self, llm: BaseChatOpenAI):
        super().__init__(llm)
        self.step_name = "Consistency Collection Step"

    def _prepare_system_prompt(self, state: ConsistencyEvaluationState) -> str:
        """Render the collection prompt with the documentation under review."""
        doc_text = state["user_guide_api_documentation"]
        template = ChatPromptTemplate.from_template(CONSISTANCY_COLLECTION_SYSTEM_PROMPT)
        return template.format(user_guide_api_documentation=doc_text)

    def _execute_directly(self, state: ConsistencyEvaluationState) -> tuple[dict, dict[str, int]]:
        """Run the two-step agent and record the collected symbols in the state."""
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        raw_result, _, token_usage, reasoning_process = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin the consistency collection step.",
            schema=ConsistencyCollectionResultJsonSchema,
        )
        result: ConsistencyCollectionResult = ConsistencyCollectionResult.model_validate(raw_result)
        state["functions_and_classes"] = result.functions_and_classes
        self._print_step(state, step_output=f"Consistency Collection Result: {result.functions_and_classes}")
        self._print_step(state, step_output=f"Consistency Collection Reasoning Process: {reasoning_process}")

        return state, token_usage
from typing import Callable

from langchain_openai.chat_models.base import BaseChatOpenAI
from pydantic import BaseModel

from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.database.code_structure_db import CodeStructureDb
from .consistency_collection_step import ConsistencyCollectionStep
from .consistency_query_step import ConsistencyQueryStep
from .consistency_observe_step import ConsistencyObserveStep

class ConsistencyEvaluationResult(BaseModel):
    """Final verdict produced by ConsistencyEvaluationTask.evaluate()."""

    # Score string, e.g. Poor / Fair / Good / Excellent.
    score: str
    # Narrative assessment of doc-vs-code consistency.
    assessment: str
    # Inconsistencies found (areas for development).
    development: list[str]
    # Consistency strengths of the documentation.
    strengths: list[str]

class ConsistencyEvaluationTask:
    """Pipeline driver: collect symbols -> query code DB -> observe and score."""

    def __init__(
        self,
        llm: BaseChatOpenAI,
        code_structure_db: CodeStructureDb,
        step_callback: Callable | None = None
    ):
        self.llm = llm
        self.code_structure_db = code_structure_db
        self.step_callback = step_callback

    def evaluate(self, user_guide_api_documentation: str) -> ConsistencyEvaluationResult:
        """Run the three steps in order over the given documentation text."""
        pipeline = (
            ConsistencyCollectionStep(llm=self.llm),
            ConsistencyQueryStep(code_structure_db=self.code_structure_db),
            ConsistencyObserveStep(llm=self.llm),
        )

        state = ConsistencyEvaluationState(
            user_guide_api_documentation=user_guide_api_documentation,
            step_output_callback=self.step_callback,
        )
        # Each step mutates and returns the shared state.
        for step in pipeline:
            state = step.execute(state)

        return ConsistencyEvaluationResult(
            score=state["consistency_score"],
            assessment=state["consistency_assessment"],
            development=state["consistency_development"],
            strengths=state["consistency_strengths"],
        )
from typing import Any, Callable, Optional, TypedDict


class ConsistencyEvaluationState(TypedDict, total=False):
    """Mutable state shared by the consistency-evaluation pipeline steps.

    Only ``user_guide_api_documentation`` and ``step_output_callback`` are
    provided up front; the remaining keys are filled in by later steps, so the
    dict is declared ``total=False`` (callers construct it with a subset of
    keys).
    """

    # Documentation text under review.
    user_guide_api_documentation: str
    # Optional callback used by steps to stream progress output.
    step_output_callback: Optional[Callable]
    # Symbols extracted from the documentation by the collection step.
    functions_and_classes: Optional[list[dict]]
    # Raw rows returned from the code-structure DB by the query step.
    # (Fixed: was ``list[any]`` — the builtin function, not a type.)
    all_query_rows: Optional[list[Any]]
    # Results written by the observe step.
    consistency_score: Optional[str]
    consistency_assessment: Optional[str]
    consistency_development: Optional[list[str]]
    consistency_strengths: Optional[list[str]]
from langchain.prompts import ChatPromptTemplate
from langchain_openai.chat_models.base import BaseChatOpenAI
from pydantic import BaseModel, Field
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.agents.peo_common_step import PEOCommonStep

# FIX: the Output Example previously omitted the "* **Score**" line that the
# Output Format declares mandatory ("must exactly match"); the example now
# includes it so the two sections agree.
CONSISTENCY_OBSERVE_SYSTEM_PROMPT = """
You are an expert developer specializing in the biomedical domain.
Your task is to analyze both:
1. the provided file related to user guide/API documentation,
2. the code definitions related to the user guide/API documentation
and generate a structured consistency assessment based on the following criteria.

---

### **Evaluation Criteria**

**Consistency**:
* **Score**: [Poor / Fair / Good / Excellent]
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]

---

### **Output Format**
Your output **must exactly match** the following format:
```
**Consistency**:
* **Score**: [Poor / Fair / Good / Excellent]
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
* **Strengths**: [A list of strengths of the user guide/API documentation on consistency]
```

### **Output Example**

```
**Consistency**:
* **Score**: Good
* **Assessment**: [Your evaluation of whether the user guide/API documentation is consistent with the code definitions]
* **Development**:
- Inconsistent function/class/method name 1
- Inconsistent docstring 1
- Inconsistent function/class/method name 2
- Inconsistent docstring 2
- ...
* **Strengths**:
- Strengths 1
- Strengths 2
- ...
```

---

### **Input User Guide/API Documentation**
{user_guide_api_documentation}

### **Code Definitions**
{code_definitions}


"""

class ConsistencyEvaluationObserveResult(BaseModel):
    """Structured consistency verdict parsed from the agent's reply."""

    consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
    consistency_assessment: str=Field(description="Your evaluation of whether the user guide/API documentation is consistent with the code definitions")
    consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
    consistency_strengths: list[str]=Field(description="A list of strengths of the user guide/API documentation on consistency")


class ConsistencyObserveStep(PEOCommonStep):
    """Compare the documentation against queried code definitions and score it."""

    def __init__(self, llm: BaseChatOpenAI):
        super().__init__(llm)
        self.step_name = "Consistency Observe Step"

    def _prepare_system_prompt(self, state: ConsistencyEvaluationState):
        """Render the observe prompt with the doc text and one record per DB row."""
        all_query_rows = state["all_query_rows"]
        user_guide_api_documentation = state["user_guide_api_documentation"]
        # One definition record per row; blank lines separate records.
        code_definition = "".join(
            f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\nparameters: {row['params']}\ndoc_string: {row['doc_string']}"
            + "\n\n\n"
            for row in all_query_rows
        )
        return ChatPromptTemplate.from_template(CONSISTENCY_OBSERVE_SYSTEM_PROMPT).format(
            code_definitions=code_definition,
            user_guide_api_documentation=user_guide_api_documentation,
        )

    def _execute_directly(self, state: ConsistencyEvaluationState):
        """Run the two-step agent and copy its verdict into the shared state."""
        system_prompt = self._prepare_system_prompt(state)
        agent = CommonAgentTwoSteps(llm=self.llm)
        res, _, token_usage, reasoning_process = agent.go(
            system_prompt=system_prompt,
            instruction_prompt="Now, let's begin the consistency evaluation step.",
            schema=ConsistencyEvaluationObserveResult,
        )
        # res is already a ConsistencyEvaluationObserveResult (the schema above);
        # the redundant `res: X = res` self-assignment was removed.
        state["consistency_score"] = res.consistency_score
        state["consistency_assessment"] = res.consistency_assessment
        state["consistency_development"] = res.consistency_development
        state["consistency_strengths"] = res.consistency_strengths
        return state, token_usage
from typing import Any

from bioguider.agents.common_step import CommonStep
from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
from bioguider.database.code_structure_db import CodeStructureDb
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE


class ConsistencyQueryStep(CommonStep):
    """Look up each collected function/class in the code-structure database.

    For every entry produced by the collection step, the step queries the DB
    from the most specific key combination down to name-only, and accumulates
    every matching row into ``state["all_query_rows"]``.
    """

    def __init__(self, code_structure_db: CodeStructureDb):
        super().__init__()
        self.step_name = "Consistency Query Step"
        self.code_structure_db = code_structure_db

    @staticmethod
    def _field(entry: dict, key: str):
        """Return entry[key], or None when the key is absent or is "N/A".

        FIX: the original indexed entry["name"] etc. unconditionally, raising
        KeyError on LLM-produced entries missing a key, even though later
        guards checked `"name" in entry`.
        """
        value = entry.get(key)
        return None if value == "N/A" else value

    def _query_rows(self, name, parent, file_path):
        """Query the DB, widening from the most to the least specific lookup."""
        db = self.code_structure_db
        if name is None:
            # Without a name, only a path or parent lookup is possible.
            if file_path is not None:
                return db.select_by_path(file_path)
            if parent is not None:
                return db.select_by_parent(parent)
            return None
        # Build the fallback chain: try the most specific lookup first.
        attempts = []
        if file_path is not None and parent is not None:
            attempts.append(lambda: db.select_by_name_and_parent_and_path(name, parent, file_path))
        if file_path is not None:
            attempts.append(lambda: db.select_by_name_and_path(name, file_path))
        if parent is not None:
            attempts.append(lambda: db.select_by_name_and_parent(name, parent))
        attempts.append(lambda: db.select_by_name(name))
        rows = None
        for attempt in attempts:
            rows = attempt()
            if rows:
                break
        return rows

    def _execute_directly(self, state: ConsistencyEvaluationState):
        """Resolve every collected symbol to DB rows; skip unresolvable ones."""
        functions_and_classes = state["functions_and_classes"]
        all_rows: list[Any] = []
        for function_or_class in functions_and_classes:
            self._print_step(state, step_output=(
                f"Consistency Query Step: \n{function_or_class.get('name')},\n"
                f" {function_or_class.get('file_path')},\n"
                f" {function_or_class.get('parameters')},\n"
                f" {function_or_class.get('parent')}"
            ))
            # Normalize "N/A" placeholders (and missing keys) to None.
            name = self._field(function_or_class, "name")
            file_path = self._field(function_or_class, "file_path")
            parent = self._field(function_or_class, "parent")

            rows = self._query_rows(name, parent, file_path)
            if not rows:
                self._print_step(state, step_output=f"No such function or class {name}")
                continue
            all_rows.extend(rows)

        state["all_query_rows"] = all_rows

        return state, {**DEFAULT_TOKEN_USAGE}
@@ -184,7 +184,7 @@ class EvaluationTask(ABC):
|
|
|
184
184
|
token_usage=token_usage,
|
|
185
185
|
)
|
|
186
186
|
|
|
187
|
-
def evaluate(self) -> dict:
|
|
187
|
+
def evaluate(self) -> tuple[dict, list[str]]:
|
|
188
188
|
self._enter_evaluation()
|
|
189
189
|
files = self._collect_files()
|
|
190
190
|
evaluations, token_usage, files = self._evaluate(files)
|
|
@@ -198,7 +198,7 @@ class EvaluationTask(ABC):
|
|
|
198
198
|
self.print_step(token_usage=token_usage)
|
|
199
199
|
|
|
200
200
|
@abstractmethod
|
|
201
|
-
def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
|
|
201
|
+
def _evaluate(self, files: list[str]) -> tuple[dict, dict, list[str]]:
|
|
202
202
|
pass
|
|
203
203
|
|
|
204
204
|
@abstractmethod
|