PyPI - bioguider - Versions diffs - 0.2.7__tar.gz → 0.2.9__tar.gz - Mend

bioguider 0.2.7.tar.gz → 0.2.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of bioguider might be problematic. Click here for more details.

Files changed (48)

{bioguider-0.2.7 → bioguider-0.2.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: bioguider
-Version: 0.2.7
+Version: 0.2.9
 Summary: An AI-Powered package to help biomedical developers to generate clear documentation
 License: MIT
 Author: Cankun Wang

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/agent_tools.py RENAMED Viewed

@@ -51,12 +51,22 @@ Returns:
         return content
 class summarize_file_tool(agent_tool):
-    """ read and summarize the file
-Args:
-    file_path str: file path
-Returns:
-    A string of summarized file content, if the file does not exist, return None.
-        """
+    """ Read a file and generate a summary according to a specified prompt.
+Arguments
+----------
+    file_path : str, required
+        Path to the file to read.
+    summarize_prompt : str, optional
+        Instruction guiding the summarization focus (default is "N/A").
+        Use this to emphasize specific aspects of the content.
+Returns
+-------
+    str or None
+        A summarized version of the file content.
+        Returns None if the file does not exist or cannot be read.
+    """
     def __init__(
         self,
         llm: BaseChatOpenAI,
@@ -75,27 +85,31 @@ Returns:
         self.summary_file_db = db
         self.summarize_instruction = summaize_instruction
-    def _retrive_from_summary_file_db(self, file_path: str) -> str | None:
+    def _retrive_from_summary_file_db(self, file_path: str, prompt: str = "N/A") -> str | None:
         if self.summary_file_db is None:
             return None
         return self.summary_file_db.select_summarized_text(
             file_path=file_path,
             instruction=self.summarize_instruction,
             summarize_level=self.detailed_level,
+            summarize_prompt=prompt,
         )
-    def _save_to_summary_file_db(self, file_path: str, summarized_text: str, token_usage: dict):
+    def _save_to_summary_file_db(self, file_path: str, prompt: str, summarized_text: str, token_usage: dict):
         if self.summary_file_db is None:
             return
         self.summary_file_db.upsert_summarized_file(
             file_path=file_path,
             instruction=self.summarize_instruction,
             summarize_level=self.detailed_level,
+            summarize_prompt=prompt,
             summarized_text=summarized_text,
             token_usage=token_usage,
         )
-    def run(self, file_path: str) -> str | None:
+    def run(self, file_path: str, summarize_prompt: str = "N/A") -> str | None:
         if file_path is None:
             return None
+        if summarize_prompt is None or len(summarize_prompt) == 0:
+            summarize_prompt = "N/A"
         file_path = file_path.strip()
         abs_file_path = file_path
@@ -104,7 +118,8 @@ Returns:
         if not os.path.isfile(abs_file_path):
             return f"{file_path} is not a file."
         summarized_content = self._retrive_from_summary_file_db(
-            file_path=file_path
+            file_path=file_path,
+            prompt=summarize_prompt,
         )
         if summarized_content is not None:
             return f"summarized content of file {file_path}: " + summarized_content
@@ -114,9 +129,11 @@ Returns:
         summarized_content, token_usage = summarize_file(
             self.llm, abs_file_path, file_content, self.detailed_level,
             summary_instructions=self.summarize_instruction,
+            summarize_prompt=summarize_prompt,
         )
         self._save_to_summary_file_db(
             file_path=file_path,
+            prompt=summarize_prompt,
             summarized_text=summarized_content,
             token_usage=token_usage,
         )

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/agent_utils.py RENAMED Viewed

@@ -153,13 +153,28 @@ def read_directory(
 EVALUATION_SUMMARIZE_FILE_PROMPT = ChatPromptTemplate.from_template("""
-You are provided with the content of the file **{file_name}**:
-```
-{file_content}
-```
+You will be provided with the content of the file **{file_name}**:
+---
 ### **Summary Instructions**
 {summary_instructions}
 The content is lengthy. Please generate a concise summary ({sentence_num1}-{sentence_num2} sentences).
+---
+### **Important Instructions**
+{summarize_prompt}
+---
+### **File Content**
+Here is the file content:
+{file_content}
+---
+Now, let's start to summarize.
 """)
 MAX_FILE_LENGTH=20 *1024 # 20K
@@ -170,6 +185,7 @@ def summarize_file(
     content: str | None = None,
     level: int = 3,
     summary_instructions: str | None = None,
+    summarize_prompt: str = "N/A",
     db: SummarizedFilesDb | None = None,
 ) -> Tuple[str, dict]:
     if content is None:
@@ -198,6 +214,7 @@ def summarize_file(
         summary_instructions=summary_instructions \
             if summary_instructions is not None and len(summary_instructions) > 0 \
             else "N/A",
+        summarize_prompt=summarize_prompt,
     )
     config = {"recursion_limit": 500}
@@ -210,7 +227,12 @@ def summarize_file(
     }
     if db is not None:
         db.upsert_summarized_file(
-            name, summary_instructions, level, token_usage
+            file_path=name,
+            instruction=summary_instructions,
+            summarize_level=level,
+            summarize_prompt=summarize_prompt,
+            summarized_text=out,
+            token_usage=token_usage,
         )
     return out, token_usage

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/collection_execute_step.py RENAMED Viewed

@@ -62,11 +62,13 @@ Action Observation: {{Observation2}}
 ```
 Action: summarize_file_tool
 Action Input: README.md
+Action Input: "Please extract license information in summarized file content."
 Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
 ...
 Final Answer:
 Action: summarize_file_tool
 Action Input: README.md
+Action Input: "N/A"
 Action Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
 ---
 Action: check_file_related_tool

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/collection_task.py RENAMED Viewed

@@ -61,6 +61,46 @@ class CollectionTask(AgentTask):
         self.tools: list[any] | None = None
         self.custom_tools: list[Tool] | None = None
+    def _prepare_tools(self, related_file_goal_item_desc):
+        tool_rd = read_directory_tool(repo_path=self.repo_path)
+        tool_sum = summarize_file_tool(
+            llm=self.llm,
+            repo_path=self.repo_path,
+            output_callback=self.step_callback,
+            db=self.summary_file_db,
+        )
+        tool_rf = read_file_tool(repo_path=self.repo_path)
+        tool_cf = check_file_related_tool(
+            llm=self.llm,
+            repo_path=self.repo_path,
+            goal_item_desc=related_file_goal_item_desc,
+            output_callback=self.step_callback,
+        )
+        self.tools = [tool_rd, tool_sum, tool_rf, tool_cf]
+        self.custom_tools = [
+            Tool(
+                name = tool_rd.__class__.__name__,
+                func = tool_rd.run,
+                description=tool_rd.__class__.__doc__,
+            ),
+            StructuredTool.from_function(
+                tool_sum.run,
+                description=tool_sum.__class__.__doc__,
+                name=tool_sum.__class__.__name__,
+            ),
+            Tool(
+                name = tool_rf.__class__.__name__,
+                func = tool_rf.run,
+                description=tool_rf.__class__.__doc__,
+            ),
+            Tool(
+                name = tool_cf.__class__.__name__,
+                func = tool_cf.run,
+                description=tool_cf.__class__.__doc__,
+            ),
+        ]
+        self.custom_tools.append(CustomPythonAstREPLTool())
     def _initialize(self):
         # initialize the 2-level file structure of the repo
         if not os.path.exists(self.repo_path):
@@ -76,28 +116,8 @@ class CollectionTask(AgentTask):
             goal_item=collection_item["goal_item"],
             related_file_description=collection_item["related_file_description"],
         )
-        self.tools = [
-            read_directory_tool(repo_path=self.repo_path),
-            summarize_file_tool(
-                llm=self.llm,
-                repo_path=self.repo_path,
-                output_callback=self.step_callback,
-                db=self.summary_file_db,
-            ),
-            read_file_tool(repo_path=self.repo_path),
-            check_file_related_tool(
-                llm=self.llm,
-                repo_path=self.repo_path,
-                goal_item_desc=related_file_goal_item_desc,
-                output_callback=self.step_callback,
-            ),
-        ]
-        self.custom_tools = [Tool(
-            name=tool.__class__.__name__,
-            func=tool.run,
-            description=tool.__class__.__doc__,
-        ) for tool in self.tools]
-        self.custom_tools.append(CustomPythonAstREPLTool())
+        self._prepare_tools(related_file_goal_item_desc)
         self.steps = [
             CollectionPlanStep(
                 llm=self.llm,

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/evaluation_installation_task.py RENAMED Viewed

@@ -52,14 +52,12 @@ Please assess the installation information using the following criteria. For eac
 Your response **must exactly follow** the structure below:
-```
 **FinalAnswer**
 **Overall Score:** [Poor / Fair / Good / Excellent]
 **Ease of Access:** <your comments>
 **Clarity of Dependency Specification:** <your comments>
 **Hardware Requirements:** <your comments>
 **Installation Guide:** <your comments>
-```
 ---
@@ -118,6 +116,7 @@ class EvaluationInstallationTask(EvaluationTask):
         step_callback = None,
     ):
         super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+        self.evaluation_name = "Installation Evaluation"
     def _evaluate(self, files: list[str] | None = None):
         if files is None or len(files) == 0:
@@ -148,6 +147,7 @@ class EvaluationInstallationTask(EvaluationTask):
             schema=EvaluationInstallationResultSchema,
         )
         res = EvaluationInstallationResult(**res)
+        self.print_step(step_output=reasoning_process)
         evaluation = {
             "score": res.score,
             "ease_of_access": res.ease_of_access,

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/evaluation_task.py RENAMED Viewed

@@ -18,7 +18,8 @@ from ..utils.gitignore_checker import GitignoreChecker
 logger = logging.getLogger(__name__)
 EVALUATION_README_SYSTEM_PROMPT = """
-You are an expert in evaluating the quality of README files in software repositories. Your task is to analyze the provided README file and generate a comprehensive quality report.
+You are an expert in evaluating the quality of README files in software repositories.
+Your task is to analyze the provided README file and generate a comprehensive quality report.
 ---
@@ -28,9 +29,10 @@ First, determine whether the provided README is a **project-level README** (typi
 ---
-### **Project-level README Evaluation**
+### **Evaluation Criteria**
+#### If the README is a **project-level** file, evaluate it using the following criteria.
-If the README is a **project-level** file, evaluate it using the following criteria.
 For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
 **1. Project Clarity & Purpose**
@@ -38,30 +40,45 @@ For each criterion below, provide a brief assessment followed by specific, actio
  * **Improvement Suggestions**:
     * **Original text:** [Quote a specific line/section from the README.]
     * **Improving comments:** [Provide your suggestions to improve clarity.]
+    * **Original text:** [Quote a specific line/section from the README.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+    ...
 **2. Installation Instructions**
  * **Assessment**: [Your evaluation of the installation instructions.]
  * **Improvement Suggestions**:
     * **Original text:** [Quote text related to installation.]
     * **Improving comments:** [Provide your suggestions.]
+    * **Original text:** [Quote text related to installation.]
+    * **Improving comments:** [Provide your suggestions.]
+    ...
 **3. Usage Instructions**
  * **Assessment**: [Your evaluation of the usage instructions.]
  * **Improvement Suggestions**:
     * **Original text:** [Quote text related to usage.]
     * **Improving comments:** [Provide your suggestions.]
+    * **Original text:** [Quote text related to usage.]
+    * **Improving comments:** [Provide your suggestions.]
+    ...
 **4. Contributing Guidelines**
  * **Assessment**: [Your evaluation of the contributing guidelines.]
  * **Improvement Suggestions**:
     * **Original text:** [Quote text related to contributions.]
     * **Improving comments:** [Provide your suggestions.]
+    * **Original text:** [Quote text related to contributions.]
+    * **Improving comments:** [Provide your suggestions.]
+    ...
 **5. License Information**
  * **Assessment**: [Your evaluation of the license information.]
  * **Improvement Suggestions**:
     * **Original text:** [Quote text related to the license.]
     * **Improving comments:** [Provide your suggestions.]
+    * **Original text:** [Quote text related to the license.]
+    * **Improving comments:** [Provide your suggestions.]
+    ...
 **6. Readability Analysis**
  * **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
@@ -70,29 +87,9 @@ For each criterion below, provide a brief assessment followed by specific, actio
  * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
  * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
-**Final Answer**
- The final answer **must exactly match** the following format:
-```
-  * Project-Level README: Yes / No
-  * **Score:** <number from 0 to 100>
-  * **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
-  * **Overall Improvement Suggestions:**
-    - "Original text snippet 1" - Improving comment 1
-    - "Original text snippet 2" - Improving comment 2
-    - ...
-```
-  * **Project-Level README**: Indicate “Yes” if the README is project-level, otherwise “No.”
-  * **Score**: Provide an overall quality score (100 = perfect).
-  * **Key Strengths**: Provide the README's strongest points in 2-3 sentences
-  * **Overall Improvement Suggestions**:
-    * List each original text snippet that needs improvement, followed by your suggestion.
 ---
-### **Folder-Level README Evaluation**
-If the README is a **folder-level** file, use the following criteria instead.
+#### If if is a **folder-level** file, use the following criteria instead.
 For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
@@ -121,15 +118,30 @@ For each criterion below, provide a brief assessment followed by specific, actio
  * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
  * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
-**Final Answer**
-  The final answer **must exactly match** the following format:
+---
+### Final Report Format
+#### Your output **must exactly match**  the following template:
+**FinalAnswer**
  * Project-Level README: Yes / No
- * **Score:** <number from 0 to 100>
+ * **Score:** [Poor / Fair / Good / Excellent]
   * **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
   * **Overall Improvement Suggestions:**
     - "Original text snippet 1" - Improving comment 1
     - "Original text snippet 2" - Improving comment 2
     - ...
+#### Notes
+* **Project-Level README**: "Yes" if root-level; "No" if folder-level.
+* **Score**: Overall quality rating, could be Poor / Fair / Good / Excellent.
+* **Key Strengths**: Briefly highlight the README's strongest aspects.
+* **Improvement Suggestions**: Provide concrete snippets and suggested improvements.
 ---
 ### **README path:**

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/identification_execute_step.py RENAMED Viewed

@@ -60,11 +60,13 @@ Action Observation: {{Observation2}}
 ```
 Action: summarize_file_tool
 Action Input: README.md
+Action Input: "Please extract license information in summarized file content."
 Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
 ...
 Final Answer:
 Action: summarize_file_tool
 Action Input: README.md
+Action Input: "N/A"
 Action Observation: # BioGuider\nBioGuider is a Python package for bioinformatics.\n...
 ---
 Action: check_file_related_tool

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/agents/identification_task.py RENAMED Viewed

@@ -6,7 +6,7 @@ from enum import Enum
 from typing import Callable
 from pydantic import BaseModel, Field
 from langchain_openai.chat_models.base import BaseChatOpenAI
-from langchain.tools import Tool
+from langchain.tools import Tool, StructuredTool
 from langgraph.graph import StateGraph, START, END
 from bioguider.utils.constants import PrimaryLanguageEnum, ProjectTypeEnum
@@ -72,6 +72,36 @@ class IdentificationTask(AgentTask):
         self.custom_tools = []
         self.steps: list[PEOCommonStep] = []
+    def _prepare_tools(self):
+        tool_rd = read_directory_tool(repo_path=self.repo_path)
+        tool_sum = summarize_file_tool(
+            llm=self.llm,
+            repo_path=self.repo_path,
+            output_callback=self.step_callback,
+            db=self.summary_file_db,
+        )
+        tool_rf = read_file_tool(repo_path=self.repo_path)
+        self.tools = [tool_rd, tool_sum, tool_rf,]
+        self.custom_tools = [
+            Tool(
+                name = tool_rd.__class__.__name__,
+                func = tool_rd.run,
+                description=tool_rd.__class__.__doc__,
+            ),
+            StructuredTool.from_function(
+                tool_sum.run,
+                description=tool_sum.__class__.__doc__,
+                name=tool_sum.__class__.__name__,
+            ),
+            Tool(
+                name = tool_rf.__class__.__name__,
+                func = tool_rf.run,
+                description=tool_rf.__class__.__doc__,
+            ),
+        ]
+        self.custom_tools.append(CustomPythonAstREPLTool())
     def _initialize(self):
         if not os.path.exists(self.repo_path):
             raise ValueError(f"Repository path {self.repo_path} does not exist.")
@@ -81,22 +111,7 @@ class IdentificationTask(AgentTask):
         for f, f_type in file_pairs:
             self.repo_structure += f"{f} - {f_type}\n"
-        self.tools = [
-            summarize_file_tool(
-                llm=self.llm,
-                repo_path=self.repo_path,
-                output_callback=self._print_step,
-                db=self.summary_file_db,
-            ),
-            read_directory_tool(repo_path=self.repo_path, gitignore_path=self.gitignore_path),
-            read_file_tool(repo_path=self.repo_path),
-        ]
-        self.custom_tools = [Tool(
-            name=tool.__class__.__name__,
-            func=tool.run,
-            description=tool.__class__.__doc__,
-        ) for tool in self.tools]
-        self.custom_tools.append(CustomPythonAstREPLTool())
+        self._prepare_tools()
         self.steps = [
             IdentificationPlanStep(
                 llm=self.llm,

{bioguider-0.2.7 → bioguider-0.2.9}/bioguider/database/summarized_file_db.py RENAMED Viewed

@@ -18,22 +18,23 @@ summarized_files_create_table_query = f"""
 CREATE TABLE IF NOT EXISTS {SUMMARIZED_FILES_TABLE_NAME} (
     file_path VARCHAR(512),
     instruction TEXT,
+    summarize_prompt TEXT,
     summarize_level INTEGER,
     summarized_text TEXT,
     token_usage  VARCHAR(512),
     datetime TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d %H:%M:%f', 'now')),
-    UNIQUE (file_path, instruction, summarize_level)
+    UNIQUE (file_path, instruction, summarize_level, summarize_prompt)
 );
 """
 summarized_files_upsert_query = f"""
-INSERT INTO {SUMMARIZED_FILES_TABLE_NAME}(file_path, instruction, summarize_level, summarized_text, token_usage, datetime)
-VALUES (?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%f', 'now'))
-ON CONFLICT(file_path, instruction, summarize_level) DO UPDATE SET summarized_text=excluded.summarized_text,
+INSERT INTO {SUMMARIZED_FILES_TABLE_NAME}(file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, datetime)
+VALUES (?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%f', 'now'))
+ON CONFLICT(file_path, instruction, summarize_level, summarize_prompt) DO UPDATE SET summarized_text=excluded.summarized_text,
 datetime=strftime('%Y-%m-%d %H:%M:%f', 'now');
 """
 summarized_files_select_query = f"""
 SELECT summarized_text, datetime FROM {SUMMARIZED_FILES_TABLE_NAME}
-where file_path = ? and instruction = ? and summarize_level = ?;
+where file_path = ? and instruction = ? and summarize_level = ? and summarize_prompt=?;
 """
 class SummarizedFilesDb:
@@ -83,6 +84,7 @@ class SummarizedFilesDb:
         file_path: str,
         instruction: str,
         summarize_level: int,
+        summarize_prompt: str,
         summarized_text: str,
         token_usage: dict | None = None
     ):
@@ -96,7 +98,7 @@ class SummarizedFilesDb:
             cursor = self.connection.cursor()
             cursor.execute(
                 summarized_files_upsert_query,
-                (file_path, instruction, summarize_level, summarized_text, token_usage, )
+                (file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, )
             )
             self.connection.commit()
             return True
@@ -112,6 +114,7 @@ class SummarizedFilesDb:
         file_path: str,
         instruction: str,
         summarize_level: int,
+        summarize_prompt: str,
     ) -> str | None:
         self._connect_to_db()
         self._ensure_tables()
@@ -119,7 +122,7 @@ class SummarizedFilesDb:
             cursor = self.connection.cursor()
             cursor.execute(
                 summarized_files_select_query,
-                (file_path, instruction, summarize_level,)
+                (file_path, instruction, summarize_level, summarize_prompt,)
             )
             row = cursor.fetchone()
             if row is None:

{bioguider-0.2.7 → bioguider-0.2.9}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bioguider"
-version = "0.2.7"
+version = "0.2.9"
 description = "An AI-Powered package to help biomedical developers to generate clear documentation"
 authors = [
     "Cankun Wang <Cankun.Wang@osumc.edu>",