PyPI - bioguider - Versions diffs - 0.2.3__tar.gz → 0.2.4__tar.gz - Mend

bioguider 0.2.3.tar.gz → 0.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of bioguider might be problematic. Click here for more details.

Files changed (47)

{bioguider-0.2.3 → bioguider-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: bioguider
-Version: 0.2.3
+Version: 0.2.4
 Summary: An AI-Powered package to help biomedical developers to generate clear documentation
 License: MIT
 Author: Cankun Wang

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/common_agent_2step.py RENAMED Viewed

@@ -113,6 +113,95 @@ class CommonAgentTwoSteps(CommonAgent):
             logger.error(str(e))
             raise e
         processed_res = None
+        if post_process is not None:
+            try:
+                processed_res = post_process(res, **kwargs)
+            except RetryException as e:
+                logger.error(str(e))
+                self.exceptions = [e] if self.exceptions is None else self.exceptions + [e]
+                raise e
+            except Exception as e:
+                logger.error(str(e))
+                raise e
+        return res, processed_res, self.token_usage, reasoning_process
+FINAL_STEP_SYSTEM_PROMPTS = ChatPromptTemplate.from_template("""
+---
+You will be given a response generated by a LLM, which includes a **step-by-step reasoning process** followed by a clearly marked **final answer**.
+### **Your Task:**
+Extract and return only the content of the **final answer**.
+---
+### **Important Instructions:**
+1. Your task is to **extract only the final answer** from the provided reasoning process.
+   **Do not** make any judgments, interpretations, or modifications to the content.
+### **Input:**
+{llm_response}
+---
+""")
+class CommonAgentTwoChainSteps(CommonAgentTwoSteps):
+    def __init__(self, llm):
+        super().__init__(llm)
+    def _invoke_agent(self, system_prompt, instruction_prompt, schema, post_process = None, **kwargs):
+        # Initialize the callback handler
+        callback_handler = OpenAICallbackHandler()
+        processed_system_prompt = system_prompt.replace("{", "(").replace("}", ")")
+        cot_prompt = self._build_prompt_for_cot_step(
+            system_prompt=processed_system_prompt,
+            instruction_prompt=instruction_prompt
+        )
+        try:
+            # First, use llm to do CoT
+            msgs = cot_prompt.invoke(input={}).to_messages()
+            cot_res = self.llm.generate(messages=[msgs])
+            if cot_res is None or cot_res.llm_output is None:
+                raise Exception("llm generate invalid output")
+            reasoning_process = cot_res.generations[0][0].text
+            token_usage: Any = cot_res.llm_output.get("token_usage")
+            cot_tokens = {
+                "total_tokens": token_usage.get("total_tokens", 0),
+                "prompt_tokens": token_usage.get("prompt_tokens", 0),
+                "completion_tokens": token_usage.get("completion_tokens", 0),
+            }
+            self._incre_token_usage(cot_tokens)
+        except Exception as e:
+            logger.error(str(e))
+            raise e
+        try:
+            # Then use the reasoning process to do the structured output
+            processed_reasoning_process = reasoning_process.replace("{", "{{").replace("}", "}}")
+            final_msg = FINAL_STEP_SYSTEM_PROMPTS.format(
+                llm_response=processed_reasoning_process,
+            )
+            msgs = [(
+                "human",
+                final_msg,
+            )]
+            final_prompt = ChatPromptTemplate.from_messages(msgs)
+            agent = final_prompt | self.llm.with_structured_output(schema)
+            res = agent.invoke(
+                input={},
+                config={
+                    "callbacks": [callback_handler],
+                },
+            )
+            self._incre_token_usage(callback_handler)
+        except Exception as e:
+            logger.error(str(e))
+            raise e
+        processed_res = None
         if post_process is not None:
             try:
                 processed_res = post_process(res, **kwargs)

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/dockergeneration_observe_step.py RENAMED Viewed

@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
 from bioguider.agents.agent_utils import run_command, read_file
 from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.peo_common_step import PEOCommonStep
 DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT = """You are an expert in software containerization and reproducibility engineering.
@@ -93,7 +93,7 @@ class DockerGenerationObserveStep(PEOCommonStep):
             if code != 0:
                 error_msg = DockerGenerationObserveStep._extract_error_message(error)
                 system_prompt = self._build_system_prompt(state, error_msg, "N/A")
-                agent = CommonAgentTwoSteps(llm=self.llm)
+                agent = CommonAgentTwoChainSteps(llm=self.llm)
                 res, _, token_usage, reasoning = agent.go(
                     system_prompt=system_prompt,
                     instruction_prompt="Now, let's begin observing.",
@@ -125,7 +125,7 @@ class DockerGenerationObserveStep(PEOCommonStep):
                     "docker build successfully.",
                     error,
                 )
-                agent = CommonAgentTwoSteps(llm=self.llm)
+                agent = CommonAgentTwoChainSteps(llm=self.llm)
                 res, _, token_usage, reasoning = agent.go(
                     system_prompt=system_prompt,
                     instruction_prompt="Now, let's begin observing.",

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/dockergeneration_plan_step.py RENAMED Viewed

@@ -12,7 +12,7 @@ from bioguider.agents.agent_utils import (
     PlanAgentResultJsonSchema,
 )
 from bioguider.agents.peo_common_step import PEOCommonStep
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.dockergeneration_task_utils import (
     DockerGenerationWorkflowState,
     prepare_provided_files_string,
@@ -140,7 +140,7 @@ class DockerGenerationPlanStep(PEOCommonStep):
     def _execute_directly(self, state: DockerGenerationWorkflowState):
         system_prompt = self._prepare_system_prompt(state)
-        agent = CommonAgentTwoSteps(llm=self.llm)
+        agent = CommonAgentTwoChainSteps(llm=self.llm)
         res, _, token_usage, reasoning = agent.go(
             system_prompt=system_prompt,
             instruction_prompt="Now, let's begin to make a plan",

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/evaluation_task.py RENAMED Viewed

@@ -2,14 +2,15 @@
 import os
 from pathlib import Path
 import logging
-from typing import Callable
+from typing import Callable, Optional
 from abc import ABC, abstractmethod
 from langchain.prompts import ChatPromptTemplate
 from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
 from bioguider.agents.agent_utils import read_file
-from bioguider.utils.constants import ProjectMetadata
-from .common_agent_2step import CommonAgentTwoSteps
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
+from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
 from .common_agent import CommonConversation
 from ..utils.pyphen_utils import PyphenReadability
 from ..utils.gitignore_checker import GitignoreChecker
@@ -19,12 +20,19 @@ logger = logging.getLogger(__name__)
 EVALUATION_README_SYSTEM_PROMPT = """
 You are an expert in evaluating the quality of README files in software repositories. Your task is to analyze the provided README file and generate a comprehensive quality report.
-### **README Quality Report**
+---
-For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
+### **Step 1:  Identify README type
+First, determine whether the provided README is a **project-level README** (typically at the root of a repository) or a **folder-level README** (typically inside subdirectories).
 ---
+### **Project-level README Evaluation**
+If the README is a **project-level** file, evaluate it using the following criteria.
+For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
 **1. Project Clarity & Purpose**
  * **Assessment**: [Your evaluation of whether the project's purpose is clear.]
  * **Improvement Suggestions**:
@@ -62,11 +70,53 @@ For each criterion below, provide a brief assessment followed by specific, actio
  * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
  * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
-**7. Overall Quality Summary**
+**Final Answer**
+ * Project-Level README: Yes / No
  * Provide a final, overall assessment of the README file's quality, summarizing the key strengths and areas for improvement.
 ---
+### **Folder-Level README Evaluation**
+If the README is a **folder-level** file, use the following criteria instead.
+For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
+**1. Folder Description**
+ * **Assessment**: [Your evaluation of whether it Provides a clear **description** of what the folder contains (e.g., modules, scripts, data).]
+ * **Improvement Suggestions**:
+    * **Original text:** [Quote a specific line/section from the README.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+**2. Folder Purpose**
+ * **Assessment**: [Your evaluation of whether it explains the **purpose** or **role** of the components inside this subfolder.]
+ * **Improvement Suggestions**:
+    * **Original text:** [Quote text related to purpose.]
+    * **Improving comments:** [Provide your suggestions.]
+**3. Usage**
+ * **Assessment**: [Your evaluation of whether it includes **usage instructions** specific to this folder (e.g., commands, import paths, input/output files).]
+ * **Improvement Suggestions**:
+    * **Original text:** [Quote text related to usage.]
+    * **Improving comments:** [Provide your suggestions.]
+**4. Readability Analysis**
+ * **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
+ * **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
+ * **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
+ * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
+ * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
+**Final Answer**
+ * Project-Level README: Yes / No
+ * Provide a final, overall assessment of the README file's quality, summarizing the key strengths and areas for improvement.
+---
+### **README path:**
+{readme_path}
+---
 ### **README Content:**
 {readme_content}
 """
@@ -100,11 +150,11 @@ class EvaluationTask(ABC):
             token_usage=token_usage,
         )
-    def evaluate(self, files: list[str] | None = None):
+    def evaluate(self, files: list[str] | None = None) -> dict:
         self._enter_evaluation()
-        evaluation, token_usage = self._evaluate(files)
+        evaluations, token_usage = self._evaluate(files)
         self._leave_evaluation(token_usage)
-        return evaluation
+        return evaluations
     def _enter_evaluation(self):
         self.print_step(step_name=self.evaluation_name)
@@ -113,9 +163,13 @@ class EvaluationTask(ABC):
         self.print_step(token_usage=token_usage)
     @abstractmethod
-    def _evaluate(self, files: list[str]):
+    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
         pass
+class EvaluationREADMEResult(BaseModel):
+    project_level: Optional[bool]=Field(description="a boolean value specifying if the README file is **project-level** README. TRUE: project-level, FALSE, folder-level")
+    overall_assessment: Optional[str]=Field(description="an overall assessment")
 class EvaluationREADMETask(EvaluationTask):
     def __init__(
         self,
@@ -128,7 +182,7 @@ class EvaluationREADMETask(EvaluationTask):
         super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
         self.evaluation_name = "README Evaluation"
-    def _evaluate(self, files: list[str]):
+    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
         readme_files = files
         if readme_files is None or len(readme_files) == 0:
             return None
@@ -140,6 +194,8 @@ class EvaluationREADMETask(EvaluationTask):
             if readme_content is None:
                 logger.error(f"Error in reading file {readme_file}")
                 continue
+            if len(readme_content.strip()) == 0:
+                readme_content = "empty file"
             readability = PyphenReadability()
             flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
@@ -148,19 +204,28 @@ class EvaluationREADMETask(EvaluationTask):
                 EVALUATION_README_SYSTEM_PROMPT
             ).format(
                 readme_content=readme_content,
+                readme_path=readme_file,
                 flesch_reading_ease=flesch_reading_ease,
                 flesch_kincaid_grade=flesch_kincaid_grade,
                 gunning_fog_index=gunning_fog_index,
                 smog_index=smog_index,
             )
-            conversation = CommonConversation(llm=self.llm)
-            response, token_usage = conversation.generate(
+            # conversation = CommonConversation(llm=self.llm)
+            agent = CommonAgentTwoChainSteps(llm=self.llm)
+            response, _, token_usage, reasoning_process = agent.go(
                 system_prompt=system_prompt,
-                instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation."
+                instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation.",
+                schema=EvaluationREADMEResult,
             )
             self.print_step(step_output=f"README: {readme_file}")
-            self.print_step(step_output=response)
-            readme_evaluations[readme_file] = response
+            self.print_step(step_output=reasoning_process)
+            readme_evaluations[readme_file] = {
+                "evaluation": {
+                    "project_level": response.project_level,
+                    "overall_assessment": response.overall_assessment,
+                },
+                "reasoning_process": reasoning_process
+            }
         return readme_evaluations, token_usage
 EVALUATION_TUTORIAL_SYSTEM_PROMPT="""
@@ -233,9 +298,9 @@ class EvaluationTutorialTask(EvaluationTask):
         super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
         self.evaluation_name = "Tutorial Evaluation"
-    def _evaluate(self, files: list[str]):
+    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
         if len(files) == 0:
-            return None
+            return {}, {**DEFAULT_TOKEN_USAGE}
         evaluations = {}
         for file in files:

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/identification_observe_step.py RENAMED Viewed

@@ -2,7 +2,7 @@
 from langchain.prompts import ChatPromptTemplate
 from bioguider.agents.agent_utils import ObservationResult
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
 from bioguider.agents.identification_task_utils import IdentificationWorkflowState
 from bioguider.agents.peo_common_step import PEOWorkflowState, PEOCommonStep
@@ -70,7 +70,7 @@ class IdentificationObserveStep(PEOCommonStep):
     def _execute_directly(self, state: IdentificationWorkflowState):
         system_prompt = self._prepare_system_prompt(state)
-        agent = CommonAgentTwoSteps(llm=self.llm)
+        agent = CommonAgentTwoChainSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
             system_prompt=system_prompt,
             instruction_prompt="Now, let's begin.",

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/identification_plan_step.py RENAMED Viewed

@@ -5,7 +5,7 @@ from langchain.tools import BaseTool
 from pydantic import BaseModel, Field
 from bioguider.agents.agent_utils import get_tool_names_and_descriptions
-from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.identification_task_utils import IdentificationWorkflowState
 from bioguider.agents.peo_common_step import PEOCommonStep
@@ -113,7 +113,7 @@ class IdentificationPlanStep(PEOCommonStep):
     def _execute_directly(self, state: IdentificationWorkflowState):
         system_prompt = self._prepare_system_prompt(state)
-        agent = CommonAgentTwoSteps(llm=self.llm)
+        agent = CommonAgentTwoChainSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
             system_prompt=system_prompt,
             instruction_prompt="Now, let's begin.",

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/managers/evaluation_manager.py RENAMED Viewed

@@ -57,7 +57,7 @@ class EvaluationManager:
         )
         return self.project_metadata
-    def evaluate_readme(self):
+    def evaluate_readme(self) -> tuple[any, list[str]]:
         task = EvaluationREADMETask(
             llm=self.llm,
             repo_path=self.rag.repo_dir,
@@ -67,7 +67,7 @@ class EvaluationManager:
         )
         readme_files = self._find_readme_files()
         results = task.evaluate(readme_files)
-        return results
+        return results, readme_files
     def evaluate_tutorial(self):
         task = CollectionTask(
@@ -97,7 +97,7 @@ class EvaluationManager:
         repo_path = self.rag.repo_dir
         gitignore_path = Path(repo_path, ".gitignore")
         gitignore_checker = GitignoreChecker(
-            directory=self.repo_path, gitignore_path=gitignore_path
+            directory=repo_path, gitignore_path=gitignore_path
         )
         found_readme_files = gitignore_checker.check_files_and_folders(
             check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/rag/data_pipeline.py RENAMED Viewed

@@ -518,6 +518,21 @@ class DatabaseManager:
         self.reset_database()
         self._create_repo(repo_url_or_path, access_token)
         return self.prepare_db_index()
+    def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) -> str:
+        # Extract owner and repo name to create unique identifier
+        url_parts = repo_url_or_path.rstrip('/').split('/')
+        if repo_type in ["github", "gitlab", "bitbucket"] and len(url_parts) >= 5:
+            # GitHub URL format: https://github.com/owner/repo
+            # GitLab URL format: https://gitlab.com/owner/repo or https://gitlab.com/group/subgroup/repo
+            # Bitbucket URL format: https://bitbucket.org/owner/repo
+            owner = url_parts[-2]
+            repo = url_parts[-1].replace(".git", "")
+            repo_name = f"{owner}_{repo}"
+        else:
+            repo_name = url_parts[-1].replace(".git", "")
+        return repo_name
     def reset_database(self):
         """
@@ -545,19 +560,18 @@ class DatabaseManager:
             root_path = retrieve_data_root_path()
             os.makedirs(root_path, exist_ok=True)
+            repo_type = "unknown"
             # url
             if repo_url_or_path.startswith("https://") or repo_url_or_path.startswith("http://"):
                 # Extract repo name based on the URL format
                 if "github.com" in repo_url_or_path:
                     # GitHub URL format: https://github.com/owner/repo
-                    repo_name = repo_url_or_path.split("/")[-1].replace(".git", "")
+                    repo_type = "github"
                 elif "gitlab.com" in repo_url_or_path:
                     # GitLab URL format: https://gitlab.com/owner/repo or https://gitlab.com/group/subgroup/repo
                     # Use the last part of the URL as the repo name
-                    repo_name = repo_url_or_path.split("/")[-1].replace(".git", "")
-                else:
-                    # Generic handling for other Git URLs
-                    repo_name = repo_url_or_path.split("/")[-1].replace(".git", "")
+                    repo_type = "gitlab"
+                repo_name = self._extract_repo_name_from_url(repo_url_or_path, repo_type)
                 save_repo_dir = os.path.join(root_path, "repos", repo_name)

{bioguider-0.2.3 → bioguider-0.2.4}/bioguider/utils/file_utils.py RENAMED Viewed

@@ -117,9 +117,11 @@ def parse_repo_url(url: str) -> tuple[str | None, str | None]:
     except Exception:
         return None, None
-def retrieve_data_root_path():
+def retrieve_data_root_path() -> Path:
     data_folder = os.environ.get("DATA_FOLDER", "./data")
-    return data_folder
+    root_folder = Path(data_folder, ".adalflow")
+    return root_folder.absolute()

{bioguider-0.2.3 → bioguider-0.2.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bioguider"
-version = "0.2.3"
+version = "0.2.4"
 description = "An AI-Powered package to help biomedical developers to generate clear documentation"
 authors = [
     "Cankun Wang <Cankun.Wang@osumc.edu>",