bioguider 0.2.10__tar.gz → 0.2.11__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of bioguider might be problematic.
- {bioguider-0.2.10 → bioguider-0.2.11}/PKG-INFO +1 -1
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/agent_task.py +1 -1
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/agent_utils.py +31 -1
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/collection_task.py +12 -24
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/evaluation_installation_task.py +101 -12
- bioguider-0.2.11/bioguider/agents/evaluation_readme_task.py +473 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/evaluation_task.py +4 -109
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/identification_task.py +37 -25
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/prompt_utils.py +3 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/managers/evaluation_manager.py +1 -1
- {bioguider-0.2.10 → bioguider-0.2.11}/pyproject.toml +1 -1
- {bioguider-0.2.10 → bioguider-0.2.11}/LICENSE +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/README.md +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/__init__.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/__init__.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/agent_tools.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/collection_execute_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/collection_observe_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/collection_plan_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/collection_task_utils.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/common_agent.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/common_agent_2step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/common_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/dockergeneration_execute_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/dockergeneration_observe_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/dockergeneration_plan_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/dockergeneration_task.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/dockergeneration_task_utils.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/identification_execute_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/identification_observe_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/identification_plan_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/identification_task_utils.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/peo_common_step.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/python_ast_repl_tool.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/agents/rag_collection_task.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/conversation.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/database/summarized_file_db.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/rag/__init__.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/rag/config.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/rag/data_pipeline.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/rag/embedder.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/rag/rag.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/settings.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/utils/constants.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/utils/default.gitignore +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/utils/file_utils.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/utils/gitignore_checker.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/utils/pyphen_utils.py +0 -0
- {bioguider-0.2.10 → bioguider-0.2.11}/bioguider/utils/utils.py +0 -0
bioguider/agents/agent_utils.py:

```diff
@@ -1,5 +1,6 @@
 
 import json
+from json import JSONDecodeError
 import os
 import re
 import subprocess
```
bioguider/agents/agent_utils.py (continued):

```diff
@@ -376,4 +377,33 @@ def escape_braces(text: str) -> str:
     text = re.sub(r'(?<!})}(?!})', '}}', text)
     # Then replace single { not part of {{
     text = re.sub(r'(?<!{){(?!{)', '{{', text)
-    return text
+    return text
+
+def try_parse_json_object(json_obj: str) -> dict | None:
+    json_obj = json_obj.strip()
+
+    # First, try to parse
+    try:
+        obj = json.loads(json_obj)
+        return obj
+    except JSONDecodeError as e:
+        logger.error(e)
+
+    # Second, let's handle some common errors
+    if not json_obj.startswith("{") and not json_obj.endswith("}") and ":" in json_obj:
+        json_obj = "{" + json_obj + "}"
+    if json_obj.startswith("{{"):
+        json_obj = json_obj[1:]
+    if json_obj.endswith("}}"):
+        json_obj = json_obj[:-1]
+
+    # Finally, let's try to parse again
+    try:
+        obj = json.loads(json_obj)
+        return obj
+    except JSONDecodeError as e:
+        logger.error(e)
+        return None
+    except Exception as e:
+        logger.error(e)
+        return None
```
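The helper added here first tries a strict `json.loads`, then repairs two failure modes that are common in LLM output (a missing outer brace pair, and doubled braces left over from `{{...}}` prompt-template escaping) before retrying. A minimal sketch of the resulting behavior, assuming bioguider is installed; the payloads are invented:

```python
from bioguider.agents.agent_utils import try_parse_json_object

# Well-formed JSON parses on the first attempt.
print(try_parse_json_object('{"final_answer": ["README.md"]}'))

# A missing outer brace pair is repaired before the retry.
print(try_parse_json_object('"final_answer": ["README.md"]'))

# Doubled braces from {{...}} prompt escaping are trimmed.
print(try_parse_json_object('{{"final_answer": ["README.md"]}}'))

# Unrecoverable input returns None instead of raising.
print(try_parse_json_object('not json at all'))
```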
bioguider/agents/collection_task.py:

```diff
@@ -24,7 +24,7 @@ from langgraph.graph import StateGraph, START, END
 
 from bioguider.database.summarized_file_db import SummarizedFilesDb
 from bioguider.utils.file_utils import get_file_type
-from bioguider.agents.agent_utils import read_directory
+from bioguider.agents.agent_utils import read_directory, try_parse_json_object
 from bioguider.agents.collection_task_utils import (
     RELATED_FILE_GOAL_ITEM,
     CollectionWorkflowState,
```
bioguider/agents/collection_task.py (continued):

```diff
@@ -172,28 +172,16 @@ class CollectionTask(AgentTask):
         if s["final_answer"] is None:
             return None
         result = s["final_answer"].strip()
-        try:
-            json_obj = json.loads(result)
-            result = json_obj["final_answer"]
-            if isinstance(result, str):
-                result = result.strip()
-                return [result]
-            elif isinstance(result, list):
-                return result
-            else:
-                logger.error(f"Final answer is not a valid JSON list or string: {result}")
-                return None
-        except json.JSONDecodeError:
+        the_obj = try_parse_json_object(result)
+        if the_obj is None or "final_answer" not in the_obj:
             logger.error(f"Final answer is not a valid JSON: {result}")
             return None
-
-
-
-
-
-
-
-
-
-
-
+        final_result = the_obj["final_answer"]
+        if isinstance(final_result, str):
+            final_result = final_result.strip()
+            return [final_result]
+        elif isinstance(final_result, list):
+            return final_result
+        else:
+            logger.error(f"Final answer is not a valid JSON list or string: {result}")
+            return None
```
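The rewritten parser above delegates the error-tolerant parsing to `try_parse_json_object` and then normalizes the answer shape: a bare string becomes a one-element list, a list passes through unchanged, and anything else is rejected. A small sketch of that contract, with an invented payload:

```python
from bioguider.agents.agent_utils import try_parse_json_object

raw = '{"final_answer": "docs/INSTALL.md"}'
the_obj = try_parse_json_object(raw)
answer = the_obj["final_answer"]
# A string answer is wrapped in a list; a list answer is returned as-is.
files = [answer.strip()] if isinstance(answer, str) else answer
print(files)  # ['docs/INSTALL.md']
```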
bioguider/agents/evaluation_installation_task.py:

```diff
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field
 from markdownify import markdownify as md
 
 from bioguider.agents.agent_utils import read_file
+from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
 from bioguider.rag.data_pipeline import count_tokens
 from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
```
bioguider/agents/evaluation_installation_task.py (continued):

```diff
@@ -16,11 +17,52 @@ from .common_agent import CommonConversation
 from ..utils.pyphen_utils import PyphenReadability
 from ..utils.gitignore_checker import GitignoreChecker
 from .evaluation_task import EvaluationTask
-from .agent_utils import read_file
+from .agent_utils import increase_token_usage, read_file
 
 
 logger = logging.getLogger(__name__)
 
+STRUCTURED_EVALUATION_INSTALLATION_SYSTEM_PROMPT = """
+You are an expert in evaluating the quality of installation information in software repositories.
+Your task is to analyze the provided files related to installation and generate a structured quality assessment based on the following criteria.
+---
+
+### **Evaluation Criteria**
+
+1. **Installation Available**: Is the installation documents accessible and present?
+* Output: `Yes` or `No`
+
+2. **Installation Tutorial**: Is the installation tutorial provided?
+* Ouput: `Yes` or `No`
+
+3. **Number of required Dependencies Installation**: The number of dependencies that are required to install
+* Output: Number
+* Suggest specific improvements if necessary, such as missing dependencies
+
+4. **Overall Score**: Give an overall quality rating of the Installation information.
+* Output: `Poor`, `Fair`, `Good`, or `Excellent`
+
+---
+
+### **Final Report Ouput**
+Your final report must **exactly match** the following format. Do not add or omit any sections.
+
+**FinalAnswer**
+**Install Available:** [Yes / No]
+**Install Tutorial:** [Yes / No]
+**Dependency:**
+* number: [Number]
+* suggestions: <suggestion to improve **dependency information** like missing dependencies
+**Overall Score:** [Poor / Fair / Good / Excellent]
+
+---
+
+### Installation Files Provided:
+{installation_files_content}
+
+"""
+
+
 EVALUATION_INSTALLATION_SYSTEM_PROMPT = """
 You are an expert in evaluating the quality of **installation instructions** in software repositories.
 Your task is to analyze the provided content of installation-related files and generate a **comprehensive, structured quality report**.
```
bioguider/agents/evaluation_installation_task.py (continued):

```diff
@@ -62,10 +104,17 @@ Your response **must exactly follow** the structure below:
 ---
 
 ### Installation Files Provided:
-{
+{installation_files_content}
 
 """
 
+class StructuredEvaluationInstallationResult(BaseModel):
+    install_available: Optional[bool]=Field(description="A boolean value. Is the installation documents accessible and present?")
+    install_tutorial: Optional[bool]=Field(description="A boolean value. Is the installation tutorial provided?")
+    dependency_number: Optional[int]=Field(description="A number. It is the number of dependencies that are required to install.")
+    dependency_suggestions: Optional[str]=Field(description="A string value. It is the specific improvements if necessary, such as missing dependencies")
+    overall_score: Optional[str]=Field(description="A overall scroll for the installation quality, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+
 class EvaluationInstallationResult(BaseModel):
     ease_of_access: Optional[str]=Field(description="Is the installation information easy to access")
     score: Optional[str]=Field(description="An overall score, could be Poor, Fair, Good or Excellent")
```
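The new `StructuredEvaluationInstallationResult` model gives the installation evaluation a machine-readable counterpart to the free-form report. What a populated instance might look like, with invented field values:

```python
from bioguider.agents.evaluation_installation_task import (
    StructuredEvaluationInstallationResult,
)

result = StructuredEvaluationInstallationResult(
    install_available=True,
    install_tutorial=True,
    dependency_number=4,
    dependency_suggestions="List optional extras such as GPU wheels explicitly.",
    overall_score="Good",
)
print(result)
```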
bioguider/agents/evaluation_installation_task.py (continued):

```diff
@@ -118,10 +167,10 @@ class EvaluationInstallationTask(EvaluationTask):
         super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
         self.evaluation_name = "Installation Evaluation"
 
-
+
+    def _collect_install_files_content(self, files: list[str] | None=None) -> str:
         if files is None or len(files) == 0:
-            return
-
+            return "N/A"
         files_content = ""
         MAX_TOKENS = os.environ.get("OPENAI_MAX_INPUT_TOKENS", 102400)
         for f in files:
```
bioguider/agents/evaluation_installation_task.py (continued):

```diff
@@ -137,24 +186,64 @@ class EvaluationInstallationTask(EvaluationTask):
 {content}
 
 """
+        return files_content
+
+    def _structured_evaluate(self, files: list[str] | None = None) -> tuple[dict|None, dict]:
+        if files is None or len(files) == 0:
+            return None, {**DEFAULT_TOKEN_USAGE}
+
+        files_content = self._collect_install_files_content(files)
+        system_prompt = ChatPromptTemplate.from_template(
+            STRUCTURED_EVALUATION_INSTALLATION_SYSTEM_PROMPT,
+        ).format(
+            installation_files_content=files_content,
+        )
+        agent = CommonAgentTwoChainSteps(llm=self.llm)
+        res, _, token_usage, reasoning_process = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt=EVALUATION_INSTRUCTION,
+            schema=StructuredEvaluationInstallationResult,
+        )
+        self.print_step(step_output=reasoning_process)
+        self.print_step(token_usage=token_usage)
+
+        return {
+            "structured_evaluation": res,
+            "structured_reasoning_process": reasoning_process,
+        }, token_usage
+
+    def _free_evaluate(self, files: list[str] | None=None) -> tuple[dict|None, dict]:
+        if files is None or len(files) == 0:
+            return None, {**DEFAULT_TOKEN_USAGE}
+
+        files_content = self._collect_install_files_content(files)
         system_prompt = ChatPromptTemplate.from_template(EVALUATION_INSTALLATION_SYSTEM_PROMPT).format(
-
+            installation_files_content=files_content
         )
         agent = CommonAgentTwoChainSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
             system_prompt=system_prompt,
-            instruction_prompt=
+            instruction_prompt=EVALUATION_INSTRUCTION,
             schema=EvaluationInstallationResultSchema,
         )
         res = EvaluationInstallationResult(**res)
         self.print_step(step_output=reasoning_process)
+        self.print_step(token_usage=token_usage)
         evaluation = {
-            "
-            "ease_of_access": res.ease_of_access,
-            "hardware_requirements": res.hardware_requirements,
-            "clarity_of_dependency": res.clarity_of_dependency,
-            "installation_guide": res.installation_guide,
+            "evaluation": res,
             "reasoning_process": reasoning_process,
         }
         return evaluation, token_usage
+
+    def _evaluate(self, files: list[str] | None = None) -> tuple[dict | None, dict]:
+        evaluation, token_usage = self._free_evaluate(files)
+        structured_evaluation, structured_token_usage = self._structured_evaluate(files)
+
+        combined_evaluation = {
+            **evaluation,
+            **structured_evaluation,
+        }
+        total_token_usage = increase_token_usage(token_usage, structured_token_usage)
+
+        return combined_evaluation, total_token_usage
 
```
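The new `_evaluate` runs both passes and merges their result dictionaries, so one call now returns the free-form report alongside the structured assessment, with token usage summed via `increase_token_usage`. A hedged driver sketch, assuming an OpenAI-backed chat model and a locally cloned repository; the model name and paths are illustrative:

```python
from langchain_openai import ChatOpenAI
from bioguider.agents.evaluation_installation_task import EvaluationInstallationTask

task = EvaluationInstallationTask(
    llm=ChatOpenAI(model="gpt-4o"),
    repo_path="/tmp/example-repo",
    gitignore_path="/tmp/example-repo/.gitignore",
)
evaluation, token_usage = task._evaluate(["INSTALL.md"])
# The merged dict carries "evaluation" / "reasoning_process" from the free
# pass plus "structured_evaluation" / "structured_reasoning_process".
print(sorted(evaluation.keys()), token_usage)
```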
bioguider/agents/evaluation_readme_task.py (new file):

```diff
@@ -0,0 +1,473 @@
+
+import logging
+from pathlib import Path
+from typing import Callable, Optional
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+
+from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
+
+from ..utils.pyphen_utils import PyphenReadability
+from bioguider.agents.agent_utils import increase_token_usage, read_file, summarize_file
+from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps
+from bioguider.agents.evaluation_task import EvaluationTask
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
+
+logger = logging.getLogger(__name__)
+
+STRUCTURED_EVALUATION_README_SYSTEM_PROMPT = """
+You are an expert in evaluating the quality of README files in software repositories.
+Your task is to analyze the provided README file and generate a structured quality assessment based on the following criteria.
+If a LICENSE file is present in the repository, its content will also be provided to support your evaluation of license-related criteria.
+---
+
+### **Evaluation Criteria**
+
+1. **Available**: Is the README accessible and present?
+* Output: `Yes` or `No`
+
+2. **Readability**: Evaluate based on readability metrics such as Flesch-Kincaid Grade Level, SMOG Index, etc.
+* Output: `Poor`, `Fair`, `Good`, or `Excellent`
+* Suggest specific improvements if necessary
+
+3. **Project Purpose**: Is the project's goal or function clearly stated?
+* Output: `Yes` or `No`
+* Provide suggestions if unclear
+
+4. **Hardware and Software Requirements**: Are hardware/software specs and compatibility details included?
+* Output: `Poor`, `Fair`, `Good`, or `Excellent`
+* Suggest how to improve the section if needed
+
+5. **Dependencies**: Are all necessary software libraries and dependencies clearly listed?
+* Output: `Poor`, `Fair`, `Good`, or `Excellent`
+* Suggest improvements if applicable
+
+6. **License Information**: Is license type clearly indicated?
+* Output: `Yes` or `No`
+* Suggest improvement if missing or unclear
+
+7. **Author / Contributor Info**: Are contributor or maintainer details provided?
+* Output: `Yes` or `No`
+* Suggest improvement if missing
+
+8. **Overall Score**: Give an overall quality rating of the README.
+* Output: `Poor`, `Fair`, `Good`, or `Excellent`
+
+---
+
+### **Readability Metrics**
+* **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
+* **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
+* **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
+* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
+
+---
+
+### **Final Report Ouput**
+Your final report must **exactly match** the following format. Do not add or omit any sections.
+
+**FinalAnswer**
+**Available:** [Yes / No]
+**Readability:**
+* score: [Poor / Fair / Good / Excellent]
+* suggestions: <suggestions to improve README readability>
+**Project Purpose:**
+* score: [Yes / No]
+* suggestions: <suggestions to improve project purpose.>
+**Hardware and software spec and compatibility description:**
+* score: [Poor / Fair / Good / Excellent]
+* suggestions: <suggestions to improve **hardware and software** description>
+**Dependencies clearly stated:**
+* score: [Poor / Fair / Good / Excellent]
+* suggestions: <suggestions to improve **Dependencies** description>
+**License Information Included:**
+* score: [Yes / No]
+* suggestions: <suggestions to improve **License Information**>
+**Overall Score:** [Poor / Fair / Good / Excellent]
+
+---
+
+### **README Path**
+{readme_path}
+
+---
+
+### **README content**
+{readme_content}
+
+---
+
+### **LICENSE Path**
+{license_path}
+
+---
+
+### **LICENSE Summarized Content**
+{license_summarized_content}
+
+"""
+
+EVALUATION_README_SYSTEM_PROMPT = """
+You are an expert in evaluating the quality of README files in software repositories.
+Your task is to analyze the provided README file and generate a comprehensive quality report.
+
+---
+
+### **Step 1: Identify README type
+
+First, determine whether the provided README is a **project-level README** (typically at the root of a repository) or a **folder-level README** (typically inside subdirectories).
+
+---
+
+### **Evaluation Criteria**
+
+#### If the README is a **project-level** file, evaluate it using the following criteria.
+
+For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
+
+**1. Project Clarity & Purpose**
+* **Assessment**: [Your evaluation of whether the project's purpose is clear.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the README.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+* **Original text:** [Quote a specific line/section from the README.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+...
+
+**2. Installation Instructions**
+* **Assessment**: [Your evaluation of the installation instructions.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote text related to installation.]
+* **Improving comments:** [Provide your suggestions.]
+* **Original text:** [Quote text related to installation.]
+* **Improving comments:** [Provide your suggestions.]
+...
+
+**3. Usage Instructions**
+* **Assessment**: [Your evaluation of the usage instructions.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote text related to usage.]
+* **Improving comments:** [Provide your suggestions.]
+* **Original text:** [Quote text related to usage.]
+* **Improving comments:** [Provide your suggestions.]
+...
+
+**4. Contributing Guidelines**
+* **Assessment**: [Your evaluation of the contributing guidelines.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote text related to contributions.]
+* **Improving comments:** [Provide your suggestions.]
+* **Original text:** [Quote text related to contributions.]
+* **Improving comments:** [Provide your suggestions.]
+...
+
+**5. License Information**
+* **Assessment**: [Your evaluation of the license information.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote text related to the license.]
+* **Improving comments:** [Provide your suggestions.]
+* **Original text:** [Quote text related to the license.]
+* **Improving comments:** [Provide your suggestions.]
+...
+
+**6. Readability Analysis**
+* **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
+* **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
+* **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
+* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
+* **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
+
+---
+
+#### If if is a **folder-level** file, use the following criteria instead.
+
+For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
+
+**1. Folder Description**
+* **Assessment**: [Your evaluation of whether it Provides a clear **description** of what the folder contains (e.g., modules, scripts, data).]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the README.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+
+**2. Folder Purpose**
+* **Assessment**: [Your evaluation of whether it explains the **purpose** or **role** of the components inside this subfolder.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote text related to purpose.]
+* **Improving comments:** [Provide your suggestions.]
+
+**3. Usage**
+* **Assessment**: [Your evaluation of whether it includes **usage instructions** specific to this folder (e.g., commands, import paths, input/output files).]
+* **Improvement Suggestions**:
+* **Original text:** [Quote text related to usage.]
+* **Improving comments:** [Provide your suggestions.]
+
+**4. Readability Analysis**
+* **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
+* **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
+* **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
+* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
+* **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
+
+---
+
+### Final Report Format
+
+#### Your output **must exactly match** the following template:
+
+**FinalAnswer**
+
+* Project-Level README: Yes / No
+* **Score:** [Poor / Fair / Good / Excellent]
+* **Key Strengths**: <brief summary of the README's strongest points in 2-3 sentences>
+* **Overall Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+
+#### Notes
+
+* **Project-Level README**: "Yes" if root-level; "No" if folder-level.
+* **Score**: Overall quality rating, could be Poor / Fair / Good / Excellent.
+* **Key Strengths**: Briefly highlight the README's strongest aspects.
+* **Improvement Suggestions**: Provide concrete snippets and suggested improvements.
+
+
+---
+
+### **README path:**
+{readme_path}
+
+---
+
+### **README Content:**
+{readme_content}
+"""
+
+
+class StructuredEvaluationREADMEResult(BaseModel):
+    available_score: Optional[bool]=Field(description="A boolean value, Is the README accessible and present?")
+    readability_score: Optional[str]=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    readability_suggestions: Optional[str]=Field(description="Suggestions to improve readability if necessary")
+    project_purpose_score: Optional[bool]=Field(description="A boolean value. Is the project's goal or function clearly stated?")
+    project_purpose_suggestions: Optional[str]=Field(description="Suggestions if not clear")
+    hardware_and_software_spec_score: Optional[str]=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    hardware_and_software_spec_suggestions: Optional[str]=Field(description="Suggestions if not clear")
+    dependency_score: Optional[str]=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    dependency_suggestions: Optional[str]=Field(description="Suggestions if dependencies are not clearly stated")
+    license_score: Optional[bool]=Field(description="A boolean value, Are contributor or maintainer details provided?")
+    license_suggestions: Optional[str]=Field(description="Suggestions to improve license information")
+    overall_score: str=Field(description="A overall scroll for the README quality, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+
+class EvaluationREADMEResult(BaseModel):
+    project_level: Optional[bool]=Field(description="A boolean value specifying if the README file is **project-level** README. TRUE: project-level, FALSE, folder-level")
+    score: Optional[str]=Field(description="An overall score")
+    key_strengths: Optional[str]=Field(description="A string specifying the key strengths of README file.")
+    overall_improvement_suggestions: Optional[list[str]]=Field(description="A list of overall improvement suggestions")
+
+EvaluationREADMEResultSchema = {
+    "title": "EvaluationREADMEResult",
+    "type": "object",
+    "properties": {
+        "project_level": {
+            "anyOf": [{"type": "boolean"}, {"type": "null"}],
+            "description": "A boolean value specifying if the README file is **project-level** README. TRUE: project-level, FALSE: folder-level.",
+            "title": "Project Level"
+        },
+        "score": {
+            "anyOf": [{"type": "string"}, {"type": "null"}],
+            "description": "An overall score",
+            "title": "Score"
+        },
+        "key_strengths": {
+            "anyOf": [{"type": "string"}, {"type": "null"}],
+            "description": "A string specifying the key strengths of README file.",
+            "title": "Key Strengths",
+        },
+        "overall_improvement_suggestions": {
+            "anyOf": [{"items": {"type": "string"}, "type": "array"}, {"type": "null"}],
+            "description": "A list of improvement suggestions",
+            "title": "Overall Improvement Suggestions"
+        }
+    },
+    "required": ["project_level", "score", "key_strengths", "overall_improvement_suggestions"]
+}
+
+class EvaluationREADMETask(EvaluationTask):
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        repo_path: str,
+        gitignore_path: str,
+        meta_data: ProjectMetadata | None = None,
+        step_callback: Callable | None = None
+    ):
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
+        self.evaluation_name = "README Evaluation"
+
+    def _structured_evaluate(self, free_readme_evaluations: dict[str, dict]):
+        """ Evaluate README in structure:
+        available: bool
+        readability: score and suggestion
+        project purpose: bool, suggestion
+        hardware and software spec and compatibility description: score and suggestion
+        dependencies clearly stated: score and suggestion
+        license information included: bool and suggestion
+        Code contributor / author information included: bool and suggestion
+        overall score:
+        """
+        total_token_usage = {**DEFAULT_TOKEN_USAGE}
+        if free_readme_evaluations is None:
+            return None, total_token_usage
+
+        license_path = "LICENSE"
+        license_content = read_file(Path(self.repo_path, license_path))
+        license_summarized_content = summarize_file(
+            llm=self.llm,
+            name=license_path,
+            content=license_content,
+            level=6,
+            summary_instructions="What license is the repository using?",
+        ) if license_content is not None else "N/A"
+        license_path = license_path if license_content is not None else "N/A"
+        structured_readme_evaluations = {}
+        for readme_file in free_readme_evaluations.keys():
+            evaluation = free_readme_evaluations[readme_file]["evaluation"]
+            if not evaluation["project_level"]:
+                continue
+            full_path = Path(self.repo_path, readme_file)
+            readme_content = read_file(full_path)
+            if readme_content is None:
+                logger.error(f"Error in reading file {readme_file}")
+                continue
+            if len(readme_content.strip()) == 0:
+                structured_readme_evaluations[readme_file] = {
+                    "structured_evaluation": StructuredEvaluationREADMEResult(
+                        available_score=False,
+                        readability_score="Poor",
+                        readability_suggestions="No readability provided",
+                        project_purpose_score=False,
+                        project_purpose_suggestions="No project purpose provided",
+                        hardware_and_software_spec_score="Poor",
+                        hardware_and_software_spec_suggestions="No hardware and software spec provided",
+                        dependency_score="Poor",
+                        dependency_suggestions="No dependency provided",
+                        license_score=False,
+                        license_suggestions="No license information",
+                        overall_score="Poor",
+                    ),
+                    "structured_reasoning_process": f"{readme_file} is an empty file.",
+                }
+                continue
+            readability = PyphenReadability()
+            flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
+                _, _, _, _, _ = readability.readability_metrics(readme_content)
+            system_prompt = ChatPromptTemplate.from_template(
+                STRUCTURED_EVALUATION_README_SYSTEM_PROMPT
+            ).format(
+                readme_path=readme_file,
+                readme_content=readme_content,
+                license_path=license_path,
+                license_summarized_content=license_summarized_content,
+                flesch_reading_ease=flesch_reading_ease,
+                flesch_kincaid_grade=flesch_kincaid_grade,
+                gunning_fog_index=gunning_fog_index,
+                smog_index=smog_index,
+            )
+            agent = CommonAgentTwoChainSteps(llm=self.llm)
+            response, _, token_usage, reasoning_process = agent.go(
+                system_prompt=system_prompt,
+                instruction_prompt=EVALUATION_INSTRUCTION,
+                schema=StructuredEvaluationREADMEResult,
+            )
+            self.print_step(step_output=f"README: {readme_file} structured evaluation")
+            self.print_step(step_output=reasoning_process)
+            structured_readme_evaluations[readme_file] = {
+                "structured_evaluation": response,
+                "structured_reasoning_process": reasoning_process,
+            }
+            total_token_usage = increase_token_usage(total_token_usage, token_usage)
+
+        return structured_readme_evaluations, total_token_usage
+
+
+    def _free_evaluate(self, files: list[str]):
+        readme_files = files
+        if readme_files is None or len(readme_files) == 0:
+            return None, {**DEFAULT_TOKEN_USAGE}
+
+        readme_evaluations = {}
+        total_token_usage = {**DEFAULT_TOKEN_USAGE}
+        for readme_file in readme_files:
+            readme_path = Path(self.repo_path, readme_file)
+            readme_content = read_file(readme_path)
+            if readme_content is None:
+                logger.error(f"Error in reading file {readme_file}")
+                continue
+            if len(readme_content.strip()) == 0:
+                readme_evaluations[readme_file] = {
+                    "evaluation": {
+                        "project_level": not "/" in readme_file,
+                        "score": "Poor",
+                        "key_strengths": f"{readme_file} is an empty file.",
+                        "overall_improvement_suggestions": f"{readme_file} is an empty file.",
+                    },
+                    "reasoning_process": f"{readme_file} is an empty file.",
+                }
+                continue
+
+            readability = PyphenReadability()
+            flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
+                _, _, _, _, _ = readability.readability_metrics(readme_content)
+            system_prompt = ChatPromptTemplate.from_template(
+                EVALUATION_README_SYSTEM_PROMPT
+            ).format(
+                readme_content=readme_content,
+                readme_path=readme_file,
+                flesch_reading_ease=flesch_reading_ease,
+                flesch_kincaid_grade=flesch_kincaid_grade,
+                gunning_fog_index=gunning_fog_index,
+                smog_index=smog_index,
+            )
+            # conversation = CommonConversation(llm=self.llm)
+            agent = CommonAgentTwoChainSteps(llm=self.llm)
+            response, _, token_usage, reasoning_process = agent.go(
+                system_prompt=system_prompt,
+                instruction_prompt=EVALUATION_INSTRUCTION,
+                schema=EvaluationREADMEResultSchema,
+            )
+            response = EvaluationREADMEResult(**response)
+            self.print_step(step_output=f"README: {readme_file} free evaluation")
+            self.print_step(step_output=reasoning_process)
+            readme_evaluations[readme_file] = {
+                "evaluation": {
+                    "project_level": response.project_level,
+                    "score": response.score,
+                    "key_strengths": response.key_strengths,
+                    "overall_improvement_suggestions": response.overall_improvement_suggestions,
+                },
+                "reasoning_process": reasoning_process
+            }
+            total_token_usage = increase_token_usage(total_token_usage, token_usage)
+        return readme_evaluations, total_token_usage
+
+    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
+        free_readme_evaluations, free_token_usage = self._free_evaluate(files)
+        structured_readme_evaluations, structured_token_usage = self._structured_evaluate(free_readme_evaluations)
+
+        # combine result
+        combined_evaluations = {}
+        for f in files:
+            if not f in structured_readme_evaluations:
+                combined_evaluations = {**free_readme_evaluations[f]}
+            else:
+                combined_evaluations[f] = {
+                    **free_readme_evaluations[f],
+                    **structured_readme_evaluations[f],
+                }
+
+        total_token_usage = increase_token_usage(free_token_usage, structured_token_usage)
+
+        return combined_evaluations, total_token_usage
+
+
```
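Both README prompts in the new file are parameterized with the same four readability scores, unpacked from `PyphenReadability.readability_metrics`, which (per the code above) returns nine values of which only the first four are kept. A minimal sketch, assuming bioguider is installed; the sample text is made up:

```python
from bioguider.utils.pyphen_utils import PyphenReadability

readability = PyphenReadability()
flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
    _, _, _, _, _ = readability.readability_metrics(
        "Install the package with pip, then run the demo notebook."
    )
print(flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index)
```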
bioguider/agents/evaluation_task.py:

```diff
@@ -2,18 +2,16 @@
 import os
 from pathlib import Path
 import logging
-from typing import Callable
+from typing import Callable
 from abc import ABC, abstractmethod
 from langchain.prompts import ChatPromptTemplate
 from langchain_openai.chat_models.base import BaseChatOpenAI
-from pydantic import BaseModel, Field
 
 from bioguider.agents.agent_utils import read_file
+from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
-from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
 from .common_agent import CommonConversation
 from ..utils.pyphen_utils import PyphenReadability
-from ..utils.gitignore_checker import GitignoreChecker
 
 logger = logging.getLogger(__name__)
 
```
bioguider/agents/evaluation_task.py (continued):

```diff
@@ -198,110 +196,7 @@ class EvaluationTask(ABC):
     def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
         pass
 
-class EvaluationREADMEResult(BaseModel):
-    project_level: Optional[bool]=Field(description="A boolean value specifying if the README file is **project-level** README. TRUE: project-level, FALSE, folder-level")
-    score: Optional[str]=Field(description="An overall score")
-    key_strengths: Optional[str]=Field(description="A string specifying the key strengths of README file.")
-    overall_improvement_suggestions: Optional[list[str]]=Field(description="A list of overall improvement suggestions")
-
-EvaluationREADMEResultSchema = {
-    "title": "EvaluationREADMEResult",
-    "type": "object",
-    "properties": {
-        "project_level": {
-            "anyOf": [{"type": "boolean"}, {"type": "null"}],
-            "description": "A boolean value specifying if the README file is **project-level** README. TRUE: project-level, FALSE: folder-level.",
-            "title": "Project Level"
-        },
-        "score": {
-            "anyOf": [{"type": "string"}, {"type": "null"}],
-            "description": "An overall score",
-            "title": "Score"
-        },
-        "key_strengths": {
-            "anyOf": [{"type": "string"}, {"type": "null"}],
-            "description": "A string specifying the key strengths of README file.",
-            "title": "Key Strengths",
-        },
-        "overall_improvement_suggestions": {
-            "anyOf": [{"items": {"type": "string"}, "type": "array"}, {"type": "null"}],
-            "description": "A list of improvement suggestions",
-            "title": "Overall Improvement Suggestions"
-        }
-    },
-    "required": ["project_level", "score", "key_strengths", "overall_improvement_suggestions"]
-}
-
-class EvaluationREADMETask(EvaluationTask):
-    def __init__(
-        self,
-        llm: BaseChatOpenAI,
-        repo_path: str,
-        gitignore_path: str,
-        meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None
-    ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
-        self.evaluation_name = "README Evaluation"
-
-    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
-        readme_files = files
-        if readme_files is None or len(readme_files) == 0:
-            return None
-
-        readme_evaluations = {}
-        for readme_file in readme_files:
-            readme_path = Path(self.repo_path, readme_file)
-            readme_content = read_file(readme_path)
-            if readme_content is None:
-                logger.error(f"Error in reading file {readme_file}")
-                continue
-            if len(readme_content.strip()) == 0:
-                readme_evaluations[readme_file] = {
-                    "evaluation": {
-                        "project_level": "/" in readme_file,
-                        "score": "Poor",
-                        "key_strengths": f"{readme_file} is an empty file.",
-                        "overall_improvement_suggestions": f"{readme_file} is an empty file.",
-                    },
-                    "reasoning_process": f"{readme_file} is an empty file.",
-                }
-                continue
-
-            readability = PyphenReadability()
-            flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
-                _, _, _, _, _ = readability.readability_metrics(readme_content)
-            system_prompt = ChatPromptTemplate.from_template(
-                EVALUATION_README_SYSTEM_PROMPT
-            ).format(
-                readme_content=readme_content,
-                readme_path=readme_file,
-                flesch_reading_ease=flesch_reading_ease,
-                flesch_kincaid_grade=flesch_kincaid_grade,
-                gunning_fog_index=gunning_fog_index,
-                smog_index=smog_index,
-            )
-            # conversation = CommonConversation(llm=self.llm)
-            agent = CommonAgentTwoChainSteps(llm=self.llm)
-            response, _, token_usage, reasoning_process = agent.go(
-                system_prompt=system_prompt,
-                instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation.",
-                schema=EvaluationREADMEResultSchema,
-            )
-            response = EvaluationREADMEResult(**response)
-            self.print_step(step_output=f"README: {readme_file}")
-            self.print_step(step_output=reasoning_process)
-            readme_evaluations[readme_file] = {
-                "evaluation": {
-                    "project_level": response.project_level,
-                    "score": response.score,
-                    "key_strengths": response.key_strengths,
-                    "overall_improvement_suggestions": response.overall_improvement_suggestions,
-                },
-                "reasoning_process": reasoning_process
-            }
-        return readme_evaluations, token_usage
-
+
 EVALUATION_TUTORIAL_SYSTEM_PROMPT="""
 You are an expert in software documentation and developer education.
 You are given the content of a tutorial file from a GitHub repository. Your task is to **critically evaluate** the quality of this tutorial based on best practices in technical writing and developer onboarding.
```
bioguider/agents/evaluation_task.py (continued):

```diff
@@ -399,7 +294,7 @@ class EvaluationTutorialTask(EvaluationTask):
         conversation = CommonConversation(llm=self.llm)
         response, token_usage = conversation.generate(
             system_prompt=system_prompt,
-            instruction_prompt=
+            instruction_prompt=EVALUATION_INSTRUCTION,
         )
         self.print_step(step_output=f"Tutorial: {file}")
         self.print_step(step_output=response)
```
bioguider/agents/identification_task.py:

```diff
@@ -18,6 +18,7 @@ from bioguider.agents.agent_tools import (
 )
 from bioguider.agents.agent_utils import (
     read_directory,
+    try_parse_json_object,
 )
 from bioguider.agents.identification_execute_step import IdentificationExecuteStep
 from bioguider.agents.identification_observe_step import IdentificationObserveStep
```
bioguider/agents/identification_task.py (continued):

```diff
@@ -189,13 +190,18 @@ class IdentificationTask(AgentTask):
 
 
     def _parse_project_type(self, proj_type_obj: str) -> ProjectTypeEnum:
-
-
-
-
-
-
-
+        proj_type_obj = proj_type_obj.strip()
+        the_obj = try_parse_json_object(proj_type_obj)
+        if not the_obj is None and "project_type" in the_obj:
+            proj_type = the_obj["project_type"]
+        elif proj_type_obj in [
+            ProjectTypeEnum.application.value,
+            ProjectTypeEnum.package.value,
+            ProjectTypeEnum.pipeline.value
+        ]:
+            return ProjectTypeEnum(proj_type_obj)
+        else:
+            proj_type = "unknown"
         if proj_type == "application":
             return ProjectTypeEnum.application
         elif proj_type == "package":
```
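The hardened `_parse_project_type` accepts either a JSON object carrying a `project_type` key or a bare enum value, and only then falls back to `unknown`. A self-contained sketch of that fallback pattern; the enum below is a stand-in for bioguider's own `ProjectTypeEnum`:

```python
from enum import Enum

from bioguider.agents.agent_utils import try_parse_json_object

class ProjectType(Enum):  # stand-in for bioguider's ProjectTypeEnum
    application = "application"
    package = "package"
    pipeline = "pipeline"
    unknown = "unknown"

def parse_project_type(raw: str) -> ProjectType:
    raw = raw.strip()
    the_obj = try_parse_json_object(raw)
    if the_obj is not None and "project_type" in the_obj:
        raw = str(the_obj["project_type"]).strip()
    try:
        return ProjectType(raw)
    except ValueError:
        return ProjectType.unknown

print(parse_project_type('{"project_type": "package"}'))  # ProjectType.package
print(parse_project_type("pipeline"))                     # ProjectType.pipeline
print(parse_project_type("garbage"))                      # ProjectType.unknown
```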
bioguider/agents/identification_task.py (continued):

```diff
@@ -206,12 +212,19 @@ class IdentificationTask(AgentTask):
         return ProjectTypeEnum.unknown
 
     def _parse_primary_language(self, language_obj: str) -> PrimaryLanguageEnum:
-        try
-
-
-
-
-
+        # try to handle some common errors
+        language_obj = language_obj.strip()
+        the_obj = try_parse_json_object(language_obj)
+        if not the_obj is None and "primary_language" in the_obj:
+            language = the_obj["primary_language"]
+        elif language_obj in [
+            PrimaryLanguageEnum.python.value,
+            PrimaryLanguageEnum.R.value,
+        ]:
+            return PrimaryLanguageEnum(language_obj)
+        else:
+            language = "unknown"
+
         language = language.strip()
         if language == "python":
             return PrimaryLanguageEnum.python
```
bioguider/agents/identification_task.py (continued):

```diff
@@ -221,15 +234,14 @@ class IdentificationTask(AgentTask):
         return PrimaryLanguageEnum.unknown
 
     def _parse_meta_data(self, meta_data_obj: str) -> dict:
-
-
-
-
-
-
-
-
-
-
-        }
+        meta_data_obj = meta_data_obj.strip()
+        the_obj = try_parse_json_object(meta_data_obj)
+
+        return the_obj if the_obj is not None else {
+            "name": "unknown",
+            "description": "unknown",
+            "license": "unknown",
+            "owner": "unknown",
+        }
+
+
```
bioguider/agents/prompt_utils.py:

```diff
@@ -82,6 +82,7 @@ IDENTIFICATION_GOAL_META_DATA = """Identify the following meta data of the repos
 """
 
 COT_USER_INSTRUCTION = "Do not give the answer immediately. First, explain your reasoning process step by step, then provide the answer."
+EVALUATION_INSTRUCTION="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation."
 
 class CollectionGoalItemEnum(Enum):
     UserGuide = "User Guide"
```
bioguider/agents/prompt_utils.py (continued):

```diff
@@ -188,3 +189,5 @@ If **any one** of these is present, the document should be classified as Contrib
     },
 }
 
+
+
```
bioguider/managers/evaluation_manager.py:

```diff
@@ -9,7 +9,7 @@ from ..agents.identification_task import IdentificationTask
 from ..rag.rag import RAG
 from ..utils.file_utils import parse_repo_url
 from ..database.summarized_file_db import SummarizedFilesDb
-from ..agents.
+from ..agents.evaluation_readme_task import EvaluationREADMETask
 from ..agents.evaluation_installation_task import EvaluationInstallationTask
 from ..agents.collection_task import CollectionTask
 
```
All remaining files are unchanged between 0.2.10 and 0.2.11.