PyPI - bioguider - Versions diffs - 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl - Mend

bioguider 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of bioguider might be problematic. Click here for more details.

Files changed (32) hide show

bioguider/agents/agent_utils.py +16 -10
bioguider/agents/collection_observe_step.py +7 -2
bioguider/agents/collection_task_utils.py +1 -0
bioguider/agents/consistency_collection_step.py +102 -0
bioguider/agents/consistency_evaluation_task.py +57 -0
bioguider/agents/consistency_evaluation_task_utils.py +14 -0
bioguider/agents/consistency_observe_step.py +109 -0
bioguider/agents/consistency_query_step.py +74 -0
bioguider/agents/evaluation_task.py +0 -110
bioguider/agents/evaluation_tutorial_task.py +156 -0
bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
bioguider/agents/evaluation_userguide_task.py +13 -43
bioguider/agents/prompt_utils.py +15 -2
bioguider/database/code_structure_db.py +20 -9
bioguider/database/summarized_file_db.py +6 -3
bioguider/managers/evaluation_manager.py +16 -2
bioguider/rag/data_pipeline.py +1 -1
bioguider/utils/code_structure_builder.py +15 -8
bioguider/utils/constants.py +12 -12
bioguider/utils/notebook_utils.py +117 -0
bioguider/utils/{file_handler.py → python_file_handler.py} +1 -1
bioguider/utils/r_file_handler.py +549 -0
bioguider/utils/utils.py +34 -1
{bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/METADATA +1 -1
{bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/RECORD +27 -23
bioguider/agents/consistency_collection_execute_step.py +0 -152
bioguider/agents/consistency_collection_observe_step.py +0 -128
bioguider/agents/consistency_collection_plan_step.py +0 -128
bioguider/agents/consistency_collection_task.py +0 -109
bioguider/agents/consistency_collection_task_utils.py +0 -137
{bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/LICENSE +0 -0
{bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/WHEEL +0 -0

bioguider/agents/evaluation_tutorial_task.py ADDED Viewed

@@ -0,0 +1,156 @@
+import json
+from pathlib import Path
+from typing import Callable
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+import logging
+from bioguider.agents.agent_utils import read_file
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationResult, ConsistencyEvaluationTask
+from bioguider.agents.evaluation_task import EvaluationTask
+from bioguider.agents.collection_task import CollectionTask
+from bioguider.agents.evaluation_tutorial_task_prompts import INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT
+from bioguider.agents.prompt_utils import CollectionGoalItemEnum
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
+from bioguider.utils.notebook_utils import extract_markdown_from_notebook, strip_notebook_to_code_and_markdown
+from bioguider.utils.pyphen_utils import PyphenReadability
+from bioguider.utils.utils import increase_token_usage
+logger = logging.getLogger(__name__)
+class TutorialEvaluationResult(BaseModel):
+    # Structured output schema for the per-tutorial LLM evaluation; it is passed
+    # as `schema=` to CommonAgentTwoSteps.go() in _evaluate_individual_tutorial.
+    # Every criterion is a (score, suggestions) pair of plain strings; scores are
+    # described to the model as one of Poor / Fair / Good / Excellent.
+    # Comments are used instead of a class docstring on purpose: pydantic exposes
+    # a model docstring as the schema description.
+    # NOTE(review): the system prompt's final report has a "Coverage" section,
+    # while this schema has `setup_and_dependencies_*` and no `coverage_*`
+    # field - confirm the intended mapping.
+    overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    overall_key_strengths: str=Field(description="A string value, the key strengths of the tutorial")
+    overall_improvement_suggestions: str=Field(description="Suggestions to improve the overall score if necessary")
+    readability_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    readability_suggestions: str=Field(description="Suggestions to improve readability if necessary")
+    setup_and_dependencies_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    setup_and_dependencies_suggestions: str=Field(description="Suggestions to improve setup and dependencies if necessary")
+    reproducibility_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    reproducibility_suggestions: str=Field(description="Suggestions to improve reproducibility if necessary")
+    structure_and_navigation_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    structure_and_navigation_suggestions: str=Field(description="Suggestions to improve structure and navigation if necessary")
+    executable_code_quality_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    executable_code_quality_suggestions: str=Field(description="Suggestions to improve executable code quality if necessary")
+    result_verification_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    result_verification_suggestions: str=Field(description="Suggestions to improve result verification if necessary")
+    performance_and_resource_notes_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    performance_and_resource_notes_suggestions: str=Field(description="Suggestions to improve performance and resource notes if necessary")
+class IndividualTutorialEvaluationResult(BaseModel):
+    # Result bundle for a single tutorial file: the LLM quality evaluation plus
+    # the documentation-vs-code consistency evaluation. Both fields accept None.
+    # (A `#` comment is used rather than a docstring so the pydantic schema
+    # description is left unchanged.)
+    tutorial_evaluation: TutorialEvaluationResult | None=Field(description="The evaluation result of the tutorial")
+    consistency_evaluation: ConsistencyEvaluationResult | None=Field(description="The evaluation result of the consistency of the tutorial")
+class EvaluationTutorialTask(EvaluationTask):
+    """Evaluate every collected tutorial/vignette file of a repository.
+
+    For each file two evaluations are run: an LLM quality review parsed into
+    ``TutorialEvaluationResult`` and a documentation-vs-code consistency
+    check delegated to ``ConsistencyEvaluationTask``.
+    """
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        repo_path: str,
+        gitignore_path: str,
+        meta_data: ProjectMetadata | None = None,
+        step_callback: Callable | None = None,
+        summarized_files_db = None,
+        code_structure_db = None,
+    ):
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
+        self.evaluation_name = "Tutorial Evaluation"
+        # Forwarded to ConsistencyEvaluationTask for the consistency check.
+        self.code_structure_db = code_structure_db
+    def _collect_files(self):
+        """Run a ``CollectionTask`` with the Tutorial goal and return its result.
+
+        NOTE(review): the .gitignore path is rebuilt from ``repo_path`` here
+        rather than taken from the constructor's ``gitignore_path`` - confirm
+        that this is intended.
+        """
+        task = CollectionTask(
+            llm=self.llm,
+            step_callback=self.step_callback,
+            summarized_files_db=self.summarized_files_db,
+        )
+        task.compile(
+            repo_path=self.repo_path,
+            gitignore_path=Path(self.repo_path, ".gitignore"),
+            goal_item=CollectionGoalItemEnum.Tutorial.name,
+        )
+        return task.collect()
+    def _new_consistency_task(self) -> ConsistencyEvaluationTask:
+        """Build the consistency task shared by both ``_evaluate_consistency*`` helpers."""
+        return ConsistencyEvaluationTask(
+            llm=self.llm,
+            code_structure_db=self.code_structure_db,
+            step_callback=self.step_callback,
+        )
+    def _evaluate_consistency(self, file: str) -> ConsistencyEvaluationResult | None:
+        """Read ``file`` (relative to the repo root) and evaluate its consistency.
+
+        Returns the bare evaluation result (no token usage). The caller in
+        this class treats ``None`` as "not enough information".
+        """
+        file = file.strip()
+        with open(Path(self.repo_path, file), "r") as f:
+            tutorial_content = f.read()
+        return self._new_consistency_task().evaluate(
+            domain="tutorial/vignette",
+            documentation=tutorial_content,
+        )
+    def _evaluate_consistency_on_content(self, content: str) -> tuple[ConsistencyEvaluationResult | None, dict]:
+        """Evaluate the consistency of already-loaded tutorial ``content``.
+
+        Returns:
+            ``(result, token_usage)``. The token usage is currently always a
+            copy of ``DEFAULT_TOKEN_USAGE`` because the consistency task does
+            not report its own usage through this call.
+        """
+        result = self._new_consistency_task().evaluate(
+            domain="tutorial/vignette",
+            documentation=content,
+        )
+        return result, {**DEFAULT_TOKEN_USAGE}
+    def _evaluate_individual_tutorial(self, file: str) -> tuple[IndividualTutorialEvaluationResult | None, dict]:
+        """Evaluate one tutorial file.
+
+        Args:
+            file: Path of the tutorial, relative to ``self.repo_path``.
+
+        Returns:
+            ``(result, token_usage)``; ``result`` is ``None`` when the file
+            cannot be read.
+        """
+        file_path = Path(self.repo_path, file)
+        content = read_file(file_path)
+        if content is None:
+            logger.error(f"Error in reading file {file}")
+            return None, {**DEFAULT_TOKEN_USAGE}
+        if file.endswith(".ipynb"):
+            # Notebooks: readability is measured on the markdown cells only, while
+            # the consistency check receives the stripped code + markdown as JSON.
+            readability_content = extract_markdown_from_notebook(file_path)
+            content = json.dumps(strip_notebook_to_code_and_markdown(file_path))
+        else:
+            readability_content = content
+        readability = PyphenReadability()
+        flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
+                _, _, _, _, _ = readability.readability_metrics(readability_content)
+        # NOTE(review): for notebooks the prompt only receives the markdown text
+        # (readability_content), although it also asks about executable code
+        # quality - confirm whether `content` should be sent instead.
+        system_prompt = ChatPromptTemplate.from_template(
+            INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT
+        ).format(
+            flesch_reading_ease=flesch_reading_ease,
+            flesch_kincaid_grade=flesch_kincaid_grade,
+            gunning_fog_index=gunning_fog_index,
+            smog_index=smog_index,
+            tutorial_file_content=readability_content,
+        )
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        tutorial_evaluation, _, token_usage, _ = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt="Now, let's begin the tutorial evaluation.",
+            schema=TutorialEvaluationResult,
+        )
+        consistency_evaluation_result, consistency_token_usage = self._evaluate_consistency_on_content(content)
+        # Account for the consistency step instead of silently dropping its usage.
+        token_usage = increase_token_usage(token_usage, consistency_token_usage)
+        if consistency_evaluation_result is None:
+            # No sufficient information to evaluate the consistency of the tutorial
+            consistency_evaluation_result = ConsistencyEvaluationResult(
+                consistency_score="N/A",
+                consistency_assessment="No sufficient information to evaluate the consistency of the tutorial",
+                consistency_development=[],
+                consistency_strengths=[],
+            )
+        return IndividualTutorialEvaluationResult(
+            tutorial_evaluation=tutorial_evaluation,
+            consistency_evaluation=consistency_evaluation_result,
+        ), token_usage
+    def _evaluate(self, files: list[str] | None = None) -> tuple[dict[str, IndividualTutorialEvaluationResult] | None, dict, list[str]]:
+        """Evaluate every file in ``files``.
+
+        Returns:
+            ``(results_by_file, total_token_usage, files)``. A missing
+            (``None``) file list is treated as empty instead of raising.
+        """
+        if files is None:
+            files = []
+        total_token_usage = {**DEFAULT_TOKEN_USAGE}
+        tutorial_evaluation_results = {}
+        for file in files:
+            tutorial_evaluation_result, token_usage = self._evaluate_individual_tutorial(file)
+            total_token_usage = increase_token_usage(total_token_usage, token_usage)
+            tutorial_evaluation_results[file] = tutorial_evaluation_result
+        return tutorial_evaluation_results, total_token_usage, files

bioguider/agents/evaluation_tutorial_task_prompts.py ADDED Viewed

@@ -0,0 +1,114 @@
+# System prompt for the per-tutorial evaluation. Placeholders filled by the caller:
+# {flesch_reading_ease}, {flesch_kincaid_grade}, {gunning_fog_index}, {smog_index}
+# and {tutorial_file_content}.
+INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT = """
+You are an expert in evaluating the quality of tutorials in software repositories.
+Your task is to analyze the provided tutorial file and generate a structured quality assessment based on the following criteria.
+---
+### **Evaluation Criteria**
+1. **Readability**:
+   * **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
+   * **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
+   * **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
+   * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
+   * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
+2. **Coverage**:
+   * **Assessment**: [Your evaluation of whether it covers all major steps needed to get started, and dependencies, prerequisites, setup steps, and example usage.]
+   * **Improvement Suggestions**:
+      * **Original text:** [Quote a specific line/section from the tutorial.]
+      * **Improving comments:** [Provide your suggestions to improve clarity.]
+3. **Reproducibility**:
+   * **Assessment**: [Your evaluation of whether it provides a clear **description** of reproducibility]
+   * **Improvement Suggestions**:
+      * **Original text:** [Quote a specific line/section from the tutorial.]
+      * **Improving comments:** [Provide your suggestions to improve clarity.]
+4. **Structure & Navigation**:
+   * **Assessment**: [Your evaluation of whether it provides logical sections (e.g., intro -> setup -> steps -> results -> next), TOC/anchors, estimated time, etc.]
+   * **Improvement Suggestions**:
+      * **Original text:** [Quote a specific line/section from the tutorial.]
+      * **Improving comments:** [Provide your suggestions to improve clarity.]
+5. **Executable Code Quality**:
+   * **Assessment**: [Your evaluation on whether the code snippets are executable and functional, idiomatic, no hard-coded paths, etc.]
+   * **Improvement Suggestions**:
+      * **Original text:** [Quote a specific line/section from the tutorial.]
+      * **Improving comments:** [Provide your suggestions to improve clarity.]
+6. **Result Verification**:
+   * **Assessment**: [Your evaluation on expected outputs shown (figures/tables/metrics), acceptance criteria, etc.]
+   * **Improvement Suggestions**:
+      * **Original text:** [Quote a specific line/section from the tutorial.]
+      * **Improving comments:** [Provide your suggestions to improve clarity.]
+7. **Performance & Resource Notes**:
+   * **Assessment**: [Your evaluation on performance and resource notes, e.g., CPU/GPU usage, memory usage, runtime estimates, small "lite" path provided.]
+   * **Improvement Suggestions**:
+      * **Original text:** [Quote a specific line/section from the tutorial.]
+      * **Improving comments:** [Provide your suggestions to improve clarity.]
+---
+### **Final Report Output**
+Your final report must **exactly match** the following format. Do not add or omit any sections.
+**FinalAnswer**
+* **Overall Score:** [Poor / Fair / Good / Excellent]
+* **Overall Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Overall Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Readability Score:** [Poor / Fair / Good / Excellent]
+* **Readability Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Readability Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Coverage Score:** [Poor / Fair / Good / Excellent]
+* **Coverage Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Coverage Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Reproducibility Score:** [Poor / Fair / Good / Excellent]
+* **Reproducibility Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Reproducibility Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Structure & Navigation Score:** [Poor / Fair / Good / Excellent]
+* **Structure & Navigation Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Structure & Navigation Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Executable Code Quality Score:** [Poor / Fair / Good / Excellent]
+* **Executable Code Quality Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Executable Code Quality Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Result Verification Score:** [Poor / Fair / Good / Excellent]
+* **Result Verification Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Result Verification Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Performance & Resource Notes Score:** [Poor / Fair / Good / Excellent]
+* **Performance & Resource Notes Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Performance & Resource Notes Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+---
+### **Tutorial File Content:**
+{tutorial_file_content}
+---
+"""

bioguider/agents/evaluation_userguide_task.py CHANGED Viewed

@@ -1,36 +1,24 @@
-import os
 from pathlib import Path
 import logging
 from langchain.prompts import ChatPromptTemplate
-from markdownify import markdownify as md
 from pydantic import BaseModel, Field
 from bioguider.agents.agent_utils import read_file
 from bioguider.agents.collection_task import CollectionTask
-from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION, CollectionGoalItemEnum
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationTask, ConsistencyEvaluationResult
+from bioguider.agents.prompt_utils import CollectionGoalItemEnum
 from bioguider.utils.constants import (
     DEFAULT_TOKEN_USAGE,
-    ProjectMetadata,
-    StructuredEvaluationInstallationResult,
-    FreeEvaluationInstallationResult,
-    EvaluationInstallationResult,
 )
-from bioguider.rag.data_pipeline import count_tokens
-from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
 from ..utils.pyphen_utils import PyphenReadability
 from .evaluation_task import EvaluationTask
 from .agent_utils import read_file
 from bioguider.utils.utils import increase_token_usage
-from .evaluation_userguide_prompts import CONSISTENCY_EVAL_SYSTEM_PROMPT, INDIVIDUAL_USERGUIDE_EVALUATION_SYSTEM_PROMPT
-from .consistency_collection_task import ConsistencyCollectionTask
+from .evaluation_userguide_prompts import INDIVIDUAL_USERGUIDE_EVALUATION_SYSTEM_PROMPT
-class ConsistencyEvaluationResult(BaseModel):
-    consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
-    consistency_assessment: str=Field(description="Your evaluation of whether the user guide/API documentation is consistent with the code definitions")
-    consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
-    consistency_strengths: list[str]=Field(description="A list of strengths of the user guide/API documentation on consistency")
 class UserGuideEvaluationResult(BaseModel):
     overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
@@ -78,39 +66,19 @@ class EvaluationUserGuideTask(EvaluationTask):
         files = task.collect()
         return files
-    def _evaluate_consistency(self, file: str) -> tuple[EvaluationInstallationResult | None, dict, list[str]]:
-        consistency_collect_task = ConsistencyCollectionTask(
+    def _evaluate_consistency(self, file: str) -> tuple[ConsistencyEvaluationResult | None, dict]:
+        """Evaluate how consistent a user guide/API document is with the code.
+
+        Args:
+            file: Path of the document, relative to ``self.repo_path``
+                (surrounding whitespace is stripped).
+
+        Returns:
+            ``(result, token_usage)``. The token usage is always a copy of
+            ``DEFAULT_TOKEN_USAGE`` here.
+        """
+        consistency_evaluation_task = ConsistencyEvaluationTask(
             llm=self.llm,
             code_structure_db=self.code_structure_db,
             step_callback=self.step_callback,
         )
-        consistency_collect_task.compile(repo_path=self.repo_path, gitignore_path=Path(self.repo_path, ".gitignore"))
+        file = file.strip()
         with open(Path(self.repo_path, file), "r") as f:
             user_guide_api_documentation = f.read()
-        res, code_definitions = consistency_collect_task.collect(user_guide_api_documentation)
-        if not res:
-            # No sufficient information to evaluate the consistency of the user guide/API documentation
-            return None, {**DEFAULT_TOKEN_USAGE}
-        system_prompt = ChatPromptTemplate.from_template(
-            CONSISTENCY_EVAL_SYSTEM_PROMPT
-        ).format(
-            user_guide_api_documentation=user_guide_api_documentation,
-            code_definitions=code_definitions,
-        )
-        agent = CommonAgentTwoSteps(llm=self.llm)
-        res, _, token_usage, reasoning_process = agent.go(
-            system_prompt=system_prompt,
-            instruction_prompt="Now, let's begin the consistency evaluation step.",
-            schema=ConsistencyEvaluationResult,
-        )
-        res: ConsistencyEvaluationResult = res
-        self.print_step(step_output=f"Consistency Evaluation Result: {res}")
-        self.print_step(step_output=f"Consistency Evaluation Reasoning Process: {reasoning_process}")
-        self.print_step(token_usage=token_usage)
-        return res, token_usage
+        return consistency_evaluation_task.evaluate(
+            domain="user guide/API",
+            documentation=user_guide_api_documentation,
+        ), {**DEFAULT_TOKEN_USAGE}
     def _evaluate_individual_userguide(self, file: str) -> tuple[IndividualUserGuideEvaluationResult | None, dict]:
         content = read_file(Path(self.repo_path, file))
@@ -157,6 +125,8 @@ class EvaluationUserGuideTask(EvaluationTask):
         total_token_usage = {**DEFAULT_TOKEN_USAGE}
         user_guide_evaluation_results = {}
         for file in files:
+            if file.endswith(".py") or file.endswith(".R"):
+                continue
             user_guide_evaluation_result, token_usage = self._evaluate_individual_userguide(file)
             total_token_usage = increase_token_usage(total_token_usage, token_usage)
             user_guide_evaluation_results[file] = user_guide_evaluation_result

bioguider/agents/prompt_utils.py CHANGED Viewed

@@ -104,6 +104,7 @@ COLLECTION_PROMPTS = {
         "goal_item": "User Guide",
         "related_file_description": """A document qualifies as a **User Guide** if it includes **at least one** of the following elements.
 If **any one** of these is present, the document should be classified as a User Guide — full coverage is **not required**:
+ - **Not source code or a script** (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
  - Document **functions, methods, or classes**
  - Describe **input parameters, return values**, and **usage syntax**
  - Include **technical guidance** for using specific APIs
@@ -117,9 +118,12 @@ If **any one** of these is present, the document should be classified as a User
  - Code Walkthroughs: Detailed explanations of code snippets in a tutorial format.
 **Do not** classify the document as a User Guide if it is souce code or a script (*.py, *.R) that is not intended for end-user interaction.
  - You can include directory names if all files in the directory are relevant to the goal item.""",
-        "plan_important_instructions": """ - **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
+        "plan_important_instructions": """ - **Do not** try to summarize or read the content of any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
+ - **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
  - **Do not** classify the document as a User Guide if it is a notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
- - You plan **must not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction."""
+ - You plan **must not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.""",
+        "observe_important_instructions": """ - **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
+ - **Do not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) in the final answer that is not intended for end-user interaction."""
     },
     "Tutorial": {
         "goal_item": "Tutorials & Vignettes",
@@ -131,6 +135,15 @@ If **any one** of these is present, the document should be classified as a User
  - Interactive Elements: Features that allow users to experiment with the code in real-time, such as Jupyter notebooks or R Markdown files.
  - Use Cases: Real-world applications or scenarios where the software can be applied effectively.
  - You can include directory names if all files in the directory are relevant to the goal item.
+**Important instructions**:
+ - **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+""",
+        "plan_important_instructions": """ - **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+  - For python/R notebook files **(.ipynb, .Rmd)**, **only infer** if it is the tutorial/vignette from the file name and avoid reading the content of the file.
+""",
+        "observe_important_instructions": """ - **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+  - For python/R notebook files **(.ipynb, .Rmd)**, **only infer** if it is the tutorial/vignette from the file name and avoid reading the content of the file.
+  - **Do not** include any binary files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the final answer.s
 """,
     },
     "DockerGeneration": {

bioguider/database/code_structure_db.py CHANGED Viewed

@@ -123,15 +123,8 @@ class CodeStructureDb:
                 os.makedirs(db_path, exist_ok=True)
             except Exception as e:
                 logging.error(e)
-                return False
-        db_path = os.path.join(db_path, "databases")
-        # Ensure the local path exists
-        try:
-            os.makedirs(db_path, exist_ok=True)
-        except Exception as e:
-            logging.error(e)
-            return False
-        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
+                return False
+        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_code_structure.db")
         if not os.path.exists(db_path):
             try:
                 with open(db_path, "w"):
@@ -142,6 +135,24 @@ class CodeStructureDb:
         self.connection = sqlite3.connect(db_path)
         return True
+    def is_database_built(self) -> bool:
+        """Return True when the code-structure table already holds at least one row.
+
+        Opens a connection, makes sure the tables exist and probes the table.
+        Any failure is logged and reported as ``False``. The connection is
+        closed and reset on every path once it has been opened, including
+        the early return taken when the tables cannot be ensured.
+        """
+        if not self._connect_to_db():
+            return False
+        try:
+            if not self._ensure_tables():
+                return False
+            cursor = self.connection.cursor()
+            # Existence probe only: one constant from at most one row is enough.
+            cursor.execute(f"SELECT 1 FROM {CODE_STRUCTURE_TABLE_NAME} LIMIT 1")
+            return cursor.fetchone() is not None
+        except Exception as e:
+            logging.error(e)
+            return False
+        finally:
+            if self.connection is not None:
+                self.connection.close()
+                self.connection = None
     def insert_code_structure(
         self,
         name: str,

bioguider/database/summarized_file_db.py CHANGED Viewed

@@ -38,10 +38,11 @@ where file_path = ? and instruction = ? and summarize_level = ? and summarize_pr
 """
 class SummarizedFilesDb:
-    def __init__(self, author: str, repo_name: str):
+    def __init__(self, author: str, repo_name: str, data_folder: str | None = None):
+        """Store the repository identity and an optional data-folder override.
+
+        ``data_folder`` is the base directory used by ``_connect_to_db``; when
+        it is ``None`` the ``DATA_FOLDER`` environment variable (default
+        ``./data``) is used instead.
+        """
         self.author = author
         self.repo_name = repo_name
         self.connection: Connection | None = None
+        self.data_folder = data_folder
     def _ensure_tables(self) -> bool:
         if self.connection is None:
@@ -60,7 +61,9 @@ class SummarizedFilesDb:
     def _connect_to_db(self) -> bool:
         if self.connection is not None:
             return True
-        db_path = os.environ.get("DATA_FOLDER", "./data")
+        db_path = self.data_folder
+        if db_path is None:
+            db_path = os.environ.get("DATA_FOLDER", "./data")
         db_path = os.path.join(db_path, "databases")
         # Ensure the local path exists
         try:
@@ -68,7 +71,7 @@ class SummarizedFilesDb:
         except Exception as e:
             logging.error(e)
             return False
-        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
+        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_summarized_file.db")
         if not os.path.exists(db_path):
             try:
                 with open(db_path, "w"):

bioguider/managers/evaluation_manager.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 from pathlib import Path
+from bioguider.agents.evaluation_tutorial_task import EvaluationTutorialTask
 from bioguider.agents.evaluation_userguide_task import EvaluationUserGuideTask
 from bioguider.agents.prompt_utils import CollectionGoalItemEnum
 from bioguider.database.code_structure_db import CodeStructureDb
@@ -35,8 +36,8 @@ class EvaluationManager:
         self.summary_file_db = SummarizedFilesDb(author, repo_name)
         self.code_structure_db = CodeStructureDb(author, repo_name)
         code_structure_builder = CodeStructureBuilder(
-            repo_path=repo_url,
-            gitignore_path=Path(repo_url, ".gitignore"),
+            repo_path=self.rag.repo_dir,
+            gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
             code_structure_db=self.code_structure_db
         )
         code_structure_builder.build_code_structure()
@@ -142,6 +143,19 @@ class EvaluationManager:
         )
         evaluation, files = evaluation_task.evaluate()
         return evaluation, files
+    def evaluate_tutorial(self):
+        """Run the tutorial evaluation on the cloned repository.
+
+        Returns the ``(evaluation, files)`` pair produced by
+        ``EvaluationTutorialTask.evaluate()``.
+        """
+        repo_dir = self.rag.repo_dir
+        tutorial_task = EvaluationTutorialTask(
+            llm=self.llm,
+            repo_path=repo_dir,
+            gitignore_path=Path(repo_dir, ".gitignore"),
+            meta_data=self.project_metadata,
+            step_callback=self.step_callback,
+            summarized_files_db=self.summary_file_db,
+            code_structure_db=self.code_structure_db,
+        )
+        tutorial_evaluation, evaluated_files = tutorial_task.evaluate()
+        return tutorial_evaluation, evaluated_files

bioguider/rag/data_pipeline.py CHANGED Viewed

@@ -91,7 +91,7 @@ def download_repo(repo_url: str, local_path: str, access_token: str = None):
         logger.info(f"Cloning repository from {repo_url} to {local_path}")
         # We use repo_url in the log to avoid exposing the token in logs
         result = subprocess.run(
-            ["git", "clone", clone_url, local_path],
+            ["git", "clone", "--recurse-submodules", clone_url, local_path],
             check=True,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,

bioguider/utils/code_structure_builder.py CHANGED Viewed

@@ -1,8 +1,10 @@
 from pathlib import Path
 import logging
+from bioguider.utils.r_file_handler import RFileHandler
 from .gitignore_checker import GitignoreChecker
-from .file_handler import FileHandler
+from .python_file_handler import PythonFileHandler
 from ..database.code_structure_db import CodeStructureDb
 logger = logging.getLogger(__name__)
@@ -10,22 +12,27 @@ logger = logging.getLogger(__name__)
 class CodeStructureBuilder:
     def __init__(
         self,
-        repo_path: str,
-        gitignore_path: str,
+        repo_path: str | Path,
+        gitignore_path: str | Path,
         code_structure_db: CodeStructureDb,
     ):
-        self.repo_path = repo_path
-        self.gitignore_checker = GitignoreChecker(repo_path, gitignore_path)
-        self.file_handler = FileHandler(repo_path)
+        # Normalise once so every collaborator receives the same plain-string
+        # path, whether the caller passed a str or a Path.
+        self.repo_path = str(repo_path)
+        self.gitignore_checker = GitignoreChecker(self.repo_path, str(gitignore_path))
+        self.file_handler = PythonFileHandler(self.repo_path)
         self.code_structure_db = code_structure_db
     def build_code_structure(self):
+        if self.code_structure_db.is_database_built():
+            return
         files = self.gitignore_checker.check_files_and_folders()
         for file in files:
-            if not file.endswith(".py"):
+            if not file.endswith(".py") and not file.endswith(".R"):
                 continue
             logger.info(f"Building code structure for {file}")
-            file_handler = FileHandler(Path(self.repo_path) / file)
+            if file.endswith(".py"):
+                file_handler = PythonFileHandler(Path(self.repo_path) / file)
+            else:
+                file_handler = RFileHandler(Path(self.repo_path) / file)
             functions_and_classes = file_handler.get_functions_and_classes()
             # fixme: currently, we don't extract reference graph for each function or class
             for function_or_class in functions_and_classes:

bioguider/utils/constants.py CHANGED Viewed

@@ -119,15 +119,15 @@ class DemoInstructionsResult(BaseModel):
     expected_output_description: Optional[bool] = Field(description="A boolean value. Does it provide the description of expected output?")
 class EvaluationSubmissionRequirementsResult(BaseModel):
-    compiled_standalone_software: bool
-    source_code: bool
-    demo_dataset: bool
-    run_on_data_instruction: bool
-    run_on_custom_instruction: bool
-    expected_output_description: bool
-    complete_readme: bool
-    software_dependency: bool
-    install_tutorial: bool
-    license: bool
-    hardware_requirements: bool
-    compatible_os: bool
+    # One flag per submission requirement. Each flag accepts None in addition
+    # to True/False (presumably "could not be determined" - confirm with callers).
+    compiled_standalone_software: bool | None
+    source_code: bool | None
+    demo_dataset: bool | None
+    run_on_data_instruction: bool | None
+    run_on_custom_instruction: bool | None
+    expected_output_description: bool | None
+    complete_readme: bool | None
+    software_dependency: bool | None
+    install_tutorial: bool | None
+    license: bool | None
+    hardware_requirements: bool | None
+    compatible_os: bool | None

bioguider 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

Potentially problematic release.

bioguider 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl