bioguider 0.2.21__tar.gz → 0.2.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic.
- {bioguider-0.2.21 → bioguider-0.2.22}/PKG-INFO +1 -1
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/consistency_collection_step.py +9 -7
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/consistency_evaluation_task.py +3 -2
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/consistency_evaluation_task_utils.py +2 -1
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/consistency_observe_step.py +15 -13
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/evaluation_task.py +0 -110
- bioguider-0.2.22/bioguider/agents/evaluation_tutorial_task.py +156 -0
- bioguider-0.2.22/bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/evaluation_userguide_task.py +4 -1
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/prompt_utils.py +9 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/database/code_structure_db.py +20 -9
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/database/summarized_file_db.py +6 -3
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/managers/evaluation_manager.py +14 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/rag/data_pipeline.py +1 -1
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/code_structure_builder.py +6 -4
- bioguider-0.2.22/bioguider/utils/notebook_utils.py +117 -0
- bioguider-0.2.22/bioguider/utils/r_file_handler.py +549 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/pyproject.toml +1 -1
- bioguider-0.2.21/bioguider/utils/r_file_handler.py +0 -368
- {bioguider-0.2.21 → bioguider-0.2.22}/LICENSE +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/README.md +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/__init__.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/__init__.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/agent_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/agent_tools.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/agent_utils.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/collection_execute_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/collection_observe_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/collection_plan_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/collection_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/collection_task_utils.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/common_agent.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/common_agent_2step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/common_conversation.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/common_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/consistency_query_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/dockergeneration_execute_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/dockergeneration_observe_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/dockergeneration_plan_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/dockergeneration_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/dockergeneration_task_utils.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/evaluation_installation_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/evaluation_readme_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/evaluation_submission_requirements_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/evaluation_userguide_prompts.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/identification_execute_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/identification_observe_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/identification_plan_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/identification_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/identification_task_utils.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/peo_common_step.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/python_ast_repl_tool.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/agents/rag_collection_task.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/conversation.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/__init__.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/change_planner.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/document_renderer.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/llm_cleaner.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/llm_content_generator.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/llm_injector.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/models.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/output_manager.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/repo_reader.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/report_loader.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/style_analyzer.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/suggestion_extractor.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/generation/test_metrics.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/managers/generation_manager.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/managers/generation_test_manager.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/rag/__init__.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/rag/config.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/rag/embedder.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/rag/rag.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/settings.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/constants.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/default.gitignore +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/file_utils.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/gitignore_checker.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/pyphen_utils.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/python_file_handler.py +0 -0
- {bioguider-0.2.21 → bioguider-0.2.22}/bioguider/utils/utils.py +0 -0
bioguider/agents/consistency_collection_step.py

@@ -12,12 +12,12 @@ from bioguider.agents.peo_common_step import PEOCommonStep
 CONSISTANCY_COLLECTION_SYSTEM_PROMPT = """
 ### **Goal**
 You are an expert developer specializing in the biomedical domain.
-You will be given a
+You will be given a {domain} documentation. Your task is to collect all the functions, classes, and methods that the {domain} documentation mentions.
 
 ---
 
-### **Input
-{
+### **Input {domain} Documentation**
+{documentation}
 
 ### **Output Format**
 The collected functions, classes, and methods **must exactly match** the following format, **do not** make up anything:
@@ -52,12 +52,12 @@ parent: CommonAgent
 """
 
 class ConsistencyCollectionResult(BaseModel):
-    functions_and_classes: list[dict] = Field(description="A list of functions and classes that the
+    functions_and_classes: list[dict] = Field(description="A list of functions and classes that the documentation mentions")
 
 ConsistencyCollectionResultJsonSchema = {
     "properties": {
         "functions_and_classes": {
-            "description": "A list of functions and classes that the
+            "description": "A list of functions and classes that the documentation mentions",
             "items": {
                 "type": "object"
             },
@@ -78,9 +78,11 @@ class ConsistencyCollectionStep(PEOCommonStep):
         self.step_name = "Consistency Collection Step"
 
     def _prepare_system_prompt(self, state: ConsistencyEvaluationState) -> str:
-
+        documentation = state["documentation"]
+        domain = state["domain"]
         return ChatPromptTemplate.from_template(CONSISTANCY_COLLECTION_SYSTEM_PROMPT).format(
-
+            domain=domain,
+            documentation=documentation,
         )
 
     def _execute_directly(self, state: ConsistencyEvaluationState) -> tuple[dict, dict[str, int]]:
bioguider/agents/consistency_evaluation_task.py

@@ -28,13 +28,14 @@ class ConsistencyEvaluationTask:
         self.code_structure_db = code_structure_db
         self.step_callback = step_callback
 
-    def evaluate(self,
+    def evaluate(self, domain: str, documentation: str) -> ConsistencyEvaluationResult:
         collection_step = ConsistencyCollectionStep(llm=self.llm)
         query_step = ConsistencyQueryStep(code_structure_db=self.code_structure_db)
         observe_step = ConsistencyObserveStep(llm=self.llm)
 
         state = ConsistencyEvaluationState(
-
+            domain=domain,
+            documentation=documentation,
             step_output_callback=self.step_callback,
         )
 
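The `evaluate` entry point now takes an explicit `domain` label that is interpolated into the collection and observe prompts. A minimal caller sketch, assuming a configured chat model and a built code-structure database; the model name, repository identifiers, and file path below are placeholders, and the `CodeStructureDb` constructor arguments are assumed (they are not shown in this diff):

```python
from langchain_openai import ChatOpenAI
from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationTask
from bioguider.database.code_structure_db import CodeStructureDb

llm = ChatOpenAI(model="gpt-4o")                        # placeholder model
db = CodeStructureDb(author="owner", repo_name="repo")  # assumed ctor args, mirroring SummarizedFilesDb
task = ConsistencyEvaluationTask(llm=llm, code_structure_db=db, step_callback=None)

# New in 0.2.22: the caller names the documentation domain explicitly.
result = task.evaluate(
    domain="user guide/API",
    documentation=open("docs/guide.md").read(),  # placeholder path
)
```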
bioguider/agents/consistency_evaluation_task_utils.py

@@ -3,7 +3,8 @@ from typing import Callable, Optional, TypedDict
 
 
 class ConsistencyEvaluationState(TypedDict):
-
+    domain: str
+    documentation: str
     step_output_callback: Optional[Callable]
    functions_and_classes: Optional[list[dict]]
    all_query_rows: Optional[list[any]]
bioguider/agents/consistency_observe_step.py

@@ -10,8 +10,8 @@ from bioguider.agents.peo_common_step import PEOCommonStep
 CONSISTENCY_OBSERVE_SYSTEM_PROMPT = """
 You are an expert developer specializing in the biomedical domain.
 Your task is to analyze both:
-1. the provided file related to
-2. the code definitions related to the
+1. the provided file related to {domain} documentation,
+2. the code definitions related to the {domain} documentation
 and generate a structured consistency assessment based on the following criteria.
 
 ---
@@ -20,9 +20,9 @@ and generate a structured consistency assessment based on the following criteria.
 
 **Consistency**:
 * **Score**: [Poor / Fair / Good / Excellent]
-* **Assessment**: [Your evaluation of whether the
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
 * **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
-* **Strengths**: [A list of strengths of the
+* **Strengths**: [A list of strengths of the {domain} documentation on consistency]
 
 ---
 
@@ -31,16 +31,16 @@ Your output **must exactly match** the following format:
 ```
 **Consistency**:
 * **Score**: [Poor / Fair / Good / Excellent]
-* **Assessment**: [Your evaluation of whether the
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
 * **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
-* **Strengths**: [A list of strengths of the
+* **Strengths**: [A list of strengths of the {domain} documentation on consistency]
 ```
 
 ### **Output Example**
 
 ```
 **Consistency**:
-* **Assessment**: [Your evaluation of whether the
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
 * **Development**:
 - Inconsistent function/class/method name 1
 - Inconsistent docstring 1
@@ -55,8 +55,8 @@ Your output **must exactly match** the following format:
 
 ---
 
-### **Input
-{
+### **Input {domain} Documentation**
+{documentation}
 
 ### **Code Definitions**
 {code_definitions}
@@ -66,9 +66,9 @@ Your output **must exactly match** the following format:
 
 class ConsistencyEvaluationObserveResult(BaseModel):
     consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
-    consistency_assessment: str=Field(description="Your evaluation of whether the
+    consistency_assessment: str=Field(description="Your evaluation of whether the documentation is consistent with the code definitions")
     consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
-    consistency_strengths: list[str]=Field(description="A list of strengths of the
+    consistency_strengths: list[str]=Field(description="A list of strengths of the documentation on consistency")
 
 
 class ConsistencyObserveStep(PEOCommonStep):
@@ -78,7 +78,8 @@ class ConsistencyObserveStep(PEOCommonStep):
 
     def _prepare_system_prompt(self, state: ConsistencyEvaluationState):
         all_query_rows = state["all_query_rows"]
-
+        documentation = state["documentation"]
+        domain = state["domain"]
         code_definition = ""
         for row in all_query_rows:
             content = f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\nparameters: {row['params']}\ndoc_string: {row['doc_string']}"
@@ -86,7 +87,8 @@ class ConsistencyObserveStep(PEOCommonStep):
             code_definition += "\n\n\n"
         return ChatPromptTemplate.from_template(CONSISTENCY_OBSERVE_SYSTEM_PROMPT).format(
             code_definitions=code_definition,
-
+            documentation=documentation,
+            domain=domain,
         )
 
     def _execute_directly(self, state: ConsistencyEvaluationState):
bioguider/agents/evaluation_task.py

@@ -204,113 +204,3 @@ class EvaluationTask(ABC):
     @abstractmethod
     def _collect_files(self) -> list[str]:
         pass
-
-
-EVALUATION_TUTORIAL_SYSTEM_PROMPT="""
-You are an expert in software documentation and developer education.
-You are given the content of a tutorial file from a GitHub repository. Your task is to **critically evaluate** the quality of this tutorial based on best practices in technical writing and developer onboarding.
-Please assess the tutorial using the following criteria. Provide your evaluation in structured sections:
-
----
-
-### **Evaluation Criteria:**
-1. **Readability**: You are provided the following metrics scores calculated with pyphen, please evaluate readability based on the scores:
- * Flesch Reading Ease: {flesch_reading_ease} (206.835 - 1.015(words/sentences) - 84.6(syllables/words))
- * Flesch-Kincaid Grade Level: {flesch_kincaid_grade} (0.39(words/sentences) + 11.8(syllables/words) - 15.59)
- * Gunning Fog Index: {gunning_fog_index} (0.4[(words/sentences) + 100(complex words/words)])
- * SMOG Index: {smog_index} (1.043*sqrt(polysyllables * (30/sentences)) + 3.1291)
-2. **Coverage**
- * Does the tutorial cover all major steps needed to get started?
- * Are dependencies, prerequisites, setup steps, and example usage included?
-3. **Structure & Organization**
- * Is the content logically structured (e.g., introduction → setup → examples → summary)?
- * Are sections well-labeled and easy to navigate?
-4. **Balance Between Code and Explanation**
- * Is there a good balance between code snippets and narrative explanation?
- * Are code blocks properly annotated or explained?
-5. **Terminology Consistency**
- * Is technical terminology used consistently and accurately?
- * Are key terms introduced and reused correctly?
-6. **Example Quality**
- * Are the examples relevant, correct, and representative of real usage?
- * Are edge cases or typical user pitfalls addressed?
-7. **Formatting and Style**
- * Are headings, bullet points, code formatting, and markdown style used effectively?
- * Are there any formatting issues that hurt clarity?
----
-
-### **Output Format:**
-Please respond in the following format:
-
-```
-**FinalAnswer**
-**Readability**: Your comments here
-**Coverage**: Your comments here
-**Structure & Organization**: Your comments here
-**Code vs. Explanation Balance**: Your comments here
-**Terminology Consistency**: Your comments here
-**Example Quality**: Your comments here
-**Formatting and Style**: Your comments here
-**Overall Rating**: [Poor / Fair / Good / Excellent]
-```
-
----
-
-### **Tutorial File Content:**
-
-```
-{tutorial_file_content}
-```
-
----
-"""
-class EvaluationTutorialTask(EvaluationTask):
-    def __init__(
-        self,
-        llm: BaseChatOpenAI,
-        repo_path: str,
-        gitignore_path: str,
-        meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None,
-        summarized_files_db = None,
-    ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
-        self.evaluation_name = "Tutorial Evaluation"
-
-    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
-        if len(files) == 0:
-            return {}, {**DEFAULT_TOKEN_USAGE}
-
-        evaluations = {}
-        for file in files:
-            tutorial_path = Path(self.repo_path, file)
-            tutorial_content = read_file(tutorial_path)
-            if tutorial_content is None:
-                logging.error(f"Error in reading file {file}")
-                continue
-
-            readability = PyphenReadability()
-            flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
-                _, _, _, _, _ = readability.readability_metrics(tutorial_content)
-            system_prompt = ChatPromptTemplate.from_template(
-                EVALUATION_TUTORIAL_SYSTEM_PROMPT
-            ).format(
-                tutorial_file_content=tutorial_content,
-                flesch_reading_ease=flesch_reading_ease,
-                flesch_kincaid_grade=flesch_kincaid_grade,
-                gunning_fog_index=gunning_fog_index,
-                smog_index=smog_index,
-            )
-            conversation = CommonConversation(llm=self.llm)
-            response, token_usage = conversation.generate(
-                system_prompt=system_prompt,
-                instruction_prompt=EVALUATION_INSTRUCTION,
-            )
-            self.print_step(step_output=f"Tutorial: {file}")
-            self.print_step(step_output=response)
-            evaluations[file] = response
-        return evaluations, token_usage
-
-    def _collect_files(self):
-        return []
-
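The removed prompt quoted the standard readability formulas that `PyphenReadability` feeds into both the old and the new templates. For reference, a standalone sketch of those formulas, independent of the shipped `PyphenReadability` (whose internals are not in this diff):

```python
import math

def readability_scores(words, sentences, syllables, complex_words, polysyllables):
    """The four formulas exactly as quoted in the removed prompt above."""
    return {
        "flesch_reading_ease": 206.835 - 1.015 * (words / sentences)
                               - 84.6 * (syllables / words),
        "flesch_kincaid_grade": 0.39 * (words / sentences)
                                + 11.8 * (syllables / words) - 15.59,
        "gunning_fog_index": 0.4 * ((words / sentences)
                                    + 100 * (complex_words / words)),
        "smog_index": 1.043 * math.sqrt(polysyllables * (30 / sentences)) + 3.1291,
    }
```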
bioguider/agents/evaluation_tutorial_task.py (new file)

@@ -0,0 +1,156 @@
+
+
+import json
+from pathlib import Path
+from typing import Callable
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+import logging
+
+from bioguider.agents.agent_utils import read_file
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationResult, ConsistencyEvaluationTask
+from bioguider.agents.evaluation_task import EvaluationTask
+from bioguider.agents.collection_task import CollectionTask
+from bioguider.agents.evaluation_tutorial_task_prompts import INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT
+from bioguider.agents.prompt_utils import CollectionGoalItemEnum
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
+from bioguider.utils.notebook_utils import extract_markdown_from_notebook, strip_notebook_to_code_and_markdown
+from bioguider.utils.pyphen_utils import PyphenReadability
+from bioguider.utils.utils import increase_token_usage
+
+logger = logging.getLogger(__name__)
+
+class TutorialEvaluationResult(BaseModel):
+    overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    overall_key_strengths: str=Field(description="A string value, the key strengths of the tutorial")
+    overall_improvement_suggestions: str=Field(description="Suggestions to improve the overall score if necessary")
+    readability_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    readability_suggestions: str=Field(description="Suggestions to improve readability if necessary")
+    setup_and_dependencies_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    setup_and_dependencies_suggestions: str=Field(description="Suggestions to improve setup and dependencies if necessary")
+    reproducibility_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    reproducibility_suggestions: str=Field(description="Suggestions to improve reproducibility if necessary")
+    structure_and_navigation_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    structure_and_navigation_suggestions: str=Field(description="Suggestions to improve structure and navigation if necessary")
+    executable_code_quality_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    executable_code_quality_suggestions: str=Field(description="Suggestions to improve executable code quality if necessary")
+    result_verification_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    result_verification_suggestions: str=Field(description="Suggestions to improve result verification if necessary")
+    performance_and_resource_notes_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    performance_and_resource_notes_suggestions: str=Field(description="Suggestions to improve performance and resource notes if necessary")
+
+class IndividualTutorialEvaluationResult(BaseModel):
+    tutorial_evaluation: TutorialEvaluationResult | None=Field(description="The evaluation result of the tutorial")
+    consistency_evaluation: ConsistencyEvaluationResult | None=Field(description="The evaluation result of the consistency of the tutorial")
+
+class EvaluationTutorialTask(EvaluationTask):
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        repo_path: str,
+        gitignore_path: str,
+        meta_data: ProjectMetadata | None = None,
+        step_callback: Callable | None = None,
+        summarized_files_db = None,
+        code_structure_db = None,
+    ):
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
+        self.evaluation_name = "Tutorial Evaluation"
+        self.code_structure_db = code_structure_db
+
+    def _collect_files(self):
+        task = CollectionTask(
+            llm=self.llm,
+            step_callback=self.step_callback,
+            summarized_files_db=self.summarized_files_db,
+        )
+        task.compile(
+            repo_path=self.repo_path,
+            gitignore_path=Path(self.repo_path, ".gitignore"),
+            goal_item=CollectionGoalItemEnum.Tutorial.name,
+        )
+        files = task.collect()
+        return files
+
+    def _evaluate_consistency(self, file: str) -> ConsistencyEvaluationResult:
+        consistency_evaluation_task = ConsistencyEvaluationTask(
+            llm=self.llm,
+            code_structure_db=self.code_structure_db,
+            step_callback=self.step_callback,
+        )
+        file = file.strip()
+        with open(Path(self.repo_path, file), "r") as f:
+            tutorial_content = f.read()
+        return consistency_evaluation_task.evaluate(
+            domain="tutorial/vignette",
+            documentation=tutorial_content,
+        )
+
+    def _evaluate_consistency_on_content(self, content: str) -> ConsistencyEvaluationResult:
+        consistency_evaluation_task = ConsistencyEvaluationTask(
+            llm=self.llm,
+            code_structure_db=self.code_structure_db,
+            step_callback=self.step_callback,
+        )
+        return consistency_evaluation_task.evaluate(
+            domain="tutorial/vignette",
+            documentation=content,
+        ), {**DEFAULT_TOKEN_USAGE}
+
+    def _evaluate_individual_tutorial(self, file: str) -> tuple[IndividualTutorialEvaluationResult | None, dict]:
+        content = read_file(Path(self.repo_path, file))
+        if content is None:
+            logger.error(f"Error in reading file {file}")
+            return None, {**DEFAULT_TOKEN_USAGE}
+
+        if file.endswith(".ipynb"):
+            readability_content = extract_markdown_from_notebook(Path(self.repo_path, file))
+            content = json.dumps(strip_notebook_to_code_and_markdown(Path(self.repo_path, file)))
+        else:
+            readability_content = content
+        readability = PyphenReadability()
+        flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
+            _, _, _, _, _ = readability.readability_metrics(readability_content)
+        system_prompt = ChatPromptTemplate.from_template(
+            INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT
+        ).format(
+            flesch_reading_ease=flesch_reading_ease,
+            flesch_kincaid_grade=flesch_kincaid_grade,
+            gunning_fog_index=gunning_fog_index,
+            smog_index=smog_index,
+            tutorial_file_content=readability_content,
+        )
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        res, _, token_usage, reasoning_process = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt="Now, let's begin the tutorial evaluation.",
+            schema=TutorialEvaluationResult,
+        )
+        res: TutorialEvaluationResult = res
+
+        consistency_evaluation_result, _temp_token_usage = self._evaluate_consistency_on_content(content)
+        if consistency_evaluation_result is None:
+            # No sufficient information to evaluate the consistency of the tutorial
+            consistency_evaluation_result = ConsistencyEvaluationResult(
+                consistency_score="N/A",
+                consistency_assessment="No sufficient information to evaluate the consistency of the tutorial",
+                consistency_development=[],
+                consistency_strengths=[],
+            )
+        return IndividualTutorialEvaluationResult(
+            tutorial_evaluation=res,
+            consistency_evaluation=consistency_evaluation_result,
+        ), token_usage
+
+    def _evaluate(self, files: list[str] | None = None) -> tuple[dict[str, IndividualTutorialEvaluationResult] | None, dict, list[str]]:
+        total_token_usage = {**DEFAULT_TOKEN_USAGE}
+        tutorial_evaluation_results = {}
+        for file in files:
+            tutorial_evaluation_result, token_usage = self._evaluate_individual_tutorial(file)
+            total_token_usage = increase_token_usage(total_token_usage, token_usage)
+            tutorial_evaluation_results[file] = tutorial_evaluation_result
+        return tutorial_evaluation_results, total_token_usage, files
+
+
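The public entry point on `EvaluationTask` is not shown in this diff, so the following wiring sketch drives the two hooks directly; the model, paths, and repository identifiers are placeholders, and the `CodeStructureDb` constructor arguments are assumed to mirror `SummarizedFilesDb`:

```python
from langchain_openai import ChatOpenAI
from bioguider.agents.evaluation_tutorial_task import EvaluationTutorialTask
from bioguider.database.code_structure_db import CodeStructureDb
from bioguider.database.summarized_file_db import SummarizedFilesDb

task = EvaluationTutorialTask(
    llm=ChatOpenAI(model="gpt-4o"),              # placeholder model
    repo_path="/tmp/checkout",                   # placeholder checkout path
    gitignore_path="/tmp/checkout/.gitignore",
    summarized_files_db=SummarizedFilesDb("owner", "repo"),
    code_structure_db=CodeStructureDb(author="owner", repo_name="repo"),  # assumed ctor
)
files = task._collect_files()               # CollectionTask-driven tutorial discovery
results, tokens, _ = task._evaluate(files)  # file -> IndividualTutorialEvaluationResult
```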
bioguider/agents/evaluation_tutorial_task_prompts.py (new file)

@@ -0,0 +1,114 @@
+INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT = """
+
+You are an expert in evaluating the quality of tutorials in software repositories.
+Your task is to analyze the provided tutorial file and generate a structured quality assessment based on the following criteria.
+---
+
+### **Evaluation Criteria**
+
+1. **Readability**:
+  * **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
+  * **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
+  * **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
+  * **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
+  * **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
+
+2. **Coverage**:
+  * **Assessment**: [Your evaluation of whether it covers all major steps needed to get started, and dependencies, prerequisites, setup steps, and example usage.]
+  * **Improvement Suggestions**:
+    * **Original text:** [Quote a specific line/section from the tutorial.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+
+3. **Reproducibility**:
+  * **Assessment**: [Your evaluation of whether it provides a clear **description** of reproducibility]
+  * **Improvement Suggestions**:
+    * **Original text:** [Quote a specific line/section from the tutorial.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+
+4. **Structure & Navigation**:
+  * **Assessment**: [Your evaluation of whether it provides logical sections (e.g., intro -> setup -> steps -> results -> next), TOC/anchors, estimated time, etc.]
+  * **Improvement Suggestions**:
+    * **Original text:** [Quote a specific line/section from the tutorial.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+
+5. **Executable Code Quality**:
+  * **Assessment**: [Your evaluation on whether the code snippets are executable and functional, idiomatic, no hard-coded paths, etc.]
+  * **Improvement Suggestions**:
+    * **Original text:** [Quote a specific line/section from the tutorial.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+
+6. **Result Verification**:
+  * **Assessment**: [Your evaluation on expected outputs shown (figures/tables/metrics), acceptance criteria, etc.]
+  * **Improvement Suggestions**:
+    * **Original text:** [Quote a specific line/section from the tutorial.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+
+7. **Performance & Resource Notes**:
+  * **Assessment**: [Your evaluation on performance and resource notes, e.g., CPU/GPU usage, memory usage, runtime estimates, small "lite" path provided.]
+  * **Improvement Suggestions**:
+    * **Original text:** [Quote a specific line/section from the tutorial.]
+    * **Improving comments:** [Provide your suggestions to improve clarity.]
+
+---
+
+### **Final Report Ouput**
+Your final report must **exactly match** the following format. Do not add or omit any sections.
+
+**FinalAnswer**
+* **Overall Score:** [Poor / Fair / Good / Excellent]
+* **Overall Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Overall Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Readability Score:** [Poor / Fair / Good / Excellent]
+* **Readability Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Readability Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Coverage Score:** [Poor / Fair / Good / Excellent]
+* **Coverage Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Coverage Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Reproducibility Score:** [Poor / Fair / Good / Excellent]
+* **Reproducibility Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Reproducibility Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Structure & Navigation Score:** [Poor / Fair / Good / Excellent]
+* **Structure & Navigation Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Structure & Navigation Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Executable Code Quality Score:** [Poor / Fair / Good / Excellent]
+* **Executable Code Quality Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Executable Code Quality Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Result Verification Score:** [Poor / Fair / Good / Excellent]
+* **Result Verification Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Result Verification Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+* **Performance & Resource Notes Score:** [Poor / Fair / Good / Excellent]
+* **Performance & Resource Notes Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Performance & Resource Notes Improvement Suggestions:**
+  - "Original text snippet 1" - Improving comment 1
+  - "Original text snippet 2" - Improving comment 2
+  - ...
+
+---
+
+### **Tutorial File Content:**
+{tutorial_file_content}
+
+---
+
+"""
bioguider/agents/evaluation_userguide_task.py

@@ -75,7 +75,10 @@ class EvaluationUserGuideTask(EvaluationTask):
         file = file.strip()
         with open(Path(self.repo_path, file), "r") as f:
             user_guide_api_documentation = f.read()
-        return consistency_evaluation_task.evaluate(
+        return consistency_evaluation_task.evaluate(
+            domain="user guide/API",
+            documentation=user_guide_api_documentation,
+        ), {**DEFAULT_TOKEN_USAGE}
 
     def _evaluate_individual_userguide(self, file: str) -> tuple[IndividualUserGuideEvaluationResult | None, dict]:
         content = read_file(Path(self.repo_path, file))
bioguider/agents/prompt_utils.py

@@ -135,6 +135,15 @@ If **any one** of these is present, the document should be classified as a User
 - Interactive Elements: Features that allow users to experiment with the code in real-time, such as Jupyter notebooks or R Markdown files.
 - Use Cases: Real-world applications or scenarios where the software can be applied effectively.
 - You can include directory names if all files in the directory are relevant to the goal item.
+**Important instructions**:
+- **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+""",
+        "plan_important_instructions": """ - **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+ - For python/R notebook files **(.ipynb, .Rmd)**, **only infer** if it is the tutorial/vignette from the file name and avoid reading the content of the file.
+""",
+        "observe_important_instructions": """ - **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+ - For python/R notebook files **(.ipynb, .Rmd)**, **only infer** if it is the tutorial/vignette from the file name and avoid reading the content of the file.
+ - **Do not** include any binary files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the final answer.s
 """,
     },
     "DockerGeneration": {
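These instructions exist because raw `.ipynb` files carry outputs and base64-encoded images that dwarf the actual prose; the new `notebook_utils.py` (listed in the summary above but not shown in this diff) is what makes them safe to read elsewhere. A rough sketch of what a stripping helper like `strip_notebook_to_code_and_markdown` could look like, offered as an assumption rather than the shipped implementation:

```python
import json

def strip_notebook_to_code_and_markdown(path):
    """Sketch: keep only cell types and sources, dropping outputs,
    execution counts, and embedded images that bloat .ipynb files."""
    with open(path, "r", encoding="utf-8") as f:
        nb = json.load(f)
    return {
        "cells": [
            {"cell_type": c["cell_type"], "source": "".join(c.get("source", []))}
            for c in nb.get("cells", [])
            if c.get("cell_type") in ("code", "markdown")
        ]
    }
```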
bioguider/database/code_structure_db.py

@@ -123,15 +123,8 @@ class CodeStructureDb:
             os.makedirs(db_path, exist_ok=True)
         except Exception as e:
             logging.error(e)
-            return False
-        db_path = os.path.join(db_path, "
-        # Ensure the local path exists
-        try:
-            os.makedirs(db_path, exist_ok=True)
-        except Exception as e:
-            logging.error(e)
-            return False
-        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
+            return False
+        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_code_structure.db")
         if not os.path.exists(db_path):
             try:
                 with open(db_path, "w"):
@@ -142,6 +135,24 @@ class CodeStructureDb:
         self.connection = sqlite3.connect(db_path)
         return True
 
+    def is_database_built(self) -> bool:
+        res = self._connect_to_db()
+        if not res:
+            return False
+        res = self._ensure_tables()
+        if not res:
+            return False
+        try:
+            cursor = self.connection.cursor()
+            cursor.execute(f"SELECT * FROM {CODE_STRUCTURE_TABLE_NAME}")
+            return cursor.fetchone() is not None
+        except Exception as e:
+            logging.error(e)
+            return False
+        finally:
+            self.connection.close()
+            self.connection = None
+
     def insert_code_structure(
         self,
         name: str,
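`is_database_built` gives callers a cheap way to skip re-parsing a repository whose structure table is already populated. A usage sketch; the constructor arguments are assumed and the rebuild step is a hypothetical stand-in for whatever `code_structure_builder.py` exposes, which this diff does not show:

```python
from bioguider.database.code_structure_db import CodeStructureDb

db = CodeStructureDb(author="owner", repo_name="repo")  # assumed ctor args
if not db.is_database_built():
    rebuild_code_structure(db)  # hypothetical helper; real entry point not in this diff
```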
bioguider/database/summarized_file_db.py

@@ -38,10 +38,11 @@ where file_path = ? and instruction = ? and summarize_level = ? and summarize_pr
 """
 
 class SummarizedFilesDb:
-    def __init__(self, author: str, repo_name: str):
+    def __init__(self, author: str, repo_name: str, data_folder: str = None):
         self.author = author
         self.repo_name = repo_name
         self.connection: Connection | None = None
+        self.data_folder = data_folder
 
     def _ensure_tables(self) -> bool:
         if self.connection is None:
@@ -60,7 +61,9 @@ class SummarizedFilesDb:
     def _connect_to_db(self) -> bool:
         if self.connection is not None:
             return True
-        db_path =
+        db_path = self.data_folder
+        if db_path is None:
+            db_path = os.environ.get("DATA_FOLDER", "./data")
         db_path = os.path.join(db_path, "databases")
         # Ensure the local path exists
         try:
@@ -68,7 +71,7 @@ class SummarizedFilesDb:
         except Exception as e:
             logging.error(e)
             return False
-        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
+        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_summarized_file.db")
         if not os.path.exists(db_path):
             try:
                 with open(db_path, "w"):
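Taken together, the two database changes make storage both injectable and collision-free: an explicit `data_folder` wins over the `DATA_FOLDER` environment variable, and the `_summarized_file` / `_code_structure` suffixes keep the two SQLite files from sharing `{author}_{repo_name}.db`. A sketch of the resulting paths (repository identifiers and folder are placeholders):

```python
from bioguider.database.summarized_file_db import SummarizedFilesDb

# Explicit folder (new in 0.2.22):
db = SummarizedFilesDb("owner", "repo", data_folder="/var/bioguider")
# -> /var/bioguider/databases/owner_repo_summarized_file.db

# Fallback: DATA_FOLDER env var, defaulting to "./data":
db = SummarizedFilesDb("owner", "repo")
# -> ./data/databases/owner_repo_summarized_file.db  (when DATA_FOLDER is unset)
```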