bioguider 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bioguider might be problematic.

Files changed (32)
  1. bioguider/agents/agent_utils.py +16 -10
  2. bioguider/agents/collection_observe_step.py +7 -2
  3. bioguider/agents/collection_task_utils.py +1 -0
  4. bioguider/agents/consistency_collection_step.py +102 -0
  5. bioguider/agents/consistency_evaluation_task.py +57 -0
  6. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  7. bioguider/agents/consistency_observe_step.py +109 -0
  8. bioguider/agents/consistency_query_step.py +74 -0
  9. bioguider/agents/evaluation_task.py +0 -110
  10. bioguider/agents/evaluation_tutorial_task.py +156 -0
  11. bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
  12. bioguider/agents/evaluation_userguide_task.py +13 -43
  13. bioguider/agents/prompt_utils.py +15 -2
  14. bioguider/database/code_structure_db.py +20 -9
  15. bioguider/database/summarized_file_db.py +6 -3
  16. bioguider/managers/evaluation_manager.py +16 -2
  17. bioguider/rag/data_pipeline.py +1 -1
  18. bioguider/utils/code_structure_builder.py +15 -8
  19. bioguider/utils/constants.py +12 -12
  20. bioguider/utils/notebook_utils.py +117 -0
  21. bioguider/utils/{file_handler.py → python_file_handler.py} +1 -1
  22. bioguider/utils/r_file_handler.py +549 -0
  23. bioguider/utils/utils.py +34 -1
  24. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/METADATA +1 -1
  25. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/RECORD +27 -23
  26. bioguider/agents/consistency_collection_execute_step.py +0 -152
  27. bioguider/agents/consistency_collection_observe_step.py +0 -128
  28. bioguider/agents/consistency_collection_plan_step.py +0 -128
  29. bioguider/agents/consistency_collection_task.py +0 -109
  30. bioguider/agents/consistency_collection_task_utils.py +0 -137
  31. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/LICENSE +0 -0
  32. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/WHEEL +0 -0
bioguider/agents/evaluation_tutorial_task.py (new file)
@@ -0,0 +1,156 @@
+
+
+import json
+from pathlib import Path
+from typing import Callable
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+import logging
+
+from bioguider.agents.agent_utils import read_file
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationResult, ConsistencyEvaluationTask
+from bioguider.agents.evaluation_task import EvaluationTask
+from bioguider.agents.collection_task import CollectionTask
+from bioguider.agents.evaluation_tutorial_task_prompts import INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT
+from bioguider.agents.prompt_utils import CollectionGoalItemEnum
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
+from bioguider.utils.notebook_utils import extract_markdown_from_notebook, strip_notebook_to_code_and_markdown
+from bioguider.utils.pyphen_utils import PyphenReadability
+from bioguider.utils.utils import increase_token_usage
+
+logger = logging.getLogger(__name__)
+
+class TutorialEvaluationResult(BaseModel):
+    overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    overall_key_strengths: str=Field(description="A string value, the key strengths of the tutorial")
+    overall_improvement_suggestions: str=Field(description="Suggestions to improve the overall score if necessary")
+    readability_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    readability_suggestions: str=Field(description="Suggestions to improve readability if necessary")
+    setup_and_dependencies_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    setup_and_dependencies_suggestions: str=Field(description="Suggestions to improve setup and dependencies if necessary")
+    reproducibility_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    reproducibility_suggestions: str=Field(description="Suggestions to improve reproducibility if necessary")
+    structure_and_navigation_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    structure_and_navigation_suggestions: str=Field(description="Suggestions to improve structure and navigation if necessary")
+    executable_code_quality_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    executable_code_quality_suggestions: str=Field(description="Suggestions to improve executable code quality if necessary")
+    result_verification_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    result_verification_suggestions: str=Field(description="Suggestions to improve result verification if necessary")
+    performance_and_resource_notes_score: str=Field(description="A string value, could be 'Poor', 'Fair', 'Good', or 'Excellent'")
+    performance_and_resource_notes_suggestions: str=Field(description="Suggestions to improve performance and resource notes if necessary")
+
+class IndividualTutorialEvaluationResult(BaseModel):
+    tutorial_evaluation: TutorialEvaluationResult | None=Field(description="The evaluation result of the tutorial")
+    consistency_evaluation: ConsistencyEvaluationResult | None=Field(description="The evaluation result of the consistency of the tutorial")
+
+class EvaluationTutorialTask(EvaluationTask):
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        repo_path: str,
+        gitignore_path: str,
+        meta_data: ProjectMetadata | None = None,
+        step_callback: Callable | None = None,
+        summarized_files_db = None,
+        code_structure_db = None,
+    ):
+        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
+        self.evaluation_name = "Tutorial Evaluation"
+        self.code_structure_db = code_structure_db
+
+    def _collect_files(self):
+        task = CollectionTask(
+            llm=self.llm,
+            step_callback=self.step_callback,
+            summarized_files_db=self.summarized_files_db,
+        )
+        task.compile(
+            repo_path=self.repo_path,
+            gitignore_path=Path(self.repo_path, ".gitignore"),
+            goal_item=CollectionGoalItemEnum.Tutorial.name,
+        )
+        files = task.collect()
+        return files
+
+    def _evaluate_consistency(self, file: str) -> ConsistencyEvaluationResult:
+        consistency_evaluation_task = ConsistencyEvaluationTask(
+            llm=self.llm,
+            code_structure_db=self.code_structure_db,
+            step_callback=self.step_callback,
+        )
+        file = file.strip()
+        with open(Path(self.repo_path, file), "r") as f:
+            tutorial_content = f.read()
+        return consistency_evaluation_task.evaluate(
+            domain="tutorial/vignette",
+            documentation=tutorial_content,
+        )
+
+    def _evaluate_consistency_on_content(self, content: str) -> ConsistencyEvaluationResult:
+        consistency_evaluation_task = ConsistencyEvaluationTask(
+            llm=self.llm,
+            code_structure_db=self.code_structure_db,
+            step_callback=self.step_callback,
+        )
+        return consistency_evaluation_task.evaluate(
+            domain="tutorial/vignette",
+            documentation=content,
+        ), {**DEFAULT_TOKEN_USAGE}
+
+    def _evaluate_individual_tutorial(self, file: str) -> tuple[IndividualTutorialEvaluationResult | None, dict]:
+        content = read_file(Path(self.repo_path, file))
+        if content is None:
+            logger.error(f"Error in reading file {file}")
+            return None, {**DEFAULT_TOKEN_USAGE}
+
+        if file.endswith(".ipynb"):
+            readability_content = extract_markdown_from_notebook(Path(self.repo_path, file))
+            content = json.dumps(strip_notebook_to_code_and_markdown(Path(self.repo_path, file)))
+        else:
+            readability_content = content
+        readability = PyphenReadability()
+        flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
+            _, _, _, _, _ = readability.readability_metrics(readability_content)
+        system_prompt = ChatPromptTemplate.from_template(
+            INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT
+        ).format(
+            flesch_reading_ease=flesch_reading_ease,
+            flesch_kincaid_grade=flesch_kincaid_grade,
+            gunning_fog_index=gunning_fog_index,
+            smog_index=smog_index,
+            tutorial_file_content=readability_content,
+        )
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        res, _, token_usage, reasoning_process = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt="Now, let's begin the tutorial evaluation.",
+            schema=TutorialEvaluationResult,
+        )
+        res: TutorialEvaluationResult = res
+
+        consistency_evaluation_result, _temp_token_usage = self._evaluate_consistency_on_content(content)
+        if consistency_evaluation_result is None:
+            # No sufficient information to evaluate the consistency of the tutorial
+            consistency_evaluation_result = ConsistencyEvaluationResult(
+                consistency_score="N/A",
+                consistency_assessment="No sufficient information to evaluate the consistency of the tutorial",
+                consistency_development=[],
+                consistency_strengths=[],
+            )
+        return IndividualTutorialEvaluationResult(
+            tutorial_evaluation=res,
+            consistency_evaluation=consistency_evaluation_result,
+        ), token_usage
+
+    def _evaluate(self, files: list[str] | None = None) -> tuple[dict[str, IndividualTutorialEvaluationResult] | None, dict, list[str]]:
+        total_token_usage = {**DEFAULT_TOKEN_USAGE}
+        tutorial_evaluation_results = {}
+        for file in files:
+            tutorial_evaluation_result, token_usage = self._evaluate_individual_tutorial(file)
+            total_token_usage = increase_token_usage(total_token_usage, token_usage)
+            tutorial_evaluation_results[file] = tutorial_evaluation_result
+        return tutorial_evaluation_results, total_token_usage, files
+
+
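Reviewer note: the new EvaluationTutorialTask slots into the existing EvaluationTask interface, so it is driven the same way as the other evaluation tasks. A minimal usage sketch follows; it assumes an OpenAI-compatible chat model and an already-cloned repository, and the paths, author, and repository names are illustrative rather than taken from the package.

from pathlib import Path
from langchain_openai import ChatOpenAI  # any BaseChatOpenAI-compatible model

from bioguider.agents.evaluation_tutorial_task import EvaluationTutorialTask
from bioguider.database.code_structure_db import CodeStructureDb
from bioguider.database.summarized_file_db import SummarizedFilesDb

repo_dir = "./data/repos/example_repo"  # hypothetical local clone
task = EvaluationTutorialTask(
    llm=ChatOpenAI(model="gpt-4o"),
    repo_path=repo_dir,
    gitignore_path=str(Path(repo_dir, ".gitignore")),
    summarized_files_db=SummarizedFilesDb("example_author", "example_repo"),
    code_structure_db=CodeStructureDb("example_author", "example_repo"),
)
# evaluate() comes from the EvaluationTask base class (not shown in this diff);
# judging from _collect_files()/_evaluate() above, it gathers tutorial files and
# returns their per-file results plus the list of files that were evaluated.
evaluation, files = task.evaluate()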
bioguider/agents/evaluation_tutorial_task_prompts.py (new file)
@@ -0,0 +1,114 @@
+INDIVIDUAL_TUTORIAL_EVALUATION_SYSTEM_PROMPT = """
+
+You are an expert in evaluating the quality of tutorials in software repositories.
+Your task is to analyze the provided tutorial file and generate a structured quality assessment based on the following criteria.
+---
+
+### **Evaluation Criteria**
+
+1. **Readability**:
+* **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
+* **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
+* **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
+* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
+* **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
+
+2. **Coverage**:
+* **Assessment**: [Your evaluation of whether it covers all major steps needed to get started, and dependencies, prerequisites, setup steps, and example usage.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the tutorial.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+
+3. **Reproducibility**:
+* **Assessment**: [Your evaluation of whether it provides a clear **description** of reproducibility]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the tutorial.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+
+4. **Structure & Navigation**:
+* **Assessment**: [Your evaluation of whether it provides logical sections (e.g., intro -> setup -> steps -> results -> next), TOC/anchors, estimated time, etc.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the tutorial.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+
+5. **Executable Code Quality**:
+* **Assessment**: [Your evaluation on whether the code snippets are executable and functional, idiomatic, no hard-coded paths, etc.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the tutorial.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+
+6. **Result Verification**:
+* **Assessment**: [Your evaluation on expected outputs shown (figures/tables/metrics), acceptance criteria, etc.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the tutorial.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+
+7. **Performance & Resource Notes**:
+* **Assessment**: [Your evaluation on performance and resource notes, e.g., CPU/GPU usage, memory usage, runtime estimates, small "lite" path provided.]
+* **Improvement Suggestions**:
+* **Original text:** [Quote a specific line/section from the tutorial.]
+* **Improving comments:** [Provide your suggestions to improve clarity.]
+
+---
+
+### **Final Report Ouput**
+Your final report must **exactly match** the following format. Do not add or omit any sections.
+
+**FinalAnswer**
+* **Overall Score:** [Poor / Fair / Good / Excellent]
+* **Overall Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Overall Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+* **Readability Score:** [Poor / Fair / Good / Excellent]
+* **Readability Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Readability Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+* **Coverage Score:** [Poor / Fair / Good / Excellent]
+* **Coverage Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Coverage Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+* **Reproducibility Score:** [Poor / Fair / Good / Excellent]
+* **Reproducibility Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Reproducibility Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+* **Structure & Navigation Score:** [Poor / Fair / Good / Excellent]
+* **Structure & Navigation Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Structure & Navigation Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+* **Executable Code Quality Score:** [Poor / Fair / Good / Excellent]
+* **Executable Code Quality Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Executable Code Quality Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+* **Result Verification Score:** [Poor / Fair / Good / Excellent]
+* **Result Verification Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Result Verification Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+* **Performance & Resource Notes Score:** [Poor / Fair / Good / Excellent]
+* **Performance & Resource Notes Key Strengths**: <brief summary of the Tutorial's strongest points in 2-3 sentences>
+* **Performance & Resource Notes Improvement Suggestions:**
+- "Original text snippet 1" - Improving comment 1
+- "Original text snippet 2" - Improving comment 2
+- ...
+
+---
+
+### **Tutorial File Content:**
+{tutorial_file_content}
+
+---
+
+"""
bioguider/agents/evaluation_userguide_task.py
@@ -1,36 +1,24 @@
 
-import os
 from pathlib import Path
 import logging
 from langchain.prompts import ChatPromptTemplate
-from markdownify import markdownify as md
 from pydantic import BaseModel, Field
 
 from bioguider.agents.agent_utils import read_file
 from bioguider.agents.collection_task import CollectionTask
-from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION, CollectionGoalItemEnum
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationTask, ConsistencyEvaluationResult
+from bioguider.agents.prompt_utils import CollectionGoalItemEnum
 from bioguider.utils.constants import (
     DEFAULT_TOKEN_USAGE,
-    ProjectMetadata,
-    StructuredEvaluationInstallationResult,
-    FreeEvaluationInstallationResult,
-    EvaluationInstallationResult,
 )
-from bioguider.rag.data_pipeline import count_tokens
-from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
 from ..utils.pyphen_utils import PyphenReadability
 
 from .evaluation_task import EvaluationTask
 from .agent_utils import read_file
 from bioguider.utils.utils import increase_token_usage
-from .evaluation_userguide_prompts import CONSISTENCY_EVAL_SYSTEM_PROMPT, INDIVIDUAL_USERGUIDE_EVALUATION_SYSTEM_PROMPT
-from .consistency_collection_task import ConsistencyCollectionTask
+from .evaluation_userguide_prompts import INDIVIDUAL_USERGUIDE_EVALUATION_SYSTEM_PROMPT
 
-class ConsistencyEvaluationResult(BaseModel):
-    consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
-    consistency_assessment: str=Field(description="Your evaluation of whether the user guide/API documentation is consistent with the code definitions")
-    consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
-    consistency_strengths: list[str]=Field(description="A list of strengths of the user guide/API documentation on consistency")
 
 class UserGuideEvaluationResult(BaseModel):
     overall_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
@@ -78,39 +66,19 @@ class EvaluationUserGuideTask(EvaluationTask):
         files = task.collect()
         return files
 
-    def _evaluate_consistency(self, file: str) -> tuple[EvaluationInstallationResult | None, dict, list[str]]:
-        consistency_collect_task = ConsistencyCollectionTask(
+    def _evaluate_consistency(self, file: str) -> ConsistencyEvaluationResult:
+        consistency_evaluation_task = ConsistencyEvaluationTask(
            llm=self.llm,
            code_structure_db=self.code_structure_db,
            step_callback=self.step_callback,
        )
-        consistency_collect_task.compile(repo_path=self.repo_path, gitignore_path=Path(self.repo_path, ".gitignore"))
+        file = file.strip()
         with open(Path(self.repo_path, file), "r") as f:
            user_guide_api_documentation = f.read()
-        res, code_definitions = consistency_collect_task.collect(user_guide_api_documentation)
-
-        if not res:
-            # No sufficient information to evaluate the consistency of the user guide/API documentation
-            return None, {**DEFAULT_TOKEN_USAGE}
-
-        system_prompt = ChatPromptTemplate.from_template(
-            CONSISTENCY_EVAL_SYSTEM_PROMPT
-        ).format(
-            user_guide_api_documentation=user_guide_api_documentation,
-            code_definitions=code_definitions,
-        )
-        agent = CommonAgentTwoSteps(llm=self.llm)
-        res, _, token_usage, reasoning_process = agent.go(
-            system_prompt=system_prompt,
-            instruction_prompt="Now, let's begin the consistency evaluation step.",
-            schema=ConsistencyEvaluationResult,
-        )
-        res: ConsistencyEvaluationResult = res
-        self.print_step(step_output=f"Consistency Evaluation Result: {res}")
-        self.print_step(step_output=f"Consistency Evaluation Reasoning Process: {reasoning_process}")
-        self.print_step(token_usage=token_usage)
-
-        return res, token_usage
+        return consistency_evaluation_task.evaluate(
+            domain="user guide/API",
+            documentation=user_guide_api_documentation,
+        ), {**DEFAULT_TOKEN_USAGE}
 
     def _evaluate_individual_userguide(self, file: str) -> tuple[IndividualUserGuideEvaluationResult | None, dict]:
         content = read_file(Path(self.repo_path, file))
@@ -157,6 +125,8 @@ class EvaluationUserGuideTask(EvaluationTask):
         total_token_usage = {**DEFAULT_TOKEN_USAGE}
         user_guide_evaluation_results = {}
         for file in files:
+            if file.endswith(".py") or file.endswith(".R"):
+                continue
            user_guide_evaluation_result, token_usage = self._evaluate_individual_userguide(file)
            total_token_usage = increase_token_usage(total_token_usage, token_usage)
            user_guide_evaluation_results[file] = user_guide_evaluation_result
bioguider/agents/prompt_utils.py
@@ -104,6 +104,7 @@ COLLECTION_PROMPTS = {
         "goal_item": "User Guide",
         "related_file_description": """A document qualifies as a **User Guide** if it includes **at least one** of the following elements.
 If **any one** of these is present, the document should be classified as a User Guide — full coverage is **not required**:
+- **Not source code or a script** (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
 - Document **functions, methods, or classes**
 - Describe **input parameters, return values**, and **usage syntax**
 - Include **technical guidance** for using specific APIs
@@ -117,9 +118,12 @@ If **any one** of these is present, the document should be classified as a User
 - Code Walkthroughs: Detailed explanations of code snippets in a tutorial format.
 **Do not** classify the document as a User Guide if it is souce code or a script (*.py, *.R) that is not intended for end-user interaction.
 - You can include directory names if all files in the directory are relevant to the goal item.""",
-        "plan_important_instructions": """ - **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
+        "plan_important_instructions": """ - **Do not** try to summarize or read the content of any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
+ - **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
  - **Do not** classify the document as a User Guide if it is a notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.
- - You plan **must not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction."""
+ - You plan **must not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) that is not intended for end-user interaction.""",
+        "observe_important_instructions": """ - **Do not** classify the document as a User Guide if it is source code or a script (*.py, *.R) that is not intended for end-user interaction.
+ - **Do not** include any source code or script (*.py, *.R) or notebook (*.ipynb, *.Rmd) in the final answer that is not intended for end-user interaction."""
     },
     "Tutorial": {
         "goal_item": "Tutorials & Vignettes",
@@ -131,6 +135,15 @@ If **any one** of these is present, the document should be classified as a User
 - Interactive Elements: Features that allow users to experiment with the code in real-time, such as Jupyter notebooks or R Markdown files.
 - Use Cases: Real-world applications or scenarios where the software can be applied effectively.
 - You can include directory names if all files in the directory are relevant to the goal item.
+**Important instructions**:
+- **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+""",
+        "plan_important_instructions": """ - **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+ - For python/R notebook files **(.ipynb, .Rmd)**, **only infer** if it is the tutorial/vignette from the file name and avoid reading the content of the file.
+""",
+        "observe_important_instructions": """ - **Do not** use **read_file_tool, summarize_file_tool, check_file_related_tool** on the python/R notebook files **(.ipynb, .Rmd)**, as they are too big to read.
+ - For python/R notebook files **(.ipynb, .Rmd)**, **only infer** if it is the tutorial/vignette from the file name and avoid reading the content of the file.
+ - **Do not** include any binary files (e.g., `.png`, `.jpg`, `.jpeg`, `.gif`, `.svg`) in the final answer.s
 """,
     },
     "DockerGeneration": {
bioguider/database/code_structure_db.py
@@ -123,15 +123,8 @@ class CodeStructureDb:
             os.makedirs(db_path, exist_ok=True)
         except Exception as e:
             logging.error(e)
-            return False
-        db_path = os.path.join(db_path, "databases")
-        # Ensure the local path exists
-        try:
-            os.makedirs(db_path, exist_ok=True)
-        except Exception as e:
-            logging.error(e)
-            return False
-        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
+            return False
+        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_code_structure.db")
         if not os.path.exists(db_path):
             try:
                 with open(db_path, "w"):
@@ -142,6 +135,24 @@
         self.connection = sqlite3.connect(db_path)
         return True
 
+    def is_database_built(self) -> bool:
+        res = self._connect_to_db()
+        if not res:
+            return False
+        res = self._ensure_tables()
+        if not res:
+            return False
+        try:
+            cursor = self.connection.cursor()
+            cursor.execute(f"SELECT * FROM {CODE_STRUCTURE_TABLE_NAME}")
+            return cursor.fetchone() is not None
+        except Exception as e:
+            logging.error(e)
+            return False
+        finally:
+            self.connection.close()
+            self.connection = None
+
     def insert_code_structure(
         self,
         name: str,
bioguider/database/summarized_file_db.py
@@ -38,10 +38,11 @@ where file_path = ? and instruction = ? and summarize_level = ? and summarize_pr
 """
 
 class SummarizedFilesDb:
-    def __init__(self, author: str, repo_name: str):
+    def __init__(self, author: str, repo_name: str, data_folder: str = None):
         self.author = author
         self.repo_name = repo_name
         self.connection: Connection | None = None
+        self.data_folder = data_folder
 
     def _ensure_tables(self) -> bool:
         if self.connection is None:
@@ -60,7 +61,9 @@ class SummarizedFilesDb:
     def _connect_to_db(self) -> bool:
         if self.connection is not None:
             return True
-        db_path = os.environ.get("DATA_FOLDER", "./data")
+        db_path = self.data_folder
+        if db_path is None:
+            db_path = os.environ.get("DATA_FOLDER", "./data")
         db_path = os.path.join(db_path, "databases")
         # Ensure the local path exists
         try:
@@ -68,7 +71,7 @@ class SummarizedFilesDb:
         except Exception as e:
             logging.error(e)
             return False
-        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
+        db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_summarized_file.db")
         if not os.path.exists(db_path):
             try:
                 with open(db_path, "w"):
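Reviewer note: the new optional data_folder argument takes precedence over the DATA_FOLDER environment variable, so callers can pin the database location explicitly, and the _summarized_file suffix keeps this SQLite file from colliding with the renamed code-structure database. A small sketch (paths and names are illustrative):

import os

from bioguider.database.summarized_file_db import SummarizedFilesDb

# Default behaviour: folder resolved from the environment (or ./data).
os.environ["DATA_FOLDER"] = "./data"
db_from_env = SummarizedFilesDb("example_author", "example_repo")

# New behaviour: pass the folder directly; the env var is then ignored.
db_pinned = SummarizedFilesDb(
    "example_author", "example_repo", data_folder="/tmp/bioguider_data"
)
# Either way the file is created as <data_folder>/databases/<author>_<repo>_summarized_file.db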
bioguider/managers/evaluation_manager.py
@@ -1,6 +1,7 @@
 import os
 from pathlib import Path
 
+from bioguider.agents.evaluation_tutorial_task import EvaluationTutorialTask
 from bioguider.agents.evaluation_userguide_task import EvaluationUserGuideTask
 from bioguider.agents.prompt_utils import CollectionGoalItemEnum
 from bioguider.database.code_structure_db import CodeStructureDb
@@ -35,8 +36,8 @@ class EvaluationManager:
         self.summary_file_db = SummarizedFilesDb(author, repo_name)
         self.code_structure_db = CodeStructureDb(author, repo_name)
         code_structure_builder = CodeStructureBuilder(
-            repo_path=repo_url,
-            gitignore_path=Path(repo_url, ".gitignore"),
+            repo_path=self.rag.repo_dir,
+            gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
             code_structure_db=self.code_structure_db
         )
         code_structure_builder.build_code_structure()
@@ -142,6 +143,19 @@ class EvaluationManager:
         )
         evaluation, files = evaluation_task.evaluate()
         return evaluation, files
+
+    def evaluate_tutorial(self):
+        evaluation_task = EvaluationTutorialTask(
+            llm=self.llm,
+            repo_path=self.rag.repo_dir,
+            gitignore_path=Path(self.rag.repo_dir, ".gitignore"),
+            meta_data=self.project_metadata,
+            step_callback=self.step_callback,
+            summarized_files_db=self.summary_file_db,
+            code_structure_db=self.code_structure_db,
+        )
+        evaluation, files = evaluation_task.evaluate()
+        return evaluation, files
 
 
 
bioguider/rag/data_pipeline.py
@@ -91,7 +91,7 @@ def download_repo(repo_url: str, local_path: str, access_token: str = None):
     logger.info(f"Cloning repository from {repo_url} to {local_path}")
     # We use repo_url in the log to avoid exposing the token in logs
     result = subprocess.run(
-        ["git", "clone", clone_url, local_path],
+        ["git", "clone", "--recurse-submodules", clone_url, local_path],
         check=True,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
bioguider/utils/code_structure_builder.py
@@ -1,8 +1,10 @@
 from pathlib import Path
 import logging
 
+from bioguider.utils.r_file_handler import RFileHandler
+
 from .gitignore_checker import GitignoreChecker
-from .file_handler import FileHandler
+from .python_file_handler import PythonFileHandler
 from ..database.code_structure_db import CodeStructureDb
 
 logger = logging.getLogger(__name__)
@@ -10,22 +12,27 @@ logger = logging.getLogger(__name__)
 class CodeStructureBuilder:
     def __init__(
         self,
-        repo_path: str,
-        gitignore_path: str,
+        repo_path: str | Path,
+        gitignore_path: str | Path,
         code_structure_db: CodeStructureDb,
     ):
-        self.repo_path = repo_path
-        self.gitignore_checker = GitignoreChecker(repo_path, gitignore_path)
-        self.file_handler = FileHandler(repo_path)
+        self.repo_path = str(repo_path)
+        self.gitignore_checker = GitignoreChecker(repo_path, str(gitignore_path))
+        self.file_handler = PythonFileHandler(repo_path)
         self.code_structure_db = code_structure_db
 
     def build_code_structure(self):
+        if self.code_structure_db.is_database_built():
+            return
         files = self.gitignore_checker.check_files_and_folders()
         for file in files:
-            if not file.endswith(".py"):
+            if not file.endswith(".py") and not file.endswith(".R"):
                 continue
             logger.info(f"Building code structure for {file}")
-            file_handler = FileHandler(Path(self.repo_path) / file)
+            if file.endswith(".py"):
+                file_handler = PythonFileHandler(Path(self.repo_path) / file)
+            else:
+                file_handler = RFileHandler(Path(self.repo_path) / file)
             functions_and_classes = file_handler.get_functions_and_classes()
             # fixme: currently, we don't extract reference graph for each function or class
             for function_or_class in functions_and_classes:
bioguider/utils/constants.py
@@ -119,15 +119,15 @@ class DemoInstructionsResult(BaseModel):
     expected_output_description: Optional[bool] = Field(description="A boolean value. Does it provide the description of expected output?")
 
 class EvaluationSubmissionRequirementsResult(BaseModel):
-    compiled_standalone_software: bool
-    source_code: bool
-    demo_dataset: bool
-    run_on_data_instruction: bool
-    run_on_custom_instruction: bool
-    expected_output_description: bool
-    complete_readme: bool
-    software_dependency: bool
-    install_tutorial: bool
-    license: bool
-    hardware_requirements: bool
-    compatible_os: bool
+    compiled_standalone_software: bool | None
+    source_code: bool | None
+    demo_dataset: bool | None
+    run_on_data_instruction: bool | None
+    run_on_custom_instruction: bool | None
+    expected_output_description: bool | None
+    complete_readme: bool | None
+    software_dependency: bool | None
+    install_tutorial: bool | None
+    license: bool | None
+    hardware_requirements: bool | None
+    compatible_os: bool | None
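Reviewer note: switching these fields from bool to bool | None lets the model record an explicit "not determined" state instead of forcing every requirement to True or False; that reading of the change is mine, not documented in the diff. A minimal illustration:

from bioguider.utils.constants import EvaluationSubmissionRequirementsResult

# All fields are still required, but None is now accepted where a
# requirement could not be assessed from the repository.
result = EvaluationSubmissionRequirementsResult(
    compiled_standalone_software=None,
    source_code=True,
    demo_dataset=False,
    run_on_data_instruction=True,
    run_on_custom_instruction=None,
    expected_output_description=False,
    complete_readme=True,
    software_dependency=True,
    install_tutorial=True,
    license=True,
    hardware_requirements=None,
    compatible_os=None,
)
print(result)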