bioguider 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (32)
  1. bioguider/agents/agent_utils.py +16 -10
  2. bioguider/agents/collection_observe_step.py +7 -2
  3. bioguider/agents/collection_task_utils.py +1 -0
  4. bioguider/agents/consistency_collection_step.py +102 -0
  5. bioguider/agents/consistency_evaluation_task.py +57 -0
  6. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  7. bioguider/agents/consistency_observe_step.py +109 -0
  8. bioguider/agents/consistency_query_step.py +74 -0
  9. bioguider/agents/evaluation_task.py +0 -110
  10. bioguider/agents/evaluation_tutorial_task.py +156 -0
  11. bioguider/agents/evaluation_tutorial_task_prompts.py +114 -0
  12. bioguider/agents/evaluation_userguide_task.py +13 -43
  13. bioguider/agents/prompt_utils.py +15 -2
  14. bioguider/database/code_structure_db.py +20 -9
  15. bioguider/database/summarized_file_db.py +6 -3
  16. bioguider/managers/evaluation_manager.py +16 -2
  17. bioguider/rag/data_pipeline.py +1 -1
  18. bioguider/utils/code_structure_builder.py +15 -8
  19. bioguider/utils/constants.py +12 -12
  20. bioguider/utils/notebook_utils.py +117 -0
  21. bioguider/utils/{file_handler.py → python_file_handler.py} +1 -1
  22. bioguider/utils/r_file_handler.py +549 -0
  23. bioguider/utils/utils.py +34 -1
  24. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/METADATA +1 -1
  25. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/RECORD +27 -23
  26. bioguider/agents/consistency_collection_execute_step.py +0 -152
  27. bioguider/agents/consistency_collection_observe_step.py +0 -128
  28. bioguider/agents/consistency_collection_plan_step.py +0 -128
  29. bioguider/agents/consistency_collection_task.py +0 -109
  30. bioguider/agents/consistency_collection_task_utils.py +0 -137
  31. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/LICENSE +0 -0
  32. {bioguider-0.2.20.dist-info → bioguider-0.2.22.dist-info}/WHEEL +0 -0
bioguider/agents/agent_utils.py

@@ -2,6 +2,7 @@
 import json
 from json import JSONDecodeError
 import os
+from pathlib import Path
 import re
 from typing import List, Optional, Tuple, Union
 from langchain_openai import AzureChatOpenAI
@@ -22,6 +23,7 @@ from pydantic import BaseModel, Field

 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_SENTENCE_NUM
 from bioguider.utils.file_utils import get_file_type
+from bioguider.utils.utils import clean_action_input
 from ..utils.gitignore_checker import GitignoreChecker
 from ..database.summarized_file_db import SummarizedFilesDb
 from bioguider.agents.common_conversation import CommonConversation
@@ -122,16 +124,18 @@ def pretty_print(message, printout = True):
 HUGE_FILE_LENGTH = 10 * 1024 # 10K

 def read_file(
-    file_path: str,
+    file_path: str | Path,
 ) -> str | None:
+    file_path = str(file_path).strip()
     if not os.path.isfile(file_path):
         return None
     with open(file_path, 'r') as f:
         content = f.read()
     return content

-def write_file(file_path: str, content: str):
+def write_file(file_path: str | Path, content: str):
     try:
+        file_path = str(file_path).strip()
         with open(file_path, "w") as fobj:
             fobj.write(content)
         return True
@@ -140,10 +144,11 @@ def write_file(file_path: str, content: str):
         return False

 def read_directory(
-    dir_path: str,
+    dir_path: str | Path,
     gitignore_path: str,
     level: int=1,
 ) -> list[str] | None:
+    dir_path = str(dir_path).strip()
     if not os.path.isdir(dir_path):
         return None
     gitignore_checker = GitignoreChecker(
@@ -182,15 +187,16 @@ Now, let's start to summarize.

 def summarize_file(
     llm: BaseChatOpenAI,
-    name: str,
+    name: str | Path,
     content: str | None = None,
     level: int = 3,
     summary_instructions: str | None = None,
     summarize_prompt: str = "N/A",
     db: SummarizedFilesDb | None = None,
 ) -> Tuple[str, dict]:
+    name = str(name).strip()
     if content is None:
-        try:
+        try:
             with open(name, "r") as fobj:
                 content = fobj.read()
         except Exception as e:
@@ -289,9 +295,7 @@ class CustomOutputParser(AgentOutputParser):
         action_input = match.group(2)
         # Return the action and action input
         action_dict = None
-        action_input_replaced = action_input.strip().strip(" ").strip('"').strip('`').strip()
-        action_input_replaced = action_input_replaced.replace("'", '"')
-        action_input_replaced = action_input_replaced.replace("`", '"')
+        action_input_replaced = clean_action_input(action_input)
        try:
             action_dict = json.loads(action_input_replaced)
         except json.JSONDecodeError:
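
The three inline normalization lines are folded into a single `clean_action_input` helper imported from `bioguider/utils/utils.py` (that file's +34 -1 diff is not shown here). A minimal sketch consistent with the removed lines, assuming the helper simply centralizes the old logic; the shipped implementation may differ:

```python
# Hypothetical reconstruction of clean_action_input, based only on the
# removed inline logic above; not taken from the 0.2.22 source.
def clean_action_input(action_input: str) -> str:
    # Trim whitespace and stray quote/backtick wrappers around the payload.
    cleaned = action_input.strip().strip(" ").strip('"').strip('`').strip()
    # Normalize single quotes and backticks to double quotes for json.loads.
    cleaned = cleaned.replace("'", '"')
    cleaned = cleaned.replace("`", '"')
    return cleaned
```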
@@ -410,8 +414,10 @@ def read_license_file(repo_path: str) -> tuple[str | None, str|None]:
     ]
     license_files = []
     for file in hardcoded_license_files:
-        if os.path.exists(os.path.join(repo_path, file)):
-            with open(os.path.join(repo_path, file), "r") as f:
+        file_path = os.path.join(str(repo_path), file)
+        file_path = file_path.strip()
+        if os.path.exists(file_path):
+            with open(file_path, "r") as f:
                license_files.append((f.read(), os.path.join(repo_path, file)))

     max_item = max(license_files, key=lambda x: len(x[0])) if len(license_files) > 0 else (None, None)
bioguider/agents/collection_observe_step.py

@@ -94,8 +94,13 @@ class CollectionObserveStep(PEOCommonStep):
     )
     def _execute_directly(self, state: CollectionWorkflowState):
         step_count = state["step_count"]
-        instruction = "Now, we have reached max recursion limit, please give me the **final answer** based on the current information" \
-            if step_count == MAX_STEP_COUNT/3 - 2 else "Let's begin thinking."
+        plan = state["plan_actions"]
+        plan = plan.strip()
+        if len(plan) == 0:
+            instruction = "No plan provided, please let's generate the final answer based on the current information."
+        else:
+            instruction = "Now, we have reached max recursion limit, please give me the **final answer** based on the current information" \
+                if step_count == MAX_STEP_COUNT/3 - 2 else "Let's begin thinking."
         system_prompt = self._build_prompt(state)
         agent = CommonAgentTwoSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
bioguider/agents/collection_task_utils.py

@@ -89,6 +89,7 @@ Returns:
     def run(self, file_path: str) -> str:
         if not self.repo_path in file_path:
             file_path = os.path.join(self.repo_path, file_path)
+        file_path = file_path.strip()
         if not os.path.isfile(file_path):
             return "Can't read file"

bioguider/agents/consistency_collection_step.py

@@ -0,0 +1,102 @@
+
+
+
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.agents.peo_common_step import PEOCommonStep
+
+
+CONSISTANCY_COLLECTION_SYSTEM_PROMPT = """
+### **Goal**
+You are an expert developer specializing in the biomedical domain.
+You will be given a {domain} documentation. Your task is to collect all the functions, classes, and methods that the {domain} documentation mentions.
+
+---
+
+### **Input {domain} Documentation**
+{documentation}
+
+### **Output Format**
+The collected functions, classes, and methods **must exactly match** the following format, **do not** make up anything:
+
+```
+name: <function/class/method name>
+file_path: <file path, if not sure, just put "N/A">
+parameters: <parameters, if not sure, just put "N/A">
+parent: <parent name, if it is a class method, put the class name as the parent name, if not sure, just put "N/A">
+
+...
+
+```
+
+---
+
+### **Output Example**
+```
+name: __init__
+file_path: src/agents/common_agent.py
+parameters: llm, step_output_callback, summarized_files_db
+parent: CommonAgent
+
+name: _invoke_agent
+file_path: src/agents/common_agent.py
+parameters: system_prompt, instruction_prompt, schema, post_process
+parent: CommonAgent
+
+...
+```
+
+"""
+
+class ConsistencyCollectionResult(BaseModel):
+    functions_and_classes: list[dict] = Field(description="A list of functions and classes that the documentation mentions")
+
+ConsistencyCollectionResultJsonSchema = {
+    "properties": {
+        "functions_and_classes": {
+            "description": "A list of functions and classes that the documentation mentions",
+            "items": {
+                "type": "object"
+            },
+            "title": "Functions And Classes",
+            "type": "array"
+        }
+    },
+    "required": [
+        "functions_and_classes"
+    ],
+    "title": "ConsistencyCollectionResult",
+    "type": "object"
+}
+
+class ConsistencyCollectionStep(PEOCommonStep):
+    def __init__(self, llm: BaseChatOpenAI):
+        super().__init__(llm)
+        self.step_name = "Consistency Collection Step"
+
+    def _prepare_system_prompt(self, state: ConsistencyEvaluationState) -> str:
+        documentation = state["documentation"]
+        domain = state["domain"]
+        return ChatPromptTemplate.from_template(CONSISTANCY_COLLECTION_SYSTEM_PROMPT).format(
+            domain=domain,
+            documentation=documentation,
+        )
+
+    def _execute_directly(self, state: ConsistencyEvaluationState) -> tuple[dict, dict[str, int]]:
+        system_prompt = self._prepare_system_prompt(state)
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        res, _, token_usage, reasoning_process = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt="Now, let's begin the consistency collection step.",
+            schema=ConsistencyCollectionResultJsonSchema,
+        )
+        res: ConsistencyCollectionResult = ConsistencyCollectionResult.model_validate(res)
+        state["functions_and_classes"] = res.functions_and_classes
+        self._print_step(state, step_output=f"Consistency Collection Result: {res.functions_and_classes}")
+        self._print_step(state, step_output=f"Consistency Collection Reasoning Process: {reasoning_process}")
+
+        return state, token_usage
+
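
For reference, a payload that validates against `ConsistencyCollectionResultJsonSchema` looks like the following sketch; the entry values are lifted from the prompt's own output example, not from any real run:

```python
# Illustrative only: a result that the collection step's schema accepts.
from pydantic import BaseModel, Field

class ConsistencyCollectionResult(BaseModel):
    functions_and_classes: list[dict] = Field(
        description="A list of functions and classes that the documentation mentions"
    )

result = ConsistencyCollectionResult.model_validate({
    "functions_and_classes": [
        {
            "name": "_invoke_agent",  # values taken from the prompt's example
            "file_path": "src/agents/common_agent.py",
            "parameters": "system_prompt, instruction_prompt, schema, post_process",
            "parent": "CommonAgent",
        },
    ],
})
print(result.functions_and_classes[0]["name"])  # _invoke_agent
```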
bioguider/agents/consistency_evaluation_task.py

@@ -0,0 +1,57 @@
+
+
+
+from typing import Callable
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel
+
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.database.code_structure_db import CodeStructureDb
+from .consistency_collection_step import ConsistencyCollectionStep
+from .consistency_query_step import ConsistencyQueryStep
+from .consistency_observe_step import ConsistencyObserveStep
+
+class ConsistencyEvaluationResult(BaseModel):
+    score: str
+    assessment: str
+    development: list[str]
+    strengths: list[str]
+
+class ConsistencyEvaluationTask:
+    def __init__(
+        self,
+        llm: BaseChatOpenAI,
+        code_structure_db: CodeStructureDb,
+        step_callback: Callable | None = None
+    ):
+        self.llm = llm
+        self.code_structure_db = code_structure_db
+        self.step_callback = step_callback
+
+    def evaluate(self, domain: str, documentation: str) -> ConsistencyEvaluationResult:
+        collection_step = ConsistencyCollectionStep(llm=self.llm)
+        query_step = ConsistencyQueryStep(code_structure_db=self.code_structure_db)
+        observe_step = ConsistencyObserveStep(llm=self.llm)
+
+        state = ConsistencyEvaluationState(
+            domain=domain,
+            documentation=documentation,
+            step_output_callback=self.step_callback,
+        )
+
+        state = collection_step.execute(state)
+        state = query_step.execute(state)
+        state = observe_step.execute(state)
+
+        score = state["consistency_score"]
+        assessment = state["consistency_assessment"]
+        development = state["consistency_development"]
+        strengths = state["consistency_strengths"]
+
+        return ConsistencyEvaluationResult(
+            score=score,
+            assessment=assessment,
+            development=development,
+            strengths=strengths,
+        )
+
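
The new task wires the three steps into a collect → query → observe pipeline. A hypothetical invocation might look like the sketch below; the LLM construction and the `CodeStructureDb` arguments are assumptions, since neither appears in this diff:

```python
# Hypothetical usage sketch; the LLM model choice and CodeStructureDb
# constructor arguments are assumptions not shown in this diff.
from langchain_openai import ChatOpenAI
from bioguider.database.code_structure_db import CodeStructureDb
from bioguider.agents.consistency_evaluation_task import ConsistencyEvaluationTask

llm = ChatOpenAI(model="gpt-4o")  # any BaseChatOpenAI-compatible model
db = CodeStructureDb(...)         # constructor args not shown in the diff
task = ConsistencyEvaluationTask(llm=llm, code_structure_db=db)

result = task.evaluate(
    domain="tutorial",
    documentation=open("docs/tutorial.md").read(),
)
print(result.score)        # e.g. "Good"
print(result.development)  # list of inconsistencies found
```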
bioguider/agents/consistency_evaluation_task_utils.py

@@ -0,0 +1,14 @@
+
+from typing import Callable, Optional, TypedDict
+
+
+class ConsistencyEvaluationState(TypedDict):
+    domain: str
+    documentation: str
+    step_output_callback: Optional[Callable]
+    functions_and_classes: Optional[list[dict]]
+    all_query_rows: Optional[list[any]]
+    consistency_score: Optional[str]
+    consistency_assessment: Optional[str]
+    consistency_development: Optional[list[str]]
+    consistency_strengths: Optional[list[str]]
bioguider/agents/consistency_observe_step.py

@@ -0,0 +1,109 @@
+
+
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from pydantic import BaseModel, Field
+from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.agents.peo_common_step import PEOCommonStep
+
+CONSISTENCY_OBSERVE_SYSTEM_PROMPT = """
+You are an expert developer specializing in the biomedical domain.
+Your task is to analyze both:
+1. the provided file related to {domain} documentation,
+2. the code definitions related to the {domain} documentation
+and generate a structured consistency assessment based on the following criteria.
+
+---
+
+### **Evaluation Criteria**
+
+**Consistency**:
+* **Score**: [Poor / Fair / Good / Excellent]
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
+* **Strengths**: [A list of strengths of the {domain} documentation on consistency]
+
+---
+
+### **Output Format**
+Your output **must exactly match** the following format:
+```
+**Consistency**:
+* **Score**: [Poor / Fair / Good / Excellent]
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+* **Development**: [A list of inconsistent function/class/method name and inconsistent docstring, and describe how they are inconsistent]
+* **Strengths**: [A list of strengths of the {domain} documentation on consistency]
+```
+
+### **Output Example**
+
+```
+**Consistency**:
+* **Assessment**: [Your evaluation of whether the {domain} documentation is consistent with the code definitions]
+* **Development**:
+- Inconsistent function/class/method name 1
+- Inconsistent docstring 1
+- Inconsistent function/class/method name 2
+- Inconsistent docstring 2
+- ...
+* **Strengths**:
+- Strengths 1
+- Strengths 2
+- ...
+```
+
+---
+
+### **Input {domain} Documentation**
+{documentation}
+
+### **Code Definitions**
+{code_definitions}
+
+
+"""
+
+class ConsistencyEvaluationObserveResult(BaseModel):
+    consistency_score: str=Field(description="A string value, could be `Poor`, `Fair`, `Good`, or `Excellent`")
+    consistency_assessment: str=Field(description="Your evaluation of whether the documentation is consistent with the code definitions")
+    consistency_development: list[str]=Field(description="A list of inconsistent function/class/method name and inconsistent docstring")
+    consistency_strengths: list[str]=Field(description="A list of strengths of the documentation on consistency")
+
+
+class ConsistencyObserveStep(PEOCommonStep):
+    def __init__(self, llm: BaseChatOpenAI):
+        super().__init__(llm)
+        self.step_name = "Consistency Observe Step"
+
+    def _prepare_system_prompt(self, state: ConsistencyEvaluationState):
+        all_query_rows = state["all_query_rows"]
+        documentation = state["documentation"]
+        domain = state["domain"]
+        code_definition = ""
+        for row in all_query_rows:
+            content = f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\nparameters: {row['params']}\ndoc_string: {row['doc_string']}"
+            code_definition += content
+            code_definition += "\n\n\n"
+        return ChatPromptTemplate.from_template(CONSISTENCY_OBSERVE_SYSTEM_PROMPT).format(
+            code_definitions=code_definition,
+            documentation=documentation,
+            domain=domain,
+        )
+
+    def _execute_directly(self, state: ConsistencyEvaluationState):
+        system_prompt = self._prepare_system_prompt(state)
+        agent = CommonAgentTwoSteps(llm=self.llm)
+        res, _, token_usage, reasoning_process = agent.go(
+            system_prompt=system_prompt,
+            instruction_prompt="Now, let's begin the consistency evaluation step.",
+            schema=ConsistencyEvaluationObserveResult,
+        )
+        res: ConsistencyEvaluationObserveResult = res
+        state["consistency_score"] = res.consistency_score
+        state["consistency_assessment"] = res.consistency_assessment
+        state["consistency_development"] = res.consistency_development
+        state["consistency_strengths"] = res.consistency_strengths
+        return state, token_usage
+
+
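
`_prepare_system_prompt` flattens each database row into a plain-text block before templating. For a single row it produces text like the sketch below; the f-string is verbatim from the diff, while the row values are made up for illustration:

```python
# Reproduces the row-flattening f-string from _prepare_system_prompt;
# the row values here are illustrative, not from a real database.
row = {
    "name": "read_file",
    "path": "bioguider/agents/agent_utils.py",
    "parent": "N/A",
    "params": "file_path",
    "doc_string": "Read a file and return its content.",
}
content = (
    f"name: {row['name']}\nfile_path: {row['path']}\nparent: {row['parent']}\n"
    f"parameters: {row['params']}\ndoc_string: {row['doc_string']}"
)
print(content)
```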
bioguider/agents/consistency_query_step.py

@@ -0,0 +1,74 @@
+
+
+from bioguider.agents.common_step import CommonStep
+from bioguider.agents.consistency_evaluation_task_utils import ConsistencyEvaluationState
+from bioguider.database.code_structure_db import CodeStructureDb
+from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
+
+
+class ConsistencyQueryStep(CommonStep):
+    def __init__(self, code_structure_db: CodeStructureDb):
+        super().__init__()
+        self.step_name = "Consistency Query Step"
+        self.code_structure_db = code_structure_db
+
+    def _execute_directly(self, state: ConsistencyEvaluationState):
+        functions_and_classes = state["functions_and_classes"]
+        all_rows: list[any] = []
+        for function_or_class in functions_and_classes:
+            function_or_class_name = function_or_class["name"]
+            function_or_class_file_path = function_or_class["file_path"]
+            function_or_class_parameters = function_or_class["parameters"]
+            function_or_class_parent = function_or_class["parent"]
+            self._print_step(state, step_output=(
+                f"Consistency Query Step: \n{function_or_class_name},\n"
+                f" {function_or_class_file_path},\n"
+                f" {function_or_class_parameters},\n"
+                f" {function_or_class_parent}"
+            ))
+            file_path = None
+            parent = None
+            name = None
+            if "file_path" in function_or_class and function_or_class["file_path"] != "N/A":
+                file_path = function_or_class["file_path"]
+            if "parent" in function_or_class and function_or_class["parent"] != "N/A":
+                parent = function_or_class["parent"]
+            if "name" in function_or_class and function_or_class["name"] != "N/A":
+                name = function_or_class["name"]
+
+            rows: list[any] | None = None
+            if name is None:
+                if file_path is not None:
+                    rows = self.code_structure_db.select_by_path(file_path)
+                elif parent is not None:
+                    rows = self.code_structure_db.select_by_parent(parent)
+            else:
+                if file_path is not None and parent is not None:
+                    rows = self.code_structure_db.select_by_name_and_parent_and_path(name, parent, file_path)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name_and_path(name, file_path)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name_and_parent(name, parent)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name(name)
+                elif file_path is not None:
+                    rows = self.code_structure_db.select_by_name_and_path(name, file_path)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name(name)
+                elif parent is not None:
+                    rows = self.code_structure_db.select_by_name_and_parent(name, parent)
+                    if rows is None or len(rows) == 0:
+                        rows = self.code_structure_db.select_by_name(name)
+                else:
+                    rows = self.code_structure_db.select_by_name(name)
+            if rows is None or len(rows) == 0:
+                self._print_step(state, step_output=f"No such function or class {name}")
+                continue
+            all_rows.extend(rows)
+
+        state["all_query_rows"] = all_rows
+
+        return state, {**DEFAULT_TOKEN_USAGE}
+
+
+
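
The query step degrades gracefully: given a name plus both hints, it narrows first and falls back to broader lookups. The walk-through below is illustrative; the `select_by_*` method names are the ones visible in the diff, and the entry is borrowed from the collection prompt's example:

```python
# Illustrative walk-through of the fallback order for one collected entry.
entry = {
    "name": "_invoke_agent",
    "file_path": "src/agents/common_agent.py",
    "parent": "CommonAgent",
}
# With name, file_path, and parent all present, the step tries, in order:
#   1. select_by_name_and_parent_and_path(name, parent, file_path)
#   2. select_by_name_and_path(name, file_path)   # if 1 returned nothing
#   3. select_by_name_and_parent(name, parent)    # if 2 returned nothing
#   4. select_by_name(name)                       # last resort
# If every lookup comes back empty, the entry is skipped with a
# "No such function or class" step output.
```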
bioguider/agents/evaluation_task.py

@@ -204,113 +204,3 @@ class EvaluationTask(ABC):
     @abstractmethod
     def _collect_files(self) -> list[str]:
         pass
-
-
-EVALUATION_TUTORIAL_SYSTEM_PROMPT="""
-You are an expert in software documentation and developer education.
-You are given the content of a tutorial file from a GitHub repository. Your task is to **critically evaluate** the quality of this tutorial based on best practices in technical writing and developer onboarding.
-Please assess the tutorial using the following criteria. Provide your evaluation in structured sections:
-
----
-
-### **Evaluation Criteria:**
-1. **Readability**: You are provided the following metrics scores calculated with pyphen, please evaluate readability based on the scores:
-* Flesch Reading Ease: {flesch_reading_ease} (206.835 - 1.015(words/sentences) - 84.6(syllables/words))
-* Flesch-Kincaid Grade Level: {flesch_kincaid_grade} (0.39(words/sentences) + 11.8(syllables/words) - 15.59)
-* Gunning Fog Index: {gunning_fog_index} (0.4[(words/sentences) + 100(complex words/words)])
-* SMOG Index: {smog_index} (1.043*sqrt(polysyllables * (30/sentences)) + 3.1291)
-2. **Coverage**
-* Does the tutorial cover all major steps needed to get started?
-* Are dependencies, prerequisites, setup steps, and example usage included?
-3. **Structure & Organization**
-* Is the content logically structured (e.g., introduction → setup → examples → summary)?
-* Are sections well-labeled and easy to navigate?
-4. **Balance Between Code and Explanation**
-* Is there a good balance between code snippets and narrative explanation?
-* Are code blocks properly annotated or explained?
-5. **Terminology Consistency**
-* Is technical terminology used consistently and accurately?
-* Are key terms introduced and reused correctly?
-6. **Example Quality**
-* Are the examples relevant, correct, and representative of real usage?
-* Are edge cases or typical user pitfalls addressed?
-7. **Formatting and Style**
-* Are headings, bullet points, code formatting, and markdown style used effectively?
-* Are there any formatting issues that hurt clarity?
----
-
-### **Output Format:**
-Please respond in the following format:
-
-```
-**FinalAnswer**
-**Readability**: Your comments here
-**Coverage**: Your comments here
-**Structure & Organization**: Your comments here
-**Code vs. Explanation Balance**: Your comments here
-**Terminology Consistency**: Your comments here
-**Example Quality**: Your comments here
-**Formatting and Style**: Your comments here
-**Overall Rating**: [Poor / Fair / Good / Excellent]
-```
-
----
-
-### **Tutorial File Content:**
-
-```
-{tutorial_file_content}
-```
-
----
-"""
-class EvaluationTutorialTask(EvaluationTask):
-    def __init__(
-        self,
-        llm: BaseChatOpenAI,
-        repo_path: str,
-        gitignore_path: str,
-        meta_data: ProjectMetadata | None = None,
-        step_callback: Callable | None = None,
-        summarized_files_db = None,
-    ):
-        super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback, summarized_files_db)
-        self.evaluation_name = "Tutorial Evaluation"
-
-    def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
-        if len(files) == 0:
-            return {}, {**DEFAULT_TOKEN_USAGE}
-
-        evaluations = {}
-        for file in files:
-            tutorial_path = Path(self.repo_path, file)
-            tutorial_content = read_file(tutorial_path)
-            if tutorial_content is None:
-                logging.error(f"Error in reading file {file}")
-                continue
-
-            readability = PyphenReadability()
-            flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
-                _, _, _, _, _ = readability.readability_metrics(tutorial_content)
-            system_prompt = ChatPromptTemplate.from_template(
-                EVALUATION_TUTORIAL_SYSTEM_PROMPT
-            ).format(
-                tutorial_file_content=tutorial_content,
-                flesch_reading_ease=flesch_reading_ease,
-                flesch_kincaid_grade=flesch_kincaid_grade,
-                gunning_fog_index=gunning_fog_index,
-                smog_index=smog_index,
-            )
-            conversation = CommonConversation(llm=self.llm)
-            response, token_usage = conversation.generate(
-                system_prompt=system_prompt,
-                instruction_prompt=EVALUATION_INSTRUCTION,
-            )
-            self.print_step(step_output=f"Tutorial: {file}")
-            self.print_step(step_output=response)
-            evaluations[file] = response
-        return evaluations, token_usage
-
-    def _collect_files(self):
-        return []
-
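
The removed block (the tutorial evaluator moves to the new evaluation_tutorial_task.py, per the file list) embeds the standard readability formulas in its prompt. A small self-contained transcription of those formulas; note the package derives syllable and sentence counts via pyphen, which this sketch does not reproduce:

```python
import math

# Straight transcriptions of the formulas embedded in the removed prompt.
# Callers must supply the raw counts themselves.

def flesch_reading_ease(words: int, sentences: int, syllables: int) -> float:
    return 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)

def flesch_kincaid_grade(words: int, sentences: int, syllables: int) -> float:
    return 0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59

def gunning_fog_index(words: int, sentences: int, complex_words: int) -> float:
    return 0.4 * ((words / sentences) + 100 * (complex_words / words))

def smog_index(polysyllables: int, sentences: int) -> float:
    return 1.043 * math.sqrt(polysyllables * (30 / sentences)) + 3.1291

# Example: a 120-word, 8-sentence passage with 180 syllables
print(round(flesch_reading_ease(120, 8, 180), 1))  # 64.7
```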