bioguider 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider might be problematic. Click here for more details.
- {bioguider-0.2.3 → bioguider-0.2.4}/PKG-INFO +1 -1
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/common_agent_2step.py +89 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/dockergeneration_observe_step.py +3 -3
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/dockergeneration_plan_step.py +2 -2
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/evaluation_task.py +83 -18
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/identification_observe_step.py +2 -2
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/identification_plan_step.py +2 -2
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/managers/evaluation_manager.py +3 -3
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/rag/data_pipeline.py +19 -5
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/utils/file_utils.py +4 -2
- {bioguider-0.2.3 → bioguider-0.2.4}/pyproject.toml +1 -1
- {bioguider-0.2.3 → bioguider-0.2.4}/LICENSE +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/README.md +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/__init__.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/__init__.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/agent_task.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/agent_tools.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/agent_utils.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/collection_execute_step.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/collection_observe_step.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/collection_plan_step.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/collection_task.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/collection_task_utils.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/common_agent.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/common_step.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/dockergeneration_execute_step.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/dockergeneration_task.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/dockergeneration_task_utils.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/identification_execute_step.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/identification_task.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/identification_task_utils.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/peo_common_step.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/prompt_utils.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/python_ast_repl_tool.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/agents/rag_collection_task.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/conversation.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/database/summarized_file_db.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/rag/__init__.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/rag/config.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/rag/embedder.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/rag/rag.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/settings.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/utils/constants.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/utils/default.gitignore +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/utils/gitignore_checker.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/utils/pyphen_utils.py +0 -0
- {bioguider-0.2.3 → bioguider-0.2.4}/bioguider/utils/utils.py +0 -0
|
@@ -113,6 +113,95 @@ class CommonAgentTwoSteps(CommonAgent):
|
|
|
113
113
|
logger.error(str(e))
|
|
114
114
|
raise e
|
|
115
115
|
processed_res = None
|
|
116
|
+
if post_process is not None:
|
|
117
|
+
try:
|
|
118
|
+
processed_res = post_process(res, **kwargs)
|
|
119
|
+
except RetryException as e:
|
|
120
|
+
logger.error(str(e))
|
|
121
|
+
self.exceptions = [e] if self.exceptions is None else self.exceptions + [e]
|
|
122
|
+
raise e
|
|
123
|
+
except Exception as e:
|
|
124
|
+
logger.error(str(e))
|
|
125
|
+
raise e
|
|
126
|
+
return res, processed_res, self.token_usage, reasoning_process
|
|
127
|
+
|
|
128
|
+
FINAL_STEP_SYSTEM_PROMPTS = ChatPromptTemplate.from_template("""
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
You will be given a response generated by a LLM, which includes a **step-by-step reasoning process** followed by a clearly marked **final answer**.
|
|
132
|
+
|
|
133
|
+
### **Your Task:**
|
|
134
|
+
|
|
135
|
+
Extract and return only the content of the **final answer**.
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
### **Important Instructions:**
|
|
140
|
+
1. Your task is to **extract only the final answer** from the provided reasoning process.
|
|
141
|
+
**Do not** make any judgments, interpretations, or modifications to the content.
|
|
142
|
+
|
|
143
|
+
### **Input:**
|
|
144
|
+
|
|
145
|
+
{llm_response}
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
""")
|
|
149
|
+
|
|
150
|
+
class CommonAgentTwoChainSteps(CommonAgentTwoSteps):
|
|
151
|
+
def __init__(self, llm):
|
|
152
|
+
super().__init__(llm)
|
|
153
|
+
|
|
154
|
+
def _invoke_agent(self, system_prompt, instruction_prompt, schema, post_process = None, **kwargs):
|
|
155
|
+
# Initialize the callback handler
|
|
156
|
+
callback_handler = OpenAICallbackHandler()
|
|
157
|
+
processed_system_prompt = system_prompt.replace("{", "(").replace("}", ")")
|
|
158
|
+
cot_prompt = self._build_prompt_for_cot_step(
|
|
159
|
+
system_prompt=processed_system_prompt,
|
|
160
|
+
instruction_prompt=instruction_prompt
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
try:
|
|
164
|
+
# First, use llm to do CoT
|
|
165
|
+
msgs = cot_prompt.invoke(input={}).to_messages()
|
|
166
|
+
|
|
167
|
+
cot_res = self.llm.generate(messages=[msgs])
|
|
168
|
+
if cot_res is None or cot_res.llm_output is None:
|
|
169
|
+
raise Exception("llm generate invalid output")
|
|
170
|
+
reasoning_process = cot_res.generations[0][0].text
|
|
171
|
+
token_usage: Any = cot_res.llm_output.get("token_usage")
|
|
172
|
+
cot_tokens = {
|
|
173
|
+
"total_tokens": token_usage.get("total_tokens", 0),
|
|
174
|
+
"prompt_tokens": token_usage.get("prompt_tokens", 0),
|
|
175
|
+
"completion_tokens": token_usage.get("completion_tokens", 0),
|
|
176
|
+
}
|
|
177
|
+
self._incre_token_usage(cot_tokens)
|
|
178
|
+
except Exception as e:
|
|
179
|
+
logger.error(str(e))
|
|
180
|
+
raise e
|
|
181
|
+
|
|
182
|
+
try:
|
|
183
|
+
# Then use the reasoning process to do the structured output
|
|
184
|
+
processed_reasoning_process = reasoning_process.replace("{", "{{").replace("}", "}}")
|
|
185
|
+
final_msg = FINAL_STEP_SYSTEM_PROMPTS.format(
|
|
186
|
+
llm_response=processed_reasoning_process,
|
|
187
|
+
)
|
|
188
|
+
msgs = [(
|
|
189
|
+
"human",
|
|
190
|
+
final_msg,
|
|
191
|
+
)]
|
|
192
|
+
final_prompt = ChatPromptTemplate.from_messages(msgs)
|
|
193
|
+
agent = final_prompt | self.llm.with_structured_output(schema)
|
|
194
|
+
res = agent.invoke(
|
|
195
|
+
input={},
|
|
196
|
+
config={
|
|
197
|
+
"callbacks": [callback_handler],
|
|
198
|
+
},
|
|
199
|
+
)
|
|
200
|
+
self._incre_token_usage(callback_handler)
|
|
201
|
+
except Exception as e:
|
|
202
|
+
logger.error(str(e))
|
|
203
|
+
raise e
|
|
204
|
+
processed_res = None
|
|
116
205
|
if post_process is not None:
|
|
117
206
|
try:
|
|
118
207
|
processed_res = post_process(res, **kwargs)
|
|
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
|
|
|
6
6
|
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
|
|
7
7
|
from bioguider.agents.agent_utils import run_command, read_file
|
|
8
8
|
from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
|
|
9
|
-
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
9
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
|
|
10
10
|
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
11
11
|
|
|
12
12
|
DOCKERGENERATION_OBSERVE_SYSTEM_PROMPT = """You are an expert in software containerization and reproducibility engineering.
|
|
@@ -93,7 +93,7 @@ class DockerGenerationObserveStep(PEOCommonStep):
|
|
|
93
93
|
if code != 0:
|
|
94
94
|
error_msg = DockerGenerationObserveStep._extract_error_message(error)
|
|
95
95
|
system_prompt = self._build_system_prompt(state, error_msg, "N/A")
|
|
96
|
-
agent =
|
|
96
|
+
agent = CommonAgentTwoChainSteps(llm=self.llm)
|
|
97
97
|
res, _, token_usage, reasoning = agent.go(
|
|
98
98
|
system_prompt=system_prompt,
|
|
99
99
|
instruction_prompt="Now, let's begin observing.",
|
|
@@ -125,7 +125,7 @@ class DockerGenerationObserveStep(PEOCommonStep):
|
|
|
125
125
|
"docker build successfully.",
|
|
126
126
|
error,
|
|
127
127
|
)
|
|
128
|
-
agent =
|
|
128
|
+
agent = CommonAgentTwoChainSteps(llm=self.llm)
|
|
129
129
|
res, _, token_usage, reasoning = agent.go(
|
|
130
130
|
system_prompt=system_prompt,
|
|
131
131
|
instruction_prompt="Now, let's begin observing.",
|
|
@@ -12,7 +12,7 @@ from bioguider.agents.agent_utils import (
|
|
|
12
12
|
PlanAgentResultJsonSchema,
|
|
13
13
|
)
|
|
14
14
|
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
15
|
-
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
15
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
|
|
16
16
|
from bioguider.agents.dockergeneration_task_utils import (
|
|
17
17
|
DockerGenerationWorkflowState,
|
|
18
18
|
prepare_provided_files_string,
|
|
@@ -140,7 +140,7 @@ class DockerGenerationPlanStep(PEOCommonStep):
|
|
|
140
140
|
|
|
141
141
|
def _execute_directly(self, state: DockerGenerationWorkflowState):
|
|
142
142
|
system_prompt = self._prepare_system_prompt(state)
|
|
143
|
-
agent =
|
|
143
|
+
agent = CommonAgentTwoChainSteps(llm=self.llm)
|
|
144
144
|
res, _, token_usage, reasoning = agent.go(
|
|
145
145
|
system_prompt=system_prompt,
|
|
146
146
|
instruction_prompt="Now, let's begin to make a plan",
|
|
@@ -2,14 +2,15 @@
|
|
|
2
2
|
import os
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
import logging
|
|
5
|
-
from typing import Callable
|
|
5
|
+
from typing import Callable, Optional
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
7
|
from langchain.prompts import ChatPromptTemplate
|
|
8
8
|
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
9
10
|
|
|
10
11
|
from bioguider.agents.agent_utils import read_file
|
|
11
|
-
from bioguider.utils.constants import ProjectMetadata
|
|
12
|
-
from .common_agent_2step import CommonAgentTwoSteps
|
|
12
|
+
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, ProjectMetadata
|
|
13
|
+
from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
|
|
13
14
|
from .common_agent import CommonConversation
|
|
14
15
|
from ..utils.pyphen_utils import PyphenReadability
|
|
15
16
|
from ..utils.gitignore_checker import GitignoreChecker
|
|
@@ -19,12 +20,19 @@ logger = logging.getLogger(__name__)
|
|
|
19
20
|
EVALUATION_README_SYSTEM_PROMPT = """
|
|
20
21
|
You are an expert in evaluating the quality of README files in software repositories. Your task is to analyze the provided README file and generate a comprehensive quality report.
|
|
21
22
|
|
|
22
|
-
|
|
23
|
+
---
|
|
23
24
|
|
|
24
|
-
|
|
25
|
+
### **Step 1: Identify README type
|
|
26
|
+
|
|
27
|
+
First, determine whether the provided README is a **project-level README** (typically at the root of a repository) or a **folder-level README** (typically inside subdirectories).
|
|
25
28
|
|
|
26
29
|
---
|
|
27
30
|
|
|
31
|
+
### **Project-level README Evaluation**
|
|
32
|
+
|
|
33
|
+
If the README is a **project-level** file, evaluate it using the following criteria.
|
|
34
|
+
For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
|
|
35
|
+
|
|
28
36
|
**1. Project Clarity & Purpose**
|
|
29
37
|
* **Assessment**: [Your evaluation of whether the project's purpose is clear.]
|
|
30
38
|
* **Improvement Suggestions**:
|
|
@@ -62,11 +70,53 @@ For each criterion below, provide a brief assessment followed by specific, actio
|
|
|
62
70
|
* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
|
|
63
71
|
* **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
|
|
64
72
|
|
|
65
|
-
**
|
|
73
|
+
**Final Answer**
|
|
74
|
+
* Project-Level README: Yes / No
|
|
66
75
|
* Provide a final, overall assessment of the README file's quality, summarizing the key strengths and areas for improvement.
|
|
67
76
|
|
|
68
77
|
---
|
|
69
78
|
|
|
79
|
+
### **Folder-Level README Evaluation**
|
|
80
|
+
|
|
81
|
+
If the README is a **folder-level** file, use the following criteria instead.
|
|
82
|
+
|
|
83
|
+
For each criterion below, provide a brief assessment followed by specific, actionable comments for improvement.
|
|
84
|
+
|
|
85
|
+
**1. Folder Description**
|
|
86
|
+
* **Assessment**: [Your evaluation of whether it Provides a clear **description** of what the folder contains (e.g., modules, scripts, data).]
|
|
87
|
+
* **Improvement Suggestions**:
|
|
88
|
+
* **Original text:** [Quote a specific line/section from the README.]
|
|
89
|
+
* **Improving comments:** [Provide your suggestions to improve clarity.]
|
|
90
|
+
|
|
91
|
+
**2. Folder Purpose**
|
|
92
|
+
* **Assessment**: [Your evaluation of whether it explains the **purpose** or **role** of the components inside this subfolder.]
|
|
93
|
+
* **Improvement Suggestions**:
|
|
94
|
+
* **Original text:** [Quote text related to purpose.]
|
|
95
|
+
* **Improving comments:** [Provide your suggestions.]
|
|
96
|
+
|
|
97
|
+
**3. Usage**
|
|
98
|
+
* **Assessment**: [Your evaluation of whether it includes **usage instructions** specific to this folder (e.g., commands, import paths, input/output files).]
|
|
99
|
+
* **Improvement Suggestions**:
|
|
100
|
+
* **Original text:** [Quote text related to usage.]
|
|
101
|
+
* **Improving comments:** [Provide your suggestions.]
|
|
102
|
+
|
|
103
|
+
**4. Readability Analysis**
|
|
104
|
+
* **Flesch Reading Ease**: `{flesch_reading_ease}` (A higher score is better, with 60-70 being easily understood by most adults).
|
|
105
|
+
* **Flesch-Kincaid Grade Level**: `{flesch_kincaid_grade}` (Represents the US school-grade level needed to understand the text).
|
|
106
|
+
* **Gunning Fog Index**: `{gunning_fog_index}` (A score above 12 is generally considered too hard for most people).
|
|
107
|
+
* **SMOG Index**: `{smog_index}` (Estimates the years of education needed to understand the text).
|
|
108
|
+
* **Assessment**: Based on these scores, evaluate the overall readability and technical complexity of the language used.
|
|
109
|
+
|
|
110
|
+
**Final Answer**
|
|
111
|
+
* Project-Level README: Yes / No
|
|
112
|
+
* Provide a final, overall assessment of the README file's quality, summarizing the key strengths and areas for improvement.
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
### **README path:**
|
|
116
|
+
{readme_path}
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
70
120
|
### **README Content:**
|
|
71
121
|
{readme_content}
|
|
72
122
|
"""
|
|
@@ -100,11 +150,11 @@ class EvaluationTask(ABC):
|
|
|
100
150
|
token_usage=token_usage,
|
|
101
151
|
)
|
|
102
152
|
|
|
103
|
-
def evaluate(self, files: list[str] | None = None):
|
|
153
|
+
def evaluate(self, files: list[str] | None = None) -> dict:
|
|
104
154
|
self._enter_evaluation()
|
|
105
|
-
|
|
155
|
+
evaluations, token_usage = self._evaluate(files)
|
|
106
156
|
self._leave_evaluation(token_usage)
|
|
107
|
-
return
|
|
157
|
+
return evaluations
|
|
108
158
|
|
|
109
159
|
def _enter_evaluation(self):
|
|
110
160
|
self.print_step(step_name=self.evaluation_name)
|
|
@@ -113,9 +163,13 @@ class EvaluationTask(ABC):
|
|
|
113
163
|
self.print_step(token_usage=token_usage)
|
|
114
164
|
|
|
115
165
|
@abstractmethod
|
|
116
|
-
def _evaluate(self, files: list[str]):
|
|
166
|
+
def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
|
|
117
167
|
pass
|
|
118
168
|
|
|
169
|
+
class EvaluationREADMEResult(BaseModel):
|
|
170
|
+
project_level: Optional[bool]=Field(description="a boolean value specifying if the README file is **project-level** README. TRUE: project-level, FALSE, folder-level")
|
|
171
|
+
overall_assessment: Optional[str]=Field(description="an overall assessment")
|
|
172
|
+
|
|
119
173
|
class EvaluationREADMETask(EvaluationTask):
|
|
120
174
|
def __init__(
|
|
121
175
|
self,
|
|
@@ -128,7 +182,7 @@ class EvaluationREADMETask(EvaluationTask):
|
|
|
128
182
|
super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
|
|
129
183
|
self.evaluation_name = "README Evaluation"
|
|
130
184
|
|
|
131
|
-
def _evaluate(self, files: list[str]):
|
|
185
|
+
def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
|
|
132
186
|
readme_files = files
|
|
133
187
|
if readme_files is None or len(readme_files) == 0:
|
|
134
188
|
return None
|
|
@@ -140,6 +194,8 @@ class EvaluationREADMETask(EvaluationTask):
|
|
|
140
194
|
if readme_content is None:
|
|
141
195
|
logger.error(f"Error in reading file {readme_file}")
|
|
142
196
|
continue
|
|
197
|
+
if len(readme_content.strip()) == 0:
|
|
198
|
+
readme_content = "empty file"
|
|
143
199
|
|
|
144
200
|
readability = PyphenReadability()
|
|
145
201
|
flesch_reading_ease, flesch_kincaid_grade, gunning_fog_index, smog_index, \
|
|
@@ -148,19 +204,28 @@ class EvaluationREADMETask(EvaluationTask):
|
|
|
148
204
|
EVALUATION_README_SYSTEM_PROMPT
|
|
149
205
|
).format(
|
|
150
206
|
readme_content=readme_content,
|
|
207
|
+
readme_path=readme_file,
|
|
151
208
|
flesch_reading_ease=flesch_reading_ease,
|
|
152
209
|
flesch_kincaid_grade=flesch_kincaid_grade,
|
|
153
210
|
gunning_fog_index=gunning_fog_index,
|
|
154
211
|
smog_index=smog_index,
|
|
155
212
|
)
|
|
156
|
-
conversation = CommonConversation(llm=self.llm)
|
|
157
|
-
|
|
213
|
+
# conversation = CommonConversation(llm=self.llm)
|
|
214
|
+
agent = CommonAgentTwoChainSteps(llm=self.llm)
|
|
215
|
+
response, _, token_usage, reasoning_process = agent.go(
|
|
158
216
|
system_prompt=system_prompt,
|
|
159
|
-
instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation."
|
|
217
|
+
instruction_prompt="Before arriving at the conclusion, clearly explain your reasoning step by step. Now, let's begin the evaluation.",
|
|
218
|
+
schema=EvaluationREADMEResult,
|
|
160
219
|
)
|
|
161
220
|
self.print_step(step_output=f"README: {readme_file}")
|
|
162
|
-
self.print_step(step_output=
|
|
163
|
-
readme_evaluations[readme_file] =
|
|
221
|
+
self.print_step(step_output=reasoning_process)
|
|
222
|
+
readme_evaluations[readme_file] = {
|
|
223
|
+
"evaluation": {
|
|
224
|
+
"project_level": response.project_level,
|
|
225
|
+
"overall_assessment": response.overall_assessment,
|
|
226
|
+
},
|
|
227
|
+
"reasoning_process": reasoning_process
|
|
228
|
+
}
|
|
164
229
|
return readme_evaluations, token_usage
|
|
165
230
|
|
|
166
231
|
EVALUATION_TUTORIAL_SYSTEM_PROMPT="""
|
|
@@ -233,9 +298,9 @@ class EvaluationTutorialTask(EvaluationTask):
|
|
|
233
298
|
super().__init__(llm, repo_path, gitignore_path, meta_data, step_callback)
|
|
234
299
|
self.evaluation_name = "Tutorial Evaluation"
|
|
235
300
|
|
|
236
|
-
def _evaluate(self, files: list[str]):
|
|
301
|
+
def _evaluate(self, files: list[str]) -> tuple[dict, dict]:
|
|
237
302
|
if len(files) == 0:
|
|
238
|
-
return
|
|
303
|
+
return {}, {**DEFAULT_TOKEN_USAGE}
|
|
239
304
|
|
|
240
305
|
evaluations = {}
|
|
241
306
|
for file in files:
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
from langchain.prompts import ChatPromptTemplate
|
|
3
3
|
|
|
4
4
|
from bioguider.agents.agent_utils import ObservationResult
|
|
5
|
-
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
5
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
|
|
6
6
|
from bioguider.agents.identification_task_utils import IdentificationWorkflowState
|
|
7
7
|
from bioguider.agents.peo_common_step import PEOWorkflowState, PEOCommonStep
|
|
8
8
|
|
|
@@ -70,7 +70,7 @@ class IdentificationObserveStep(PEOCommonStep):
|
|
|
70
70
|
|
|
71
71
|
def _execute_directly(self, state: IdentificationWorkflowState):
|
|
72
72
|
system_prompt = self._prepare_system_prompt(state)
|
|
73
|
-
agent =
|
|
73
|
+
agent = CommonAgentTwoChainSteps(llm=self.llm)
|
|
74
74
|
res, _, token_usage, reasoning_process = agent.go(
|
|
75
75
|
system_prompt=system_prompt,
|
|
76
76
|
instruction_prompt="Now, let's begin.",
|
|
@@ -5,7 +5,7 @@ from langchain.tools import BaseTool
|
|
|
5
5
|
from pydantic import BaseModel, Field
|
|
6
6
|
|
|
7
7
|
from bioguider.agents.agent_utils import get_tool_names_and_descriptions
|
|
8
|
-
from bioguider.agents.common_agent_2step import CommonAgentTwoSteps
|
|
8
|
+
from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
|
|
9
9
|
from bioguider.agents.identification_task_utils import IdentificationWorkflowState
|
|
10
10
|
from bioguider.agents.peo_common_step import PEOCommonStep
|
|
11
11
|
|
|
@@ -113,7 +113,7 @@ class IdentificationPlanStep(PEOCommonStep):
|
|
|
113
113
|
|
|
114
114
|
def _execute_directly(self, state: IdentificationWorkflowState):
|
|
115
115
|
system_prompt = self._prepare_system_prompt(state)
|
|
116
|
-
agent =
|
|
116
|
+
agent = CommonAgentTwoChainSteps(llm=self.llm)
|
|
117
117
|
res, _, token_usage, reasoning_process = agent.go(
|
|
118
118
|
system_prompt=system_prompt,
|
|
119
119
|
instruction_prompt="Now, let's begin.",
|
|
@@ -57,7 +57,7 @@ class EvaluationManager:
|
|
|
57
57
|
)
|
|
58
58
|
return self.project_metadata
|
|
59
59
|
|
|
60
|
-
def evaluate_readme(self):
|
|
60
|
+
def evaluate_readme(self) -> tuple[any, list[str]]:
|
|
61
61
|
task = EvaluationREADMETask(
|
|
62
62
|
llm=self.llm,
|
|
63
63
|
repo_path=self.rag.repo_dir,
|
|
@@ -67,7 +67,7 @@ class EvaluationManager:
|
|
|
67
67
|
)
|
|
68
68
|
readme_files = self._find_readme_files()
|
|
69
69
|
results = task.evaluate(readme_files)
|
|
70
|
-
return results
|
|
70
|
+
return results, readme_files
|
|
71
71
|
|
|
72
72
|
def evaluate_tutorial(self):
|
|
73
73
|
task = CollectionTask(
|
|
@@ -97,7 +97,7 @@ class EvaluationManager:
|
|
|
97
97
|
repo_path = self.rag.repo_dir
|
|
98
98
|
gitignore_path = Path(repo_path, ".gitignore")
|
|
99
99
|
gitignore_checker = GitignoreChecker(
|
|
100
|
-
directory=
|
|
100
|
+
directory=repo_path, gitignore_path=gitignore_path
|
|
101
101
|
)
|
|
102
102
|
found_readme_files = gitignore_checker.check_files_and_folders(
|
|
103
103
|
check_file_cb=lambda root_dir, relative_path: Path(relative_path).name.lower() in possible_readme_files,
|
|
@@ -518,6 +518,21 @@ class DatabaseManager:
|
|
|
518
518
|
self.reset_database()
|
|
519
519
|
self._create_repo(repo_url_or_path, access_token)
|
|
520
520
|
return self.prepare_db_index()
|
|
521
|
+
|
|
522
|
+
def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) -> str:
    """Derive a repository name from a repository URL or path.

    For a recognised hosting service (``"github"``, ``"gitlab"`` or
    ``"bitbucket"``) whose URL has at least the five ``/``-separated parts
    of ``scheme://host/owner/repo``, the result is ``"<owner>_<repo>"``,
    where ``owner`` is the path segment directly before the repository
    (for nested GitLab groups this is the innermost subgroup). In every
    other case only the last path segment is used.

    Args:
        repo_url_or_path: Repository URL or path. Trailing slashes are
            ignored.
        repo_type: Hosting service identifier. Any value other than
            ``"github"``, ``"gitlab"`` or ``"bitbucket"`` selects the
            last-segment fallback.

    Returns:
        The derived name, with a single trailing ``".git"`` removed from
        the repository segment.
    """
    url_parts = repo_url_or_path.rstrip('/').split('/')

    # Strip ".git" only when it is a suffix. str.replace() would remove
    # every occurrence and corrupt names that merely contain it, e.g.
    # "user.github.io" would become "userhub.io".
    repo = url_parts[-1]
    if repo.endswith(".git"):
        repo = repo[:-len(".git")]

    if repo_type in ("github", "gitlab", "bitbucket") and len(url_parts) >= 5:
        # GitHub URL format: https://github.com/owner/repo
        # GitLab URL format: https://gitlab.com/owner/repo or https://gitlab.com/group/subgroup/repo
        # Bitbucket URL format: https://bitbucket.org/owner/repo
        owner = url_parts[-2]
        return f"{owner}_{repo}"

    return repo
|
|
521
536
|
|
|
522
537
|
def reset_database(self):
|
|
523
538
|
"""
|
|
@@ -545,19 +560,18 @@ class DatabaseManager:
|
|
|
545
560
|
root_path = retrieve_data_root_path()
|
|
546
561
|
|
|
547
562
|
os.makedirs(root_path, exist_ok=True)
|
|
563
|
+
repo_type = "unknown"
|
|
548
564
|
# url
|
|
549
565
|
if repo_url_or_path.startswith("https://") or repo_url_or_path.startswith("http://"):
|
|
550
566
|
# Extract repo name based on the URL format
|
|
551
567
|
if "github.com" in repo_url_or_path:
|
|
552
568
|
# GitHub URL format: https://github.com/owner/repo
|
|
553
|
-
|
|
569
|
+
repo_type = "github"
|
|
554
570
|
elif "gitlab.com" in repo_url_or_path:
|
|
555
571
|
# GitLab URL format: https://gitlab.com/owner/repo or https://gitlab.com/group/subgroup/repo
|
|
556
572
|
# Use the last part of the URL as the repo name
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
# Generic handling for other Git URLs
|
|
560
|
-
repo_name = repo_url_or_path.split("/")[-1].replace(".git", "")
|
|
573
|
+
repo_type = "gitlab"
|
|
574
|
+
repo_name = self._extract_repo_name_from_url(repo_url_or_path, repo_type)
|
|
561
575
|
|
|
562
576
|
save_repo_dir = os.path.join(root_path, "repos", repo_name)
|
|
563
577
|
|
|
@@ -117,9 +117,11 @@ def parse_repo_url(url: str) -> tuple[str | None, str | None]:
|
|
|
117
117
|
except Exception:
|
|
118
118
|
return None, None
|
|
119
119
|
|
|
120
|
-
def retrieve_data_root_path():
|
|
120
|
+
def retrieve_data_root_path() -> Path:
|
|
121
121
|
data_folder = os.environ.get("DATA_FOLDER", "./data")
|
|
122
|
-
|
|
122
|
+
root_folder = Path(data_folder, ".adalflow")
|
|
123
|
+
return root_folder.absolute()
|
|
124
|
+
|
|
123
125
|
|
|
124
126
|
|
|
125
127
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|