bioguider 0.2.15__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bioguider has been flagged as potentially problematic; more details are available on the release's advisory page.
- bioguider/agents/agent_utils.py +38 -54
- bioguider/agents/collection_observe_step.py +1 -1
- bioguider/agents/common_agent.py +3 -25
- bioguider/agents/common_agent_2step.py +1 -1
- bioguider/agents/common_conversation.py +43 -0
- bioguider/agents/dockergeneration_observe_step.py +2 -1
- bioguider/agents/evaluation_installation_task.py +68 -99
- bioguider/agents/evaluation_readme_task.py +280 -182
- bioguider/agents/evaluation_submission_requirements_task.py +69 -54
- bioguider/agents/evaluation_task.py +1 -1
- bioguider/agents/identification_observe_step.py +1 -1
- bioguider/agents/prompt_utils.py +4 -2
- bioguider/utils/constants.py +86 -1
- bioguider/utils/utils.py +45 -1
- {bioguider-0.2.15.dist-info → bioguider-0.2.16.dist-info}/METADATA +1 -1
- {bioguider-0.2.15.dist-info → bioguider-0.2.16.dist-info}/RECORD +18 -17
- {bioguider-0.2.15.dist-info → bioguider-0.2.16.dist-info}/LICENSE +0 -0
- {bioguider-0.2.15.dist-info → bioguider-0.2.16.dist-info}/WHEEL +0 -0
bioguider/agents/agent_utils.py
CHANGED
@@ -3,7 +3,6 @@ import json
 from json import JSONDecodeError
 import os
 import re
-import subprocess
 from typing import List, Optional, Tuple, Union
 from langchain_openai import AzureChatOpenAI
 from langchain_deepseek import ChatDeepSeek
@@ -25,6 +24,7 @@ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_FILE_LENGTH, MAX_
 from bioguider.utils.file_utils import get_file_type
 from ..utils.gitignore_checker import GitignoreChecker
 from ..database.summarized_file_db import SummarizedFilesDb
+from bioguider.agents.common_conversation import CommonConversation
 
 logger = logging.getLogger(__name__)
 
@@ -238,19 +238,6 @@ def summarize_file(
 
     return out, token_usage
 
-def increase_token_usage(
-    token_usage: Optional[dict] = None,
-    incremental: dict = {**DEFAULT_TOKEN_USAGE},
-):
-    if token_usage is None:
-        token_usage = {**DEFAULT_TOKEN_USAGE}
-    token_usage["total_tokens"] += incremental["total_tokens"]
-    token_usage["completion_tokens"] += incremental["completion_tokens"]
-    token_usage["prompt_tokens"] += incremental["prompt_tokens"]
-
-    return token_usage
-
-
 # Set up a prompt template
 class CustomPromptTemplate(StringPromptTemplate):
     # The template to use
@@ -355,30 +342,6 @@ def convert_plan_to_string(plan: PlanAgentResult) -> str:
         plan_str += action_str
     return plan_str
 
-def run_command(command: list, cwd: str = None, timeout: int = None):
-    """
-    Run a shell command with optional timeout and return stdout, stderr, and return code.
-    """
-    try:
-        result = subprocess.run(
-            command,
-            cwd=cwd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            timeout=timeout
-        )
-        return result.stdout, result.stderr, result.returncode
-    except subprocess.TimeoutExpired as e:
-        return e.stdout or "", e.stderr or f"Command timed out after {timeout} seconds", -1
-
-def escape_braces(text: str) -> str:
-    # First replace single } not part of }} with }}
-    text = re.sub(r'(?<!})}(?!})', '}}', text)
-    # Then replace single { not part of {{
-    text = re.sub(r'(?<!{){(?!{)', '{{', text)
-    return text
-
 STRING_TO_OBJECT_SYSTEM_PROMPT = """
 You are an expert to understand data. You will be provided a text, and your task is to extracted structured data from the provided text.
 
@@ -426,20 +389,41 @@ def try_parse_with_llm(llm: BaseChatOpenAI, input_text: str, schema: any):
     system_prompt = ChatPromptTemplate.from_template(
         STRING_TO_OBJECT_SYSTEM_PROMPT
     ).format(input_text=input_text)
-
-
-
-
-
+
+    conversation = CommonConversation(llm=llm)
+    res, token_usage = conversation.generate_with_schema(
+        system_prompt=system_prompt,
+        instruction_prompt="Let's start to parse the input text.",
+        schema=schema,
+    )
+    return res, token_usage
+
+def read_license_file(repo_path: str) -> tuple[str | None, str|None]:
+    # find hardcoded license file
+    hardcoded_license_files = [
+        "LICENSE",
+        "LICENSE.txt",
+        "LICENSE.md",
+        "LICENSE.rst",
+    ]
+    license_files = []
+    for file in hardcoded_license_files:
+        if os.path.exists(os.path.join(repo_path, file)):
+            with open(os.path.join(repo_path, file), "r") as f:
+                license_files.append((f.read(), os.path.join(repo_path, file)))
+
+    max_item = max(license_files, key=lambda x: len(x[0])) if len(license_files) > 0 else (None, None)
+    if max_item[0] is not None:
+        return max_item[0], max_item[1]
+
+    # find in root directory
+    for root, _, files in os.walk(repo_path):
+        for file in files:
+            if file.lower() == "license":
+                with open(os.path.join(root, file), "r") as f:
+                    return f.read(), os.path.join(root, file)
+            if file[:8].lower() == "license.":
+                with open(os.path.join(root, file), "r") as f:
+                    return f.read(), os.path.join(root, file)
+    return None, None
 
-    try:
-        res = agent.invoke(
-            input={},
-            config={
-                "callbacks": [callback_handler],
-            },
-        )
-        return res, vars(callback_handler)
-    except Exception as e:
-        logger.error(e)
-        return None
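A note on the new `read_license_file` helper above: it prefers a conventionally named top-level license file, keeping the longest one when several exist, and only then falls back to walking the tree. A minimal usage sketch, assuming the function behaves as defined in this diff (the throwaway repo below is illustrative):

import os, tempfile
from bioguider.agents.agent_utils import read_license_file

# Build a throwaway "repo" containing one of the hardcoded license names.
repo = tempfile.mkdtemp()
with open(os.path.join(repo, "LICENSE"), "w") as f:
    f.write("MIT License\nCopyright (c) ...")

content, path = read_license_file(repo)  # hits the hardcoded-name fast path
print(path)                              # <repo>/LICENSE
print(content.splitlines()[0])           # MIT License

print(read_license_file(tempfile.mkdtemp()))  # (None, None) when nothing is found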
bioguider/agents/collection_observe_step.py
CHANGED
@@ -95,7 +95,7 @@ class CollectionObserveStep(PEOCommonStep):
     def _execute_directly(self, state: CollectionWorkflowState):
         step_count = state["step_count"]
         instruction = "Now, we have reached max recursion limit, please give me the **final answer** based on the current information" \
-            if step_count == MAX_STEP_COUNT - 2 else "Let's begin thinking."
+            if step_count == MAX_STEP_COUNT/3 - 2 else "Let's begin thinking."
         system_prompt = self._build_prompt(state)
         agent = CommonAgentTwoSteps(llm=self.llm)
         res, _, token_usage, reasoning_process = agent.go(
bioguider/agents/common_agent.py
CHANGED
@@ -1,3 +1,4 @@
+
 from typing import Any, Callable, Optional
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_openai.chat_models.base import BaseChatOpenAI
@@ -7,10 +8,7 @@ from pydantic import BaseModel, Field
 from tenacity import retry, stop_after_attempt, wait_incrementing
 import logging
 
-from bioguider.agents.agent_utils import (
-    escape_braces,
-    increase_token_usage,
-)
+from bioguider.utils.utils import escape_braces, increase_token_usage
 
 logger = logging.getLogger(__name__)
 
@@ -19,13 +17,11 @@ class RetryException(Exception):
 
     pass
 
-
 class CommonAgentResult(BaseModel):
     reasoning_process: str = Field(
         description="A detailed explanation of the thought process or reasoning steps taken to reach a conclusion."
     )
 
-
 class CommonAgent:
     def __init__(self, llm: BaseChatOpenAI):
         self.llm = llm
@@ -138,22 +134,4 @@
             logger.error(str(e))
             raise e
         return res, processed_res, self.token_usage, None
-
-class CommonConversation:
-    def __init__(self, llm: BaseChatOpenAI):
-        self.llm = llm
-
-    def generate(self, system_prompt: str, instruction_prompt: str):
-        msgs = [
-            SystemMessage(system_prompt),
-            HumanMessage(instruction_prompt),
-        ]
-        msgs_template = ChatPromptTemplate.from_messages(messages=msgs)
-        callback_handler = OpenAICallbackHandler()
-        result = self.llm.generate(
-            messages=[msgs],
-            callbacks=[callback_handler]
-        )
-        response = result.generations[0][0].text
-        token_usage = result.llm_output.get("token_usage")
-        return response, token_usage
+
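`escape_braces` and `increase_token_usage` are now imported from `bioguider.utils.utils`; the copies deleted from agent_utils.py above show what they do. A sketch of their semantics, assuming the moved code matches the removed implementation:

import re

def escape_braces(text: str) -> str:
    # Double any lone brace so prompt text survives ChatPromptTemplate formatting.
    text = re.sub(r'(?<!})}(?!})', '}}', text)
    return re.sub(r'(?<!{){(?!{)', '{{', text)

print(escape_braces("use {placeholder} and {{kept}}"))
# use {{placeholder}} and {{kept}}

DEFAULT_TOKEN_USAGE = {"total_tokens": 0, "prompt_tokens": 0, "completion_tokens": 0}  # assumed shape

def increase_token_usage(token_usage=None, incremental={**DEFAULT_TOKEN_USAGE}):
    # Accumulate counters, starting from zeros when no running total is given.
    if token_usage is None:
        token_usage = {**DEFAULT_TOKEN_USAGE}
    token_usage["total_tokens"] += incremental["total_tokens"]
    token_usage["completion_tokens"] += incremental["completion_tokens"]
    token_usage["prompt_tokens"] += incremental["prompt_tokens"]
    return token_usage

usage = increase_token_usage(None, {"total_tokens": 120, "prompt_tokens": 100, "completion_tokens": 20})
usage = increase_token_usage(usage, {"total_tokens": 80, "prompt_tokens": 50, "completion_tokens": 30})
print(usage)  # {'total_tokens': 200, 'prompt_tokens': 150, 'completion_tokens': 50}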
bioguider/agents/common_agent_2step.py
CHANGED
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
 from tenacity import retry, stop_after_attempt, wait_incrementing
 import logging
 
-from bioguider.agents.agent_utils import escape_braces
+from bioguider.utils.utils import escape_braces
 from bioguider.agents.common_agent import (
     CommonAgent,
     RetryException,
bioguider/agents/common_conversation.py
ADDED
@@ -0,0 +1,43 @@
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai.chat_models.base import BaseChatOpenAI
+from langchain_community.callbacks.openai_info import OpenAICallbackHandler
+from bioguider.utils.utils import escape_braces
+
+class CommonConversation:
+    def __init__(self, llm: BaseChatOpenAI):
+        self.llm = llm
+
+    def generate(self, system_prompt: str, instruction_prompt: str):
+        msgs = [
+            SystemMessage(system_prompt),
+            HumanMessage(instruction_prompt),
+        ]
+        callback_handler = OpenAICallbackHandler()
+        result = self.llm.generate(
+            messages=[msgs],
+            callbacks=[callback_handler]
+        )
+        response = result.generations[0][0].text
+        token_usage = result.llm_output.get("token_usage")
+        return response, token_usage
+
+    def generate_with_schema(self, system_prompt: str, instruction_prompt: str, schema: any):
+        system_prompt = escape_braces(system_prompt)
+        instruction_prompt = escape_braces(instruction_prompt)
+        msgs = [
+            SystemMessage(system_prompt),
+            HumanMessage(instruction_prompt),
+        ]
+        msgs_template = ChatPromptTemplate.from_messages(messages=msgs)
+        callback_handler = OpenAICallbackHandler()
+        agent = msgs_template | self.llm.with_structured_output(schema)
+        result = agent.invoke(
+            input={},
+            config={
+                "callbacks": [callback_handler],
+            },
+        )
+        token_usage = vars(callback_handler)
+        return result, token_usage
+
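`CommonConversation` is what `try_parse_with_llm` in agent_utils.py now delegates to. A usage sketch of `generate_with_schema`; the chat model and the Pydantic schema here are illustrative, not part of the package:

from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from bioguider.agents.common_conversation import CommonConversation

class Person(BaseModel):  # hypothetical extraction schema
    name: str = Field(description="The person's name")
    age: int = Field(description="The person's age")

llm = ChatOpenAI(model="gpt-4o-mini")  # any BaseChatOpenAI-compatible chat model
conversation = CommonConversation(llm=llm)
# Both prompts are brace-escaped internally before being templated.
person, token_usage = conversation.generate_with_schema(
    system_prompt="Extract structured data from: Alice is 34 years old.",
    instruction_prompt="Let's start to parse the input text.",
    schema=Person,
)
print(person)       # name='Alice' age=34
print(token_usage)  # OpenAICallbackHandler attributes, via vars()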
bioguider/agents/dockergeneration_observe_step.py
CHANGED
@@ -4,7 +4,8 @@ from langchain.prompts import ChatPromptTemplate
 from pydantic import BaseModel, Field
 
 from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
-from bioguider.agents.agent_utils import read_file, run_command
+from bioguider.agents.agent_utils import read_file
+from bioguider.utils.utils import run_command
 from bioguider.agents.dockergeneration_task_utils import DockerGenerationWorkflowState
 from bioguider.agents.common_agent_2step import CommonAgentTwoChainSteps, CommonAgentTwoSteps
 from bioguider.agents.peo_common_step import PEOCommonStep
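`run_command` likewise moved to `bioguider.utils.utils`. Assuming it keeps the subprocess wrapper removed from agent_utils.py earlier in this diff, a timeout is reported as return code -1 instead of raising:

from bioguider.utils.utils import run_command

stdout, stderr, returncode = run_command(["echo", "hello"], timeout=5)
print(returncode, stdout.strip())  # 0 hello

stdout, stderr, returncode = run_command(["sleep", "10"], timeout=1)
print(returncode, stderr)          # -1 Command timed out after 1 seconds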
|
@@ -1,24 +1,25 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Callable, Optional
|
|
5
|
-
from abc import ABC, abstractmethod
|
|
6
4
|
from langchain.prompts import ChatPromptTemplate
|
|
7
|
-
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
8
|
-
from pydantic import BaseModel, Field
|
|
9
5
|
from markdownify import markdownify as md
|
|
10
6
|
|
|
11
7
|
from bioguider.agents.agent_utils import read_file
|
|
12
8
|
from bioguider.agents.collection_task import CollectionTask
|
|
13
9
|
from bioguider.agents.prompt_utils import EVALUATION_INSTRUCTION, CollectionGoalItemEnum
|
|
14
|
-
from bioguider.utils.constants import
|
|
10
|
+
from bioguider.utils.constants import (
|
|
11
|
+
DEFAULT_TOKEN_USAGE,
|
|
12
|
+
ProjectMetadata,
|
|
13
|
+
StructuredEvaluationInstallationResult,
|
|
14
|
+
FreeEvaluationInstallationResult,
|
|
15
|
+
EvaluationInstallationResult,
|
|
16
|
+
)
|
|
15
17
|
from bioguider.rag.data_pipeline import count_tokens
|
|
16
18
|
from .common_agent_2step import CommonAgentTwoSteps, CommonAgentTwoChainSteps
|
|
17
|
-
|
|
18
|
-
from ..utils.pyphen_utils import PyphenReadability
|
|
19
|
-
from ..utils.gitignore_checker import GitignoreChecker
|
|
19
|
+
|
|
20
20
|
from .evaluation_task import EvaluationTask
|
|
21
|
-
from .agent_utils import
|
|
21
|
+
from .agent_utils import read_file
|
|
22
|
+
from bioguider.utils.utils import increase_token_usage
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
logger = logging.getLogger(__name__)
|
|
@@ -43,7 +44,10 @@ Your task is to analyze the provided files related to installation and generate
|
|
|
43
44
|
4. **Compatible Operating System**: Is the compatible operating system described?
|
|
44
45
|
* Output: `Yes` or `No`
|
|
45
46
|
|
|
46
|
-
5. **
|
|
47
|
+
5. **Hardware Requirements**: Is the hardware requirements described?
|
|
48
|
+
* Output: `Yes` or `No`
|
|
49
|
+
|
|
50
|
+
6. **Overall Score**: Give an overall quality rating of the Installation information.
|
|
47
51
|
* Output: `Poor`, `Fair`, `Good`, or `Excellent`
|
|
48
52
|
|
|
49
53
|
---
|
|
@@ -58,6 +62,7 @@ Your final report must **exactly match** the following format. Do not add or omi
|
|
|
58
62
|
* number: [Number]
|
|
59
63
|
* suggestions: <suggestion to improve **dependency information** like missing dependencies
|
|
60
64
|
**Compatible Operating System:** [Yes / No]
|
|
65
|
+
**Hardware Requirements:** [Yes / No]
|
|
61
66
|
**Overall Score:** [Poor / Fair / Good / Excellent]
|
|
62
67
|
|
|
63
68
|
---
|
|
@@ -68,43 +73,45 @@ Your final report must **exactly match** the following format. Do not add or omi
|
|
|
68
73
|
"""
|
|
69
74
|
|
|
70
75
|
|
|
71
|
-
|
|
76
|
+
FREE_EVALUATION_INSTALLATION_SYSTEM_PROMPT = """
|
|
72
77
|
You are an expert in evaluating the quality of **installation instructions** in software repositories.
|
|
73
78
|
Your task is to analyze the provided content of installation-related files and generate a **comprehensive, structured quality report**.
|
|
79
|
+
You will be given:
|
|
80
|
+
1. The content of installation-related files.
|
|
81
|
+
2. A structured evaluation of the installation-related files and its reasoning process.
|
|
74
82
|
|
|
75
83
|
---
|
|
76
84
|
|
|
77
|
-
###
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
85
|
+
### **Instructions**
|
|
86
|
+
1. Based on the provided structured evaluation and its reasoning process, generate a free evaluation of the installation-related files.
|
|
87
|
+
2. Focus on the explanation of assessment in structured evaluation and how to improve the installation-related files based on the structured evaluation and its reasoning process.
|
|
88
|
+
* For each suggestion to improve the installation-related files, you **must provide some examples** of the original text snippet and the improving comments.
|
|
89
|
+
3. For each item in the structured evaluation, provide a detailed assessment followed by specific, actionable comments for improvement.
|
|
90
|
+
4. Your improvement suggestions must also include the original text snippet and the improving comments.
|
|
91
|
+
5. Your improvement suggestions must also include suggestions to improve readability.
|
|
92
|
+
6. If you think the it is good enough, you can say so.
|
|
84
93
|
|
|
85
|
-
|
|
86
|
-
* Are all software and library dependencies clearly listed?
|
|
87
|
-
* Are installation methods (e.g., `pip`, `conda`, `apt`) for those dependencies explicitly provided?
|
|
94
|
+
---
|
|
88
95
|
|
|
89
|
-
|
|
90
|
-
|
|
96
|
+
### **Output Format**
|
|
97
|
+
Your output must **exactly match** the following format. Do not add or omit any sections.
|
|
91
98
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
99
|
+
**FinalAnswer**
|
|
100
|
+
**Ease of Access:**
|
|
101
|
+
<Your assessment and suggestion here>
|
|
102
|
+
**Clarity of Dependency Specification:**
|
|
103
|
+
<Your assessment and suggestion here>
|
|
104
|
+
**Hardware Requirements:**
|
|
105
|
+
<Your assessment and suggestion here>
|
|
106
|
+
**Installation Guide:**
|
|
107
|
+
<Your assessment and suggestion here>
|
|
108
|
+
**Compatible Operating System:**
|
|
109
|
+
<Your assessment and suggestion here>
|
|
95
110
|
|
|
96
111
|
---
|
|
97
112
|
|
|
98
|
-
###
|
|
99
|
-
|
|
100
|
-
Your response **must exactly follow** the structure below:
|
|
101
|
-
|
|
102
|
-
**FinalAnswer**
|
|
103
|
-
**Overall Score:** [Poor / Fair / Good / Excellent]
|
|
104
|
-
**Ease of Access:** <your comments>
|
|
105
|
-
**Clarity of Dependency Specification:** <your comments>
|
|
106
|
-
**Hardware Requirements:** <your comments>
|
|
107
|
-
**Installation Guide:** <your comments>
|
|
113
|
+
### **Structured Evaluation and Reasoning Process**
|
|
114
|
+
{structured_evaluation_and_reasoning_process}
|
|
108
115
|
|
|
109
116
|
---
|
|
110
117
|
|
|
@@ -113,54 +120,6 @@ Your response **must exactly follow** the structure below:
|
|
|
113
120
|
|
|
114
121
|
"""
|
|
115
122
|
|
|
116
|
-
class StructuredEvaluationInstallationResult(BaseModel):
|
|
117
|
-
install_available: Optional[bool]=Field(description="A boolean value. Is the installation documents accessible and present?")
|
|
118
|
-
install_tutorial: Optional[bool]=Field(description="A boolean value. Is the installation tutorial provided?")
|
|
119
|
-
dependency_number: Optional[int]=Field(description="A number. It is the number of dependencies that are required to install.")
|
|
120
|
-
dependency_suggestions: Optional[str]=Field(description="A string value. It is the specific improvements if necessary, such as missing dependencies")
|
|
121
|
-
compatible_os: Optional[bool]=Field(description="A boolean value. Is compatible operating system described?")
|
|
122
|
-
overall_score: Optional[str]=Field(description="A overall scroll for the installation quality, could be `Poor`, `Fair`, `Good`, or `Excellent`")
|
|
123
|
-
|
|
124
|
-
class EvaluationInstallationResult(BaseModel):
|
|
125
|
-
ease_of_access: Optional[str]=Field(description="Is the installation information easy to access")
|
|
126
|
-
score: Optional[str]=Field(description="An overall score, could be Poor, Fair, Good or Excellent")
|
|
127
|
-
clarity_of_dependency: Optional[str]=Field(description="Are all dependencies clearly listed")
|
|
128
|
-
hardware_requirements: Optional[str]=Field(description="Are all hardware requirements clearly specified")
|
|
129
|
-
installation_guide: Optional[str]=Field(description="Is there a clear, ordered set of instructions for installing the software")
|
|
130
|
-
|
|
131
|
-
EvaluationInstallationResultSchema = {
|
|
132
|
-
"title": "EvaluationREADMEResult",
|
|
133
|
-
"type": "object",
|
|
134
|
-
"properties": {
|
|
135
|
-
"ease_of_access": {
|
|
136
|
-
"anyOf": [{"type": "string"}, {"type": "null"}],
|
|
137
|
-
"description": "Is the installation information easy to access",
|
|
138
|
-
"title": "Ease of Access"
|
|
139
|
-
},
|
|
140
|
-
"score": {
|
|
141
|
-
"anyOf": [{"type": "string"}, {"type": "null"}],
|
|
142
|
-
"description": "An overall score, could be Poor, Fair, Good or Excellent",
|
|
143
|
-
"title": "Score"
|
|
144
|
-
},
|
|
145
|
-
"clarity_of_dependency": {
|
|
146
|
-
"anyOf": [{"type": "string"}, {"type": "null"}],
|
|
147
|
-
"description": "Are all dependencies clearly listed",
|
|
148
|
-
"title": "Clarity of Dependency",
|
|
149
|
-
},
|
|
150
|
-
"hardware_requirements": {
|
|
151
|
-
"anyOf": [{"type": "string"}, {"type": "null"}],
|
|
152
|
-
"description": "Are all hardware requirements clearly specified",
|
|
153
|
-
"title": "Hardware Requirements"
|
|
154
|
-
},
|
|
155
|
-
"installation_guide": {
|
|
156
|
-
"anyOf": [{"type": "string"}, {"type": "null"}],
|
|
157
|
-
"description": "Is there a clear, ordered set of instructions for installing the software",
|
|
158
|
-
"title": "Installation Guide"
|
|
159
|
-
}
|
|
160
|
-
},
|
|
161
|
-
"required": ["ease_of_access", "score", "clarity_of_dependency", "hardware_requirements", "installation_guide"]
|
|
162
|
-
}
|
|
163
|
-
|
|
164
123
|
class EvaluationInstallationTask(EvaluationTask):
|
|
165
124
|
def __init__(
|
|
166
125
|
self,
|
|
@@ -217,25 +176,30 @@ class EvaluationInstallationTask(EvaluationTask):
|
|
|
217
176
|
self.print_step(token_usage=token_usage)
|
|
218
177
|
|
|
219
178
|
return {
|
|
220
|
-
"
|
|
221
|
-
"
|
|
179
|
+
"evaluation": res,
|
|
180
|
+
"reasoning_process": reasoning_process,
|
|
222
181
|
}, token_usage
|
|
223
182
|
|
|
224
|
-
def _free_evaluate(
|
|
183
|
+
def _free_evaluate(
|
|
184
|
+
self,
|
|
185
|
+
files: list[str] | None=None,
|
|
186
|
+
structured_evaluation_and_reasoning_process: str | None=None,
|
|
187
|
+
) -> tuple[dict|None, dict]:
|
|
225
188
|
if files is None or len(files) == 0:
|
|
226
189
|
return None, {**DEFAULT_TOKEN_USAGE}
|
|
227
190
|
|
|
191
|
+
structured_evaluation_and_reasoning_process = structured_evaluation_and_reasoning_process or "N/A"
|
|
228
192
|
files_content = self._collect_install_files_content(files)
|
|
229
|
-
system_prompt = ChatPromptTemplate.from_template(
|
|
230
|
-
installation_files_content=files_content
|
|
193
|
+
system_prompt = ChatPromptTemplate.from_template(FREE_EVALUATION_INSTALLATION_SYSTEM_PROMPT).format(
|
|
194
|
+
installation_files_content=files_content,
|
|
195
|
+
structured_evaluation_and_reasoning_process=structured_evaluation_and_reasoning_process,
|
|
231
196
|
)
|
|
232
197
|
agent = CommonAgentTwoChainSteps(llm=self.llm)
|
|
233
198
|
res, _, token_usage, reasoning_process = agent.go(
|
|
234
199
|
system_prompt=system_prompt,
|
|
235
200
|
instruction_prompt=EVALUATION_INSTRUCTION,
|
|
236
|
-
schema=
|
|
201
|
+
schema=FreeEvaluationInstallationResult,
|
|
237
202
|
)
|
|
238
|
-
res = EvaluationInstallationResult(**res)
|
|
239
203
|
self.print_step(step_output=reasoning_process)
|
|
240
204
|
self.print_step(token_usage=token_usage)
|
|
241
205
|
evaluation = {
|
|
@@ -244,15 +208,20 @@ class EvaluationInstallationTask(EvaluationTask):
|
|
|
244
208
|
}
|
|
245
209
|
return evaluation, token_usage
|
|
246
210
|
|
|
247
|
-
def _evaluate(self, files: list[str] | None = None) -> tuple[
|
|
248
|
-
|
|
249
|
-
structured_evaluation, structured_token_usage = self._structured_evaluate(files)
|
|
211
|
+
def _evaluate(self, files: list[str] | None = None) -> tuple[EvaluationInstallationResult | None, dict, list[str]]:
|
|
212
|
+
total_token_usage = {**DEFAULT_TOKEN_USAGE}
|
|
250
213
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
214
|
+
structured_evaluation, structured_token_usage = self._structured_evaluate(files)
|
|
215
|
+
total_token_usage = increase_token_usage(total_token_usage, structured_token_usage)
|
|
216
|
+
evaluation, token_usage = self._free_evaluate(files, structured_evaluation["reasoning_process"])
|
|
217
|
+
total_token_usage = increase_token_usage(total_token_usage, token_usage)
|
|
218
|
+
|
|
219
|
+
combined_evaluation = EvaluationInstallationResult(
|
|
220
|
+
structured_evaluation=structured_evaluation["evaluation"],
|
|
221
|
+
free_evaluation=evaluation["evaluation"],
|
|
222
|
+
structured_reasoning_process=structured_evaluation["reasoning_process"],
|
|
223
|
+
free_reasoning_process=evaluation["reasoning_process"],
|
|
224
|
+
)
|
|
256
225
|
|
|
257
226
|
return combined_evaluation, total_token_usage, files
|
|
258
227
|
|
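The result models (`StructuredEvaluationInstallationResult`, `FreeEvaluationInstallationResult`, `EvaluationInstallationResult`) now live in `bioguider.utils.constants` (the +86 -1 change there), and `EvaluationInstallationResult` has become a container combining both evaluation passes. Its definition is not visible in this diff, but the keyword arguments in the new `_evaluate` imply a shape along these lines (a hypothetical sketch, not the package's actual code):

from typing import Any, Optional
from pydantic import BaseModel

class EvaluationInstallationResult(BaseModel):  # hypothetical reconstruction
    # Field names come from the _evaluate() call above; the types are assumptions.
    structured_evaluation: Optional[Any] = None  # a StructuredEvaluationInstallationResult
    free_evaluation: Optional[Any] = None        # a FreeEvaluationInstallationResult
    structured_reasoning_process: Optional[str] = None
    free_reasoning_process: Optional[str] = None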