bioguider 0.2.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioguider/__init__.py +0 -0
- bioguider/agents/__init__.py +0 -0
- bioguider/agents/agent_task.py +92 -0
- bioguider/agents/agent_tools.py +176 -0
- bioguider/agents/agent_utils.py +504 -0
- bioguider/agents/collection_execute_step.py +182 -0
- bioguider/agents/collection_observe_step.py +125 -0
- bioguider/agents/collection_plan_step.py +156 -0
- bioguider/agents/collection_task.py +184 -0
- bioguider/agents/collection_task_utils.py +142 -0
- bioguider/agents/common_agent.py +137 -0
- bioguider/agents/common_agent_2step.py +215 -0
- bioguider/agents/common_conversation.py +61 -0
- bioguider/agents/common_step.py +85 -0
- bioguider/agents/consistency_collection_step.py +102 -0
- bioguider/agents/consistency_evaluation_task.py +57 -0
- bioguider/agents/consistency_evaluation_task_utils.py +14 -0
- bioguider/agents/consistency_observe_step.py +110 -0
- bioguider/agents/consistency_query_step.py +77 -0
- bioguider/agents/dockergeneration_execute_step.py +186 -0
- bioguider/agents/dockergeneration_observe_step.py +154 -0
- bioguider/agents/dockergeneration_plan_step.py +158 -0
- bioguider/agents/dockergeneration_task.py +158 -0
- bioguider/agents/dockergeneration_task_utils.py +220 -0
- bioguider/agents/evaluation_installation_task.py +270 -0
- bioguider/agents/evaluation_readme_task.py +767 -0
- bioguider/agents/evaluation_submission_requirements_task.py +172 -0
- bioguider/agents/evaluation_task.py +206 -0
- bioguider/agents/evaluation_tutorial_task.py +169 -0
- bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
- bioguider/agents/evaluation_userguide_prompts.py +179 -0
- bioguider/agents/evaluation_userguide_task.py +154 -0
- bioguider/agents/evaluation_utils.py +127 -0
- bioguider/agents/identification_execute_step.py +181 -0
- bioguider/agents/identification_observe_step.py +104 -0
- bioguider/agents/identification_plan_step.py +140 -0
- bioguider/agents/identification_task.py +270 -0
- bioguider/agents/identification_task_utils.py +22 -0
- bioguider/agents/peo_common_step.py +64 -0
- bioguider/agents/prompt_utils.py +253 -0
- bioguider/agents/python_ast_repl_tool.py +69 -0
- bioguider/agents/rag_collection_task.py +130 -0
- bioguider/conversation.py +67 -0
- bioguider/database/code_structure_db.py +500 -0
- bioguider/database/summarized_file_db.py +146 -0
- bioguider/generation/__init__.py +39 -0
- bioguider/generation/benchmark_metrics.py +610 -0
- bioguider/generation/change_planner.py +189 -0
- bioguider/generation/document_renderer.py +157 -0
- bioguider/generation/llm_cleaner.py +67 -0
- bioguider/generation/llm_content_generator.py +1128 -0
- bioguider/generation/llm_injector.py +809 -0
- bioguider/generation/models.py +85 -0
- bioguider/generation/output_manager.py +74 -0
- bioguider/generation/repo_reader.py +37 -0
- bioguider/generation/report_loader.py +166 -0
- bioguider/generation/style_analyzer.py +36 -0
- bioguider/generation/suggestion_extractor.py +436 -0
- bioguider/generation/test_metrics.py +189 -0
- bioguider/managers/benchmark_manager.py +785 -0
- bioguider/managers/evaluation_manager.py +215 -0
- bioguider/managers/generation_manager.py +686 -0
- bioguider/managers/generation_test_manager.py +107 -0
- bioguider/managers/generation_test_manager_v2.py +525 -0
- bioguider/rag/__init__.py +0 -0
- bioguider/rag/config.py +117 -0
- bioguider/rag/data_pipeline.py +651 -0
- bioguider/rag/embedder.py +24 -0
- bioguider/rag/rag.py +138 -0
- bioguider/settings.py +103 -0
- bioguider/utils/code_structure_builder.py +59 -0
- bioguider/utils/constants.py +135 -0
- bioguider/utils/default.gitignore +140 -0
- bioguider/utils/file_utils.py +215 -0
- bioguider/utils/gitignore_checker.py +175 -0
- bioguider/utils/notebook_utils.py +117 -0
- bioguider/utils/pyphen_utils.py +73 -0
- bioguider/utils/python_file_handler.py +65 -0
- bioguider/utils/r_file_handler.py +551 -0
- bioguider/utils/utils.py +163 -0
- bioguider-0.2.52.dist-info/LICENSE +21 -0
- bioguider-0.2.52.dist-info/METADATA +51 -0
- bioguider-0.2.52.dist-info/RECORD +84 -0
- bioguider-0.2.52.dist-info/WHEEL +4 -0
bioguider/__init__.py
ADDED
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
|
|
2
|
+
from typing import Callable
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
6
|
+
from langgraph.graph.graph import CompiledGraph
|
|
7
|
+
|
|
8
|
+
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_STEP_COUNT
|
|
9
|
+
from bioguider.database.summarized_file_db import SummarizedFilesDb
|
|
10
|
+
|
|
11
|
+
class AgentTask(ABC):
    """
    Abstract base class for an agent task that is compiled into a graph and run.

    Subclasses implement ``_compile``. ``_go_graph`` streams ``self.graph`` and
    ``_print_step`` forwards progress information to the optional step callback.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        step_callback: Callable | None = None,
        summarized_files_db: SummarizedFilesDb | None = None,
    ):
        """
        Initialize the AgentTask with a language model and an optional callback.

        Args:
            llm (BaseChatOpenAI): The language model to use.
            step_callback (Callable | None): Called with the keyword arguments
                ``step_name``, ``step_output`` and ``token_usage`` to report
                step results. ``None`` disables reporting.
            summarized_files_db (SummarizedFilesDb | None): Optional database
                handle, stored on the instance as-is.
        """
        self.llm = llm
        self.step_callback = step_callback
        self.summarized_files_db = summarized_files_db
        # NOTE(review): nothing in this class assigns the graph; presumably a
        # subclass's _compile() is expected to set it - confirm.
        self.graph: CompiledGraph | None = None

    def _print_step(
        self,
        step_name: str | None = None,
        step_output: str | None = None,
        token_usage: dict | object | None = None,
    ):
        """
        Report a step result through ``step_callback`` (no-op without a callback).

        Args:
            step_name (str | None): Name of the step being reported.
            step_output (str | None): Output text of the step.
            token_usage (dict | object | None): Token usage as a dict or as an
                object whose attributes are read with ``vars()``. ``None`` is
                forwarded unchanged; anything else is merged over
                ``DEFAULT_TOKEN_USAGE`` so the callback always gets a dict.
        """
        if self.step_callback is None:
            return
        if token_usage is not None:
            # Normalise object-style usage records to a plain dict first.
            if not isinstance(token_usage, dict):
                token_usage = vars(token_usage)
            token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}
        self.step_callback(
            step_name=step_name,
            step_output=step_output,
            token_usage=token_usage,
        )

    def compile(self, repo_path: str, gitignore_path: str, **kwargs):
        """
        Compile the agent task with the given repository and gitignore paths.

        Args:
            repo_path (str): The path to the repository.
            gitignore_path (str): The path to the .gitignore file.
            **kwargs: Extra arguments forwarded unchanged to the derived
                class's ``_compile()``; their meaning depends on that class.
        """
        self._compile(repo_path, gitignore_path, **kwargs)

    @abstractmethod
    def _compile(self, repo_path: str, gitignore_path: str, **kwargs):
        """
        Abstract method to compile the agent task.

        Args:
            repo_path (str): The path to the repository.
            gitignore_path (str): The path to the .gitignore file.
            **kwargs: Implementation-specific arguments passed via ``compile()``.
        """
        pass

    def _go_graph(self, input: dict) -> dict:
        """
        Stream the compiled graph and return the last value it yields.

        The given ``input`` is extended with ``"llm"`` and
        ``"step_output_callback"`` entries before being passed to the graph.
        Every streamed value is printed to stdout.

        Args:
            input (dict): Initial input for the graph.

        Returns:
            dict: The last value yielded by ``self.graph.stream(...)``.

        Raises:
            RuntimeError: If the graph has not been set, or if the stream
                yields nothing.
        """
        if self.graph is None:
            raise RuntimeError(
                "AgentTask graph is not compiled; call compile() before running the task."
            )
        graph_input = {
            **input,
            "llm": self.llm,
            "step_output_callback": self.step_callback,
        }
        # Sentinel distinguishes "nothing yielded" from any real streamed value.
        nothing = object()
        final_state = nothing
        for state in self.graph.stream(
            input=graph_input,
            stream_mode="values",
            config={"recursion_limit": MAX_STEP_COUNT},
        ):
            print(state)
            final_state = state

        if final_state is nothing:
            raise RuntimeError("The graph stream produced no state.")
        return final_state
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Callable
|
|
4
|
+
from markdownify import markdownify as md
|
|
5
|
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
|
6
|
+
from bioguider.database.summarized_file_db import SummarizedFilesDb
|
|
7
|
+
from bioguider.utils.file_utils import get_file_type
|
|
8
|
+
from bioguider.agents.agent_utils import read_directory, read_file, summarize_file
|
|
9
|
+
from bioguider.rag.data_pipeline import count_tokens
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
class agent_tool:
    """Base class for agent tools that may report output through a callback.

    Args:
        llm (BaseChatOpenAI | None): Language model stored on the instance.
        output_callback (Callable | None): Invoked with a single keyword
            argument, either ``token_usage=<dict>`` or ``step_output=<str>``.
            ``None`` disables reporting.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI | None = None,
        output_callback: Callable[..., None] | None = None,
    ):
        self.llm = llm
        self.output_callback = output_callback

    def _print_token_usage(self, token_usage: dict):
        """Forward ``token_usage`` to the callback, if one is set."""
        if self.output_callback is not None:
            self.output_callback(token_usage=token_usage)

    def _print_step_output(self, step_output: str):
        """Forward ``step_output`` to the callback, if one is set."""
        if self.output_callback is not None:
            self.output_callback(step_output=step_output)
|
|
28
|
+
|
|
29
|
+
class read_file_tool:
    """Read a file and return its content as text.

    Files ending in ``.html`` / ``.htm`` are converted with ``markdownify``
    before being returned. Content whose token count exceeds the
    ``OPENAI_MAX_INPUT_TOKENS`` environment variable (default 102400) is
    truncated to its first 100000 characters.

    Args:
        file_path str: file path, absolute or relative to ``repo_path``.
    Returns:
        A string of file content. If the path is None or the file does not
        exist, return None.
    """

    def __init__(self, repo_path: str | None = None):
        self.repo_path = repo_path if repo_path is not None else ""

    def _resolve_path(self, file_path: str) -> str:
        """Return ``file_path`` joined onto ``repo_path`` unless it is already under it.

        Containment is decided on whole path components of the normalised
        paths, so a relative path that merely contains the repository name
        somewhere (e.g. ``docs/repo_notes.md`` for repo ``repo``) is still
        joined onto the repository root.
        """
        repo_path = self.repo_path
        if not repo_path:
            return file_path
        repo_root = os.path.normpath(repo_path)
        candidate = os.path.normpath(file_path)
        if candidate == repo_root or candidate.startswith(repo_root + os.sep):
            return file_path
        # os.path.join returns an absolute file_path unchanged.
        return os.path.join(repo_path, file_path)

    def run(self, file_path: str) -> str | None:
        """Read ``file_path`` and return its (possibly truncated) content."""
        if file_path is None:
            return None
        file_path = self._resolve_path(file_path.strip())
        if not os.path.isfile(file_path):
            return None
        content = read_file(file_path)
        # NOTE(review): guards against read_file() yielding None - confirm
        # whether the helper can actually do that.
        if content is None:
            return None
        if file_path.endswith((".html", ".htm")):
            content = md(content, escape_underscores=False)
        tokens = count_tokens(content)
        max_tokens = os.environ.get('OPENAI_MAX_INPUT_TOKENS', 102400)
        if tokens > int(max_tokens):
            content = content[:100000]
        return content
|
|
55
|
+
|
|
56
|
+
class summarize_file_tool(agent_tool):
    """ Read a file and generate a summary according to a specified prompt.

    Args:
        file_path str: required. The file path to read.
        summarize_prompt str: optional. A string instruction guiding the summarization focus (default is "N/A"). Use this to emphasize specific aspects of the content.

    Returns:
        str or None: The summary prefixed with "summarized content of file <file_path>: ".
        Returns None only when file_path is None. When the path is not a file or
        cannot be read, a short explanatory message string is returned instead.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str | None = None,
        output_callback: Callable | None = None,
        detailed_level: int | None = 6,
        db: SummarizedFilesDb | None = None,
        summaize_instruction: str = "",
    ):
        super().__init__(llm=llm, output_callback=output_callback)
        self.repo_path = repo_path
        # Default to 6 when unset, then clamp into the range 1..10.
        level = 6 if detailed_level is None else detailed_level
        self.detailed_level = max(1, min(10, level))
        self.summary_file_db = db
        self.summarize_instruction = summaize_instruction

    def _resolve_path(self, file_path: str) -> str:
        """Return ``file_path`` joined onto ``repo_path`` unless it is already under it.

        Containment is decided on whole path components of the normalised
        paths rather than by substring, so a relative path that merely contains
        the repository name is still joined onto the repository root.
        """
        repo_path = self.repo_path
        if not repo_path:
            return file_path
        repo_root = os.path.normpath(repo_path)
        candidate = os.path.normpath(file_path)
        if candidate == repo_root or candidate.startswith(repo_root + os.sep):
            return file_path
        # os.path.join returns an absolute file_path unchanged.
        return os.path.join(repo_path, file_path)

    def _retrive_from_summary_file_db(self, file_path: str, prompt: str = "N/A") -> str | None:
        """Look up a stored summary; returns None when no database is configured."""
        if self.summary_file_db is None:
            return None
        return self.summary_file_db.select_summarized_text(
            file_path=file_path,
            instruction=self.summarize_instruction,
            summarize_level=self.detailed_level,
            summarize_prompt=prompt,
        )

    def _save_to_summary_file_db(self, file_path: str, prompt: str, summarized_text: str, token_usage: dict):
        """Store a summary in the database; no-op when no database is configured."""
        if self.summary_file_db is None:
            return
        self.summary_file_db.upsert_summarized_file(
            file_path=file_path,
            instruction=self.summarize_instruction,
            summarize_level=self.detailed_level,
            summarize_prompt=prompt,
            summarized_text=summarized_text,
            token_usage=token_usage,
        )

    def run(self, file_path: str, summarize_prompt: str = "N/A") -> str | None:
        """Summarize ``file_path``, reusing a stored summary when one exists."""
        if file_path is None:
            return None
        if not summarize_prompt:
            summarize_prompt = "N/A"

        file_path = file_path.strip()
        abs_file_path = self._resolve_path(file_path)
        if not os.path.isfile(abs_file_path):
            return f"{file_path} is not a file."

        # The database is keyed by the path as given, not the resolved path.
        cached_summary = self._retrive_from_summary_file_db(
            file_path=file_path,
            prompt=summarize_prompt,
        )
        if cached_summary is not None:
            return f"summarized content of file {file_path}: " + cached_summary

        try:
            file_content = read_file(abs_file_path)
            # Double the braces before the text is handed to summarize_file();
            # presumably it is embedded in a format-style prompt template.
            file_content = file_content.replace("{", "{{").replace("}", "}}")
        except UnicodeDecodeError as e:
            logger.error(str(e))
            return f"{file_path} is a binary, can't be summarized."
        except Exception as e:
            logger.error(str(e))
            return f"Failed to read {file_path}."

        summarized_content, token_usage = summarize_file(
            self.llm, abs_file_path, file_content, self.detailed_level,
            summary_instructions=self.summarize_instruction,
            summarize_prompt=summarize_prompt,
        )
        self._save_to_summary_file_db(
            file_path=file_path,
            prompt=summarize_prompt,
            summarized_text=summarized_content,
            token_usage=token_usage,
        )
        self._print_token_usage(token_usage)
        return f"summarized content of file {file_path}: " + summarized_content
|
|
145
|
+
|
|
146
|
+
class read_directory_tool:
    """Reads the contents of a directory, including files and subdirectories in it.

    Args:
        dir_path (str): Path to the directory, absolute or relative to ``repo_path``.
    Returns:
        a string listing each entry as "<path> - <file type>", one per line.
        "." and ".." are refused with a short message, and "N/A" is returned
        when the directory cannot be listed.
    """

    def __init__(
        self,
        repo_path: str | None = None,
        gitignore_path: str | None = None,
    ):
        self.repo_path = repo_path
        self.gitignore_path = gitignore_path if gitignore_path is not None else ""

    def _resolve_path(self, dir_path: str) -> str:
        """Return ``dir_path`` joined onto ``repo_path`` unless it is already under it.

        A missing (None or empty) ``repo_path`` leaves ``dir_path`` untouched.
        Containment is decided on whole path components of the normalised
        paths rather than by substring.
        """
        repo_path = self.repo_path
        if not repo_path:
            return dir_path
        repo_root = os.path.normpath(repo_path)
        candidate = os.path.normpath(dir_path)
        if candidate == repo_root or candidate.startswith(repo_root + os.sep):
            return dir_path
        # os.path.join returns an absolute dir_path unchanged.
        return os.path.join(repo_path, dir_path)

    def run(self, dir_path):
        """List the entries of ``dir_path`` together with their file types."""
        dir_path = dir_path.strip()
        if dir_path in (".", ".."):
            return f"Please skip this folder {dir_path}"
        full_path = self._resolve_path(dir_path)
        files = read_directory(full_path, gitignore_path=self.gitignore_path, level=1)
        if files is None:
            return "N/A"
        # Entries are displayed relative to the path the caller supplied, while
        # the type lookup uses the resolved path.
        dir_structure = "".join(
            f"{os.path.join(dir_path, f)} - {get_file_type(os.path.join(full_path, f)).value}\n"
            for f in files
        )
        return f"The 2-level content of directory {dir_path}: \n" + \
            f"{dir_structure if len(dir_structure) > 0 else 'No files and sub-directories in it'}"
|