bioguider 0.2.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +92 -0
  4. bioguider/agents/agent_tools.py +176 -0
  5. bioguider/agents/agent_utils.py +504 -0
  6. bioguider/agents/collection_execute_step.py +182 -0
  7. bioguider/agents/collection_observe_step.py +125 -0
  8. bioguider/agents/collection_plan_step.py +156 -0
  9. bioguider/agents/collection_task.py +184 -0
  10. bioguider/agents/collection_task_utils.py +142 -0
  11. bioguider/agents/common_agent.py +137 -0
  12. bioguider/agents/common_agent_2step.py +215 -0
  13. bioguider/agents/common_conversation.py +61 -0
  14. bioguider/agents/common_step.py +85 -0
  15. bioguider/agents/consistency_collection_step.py +102 -0
  16. bioguider/agents/consistency_evaluation_task.py +57 -0
  17. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  18. bioguider/agents/consistency_observe_step.py +110 -0
  19. bioguider/agents/consistency_query_step.py +77 -0
  20. bioguider/agents/dockergeneration_execute_step.py +186 -0
  21. bioguider/agents/dockergeneration_observe_step.py +154 -0
  22. bioguider/agents/dockergeneration_plan_step.py +158 -0
  23. bioguider/agents/dockergeneration_task.py +158 -0
  24. bioguider/agents/dockergeneration_task_utils.py +220 -0
  25. bioguider/agents/evaluation_installation_task.py +270 -0
  26. bioguider/agents/evaluation_readme_task.py +767 -0
  27. bioguider/agents/evaluation_submission_requirements_task.py +172 -0
  28. bioguider/agents/evaluation_task.py +206 -0
  29. bioguider/agents/evaluation_tutorial_task.py +169 -0
  30. bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
  31. bioguider/agents/evaluation_userguide_prompts.py +179 -0
  32. bioguider/agents/evaluation_userguide_task.py +154 -0
  33. bioguider/agents/evaluation_utils.py +127 -0
  34. bioguider/agents/identification_execute_step.py +181 -0
  35. bioguider/agents/identification_observe_step.py +104 -0
  36. bioguider/agents/identification_plan_step.py +140 -0
  37. bioguider/agents/identification_task.py +270 -0
  38. bioguider/agents/identification_task_utils.py +22 -0
  39. bioguider/agents/peo_common_step.py +64 -0
  40. bioguider/agents/prompt_utils.py +253 -0
  41. bioguider/agents/python_ast_repl_tool.py +69 -0
  42. bioguider/agents/rag_collection_task.py +130 -0
  43. bioguider/conversation.py +67 -0
  44. bioguider/database/code_structure_db.py +500 -0
  45. bioguider/database/summarized_file_db.py +146 -0
  46. bioguider/generation/__init__.py +39 -0
  47. bioguider/generation/benchmark_metrics.py +610 -0
  48. bioguider/generation/change_planner.py +189 -0
  49. bioguider/generation/document_renderer.py +157 -0
  50. bioguider/generation/llm_cleaner.py +67 -0
  51. bioguider/generation/llm_content_generator.py +1128 -0
  52. bioguider/generation/llm_injector.py +809 -0
  53. bioguider/generation/models.py +85 -0
  54. bioguider/generation/output_manager.py +74 -0
  55. bioguider/generation/repo_reader.py +37 -0
  56. bioguider/generation/report_loader.py +166 -0
  57. bioguider/generation/style_analyzer.py +36 -0
  58. bioguider/generation/suggestion_extractor.py +436 -0
  59. bioguider/generation/test_metrics.py +189 -0
  60. bioguider/managers/benchmark_manager.py +785 -0
  61. bioguider/managers/evaluation_manager.py +215 -0
  62. bioguider/managers/generation_manager.py +686 -0
  63. bioguider/managers/generation_test_manager.py +107 -0
  64. bioguider/managers/generation_test_manager_v2.py +525 -0
  65. bioguider/rag/__init__.py +0 -0
  66. bioguider/rag/config.py +117 -0
  67. bioguider/rag/data_pipeline.py +651 -0
  68. bioguider/rag/embedder.py +24 -0
  69. bioguider/rag/rag.py +138 -0
  70. bioguider/settings.py +103 -0
  71. bioguider/utils/code_structure_builder.py +59 -0
  72. bioguider/utils/constants.py +135 -0
  73. bioguider/utils/default.gitignore +140 -0
  74. bioguider/utils/file_utils.py +215 -0
  75. bioguider/utils/gitignore_checker.py +175 -0
  76. bioguider/utils/notebook_utils.py +117 -0
  77. bioguider/utils/pyphen_utils.py +73 -0
  78. bioguider/utils/python_file_handler.py +65 -0
  79. bioguider/utils/r_file_handler.py +551 -0
  80. bioguider/utils/utils.py +163 -0
  81. bioguider-0.2.52.dist-info/LICENSE +21 -0
  82. bioguider-0.2.52.dist-info/METADATA +51 -0
  83. bioguider-0.2.52.dist-info/RECORD +84 -0
  84. bioguider-0.2.52.dist-info/WHEEL +4 -0
bioguider/__init__.py ADDED
File without changes
File without changes
@@ -0,0 +1,92 @@
1
+
2
+ from typing import Callable
3
+ from abc import ABC, abstractmethod
4
+
5
+ from langchain_openai.chat_models.base import BaseChatOpenAI
6
+ from langgraph.graph.graph import CompiledGraph
7
+
8
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE, MAX_STEP_COUNT
9
+ from bioguider.database.summarized_file_db import SummarizedFilesDb
10
+
11
class AgentTask(ABC):
    """
    Abstract base class for an agent task.

    Holds the language model, an optional step callback, an optional
    summarized-files database and a ``graph`` slot that starts as ``None``.
    Subclasses must implement ``_compile()``; nothing in this base class
    assigns ``self.graph``, so presumably ``_compile()`` is where the graph is
    built (verify against the derived classes).
    """

    def __init__(
        self,
        llm: BaseChatOpenAI,
        step_callback: Callable | None = None,
        summarized_files_db: SummarizedFilesDb | None = None,
    ):
        """
        Initialize the task with a language model and optional collaborators.

        Args:
            llm (BaseChatOpenAI): The language model to use.
            step_callback (Callable | None): Callback invoked by
                ``_print_step()`` with the keyword arguments ``step_name``,
                ``step_output`` and ``token_usage``. It is also forwarded to
                the graph under the ``step_output_callback`` key.
            summarized_files_db (SummarizedFilesDb | None): Stored on the
                instance; not used by this base class itself.
        """
        self.llm = llm
        self.step_callback = step_callback
        self.summarized_files_db = summarized_files_db
        self.graph: CompiledGraph | None = None

    def _print_step(
        self,
        step_name: str | None = None,
        step_output: str | None = None,
        token_usage: dict | object | None = None,
    ):
        """
        Report a step to ``step_callback``; does nothing when no callback is set.

        Args:
            step_name (str | None): Name of the step being reported.
            step_output (str | None): Output text of the step.
            token_usage (dict | object | None): Token usage as a dict, or as an
                object whose attributes (via ``vars()``) hold the counters.
                ``None`` is reported as ``DEFAULT_TOKEN_USAGE`` unchanged.
        """
        if self.step_callback is None:
            return
        # Normalize token_usage to a dict. ``None`` (the default) must become
        # an empty mapping, otherwise the ``**`` merge below raises TypeError.
        if token_usage is None:
            token_usage = {}
        elif not isinstance(token_usage, dict):
            token_usage = vars(token_usage)
        # Defaults first so that any reported counter overrides them.
        token_usage = {**DEFAULT_TOKEN_USAGE, **token_usage}
        self.step_callback(
            step_name=step_name,
            step_output=step_output,
            token_usage=token_usage,
        )

    def compile(self, repo_path: str, gitignore_path: str, **kwargs):
        """
        Compile the agent task with the given repository and gitignore paths.

        Args:
            repo_path (str): The path to the repository.
            gitignore_path (str): The path to the .gitignore file.
            **kwargs: Forwarded unchanged to the subclass's implemented
                ``_compile()``; their meaning depends on the derived class.
        """
        self._compile(repo_path, gitignore_path, **kwargs)

    @abstractmethod
    def _compile(self, repo_path: str, gitignore_path: str, **kwargs):
        """
        Abstract method to compile the agent task.

        Args:
            repo_path (str): The path to the repository.
            gitignore_path (str): The path to the .gitignore file.
            **kwargs: Subclass-specific options passed through ``compile()``.
        """
        pass

    def _go_graph(self, input: dict) -> dict:
        """
        Stream the graph over ``input`` and return the last streamed value.

        ``input`` is extended with the ``llm`` and ``step_output_callback``
        keys before being handed to ``self.graph.stream()``. Every streamed
        value is printed.

        Args:
            input (dict): Initial values for the graph run.

        Returns:
            dict: The last value yielded by the stream. If the stream yields
            nothing, the prepared input is returned instead of failing with an
            unbound local.

        Raises:
            RuntimeError: If ``self.graph`` has not been set yet.
        """
        if self.graph is None:
            raise RuntimeError(
                "The graph is not compiled; call compile() before running the task."
            )
        graph_input = {
            **input,
            "llm": self.llm,
            "step_output_callback": self.step_callback,
        }
        # Fallback result in case the stream produces no values at all.
        state = graph_input
        for state in self.graph.stream(
            input=graph_input,
            stream_mode="values",
            config={"recursion_limit": MAX_STEP_COUNT},
        ):
            print(state)

        return state
90
+
91
+
92
+
@@ -0,0 +1,176 @@
1
+ import os
2
+ import logging
3
+ from typing import Callable
4
+ from markdownify import markdownify as md
5
+ from langchain_openai.chat_models.base import BaseChatOpenAI
6
+ from bioguider.database.summarized_file_db import SummarizedFilesDb
7
+ from bioguider.utils.file_utils import get_file_type
8
+ from bioguider.agents.agent_utils import read_directory, read_file, summarize_file
9
+ from bioguider.rag.data_pipeline import count_tokens
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class agent_tool:
    """Base class for agent tools that hold an LLM and report through a callback.

    Args:
        llm: Language model stored on the instance for use by subclasses.
        output_callback: Optional callable. It is only ever invoked with a
            single keyword argument, either ``token_usage=...`` or
            ``step_output=...``.
    """

    def __init__(
        self,
        llm: BaseChatOpenAI | None = None,
        output_callback: Callable[..., None] | None = None,
    ):
        self.llm = llm
        self.output_callback = output_callback

    def _print_token_usage(self, token_usage: dict):
        """Forward ``token_usage`` to the callback; no-op when none is set."""
        if self.output_callback is not None:
            self.output_callback(token_usage=token_usage)

    def _print_step_output(self, step_output: str):
        """Forward ``step_output`` to the callback; no-op when none is set."""
        if self.output_callback is not None:
            self.output_callback(step_output=step_output)
28
+
29
class read_file_tool:
    """ read file
    Args:
        file_path str: file path
    Returns:
        A string of file content, if the file does not exist, return None.
        HTML files (``.html`` / ``.htm``) are converted to markdown first, and
        content whose token count exceeds the limit is cut to its first
        100000 characters.
    """
    # Token limit used when OPENAI_MAX_INPUT_TOKENS is unset or not an integer.
    _DEFAULT_MAX_TOKENS = 102400
    # Number of leading characters kept when the token limit is exceeded.
    _MAX_CHARS = 100000

    def __init__(self, repo_path: str | None = None):
        # Normalized to "" so the containment test in run() is always valid.
        self.repo_path = repo_path if repo_path is not None else ""

    def run(self, file_path: str) -> str | None:
        """Return the (possibly converted/truncated) content of ``file_path``.

        Args:
            file_path: Path to read. It is joined onto ``repo_path`` when
                ``repo_path`` is non-empty and does not already occur in it.

        Returns:
            The file content, or None when ``file_path`` is None, is not an
            existing file, or ``read_file`` yields no content.
        """
        if file_path is None:
            return None
        file_path = file_path.strip()
        if self.repo_path and self.repo_path not in file_path:
            file_path = os.path.join(self.repo_path, file_path)
        if not os.path.isfile(file_path):
            return None
        content = read_file(file_path)
        if content is None:
            # Defensive: avoids passing None to the markdown converter or the
            # token counter should read_file report a failure this way.
            return None
        if file_path.endswith((".html", ".htm")):
            content = md(content, escape_underscores=False)
        tokens = count_tokens(content)
        try:
            max_tokens = int(
                os.environ.get('OPENAI_MAX_INPUT_TOKENS', self._DEFAULT_MAX_TOKENS)
            )
        except ValueError:
            # A malformed environment value must not break file reading.
            max_tokens = self._DEFAULT_MAX_TOKENS
        if tokens > max_tokens:
            content = content[:self._MAX_CHARS]
        return content
55
+
56
class summarize_file_tool(agent_tool):
    """ Read a file and generate a summary according to a specified prompt.

    Args:
        file_path str: required. The file path to read.
        summarize_prompt str: optional. Instruction steering what the summary should focus on (default is "N/A").

    Returns:
        str or None: The summary prefixed with the file path, or a short message when the path is not a
        file or cannot be read. Returns None only when file_path is None.
    """
    def __init__(
        self,
        llm: BaseChatOpenAI,
        repo_path: str | None = None,
        output_callback: Callable | None = None,
        detailed_level: int | None = 6,
        db: SummarizedFilesDb | None = None,
        summaize_instruction: str = "",
    ):
        super().__init__(llm=llm, output_callback=output_callback)
        self.repo_path = repo_path
        # Missing level falls back to 6; the level is then clamped into 1..10.
        level = 6 if detailed_level is None else detailed_level
        if level <= 0:
            level = 1
        elif level > 10:
            level = 10
        self.detailed_level = level
        self.summary_file_db = db
        self.summarize_instruction = summaize_instruction

    def _retrive_from_summary_file_db(self, file_path: str, prompt: str = "N/A") -> str | None:
        """Look up a stored summary for this file/instruction/level/prompt; None without a DB."""
        db = self.summary_file_db
        if db is None:
            return None
        return db.select_summarized_text(
            file_path=file_path,
            instruction=self.summarize_instruction,
            summarize_level=self.detailed_level,
            summarize_prompt=prompt,
        )

    def _save_to_summary_file_db(self, file_path: str, prompt: str, summarized_text: str, token_usage: dict):
        """Store a freshly produced summary in the DB; no-op without a DB."""
        db = self.summary_file_db
        if db is None:
            return
        db.upsert_summarized_file(
            file_path=file_path,
            instruction=self.summarize_instruction,
            summarize_level=self.detailed_level,
            summarize_prompt=prompt,
            summarized_text=summarized_text,
            token_usage=token_usage,
        )

    def run(self, file_path: str, summarize_prompt: str = "N/A") -> str | None:
        """Summarize ``file_path``, reusing a stored summary when one exists."""
        if file_path is None:
            return None
        if not summarize_prompt:
            summarize_prompt = "N/A"

        file_path = file_path.strip()
        # The DB is keyed by the path as given; the joined path is only used for disk access.
        if self.repo_path is not None and self.repo_path not in file_path:
            abs_file_path = os.path.join(self.repo_path, file_path)
        else:
            abs_file_path = file_path
        if not os.path.isfile(abs_file_path):
            return f"{file_path} is not a file."

        cached = self._retrive_from_summary_file_db(
            file_path=file_path,
            prompt=summarize_prompt,
        )
        if cached is not None:
            return f"summarized content of file {file_path}: " + cached

        try:
            raw_text = read_file(abs_file_path)
            # Double the braces before handing the text to summarize_file
            # (presumably it is embedded in a format-style prompt template).
            file_content = raw_text.replace("{", "{{").replace("}", "}}")
        except UnicodeDecodeError as e:
            logger.error(str(e))
            return f"{file_path} is a binary, can't be summarized."
        except Exception as e:
            logger.error(str(e))
            return f"Failed to read {file_path}."

        summary, usage = summarize_file(
            self.llm, abs_file_path, file_content, self.detailed_level,
            summary_instructions=self.summarize_instruction,
            summarize_prompt=summarize_prompt,
        )
        self._save_to_summary_file_db(
            file_path=file_path,
            prompt=summarize_prompt,
            summarized_text=summary,
            token_usage=usage,
        )
        self._print_token_usage(usage)
        return f"summarized content of file {file_path}: " + summary
145
+
146
class read_directory_tool:
    """Reads the contents of a directory, including files and subdirectories in it.
    Args:
        dir_path (str): Path to the directory.
    Returns:
        a string listing each entry found (one per line, followed by its file type),
        "N/A" when the directory cannot be listed, or a skip message for "." and "..".
    """
    def __init__(
        self,
        repo_path: str | None = None,
        gitignore_path: str | None = None,
    ):
        self.repo_path = repo_path
        self.gitignore_path = gitignore_path if gitignore_path is not None else ""

    def _resolve_path(self, dir_path: str) -> str:
        """Join ``dir_path`` onto ``repo_path`` unless it already contains it.

        A missing or empty ``repo_path`` leaves ``dir_path`` untouched; testing
        ``None not in dir_path`` directly would raise TypeError.
        """
        if self.repo_path and self.repo_path not in dir_path:
            return os.path.join(self.repo_path, dir_path)
        return dir_path

    def run(self, dir_path):
        """List the entries of ``dir_path`` together with their file types."""
        dir_path = dir_path.strip()
        if dir_path in (".", ".."):
            return f"Please skip this folder {dir_path}"
        full_path = self._resolve_path(dir_path)
        files = read_directory(full_path, gitignore_path=self.gitignore_path, level=1)
        if files is None:
            return "N/A"
        # Entries are shown relative to the caller's dir_path, while the type
        # lookup uses the resolved on-disk path.
        dir_structure = "".join(
            f"{os.path.join(dir_path, f)} - {get_file_type(os.path.join(full_path, f)).value}\n"
            for f in files
        )
        return f"The 2-level content of directory {dir_path}: \n" + \
            f"{dir_structure if len(dir_structure) > 0 else 'No files and sub-directories in it'}"