bioguider 0.2.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioguider/__init__.py +0 -0
- bioguider/agents/__init__.py +0 -0
- bioguider/agents/agent_task.py +92 -0
- bioguider/agents/agent_tools.py +176 -0
- bioguider/agents/agent_utils.py +504 -0
- bioguider/agents/collection_execute_step.py +182 -0
- bioguider/agents/collection_observe_step.py +125 -0
- bioguider/agents/collection_plan_step.py +156 -0
- bioguider/agents/collection_task.py +184 -0
- bioguider/agents/collection_task_utils.py +142 -0
- bioguider/agents/common_agent.py +137 -0
- bioguider/agents/common_agent_2step.py +215 -0
- bioguider/agents/common_conversation.py +61 -0
- bioguider/agents/common_step.py +85 -0
- bioguider/agents/consistency_collection_step.py +102 -0
- bioguider/agents/consistency_evaluation_task.py +57 -0
- bioguider/agents/consistency_evaluation_task_utils.py +14 -0
- bioguider/agents/consistency_observe_step.py +110 -0
- bioguider/agents/consistency_query_step.py +77 -0
- bioguider/agents/dockergeneration_execute_step.py +186 -0
- bioguider/agents/dockergeneration_observe_step.py +154 -0
- bioguider/agents/dockergeneration_plan_step.py +158 -0
- bioguider/agents/dockergeneration_task.py +158 -0
- bioguider/agents/dockergeneration_task_utils.py +220 -0
- bioguider/agents/evaluation_installation_task.py +270 -0
- bioguider/agents/evaluation_readme_task.py +767 -0
- bioguider/agents/evaluation_submission_requirements_task.py +172 -0
- bioguider/agents/evaluation_task.py +206 -0
- bioguider/agents/evaluation_tutorial_task.py +169 -0
- bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
- bioguider/agents/evaluation_userguide_prompts.py +179 -0
- bioguider/agents/evaluation_userguide_task.py +154 -0
- bioguider/agents/evaluation_utils.py +127 -0
- bioguider/agents/identification_execute_step.py +181 -0
- bioguider/agents/identification_observe_step.py +104 -0
- bioguider/agents/identification_plan_step.py +140 -0
- bioguider/agents/identification_task.py +270 -0
- bioguider/agents/identification_task_utils.py +22 -0
- bioguider/agents/peo_common_step.py +64 -0
- bioguider/agents/prompt_utils.py +253 -0
- bioguider/agents/python_ast_repl_tool.py +69 -0
- bioguider/agents/rag_collection_task.py +130 -0
- bioguider/conversation.py +67 -0
- bioguider/database/code_structure_db.py +500 -0
- bioguider/database/summarized_file_db.py +146 -0
- bioguider/generation/__init__.py +39 -0
- bioguider/generation/benchmark_metrics.py +610 -0
- bioguider/generation/change_planner.py +189 -0
- bioguider/generation/document_renderer.py +157 -0
- bioguider/generation/llm_cleaner.py +67 -0
- bioguider/generation/llm_content_generator.py +1128 -0
- bioguider/generation/llm_injector.py +809 -0
- bioguider/generation/models.py +85 -0
- bioguider/generation/output_manager.py +74 -0
- bioguider/generation/repo_reader.py +37 -0
- bioguider/generation/report_loader.py +166 -0
- bioguider/generation/style_analyzer.py +36 -0
- bioguider/generation/suggestion_extractor.py +436 -0
- bioguider/generation/test_metrics.py +189 -0
- bioguider/managers/benchmark_manager.py +785 -0
- bioguider/managers/evaluation_manager.py +215 -0
- bioguider/managers/generation_manager.py +686 -0
- bioguider/managers/generation_test_manager.py +107 -0
- bioguider/managers/generation_test_manager_v2.py +525 -0
- bioguider/rag/__init__.py +0 -0
- bioguider/rag/config.py +117 -0
- bioguider/rag/data_pipeline.py +651 -0
- bioguider/rag/embedder.py +24 -0
- bioguider/rag/rag.py +138 -0
- bioguider/settings.py +103 -0
- bioguider/utils/code_structure_builder.py +59 -0
- bioguider/utils/constants.py +135 -0
- bioguider/utils/default.gitignore +140 -0
- bioguider/utils/file_utils.py +215 -0
- bioguider/utils/gitignore_checker.py +175 -0
- bioguider/utils/notebook_utils.py +117 -0
- bioguider/utils/pyphen_utils.py +73 -0
- bioguider/utils/python_file_handler.py +65 -0
- bioguider/utils/r_file_handler.py +551 -0
- bioguider/utils/utils.py +163 -0
- bioguider-0.2.52.dist-info/LICENSE +21 -0
- bioguider-0.2.52.dist-info/METADATA +51 -0
- bioguider-0.2.52.dist-info/RECORD +84 -0
- bioguider-0.2.52.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
import sqlite3
|
|
2
|
+
from sqlite3 import Connection
|
|
3
|
+
import os
|
|
4
|
+
from time import strftime
|
|
5
|
+
from typing import Optional, List, Dict, Any
|
|
6
|
+
import logging
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
# Module-level logger. NOTE: this rebinds the name `logging` from the stdlib
# module to a Logger instance, so from here on `logging.error(...)` is a call
# on that Logger, not on the `logging` module.
logging = logging.getLogger(__name__)

# Name of the SQLite table that every query below operates on.
CODE_STRUCTURE_TABLE_NAME = "SourceCodeStructure"

# DDL for the table. Rows are unique on (name, path, start_lineno, end_lineno,
# parent); `parent` itself is nullable, and `datetime` defaults to the
# insertion time formatted by SQLite's strftime.
code_structure_create_table_query = f"""
CREATE TABLE IF NOT EXISTS {CODE_STRUCTURE_TABLE_NAME} (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name VARCHAR(256) NOT NULL,
    path VARCHAR(512) NOT NULL,
    start_lineno INTEGER NOT NULL,
    end_lineno INTEGER NOT NULL,
    parent VARCHAR(256),
    doc_string TEXT,
    params TEXT,
    reference_to TEXT,
    reference_by TEXT,
    datetime TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d %H:%M:%f', 'now')),
    UNIQUE (name, path, start_lineno, end_lineno, parent)
);
"""

# Upsert: takes 9 positional parameters (name .. reference_by). When the
# unique key already exists, the descriptive columns and the timestamp are
# overwritten instead of inserting a second row.
code_structure_insert_query = f"""
INSERT INTO {CODE_STRUCTURE_TABLE_NAME}(name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%f', 'now'))
ON CONFLICT(name, path, start_lineno, end_lineno, parent) DO UPDATE SET doc_string=excluded.doc_string, params=excluded.params,
reference_to=excluded.reference_to, reference_by=excluded.reference_by, datetime=strftime('%Y-%m-%d %H:%M:%f', 'now');
"""

# All SELECT statements below return the same 11 columns in the same order:
# id, name, path, start_lineno, end_lineno, parent, doc_string, params,
# reference_to, reference_by, datetime.

# Lookup by file path (1 parameter).
code_structure_select_by_path_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE path = ?;
"""

# Lookup by entry name (1 parameter).
code_structure_select_by_name_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE name = ?;
"""

# Lookup by entry name and file path (2 parameters: name, path).
code_structure_select_by_name_and_path_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE name = ? AND path = ?;
"""

# Lookup by primary key (1 parameter).
code_structure_select_by_id_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE id = ?;
"""

# Lookup by parent and file path (2 parameters: parent, path).
code_structure_select_by_parent_and_parentpath_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE parent = ? AND path = ?;
"""

# Lookup by parent only (1 parameter).
code_structure_select_by_parent_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE parent = ?;
"""

# Full-row update by primary key: 10 parameters, the 9 column values followed
# by the id. The timestamp is always refreshed.
code_structure_update_query = f"""
UPDATE {CODE_STRUCTURE_TABLE_NAME}
SET name = ?, path = ?, start_lineno = ?, end_lineno = ?, parent = ?, doc_string = ?, params = ?, reference_to = ?, reference_by = ?, datetime = strftime('%Y-%m-%d %H:%M:%f', 'now')
WHERE id = ?;
"""

# Delete by primary key (1 parameter).
code_structure_delete_query = f"""
DELETE FROM {CODE_STRUCTURE_TABLE_NAME} WHERE id = ?;
"""

# Lookup by name, parent and file path (3 parameters: name, parent, path).
code_structure_select_by_name_and_parent_and_path_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE name = ? AND parent = ? AND path = ?;
"""

# Lookup by name and parent (2 parameters: name, parent).
code_structure_select_by_name_and_parent_query = f"""
SELECT id, name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime
FROM {CODE_STRUCTURE_TABLE_NAME}
WHERE name = ? AND parent = ?;
"""
|
|
94
|
+
|
|
95
|
+
class CodeStructureDb:
|
|
96
|
+
def __init__(self, author: str, repo_name: str, data_folder: str = None):
|
|
97
|
+
self.author = author
|
|
98
|
+
self.repo_name = repo_name
|
|
99
|
+
self.data_folder = data_folder
|
|
100
|
+
self.connection: Connection | None = None
|
|
101
|
+
|
|
102
|
+
def _ensure_tables(self) -> bool:
|
|
103
|
+
if self.connection is None:
|
|
104
|
+
return False
|
|
105
|
+
try:
|
|
106
|
+
cursor = self.connection.cursor()
|
|
107
|
+
cursor.execute(code_structure_create_table_query)
|
|
108
|
+
self.connection.commit()
|
|
109
|
+
return True
|
|
110
|
+
except Exception as e:
|
|
111
|
+
logging.error(e)
|
|
112
|
+
return False
|
|
113
|
+
|
|
114
|
+
def _connect_to_db(self) -> bool:
|
|
115
|
+
if self.connection is not None:
|
|
116
|
+
return True
|
|
117
|
+
db_path = self.data_folder
|
|
118
|
+
if db_path is None:
|
|
119
|
+
db_path = os.environ.get("DATA_FOLDER", "./data")
|
|
120
|
+
db_path = os.path.join(db_path, "databases")
|
|
121
|
+
if not os.path.exists(db_path):
|
|
122
|
+
try:
|
|
123
|
+
os.makedirs(db_path, exist_ok=True)
|
|
124
|
+
except Exception as e:
|
|
125
|
+
logging.error(e)
|
|
126
|
+
return False
|
|
127
|
+
db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_code_structure.db")
|
|
128
|
+
if not os.path.exists(db_path):
|
|
129
|
+
try:
|
|
130
|
+
with open(db_path, "w"):
|
|
131
|
+
pass
|
|
132
|
+
except Exception as e:
|
|
133
|
+
logging.error(e)
|
|
134
|
+
return False
|
|
135
|
+
self.connection = sqlite3.connect(db_path)
|
|
136
|
+
return True
|
|
137
|
+
|
|
138
|
+
def is_database_built(self) -> bool:
|
|
139
|
+
res = self._connect_to_db()
|
|
140
|
+
if not res:
|
|
141
|
+
return False
|
|
142
|
+
res = self._ensure_tables()
|
|
143
|
+
if not res:
|
|
144
|
+
return False
|
|
145
|
+
try:
|
|
146
|
+
cursor = self.connection.cursor()
|
|
147
|
+
cursor.execute(f"SELECT * FROM {CODE_STRUCTURE_TABLE_NAME}")
|
|
148
|
+
return cursor.fetchone() is not None
|
|
149
|
+
except Exception as e:
|
|
150
|
+
logging.error(e)
|
|
151
|
+
return False
|
|
152
|
+
finally:
|
|
153
|
+
self.connection.close()
|
|
154
|
+
self.connection = None
|
|
155
|
+
|
|
156
|
+
def insert_code_structure(
|
|
157
|
+
self,
|
|
158
|
+
name: str,
|
|
159
|
+
path: str,
|
|
160
|
+
start_lineno: int,
|
|
161
|
+
end_lineno: int,
|
|
162
|
+
parent: str = None,
|
|
163
|
+
doc_string: str = None,
|
|
164
|
+
params: str = None,
|
|
165
|
+
reference_to: str = None,
|
|
166
|
+
reference_by: str = None
|
|
167
|
+
) -> bool:
|
|
168
|
+
"""Insert a new code structure entry into the database."""
|
|
169
|
+
if parent is None:
|
|
170
|
+
parent = ""
|
|
171
|
+
if path is None:
|
|
172
|
+
path = ""
|
|
173
|
+
res = self._connect_to_db()
|
|
174
|
+
if not res:
|
|
175
|
+
return False
|
|
176
|
+
res = self._ensure_tables()
|
|
177
|
+
if not res:
|
|
178
|
+
return False
|
|
179
|
+
try:
|
|
180
|
+
cursor = self.connection.cursor()
|
|
181
|
+
cursor.execute(
|
|
182
|
+
code_structure_insert_query,
|
|
183
|
+
(name, path, start_lineno, end_lineno, parent, doc_string, json.dumps(params) if params is not None else None, reference_to, reference_by)
|
|
184
|
+
)
|
|
185
|
+
self.connection.commit()
|
|
186
|
+
return True
|
|
187
|
+
except Exception as e:
|
|
188
|
+
logging.error(e)
|
|
189
|
+
return False
|
|
190
|
+
finally:
|
|
191
|
+
self.connection.close()
|
|
192
|
+
self.connection = None
|
|
193
|
+
|
|
194
|
+
def select_by_path(self, path: str) -> List[Dict[str, Any]]:
|
|
195
|
+
"""Select all code structures by file path."""
|
|
196
|
+
res = self._connect_to_db()
|
|
197
|
+
if not res:
|
|
198
|
+
return []
|
|
199
|
+
res = self._ensure_tables()
|
|
200
|
+
if not res:
|
|
201
|
+
return []
|
|
202
|
+
try:
|
|
203
|
+
cursor = self.connection.cursor()
|
|
204
|
+
cursor.execute(code_structure_select_by_path_query, (path,))
|
|
205
|
+
rows = cursor.fetchall()
|
|
206
|
+
return [
|
|
207
|
+
{
|
|
208
|
+
"id": row[0],
|
|
209
|
+
"name": row[1],
|
|
210
|
+
"path": row[2],
|
|
211
|
+
"start_lineno": row[3],
|
|
212
|
+
"end_lineno": row[4],
|
|
213
|
+
"parent": row[5],
|
|
214
|
+
"doc_string": row[6],
|
|
215
|
+
"params": row[7],
|
|
216
|
+
"reference_to": row[8],
|
|
217
|
+
"reference_by": row[9],
|
|
218
|
+
"datetime": row[10]
|
|
219
|
+
}
|
|
220
|
+
for row in rows
|
|
221
|
+
]
|
|
222
|
+
except Exception as e:
|
|
223
|
+
logging.error(e)
|
|
224
|
+
return []
|
|
225
|
+
finally:
|
|
226
|
+
self.connection.close()
|
|
227
|
+
self.connection = None
|
|
228
|
+
|
|
229
|
+
def select_by_name(self, name: str) -> List[Dict[str, Any]]:
|
|
230
|
+
"""Select all code structures by name."""
|
|
231
|
+
res = self._connect_to_db()
|
|
232
|
+
if not res:
|
|
233
|
+
return []
|
|
234
|
+
res = self._ensure_tables()
|
|
235
|
+
if not res:
|
|
236
|
+
return []
|
|
237
|
+
try:
|
|
238
|
+
cursor = self.connection.cursor()
|
|
239
|
+
cursor.execute(code_structure_select_by_name_query, (name,))
|
|
240
|
+
rows = cursor.fetchall()
|
|
241
|
+
return [
|
|
242
|
+
{
|
|
243
|
+
"id": row[0],
|
|
244
|
+
"name": row[1],
|
|
245
|
+
"path": row[2],
|
|
246
|
+
"start_lineno": row[3],
|
|
247
|
+
"end_lineno": row[4],
|
|
248
|
+
"parent": row[5],
|
|
249
|
+
"doc_string": row[6],
|
|
250
|
+
"params": row[7],
|
|
251
|
+
"reference_to": row[8],
|
|
252
|
+
"reference_by": row[9],
|
|
253
|
+
"datetime": row[10]
|
|
254
|
+
}
|
|
255
|
+
for row in rows
|
|
256
|
+
]
|
|
257
|
+
except Exception as e:
|
|
258
|
+
logging.error(e)
|
|
259
|
+
return []
|
|
260
|
+
finally:
|
|
261
|
+
self.connection.close()
|
|
262
|
+
self.connection = None
|
|
263
|
+
|
|
264
|
+
def select_by_name_and_path(self, name: str, path: str) -> Optional[Dict[str, Any]]:
|
|
265
|
+
"""Select a code structure by name and path."""
|
|
266
|
+
res = self._connect_to_db()
|
|
267
|
+
if not res:
|
|
268
|
+
return None
|
|
269
|
+
res = self._ensure_tables()
|
|
270
|
+
if not res:
|
|
271
|
+
return None
|
|
272
|
+
try:
|
|
273
|
+
cursor = self.connection.cursor()
|
|
274
|
+
cursor.execute(code_structure_select_by_name_and_path_query, (name, path))
|
|
275
|
+
row = cursor.fetchone()
|
|
276
|
+
if row is None:
|
|
277
|
+
return None
|
|
278
|
+
return {
|
|
279
|
+
"id": row[0],
|
|
280
|
+
"name": row[1],
|
|
281
|
+
"path": row[2],
|
|
282
|
+
"start_lineno": row[3],
|
|
283
|
+
"end_lineno": row[4],
|
|
284
|
+
"parent": row[5],
|
|
285
|
+
"doc_string": row[6],
|
|
286
|
+
"params": row[7],
|
|
287
|
+
"reference_to": row[8],
|
|
288
|
+
"reference_by": row[9],
|
|
289
|
+
"datetime": row[10]
|
|
290
|
+
}
|
|
291
|
+
except Exception as e:
|
|
292
|
+
logging.error(e)
|
|
293
|
+
return None
|
|
294
|
+
finally:
|
|
295
|
+
self.connection.close()
|
|
296
|
+
self.connection = None
|
|
297
|
+
|
|
298
|
+
def select_by_name_and_parent(self, name: str, parent: str) -> List[Dict[str, Any]]:
|
|
299
|
+
"""Select all code structures by name and parent."""
|
|
300
|
+
res = self._connect_to_db()
|
|
301
|
+
if not res:
|
|
302
|
+
return []
|
|
303
|
+
res = self._ensure_tables()
|
|
304
|
+
if not res:
|
|
305
|
+
return []
|
|
306
|
+
try:
|
|
307
|
+
cursor = self.connection.cursor()
|
|
308
|
+
cursor.execute(code_structure_select_by_name_and_parent_query, (name, parent))
|
|
309
|
+
rows = cursor.fetchall()
|
|
310
|
+
return [
|
|
311
|
+
{
|
|
312
|
+
"id": row[0],
|
|
313
|
+
"name": row[1],
|
|
314
|
+
"path": row[2],
|
|
315
|
+
"start_lineno": row[3],
|
|
316
|
+
"end_lineno": row[4],
|
|
317
|
+
"parent": row[5],
|
|
318
|
+
"doc_string": row[6],
|
|
319
|
+
"params": row[7],
|
|
320
|
+
"reference_to": row[8],
|
|
321
|
+
"reference_by": row[9],
|
|
322
|
+
"datetime": row[10]
|
|
323
|
+
}
|
|
324
|
+
for row in rows
|
|
325
|
+
]
|
|
326
|
+
except Exception as e:
|
|
327
|
+
logging.error(e)
|
|
328
|
+
return []
|
|
329
|
+
finally:
|
|
330
|
+
self.connection.close()
|
|
331
|
+
self.connection = None
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def select_by_name_and_parent_and_path(self, name: str, parent: str, path: str) -> Optional[Dict[str, Any]]:
|
|
335
|
+
"""Select a code structure by name and parent."""
|
|
336
|
+
res = self._connect_to_db()
|
|
337
|
+
if not res:
|
|
338
|
+
return None
|
|
339
|
+
res = self._ensure_tables()
|
|
340
|
+
if not res:
|
|
341
|
+
return None
|
|
342
|
+
try:
|
|
343
|
+
cursor = self.connection.cursor()
|
|
344
|
+
cursor.execute(code_structure_select_by_name_and_parent_and_path_query, (name, parent, path))
|
|
345
|
+
row = cursor.fetchone()
|
|
346
|
+
if row is None:
|
|
347
|
+
return None
|
|
348
|
+
return {
|
|
349
|
+
"id": row[0],
|
|
350
|
+
"name": row[1],
|
|
351
|
+
"path": row[2],
|
|
352
|
+
"start_lineno": row[3],
|
|
353
|
+
"end_lineno": row[4],
|
|
354
|
+
"parent": row[5],
|
|
355
|
+
"doc_string": row[6],
|
|
356
|
+
"params": row[7],
|
|
357
|
+
"reference_to": row[8],
|
|
358
|
+
"reference_by": row[9],
|
|
359
|
+
"datetime": row[10]
|
|
360
|
+
}
|
|
361
|
+
except Exception as e:
|
|
362
|
+
logging.error(e)
|
|
363
|
+
return None
|
|
364
|
+
finally:
|
|
365
|
+
self.connection.close()
|
|
366
|
+
self.connection = None
|
|
367
|
+
|
|
368
|
+
def select_by_id(self, id: int) -> Optional[Dict[str, Any]]:
|
|
369
|
+
"""Select a code structure by ID."""
|
|
370
|
+
res = self._connect_to_db()
|
|
371
|
+
if not res:
|
|
372
|
+
return None
|
|
373
|
+
res = self._ensure_tables()
|
|
374
|
+
if not res:
|
|
375
|
+
return None
|
|
376
|
+
try:
|
|
377
|
+
cursor = self.connection.cursor()
|
|
378
|
+
cursor.execute(code_structure_select_by_id_query, (id,))
|
|
379
|
+
row = cursor.fetchone()
|
|
380
|
+
if row is None:
|
|
381
|
+
return None
|
|
382
|
+
return {
|
|
383
|
+
"id": row[0],
|
|
384
|
+
"name": row[1],
|
|
385
|
+
"path": row[2],
|
|
386
|
+
"start_lineno": row[3],
|
|
387
|
+
"end_lineno": row[4],
|
|
388
|
+
"parent": row[5],
|
|
389
|
+
"doc_string": row[6],
|
|
390
|
+
"params": row[7],
|
|
391
|
+
"reference_to": row[8],
|
|
392
|
+
"reference_by": row[9],
|
|
393
|
+
"datetime": row[10]
|
|
394
|
+
}
|
|
395
|
+
except Exception as e:
|
|
396
|
+
logging.error(e)
|
|
397
|
+
return None
|
|
398
|
+
finally:
|
|
399
|
+
self.connection.close()
|
|
400
|
+
self.connection = None
|
|
401
|
+
|
|
402
|
+
def update_code_structure(
|
|
403
|
+
self,
|
|
404
|
+
id: int,
|
|
405
|
+
name: str,
|
|
406
|
+
path: str,
|
|
407
|
+
start_lineno: int,
|
|
408
|
+
end_lineno: int,
|
|
409
|
+
parent: str = None,
|
|
410
|
+
doc_string: str = None,
|
|
411
|
+
params: str = None,
|
|
412
|
+
reference_to: str = None,
|
|
413
|
+
reference_by: str = None
|
|
414
|
+
) -> bool:
|
|
415
|
+
"""Update an existing code structure entry."""
|
|
416
|
+
res = self._connect_to_db()
|
|
417
|
+
if not res:
|
|
418
|
+
return False
|
|
419
|
+
res = self._ensure_tables()
|
|
420
|
+
if not res:
|
|
421
|
+
return False
|
|
422
|
+
try:
|
|
423
|
+
cursor = self.connection.cursor()
|
|
424
|
+
cursor.execute(
|
|
425
|
+
code_structure_update_query,
|
|
426
|
+
(name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, id)
|
|
427
|
+
)
|
|
428
|
+
self.connection.commit()
|
|
429
|
+
return cursor.rowcount > 0
|
|
430
|
+
except Exception as e:
|
|
431
|
+
logging.error(e)
|
|
432
|
+
return False
|
|
433
|
+
finally:
|
|
434
|
+
self.connection.close()
|
|
435
|
+
self.connection = None
|
|
436
|
+
|
|
437
|
+
def select_by_parent(self, parent: str, path: str | None = None) -> List[Dict[str, Any]]:
|
|
438
|
+
"""Select all code structures by parent."""
|
|
439
|
+
res = self._connect_to_db()
|
|
440
|
+
if not res:
|
|
441
|
+
return []
|
|
442
|
+
res = self._ensure_tables()
|
|
443
|
+
if not res:
|
|
444
|
+
return []
|
|
445
|
+
try:
|
|
446
|
+
cursor = self.connection.cursor()
|
|
447
|
+
if path is not None:
|
|
448
|
+
cursor.execute(code_structure_select_by_parent_and_parentpath_query, (parent, path))
|
|
449
|
+
else:
|
|
450
|
+
cursor.execute(code_structure_select_by_parent_query, (parent,))
|
|
451
|
+
rows = cursor.fetchall()
|
|
452
|
+
return [
|
|
453
|
+
{
|
|
454
|
+
"id": row[0],
|
|
455
|
+
"name": row[1],
|
|
456
|
+
"path": row[2],
|
|
457
|
+
"start_lineno": row[3],
|
|
458
|
+
"end_lineno": row[4],
|
|
459
|
+
"parent": row[5],
|
|
460
|
+
"doc_string": row[6],
|
|
461
|
+
"params": row[7],
|
|
462
|
+
"reference_to": row[8],
|
|
463
|
+
"reference_by": row[9],
|
|
464
|
+
"datetime": row[10]
|
|
465
|
+
}
|
|
466
|
+
for row in rows
|
|
467
|
+
]
|
|
468
|
+
except Exception as e:
|
|
469
|
+
logging.error(e)
|
|
470
|
+
return []
|
|
471
|
+
finally:
|
|
472
|
+
self.connection.close()
|
|
473
|
+
self.connection = None
|
|
474
|
+
|
|
475
|
+
def delete_code_structure(self, id: int) -> bool:
|
|
476
|
+
"""Delete a code structure entry by ID."""
|
|
477
|
+
res = self._connect_to_db()
|
|
478
|
+
if not res:
|
|
479
|
+
return False
|
|
480
|
+
res = self._ensure_tables()
|
|
481
|
+
if not res:
|
|
482
|
+
return False
|
|
483
|
+
try:
|
|
484
|
+
cursor = self.connection.cursor()
|
|
485
|
+
cursor.execute(code_structure_delete_query, (id,))
|
|
486
|
+
self.connection.commit()
|
|
487
|
+
return cursor.rowcount > 0
|
|
488
|
+
except Exception as e:
|
|
489
|
+
logging.error(e)
|
|
490
|
+
return False
|
|
491
|
+
finally:
|
|
492
|
+
self.connection.close()
|
|
493
|
+
self.connection = None
|
|
494
|
+
|
|
495
|
+
def get_db_file(self) -> str:
|
|
496
|
+
"""Get the database file path."""
|
|
497
|
+
db_path = os.environ.get("DATA_FOLDER", "./data")
|
|
498
|
+
db_path = os.path.join(db_path, "databases")
|
|
499
|
+
db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
|
|
500
|
+
return db_path
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
|
|
2
|
+
import sqlite3
|
|
3
|
+
from sqlite3 import Connection
|
|
4
|
+
import os
|
|
5
|
+
from time import strftime
|
|
6
|
+
from typing import Optional
|
|
7
|
+
import logging
|
|
8
|
+
from string import Template
|
|
9
|
+
import json
|
|
10
|
+
|
|
11
|
+
from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
|
|
12
|
+
|
|
13
|
+
# Module-level logger. NOTE: this rebinds the name `logging` from the stdlib
# module to a Logger instance; the rest of the module relies on that name.
logging = logging.getLogger(__name__)

# Name of the SQLite table that every query below operates on.
SUMMARIZED_FILES_TABLE_NAME = "SummarizedFiles"

# DDL for the table. A summary is unique per
# (file_path, instruction, summarize_level, summarize_prompt).
summarized_files_create_table_query = f"""
CREATE TABLE IF NOT EXISTS {SUMMARIZED_FILES_TABLE_NAME} (
    file_path VARCHAR(512),
    instruction TEXT,
    summarize_prompt TEXT,
    summarize_level INTEGER,
    summarized_text TEXT,
    token_usage VARCHAR(512),
    datetime TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d %H:%M:%f', 'now')),
    UNIQUE (file_path, instruction, summarize_level, summarize_prompt)
);
"""

# Upsert taking 6 positional parameters: file_path, instruction,
# summarize_level, summarize_prompt, summarized_text, token_usage.
# On conflict the stored summary is replaced; token_usage is replaced together
# with it so the row never pairs a new summary with the usage of an old one.
summarized_files_upsert_query = f"""
INSERT INTO {SUMMARIZED_FILES_TABLE_NAME}(file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, datetime)
VALUES (?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%f', 'now'))
ON CONFLICT(file_path, instruction, summarize_level, summarize_prompt) DO UPDATE SET summarized_text=excluded.summarized_text,
token_usage=excluded.token_usage,
datetime=strftime('%Y-%m-%d %H:%M:%f', 'now');
"""

# Lookup of one summary; 4 positional parameters: file_path, instruction,
# summarize_level, summarize_prompt. Returns (summarized_text, datetime).
summarized_files_select_query = f"""
SELECT summarized_text, datetime FROM {SUMMARIZED_FILES_TABLE_NAME}
where file_path = ? and instruction = ? and summarize_level = ? and summarize_prompt=?;
"""
|
|
39
|
+
|
|
40
|
+
class SummarizedFilesDb:
|
|
41
|
+
def __init__(self, author: str, repo_name: str, data_folder: str = None):
|
|
42
|
+
self.author = author
|
|
43
|
+
self.repo_name = repo_name
|
|
44
|
+
self.connection: Connection | None = None
|
|
45
|
+
self.data_folder = data_folder
|
|
46
|
+
|
|
47
|
+
def _ensure_tables(self) -> bool:
|
|
48
|
+
if self.connection is None:
|
|
49
|
+
return False
|
|
50
|
+
try:
|
|
51
|
+
cursor = self.connection.cursor()
|
|
52
|
+
cursor.execute(
|
|
53
|
+
summarized_files_create_table_query
|
|
54
|
+
)
|
|
55
|
+
self.connection.commit()
|
|
56
|
+
return True
|
|
57
|
+
except Exception as e:
|
|
58
|
+
logging.error(e)
|
|
59
|
+
return False
|
|
60
|
+
|
|
61
|
+
def _connect_to_db(self) -> bool:
|
|
62
|
+
if self.connection is not None:
|
|
63
|
+
return True
|
|
64
|
+
db_path = self.data_folder
|
|
65
|
+
if db_path is None:
|
|
66
|
+
db_path = os.environ.get("DATA_FOLDER", "./data")
|
|
67
|
+
db_path = os.path.join(db_path, "databases")
|
|
68
|
+
# Ensure the local path exists
|
|
69
|
+
try:
|
|
70
|
+
os.makedirs(db_path, exist_ok=True)
|
|
71
|
+
except Exception as e:
|
|
72
|
+
logging.error(e)
|
|
73
|
+
return False
|
|
74
|
+
db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_summarized_file.db")
|
|
75
|
+
if not os.path.exists(db_path):
|
|
76
|
+
try:
|
|
77
|
+
with open(db_path, "w"):
|
|
78
|
+
pass
|
|
79
|
+
except Exception as e:
|
|
80
|
+
logging.error(e)
|
|
81
|
+
return False
|
|
82
|
+
self.connection = sqlite3.connect(db_path)
|
|
83
|
+
return True
|
|
84
|
+
|
|
85
|
+
def upsert_summarized_file(
|
|
86
|
+
self,
|
|
87
|
+
file_path: str,
|
|
88
|
+
instruction: str,
|
|
89
|
+
summarize_level: int,
|
|
90
|
+
summarize_prompt: str,
|
|
91
|
+
summarized_text: str,
|
|
92
|
+
token_usage: dict | None = None
|
|
93
|
+
):
|
|
94
|
+
token_usage = token_usage if token_usage is not None else {**DEFAULT_TOKEN_USAGE}
|
|
95
|
+
token_usage = json.dumps(token_usage)
|
|
96
|
+
res = self._connect_to_db()
|
|
97
|
+
assert res
|
|
98
|
+
res = self._ensure_tables()
|
|
99
|
+
assert res
|
|
100
|
+
try:
|
|
101
|
+
cursor = self.connection.cursor()
|
|
102
|
+
cursor.execute(
|
|
103
|
+
summarized_files_upsert_query,
|
|
104
|
+
(file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, )
|
|
105
|
+
)
|
|
106
|
+
self.connection.commit()
|
|
107
|
+
return True
|
|
108
|
+
except Exception as e:
|
|
109
|
+
logging.error(e)
|
|
110
|
+
return False
|
|
111
|
+
finally:
|
|
112
|
+
self.connection.close()
|
|
113
|
+
self.connection = None
|
|
114
|
+
|
|
115
|
+
def select_summarized_text(
|
|
116
|
+
self,
|
|
117
|
+
file_path: str,
|
|
118
|
+
instruction: str,
|
|
119
|
+
summarize_level: int,
|
|
120
|
+
summarize_prompt: str = "N/A",
|
|
121
|
+
) -> str | None:
|
|
122
|
+
self._connect_to_db()
|
|
123
|
+
self._ensure_tables()
|
|
124
|
+
try:
|
|
125
|
+
cursor = self.connection.cursor()
|
|
126
|
+
cursor.execute(
|
|
127
|
+
summarized_files_select_query,
|
|
128
|
+
(file_path, instruction, summarize_level, summarize_prompt,)
|
|
129
|
+
)
|
|
130
|
+
row = cursor.fetchone()
|
|
131
|
+
if row is None:
|
|
132
|
+
return None
|
|
133
|
+
return row[0]
|
|
134
|
+
except Exception as e:
|
|
135
|
+
logging.error(e)
|
|
136
|
+
return None
|
|
137
|
+
finally:
|
|
138
|
+
self.connection.close()
|
|
139
|
+
self.connection = None
|
|
140
|
+
|
|
141
|
+
def get_db_file(self):
|
|
142
|
+
db_path = os.environ.get("DATA_FOLDER", "./data")
|
|
143
|
+
db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
|
|
144
|
+
return db_path
|
|
145
|
+
|
|
146
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from .models import (
|
|
2
|
+
EvaluationReport,
|
|
3
|
+
SuggestionItem,
|
|
4
|
+
StyleProfile,
|
|
5
|
+
PlannedEdit,
|
|
6
|
+
DocumentPlan,
|
|
7
|
+
OutputArtifact,
|
|
8
|
+
GenerationManifest,
|
|
9
|
+
)
|
|
10
|
+
from .report_loader import EvaluationReportLoader
|
|
11
|
+
from .suggestion_extractor import SuggestionExtractor
|
|
12
|
+
from .repo_reader import RepoReader
|
|
13
|
+
from .style_analyzer import StyleAnalyzer
|
|
14
|
+
from .change_planner import ChangePlanner
|
|
15
|
+
from .document_renderer import DocumentRenderer
|
|
16
|
+
from .output_manager import OutputManager
|
|
17
|
+
from .llm_content_generator import LLMContentGenerator
|
|
18
|
+
from .llm_cleaner import LLMCleaner
|
|
19
|
+
|
|
20
|
+
# Public API of the package: exactly the names imported above, listed in the
# same order as the import statements.
__all__ = [
    # from .models
    "EvaluationReport",
    "SuggestionItem",
    "StyleProfile",
    "PlannedEdit",
    "DocumentPlan",
    "OutputArtifact",
    "GenerationManifest",
    # one class per sibling module
    "EvaluationReportLoader",
    "SuggestionExtractor",
    "RepoReader",
    "StyleAnalyzer",
    "ChangePlanner",
    "DocumentRenderer",
    "OutputManager",
    "LLMContentGenerator",
    "LLMCleaner",
]
|
|
38
|
+
|
|
39
|
+
|