bioguider 0.2.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. bioguider/__init__.py +0 -0
  2. bioguider/agents/__init__.py +0 -0
  3. bioguider/agents/agent_task.py +92 -0
  4. bioguider/agents/agent_tools.py +176 -0
  5. bioguider/agents/agent_utils.py +504 -0
  6. bioguider/agents/collection_execute_step.py +182 -0
  7. bioguider/agents/collection_observe_step.py +125 -0
  8. bioguider/agents/collection_plan_step.py +156 -0
  9. bioguider/agents/collection_task.py +184 -0
  10. bioguider/agents/collection_task_utils.py +142 -0
  11. bioguider/agents/common_agent.py +137 -0
  12. bioguider/agents/common_agent_2step.py +215 -0
  13. bioguider/agents/common_conversation.py +61 -0
  14. bioguider/agents/common_step.py +85 -0
  15. bioguider/agents/consistency_collection_step.py +102 -0
  16. bioguider/agents/consistency_evaluation_task.py +57 -0
  17. bioguider/agents/consistency_evaluation_task_utils.py +14 -0
  18. bioguider/agents/consistency_observe_step.py +110 -0
  19. bioguider/agents/consistency_query_step.py +77 -0
  20. bioguider/agents/dockergeneration_execute_step.py +186 -0
  21. bioguider/agents/dockergeneration_observe_step.py +154 -0
  22. bioguider/agents/dockergeneration_plan_step.py +158 -0
  23. bioguider/agents/dockergeneration_task.py +158 -0
  24. bioguider/agents/dockergeneration_task_utils.py +220 -0
  25. bioguider/agents/evaluation_installation_task.py +270 -0
  26. bioguider/agents/evaluation_readme_task.py +767 -0
  27. bioguider/agents/evaluation_submission_requirements_task.py +172 -0
  28. bioguider/agents/evaluation_task.py +206 -0
  29. bioguider/agents/evaluation_tutorial_task.py +169 -0
  30. bioguider/agents/evaluation_tutorial_task_prompts.py +187 -0
  31. bioguider/agents/evaluation_userguide_prompts.py +179 -0
  32. bioguider/agents/evaluation_userguide_task.py +154 -0
  33. bioguider/agents/evaluation_utils.py +127 -0
  34. bioguider/agents/identification_execute_step.py +181 -0
  35. bioguider/agents/identification_observe_step.py +104 -0
  36. bioguider/agents/identification_plan_step.py +140 -0
  37. bioguider/agents/identification_task.py +270 -0
  38. bioguider/agents/identification_task_utils.py +22 -0
  39. bioguider/agents/peo_common_step.py +64 -0
  40. bioguider/agents/prompt_utils.py +253 -0
  41. bioguider/agents/python_ast_repl_tool.py +69 -0
  42. bioguider/agents/rag_collection_task.py +130 -0
  43. bioguider/conversation.py +67 -0
  44. bioguider/database/code_structure_db.py +500 -0
  45. bioguider/database/summarized_file_db.py +146 -0
  46. bioguider/generation/__init__.py +39 -0
  47. bioguider/generation/benchmark_metrics.py +610 -0
  48. bioguider/generation/change_planner.py +189 -0
  49. bioguider/generation/document_renderer.py +157 -0
  50. bioguider/generation/llm_cleaner.py +67 -0
  51. bioguider/generation/llm_content_generator.py +1128 -0
  52. bioguider/generation/llm_injector.py +809 -0
  53. bioguider/generation/models.py +85 -0
  54. bioguider/generation/output_manager.py +74 -0
  55. bioguider/generation/repo_reader.py +37 -0
  56. bioguider/generation/report_loader.py +166 -0
  57. bioguider/generation/style_analyzer.py +36 -0
  58. bioguider/generation/suggestion_extractor.py +436 -0
  59. bioguider/generation/test_metrics.py +189 -0
  60. bioguider/managers/benchmark_manager.py +785 -0
  61. bioguider/managers/evaluation_manager.py +215 -0
  62. bioguider/managers/generation_manager.py +686 -0
  63. bioguider/managers/generation_test_manager.py +107 -0
  64. bioguider/managers/generation_test_manager_v2.py +525 -0
  65. bioguider/rag/__init__.py +0 -0
  66. bioguider/rag/config.py +117 -0
  67. bioguider/rag/data_pipeline.py +651 -0
  68. bioguider/rag/embedder.py +24 -0
  69. bioguider/rag/rag.py +138 -0
  70. bioguider/settings.py +103 -0
  71. bioguider/utils/code_structure_builder.py +59 -0
  72. bioguider/utils/constants.py +135 -0
  73. bioguider/utils/default.gitignore +140 -0
  74. bioguider/utils/file_utils.py +215 -0
  75. bioguider/utils/gitignore_checker.py +175 -0
  76. bioguider/utils/notebook_utils.py +117 -0
  77. bioguider/utils/pyphen_utils.py +73 -0
  78. bioguider/utils/python_file_handler.py +65 -0
  79. bioguider/utils/r_file_handler.py +551 -0
  80. bioguider/utils/utils.py +163 -0
  81. bioguider-0.2.52.dist-info/LICENSE +21 -0
  82. bioguider-0.2.52.dist-info/METADATA +51 -0
  83. bioguider-0.2.52.dist-info/RECORD +84 -0
  84. bioguider-0.2.52.dist-info/WHEEL +4 -0
@@ -0,0 +1,500 @@
1
+ import sqlite3
2
+ from sqlite3 import Connection
3
+ import os
4
+ from time import strftime
5
+ from typing import Optional, List, Dict, Any
6
+ import logging
7
+ import json
8
+
9
# Module-level logger.  NOTE: this rebinds the name ``logging`` (shadowing the
# standard-library module); the rest of this file calls ``logging.error`` on it.
logging = logging.getLogger(__name__)

CODE_STRUCTURE_TABLE_NAME = "SourceCodeStructure"

# Column list returned, in this order, by every SELECT statement below.
_CODE_STRUCTURE_COLUMNS = (
    "id, name, path, start_lineno, end_lineno, parent, "
    "doc_string, params, reference_to, reference_by, datetime"
)

# SQLite expression used wherever a row timestamp is written.
_CODE_STRUCTURE_NOW = "strftime('%Y-%m-%d %H:%M:%f', 'now')"


def _code_structure_select(where_clause: str) -> str:
    """Build a SELECT over the code-structure table filtered by *where_clause*."""
    return (
        f"\nSELECT {_CODE_STRUCTURE_COLUMNS}\n"
        f"FROM {CODE_STRUCTURE_TABLE_NAME}\n"
        f"WHERE {where_clause};\n"
    )


# Schema: one row per code element, unique on (name, path, line range, parent).
code_structure_create_table_query = f"""
CREATE TABLE IF NOT EXISTS {CODE_STRUCTURE_TABLE_NAME} (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name VARCHAR(256) NOT NULL,
path VARCHAR(512) NOT NULL,
start_lineno INTEGER NOT NULL,
end_lineno INTEGER NOT NULL,
parent VARCHAR(256),
doc_string TEXT,
params TEXT,
reference_to TEXT,
reference_by TEXT,
datetime TEXT NOT NULL DEFAULT ({_CODE_STRUCTURE_NOW}),
UNIQUE (name, path, start_lineno, end_lineno, parent)
);
"""

# Upsert: a row that collides on the UNIQUE key has its descriptive columns
# and timestamp overwritten instead of raising a constraint error.
code_structure_insert_query = f"""
INSERT INTO {CODE_STRUCTURE_TABLE_NAME}(name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, datetime)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, {_CODE_STRUCTURE_NOW})
ON CONFLICT(name, path, start_lineno, end_lineno, parent) DO UPDATE SET doc_string=excluded.doc_string, params=excluded.params,
reference_to=excluded.reference_to, reference_by=excluded.reference_by, datetime={_CODE_STRUCTURE_NOW};
"""

# Lookups; each takes its placeholders in the order they appear in the clause.
code_structure_select_by_path_query = _code_structure_select("path = ?")
code_structure_select_by_name_query = _code_structure_select("name = ?")
code_structure_select_by_name_and_path_query = _code_structure_select(
    "name = ? AND path = ?"
)
code_structure_select_by_id_query = _code_structure_select("id = ?")
code_structure_select_by_parent_and_parentpath_query = _code_structure_select(
    "parent = ? AND path = ?"
)
code_structure_select_by_parent_query = _code_structure_select("parent = ?")

# Full-row update addressed by primary key; the timestamp is refreshed too.
code_structure_update_query = f"""
UPDATE {CODE_STRUCTURE_TABLE_NAME}
SET name = ?, path = ?, start_lineno = ?, end_lineno = ?, parent = ?, doc_string = ?, params = ?, reference_to = ?, reference_by = ?, datetime = {_CODE_STRUCTURE_NOW}
WHERE id = ?;
"""

code_structure_delete_query = f"""
DELETE FROM {CODE_STRUCTURE_TABLE_NAME} WHERE id = ?;
"""

code_structure_select_by_name_and_parent_and_path_query = _code_structure_select(
    "name = ? AND parent = ? AND path = ?"
)
code_structure_select_by_name_and_parent_query = _code_structure_select(
    "name = ? AND parent = ?"
)
94
+
95
+ class CodeStructureDb:
96
+ def __init__(self, author: str, repo_name: str, data_folder: str = None):
97
+ self.author = author
98
+ self.repo_name = repo_name
99
+ self.data_folder = data_folder
100
+ self.connection: Connection | None = None
101
+
102
+ def _ensure_tables(self) -> bool:
103
+ if self.connection is None:
104
+ return False
105
+ try:
106
+ cursor = self.connection.cursor()
107
+ cursor.execute(code_structure_create_table_query)
108
+ self.connection.commit()
109
+ return True
110
+ except Exception as e:
111
+ logging.error(e)
112
+ return False
113
+
114
+ def _connect_to_db(self) -> bool:
115
+ if self.connection is not None:
116
+ return True
117
+ db_path = self.data_folder
118
+ if db_path is None:
119
+ db_path = os.environ.get("DATA_FOLDER", "./data")
120
+ db_path = os.path.join(db_path, "databases")
121
+ if not os.path.exists(db_path):
122
+ try:
123
+ os.makedirs(db_path, exist_ok=True)
124
+ except Exception as e:
125
+ logging.error(e)
126
+ return False
127
+ db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_code_structure.db")
128
+ if not os.path.exists(db_path):
129
+ try:
130
+ with open(db_path, "w"):
131
+ pass
132
+ except Exception as e:
133
+ logging.error(e)
134
+ return False
135
+ self.connection = sqlite3.connect(db_path)
136
+ return True
137
+
138
+ def is_database_built(self) -> bool:
139
+ res = self._connect_to_db()
140
+ if not res:
141
+ return False
142
+ res = self._ensure_tables()
143
+ if not res:
144
+ return False
145
+ try:
146
+ cursor = self.connection.cursor()
147
+ cursor.execute(f"SELECT * FROM {CODE_STRUCTURE_TABLE_NAME}")
148
+ return cursor.fetchone() is not None
149
+ except Exception as e:
150
+ logging.error(e)
151
+ return False
152
+ finally:
153
+ self.connection.close()
154
+ self.connection = None
155
+
156
+ def insert_code_structure(
157
+ self,
158
+ name: str,
159
+ path: str,
160
+ start_lineno: int,
161
+ end_lineno: int,
162
+ parent: str = None,
163
+ doc_string: str = None,
164
+ params: str = None,
165
+ reference_to: str = None,
166
+ reference_by: str = None
167
+ ) -> bool:
168
+ """Insert a new code structure entry into the database."""
169
+ if parent is None:
170
+ parent = ""
171
+ if path is None:
172
+ path = ""
173
+ res = self._connect_to_db()
174
+ if not res:
175
+ return False
176
+ res = self._ensure_tables()
177
+ if not res:
178
+ return False
179
+ try:
180
+ cursor = self.connection.cursor()
181
+ cursor.execute(
182
+ code_structure_insert_query,
183
+ (name, path, start_lineno, end_lineno, parent, doc_string, json.dumps(params) if params is not None else None, reference_to, reference_by)
184
+ )
185
+ self.connection.commit()
186
+ return True
187
+ except Exception as e:
188
+ logging.error(e)
189
+ return False
190
+ finally:
191
+ self.connection.close()
192
+ self.connection = None
193
+
194
+ def select_by_path(self, path: str) -> List[Dict[str, Any]]:
195
+ """Select all code structures by file path."""
196
+ res = self._connect_to_db()
197
+ if not res:
198
+ return []
199
+ res = self._ensure_tables()
200
+ if not res:
201
+ return []
202
+ try:
203
+ cursor = self.connection.cursor()
204
+ cursor.execute(code_structure_select_by_path_query, (path,))
205
+ rows = cursor.fetchall()
206
+ return [
207
+ {
208
+ "id": row[0],
209
+ "name": row[1],
210
+ "path": row[2],
211
+ "start_lineno": row[3],
212
+ "end_lineno": row[4],
213
+ "parent": row[5],
214
+ "doc_string": row[6],
215
+ "params": row[7],
216
+ "reference_to": row[8],
217
+ "reference_by": row[9],
218
+ "datetime": row[10]
219
+ }
220
+ for row in rows
221
+ ]
222
+ except Exception as e:
223
+ logging.error(e)
224
+ return []
225
+ finally:
226
+ self.connection.close()
227
+ self.connection = None
228
+
229
+ def select_by_name(self, name: str) -> List[Dict[str, Any]]:
230
+ """Select all code structures by name."""
231
+ res = self._connect_to_db()
232
+ if not res:
233
+ return []
234
+ res = self._ensure_tables()
235
+ if not res:
236
+ return []
237
+ try:
238
+ cursor = self.connection.cursor()
239
+ cursor.execute(code_structure_select_by_name_query, (name,))
240
+ rows = cursor.fetchall()
241
+ return [
242
+ {
243
+ "id": row[0],
244
+ "name": row[1],
245
+ "path": row[2],
246
+ "start_lineno": row[3],
247
+ "end_lineno": row[4],
248
+ "parent": row[5],
249
+ "doc_string": row[6],
250
+ "params": row[7],
251
+ "reference_to": row[8],
252
+ "reference_by": row[9],
253
+ "datetime": row[10]
254
+ }
255
+ for row in rows
256
+ ]
257
+ except Exception as e:
258
+ logging.error(e)
259
+ return []
260
+ finally:
261
+ self.connection.close()
262
+ self.connection = None
263
+
264
+ def select_by_name_and_path(self, name: str, path: str) -> Optional[Dict[str, Any]]:
265
+ """Select a code structure by name and path."""
266
+ res = self._connect_to_db()
267
+ if not res:
268
+ return None
269
+ res = self._ensure_tables()
270
+ if not res:
271
+ return None
272
+ try:
273
+ cursor = self.connection.cursor()
274
+ cursor.execute(code_structure_select_by_name_and_path_query, (name, path))
275
+ row = cursor.fetchone()
276
+ if row is None:
277
+ return None
278
+ return {
279
+ "id": row[0],
280
+ "name": row[1],
281
+ "path": row[2],
282
+ "start_lineno": row[3],
283
+ "end_lineno": row[4],
284
+ "parent": row[5],
285
+ "doc_string": row[6],
286
+ "params": row[7],
287
+ "reference_to": row[8],
288
+ "reference_by": row[9],
289
+ "datetime": row[10]
290
+ }
291
+ except Exception as e:
292
+ logging.error(e)
293
+ return None
294
+ finally:
295
+ self.connection.close()
296
+ self.connection = None
297
+
298
+ def select_by_name_and_parent(self, name: str, parent: str) -> List[Dict[str, Any]]:
299
+ """Select all code structures by name and parent."""
300
+ res = self._connect_to_db()
301
+ if not res:
302
+ return []
303
+ res = self._ensure_tables()
304
+ if not res:
305
+ return []
306
+ try:
307
+ cursor = self.connection.cursor()
308
+ cursor.execute(code_structure_select_by_name_and_parent_query, (name, parent))
309
+ rows = cursor.fetchall()
310
+ return [
311
+ {
312
+ "id": row[0],
313
+ "name": row[1],
314
+ "path": row[2],
315
+ "start_lineno": row[3],
316
+ "end_lineno": row[4],
317
+ "parent": row[5],
318
+ "doc_string": row[6],
319
+ "params": row[7],
320
+ "reference_to": row[8],
321
+ "reference_by": row[9],
322
+ "datetime": row[10]
323
+ }
324
+ for row in rows
325
+ ]
326
+ except Exception as e:
327
+ logging.error(e)
328
+ return []
329
+ finally:
330
+ self.connection.close()
331
+ self.connection = None
332
+
333
+
334
+ def select_by_name_and_parent_and_path(self, name: str, parent: str, path: str) -> Optional[Dict[str, Any]]:
335
+ """Select a code structure by name and parent."""
336
+ res = self._connect_to_db()
337
+ if not res:
338
+ return None
339
+ res = self._ensure_tables()
340
+ if not res:
341
+ return None
342
+ try:
343
+ cursor = self.connection.cursor()
344
+ cursor.execute(code_structure_select_by_name_and_parent_and_path_query, (name, parent, path))
345
+ row = cursor.fetchone()
346
+ if row is None:
347
+ return None
348
+ return {
349
+ "id": row[0],
350
+ "name": row[1],
351
+ "path": row[2],
352
+ "start_lineno": row[3],
353
+ "end_lineno": row[4],
354
+ "parent": row[5],
355
+ "doc_string": row[6],
356
+ "params": row[7],
357
+ "reference_to": row[8],
358
+ "reference_by": row[9],
359
+ "datetime": row[10]
360
+ }
361
+ except Exception as e:
362
+ logging.error(e)
363
+ return None
364
+ finally:
365
+ self.connection.close()
366
+ self.connection = None
367
+
368
+ def select_by_id(self, id: int) -> Optional[Dict[str, Any]]:
369
+ """Select a code structure by ID."""
370
+ res = self._connect_to_db()
371
+ if not res:
372
+ return None
373
+ res = self._ensure_tables()
374
+ if not res:
375
+ return None
376
+ try:
377
+ cursor = self.connection.cursor()
378
+ cursor.execute(code_structure_select_by_id_query, (id,))
379
+ row = cursor.fetchone()
380
+ if row is None:
381
+ return None
382
+ return {
383
+ "id": row[0],
384
+ "name": row[1],
385
+ "path": row[2],
386
+ "start_lineno": row[3],
387
+ "end_lineno": row[4],
388
+ "parent": row[5],
389
+ "doc_string": row[6],
390
+ "params": row[7],
391
+ "reference_to": row[8],
392
+ "reference_by": row[9],
393
+ "datetime": row[10]
394
+ }
395
+ except Exception as e:
396
+ logging.error(e)
397
+ return None
398
+ finally:
399
+ self.connection.close()
400
+ self.connection = None
401
+
402
+ def update_code_structure(
403
+ self,
404
+ id: int,
405
+ name: str,
406
+ path: str,
407
+ start_lineno: int,
408
+ end_lineno: int,
409
+ parent: str = None,
410
+ doc_string: str = None,
411
+ params: str = None,
412
+ reference_to: str = None,
413
+ reference_by: str = None
414
+ ) -> bool:
415
+ """Update an existing code structure entry."""
416
+ res = self._connect_to_db()
417
+ if not res:
418
+ return False
419
+ res = self._ensure_tables()
420
+ if not res:
421
+ return False
422
+ try:
423
+ cursor = self.connection.cursor()
424
+ cursor.execute(
425
+ code_structure_update_query,
426
+ (name, path, start_lineno, end_lineno, parent, doc_string, params, reference_to, reference_by, id)
427
+ )
428
+ self.connection.commit()
429
+ return cursor.rowcount > 0
430
+ except Exception as e:
431
+ logging.error(e)
432
+ return False
433
+ finally:
434
+ self.connection.close()
435
+ self.connection = None
436
+
437
+ def select_by_parent(self, parent: str, path: str | None = None) -> List[Dict[str, Any]]:
438
+ """Select all code structures by parent."""
439
+ res = self._connect_to_db()
440
+ if not res:
441
+ return []
442
+ res = self._ensure_tables()
443
+ if not res:
444
+ return []
445
+ try:
446
+ cursor = self.connection.cursor()
447
+ if path is not None:
448
+ cursor.execute(code_structure_select_by_parent_and_parentpath_query, (parent, path))
449
+ else:
450
+ cursor.execute(code_structure_select_by_parent_query, (parent,))
451
+ rows = cursor.fetchall()
452
+ return [
453
+ {
454
+ "id": row[0],
455
+ "name": row[1],
456
+ "path": row[2],
457
+ "start_lineno": row[3],
458
+ "end_lineno": row[4],
459
+ "parent": row[5],
460
+ "doc_string": row[6],
461
+ "params": row[7],
462
+ "reference_to": row[8],
463
+ "reference_by": row[9],
464
+ "datetime": row[10]
465
+ }
466
+ for row in rows
467
+ ]
468
+ except Exception as e:
469
+ logging.error(e)
470
+ return []
471
+ finally:
472
+ self.connection.close()
473
+ self.connection = None
474
+
475
+ def delete_code_structure(self, id: int) -> bool:
476
+ """Delete a code structure entry by ID."""
477
+ res = self._connect_to_db()
478
+ if not res:
479
+ return False
480
+ res = self._ensure_tables()
481
+ if not res:
482
+ return False
483
+ try:
484
+ cursor = self.connection.cursor()
485
+ cursor.execute(code_structure_delete_query, (id,))
486
+ self.connection.commit()
487
+ return cursor.rowcount > 0
488
+ except Exception as e:
489
+ logging.error(e)
490
+ return False
491
+ finally:
492
+ self.connection.close()
493
+ self.connection = None
494
+
495
+ def get_db_file(self) -> str:
496
+ """Get the database file path."""
497
+ db_path = os.environ.get("DATA_FOLDER", "./data")
498
+ db_path = os.path.join(db_path, "databases")
499
+ db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
500
+ return db_path
@@ -0,0 +1,146 @@
1
+
2
+ import sqlite3
3
+ from sqlite3 import Connection
4
+ import os
5
+ from time import strftime
6
+ from typing import Optional
7
+ import logging
8
+ from string import Template
9
+ import json
10
+
11
+ from bioguider.utils.constants import DEFAULT_TOKEN_USAGE
12
+
13
# Module-level logger.  NOTE: this rebinds the name ``logging`` (shadowing the
# standard-library module); the rest of this file calls ``logging.error`` on it.
logging = logging.getLogger(__name__)

SUMMARIZED_FILES_TABLE_NAME = "SummarizedFiles"

# Schema: one cached summary per (file, instruction, level, prompt) combination.
summarized_files_create_table_query = f"""
CREATE TABLE IF NOT EXISTS {SUMMARIZED_FILES_TABLE_NAME} (
file_path VARCHAR(512),
instruction TEXT,
summarize_prompt TEXT,
summarize_level INTEGER,
summarized_text TEXT,
token_usage VARCHAR(512),
datetime TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d %H:%M:%f', 'now')),
UNIQUE (file_path, instruction, summarize_level, summarize_prompt)
);
"""

# Upsert: on a key collision the stored text, its token usage and the
# timestamp are all replaced, so token_usage always describes the summary
# that is actually stored.
summarized_files_upsert_query = f"""
INSERT INTO {SUMMARIZED_FILES_TABLE_NAME}(file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, datetime)
VALUES (?, ?, ?, ?, ?, ?, strftime('%Y-%m-%d %H:%M:%f', 'now'))
ON CONFLICT(file_path, instruction, summarize_level, summarize_prompt) DO UPDATE SET summarized_text=excluded.summarized_text,
token_usage=excluded.token_usage, datetime=strftime('%Y-%m-%d %H:%M:%f', 'now');
"""

# Lookup by the full unique key; returns (summarized_text, datetime).
summarized_files_select_query = f"""
SELECT summarized_text, datetime FROM {SUMMARIZED_FILES_TABLE_NAME}
where file_path = ? and instruction = ? and summarize_level = ? and summarize_prompt=?;
"""
39
+
40
+ class SummarizedFilesDb:
41
+ def __init__(self, author: str, repo_name: str, data_folder: str = None):
42
+ self.author = author
43
+ self.repo_name = repo_name
44
+ self.connection: Connection | None = None
45
+ self.data_folder = data_folder
46
+
47
+ def _ensure_tables(self) -> bool:
48
+ if self.connection is None:
49
+ return False
50
+ try:
51
+ cursor = self.connection.cursor()
52
+ cursor.execute(
53
+ summarized_files_create_table_query
54
+ )
55
+ self.connection.commit()
56
+ return True
57
+ except Exception as e:
58
+ logging.error(e)
59
+ return False
60
+
61
+ def _connect_to_db(self) -> bool:
62
+ if self.connection is not None:
63
+ return True
64
+ db_path = self.data_folder
65
+ if db_path is None:
66
+ db_path = os.environ.get("DATA_FOLDER", "./data")
67
+ db_path = os.path.join(db_path, "databases")
68
+ # Ensure the local path exists
69
+ try:
70
+ os.makedirs(db_path, exist_ok=True)
71
+ except Exception as e:
72
+ logging.error(e)
73
+ return False
74
+ db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}_summarized_file.db")
75
+ if not os.path.exists(db_path):
76
+ try:
77
+ with open(db_path, "w"):
78
+ pass
79
+ except Exception as e:
80
+ logging.error(e)
81
+ return False
82
+ self.connection = sqlite3.connect(db_path)
83
+ return True
84
+
85
+ def upsert_summarized_file(
86
+ self,
87
+ file_path: str,
88
+ instruction: str,
89
+ summarize_level: int,
90
+ summarize_prompt: str,
91
+ summarized_text: str,
92
+ token_usage: dict | None = None
93
+ ):
94
+ token_usage = token_usage if token_usage is not None else {**DEFAULT_TOKEN_USAGE}
95
+ token_usage = json.dumps(token_usage)
96
+ res = self._connect_to_db()
97
+ assert res
98
+ res = self._ensure_tables()
99
+ assert res
100
+ try:
101
+ cursor = self.connection.cursor()
102
+ cursor.execute(
103
+ summarized_files_upsert_query,
104
+ (file_path, instruction, summarize_level, summarize_prompt, summarized_text, token_usage, )
105
+ )
106
+ self.connection.commit()
107
+ return True
108
+ except Exception as e:
109
+ logging.error(e)
110
+ return False
111
+ finally:
112
+ self.connection.close()
113
+ self.connection = None
114
+
115
+ def select_summarized_text(
116
+ self,
117
+ file_path: str,
118
+ instruction: str,
119
+ summarize_level: int,
120
+ summarize_prompt: str = "N/A",
121
+ ) -> str | None:
122
+ self._connect_to_db()
123
+ self._ensure_tables()
124
+ try:
125
+ cursor = self.connection.cursor()
126
+ cursor.execute(
127
+ summarized_files_select_query,
128
+ (file_path, instruction, summarize_level, summarize_prompt,)
129
+ )
130
+ row = cursor.fetchone()
131
+ if row is None:
132
+ return None
133
+ return row[0]
134
+ except Exception as e:
135
+ logging.error(e)
136
+ return None
137
+ finally:
138
+ self.connection.close()
139
+ self.connection = None
140
+
141
+ def get_db_file(self):
142
+ db_path = os.environ.get("DATA_FOLDER", "./data")
143
+ db_path = os.path.join(db_path, f"{self.author}_{self.repo_name}.db")
144
+ return db_path
145
+
146
+
@@ -0,0 +1,39 @@
1
+ from .models import (
2
+ EvaluationReport,
3
+ SuggestionItem,
4
+ StyleProfile,
5
+ PlannedEdit,
6
+ DocumentPlan,
7
+ OutputArtifact,
8
+ GenerationManifest,
9
+ )
10
+ from .report_loader import EvaluationReportLoader
11
+ from .suggestion_extractor import SuggestionExtractor
12
+ from .repo_reader import RepoReader
13
+ from .style_analyzer import StyleAnalyzer
14
+ from .change_planner import ChangePlanner
15
+ from .document_renderer import DocumentRenderer
16
+ from .output_manager import OutputManager
17
+ from .llm_content_generator import LLMContentGenerator
18
+ from .llm_cleaner import LLMCleaner
19
+
20
# Names re-exported by this package; every entry is imported at the top of
# this module.
__all__ = [
    # imported from .models
    "EvaluationReport",
    "SuggestionItem",
    "StyleProfile",
    "PlannedEdit",
    "DocumentPlan",
    "OutputArtifact",
    "GenerationManifest",
    # one class from each remaining sibling module
    "EvaluationReportLoader",
    "SuggestionExtractor",
    "RepoReader",
    "StyleAnalyzer",
    "ChangePlanner",
    "DocumentRenderer",
    "OutputManager",
    "LLMContentGenerator",
    "LLMCleaner",
]
38
+
39
+