@aj-archipelago/cortex 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/README.md +1 -0
  2. package/config.js +1 -1
  3. package/helper-apps/cortex-autogen2/.dockerignore +1 -0
  4. package/helper-apps/cortex-autogen2/Dockerfile +6 -10
  5. package/helper-apps/cortex-autogen2/Dockerfile.worker +2 -0
  6. package/helper-apps/cortex-autogen2/agents.py +203 -2
  7. package/helper-apps/cortex-autogen2/main.py +1 -1
  8. package/helper-apps/cortex-autogen2/pyproject.toml +12 -0
  9. package/helper-apps/cortex-autogen2/requirements.txt +14 -0
  10. package/helper-apps/cortex-autogen2/services/redis_publisher.py +1 -1
  11. package/helper-apps/cortex-autogen2/services/run_analyzer.py +1 -1
  12. package/helper-apps/cortex-autogen2/task_processor.py +431 -229
  13. package/helper-apps/cortex-autogen2/test_entity_fetcher.py +305 -0
  14. package/helper-apps/cortex-autogen2/tests/README.md +240 -0
  15. package/helper-apps/cortex-autogen2/tests/TEST_REPORT.md +342 -0
  16. package/helper-apps/cortex-autogen2/tests/__init__.py +8 -0
  17. package/helper-apps/cortex-autogen2/tests/analysis/__init__.py +1 -0
  18. package/helper-apps/cortex-autogen2/tests/analysis/improvement_suggester.py +224 -0
  19. package/helper-apps/cortex-autogen2/tests/analysis/trend_analyzer.py +211 -0
  20. package/helper-apps/cortex-autogen2/tests/cli/__init__.py +1 -0
  21. package/helper-apps/cortex-autogen2/tests/cli/run_tests.py +296 -0
  22. package/helper-apps/cortex-autogen2/tests/collectors/__init__.py +1 -0
  23. package/helper-apps/cortex-autogen2/tests/collectors/log_collector.py +252 -0
  24. package/helper-apps/cortex-autogen2/tests/collectors/progress_collector.py +182 -0
  25. package/helper-apps/cortex-autogen2/tests/conftest.py +15 -0
  26. package/helper-apps/cortex-autogen2/tests/database/__init__.py +1 -0
  27. package/helper-apps/cortex-autogen2/tests/database/repository.py +501 -0
  28. package/helper-apps/cortex-autogen2/tests/database/schema.sql +108 -0
  29. package/helper-apps/cortex-autogen2/tests/evaluators/__init__.py +1 -0
  30. package/helper-apps/cortex-autogen2/tests/evaluators/llm_scorer.py +294 -0
  31. package/helper-apps/cortex-autogen2/tests/evaluators/prompts.py +250 -0
  32. package/helper-apps/cortex-autogen2/tests/evaluators/wordcloud_validator.py +168 -0
  33. package/helper-apps/cortex-autogen2/tests/metrics/__init__.py +1 -0
  34. package/helper-apps/cortex-autogen2/tests/metrics/collector.py +155 -0
  35. package/helper-apps/cortex-autogen2/tests/orchestrator.py +576 -0
  36. package/helper-apps/cortex-autogen2/tests/test_cases.yaml +279 -0
  37. package/helper-apps/cortex-autogen2/tests/test_data.db +0 -0
  38. package/helper-apps/cortex-autogen2/tests/utils/__init__.py +3 -0
  39. package/helper-apps/cortex-autogen2/tests/utils/connectivity.py +112 -0
  40. package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +74 -24
  41. package/helper-apps/cortex-autogen2/tools/entity_api_registry.json +38 -0
  42. package/helper-apps/cortex-autogen2/tools/file_tools.py +1 -1
  43. package/helper-apps/cortex-autogen2/tools/search_tools.py +436 -238
  44. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  45. package/helper-apps/cortex-file-handler/package.json +1 -1
  46. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +4 -5
  47. package/helper-apps/cortex-file-handler/src/blobHandler.js +36 -144
  48. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +5 -3
  49. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +34 -1
  50. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +22 -0
  51. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +28 -1
  52. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +29 -4
  53. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +11 -0
  54. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +1 -1
  55. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +3 -2
  56. package/helper-apps/cortex-file-handler/tests/checkHashShortLived.test.js +8 -1
  57. package/helper-apps/cortex-file-handler/tests/containerConversionFlow.test.js +5 -2
  58. package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +14 -7
  59. package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +5 -2
  60. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +31 -19
  61. package/package.json +1 -1
  62. package/server/modelExecutor.js +4 -0
  63. package/server/plugins/claude4VertexPlugin.js +540 -0
  64. package/server/plugins/openAiWhisperPlugin.js +43 -2
  65. package/tests/integration/rest/vendors/claude_streaming.test.js +121 -0
  66. package/tests/unit/plugins/claude4VertexPlugin.test.js +462 -0
  67. package/tests/unit/plugins/claude4VertexToolConversion.test.js +413 -0
  68. package/helper-apps/cortex-autogen/.funcignore +0 -8
  69. package/helper-apps/cortex-autogen/Dockerfile +0 -10
  70. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +0 -6
  71. package/helper-apps/cortex-autogen/agents.py +0 -493
  72. package/helper-apps/cortex-autogen/agents_extra.py +0 -14
  73. package/helper-apps/cortex-autogen/config.py +0 -18
  74. package/helper-apps/cortex-autogen/data_operations.py +0 -29
  75. package/helper-apps/cortex-autogen/function_app.py +0 -44
  76. package/helper-apps/cortex-autogen/host.json +0 -15
  77. package/helper-apps/cortex-autogen/main.py +0 -38
  78. package/helper-apps/cortex-autogen/prompts.py +0 -196
  79. package/helper-apps/cortex-autogen/prompts_extra.py +0 -5
  80. package/helper-apps/cortex-autogen/requirements.txt +0 -9
  81. package/helper-apps/cortex-autogen/search.py +0 -85
  82. package/helper-apps/cortex-autogen/test.sh +0 -40
  83. package/helper-apps/cortex-autogen/tools/sasfileuploader.py +0 -66
  84. package/helper-apps/cortex-autogen/utils.py +0 -88
  85. package/helper-apps/cortex-autogen2/DigiCertGlobalRootCA.crt.pem +0 -22
  86. package/helper-apps/cortex-autogen2/poetry.lock +0 -3652
@@ -0,0 +1,501 @@
1
+ """
2
+ Database repository for test results storage and retrieval.
3
+
4
+ This module provides a clean interface for storing and querying test data
5
+ from the SQLite database.
6
+ """
7
+
8
+ import sqlite3
9
+ import json
10
+ import os
11
+ from datetime import datetime
12
+ from typing import List, Dict, Optional, Any
13
+ from pathlib import Path
14
+
15
+
16
class TestRepository:
    """Repository for managing test results in a SQLite database.

    Each public method opens its own short-lived connection and always
    closes it before returning, including when a statement raises.
    """

    # Explicit rank for the textual priority values. A plain
    # ``ORDER BY priority`` sorts alphabetically (high < low < medium), which
    # never yields high -> medium -> low in either direction.
    _PRIORITY_RANK_SQL = (
        "CASE priority "
        "WHEN 'high' THEN 0 "
        "WHEN 'medium' THEN 1 "
        "WHEN 'low' THEN 2 "
        "ELSE 3 END"
    )

    # Columns of the evaluations table that hold JSON-encoded lists.
    _EVALUATION_JSON_FIELDS = (
        'progress_issues',
        'output_strengths',
        'output_weaknesses',
    )

    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize the repository and make sure the schema exists.

        Args:
            db_path: Path to SQLite database file. If None, uses
                ``test_results.db`` next to this module.
        """
        if db_path is None:
            db_path = Path(__file__).parent / "test_results.db"

        self.db_path = str(db_path)
        self._initialize_database()

    def _initialize_database(self):
        """Run ``schema.sql`` (located next to this module) against the database."""
        schema_path = Path(__file__).parent / "schema.sql"
        schema = schema_path.read_text(encoding="utf-8")

        conn = sqlite3.connect(self.db_path)
        try:
            conn.executescript(schema)
            conn.commit()
        finally:
            conn.close()

    def _get_connection(self) -> sqlite3.Connection:
        """Open a new connection whose rows support access by column name.

        NOTE(review): ``PRAGMA foreign_keys = ON`` is not issued here, and
        SQLite only enforces FOREIGN KEY clauses on connections that enable
        it. Confirm whether foreign-key enforcement is expected before
        switching it on, since that would make inserts for unknown test runs
        fail.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    # ==================== Internal helpers ====================

    @staticmethod
    def _to_db_timestamp(value):
        """Convert a ``datetime`` to the text form stored in the database.

        The text is ``value.isoformat(" ")``, which is what the implicit
        sqlite3 datetime adapter produced. Converting explicitly avoids
        relying on that adapter, which is deprecated since Python 3.12.
        Non-datetime values (including None) are returned unchanged.
        """
        if isinstance(value, datetime):
            return value.isoformat(" ")
        return value

    def _execute_write(self, sql: str, params: tuple = ()) -> Optional[int]:
        """Execute one modifying statement, commit, and return ``lastrowid``."""
        conn = self._get_connection()
        try:
            cursor = conn.execute(sql, params)
            conn.commit()
            return cursor.lastrowid
        finally:
            conn.close()

    def _fetch_all(self, sql: str, params: tuple = ()) -> List[Dict[str, Any]]:
        """Run a query and return every row as a plain dict."""
        conn = self._get_connection()
        try:
            return [dict(row) for row in conn.execute(sql, params).fetchall()]
        finally:
            conn.close()

    def _fetch_one(self, sql: str, params: tuple = ()) -> Optional[Dict[str, Any]]:
        """Run a query and return the first row as a dict, or None if empty."""
        conn = self._get_connection()
        try:
            row = conn.execute(sql, params).fetchone()
        finally:
            conn.close()
        return dict(row) if row else None

    # ==================== Test Runs ====================

    def create_test_run(
        self,
        test_case_id: str,
        task_description: str,
        request_id: str
    ) -> int:
        """
        Create a new test run record with status 'running'.

        The start time is the current local time.

        Returns:
            test_run_id: The ID of the created test run
        """
        return self._execute_write(
            """
            INSERT INTO test_runs (test_case_id, task_description, request_id, started_at, status)
            VALUES (?, ?, ?, ?, 'running')
            """,
            (
                test_case_id,
                task_description,
                request_id,
                self._to_db_timestamp(datetime.now()),
            ),
        )

    def update_test_run_status(
        self,
        test_run_id: int,
        status: str,
        completed_at: Optional[datetime] = None,
        error_message: Optional[str] = None
    ):
        """
        Update test run status, completion time, duration and error message.

        Args:
            test_run_id: ID of the test run to update
            status: New status value
            completed_at: Completion time. When omitted and ``status`` is
                'completed', 'failed' or 'timeout', the current time is used.
            error_message: Error text to store (None clears it)
        """
        if completed_at is None and status in ('completed', 'failed', 'timeout'):
            completed_at = datetime.now()

        conn = self._get_connection()
        try:
            # Duration is only known once there is a completion time and the
            # run's start time can be read back.
            duration_seconds = None
            if completed_at:
                row = conn.execute(
                    "SELECT started_at FROM test_runs WHERE id = ?",
                    (test_run_id,),
                ).fetchone()
                if row:
                    started_at = datetime.fromisoformat(row['started_at'])
                    duration_seconds = (completed_at - started_at).total_seconds()

            conn.execute(
                """
                UPDATE test_runs
                SET status = ?, completed_at = ?, duration_seconds = ?, error_message = ?
                WHERE id = ?
                """,
                (
                    status,
                    self._to_db_timestamp(completed_at),
                    duration_seconds,
                    error_message,
                    test_run_id,
                ),
            )
            conn.commit()
        finally:
            conn.close()

    def save_final_response(self, test_run_id: int, final_response: str):
        """
        Save the final response message sent to the user.

        This stores the complete final message including file URLs,
        making it easy to retrieve outputs from any test run.

        Args:
            test_run_id: ID of the test run
            final_response: The complete final message text with URLs
        """
        self._execute_write(
            """
            UPDATE test_runs
            SET final_response = ?
            WHERE id = ?
            """,
            (final_response, test_run_id),
        )

    def get_test_run(self, test_run_id: int) -> Optional[Dict[str, Any]]:
        """Get test run by ID, or None if it does not exist."""
        return self._fetch_one(
            "SELECT * FROM test_runs WHERE id = ?",
            (test_run_id,),
        )

    def get_recent_runs(self, test_case_id: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Get recent test runs, newest first, optionally filtered by test case.

        Runs created within the same second are ordered by descending id so
        the result is deterministic.
        """
        if test_case_id:
            return self._fetch_all(
                """
                SELECT * FROM test_runs
                WHERE test_case_id = ?
                ORDER BY created_at DESC, id DESC
                LIMIT ?
                """,
                (test_case_id, limit),
            )

        return self._fetch_all(
            """
            SELECT * FROM test_runs
            ORDER BY created_at DESC, id DESC
            LIMIT ?
            """,
            (limit,),
        )

    # ==================== Progress Updates ====================

    def add_progress_update(
        self,
        test_run_id: int,
        timestamp: datetime,
        progress: float,
        info: str,
        is_final: bool = False
    ):
        """Add a progress update to the database."""
        self._execute_write(
            """
            INSERT INTO progress_updates (test_run_id, timestamp, progress, info, is_final)
            VALUES (?, ?, ?, ?, ?)
            """,
            (test_run_id, self._to_db_timestamp(timestamp), progress, info, is_final),
        )

    def get_progress_updates(self, test_run_id: int) -> List[Dict[str, Any]]:
        """Get all progress updates for a test run, ordered by timestamp."""
        return self._fetch_all(
            """
            SELECT * FROM progress_updates
            WHERE test_run_id = ?
            ORDER BY timestamp ASC
            """,
            (test_run_id,),
        )

    # ==================== Logs ====================

    def add_log(
        self,
        test_run_id: int,
        timestamp: datetime,
        level: str,
        agent: Optional[str],
        message: str
    ):
        """Add a log entry."""
        self._execute_write(
            """
            INSERT INTO logs (test_run_id, timestamp, level, agent, message)
            VALUES (?, ?, ?, ?, ?)
            """,
            (test_run_id, self._to_db_timestamp(timestamp), level, agent, message),
        )

    def get_logs(self, test_run_id: int, level: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get logs for a test run ordered by timestamp, optionally filtered by level."""
        if level:
            return self._fetch_all(
                """
                SELECT * FROM logs
                WHERE test_run_id = ? AND level = ?
                ORDER BY timestamp ASC
                """,
                (test_run_id, level),
            )

        return self._fetch_all(
            """
            SELECT * FROM logs
            WHERE test_run_id = ?
            ORDER BY timestamp ASC
            """,
            (test_run_id,),
        )

    # ==================== Files ====================

    def add_file(
        self,
        test_run_id: int,
        file_path: str,
        file_type: str,
        file_size_bytes: Optional[int] = None,
        sas_url: Optional[str] = None
    ):
        """Add a file created during test execution."""
        self._execute_write(
            """
            INSERT INTO files_created (test_run_id, file_path, file_type, file_size_bytes, sas_url)
            VALUES (?, ?, ?, ?, ?)
            """,
            (test_run_id, file_path, file_type, file_size_bytes, sas_url),
        )

    def get_files(self, test_run_id: int) -> List[Dict[str, Any]]:
        """Get all files created during a test run, oldest first."""
        return self._fetch_all(
            """
            SELECT * FROM files_created
            WHERE test_run_id = ?
            ORDER BY created_at ASC
            """,
            (test_run_id,),
        )

    # ==================== Evaluations ====================

    def save_evaluation(
        self,
        test_run_id: int,
        progress_score: int,
        output_score: int,
        progress_reasoning: str,
        output_reasoning: str,
        progress_issues: Optional[List[str]] = None,
        output_strengths: Optional[List[str]] = None,
        output_weaknesses: Optional[List[str]] = None
    ):
        """
        Save evaluation scores and reasoning, replacing any existing row.

        The overall score is 80% output score plus 20% progress score,
        truncated to an integer. List arguments are stored as JSON arrays;
        None is stored as an empty array.
        """
        overall_score = int((output_score * 0.8) + (progress_score * 0.2))

        self._execute_write(
            """
            INSERT OR REPLACE INTO evaluations (
                test_run_id, progress_score, output_score, overall_score,
                progress_reasoning, output_reasoning,
                progress_issues, output_strengths, output_weaknesses
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                test_run_id, progress_score, output_score, overall_score,
                progress_reasoning, output_reasoning,
                json.dumps(progress_issues or []),
                json.dumps(output_strengths or []),
                json.dumps(output_weaknesses or []),
            ),
        )

    def get_evaluation(self, test_run_id: int) -> Optional[Dict[str, Any]]:
        """
        Get evaluation for a test run, with the JSON columns decoded to lists.

        Returns None when no evaluation exists. A JSON column that is NULL or
        empty is returned as an empty list instead of raising.
        """
        result = self._fetch_one(
            "SELECT * FROM evaluations WHERE test_run_id = ?",
            (test_run_id,),
        )
        if result is None:
            return None

        for field in self._EVALUATION_JSON_FIELDS:
            raw = result[field]
            result[field] = json.loads(raw) if raw else []
        return result

    # ==================== Metrics ====================

    def save_metrics(
        self,
        test_run_id: int,
        time_to_first_progress: float,
        time_to_completion: float,
        total_progress_updates: int,
        avg_update_interval: float,
        min_update_interval: float,
        max_update_interval: float,
        files_created: int,
        sas_urls_provided: int,
        errors_count: int,
        warnings_count: int
    ):
        """Save performance metrics, replacing any existing row for the run."""
        self._execute_write(
            """
            INSERT OR REPLACE INTO metrics (
                test_run_id, time_to_first_progress, time_to_completion,
                total_progress_updates, avg_update_interval,
                min_update_interval, max_update_interval,
                files_created, sas_urls_provided,
                errors_count, warnings_count
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                test_run_id, time_to_first_progress, time_to_completion,
                total_progress_updates, avg_update_interval,
                min_update_interval, max_update_interval,
                files_created, sas_urls_provided,
                errors_count, warnings_count,
            ),
        )

    def get_metrics(self, test_run_id: int) -> Optional[Dict[str, Any]]:
        """Get metrics for a test run, or None if none were saved."""
        return self._fetch_one(
            "SELECT * FROM metrics WHERE test_run_id = ?",
            (test_run_id,),
        )

    # ==================== Suggestions ====================

    def add_suggestion(
        self,
        test_run_id: int,
        suggestion: str,
        category: str = 'other',
        priority: str = 'medium',
        code_reference: Optional[str] = None
    ):
        """Add an improvement suggestion."""
        self._execute_write(
            """
            INSERT INTO suggestions (test_run_id, suggestion, category, priority, code_reference)
            VALUES (?, ?, ?, ?, ?)
            """,
            (test_run_id, suggestion, category, priority, code_reference),
        )

    def get_suggestions(self, test_run_id: int) -> List[Dict[str, Any]]:
        """
        Get all suggestions for a test run, most important first.

        Rows are ordered high -> medium -> low (any other priority value
        last), then by creation time and id.
        """
        return self._fetch_all(
            f"""
            SELECT * FROM suggestions
            WHERE test_run_id = ?
            ORDER BY {self._PRIORITY_RANK_SQL}, created_at ASC, id ASC
            """,
            (test_run_id,),
        )

    # ==================== Analytics ====================

    def get_average_scores(self, test_case_id: Optional[str] = None, limit: int = 10) -> Dict[str, float]:
        """
        Get average scores for recent test runs.

        The ``limit`` most recently created runs are selected first
        (optionally restricted to ``test_case_id``); the averages are then
        taken over those of them that are 'completed' and have an evaluation.
        Averages that cannot be computed are reported as 0.
        """
        if test_case_id:
            sql = """
                SELECT AVG(e.progress_score) as avg_progress,
                       AVG(e.output_score) as avg_output,
                       AVG(e.overall_score) as avg_overall
                FROM evaluations e
                JOIN test_runs tr ON e.test_run_id = tr.id
                WHERE tr.test_case_id = ? AND tr.status = 'completed'
                AND tr.id IN (
                    SELECT id FROM test_runs
                    WHERE test_case_id = ?
                    ORDER BY created_at DESC, id DESC
                    LIMIT ?
                )
            """
            params = (test_case_id, test_case_id, limit)
        else:
            sql = """
                SELECT AVG(e.progress_score) as avg_progress,
                       AVG(e.output_score) as avg_output,
                       AVG(e.overall_score) as avg_overall
                FROM evaluations e
                JOIN test_runs tr ON e.test_run_id = tr.id
                WHERE tr.status = 'completed'
                AND tr.id IN (
                    SELECT id FROM test_runs
                    ORDER BY created_at DESC, id DESC
                    LIMIT ?
                )
            """
            params = (limit,)

        row = self._fetch_one(sql, params) or {}
        return {
            'avg_progress_score': row.get('avg_progress') or 0,
            'avg_output_score': row.get('avg_output') or 0,
            'avg_overall_score': row.get('avg_overall') or 0,
        }

    def get_score_trend(self, test_case_id: str, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Get the score trend over time for a specific test case.

        Returns the ``limit`` most recent completed, evaluated runs in
        chronological order (oldest first), so the trend keeps tracking new
        runs once more than ``limit`` exist. Each item has the keys
        ``created_at``, ``progress_score``, ``output_score`` and
        ``overall_score``.
        """
        return self._fetch_all(
            """
            SELECT created_at, progress_score, output_score, overall_score
            FROM (
                SELECT tr.id AS run_id,
                       tr.created_at AS created_at,
                       e.progress_score AS progress_score,
                       e.output_score AS output_score,
                       e.overall_score AS overall_score
                FROM test_runs tr
                JOIN evaluations e ON tr.id = e.test_run_id
                WHERE tr.test_case_id = ? AND tr.status = 'completed'
                ORDER BY tr.created_at DESC, tr.id DESC
                LIMIT ?
            )
            ORDER BY created_at ASC, run_id ASC
            """,
            (test_case_id, limit),
        )
@@ -0,0 +1,108 @@
1
-- Cortex AutoGen2 test-results schema (SQLite).
-- Holds test runs plus the progress updates, logs, files, evaluations,
-- metrics and suggestions recorded for each run.

-- One row per test execution.
CREATE TABLE IF NOT EXISTS test_runs (
    id               INTEGER   PRIMARY KEY AUTOINCREMENT,
    test_case_id     TEXT      NOT NULL,
    task_description TEXT      NOT NULL,
    request_id       TEXT      UNIQUE,
    started_at       TIMESTAMP NOT NULL,
    completed_at     TIMESTAMP,
    duration_seconds REAL,
    status           TEXT      NOT NULL DEFAULT 'running'
                               CHECK (status IN ('running', 'completed', 'failed', 'timeout')),
    error_message    TEXT,
    final_response   TEXT,     -- complete final message sent to the user, including file URLs
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Progress updates collected while a test run executes.
CREATE TABLE IF NOT EXISTS progress_updates (
    id          INTEGER   PRIMARY KEY AUTOINCREMENT,
    test_run_id INTEGER   NOT NULL,
    timestamp   TIMESTAMP NOT NULL,
    progress    REAL,
    info        TEXT,
    is_final    BOOLEAN   DEFAULT 0,
    FOREIGN KEY (test_run_id) REFERENCES test_runs(id) ON DELETE CASCADE
);

-- Docker log lines collected while a test run executes.
CREATE TABLE IF NOT EXISTS logs (
    id          INTEGER   PRIMARY KEY AUTOINCREMENT,
    test_run_id INTEGER   NOT NULL,
    timestamp   TIMESTAMP NOT NULL,
    level       TEXT,
    agent       TEXT,
    message     TEXT,
    FOREIGN KEY (test_run_id) REFERENCES test_runs(id) ON DELETE CASCADE
);

-- Files produced by a test run.
CREATE TABLE IF NOT EXISTS files_created (
    id              INTEGER   PRIMARY KEY AUTOINCREMENT,
    test_run_id     INTEGER   NOT NULL,
    file_path       TEXT,
    file_type       TEXT,
    file_size_bytes INTEGER,
    sas_url         TEXT,
    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (test_run_id) REFERENCES test_runs(id) ON DELETE CASCADE
);

-- LLM-based evaluation scores; at most one row per test run.
CREATE TABLE IF NOT EXISTS evaluations (
    id                 INTEGER   PRIMARY KEY AUTOINCREMENT,
    test_run_id        INTEGER   NOT NULL UNIQUE,
    progress_score     INTEGER   CHECK (progress_score BETWEEN 0 AND 100),
    output_score       INTEGER   CHECK (output_score BETWEEN 0 AND 100),
    overall_score      INTEGER   CHECK (overall_score BETWEEN 0 AND 100),
    progress_reasoning TEXT,
    output_reasoning   TEXT,
    progress_issues    TEXT,     -- JSON array of issues found
    output_strengths   TEXT,     -- JSON array of strengths
    output_weaknesses  TEXT,     -- JSON array of weaknesses
    created_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (test_run_id) REFERENCES test_runs(id) ON DELETE CASCADE
);

-- Performance metrics; at most one row per test run.
CREATE TABLE IF NOT EXISTS metrics (
    id                     INTEGER PRIMARY KEY AUTOINCREMENT,
    test_run_id            INTEGER NOT NULL UNIQUE,
    time_to_first_progress REAL,
    time_to_completion     REAL,
    total_progress_updates INTEGER,
    avg_update_interval    REAL,
    min_update_interval    REAL,
    max_update_interval    REAL,
    files_created          INTEGER,
    sas_urls_provided      INTEGER,
    errors_count           INTEGER,
    warnings_count         INTEGER,
    FOREIGN KEY (test_run_id) REFERENCES test_runs(id) ON DELETE CASCADE
);

-- Improvement suggestions produced by LLM analysis.
CREATE TABLE IF NOT EXISTS suggestions (
    id             INTEGER   PRIMARY KEY AUTOINCREMENT,
    test_run_id    INTEGER   NOT NULL,
    suggestion     TEXT      NOT NULL,
    category       TEXT      DEFAULT 'other'
                             CHECK (category IN ('performance', 'quality', 'reliability', 'other')),
    priority       TEXT      DEFAULT 'medium'
                             CHECK (priority IN ('high', 'medium', 'low')),
    code_reference TEXT,     -- file path or agent name the suggestion relates to
    created_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (test_run_id) REFERENCES test_runs(id) ON DELETE CASCADE
);

-- Indexes for better query performance.
CREATE INDEX IF NOT EXISTS idx_test_runs_test_case        ON test_runs(test_case_id);
CREATE INDEX IF NOT EXISTS idx_test_runs_status           ON test_runs(status);
CREATE INDEX IF NOT EXISTS idx_test_runs_created_at       ON test_runs(created_at);
CREATE INDEX IF NOT EXISTS idx_progress_updates_run       ON progress_updates(test_run_id);
CREATE INDEX IF NOT EXISTS idx_progress_updates_timestamp ON progress_updates(timestamp);
CREATE INDEX IF NOT EXISTS idx_logs_run                   ON logs(test_run_id);
CREATE INDEX IF NOT EXISTS idx_logs_level                 ON logs(level);
CREATE INDEX IF NOT EXISTS idx_files_created_run          ON files_created(test_run_id);
CREATE INDEX IF NOT EXISTS idx_suggestions_run            ON suggestions(test_run_id);
CREATE INDEX IF NOT EXISTS idx_suggestions_priority       ON suggestions(priority);
@@ -0,0 +1 @@
1
+ """LLM-based evaluators for scoring test results."""