@aj-archipelago/cortex 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/README.md +1 -0
  2. package/config.js +1 -1
  3. package/helper-apps/cortex-autogen2/.dockerignore +1 -0
  4. package/helper-apps/cortex-autogen2/Dockerfile +6 -10
  5. package/helper-apps/cortex-autogen2/Dockerfile.worker +2 -0
  6. package/helper-apps/cortex-autogen2/agents.py +203 -2
  7. package/helper-apps/cortex-autogen2/main.py +1 -1
  8. package/helper-apps/cortex-autogen2/pyproject.toml +12 -0
  9. package/helper-apps/cortex-autogen2/requirements.txt +14 -0
  10. package/helper-apps/cortex-autogen2/services/redis_publisher.py +1 -1
  11. package/helper-apps/cortex-autogen2/services/run_analyzer.py +1 -1
  12. package/helper-apps/cortex-autogen2/task_processor.py +431 -229
  13. package/helper-apps/cortex-autogen2/test_entity_fetcher.py +305 -0
  14. package/helper-apps/cortex-autogen2/tests/README.md +240 -0
  15. package/helper-apps/cortex-autogen2/tests/TEST_REPORT.md +342 -0
  16. package/helper-apps/cortex-autogen2/tests/__init__.py +8 -0
  17. package/helper-apps/cortex-autogen2/tests/analysis/__init__.py +1 -0
  18. package/helper-apps/cortex-autogen2/tests/analysis/improvement_suggester.py +224 -0
  19. package/helper-apps/cortex-autogen2/tests/analysis/trend_analyzer.py +211 -0
  20. package/helper-apps/cortex-autogen2/tests/cli/__init__.py +1 -0
  21. package/helper-apps/cortex-autogen2/tests/cli/run_tests.py +296 -0
  22. package/helper-apps/cortex-autogen2/tests/collectors/__init__.py +1 -0
  23. package/helper-apps/cortex-autogen2/tests/collectors/log_collector.py +252 -0
  24. package/helper-apps/cortex-autogen2/tests/collectors/progress_collector.py +182 -0
  25. package/helper-apps/cortex-autogen2/tests/conftest.py +15 -0
  26. package/helper-apps/cortex-autogen2/tests/database/__init__.py +1 -0
  27. package/helper-apps/cortex-autogen2/tests/database/repository.py +501 -0
  28. package/helper-apps/cortex-autogen2/tests/database/schema.sql +108 -0
  29. package/helper-apps/cortex-autogen2/tests/evaluators/__init__.py +1 -0
  30. package/helper-apps/cortex-autogen2/tests/evaluators/llm_scorer.py +294 -0
  31. package/helper-apps/cortex-autogen2/tests/evaluators/prompts.py +250 -0
  32. package/helper-apps/cortex-autogen2/tests/evaluators/wordcloud_validator.py +168 -0
  33. package/helper-apps/cortex-autogen2/tests/metrics/__init__.py +1 -0
  34. package/helper-apps/cortex-autogen2/tests/metrics/collector.py +155 -0
  35. package/helper-apps/cortex-autogen2/tests/orchestrator.py +576 -0
  36. package/helper-apps/cortex-autogen2/tests/test_cases.yaml +279 -0
  37. package/helper-apps/cortex-autogen2/tests/test_data.db +0 -0
  38. package/helper-apps/cortex-autogen2/tests/utils/__init__.py +3 -0
  39. package/helper-apps/cortex-autogen2/tests/utils/connectivity.py +112 -0
  40. package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +74 -24
  41. package/helper-apps/cortex-autogen2/tools/entity_api_registry.json +38 -0
  42. package/helper-apps/cortex-autogen2/tools/file_tools.py +1 -1
  43. package/helper-apps/cortex-autogen2/tools/search_tools.py +436 -238
  44. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  45. package/helper-apps/cortex-file-handler/package.json +1 -1
  46. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +4 -5
  47. package/helper-apps/cortex-file-handler/src/blobHandler.js +36 -144
  48. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +5 -3
  49. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +34 -1
  50. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +22 -0
  51. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +28 -1
  52. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +29 -4
  53. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +11 -0
  54. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +1 -1
  55. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +3 -2
  56. package/helper-apps/cortex-file-handler/tests/checkHashShortLived.test.js +8 -1
  57. package/helper-apps/cortex-file-handler/tests/containerConversionFlow.test.js +5 -2
  58. package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +14 -7
  59. package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +5 -2
  60. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +31 -19
  61. package/package.json +1 -1
  62. package/server/modelExecutor.js +4 -0
  63. package/server/plugins/claude4VertexPlugin.js +540 -0
  64. package/server/plugins/openAiWhisperPlugin.js +43 -2
  65. package/tests/integration/rest/vendors/claude_streaming.test.js +121 -0
  66. package/tests/unit/plugins/claude4VertexPlugin.test.js +462 -0
  67. package/tests/unit/plugins/claude4VertexToolConversion.test.js +413 -0
  68. package/helper-apps/cortex-autogen/.funcignore +0 -8
  69. package/helper-apps/cortex-autogen/Dockerfile +0 -10
  70. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +0 -6
  71. package/helper-apps/cortex-autogen/agents.py +0 -493
  72. package/helper-apps/cortex-autogen/agents_extra.py +0 -14
  73. package/helper-apps/cortex-autogen/config.py +0 -18
  74. package/helper-apps/cortex-autogen/data_operations.py +0 -29
  75. package/helper-apps/cortex-autogen/function_app.py +0 -44
  76. package/helper-apps/cortex-autogen/host.json +0 -15
  77. package/helper-apps/cortex-autogen/main.py +0 -38
  78. package/helper-apps/cortex-autogen/prompts.py +0 -196
  79. package/helper-apps/cortex-autogen/prompts_extra.py +0 -5
  80. package/helper-apps/cortex-autogen/requirements.txt +0 -9
  81. package/helper-apps/cortex-autogen/search.py +0 -85
  82. package/helper-apps/cortex-autogen/test.sh +0 -40
  83. package/helper-apps/cortex-autogen/tools/sasfileuploader.py +0 -66
  84. package/helper-apps/cortex-autogen/utils.py +0 -88
  85. package/helper-apps/cortex-autogen2/DigiCertGlobalRootCA.crt.pem +0 -22
  86. package/helper-apps/cortex-autogen2/poetry.lock +0 -3652
@@ -0,0 +1,211 @@
1
+ """
2
+ Trend analyzer for tracking quality metrics over time.
3
+
4
+ Analyzes historical test results to identify trends, regressions,
5
+ and improvements.
6
+ """
7
+
8
+ import logging
9
+ from typing import List, Dict, Optional
10
+ from tests.database.repository import TestRepository
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class TrendAnalyzer:
    """Analyzes trends in test scores and metrics over time.

    Every query is delegated to a ``TestRepository``; this class only
    aggregates the rows it gets back and formats them for the console.
    """

    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize the trend analyzer.

        Args:
            db_path: Path to SQLite database (passed straight to
                ``TestRepository``).
        """
        self.db = TestRepository(db_path)

    def get_score_trend(self, test_case_id: str, limit: int = 20) -> List[Dict]:
        """
        Get score trend for a test case.

        Args:
            test_case_id: Test case ID
            limit: Number of historical runs to analyze

        Returns:
            List of score data points, exactly as returned by the repository.
        """
        return self.db.get_score_trend(test_case_id, limit=limit)

    def detect_regression(self, test_case_id: str, threshold: int = 10) -> Optional[Dict]:
        """
        Detect if a regression has occurred.

        The last entry of the five most recent trend points is compared with
        the mean of the entries before it.

        Args:
            test_case_id: Test case ID
            threshold: Score drop threshold to consider regression (default: 10 points)

        Returns:
            Regression info dict if detected, None otherwise (also None when
            fewer than two data points exist).
        """
        trend = self.get_score_trend(test_case_id, limit=5)

        # A single run has nothing to be compared against.
        if len(trend) < 2:
            return None

        # NOTE(review): treats trend[-1] as the newest run, i.e. assumes the
        # repository returns rows oldest-first - confirm against the query.
        latest_score = trend[-1]['overall_score']
        previous_scores = [run['overall_score'] for run in trend[:-1]]
        avg_previous = sum(previous_scores) / len(previous_scores)

        drop = avg_previous - latest_score
        if drop < threshold:
            return None

        return {
            'test_case_id': test_case_id,
            'latest_score': latest_score,
            'previous_avg': avg_previous,
            'drop': drop,
            # Drops of 20+ points are reported as high severity.
            'severity': 'high' if drop >= 20 else 'medium'
        }

    def get_average_scores(
        self,
        test_case_id: Optional[str] = None,
        limit: int = 10
    ) -> Dict[str, float]:
        """
        Get average scores for recent runs.

        Args:
            test_case_id: Optional test case ID to filter by
            limit: Number of runs to average

        Returns:
            Dict with average scores, exactly as returned by the repository.
        """
        return self.db.get_average_scores(test_case_id, limit=limit)

    def compare_test_cases(self, limit: int = 10) -> List[Dict]:
        """
        Compare performance across different test cases.

        Args:
            limit: Number of recent runs per test case to analyze

        Returns:
            List of test case comparisons sorted by average overall score,
            best first. ``stability`` is ``max(0, 100 - variance)`` of the
            overall scores, or None when fewer than three runs exist.
        """
        # Collect the distinct test case IDs seen in the latest 100 runs.
        # dict.fromkeys keeps first-seen order, so test cases with equal
        # averages come out in a reproducible order (a set would not).
        recent_runs = self.db.get_recent_runs(limit=100)
        test_case_ids = list(dict.fromkeys(run['test_case_id'] for run in recent_runs))

        comparisons = []

        for test_case_id in test_case_ids:
            scores = self.get_average_scores(test_case_id, limit=limit)
            trend = self.get_score_trend(test_case_id, limit=limit)

            # Calculate stability (population variance of the overall scores)
            if len(trend) >= 3:
                overall_scores = [run['overall_score'] for run in trend]
                avg = sum(overall_scores) / len(overall_scores)
                variance = sum((score - avg) ** 2 for score in overall_scores) / len(overall_scores)
                stability = max(0, 100 - variance)  # Higher is more stable
            else:
                stability = None

            comparisons.append({
                'test_case_id': test_case_id,
                'avg_progress_score': scores['avg_progress_score'],
                'avg_output_score': scores['avg_output_score'],
                'avg_overall_score': scores['avg_overall_score'],
                'runs_count': len(trend),
                'stability': stability
            })

        # Sort by overall score, best first
        comparisons.sort(key=lambda x: x['avg_overall_score'], reverse=True)

        return comparisons

    def get_summary_report(self) -> Dict:
        """
        Generate comprehensive summary report.

        Returns:
            Summary statistics. When no runs exist the dict holds only
            ``total_runs`` (0) and ``message``.
        """
        recent_runs = self.db.get_recent_runs(limit=50)

        if not recent_runs:
            return {
                'total_runs': 0,
                'message': 'No test runs found'
            }

        # Overall statistics
        overall_scores = self.get_average_scores(limit=20)

        # Test case breakdown
        test_case_comparisons = self.compare_test_cases(limit=10)

        # Detect regressions, one check per test case in the breakdown
        regressions = []
        for comparison in test_case_comparisons:
            regression = self.detect_regression(comparison['test_case_id'])
            if regression:
                regressions.append(regression)

        # Success rate: share of recent runs whose status is 'completed'
        completed = sum(1 for run in recent_runs if run['status'] == 'completed')
        success_rate = completed / len(recent_runs) * 100

        return {
            'total_runs': len(recent_runs),
            'success_rate': success_rate,
            'overall_scores': overall_scores,
            'test_case_comparisons': test_case_comparisons,
            'regressions_detected': regressions,
            'regression_count': len(regressions)
        }

    def print_summary_report(self):
        """Print formatted summary report to console."""
        report = self.get_summary_report()

        print("\n" + "=" * 80)
        print("📊 Test Quality Summary Report")
        print("=" * 80 + "\n")

        print(f"Total Test Runs: {report['total_runs']}")

        # With an empty database the report carries only a message; none of
        # the score keys used below exist, so stop here instead of raising.
        if 'message' in report:
            print(report['message'])
            print(f"\n{'=' * 80}\n")
            return

        print(f"Success Rate: {report['success_rate']:.1f}%")
        print(f"\nOverall Average Scores:")
        print(f" Progress: {report['overall_scores']['avg_progress_score']:.1f}/100")
        print(f" Output: {report['overall_scores']['avg_output_score']:.1f}/100")
        print(f" Overall: {report['overall_scores']['avg_overall_score']:.1f}/100")

        print(f"\n📋 Test Case Performance:")
        print(f"{'Test Case':<40} {'Overall':<10} {'Stability':<12} {'Runs'}")
        print("─" * 75)

        for tc in report['test_case_comparisons']:
            test_case = tc['test_case_id'][:38]
            overall = f"{tc['avg_overall_score']:.1f}"
            # A stability of 0 is a real (worst-case) value; only None means
            # "not enough runs".
            stability = f"{tc['stability']:.1f}" if tc['stability'] is not None else "N/A"
            runs = tc['runs_count']

            print(f"{test_case:<40} {overall:<10} {stability:<12} {runs}")

        if report['regressions_detected']:
            print(f"\n⚠️ Regressions Detected: {report['regression_count']}")
            for reg in report['regressions_detected']:
                print(f" • {reg['test_case_id']}: {reg['latest_score']:.1f} (down {reg['drop']:.1f} points)")
        else:
            print(f"\n✅ No regressions detected")

        print(f"\n{'=' * 80}\n")
@@ -0,0 +1 @@
1
+ """CLI interface for running tests."""
@@ -0,0 +1,296 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CLI runner for Cortex AutoGen2 automated tests.
4
+
5
+ Usage:
6
+ python tests/cli/run_tests.py --all # Run all tests
7
+ python tests/cli/run_tests.py --test tc001_pokemon_pptx # Run specific test
8
+ python tests/cli/run_tests.py --history # View recent results
9
+ python tests/cli/run_tests.py --trend tc001_pokemon_pptx # View score trend
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import asyncio
15
+ import argparse
16
+ import logging
17
+ from pathlib import Path
18
+ from datetime import datetime
19
+
20
+ # Add parent directories to path
21
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
22
+
23
+ from dotenv import load_dotenv
24
+ from tests.orchestrator import TestOrchestrator
25
+ from tests.database.repository import TestRepository
26
+ from tests.analysis.trend_analyzer import TrendAnalyzer
27
+
28
+ # Load environment variables
29
+ load_dotenv()
30
+
31
+ # Configure logging
32
+ logging.basicConfig(
33
+ level=logging.INFO,
34
+ format='%(asctime)s - %(levelname)s - %(message)s',
35
+ datefmt='%Y-%m-%d %H:%M:%S'
36
+ )
37
+
38
+ # Suppress verbose Azure SDK logging
39
+ logging.getLogger('azure').setLevel(logging.WARNING)
40
+ logging.getLogger('azure.core.pipeline.policies.http_logging_policy').setLevel(logging.WARNING)
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
def print_header():
    """Write the suite banner: a title framed by two 80-column rules."""
    rule = "=" * 80
    banner = ["", rule, "🧪 Cortex AutoGen2 - Automated Quality Testing Suite", rule, ""]
    # The leading/trailing empty items reproduce the blank line before and
    # after the banner.
    print("\n".join(banner))
50
+
51
+
52
def print_test_result(result: dict):
    """Print formatted test result.

    Args:
        result: Result dict for one test run. Every field is optional; the
            keys read are ``test_case_id``, ``status``, ``duration_seconds``,
            ``progress_updates_count``, ``files_created_count``,
            ``final_response``, ``overall_score`` and the two evaluation
            dicts ``progress_evaluation`` / ``output_evaluation`` (with
            ``score``, ``reasoning``, ``strengths``, ``weaknesses``).
    """
    rule = '─' * 80

    # A key that is present but None would defeat a plain .get() default,
    # so fall back explicitly.
    progress_eval = result.get('progress_evaluation') or {}
    output_eval = result.get('output_evaluation') or {}
    duration = result.get('duration_seconds') or 0

    print(f"\n{rule}")
    print(f"📋 Test: {result.get('test_case_id', 'unknown')}")
    print(f"{rule}")
    print(f"Status: {result.get('status', 'unknown')}")
    print(f"Duration: {duration:.1f}s")
    print(f"Progress Updates: {result.get('progress_updates_count', 0)}")
    print(f"Files Created: {result.get('files_created_count', 0)}")

    # Show final response data field
    final_response = result.get('final_response', '')
    if final_response:
        print(f"\n📝 Final Response Data Field ({len(final_response)} chars):")
        print(final_response)

    print(f"\n📊 Scores:")
    print(f" Progress: {progress_eval.get('score', 0)}/100")
    print(f" Output: {output_eval.get('score', 0)}/100")
    print(f" Overall: {result.get('overall_score', 0)}/100")

    # Show evaluation reasoning
    if progress_eval.get('reasoning'):
        print(f"\n💭 Progress Reasoning:")
        print(f" {progress_eval['reasoning']}")

    if output_eval.get('reasoning'):
        print(f"\n💭 Output Reasoning:")
        print(f" {output_eval['reasoning']}")

    # Show strengths/weaknesses
    if output_eval.get('strengths'):
        print(f"\n✅ Strengths:")
        for strength in output_eval['strengths']:
            print(f" • {strength}")

    if output_eval.get('weaknesses'):
        print(f"\n⚠️ Weaknesses:")
        for weakness in output_eval['weaknesses']:
            print(f" • {weakness}")

    print(f"{rule}\n")
96
+
97
+
98
def print_history(limit: int = 10):
    """Print recent test history.

    Reads the latest runs from a ``TestRepository`` and prints one table row
    per run, looking up each run's evaluation for the score column.

    Args:
        limit: Maximum number of runs to list.
    """
    db = TestRepository()
    runs = db.get_recent_runs(limit=limit)

    print("\n📜 Recent Test Runs:\n")

    if not runs:
        print(" No test runs found in database.")
        return

    print(f"{'ID':<6} {'Test Case':<30} {'Status':<12} {'Duration':<10} {'Scores (P/O/Overall)':<20} {'Date'}")
    print("─" * 110)

    for run in runs:
        test_id = run['id']
        test_case = run['test_case_id'][:28]
        status = run['status']
        # The key can be present with a None value, which a .get() default
        # does not cover and which ':.1f' cannot format.
        # NOTE(review): presumably happens for runs that never finished - confirm.
        duration = f"{run.get('duration_seconds') or 0:.1f}s"
        created_at = run['created_at'][:19]

        # Get evaluation scores
        eval_data = db.get_evaluation(test_id)
        if eval_data:
            progress_score = eval_data.get('progress_score', 0)
            output_score = eval_data.get('output_score', 0)
            overall_score = eval_data.get('overall_score', 0)
            scores = f"{progress_score}/{output_score}/{overall_score}"
        else:
            scores = "N/A"

        print(f"{test_id:<6} {test_case:<30} {status:<12} {duration:<10} {scores:<20} {created_at}")

    print()
132
+
133
+
134
def print_trend(test_case_id: str, limit: int = 20):
    """Print score trend for a test case.

    Prints one row per historical data point, then a first-vs-last
    comparison when at least two points exist.

    Args:
        test_case_id: Test case whose history is shown.
        limit: Maximum number of historical runs to fetch.
    """
    analyzer = TrendAnalyzer()
    trend_data = analyzer.get_score_trend(test_case_id, limit=limit)

    print(f"\n📈 Score Trend for {test_case_id}:\n")

    if not trend_data:
        print(f" No historical data found for test case: {test_case_id}")
        return

    print(f"{'Date':<20} {'Progress':<10} {'Output':<10} {'Overall':<10}")
    print("─" * 52)

    for entry in trend_data:
        date = entry['created_at'][:19]
        progress = entry['progress_score']
        output = entry['output_score']
        overall = entry['overall_score']

        print(f"{date:<20} {progress:<10} {output:<10} {overall:<10}")

    # Calculate trend
    if len(trend_data) >= 2:
        # NOTE(review): treats the first row as the oldest run and the last
        # as the newest - confirm against the repository's ordering.
        first_overall = trend_data[0]['overall_score']
        last_overall = trend_data[-1]['overall_score']
        change = last_overall - first_overall

        # The 'd' format code only accepts integers; fall back to one
        # decimal when the scores are stored as floats.
        if isinstance(change, int):
            change_text = f"{change:+d}"
        else:
            change_text = f"{change:+.1f}"

        print(f"\n📊 Trend Analysis:")
        print(f" First score: {first_overall}/100")
        print(f" Latest score: {last_overall}/100")
        print(f" Change: {change_text} points")

        if change > 10:
            print(f" Status: 📈 Improving")
        elif change < -10:
            print(f" Status: 📉 Declining (regression detected!)")
        else:
            print(f" Status: ➡️ Stable")

    print()
175
+
176
+
177
async def run_all_tests():
    """Run all test cases.

    Runs every case through a ``TestOrchestrator``, prints each result and
    then a pass/fail summary, where a test passes when its overall score is
    above 80.
    """
    print_header()
    print("🚀 Running all test cases...\n")

    orchestrator = TestOrchestrator()
    results = await orchestrator.run_all_tests()

    # Print individual results
    for result in results:
        print_test_result(result)

    # Print final summary
    print("\n" + "=" * 80)
    print("📊 Final Summary")
    print("=" * 80 + "\n")

    # A result may carry overall_score=None, which cannot be compared or
    # summed; count it as 0.
    # NOTE(review): presumably a run that was never evaluated - confirm.
    overall_scores = [r.get('overall_score') or 0 for r in results]

    passed = sum(1 for score in overall_scores if score > 80)
    failed = len(results) - passed

    print(f"Total Tests: {len(results)}")
    print(f"Passed (>80): {passed}")
    print(f"Failed (≤80): {failed}")

    avg_overall = sum(overall_scores) / len(overall_scores) if overall_scores else 0
    print(f"Average Overall Score: {avg_overall:.1f}/100")

    print(f"\n{'=' * 80}\n")
205
+
206
+
207
async def run_single_test(test_case_id: str):
    """Run a single test case.

    Looks the case up by ID among the orchestrator's loaded test cases. When
    it is not found, lists the available cases instead of running anything.

    Args:
        test_case_id: ID of the test case to run.
    """
    print_header()
    print(f"🎯 Running test case: {test_case_id}\n")

    orchestrator = TestOrchestrator()
    test_cases = orchestrator.load_test_cases()

    # Find the test case. .get() keeps one incomplete entry (no 'id') from
    # aborting the lookup of every other case.
    test_case = next((tc for tc in test_cases if tc.get('id') == test_case_id), None)

    if not test_case:
        print(f"❌ Test case not found: {test_case_id}")
        print(f"\nAvailable test cases:")
        for tc in test_cases:
            # This is the help path: tolerate entries without id/name rather
            # than raising KeyError while explaining the first error.
            print(f" • {tc.get('id', 'unknown')} - {tc.get('name', '')}")
        return

    result = await orchestrator.run_test(test_case)
    print_test_result(result)
227
+
228
+
229
def main():
    """Parse the command line and dispatch to the selected action.

    The first matching flag wins, checked in the order ``--all``, ``--test``,
    ``--history``, ``--trend``. With no action flag, the help text and a few
    example invocations are printed.
    """
    cli = argparse.ArgumentParser(
        description="Cortex AutoGen2 Automated Testing Suite",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    # Declared as data so the option set reads as one table.
    option_table = (
        ('--all', dict(action='store_true', help='Run all test cases')),
        ('--test', dict(type=str, metavar='TEST_ID',
                        help='Run specific test case (e.g., tc001_pokemon_pptx)')),
        ('--history', dict(action='store_true', help='View recent test history')),
        ('--trend', dict(type=str, metavar='TEST_ID',
                         help='View score trend for specific test case')),
        ('--limit', dict(type=int, default=10,
                         help='Limit number of results (default: 10)')),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    opts = cli.parse_args()

    # Handle commands
    if opts.all:
        asyncio.run(run_all_tests())
        return

    if opts.test:
        asyncio.run(run_single_test(opts.test))
        return

    if opts.history:
        print_header()
        print_history(limit=opts.limit)
        return

    if opts.trend:
        print_header()
        print_trend(opts.trend, limit=opts.limit)
        return

    # No action requested: show usage plus examples.
    cli.print_help()
    print("\nExamples:")
    for example in (
        " python tests/cli/run_tests.py --all",
        " python tests/cli/run_tests.py --test tc001_pokemon_pptx",
        " python tests/cli/run_tests.py --history --limit 20",
        " python tests/cli/run_tests.py --trend tc001_pokemon_pptx",
    ):
        print(example)
294
+
295
+ if __name__ == "__main__":
296
+ main()
@@ -0,0 +1 @@
1
+ """Data collectors for progress updates and logs."""