scalebox-sdk 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. scalebox/__init__.py +1 -1
  2. scalebox/api/__init__.py +3 -3
  3. scalebox/api/client/__init__.py +1 -1
  4. scalebox/code_interpreter/code_interpreter_async.py +3 -3
  5. scalebox/code_interpreter/code_interpreter_sync.py +2 -2
  6. scalebox/csx_desktop/main.py +4 -4
  7. scalebox/test/aclient.py +72 -72
  8. scalebox/test/code_interpreter_centext.py +21 -21
  9. scalebox/test/code_interpreter_centext_sync.py +21 -21
  10. scalebox/test/code_interpreter_test.py +1 -1
  11. scalebox/test/code_interpreter_test_sync.py +1 -1
  12. scalebox/test/run_all_validation_tests.py +334 -334
  13. scalebox/test/test_basic.py +78 -78
  14. scalebox/test/test_code_interpreter_async_comprehensive.py +2653 -2653
  15. scalebox/test/{test_code_interpreter_e2bsync_comprehensive.py → test_code_interpreter_execcode.py} +328 -392
  16. scalebox/test/test_code_interpreter_sync_comprehensive.py +3416 -3412
  17. scalebox/test/test_csx_desktop_examples.py +130 -0
  18. scalebox/test/test_sandbox_async_comprehensive.py +736 -738
  19. scalebox/test/test_sandbox_stress_and_edge_cases.py +778 -778
  20. scalebox/test/test_sandbox_sync_comprehensive.py +779 -770
  21. scalebox/test/test_sandbox_usage_examples.py +987 -987
  22. scalebox/test/testacreate.py +24 -24
  23. scalebox/test/testagetinfo.py +18 -18
  24. scalebox/test/testcodeinterpreter_async.py +508 -508
  25. scalebox/test/testcodeinterpreter_sync.py +239 -239
  26. scalebox/test/testcomputeuse.py +2 -2
  27. scalebox/test/testnovnc.py +12 -12
  28. scalebox/test/testsandbox_async.py +202 -118
  29. scalebox/test/testsandbox_sync.py +71 -38
  30. scalebox/version.py +2 -2
  31. {scalebox_sdk-0.1.12.dist-info → scalebox_sdk-0.1.14.dist-info}/METADATA +2 -2
  32. {scalebox_sdk-0.1.12.dist-info → scalebox_sdk-0.1.14.dist-info}/RECORD +36 -37
  33. scalebox/test/test_code_interpreter_e2basync_comprehensive.py +0 -2655
  34. scalebox/test/test_e2b_first.py +0 -11
  35. {scalebox_sdk-0.1.12.dist-info → scalebox_sdk-0.1.14.dist-info}/WHEEL +0 -0
  36. {scalebox_sdk-0.1.12.dist-info → scalebox_sdk-0.1.14.dist-info}/entry_points.txt +0 -0
  37. {scalebox_sdk-0.1.12.dist-info → scalebox_sdk-0.1.14.dist-info}/licenses/LICENSE +0 -0
  38. {scalebox_sdk-0.1.12.dist-info → scalebox_sdk-0.1.14.dist-info}/top_level.txt +0 -0
scalebox/test/run_all_validation_tests.py
@@ -1,334 +1,334 @@
- #!/usr/bin/env python3
- """
- Unified test runner - runs all sandbox validation tests
-
- This script runs all validation tests in sequence and generates a consolidated report.
- It supports selectively running specific test suites and provides detailed performance and result statistics.
- """
-
- import asyncio
- import logging
- import os
- import sys
- import time
- import traceback
- from pathlib import Path
- from typing import Any, Dict, List
-
- # Add the project root directory to the Python path
- project_root = Path(__file__).parent.parent
- sys.path.insert(0, str(project_root))
-
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-     handlers=[
-         logging.StreamHandler(),
-         logging.FileHandler("/home/ubuntu/git_home/scalebox/test/test_results.log"),
-     ],
- )
- logger = logging.getLogger(__name__)
-
-
- class TestSuiteRunner:
-     """Test suite runner"""
-
-     def __init__(self):
-         self.results: Dict[str, Dict[str, Any]] = {}
-         self.total_start_time = time.time()
-
-     def run_test_module(self, module_name: str, description: str) -> Dict[str, Any]:
-         """Run a single test module"""
-         logger.info(f"\n{'='*60}")
-         logger.info(f"Starting: {description}")
-         logger.info(f"Module: {module_name}")
-         logger.info(f"{'='*60}")
-
-         start_time = time.time()
-         result = {
-             "module": module_name,
-             "description": description,
-             "success": False,
-             "duration": 0,
-             "error": None,
-             "details": {},
-         }
-
-         try:
-             if module_name == "test_sandbox_async_comprehensive":
-                 from test_sandbox_async_comprehensive import AsyncSandboxValidator
-
-                 validator = AsyncSandboxValidator()
-                 asyncio.run(validator.run_all_tests())
-                 result["details"] = {
-                     "total_tests": len(validator.test_results),
-                     "passed": sum(1 for r in validator.test_results if r["success"]),
-                     "failed": len(validator.failed_tests),
-                     "test_results": validator.test_results,
-                 }
-
-             elif module_name == "test_sandbox_sync_comprehensive":
-                 from test_sandbox_sync_comprehensive import SandboxValidator
-
-                 validator = SandboxValidator()
-                 validator.run_all_tests()
-                 result["details"] = {
-                     "total_tests": len(validator.test_results),
-                     "passed": sum(1 for r in validator.test_results if r["success"]),
-                     "failed": len(validator.failed_tests),
-                     "test_results": validator.test_results,
-                 }
-
-             elif module_name == "test_sandbox_stress_and_edge_cases":
-                 from test_sandbox_stress_and_edge_cases import StressTestValidator
-
-                 validator = StressTestValidator()
-                 validator.run_all_tests()
-                 result["details"] = {
-                     "total_tests": len(validator.test_results),
-                     "passed": sum(1 for r in validator.test_results if r["success"]),
-                     "failed": len(validator.failed_tests),
-                     "test_results": validator.test_results,
-                 }
-
-             elif module_name == "test_sandbox_usage_examples":
-                 # Usage examples do not return test results; they are demonstrations only
-                 import test_sandbox_usage_examples
-
-                 test_sandbox_usage_examples.main()
-                 result["details"] = {
-                     "total_tests": 1,
-                     "passed": 1,
-                     "failed": 0,
-                     "note": "Usage examples completed successfully",
-                 }
-
-             result["success"] = True
-
-         except Exception as e:
-             result["error"] = str(e)
-             result["traceback"] = traceback.format_exc()
-             logger.error(f"Test module {module_name} failed: {e}")
-             logger.error(f"Detailed error info:\n{traceback.format_exc()}")
-
-         result["duration"] = time.time() - start_time
-
-         # Print a summary of the module's results
-         if result["success"]:
-             logger.info(f"✅ {description} - completed")
-             if "total_tests" in result["details"]:
-                 logger.info(f"  Total tests: {result['details']['total_tests']}")
-                 logger.info(f"  Passed: {result['details']['passed']}")
-                 logger.info(f"  Failed: {result['details']['failed']}")
-             logger.info(f"  Duration: {result['duration']:.3f}s")
-         else:
-             logger.error(f"❌ {description} - failed")
-             logger.error(f"  Error: {result['error']}")
-             logger.error(f"  Duration: {result['duration']:.3f}s")
-
-         return result
-
-     def run_all_tests(self, selected_tests: List[str] = None) -> Dict[str, Any]:
-         """Run all tests, or only the selected ones"""
-         logger.info("ScaleBox sandbox validation test suite")
-         logger.info(f"Start time: {time.strftime('%Y-%m-%d %H:%M:%S')}")
-
-         # Define all test suites
-         test_suites = [
-             {
-                 "module": "test_sandbox_async_comprehensive",
-                 "description": "AsyncSandbox comprehensive functional validation",
-             },
-             {
-                 "module": "test_sandbox_sync_comprehensive",
-                 "description": "Sandbox comprehensive functional validation",
-             },
-             {
-                 "module": "test_sandbox_stress_and_edge_cases",
-                 "description": "Stress test and edge-case validation",
-             },
-             {
-                 "module": "test_sandbox_usage_examples",
-                 "description": "Usage examples and best-practice demonstration",
-             },
-         ]
-
-         # Filter down to the selected tests
-         if selected_tests:
-             test_suites = [
-                 suite for suite in test_suites if suite["module"] in selected_tests
-             ]
-
-         logger.info(f"Will run {len(test_suites)} test suite(s)")
-
-         # Run every test suite
-         for suite in test_suites:
-             result = self.run_test_module(suite["module"], suite["description"])
-             self.results[suite["module"]] = result
-
-         # Generate the consolidated report
-         total_duration = time.time() - self.total_start_time
-         return self.generate_final_report(total_duration)
-
-     def generate_final_report(self, total_duration: float) -> Dict[str, Any]:
-         """Generate the final test report"""
-         logger.info(f"\n{'='*80}")
-         logger.info("ScaleBox sandbox validation tests - final report")
-         logger.info(f"{'='*80}")
-
-         # Aggregate overall results
-         total_suites = len(self.results)
-         successful_suites = sum(1 for r in self.results.values() if r["success"])
-         failed_suites = total_suites - successful_suites
-
-         total_tests = sum(
-             r["details"].get("total_tests", 0) for r in self.results.values()
-         )
-         total_passed = sum(r["details"].get("passed", 0) for r in self.results.values())
-         total_failed = sum(r["details"].get("failed", 0) for r in self.results.values())
-
-         logger.info(f"Test suite statistics:")
-         logger.info(f"  Total suites: {total_suites}")
-         logger.info(f"  Successful suites: {successful_suites}")
-         logger.info(f"  Failed suites: {failed_suites}")
-         logger.info(f"  Suite success rate: {(successful_suites/total_suites*100):.1f}%")
-
-         logger.info(f"\nTest case statistics:")
-         logger.info(f"  Total tests: {total_tests}")
-         logger.info(f"  Passed tests: {total_passed}")
-         logger.info(f"  Failed tests: {total_failed}")
-         logger.info(
-             f"  Test success rate: {(total_passed/total_tests*100):.1f}%"
-             if total_tests > 0
-             else "  Test success rate: N/A"
-         )
-
-         logger.info(f"\nPerformance statistics:")
-         logger.info(f"  Total run time: {total_duration:.3f}s")
-
-         # Detailed results
-         logger.info(f"\nDetailed results:")
-         for module, result in self.results.items():
-             status = "✅ passed" if result["success"] else "❌ failed"
-             logger.info(f"  {result['description']}: {status}")
-             logger.info(f"    Module: {module}")
-             logger.info(f"    Duration: {result['duration']:.3f}s")
-
-             if "total_tests" in result["details"]:
-                 logger.info(
-                     f"    Tests: {result['details']['passed']}/{result['details']['total_tests']}"
-                 )
-
-             if not result["success"] and result["error"]:
-                 logger.info(f"    Error: {result['error']}")
-
-         # Details of failed test suites
-         if failed_suites > 0:
-             logger.info(f"\nFailed test suites:")
-             for module, result in self.results.items():
-                 if not result["success"]:
-                     logger.info(f"  ❌ {result['description']}")
-                     logger.info(f"     Error: {result['error']}")
-
-         # Performance comparison (when both sync and async results are available)
-         if (
-             "test_sandbox_async_comprehensive" in self.results
-             and "test_sandbox_sync_comprehensive" in self.results
-         ):
-             async_duration = self.results["test_sandbox_async_comprehensive"][
-                 "duration"
-             ]
-             sync_duration = self.results["test_sandbox_sync_comprehensive"]["duration"]
-
-             logger.info(f"\nSync vs async performance comparison:")
-             logger.info(f"  Async version duration: {async_duration:.3f}s")
-             logger.info(f"  Sync version duration: {sync_duration:.3f}s")
-
-             if async_duration > 0 and sync_duration > 0:
-                 if async_duration < sync_duration:
-                     speedup = sync_duration / async_duration
-                     logger.info(f"  Async version is {speedup:.1f}x faster")
-                 else:
-                     speedup = async_duration / sync_duration
-                     logger.info(f"  Sync version is {speedup:.1f}x faster")
-
-         logger.info(f"\n{'='*80}")
-         logger.info(f"Test completed at: {time.strftime('%Y-%m-%d %H:%M:%S')}")
-         logger.info(f"{'='*80}")
-
-         # Return the summary report
-         summary = {
-             "total_suites": total_suites,
-             "successful_suites": successful_suites,
-             "failed_suites": failed_suites,
-             "suite_success_rate": (
-                 (successful_suites / total_suites * 100) if total_suites > 0 else 0
-             ),
-             "total_tests": total_tests,
-             "total_passed": total_passed,
-             "total_failed": total_failed,
-             "test_success_rate": (
-                 (total_passed / total_tests * 100) if total_tests > 0 else 0
-             ),
-             "total_duration": total_duration,
-             "results": self.results,
-         }
-
-         return summary
-
-
- def main():
-     """Main entry point"""
-     import argparse
-
-     parser = argparse.ArgumentParser(description="ScaleBox sandbox validation test suite")
-     parser.add_argument(
-         "--tests",
-         nargs="+",
-         choices=[
-             "test_sandbox_async_comprehensive",
-             "test_sandbox_sync_comprehensive",
-             "test_sandbox_stress_and_edge_cases",
-             "test_sandbox_usage_examples",
-         ],
-         help="Select which test suites to run",
-     )
-     parser.add_argument("--log-level", default="INFO", help="Log level")
-     parser.add_argument("--output", help="Write the report to a file")
-
-     args = parser.parse_args()
-
-     # Set the log level
-     logging.getLogger().setLevel(getattr(logging, args.log_level.upper()))
-
-     try:
-         # Run the tests
-         runner = TestSuiteRunner()
-         summary = runner.run_all_tests(args.tests)
-
-         # Save the report to a file
-         if args.output:
-             import json
-
-             with open(args.output, "w", encoding="utf-8") as f:
-                 json.dump(summary, f, indent=2, ensure_ascii=False, default=str)
-             logger.info(f"Test report saved to: {args.output}")
-
-         # Set the exit code based on the test results
-         if summary["failed_suites"] > 0:
-             sys.exit(1)
-         else:
-             sys.exit(0)
-
-     except KeyboardInterrupt:
-         logger.info("Tests interrupted by user")
-         sys.exit(2)
-     except Exception as e:
-         logger.error(f"Test runner raised an exception: {e}")
-         logger.error(traceback.format_exc())
-         sys.exit(3)
-
-
- if __name__ == "__main__":
-     main()
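
The runner above relies on only a small implicit interface of the validator classes it imports (AsyncSandboxValidator, SandboxValidator, StressTestValidator): a run_all_tests() entry point, a test_results list of per-test dicts carrying a "success" flag, and a failed_tests list. A minimal sketch of that implied shape, using a hypothetical DummyValidator as a stand-in for the real classes defined in the test_sandbox_*_comprehensive modules:

# Hypothetical stand-in; it only mirrors the attributes run_test_module() reads.
from typing import Any, Dict, List

class DummyValidator:
    def __init__(self) -> None:
        self.test_results: List[Dict[str, Any]] = []
        self.failed_tests: List[str] = []

    def run_all_tests(self) -> None:
        # Record one passing and one failing result in the shape the runner expects.
        self.test_results.append({"name": "example_pass", "success": True})
        self.test_results.append({"name": "example_fail", "success": False})
        self.failed_tests.append("example_fail")

validator = DummyValidator()
validator.run_all_tests()
passed = sum(1 for r in validator.test_results if r["success"])
print(f"passed={passed}, failed={len(validator.failed_tests)}")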
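
For orientation, a sketch of how this runner could be invoked and how the JSON report it writes with --output might be read back. The working directory and file paths are assumptions, not part of the package; the report keys mirror the summary dict assembled in generate_final_report():

# Sketch only: run two of the suites and inspect the saved report afterwards.
import json
import subprocess

subprocess.run(
    [
        "python", "run_all_validation_tests.py",
        "--tests", "test_sandbox_sync_comprehensive", "test_sandbox_usage_examples",
        "--log-level", "INFO",
        "--output", "report.json",
    ],
    cwd="scalebox/test",  # assumed location of the script in a source checkout
    check=False,          # the runner itself exits non-zero when a suite fails
)

with open("scalebox/test/report.json", encoding="utf-8") as f:
    report = json.load(f)

# Keys written by generate_final_report()
print(report["total_suites"], report["successful_suites"], report["failed_suites"])
print(f"{report['test_success_rate']:.1f}% of {report['total_tests']} tests passed")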