isage_benchmark_agent-0.1.0.1-cp311-none-any.whl

This diff represents the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. isage_benchmark_agent-0.1.0.1.dist-info/METADATA +91 -0
  2. isage_benchmark_agent-0.1.0.1.dist-info/RECORD +51 -0
  3. isage_benchmark_agent-0.1.0.1.dist-info/WHEEL +5 -0
  4. isage_benchmark_agent-0.1.0.1.dist-info/entry_points.txt +2 -0
  5. isage_benchmark_agent-0.1.0.1.dist-info/licenses/LICENSE +21 -0
  6. isage_benchmark_agent-0.1.0.1.dist-info/top_level.txt +1 -0
  7. sage/__init__.py +0 -0
  8. sage/benchmark/__init__.py +0 -0
  9. sage/benchmark/benchmark_agent/__init__.py +108 -0
  10. sage/benchmark/benchmark_agent/__main__.py +177 -0
  11. sage/benchmark/benchmark_agent/acebench_loader.py +369 -0
  12. sage/benchmark/benchmark_agent/adapter_registry.py +3036 -0
  13. sage/benchmark/benchmark_agent/config/config_loader.py +176 -0
  14. sage/benchmark/benchmark_agent/config/default_config.yaml +24 -0
  15. sage/benchmark/benchmark_agent/config/planning_exp.yaml +34 -0
  16. sage/benchmark/benchmark_agent/config/timing_detection_exp.yaml +34 -0
  17. sage/benchmark/benchmark_agent/config/tool_selection_exp.yaml +32 -0
  18. sage/benchmark/benchmark_agent/data_paths.py +332 -0
  19. sage/benchmark/benchmark_agent/evaluation/__init__.py +217 -0
  20. sage/benchmark/benchmark_agent/evaluation/analyzers/__init__.py +11 -0
  21. sage/benchmark/benchmark_agent/evaluation/analyzers/planning_analyzer.py +111 -0
  22. sage/benchmark/benchmark_agent/evaluation/analyzers/timing_analyzer.py +135 -0
  23. sage/benchmark/benchmark_agent/evaluation/analyzers/tool_selection_analyzer.py +124 -0
  24. sage/benchmark/benchmark_agent/evaluation/evaluator.py +228 -0
  25. sage/benchmark/benchmark_agent/evaluation/metrics.py +650 -0
  26. sage/benchmark/benchmark_agent/evaluation/report_builder.py +217 -0
  27. sage/benchmark/benchmark_agent/evaluation/unified_tool_selection.py +602 -0
  28. sage/benchmark/benchmark_agent/experiments/__init__.py +63 -0
  29. sage/benchmark/benchmark_agent/experiments/base_experiment.py +263 -0
  30. sage/benchmark/benchmark_agent/experiments/method_comparison.py +742 -0
  31. sage/benchmark/benchmark_agent/experiments/planning_exp.py +262 -0
  32. sage/benchmark/benchmark_agent/experiments/timing_detection_exp.py +198 -0
  33. sage/benchmark/benchmark_agent/experiments/tool_selection_exp.py +250 -0
  34. sage/benchmark/benchmark_agent/scripts/__init__.py +26 -0
  35. sage/benchmark/benchmark_agent/scripts/experiments/__init__.py +40 -0
  36. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_ablation.py +425 -0
  37. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_error.py +400 -0
  38. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_robustness.py +439 -0
  39. sage/benchmark/benchmark_agent/scripts/experiments/exp_analysis_scaling.py +565 -0
  40. sage/benchmark/benchmark_agent/scripts/experiments/exp_cross_dataset.py +406 -0
  41. sage/benchmark/benchmark_agent/scripts/experiments/exp_main_planning.py +315 -0
  42. sage/benchmark/benchmark_agent/scripts/experiments/exp_main_selection.py +344 -0
  43. sage/benchmark/benchmark_agent/scripts/experiments/exp_main_timing.py +270 -0
  44. sage/benchmark/benchmark_agent/scripts/experiments/exp_training_comparison.py +620 -0
  45. sage/benchmark/benchmark_agent/scripts/experiments/exp_utils.py +427 -0
  46. sage/benchmark/benchmark_agent/scripts/experiments/figure_generator.py +677 -0
  47. sage/benchmark/benchmark_agent/scripts/experiments/llm_service.py +332 -0
  48. sage/benchmark/benchmark_agent/scripts/experiments/run_paper1_experiments.py +627 -0
  49. sage/benchmark/benchmark_agent/scripts/experiments/sage_bench_cli.py +422 -0
  50. sage/benchmark/benchmark_agent/scripts/experiments/table_generator.py +430 -0
  51. sage/benchmark/benchmark_agent/tools_loader.py +212 -0
sage/benchmark/benchmark_agent/scripts/experiments/llm_service.py (new file)
@@ -0,0 +1,332 @@
"""
LLM Service Manager.

Provides unified management of the benchmark's LLM service:
- start/stop the vLLM server
- check service status
- manage multiple ports
"""

from __future__ import annotations

import os
import signal
import subprocess
import sys
import time
from pathlib import Path
from typing import Any

# Port configuration (fall back to a fixed port if sage.common is unavailable)
try:
    from sage.common.config.ports import SagePorts

    DEFAULT_LLM_PORT = SagePorts.BENCHMARK_LLM
except ImportError:
    DEFAULT_LLM_PORT = 8901

DEFAULT_LLM_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
LLM_PID_FILE = Path.home() / ".sage" / "benchmark_llm.pid"


def check_llm_service(port: int = DEFAULT_LLM_PORT) -> dict[str, Any]:
    """
    Check the status of the LLM service.

    Args:
        port: Service port.

    Returns:
        Status dict {"running": bool, "port": int, "model": str, "error": str}.
    """
    try:
        import httpx
    except ImportError:
        return {"running": False, "port": port, "model": None, "error": "httpx not installed"}

    result: dict[str, Any] = {"running": False, "port": port, "model": None, "error": None}

    try:
        response = httpx.get(f"http://localhost:{port}/v1/models", timeout=5.0)
        if response.status_code == 200:
            data = response.json()
            models = data.get("data", [])
            if models:
                result["running"] = True
                result["model"] = models[0].get("id", "unknown")
        else:
            result["error"] = f"HTTP {response.status_code}"
    except httpx.ConnectError:
        result["error"] = "Connection refused"
    except httpx.TimeoutException:
        result["error"] = "Timeout"
    except Exception as e:
        result["error"] = str(e)

    return result


def check_all_llm_services() -> dict[int, dict]:
    """
    Check every known LLM service port.

    Returns:
        {port: status_dict, ...}
    """
    try:
        from sage.common.config.ports import SagePorts

        ports = [SagePorts.BENCHMARK_LLM] + SagePorts.get_llm_ports()
    except ImportError:
        ports = [DEFAULT_LLM_PORT, 8001, 8000]

    # De-duplicate while preserving order
    seen = set()
    unique_ports = []
    for port in ports:
        if port not in seen:
            seen.add(port)
            unique_ports.append(port)

    return {port: check_llm_service(port) for port in unique_ports}


def start_llm_service(
    model: str = DEFAULT_LLM_MODEL,
    port: int = DEFAULT_LLM_PORT,
    gpu_memory: float = 0.5,
    timeout: int = 120,
) -> bool:
    """
    Start the vLLM service.

    Args:
        model: Model ID.
        port: Service port.
        gpu_memory: Fraction of GPU memory to use.
        timeout: Seconds to wait for startup.

    Returns:
        True if the service started successfully.
    """
    # Nothing to do if the service is already running
    status = check_llm_service(port)
    if status["running"]:
        print(f"✅ LLM service already running (port={port}, model={status['model']})")
        return True

    print("🚀 Starting LLM service...")
    print(f"   Model: {model}")
    print(f"   Port: {port}")
    print(f"   GPU memory: {gpu_memory * 100:.0f}%")

    # Make sure the PID file directory exists
    LLM_PID_FILE.parent.mkdir(parents=True, exist_ok=True)

    # Build the vLLM server command
    cmd = [
        sys.executable,
        "-m",
        "vllm.entrypoints.openai.api_server",
        "--model",
        model,
        "--port",
        str(port),
        "--gpu-memory-utilization",
        str(gpu_memory),
        "--trust-remote-code",
    ]

    try:
        # Launch as a detached background process. NOTE: piping the output of
        # a long-lived server can block it once the pipe buffer fills; a log
        # file or DEVNULL would be safer.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            start_new_session=True,
        )

        # Record the PID so stop_llm_service() can find it later
        with open(LLM_PID_FILE, "w") as f:
            f.write(str(process.pid))

        print(f"   PID: {process.pid}")
        print("   Waiting for the service to come up...")

        # Poll until the service is ready or the timeout expires
        for i in range(timeout):
            time.sleep(1)
            if check_llm_service(port)["running"]:
                print(f"\n✅ LLM service started (took {i + 1}s)")
                return True
            if i % 10 == 9:
                print(f"   Waited {i + 1}s...")

        print("\n❌ Service startup timed out")
        return False

    except Exception as e:
        print(f"❌ Startup failed: {e}")
        return False


def stop_llm_service() -> bool:
    """
    Stop the LLM service.

    Returns:
        True if the service was stopped.
    """
    if not LLM_PID_FILE.exists():
        print("ℹ️ No running LLM service found")
        return True

    try:
        with open(LLM_PID_FILE) as f:
            pid = int(f.read().strip())

        print(f"🛑 Stopping LLM service (PID={pid})...")
        os.kill(pid, signal.SIGTERM)

        # Wait for the process to exit
        for _ in range(10):
            try:
                os.kill(pid, 0)  # signal 0 only checks that the process exists
                time.sleep(0.5)
            except OSError:
                break

        LLM_PID_FILE.unlink(missing_ok=True)
        print("✅ LLM service stopped")
        return True

    except ProcessLookupError:
        # The process is already gone
        LLM_PID_FILE.unlink(missing_ok=True)
        print("✅ LLM service stopped")
        return True
    except Exception as e:
        print(f"❌ Stop failed: {e}")
        return False


def print_llm_status():
    """Print the status of all known LLM service ports."""
    print("\n📡 LLM service status")
    print("=" * 50)

    statuses = check_all_llm_services()

    for port, status in statuses.items():
        if status["running"]:
            print(f"  ✅ Port {port}: running")
            print(f"     Model: {status['model']}")
        else:
            print(f"  ❌ Port {port}: {status['error'] or 'not running'}")


def ensure_llm_available(
    port: int = DEFAULT_LLM_PORT,
    model: str = DEFAULT_LLM_MODEL,
    auto_start: bool = True,
    allow_cloud: bool = True,
) -> bool:
    """
    Ensure an LLM service is available.

    If no service is running and auto_start=True, try to start one.

    Args:
        port: Service port.
        model: Model ID.
        auto_start: Whether to start the service automatically.
        allow_cloud: Whether a cloud API counts as available (default True).

    Returns:
        True if a service is available.
    """
    # First check the requested port
    print(f"  🔍 Checking LLM service on port {port}...")
    status = check_llm_service(port)
    if status["running"]:
        print(f"  ✅ Found running service on port {port}")
        # Export the endpoint for downstream code
        os.environ["SAGE_LLM_PORT"] = str(port)
        os.environ["SAGE_CHAT_BASE_URL"] = f"http://localhost:{port}/v1"
        return True

    # Then scan the other known ports
    print("  🔍 Checking other common ports...")
    all_statuses = check_all_llm_services()
    for p, s in all_statuses.items():
        if s["running"]:
            print(f"  ℹ️ Found running service on port {p}")
            # Export the endpoint for downstream code
            os.environ["SAGE_LLM_PORT"] = str(p)
            os.environ["SAGE_CHAT_BASE_URL"] = f"http://localhost:{p}/v1"
            return True

    # Fall back to a cloud API if credentials are configured
    if allow_cloud and (os.environ.get("SAGE_CHAT_API_KEY") or os.environ.get("OPENAI_API_KEY")):
        print("  ℹ️ Cloud API credentials detected")
        return True

    # Finally, try to start a local service
    if auto_start:
        print("  ⚠️ No LLM service detected, attempting auto-start...")
        return start_llm_service(model=model, port=port)

    return False


# =============================================================================
# CLI entry point
# =============================================================================


def main():
    import argparse

    parser = argparse.ArgumentParser(
        description="LLM Service Manager",
    )

    subparsers = parser.add_subparsers(dest="action", help="Action")

    # start
    start_parser = subparsers.add_parser("start", help="Start the LLM service")
    start_parser.add_argument("--model", default=DEFAULT_LLM_MODEL, help="Model ID")
    start_parser.add_argument("--port", type=int, default=DEFAULT_LLM_PORT, help="Port")
    start_parser.add_argument("--gpu-memory", type=float, default=0.5, help="Fraction of GPU memory")

    # stop
    subparsers.add_parser("stop", help="Stop the LLM service")

    # status
    subparsers.add_parser("status", help="Show service status")

    args = parser.parse_args()

    if args.action == "start":
        success = start_llm_service(
            model=args.model,
            port=args.port,
            gpu_memory=args.gpu_memory,
        )
        return 0 if success else 1

    elif args.action == "stop":
        success = stop_llm_service()
        return 0 if success else 1

    elif args.action == "status":
        print_llm_status()
        return 0

    else:
        parser.print_help()
        return 0


if __name__ == "__main__":
    sys.exit(main())
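
For context, here is a minimal usage sketch of how an experiment script might consume this module; it is not part of the package. It assumes the wheel is installed, httpx is available, and a local vLLM server is the backend (with a cloud API, SAGE_CHAT_BASE_URL may be unset and the model id will differ). The base-URL fallback and the request shape are assumptions about vLLM's OpenAI-compatible endpoint, not API documented by this wheel.

# Hypothetical caller, not shipped in the wheel.
import os

import httpx

from sage.benchmark.benchmark_agent.scripts.experiments.llm_service import (
    DEFAULT_LLM_MODEL,
    ensure_llm_available,
)

if ensure_llm_available(auto_start=False):
    # ensure_llm_available() exports SAGE_CHAT_BASE_URL when it finds a local service
    base_url = os.environ.get("SAGE_CHAT_BASE_URL", "http://localhost:8901/v1")
    response = httpx.post(
        f"{base_url}/chat/completions",
        json={
            "model": DEFAULT_LLM_MODEL,
            "messages": [{"role": "user", "content": "ping"}],
        },
        timeout=30.0,
    )
    print(response.json()["choices"][0]["message"]["content"])
else:
    print("No LLM backend available")

Given the __main__ guard, the same operations should also be reachable from the shell, e.g. python -m sage.benchmark.benchmark_agent.scripts.experiments.llm_service status (module path taken from the RECORD above).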