coderfleet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. coderfleet/__init__.py +1 -0
  2. coderfleet/__main__.py +4 -0
  3. coderfleet/cli.py +212 -0
  4. coderfleet/compose.py +176 -0
  5. coderfleet/config.py +69 -0
  6. coderfleet/config_cmds.py +243 -0
  7. coderfleet/data/Dockerfile +92 -0
  8. coderfleet/data/__init__.py +0 -0
  9. coderfleet/data/accounts.conf.example +26 -0
  10. coderfleet/data/config.conf.example +31 -0
  11. coderfleet/data/entrypoint.sh +56 -0
  12. coderfleet/data/projects.conf.example +17 -0
  13. coderfleet/data/scripts/coderfleet_usage_status.py +138 -0
  14. coderfleet/docker_ops.py +385 -0
  15. coderfleet/init_wizard.py +227 -0
  16. coderfleet/login_cmd.py +168 -0
  17. coderfleet/server/__init__.py +0 -0
  18. coderfleet/server/docker_mgr.py +45 -0
  19. coderfleet/server/main.py +546 -0
  20. coderfleet/server/models.py +285 -0
  21. coderfleet/server/scheduler.py +1219 -0
  22. coderfleet/server/static/css/main.css +2906 -0
  23. coderfleet/server/static/index.html +378 -0
  24. coderfleet/server/static/js/accounts.js +85 -0
  25. coderfleet/server/static/js/app.js +28 -0
  26. coderfleet/server/static/js/chat.js +743 -0
  27. coderfleet/server/static/js/log.js +145 -0
  28. coderfleet/server/static/js/nav.js +46 -0
  29. coderfleet/server/static/js/projects.js +298 -0
  30. coderfleet/server/static/js/renderer.js +586 -0
  31. coderfleet/server/static/js/state.js +76 -0
  32. coderfleet/server/static/js/submit.js +200 -0
  33. coderfleet/server/static/js/tasks.js +92 -0
  34. coderfleet/server/static/js/terminal.js +347 -0
  35. coderfleet/server/static/js/utils.js +147 -0
  36. coderfleet/server/static/vendor/marked.min.js +6 -0
  37. coderfleet/server/static/vendor/xterm/addon-fit.js +2 -0
  38. coderfleet/server/static/vendor/xterm/xterm.css +218 -0
  39. coderfleet/server/static/vendor/xterm/xterm.js +2 -0
  40. coderfleet/server/terminal.py +129 -0
  41. coderfleet/task_cmds.py +311 -0
  42. coderfleet-0.1.0.dist-info/METADATA +492 -0
  43. coderfleet-0.1.0.dist-info/RECORD +45 -0
  44. coderfleet-0.1.0.dist-info/WHEEL +4 -0
  45. coderfleet-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1219 @@
1
+ """
2
+ scheduler.py — 任务调度核心
3
+
4
+ 职责:
5
+ - 解析 accounts.conf,获取账号列表
6
+ - 判断账号空闲/忙碌状态
7
+ - 分配任务到合适账号
8
+ - 异步执行任务,维护任务生命周期
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import json
14
+ import os
15
+ import random
16
+ import shlex
17
+ import subprocess
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ from coderfleet.server import docker_mgr
23
+ from coderfleet.server.models import (
24
+ Account,
25
+ AccountAuth,
26
+ AccountProxy,
27
+ AccountResponse,
28
+ AccountType,
29
+ Conversation,
30
+ ConversationStatus,
31
+ Project,
32
+ Task,
33
+ TaskStatus,
34
+ )
35
+
36
+
37
+ class Scheduler:
38
+ def __init__(self, workspace_dir: Path):
39
+ self.workspace_dir = workspace_dir
40
+ self.accounts_conf = workspace_dir / "accounts.conf"
41
+ self.projects_conf = workspace_dir / "projects.conf"
42
+ self.tasks_dir = workspace_dir / "tasks"
43
+ self.conversations_dir = workspace_dir / "conversations"
44
+ # task_id → asyncio.Task(后台运行的协程)
45
+ self._running: dict[str, asyncio.Task] = {}
46
+ self._loop_task: Optional[asyncio.Task] = None
47
+
48
+ @staticmethod
49
+ def task_process_marker(task_id: str) -> str:
50
+ return f"coderfleet-task-{task_id}"
51
+
52
@staticmethod
def build_cli_command(
    acc_type: AccountType,
    prompt: str,
    auto: bool,
    task_id: str,
    native_session_id: str = "",
    container_workdir: str = "",
    images: list[str] | None = None,
) -> str:
    """Build the shell command that launches one task inside a container.

    The agent CLI always runs with machine-readable output
    (``--output-format stream-json`` for claude, ``--json`` for codex) so
    the native session / thread id can be extracted from the log later.
    The prompt is always fed on stdin.

    Args:
        acc_type: ``AccountType.claude`` selects the claude CLI; any other
            value takes the codex branch.
        prompt: Prompt text piped to the CLI.
        auto: Selects the permissive permission / sandbox flags.
        task_id: Used for the process marker, the ``CODERFLEET_TASK_ID``
            variable and the log / exit file names.
        native_session_id: When non-empty, resume that session.
        container_workdir: When non-empty, ``cd`` there first.
        images: Image paths inside the container. codex receives them as
            ``-i`` flags; claude gets them appended to the prompt text.

    Returns:
        A shell command string that creates ``/workspace/.coderfleet-tasks``
        and starts the task in the background via ``setsid bash -c``.
    """
    is_claude = acc_type == AccountType.claude

    # claude has no -i flag, so image paths are appended to the prompt.
    stdin_text = prompt
    if is_claude and images:
        listing = "\n".join(images)
        stdin_text = f"{prompt}\n\n[Attached images:\n{listing}]"

    if is_claude:
        mode = "--dangerously-skip-permissions" if auto else "--permission-mode acceptEdits"
        resume_flag = f" --resume {shlex.quote(native_session_id)}" if native_session_id else ""
        tool_cmd = f"claude -p {mode} --output-format stream-json --verbose{resume_flag}"
    else:
        image_flags = "".join(f" -i {shlex.quote(path)}" for path in (images or []))
        if native_session_id:
            # The resume subcommand does not support --sandbox, so the bypass
            # flag is used instead when running in auto mode.
            bypass = " --dangerously-bypass-approvals-and-sandbox" if auto else ""
            tool_cmd = (
                f"codex exec resume {shlex.quote(native_session_id)}"
                f" --json{bypass}{image_flags}"
            )
        else:
            sandbox_mode = "danger-full-access" if auto else "workspace-write"
            tool_cmd = f"codex exec --json --sandbox {sandbox_mode}{image_flags}"

    process_name = shlex.quote(Scheduler.task_process_marker(task_id))
    # Prompt goes through stdin: per the original note, some claude CLI
    # versions no longer accept it as a positional argument.
    inner_cmd = (
        f"printf '%s\\n' {shlex.quote(stdin_text)} | "
        f"CODERFLEET_TASK_ID={shlex.quote(task_id)} exec -a {process_name} "
        f"{tool_cmd}"
    )
    if container_workdir:
        inner_cmd = f"cd {shlex.quote(container_workdir)} && {inner_cmd}"

    log_file = f"/workspace/.coderfleet-tasks/{task_id}.log"
    exit_file = f"/workspace/.coderfleet-tasks/{task_id}.exit"
    # inner_cmd is wrapped in a subshell ( ... ): `exec -a` then replaces the
    # subshell only, so the outer bash survives to run `echo $?`. Without the
    # parentheses exec would replace the outer bash and the exit file would
    # never be written.
    wrapper_body = (
        f"( {inner_cmd} ) >> {shlex.quote(log_file)} 2>&1; "
        f"echo $? > {shlex.quote(exit_file)}"
    )
    return (
        "mkdir -p /workspace/.coderfleet-tasks && "
        f"setsid bash -c {shlex.quote(wrapper_body)} &"
    )
121
+
122
@staticmethod
def build_usage_status_command(acc_type: AccountType) -> str:
    """Return the usage-status shell command for ``acc_type``.

    Only codex accounts have one; every other type yields an empty string.
    """
    return "coderfleet-usage-status codex 2>&1" if acc_type == AccountType.codex else ""
127
+
128
@staticmethod
def extract_native_session_id(acc_type: AccountType, text: str) -> str:
    """Find the CLI's native session id in a chunk of JSON-lines output.

    Lines that do not start with ``{`` or do not parse as JSON are ignored.
    For codex the id is the ``thread_id`` of a ``thread.started`` event; for
    any other account type it is the first truthy ``session_id`` field.

    Returns:
        The id as a string, or ``""`` when none is present in ``text``.
    """
    wants_thread = acc_type == AccountType.codex
    for raw in text.splitlines():
        candidate = raw.strip()
        if not candidate.startswith("{"):
            continue
        try:
            event = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if wants_thread:
            if event.get("type") == "thread.started" and event.get("thread_id"):
                return str(event["thread_id"])
            continue
        if event.get("session_id"):
            return str(event["session_id"])
    return ""
144
+
145
+ # ── 账号管理 ──────────────────────────────────────────
146
+
147
+ def get_accounts(self) -> list[Account]:
148
+ """解析 accounts.conf,返回所有账号"""
149
+ accounts = []
150
+ if not self.accounts_conf.exists():
151
+ return accounts
152
+ for line in self.accounts_conf.read_text(encoding="utf-8").splitlines():
153
+ line = line.strip().rstrip("\r")
154
+ if not line or line.startswith("#"):
155
+ continue
156
+ parts = {}
157
+ for token in line.split():
158
+ if "=" in token:
159
+ k, v = token.split("=", 1)
160
+ parts[k.upper()] = v
161
+ if "NAME" not in parts or "TYPE" not in parts:
162
+ continue
163
+ try:
164
+ acc_type = AccountType(parts["TYPE"])
165
+ auth = AccountAuth(parts.get("AUTH", AccountAuth.login.value))
166
+ proxy = AccountProxy(parts.get("PROXY", AccountProxy.relay.value))
167
+ except ValueError:
168
+ continue
169
+ env_file = parts.get("ENV_FILE", "")
170
+ if auth == AccountAuth.env and acc_type != AccountType.claude:
171
+ continue
172
+ if auth == AccountAuth.env and not env_file:
173
+ env_file = f"./accounts/{parts['NAME']}/env"
174
+ accounts.append(Account(
175
+ name = parts["NAME"],
176
+ type = acc_type,
177
+ auth = auth,
178
+ env_file = env_file,
179
+ proxy = proxy,
180
+ ))
181
+ return accounts
182
+
183
+ def get_projects(self) -> list[Project]:
184
+ projects: list[Project] = []
185
+ if self.projects_conf.exists():
186
+ for line in self.projects_conf.read_text(encoding="utf-8").splitlines():
187
+ line = line.strip().rstrip("\r")
188
+ if not line or line.startswith("#"):
189
+ continue
190
+ parts = {}
191
+ for token in line.split():
192
+ if "=" in token:
193
+ k, v = token.split("=", 1)
194
+ parts[k.upper()] = v
195
+ if "NAME" not in parts or "ACCOUNT" not in parts or "PATH" not in parts:
196
+ continue
197
+ path = parts["PATH"].replace("~", str(Path.home()), 1)
198
+ projects.append(Project(
199
+ name=parts["NAME"],
200
+ account=parts["ACCOUNT"],
201
+ path=path,
202
+ ))
203
+
204
+ return projects
205
+
206
def get_busy_accounts(self) -> set[str]:
    """Return the names of accounts that currently own a running task."""
    return {
        task.account
        for task in Task.load_all(self.tasks_dir)
        if task.status == TaskStatus.running
    }
213
+
214
def list_accounts(self) -> list[AccountResponse]:
    """Build the account overview: config plus live container/task state.

    For every configured account this reports its projects, whether any of
    its project containers is running, whether it is busy (owns a running
    task), the first running task seen, and done/failed task counts.
    """
    # One pass over all tasks: first running task and done/failed counts
    # per account.
    running_tasks: dict[str, Task] = {}
    done_counts: dict[str, int] = {}
    failed_counts: dict[str, int] = {}
    for task in Task.load_all(self.tasks_dir):
        owner = task.account
        if task.status == TaskStatus.running:
            running_tasks.setdefault(owner, task)
        elif task.status == TaskStatus.done:
            done_counts[owner] = done_counts.get(owner, 0) + 1
        elif task.status == TaskStatus.failed:
            failed_counts[owner] = failed_counts.get(owner, 0) + 1
    busy = set(running_tasks)

    projects_by_account: dict[str, list[str]] = {}
    for project in self.get_projects():
        projects_by_account.setdefault(project.account, []).append(project.name)

    overview = []
    for acc in self.get_accounts():
        project_names = projects_by_account.get(acc.name, [])
        # NOTE(review): container names are derived here as "<type>-<project>"
        # while other methods call Project.container_name(); confirm they agree.
        containers = [f"{acc.type.value}-{pn}" for pn in project_names]
        # Every container is probed (list, not generator) as in the original.
        probes = [docker_mgr.is_container_running(ctr) for ctr in containers]
        current = running_tasks.get(acc.name)
        overview.append(AccountResponse(
            name=acc.name,
            type=acc.type,
            auth=acc.auth,
            env_file=acc.env_file,
            proxy=acc.proxy,
            projects=project_names,
            running=any(probes),
            busy=acc.name in busy,
            container=" ".join(containers),
            running_task_id=current.id if current else "",
            running_task_prompt=current.prompt if current else "",
            task_done_count=done_counts.get(acc.name, 0),
            task_failed_count=failed_counts.get(acc.name, 0),
        ))
    return overview
259
+
260
+ def list_projects(self) -> list[Project]:
261
+ return self.get_projects()
262
+
263
def find_idle_account(
    self,
    prefer_type: Optional[AccountType] = None,
    prefer_project: Optional[str] = None,
) -> Optional[Account]:
    """Return the first idle account matching the given preferences.

    An account qualifies when all of the following hold:
      - its type equals ``prefer_type`` (if given);
      - one of its projects contains ``prefer_project`` (if given; paths
        are compared after canonicalisation);
      - at least one of its project containers is running;
      - it does not own a running task.

    Args:
        prefer_type: Restrict the search to this account type.
        prefer_project: Host path the account must be able to serve.

    Returns:
        The first qualifying account in ``accounts.conf`` order, else None.
    """
    busy = self.get_busy_accounts()

    # projects.conf is parsed once here instead of once per account.
    projects_by_account: dict[str, list[Project]] = {}
    for project in self.get_projects():
        projects_by_account.setdefault(project.account, []).append(project)

    for acc in self.get_accounts():
        # Cheap in-memory filters first, file and docker lookups last. All
        # checks are pure filters, so the order does not affect the result.
        if prefer_type and acc.type != prefer_type:
            continue
        if acc.name in busy:
            continue
        own_projects = projects_by_account.get(acc.name)
        if not own_projects:
            continue
        if prefer_project and not self.find_project_for_path(prefer_project, acc.name):
            continue
        if any(
            docker_mgr.is_container_running(p.container_name(acc.type))
            for p in own_projects
        ):
            return acc

    return None
299
+
300
+ @staticmethod
301
+ def _canonical_path(path: str) -> Path:
302
+ return Path(path).expanduser().resolve()
303
+
304
+ def _path_under_root(self, root: str, project: str) -> bool:
305
+ account_root = self._canonical_path(root)
306
+ project_path = self._canonical_path(project)
307
+ try:
308
+ project_path.relative_to(account_root)
309
+ except ValueError:
310
+ return False
311
+ return True
312
+
313
+ def account_can_access_project(self, acc: Account, project: str) -> bool:
314
+ return any(
315
+ p.account == acc.name and self._path_under_root(p.path, project)
316
+ for p in self.get_projects()
317
+ )
318
+
319
+ def find_project_by_name(self, name: str) -> Optional[Project]:
320
+ return next((p for p in self.get_projects() if p.name == name), None)
321
+
322
+ def find_project_for_path(self, project: Optional[str], account: Optional[str] = None) -> Optional[Project]:
323
+ if not project:
324
+ return None
325
+ matching = [
326
+ p for p in self.get_projects()
327
+ if self._path_under_root(p.path, project)
328
+ ]
329
+ if account:
330
+ matching = [p for p in matching if p.account == account]
331
+ if not matching:
332
+ return None
333
+ return max(matching, key=lambda p: len(str(self._canonical_path(p.path))))
334
+
335
+ def resolve_task_project(self, acc: Account, project: Optional[str]) -> str:
336
+ if not project:
337
+ account_projects = [p for p in self.get_projects() if p.account == acc.name]
338
+ if len(account_projects) == 1:
339
+ return str(self._canonical_path(account_projects[0].path))
340
+ if not account_projects:
341
+ raise ValueError(f"账号 '{acc.name}' 未关联项目,请先创建项目")
342
+ raise ValueError(f"账号 '{acc.name}' 关联了多个项目,请选择项目")
343
+ if not self.account_can_access_project(acc, project):
344
+ raise ValueError(f"项目 '{project}' 未关联账号 '{acc.name}'")
345
+ return str(self._canonical_path(project))
346
+
347
+ def container_workdir_for_project(self, owner: Project, project: str) -> str:
348
+ account_root = self._canonical_path(owner.path)
349
+ project_path = self._canonical_path(project)
350
+ rel = project_path.relative_to(account_root)
351
+ if str(rel) == ".":
352
+ return "/workspace"
353
+ return "/workspace/" + rel.as_posix()
354
+
355
+ # ── 任务管理 ──────────────────────────────────────────
356
+
357
+ @staticmethod
358
+ def new_task_id() -> str:
359
+ ts = datetime.now().strftime("%Y%m%d%H%M%S")
360
+ rand = random.randint(0, 9999)
361
+ return f"{ts}-{rand:04d}"
362
+
363
def list_tasks(self) -> list[Task]:
    """Return every task stored in the tasks directory."""
    stored = Task.load_all(self.tasks_dir)
    return stored
365
+
366
+ def get_task(self, task_id: str) -> Optional[Task]:
367
+ path = self.tasks_dir / f"{task_id}.json"
368
+ if not path.exists():
369
+ return None
370
+ return Task.load(path)
371
+
372
+ def get_log_path(self, task_id: str) -> Path:
373
+ return self.tasks_dir / f"{task_id}.log"
374
+
375
+ @staticmethod
376
+ def new_conversation_id() -> str:
377
+ ts = datetime.now().strftime("%Y%m%d%H%M%S")
378
+ rand = random.randint(0, 9999)
379
+ return f"conv-{ts}-{rand:04d}"
380
+
381
def list_conversations(self, include_archived: bool = False) -> list[Conversation]:
    """Return stored conversations, hiding archived ones by default."""
    everything = Conversation.load_all(self.conversations_dir)
    if include_archived:
        return everything
    return [c for c in everything if c.status != ConversationStatus.archived]
386
+
387
+ def archive_conversation(self, conversation_id: str, status: ConversationStatus) -> Conversation:
388
+ conv = self.get_conversation(conversation_id)
389
+ if conv is None:
390
+ raise ValueError(f"任务链 '{conversation_id}' 不存在")
391
+ conv.status = status
392
+ conv.save(self.conversations_dir)
393
+ return conv
394
+
395
+ def delete_conversation(self, conversation_id: str) -> None:
396
+ path = self.conversations_dir / f"{conversation_id}.json"
397
+ if not path.exists():
398
+ raise ValueError(f"任务链 '{conversation_id}' 不存在")
399
+ path.unlink()
400
+
401
+ def get_conversation(self, conversation_id: str) -> Optional[Conversation]:
402
+ path = self.conversations_dir / f"{conversation_id}.json"
403
+ if not path.exists():
404
+ return None
405
+ return Conversation.load(path)
406
+
407
def ensure_conversation_available(self, conversation: Conversation) -> None:
    """Raise if ``conversation`` already has a running task.

    Raises:
        RuntimeError: If any stored task of this conversation is running.
    """
    occupied = any(
        task.conversation_id == conversation.id and task.status == TaskStatus.running
        for task in Task.load_all(self.tasks_dir)
    )
    if occupied:
        raise RuntimeError(f"任务链 '{conversation.name}' 正在运行,请等待当前任务结束")
411
+
412
+ def update_conversation_native_session(
413
+ self,
414
+ conversation_id: str,
415
+ native_session_id: str,
416
+ task_id: str,
417
+ ) -> None:
418
+ conversation = self.get_conversation(conversation_id)
419
+ if conversation is None:
420
+ return
421
+ conversation.touch(
422
+ self.conversations_dir,
423
+ native_session_id=native_session_id,
424
+ last_task_id=task_id,
425
+ )
426
+
427
def _create_conversation(self, name: str, acc: Account, project: Project, task_project: str) -> Conversation:
    """Create, persist and return a new conversation bound to ``acc``.

    Args:
        name: Display name of the conversation.
        acc: Account that owns the conversation.
        project: Configured project; only its name is stored.
        task_project: Host path stored as the conversation's project.
    """
    created = Conversation(
        id=self.new_conversation_id(),
        name=name,
        account=acc.name,
        type=acc.type,
        project=task_project,
        project_name=project.name,
    )
    created.save(self.conversations_dir)
    return created
438
+
439
+ # ── 提交任务 ──────────────────────────────────────────
440
+
441
+ # ── 定时与排队调度 ────────────────────────────────────────
442
+
443
+ def start_scheduling_loop(self) -> None:
444
+ if self._loop_task is None or self._loop_task.done():
445
+ self._loop_task = asyncio.create_task(
446
+ self._schedule_pending_tasks_loop(),
447
+ name="scheduler-pending-loop",
448
+ )
449
+
450
async def _schedule_pending_tasks_loop(self) -> None:
    """Run :meth:`schedule_next_tasks` roughly once per second, forever.

    Any ``Exception`` from a scheduling pass is printed with its traceback
    and the loop carries on; only cancellation ends it.
    """
    import traceback

    while True:
        try:
            await self.schedule_next_tasks()
        except Exception:
            print("Error in schedule_next_tasks:")
            traceback.print_exc()
        await asyncio.sleep(1.0)
459
+
460
async def schedule_next_tasks(self) -> None:
    """Run one scheduling pass over the stored tasks.

    1. Promote every ``scheduled`` task whose ``execute_at`` has passed to
       ``pending``. A task whose timestamp cannot be processed is marked
       ``failed`` and gets a failure log.
    2. Start ``pending`` tasks in creation order (FIFO), at most one per
       account, skipping accounts that already own a running task.
    """
    # 1. scheduled -> pending for tasks that are due.
    for t in Task.load_all(self.tasks_dir):
        if t.status != TaskStatus.scheduled or not t.execute_at:
            continue
        try:
            due_at = datetime.fromisoformat(t.execute_at)
            # Compare in the timestamp's own timezone. A naive now() against
            # an aware execute_at (e.g. "...+08:00") raises TypeError, which
            # previously made such tasks fail instead of run.
            if datetime.now(due_at.tzinfo) >= due_at:
                t.update_status(TaskStatus.pending, self.tasks_dir)
        except Exception as e:
            t.update_status(TaskStatus.failed, self.tasks_dir)
            self._write_failed_log(t, f"定时时间解析失败:{e}")

    # 2. Start pending tasks; reload so promotions from step 1 are seen.
    pending_tasks = [
        t for t in Task.load_all(self.tasks_dir)
        if t.status == TaskStatus.pending
    ]
    if not pending_tasks:
        return

    # Oldest first (FIFO by creation time).
    pending_tasks.sort(key=lambda t: t.created or "")
    busy_accounts = self.get_busy_accounts()

    for task in pending_tasks:
        if task.account in busy_accounts:
            continue
        # Reserve the account for this pass so a second pending task of the
        # same account is not started alongside.
        busy_accounts.add(task.account)
        await self._start_pending_task(task)
489
+
490
async def _start_pending_task(self, task: Task) -> None:
    """Move one pending task to ``running`` and launch it in the background.

    The task is marked ``failed`` (with a failure log) when its account or
    project is no longer configured, its container is not running, or any
    exception occurs while starting it.
    """
    def abort(reason: str) -> None:
        # Mark the task failed, then write the failure log.
        task.update_status(TaskStatus.failed, self.tasks_dir)
        self._write_failed_log(task, reason)

    try:
        acc = None
        for candidate in self.get_accounts():
            if candidate.name == task.account:
                acc = candidate
                break
        if acc is None:
            abort(f"账号 '{task.account}' 不存在")
            return

        project = self.find_project_for_path(task.project, task.account)
        if project is None:
            abort(f"项目路径 '{task.project}' 未配置")
            return

        container_name = project.container_name(acc.type)
        container_workdir = self.container_workdir_for_project(project, task.project)

        if not docker_mgr.is_container_running(container_name):
            abort(f"容器 {container_name} 未运行")
            return

        # Load the matching conversation, if the task belongs to one.
        conversation = None
        if task.conversation_id:
            conversation = self.get_conversation(task.conversation_id)

        # Switch to running before anything is written to the log.
        task.update_status(TaskStatus.running, self.tasks_dir)

        # Write the log header.
        log_path = self.get_log_path(task.id)
        self._write_log_header(log_path, task, acc, container_workdir, container_name)

        # Run in the background; keep a handle in self._running.
        runner = self._run(
            task,
            acc,
            log_path,
            getattr(task, "auto", False),
            conversation,
            container_workdir,
            container_name,
            getattr(task, "images", []),
        )
        self._running[task.id] = asyncio.create_task(runner, name=f"task-{task.id}")

    except Exception as e:
        abort(f"拉起任务失败:{e}")
541
+
542
+ def _write_failed_log(self, task: Task, reason: str) -> None:
543
+ log_path = self.get_log_path(task.id)
544
+ log_path.parent.mkdir(parents=True, exist_ok=True)
545
+ with log_path.open("w", encoding="utf-8") as f:
546
+ f.write("=== CoderFleet Task Log ===\n")
547
+ f.write(f"id: {task.id}\n")
548
+ f.write(f"status: failed\n")
549
+ f.write(f"prompt: {task.prompt}\n")
550
+ f.write(f"error: {reason}\n")
551
+ f.write("=" * 38 + "\n\n")
552
+ f.write(f"任务启动失败:{reason}\n")
553
+
554
async def submit(
    self,
    prompt: str,
    account_name: Optional[str] = None,
    prefer_project: Optional[str] = None,
    prefer_type: Optional[AccountType] = None,
    auto: bool = False,
    conversation_id: Optional[str] = None,
    conversation_name: Optional[str] = None,
    project_name: Optional[str] = None,
    images: Optional[list[str]] = None,
    execute_at: Optional[str] = None,
) -> Task:
    """Submit a task and return its record immediately.

    Depending on the inputs the task is stored as ``scheduled`` (future
    ``execute_at``), ``pending`` (account or conversation busy) or
    ``running`` (started right away in the background). Callers follow
    progress through ``task.id``.

    Args:
        prompt: Prompt text for the agent CLI.
        account_name: Explicit account; otherwise one is selected.
        prefer_project: Host path the task should run in.
        prefer_type: Restrict account selection to this type.
        auto: Run the CLI with its permissive permission/sandbox flags.
        conversation_id: Continue this conversation; its account, type and
            project override the corresponding arguments.
        conversation_name: Create a new conversation with this name when no
            ``conversation_id`` is given.
        project_name: Configured project name; fixes account and path.
        images: Container-side image paths attached to the prompt.
        execute_at: ISO-8601 time; if in the future the task is scheduled.
            An unparseable value is ignored and the task runs normally.

    Raises:
        ValueError: Unknown conversation/project/account, or mismatching
            account, type or project.
        RuntimeError: No matching account, or the container is not running.
    """
    # Never share one default list between calls; copy the caller's list.
    images = list(images) if images else []
    is_pending = False
    conversation: Optional[Conversation] = None

    if conversation_id:
        conversation = self.get_conversation(conversation_id)
        if conversation is None:
            raise ValueError(f"任务链 '{conversation_id}' 不存在")

        # Queue behind a task that is already running in this conversation.
        if any(
            t.conversation_id == conversation.id and t.status == TaskStatus.running
            for t in Task.load_all(self.tasks_dir)
        ):
            is_pending = True

        account_name = conversation.account
        prefer_type = conversation.type
        prefer_project = conversation.project
        project_name = conversation.project_name or project_name

    selected_project: Optional[Project] = None
    if project_name:
        selected_project = self.find_project_by_name(project_name)
        if selected_project is None:
            raise ValueError(f"项目 '{project_name}' 不存在")
        if account_name and account_name != selected_project.account:
            raise ValueError(
                f"项目 '{project_name}' 关联账号为 {selected_project.account},与指定账号 {account_name} 不一致"
            )
        account_name = selected_project.account
        prefer_project = selected_project.path

    # Decide which account runs the task.
    acc: Optional[Account] = None
    if account_name:
        acc = next((a for a in self.get_accounts() if a.name == account_name), None)
        if acc is None:
            raise ValueError(f"账号 '{account_name}' 不存在")
        if prefer_type and acc.type != prefer_type:
            raise ValueError(
                f"账号 '{account_name}' 类型为 {acc.type.value},与筛选类型 {prefer_type.value} 不一致"
            )
        # A busy account means the task has to queue.
        if acc.name in self.get_busy_accounts():
            is_pending = True
    else:
        # Prefer an idle account.
        acc = self.find_idle_account(
            prefer_type=prefer_type,
            prefer_project=prefer_project,
        )
        if acc is None:
            # Fall back to a busy account that serves the project and has a
            # project container online.
            matching_busy_accounts = []
            for a in self.get_accounts():
                if prefer_type and a.type != prefer_type:
                    continue
                if prefer_project:
                    project_match = self.find_project_for_path(prefer_project, a.name)
                    if not project_match:
                        continue
                account_projects = [p for p in self.get_projects() if p.account == a.name]
                if not account_projects:
                    continue
                any_running = any(
                    docker_mgr.is_container_running(p.container_name(a.type))
                    for p in account_projects
                )
                if not any_running:
                    continue
                matching_busy_accounts.append(a)

            if not matching_busy_accounts:
                hints = []
                if prefer_project:
                    hints.append(f"项目:{prefer_project}")
                if prefer_type:
                    hints.append(f"类型:{prefer_type.value}")
                hint_str = ",".join(hints)
                raise RuntimeError(
                    f"没有匹配的空闲账号{('(' + hint_str + ')') if hint_str else ''}"
                )

            # Take the first candidate and queue the task behind its work.
            acc = matching_busy_accounts[0]
            is_pending = True

    task_project = self.resolve_task_project(acc, prefer_project)
    selected_project = selected_project or self.find_project_for_path(task_project, acc.name)
    if selected_project is None:
        raise ValueError(f"项目 '{task_project}' 未配置")
    container_name = selected_project.container_name(acc.type)
    container_workdir = self.container_workdir_for_project(selected_project, task_project)

    if not docker_mgr.is_container_running(container_name):
        raise RuntimeError(f"容器 {container_name} 未运行")

    if conversation is None and conversation_name:
        conversation = self._create_conversation(conversation_name, acc, selected_project, task_project)

    task_id = self.new_task_id()
    log_path = self.get_log_path(task_id)

    def make_task(status: TaskStatus, **extra) -> Task:
        # Single place for the fields shared by all three task states.
        return Task(
            id=task_id,
            status=status,
            account=acc.name,
            type=acc.type,
            prompt=prompt,
            project=task_project,
            project_name=selected_project.name,
            conversation_id=conversation.id if conversation else "",
            native_session_id=conversation.native_session_id if conversation else "",
            auto=auto,
            images=images,
            **extra,
        )

    def create_empty_log() -> None:
        # Original note: an empty log file avoids a 404 on the SSE log stream.
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_path.write_text("", encoding="utf-8")

    # ── scheduled task ──
    due_at: Optional[datetime] = None
    if execute_at:
        # Only the parse is guarded: the previous wider try/except ValueError
        # also swallowed errors raised while building or saving the Task.
        try:
            due_at = datetime.fromisoformat(execute_at)
        except ValueError:
            due_at = None  # unparseable: handled as a normal task, as before
    # Compare in the timestamp's own timezone; a naive now() against an
    # aware value would raise TypeError.
    if due_at is not None and due_at > datetime.now(due_at.tzinfo):
        task = make_task(TaskStatus.scheduled, execute_at=execute_at)
        task.save(self.tasks_dir)
        create_empty_log()
        return task

    # ── queued task ──
    if is_pending:
        task = make_task(TaskStatus.pending)
        task.save(self.tasks_dir)
        create_empty_log()
        # Trigger a scheduling pass right away.
        # NOTE(review): the created asyncio task is not referenced anywhere;
        # asyncio recommends keeping a reference until it finishes.
        asyncio.create_task(self.schedule_next_tasks())
        return task

    # ── run immediately ──
    task = make_task(TaskStatus.running)
    task.save(self.tasks_dir)

    self._write_log_header(log_path, task, acc, container_workdir, container_name)

    bg = asyncio.create_task(
        self._run(task, acc, log_path, auto, conversation, container_workdir, container_name, images),
        name=f"task-{task_id}",
    )
    self._running[task_id] = bg

    return task
759
+
760
+ # ── 从已有任务创建任务链 ──────────────────────────────
761
+
762
+ def create_conversation_from_task(
763
+ self,
764
+ name: str,
765
+ task_id: str,
766
+ ) -> Conversation:
767
+ """
768
+ 从已有任务(需有 native_session_id)创建任务链。
769
+ 之后可通过 conversation_id 续接该会话上下文。
770
+ """
771
+ task = self.get_task(task_id)
772
+ if task is None:
773
+ raise ValueError(f"任务 '{task_id}' 不存在")
774
+ if not task.native_session_id:
775
+ raise ValueError(
776
+ f"任务 '{task_id}' 没有 native_session_id,"
777
+ "可能是较早的任务(未开启流式 JSON 输出模式)"
778
+ )
779
+ acc = next((a for a in self.get_accounts() if a.name == task.account), None)
780
+ if acc is None:
781
+ raise ValueError(f"账号 '{task.account}' 不存在")
782
+ project = self.find_project_for_path(task.project, task.account)
783
+ if project is None:
784
+ raise ValueError(f"项目路径 '{task.project}' 未配置")
785
+
786
+ conversation = Conversation(
787
+ id = self.new_conversation_id(),
788
+ name = name,
789
+ account = acc.name,
790
+ type = acc.type,
791
+ project = task.project,
792
+ project_name = project.name,
793
+ native_session_id = task.native_session_id,
794
+ last_task_id = task_id,
795
+ )
796
+ conversation.save(self.conversations_dir)
797
+ return conversation
798
+
799
+ def _write_log_header(self, log_path: Path, task: Task, acc: Account, container_workdir: str = "", container_name: str = "") -> None:
800
+ log_path.parent.mkdir(parents=True, exist_ok=True)
801
+ with log_path.open("a", encoding="utf-8") as f:
802
+ f.write("=== CoderFleet Task Log ===\n")
803
+ f.write(f"id: {task.id}\n")
804
+ f.write(f"account: {acc.name} ({acc.type.value})\n")
805
+ f.write(f"project: {task.project}\n")
806
+ if container_name:
807
+ f.write(f"container: {container_name}\n")
808
+ if container_workdir:
809
+ f.write(f"container cwd: {container_workdir}\n")
810
+ if task.conversation_id:
811
+ f.write(f"conversation: {task.conversation_id}\n")
812
+ if task.native_session_id:
813
+ f.write(f"native session: {task.native_session_id}\n")
814
+ escaped_prompt = task.prompt.replace('\r\n', '\\n').replace('\n', '\\n') if task.prompt else ""
815
+ f.write(f"prompt: {escaped_prompt}\n")
816
+ f.write(f"started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
817
+ f.write("=" * 38 + "\n\n")
818
+
819
async def _stream_container_log(
    self,
    task: Task,
    acc: Account,
    log_path: Path,
    host_log: Path,
    host_exit: Path,
    conversation: Optional[Conversation] = None,
    start_offset: int = 0,
) -> int:
    """Mirror a container-written log into the task log until the task ends.

    Polls ``host_log`` every 0.3 s and appends new content to ``log_path``.
    Once ``host_exit`` exists, one final read is done, the exit code is read
    from it, and the task status, conversation and log footer are updated.
    The first native session id seen in the output is stored on the task
    (and on the conversation, if any).

    ``CancelledError`` is deliberately not handled here; the caller owns the
    kill logic.

    Args:
        task: Task being tracked; updated and saved in place.
        acc: Account running the task (selects the session-id format).
        log_path: Host-side task log to append to.
        host_log: Log file written from inside the container.
        host_exit: File whose appearance signals completion; holds the exit
            code.
        conversation: Conversation to update, if the task belongs to one.
        start_offset: Byte offset in ``host_log`` to start copying from.

    Returns:
        The exit code, or -1 when it cannot be read or parsed.
    """
    import codecs

    import aiofiles

    offset = start_offset
    captured_session_id = ""
    # Output not yet scanned for a session id (an unfinished trailing line).
    unscanned = ""
    # Incremental decoding keeps a multi-byte UTF-8 character intact when it
    # straddles two polls; decoding chunks separately would corrupt it.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    container_name = self._get_task_container(task) or ""

    async with aiofiles.open(log_path, mode="a", encoding="utf-8") as f:
        while True:
            await asyncio.sleep(0.3)
            # Sample completion BEFORE reading so the read below also picks
            # up everything written up to the exit marker.
            is_done = host_exit.exists()

            text = ""
            if host_log.exists() and host_log.stat().st_size > offset:
                async with aiofiles.open(host_log, mode="rb") as hf:
                    await hf.seek(offset)
                    new_bytes = await hf.read()
                # Advance by what was actually read. read() runs to EOF and
                # may return more than the stat size seen above; advancing to
                # that size would copy the surplus again on the next poll.
                offset += len(new_bytes)
                text = decoder.decode(new_bytes)
            if is_done:
                # Flush a dangling incomplete byte sequence, if any.
                text += decoder.decode(b"", final=True)

            if text:
                await f.write(text)
                await f.flush()

            if not captured_session_id:
                unscanned += text
                if is_done:
                    scan, unscanned = unscanned, ""
                else:
                    # Scan complete lines only; a JSON line cut in half by
                    # the poll boundary waits for its remainder.
                    scan, _, unscanned = unscanned.rpartition("\n")
                if scan:
                    captured_session_id = self.extract_native_session_id(acc.type, scan)
                if captured_session_id:
                    task.native_session_id = captured_session_id
                    task.save(self.tasks_dir)
                    if conversation:
                        self.update_conversation_native_session(
                            conversation.id,
                            captured_session_id,
                            task.id,
                        )

            if is_done:
                break

    rc = -1
    if host_exit.exists():
        try:
            rc = int(host_exit.read_text().strip())
        except (ValueError, OSError):
            rc = -1

    if rc == 0:
        if conversation:
            if not conversation.native_session_id and captured_session_id:
                self.update_conversation_native_session(conversation.id, captured_session_id, task.id)
            else:
                conversation.touch(self.conversations_dir, last_task_id=task.id)
        task.update_status(TaskStatus.done, self.tasks_dir)
        await self._append_usage_status(log_path, acc, container_name)
        self._append_log_footer(log_path, "done")
    else:
        task.update_status(TaskStatus.failed, self.tasks_dir)
        await self._append_usage_status(log_path, acc, container_name)
        self._append_log_footer(log_path, f"failed (exit={rc})")

    return rc
892
+
893
async def _run(
    self,
    task: Task,
    acc: Account,
    log_path: Path,
    auto: bool,
    conversation: Optional[Conversation] = None,
    container_workdir: str = "",
    container_name: str = "",
    images: Optional[list[str]] = None,
) -> None:
    """Background coroutine: launch the task in its container, then track it.

    The CLI command is started through ``docker exec`` (detached inside the
    container), after which the host-side log file is polled by
    ``_stream_container_log`` until the task finishes.

    Args:
        task: Task to execute.
        acc: Account the task runs under.
        log_path: Host log file that receives the task output and footers.
        auto: Forwarded to ``build_cli_command``.
        conversation: Optional conversation; its native session id is
            forwarded to the CLI command.
        container_workdir: Working directory inside the container.
        container_name: Name of the container to ``docker exec`` into.
        images: Image arguments forwarded to ``build_cli_command``.  ``None``
            (the default) is treated as an empty list.

    On cancellation the in-container process is killed and the task is marked
    ``killed``; on any other error the task is marked ``failed``.  In every
    case the task is removed from ``self._running``, its host-side task files
    are cleaned up and the scheduler is re-triggered.
    """
    try:
        cmd = self.build_cli_command(
            acc.type,
            task.prompt,
            auto,
            task.id,
            native_session_id=conversation.native_session_id if conversation else "",
            container_workdir=container_workdir,
            # A fresh list per call; a mutable default would be shared
            # between every invocation that omits the argument.
            images=images if images is not None else [],
        )

        proc = await asyncio.create_subprocess_exec(
            "docker", "exec", container_name, "bash", "-c", cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        # communicate() drains the pipe while waiting; wait() alone can
        # deadlock once the child fills the OS pipe buffer.
        launch_output, _ = await proc.communicate()

        if proc.returncode != 0:
            err = launch_output.decode("utf-8", errors="replace") if launch_output else ""
            raise RuntimeError(f"docker exec failed (exit={proc.returncode}): {err.strip()}")

        project_root = self._get_project_root(task)
        if project_root is None:
            raise RuntimeError(f"找不到任务 {task.id} 对应的项目根目录")

        host_log = self._host_task_log(project_root, task.id)
        host_exit = self._host_task_exit(project_root, task.id)

        await self._stream_container_log(
            task, acc, log_path, host_log, host_exit, conversation
        )

    except asyncio.CancelledError:
        self.kill_task_process(task)
        task.update_status(TaskStatus.killed, self.tasks_dir)
        self._append_log_footer(log_path, "killed")
        return
    except Exception as e:
        task.update_status(TaskStatus.failed, self.tasks_dir)
        await self._append_usage_status(log_path, acc, container_name)
        self._append_log_footer(log_path, f"failed: {e}")
        return
    finally:
        self._running.pop(task.id, None)
        self._cleanup_container_task_files(task)
        asyncio.create_task(self.schedule_next_tasks())
952
+
953
async def _reattach(self, task: Task) -> None:
    """Re-attach to a container process that survived a server restart.

    Resolves the task's account and project root, then resumes log streaming
    from the current end of the container's host-side log file.

    Every exit path — including the early "account not found" and "project
    root not found" failures and errors raised while preparing the stream —
    goes through the same ``finally`` clause, so the task is always removed
    from ``self._running``, its host-side task files are cleaned up and the
    scheduler is re-triggered.

    Args:
        task: Task that is still recorded as running.
    """
    log_path = self.get_log_path(task.id)

    try:
        acc = next((a for a in self.get_accounts() if a.name == task.account), None)
        if acc is None:
            task.update_status(TaskStatus.failed, self.tasks_dir)
            self._append_log_footer(log_path, "failed: account not found on reattach")
            return

        project_root = self._get_project_root(task)
        if project_root is None:
            task.update_status(TaskStatus.failed, self.tasks_dir)
            self._append_log_footer(log_path, "failed: project root not found on reattach")
            return

        host_log = self._host_task_log(project_root, task.id)
        host_exit = self._host_task_exit(project_root, task.id)
        conversation = self.get_conversation(task.conversation_id) if task.conversation_id else None
        # Resume from the current end of the container log; stat() lives
        # inside the try so a vanished file marks the task failed instead of
        # leaving it stuck in "running".
        start_offset = host_log.stat().st_size if host_log.exists() else 0

        self._append_log_footer(log_path, "reattached after server restart")
        await self._stream_container_log(
            task, acc, log_path, host_log, host_exit, conversation,
            start_offset=start_offset,
        )
    except asyncio.CancelledError:
        self.kill_task_process(task)
        task.update_status(TaskStatus.killed, self.tasks_dir)
        self._append_log_footer(log_path, "killed")
        return
    except Exception as e:
        task.update_status(TaskStatus.failed, self.tasks_dir)
        self._append_log_footer(log_path, f"failed during reattach: {e}")
        return
    finally:
        self._running.pop(task.id, None)
        self._cleanup_container_task_files(task)
        asyncio.create_task(self.schedule_next_tasks())
993
+
994
async def _append_usage_status(self, log_path: Path, acc: Account, container_name: str) -> None:
    """Run the account's usage-status command in the container and log it.

    Best effort: nothing is written when there is no usage command for the
    account type or no container name, and failures are recorded in the log
    as text instead of being raised.

    Args:
        log_path: Log file to append the usage section to.
        acc: Account whose ``type`` selects the usage-status command.
        container_name: Container to ``docker exec`` into.
    """
    cmd = self.build_usage_status_command(acc.type)
    if not cmd or not container_name:
        return

    timeout_s = 45
    proc = None
    try:
        with log_path.open("a", encoding="utf-8") as f:
            f.write("\n" + "=" * 38 + "\n")
            f.write("usage status:\n")

        proc = await asyncio.create_subprocess_exec(
            "docker",
            "exec",
            container_name,
            "bash",
            "-lc",
            cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout_s)
        text = stdout.decode("utf-8", errors="replace").strip()
        if not text:
            text = "未获取到用量信息"
    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so spell the reason out explicitly.
        text = f"用量检查失败:timed out after {timeout_s}s"
    except Exception as e:
        text = f"用量检查失败:{e}"
    finally:
        # wait_for() only cancels communicate(); the local `docker exec`
        # child keeps running unless it is killed and reaped here.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                pass

    try:
        with log_path.open("a", encoding="utf-8") as f:
            f.write(text + "\n")
    except Exception:
        # Deliberate best effort: the usage section must never fail a task.
        pass
1026
+
1027
def _append_log_footer(self, log_path: Path, result: str) -> None:
    """Append a separator and a timestamped ``finished`` line to the log.

    Best effort: any error raised while opening or writing is swallowed.

    Args:
        log_path: Log file to append to.
        result: Outcome text placed in square brackets on the footer line.
    """
    try:
        with log_path.open("a", encoding="utf-8") as log_file:
            separator = "\n" + "=" * 38 + "\n"
            log_file.write(separator)
            stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            log_file.write(f"finished: {stamp} [{result}]\n")
    except Exception:
        pass
1034
+
1035
def _get_task_container(self, task: Task) -> Optional[str]:
    """Derive the container name for the project matching the task's path.

    Returns:
        The project's container name for ``task.type``, or ``None`` when no
        project matches ``task.project`` / ``task.account``.
    """
    owner = self.find_project_for_path(task.project, task.account)
    return None if owner is None else owner.container_name(task.type)
1041
+
1042
def _get_project_root(self, task: Task) -> Optional[Path]:
    """Return the host-side root directory of the task's project.

    This is the directory mounted at ``/workspace`` inside the container.

    Returns:
        ``Path(project.path)`` for the matching project, or ``None`` when no
        project matches ``task.project`` / ``task.account``.
    """
    owner = self.find_project_for_path(task.project, task.account)
    return None if owner is None else Path(owner.path)
1048
+
1049
+ @staticmethod
1050
+ def _host_task_log(project_root: Path, task_id: str) -> Path:
1051
+ return project_root / ".coderfleet-tasks" / f"{task_id}.log"
1052
+
1053
+ @staticmethod
1054
+ def _host_task_exit(project_root: Path, task_id: str) -> Path:
1055
+ return project_root / ".coderfleet-tasks" / f"{task_id}.exit"
1056
+
1057
def _cleanup_container_task_files(self, task: Task) -> None:
    """Delete the task's host-side ``.log`` and ``.exit`` files, if present.

    Does nothing when the task's project root cannot be resolved.
    """
    root = self._get_project_root(task)
    if root is None:
        return
    leftovers = (
        self._host_task_log(root, task.id),
        self._host_task_exit(root, task.id),
    )
    for leftover in leftovers:
        leftover.unlink(missing_ok=True)
1063
+
1064
def is_task_process_alive(self, task: Task) -> bool:
    """Report whether a process carrying the task's marker runs in its container.

    Runs ``pgrep -af <marker>`` through ``docker exec`` and treats exit
    status 0 as "alive".

    Returns:
        ``False`` when the task's container cannot be resolved or the probe
        exits non-zero; ``True`` otherwise.
    """
    container = self._get_task_container(task)
    if container is None:
        return False

    pattern = shlex.quote(self.task_process_marker(task.id))
    # NOTE(review): `pgrep -f` matches full command lines; if the wrapping
    # `bash -lc` stays alive it carries the marker too — confirm this cannot
    # yield a false positive.
    probe = [
        "docker",
        "exec",
        container,
        "bash",
        "-lc",
        f"pgrep -af {pattern} >/dev/null",
    ]
    completed = subprocess.run(
        probe,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    return completed.returncode == 0
1083
+
1084
def kill_task_process(self, task: Task) -> None:
    """Send SIGTERM to every process in the container matching the task marker.

    Runs ``pkill -TERM -f <marker>`` through ``docker exec``; the outcome of
    the command is ignored.  Does nothing when the task's container cannot
    be resolved.
    """
    container = self._get_task_container(task)
    if container is None:
        return

    pattern = shlex.quote(self.task_process_marker(task.id))
    terminate = [
        "docker",
        "exec",
        container,
        "bash",
        "-lc",
        f"pkill -TERM -f {pattern} || true",
    ]
    subprocess.run(
        terminate,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
1102
+
1103
async def reconcile_running_tasks(self) -> int:
    """Recover tasks still recorded as ``running``; called at server start.

    - Container process still alive: re-attach so the task keeps running.
    - Container process gone: restore the final status from the exit file,
      or mark the task ``failed`` when that is not possible.

    Returns:
        The number of tasks that were re-attached or finalised.
    """
    handled = 0
    for task in Task.load_all(self.tasks_dir):
        if task.status != TaskStatus.running:
            continue

        if self.is_task_process_alive(task):
            self._running[task.id] = asyncio.create_task(
                self._reattach(task),
                name=f"task-{task.id}",
            )
            handled += 1
            continue

        # Process is gone: try to recover the outcome from the exit file.
        # Every branch except a clean exit code 0 ends up as "failed".
        status = TaskStatus.failed
        project_root = self._get_project_root(task)
        if project_root is None:
            result = "failed: server restarted; project root not found"
        else:
            exit_file = self._host_task_exit(project_root, task.id)
            if not exit_file.exists():
                result = "failed: server restarted; no container process or exit file found"
            else:
                try:
                    rc = int(exit_file.read_text().strip())
                except (ValueError, OSError):
                    result = "failed: server restarted; could not read exit code"
                else:
                    if rc == 0:
                        status = TaskStatus.done
                        result = "done"
                    else:
                        result = f"failed (exit={rc})"

        task.update_status(status, self.tasks_dir)
        self._append_log_footer(self.get_log_path(task.id), result)
        self._cleanup_container_task_files(task)
        handled += 1

    return handled
1148
+
1149
+ # ── 终止任务 ──────────────────────────────────────────
1150
+
1151
async def kill_task(self, task_id: str) -> Task:
    """Terminate a running, pending or scheduled task.

    Pending and scheduled tasks are simply marked ``killed``.  For a running
    task the status is updated first, the in-container process is signalled,
    and the background coroutine is cancelled and awaited for up to 3 s.
    In both cases a footer is appended to the task log and the scheduler is
    re-triggered.

    Args:
        task_id: Identifier of the task to terminate.

    Returns:
        The task object after its status was updated.

    Raises:
        ValueError: The task does not exist.
        RuntimeError: The task is not running, pending or scheduled.
    """
    task = self.get_task(task_id)
    if task is None:
        raise ValueError(f"任务 '{task_id}' 不存在")

    queued_states = (TaskStatus.pending, TaskStatus.scheduled)
    if task.status != TaskStatus.running and task.status not in queued_states:
        raise RuntimeError(f"任务状态为 '{task.status.value}',只能终止 running、pending 或 scheduled 状态的任务")

    if task.status in queued_states:
        was_scheduled = task.status == TaskStatus.scheduled
        task.update_status(TaskStatus.killed, self.tasks_dir)
        if was_scheduled:
            reason = "killed by user (cancelled schedule)"
        else:
            reason = "killed by user (while pending)"
        self._append_log_footer(self.get_log_path(task_id), reason)
        # Kick the scheduler once so anything queued behind this task is
        # released (just in case).
        asyncio.create_task(self.schedule_next_tasks())
        return task

    # Update the status first to guard against concurrent writes.
    task.update_status(TaskStatus.killed, self.tasks_dir)

    # Stop the container process, then cancel the background coroutine.
    self.kill_task_process(task)
    worker = self._running.pop(task_id, None)
    if worker and not worker.done():
        worker.cancel()
        try:
            await asyncio.wait_for(asyncio.shield(worker), timeout=3)
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass

    self._append_log_footer(self.get_log_path(task_id), "killed by user")
    # Trigger the next scheduling round.
    asyncio.create_task(self.schedule_next_tasks())
    return task
1184
+
1185
def delete_task(self, task_id: str) -> None:
    """Delete a task's JSON record and log file from ``self.tasks_dir``.

    Args:
        task_id: Identifier of the task to delete.

    Raises:
        RuntimeError: The task is currently running.
        ValueError: No JSON record exists for ``task_id``.
    """
    existing = self.get_task(task_id)
    if existing and existing.status == TaskStatus.running:
        raise RuntimeError(f"任务 '{task_id}' 正在运行,无法删除")

    record_file = self.tasks_dir / f"{task_id}.json"
    output_file = self.tasks_dir / f"{task_id}.log"
    if not record_file.exists():
        raise ValueError(f"任务 '{task_id}' 不存在")

    for target in (record_file, output_file):
        target.unlink(missing_ok=True)
1195
+
1196
def archive_task(self, task_id: str, archived: bool) -> Task:
    """Set a task's ``archived`` flag and persist it.

    Args:
        task_id: Identifier of the task to update.
        archived: New value for the flag.

    Returns:
        The updated task.

    Raises:
        ValueError: The task does not exist.
    """
    target = self.get_task(task_id)
    if target is None:
        raise ValueError(f"任务 '{task_id}' 不存在")

    target.archived = archived
    target.save(self.tasks_dir)
    return target
1203
+
1204
+ # ── 清理旧记录 ────────────────────────────────────────
1205
+
1206
def clean_tasks(self, keep: int = 30) -> int:
    """Remove task records beyond the first ``keep`` entries.

    Entries after position ``keep`` in ``Task.load_all`` have their JSON and
    log files deleted; running tasks are skipped.
    NOTE(review): presumably ``Task.load_all`` returns newest first — confirm.

    Args:
        keep: Number of leading records to leave untouched.

    Returns:
        The number of tasks whose files were removed.
    """
    records = Task.load_all(self.tasks_dir)
    if len(records) <= keep:
        return 0

    removed = 0
    for stale in records[keep:]:
        if stale.status != TaskStatus.running:
            for suffix in ("json", "log"):
                (self.tasks_dir / f"{stale.id}.{suffix}").unlink(missing_ok=True)
            removed += 1
    return removed