coderfleet 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderfleet/__init__.py +1 -0
- coderfleet/__main__.py +4 -0
- coderfleet/cli.py +212 -0
- coderfleet/compose.py +176 -0
- coderfleet/config.py +69 -0
- coderfleet/config_cmds.py +243 -0
- coderfleet/data/Dockerfile +92 -0
- coderfleet/data/__init__.py +0 -0
- coderfleet/data/accounts.conf.example +26 -0
- coderfleet/data/config.conf.example +31 -0
- coderfleet/data/entrypoint.sh +56 -0
- coderfleet/data/projects.conf.example +17 -0
- coderfleet/data/scripts/coderfleet_usage_status.py +138 -0
- coderfleet/docker_ops.py +385 -0
- coderfleet/init_wizard.py +227 -0
- coderfleet/login_cmd.py +168 -0
- coderfleet/server/__init__.py +0 -0
- coderfleet/server/docker_mgr.py +45 -0
- coderfleet/server/main.py +546 -0
- coderfleet/server/models.py +285 -0
- coderfleet/server/scheduler.py +1219 -0
- coderfleet/server/static/css/main.css +2906 -0
- coderfleet/server/static/index.html +378 -0
- coderfleet/server/static/js/accounts.js +85 -0
- coderfleet/server/static/js/app.js +28 -0
- coderfleet/server/static/js/chat.js +743 -0
- coderfleet/server/static/js/log.js +145 -0
- coderfleet/server/static/js/nav.js +46 -0
- coderfleet/server/static/js/projects.js +298 -0
- coderfleet/server/static/js/renderer.js +586 -0
- coderfleet/server/static/js/state.js +76 -0
- coderfleet/server/static/js/submit.js +200 -0
- coderfleet/server/static/js/tasks.js +92 -0
- coderfleet/server/static/js/terminal.js +347 -0
- coderfleet/server/static/js/utils.js +147 -0
- coderfleet/server/static/vendor/marked.min.js +6 -0
- coderfleet/server/static/vendor/xterm/addon-fit.js +2 -0
- coderfleet/server/static/vendor/xterm/xterm.css +218 -0
- coderfleet/server/static/vendor/xterm/xterm.js +2 -0
- coderfleet/server/terminal.py +129 -0
- coderfleet/task_cmds.py +311 -0
- coderfleet-0.1.0.dist-info/METADATA +492 -0
- coderfleet-0.1.0.dist-info/RECORD +45 -0
- coderfleet-0.1.0.dist-info/WHEEL +4 -0
- coderfleet-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,1219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
scheduler.py — 任务调度核心
|
|
3
|
+
|
|
4
|
+
职责:
|
|
5
|
+
- 解析 accounts.conf,获取账号列表
|
|
6
|
+
- 判断账号空闲/忙碌状态
|
|
7
|
+
- 分配任务到合适账号
|
|
8
|
+
- 异步执行任务,维护任务生命周期
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import random
|
|
16
|
+
import shlex
|
|
17
|
+
import subprocess
|
|
18
|
+
from datetime import datetime
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
from coderfleet.server import docker_mgr
|
|
23
|
+
from coderfleet.server.models import (
|
|
24
|
+
Account,
|
|
25
|
+
AccountAuth,
|
|
26
|
+
AccountProxy,
|
|
27
|
+
AccountResponse,
|
|
28
|
+
AccountType,
|
|
29
|
+
Conversation,
|
|
30
|
+
ConversationStatus,
|
|
31
|
+
Project,
|
|
32
|
+
Task,
|
|
33
|
+
TaskStatus,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Scheduler:
|
|
38
|
+
def __init__(self, workspace_dir: Path):
|
|
39
|
+
self.workspace_dir = workspace_dir
|
|
40
|
+
self.accounts_conf = workspace_dir / "accounts.conf"
|
|
41
|
+
self.projects_conf = workspace_dir / "projects.conf"
|
|
42
|
+
self.tasks_dir = workspace_dir / "tasks"
|
|
43
|
+
self.conversations_dir = workspace_dir / "conversations"
|
|
44
|
+
# task_id → asyncio.Task(后台运行的协程)
|
|
45
|
+
self._running: dict[str, asyncio.Task] = {}
|
|
46
|
+
self._loop_task: Optional[asyncio.Task] = None
|
|
47
|
+
|
|
48
|
+
@staticmethod
|
|
49
|
+
def task_process_marker(task_id: str) -> str:
    """Return the marker string ``coderfleet-task-<task_id>`` for a task."""
    return "coderfleet-task-{}".format(task_id)
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def build_cli_command(
    acc_type: AccountType,
    prompt: str,
    auto: bool,
    task_id: str,
    native_session_id: str = "",
    container_workdir: str = "",
    images: list[str] | None = None,
) -> str:
    """Build the shell command that runs the CLI for one task.

    JSON output is always enabled (``--output-format stream-json`` for
    claude, ``--json`` for codex) so that the native session id can be
    extracted from the output afterwards.

    Args:
        acc_type: ``AccountType.claude`` selects the claude CLI; any other
            value selects the codex CLI.
        prompt: Prompt text; it is piped to the CLI through stdin.
        auto: When true the permissive flags are used
            (``--dangerously-skip-permissions`` for claude,
            ``danger-full-access`` / ``--dangerously-bypass-approvals-and-sandbox``
            for codex).
        task_id: Used for the process marker, the ``CODERFLEET_TASK_ID``
            variable and the ``.log`` / ``.exit`` file names.
        native_session_id: When non-empty, the CLI resumes that session.
        container_workdir: When non-empty, the command ``cd``s there first.
        images: Image paths. codex receives them as ``-i`` flags; for claude
            they are appended to the prompt text.

    Returns:
        A single shell command string that launches the wrapper in the
        background (it ends with ``&``).
    """
    quoted_prompt = shlex.quote(prompt)
    quoted_marker = shlex.quote(Scheduler.task_process_marker(task_id))
    quoted_task = shlex.quote(task_id)

    if acc_type == AccountType.claude:
        if images:
            # claude has no -i flag, so the image paths are appended to the
            # end of the prompt instead.
            listing = "\n".join(images)
            quoted_prompt = shlex.quote(f"{prompt}\n\n[Attached images:\n{listing}]")
        permission = "--permission-mode acceptEdits"
        if auto:
            permission = "--dangerously-skip-permissions"
        resume = ""
        if native_session_id:
            resume = f" --resume {shlex.quote(native_session_id)}"
        # Streaming JSON output is always on so the session_id can be captured.
        cli = f"claude -p {permission} --output-format stream-json --verbose{resume}"
    else:
        image_flags = "".join(f" -i {shlex.quote(img)}" for img in (images or []))
        # --json is always on so the thread_id can be captured.
        if native_session_id:
            # Form: codex exec resume <session_id> --json [flags].
            # The original author notes that the resume subcommand does not
            # support --sandbox, hence the bypass flag in auto mode.
            danger_flag = " --dangerously-bypass-approvals-and-sandbox" if auto else ""
            cli = f"codex exec resume {shlex.quote(native_session_id)} --json{danger_flag}{image_flags}"
        else:
            sandbox = "danger-full-access" if auto else "workspace-write"
            cli = f"codex exec --json --sandbox {sandbox}{image_flags}"

    # The prompt is fed through stdin; the original author notes that some
    # claude CLI versions no longer accept it as a positional argument.
    pipeline = (
        f"printf '%s\\n' {quoted_prompt} | "
        f"CODERFLEET_TASK_ID={quoted_task} exec -a {quoted_marker} {cli}"
    )
    if container_workdir:
        pipeline = f"cd {shlex.quote(container_workdir)} && {pipeline}"

    state_dir = "/workspace/.coderfleet-tasks"
    log_file = shlex.quote(f"{state_dir}/{task_id}.log")
    exit_file = shlex.quote(f"{state_dir}/{task_id}.exit")
    # The pipeline is wrapped in a subshell ( ... ): `exec -a` then replaces
    # the subshell process, so the outer bash survives and can still run
    # `echo $?` to write the exit file. Without the parentheses `exec -a`
    # would replace the outer bash and the `echo $?` would never run.
    wrapper = f"( {pipeline} ) >> {log_file} 2>&1; echo $? > {exit_file}"
    return f"mkdir -p {state_dir} && setsid bash -c {shlex.quote(wrapper)} &"
|
|
121
|
+
|
|
122
|
+
@staticmethod
|
|
123
|
+
def build_usage_status_command(acc_type: AccountType) -> str:
    """Return the usage-status shell command for ``acc_type``.

    Only ``AccountType.codex`` has one; every other type yields ``""``.
    """
    is_codex = acc_type == AccountType.codex
    return "coderfleet-usage-status codex 2>&1" if is_codex else ""
|
|
127
|
+
|
|
128
|
+
@staticmethod
|
|
129
|
+
def extract_native_session_id(acc_type: AccountType, text: str) -> str:
    """Find the native session id in line-delimited JSON output.

    Only lines that start with ``{`` (after stripping) and parse as JSON are
    considered. For codex the id is the ``thread_id`` of the first
    ``thread.started`` event; for any other account type it is the first
    truthy ``session_id``.

    Returns:
        The id as a string, or ``""`` when none is found.
    """
    for raw in text.splitlines():
        candidate = raw.strip()
        if candidate[:1] != "{":
            continue
        try:
            event = json.loads(candidate)
        except json.JSONDecodeError:
            # Not a complete JSON document; ignore this line.
            continue
        if acc_type == AccountType.codex:
            found = event.get("thread_id") if event.get("type") == "thread.started" else None
        else:
            found = event.get("session_id")
        if found:
            return str(found)
    return ""
|
|
144
|
+
|
|
145
|
+
# ── 账号管理 ──────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
def get_accounts(self) -> list[Account]:
|
|
148
|
+
"""解析 accounts.conf,返回所有账号"""
|
|
149
|
+
accounts = []
|
|
150
|
+
if not self.accounts_conf.exists():
|
|
151
|
+
return accounts
|
|
152
|
+
for line in self.accounts_conf.read_text(encoding="utf-8").splitlines():
|
|
153
|
+
line = line.strip().rstrip("\r")
|
|
154
|
+
if not line or line.startswith("#"):
|
|
155
|
+
continue
|
|
156
|
+
parts = {}
|
|
157
|
+
for token in line.split():
|
|
158
|
+
if "=" in token:
|
|
159
|
+
k, v = token.split("=", 1)
|
|
160
|
+
parts[k.upper()] = v
|
|
161
|
+
if "NAME" not in parts or "TYPE" not in parts:
|
|
162
|
+
continue
|
|
163
|
+
try:
|
|
164
|
+
acc_type = AccountType(parts["TYPE"])
|
|
165
|
+
auth = AccountAuth(parts.get("AUTH", AccountAuth.login.value))
|
|
166
|
+
proxy = AccountProxy(parts.get("PROXY", AccountProxy.relay.value))
|
|
167
|
+
except ValueError:
|
|
168
|
+
continue
|
|
169
|
+
env_file = parts.get("ENV_FILE", "")
|
|
170
|
+
if auth == AccountAuth.env and acc_type != AccountType.claude:
|
|
171
|
+
continue
|
|
172
|
+
if auth == AccountAuth.env and not env_file:
|
|
173
|
+
env_file = f"./accounts/{parts['NAME']}/env"
|
|
174
|
+
accounts.append(Account(
|
|
175
|
+
name = parts["NAME"],
|
|
176
|
+
type = acc_type,
|
|
177
|
+
auth = auth,
|
|
178
|
+
env_file = env_file,
|
|
179
|
+
proxy = proxy,
|
|
180
|
+
))
|
|
181
|
+
return accounts
|
|
182
|
+
|
|
183
|
+
def get_projects(self) -> list[Project]:
|
|
184
|
+
projects: list[Project] = []
|
|
185
|
+
if self.projects_conf.exists():
|
|
186
|
+
for line in self.projects_conf.read_text(encoding="utf-8").splitlines():
|
|
187
|
+
line = line.strip().rstrip("\r")
|
|
188
|
+
if not line or line.startswith("#"):
|
|
189
|
+
continue
|
|
190
|
+
parts = {}
|
|
191
|
+
for token in line.split():
|
|
192
|
+
if "=" in token:
|
|
193
|
+
k, v = token.split("=", 1)
|
|
194
|
+
parts[k.upper()] = v
|
|
195
|
+
if "NAME" not in parts or "ACCOUNT" not in parts or "PATH" not in parts:
|
|
196
|
+
continue
|
|
197
|
+
path = parts["PATH"].replace("~", str(Path.home()), 1)
|
|
198
|
+
projects.append(Project(
|
|
199
|
+
name=parts["NAME"],
|
|
200
|
+
account=parts["ACCOUNT"],
|
|
201
|
+
path=path,
|
|
202
|
+
))
|
|
203
|
+
|
|
204
|
+
return projects
|
|
205
|
+
|
|
206
|
+
def get_busy_accounts(self) -> set[str]:
    """Return the names of accounts that currently have a running task."""
    return {
        task.account
        for task in Task.load_all(self.tasks_dir)
        if task.status == TaskStatus.running
    }
|
|
213
|
+
|
|
214
|
+
def list_accounts(self) -> list[AccountResponse]:
    """Return one ``AccountResponse`` per configured account.

    Each response combines the account's configuration with its project
    names, the container names derived from them (``<type>-<project>``),
    whether any of those containers is running, the first running task
    found for the account, and its done / failed task counts.
    """
    # Single pass over all tasks: first running task per account plus
    # done / failed tallies.
    first_running: dict[str, Task] = {}
    done_by_account: dict[str, int] = {}
    failed_by_account: dict[str, int] = {}
    for task in Task.load_all(self.tasks_dir):
        owner = task.account
        if task.status == TaskStatus.running:
            first_running.setdefault(owner, task)
        elif task.status == TaskStatus.done:
            done_by_account[owner] = done_by_account.get(owner, 0) + 1
        elif task.status == TaskStatus.failed:
            failed_by_account[owner] = failed_by_account.get(owner, 0) + 1

    names_by_account: dict[str, list[str]] = {}
    for project in self.get_projects():
        names_by_account.setdefault(project.account, []).append(project.name)

    responses = []
    for acc in self.get_accounts():
        project_names = names_by_account.get(acc.name, [])
        container_names = [f"{acc.type.value}-{pn}" for pn in project_names]
        # Every container is probed (a list, not a generator), so no probe
        # is skipped once one container is found running.
        probes = [docker_mgr.is_container_running(ctr) for ctr in container_names]
        current = first_running.get(acc.name)
        responses.append(AccountResponse(
            name=acc.name,
            type=acc.type,
            auth=acc.auth,
            env_file=acc.env_file,
            proxy=acc.proxy,
            projects=project_names,
            running=any(probes),
            busy=acc.name in first_running,
            container=" ".join(container_names),
            running_task_id=current.id if current else "",
            running_task_prompt=current.prompt if current else "",
            task_done_count=done_by_account.get(acc.name, 0),
            task_failed_count=failed_by_account.get(acc.name, 0),
        ))
    return responses
|
|
259
|
+
|
|
260
|
+
def list_projects(self) -> list[Project]:
    """Return all configured projects (delegates to ``get_projects``)."""
    return self.get_projects()
|
|
262
|
+
|
|
263
|
+
def find_idle_account(
    self,
    prefer_type: Optional[AccountType] = None,
    prefer_project: Optional[str] = None,
) -> Optional[Account]:
    """Return the first idle account that satisfies the filters.

    An account qualifies when:
    - its type matches ``prefer_type`` (if given),
    - one of its projects covers ``prefer_project`` (if given; paths are
      canonicalised before comparison),
    - it has no running task,
    - at least one of its project containers is running.

    Returns:
        The first qualifying account in configuration order, else ``None``.
    """
    occupied = self.get_busy_accounts()

    for candidate in self.get_accounts():
        if prefer_type and candidate.type != prefer_type:
            continue
        if prefer_project and not self.find_project_for_path(prefer_project, candidate.name):
            continue
        if candidate.name in occupied:
            continue
        # The account needs at least one project whose container is running.
        owned = [p for p in self.get_projects() if p.account == candidate.name]
        for proj in owned:
            if docker_mgr.is_container_running(proj.container_name(candidate.type)):
                return candidate

    return None
|
|
299
|
+
|
|
300
|
+
@staticmethod
|
|
301
|
+
def _canonical_path(path: str) -> Path:
|
|
302
|
+
return Path(path).expanduser().resolve()
|
|
303
|
+
|
|
304
|
+
def _path_under_root(self, root: str, project: str) -> bool:
|
|
305
|
+
account_root = self._canonical_path(root)
|
|
306
|
+
project_path = self._canonical_path(project)
|
|
307
|
+
try:
|
|
308
|
+
project_path.relative_to(account_root)
|
|
309
|
+
except ValueError:
|
|
310
|
+
return False
|
|
311
|
+
return True
|
|
312
|
+
|
|
313
|
+
def account_can_access_project(self, acc: Account, project: str) -> bool:
|
|
314
|
+
return any(
|
|
315
|
+
p.account == acc.name and self._path_under_root(p.path, project)
|
|
316
|
+
for p in self.get_projects()
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
def find_project_by_name(self, name: str) -> Optional[Project]:
|
|
320
|
+
return next((p for p in self.get_projects() if p.name == name), None)
|
|
321
|
+
|
|
322
|
+
def find_project_for_path(self, project: Optional[str], account: Optional[str] = None) -> Optional[Project]:
|
|
323
|
+
if not project:
|
|
324
|
+
return None
|
|
325
|
+
matching = [
|
|
326
|
+
p for p in self.get_projects()
|
|
327
|
+
if self._path_under_root(p.path, project)
|
|
328
|
+
]
|
|
329
|
+
if account:
|
|
330
|
+
matching = [p for p in matching if p.account == account]
|
|
331
|
+
if not matching:
|
|
332
|
+
return None
|
|
333
|
+
return max(matching, key=lambda p: len(str(self._canonical_path(p.path))))
|
|
334
|
+
|
|
335
|
+
def resolve_task_project(self, acc: Account, project: Optional[str]) -> str:
|
|
336
|
+
if not project:
|
|
337
|
+
account_projects = [p for p in self.get_projects() if p.account == acc.name]
|
|
338
|
+
if len(account_projects) == 1:
|
|
339
|
+
return str(self._canonical_path(account_projects[0].path))
|
|
340
|
+
if not account_projects:
|
|
341
|
+
raise ValueError(f"账号 '{acc.name}' 未关联项目,请先创建项目")
|
|
342
|
+
raise ValueError(f"账号 '{acc.name}' 关联了多个项目,请选择项目")
|
|
343
|
+
if not self.account_can_access_project(acc, project):
|
|
344
|
+
raise ValueError(f"项目 '{project}' 未关联账号 '{acc.name}'")
|
|
345
|
+
return str(self._canonical_path(project))
|
|
346
|
+
|
|
347
|
+
def container_workdir_for_project(self, owner: Project, project: str) -> str:
|
|
348
|
+
account_root = self._canonical_path(owner.path)
|
|
349
|
+
project_path = self._canonical_path(project)
|
|
350
|
+
rel = project_path.relative_to(account_root)
|
|
351
|
+
if str(rel) == ".":
|
|
352
|
+
return "/workspace"
|
|
353
|
+
return "/workspace/" + rel.as_posix()
|
|
354
|
+
|
|
355
|
+
# ── 任务管理 ──────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
@staticmethod
|
|
358
|
+
def new_task_id() -> str:
    """Return a task id of the form ``YYYYmmddHHMMSS-NNNN``.

    The prefix is the current local time; the suffix is a random number in
    ``0..9999``, zero-padded to four digits.
    """
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    suffix = random.randint(0, 9999)
    return "{}-{:04d}".format(stamp, suffix)
|
|
362
|
+
|
|
363
|
+
def list_tasks(self) -> list[Task]:
    """Return every task that ``Task.load_all`` finds in ``tasks_dir``."""
    return Task.load_all(self.tasks_dir)
|
|
365
|
+
|
|
366
|
+
def get_task(self, task_id: str) -> Optional[Task]:
|
|
367
|
+
path = self.tasks_dir / f"{task_id}.json"
|
|
368
|
+
if not path.exists():
|
|
369
|
+
return None
|
|
370
|
+
return Task.load(path)
|
|
371
|
+
|
|
372
|
+
def get_log_path(self, task_id: str) -> Path:
    """Return the log file path ``<tasks_dir>/<task_id>.log`` (not created here)."""
    return self.tasks_dir / f"{task_id}.log"
|
|
374
|
+
|
|
375
|
+
@staticmethod
|
|
376
|
+
def new_conversation_id() -> str:
    """Return a conversation id of the form ``conv-YYYYmmddHHMMSS-NNNN``.

    The timestamp is the current local time; the suffix is a random number
    in ``0..9999``, zero-padded to four digits.
    """
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    suffix = random.randint(0, 9999)
    return "conv-{}-{:04d}".format(stamp, suffix)
|
|
380
|
+
|
|
381
|
+
def list_conversations(self, include_archived: bool = False) -> list[Conversation]:
    """Return stored conversations, hiding archived ones by default.

    Args:
        include_archived: When true, conversations whose status is
            ``ConversationStatus.archived`` are returned as well.
    """
    everything = Conversation.load_all(self.conversations_dir)
    if include_archived:
        return everything
    return [conv for conv in everything if conv.status != ConversationStatus.archived]
|
|
386
|
+
|
|
387
|
+
def archive_conversation(self, conversation_id: str, status: ConversationStatus) -> Conversation:
|
|
388
|
+
conv = self.get_conversation(conversation_id)
|
|
389
|
+
if conv is None:
|
|
390
|
+
raise ValueError(f"任务链 '{conversation_id}' 不存在")
|
|
391
|
+
conv.status = status
|
|
392
|
+
conv.save(self.conversations_dir)
|
|
393
|
+
return conv
|
|
394
|
+
|
|
395
|
+
def delete_conversation(self, conversation_id: str) -> None:
|
|
396
|
+
path = self.conversations_dir / f"{conversation_id}.json"
|
|
397
|
+
if not path.exists():
|
|
398
|
+
raise ValueError(f"任务链 '{conversation_id}' 不存在")
|
|
399
|
+
path.unlink()
|
|
400
|
+
|
|
401
|
+
def get_conversation(self, conversation_id: str) -> Optional[Conversation]:
|
|
402
|
+
path = self.conversations_dir / f"{conversation_id}.json"
|
|
403
|
+
if not path.exists():
|
|
404
|
+
return None
|
|
405
|
+
return Conversation.load(path)
|
|
406
|
+
|
|
407
|
+
def ensure_conversation_available(self, conversation: Conversation) -> None:
    """Raise if any task of ``conversation`` is currently running.

    Raises:
        RuntimeError: If a task with this conversation id has status
            ``running``.
    """
    in_use = any(
        task.conversation_id == conversation.id and task.status == TaskStatus.running
        for task in Task.load_all(self.tasks_dir)
    )
    if in_use:
        raise RuntimeError(f"任务链 '{conversation.name}' 正在运行,请等待当前任务结束")
|
|
411
|
+
|
|
412
|
+
def update_conversation_native_session(
|
|
413
|
+
self,
|
|
414
|
+
conversation_id: str,
|
|
415
|
+
native_session_id: str,
|
|
416
|
+
task_id: str,
|
|
417
|
+
) -> None:
|
|
418
|
+
conversation = self.get_conversation(conversation_id)
|
|
419
|
+
if conversation is None:
|
|
420
|
+
return
|
|
421
|
+
conversation.touch(
|
|
422
|
+
self.conversations_dir,
|
|
423
|
+
native_session_id=native_session_id,
|
|
424
|
+
last_task_id=task_id,
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
def _create_conversation(self, name: str, acc: Account, project: Project, task_project: str) -> Conversation:
    """Create, persist and return a new conversation.

    Args:
        name: Display name of the conversation.
        acc: Account the conversation is bound to (name and type are stored).
        project: Owning project; its name is stored as ``project_name``.
        task_project: Project path stored as ``project``.
    """
    fresh_id = self.new_conversation_id()
    created = Conversation(
        id=fresh_id,
        name=name,
        account=acc.name,
        type=acc.type,
        project=task_project,
        project_name=project.name,
    )
    created.save(self.conversations_dir)
    return created
|
|
438
|
+
|
|
439
|
+
# ── 提交任务 ──────────────────────────────────────────
|
|
440
|
+
|
|
441
|
+
# ── 定时与排队调度 ────────────────────────────────────────
|
|
442
|
+
|
|
443
|
+
def start_scheduling_loop(self) -> None:
|
|
444
|
+
if self._loop_task is None or self._loop_task.done():
|
|
445
|
+
self._loop_task = asyncio.create_task(
|
|
446
|
+
self._schedule_pending_tasks_loop(),
|
|
447
|
+
name="scheduler-pending-loop",
|
|
448
|
+
)
|
|
449
|
+
|
|
450
|
+
async def _schedule_pending_tasks_loop(self) -> None:
    """Call ``schedule_next_tasks`` forever, sleeping one second between runs.

    Any ``Exception`` raised by a run is printed with its traceback and the
    loop continues with the next iteration.
    """
    while True:
        try:
            await self.schedule_next_tasks()
        except Exception as e:
            # Keep the loop alive: report the failure and retry on the next tick.
            import traceback
            print("Error in schedule_next_tasks:")
            traceback.print_exc()
        await asyncio.sleep(1.0)
|
|
459
|
+
|
|
460
|
+
async def schedule_next_tasks(self) -> None:
    """Promote due scheduled tasks and start runnable pending tasks.

    Step 1 moves every ``scheduled`` task whose ``execute_at`` has passed to
    ``pending``; a task whose timestamp cannot be handled is marked
    ``failed`` and the reason is written to its log.

    Step 2 starts pending tasks in creation order, at most one per account
    per call, skipping accounts that already have a running task.
    """
    now = datetime.now()

    # 1. Promote scheduled tasks whose time has come (scheduled -> pending).
    for t in Task.load_all(self.tasks_dir):
        if t.status == TaskStatus.scheduled and t.execute_at:
            try:
                dt = datetime.fromisoformat(t.execute_at)
                # Ordering an aware datetime against a naive one raises
                # TypeError, so a timestamp carrying a UTC offset is
                # compared with "now" expressed in that same timezone.
                current = now if dt.tzinfo is None else datetime.now(dt.tzinfo)
                if current >= dt:
                    t.update_status(TaskStatus.pending, self.tasks_dir)
            except Exception as e:
                t.update_status(TaskStatus.failed, self.tasks_dir)
                self._write_failed_log(t, f"定时时间解析失败:{e}")

    # 2. Start pending tasks. Tasks are reloaded so the promotions made in
    #    step 1 are visible.
    pending_tasks = [t for t in Task.load_all(self.tasks_dir) if t.status == TaskStatus.pending]
    if not pending_tasks:
        return

    # Oldest first (FIFO by creation time).
    pending_tasks.sort(key=lambda t: t.created or "")
    busy_accounts = self.get_busy_accounts()

    for task in pending_tasks:
        if task.account not in busy_accounts:
            # Reserve the account so later pending tasks of the same
            # account wait for a subsequent call.
            busy_accounts.add(task.account)
            await self._start_pending_task(task)
|
|
489
|
+
|
|
490
|
+
async def _start_pending_task(self, task: Task) -> None:
    """Move one pending task to ``running`` and launch it in the background.

    The task is marked ``failed`` (with the reason written to its log) when
    its account or project is no longer configured, when the project
    container is not running, or when any step raises.
    """

    def fail(reason: str) -> None:
        # Mark the task failed and leave the reason in its log file.
        task.update_status(TaskStatus.failed, self.tasks_dir)
        self._write_failed_log(task, reason)

    try:
        account = None
        for candidate in self.get_accounts():
            if candidate.name == task.account:
                account = candidate
                break
        if account is None:
            fail(f"账号 '{task.account}' 不存在")
            return

        project = self.find_project_for_path(task.project, task.account)
        if project is None:
            fail(f"项目路径 '{task.project}' 未配置")
            return

        container_name = project.container_name(account.type)
        container_workdir = self.container_workdir_for_project(project, task.project)
        if not docker_mgr.is_container_running(container_name):
            fail(f"容器 {container_name} 未运行")
            return

        # Load the task's conversation, if it belongs to one.
        conversation = None
        if task.conversation_id:
            conversation = self.get_conversation(task.conversation_id)

        # Switch to the running state before anything is launched.
        task.update_status(TaskStatus.running, self.tasks_dir)

        # Write the log header.
        log_path = self.get_log_path(task.id)
        self._write_log_header(log_path, task, account, container_workdir, container_name)

        # Launch the run coroutine in the background and remember its handle.
        runner = self._run(
            task,
            account,
            log_path,
            getattr(task, "auto", False),
            conversation,
            container_workdir,
            container_name,
            getattr(task, "images", []),
        )
        self._running[task.id] = asyncio.create_task(runner, name=f"task-{task.id}")

    except Exception as e:
        fail(f"拉起任务失败:{e}")
|
|
541
|
+
|
|
542
|
+
def _write_failed_log(self, task: Task, reason: str) -> None:
|
|
543
|
+
log_path = self.get_log_path(task.id)
|
|
544
|
+
log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
545
|
+
with log_path.open("w", encoding="utf-8") as f:
|
|
546
|
+
f.write("=== CoderFleet Task Log ===\n")
|
|
547
|
+
f.write(f"id: {task.id}\n")
|
|
548
|
+
f.write(f"status: failed\n")
|
|
549
|
+
f.write(f"prompt: {task.prompt}\n")
|
|
550
|
+
f.write(f"error: {reason}\n")
|
|
551
|
+
f.write("=" * 38 + "\n\n")
|
|
552
|
+
f.write(f"任务启动失败:{reason}\n")
|
|
553
|
+
|
|
554
|
+
async def submit(
|
|
555
|
+
self,
|
|
556
|
+
prompt: str,
|
|
557
|
+
account_name: Optional[str] = None,
|
|
558
|
+
prefer_project: Optional[str] = None,
|
|
559
|
+
prefer_type: Optional[AccountType] = None,
|
|
560
|
+
auto: bool = False,
|
|
561
|
+
conversation_id: Optional[str] = None,
|
|
562
|
+
conversation_name: Optional[str] = None,
|
|
563
|
+
project_name: Optional[str] = None,
|
|
564
|
+
images: list[str] = [],
|
|
565
|
+
execute_at: Optional[str] = None,
|
|
566
|
+
) -> Task:
|
|
567
|
+
"""
|
|
568
|
+
提交任务,异步在后台执行,立即返回 Task 对象。
|
|
569
|
+
调用方可以通过 task.id 跟踪进度。
|
|
570
|
+
"""
|
|
571
|
+
is_pending = False
|
|
572
|
+
conversation: Optional[Conversation] = None
|
|
573
|
+
|
|
574
|
+
if conversation_id:
|
|
575
|
+
conversation = self.get_conversation(conversation_id)
|
|
576
|
+
if conversation is None:
|
|
577
|
+
raise ValueError(f"任务链 '{conversation_id}' 不存在")
|
|
578
|
+
|
|
579
|
+
# 判断任务链是否有正在运行的任务
|
|
580
|
+
has_running_in_conv = any(
|
|
581
|
+
t.conversation_id == conversation.id and t.status == TaskStatus.running
|
|
582
|
+
for t in Task.load_all(self.tasks_dir)
|
|
583
|
+
)
|
|
584
|
+
if has_running_in_conv:
|
|
585
|
+
is_pending = True
|
|
586
|
+
|
|
587
|
+
account_name = conversation.account
|
|
588
|
+
prefer_type = conversation.type
|
|
589
|
+
prefer_project = conversation.project
|
|
590
|
+
project_name = conversation.project_name or project_name
|
|
591
|
+
|
|
592
|
+
selected_project: Optional[Project] = None
|
|
593
|
+
if project_name:
|
|
594
|
+
selected_project = self.find_project_by_name(project_name)
|
|
595
|
+
if selected_project is None:
|
|
596
|
+
raise ValueError(f"项目 '{project_name}' 不存在")
|
|
597
|
+
if account_name and account_name != selected_project.account:
|
|
598
|
+
raise ValueError(
|
|
599
|
+
f"项目 '{project_name}' 关联账号为 {selected_project.account},与指定账号 {account_name} 不一致"
|
|
600
|
+
)
|
|
601
|
+
account_name = selected_project.account
|
|
602
|
+
prefer_project = selected_project.path
|
|
603
|
+
|
|
604
|
+
# 确定账号
|
|
605
|
+
acc: Optional[Account] = None
|
|
606
|
+
if account_name:
|
|
607
|
+
acc = next((a for a in self.get_accounts() if a.name == account_name), None)
|
|
608
|
+
if acc is None:
|
|
609
|
+
raise ValueError(f"账号 '{account_name}' 不存在")
|
|
610
|
+
if prefer_type and acc.type != prefer_type:
|
|
611
|
+
raise ValueError(
|
|
612
|
+
f"账号 '{account_name}' 类型为 {acc.type.value},与筛选类型 {prefer_type.value} 不一致"
|
|
613
|
+
)
|
|
614
|
+
# 若账号忙碌,则标记为排队
|
|
615
|
+
if acc.name in self.get_busy_accounts():
|
|
616
|
+
is_pending = True
|
|
617
|
+
else:
|
|
618
|
+
# 优先寻找空闲账号
|
|
619
|
+
acc = self.find_idle_account(
|
|
620
|
+
prefer_type = prefer_type,
|
|
621
|
+
prefer_project = prefer_project,
|
|
622
|
+
)
|
|
623
|
+
if acc is None:
|
|
624
|
+
# 寻找支持该项目且对应的项目容器在线的忙碌账号
|
|
625
|
+
matching_busy_accounts = []
|
|
626
|
+
for a in self.get_accounts():
|
|
627
|
+
if prefer_type and a.type != prefer_type:
|
|
628
|
+
continue
|
|
629
|
+
if prefer_project:
|
|
630
|
+
project_match = self.find_project_for_path(prefer_project, a.name)
|
|
631
|
+
if not project_match:
|
|
632
|
+
continue
|
|
633
|
+
account_projects = [p for p in self.get_projects() if p.account == a.name]
|
|
634
|
+
if not account_projects:
|
|
635
|
+
continue
|
|
636
|
+
any_running = any(
|
|
637
|
+
docker_mgr.is_container_running(p.container_name(a.type))
|
|
638
|
+
for p in account_projects
|
|
639
|
+
)
|
|
640
|
+
if not any_running:
|
|
641
|
+
continue
|
|
642
|
+
matching_busy_accounts.append(a)
|
|
643
|
+
|
|
644
|
+
if not matching_busy_accounts:
|
|
645
|
+
hints = []
|
|
646
|
+
if prefer_project:
|
|
647
|
+
hints.append(f"项目:{prefer_project}")
|
|
648
|
+
if prefer_type:
|
|
649
|
+
hints.append(f"类型:{prefer_type.value}")
|
|
650
|
+
hint_str = ",".join(hints)
|
|
651
|
+
raise RuntimeError(
|
|
652
|
+
f"没有匹配的空闲账号{('(' + hint_str + ')') if hint_str else ''}"
|
|
653
|
+
)
|
|
654
|
+
|
|
655
|
+
# 分配第一个可用的忙碌账号并标记为 pending
|
|
656
|
+
acc = matching_busy_accounts[0]
|
|
657
|
+
is_pending = True
|
|
658
|
+
|
|
659
|
+
task_project = self.resolve_task_project(acc, prefer_project)
|
|
660
|
+
selected_project = selected_project or self.find_project_for_path(task_project, acc.name)
|
|
661
|
+
if selected_project is None:
|
|
662
|
+
raise ValueError(f"项目 '{task_project}' 未配置")
|
|
663
|
+
container_name = selected_project.container_name(acc.type)
|
|
664
|
+
container_workdir = self.container_workdir_for_project(selected_project, task_project)
|
|
665
|
+
|
|
666
|
+
if not docker_mgr.is_container_running(container_name):
|
|
667
|
+
raise RuntimeError(f"容器 {container_name} 未运行")
|
|
668
|
+
|
|
669
|
+
if conversation is None and conversation_name:
|
|
670
|
+
conversation = self._create_conversation(conversation_name, acc, selected_project, task_project)
|
|
671
|
+
|
|
672
|
+
# 创建任务记录
|
|
673
|
+
task_id = self.new_task_id()
|
|
674
|
+
|
|
675
|
+
# ── 处理定时任务 ──
|
|
676
|
+
if execute_at:
|
|
677
|
+
try:
|
|
678
|
+
dt = datetime.fromisoformat(execute_at)
|
|
679
|
+
if dt > datetime.now():
|
|
680
|
+
task = Task(
|
|
681
|
+
id = task_id,
|
|
682
|
+
status = TaskStatus.scheduled,
|
|
683
|
+
account = acc.name,
|
|
684
|
+
type = acc.type,
|
|
685
|
+
prompt = prompt,
|
|
686
|
+
project = task_project,
|
|
687
|
+
project_name = selected_project.name,
|
|
688
|
+
conversation_id = conversation.id if conversation else "",
|
|
689
|
+
native_session_id = conversation.native_session_id if conversation else "",
|
|
690
|
+
auto = auto,
|
|
691
|
+
images = images,
|
|
692
|
+
execute_at = execute_at,
|
|
693
|
+
)
|
|
694
|
+
task.save(self.tasks_dir)
|
|
695
|
+
|
|
696
|
+
# 写入空日志文件防 SSE 404
|
|
697
|
+
log_path = self.get_log_path(task_id)
|
|
698
|
+
log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
699
|
+
log_path.write_text("", encoding="utf-8")
|
|
700
|
+
|
|
701
|
+
return task
|
|
702
|
+
except ValueError:
|
|
703
|
+
pass
|
|
704
|
+
|
|
705
|
+
# ── 处理排队任务 ──
|
|
706
|
+
if is_pending:
|
|
707
|
+
task = Task(
|
|
708
|
+
id = task_id,
|
|
709
|
+
status = TaskStatus.pending,
|
|
710
|
+
account = acc.name,
|
|
711
|
+
type = acc.type,
|
|
712
|
+
prompt = prompt,
|
|
713
|
+
project = task_project,
|
|
714
|
+
project_name = selected_project.name,
|
|
715
|
+
conversation_id = conversation.id if conversation else "",
|
|
716
|
+
native_session_id = conversation.native_session_id if conversation else "",
|
|
717
|
+
auto = auto,
|
|
718
|
+
images = images,
|
|
719
|
+
)
|
|
720
|
+
task.save(self.tasks_dir)
|
|
721
|
+
|
|
722
|
+
# 写入空日志文件防 SSE 404
|
|
723
|
+
log_path = self.get_log_path(task_id)
|
|
724
|
+
log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
725
|
+
log_path.write_text("", encoding="utf-8")
|
|
726
|
+
|
|
727
|
+
# 立即触发一次调度检查
|
|
728
|
+
asyncio.create_task(self.schedule_next_tasks())
|
|
729
|
+
return task
|
|
730
|
+
|
|
731
|
+
# ── 立即执行任务 ──
|
|
732
|
+
log_path = self.get_log_path(task_id)
|
|
733
|
+
task = Task(
|
|
734
|
+
id = task_id,
|
|
735
|
+
status = TaskStatus.running,
|
|
736
|
+
account = acc.name,
|
|
737
|
+
type = acc.type,
|
|
738
|
+
prompt = prompt,
|
|
739
|
+
project = task_project,
|
|
740
|
+
project_name = selected_project.name,
|
|
741
|
+
conversation_id = conversation.id if conversation else "",
|
|
742
|
+
native_session_id = conversation.native_session_id if conversation else "",
|
|
743
|
+
auto = auto,
|
|
744
|
+
images = images,
|
|
745
|
+
)
|
|
746
|
+
task.save(self.tasks_dir)
|
|
747
|
+
|
|
748
|
+
# 写日志头
|
|
749
|
+
self._write_log_header(log_path, task, acc, container_workdir, container_name)
|
|
750
|
+
|
|
751
|
+
# 异步后台执行
|
|
752
|
+
bg = asyncio.create_task(
|
|
753
|
+
self._run(task, acc, log_path, auto, conversation, container_workdir, container_name, images),
|
|
754
|
+
name=f"task-{task_id}",
|
|
755
|
+
)
|
|
756
|
+
self._running[task_id] = bg
|
|
757
|
+
|
|
758
|
+
return task
|
|
759
|
+
|
|
760
|
+
# ── 从已有任务创建任务链 ──────────────────────────────
|
|
761
|
+
|
|
762
|
+
def create_conversation_from_task(
    self,
    name: str,
    task_id: str,
) -> Conversation:
    """Create a conversation (task chain) seeded from an existing task.

    The source task must already carry a ``native_session_id``; the new
    conversation copies it, together with the task's account and project,
    and is persisted to ``self.conversations_dir`` before being returned.

    Args:
        name: Name stored on the new conversation.
        task_id: Id of the task to build the conversation from.

    Returns:
        The saved ``Conversation``.

    Raises:
        ValueError: If the task does not exist, has no native session id,
            references an account that is not returned by
            ``get_accounts()``, or references a project path for which
            ``find_project_for_path`` returns ``None``.
    """
    source_task = self.get_task(task_id)
    if source_task is None:
        raise ValueError(f"任务 '{task_id}' 不存在")

    if not source_task.native_session_id:
        raise ValueError(
            f"任务 '{task_id}' 没有 native_session_id，"
            "可能是较早的任务（未开启流式 JSON 输出模式）"
        )

    # Resolve the account by name; the first match wins.
    owner = None
    for candidate in self.get_accounts():
        if candidate.name == source_task.account:
            owner = candidate
            break
    if owner is None:
        raise ValueError(f"账号 '{source_task.account}' 不存在")

    matched_project = self.find_project_for_path(source_task.project, source_task.account)
    if matched_project is None:
        raise ValueError(f"项目路径 '{source_task.project}' 未配置")

    chain = Conversation(
        id=self.new_conversation_id(),
        name=name,
        account=owner.name,
        type=owner.type,
        project=source_task.project,
        project_name=matched_project.name,
        native_session_id=source_task.native_session_id,
        last_task_id=task_id,
    )
    chain.save(self.conversations_dir)
    return chain
|
|
798
|
+
|
|
799
|
+
def _write_log_header(self, log_path: Path, task: Task, acc: Account, container_workdir: str = "", container_name: str = "") -> None:
    """Append the human-readable header block to a task's log file.

    The parent directory is created if needed. Optional lines (container,
    container cwd, conversation, native session) are written only when the
    corresponding value is non-empty. Newlines inside the prompt are
    replaced by the two-character sequence ``\\n`` so the prompt stays on a
    single header line.
    """
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as log_file:
        header = [
            "=== CoderFleet Task Log ===\n",
            f"id: {task.id}\n",
            f"account: {acc.name} ({acc.type.value})\n",
            f"project: {task.project}\n",
        ]
        if container_name:
            header.append(f"container: {container_name}\n")
        if container_workdir:
            header.append(f"container cwd: {container_workdir}\n")
        if task.conversation_id:
            header.append(f"conversation: {task.conversation_id}\n")
        if task.native_session_id:
            header.append(f"native session: {task.native_session_id}\n")

        if task.prompt:
            # CRLF first, so a Windows line ending collapses to one escape.
            one_line_prompt = task.prompt.replace('\r\n', '\\n').replace('\n', '\\n')
        else:
            one_line_prompt = ""
        header.append(f"prompt: {one_line_prompt}\n")

        started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        header.append(f"started: {started_at}\n")
        header.append("=" * 38 + "\n\n")
        log_file.writelines(header)
|
|
818
|
+
|
|
819
|
+
async def _stream_container_log(
    self,
    task: Task,
    acc: Account,
    log_path: Path,
    host_log: Path,
    host_exit: Path,
    conversation: Optional[Conversation] = None,
    start_offset: int = 0,
) -> int:
    """Poll the host-side task log and mirror new content into ``log_path``.

    Every 0.3 s the bytes appended to ``host_log`` since the last poll are
    copied to ``log_path``. The loop ends on the first poll at which
    ``host_exit`` exists; that poll still copies any remaining log content
    because the existence check is made before the read. The exit code is
    then parsed from ``host_exit`` and the task is marked done (code 0) or
    failed (anything else, including an unreadable exit file).

    ``asyncio.CancelledError`` is deliberately not caught here; the caller
    owns the kill logic.

    Args:
        task: Task being followed; its ``native_session_id`` is filled in
            and saved as soon as one is extracted from the output.
        acc: Account the task runs under.
        log_path: Server-side log file that receives the mirrored text.
        host_log: File the container process writes its output to.
        host_exit: File whose appearance signals completion and whose
            content is the exit code.
        conversation: Optional conversation to update with the captured
            session id / last task id.
        start_offset: Byte offset in ``host_log`` to start copying from.

    Returns:
        The exit code, or ``-1`` when it could not be read.
    """
    import codecs

    import aiofiles

    offset = start_offset
    captured_session_id = ""
    container_name = self._get_task_container(task) or ""
    # Incremental decoding keeps a multi-byte UTF-8 character that is split
    # across two polls intact instead of turning both halves into U+FFFD.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    async with aiofiles.open(log_path, mode="a", encoding="utf-8") as f:
        while True:
            await asyncio.sleep(0.3)
            # Sample the exit marker BEFORE reading so the final read below
            # is guaranteed to happen after the process finished writing.
            is_done = host_exit.exists()

            if host_log.exists():
                cur_size = host_log.stat().st_size
                if cur_size > offset:
                    async with aiofiles.open(host_log, mode="rb") as hf:
                        await hf.seek(offset)
                        new_bytes = await hf.read()
                    # Advance by what was actually read: the file may have
                    # grown after stat(), and using the stale size would
                    # copy those extra bytes a second time on the next poll.
                    offset += len(new_bytes)
                    text = decoder.decode(new_bytes)
                    if text:
                        await f.write(text)
                        await f.flush()

                        if not captured_session_id:
                            captured_session_id = self.extract_native_session_id(acc.type, text)
                            if captured_session_id:
                                task.native_session_id = captured_session_id
                                task.save(self.tasks_dir)
                                if conversation:
                                    self.update_conversation_native_session(
                                        conversation.id,
                                        captured_session_id,
                                        task.id,
                                    )

            if is_done:
                # Flush a dangling partial character (emitted as U+FFFD).
                tail = decoder.decode(b"", final=True)
                if tail:
                    await f.write(tail)
                    await f.flush()
                break

    rc = -1
    if host_exit.exists():
        try:
            rc = int(host_exit.read_text().strip())
        except (ValueError, OSError):
            rc = -1

    if rc == 0:
        if conversation:
            if not conversation.native_session_id and captured_session_id:
                self.update_conversation_native_session(conversation.id, captured_session_id, task.id)
            else:
                conversation.touch(self.conversations_dir, last_task_id=task.id)
        task.update_status(TaskStatus.done, self.tasks_dir)
        await self._append_usage_status(log_path, acc, container_name)
        self._append_log_footer(log_path, "done")
    else:
        task.update_status(TaskStatus.failed, self.tasks_dir)
        await self._append_usage_status(log_path, acc, container_name)
        self._append_log_footer(log_path, f"failed (exit={rc})")

    return rc
|
|
892
|
+
|
|
893
|
+
async def _run(
    self,
    task: Task,
    acc: Account,
    log_path: Path,
    auto: bool,
    conversation: Optional[Conversation] = None,
    container_workdir: str = "",
    container_name: str = "",
    images: Optional[list[str]] = None,
) -> None:
    """Background coroutine: launch a task in its container and follow it.

    Builds the CLI command, starts it with ``docker exec`` and waits for
    that launcher to return, then tails the host-side log file via
    ``_stream_container_log`` until the task's exit file appears.
    (The original author describes the launch as "detached", i.e. the
    ``docker exec`` call is expected to return promptly — confirm against
    ``build_cli_command``.)

    Outcomes:
        * cancelled -> container process is killed, task marked ``killed``;
        * any other exception -> task marked ``failed`` with the error text;
        * always -> the task is removed from ``self._running``, host-side
          task files are cleaned up and a scheduling pass is triggered.

    Args:
        images: Image references forwarded to ``build_cli_command``.
            ``None`` (the default) means no images; a fresh list is passed
            on each call so no state is shared between invocations.
    """
    try:
        cmd = self.build_cli_command(
            acc.type,
            task.prompt,
            auto,
            task.id,
            native_session_id=conversation.native_session_id if conversation else "",
            container_workdir=container_workdir,
            images=[] if images is None else images,
        )

        proc = await asyncio.create_subprocess_exec(
            "docker", "exec", container_name, "bash", "-c", cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        # communicate() drains the pipe while waiting. A bare wait() with
        # stdout=PIPE can deadlock once the child fills the OS pipe buffer.
        launch_output, _ = await proc.communicate()

        if proc.returncode != 0:
            err = launch_output.decode("utf-8", errors="replace") if launch_output else ""
            raise RuntimeError(f"docker exec failed (exit={proc.returncode}): {err.strip()}")

        project_root = self._get_project_root(task)
        if project_root is None:
            raise RuntimeError(f"找不到任务 {task.id} 对应的项目根目录")

        host_log = self._host_task_log(project_root, task.id)
        host_exit = self._host_task_exit(project_root, task.id)

        await self._stream_container_log(
            task, acc, log_path, host_log, host_exit, conversation
        )

    except asyncio.CancelledError:
        self.kill_task_process(task)
        task.update_status(TaskStatus.killed, self.tasks_dir)
        self._append_log_footer(log_path, "killed")
        return
    except Exception as e:
        task.update_status(TaskStatus.failed, self.tasks_dir)
        await self._append_usage_status(log_path, acc, container_name)
        self._append_log_footer(log_path, f"failed: {e}")
        return
    finally:
        self._running.pop(task.id, None)
        self._cleanup_container_task_files(task)
        # Free slot: let the scheduler pick up queued work.
        asyncio.create_task(self.schedule_next_tasks())
|
|
952
|
+
|
|
953
|
+
async def _reattach(self, task: Task) -> None:
    """Resume tracking a container task that outlived a server restart.

    Looks up the task's account and project root, then tails the host-side
    log from its current size onward (content already present is skipped)
    until the exit file appears.

    All paths — including the early failures when the account or project
    root cannot be resolved — run the same ``finally`` bookkeeping: the
    task is removed from ``self._running``, host-side task files are
    cleaned up and a scheduling pass is triggered. Without that, a task
    registered in ``_running`` by the caller would stay there forever
    after an early failure.
    """
    log_path = self.get_log_path(task.id)

    try:
        acc = next((a for a in self.get_accounts() if a.name == task.account), None)
        if acc is None:
            task.update_status(TaskStatus.failed, self.tasks_dir)
            self._append_log_footer(log_path, "failed: account not found on reattach")
            return

        project_root = self._get_project_root(task)
        if project_root is None:
            task.update_status(TaskStatus.failed, self.tasks_dir)
            self._append_log_footer(log_path, "failed: project root not found on reattach")
            return

        host_log = self._host_task_log(project_root, task.id)
        host_exit = self._host_task_exit(project_root, task.id)
        conversation = self.get_conversation(task.conversation_id) if task.conversation_id else None
        # Start from the current end of the host log.
        start_offset = host_log.stat().st_size if host_log.exists() else 0

        self._append_log_footer(log_path, "reattached after server restart")
        await self._stream_container_log(
            task, acc, log_path, host_log, host_exit, conversation,
            start_offset=start_offset,
        )
    except asyncio.CancelledError:
        self.kill_task_process(task)
        task.update_status(TaskStatus.killed, self.tasks_dir)
        self._append_log_footer(log_path, "killed")
        return
    except Exception as e:
        task.update_status(TaskStatus.failed, self.tasks_dir)
        self._append_log_footer(log_path, f"failed during reattach: {e}")
        return
    finally:
        self._running.pop(task.id, None)
        self._cleanup_container_task_files(task)
        asyncio.create_task(self.schedule_next_tasks())
|
|
993
|
+
|
|
994
|
+
async def _append_usage_status(self, log_path: Path, acc: Account, container_name: str) -> None:
    """Append a "usage status" section to the task log (best effort).

    Runs the account type's usage-status command inside the container via
    ``docker exec`` with a 45 s timeout and appends its combined
    stdout/stderr to ``log_path``. Nothing is written when there is no
    command for the account type or no container name. Failures are
    reported inside the log section instead of being raised.
    """
    cmd = self.build_usage_status_command(acc.type)
    if not cmd or not container_name:
        return

    proc = None
    try:
        with log_path.open("a", encoding="utf-8") as f:
            f.write("\n" + "=" * 38 + "\n")
            f.write("usage status:\n")

        proc = await asyncio.create_subprocess_exec(
            "docker",
            "exec",
            container_name,
            "bash",
            "-lc",
            cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=45)
        text = stdout.decode("utf-8", errors="replace").strip()
        if not text:
            text = "未获取到用量信息"
    except Exception as e:
        text = f"用量检查失败：{e}"
        # wait_for() only cancels the communicate() call on timeout; the
        # child itself keeps running. Kill and reap it so it is not leaked.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
            except ProcessLookupError:
                pass  # exited between the check and the kill
            await proc.wait()

    try:
        with log_path.open("a", encoding="utf-8") as f:
            f.write(text + "\n")
    except Exception:
        # Usage info is purely informational; never fail the task over it.
        pass
|
|
1026
|
+
|
|
1027
|
+
def _append_log_footer(self, log_path: Path, result: str) -> None:
    """Append a separator and a timestamped ``finished: ... [result]`` line.

    Best effort: any error while opening or writing the log is swallowed.
    """
    separator = "\n" + "=" * 38 + "\n"
    try:
        with log_path.open("a", encoding="utf-8") as log_file:
            finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            log_file.write(f"{separator}finished: {finished_at} [{result}]\n")
    except Exception:
        # A missing or unwritable log must not break task bookkeeping.
        pass
|
|
1034
|
+
|
|
1035
|
+
def _get_task_container(self, task: Task) -> Optional[str]:
    """Derive the container name for a task from its recorded project path.

    Returns ``None`` when no project matches the task's path and account.
    """
    owner = self.find_project_for_path(task.project, task.account)
    return None if owner is None else owner.container_name(task.type)
|
|
1041
|
+
|
|
1042
|
+
def _get_project_root(self, task: Task) -> Optional[Path]:
    """Return the host directory of the project a task belongs to.

    This is the configured ``path`` of the matching project (per the
    original author, the directory mounted as ``/workspace`` inside the
    container). Returns ``None`` when no project matches.
    """
    owner = self.find_project_for_path(task.project, task.account)
    if owner is not None:
        return Path(owner.path)
    return None
|
|
1048
|
+
|
|
1049
|
+
@staticmethod
def _host_task_log(project_root: Path, task_id: str) -> Path:
    """Return ``<project_root>/.coderfleet-tasks/<task_id>.log``."""
    return project_root.joinpath(".coderfleet-tasks", f"{task_id}.log")
|
|
1052
|
+
|
|
1053
|
+
@staticmethod
def _host_task_exit(project_root: Path, task_id: str) -> Path:
    """Return ``<project_root>/.coderfleet-tasks/<task_id>.exit``."""
    return project_root.joinpath(".coderfleet-tasks", f"{task_id}.exit")
|
|
1056
|
+
|
|
1057
|
+
def _cleanup_container_task_files(self, task: Task) -> None:
    """Delete the task's host-side log and exit files, if they exist.

    Does nothing when the task's project root cannot be resolved.
    """
    root = self._get_project_root(task)
    if root is None:
        return
    for locate in (self._host_task_log, self._host_task_exit):
        locate(root, task.id).unlink(missing_ok=True)
|
|
1063
|
+
|
|
1064
|
+
def is_task_process_alive(self, task: Task) -> bool:
    """Report whether a process carrying the task's marker runs in its container.

    Runs ``pgrep -af <marker>`` through ``docker exec`` and treats exit
    status 0 as "alive". Returns ``False`` when the task's container
    cannot be determined.
    """
    container = self._get_task_container(task)
    if container is None:
        return False

    quoted_marker = shlex.quote(self.task_process_marker(task.id))
    probe = [
        "docker",
        "exec",
        container,
        "bash",
        "-lc",
        f"pgrep -af {quoted_marker} >/dev/null",
    ]
    completed = subprocess.run(
        probe,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    return completed.returncode == 0
|
|
1083
|
+
|
|
1084
|
+
def kill_task_process(self, task: Task) -> None:
    """Send SIGTERM to the task's marked process(es) inside its container.

    Runs ``pkill -TERM -f <marker>`` through ``docker exec``; the result is
    ignored. Does nothing when the task's container cannot be determined.
    """
    container = self._get_task_container(task)
    if container is None:
        return

    quoted_marker = shlex.quote(self.task_process_marker(task.id))
    terminate = [
        "docker",
        "exec",
        container,
        "bash",
        "-lc",
        # "|| true": no matching process is not an error.
        f"pkill -TERM -f {quoted_marker} || true",
    ]
    subprocess.run(
        terminate,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
|
|
1102
|
+
|
|
1103
|
+
async def reconcile_running_tasks(self) -> int:
    """Recover tasks still marked ``running``; intended to be called at startup.

    For each such task:
      * container process still alive -> a ``_reattach`` background task is
        started and registered in ``self._running``;
      * process gone -> the final status is restored from the exit file
        (0 = done, otherwise failed); a missing/unreadable exit file or an
        unresolvable project root marks the task failed. The outcome is
        appended to the task log and host-side task files are removed.

    Returns:
        Number of tasks handled (reattached or finalized).
    """
    handled = 0
    for task in Task.load_all(self.tasks_dir):
        if task.status != TaskStatus.running:
            continue

        if self.is_task_process_alive(task):
            worker = asyncio.create_task(
                self._reattach(task),
                name=f"task-{task.id}",
            )
            self._running[task.id] = worker
            handled += 1
            continue

        # Process is gone: assume failure unless the exit file says 0.
        status = TaskStatus.failed
        root = self._get_project_root(task)
        if root is None:
            result = "failed: server restarted; project root not found"
        else:
            exit_file = self._host_task_exit(root, task.id)
            if not exit_file.exists():
                result = "failed: server restarted; no container process or exit file found"
            else:
                try:
                    rc = int(exit_file.read_text().strip())
                except (ValueError, OSError):
                    result = "failed: server restarted; could not read exit code"
                else:
                    if rc == 0:
                        status = TaskStatus.done
                        result = "done"
                    else:
                        result = f"failed (exit={rc})"

        task.update_status(status, self.tasks_dir)
        self._append_log_footer(self.get_log_path(task.id), result)
        self._cleanup_container_task_files(task)
        handled += 1

    return handled
|
|
1148
|
+
|
|
1149
|
+
# ── 终止任务 ──────────────────────────────────────────
|
|
1150
|
+
|
|
1151
|
+
async def kill_task(self, task_id: str) -> Task:
    """Terminate a running task or cancel a pending/scheduled one.

    Pending and scheduled tasks are only marked ``killed``. For a running
    task the status is updated first, the container process is signalled,
    and the background coroutine is cancelled and awaited for up to 3 s.
    A scheduling pass is triggered in every case.

    Returns:
        The updated task.

    Raises:
        ValueError: If the task does not exist.
        RuntimeError: If the task is not running, pending or scheduled.
    """
    task = self.get_task(task_id)
    if task is None:
        raise ValueError(f"任务 '{task_id}' 不存在")

    queued_states = (TaskStatus.pending, TaskStatus.scheduled)
    if task.status != TaskStatus.running and task.status not in queued_states:
        raise RuntimeError(f"任务状态为 '{task.status.value}'，只能终止 running、pending 或 scheduled 状态的任务")

    if task.status in queued_states:
        # Remember the state before it is overwritten, to word the footer.
        was_scheduled = task.status == TaskStatus.scheduled
        task.update_status(TaskStatus.killed, self.tasks_dir)
        if was_scheduled:
            reason = "killed by user (cancelled schedule)"
        else:
            reason = "killed by user (while pending)"
        self._append_log_footer(self.get_log_path(task_id), reason)
        # Trigger a scheduling pass so anything queued behind it proceeds.
        asyncio.create_task(self.schedule_next_tasks())
        return task

    # Update the status first to guard against concurrent writes.
    task.update_status(TaskStatus.killed, self.tasks_dir)

    # Stop the container process, then cancel the tracking coroutine.
    self.kill_task_process(task)
    worker = self._running.pop(task_id, None)
    if worker and not worker.done():
        worker.cancel()
        try:
            await asyncio.wait_for(asyncio.shield(worker), timeout=3)
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass

    self._append_log_footer(self.get_log_path(task_id), "killed by user")
    # Trigger the next scheduling pass.
    asyncio.create_task(self.schedule_next_tasks())
    return task
|
|
1184
|
+
|
|
1185
|
+
def delete_task(self, task_id: str) -> None:
    """Delete a task's ``.json`` record and ``.log`` file from ``tasks_dir``.

    Raises:
        RuntimeError: If the task is currently running.
        ValueError: If no ``<task_id>.json`` record exists.
    """
    task = self.get_task(task_id)
    if task:
        if task.status == TaskStatus.running:
            raise RuntimeError(f"任务 '{task_id}' 正在运行，无法删除")

    record_file = self.tasks_dir / f"{task_id}.json"
    if not record_file.exists():
        raise ValueError(f"任务 '{task_id}' 不存在")

    log_file = self.tasks_dir / f"{task_id}.log"
    for doomed in (record_file, log_file):
        doomed.unlink(missing_ok=True)
|
|
1195
|
+
|
|
1196
|
+
def archive_task(self, task_id: str, archived: bool) -> Task:
    """Set a task's ``archived`` flag, persist it and return the task.

    Raises:
        ValueError: If the task does not exist.
    """
    target = self.get_task(task_id)
    if target is not None:
        target.archived = archived
        target.save(self.tasks_dir)
        return target
    raise ValueError(f"任务 '{task_id}' 不存在")
|
|
1203
|
+
|
|
1204
|
+
# ── 清理旧记录 ────────────────────────────────────────
|
|
1205
|
+
|
|
1206
|
+
def clean_tasks(self, keep: int = 30) -> int:
    """Delete task records beyond the first ``keep`` entries.

    Tasks are taken in the order ``Task.load_all`` returns them
    (presumably newest first — confirm against ``Task.load_all``); the
    first ``keep`` are retained. Running tasks are never deleted.

    Returns:
        Number of tasks whose ``.json``/``.log`` files were removed.
    """
    tasks = Task.load_all(self.tasks_dir)
    if len(tasks) <= keep:
        return 0

    removed = 0
    for stale in tasks[keep:]:
        if stale.status == TaskStatus.running:
            continue
        for suffix in (".json", ".log"):
            (self.tasks_dir / f"{stale.id}{suffix}").unlink(missing_ok=True)
        removed += 1
    return removed
|