htmlgen-mcp 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release.
This version of htmlgen-mcp might be problematic.
- htmlgen_mcp/agents/quick_generator.py +481 -63
- htmlgen_mcp/agents/smart_web_agent.py +707 -235
- htmlgen_mcp/agents/web_tools/__init__.py +41 -41
- htmlgen_mcp/agents/web_tools/css.py +148 -0
- htmlgen_mcp/agents/web_tools/js.py +12 -10
- htmlgen_mcp/agents/web_tools/navigation.py +2 -0
- htmlgen_mcp/agents/web_tools/project.py +0 -4
- htmlgen_mcp/config.py +9 -30
- htmlgen_mcp/improved_progress.py +392 -0
- htmlgen_mcp/nas_log_manager.py +308 -0
- htmlgen_mcp/nas_storage.py +356 -0
- htmlgen_mcp/progress_tools.py +194 -0
- htmlgen_mcp/progress_tracker.py +378 -0
- htmlgen_mcp/web_agent_server.py +35 -4
- {htmlgen_mcp-0.3.3.dist-info → htmlgen_mcp-0.3.5.dist-info}/METADATA +1 -1
- {htmlgen_mcp-0.3.3.dist-info → htmlgen_mcp-0.3.5.dist-info}/RECORD +19 -16
- htmlgen_mcp/agents/cluster_state.py +0 -414
- htmlgen_mcp/agents/cluster_storage.py +0 -341
- {htmlgen_mcp-0.3.3.dist-info → htmlgen_mcp-0.3.5.dist-info}/WHEEL +0 -0
- {htmlgen_mcp-0.3.3.dist-info → htmlgen_mcp-0.3.5.dist-info}/entry_points.txt +0 -0
- {htmlgen_mcp-0.3.3.dist-info → htmlgen_mcp-0.3.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,392 @@
+"""Improved progress-log management - solves lookup problems in cluster environments."""
+import json
+import time
+import os
+from pathlib import Path
+from typing import Dict, Optional, Any, List
+from datetime import datetime
+import threading
+import fcntl
+
+
+class ImprovedProgressManager:
+    """Improved progress manager - supports lookups by job_id and plan_id."""
+
+    def __init__(self, nas_base_path: str = "/app/mcp-servers/mcp-servers/html_agent"):
+        self.nas_base = Path(nas_base_path)
+        self.progress_base = self.nas_base / "mcp_data" / "make_web"
+
+        # Separate storage for each record type
+        self.jobs_dir = self.progress_base / "jobs"
+        self.plans_dir = self.progress_base / "plans"
+        self.logs_dir = self.progress_base / "logs"
+        self.mappings_dir = self.progress_base / "mappings"
+
+        # Create all required directories
+        for dir_path in [self.jobs_dir, self.plans_dir, self.logs_dir, self.mappings_dir]:
+            dir_path.mkdir(parents=True, exist_ok=True)
+
+        # In-memory cache to reduce file I/O
+        self._cache = {}
+        self._cache_lock = threading.Lock()
+
+    def register_job(self, job_id: str, plan_id: Optional[str] = None,
+                     description: str = "", project_path: str = "") -> str:
+        """
+        Register a new job.
+
+        Args:
+            job_id: job ID
+            plan_id: associated plan ID (optional)
+            description: job description
+            project_path: project path
+
+        Returns:
+            Path of the progress log file
+        """
+        # Build the log file path
+        log_file = self.logs_dir / f"{job_id}.jsonl"
+
+        # Build the job record
+        job_info = {
+            "job_id": job_id,
+            "plan_id": plan_id,
+            "description": description,
+            "project_path": project_path,
+            "log_file": str(log_file),
+            "status": "pending",
+            "created_at": datetime.now().isoformat(),
+            "node_id": os.environ.get("NODE_ID", "unknown"),
+            "updated_at": datetime.now().isoformat()
+        }
+
+        # Persist the job record
+        job_file = self.jobs_dir / f"{job_id}.json"
+        self._safe_write_json(job_file, job_info)
+
+        # If a plan_id is given, create the mappings
+        if plan_id:
+            self._create_mapping(plan_id, job_id, "plan_to_job")
+            self._create_mapping(job_id, plan_id, "job_to_plan")
+
+        # Create the log-file mappings
+        self._create_mapping(job_id, str(log_file), "job_to_log")
+        if plan_id:
+            self._create_mapping(plan_id, str(log_file), "plan_to_log")
+
+        # Update the cache
+        with self._cache_lock:
+            self._cache[f"job:{job_id}"] = job_info
+            if plan_id:
+                self._cache[f"plan:{plan_id}:job"] = job_id
+                self._cache[f"plan:{plan_id}:log"] = str(log_file)
+
+        return str(log_file)
+
+    def find_log_path(self, identifier: str) -> Optional[str]:
+        """
+        Find the log file path by job_id or plan_id.
+
+        Args:
+            identifier: job_id or plan_id
+
+        Returns:
+            Log file path, or None if not found
+        """
+        # Check the cache first
+        with self._cache_lock:
+            # Try the identifier as a job_id
+            cached_job = self._cache.get(f"job:{identifier}")
+            if cached_job:
+                return cached_job.get("log_file")
+
+            # Try the identifier as a plan_id
+            cached_log = self._cache.get(f"plan:{identifier}:log")
+            if cached_log:
+                return cached_log
+
+        # Strategy 1: look for the log file directly
+        direct_log = self.logs_dir / f"{identifier}.jsonl"
+        if direct_log.exists():
+            return str(direct_log)
+
+        # Strategy 2: look it up in the job record
+        job_file = self.jobs_dir / f"{identifier}.json"
+        if job_file.exists():
+            try:
+                job_info = self._safe_read_json(job_file)
+                if job_info and "log_file" in job_info:
+                    # Update the cache
+                    with self._cache_lock:
+                        self._cache[f"job:{identifier}"] = job_info
+                    return job_info["log_file"]
+            except Exception:
+                pass
+
+        # Strategy 3: look it up in the mappings
+        mapping = self._load_mapping(identifier, "job_to_log")
+        if mapping:
+            return mapping
+
+        mapping = self._load_mapping(identifier, "plan_to_log")
+        if mapping:
+            return mapping
+
+        # Strategy 4: scan all job records (last resort)
+        for job_file in self.jobs_dir.glob("*.json"):
+            try:
+                job_info = self._safe_read_json(job_file)
+                if job_info:
+                    # Check the job_id
+                    if job_info.get("job_id") == identifier:
+                        log_file = job_info.get("log_file")
+                        if log_file:
+                            # Update the cache
+                            with self._cache_lock:
+                                self._cache[f"job:{identifier}"] = job_info
+                            return log_file
+
+                    # Check the plan_id
+                    if job_info.get("plan_id") == identifier:
+                        log_file = job_info.get("log_file")
+                        if log_file:
+                            # Update the cache
+                            with self._cache_lock:
+                                self._cache[f"plan:{identifier}:log"] = log_file
+                            return log_file
+            except Exception:
+                continue
+
+        return None
+
+    def write_progress(self, job_id: str, event: Dict[str, Any]) -> bool:
+        """
+        Write a progress event.
+
+        Args:
+            job_id: job ID
+            event: progress event
+
+        Returns:
+            Whether the write succeeded
+        """
+        log_path = self.find_log_path(job_id)
+        if not log_path:
+            # If no log file is found, register the job automatically
+            log_path = self.register_job(job_id)
+
+        try:
+            # Add a timestamp
+            if "timestamp" not in event:
+                event["timestamp"] = time.time()
+
+            # Atomic write (append mode); note: temp_file is currently unused
+            log_file = Path(log_path)
+            temp_file = log_file.parent / f".{log_file.name}.tmp"
+
+            # Use a file lock
+            with open(log_path, 'a', encoding='utf-8') as f:
+                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+                try:
+                    f.write(json.dumps(event, ensure_ascii=False))
+                    f.write('\n')
+                    f.flush()
+                    os.fsync(f.fileno())  # Force a flush to disk
+                finally:
+                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+
+            # Update the job status
+            self._update_job_status(job_id, event)
+
+            return True
+
+        except Exception as e:
+            print(f"Failed to write progress: {e}")
+            return False
+
+    def read_progress(self, identifier: str, limit: int = 100,
+                      since_timestamp: Optional[float] = None) -> List[Dict[str, Any]]:
+        """
+        Read progress events.
+
+        Args:
+            identifier: job_id or plan_id
+            limit: maximum number of events to return
+            since_timestamp: only return events after this timestamp
+
+        Returns:
+            List of progress events
+        """
+        log_path = self.find_log_path(identifier)
+        if not log_path or not Path(log_path).exists():
+            return []
+
+        events = []
+        try:
+            with open(log_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    if line.strip():
+                        try:
+                            event = json.loads(line)
+                            # Filter by timestamp
+                            if since_timestamp and event.get("timestamp", 0) <= since_timestamp:
+                                continue
+                            events.append(event)
+                            # Enforce the limit
+                            if len(events) >= limit:
+                                break
+                        except json.JSONDecodeError:
+                            continue
+        except Exception as e:
+            print(f"Failed to read progress: {e}")
+
+        return events
+
+    def get_job_status(self, job_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get the job status.
+
+        Args:
+            job_id: job ID
+
+        Returns:
+            Job status record
+        """
+        # Check the cache first
+        with self._cache_lock:
+            cached = self._cache.get(f"job:{job_id}")
+            if cached and time.time() - cached.get("_cache_time", 0) < 5:  # 5-second cache TTL
+                return cached
+
+        # Read from the file
+        job_file = self.jobs_dir / f"{job_id}.json"
+        if job_file.exists():
+            try:
+                job_info = self._safe_read_json(job_file)
+                if job_info:
+                    # Update the cache
+                    job_info["_cache_time"] = time.time()
+                    with self._cache_lock:
+                        self._cache[f"job:{job_id}"] = job_info
+                    return job_info
+            except Exception:
+                pass
+
+        return None
+
+    def _create_mapping(self, key: str, value: str, mapping_type: str):
+        """Create a mapping entry."""
+        mapping_file = self.mappings_dir / f"{mapping_type}.json"
+
+        # Read the existing mappings
+        mappings = {}
+        if mapping_file.exists():
+            try:
+                mappings = self._safe_read_json(mapping_file) or {}
+            except Exception:
+                mappings = {}
+
+        # Update the mapping
+        mappings[key] = value
+
+        # Persist the mappings
+        self._safe_write_json(mapping_file, mappings)
+
+    def _load_mapping(self, key: str, mapping_type: str) -> Optional[str]:
+        """Load a mapping entry."""
+        mapping_file = self.mappings_dir / f"{mapping_type}.json"
+
+        if mapping_file.exists():
+            try:
+                mappings = self._safe_read_json(mapping_file)
+                if mappings:
+                    return mappings.get(key)
+            except Exception:
+                pass
+
+        return None
+
+    def _update_job_status(self, job_id: str, event: Dict[str, Any]):
+        """Update the job status record."""
+        job_file = self.jobs_dir / f"{job_id}.json"
+
+        # Read the existing record
+        job_info = {}
+        if job_file.exists():
+            job_info = self._safe_read_json(job_file) or {}
+
+        # Update the status fields
+        if "status" in event:
+            job_info["status"] = event["status"]
+        if "progress" in event:
+            job_info["progress"] = event["progress"]
+
+        job_info["updated_at"] = datetime.now().isoformat()
+        job_info["last_event"] = event
+
+        # Persist the update
+        self._safe_write_json(job_file, job_info)
+
+        # Update the cache
+        job_info["_cache_time"] = time.time()
+        with self._cache_lock:
+            self._cache[f"job:{job_id}"] = job_info
+
+    def _safe_write_json(self, file_path: Path, data: Dict):
+        """Safely write a JSON file (atomic operation)."""
+        temp_file = file_path.parent / f".{file_path.name}.tmp"
+
+        try:
+            # Write to a temporary file first
+            with open(temp_file, 'w', encoding='utf-8') as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+                f.flush()
+                os.fsync(f.fileno())
+
+            # Atomic rename
+            temp_file.replace(file_path)
+
+        except Exception as e:
+            # Clean up the temporary file
+            if temp_file.exists():
+                temp_file.unlink()
+            raise e
+
+    def _safe_read_json(self, file_path: Path) -> Optional[Dict]:
+        """Safely read a JSON file."""
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except Exception:
+            return None
+
+    def cleanup_old_logs(self, days_to_keep: int = 7) -> int:
+        """Delete log files older than the retention window."""
+        cleaned = 0
+        cutoff_time = time.time() - (days_to_keep * 24 * 3600)
+
+        for log_file in self.logs_dir.glob("*.jsonl"):
+            try:
+                if log_file.stat().st_mtime < cutoff_time:
+                    log_file.unlink()
+                    cleaned += 1
+            except Exception:
+                continue
+
+        return cleaned
+
+
+# Global singleton instance
+_progress_manager: Optional[ImprovedProgressManager] = None
+
+
+def get_progress_manager() -> ImprovedProgressManager:
+    """Get the progress manager instance (singleton)."""
+    global _progress_manager
+    if _progress_manager is None:
+        nas_path = os.environ.get(
+            "NAS_STORAGE_PATH",
+            "/app/mcp-servers/mcp-servers/html_agent"
+        )
+        _progress_manager = ImprovedProgressManager(nas_path)
+    return _progress_manager
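For orientation, here is a minimal usage sketch of the ImprovedProgressManager API added above. The module path follows the wheel's file layout; the job and plan IDs are hypothetical, and the snippet assumes NAS_STORAGE_PATH points at a writable location.

from htmlgen_mcp.improved_progress import get_progress_manager

manager = get_progress_manager()  # process-wide singleton; honors NAS_STORAGE_PATH

# Register a job tied to a plan (IDs here are illustrative only); returns the JSONL log path
manager.register_job("job-123", plan_id="plan-456", description="demo build")

# Append an event; a timestamp is added automatically if missing
manager.write_progress("job-123", {"status": "running", "progress": 50})

# The same log is reachable through either identifier
events = manager.read_progress("plan-456", limit=10)
status = manager.get_job_status("job-123")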
@@ -0,0 +1,308 @@
+"""Simplified NAS log manager - reads and writes logs directly on the NAS."""
+import json
+import os
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+import time
+from datetime import datetime
+
+
+class NASLogManager:
+    """NAS log manager - all operations happen directly on the NAS."""
+
+    def __init__(self, nas_base_path: str = "/app/mcp-servers/mcp-servers/html_agent"):
+        self.nas_base = Path(nas_base_path)
+
+        # Log storage directories
+        self.logs_dir = self.nas_base / "mcp_data" / "make_web" / "progress_logs"
+        self.jobs_dir = self.nas_base / "mcp_data" / "make_web" / "jobs"
+        self.index_dir = self.nas_base / "mcp_data" / "make_web" / "job_index"
+
+        # Create the directories
+        for dir_path in [self.logs_dir, self.jobs_dir, self.index_dir]:
+            dir_path.mkdir(parents=True, exist_ok=True)
+
+    def create_job_log(self, job_id: str, plan_id: Optional[str] = None) -> str:
+        """
+        Create a job log file.
+
+        Args:
+            job_id: job ID
+            plan_id: plan ID (optional)
+
+        Returns:
+            Log file path
+        """
+        # Log file path
+        log_file = self.logs_dir / f"{job_id}.jsonl"
+
+        # Build the index record (used for fast lookup)
+        index_data = {
+            "job_id": job_id,
+            "plan_id": plan_id,
+            "log_file": str(log_file),
+            "created_at": datetime.now().isoformat(),
+            "node_id": os.environ.get("NODE_ID", "unknown")
+        }
+
+        # Save the job_id index
+        job_index_file = self.index_dir / f"{job_id}.json"
+        with open(job_index_file, 'w', encoding='utf-8') as f:
+            json.dump(index_data, f, ensure_ascii=False, indent=2)
+
+        # If a plan_id is given, create an index for it as well
+        if plan_id:
+            plan_index_file = self.index_dir / f"{plan_id}.json"
+            with open(plan_index_file, 'w', encoding='utf-8') as f:
+                json.dump(index_data, f, ensure_ascii=False, indent=2)
+
+        # Initialize the log file
+        if not log_file.exists():
+            with open(log_file, 'w', encoding='utf-8') as f:
+                init_event = {
+                    "timestamp": time.time(),
+                    "event": "job_created",
+                    "job_id": job_id,
+                    "plan_id": plan_id,
+                    "created_at": datetime.now().isoformat()
+                }
+                f.write(json.dumps(init_event, ensure_ascii=False) + '\n')
+
+        return str(log_file)
+
+    def find_log_file(self, identifier: str) -> Optional[str]:
+        """
+        Find the log file path (supports both job_id and plan_id).
+
+        Args:
+            identifier: job_id or plan_id
+
+        Returns:
+            Log file path, or None if not found
+        """
+        # Strategy 1: check the index file
+        index_file = self.index_dir / f"{identifier}.json"
+        if index_file.exists():
+            try:
+                with open(index_file, 'r', encoding='utf-8') as f:
+                    index_data = json.load(f)
+                log_file = index_data.get("log_file")
+                if log_file and Path(log_file).exists():
+                    return log_file
+            except Exception:
+                pass
+
+        # Strategy 2: check whether the identifier is a job_id directly
+        direct_log = self.logs_dir / f"{identifier}.jsonl"
+        if direct_log.exists():
+            return str(direct_log)
+
+        # Strategy 3: scan all index files for the identifier
+        for index_file in self.index_dir.glob("*.json"):
+            try:
+                with open(index_file, 'r', encoding='utf-8') as f:
+                    index_data = json.load(f)
+                if (index_data.get("job_id") == identifier or
+                        index_data.get("plan_id") == identifier):
+                    log_file = index_data.get("log_file")
+                    if log_file and Path(log_file).exists():
+                        return log_file
+            except Exception:
+                continue
+
+        return None
+
+    def write_progress(self, identifier: str, event: Dict[str, Any]) -> bool:
+        """
+        Write a progress event.
+
+        Args:
+            identifier: job_id or plan_id
+            event: progress event
+
+        Returns:
+            Whether the write succeeded
+        """
+        log_file_path = self.find_log_file(identifier)
+
+        # If no log file is found, try to create one
+        if not log_file_path:
+            # Assume the identifier is a job_id
+            log_file_path = self.create_job_log(identifier)
+
+        try:
+            # Add a timestamp
+            if "timestamp" not in event:
+                event["timestamp"] = time.time()
+
+            # Append to the log
+            with open(log_file_path, 'a', encoding='utf-8') as f:
+                f.write(json.dumps(event, ensure_ascii=False) + '\n')
+                f.flush()  # Flush to the NAS immediately
+
+            return True
+
+        except Exception as e:
+            print(f"Failed to write progress for {identifier}: {e}")
+            return False
+
+    def read_progress(self, identifier: str, limit: int = 100) -> List[Dict[str, Any]]:
+        """
+        Read progress events.
+
+        Args:
+            identifier: job_id or plan_id
+            limit: maximum number of events to return
+
+        Returns:
+            List of progress events
+        """
+        log_file_path = self.find_log_file(identifier)
+        if not log_file_path:
+            return []
+
+        events = []
+        try:
+            with open(log_file_path, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+
+            # Read starting from the newest entries
+            for line in reversed(lines[-limit:]):
+                if line.strip():
+                    try:
+                        event = json.loads(line)
+                        events.insert(0, event)  # Preserve chronological order
+                    except json.JSONDecodeError:
+                        continue
+
+        except Exception as e:
+            print(f"Failed to read progress for {identifier}: {e}")
+
+        return events
+
+    def get_all_jobs(self) -> List[Dict[str, Any]]:
+        """
+        Get the list of all jobs.
+
+        Returns:
+            Job list
+        """
+        jobs = []
+
+        # Scan all index files
+        for index_file in self.index_dir.glob("*.json"):
+            try:
+                with open(index_file, 'r', encoding='utf-8') as f:
+                    index_data = json.load(f)
+                # Only include job_id records, to avoid duplicates
+                if index_data.get("job_id") == index_file.stem:
+                    jobs.append(index_data)
+            except Exception:
+                continue
+
+        # Sort by creation time
+        jobs.sort(key=lambda x: x.get("created_at", ""), reverse=True)
+
+        return jobs
+
+    def job_exists(self, identifier: str) -> bool:
+        """
+        Check whether a job exists.
+
+        Args:
+            identifier: job_id or plan_id
+
+        Returns:
+            Whether the job exists
+        """
+        return self.find_log_file(identifier) is not None
+
+    def get_job_summary(self, identifier: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a summary of a job.
+
+        Args:
+            identifier: job_id or plan_id
+
+        Returns:
+            Job summary
+        """
+        log_file_path = self.find_log_file(identifier)
+        if not log_file_path:
+            return None
+
+        try:
+            # Read the most recent events
+            events = self.read_progress(identifier, limit=10)
+            if not events:
+                return None
+
+            # Get the latest and earliest of the returned events
+            latest_event = events[-1] if events else {}
+            first_event = events[0] if events else {}
+
+            summary = {
+                "identifier": identifier,
+                "log_file": log_file_path,
+                "total_events": len(events),
+                "first_event_time": first_event.get("timestamp"),
+                "latest_event_time": latest_event.get("timestamp"),
+                "latest_status": latest_event.get("status", "unknown"),
+                "latest_message": latest_event.get("message", ""),
+            }
+
+            # Try to fetch the index record
+            index_file = self.index_dir / f"{identifier}.json"
+            if index_file.exists():
+                try:
+                    with open(index_file, 'r', encoding='utf-8') as f:
+                        index_data = json.load(f)
+                    summary.update({
+                        "job_id": index_data.get("job_id"),
+                        "plan_id": index_data.get("plan_id"),
+                        "created_at": index_data.get("created_at"),
+                        "node_id": index_data.get("node_id")
+                    })
+                except Exception:
+                    pass
+
+            return summary
+
+        except Exception as e:
+            print(f"Failed to get job summary for {identifier}: {e}")
+            return None
+
+
+# Global singleton instance
+_nas_log_manager: Optional[NASLogManager] = None
+
+
+def get_nas_log_manager() -> NASLogManager:
+    """Get the NAS log manager instance (singleton)."""
+    global _nas_log_manager
+    if _nas_log_manager is None:
+        nas_path = os.environ.get(
+            "NAS_STORAGE_PATH",
+            "/app/mcp-servers/mcp-servers/html_agent"
+        )
+        _nas_log_manager = NASLogManager(nas_path)
+    return _nas_log_manager
+
+
+# Convenience functions
+def log_progress(job_id: str, **kwargs):
+    """Convenience function for recording progress."""
+    manager = get_nas_log_manager()
+    return manager.write_progress(job_id, kwargs)
+
+
+def query_progress(identifier: str, limit: int = 20):
+    """Convenience function for querying progress."""
+    manager = get_nas_log_manager()
+    return manager.read_progress(identifier, limit)
+
+
+def ensure_job_log(job_id: str, plan_id: Optional[str] = None):
+    """Convenience function to ensure a job log exists."""
+    manager = get_nas_log_manager()
+    return manager.create_job_log(job_id, plan_id)