devlake-mcp 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devlake_mcp/__init__.py +7 -0
- devlake_mcp/__main__.py +10 -0
- devlake_mcp/cli.py +794 -0
- devlake_mcp/client.py +474 -0
- devlake_mcp/compat.py +165 -0
- devlake_mcp/config.py +204 -0
- devlake_mcp/constants.py +161 -0
- devlake_mcp/enums.py +58 -0
- devlake_mcp/generation_manager.py +296 -0
- devlake_mcp/git_utils.py +489 -0
- devlake_mcp/hooks/__init__.py +49 -0
- devlake_mcp/hooks/hook_utils.py +246 -0
- devlake_mcp/hooks/post_tool_use.py +325 -0
- devlake_mcp/hooks/pre_tool_use.py +110 -0
- devlake_mcp/hooks/record_session.py +183 -0
- devlake_mcp/hooks/session_start.py +81 -0
- devlake_mcp/hooks/stop.py +275 -0
- devlake_mcp/hooks/transcript_utils.py +547 -0
- devlake_mcp/hooks/user_prompt_submit.py +204 -0
- devlake_mcp/logging_config.py +202 -0
- devlake_mcp/retry_queue.py +556 -0
- devlake_mcp/server.py +664 -0
- devlake_mcp/session_manager.py +444 -0
- devlake_mcp/utils.py +225 -0
- devlake_mcp/version_utils.py +174 -0
- devlake_mcp-0.4.1.dist-info/METADATA +541 -0
- devlake_mcp-0.4.1.dist-info/RECORD +31 -0
- devlake_mcp-0.4.1.dist-info/WHEEL +5 -0
- devlake_mcp-0.4.1.dist-info/entry_points.txt +3 -0
- devlake_mcp-0.4.1.dist-info/licenses/LICENSE +21 -0
- devlake_mcp-0.4.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
本地队列重试管理器
|
|
5
|
+
|
|
6
|
+
功能:
|
|
7
|
+
1. 保存失败记录(增强版 save_to_local_queue)
|
|
8
|
+
2. 扫描并重试失败记录
|
|
9
|
+
3. 管理重试状态和清理
|
|
10
|
+
|
|
11
|
+
设计原则:
|
|
12
|
+
- 指数退避策略:1分钟 → 5分钟 → 15分钟 → 60分钟 → 4小时
|
|
13
|
+
- 最大重试次数:5次
|
|
14
|
+
- 失败记录保留:7天
|
|
15
|
+
- 非阻塞执行:不影响主流程
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import os
import json
import logging
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Literal
from dataclasses import dataclass, asdict, field

from .utils import get_data_dir
from .client import DevLakeClient
from .config import DevLakeConfig
|
|
29
|
+
|
|
30
|
+
# 队列类型定义
|
|
31
|
+
QueueType = Literal['session', 'prompt', 'prompt_update', 'file_change']
|
|
32
|
+
|
|
33
|
+
# 模块级 logger
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ============================================================================
|
|
38
|
+
# 数据模型
|
|
39
|
+
# ============================================================================
|
|
40
|
+
|
|
41
|
+
@dataclass
class RetryMetadata:
    """Retry bookkeeping attached to one failed-upload record."""
    queue_type: str                      # one of: session, prompt, prompt_update, file_change
    api_endpoint: str                    # API endpoint path the upload targets
    created_at: str                      # creation time (ISO 8601, 'Z' suffix)
    retry_count: int = 0                 # number of retries attempted so far
    last_retry_at: Optional[str] = None  # timestamp of the most recent retry
    next_retry_at: Optional[str] = None  # earliest time the next retry may run
    # Chronological list of {'timestamp': ..., 'error': ...} entries.
    # BUG FIX: was declared `= None` (a type lie requiring a __post_init__
    # workaround); default_factory gives each instance its own fresh list.
    error_history: List[Dict[str, str]] = field(default_factory=list)

    def __post_init__(self):
        # Records serialized by older versions may deserialize with an
        # explicit None; normalize so callers can always append.
        if self.error_history is None:
            self.error_history = []
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class FailedUpload:
    """One failed upload: the original payload plus its retry bookkeeping."""
    data: Dict[str, Any]  # original upload payload, stored verbatim
    metadata: RetryMetadata  # retry bookkeeping (attempt count, schedule, error history)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ============================================================================
|
|
65
|
+
# 配置常量(支持环境变量覆盖)
|
|
66
|
+
# ============================================================================
|
|
67
|
+
|
|
68
|
+
def get_retry_config() -> Dict[str, Any]:
    """
    Read the retry configuration from environment variables.

    Environment variables:
        - DEVLAKE_RETRY_ENABLED: whether retrying is enabled (default: true)
        - DEVLAKE_RETRY_MAX_ATTEMPTS: maximum retry attempts (default: 5)
        - DEVLAKE_RETRY_CLEANUP_DAYS: days to keep failed records (default: 7)
        - DEVLAKE_RETRY_CHECK_ON_HOOK: check retries during hook runs (default: true)

    Returns:
        Dict with keys 'enabled', 'max_attempts', 'cleanup_days', 'check_on_hook'.

    Note:
        Malformed integer values fall back to their defaults instead of
        raising — a bad env var must not crash the hook pipeline.
    """
    def _int_env(name: str, default: int) -> int:
        # Best-effort integer parse; silently use the default on garbage input.
        try:
            return int(os.getenv(name, str(default)))
        except ValueError:
            return default

    return {
        'enabled': os.getenv('DEVLAKE_RETRY_ENABLED', 'true').lower() == 'true',
        'max_attempts': _int_env('DEVLAKE_RETRY_MAX_ATTEMPTS', 5),
        'cleanup_days': _int_env('DEVLAKE_RETRY_CLEANUP_DAYS', 7),
        'check_on_hook': os.getenv('DEVLAKE_RETRY_CHECK_ON_HOOK', 'true').lower() == 'true',
    }
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Exponential backoff schedule (seconds)
RETRY_BACKOFF_SCHEDULE = [
    60,  # 1st retry: after 1 minute
    300,  # 2nd retry: after 5 minutes
    900,  # 3rd retry: after 15 minutes
    3600,  # 4th retry: after 60 minutes
    14400,  # 5th retry: after 4 hours
]

# Mapping from queue type to API endpoint
QUEUE_TYPE_TO_ENDPOINT = {
    'session': '/api/ai-coding/sessions',
    'prompt': '/api/ai-coding/prompts',
    'prompt_update': '/api/ai-coding/prompts',  # updates reuse the same endpoint
    'file_change': '/api/ai-coding/file-changes',
}

# Mapping from queue type to on-disk queue directory name
QUEUE_TYPE_TO_DIR = {
    'session': 'failed_session_uploads',
    'prompt': 'failed_prompt_uploads',
    'prompt_update': 'failed_prompt_update_uploads',  # updates get their own directory
    'file_change': 'failed_file_change_uploads',
}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ============================================================================
|
|
113
|
+
# 核心功能函数
|
|
114
|
+
# ============================================================================
|
|
115
|
+
|
|
116
|
+
def save_failed_upload(
    queue_type: QueueType,
    data: Dict[str, Any],
    error: str,
    api_endpoint: Optional[str] = None
) -> bool:
    """
    Persist a failed upload to the local retry queue.

    Args:
        queue_type: Queue type ('session', 'prompt', 'prompt_update', 'file_change')
        data: The original upload payload
        error: Error message describing the failure
        api_endpoint: API endpoint (optional; inferred from queue_type when omitted)

    Returns:
        bool: True when the record was saved, False otherwise.

    Examples:
        # create operation
        save_failed_upload(
            queue_type='prompt',
            data=prompt_data,
            error='Connection timeout'
        )

        # update operation
        save_failed_upload(
            queue_type='prompt_update',
            data={'prompt_uuid': uuid, **update_data},
            error='500 Server Error'
        )
    """
    try:
        # Infer the API endpoint from the queue type when not given explicitly.
        if api_endpoint is None:
            api_endpoint = QUEUE_TYPE_TO_ENDPOINT.get(queue_type)
            if not api_endpoint:
                logger.error(f"未知的队列类型: {queue_type}")
                return False

        # Resolve the on-disk queue directory for this queue type.
        queue_dir_name = QUEUE_TYPE_TO_DIR.get(queue_type)
        if not queue_dir_name:
            logger.error(f"未知的队列类型: {queue_type}")
            return False

        queue_dir = get_data_dir(persistent=True) / queue_dir_name
        queue_dir.mkdir(parents=True, exist_ok=True)

        # Build retry metadata for the brand-new record (first retry per backoff schedule).
        now = datetime.utcnow()
        next_retry_time = calculate_next_retry_time(retry_count=0)

        metadata = RetryMetadata(
            queue_type=queue_type,
            api_endpoint=api_endpoint,
            created_at=now.isoformat() + 'Z',
            retry_count=0,
            last_retry_at=None,
            next_retry_at=next_retry_time.isoformat() + 'Z',
            error_history=[{
                'timestamp': now.isoformat() + 'Z',
                'error': error
            }]
        )

        failed_upload = FailedUpload(data=data, metadata=metadata)

        # BUG FIX: the filename was only the millisecond timestamp, so two
        # failures within the same millisecond collided and the second record
        # silently overwrote (lost) the first. A short random suffix makes
        # the name unique while keeping mtime-based ordering.
        filename = f"{int(now.timestamp() * 1000)}_{os.urandom(4).hex()}.json"
        queue_file = queue_dir / filename

        with open(queue_file, 'w', encoding='utf-8') as f:
            json.dump(asdict(failed_upload), f, ensure_ascii=False, indent=2)

        logger.info(f"已保存失败记录到队列 '{queue_type}': {queue_file}")
        return True

    except Exception as e:
        # Saving must never break the main flow; log and report failure.
        logger.error(f"保存失败记录到队列 '{queue_type}' 时出错: {e}", exc_info=True)
        return False
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def retry_failed_uploads(max_parallel: int = 3) -> Dict[str, Any]:
    """
    Scan all failure queues and retry eligible records.

    Args:
        max_parallel: Maximum number of records to retry per call
            (keeps the caller — typically a hook — from blocking).

    Returns:
        Dict: retry statistics:
            {
                'checked': 10,    # records inspected
                'retried': 5,     # retries attempted
                'succeeded': 3,   # retries that succeeded
                'failed': 2,      # retries that failed
                'skipped': 5      # records skipped (backoff not elapsed / max attempts)
            }

    Notes:
        - Non-blocking and fast-returning; safe to call from hooks.
        - At most ``max_parallel`` records are retried per call.
    """
    config = get_retry_config()
    if not config['enabled']:
        logger.debug("重试功能已禁用(DEVLAKE_RETRY_ENABLED=false)")
        return {'checked': 0, 'retried': 0, 'succeeded': 0, 'failed': 0, 'skipped': 0}

    stats = {
        'checked': 0,
        'retried': 0,
        'succeeded': 0,
        'failed': 0,
        'skipped': 0,
    }

    try:
        now = datetime.utcnow()
        retry_count = 0
        # BUG FIX: the original returned from two loops deep when the retry
        # budget was exhausted, skipping the summary log below. A flag lets
        # us break out of both loops and still reach the common exit path.
        budget_exhausted = False

        # Walk every queue type's directory.
        for queue_type in QUEUE_TYPE_TO_DIR.keys():
            queue_dir_name = QUEUE_TYPE_TO_DIR[queue_type]
            queue_dir = get_data_dir(persistent=True) / queue_dir_name

            if not queue_dir.exists():
                continue

            # Oldest records first (mtime order).
            failed_files = sorted(queue_dir.glob('*.json'), key=lambda f: f.stat().st_mtime)

            for failed_file in failed_files:
                # Cap the number of retries per call to avoid blocking the caller.
                if retry_count >= max_parallel:
                    logger.debug(f"已达到单次最大重试数量 {max_parallel},跳过剩余记录")
                    budget_exhausted = True
                    break

                stats['checked'] += 1

                try:
                    with open(failed_file, 'r', encoding='utf-8') as f:
                        record = json.load(f)

                    data = record.get('data', {})
                    metadata_dict = record.get('metadata', {})

                    metadata = RetryMetadata(**metadata_dict)

                    # Skip records that have exhausted their retry budget.
                    if metadata.retry_count >= config['max_attempts']:
                        logger.debug(f"记录已达最大重试次数 {config['max_attempts']},跳过: {failed_file}")
                        stats['skipped'] += 1
                        continue

                    # Compare as naive UTC datetimes (timestamps are stored with a 'Z' suffix).
                    next_retry_time = datetime.fromisoformat(metadata.next_retry_at.replace('Z', '+00:00')).replace(tzinfo=None)
                    if now < next_retry_time:
                        logger.debug(f"未到重试时间({metadata.next_retry_at}),跳过: {failed_file}")
                        stats['skipped'] += 1
                        continue

                    # BUG FIX: capture the attempt number now —
                    # _update_retry_metadata increments metadata.retry_count,
                    # so computing "+ 1" after it ran over-reported the attempt.
                    attempt = metadata.retry_count + 1
                    logger.info(f"开始重试上传(第 {attempt} 次): {failed_file}")
                    stats['retried'] += 1
                    retry_count += 1

                    success, error = _retry_upload(queue_type, data, metadata.api_endpoint)

                    if success:
                        # Retry succeeded: drop the local record.
                        failed_file.unlink()
                        logger.info(f"重试成功,已删除本地记录: {failed_file}")
                        stats['succeeded'] += 1
                    else:
                        # Retry failed: advance the bookkeeping for the next attempt.
                        _update_retry_metadata(failed_file, metadata, error)
                        logger.warning(f"重试失败(第 {attempt} 次): {error}")
                        stats['failed'] += 1

                except Exception as e:
                    logger.error(f"处理失败记录时出错: {failed_file}, 错误: {e}", exc_info=True)
                    stats['failed'] += 1

            if budget_exhausted:
                break

        if stats['retried'] > 0:
            logger.info(f"重试统计: {stats}")

        return stats

    except Exception as e:
        logger.error(f"重试失败上传时出错: {e}", exc_info=True)
        return stats
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def cleanup_expired_failures(max_age_hours: Optional[int] = None) -> int:
    """
    Delete expired failed-upload records from all queues.

    Args:
        max_age_hours: Maximum retention in hours; defaults to
            ``cleanup_days * 24`` from the retry config.

    Returns:
        int: Number of files deleted.

    A record is deleted when:
        1. it has reached the maximum retry count, or
        2. it is older than the retention window (default 7 days), or
        3. its JSON cannot be read at all.
    """
    config = get_retry_config()
    if max_age_hours is None:
        max_age_hours = config['cleanup_days'] * 24

    cleaned_count = 0

    try:
        # BUG FIX: datetime.utcnow().timestamp() treats the naive UTC value
        # as *local* time, skewing ages by the machine's UTC offset when
        # compared with st_mtime (a true epoch value). datetime.now()
        # .timestamp() yields the correct epoch regardless of timezone.
        now_ts = datetime.now().timestamp()
        max_age_seconds = max_age_hours * 3600

        # Walk every queue directory.
        for queue_dir_name in QUEUE_TYPE_TO_DIR.values():
            queue_dir = get_data_dir(persistent=True) / queue_dir_name

            if not queue_dir.exists():
                continue

            for failed_file in queue_dir.glob('*.json'):
                try:
                    # Age from the file's last-modified time (epoch seconds).
                    file_age_seconds = now_ts - failed_file.stat().st_mtime

                    should_delete = False

                    # Condition 1: record is past the retention window.
                    if file_age_seconds > max_age_seconds:
                        should_delete = True
                        logger.debug(f"文件已过期({file_age_seconds / 3600:.1f} 小时): {failed_file}")
                    else:
                        # Condition 2: record has exhausted its retry budget.
                        try:
                            with open(failed_file, 'r', encoding='utf-8') as f:
                                record = json.load(f)
                            metadata_dict = record.get('metadata', {})
                            retry_count = metadata_dict.get('retry_count', 0)

                            if retry_count >= config['max_attempts']:
                                should_delete = True
                                logger.debug(f"已达最大重试次数 {config['max_attempts']}: {failed_file}")
                        except Exception:
                            # Condition 3: unreadable records are dropped too.
                            should_delete = True
                            logger.warning(f"无法读取的文件,将被删除: {failed_file}")

                    if should_delete:
                        failed_file.unlink()
                        cleaned_count += 1

                except Exception as e:
                    logger.error(f"清理文件时出错: {failed_file}, 错误: {e}")

        if cleaned_count > 0:
            logger.info(f"已清理 {cleaned_count} 个过期的失败记录")

        return cleaned_count

    except Exception as e:
        logger.error(f"清理过期失败记录时出错: {e}", exc_info=True)
        return cleaned_count
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def get_queue_statistics() -> Dict[str, Any]:
    """
    Collect per-queue and overall statistics for the failure queues.

    Returns:
        Dict keyed by queue type plus a 'summary' entry, e.g.::

            {
                'session': {'total': 5, 'pending': 3, 'max_retried': 2},
                'prompt': {'total': 10, 'pending': 7, 'max_retried': 3},
                'file_change': {'total': 0, 'pending': 0, 'max_retried': 0},
                'summary': {'total': 15, 'pending': 10, 'max_retried': 5}
            }
    """
    config = get_retry_config()
    stats = {}
    summary = {'total': 0, 'pending': 0, 'max_retried': 0}

    try:
        for queue_type, queue_dir_name in QUEUE_TYPE_TO_DIR.items():
            queue_dir = get_data_dir(persistent=True) / queue_dir_name
            counts = {'total': 0, 'pending': 0, 'max_retried': 0}

            if queue_dir.exists():
                for record_path in queue_dir.glob('*.json'):
                    counts['total'] += 1

                    try:
                        with open(record_path, 'r', encoding='utf-8') as fh:
                            attempts = json.load(fh).get('metadata', {}).get('retry_count', 0)
                        if attempts < config['max_attempts']:
                            counts['pending'] += 1
                        else:
                            counts['max_retried'] += 1
                    except Exception:
                        # Unreadable records still count toward 'total' only.
                        pass

            stats[queue_type] = counts
            for bucket in summary:
                summary[bucket] += counts[bucket]

        stats['summary'] = summary
        return stats

    except Exception as e:
        logger.error(f"获取队列统计信息时出错: {e}", exc_info=True)
        return stats
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
# ============================================================================
|
|
446
|
+
# 辅助函数(私有)
|
|
447
|
+
# ============================================================================
|
|
448
|
+
|
|
449
|
+
def calculate_next_retry_time(retry_count: int) -> datetime:
    """
    Return the UTC time of the next retry using exponential backoff.

    Args:
        retry_count: Number of retries already performed.

    Returns:
        datetime: Naive-UTC timestamp of the next allowed retry.

    Backoff schedule:
        attempt 1 -> +1 min, 2 -> +5 min, 3 -> +15 min,
        4 -> +60 min, 5 -> +4 h; beyond the schedule the final
        4-hour interval keeps being reused.
    """
    # Clamp the index to the last slot once the schedule is exhausted.
    slot = min(retry_count, len(RETRY_BACKOFF_SCHEDULE) - 1)
    delay = timedelta(seconds=RETRY_BACKOFF_SCHEDULE[slot])
    return datetime.utcnow() + delay
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _retry_upload(
    queue_type: str,
    data: Dict[str, Any],
    api_endpoint: str
) -> tuple[bool, Optional[str]]:
    """
    Perform the actual retry upload for one queued record.

    Args:
        queue_type: Queue type ('session', 'prompt', 'prompt_update', 'file_change')
        data: The upload payload
        api_endpoint: API endpoint path (kept for parity with the stored metadata)

    Returns:
        (success: bool, error: Optional[str])
    """
    try:
        client = DevLakeClient()
        try:
            # Dispatch to the client method matching the queue type.
            if queue_type == 'session':
                client.create_session(data)
            elif queue_type == 'prompt':
                client.create_prompt(data)
            elif queue_type == 'prompt_update':
                # Updates need the prompt's UUID embedded in the payload.
                prompt_uuid = data.get('prompt_uuid')
                if not prompt_uuid:
                    return False, "prompt_update 缺少 prompt_uuid"
                client.update_prompt(prompt_uuid, data)
            elif queue_type == 'file_change':
                # file_change is a batch endpoint; wrap single records in a 'changes' array.
                if 'changes' not in data:
                    data = {'changes': [data]}
                client.create_file_changes(data['changes'])
            else:
                return False, f"未知的队列类型: {queue_type}"

            return True, None
        finally:
            # BUG FIX: close() only ran on the success path before, leaking
            # the client whenever an API call raised or validation bailed out.
            client.close()

    except Exception as e:
        error_msg = str(e)
        logger.error(f"重试上传失败: {error_msg}")
        return False, error_msg
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _update_retry_metadata(
    failed_file: Path,
    metadata: RetryMetadata,
    error: str
) -> None:
    """
    Persist updated retry bookkeeping for a record that failed again.

    Mutates *metadata* in place (retry count, timestamps, error history)
    and rewrites the record's JSON file on disk.

    Args:
        failed_file: Path to the failed-upload record file
        metadata: Current metadata (updated in place)
        error: Most recent error message
    """
    try:
        # Load the stored record so the payload survives untouched.
        stored = json.loads(failed_file.read_text(encoding='utf-8'))

        # Advance the retry bookkeeping.
        timestamp = datetime.utcnow()
        metadata.retry_count += 1
        metadata.last_retry_at = timestamp.isoformat() + 'Z'
        metadata.next_retry_at = calculate_next_retry_time(metadata.retry_count).isoformat() + 'Z'
        metadata.error_history.append({
            'timestamp': timestamp.isoformat() + 'Z',
            'error': error
        })

        # Write the record back with the refreshed metadata.
        stored['metadata'] = asdict(metadata)
        failed_file.write_text(
            json.dumps(stored, ensure_ascii=False, indent=2),
            encoding='utf-8'
        )

    except Exception as e:
        logger.error(f"更新重试元数据失败: {failed_file}, 错误: {e}", exc_info=True)
|