skill-self-evolution 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ """
2
+ DeepSeek 客户端封装 — 超时/重试/熔断,兼容 OpenAI Chat Completions API。
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import time
9
+ from typing import Any
10
+
11
+ import httpx
12
+
13
+ from skill_self_evolution.config import get_deepseek_config
14
+ from skill_self_evolution.models import DeepSeekChatResponse
15
+
16
# Module-level logger shared by CircuitBreaker and DeepSeekClient in this file.
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class CircuitBreaker:
    """Minimal circuit breaker: trips after N consecutive failures, then cools down for M seconds.

    State is kept in plain instance attributes; no locking is performed here.
    """

    def __init__(self, threshold: int = 3, cooldown_seconds: float = 60.0):
        """Create a closed breaker.

        Args:
            threshold: Number of recorded failures at which the breaker opens.
            cooldown_seconds: How long the breaker stays open after the most
                recent recorded failure.
        """
        self.threshold = threshold
        self.cooldown_seconds = cooldown_seconds
        self._failure_count = 0
        self._last_failure_time: float = 0.0
        self._open = False

    @property
    def is_open(self) -> bool:
        """Report whether requests should currently be rejected.

        Reading this property has a side effect: once the cooldown has elapsed
        the breaker closes itself and the failure counter is reset.
        """
        if self._open and time.monotonic() - self._last_failure_time >= self.cooldown_seconds:
            # Cooldown is over: fully close and start counting from zero again.
            self._open = False
            self._failure_count = 0
            logger.info("熔断器冷却完毕,恢复请求")
        return self._open

    def record_failure(self) -> None:
        """Count one failure and open the breaker when the threshold is reached."""
        self._failure_count += 1
        self._last_failure_time = time.monotonic()
        threshold_reached = self._failure_count >= self.threshold
        if threshold_reached and not self._open:
            self._open = True
            logger.warning("熔断器触发:连续 %d 次失败,冷却 %d 秒", self._failure_count, self.cooldown_seconds)

    def record_success(self) -> None:
        """Reset the failure counter and close the breaker if it was open."""
        was_open = self._open
        self._failure_count = 0
        if was_open:
            self._open = False
            logger.info("熔断器关闭(请求成功)")
52
+
53
+
54
class DeepSeekClient:
    """OpenAI-compatible DeepSeek chat client with built-in timeout, retry and circuit breaking.

    API key, base URL and model are resolved through ``get_deepseek_config``.
    According to the original author's note the endpoint depends on the
    environment (local → api.deepseek.com, test/prod →
    api.modelarts-maas.com/v2); that selection is not visible in this class.
    """

    def __init__(
        self,
        api_key: str = "",
        api_base: str = "",
        model: str = "",
        timeout: float = 60.0,
        max_retries: int = 1,
        circuit_breaker: CircuitBreaker | None = None,
    ):
        """Resolve the configuration and prepare the chat endpoint URL.

        Args:
            api_key: Override forwarded to ``get_deepseek_config``.
            api_base: Override forwarded to ``get_deepseek_config``.
            model: Override forwarded to ``get_deepseek_config``.
            timeout: Default per-request timeout handed to httpx.
            max_retries: Number of additional attempts after the first one.
            circuit_breaker: Breaker to use; a fresh ``CircuitBreaker`` is
                created when omitted.
        """
        cfg = get_deepseek_config(api_key=api_key, api_base=api_base, model=model)
        self._api_key = cfg.api_key
        self._api_base = cfg.api_base.rstrip("/")
        self._model = cfg.model
        self._timeout = timeout
        self._max_retries = max_retries
        self._circuit_breaker = circuit_breaker or CircuitBreaker()
        self._chat_url = f"{self._api_base}/chat/completions"

    @property
    def circuit_breaker(self) -> CircuitBreaker:
        """The breaker guarding this client."""
        return self._circuit_breaker

    async def chat(
        self,
        messages: list[dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 2048,
        timeout: float | None = None,
    ) -> DeepSeekChatResponse:
        """Send a non-streaming chat completion request, retrying on httpx errors.

        Args:
            messages: Chat messages sent as the request's ``messages`` field.
            temperature: Sampling temperature.
            max_tokens: Completion token limit.
            timeout: Per-call timeout; a falsy value falls back to the
                client's default.

        Returns:
            The parsed response.

        Raises:
            RuntimeError: If the circuit breaker is open, or if every attempt
                failed with an httpx timeout/status/request error.
        """
        if self._circuit_breaker.is_open:
            raise RuntimeError("熔断器已开启,拒绝请求")

        last_error: Exception | None = None
        effective_timeout = timeout or self._timeout
        total_attempts = self._max_retries + 1

        for attempt in range(total_attempts):
            try:
                result = await self._do_chat(messages, temperature, max_tokens, effective_timeout)
            except (httpx.TimeoutException, httpx.HTTPStatusError, httpx.RequestError) as e:
                last_error = e
                logger.warning("DeepSeek 请求失败 (attempt %d/%d): %s", attempt + 1, total_attempts, e)
                if attempt < self._max_retries:
                    # Linear back-off between attempts: 0.5s, 1.0s, ...
                    await asyncio.sleep(0.5 * (attempt + 1))
            else:
                self._circuit_breaker.record_success()
                return result

        # Only a fully exhausted call counts as one failure for the breaker.
        self._circuit_breaker.record_failure()
        raise RuntimeError(f"DeepSeek 请求全部失败 (重试 {self._max_retries} 次): {last_error}") from last_error

    async def _do_chat(
        self,
        messages: list[dict[str, str]],
        temperature: float,
        max_tokens: int,
        timeout: float,
    ) -> DeepSeekChatResponse:
        """Perform a single POST to the chat completions endpoint.

        Missing, empty or malformed ``choices`` / ``usage`` sections are treated
        as absent instead of raising, and a ``null`` message content is
        normalised to an empty string.
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self._api_key}",
        }
        body = {
            "model": self._model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": False,
        }

        async with httpx.AsyncClient(timeout=timeout, http2=False, trust_env=False) as client:
            resp = await client.post(self._chat_url, headers=headers, json=body)
            resp.raise_for_status()
            data = resp.json()

        if not isinstance(data, dict):
            data = {}

        # `data.get("choices", [{}])[0]` raised IndexError for `"choices": []`.
        choices = data.get("choices")
        has_choice = isinstance(choices, list) and choices and isinstance(choices[0], dict)
        choice = choices[0] if has_choice else {}
        message = choice.get("message") or {}
        usage = data.get("usage") or {}

        return DeepSeekChatResponse(
            # The API may return `"content": null`; callers expect a string.
            content=message.get("content") or "",
            finish_reason=choice.get("finish_reason"),
            prompt_tokens=usage.get("prompt_tokens"),
            completion_tokens=usage.get("completion_tokens"),
        )

    @staticmethod
    def _json_candidates(content: str) -> list[str]:
        """List the substrings of ``content`` worth trying as JSON, most likely first."""
        candidates = [content]
        _, fence, after_open = content.partition("```")
        if not fence:
            return candidates

        # Text between the first opening fence and the last closing fence.
        inner, closing, _ = after_open.rpartition("```")
        if not closing:
            inner = after_open  # unterminated fence: take everything after it

        _tag, newline, below_tag = inner.partition("\n")
        if newline:
            # Usual layout: the fence line carries only an optional language tag.
            candidates.append(below_tag.strip())
        # Single-line layout such as ```json {...}``` or ```{...}```.
        candidates.append(inner.strip().removeprefix("json").strip())
        return candidates

    async def chat_json(
        self,
        messages: list[dict[str, str]],
        temperature: float = 0.3,
        max_tokens: int = 2048,
        timeout: float | None = None,
    ) -> dict[str, Any]:
        """Send a chat request and parse the reply as a JSON object.

        The whole reply is tried first, then the content of a markdown code
        fence (with or without a ``json`` tag, optionally surrounded by prose).

        Returns:
            The parsed JSON object, or ``{}`` when no candidate parses to an
            object (a warning is logged in that case).

        Raises:
            RuntimeError: Propagated from ``chat``.
        """
        resp = await self.chat(messages, temperature, max_tokens, timeout)
        content = (resp.content or "").strip()

        for candidate in self._json_candidates(content):
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Lists/scalars are valid JSON but violate the declared return type.
            if isinstance(parsed, dict):
                return parsed

        logger.warning("DeepSeek 响应非 JSON: %s", content[:200])
        return {}
@@ -0,0 +1,386 @@
1
+ """
2
+ EvoSkill 离线进化器 — 读 JSONL 日志 → DeepSeek 分析失败模式 → 生成优化提案 → 自动写入或发 PR。
3
+
4
+ 流程:
5
+ 1. 读昨日 JSONL → 筛选 is_failure=true
6
+ 2. 若 is_failure 样本 < min_failure_samples → 跳过本次进化
7
+ 3. 跑 benchmark()
8
+ 4. DeepSeek 分析失败模式(优先 Skill 自定义 evolve_prompt.yaml)
9
+ 5. 生成 YAML 优化提案
10
+ 6. 按 evolve.toml 权限 → 自动写 MySQL 或发 PR
11
+ - rules_config / prompt 权限为 true → 自动写入(含历史归档)
12
+ - skill_md / scripts 权限为 false → 生成 PR 文件
13
+ 7. benchmark 安全网:变更后重跑 benchmark,退化则自动回滚
14
+ """
15
+
16
+ import json
17
+ import logging
18
+ import os
19
+ import time
20
+ from datetime import datetime, timedelta, timezone
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ import yaml
25
+
26
+ from skill_self_evolution.deepseek import DeepSeekClient
27
+ from skill_self_evolution.loader import SkillLoader, SkillModule
28
+ from skill_self_evolution.logger import _get_log_dir
29
+ from skill_self_evolution.models import EvolveProposalModel
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+ _BEIJING_TZ = timezone(timedelta(hours=8))
34
+
35
+ _FRAMEWORK_DEFAULTS = Path(__file__).resolve().parents[2] / "defaults"
36
+
37
+
38
+ def _yesterday_str() -> str:
39
+ return (datetime.now(_BEIJING_TZ) - timedelta(days=1)).strftime("%Y-%m-%d")
40
+
41
+
42
+ class EvolveProposal:
43
+ """一次进化分析产生的提案(内部状态用普通类,序列化时通过 EvolveProposalModel 校验)。"""
44
+
45
+ def __init__(self):
46
+ self.rules_changes: dict[str, Any] = {}
47
+ self.prompt_changes: dict[str, Any] = {}
48
+ self.rules_text: str | None = None
49
+ self.prompt_text: str | None = None
50
+ self.analysis_raw: str = ""
51
+ self.failure_count: int = 0
52
+ self.benchmark_before: tuple[int, int, list] = (0, 0, [])
53
+ self.benchmark_after: tuple[int, int, list] = (0, 0, [])
54
+ self.applied: bool = False
55
+ self.rolled_back: bool = False
56
+
57
+ def to_model(self) -> EvolveProposalModel:
58
+ """转为 Pydantic 模型(用于序列化/日志)。"""
59
+ return EvolveProposalModel(
60
+ rules_changes=self.rules_changes,
61
+ prompt_changes=self.prompt_changes,
62
+ rules_text=self.rules_text,
63
+ prompt_text=self.prompt_text,
64
+ analysis_raw=self.analysis_raw,
65
+ failure_count=self.failure_count,
66
+ applied=self.applied,
67
+ rolled_back=self.rolled_back,
68
+ )
69
+
70
+
71
+ class Evolver:
72
+ """离线进化引擎。"""
73
+
74
+ def __init__(
75
+ self,
76
+ skill_name: str,
77
+ skill_base_dir: Path | None = None,
78
+ deepseek: DeepSeekClient | None = None,
79
+ config_version_manager=None,
80
+ ):
81
+ """
82
+ Args:
83
+ skill_name: Skill 名称
84
+ skill_base_dir: Skill 根目录
85
+ deepseek: DeepSeek 客户端(用于分析失败模式)
86
+ config_version_manager: ConfigVersionManager 实例(用于写入 MySQL)
87
+ """
88
+ self.skill_name = skill_name
89
+ self._loader = SkillLoader(skill_base_dir)
90
+ self._deepseek = deepseek
91
+ self._version_mgr = config_version_manager
92
+
93
+ def _load_failure_logs(self, date_str: str | None = None) -> list[dict]:
94
+ """读取 JSONL 日志中 is_failure=true 的记录。
95
+
96
+ Args:
97
+ date_str: 日期字符串 YYYY-MM-DD,默认昨天
98
+
99
+ Returns:
100
+ is_failure=true 的日志记录列表
101
+ """
102
+ target_date = date_str or _yesterday_str()
103
+ log_dir = _get_log_dir(self.skill_name)
104
+ log_path = log_dir / f"{target_date}.jsonl"
105
+
106
+ if not log_path.exists():
107
+ logger.info("EvoSkill [%s] 日志文件不存在: %s", self.skill_name, log_path)
108
+ return []
109
+
110
+ failures = []
111
+ try:
112
+ with open(log_path, "r", encoding="utf-8") as f:
113
+ for line in f:
114
+ line = line.strip()
115
+ if not line:
116
+ continue
117
+ try:
118
+ entry = json.loads(line)
119
+ if entry.get("is_failure", False):
120
+ failures.append(entry)
121
+ except json.JSONDecodeError:
122
+ continue
123
+ except Exception as e:
124
+ logger.warning("EvoSkill [%s] 日志读取失败: %s", self.skill_name, e)
125
+ return []
126
+
127
+ logger.info("EvoSkill [%s] 读取 %d 条 is_failure 记录", self.skill_name, len(failures))
128
+ return failures
129
+
130
+ def _run_benchmark(self, skill: SkillModule) -> tuple[int, int, list]:
131
+ """跑 benchmark() 获取基准指标。
132
+
133
+ Returns:
134
+ (pass_count, total_count, failures_list)
135
+ """
136
+ try:
137
+ return skill.benchmark(self)
138
+ except Exception as e:
139
+ logger.warning("EvoSkill [%s] benchmark 执行失败: %s", self.skill_name, e)
140
+ return 0, 0, [str(e)]
141
+
142
+ async def _analyze_failures(
143
+ self,
144
+ skill: SkillModule,
145
+ failures: list[dict],
146
+ current_rules: str,
147
+ current_prompt: str,
148
+ ) -> dict:
149
+ """调用 DeepSeek 分析失败模式。
150
+
151
+ Returns:
152
+ {"rules_changes": {...}, "prompt_changes": {...}}
153
+ """
154
+ if not self._deepseek:
155
+ logger.warning("EvoSkill [%s] DeepSeek 客户端未配置,无法分析", self.skill_name)
156
+ return {}
157
+
158
+ # 加载 evolve_prompt.yaml(Skill 自定义优先,框架默认降级)
159
+ prompt_cfg = skill.evolve_prompt_yaml or {}
160
+ if not prompt_cfg:
161
+ defaults_path = _FRAMEWORK_DEFAULTS / "evolve_prompt.yaml"
162
+ if defaults_path.exists():
163
+ prompt_cfg = yaml.safe_load(defaults_path.read_text(encoding="utf-8")) or {}
164
+
165
+ system = prompt_cfg.get("system", "你是配置优化专家。")
166
+ template = prompt_cfg.get(
167
+ "analyze_template",
168
+ "分析以下失败案例:\n{{failure_logs}}\n输出优化建议 JSON。",
169
+ )
170
+
171
+ # 序列化失败日志(截断过长内容)
172
+ failure_texts = []
173
+ for f in failures[:20]: # 最多 20 条
174
+ line = json.dumps({
175
+ "input_summary": f.get("input_summary", {}),
176
+ "rule_output": f.get("rule_output", {}),
177
+ "ai_validation": f.get("ai_validation"),
178
+ "ai_reselection": f.get("ai_reselection"),
179
+ "final_output": f.get("final_output", {}),
180
+ "warnings": f.get("warnings", []),
181
+ }, ensure_ascii=False, indent=2)
182
+ failure_texts.append(line)
183
+
184
+ user_message = template
185
+ user_message = user_message.replace("{{skill_name}}", self.skill_name)
186
+ user_message = user_message.replace("{{current_rules_config}}", current_rules or "(空)")
187
+ user_message = user_message.replace("{{current_prompt}}", current_prompt or "(空)")
188
+ user_message = user_message.replace("{{failure_logs}}", "\n---\n".join(failure_texts))
189
+
190
+ try:
191
+ response = await self._deepseek.chat_json(
192
+ messages=[
193
+ {"role": "system", "content": system},
194
+ {"role": "user", "content": user_message},
195
+ ],
196
+ temperature=0.3,
197
+ max_tokens=4096,
198
+ )
199
+ return response
200
+ except Exception as e:
201
+ logger.warning("EvoSkill [%s] 分析请求失败: %s", self.skill_name, e)
202
+ return {}
203
+
204
+ async def evolve(
205
+ self,
206
+ min_failure_samples: int = 10,
207
+ dry_run: bool = True,
208
+ date_str: str | None = None,
209
+ ) -> EvolveProposal | None:
210
+ """执行一轮进化。
211
+
212
+ Args:
213
+ min_failure_samples: 最少失败样本数,不足则跳过
214
+ dry_run: True=仅分析不写入,False=自动写入 MySQL
215
+ date_str: 日期字符串,默认昨天
216
+
217
+ Returns:
218
+ EvolveProposal 或 None(跳过时)
219
+ """
220
+ logger.info(
221
+ "EvoSkill [%s] 开始进化分析 (min_failure_samples=%d, dry_run=%s)",
222
+ self.skill_name,
223
+ min_failure_samples,
224
+ dry_run,
225
+ )
226
+
227
+ proposal = EvolveProposal()
228
+
229
+ # 1. 加载 Skill 模块
230
+ try:
231
+ skill = self._loader.load(self.skill_name)
232
+ except Exception as e:
233
+ logger.error("EvoSkill [%s] 加载失败: %s", self.skill_name, e)
234
+ return None
235
+
236
+ # 2. enhancement 角色不触发进化
237
+ if skill.ai_role == "enhancement":
238
+ logger.info("EvoSkill [%s] ai_role=enhancement,不触发进化", self.skill_name)
239
+ return None
240
+
241
+ # 3. 读取 JSONL 日志
242
+ failures = self._load_failure_logs(date_str)
243
+ proposal.failure_count = len(failures)
244
+
245
+ if len(failures) < min_failure_samples:
246
+ logger.info(
247
+ "EvoSkill [%s] 失败样本不足 (got=%d, need=%d),跳过进化",
248
+ self.skill_name,
249
+ len(failures),
250
+ min_failure_samples,
251
+ )
252
+ return proposal
253
+
254
+ # 4. 跑 benchmark(进化前基线)
255
+ logger.info("EvoSkill [%s] 运行进化前 benchmark...", self.skill_name)
256
+ proposal.benchmark_before = self._run_benchmark(skill)
257
+ logger.info(
258
+ "EvoSkill [%s] 进化前 benchmark: %d/%d 通过",
259
+ self.skill_name,
260
+ proposal.benchmark_before[0],
261
+ proposal.benchmark_before[1],
262
+ )
263
+
264
+ # 5. 加载当前配置
265
+ current_rules = ""
266
+ current_prompt = ""
267
+ if self._version_mgr:
268
+ current_rules = self._version_mgr.load_raw(self.skill_name, "rules_config") or ""
269
+ current_prompt = self._version_mgr.load_raw(self.skill_name, "prompt") or ""
270
+
271
+ # 6. DeepSeek 分析
272
+ analysis = await self._analyze_failures(skill, failures, current_rules, current_prompt)
273
+ if not analysis:
274
+ logger.warning("EvoSkill [%s] DeepSeek 分析未产出结果", self.skill_name)
275
+ return proposal
276
+
277
+ proposal.rules_changes = analysis.get("rules_changes", {})
278
+ proposal.prompt_changes = analysis.get("prompt_changes", {})
279
+
280
+ if not proposal.rules_changes and not proposal.prompt_changes:
281
+ logger.info("EvoSkill [%s] DeepSeek 未提出任何优化建议", self.skill_name)
282
+ return proposal
283
+
284
+ # 7. 生成 YAML 文本
285
+ evolve_cfg = skill.evolve_toml
286
+ auto_cfg = evolve_cfg.get("evolve", {}).get("auto_modify", {})
287
+
288
+ if proposal.rules_changes and auto_cfg.get("rules_config", False):
289
+ rules_threshold = auto_cfg.get("rules_config", {})
290
+ max_pct = rules_threshold.get("max_change_percent", 20)
291
+ proposal.rules_text = self._apply_rules_changes(current_rules, proposal.rules_changes, max_pct)
292
+
293
+ if proposal.prompt_changes and auto_cfg.get("prompt", False):
294
+ proposal.prompt_text = self._apply_prompt_changes(current_prompt, proposal.prompt_changes)
295
+
296
+ # 8. 写入
297
+ if not dry_run and self._version_mgr:
298
+ guard = evolve_cfg.get("evolve", {}).get("guard", {})
299
+ require_bench = guard.get("require_benchmark_pass", True)
300
+
301
+ applied = True
302
+ if proposal.rules_text and auto_cfg.get("rules_config", False):
303
+ self._version_mgr.save(self.skill_name, "rules_config", proposal.rules_text)
304
+ logger.info("EvoSkill [%s] rules_config 已写入 MySQL", self.skill_name)
305
+ if proposal.prompt_text and auto_cfg.get("prompt", False):
306
+ self._version_mgr.save(self.skill_name, "prompt", proposal.prompt_text)
307
+ logger.info("EvoSkill [%s] prompt 已写入 MySQL", self.skill_name)
308
+
309
+ # 9. benchmark 安全网:重新跑 benchmark 验证不退化
310
+ if require_bench and (proposal.rules_text or proposal.prompt_text):
311
+ logger.info("EvoSkill [%s] 运行进化后 benchmark...", self.skill_name)
312
+ # 清除缓存使新配置生效
313
+ self._loader.invalidate_cache(self.skill_name)
314
+ skill_after = self._loader.load(self.skill_name)
315
+ proposal.benchmark_after = self._run_benchmark(skill_after)
316
+
317
+ pass_before = proposal.benchmark_before[0]
318
+ total_before = proposal.benchmark_before[1]
319
+ pass_after = proposal.benchmark_after[0]
320
+
321
+ if total_before > 0 and pass_after < pass_before:
322
+ # 退化 → 回滚
323
+ logger.warning(
324
+ "EvoSkill [%s] benchmark 退化 (%d/%d → %d/%d),自动回滚",
325
+ self.skill_name,
326
+ pass_before,
327
+ total_before,
328
+ pass_after,
329
+ proposal.benchmark_after[1],
330
+ )
331
+ if proposal.rules_text and auto_cfg.get("rules_config", False):
332
+ self._version_mgr.rollback(self.skill_name, "rules_config", 0)
333
+ if proposal.prompt_text and auto_cfg.get("prompt", False):
334
+ self._version_mgr.rollback(self.skill_name, "prompt", 0)
335
+ proposal.rolled_back = True
336
+ proposal.applied = False
337
+ else:
338
+ proposal.applied = True
339
+ else:
340
+ proposal.applied = applied
341
+
342
+ return proposal
343
+
344
+ def _apply_rules_changes(self, current_yaml: str, changes: dict, max_change_percent: float) -> str:
345
+ """将 DeepSeek 产出的 rules_changes 合并到现有 YAML。
346
+
347
+ 当前实现:简单字符串替换,仅允许阈值调整。
348
+ """
349
+ if not current_yaml:
350
+ return yaml.dump(changes, allow_unicode=True, default_flow_style=False)
351
+
352
+ # 遍历 changes 中的阈值调整
353
+ modified = current_yaml
354
+ for key, value in changes.items():
355
+ if isinstance(value, (int, float)):
356
+ # 查找 YAML 中的对应键并替换数值
357
+ import re
358
+ pattern = rf"^\s*{re.escape(key)}\s*:\s*[\d.]+"
359
+ new_line = f"{key}: {value}"
360
+ modified = re.sub(pattern, new_line, modified, flags=re.MULTILINE)
361
+
362
+ return modified
363
+
364
+ def _apply_prompt_changes(self, current_yaml: str, changes: dict) -> str:
365
+ """将 DeepSeek 产出的 prompt_changes 合并到现有 YAML。
366
+
367
+ 当前实现:按字段路径替换 YAML 值。
368
+ """
369
+ if not current_yaml:
370
+ return yaml.dump(changes, allow_unicode=True, default_flow_style=False)
371
+
372
+ try:
373
+ cfg = yaml.safe_load(current_yaml) or {}
374
+ _deep_update(cfg, changes)
375
+ return yaml.dump(cfg, allow_unicode=True, default_flow_style=False)
376
+ except Exception:
377
+ return current_yaml
378
+
379
+
380
+ def _deep_update(base: dict, updates: dict) -> None:
381
+ """递归合并 dict。"""
382
+ for key, value in updates.items():
383
+ if isinstance(value, dict) and isinstance(base.get(key), dict):
384
+ _deep_update(base[key], value)
385
+ else:
386
+ base[key] = value