@zhushanwen/pi-evolve-daily 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,138 @@
1
+ """Signal 6: 跨项目通用模式。
2
+
3
+ 分析多项目 session 数据,提取公共工具调用序列、项目类型分布等跨项目指标。
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import sys
9
+ from collections import Counter, defaultdict
10
+ from pathlib import Path
11
+
12
+ # 使 config 可导入
13
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
14
+
15
+ # 序列长度
16
+ _SEQUENCE_LENGTH = 3
17
+
18
+ # 项目类型关键字映射
19
+ _PROJECT_TYPE_KEYWORDS: dict[str, list[str]] = {
20
+ "frontend": ["frontend", "vue", "react", "nuxt", "next", "angular", "svelte", "web-app", "webapp"],
21
+ "backend": ["api", "server", "backend", "service", "grpc", "rest"],
22
+ "fullstack": ["workspace"],
23
+ "tooling": ["tool", "cli", "script", "util", "extension", "plugin", "agent", "harness"],
24
+ }
25
+
26
+
27
def _classify_project(project_path: str) -> str:
    """Classify a project by the first keyword group found in its path.

    The path is lower-cased and the groups of ``_PROJECT_TYPE_KEYWORDS`` are
    tried in declaration order; a group matches as soon as any of its
    keywords occurs as a substring of the path.

    Args:
        project_path: Project path (or name) to classify.

    Returns:
        The name of the first matching group, or ``"other"`` when no keyword
        occurs in the path.
    """
    haystack = project_path.lower()
    matching_kinds = (
        kind
        for kind, words in _PROJECT_TYPE_KEYWORDS.items()
        if any(word in haystack for word in words)
    )
    return next(matching_kinds, "other")
34
+
35
+
36
def _extract_sequences(
    tool_calls: list, length: int = _SEQUENCE_LENGTH
) -> list[tuple[str, ...]]:
    """Return every run of ``length`` consecutive tool names as a tuple.

    Args:
        tool_calls: Objects exposing a ``name`` attribute, in call order.
        length: Size of the sliding window.

    Returns:
        One tuple per window position, in order; an empty list when there are
        fewer calls than ``length``.
    """
    names = [call.name for call in tool_calls]
    if len(names) < length:
        return []

    windows: list[tuple[str, ...]] = []
    for start in range(len(names) - length + 1):
        windows.append(tuple(names[start : start + length]))
    return windows
44
+
45
+
46
+ def _short_project_name(project_path: str) -> str:
47
+ """从项目完整路径提取简短名称。"""
48
+ parts = project_path.replace("\\", "/").rstrip("/").split("/")
49
+ parts = [p for p in parts if p]
50
+ if not parts:
51
+ return project_path
52
+ if len(parts) >= 2:
53
+ return f"{parts[-2]}/{parts[-1]}"
54
+ return parts[-1]
55
+
56
+
57
def analyze_cross_project(sessions: list) -> dict:
    """Aggregate tool usage patterns across projects.

    Sessions without a project are ignored. Projects are keyed by the short
    name produced by ``_short_project_name``.

    Args:
        sessions: Parsed sessions; each must expose ``project`` and
            ``tool_calls`` (items with a ``name`` attribute).

    Returns:
        A dict with:
          - ``project_count``: number of distinct short project names.
          - ``projects``: per-project session count, tool-call count and the
            five most used tools, sorted by project name.
          - ``common_tool_sequences``: up to 30 tool-name sequences seen in
            at least two projects, ordered by number of projects and then by
            total occurrences (both descending).
          - ``project_type_distribution``: number of sessions per project
            type as returned by ``_classify_project``.
    """
    # Only counts are kept per project; the tool-call objects themselves are
    # not needed after the pass.
    project_data: dict[str, dict] = defaultdict(
        lambda: {"sessions": 0, "total_tool_calls": 0, "tool_counter": Counter()}
    )
    sequence_projects: dict[tuple[str, ...], set] = defaultdict(set)
    sequence_counts: Counter = Counter()
    type_dist: Counter = Counter()

    # Single pass: every metric below needs the same filter and short name.
    for session in sessions:
        project = session.project or ""
        if not project:
            continue
        short_name = _short_project_name(project)
        tool_calls = session.tool_calls

        entry = project_data[short_name]
        entry["sessions"] += 1
        entry["total_tool_calls"] += len(tool_calls)
        entry["tool_counter"].update(tc.name for tc in tool_calls)

        for seq in _extract_sequences(tool_calls):
            sequence_projects[seq].add(short_name)
            sequence_counts[seq] += 1

        # The type is derived from the full path, not the short name.
        type_dist[_classify_project(project)] += 1

    projects: list[dict] = [
        {
            "name": name,
            "sessions": data["sessions"],
            "total_tool_calls": data["total_tool_calls"],
            "top_tools": data["tool_counter"].most_common(5),
        }
        for name, data in sorted(project_data.items())
    ]

    # Keep sequences shared by >= 2 projects. Ties on project count are
    # broken by total occurrences so the cut below keeps the most frequent
    # sequences instead of whichever happened to be seen first.
    shared = [
        (seq, projs) for seq, projs in sequence_projects.items() if len(projs) >= 2
    ]
    shared.sort(
        key=lambda item: (len(item[1]), sequence_counts[item[0]]),
        reverse=True,
    )

    # Cap the output size.
    common_sequences: list[dict] = [
        {
            "sequence": list(seq),
            "projects": sorted(projs),
            "total_count": sequence_counts[seq],
        }
        for seq, projs in shared[:30]
    ]

    return {
        "project_count": len(project_data),
        "projects": projects,
        "common_tool_sequences": common_sequences,
        "project_type_distribution": dict(type_dist),
    }
@@ -0,0 +1,255 @@
1
+ """Signal 3: 错误与重试分析。
2
+
3
+ 统计工具错误率、错误模式分类、自我纠正率、按项目的错误分布。
4
+ 同时收集 failure_refs(session_id + tool_call_id + pattern + self_corrected),
5
+ 供 evolve 分析时按需回溯 JSONL 获取完整上下文。
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from collections import Counter, defaultdict
13
+ from pathlib import Path
14
+
15
+ import sys
16
+
17
+ _PARENT = str(Path(__file__).resolve().parent.parent)
18
+ if _PARENT not in sys.path:
19
+ sys.path.insert(0, _PARENT)
20
+
21
+ from config import ERROR_KEYWORDS
22
+
23
+
24
+ # ── 错误模式提取 ─────────────────────────────────────
25
+
26
+ def _extract_error_pattern(message: str) -> str | None:
27
+ """从错误消息中提取归类模式。
28
+
29
+ 优先匹配具体的已知模式,然后 fallback 到通用关键词。
30
+ """
31
+ msg_lower = message.lower()
32
+
33
+ # 按具体程度排序的模式匹配
34
+ specific_patterns = [
35
+ (r"could not find the exact text", "Could not find the exact text"),
36
+ (r"enoent.*no such file", "ENOENT: no such file"),
37
+ (r"enoent", "ENOENT"),
38
+ (r"permission denied", "Permission denied"),
39
+ (r"non-zero exit code", "Non-zero exit code"),
40
+ (r"command failed.*non-zero", "Command failed (non-zero exit)"),
41
+ (r"timeout|timed out", "Timeout"),
42
+ (r"syntaxerror|syntax error", "Syntax error"),
43
+ (r"typeerror|type error", "TypeError"),
44
+ (r"importerror|module not found", "ImportError/Module not found"),
45
+ (r"referenceerror", "ReferenceError"),
46
+ (r"connection refused|econnrefused", "Connection refused"),
47
+ (r"out of memory", "Out of memory"),
48
+ ]
49
+
50
+ for pattern, label in specific_patterns:
51
+ if re.search(pattern, msg_lower):
52
+ return label
53
+
54
+ # Fallback: 检查 ERROR_KEYWORDS
55
+ for keyword in ERROR_KEYWORDS:
56
+ if keyword.lower() in msg_lower:
57
+ # 截取错误消息的前 80 字符作为模式
58
+ snippet = message.strip()[:80]
59
+ # 去掉路径等变量部分
60
+ snippet = re.sub(r"/[\w./\-]+", "<path>", snippet)
61
+ return snippet
62
+
63
+ return None
64
+
65
+
66
def analyze_errors(sessions) -> dict:
    """Compute error and retry statistics over parsed sessions.

    Args:
        sessions: Parsed sessions. Each must expose ``project``,
            ``project_dir``, ``session_id``, ``file_path``, ``tool_calls``
            (items with ``id``, ``name``, ``timestamp``) and ``tool_results``
            (items with ``tool_name``, ``tool_call_id``, ``is_error``,
            ``content_preview``, ``timestamp``).

    Returns:
        A dict with ``total_errors``, ``by_tool``, ``bash_failure_rate``,
        ``edit_match_failure_rate``, ``top_error_patterns`` (top 10, up to
        three examples each), ``self_correction_rate``, ``by_project``
        (sorted by error count, descending) and ``failure_refs`` (one entry
        per error result: session id, tool call id, pattern and whether a
        later call to the same tool was seen).
    """

    def rate(part: int, whole: int) -> float:
        # Ratio rounded to 4 decimals; 0.0 when there is nothing to divide by.
        return round(part / whole, 4) if whole else 0.0

    total_errors = 0
    # tool_name -> {errors, total}
    tool_error_stats: dict[str, dict[str, int]] = defaultdict(
        lambda: {"errors": 0, "total": 0}
    )
    # project -> {errors, total_calls}
    project_stats: dict[str, dict[str, int]] = defaultdict(
        lambda: {"errors": 0, "total_calls": 0}
    )

    edit_total = 0
    edit_match_failures = 0
    bash_total = 0
    bash_errors = 0

    pattern_counter: Counter[str] = Counter()
    pattern_examples: dict[str, list[str]] = defaultdict(list)

    error_count_for_correction = 0
    self_correction_count = 0
    failure_refs: list[dict] = []

    for session in sessions:
        proj = session.project or session.project_dir or "unknown"
        sid = session.session_id or session.file_path

        call_by_id = {tc.id: tc for tc in session.tool_calls}
        results = list(session.tool_results)
        # Index of each error result -> its extracted pattern (may be None),
        # so the pattern is computed once and reused for failure_refs.
        error_patterns: dict[int, str | None] = {}

        # ── per-result counters ──────────────────────────────
        for idx, tr in enumerate(results):
            # A missing preview is treated as empty text instead of crashing.
            preview = tr.content_preview or ""

            tool_error_stats[tr.tool_name]["total"] += 1
            project_stats[proj]["total_calls"] += 1

            if tr.is_error:
                total_errors += 1
                tool_error_stats[tr.tool_name]["errors"] += 1
                project_stats[proj]["errors"] += 1

                if tr.tool_name == "bash":
                    bash_errors += 1

                pattern = _extract_error_pattern(preview)
                error_patterns[idx] = pattern
                if pattern:
                    pattern_counter[pattern] += 1
                    if len(pattern_examples[pattern]) < 3:
                        pattern_examples[pattern].append(preview.strip()[:200])

            # Every bash result counts towards the denominator, error or not.
            if tr.tool_name == "bash":
                bash_total += 1

            if tr.tool_name == "edit":
                edit_total += 1
                if tr.is_error and "could not find" in preview.lower():
                    edit_match_failures += 1

        # ── self-correction detection + failure_refs ─────────
        # An error counts as self-corrected when a call to the same tool
        # appears strictly later (by timestamp) than the failing call.
        ordered_calls = sorted(
            session.tool_calls, key=lambda t: t.timestamp or ""
        )
        # Stable sort of the error indices gives the same order as filtering
        # the timestamp-sorted results for errors.
        error_order = sorted(
            error_patterns, key=lambda i: results[i].timestamp or ""
        )

        for idx in error_order:
            tr = results[idx]
            error_count_for_correction += 1

            found_retry = False
            error_call = call_by_id.get(tr.tool_call_id)
            if error_call is not None:
                error_tool_name = getattr(error_call, "name", "")
                # Missing timestamps compare as "", matching the sort keys
                # above; comparing None with str would raise TypeError.
                error_timestamp = getattr(error_call, "timestamp", "") or ""
                found_retry = any(
                    tc.name == error_tool_name
                    for tc in ordered_calls
                    if (tc.timestamp or "") > error_timestamp
                )
                if found_retry:
                    self_correction_count += 1

            # Errors without a matching tool call are still recorded, with
            # self_corrected left False.
            failure_refs.append({
                "session_id": sid,
                "tool_call_id": tr.tool_call_id,
                "pattern": error_patterns[idx] or "Unknown",
                "self_corrected": found_retry,
            })

    # ── by_tool ──────────────────────────────────────────────
    by_tool: dict[str, dict] = {
        tool_name: {
            "errors": stats["errors"],
            "total": stats["total"],
            "error_rate": rate(stats["errors"], stats["total"]),
        }
        for tool_name, stats in tool_error_stats.items()
    }

    # ── top_error_patterns ───────────────────────────────────
    top_error_patterns = [
        {
            "pattern": pattern,
            "count": count,
            "examples": pattern_examples.get(pattern, [])[:3],
        }
        for pattern, count in pattern_counter.most_common(10)
    ]

    # ── by_project ───────────────────────────────────────────
    by_project = sorted(
        [
            {
                "project": proj,
                "errors": stats["errors"],
                "total_calls": stats["total_calls"],
                "error_rate": rate(stats["errors"], stats["total_calls"]),
            }
            for proj, stats in project_stats.items()
        ],
        key=lambda x: x["errors"],
        reverse=True,
    )

    return {
        "total_errors": total_errors,
        "by_tool": by_tool,
        "bash_failure_rate": rate(bash_errors, bash_total),
        "edit_match_failure_rate": rate(edit_match_failures, edit_total),
        "top_error_patterns": top_error_patterns,
        "self_correction_rate": rate(
            self_correction_count, error_count_for_correction
        ),
        "by_project": by_project,
        "failure_refs": failure_refs,
    }
@@ -0,0 +1,176 @@
1
+ """Signal 7: 用户满意度隐式信号。
2
+
3
+ 通过 session 粒度的间接指标推断用户满意度:
4
+ 单轮完成率、平均轮数、工具调用密度、session 时长分布。
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import sys
10
+ from collections import defaultdict
11
+ from datetime import datetime, timezone
12
+ from pathlib import Path
13
+
14
+ # 使 config 可导入
15
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
16
+ from config import SINGLE_TURN_MAX_MESSAGES
17
+
18
+
19
+ def _parse_ts(ts_str: str) -> datetime | None:
20
+ """安全解析 ISO 时间戳。"""
21
+ if not ts_str:
22
+ return None
23
+ try:
24
+ dt = datetime.fromisoformat(ts_str)
25
+ if dt.tzinfo is None:
26
+ dt = dt.replace(tzinfo=timezone.utc)
27
+ return dt
28
+ except (ValueError, TypeError):
29
+ return None
30
+
31
+
32
+ def _percentile(sorted_values: list[float], pct: float) -> float:
33
+ """计算百分位数(线性插值)。"""
34
+ if not sorted_values:
35
+ return 0.0
36
+ idx = pct / 100.0 * (len(sorted_values) - 1)
37
+ lower = int(idx)
38
+ upper = min(lower + 1, len(sorted_values) - 1)
39
+ frac = idx - lower
40
+ return round(sorted_values[lower] + frac * (sorted_values[upper] - sorted_values[lower]), 2)
41
+
42
+
43
+ def _short_project_name(project_path: str) -> str:
44
+ """从项目完整路径提取简短名称。"""
45
+ parts = project_path.replace("\\", "/").rstrip("/").split("/")
46
+ parts = [p for p in parts if p]
47
+ if not parts:
48
+ return project_path
49
+ if len(parts) >= 2:
50
+ return f"{parts[-2]}/{parts[-1]}"
51
+ return parts[-1]
52
+
53
+
54
+ def _session_last_timestamp(session) -> str:
55
+ """获取 session 中最后一条记录的时间戳。
56
+
57
+ 遍历所有时间戳字段,取最大值。
58
+ """
59
+ timestamps: list[str] = []
60
+
61
+ for tc in session.tool_calls:
62
+ if tc.timestamp:
63
+ timestamps.append(tc.timestamp)
64
+ for tr in session.tool_results:
65
+ if tr.timestamp:
66
+ timestamps.append(tr.timestamp)
67
+ for um in session.user_messages:
68
+ if um.timestamp:
69
+ timestamps.append(um.timestamp)
70
+ for ui in session.usage_list:
71
+ if ui.timestamp:
72
+ timestamps.append(ui.timestamp)
73
+
74
+ return max(timestamps) if timestamps else ""
75
+
76
+
77
def analyze_satisfaction(sessions: list) -> dict:
    """Derive implicit user-satisfaction indicators from sessions.

    Args:
        sessions: Parsed sessions; each must expose ``user_messages``,
            ``tool_calls``, ``tool_results``, ``usage_list``, ``start_time``
            and ``project``.

    Returns:
        A dict with:
          - ``total_sessions``
          - ``single_turn_completion_rate``: share of sessions whose user
            message count is at most ``SINGLE_TURN_MAX_MESSAGES``.
          - ``avg_turns_per_session`` / ``avg_tool_calls_per_session``
          - ``session_duration_stats``: median, p25, p75 and max duration in
            minutes, all rounded to two decimals. Only sessions whose last
            timestamp is later than their start time contribute.
          - ``by_project``: per-project averages for projects with at least
            two sessions, sorted by project name.
    """
    total_sessions = len(sessions)
    if total_sessions == 0:
        return {
            "total_sessions": 0,
            "single_turn_completion_rate": 0.0,
            "avg_turns_per_session": 0.0,
            "avg_tool_calls_per_session": 0.0,
            "session_duration_stats": {
                "median_minutes": 0.0,
                "p25_minutes": 0.0,
                "p75_minutes": 0.0,
                "max_minutes": 0.0,
            },
            "by_project": [],
        }

    single_turn_count = 0
    total_turns = 0
    total_tool_calls = 0
    durations_minutes: list[float] = []

    # Per-project accumulators, keyed by short project name.
    project_stats: dict[str, dict] = defaultdict(
        lambda: {"sessions": 0, "turns": 0, "single_turn": 0}
    )

    for session in sessions:
        user_count = len(session.user_messages)
        tool_count = len(session.tool_calls)

        total_turns += user_count
        total_tool_calls += tool_count

        # NOTE(review): sessions with zero user messages also satisfy this
        # check — confirm that counting them as single-turn is intended.
        is_single_turn = user_count <= SINGLE_TURN_MAX_MESSAGES
        if is_single_turn:
            single_turn_count += 1

        # Duration: from the session start to the latest recorded timestamp.
        start_ts = _parse_ts(session.start_time)
        end_ts_str = _session_last_timestamp(session)
        end_ts = _parse_ts(end_ts_str) if end_ts_str else None

        if start_ts and end_ts and end_ts > start_ts:
            durations_minutes.append((end_ts - start_ts).total_seconds() / 60.0)

        project = session.project or ""
        if project:
            stats = project_stats[_short_project_name(project)]
            stats["sessions"] += 1
            stats["turns"] += user_count
            if is_single_turn:
                stats["single_turn"] += 1

    durations_minutes.sort()
    duration_stats = {
        "median_minutes": _percentile(durations_minutes, 50),
        "p25_minutes": _percentile(durations_minutes, 25),
        "p75_minutes": _percentile(durations_minutes, 75),
        # Rounded like the percentile values so all four fields share the
        # same precision.
        "max_minutes": round(durations_minutes[-1], 2) if durations_minutes else 0.0,
    }

    # Projects with a single session are left out of the breakdown.
    by_project: list[dict] = [
        {
            "project": name,
            "sessions": stats["sessions"],
            "avg_turns": round(stats["turns"] / stats["sessions"], 2),
            "single_turn_rate": round(stats["single_turn"] / stats["sessions"], 4),
        }
        for name, stats in sorted(project_stats.items())
        if stats["sessions"] >= 2
    ]

    return {
        "total_sessions": total_sessions,
        "single_turn_completion_rate": round(
            single_turn_count / total_sessions, 4
        ),
        "avg_turns_per_session": round(total_turns / total_sessions, 2),
        "avg_tool_calls_per_session": round(
            total_tool_calls / total_sessions, 2
        ),
        "session_duration_stats": duration_stats,
        "by_project": by_project,
    }