@zhushanwen/pi-evolve-daily 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/package.json +18 -2
- package/scripts/analyze.py +186 -0
- package/scripts/config.py +50 -0
- package/scripts/extract_context.py +350 -0
- package/scripts/extractors/__init__.py +0 -0
- package/scripts/extractors/cross_project.py +138 -0
- package/scripts/extractors/errors.py +255 -0
- package/scripts/extractors/satisfaction.py +176 -0
- package/scripts/extractors/skill_state.py +203 -0
- package/scripts/extractors/skills.py +255 -0
- package/scripts/extractors/tokens.py +176 -0
- package/scripts/extractors/tools.py +234 -0
- package/scripts/extractors/users.py +157 -0
- package/scripts/miner.py +352 -0
- package/scripts/parser.py +556 -0
- package/scripts/reporter.py +371 -0
- package/scripts/tests/__init__.py +0 -0
- package/scripts/tests/test_analyze.py +70 -0
- package/scripts/tests/test_miner.py +280 -0
- package/scripts/tests/test_reporter.py +195 -0
- package/src/index.ts +9 -9
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Signal 6: 跨项目通用模式。
|
|
2
|
+
|
|
3
|
+
分析多项目 session 数据,提取公共工具调用序列、项目类型分布等跨项目指标。
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import sys
|
|
9
|
+
from collections import Counter, defaultdict
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
# 使 config 可导入
|
|
13
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
14
|
+
|
|
15
|
+
# Window size (number of consecutive tool calls) of one extracted sequence.
_SEQUENCE_LENGTH = 3

# Project type -> keywords looked for in a project path.
# Key order is significant: _classify_project returns the first type whose
# keyword list produces a match.
_PROJECT_TYPE_KEYWORDS: dict[str, list[str]] = {
    "frontend": [
        "frontend",
        "vue",
        "react",
        "nuxt",
        "next",
        "angular",
        "svelte",
        "web-app",
        "webapp",
    ],
    "backend": [
        "api",
        "server",
        "backend",
        "service",
        "grpc",
        "rest",
    ],
    "fullstack": [
        "workspace",
    ],
    "tooling": [
        "tool",
        "cli",
        "script",
        "util",
        "extension",
        "plugin",
        "agent",
        "harness",
    ],
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _classify_project(project_path: str) -> str:
    """Classify a project by keywords found in its path.

    The path is lower-cased and the types in ``_PROJECT_TYPE_KEYWORDS`` are
    tried in mapping order; the first type with at least one keyword contained
    in the path wins.  Matching is plain substring containment, so a keyword
    also matches when it appears inside a longer word.

    Args:
        project_path: Project path (or name) to inspect.

    Returns:
        The matching project type, or ``"other"`` when no keyword matches.
    """
    haystack = project_path.lower()
    candidates = (
        kind
        for kind, needles in _PROJECT_TYPE_KEYWORDS.items()
        if any(needle in haystack for needle in needles)
    )
    return next(candidates, "other")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _extract_sequences(
    tool_calls: list, length: int = _SEQUENCE_LENGTH
) -> list[tuple[str, ...]]:
    """Return every contiguous N-gram of tool names from a list of tool calls.

    Args:
        tool_calls: Objects exposing a ``name`` attribute, in call order.
        length: Number of consecutive names in each N-gram.

    Returns:
        One tuple per sliding-window position, in order; an empty list when
        there are fewer than ``length`` calls.
    """
    names = [call.name for call in tool_calls]
    window_count = len(names) - length + 1
    if window_count <= 0:
        return []

    ngrams: list[tuple[str, ...]] = []
    for start in range(window_count):
        ngrams.append(tuple(names[start : start + length]))
    return ngrams
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _short_project_name(project_path: str) -> str:
    """Shorten a full project path to at most its last two path segments.

    Backslashes are treated as path separators and empty segments are ignored.

    Args:
        project_path: Full project path.

    Returns:
        ``"parent/name"`` when two or more segments exist, the single segment
        when only one exists, or the input unchanged when it has no segments.
    """
    normalized = project_path.replace("\\", "/")
    segments = [segment for segment in normalized.split("/") if segment]
    if not segments:
        return project_path
    # Joining the trailing slice covers both the one- and multi-segment cases.
    return "/".join(segments[-2:])
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def analyze_cross_project(sessions: list) -> dict:
    """Analyze patterns shared across projects.

    Sessions without a project are ignored.  Projects are keyed by their
    shortened name (see ``_short_project_name``).

    Args:
        sessions: List of parsed sessions; each exposes ``project`` and
            ``tool_calls`` (objects with a ``name`` attribute).

    Returns:
        A dict with:
            project_count: Number of distinct (shortened) project names.
            projects: Per-project session count, total tool calls and the
                five most used tools, sorted by project name.
            common_tool_sequences: Up to 30 tool-name N-grams seen in at least
                two projects, ordered by how many projects contain them.
            project_type_distribution: Session count per project type.
    """
    per_project: dict[str, dict] = defaultdict(
        lambda: {"sessions": 0, "tool_calls": [], "tool_counter": Counter()}
    )
    # For each N-gram: which projects it appears in, and how often overall.
    ngram_projects: dict[tuple[str, ...], set] = defaultdict(set)
    ngram_totals: Counter = Counter()
    type_counts: dict[str, int] = Counter()

    for session in sessions:
        project_path = session.project or ""
        if not project_path:
            continue
        label = _short_project_name(project_path)

        bucket = per_project[label]
        bucket["sessions"] += 1
        bucket["tool_calls"].extend(session.tool_calls)
        bucket["tool_counter"].update(call.name for call in session.tool_calls)

        for ngram in _extract_sequences(session.tool_calls):
            ngram_projects[ngram].add(label)
            ngram_totals[ngram] += 1

        type_counts[_classify_project(project_path)] += 1

    projects: list[dict] = [
        {
            "name": label,
            "sessions": bucket["sessions"],
            "total_tool_calls": len(bucket["tool_calls"]),
            "top_tools": bucket["tool_counter"].most_common(5),
        }
        for label, bucket in sorted(per_project.items())
    ]

    # Most widely shared sequences first; the sort is stable, so ties keep
    # first-seen order.
    ranked = sorted(
        ngram_projects.items(), key=lambda item: len(item[1]), reverse=True
    )
    shared_sequences: list[dict] = [
        {
            "sequence": list(ngram),
            "projects": sorted(labels),
            "total_count": ngram_totals[ngram],
        }
        for ngram, labels in ranked
        if len(labels) >= 2
    ]

    return {
        "project_count": len(per_project),
        "projects": projects,
        # Cap the output size.
        "common_tool_sequences": shared_sequences[:30],
        "project_type_distribution": dict(type_counts),
    }
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""Signal 3: 错误与重试分析。
|
|
2
|
+
|
|
3
|
+
统计工具错误率、错误模式分类、自我纠正率、按项目的错误分布。
|
|
4
|
+
同时收集 failure_refs(session_id + tool_call_id + pattern + self_corrected),
|
|
5
|
+
供 evolve 分析时按需回溯 JSONL 获取完整上下文。
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from collections import Counter, defaultdict
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import sys
|
|
16
|
+
|
|
17
|
+
_PARENT = str(Path(__file__).resolve().parent.parent)
|
|
18
|
+
if _PARENT not in sys.path:
|
|
19
|
+
sys.path.insert(0, _PARENT)
|
|
20
|
+
|
|
21
|
+
from config import ERROR_KEYWORDS
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ── 错误模式提取 ─────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
# Known error signatures, tried in order against the lower-cased message.
_SPECIFIC_ERROR_PATTERNS = tuple(
    (re.compile(regex), label)
    for regex, label in (
        (r"could not find the exact text", "Could not find the exact text"),
        (r"enoent.*no such file", "ENOENT: no such file"),
        (r"enoent", "ENOENT"),
        (r"permission denied", "Permission denied"),
        (r"non-zero exit code", "Non-zero exit code"),
        (r"command failed.*non-zero", "Command failed (non-zero exit)"),
        (r"timeout|timed out", "Timeout"),
        (r"syntaxerror|syntax error", "Syntax error"),
        (r"typeerror|type error", "TypeError"),
        (r"importerror|module not found", "ImportError/Module not found"),
        (r"referenceerror", "ReferenceError"),
        (r"connection refused|econnrefused", "Connection refused"),
        (r"out of memory", "Out of memory"),
    )
)


def _extract_error_pattern(message: str) -> str | None:
    """Map an error message to a pattern label used for grouping.

    Known signatures are tried first, in table order.  If none match but the
    message contains one of ``ERROR_KEYWORDS`` (case-insensitive), the first
    80 characters of the stripped message are used as the pattern, with
    slash-led path-like runs replaced by ``<path>``.

    Args:
        message: Raw error text.

    Returns:
        The pattern label, or ``None`` when nothing matches.
    """
    lowered = message.lower()

    for matcher, label in _SPECIFIC_ERROR_PATTERNS:
        if matcher.search(lowered):
            return label

    # Fallback: generic keywords from the shared configuration.
    if any(keyword.lower() in lowered for keyword in ERROR_KEYWORDS):
        head = message.strip()[:80]
        # Mask variable parts such as file paths so similar errors group together.
        return re.sub(r"/[\w./\-]+", "<path>", head)

    return None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def analyze_errors(sessions) -> dict:
    """Analyze tool errors and retry behaviour across sessions.

    Args:
        sessions: Parsed sessions.  Each exposes ``project``, ``project_dir``,
            ``session_id``, ``file_path``, ``tool_calls`` (``id``, ``name``,
            ``timestamp``) and ``tool_results`` (``tool_name``,
            ``tool_call_id``, ``is_error``, ``content_preview``,
            ``timestamp``).

    Returns:
        A dict with:
            total_errors: Number of error results.
            by_tool: Per-tool ``errors``, ``total`` and ``error_rate``.
            bash_failure_rate: Error share of ``bash`` results.
            edit_match_failure_rate: Share of ``edit`` results that failed
                with a "could not find" message.
            top_error_patterns: Ten most common patterns with up to three
                example messages each.
            self_correction_rate: Share of errors followed by a later call to
                the same tool in the same session.
            by_project: Per-project error stats, most errors first.
            failure_refs: One ``{session_id, tool_call_id, pattern,
                self_corrected}`` entry per error result.
    """
    total_errors = 0
    # tool_name -> {errors, total}
    tool_error_stats: dict[str, dict[str, int]] = defaultdict(
        lambda: {"errors": 0, "total": 0}
    )
    # project -> {errors, total_calls}
    project_stats: dict[str, dict[str, int]] = defaultdict(
        lambda: {"errors": 0, "total_calls": 0}
    )

    edit_total = 0
    edit_match_failures = 0
    bash_total = 0
    bash_errors = 0

    pattern_counter: Counter[str] = Counter()
    pattern_examples: dict[str, list[str]] = defaultdict(list)

    error_count_for_correction = 0
    self_correction_count = 0

    # One reference per error result, for later look-up of the full context.
    failure_refs: list[dict] = []

    for session in sessions:
        proj = session.project or session.project_dir or "unknown"
        sid = session.session_id or session.file_path

        # tool_call_id -> tool call, used to find the call behind a result.
        call_by_id: dict[str, object] = {tc.id: tc for tc in session.tool_calls}

        # ── Pass 1: counters and error patterns ─────────────
        for tr in session.tool_results:
            tool_error_stats[tr.tool_name]["total"] += 1
            project_stats[proj]["total_calls"] += 1

            # bash/edit totals count every result, not only failures.
            if tr.tool_name == "bash":
                bash_total += 1
            if tr.tool_name == "edit":
                edit_total += 1

            if not tr.is_error:
                continue

            total_errors += 1
            tool_error_stats[tr.tool_name]["errors"] += 1
            project_stats[proj]["errors"] += 1

            if tr.tool_name == "bash":
                bash_errors += 1
            if tr.tool_name == "edit" and "could not find" in tr.content_preview.lower():
                edit_match_failures += 1

            pattern = _extract_error_pattern(tr.content_preview)
            if pattern:
                pattern_counter[pattern] += 1
                # Keep at most three sample messages per pattern.
                if len(pattern_examples[pattern]) < 3:
                    pattern_examples[pattern].append(tr.content_preview.strip()[:200])

        # ── Pass 2: self-correction detection + failure_refs ─
        ordered_results = sorted(
            session.tool_results, key=lambda t: t.timestamp or ""
        )
        ordered_calls = sorted(
            session.tool_calls, key=lambda t: t.timestamp or ""
        )

        for tr in ordered_results:
            if not tr.is_error:
                continue
            error_count_for_correction += 1

            # An error whose originating call is unknown is still recorded,
            # but can never count as self-corrected.
            found_retry = False
            error_call = call_by_id.get(tr.tool_call_id)
            if error_call:
                error_tool_name = getattr(error_call, "name", "")
                # Missing timestamps are normalised to "" on both sides, as in
                # the sort keys above, so a None timestamp cannot raise
                # TypeError in the comparison below.
                error_timestamp = getattr(error_call, "timestamp", "") or ""

                # A retry is any strictly later call to the same tool.
                found_retry = any(
                    tc.name == error_tool_name
                    for tc in ordered_calls
                    if (tc.timestamp or "") > error_timestamp
                )
                if found_retry:
                    self_correction_count += 1

            failure_refs.append({
                "session_id": sid,
                "tool_call_id": tr.tool_call_id,
                "pattern": _extract_error_pattern(tr.content_preview) or "Unknown",
                "self_corrected": found_retry,
            })

    # ── by_tool ─────────────────────────────────────────────
    by_tool: dict[str, dict] = {
        tool_name: {
            "errors": stats["errors"],
            "total": stats["total"],
            "error_rate": (
                round(stats["errors"] / stats["total"], 4) if stats["total"] else 0.0
            ),
        }
        for tool_name, stats in tool_error_stats.items()
    }

    # ── top_error_patterns ──────────────────────────────────
    top_error_patterns = [
        {
            "pattern": pattern,
            "count": count,
            "examples": pattern_examples.get(pattern, [])[:3],
        }
        for pattern, count in pattern_counter.most_common(10)
    ]

    # ── self_correction_rate ────────────────────────────────
    self_correction_rate = round(
        self_correction_count / error_count_for_correction, 4
    ) if error_count_for_correction else 0.0

    # ── by_project (most errors first) ──────────────────────
    by_project = sorted(
        [
            {
                "project": proj,
                "errors": stats["errors"],
                "total_calls": stats["total_calls"],
                "error_rate": round(
                    stats["errors"] / stats["total_calls"], 4
                ) if stats["total_calls"] else 0.0,
            }
            for proj, stats in project_stats.items()
        ],
        key=lambda x: x["errors"],
        reverse=True,
    )

    bash_failure_rate = round(bash_errors / bash_total, 4) if bash_total else 0.0
    edit_match_failure_rate = round(
        edit_match_failures / edit_total, 4
    ) if edit_total else 0.0

    return {
        "total_errors": total_errors,
        "by_tool": by_tool,
        "bash_failure_rate": bash_failure_rate,
        "edit_match_failure_rate": edit_match_failure_rate,
        "top_error_patterns": top_error_patterns,
        "self_correction_rate": self_correction_rate,
        "by_project": by_project,
        "failure_refs": failure_refs,
    }
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""Signal 7: 用户满意度隐式信号。
|
|
2
|
+
|
|
3
|
+
通过 session 粒度的间接指标推断用户满意度:
|
|
4
|
+
单轮完成率、平均轮数、工具调用密度、session 时长分布。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from collections import defaultdict
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
# 使 config 可导入
|
|
15
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
16
|
+
from config import SINGLE_TURN_MAX_MESSAGES
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _parse_ts(ts_str: str) -> datetime | None:
    """Safely parse an ISO 8601 timestamp into a timezone-aware datetime.

    A trailing ``Z`` (UTC designator) is rewritten to ``+00:00`` before
    parsing, because ``datetime.fromisoformat`` only accepts it from
    Python 3.11 on; without this such timestamps would parse to ``None`` on
    older interpreters.

    Args:
        ts_str: ISO 8601 timestamp text; may be empty or ``None``.

    Returns:
        The parsed datetime, with UTC assumed when the text carries no offset,
        or ``None`` when the input is empty or cannot be parsed.
    """
    if not ts_str or not isinstance(ts_str, str):
        return None

    text = ts_str
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"

    try:
        dt = datetime.fromisoformat(text)
    except (ValueError, TypeError):
        return None

    # Naive timestamps are treated as UTC so all results are comparable.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _percentile(sorted_values: list[float], pct: float) -> float:
    """Compute a percentile by linear interpolation between neighbours.

    Args:
        sorted_values: Values in ascending order.
        pct: Percentile to compute, e.g. ``50`` for the median.

    Returns:
        The interpolated value rounded to 2 decimals, or ``0.0`` for an
        empty list.
    """
    if not sorted_values:
        return 0.0

    last_index = len(sorted_values) - 1
    position = pct / 100.0 * last_index
    below = int(position)
    above = min(below + 1, last_index)
    weight = position - below

    low_value = sorted_values[below]
    interpolated = low_value + weight * (sorted_values[above] - low_value)
    return round(interpolated, 2)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _short_project_name(project_path: str) -> str:
    """Derive a short display name from a full project path.

    Both ``/`` and ``\\`` are accepted as separators; empty segments are
    dropped.

    Args:
        project_path: Full project path.

    Returns:
        The last two segments joined by ``/``, the only segment when there is
        just one, or the original string when no segment remains.
    """
    pieces = []
    for piece in project_path.replace("\\", "/").split("/"):
        if piece:
            pieces.append(piece)

    if not pieces:
        return project_path
    if len(pieces) == 1:
        return pieces[0]
    parent, leaf = pieces[-2], pieces[-1]
    return f"{parent}/{leaf}"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _session_last_timestamp(session) -> str:
    """Return the latest timestamp recorded anywhere in a session.

    Collects the non-empty ``timestamp`` of every tool call, tool result,
    user message and usage entry and returns the largest one.  The values are
    compared as-is, so the result is the maximum under their own ordering.

    Args:
        session: Parsed session exposing ``tool_calls``, ``tool_results``,
            ``user_messages`` and ``usage_list``.

    Returns:
        The greatest timestamp, or ``""`` when the session has none.
    """
    record_groups = (
        session.tool_calls,
        session.tool_results,
        session.user_messages,
        session.usage_list,
    )
    stamps: list[str] = [
        record.timestamp
        for group in record_groups
        for record in group
        if record.timestamp
    ]
    return max(stamps, default="")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def analyze_satisfaction(sessions: list) -> dict:
    """Derive implicit user-satisfaction signals from session-level metrics.

    A session counts as "single turn" when its number of user messages is at
    most ``SINGLE_TURN_MAX_MESSAGES``.  A session's duration runs from its
    ``start_time`` to its latest recorded timestamp and is only included when
    both parse and the end is strictly after the start.

    Args:
        sessions: List of parsed sessions exposing ``user_messages``,
            ``tool_calls``, ``start_time`` and ``project``.

    Returns:
        A dict with:
            total_sessions: Number of sessions analysed.
            single_turn_completion_rate: Share of single-turn sessions.
            avg_turns_per_session: Mean user messages per session.
            avg_tool_calls_per_session: Mean tool calls per session.
            session_duration_stats: Median, p25, p75 and max duration in
                minutes, each rounded to 2 decimals.
            by_project: Per-project averages for projects with at least two
                sessions, sorted by project name.
    """
    total_sessions = len(sessions)
    if total_sessions == 0:
        return {
            "total_sessions": 0,
            "single_turn_completion_rate": 0.0,
            "avg_turns_per_session": 0.0,
            "avg_tool_calls_per_session": 0.0,
            "session_duration_stats": {
                "median_minutes": 0.0,
                "p25_minutes": 0.0,
                "p75_minutes": 0.0,
                "max_minutes": 0.0,
            },
            "by_project": [],
        }

    single_turn_count = 0
    total_turns = 0
    total_tool_calls = 0
    durations_minutes: list[float] = []

    # Short project name -> aggregated counters.
    project_stats: dict[str, dict] = defaultdict(
        lambda: {"sessions": 0, "turns": 0, "single_turn": 0}
    )

    for session in sessions:
        user_count = len(session.user_messages)
        total_turns += user_count
        total_tool_calls += len(session.tool_calls)

        is_single_turn = user_count <= SINGLE_TURN_MAX_MESSAGES
        if is_single_turn:
            single_turn_count += 1

        # Duration: session start to the latest timestamp seen in the session.
        start_ts = _parse_ts(session.start_time)
        end_ts_str = _session_last_timestamp(session)
        end_ts = _parse_ts(end_ts_str) if end_ts_str else None
        if start_ts and end_ts and end_ts > start_ts:
            durations_minutes.append((end_ts - start_ts).total_seconds() / 60.0)

        project = session.project or ""
        if project:
            bucket = project_stats[_short_project_name(project)]
            bucket["sessions"] += 1
            bucket["turns"] += user_count
            if is_single_turn:
                bucket["single_turn"] += 1

    durations_minutes.sort()
    duration_stats = {
        "median_minutes": _percentile(durations_minutes, 50),
        "p25_minutes": _percentile(durations_minutes, 25),
        "p75_minutes": _percentile(durations_minutes, 75),
        # Rounded to 2 decimals to match the percentile values above.
        "max_minutes": round(durations_minutes[-1], 2) if durations_minutes else 0.0,
    }

    # Only projects with at least two sessions are reported.
    by_project: list[dict] = [
        {
            "project": name,
            "sessions": stats["sessions"],
            "avg_turns": round(stats["turns"] / stats["sessions"], 2),
            "single_turn_rate": round(stats["single_turn"] / stats["sessions"], 4),
        }
        for name, stats in sorted(project_stats.items())
        if stats["sessions"] >= 2
    ]

    return {
        "total_sessions": total_sessions,
        "single_turn_completion_rate": round(
            single_turn_count / total_sessions, 4
        ),
        "avg_turns_per_session": round(total_turns / total_sessions, 2),
        "avg_tool_calls_per_session": round(
            total_tool_calls / total_sessions, 2
        ),
        "session_duration_stats": duration_stats,
        "by_project": by_project,
    }
|