@zhushanwen/pi-evolve-daily 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ """Evolve daily analyzer — Python 侧数据提取与规则挖掘。
2
+
3
+ extractors/: 从 session JSONL 提取各维度统计数据。
4
+ rules/: 从 daily-reports 中挖掘 actionable issues。
5
+ """
@@ -0,0 +1,203 @@
1
+ #!/usr/bin/env python3
2
+ """Evolve Daily Analyzer - 使用新的 extractors 和 rules 分析 session JSONL。
3
+
4
+ 用法:
5
+ python3 analyze.py --since 1d --format json --output report.json
6
+ python3 analyze.py --input session.jsonl --format json --output report.json
7
+ """
8
+
9
+ import argparse
10
+ import json
11
+ import sys
12
+ from datetime import datetime, timedelta
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ # 添加当前目录到 Python 路径,以便导入 extractors 和 rules
17
+ sys.path.insert(0, str(Path(__file__).parent))
18
+
19
+ from extractors import run_extractors
20
+ from rules import run_rules
21
+
22
+
23
+ def load_sessions(since_days: int = 1, input_file: str | None = None) -> list[dict]:
24
+ """加载 session JSONL 数据。
25
+
26
+ Args:
27
+ since_days: 加载最近 N 天的数据。
28
+ input_file: 指定输入文件路径(优先级高于 since_days)。
29
+
30
+ Returns:
31
+ session 列表。
32
+ """
33
+ if input_file:
34
+ return _load_from_file(input_file)
35
+
36
+ # 从默认目录加载
37
+ sessions_dir = Path.home() / ".pi" / "agent" / "sessions"
38
+ if not sessions_dir.exists():
39
+ print(f"[evolve] Warning: Sessions directory not found: {sessions_dir}")
40
+ return []
41
+
42
+ cutoff = datetime.now() - timedelta(days=since_days)
43
+ sessions = []
44
+
45
+ for session_file in sessions_dir.glob("*.jsonl"):
46
+ try:
47
+ # 从文件名解析日期
48
+ file_date = datetime.fromisoformat(session_file.stem[:10])
49
+ if file_date < cutoff:
50
+ continue
51
+ except (ValueError, IndexError):
52
+ # 文件名不是日期格式,跳过
53
+ continue
54
+
55
+ session_data = _load_session_file(session_file)
56
+ if session_data:
57
+ sessions.append(session_data)
58
+
59
+ return sessions
60
+
61
+
62
+ def _load_from_file(input_file: str) -> list[dict]:
63
+ """从单个文件加载 session 数据。"""
64
+ path = Path(input_file)
65
+ if not path.exists():
66
+ print(f"[evolve] Warning: Input file not found: {input_file}")
67
+ return []
68
+
69
+ if path.suffix == ".jsonl":
70
+ session_data = _load_session_file(path)
71
+ return [session_data] if session_data else []
72
+ elif path.suffix == ".json":
73
+ try:
74
+ with open(path, "r", encoding="utf-8") as f:
75
+ data = json.load(f)
76
+ return [data] if isinstance(data, dict) else data
77
+ except Exception as e:
78
+ print(f"[evolve] Warning: Failed to load JSON file {input_file}: {e}")
79
+ return []
80
+ else:
81
+ print(f"[evolve] Warning: Unsupported file format: {path.suffix}")
82
+ return []
83
+
84
+
85
+ def _load_session_file(file_path: Path) -> dict | None:
86
+ """加载单个 JSONL session 文件。"""
87
+ try:
88
+ messages = []
89
+ with open(file_path, "r", encoding="utf-8") as f:
90
+ for line in f:
91
+ line = line.strip()
92
+ if line:
93
+ try:
94
+ messages.append(json.loads(line))
95
+ except json.JSONDecodeError:
96
+ continue
97
+
98
+ if not messages:
99
+ return None
100
+
101
+ return {
102
+ "session_id": file_path.stem,
103
+ "messages": messages,
104
+ "file_path": str(file_path),
105
+ }
106
+ except Exception as e:
107
+ print(f"[evolve] Warning: Failed to load session file {file_path}: {e}")
108
+ return None
109
+
110
+
111
def generate_report(sessions: list[dict], format: str = "json") -> dict:
    """Run every extractor and rule over ``sessions`` and assemble the report.

    Args:
        sessions: Session dicts to analyse.
        format: Output format name; accepted for the CLI but not used here.

    Returns:
        A dict with the generation timestamp, the session count, the raw
        extractor results, the mined issues and a per-severity summary.
    """
    # Extractors receive the current working directory as the project root.
    stats = run_extractors(sessions, project_root=str(Path.cwd()))
    found_issues = run_rules(stats)

    def _with_severity(level: str) -> int:
        """Count issues whose ``severity`` equals ``level``."""
        return sum(1 for issue in found_issues if issue.get("severity") == level)

    timestamp = datetime.now().isoformat()
    summary = {
        "total_issues": len(found_issues),
        "high_severity": _with_severity("high"),
        "medium_severity": _with_severity("medium"),
        "low_severity": _with_severity("low"),
    }
    return {
        "generated_at": timestamp,
        "session_count": len(sessions),
        "extractors": stats,
        "issues": found_issues,
        "summary": summary,
    }
144
+
145
+
146
def main():
    """Command-line entry point: parse arguments, analyse sessions, emit JSON.

    The report is written to ``--output`` when given, otherwise printed to
    stdout. All diagnostics (warnings, errors, ``--verbose`` progress) go to
    stderr so that stdout contains nothing but the JSON report.

    Exits with status 1 when ``--since`` is not a non-negative day count.
    """
    parser = argparse.ArgumentParser(description="Evolve Daily Analyzer")
    parser.add_argument("--since", type=str, default="1d", help="分析最近 N 天的数据(如 1d, 7d)")
    parser.add_argument("--input", type=str, help="指定输入文件路径")
    parser.add_argument("--format", type=str, default="json", choices=["json"], help="输出格式")
    parser.add_argument("--output", type=str, help="输出文件路径")
    parser.add_argument("--verbose", action="store_true", help="详细输出")

    args = parser.parse_args()

    def _log(message: str) -> None:
        """Write a diagnostic line to stderr, keeping stdout clean."""
        print(message, file=sys.stderr)

    # "--since" is a day count with an optional trailing "d" (e.g. "7d").
    try:
        since_days = int(args.since.lower().rstrip("d"))
    except ValueError:
        since_days = -1
    if since_days < 0:
        # A negative window would put the cutoff in the future.
        _log(f"[evolve] Error: Invalid --since value: {args.since}")
        sys.exit(1)

    if args.verbose:
        _log(f"[evolve] Loading sessions (since {since_days} days)...")
    sessions = load_sessions(since_days=since_days, input_file=args.input)

    if not sessions:
        _log("[evolve] Warning: No sessions found")
        # Still emit a well-formed, empty report so consumers can parse it.
        report = {
            "generated_at": datetime.now().isoformat(),
            "session_count": 0,
            "extractors": {},
            "issues": [],
            "summary": {
                "total_issues": 0,
                "high_severity": 0,
                "medium_severity": 0,
                "low_severity": 0,
            },
        }
    else:
        if args.verbose:
            _log(f"[evolve] Found {len(sessions)} sessions")
            _log("[evolve] Running extractors...")
        report = generate_report(sessions, format=args.format)

    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(report, f, ensure_ascii=False, indent=2)
        if args.verbose:
            _log(f"[evolve] Report saved to {args.output}")
    else:
        print(json.dumps(report, ensure_ascii=False, indent=2))
200
+
201
+
202
+ if __name__ == "__main__":
203
+ main()
@@ -0,0 +1,61 @@
1
+ """Extractor 自动发现机制。
2
+
3
+ 通过 pkgutil.iter_modules 自动发现 extractors/ 目录下的所有模块,
4
+ 每个模块必须实现 extract(sessions: list[dict]) -> dict 接口。
5
+ """
6
+
7
+ import pkgutil
8
+ import importlib
9
+ from typing import Protocol, runtime_checkable
10
+
11
+
12
@runtime_checkable
class BaseExtractor(Protocol):
    """Structural interface for extractors: anything exposing ``extract``.

    Being ``runtime_checkable``, ``isinstance(obj, BaseExtractor)`` succeeds
    for any object that has an ``extract`` attribute.
    """

    def extract(self, sessions: list[dict]) -> dict:
        """Compute statistics for ``sessions`` and return them as a dict."""
        ...
17
+
18
+
19
def discover_extractors() -> dict[str, object]:
    """Import every public module of this package that defines ``extract``.

    Modules whose name starts with an underscore are ignored. A module that
    fails to import is reported on stderr and skipped, so one broken
    extractor does not prevent the others from loading.

    Returns:
        Mapping of module name to the imported module object.
    """
    import sys

    extractors: dict[str, object] = {}
    for module_info in pkgutil.iter_modules(__path__):
        modname = module_info.name
        if modname.startswith("_"):
            continue
        try:
            module = importlib.import_module(f".{modname}", __package__)
        except Exception as exc:
            # Best effort by design. The warning goes to stderr because the
            # CLI prints its JSON report on stdout.
            print(
                f"[evolve] Warning: Failed to load extractor {modname}: {exc}",
                file=sys.stderr,
            )
            continue
        if hasattr(module, "extract"):
            extractors[modname] = module
    return extractors
36
+
37
+
38
def run_extractors(sessions: list[dict], project_root: str = "") -> dict:
    """Run every discovered extractor in isolation and merge the results.

    Each extractor is called on its own; if it raises, a warning is written
    to stderr and an empty dict is stored for it, so a single failure never
    aborts the whole run.

    Args:
        sessions: Session dicts parsed from the JSONL files.
        project_root: Project root directory. It is forwarded only to
            extractors whose ``extract`` signature declares a
            ``project_root`` parameter.

    Returns:
        Dict keyed by ``"{extractor_name}_stats"`` holding each extractor's
        result.
    """
    import inspect
    import sys

    results: dict = {}
    for name, extractor in discover_extractors().items():
        key = f"{name}_stats"
        try:
            extract_fn = extractor.extract  # type: ignore[attr-defined]
            if "project_root" in inspect.signature(extract_fn).parameters:
                results[key] = extract_fn(sessions, project_root=project_root)
            else:
                results[key] = extract_fn(sessions)
        except Exception as exc:
            # Deliberately broad: extractors are plugins. The warning goes to
            # stderr because the CLI prints its JSON report on stdout.
            print(f"[evolve] Warning: Extractor {name} failed: {exc}", file=sys.stderr)
            results[key] = {}
    return results
@@ -0,0 +1,64 @@
1
+ """统计 session 中的 compactionSummary 消息。"""
2
+
3
+ from typing import Any
4
+
5
+
6
def extract(sessions: list[dict]) -> dict:
    """Summarise how often and how early sessions were compacted.

    An entry counts as a compaction when its ``type`` is ``"compaction"``.

    Args:
        sessions: Session dicts, each optionally holding a ``messages`` list.

    Returns:
        Dict with the total number of compactions, per-session average,
        maximum and histogram, the estimated turn index of every compaction,
        the number of compactions before turn 5, and the session counts.
    """
    turn_indices: list[int] = []
    counts_per_session: list[int] = []

    for session in sessions:
        positions = [
            pos
            for pos, entry in enumerate(session.get("messages", []))
            if entry.get("type") == "compaction"
        ]
        counts_per_session.append(len(positions))
        # Rough estimate: two entries per turn.
        turn_indices.extend(pos // 2 for pos in positions)

    session_total = len(sessions)
    compact_total = sum(counts_per_session)

    # Histogram buckets: 0, 1, 2, 3, 4, 5 and "6 or more" compactions.
    histogram = [0] * 7
    for count in counts_per_session:
        histogram[min(count, 6)] += 1

    return {
        "total_compacts": compact_total,
        "compacts_per_session": {
            "avg": compact_total / max(session_total, 1),
            "max": max(counts_per_session) if counts_per_session else 0,
            "distribution": histogram,
        },
        "compact_turn_indices": turn_indices,
        "early_trigger_count": sum(1 for turn in turn_indices if turn < 5),
        "sessions_with_compact": sum(1 for count in counts_per_session if count > 0),
        "total_sessions": session_total,
    }
@@ -0,0 +1,139 @@
1
+ """计算估算的上下文窗口利用率。"""
2
+
3
+ from typing import Any
4
+
5
# Context-window limit for each known model ID. Utilization is computed as
# estimated tokens divided by these values; model IDs not listed here are
# ignored by the utilization statistics.
MODEL_CONTEXT_LIMITS: dict[str, int] = {
    **dict.fromkeys(("claude-sonnet-4", "claude-haiku-3.5"), 200_000),
    **dict.fromkeys(("deepseek-v3", "deepseek-r1"), 64_000),
    **dict.fromkeys(("gpt-4o", "gpt-4o-mini"), 128_000),
}
14
+
15
+
16
def estimate_tokens_from_chars(char_count: int, text_sample: str = "") -> int:
    """Roughly convert a character count into a token count.

    With a sample, the share of characters in U+4E00..U+9FFF is weighted at
    1.5 tokens per character and everything else at 0.25. Without a sample a
    flat 0.5 tokens per character is used.

    Args:
        char_count: Total number of characters to convert.
        text_sample: Optional text used only to measure that character share.

    Returns:
        The estimate, truncated to an int.
    """
    if char_count == 0:
        return 0
    if not text_sample:
        return int(char_count * 0.5)

    cjk_total = sum("\u4e00" <= ch <= "\u9fff" for ch in text_sample)
    ratio = cjk_total / len(text_sample)
    return int(char_count * (ratio * 1.5 + (1 - ratio) * 0.25))
38
+
39
+
40
+ def _extract_content_length(msg: dict) -> int:
41
+ """提取消息内容的字符数。"""
42
+ # 处理嵌套的消息格式 (msg.message.content)
43
+ message = msg.get("message", msg)
44
+ content = message.get("content", "")
45
+ if isinstance(content, str):
46
+ return len(content)
47
+ if isinstance(content, list):
48
+ return sum(
49
+ len(item.get("text", ""))
50
+ for item in content
51
+ if isinstance(item, dict) and "text" in item
52
+ )
53
+ return 0
54
+
55
+
56
+ def extract(sessions: list[dict]) -> dict:
57
+ """从 session 列表中提取上下文利用率统计。
58
+
59
+ 通过累积消息字符数估算上下文使用量,结合模型 context limit 计算利用率。
60
+
61
+ Args:
62
+ sessions: session JSONL 解析后的字典列表。
63
+
64
+ Returns:
65
+ 包含上下文利用率分布、峰值、模型映射等统计信息。
66
+ """
67
+ models_used: set[str] = set()
68
+ context_limits: dict[str, int] = {}
69
+ utilization_samples: list[float] = []
70
+ compact_at_high_utilization = 0
71
+ total_compacts = 0
72
+
73
+ for session in sessions:
74
+ messages = session.get("messages", [])
75
+ current_model: str | None = None
76
+ cumulative_chars = 0
77
+
78
+ for msg in messages:
79
+ # 检查 model_change 事件
80
+ if msg.get("type") == "model_change":
81
+ model_id = msg.get("modelId", "")
82
+ if model_id:
83
+ current_model = model_id
84
+ models_used.add(model_id)
85
+ if model_id in MODEL_CONTEXT_LIMITS:
86
+ context_limits[model_id] = MODEL_CONTEXT_LIMITS[model_id]
87
+
88
+ # 累积消息字符数
89
+ cumulative_chars += _extract_content_length(msg)
90
+
91
+ # compact 事件
92
+ if msg.get("type") == "compaction":
93
+ total_compacts += 1
94
+ if current_model and current_model in MODEL_CONTEXT_LIMITS:
95
+ limit = MODEL_CONTEXT_LIMITS[current_model]
96
+ estimated_tokens = estimate_tokens_from_chars(cumulative_chars)
97
+ utilization = estimated_tokens / limit
98
+ if utilization >= 0.7:
99
+ compact_at_high_utilization += 1
100
+ utilization_samples.append(utilization)
101
+ # compact 后重置累积
102
+ cumulative_chars = 0
103
+
104
+ # session 结束时记录最终利用率
105
+ if (
106
+ current_model
107
+ and current_model in MODEL_CONTEXT_LIMITS
108
+ and cumulative_chars > 0
109
+ ):
110
+ limit = MODEL_CONTEXT_LIMITS[current_model]
111
+ estimated_tokens = estimate_tokens_from_chars(cumulative_chars)
112
+ utilization = estimated_tokens / limit
113
+ utilization_samples.append(utilization)
114
+
115
+ # 计算统计
116
+ avg_utilization = sum(utilization_samples) / max(len(utilization_samples), 1)
117
+ peak_utilization = max(utilization_samples) if utilization_samples else 0.0
118
+
119
+ # 分布桶
120
+ distribution = {"0-30%": 0, "30-60%": 0, "60-90%": 0, "90%+": 0}
121
+ for u in utilization_samples:
122
+ if u < 0.3:
123
+ distribution["0-30%"] += 1
124
+ elif u < 0.6:
125
+ distribution["30-60%"] += 1
126
+ elif u < 0.9:
127
+ distribution["60-90%"] += 1
128
+ else:
129
+ distribution["90%+"] += 1
130
+
131
+ return {
132
+ "models_used": sorted(models_used),
133
+ "context_limits": context_limits,
134
+ "avg_estimated_utilization": avg_utilization,
135
+ "peak_estimated_utilization": peak_utilization,
136
+ "utilization_distribution": distribution,
137
+ "compact_at_high_utilization": compact_at_high_utilization,
138
+ "total_compacts": total_compacts,
139
+ }
@@ -0,0 +1,163 @@
1
+ """分析 Goal 任务拆分质量和 Todo 使用质量。"""
2
+
3
+ import re
4
+ from typing import Any
5
+
6
+
7
def score_evidence(evidence: str) -> float:
    """Rate a task's evidence text on a 0.0-1.0 scale.

    Points are awarded independently for: length of at least 20 characters
    (0.3), a slash or backslash (0.2), a test/spec/check keyword (0.2), a
    pass/fail/success/error keyword (0.2) and any digit (0.1). Keyword
    matching is case-insensitive.

    Args:
        evidence: The evidence text of a task.

    Returns:
        The accumulated score, capped at 1.0; 0.0 for empty evidence.
    """
    if not evidence:
        return 0.0

    criteria = (
        (len(evidence) >= 20, 0.3),
        (re.search(r"[/\\]", evidence) is not None, 0.2),
        (re.search(r"test|spec|check", evidence, re.I) is not None, 0.2),
        (re.search(r"pass|fail|success|error", evidence, re.I) is not None, 0.2),
        (re.search(r"\d+", evidence) is not None, 0.1),
    )
    total = 0.0
    for satisfied, weight in criteria:
        if satisfied:
            total += weight
    return min(total, 1.0)
32
+
33
+
34
+ def _extract_text_from_content(content: Any) -> str:
35
+ """从消息 content 中提取纯文本。"""
36
+ if isinstance(content, str):
37
+ return content
38
+ if isinstance(content, list):
39
+ return " ".join(
40
+ item.get("text", "")
41
+ for item in content
42
+ if isinstance(item, dict) and "text" in item
43
+ )
44
+ return ""
45
+
46
+
47
def extract(sessions: list[dict]) -> dict:
    """Collect goal, task, evidence, stall, token and todo statistics.

    Goal data comes from entries whose ``customType`` is ``"goal-state"``;
    every such entry is counted as one goal. Todo data comes from
    ``toolResult`` messages of the ``todo`` tool, found either at the top
    level of an entry or nested under its ``message`` key, and is classified
    by keyword matching on the result text.

    Args:
        sessions: Session dicts, each optionally holding a ``messages`` list.

    Returns:
        Dict with goal counters and rates plus the nested ``task_stats``,
        ``evidence_stats``, ``stall_stats``, ``token_stats`` and
        ``todo_stats`` dicts.
    """
    goals_total = 0
    goals_completed = 0
    goals_budget_limited = 0
    goals_cancelled = 0
    goals_with_stall = 0
    all_tasks: list[dict] = []
    all_evidence: list[str] = []
    stall_count = 0
    total_tokens = 0

    todo_total = 0
    todo_completed = 0
    todo_abandoned = 0

    for session in sessions:
        for msg in session.get("messages", []):
            if msg.get("customType") == "goal-state":
                goals_total += 1
                # "or" guards against explicit nulls in the stored state.
                state = msg.get("data") or {}
                status = state.get("status", "")

                if status == "complete":
                    goals_completed += 1
                elif status == "budget_limited":
                    goals_budget_limited += 1
                elif status == "cancelled":
                    goals_cancelled += 1

                for task in state.get("tasks") or []:
                    if not isinstance(task, dict):
                        continue
                    all_tasks.append(task)
                    evidence = task.get("evidence", "")
                    if evidence:
                        all_evidence.append(evidence)

                goal_stalls = state.get("stallCount") or 0
                if goal_stalls > 0:
                    # Counted per goal, so the stall rate is a true share of
                    # goals rather than a 0/1 flag on the aggregate.
                    goals_with_stall += 1
                stall_count += goal_stalls
                total_tokens += state.get("tokensUsed") or 0

            # A tool result may be a flat entry or be wrapped under
            # "message"; flat entries keep precedence.
            tool_msg = msg
            if "role" not in msg and isinstance(msg.get("message"), dict):
                tool_msg = msg["message"]

            if tool_msg.get("role") == "toolResult" and tool_msg.get("toolName") == "todo":
                content = _extract_text_from_content(tool_msg.get("content", ""))
                lowered = content.lower()

                # Keyword heuristics; one result can match several of them.
                if "add" in lowered or "添加" in content:
                    todo_total += 1
                if "completed" in lowered or "完成" in content:
                    todo_completed += 1
                if "delete" in lowered or "删除" in content:
                    todo_abandoned += 1

    total_tasks = len(all_tasks)
    completed_tasks = sum(1 for t in all_tasks if t.get("status") == "completed")
    cancelled_tasks = sum(1 for t in all_tasks if t.get("status") == "cancelled")
    pending_tasks = sum(1 for t in all_tasks if t.get("status") == "pending")

    tasks_with_evidence = len(all_evidence)
    evidence_scores = [score_evidence(e) for e in all_evidence]
    avg_evidence_score = sum(evidence_scores) / max(len(evidence_scores), 1)
    low_quality_count = sum(1 for s in evidence_scores if s < 0.4)

    # Every denominator is clamped to 1 so empty input yields 0 rates.
    return {
        "goals_total": goals_total,
        "goals_completed": goals_completed,
        "goals_budget_limited": goals_budget_limited,
        "goals_cancelled": goals_cancelled,
        "completion_rate": goals_completed / max(goals_total, 1),
        "avg_tasks_per_goal": total_tasks / max(goals_total, 1),
        "task_stats": {
            "total": total_tasks,
            "completed": completed_tasks,
            "cancelled": cancelled_tasks,
            "pending": pending_tasks,
            "completion_rate": completed_tasks / max(total_tasks, 1),
            "cancel_rate": cancelled_tasks / max(total_tasks, 1),
        },
        "evidence_stats": {
            "tasks_with_evidence": tasks_with_evidence,
            "evidence_rate": tasks_with_evidence / max(total_tasks, 1),
            "avg_evidence_score": avg_evidence_score,
            "low_quality_evidence_count": low_quality_count,
        },
        "stall_stats": {
            "goals_with_stall": goals_with_stall,
            "stall_rate": goals_with_stall / max(goals_total, 1),
            "avg_stall_count": stall_count / max(goals_total, 1),
        },
        "token_stats": {
            "avg_tokens_per_goal": total_tokens / max(goals_total, 1),
            "avg_tokens_per_task": total_tokens / max(total_tasks, 1),
        },
        "todo_stats": {
            "total_todos": todo_total,
            "completed": todo_completed,
            "abandoned": todo_abandoned,
            "completion_rate": todo_completed / max(todo_total, 1),
            "abandon_rate": todo_abandoned / max(todo_total, 1),
        },
    }