beeops 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/beeops.js +22 -32
- package/contexts/agent-modes.json +8 -19
- package/contexts/en/agent-modes.json +8 -19
- package/contexts/en/worker-base.md +1 -11
- package/contexts/ja/agent-modes.json +8 -19
- package/contexts/ja/worker-base.md +1 -11
- package/contexts/worker-base.md +1 -11
- package/package.json +1 -1
- package/contexts/en/fb.md +0 -15
- package/contexts/en/log.md +0 -16
- package/contexts/fb.md +0 -15
- package/contexts/ja/fb.md +0 -15
- package/contexts/ja/log.md +0 -17
- package/contexts/log.md +0 -16
- package/hooks/checkpoint.py +0 -89
- package/hooks/resolve-log-path.py +0 -93
- package/hooks/run-log.py +0 -429
- package/skills/bo-log-writer/SKILL.md +0 -101
- package/skills/bo-self-improver/SKILL.md +0 -145
- package/skills/bo-self-improver/refs/agent-manager.md +0 -61
- package/skills/bo-self-improver/refs/command-manager.md +0 -46
- package/skills/bo-self-improver/refs/skill-manager.md +0 -59
- package/skills/bo-self-improver/scripts/analyze.py +0 -359
- /package/hooks/{prompt-context.py → bo-prompt-context.py} +0 -0
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: bo-self-improver
|
|
3
|
-
description: Analyze accumulated log JSONL to automatically improve skills, commands, and agents. Runs automatically on session exit.
|
|
4
|
-
argument-hint: ["scan" or date]
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# bo-self-improver: Self-Improvement
|
|
8
|
-
|
|
9
|
-
Analyze accumulated log JSONL to improve skills, commands, and agents.
|
|
10
|
-
|
|
11
|
-
## Scan Targets
|
|
12
|
-
|
|
13
|
-
Resources exist in two layers: global and project. **Scan both.**
|
|
14
|
-
|
|
15
|
-
| Target | Global (`~/.claude/`) | Project (`.claude/`) |
|
|
16
|
-
|--------|--------------------------|--------------------------|
|
|
17
|
-
| Skills | `~/.claude/skills/` | `.claude/skills/` |
|
|
18
|
-
| Commands | `~/.claude/commands/` | `.claude/commands/` |
|
|
19
|
-
| Agents | `~/.claude/agents/` | `.claude/agents/` |
|
|
20
|
-
|
|
21
|
-
## Procedure
|
|
22
|
-
|
|
23
|
-
### 1. Run Analysis Script (Automates Steps 1-4)
|
|
24
|
-
|
|
25
|
-
```bash
|
|
26
|
-
python3 .claude/skills/bo-self-improver/scripts/analyze.py
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
The script performs all of the following in batch and outputs JSON:
|
|
30
|
-
- Log path resolution (via `resolve-log-path.py`)
|
|
31
|
-
- Cursor management + analysis mode determination
|
|
32
|
-
- Diff log extraction
|
|
33
|
-
- Resource usage frequency tallying (skills_used / agents_used / commands_used)
|
|
34
|
-
- Rule-based agent gap detection
|
|
35
|
-
- Error-skill correlation analysis (effectiveness)
|
|
36
|
-
|
|
37
|
-
**Output JSON structure:**
|
|
38
|
-
|
|
39
|
-
```json
|
|
40
|
-
{
|
|
41
|
-
"status": { "total", "cursor", "new_lines", "mode" },
|
|
42
|
-
"frequency": {
|
|
43
|
-
"counts": { "skills": {}, "agents": {}, "commands": {} },
|
|
44
|
-
"classification": { "skills": { "high", "low", "unused" }, ... }
|
|
45
|
-
},
|
|
46
|
-
"all_resources": { "skills": [], "agents": [], "commands": [] },
|
|
47
|
-
"agent_gaps": { "agent_name": { "missed": N, "examples": [...] } },
|
|
48
|
-
"effectiveness": {
|
|
49
|
-
"total_entries", "error_entries",
|
|
50
|
-
"repeated_error_tags", "skills_with_errors", "effective_skills"
|
|
51
|
-
},
|
|
52
|
-
"entries": [{ "title", "category", "has_errors", "learnings", "patterns" }]
|
|
53
|
-
}
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
- `status.mode == "no_new"` (or `"no_log"` when `log.jsonl` does not exist yet) → "No new logs" — exit
|
|
57
|
-
- `status.mode == "diff+full"` → In addition to diff analysis, also read `$LOG_BASE/self-improve/*.md` for long-term trends
|
|
58
|
-
|
|
59
|
-
### 2. Skill Gap Analysis (LLM Judgment)
|
|
60
|
-
|
|
61
|
-
The script only detects rule-based agent gaps. **Skill gaps must be judged by the LLM.**
|
|
62
|
-
|
|
63
|
-
```
|
|
64
|
-
Procedure:
|
|
65
|
-
1. Review entries in the output JSON
|
|
66
|
-
2. From each entry's category + learnings, infer "which skill should have been used"
|
|
67
|
-
3. Compare with actual skills_used
|
|
68
|
-
4. Tally the gaps
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
### 3. Reduction Evaluation (LLM Judgment)
|
|
72
|
-
|
|
73
|
-
Steps 1-2 produce "add/update" candidates, but **evaluate reductions first**. Prioritize reduction over addition.
|
|
74
|
-
|
|
75
|
-
| Check Item | Criteria | Action |
|
|
76
|
-
|---|---|---|
|
|
77
|
-
| Unused resources | 0 usage in frequency + similar resource exists | Merge or delete |
|
|
78
|
-
| Highly duplicated | Two skills with 80%+ content overlap | Merge into one |
|
|
79
|
-
| Bloated content | 15+ checklist items, or SKILL.md > 200 lines | Remove items / split to references |
|
|
80
|
-
| Out-of-date content | Content doesn't match current code | Update to match or delete section |
|
|
81
|
-
| Verbose description | Description > 2 lines, or repeats common knowledge | Simplify |
|
|
82
|
-
|
|
83
|
-
**Required output**: Must explicitly state one of:
|
|
84
|
-
- Reduction candidates found → List specific resource names and reduction details
|
|
85
|
-
- No reduction targets → State rationale in one line (e.g., "All skills used within past 2 weeks, no duplicates")
|
|
86
|
-
|
|
87
|
-
### 4. Execute Resource Improvements (LLM Judgment)
|
|
88
|
-
|
|
89
|
-
Based on Steps 1-3, improve resources following the reference documents below.
|
|
90
|
-
**Execute reductions (Step 3) before additions/updates.**
|
|
91
|
-
|
|
92
|
-
| Target | Reference Document | Content |
|
|
93
|
-
|--------|-------------------|---------|
|
|
94
|
-
| Skills | `refs/skill-manager.md` | Creation/merge/deletion criteria, naming rules |
|
|
95
|
-
| Commands | `refs/command-manager.md` | Creation/update/merge/deletion criteria, format |
|
|
96
|
-
| Agents | `refs/agent-manager.md` | Creation/merge/deletion criteria, format |
|
|
97
|
-
|
|
98
|
-
After executing improvements, record to `log.jsonl` via bo-log-writer. Include creation/update details in the `resources_created` field.
|
|
99
|
-
|
|
100
|
-
### 5. Update Cursor
|
|
101
|
-
|
|
102
|
-
```bash
|
|
103
|
-
python3 .claude/skills/bo-self-improver/scripts/analyze.py --update-cursor
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
### 6. Persist Analysis Results & Record to Log
|
|
107
|
-
|
|
108
|
-
1. **Persist analysis results**: Save to `$LOG_BASE/self-improve/{YYYY-MM-DD}.md`
|
|
109
|
-
|
|
110
|
-
```markdown
|
|
111
|
-
# {YYYY-MM-DD} self-improve analysis results
|
|
112
|
-
|
|
113
|
-
## Analysis Scope
|
|
114
|
-
- Log lines: {cursor+1} to {total} ({new_lines} lines)
|
|
115
|
-
- Mode: diff analysis / diff+full analysis
|
|
116
|
-
|
|
117
|
-
## Frequency
|
|
118
|
-
| Resource Type | Name | Usage Count | Classification |
|
|
119
|
-
|---|---|---|---|
|
|
120
|
-
|
|
121
|
-
## Gaps
|
|
122
|
-
| Resource Type | Name | Missed Count | Representative Miss Pattern |
|
|
123
|
-
|---|---|---|---|
|
|
124
|
-
|
|
125
|
-
## Reduction Evaluation
|
|
126
|
-
| Target Resource | Decision | Rationale |
|
|
127
|
-
|---|---|---|
|
|
128
|
-
|
|
129
|
-
*Even if no reduction targets, state the rationale*
|
|
130
|
-
|
|
131
|
-
## Effectiveness
|
|
132
|
-
| Pattern | Details |
|
|
133
|
-
|---|---|
|
|
134
|
-
|
|
135
|
-
## Improvement Actions Taken
|
|
136
|
-
- ...
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
2. **Log recording**: Record improvement actions to `log.jsonl` via bo-log-writer skill format.
|
|
140
|
-
|
|
141
|
-
## Rules
|
|
142
|
-
|
|
143
|
-
- Execute all creation, update, deletion, merge, and split actions automatically, leaving a log
|
|
144
|
-
- Never delete log JSONL (permanent storage)
|
|
145
|
-
- Do not end with "text edits only". Always execute frequency analysis and duplication analysis, recording results
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
# エージェント管理 判定基準
|
|
2
|
-
|
|
3
|
-
bo-self-improver がエージェントを作成・更新・統合・削除する際の判定基準。
|
|
4
|
-
|
|
5
|
-
## 新規作成
|
|
6
|
-
|
|
7
|
-
| 信号 | 例 | アクション |
|
|
8
|
-
|------|-----|-----------|
|
|
9
|
-
| メインコンテキストが溢れる | 大量のファイル読み込みが必要な調査 | 専門エージェントに切り出し |
|
|
10
|
-
| 並行実行が有効 | レビューしながらテスト実行 | 独立エージェント化 |
|
|
11
|
-
| 専門知識が深い | DB最適化、セキュリティ監査 | 専門エージェント化 |
|
|
12
|
-
| 頻出する作業パターンに対応エージェントがない | 利用頻度分析で検出 | 新規作成 |
|
|
13
|
-
|
|
14
|
-
## 統合・削除
|
|
15
|
-
|
|
16
|
-
| 信号 | 例 | アクション |
|
|
17
|
-
|------|-----|-----------|
|
|
18
|
-
| agents_used に一度も登場しない | 利用頻度0 | 他エージェントへの統合を検討 |
|
|
19
|
-
| 役割が80%以上重複する2エージェント | A と B がほぼ同じ作業 | 1つに統合 |
|
|
20
|
-
| ギャップ分析で見逃し頻度が高い | 使うべき場面で使われていない | description / 発動タイミングを改善 |
|
|
21
|
-
|
|
22
|
-
## エージェントとスキルの使い分け
|
|
23
|
-
|
|
24
|
-
| 種類 | 用途 | 実行方式 |
|
|
25
|
-
|------|------|----------|
|
|
26
|
-
| エージェント(agents/) | 独立コンテキストでの専門作業 | Task tool でサブプロセス実行 |
|
|
27
|
-
| スキル(skills/) | メインコンテキスト内での知識参照 | Skill tool で直接ロード |
|
|
28
|
-
|
|
29
|
-
**判定**: 「独立コンテキストが必要か?」→ Yes = エージェント、No = スキル
|
|
30
|
-
|
|
31
|
-
## フォーマット
|
|
32
|
-
|
|
33
|
-
```markdown
|
|
34
|
-
---
|
|
35
|
-
name: { エージェント名 }
|
|
36
|
-
description: { 1行の説明 }
|
|
37
|
-
tools: ['Read', 'Write', 'Edit', 'Bash', 'Grep', 'Glob']
|
|
38
|
-
model: { sonnet or opus }
|
|
39
|
-
---
|
|
40
|
-
|
|
41
|
-
# {エージェント名}
|
|
42
|
-
|
|
43
|
-
## 役割
|
|
44
|
-
|
|
45
|
-
{このエージェントが何をするか}
|
|
46
|
-
|
|
47
|
-
## 手順
|
|
48
|
-
|
|
49
|
-
1. ...
|
|
50
|
-
2. ...
|
|
51
|
-
|
|
52
|
-
## ルール
|
|
53
|
-
|
|
54
|
-
- ...
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
## 注意事項
|
|
58
|
-
|
|
59
|
-
- エージェント名はケバブケース(例: `build-error-resolver`, `code-reviewer`)
|
|
60
|
-
- `tools` は必要最小限に制限する(セキュリティ原則)
|
|
61
|
-
- `model` は作業の複雑さに応じて選択(単純=sonnet, 複雑=opus)
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
# コマンド管理 判定基準
|
|
2
|
-
|
|
3
|
-
bo-self-improver がコマンドを作成・更新・統合・削除する際の判定基準。
|
|
4
|
-
|
|
5
|
-
## 新規作成
|
|
6
|
-
|
|
7
|
-
| 信号 | 例 | アクション |
|
|
8
|
-
|------|-----|-----------|
|
|
9
|
-
| log の `patterns` に同手順が3回以上 | 「テスト→ビルド→デプロイ」が繰り返し | コマンド化 |
|
|
10
|
-
| 「〜して」が2回以上登場 | 「テスト実行して」「ビルドチェックして」 | コマンド化を検討 |
|
|
11
|
-
| ユーザーが手順を説明している | 「まずdiffを見て、次にテストして...」 | ワークフローコマンド化 |
|
|
12
|
-
|
|
13
|
-
## 更新・統合・削除
|
|
14
|
-
|
|
15
|
-
| 信号 | 例 | アクション |
|
|
16
|
-
|------|-----|-----------|
|
|
17
|
-
| 既存コマンドに足りない手順がある | /check に console.log チェックがない | 既存コマンドを更新 |
|
|
18
|
-
| commands_used に一度も登場しない | 利用頻度0 | 統合・削除を検討 |
|
|
19
|
-
| 80%以上重複する2コマンド | /check と /build が大部分同じ | 1つに統合 |
|
|
20
|
-
|
|
21
|
-
## コマンドとスキルの使い分け
|
|
22
|
-
|
|
23
|
-
| 種類 | 用途 | 例 |
|
|
24
|
-
|------|------|----|
|
|
25
|
-
| コマンド(commands/) | ユーザーが `/名前` で直接呼ぶ定型作業 | `/check`, `/commit`, `/review` |
|
|
26
|
-
| スキル(skills/) | 状況に応じて自動発動する専門知識 | `gen-security-checklist`, `proj-ref-task` |
|
|
27
|
-
|
|
28
|
-
**判定**: 「ユーザーが能動的に呼ぶか?」→ Yes = コマンド、No = スキル
|
|
29
|
-
|
|
30
|
-
## フォーマット
|
|
31
|
-
|
|
32
|
-
```markdown
|
|
33
|
-
# コマンドの説明
|
|
34
|
-
|
|
35
|
-
$ARGUMENTS を使って引数を受け取れる。
|
|
36
|
-
|
|
37
|
-
## 手順
|
|
38
|
-
|
|
39
|
-
1. ステップ1
|
|
40
|
-
2. ステップ2
|
|
41
|
-
...
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
## 注意事項
|
|
45
|
-
|
|
46
|
-
- コマンド名はケバブケース(例: `organize-logs`, `create-pr`)
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# スキル管理 判定基準
|
|
2
|
-
|
|
3
|
-
bo-self-improver がスキルを作成・更新・統合・削除する際の判定基準。
|
|
4
|
-
|
|
5
|
-
## 新規作成
|
|
6
|
-
|
|
7
|
-
| 信号 | 例 | アクション |
|
|
8
|
-
|------|-----|-----------|
|
|
9
|
-
| 同じ learning/pattern が log に2回以上 | 「sleep → availableFrom 置換」が複数回言及 | 新規スキル作成 |
|
|
10
|
-
| 同じ error+solution が2回以上 | prisma enum 追加漏れが繰り返し | 既存スキルに追記 or 独立スキル作成 |
|
|
11
|
-
| ユーザーが方針転換した | 「N日後は escalation の仕事」→ 設計原則 | 既存スキル(proj-ref-*)に追記 |
|
|
12
|
-
| 新しいライブラリ・API の使い方が蓄積 | Inngest step API の注意点 | `stack-*` に追記 or 新規作成 |
|
|
13
|
-
|
|
14
|
-
## 統合・削除
|
|
15
|
-
|
|
16
|
-
| 信号 | 例 | アクション |
|
|
17
|
-
|------|-----|-----------|
|
|
18
|
-
| skills_used に一度も登場しない | 利用頻度0 | 類似スキルへの統合を検討。根拠を記録 |
|
|
19
|
-
| 内容が80%以上重複する2スキル | dev-X と dev-Y がほぼ同じ | 1つに統合し、元を削除 |
|
|
20
|
-
| 内容が実装と乖離 | 記載と実コードが不一致 | 実装に合わせて更新 or 削除 |
|
|
21
|
-
| ギャップ分析で見逃し頻度が高い | 使うべき場面で使われていない | description / 発動条件を改善 |
|
|
22
|
-
| 有効性分析でエラー防止に寄与していない | 使ったのにエラー発生 | チェックリスト項目を追加 |
|
|
23
|
-
|
|
24
|
-
## 命名規則
|
|
25
|
-
|
|
26
|
-
3レイヤールール:
|
|
27
|
-
|
|
28
|
-
| プレフィックス | 対象 |
|
|
29
|
-
|---------------|------|
|
|
30
|
-
| `dev-` | 開発プラクティス・エラー解決・設計 |
|
|
31
|
-
| `review-` | コードレビューチェックリスト |
|
|
32
|
-
| `web-` | UI/UX デザイン・心理学・ツールキット |
|
|
33
|
-
| `meta-` | メタ管理(自己改善・ログ・ルーティング) |
|
|
34
|
-
| `sync-` | 同期(ドキュメント・ナレッジ) |
|
|
35
|
-
| `stack-` | 特定ライブラリ/フレームワーク |
|
|
36
|
-
| `proj-` | 現プロジェクト専用 |
|
|
37
|
-
|
|
38
|
-
スキル名は `{prefix}-{カテゴリ}-{対象}` の形式(例: `dev-error-resolver`, `review-security`, `web-ui-toolkit`, `stack-inngest`, `proj-ref-task`)。
|
|
39
|
-
|
|
40
|
-
## フォーマット
|
|
41
|
-
|
|
42
|
-
```markdown
|
|
43
|
-
---
|
|
44
|
-
name: { スキル名 }
|
|
45
|
-
description: { 1行の説明 }
|
|
46
|
-
argument-hint: { 引数のヒント(任意) }
|
|
47
|
-
---
|
|
48
|
-
|
|
49
|
-
# {スキル名}
|
|
50
|
-
|
|
51
|
-
## 内容
|
|
52
|
-
|
|
53
|
-
{知識・チェックリスト・ガイドライン等}
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
## 注意事項
|
|
57
|
-
|
|
58
|
-
- `dev-tool-skill-creator` のフォーマット(SKILL.md + frontmatter)に従う
|
|
59
|
-
- README.md の更新も忘れずに行う
|
|
@@ -1,359 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Analyze log.jsonl for self-improvement structured reports.
|
|
3
|
-
|
|
4
|
-
Performs cursor management, diff extraction, frequency tallying, agent gap detection,
|
|
5
|
-
and effectiveness analysis in batch, outputting JSON for the LLM to make improvement decisions.
|
|
6
|
-
|
|
7
|
-
Usage:
|
|
8
|
-
python3 analyze.py # Auto-detect log path
|
|
9
|
-
python3 analyze.py --log-base PATH # Explicit log path
|
|
10
|
-
python3 analyze.py --update-cursor # Update cursor after analysis
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
import json
|
|
14
|
-
import subprocess
|
|
15
|
-
import sys
|
|
16
|
-
from collections import Counter
|
|
17
|
-
from datetime import date, datetime
|
|
18
|
-
from pathlib import Path
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# ---------------------------------------------------------------------------
|
|
22
|
-
# Log path resolution (delegate to resolve-log-path.py)
|
|
23
|
-
# ---------------------------------------------------------------------------
|
|
24
|
-
|
|
25
|
-
def _find_resolve_script() -> Path | None:
|
|
26
|
-
"""Find resolve-log-path.py via multiple strategies."""
|
|
27
|
-
import os
|
|
28
|
-
|
|
29
|
-
# Strategy 1: BO_CONTEXTS_DIR env var
|
|
30
|
-
ctx_dir = os.environ.get("BO_CONTEXTS_DIR")
|
|
31
|
-
if ctx_dir:
|
|
32
|
-
candidate = Path(ctx_dir).parent / "hooks" / "resolve-log-path.py"
|
|
33
|
-
if candidate.exists():
|
|
34
|
-
return candidate
|
|
35
|
-
|
|
36
|
-
# Strategy 2: Relative to this script (package layout)
|
|
37
|
-
candidate = Path(__file__).resolve().parent.parent.parent / "hooks" / "resolve-log-path.py"
|
|
38
|
-
if candidate.exists():
|
|
39
|
-
return candidate
|
|
40
|
-
|
|
41
|
-
# Strategy 3: require.resolve
|
|
42
|
-
try:
|
|
43
|
-
pkg_dir = subprocess.run(
|
|
44
|
-
["node", "-e", "console.log(require.resolve('beeops/package.json').replace('/package.json',''))"],
|
|
45
|
-
capture_output=True, text=True, check=True,
|
|
46
|
-
).stdout.strip()
|
|
47
|
-
candidate = Path(pkg_dir) / "hooks" / "resolve-log-path.py"
|
|
48
|
-
if candidate.exists():
|
|
49
|
-
return candidate
|
|
50
|
-
except Exception:
|
|
51
|
-
pass
|
|
52
|
-
|
|
53
|
-
return None
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def resolve_log_base() -> Path:
    """Return the directory that holds ``log.jsonl``.

    The lookup is delegated to ``resolve-log-path.py`` (run with the current
    interpreter) and its stdout is used as the path. When the script cannot
    be found, fails to run, or prints nothing, the default
    ``<cwd>/.claude/beeops/logs`` is returned instead.

    Returns:
        The resolved log base directory (not checked for existence).
    """
    fallback = Path.cwd() / ".claude" / "beeops" / "logs"

    resolver = _find_resolve_script()
    if resolver is None:
        return fallback

    try:
        result = subprocess.run(
            [sys.executable, str(resolver)],
            capture_output=True, text=True, check=True
        )
    except (OSError, subprocess.SubprocessError) as exc:
        # A broken resolver should not abort the whole analysis; report it on
        # stderr (stdout is reserved for the JSON report) and use the default.
        print(f"# resolve-log-path.py failed ({exc}); using {fallback}", file=sys.stderr)
        return fallback

    resolved = result.stdout.strip()
    # Path("") would silently mean "current directory", so treat empty output
    # as "no answer".
    return Path(resolved) if resolved else fallback
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# ---------------------------------------------------------------------------
|
|
69
|
-
# Step 1: Status check & analysis mode determination
|
|
70
|
-
# ---------------------------------------------------------------------------
|
|
71
|
-
|
|
72
|
-
def check_status(log_base: Path) -> dict:
    """Report how much of ``log.jsonl`` is new and pick the analysis mode.

    Args:
        log_base: Directory containing ``log.jsonl`` and the
            ``self-improve/`` state files (``.cursor``, ``.last-full``).

    Returns:
        A dict with ``total`` (lines in the log), ``cursor`` (lines already
        analyzed), ``new_lines`` and ``mode``:

        * ``"no_log"``    - ``log.jsonl`` does not exist.
        * ``"no_new"``    - nothing after the cursor.
        * ``"diff"``      - analyze only the new lines.
        * ``"diff+full"`` - at least 50 new lines, or 5+ days since the date
          stored in ``.last-full``.

        For the two ``diff`` modes ``last_full`` and ``days_since_full`` are
        included as well.
    """
    log_file = log_base / "log.jsonl"
    state_dir = log_base / "self-improve"
    cursor_file = state_dir / ".cursor"
    last_full_file = state_dir / ".last-full"

    if not log_file.exists():
        return {"total": 0, "cursor": 0, "new_lines": 0, "mode": "no_log"}

    # Context manager so the handle is closed deterministically.
    with open(log_file, encoding="utf-8") as f:
        total = sum(1 for _ in f)

    cursor = 0
    if cursor_file.exists():
        try:
            # Clamp: a negative cursor would inflate new_lines beyond total.
            cursor = max(int(cursor_file.read_text().strip()), 0)
        except ValueError:
            # Corrupt or empty cursor file: start over from the first line
            # rather than crashing.
            cursor = 0
    new_lines = total - cursor

    if new_lines <= 0:
        return {"total": total, "cursor": cursor, "new_lines": 0, "mode": "no_new"}

    last_full = last_full_file.read_text().strip() if last_full_file.exists() else "1970-01-01"
    try:
        last_full_date = datetime.strptime(last_full, "%Y-%m-%d").date()
        days_since_full = (date.today() - last_full_date).days
    except ValueError:
        # Unparseable date: treat the last full analysis as long overdue.
        days_since_full = 999

    mode = "diff"
    if new_lines >= 50 or days_since_full >= 5:
        mode = "diff+full"

    return {
        "total": total,
        "cursor": cursor,
        "new_lines": new_lines,
        "mode": mode,
        "last_full": last_full,
        "days_since_full": days_since_full,
    }
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
# ---------------------------------------------------------------------------
|
|
109
|
-
# Diff entry extraction
|
|
110
|
-
# ---------------------------------------------------------------------------
|
|
111
|
-
|
|
112
|
-
def get_diff_entries(log_base: Path, cursor: int) -> list[dict]:
    """Return the log entries that come after the cursor.

    Args:
        log_base: Directory containing ``log.jsonl``.
        cursor: Number of leading lines to skip (already analyzed).

    Returns:
        The parsed JSON objects from every later line, in file order. Lines
        that are not valid JSON, or whose JSON value is not an object, are
        skipped.
    """
    log_file = log_base / "log.jsonl"
    entries: list[dict] = []
    with open(log_file, encoding="utf-8") as f:
        for line_no, line in enumerate(f, 1):
            if line_no <= cursor:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # The analyzers call .get() on every entry, so a stray list,
            # string or number would crash them later on.
            if isinstance(record, dict):
                entries.append(record)
    return entries
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
# ---------------------------------------------------------------------------
|
|
128
|
-
# Step 2: Frequency analysis
|
|
129
|
-
# ---------------------------------------------------------------------------
|
|
130
|
-
|
|
131
|
-
def list_resources() -> dict[str, list[str]]:
    """List the skills, agents and commands that are currently installed.

    Both ``~/.claude`` and ``<cwd>/.claude`` are scanned and the names are
    merged:

    * skills   - sub-directories of ``skills/`` that contain a ``SKILL.md``
    * agents   - ``agents/*.md`` stems, excluding ``README.md``
    * commands - ``commands/*.md`` stems, excluding ``README.md``

    Returns:
        A dict with the keys ``skills``, ``agents`` and ``commands``, each
        mapped to a sorted list of unique names.
    """
    found: dict[str, set[str]] = {"skills": set(), "agents": set(), "commands": set()}

    for base in (Path.home() / ".claude", Path.cwd() / ".claude"):
        skills_dir = base / "skills"
        # is_dir() rather than exists(): iterdir() raises if the path turns
        # out to be a regular file.
        if skills_dir.is_dir():
            for entry in skills_dir.iterdir():
                if entry.is_dir() and (entry / "SKILL.md").exists():
                    found["skills"].add(entry.name)

        # Agents and commands share the same one-markdown-file-per-resource layout.
        for kind in ("agents", "commands"):
            kind_dir = base / kind
            if kind_dir.is_dir():
                found[kind].update(
                    md.stem for md in kind_dir.glob("*.md") if md.name != "README.md"
                )

    return {kind: sorted(names) for kind, names in found.items()}
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def analyze_frequency(entries: list[dict], all_resources: dict) -> dict:
    """Tally skills_used / agents_used / commands_used in diff logs.

    Args:
        entries: Log entries to analyze.
        all_resources: Installed resource names per kind (``skills``,
            ``agents``, ``commands``); a missing kind is treated as empty.

    Returns:
        A dict with ``counts`` (usage count per name, most used first) and
        ``classification``, which splits each kind into ``high`` (used 3+
        times), ``low`` (used once or twice) and ``unused`` (installed but
        absent from the entries).
    """
    fields = {
        "skills": "skills_used",
        "agents": "agents_used",
        "commands": "commands_used",
    }
    counts: dict[str, Counter] = {kind: Counter() for kind in fields}

    for entry in entries:
        for kind, field in fields.items():
            # `or []` also covers an explicit JSON null, not just a missing key.
            counts[kind].update(entry.get(field) or [])

    def classify(counter: Counter, all_names: list[str]) -> dict:
        used = set(counter)
        return {
            "high": sorted(n for n in used if counter[n] >= 3),
            "low": sorted(n for n in used if 0 < counter[n] < 3),
            "unused": sorted(n for n in all_names if n not in used),
        }

    return {
        "counts": {kind: dict(c.most_common()) for kind, c in counts.items()},
        "classification": {
            kind: classify(counts[kind], all_resources.get(kind, [])) for kind in counts
        },
    }
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
# ---------------------------------------------------------------------------
|
|
188
|
-
# Step 3: Agent gap analysis (rule-based)
|
|
189
|
-
# ---------------------------------------------------------------------------
|
|
190
|
-
|
|
191
|
-
def _rule_text(entry: dict, key: str) -> str:
    """Return ``entry[key]`` when it is a string, otherwise ``""``."""
    value = entry.get(key)
    return value if isinstance(value, str) else ""


def _rule_changed_files(entry: dict) -> list[str]:
    """Return the ``file`` strings of the entry's well-formed ``changes``."""
    return [
        change["file"]
        for change in entry.get("changes") or []
        if isinstance(change, dict) and isinstance(change.get("file"), str)
    ]


def _rule_error_tags(entry: dict) -> list[str]:
    """Return every string tag attached to the entry's ``errors``."""
    return [
        tag
        for err in entry.get("errors") or []
        if isinstance(err, dict)
        for tag in err.get("tags") or []
        if isinstance(tag, str)
    ]


# Rule table: each item names an agent and a predicate ("match") that receives
# a log entry and says whether that agent would have been expected for it.
# The predicates tolerate missing keys, JSON nulls and malformed items.
AGENT_RULES = [
    {
        # Review work, or any implementation / bugfix entry.
        "agent": "code-reviewer",
        "match": lambda e: (
            "review" in _rule_text(e, "category").lower()
            or _rule_text(e, "category").lower() in ("implementation", "bugfix")
        ),
    },
    {
        # An error tag containing one of the keywords (substring match, so
        # "typescript" matches "type").
        "agent": "build-error-resolver",
        "match": lambda e: any(
            keyword in tag
            for tag in _rule_error_tags(e)
            for keyword in ("build", "type", "prisma", "tsc")
        ),
    },
    {
        # A changed file path mentioning prisma or migration (case-sensitive).
        "agent": "database-reviewer",
        "match": lambda e: any(
            "prisma" in path or "migration" in path
            for path in _rule_changed_files(e)
        ),
    },
    {
        "agent": "refactor-cleaner",
        "match": lambda e: "refactor" in _rule_text(e, "category").lower(),
    },
    {
        # A title containing "Phase" (case-sensitive), or 8+ recorded changes.
        "agent": "planner",
        "match": lambda e: (
            "Phase" in _rule_text(e, "title")
            or len(e.get("changes") or []) >= 8
        ),
    },
    {
        # A changed file path mentioning auth, security or rls (any case).
        "agent": "security-reviewer",
        "match": lambda e: any(
            any(kw in path.lower() for kw in ("auth", "security", "rls"))
            for path in _rule_changed_files(e)
        ),
    },
]
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
def analyze_agent_gaps(entries: list[dict]) -> dict:
    """Detect agents that should have been used but weren't.

    Every entry is checked against each rule in ``AGENT_RULES``; a rule that
    matches while its agent is absent from the entry's ``agents_used``
    counts as one miss.

    Args:
        entries: Log entries to analyze.

    Returns:
        ``{agent: {"missed": count, "examples": [titles]}}`` ordered by miss
        count (highest first), with at most 5 example titles per agent.
    """
    gaps: Counter = Counter()
    gap_examples: dict[str, list[str]] = {}

    for entry in entries:
        # `or []` also covers an explicit JSON null.
        actual = set(entry.get("agents_used") or [])
        for rule in AGENT_RULES:
            agent = rule["agent"]
            if agent in actual or not rule["match"](entry):
                continue
            gaps[agent] += 1
            examples = gap_examples.setdefault(agent, [])
            if len(examples) < 5:
                # A missing or null title is reported as "?".
                examples.append(entry.get("title") or "?")

    return {
        agent: {"missed": count, "examples": gap_examples[agent]}
        for agent, count in gaps.most_common()
    }
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
# ---------------------------------------------------------------------------
|
|
261
|
-
# Step 4: Effectiveness analysis
|
|
262
|
-
# ---------------------------------------------------------------------------
|
|
263
|
-
|
|
264
|
-
def analyze_effectiveness(entries: list[dict]) -> dict:
    """Analyze correlation between skill usage and error occurrence.

    Args:
        entries: Log entries to analyze.

    Returns:
        A dict with:

        * ``total_entries`` / ``error_entries`` - entry counts; an entry
          counts as an error entry when its ``errors`` value is non-empty.
        * ``repeated_error_tags`` - error tags seen at least twice, with
          their counts.
        * ``skills_with_errors`` - per skill, the number of error entries
          that used it.
        * ``effective_skills`` - per skill, the number of error-free entries
          that used it.
    """
    error_tags: Counter = Counter()
    skills_with_errors: Counter = Counter()
    effective_skills: Counter = Counter()
    error_entry_count = 0

    for entry in entries:
        errors = entry.get("errors")
        # `or []` also covers an explicit JSON null.
        skills = entry.get("skills_used") or []

        if not errors:
            effective_skills.update(skills)
            continue

        error_entry_count += 1
        skills_with_errors.update(skills)
        for err in errors:
            # Only structured error objects carry tags; skip anything else.
            if isinstance(err, dict):
                error_tags.update(err.get("tags") or [])

    return {
        "total_entries": len(entries),
        "error_entries": error_entry_count,
        "repeated_error_tags": {t: c for t, c in error_tags.most_common() if c >= 2},
        "skills_with_errors": dict(skills_with_errors),
        "effective_skills": dict(effective_skills),
    }
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
# ---------------------------------------------------------------------------
|
|
295
|
-
# Cursor update
|
|
296
|
-
# ---------------------------------------------------------------------------
|
|
297
|
-
|
|
298
|
-
def update_cursor(log_base: Path, total: int, full_analysis: bool):
    """Record how far the log has been analyzed.

    Args:
        log_base: Directory containing ``log.jsonl``; the state files live
            in its ``self-improve/`` sub-directory, created when missing.
        total: Line count to store in ``.cursor``.
        full_analysis: When true, today's date (ISO format) is also written
            to ``.last-full``.
    """
    state_dir = log_base / "self-improve"
    state_dir.mkdir(parents=True, exist_ok=True)

    (state_dir / ".cursor").write_text(f"{total}\n")

    if not full_analysis:
        return

    today = date.today().isoformat()
    (state_dir / ".last-full").write_text(f"{today}\n")
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
# ---------------------------------------------------------------------------
|
|
310
|
-
# Main
|
|
311
|
-
# ---------------------------------------------------------------------------
|
|
312
|
-
|
|
313
|
-
def main():
    """Command-line entry point: print the analysis report as JSON.

    Options:
        --log-base PATH   use PATH instead of auto-detecting the log directory
        --update-cursor   after printing the report, store the analyzed line
                          count (and, in "diff+full" mode, today's date)

    When there is no log or nothing new, only the status object is printed
    and the cursor is left untouched.
    """
    import argparse

    cli = argparse.ArgumentParser(description="log.jsonl self-improvement analysis")
    cli.add_argument("--log-base", help="Explicit log directory path")
    cli.add_argument("--update-cursor", action="store_true", help="Update cursor after analysis")
    opts = cli.parse_args()

    if opts.log_base:
        log_base = Path(opts.log_base)
    else:
        log_base = resolve_log_base()
    status = check_status(log_base)

    def emit(payload):
        # The report goes to stdout; ensure_ascii=False keeps non-ASCII text readable.
        print(json.dumps(payload, indent=2, ensure_ascii=False))

    if status["mode"] in ("no_log", "no_new"):
        emit({"status": status})
        return

    entries = get_diff_entries(log_base, status["cursor"])
    all_resources = list_resources()

    report = {
        "status": status,
        "frequency": analyze_frequency(entries, all_resources),
        "all_resources": all_resources,
        "agent_gaps": analyze_agent_gaps(entries),
        "effectiveness": analyze_effectiveness(entries),
    }

    # Condensed per-entry view, added last so it stays the final key.
    summaries = []
    for e in entries:
        summaries.append({
            "title": e.get("title"),
            "category": e.get("category"),
            "has_errors": bool(e.get("errors")),
            "skills_used": e.get("skills_used", []),
            "agents_used": e.get("agents_used", []),
            "learnings": e.get("learnings", []),
            "patterns": e.get("patterns", []),
        })
    report["entries"] = summaries

    emit(report)

    if not opts.update_cursor:
        return

    update_cursor(log_base, status["total"], status["mode"] == "diff+full")
    # The confirmation goes to stderr so stdout remains pure JSON.
    print(f"\n# Cursor updated: {status['total']}", file=sys.stderr)


if __name__ == "__main__":
    main()
|
|
File without changes
|