@rm0nroe/coach-claw 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +311 -0
- package/coach/README.md +99 -0
- package/coach/bin/aggregate_facets.py +274 -0
- package/coach/bin/analyze.py +678 -0
- package/coach/bin/bank.py +247 -0
- package/coach/bin/banner_themes.py +645 -0
- package/coach/bin/coach_paths.py +33 -0
- package/coach/bin/coexistence_check.py +129 -0
- package/coach/bin/configure.py +245 -0
- package/coach/bin/cron_check.py +81 -0
- package/coach/bin/default_statusline.py +135 -0
- package/coach/bin/doctor.py +663 -0
- package/coach/bin/insights-llm.sh +264 -0
- package/coach/bin/insights.sh +163 -0
- package/coach/bin/insights_window.py +111 -0
- package/coach/bin/marker_io.py +154 -0
- package/coach/bin/merge.py +671 -0
- package/coach/bin/redact.py +86 -0
- package/coach/bin/render_env.py +148 -0
- package/coach/bin/reward_hints.py +87 -0
- package/coach/bin/run-insights.sh +20 -0
- package/coach/bin/run_with_lock.py +85 -0
- package/coach/bin/scoring.py +260 -0
- package/coach/bin/skill_inventory.py +215 -0
- package/coach/bin/stats.py +459 -0
- package/coach/bin/status.py +293 -0
- package/coach/bin/statusline_self_patch.py +205 -0
- package/coach/bin/statusline_variants.py +146 -0
- package/coach/bin/statusline_wrap.py +244 -0
- package/coach/bin/statusline_wrap_action.py +460 -0
- package/coach/bin/switch_to_plugin.py +256 -0
- package/coach/bin/themes.py +256 -0
- package/coach/bin/user_config.py +176 -0
- package/coach/bin/xp_accounting.py +98 -0
- package/coach/changelog.md +4 -0
- package/coach/default-statusline-command.sh +19 -0
- package/coach/default-statusline-wrap-command.sh +15 -0
- package/coach/profile.yaml +37 -0
- package/coach/tests/conftest.py +13 -0
- package/coach/tests/test_aggregate_facets.py +379 -0
- package/coach/tests/test_analyze_aggregate.py +153 -0
- package/coach/tests/test_analyze_redaction.py +105 -0
- package/coach/tests/test_analyze_strengths.py +165 -0
- package/coach/tests/test_bank_atomic_write.py +61 -0
- package/coach/tests/test_bank_concurrency.py +126 -0
- package/coach/tests/test_banner_themes.py +981 -0
- package/coach/tests/test_celebrate_dedup.py +409 -0
- package/coach/tests/test_coach_paths.py +50 -0
- package/coach/tests/test_coexistence_check.py +128 -0
- package/coach/tests/test_configure.py +258 -0
- package/coach/tests/test_cron_check.py +118 -0
- package/coach/tests/test_cron_nudge_hook.py +134 -0
- package/coach/tests/test_detection_parity.py +105 -0
- package/coach/tests/test_doctor.py +595 -0
- package/coach/tests/test_hook_bespoke_dispatch.py +288 -0
- package/coach/tests/test_hook_module_resolution.py +116 -0
- package/coach/tests/test_hook_relevance.py +996 -0
- package/coach/tests/test_hook_render_env.py +364 -0
- package/coach/tests/test_hook_session_id_guard.py +160 -0
- package/coach/tests/test_insights_llm.py +759 -0
- package/coach/tests/test_insights_llm_venv_path.py +109 -0
- package/coach/tests/test_insights_window.py +237 -0
- package/coach/tests/test_install.py +1150 -0
- package/coach/tests/test_install_pyyaml_fallback.py +142 -0
- package/coach/tests/test_marker_consumption.py +167 -0
- package/coach/tests/test_marker_writer_locking.py +305 -0
- package/coach/tests/test_merge.py +413 -0
- package/coach/tests/test_no_broken_mktemp.py +90 -0
- package/coach/tests/test_render_env.py +137 -0
- package/coach/tests/test_render_env_glyphs.py +119 -0
- package/coach/tests/test_reward_hints.py +59 -0
- package/coach/tests/test_scoring.py +147 -0
- package/coach/tests/test_session_start_weekly_trigger.py +92 -0
- package/coach/tests/test_skill_inventory.py +368 -0
- package/coach/tests/test_stats_hybrid.py +142 -0
- package/coach/tests/test_status_accounting.py +41 -0
- package/coach/tests/test_statusline_failsafe.py +70 -0
- package/coach/tests/test_statusline_self_patch.py +261 -0
- package/coach/tests/test_statusline_variants.py +110 -0
- package/coach/tests/test_statusline_wrap.py +196 -0
- package/coach/tests/test_statusline_wrap_action.py +408 -0
- package/coach/tests/test_switch_to_plugin.py +360 -0
- package/coach/tests/test_themes.py +104 -0
- package/coach/tests/test_user_config.py +160 -0
- package/coach/tests/test_wrap_announce_hook.py +130 -0
- package/coach/tests/test_xp_accounting.py +55 -0
- package/hooks/coach-session-start.py +536 -0
- package/hooks/coach-user-prompt.py +2288 -0
- package/install-launchd.sh +102 -0
- package/install.sh +597 -0
- package/launchd/com.local.claude-coach.plist.template +34 -0
- package/launchd/run-insights.sh +20 -0
- package/npm/coach-claw.js +259 -0
- package/package.json +52 -0
- package/requirements.txt +11 -0
- package/settings-snippet.json +31 -0
- package/skills/coach/SKILL.md +107 -0
- package/skills/coach-insights/SKILL.md +78 -0
- package/skills/config/SKILL.md +149 -0
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Structural behavior analyzer — the deterministic cron path.
|
|
4
|
+
|
|
5
|
+
Reads redacted session transcripts and extracts DETERMINISTIC signals only
|
|
6
|
+
(tool-use counts, timing, presence/absence of planning artifacts). Never
|
|
7
|
+
emits content quotes — strictly aggregated counts.
|
|
8
|
+
|
|
9
|
+
As of v0.5.0 this is invoked **only by the daily deterministic
|
|
10
|
+
scheduled runner** (`run-insights.sh` → `insights.sh`). The on-demand
|
|
11
|
+
`/coach-insights` skill and the SessionStart-triggered weekly path
|
|
12
|
+
both delegate to `insights-llm.sh`, which invokes Claude Code's
|
|
13
|
+
built-in `/insights` for the side effect of refreshing
|
|
14
|
+
`facets/*.json` sidecars and then aggregates those structured
|
|
15
|
+
sidecars deterministically via `aggregate_facets.py` (no prose
|
|
16
|
+
translation — facets enum keys are stable kebab/snake-case slugs by
|
|
17
|
+
Anthropic's data contract). Two distinct paths with two distinct
|
|
18
|
+
IO contracts.
|
|
19
|
+
|
|
20
|
+
Input: space-separated transcript paths as argv
|
|
21
|
+
Output: JSON on stdout with {detections:[...], summary:{n_sessions, ...}}
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import json
|
|
26
|
+
import re
|
|
27
|
+
import sys
|
|
28
|
+
from collections import Counter
|
|
29
|
+
from datetime import datetime
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
|
|
32
|
+
from redact import redact
|
|
33
|
+
from scoring import matches_action
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _project_name_from_slug(slug: str) -> str:
|
|
37
|
+
"""Convert a Claude Code transcript dir name into the user-readable
|
|
38
|
+
project name. Claude Code encodes a session's cwd as a slug where
|
|
39
|
+
slashes become dashes, e.g. ``-Users-alice-Desktop-dev-widget``.
|
|
40
|
+
The conservative recovery is the last dash-segment.
|
|
41
|
+
|
|
42
|
+
Limitation: hyphenated original project names (e.g. ``acme-app``)
|
|
43
|
+
collapse to the last segment (``app``). The hook's tokenizer already
|
|
44
|
+
splits cwd anchors on dashes, so partial matches still fire (cwd
|
|
45
|
+
anchor ``{acme, app}`` ∩ skill.projects token set ``{app}`` is
|
|
46
|
+
non-empty). Users who want the full hyphenated form on a tagged
|
|
47
|
+
skill can declare it in SKILL.md frontmatter; explicit `projects:`
|
|
48
|
+
supersedes inference everywhere.
|
|
49
|
+
"""
|
|
50
|
+
if not slug:
|
|
51
|
+
return ""
|
|
52
|
+
return slug.rstrip("-").split("-")[-1].lower()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def parse_ts(v):
    """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

    Args:
        v: Timestamp value from a transcript record (normally a string such
            as ``2024-01-02T03:04:05.678Z``). Falsy values mean "missing".

    Returns:
        A timezone-aware ``datetime``, or ``None`` when ``v`` is missing or
        cannot be parsed.

    Every result carries a tzinfo so callers can compare and subtract
    timestamps without hitting ``TypeError`` on a naive/aware mix.
    NOTE(review): naive inputs are assumed to be UTC — confirm against the
    transcript producer if naive timestamps can actually occur.
    """
    # Local import: the file-level import only brings ``datetime`` into scope.
    from datetime import timezone

    if not v:
        return None
    text = str(v).strip()
    # ``datetime.fromisoformat`` only understands the "Z" designator from
    # Python 3.11 on, so spell it as an explicit UTC offset. Only the
    # trailing designator is rewritten, never a "Z" elsewhere in the text.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _iter_redacted_records(path: Path):
    """Yield parsed JSON values from a JSONL transcript, redacting first.

    Each non-blank line is passed through ``redact`` *before* it is parsed,
    so unredacted text never reaches ``json.loads`` or the caller.

    Args:
        path: Transcript file to read.

    Yields:
        The decoded JSON value of every line that still parses after
        redaction. Lines that fail to parse are skipped. A file that cannot
        be opened yields nothing (best-effort: one bad transcript must not
        abort the whole run).
    """
    try:
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        # Undecodable bytes are replaced rather than raising mid-file.
        fh = path.open(encoding="utf-8", errors="replace")
    except (OSError, ValueError):
        # ValueError covers paths Python refuses outright (embedded NUL).
        return
    with fh:
        for line in fh:
            if not line.strip():
                continue
            redacted = redact(line)
            try:
                record = json.loads(redacted)
            except (ValueError, RecursionError):
                # json.JSONDecodeError is a ValueError subclass.
                continue
            # Yield outside the try so errors raised by the consumer are
            # never mistaken for a parse failure.
            yield record
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def analyze_session(path: Path) -> dict | None:
    """Collect deterministic structural signals from one session transcript.

    Streams the redacted records of ``path`` and tallies tool usage, turn
    counts, first/last timestamps and the ordinal position (``*_idx``) of
    the first/last edit, plan, test run, commit, read and search tool call.
    Only counts and positions are kept — no transcript content.

    Malformed input is tolerated rather than fatal: records that are not
    JSON objects are skipped, and a non-object ``message``/``input`` or a
    non-string tool name, command or skill name is treated as absent.

    Args:
        path: Transcript file. ``path.stem[:8]`` becomes ``session_hash``
            and ``path.parent.name`` becomes ``project``.

    Returns:
        The signature dict. An unreadable file produces a signature with
        zero turns (the record iterator yields nothing). The ``| None`` in
        the annotation is kept for callers that already guard against it;
        this implementation always returns a dict.
    """
    sig = {
        "path": str(path),
        "session_hash": path.stem[:8],
        "project": path.parent.name,
        "tool_counts": Counter(),
        "user_turns": 0,
        "assistant_turns": 0,
        "first_ts": None,
        "last_ts": None,
        "first_user_ts": None,
        "first_edit_ts": None,
        "first_edit_idx": None,
        "last_edit_idx": None,
        "first_plan_ts": None,
        "first_plan_idx": None,
        "first_test_idx": None,
        "last_test_idx": None,
        "first_commit_idx": None,
        "last_commit_idx": None,
        "first_read_idx": None,
        "first_search_idx": None,
        "task_create_count": 0,
        "exit_plan_count": 0,
        "edit_count": 0,
        "write_count": 0,
        "bash_count": 0,
        "read_count": 0,
        "grep_count": 0,
        "glob_count": 0,
        "agent_count": 0,
        "skill_count": 0,
        "skills_invoked": Counter(),
        "commit_count": 0,
        "test_run_count": 0,
        "has_any_test_run": False,
        "has_any_commit": False,
        "bash_rm_rf_count": 0,
    }

    # Tool name -> counter it bumps. ``None`` means the tool is tracked as a
    # planning artifact but has no dedicated counter.
    mutation_counters = {
        "Edit": "edit_count",
        "MultiEdit": "edit_count",
        "Write": "write_count",
    }
    planning_counters = {
        "Plan": None,
        "TaskCreate": "task_create_count",
        "TodoWrite": "task_create_count",
        "ExitPlanMode": "exit_plan_count",
    }
    search_counters = {"Grep": "grep_count", "Glob": "glob_count"}

    def _set_once(key, value):
        """Record ``value`` under ``key`` only the first time one is seen."""
        if value is not None and sig[key] is None:
            sig[key] = value

    event_idx = 0  # ordinal of each tool_use block across the whole session
    for rec in _iter_redacted_records(path):
        if not isinstance(rec, dict):
            # A JSONL line may decode to a list/string/number; ignore it.
            continue

        ts = parse_ts(rec.get("timestamp"))
        if ts:
            if not sig["first_ts"] or ts < sig["first_ts"]:
                sig["first_ts"] = ts
            if not sig["last_ts"] or ts > sig["last_ts"]:
                sig["last_ts"] = ts

        rec_type = rec.get("type")
        msg = rec.get("message")
        if not isinstance(msg, dict):
            msg = {}
        role = msg.get("role")

        if rec_type == "user" and role == "user":
            sig["user_turns"] += 1
            _set_once("first_user_ts", ts)

        if rec_type != "assistant" or role != "assistant":
            continue
        sig["assistant_turns"] += 1

        content = msg.get("content")
        if not isinstance(content, list):
            continue

        for block in content:
            if not isinstance(block, dict) or block.get("type") != "tool_use":
                continue
            idx = event_idx
            event_idx += 1

            name = block.get("name", "")
            if not isinstance(name, str):
                name = ""  # keep the Counter key hashable and JSON-friendly
            tool_input = block.get("input")
            if not isinstance(tool_input, dict):
                tool_input = {}
            sig["tool_counts"][name] += 1

            if name in mutation_counters:
                sig[mutation_counters[name]] += 1
                _set_once("first_edit_ts", ts)
                _set_once("first_edit_idx", idx)
                sig["last_edit_idx"] = idx
            elif name in planning_counters:
                counter_key = planning_counters[name]
                if counter_key:
                    sig[counter_key] += 1
                _set_once("first_plan_ts", ts)
                _set_once("first_plan_idx", idx)
            elif name == "Bash":
                sig["bash_count"] += 1
                # One command may count as both a commit and a test run.
                if matches_action(block, "commit"):
                    sig["commit_count"] += 1
                    sig["has_any_commit"] = True
                    _set_once("first_commit_idx", idx)
                    sig["last_commit_idx"] = idx
                if matches_action(block, "test_run"):
                    sig["test_run_count"] += 1
                    sig["has_any_test_run"] = True
                    _set_once("first_test_idx", idx)
                    sig["last_test_idx"] = idx
                cmd = tool_input.get("command")
                if isinstance(cmd, str) and re.search(r"\brm\s+-rf?\b", cmd):
                    sig["bash_rm_rf_count"] += 1
            elif name == "Read":
                sig["read_count"] += 1
                _set_once("first_read_idx", idx)
            elif name in search_counters:
                sig[search_counters[name]] += 1
                _set_once("first_search_idx", idx)
            elif name == "Agent":
                sig["agent_count"] += 1
            elif name == "Skill":
                sig["skill_count"] += 1
                invoked = tool_input.get("skill")
                if isinstance(invoked, str) and invoked:
                    sig["skills_invoked"][invoked] += 1

    if sig["first_user_ts"] and sig["first_edit_ts"]:
        sig["sec_first_user_to_first_edit"] = (
            sig["first_edit_ts"] - sig["first_user_ts"]
        ).total_seconds()
    else:
        sig["sec_first_user_to_first_edit"] = None

    # A plan counts as "before edit" when it precedes the first mutation or
    # when the session never mutated anything. ``first_plan_ts`` is only ever
    # set together with ``first_plan_idx``, so the index alone decides this;
    # no timestamp fallback is needed.
    plan_idx = sig["first_plan_idx"]
    edit_idx = sig["first_edit_idx"]
    sig["plan_before_edit"] = plan_idx is not None and (
        edit_idx is None or plan_idx <= edit_idx
    )

    # Plain dicts so the signature carries no Counter-specific behaviour.
    sig["tool_counts"] = dict(sig["tool_counts"])
    sig["skills_invoked"] = dict(sig["skills_invoked"])
    return sig
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _flat_and_per_project_skill_counts(
    sessions: list[dict],
) -> tuple[Counter, dict]:
    """Tally skill invocations overall and broken down by project.

    Kept separate from ``aggregate()`` so it can run for windows of any
    size: windows with fewer than three sessions skip pattern detection but
    still contribute to the rolling invocation history.

    Returns:
        ``(skills_used, skills_by_project)`` — a flat ``Counter`` keyed by
        skill name, and ``{project: {skill: count}}`` keyed by the name
        ``_project_name_from_slug`` derives from each session's ``project``.
        Sessions whose project name comes out empty count only toward the
        flat total.
    """
    totals: Counter = Counter()
    per_project: dict[str, dict[str, int]] = {}
    for session in sessions:
        tally = session.get("skills_invoked") or {}
        if not tally:
            continue
        project = _project_name_from_slug(session.get("project") or "")
        # A project bucket is only created once there is something to put in it.
        bucket = per_project.setdefault(project, {}) if project else None
        for skill, hits in tally.items():
            totals[skill] += hits
            if bucket is not None:
                bucket[skill] = bucket.get(skill, 0) + hits
    return totals, per_project
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def aggregate(sessions: list[dict]) -> tuple[list[dict], dict]:
    """Turn per-session signatures into detections plus a window summary.

    Weakness detections fire when at least three sessions match a pattern.
    Strength detections (``direction: "positive"``) are stricter: at least
    three matching sessions *and* at least 60% of the relevant sessions.

    Args:
        sessions: Signature dicts as produced by ``analyze_session``.

    Returns:
        ``(detections, summary)``. With fewer than three sessions no pattern
        detection runs; the summary then carries only ``n_sessions``, a
        ``note`` and the skill-usage counts.
    """
    n = len(sessions)
    detections: list[dict] = []
    skills_used, skills_by_project = _flat_and_per_project_skill_counts(sessions)

    if n < 3:
        # Too small a sample to call anything a pattern, but the skill counts
        # still feed the rolling invocation history.
        return detections, {
            "n_sessions": n,
            "note": "too few sessions for detection",
            "skills_used": dict(skills_used),
            "skills_by_project": skills_by_project,
        }

    # ---- shared helpers -------------------------------------------------

    def _mutations(s: dict) -> int:
        # Strict lookup: weakness detections expect complete signatures.
        return s["edit_count"] + s["write_count"]

    def _edits(s: dict) -> int:
        # Lenient lookup used by the strength detections.
        return int(s.get("edit_count", 0) or 0) + int(s.get("write_count", 0) or 0)

    def _where(s: dict) -> str:
        return f"session {s['session_hash']} in {s['project']}: "

    def _reward(action: str, xp: int, description: str) -> dict:
        # A fresh dict per detection so entries never share a mutable hint.
        return {"action": action, "xp": xp, "description": description}

    def _precedes(a, b) -> bool:
        return a is not None and b is not None and a <= b

    def _follows(a, b) -> bool:
        return a is not None and b is not None and a > b

    def _emit(ident, title, nudge, examples, hits, priority,
              reward=None, direction=None) -> None:
        # Key order is part of the emitted JSON; keep it stable.
        entry = {"id": ident, "name": title}
        if direction is not None:
            entry["direction"] = direction
        entry.update(
            nudge=nudge,
            examples=examples,
            priority=priority,
            source_session_ids=[s["session_hash"] for s in hits[:5]],
        )
        if reward:
            entry["reward_hint"] = reward
        detections.append(entry)

    def _strength(ident, title, nudge, relevant, good, describe,
                  reward=None, priority=2) -> None:
        # Strengths need repeated evidence and a majority of the relevant
        # window, which is deliberately stricter than the weakness checks.
        if not relevant or len(good) < 3:
            return
        if len(good) / len(relevant) < 0.60:
            return
        _emit(ident, title, nudge, [describe(s) for s in good[:3]], good,
              priority, reward, "positive")

    # ---- weakness detections --------------------------------------------

    # 1. under-planning
    def _rushed(s: dict) -> bool:
        return (
            _mutations(s) >= 5
            and s.get("sec_first_user_to_first_edit") is not None
            and s["sec_first_user_to_first_edit"] <= 120
            and not s["plan_before_edit"]
        )

    under_plan = [s for s in sessions if _rushed(s)]
    if len(under_plan) >= 3:
        _emit(
            "under-planning",
            "under-planning",
            (
                f"Across {len(under_plan)} of {n} recent sessions, editing "
                "started within 2 minutes of the first user turn with no "
                "TaskCreate/TodoWrite/ExitPlanMode call preceding the first "
                "Edit or Write."
            ),
            [
                _where(s)
                + f"{round(s['sec_first_user_to_first_edit'])}s to first edit, "
                + f"{_mutations(s)} total edits, no plan artifact"
                for s in under_plan[:3]
            ],
            under_plan,
            4,
        )

    # 2. edits-without-testing
    no_tests = [
        s for s in sessions
        if _mutations(s) >= 10 and not s["has_any_test_run"]
    ]
    if len(no_tests) >= 3:
        _emit(
            "edits-without-testing",
            "edits without testing",
            (
                f"Across {len(no_tests)} of {n} recent sessions with 10+ "
                "file mutations, zero pytest/jest/cargo/npm-test invocations "
                "were observed before session end."
            ),
            [
                _where(s) + f"{_mutations(s)} edits, 0 test runs"
                for s in no_tests[:3]
            ],
            no_tests,
            4,
            # Spelled out so the mapping survives a rewording of the nudge.
            _reward("test_run", 2, "test run (pytest / jest / cargo test / …)"),
        )

    # 3. commit-without-testing
    commit_no_test = [
        s for s in sessions
        if s["commit_count"] >= 1
        and not s["has_any_test_run"]
        and _mutations(s) >= 5
    ]
    if len(commit_no_test) >= 3:
        _emit(
            "commit-without-testing",
            "commit without testing",
            (
                f"In {len(commit_no_test)} of {n} recent sessions, "
                "`git commit` ran after 5+ edits without any test command "
                "executing in the same session."
            ),
            [
                _where(s)
                + f"{s['commit_count']} commit(s), {_mutations(s)} edits, 0 tests"
                for s in commit_no_test[:3]
            ],
            commit_no_test,
            3,
            _reward("test_run", 2, "test run (pytest / jest / cargo test / …)"),
        )

    # 4. heavy-agent-delegation
    heavy_agent = [s for s in sessions if s["agent_count"] >= 8]
    if len(heavy_agent) >= 3:
        avg = sum(s["agent_count"] for s in heavy_agent) / len(heavy_agent)
        busiest = sorted(heavy_agent, key=lambda x: -x["agent_count"])[:3]
        _emit(
            "heavy-agent-delegation",
            "heavy subagent delegation",
            (
                f"Across {len(heavy_agent)} of {n} recent sessions, "
                f"8+ Agent spawns were observed (avg {avg:.0f}/session)."
            ),
            [
                _where(s)
                + f"{s['agent_count']} Agent spawns, "
                + f"{_mutations(s)} edits"
                for s in busiest
            ],
            heavy_agent,  # source ids follow window order, not the sort
            2,
        )

    # 5. exploration-without-landing
    read_no_edit = [
        s for s in sessions
        if s["read_count"] >= 15
        and _mutations(s) == 0
        and s["assistant_turns"] >= 10
    ]
    if len(read_no_edit) >= 3:
        _emit(
            "exploration-without-landing",
            "exploration without landing",
            (
                f"In {len(read_no_edit)} of {n} recent sessions, "
                "15+ Read tool calls occurred with zero Edit/Write calls, "
                "suggesting exploration that did not conclude with a change."
            ),
            [
                _where(s)
                + f"{s['read_count']} reads, 0 edits, {s['assistant_turns']} assistant turns"
                for s in read_no_edit[:3]
            ],
            read_no_edit,
            2,
            # Landing a commit completes the tip. Keyword inference yields
            # nothing for this pattern, so the hint is a genuine addition.
            _reward("commit", 1, "git commit (land the change)"),
        )

    # 6. skipped-search-tools
    skipped_search = [
        s for s in sessions
        if s["read_count"] >= 20
        and (s["grep_count"] + s["glob_count"]) <= 2
    ]
    if len(skipped_search) >= 3:
        _emit(
            "skipped-search-tools",
            "skipped search tools",
            (
                f"In {len(skipped_search)} of {n} recent sessions, "
                "20+ Read calls were made with ≤2 Grep/Glob calls — "
                "reading files without first narrowing by search."
            ),
            [
                _where(s)
                + f"{s['read_count']} reads, {s['grep_count']}g+{s['glob_count']}gl search"
                for s in skipped_search[:3]
            ],
            skipped_search,
            2,
        )

    # ---- strength detections --------------------------------------------

    edit_sessions = [s for s in sessions if _edits(s) > 0]
    commit_sessions = [s for s in sessions if s.get("commit_count", 0) >= 1]
    read_sessions = [s for s in sessions if s.get("read_count", 0) >= 1]
    skill_sessions = [s for s in sessions if s.get("skill_count", 0) >= 1]

    tests_after_edits = [
        s for s in edit_sessions
        if s.get("test_run_count", 0) >= 1
        and _follows(s.get("last_test_idx"), s.get("last_edit_idx"))
    ]
    _strength(
        "tests-after-edits",
        "tests after edits",
        (
            f"In {len(tests_after_edits)} of {len(edit_sessions)} edit sessions, "
            "a test command ran after the final file mutation."
        ),
        edit_sessions,
        tests_after_edits,
        lambda s: (
            _where(s)
            + f"{_edits(s)} edits, {s['test_run_count']} test run(s) after edits"
        ),
        reward=_reward("test_run", 2, "test run (pytest / jest / cargo test / …)"),
    )

    plans_before_edits = [
        s for s in edit_sessions
        if s.get("plan_before_edit")
        or _precedes(s.get("first_plan_idx"), s.get("first_edit_idx"))
    ]
    _strength(
        "plans-before-edits",
        "plans before edits",
        (
            f"In {len(plans_before_edits)} of {len(edit_sessions)} edit sessions, "
            "a TaskCreate/TodoWrite/ExitPlanMode/Plan artifact appeared before editing."
        ),
        edit_sessions,
        plans_before_edits,
        lambda s: _where(s) + f"planning preceded {_edits(s)} edit(s)",
    )

    commits_gated_by_tests = [
        s for s in commit_sessions
        if s.get("test_run_count", 0) >= 1
        and _precedes(s.get("last_test_idx"), s.get("last_commit_idx"))
    ]
    _strength(
        "commits-gated-by-tests",
        "commits gated by tests",
        (
            f"In {len(commits_gated_by_tests)} of {len(commit_sessions)} commit sessions, "
            "a test command ran before the final git commit."
        ),
        commit_sessions,
        commits_gated_by_tests,
        lambda s: (
            _where(s)
            + f"{s['test_run_count']} test run(s) before {s['commit_count']} commit(s)"
        ),
        reward=_reward("commit", 1, "git commit after verification"),
    )

    search_before_read = [
        s for s in read_sessions
        if (s.get("grep_count", 0) + s.get("glob_count", 0)) >= 1
        and _precedes(s.get("first_search_idx"), s.get("first_read_idx"))
    ]
    _strength(
        "search-before-reading",
        "search before reading",
        (
            f"In {len(search_before_read)} of {len(read_sessions)} read sessions, "
            "Grep/Glob narrowed the search before the first Read call."
        ),
        read_sessions,
        search_before_read,
        lambda s: (
            _where(s)
            + f"{s['grep_count']} Grep + {s['glob_count']} Glob before reading"
        ),
    )

    # Same "tested after the last edit" condition as above, restricted to
    # sessions with at most six file mutations.
    small_batch_verify = [s for s in tests_after_edits if 1 <= _edits(s) <= 6]
    _strength(
        "small-batch-verify",
        "small batch verify",
        (
            f"In {len(small_batch_verify)} of {len(edit_sessions)} edit sessions, "
            "changes stayed to six or fewer file mutations and ended with a test run."
        ),
        edit_sessions,
        small_batch_verify,
        lambda s: (
            _where(s)
            + f"{_edits(s)} edits, then {s['test_run_count']} test run(s)"
        ),
        reward=_reward("test_run", 2, "test run after a small edit batch"),
    )

    safe_git = [
        s for s in commit_sessions
        if int(s.get("bash_rm_rf_count", 0) or 0) == 0
    ]
    _strength(
        "safe-git-hygiene",
        "safe git hygiene",
        (
            f"In {len(safe_git)} of {len(commit_sessions)} commit sessions, "
            "git commit was observed without any rm -rf command in the same session."
        ),
        commit_sessions,
        safe_git,
        lambda s: _where(s) + f"{s['commit_count']} commit(s), 0 rm -rf commands",
        reward=_reward("commit", 1, "git commit with safe shell hygiene"),
    )

    _strength(
        "effective-skill-use",
        "effective skill use",
        (
            f"In {len(skill_sessions)} of {n} recent sessions, "
            "a slash-command skill or Skill tool was invoked during the work."
        ),
        sessions,
        skill_sessions,
        lambda s: _where(s) + f"{s['skill_count']} skill invocation(s)",
        reward=_reward("skill_invoke", 1, "skill invocation"),
    )

    # ---- window summary -------------------------------------------------

    summary = {
        "n_sessions": n,
        "total_edits": sum(_mutations(s) for s in sessions),
        "total_bash": sum(s["bash_count"] for s in sessions),
        "total_agents": sum(s["agent_count"] for s in sessions),
        "sessions_with_tests": sum(1 for s in sessions if s["has_any_test_run"]),
        "sessions_with_plans": sum(1 for s in sessions if s["first_plan_ts"]),
        "skills_used": dict(skills_used),
        # Per-project breakdown for the skill_hints inference path. Keys are
        # user-readable project names; the rolling accumulator lives in
        # profile.yaml and is updated by merge.py each run.
        "skills_by_project": skills_by_project,
    }
    return detections, summary
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def main():
    """Analyze the transcripts named on the command line and print JSON.

    Every non-blank argv entry is treated as a transcript path. Sessions
    without a single assistant turn are dropped before aggregation. The
    result is written to stdout as ``{"detections": [...], "summary": {...}}``.
    """
    transcript_paths = (Path(arg) for arg in sys.argv[1:] if arg.strip())
    signatures = (analyze_session(p) for p in transcript_paths)
    sessions = [
        sig for sig in signatures
        if sig and sig.get("assistant_turns", 0) > 0
    ]
    detections, summary = aggregate(sessions)
    payload = {"detections": detections, "summary": summary}
    print(json.dumps(payload, indent=2))


if __name__ == "__main__":
    main()
|