@rm0nroe/coach-claw 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +311 -0
- package/coach/README.md +99 -0
- package/coach/bin/aggregate_facets.py +274 -0
- package/coach/bin/analyze.py +678 -0
- package/coach/bin/bank.py +247 -0
- package/coach/bin/banner_themes.py +645 -0
- package/coach/bin/coach_paths.py +33 -0
- package/coach/bin/coexistence_check.py +129 -0
- package/coach/bin/configure.py +245 -0
- package/coach/bin/cron_check.py +81 -0
- package/coach/bin/default_statusline.py +135 -0
- package/coach/bin/doctor.py +663 -0
- package/coach/bin/insights-llm.sh +264 -0
- package/coach/bin/insights.sh +163 -0
- package/coach/bin/insights_window.py +111 -0
- package/coach/bin/marker_io.py +154 -0
- package/coach/bin/merge.py +671 -0
- package/coach/bin/redact.py +86 -0
- package/coach/bin/render_env.py +148 -0
- package/coach/bin/reward_hints.py +87 -0
- package/coach/bin/run-insights.sh +20 -0
- package/coach/bin/run_with_lock.py +85 -0
- package/coach/bin/scoring.py +260 -0
- package/coach/bin/skill_inventory.py +215 -0
- package/coach/bin/stats.py +459 -0
- package/coach/bin/status.py +293 -0
- package/coach/bin/statusline_self_patch.py +205 -0
- package/coach/bin/statusline_variants.py +146 -0
- package/coach/bin/statusline_wrap.py +244 -0
- package/coach/bin/statusline_wrap_action.py +460 -0
- package/coach/bin/switch_to_plugin.py +256 -0
- package/coach/bin/themes.py +256 -0
- package/coach/bin/user_config.py +176 -0
- package/coach/bin/xp_accounting.py +98 -0
- package/coach/changelog.md +4 -0
- package/coach/default-statusline-command.sh +19 -0
- package/coach/default-statusline-wrap-command.sh +15 -0
- package/coach/profile.yaml +37 -0
- package/coach/tests/conftest.py +13 -0
- package/coach/tests/test_aggregate_facets.py +379 -0
- package/coach/tests/test_analyze_aggregate.py +153 -0
- package/coach/tests/test_analyze_redaction.py +105 -0
- package/coach/tests/test_analyze_strengths.py +165 -0
- package/coach/tests/test_bank_atomic_write.py +61 -0
- package/coach/tests/test_bank_concurrency.py +126 -0
- package/coach/tests/test_banner_themes.py +981 -0
- package/coach/tests/test_celebrate_dedup.py +409 -0
- package/coach/tests/test_coach_paths.py +50 -0
- package/coach/tests/test_coexistence_check.py +128 -0
- package/coach/tests/test_configure.py +258 -0
- package/coach/tests/test_cron_check.py +118 -0
- package/coach/tests/test_cron_nudge_hook.py +134 -0
- package/coach/tests/test_detection_parity.py +105 -0
- package/coach/tests/test_doctor.py +595 -0
- package/coach/tests/test_hook_bespoke_dispatch.py +288 -0
- package/coach/tests/test_hook_module_resolution.py +116 -0
- package/coach/tests/test_hook_relevance.py +996 -0
- package/coach/tests/test_hook_render_env.py +364 -0
- package/coach/tests/test_hook_session_id_guard.py +160 -0
- package/coach/tests/test_insights_llm.py +759 -0
- package/coach/tests/test_insights_llm_venv_path.py +109 -0
- package/coach/tests/test_insights_window.py +237 -0
- package/coach/tests/test_install.py +1150 -0
- package/coach/tests/test_install_pyyaml_fallback.py +142 -0
- package/coach/tests/test_marker_consumption.py +167 -0
- package/coach/tests/test_marker_writer_locking.py +305 -0
- package/coach/tests/test_merge.py +413 -0
- package/coach/tests/test_no_broken_mktemp.py +90 -0
- package/coach/tests/test_render_env.py +137 -0
- package/coach/tests/test_render_env_glyphs.py +119 -0
- package/coach/tests/test_reward_hints.py +59 -0
- package/coach/tests/test_scoring.py +147 -0
- package/coach/tests/test_session_start_weekly_trigger.py +92 -0
- package/coach/tests/test_skill_inventory.py +368 -0
- package/coach/tests/test_stats_hybrid.py +142 -0
- package/coach/tests/test_status_accounting.py +41 -0
- package/coach/tests/test_statusline_failsafe.py +70 -0
- package/coach/tests/test_statusline_self_patch.py +261 -0
- package/coach/tests/test_statusline_variants.py +110 -0
- package/coach/tests/test_statusline_wrap.py +196 -0
- package/coach/tests/test_statusline_wrap_action.py +408 -0
- package/coach/tests/test_switch_to_plugin.py +360 -0
- package/coach/tests/test_themes.py +104 -0
- package/coach/tests/test_user_config.py +160 -0
- package/coach/tests/test_wrap_announce_hook.py +130 -0
- package/coach/tests/test_xp_accounting.py +55 -0
- package/hooks/coach-session-start.py +536 -0
- package/hooks/coach-user-prompt.py +2288 -0
- package/install-launchd.sh +102 -0
- package/install.sh +597 -0
- package/launchd/com.local.claude-coach.plist.template +34 -0
- package/launchd/run-insights.sh +20 -0
- package/npm/coach-claw.js +259 -0
- package/package.json +52 -0
- package/requirements.txt +11 -0
- package/settings-snippet.json +31 -0
- package/skills/coach/SKILL.md +107 -0
- package/skills/coach-insights/SKILL.md +78 -0
- package/skills/config/SKILL.md +149 -0
|
@@ -0,0 +1,996 @@
|
|
|
1
|
+
"""coach-user-prompt.py: session-relevance filtering for skill hints.
|
|
2
|
+
|
|
3
|
+
Regression guard for the bug where an off-topic skill (a frontend-animation
|
|
4
|
+
skill during backend debugging, or similar mismatch) was proposed for
|
|
5
|
+
sessions that had nothing to do with it, producing coach-tip reward lines
|
|
6
|
+
disconnected from the work being done.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import importlib.util
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import pytest
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@pytest.fixture(scope="module")
def cup():
    """Import ``coach-user-prompt.py`` from disk and return it as a module.

    The hook is a standalone script rather than a package member, so it is
    loaded from an explicit file path via ``importlib``. The copy in the
    ``hooks`` directory two levels above this test's directory is preferred;
    when it is absent the copy under ``~/.claude/hooks`` is tried. Tests that
    request this fixture are skipped when neither file exists.
    """
    hook_name = "coach-user-prompt.py"
    candidate = Path(__file__).resolve().parents[2] / "hooks" / hook_name
    if not candidate.exists():
        # Fall back to the installed location.
        candidate = Path.home() / ".claude" / "hooks" / hook_name
    if not candidate.exists():
        pytest.skip(f"hook not installed at {candidate}")
    module_spec = importlib.util.spec_from_file_location("cup_under_test", str(candidate))
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _write_transcript(path: Path, tool_uses: list[dict]) -> None:
|
|
32
|
+
lines = [
|
|
33
|
+
json.dumps({"message": {"content": [dict(tu, type="tool_use")]}})
|
|
34
|
+
for tu in tool_uses
|
|
35
|
+
]
|
|
36
|
+
path.write_text("\n".join(lines) + "\n")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _write_timed_transcript(path: Path, tool_uses: list[dict]) -> None:
|
|
40
|
+
lines = []
|
|
41
|
+
for i, tu in enumerate(tool_uses):
|
|
42
|
+
lines.append(json.dumps({
|
|
43
|
+
"timestamp": f"2026-01-01T00:00:{i:02d}+00:00",
|
|
44
|
+
"message": {"content": [dict(tu, type="tool_use")]},
|
|
45
|
+
}))
|
|
46
|
+
path.write_text("\n".join(lines) + "\n")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_find_transcript_accepts_project_transcript(cup, tmp_path, monkeypatch):
    """An existing file under ``$HOME/.claude/projects`` is returned, resolved."""
    fake_home = tmp_path / "home"
    project_dir = fake_home / ".claude/projects/acme"
    project_dir.mkdir(parents=True)
    session_file = project_dir / "session.jsonl"
    session_file.write_text("{}\n")
    monkeypatch.setenv("HOME", str(fake_home))

    payload = {"transcript_path": str(session_file)}
    assert cup._find_transcript(payload) == session_file.resolve()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_find_transcript_accepts_camelcase_payload_key(cup, tmp_path, monkeypatch):
    """The ``transcriptPath`` (camelCase) payload key is honoured as well."""
    fake_home = tmp_path / "home"
    project_dir = fake_home / ".claude/projects/acme"
    project_dir.mkdir(parents=True)
    session_file = project_dir / "session.jsonl"
    session_file.write_text("{}\n")
    monkeypatch.setenv("HOME", str(fake_home))

    payload = {"transcriptPath": str(session_file)}
    assert cup._find_transcript(payload) == session_file.resolve()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_find_transcript_rejects_outside_projects(cup, tmp_path, monkeypatch):
    """An existing file that is not under ``.claude/projects`` yields ``None``."""
    fake_home = tmp_path / "home"
    ssh_dir = fake_home / ".ssh"
    ssh_dir.mkdir(parents=True)
    unrelated_file = ssh_dir / "config"
    unrelated_file.write_text("Host example\n")
    monkeypatch.setenv("HOME", str(fake_home))

    payload = {"transcript_path": str(unrelated_file)}
    assert cup._find_transcript(payload) is None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_find_transcript_rejects_missing_path(cup, tmp_path, monkeypatch):
    """A path inside the projects tree that does not exist yields ``None``."""
    fake_home = tmp_path / "home"
    monkeypatch.setenv("HOME", str(fake_home))
    # Deliberately never created on disk.
    absent_file = fake_home / ".claude/projects/acme/missing.jsonl"

    payload = {"transcript_path": str(absent_file)}
    assert cup._find_transcript(payload) is None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# --- tokenizer unit ---------------------------------------------------------
|
|
88
|
+
|
|
89
|
+
def test_tokenize_splits_file_paths(cup):
    """A dotted filename yields both its stem and its extension as tokens."""
    tokens = cup._tokenize("animation.ts")
    for expected in ("animation", "ts"):
        assert expected in tokens
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_tokenize_strips_noise(cup):
    """Words expected to be on the tokenizer's noise list are dropped."""
    tokens = cup._tokenize("the user uses a tool for the task")
    for noise_word in ("task", "tool"):
        assert noise_word not in tokens
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_tokenize_applies_min_length(cup):
    """Two-character words are dropped unless they are preserved short tokens."""
    # An ordinary 2-char word does not survive tokenization.
    plain_tokens = cup._tokenize("is a bigword")
    assert "is" not in plain_tokens

    # Common code/extension tokens are kept even at 2 chars.
    py_tokens = cup._tokenize("edit a py file")
    assert "py" in py_tokens
    ui_tokens = cup._tokenize("a ui baseline")
    assert "ui" in ui_tokens
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# --- signal + fit integration ----------------------------------------------
|
|
110
|
+
|
|
111
|
+
def test_python_work_filters_off_topic_frontend_skill(cup, tmp_path):
    """A Python-only session must not be judged a fit for a frontend skill."""
    transcript = tmp_path / "sess.jsonl"
    edited_files = [
        "/p/.claude/coach/bin/merge.py",
        "/p/.claude/coach/bin/scoring.py",
        "/p/.claude/coach/tests/test_merge.py",
    ]
    tool_uses = [{"name": "Bash", "input": {"command": "pytest tests/"}}]
    tool_uses += [{"name": "Edit", "input": {"file_path": f}} for f in edited_files]
    _write_transcript(transcript, tool_uses)
    signal, anchors = cup._session_signal(transcript, "/p/.claude/coach")

    skill = {"id": "frontend-anim",
             "short_tip": "Scroll-linked animations, pinning, scrub."}
    # The exact bug this filter targets: a frontend skill offered in a Python session.
    assert cup._skill_fits_session(skill, signal, anchors) is False
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_frontend_work_keeps_matching_skill(cup, tmp_path):
    """A session editing animation-related frontend files keeps the animation skill."""

    def edit(file_path):
        return {"name": "Edit", "input": {"file_path": file_path}}

    transcript = tmp_path / "sess.jsonl"
    _write_transcript(transcript, [
        edit("/a/components/Hero.tsx"),
        {"name": "Bash", "input": {"command": "pnpm dev"}},
        edit("/a/lib/animations.ts"),
        edit("/a/components/ScrollTriggers.tsx"),
        edit("/a/lib/scrub.ts"),
    ])
    signal, anchors = cup._session_signal(transcript, "/a")

    skill = {"id": "frontend-anim",
             "short_tip": "Scroll-linked animations, pinning, scrub triggers."}
    # Intended overlap is distinctive and multi-token (animations, scrolltriggers, scrub).
    assert cup._skill_fits_session(skill, signal, anchors) is True
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_thin_signal_is_strict(cup):
    """A thin signal (<3 tokens) suppresses skill tips instead of keeping them.

    An off-topic skill reward line is costly, while a missed skill tip is
    cheap (another turn comes along). Weakness/strength tips are unaffected —
    they are about user behavior, not an installed-skill catalog.
    """
    skill = {"id": "frontend-anim", "short_tip": "Scroll animations."}
    for thin_signal in (set(), {"only", "two"}):
        assert cup._skill_fits_session(skill, thin_signal) is False
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def test_single_common_token_not_enough(cup):
    """One overlap on a common-dev-vocab token (e.g. 'test', 'file') is not relevance.

    Such tokens show up in nearly every session, so they prove nothing.
    """
    # Three session tokens, but the only one shared with the skill is 'test'
    # (expected to be in _COMMON_DEV_VOCAB).
    session_tokens = {"test", "backend", "daemon"}
    skill = {"id": "frontend-anim",
             "short_tip": "Scroll-linked animations test code."}
    assert cup._skill_fits_session(skill, session_tokens) is False
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_single_distinctive_token_is_not_enough(cup):
    """A single distinctive-token overlap must NOT be enough to fit.

    Words like `scroll` / `mobile` / `ssh` are distinctive in the vocabulary
    sense, yet in practice they span unrelated projects (an asset-pipeline
    skill and an AI-agents skill can both mention `mobile` without being about
    the same work). Policy since the 2026-04-24 fix: require ≥2 distinctive
    tokens, or a direct project-anchor overlap.
    """
    # 'scroll' is the lone distinctive token shared with the skill.
    session_tokens = {"scroll", "backend", "daemon"}
    skill = {"id": "frontend-anim",
             "short_tip": "Scroll-linked animations."}
    assert cup._skill_fits_session(skill, session_tokens) is False
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def test_two_distinctive_tokens_clear_the_bar(cup):
    """Two distinctive-token overlaps are sufficient for a skill to fit.

    The earlier rule accepted a single distinctive token; requiring two means
    an accidental cross-project overlap on one plumbing-ish word cannot fire.
    """
    session_tokens = {"scroll", "animations", "coach", "bin"}
    candidate = {"id": "frontend-anim",
                 "short_tip": "Scroll-linked animations and pinning."}
    # Overlap = {scroll, animations}; both distinctive. Count ≥ 2 → passes.
    assert cup._skill_fits_session(candidate, session_tokens) is True
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_two_common_tokens_overlap_not_enough(cup):
    """Overlaps made only of common-vocab tokens never satisfy the threshold.

    Previously a pair of common-vocab overlaps (e.g. {markdown, test}) could
    pass via the ≥2-token fallback. Under the new policy only distinctive
    tokens count toward the ≥2 requirement, closing the back door where any
    skill sharing a couple of generic dev words with the session would fire.
    """
    session_tokens = {"update", "test", "build", "coach"}
    candidate = {"id": "update-docs", "short_tip": "Update test docs and run build."}
    # Overlap = {update, test, build}; all three are expected in _COMMON_DEV_VOCAB.
    assert cup._skill_fits_session(candidate, session_tokens) is False
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def test_python_daemon_session_filters_off_topic_frontend_skill(cup, tmp_path):
    """Reproduce the canonical failure mode this filter exists to prevent.

    A Python daemon-debugging session must NOT surface an off-topic
    frontend-animation skill as a tip. The transcript contains both a user
    message describing the work and tool uses in the same domain.
    """
    t = tmp_path / "sess.jsonl"
    events = [
        # Recent user message — the strongest domain signal.
        {"type": "user", "message": {"role": "user", "content":
            "Fire the restart cycle on the ingest daemon. Need to check the "
            "event journal and the transaction cross-check. Tail daemon.log "
            "over ssh to the worker host."}},
        # Recent tool uses matching the domain.
        {"message": {"content": [{"type": "tool_use", "name": "Bash",
            "input": {"command": "ssh worker 'tail -f daemon.log'"}}]}},
        {"message": {"content": [{"type": "tool_use", "name": "Bash",
            "input": {"command": "grep EVENT|TXN journal"}}]}},
    ]
    t.write_text("\n".join(json.dumps(e) for e in events))
    signal, anchors = cup._session_signal(t, "/projects/data-pipeline/ingest-worker")
    frontend_anim = {
        "id": "frontend-anim",
        # Em dash restored: it had been mis-decoded into a stray Greek letter.
        "short_tip": "Animation timelines — position parameter, nesting, playback.",
    }
    assert cup._skill_fits_session(frontend_anim, signal, anchors) is False
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def test_session_signal_includes_user_message_tokens(cup, tmp_path):
    """Regression guard: the signal must include tokens from user message text.

    Tool uses alone are not enough: a fresh session with few tool calls would
    otherwise produce a thin signal and let irrelevant skills through.
    """
    t = tmp_path / "sess.jsonl"
    user_event = {
        "type": "user",
        # Em dash restored: it had been mis-decoded into a stray Greek letter.
        "message": {"role": "user", "content":
            "Please help me debug the ingest daemon's restart "
            "behavior — the journal isn't matching the transaction state."},
    }
    t.write_text(json.dumps(user_event) + "\n")
    signal, _anchors = cup._session_signal(t, None)
    for expected in ("ingest", "daemon", "journal", "transaction"):
        assert expected in signal
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def test_session_signal_only_reads_tail_of_long_transcript(cup, tmp_path):
    """Regression guard: only a bounded tail of the transcript feeds the signal.

    Replacing an ``fh.readlines()`` / tail-slice with a deque without dropping
    the slice would raise (deques don't slice), and the hook's outer
    try/except would swallow it. This test pins both correctness and tail
    semantics: 200 stale tool uses at the front MUST be dropped, and the 400
    fresh tool uses at the end MUST drive the signal.
    """

    def bash_event(command):
        return json.dumps({
            "type": "assistant",
            "message": {"role": "assistant", "content": [
                {"type": "tool_use", "name": "Bash",
                 "input": {"command": command}}
            ]},
        })

    # 200 stale events whose tokens must NOT appear in the signal; the hook is
    # expected to keep only the last max_events*4 = 400 lines.
    stale = [bash_event("markeralpha stale stalelong")] * 200
    # 400 fresh events that fill that tail window exactly. The tool_use loop is
    # expected to cap at max_events=100, i.e. the last 100 of these.
    fresh = [bash_event("markerbeta fresh freshlong")] * 400

    transcript = tmp_path / "sess.jsonl"
    transcript.write_text("\n".join(stale + fresh) + "\n")
    signal, _anchors = cup._session_signal(transcript, None)
    assert "markerbeta" in signal, "fresh tail tokens must drive the signal"
    assert "markeralpha" not in signal, (
        "stale tokens beyond the deque's maxlen must NOT leak into signal"
    )
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def test_build_tip_pool_filters_skills_by_signal(cup):
    """With a session signal, only skill hints that fit the session enter the pool."""
    hints = [
        {"id": "frontend-anim", "short_tip": "Scroll-linked animations."},
        {"id": "update-docs", "short_tip": "Update markdown docs and README."},
    ]
    profile = {"entries": [], "skill_hints": hints}
    # Simulated Python + markdown editing. update-docs overlaps on two
    # distinctive tokens (`readme`, `markdown`) → kept. The frontend skill
    # has no overlap → filtered.
    session_tokens = {"python3", "pytest", "coach", "bin", "readme", "markdown"}
    tips = cup._build_tip_pool(profile, session_signal=session_tokens)
    kept = {tip["entry_id"] for tip in tips if tip["kind"] == "skill"}
    assert "update-docs" in kept
    assert "frontend-anim" not in kept
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def test_build_tip_pool_unfiltered_when_signal_none(cup):
    """Backwards-compatible path: with ``session_signal=None`` every hint is kept."""
    only_hint = {"id": "frontend-anim", "short_tip": "Scroll-linked animations."}
    profile = {"entries": [], "skill_hints": [only_hint]}
    tips = cup._build_tip_pool(profile, session_signal=None)
    skill_ids = {tip["entry_id"] for tip in tips if tip["kind"] == "skill"}
    assert skill_ids == {"frontend-anim"}
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def test_behavior_gate_filters_edits_without_testing_after_test(cup, tmp_path):
    """An edit followed by a test run removes the edits-without-testing tip."""
    transcript = tmp_path / "sess.jsonl"
    # Order matters: the test run comes AFTER the edit.
    _write_transcript(transcript, [
        {"name": "Edit", "input": {"file_path": "/p/app.py"}},
        {"name": "Bash", "input": {"command": "pytest tests/"}},
    ])
    evidence = cup._session_behavior_evidence(transcript)

    weakness = {
        "id": "edits-without-testing",
        "name": "edits without testing",
        "tier": "active",
        "confidence": 0.9,
        "nudge": "Run tests after edits.",
    }
    tips = cup._build_tip_pool({"entries": [weakness]}, behavior_evidence=evidence)
    surfaced = {tip["entry_id"] for tip in tips}
    assert "edits-without-testing" not in surfaced
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def test_behavior_gate_keeps_edits_without_testing_after_later_edit(cup, tmp_path):
    """A test run followed by a later edit keeps the edits-without-testing tip."""
    transcript = tmp_path / "sess.jsonl"
    # Order matters: the edit comes AFTER the test run, so it is untested.
    _write_transcript(transcript, [
        {"name": "Bash", "input": {"command": "pytest tests/"}},
        {"name": "Edit", "input": {"file_path": "/p/app.py"}},
    ])
    evidence = cup._session_behavior_evidence(transcript)

    weakness = {
        "id": "edits-without-testing",
        "name": "edits without testing",
        "tier": "active",
        "confidence": 0.9,
        "nudge": "Run tests after edits.",
    }
    tips = cup._build_tip_pool({"entries": [weakness]}, behavior_evidence=evidence)
    surfaced = {tip["entry_id"] for tip in tips}
    assert "edits-without-testing" in surfaced
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def test_completion_detection_uses_shared_collect_only_rule(cup, tmp_path):
    """``pytest --collect-only`` alone is not a test run; a later real run is."""
    collect_only = {"name": "Bash", "input": {"command": "pytest --collect-only tests/"}}
    real_run = {"name": "Bash", "input": {"command": "mocha"}}
    fired_at = cup._parse_iso("2026-01-01T00:00:00+00:00")
    criteria = {"action": "test_run"}

    # Collect-only followed by an actual test command counts as completion.
    with_real_run = tmp_path / "sess.jsonl"
    _write_timed_transcript(with_real_run, [collect_only, real_run])
    assert cup._transcript_matches(with_real_run, fired_at, criteria) is True

    # Collect-only on its own does not.
    collect_only_path = tmp_path / "collect-only.jsonl"
    _write_timed_transcript(collect_only_path, [collect_only])
    assert cup._transcript_matches(collect_only_path, fired_at, criteria) is False
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def test_completion_detection_handles_dynamic_doc_write(cup, tmp_path):
    """Editing a README counts as completing a ``doc_write`` action."""
    transcript = tmp_path / "sess.jsonl"
    readme_edit = {"name": "Edit", "input": {"file_path": "/p/README.md"}}
    _write_timed_transcript(transcript, [readme_edit])
    fired_at = cup._parse_iso("2026-01-01T00:00:00+00:00")
    matched = cup._transcript_matches(transcript, fired_at, {"action": "doc_write"})
    assert matched is True
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def test_common_dev_vocab_contains_skill_catalog_meta_words(cup):
    """Guard the skill-catalog meta-words held in ``_COMMON_DEV_VOCAB``.

    These words were added after the meta-discussion false-positive bug and
    must stay. Deleting any one would silently regress the filter — a
    GSAP-style skill description would again match on a single
    self-referential word like `skill` or `official` during any session that
    happens to mention it (which is every coach-development session).
    """
    required = ("skill", "skills", "official", "api", "framework",
                "library", "plugin", "plugins")
    for word in required:
        # Em dash restored in the message: it had been mis-decoded.
        assert word in cup._COMMON_DEV_VOCAB, (
            f"{word!r} must stay in _COMMON_DEV_VOCAB — it's a skill-"
            f"description meta-word that would otherwise false-positive "
            f"every skill hint in any session mentioning it"
        )
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def test_genuine_frontend_work_still_fits_after_meta_vocab_expansion(cup):
    """Meta-word vocabulary expansion must not strand real frontend sessions.

    Pairs with `test_meta_discussion_filters_off_topic_skill`: adding
    `skill`/`official`/`plugin` to `_COMMON_DEV_VOCAB` must NOT stop a skill
    from fitting genuine frontend work. Id-token matches (e.g. `gsap`) and
    distinctive domain tokens (`scroll`, `animation`, `draggable`) carry the
    relevance signal — the meta-words were never what made the filter work in
    the genuine-work case.
    """
    # Session signal simulating actual frontend/animation work.
    signal = {"gsap", "scroll", "animation", "tween", "draggable",
              "scrolltrigger", "tsx", "components", "pnpm"}
    gsap_plugins = {
        "id": "gsap-plugins",
        # Em dash restored: it had been mis-decoded into a stray Greek letter.
        "short_tip": "Official GSAP skill for GSAP plugins — registration, "
                     "ScrollToPlugin, ScrollSmoother, Flip, Draggable, "
                     "Inertia, Observer.",
    }
    assert cup._skill_fits_session(gsap_plugins, signal) is True
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def test_meta_discussion_filters_off_topic_skill(cup):
    """Regression: meta-discussion sessions must not fire off-topic skill hints.

    A session that is about Claude Code itself — working on the coach,
    installing skills, debugging hooks — must NOT match a skill just because
    'skill', 'official' or 'api' appears in both the session and every skill's
    description. Those are self-referential meta-words, not domain signal.

    Seen in the wild when the canonical `Official GSAP skill for X`
    descriptions kept matching on the word `skill` during sessions where the
    user was developing the coach itself (no frontend work at all).
    """
    # Session is clearly about coach/hook development; no frontend work.
    signal = {"coach", "hook", "install", "skill", "sandbox", "launchd",
              "pytest", "claude", "settings", "python3", "bundle"}

    # A canonical-shape skill description that used to false-positive on the
    # meta-word 'skill' alone. Em dash restored: it had been mis-decoded.
    gsap_like = {
        "id": "frontend-anim",
        "short_tip": "Official animation skill for the core API — "
                     "scroll-linked animations, pinning, scrub.",
    }
    assert cup._skill_fits_session(gsap_like, signal) is False, (
        "self-referential meta-word overlap ('skill'/'official'/'api') "
        "must not count as distinctive"
    )

    # Sanity: a REAL overlap in the same session (an install-oriented skill)
    # still fits. Em dash restored here as well.
    install_skill = {
        "id": "install-helper",
        "short_tip": "Install, configure, and bundle Claude Code hooks "
                     "— settings patching, launchd, sandbox testing.",
    }
    assert cup._skill_fits_session(install_skill, signal) is True
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# --- widget/deploy-staging regression + project-anchor route ---------------
|
|
452
|
+
|
|
453
|
+
def test_widget_session_does_not_fire_deploy_staging(cup, tmp_path):
    """Regression for the cross-project plumbing-token bug.

    During a widget (AI-agents) session doing reconciler + journal work over
    SSH to a remote device, the coach suggested /deploy-staging — a skill for
    a DIFFERENT project (service — an asset deployment pipeline). Both
    projects touched a remote staging device, so the pre-fix filter saw one
    'distinctive' token overlap (`mobile`) and fired.

    After the fix, `mobile` / `ssh` / `deploy` are expected to live in
    _COMMON_DEV_VOCAB (cross-project plumbing, not domain evidence), and a
    single distinctive-token overlap is no longer sufficient on its own. The
    skill must stay filtered here.
    """
    t = tmp_path / "sess.jsonl"
    events = [
        {"type": "user", "message": {"role": "user", "content":
            "Let's wire the reconciler to the widget agent journal. "
            "Tail daemon.log on the mobile over ssh and see what the "
            "reconciler is doing with the latest transactions."}},
        {"message": {"content": [{"type": "tool_use", "name": "Bash",
            "input": {"command": "ssh mobile 'tail -f daemon.log'"}}]}},
        {"message": {"content": [{"type": "tool_use", "name": "Edit",
            "input": {"file_path":
                "/Users/r/Desktop/dev/widget/reconciler/journal.py"}}]}},
    ]
    t.write_text("\n".join(json.dumps(e) for e in events))
    signal, anchors = cup._session_signal(t, "/Users/r/Desktop/dev/widget")

    # Description shaped like a real cross-project skill. It must actually
    # contain the shared plumbing token `mobile`; without it the skill has no
    # overlap with the session at all and the assertion below is vacuous.
    cross_project_skill = {
        "id": "deploy-staging",
        "short_tip": ("Iterate on a deployable artifact. Exports build "
                      "outputs from a content tool, deploys to a mobile staging "
                      "environment, captures screenshots, and compares "
                      "against reference."),
    }
    assert cup._skill_fits_session(cross_project_skill, signal, anchors) is False, (
        "deploy-staging is a service-project skill; sharing the word `mobile` "
        "with a widget session must not be enough to fire it"
    )
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def test_project_anchor_shortcut_fires_on_name_match(cup, tmp_path):
    """A skill that names the current project directory fits via the anchor.

    Naming the project dir is near-conclusive on-topic evidence, and this
    route keeps the overall filter from being so strict that it silences
    legitimate project-scoped skills. The cwd-derived anchor token is the
    shortcut.
    """
    project_root = "/Users/r/Desktop/dev/widget"

    def tool_use_event(name, tool_input):
        return {"message": {"content": [
            {"type": "tool_use", "name": name, "input": tool_input}]}}

    transcript_events = [
        tool_use_event("Edit", {"file_path": "/Users/r/Desktop/dev/widget/agent.py"}),
        tool_use_event("Bash", {"command": "pytest tests/"}),
        tool_use_event("Edit", {"file_path": "/Users/r/Desktop/dev/widget/README.md"}),
    ]
    transcript = tmp_path / "sess.jsonl"
    transcript.write_text("\n".join(json.dumps(event) for event in transcript_events))
    signal, anchors = cup._session_signal(transcript, project_root)
    assert "widget" in anchors

    # Skill explicitly scoped to the widget project — should fire via anchor.
    scoped_skill = {
        "id": "widget-build",
        "short_tip": "Perpetual architectural evolution loop for widget.",
    }
    assert cup._skill_fits_session(scoped_skill, signal, anchors) is True
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def test_single_plumbing_token_not_enough(cup):
    """One shared cross-project "plumbing" token must not make a skill fire.

    The session signal and the skill tip overlap on exactly one word,
    ``mobile``.  That word is expected to be treated as common dev
    vocabulary (assumes it is listed in ``cup._COMMON_DEV_VOCAB`` -- confirm
    against the module), so the overlap carries no distinctive evidence and
    ``_skill_fits_session`` should return ``False``.
    """
    signal = {"mobile", "agent", "reconciler", "journal", "widget"}
    skill = {
        "id": "deploy-staging",
        # Keep `mobile` in the tip: without it the overlap with `signal`
        # is empty and this test passes without exercising the
        # single-plumbing-token case it is named for.
        "short_tip": "Deploys to a mobile staging environment and iterates "
                     "on build outputs.",
    }
    # Overlap = {mobile}; no distinctive tokens are shared -> False.
    assert cup._skill_fits_session(skill, signal) is False
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def test_session_signal_anchors_are_last_cwd_component(cup):
    """Anchors come from the final cwd component, not from its ancestors.

    With no transcript and a cwd of ``/Users/r/Desktop/dev/widget`` the
    anchor set must contain ``widget`` and must not contain the parent
    directory tokens ``desktop`` or ``dev``.
    """
    _unused_signal, anchor_tokens = cup._session_signal(
        None, "/Users/r/Desktop/dev/widget")

    assert "widget" in anchor_tokens
    # Parent directories are shared by every sibling project, so they
    # must never become anchors.
    for ancestor_token in ("desktop", "dev"):
        assert ancestor_token not in anchor_tokens
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
# --- project-scoped gate (SKILL.md frontmatter `projects:` field) -----------
|
|
549
|
+
|
|
550
|
+
def test_scoped_skill_fires_in_matching_project(cup, tmp_path):
    """A ``projects: [widget]`` skill fires inside ``widget`` given topic overlap.

    The transcript is a single user message that shares vocabulary with
    the skill tip (evolution loop, swarm, research agents, reference
    codebases), and the cwd resolves to ``widget``.
    """
    user_prompt = (
        "Let's run the evolution loop and swarm some research agents "
        "across the reference codebases."
    )
    transcript = tmp_path / "sess.jsonl"
    record = {
        "type": "user",
        "message": {"role": "user", "content": user_prompt},
    }
    transcript.write_text(json.dumps(record) + "\n")

    signal, anchors = cup._session_signal(
        transcript, "/Users/r/Desktop/dev/widget")

    scoped_skill = {
        "id": "widget-build",
        "short_tip": "Perpetual architectural evolution loop. Swarms research "
                     "agents across reference codebases and debates findings.",
        "projects": ["widget"],
    }
    assert cup._skill_fits_session(scoped_skill, signal, anchors) is True
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def test_scoped_skill_filtered_in_non_matching_project(cup, tmp_path):
    """A skill scoped to ``service`` must be rejected inside ``widget``.

    The session text is unrelated daemon/ssh work in a ``widget`` cwd; the
    skill declares ``projects: ["service"]``.  The expected result is a
    hard ``False`` regardless of any incidental token overlap.
    """
    user_prompt = (
        "Ship the reconciler over ssh to the mobile, tail daemon.log, "
        "compare journal events against the transaction feed."
    )
    transcript = tmp_path / "sess.jsonl"
    # Single event, so the file holds one JSON document with no trailing newline.
    transcript.write_text(json.dumps(
        {"type": "user", "message": {"role": "user", "content": user_prompt}}
    ))

    signal, anchors = cup._session_signal(
        transcript, "/Users/r/Desktop/dev/widget")

    foreign_skill = {
        "id": "deploy-staging",
        "short_tip": "Iterate on a deployable artifact. Exports build outputs "
                     "from a content tool, deploys to a staging environment, "
                     "captures screenshots, compares references.",
        "projects": ["service"],
    }
    assert cup._skill_fits_session(foreign_skill, signal, anchors) is False
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def test_scoped_skill_filtered_when_cwd_unknown(cup):
    """With an empty anchor set a project-scoped skill is skipped.

    An empty ``frozenset`` stands in for "no cwd information available";
    the scoped skill cannot be matched to a project and must not fire.
    """
    no_anchors = frozenset()
    session_tokens = {"some", "session", "tokens", "here"}
    scoped_skill = {
        "id": "deploy-staging",
        "short_tip": "Asset pipeline work.",
        "projects": ["service"],
    }

    verdict = cup._skill_fits_session(scoped_skill, session_tokens, no_anchors)
    assert verdict is False
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def test_scoped_skill_handles_compound_project_name(cup, tmp_path):
    """Hyphenated project names are matched against a hyphenated cwd.

    The cwd is ``.../acme-app`` and the skill declares
    ``projects: ["acme-cli", "acme-app"]``.  The session message talks
    about migrating from cli to app, and the skill is expected to fit.
    """
    transcript = tmp_path / "sess.jsonl"
    message = {
        "role": "user",
        "content": "Migrate the recon module from cli to app's src-tauri.",
    }
    transcript.write_text(
        json.dumps({"type": "user", "message": message}) + "\n")

    signal, anchors = cup._session_signal(
        transcript, "/Users/r/Desktop/dev/acme-app")

    migrate_skill = {
        "id": "cli-migrate",
        "short_tip": "Migrate acme-cli Python modules to Rust in "
                     "acme-app's src-tauri.",
        "projects": ["acme-cli", "acme-app"],
    }
    assert cup._skill_fits_session(migrate_skill, signal, anchors) is True
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def test_scoped_skill_still_needs_topic_overlap(cup, tmp_path):
    """Being in the declared project is not enough without topic overlap.

    The session only edits ``LICENSE`` and runs ``git status`` inside
    ``widget``; the skill is about evolution loops and research swarms.
    Even though ``projects`` matches, the skill must not fire.
    """
    def tool_use(tool, payload):
        # One transcript event wrapping a single tool_use content item.
        return {"message": {"content": [
            {"type": "tool_use", "name": tool, "input": payload}]}}

    transcript = tmp_path / "sess.jsonl"
    recorded = [
        tool_use("Edit", {"file_path": "/Users/r/Desktop/dev/widget/LICENSE"}),
        tool_use("Bash", {"command": "git status"}),
    ]
    transcript.write_text("\n".join(json.dumps(event) for event in recorded))

    signal, anchors = cup._session_signal(
        transcript, "/Users/r/Desktop/dev/widget")

    in_project_skill = {
        "id": "widget-build",
        "short_tip": "Perpetual architectural evolution loop swarming agents "
                     "across reference codebases.",
        "projects": ["widget"],
    }
    # Nothing in the session touches the evolve/swarm/reference vocabulary,
    # so the skill is skipped despite the project match.
    assert cup._skill_fits_session(in_project_skill, signal, anchors) is False
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def test_untagged_skill_still_uses_prior_overlap_rules(cup):
    """Skills without a ``projects`` field keep the plain overlap behaviour.

    Two cases are pinned: a tip sharing ``scroll`` and ``animations`` with
    the session fits, while a tip sharing only ``scroll`` does not.
    """
    # Case 1: two shared tokens (scroll, animations) and no `projects` key.
    two_hit_skill = {
        "id": "frontend-anim",
        "short_tip": "Scroll-linked animations and pinning.",
    }
    two_hit_signal = {"scroll", "animations", "hero", "pnpm"}
    assert cup._skill_fits_session(two_hit_skill, two_hit_signal) is True

    # Case 2: only `scroll` is shared with the session, which must stay
    # insufficient whether or not the project gate exists.
    one_hit_skill = {
        "id": "frontend-anim",
        "short_tip": "Scroll-linked animations.",
    }
    one_hit_signal = {"scroll", "backend", "daemon"}
    assert cup._skill_fits_session(one_hit_skill, one_hit_signal) is False
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
# --- git-root anchor walk --------------------------------------------------
|
|
679
|
+
|
|
680
|
+
def test_git_root_anchor_from_subdirectory_cwd(cup, tmp_path):
    """A cwd nested inside a repo still yields the repo name as an anchor.

    Creates ``widget/packages/core`` with a ``.git`` directory at
    ``widget`` and uses the nested directory as cwd.  The nested path's own
    final component does not mention the project, so ``widget`` can only
    reach the anchors through the git-root lookup.
    """
    repo_root = tmp_path / "widget"
    package_dir = repo_root / "packages" / "core"
    package_dir.mkdir(parents=True)
    # An empty `.git` directory is enough to mark the repository root.
    (repo_root / ".git").mkdir()

    _unused_signal, anchors = cup._session_signal(None, str(package_dir))

    assert "widget" in anchors, (
        "git-root walk must recover the project name when cwd is a "
        "subdirectory deeper than parts[-1]"
    )
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def test_git_root_anchor_returns_none_outside_any_repo(cup, tmp_path):
    """Outside any repository the git-root lookup yields ``None``.

    Both layers are checked: ``_find_git_root_name`` itself returns
    ``None``, and ``_session_signal`` still produces an anchor from the
    last path component (``dirs``).
    """
    unversioned = tmp_path / "random" / "nested" / "dirs"
    unversioned.mkdir(parents=True)
    cwd = str(unversioned)

    # Function-level contract: checking only the anchors below would let a
    # lookup that returns some unrelated ancestor slip through.
    assert cup._find_git_root_name(cwd) is None

    # Signal-level effect: the last component still contributes.
    _unused_signal, anchors = cup._session_signal(None, cwd)
    assert "dirs" in anchors
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def test_git_root_walk_skips_home_repo(cup, tmp_path, monkeypatch):
    """A ``.git`` located exactly at the home directory is ignored.

    ``Path.home`` is patched to a temporary directory that contains a
    ``.git`` folder.  From ``home/Desktop/scratch`` there is no other
    repository on the way up, so ``_find_git_root_name`` must return
    ``None`` rather than the home directory's name.
    """
    fake_home = tmp_path / "home"
    fake_home.mkdir()
    (fake_home / ".git").mkdir()  # e.g. a dotfiles checkout at $HOME

    scratch_dir = fake_home / "Desktop" / "scratch"
    scratch_dir.mkdir(parents=True)

    monkeypatch.setattr(cup.Path, "home", classmethod(lambda cls: fake_home))

    # Only the home-level repo exists above `scratch_dir`, and it is skipped.
    assert cup._find_git_root_name(str(scratch_dir)) is None
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def test_git_root_walk_finds_nested_repo_even_with_home_repo(
        cup, tmp_path, monkeypatch):
    """A repository nested under a home-level repo is still detected.

    Both the patched home directory and ``home/Desktop/dev/widget`` carry
    a ``.git`` folder.  Starting from ``widget/packages/core`` the lookup
    must report ``widget``.
    """
    fake_home = tmp_path / "home"
    fake_home.mkdir()
    (fake_home / ".git").mkdir()  # home itself is a repo

    widget_repo = fake_home / "Desktop" / "dev" / "widget"
    widget_repo.mkdir(parents=True)
    (widget_repo / ".git").mkdir()  # the nested repo

    start_dir = widget_repo / "packages" / "core"
    start_dir.mkdir(parents=True)

    monkeypatch.setattr(cup.Path, "home", classmethod(lambda cls: fake_home))

    # The widget repo sits between the start directory and home.
    assert cup._find_git_root_name(str(start_dir)) == "widget"
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
# --- COACH_ALL_SKILLS escape hatch -----------------------------------------
|
|
754
|
+
|
|
755
|
+
def test_coach_all_skills_env_bypasses_filter(cup, monkeypatch):
    """``COACH_ALL_SKILLS=1`` makes every skill hint eligible.

    The profile holds one hint scoped to ``service`` and one unscoped hint
    with a short tip; the session anchors to ``widget``.  With the env var
    set, both ids must appear among the ``skill`` entries of the tip pool.
    """
    monkeypatch.setenv("COACH_ALL_SKILLS", "1")

    # Scoped to a different project than the anchor below.
    scoped_hint = {
        "id": "deploy-staging",
        "short_tip": "Asset pipeline work.",
        "projects": ["service"],
    }
    # Unscoped hint whose tip shares nothing with the session signal.
    thin_hint = {
        "id": "frontend-anim",
        "short_tip": "Scroll-linked animations.",
    }
    profile = {"entries": [], "skill_hints": [scoped_hint, thin_hint]}

    pool = cup._build_tip_pool(
        profile,
        session_signal={"reconciler", "agent", "journal"},
        project_anchors={"widget"},
    )

    surfaced = {tip["entry_id"] for tip in pool if tip["kind"] == "skill"}
    assert "deploy-staging" in surfaced
    assert "frontend-anim" in surfaced
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def test_coach_all_skills_off_still_filters(cup, monkeypatch):
    """Without ``COACH_ALL_SKILLS`` the scoped hint stays filtered out.

    Same scoped hint, signal and anchors as the bypass test, but the env
    var is removed first; ``deploy-staging`` must not be in the pool.
    """
    monkeypatch.delenv("COACH_ALL_SKILLS", raising=False)

    scoped_hint = {
        "id": "deploy-staging",
        "short_tip": "Asset pipeline work.",
        "projects": ["service"],
    }
    profile = {"entries": [], "skill_hints": [scoped_hint]}

    pool = cup._build_tip_pool(
        profile,
        session_signal={"reconciler", "agent", "journal"},
        project_anchors={"widget"},
    )

    surfaced = {tip["entry_id"] for tip in pool if tip["kind"] == "skill"}
    assert "deploy-staging" not in surfaced
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
# --- branding regression: Coach Claw persona + reward marker ----------------
|
|
804
|
+
# The coach surfaces user-facing strings that drift if labels or banner
|
|
805
|
+
# emoji change accidentally. These tests pin the contract so a typo or
|
|
806
|
+
# refactor can't silently revert the rebrand.
|
|
807
|
+
|
|
808
|
+
def test_label_pools_have_no_screwdriver(cup):
    """None of the three label pools may contain the retired glyph."""
    # Pools are checked in the same order: weakness, skill, strength.
    for pool in (cup.WEAKNESS_LABELS, cup.SKILL_LABELS, cup.STRENGTH_LABELS):
        assert all("πͺ" not in label for label in pool)
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
def test_skill_labels_use_coach_claw(cup):
    """Skill labels carry the Coach Claw wording and drop the old variants."""
    skill_labels = cup.SKILL_LABELS

    # Both branded forms must be present verbatim.
    for required in ("*π¦ From Coach Claw:*", "*π¦ Coach:*"):
        assert required in skill_labels

    # The old phrasing and the old glyph must not appear in any label.
    assert all("From your coach" not in label for label in skill_labels)
    assert all("π§" not in label for label in skill_labels)
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def test_xp_attribution_uses_arrow_marker(cup):
    """Pin the line layout produced by ``_xp_attribution``.

    A skill tip yields exactly one line that starts with the arrow marker
    and carries no ``XP`` text.  A weakness tip with a reward hint and a
    streak of 2 yields two lines: the per-action reward first, then the
    streak bar on its own line.
    """
    skill_tip = {
        "kind": "skill",
        "entry_id": "deploy-to-vercel",
        "clean_streak_runs": 0,
    }
    skill_lines = cup._xp_attribution(skill_tip)
    assert isinstance(skill_lines, list) and len(skill_lines) == 1
    only_line = skill_lines[0]
    assert only_line.startswith("_β +")
    assert "XP" not in only_line
    assert "β¨" not in only_line

    weakness_tip = {
        "kind": "weakness",
        "entry_id": "edits-without-testing",
        "clean_streak": 2,
        "reward_hint": {"action": "test_run", "xp": 2, "description": "test run"},
    }
    weakness_lines = cup._xp_attribution(weakness_tip)
    # The streak bar gets its own line so it cannot wrap inside the
    # per-action reward sentence.
    assert len(weakness_lines) == 2
    reward_line, streak_line = weakness_lines
    assert reward_line.startswith("_β +2 per test run")
    assert "XP" not in reward_line
    assert "π₯" not in reward_line
    assert streak_line.startswith("_π‘οΈ Warming up")
    assert "2/5" in streak_line
    assert all("β¨" not in line for line in weakness_lines)
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
@pytest.mark.parametrize(
    "streak,expected",
    [
        (2, "_π‘οΈ Warming up π΄π΄βͺβͺβͺ 2/5 β +5 bonus at 5/5._"),
        (3, "_πΆοΈ Heating up π΄π΄π΄βͺβͺ 3/5 β +5 bonus at 5/5._"),
        (4, "_π₯ Streak π΄π΄π΄π΄βͺ 4/5 β +5 bonus at 5/5._"),
        (5, "_π Mastered π΄π΄π΄π΄π΄ 5/5 β +5 bonus ready._"),
    ],
)
def test_weakness_streak_stage_ladder(cup, streak, expected):
    """Each weakness ``clean_streak`` value renders one exact streak line."""
    tip = {"kind": "weakness", "clean_streak": streak}
    rendered = cup._xp_attribution(tip)
    # No reward hint is supplied, so the streak bar is the only line.
    assert rendered == [expected]
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
@pytest.mark.parametrize(
    "streak,expected",
    [
        (2, "_π‘οΈ Warming up π΄π΄βͺβͺβͺ 2/5 β +5 mastery bonus at 5/5._"),
        (3, "_πΆοΈ Heating up π΄π΄π΄βͺβͺ 3/5 β +5 mastery bonus at 5/5._"),
        (4, "_π₯ Streak π΄π΄π΄π΄βͺ 4/5 β +5 mastery bonus at 5/5._"),
        (5, "_π Mastered π΄π΄π΄π΄π΄ 5/5 β +5 mastery bonus ready._"),
    ],
)
def test_strength_streak_stage_ladder(cup, streak, expected):
    """Each strength ``positive_streak`` value renders one exact streak line."""
    tip = {"kind": "strength", "positive_streak": streak}
    rendered = cup._xp_attribution(tip)
    # No reward hint is supplied, so the streak bar is the only line.
    assert rendered == [expected]
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
def test_strength_tips_have_distinct_runtime_treatment(cup):
    """Strength tips have their own labels, streak line and completion spec.

    Checks three things: the strength label pool contains ``*Strength:*``
    and shares nothing with the weakness pool; ``_xp_attribution`` renders
    a reward line plus a streak line driven by ``positive_streak``; and
    ``_completion_spec`` returns the tip's ``reward_hint`` values.
    """
    assert "*Strength:*" in cup.STRENGTH_LABELS
    assert set(cup.STRENGTH_LABELS).isdisjoint(cup.WEAKNESS_LABELS)

    attribution = cup._xp_attribution({
        "kind": "strength",
        "entry_id": "tests-after-edits",
        "clean_streak": 0,
        "positive_streak": 3,
        "reward_hint": {"action": "test_run", "xp": 2, "description": "test run"},
    })
    assert len(attribution) == 2
    reward_line, streak_line = attribution
    assert reward_line.startswith("_β +2 per test run")
    assert "XP" not in reward_line
    assert "π₯" not in reward_line
    # The streak line reflects positive_streak (3), not clean_streak (0).
    assert "πΆοΈ Heating up" in streak_line
    assert "3/5" in streak_line

    completion = cup._completion_spec({
        "kind": "strength",
        "entry_id": "tests-after-edits",
        "reward_hint": {"action": "test_run", "xp": 2, "description": "test run"},
    })
    assert completion == {"action": "test_run", "xp": 2, "description": "test run"}
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
def test_strength_completion_banner_reinforces_instead_of_clearing(cup):
    """A completed strength tip renders the "reinforced" banner wording.

    The banner for a strength entry with ``positive_streak`` 2 must contain
    the reinforcement headline and the XP/streak line, and must not contain
    the "advances on next /coach-insights run" text.
    """
    completed_tip = {
        "kind": "strength",
        "entry_id": "tests-after-edits",
        "positive_streak": 2,
        "spec": {"action": "test_run", "xp": 2, "description": "test run"},
    }
    banner = cup._completion_banner([("entry:tests-after-edits", completed_tip)])

    assert "> πͺ Strength reinforced β test runner detected" in banner
    assert "> +2 XP Β· tests-after-edits strength streak π΄π΄βͺβͺβͺ" in banner
    assert "advances on next /coach-insights run" not in banner
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
def test_strength_session_cap_helpers(cup):
    """Marking a session flips ``_session_strength_already_fired`` to True."""
    session_id = "s1"
    hook_state = {}
    fired_at = cup.datetime(2026, 1, 1, tzinfo=cup.timezone.utc)

    # Fresh state: nothing has fired for this session yet.
    assert cup._session_strength_already_fired(hook_state, session_id) is False

    cup._mark_strength_fired(hook_state, session_id, fired_at)

    # The same state object now reports the session as already fired.
    assert cup._session_strength_already_fired(hook_state, session_id) is True
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
def test_tip_log_records_redacted_bounded_events(cup, tmp_path, monkeypatch):
    """The tip log is capped at ``LOG_MAX_LINES`` and omits free text.

    Three events are written (fired, completed, fired) with the cap
    patched to 2, so only the last two records may remain.  Nudge text,
    example text and the reward description must not appear in the file.
    """
    log_path = tmp_path / "log.ndjson"
    monkeypatch.setattr(cup, "LOG_PATH", log_path)
    monkeypatch.setattr(cup, "LOG_MAX_LINES", 2)
    stamp = cup.datetime(2026, 1, 1, tzinfo=cup.timezone.utc)

    def test_run_spec():
        # Fresh dict per call so the two log calls never share an object.
        return {"action": "test_run", "xp": 2, "description": "pytest tests/unit"}

    # Event 1: weakness tip fired; it carries free text that must not be logged.
    weakness_tip = {
        "id": "entry:edits-without-testing",
        "entry_id": "edits-without-testing",
        "kind": "weakness",
        "tier": "active",
        "nudge": "raw nudge text should never be logged",
        "example": "raw transcript example should never be logged",
    }
    cup._log_tip_fired(weakness_tip, test_run_spec(), stamp)

    # Event 2: the same weakness tip completed.
    completed_tip = {
        "entry_id": "edits-without-testing",
        "kind": "weakness",
        "spec": test_run_spec(),
    }
    cup._log_tip_completed("entry:edits-without-testing", completed_tip, stamp)

    # Event 3: a skill hint fired.
    skill_tip = {
        "id": "skill:update-docs",
        "entry_id": "update-docs",
        "kind": "skill",
        "tier": "hint",
    }
    cup._log_tip_fired(
        skill_tip,
        {"action": "skill_invoke", "skill_id": "update-docs"},
        stamp,
    )

    contents = log_path.read_text()
    log_lines = contents.splitlines()
    assert len(log_lines) == 2

    records = [json.loads(line) for line in log_lines]
    # The first event has been dropped by the two-line cap.
    assert [record["event"] for record in records] == ["tip_completed", "tip_fired"]
    completed_record, fired_record = records
    assert completed_record["action"] == "test_run"
    assert completed_record["xp"] == 2
    assert fired_record["skill_id"] == "update-docs"

    # Redaction: none of the free-text inputs may reach the file.
    for forbidden in ("raw nudge", "raw transcript", "pytest tests/unit"):
        assert forbidden not in contents
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def test_celebration_banners_use_canonical_glyphs(cup):
|
|
979
|
+
# Streak rewards: directional arrows (positiveββ, negativeββ), no β¨ leakage.
|
|
980
|
+
streak_neg = cup._streak_reward_block(
|
|
981
|
+
[{"id": "x", "name": "x", "streak": 2, "target": 5,
|
|
982
|
+
"xp_awarded": 1, "direction": "negative"}]
|
|
983
|
+
)
|
|
984
|
+
assert "β" in streak_neg and "β¨" not in streak_neg
|
|
985
|
+
streak_pos = cup._streak_reward_block(
|
|
986
|
+
[{"id": "y", "name": "y", "streak": 2, "target": 5,
|
|
987
|
+
"xp_awarded": 1, "direction": "positive"}]
|
|
988
|
+
)
|
|
989
|
+
assert "β" in streak_pos and "β¨" not in streak_pos
|
|
990
|
+
|
|
991
|
+
# Graduations: πβ‘οΈ (negative) / ππ (positive) ceremonial pair preserved.
|
|
992
|
+
grad = cup._graduation_block(
|
|
993
|
+
[{"id": "x", "name": "x", "direction": "negative",
|
|
994
|
+
"graduated_reason": "5 clean runs"}]
|
|
995
|
+
)
|
|
996
|
+
assert "πβ‘οΈ" in grad and "πβ¨" not in grad
|