@rm0nroe/coach-claw 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +311 -0
- package/coach/README.md +99 -0
- package/coach/bin/aggregate_facets.py +274 -0
- package/coach/bin/analyze.py +678 -0
- package/coach/bin/bank.py +247 -0
- package/coach/bin/banner_themes.py +645 -0
- package/coach/bin/coach_paths.py +33 -0
- package/coach/bin/coexistence_check.py +129 -0
- package/coach/bin/configure.py +245 -0
- package/coach/bin/cron_check.py +81 -0
- package/coach/bin/default_statusline.py +135 -0
- package/coach/bin/doctor.py +663 -0
- package/coach/bin/insights-llm.sh +264 -0
- package/coach/bin/insights.sh +163 -0
- package/coach/bin/insights_window.py +111 -0
- package/coach/bin/marker_io.py +154 -0
- package/coach/bin/merge.py +671 -0
- package/coach/bin/redact.py +86 -0
- package/coach/bin/render_env.py +148 -0
- package/coach/bin/reward_hints.py +87 -0
- package/coach/bin/run-insights.sh +20 -0
- package/coach/bin/run_with_lock.py +85 -0
- package/coach/bin/scoring.py +260 -0
- package/coach/bin/skill_inventory.py +215 -0
- package/coach/bin/stats.py +459 -0
- package/coach/bin/status.py +293 -0
- package/coach/bin/statusline_self_patch.py +205 -0
- package/coach/bin/statusline_variants.py +146 -0
- package/coach/bin/statusline_wrap.py +244 -0
- package/coach/bin/statusline_wrap_action.py +460 -0
- package/coach/bin/switch_to_plugin.py +256 -0
- package/coach/bin/themes.py +256 -0
- package/coach/bin/user_config.py +176 -0
- package/coach/bin/xp_accounting.py +98 -0
- package/coach/changelog.md +4 -0
- package/coach/default-statusline-command.sh +19 -0
- package/coach/default-statusline-wrap-command.sh +15 -0
- package/coach/profile.yaml +37 -0
- package/coach/tests/conftest.py +13 -0
- package/coach/tests/test_aggregate_facets.py +379 -0
- package/coach/tests/test_analyze_aggregate.py +153 -0
- package/coach/tests/test_analyze_redaction.py +105 -0
- package/coach/tests/test_analyze_strengths.py +165 -0
- package/coach/tests/test_bank_atomic_write.py +61 -0
- package/coach/tests/test_bank_concurrency.py +126 -0
- package/coach/tests/test_banner_themes.py +981 -0
- package/coach/tests/test_celebrate_dedup.py +409 -0
- package/coach/tests/test_coach_paths.py +50 -0
- package/coach/tests/test_coexistence_check.py +128 -0
- package/coach/tests/test_configure.py +258 -0
- package/coach/tests/test_cron_check.py +118 -0
- package/coach/tests/test_cron_nudge_hook.py +134 -0
- package/coach/tests/test_detection_parity.py +105 -0
- package/coach/tests/test_doctor.py +595 -0
- package/coach/tests/test_hook_bespoke_dispatch.py +288 -0
- package/coach/tests/test_hook_module_resolution.py +116 -0
- package/coach/tests/test_hook_relevance.py +996 -0
- package/coach/tests/test_hook_render_env.py +364 -0
- package/coach/tests/test_hook_session_id_guard.py +160 -0
- package/coach/tests/test_insights_llm.py +759 -0
- package/coach/tests/test_insights_llm_venv_path.py +109 -0
- package/coach/tests/test_insights_window.py +237 -0
- package/coach/tests/test_install.py +1150 -0
- package/coach/tests/test_install_pyyaml_fallback.py +142 -0
- package/coach/tests/test_marker_consumption.py +167 -0
- package/coach/tests/test_marker_writer_locking.py +305 -0
- package/coach/tests/test_merge.py +413 -0
- package/coach/tests/test_no_broken_mktemp.py +90 -0
- package/coach/tests/test_render_env.py +137 -0
- package/coach/tests/test_render_env_glyphs.py +119 -0
- package/coach/tests/test_reward_hints.py +59 -0
- package/coach/tests/test_scoring.py +147 -0
- package/coach/tests/test_session_start_weekly_trigger.py +92 -0
- package/coach/tests/test_skill_inventory.py +368 -0
- package/coach/tests/test_stats_hybrid.py +142 -0
- package/coach/tests/test_status_accounting.py +41 -0
- package/coach/tests/test_statusline_failsafe.py +70 -0
- package/coach/tests/test_statusline_self_patch.py +261 -0
- package/coach/tests/test_statusline_variants.py +110 -0
- package/coach/tests/test_statusline_wrap.py +196 -0
- package/coach/tests/test_statusline_wrap_action.py +408 -0
- package/coach/tests/test_switch_to_plugin.py +360 -0
- package/coach/tests/test_themes.py +104 -0
- package/coach/tests/test_user_config.py +160 -0
- package/coach/tests/test_wrap_announce_hook.py +130 -0
- package/coach/tests/test_xp_accounting.py +55 -0
- package/hooks/coach-session-start.py +536 -0
- package/hooks/coach-user-prompt.py +2288 -0
- package/install-launchd.sh +102 -0
- package/install.sh +597 -0
- package/launchd/com.local.claude-coach.plist.template +34 -0
- package/launchd/run-insights.sh +20 -0
- package/npm/coach-claw.js +259 -0
- package/package.json +52 -0
- package/requirements.txt +11 -0
- package/settings-snippet.json +31 -0
- package/skills/coach/SKILL.md +107 -0
- package/skills/coach-insights/SKILL.md +78 -0
- package/skills/config/SKILL.md +149 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
"""skill_inventory.py: SKILL.md frontmatter parsing + `projects` field
|
|
2
|
+
coercion.
|
|
3
|
+
|
|
4
|
+
Covers the path that tags skills with their declared project scope.
|
|
5
|
+
The hook consumes this list and uses it as a hard filter, so misreads
|
|
6
|
+
here turn into silently-wrong filter decisions at runtime."""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import contextlib
|
|
10
|
+
import io
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
import skill_inventory as si # provided by coach/tests/conftest.py
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture
def isolated_argv(monkeypatch):
    """Replace ``sys.argv`` with a bare program name for the test's duration.

    ``si.main()`` runs ``argparse.parse_args()``, which reads ``sys.argv``.
    Under pytest that list carries pytest's own arguments (``tests/``,
    ``-v``, ...) and argparse bails. With only the script name present the
    parser sees no flags, so the tests run against its defaults.
    """
    clean_argv = ["skill_inventory.py"]
    monkeypatch.setattr(sys, "argv", clean_argv)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# --- _parse_frontmatter_text ----------------------------------------------
|
|
30
|
+
|
|
31
|
+
def test_parse_frontmatter_reads_scalar_fields():
    """Bare and double-quoted scalar values are both returned as strings."""
    document = (
        "---\n"
        "name: demo\n"
        'description: "Do a demo thing"\n'
        "---\n"
        "\n"
        "Body here.\n"
    )
    parsed = si._parse_frontmatter_text(document)
    assert parsed["name"] == "demo"
    assert parsed["description"] == "Do a demo thing"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_parse_frontmatter_reads_inline_list():
    """A flow-style ``[a, b]`` value under ``projects`` parses to a list."""
    document = (
        "---\n"
        "name: demo\n"
        "projects: [service, acme-app]\n"
        "---\n"
        "\n"
        "Body.\n"
    )
    parsed = si._parse_frontmatter_text(document)
    assert parsed["projects"] == ["service", "acme-app"]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_parse_frontmatter_reads_block_list():
    """A block-style sequence under ``projects`` parses to a list of
    strings in document order."""
    # NOTE(review): the list items are reproduced exactly as they appear in
    # this view; confirm their indentation against the upstream file.
    text = """---
name: demo
projects:
- service
- acme-app
- widget
---
"""
    fm = si._parse_frontmatter_text(text)
    assert fm["projects"] == ["service", "acme-app", "widget"]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_parse_frontmatter_no_frontmatter():
    """Markdown with no frontmatter block parses to an empty dict."""
    plain_markdown = "just a markdown file\n"
    parsed = si._parse_frontmatter_text(plain_markdown)
    assert parsed == {}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_parse_frontmatter_malformed_yaml_is_silent():
    """The deterministic insights pass runs unattended on cron; a single
    broken SKILL.md must not crash the whole inventory pass. Bad YAML →
    empty dict → skill is skipped for lack of description (its own
    graceful failure further up)."""
    # NOTE(review): the literal is reproduced exactly as it appears in this
    # view; confirm its inner indentation against the upstream file.
    text = """---
this is: not: valid: yaml
- because
indentation:
---
body
"""
    # The parser must swallow the YAML error and hand back an empty mapping.
    assert si._parse_frontmatter_text(text) == {}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_parse_frontmatter_non_dict_root_returns_empty():
    """Frontmatter whose top-level YAML node is a list rather than a
    mapping degrades to an empty dict instead of crashing."""
    list_rooted = "---\n- alpha\n- beta\n---\n"
    parsed = si._parse_frontmatter_text(list_rooted)
    assert parsed == {}
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# --- _coerce_projects ------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
def test_coerce_projects_list():
    """A list of clean strings comes back with the same items in order."""
    declared = ["service", "widget"]
    coerced = si._coerce_projects(declared)
    assert coerced == ["service", "widget"]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def test_coerce_projects_scalar_string_becomes_single_list():
    """``projects: service`` is common single-project shorthand; a bare
    string is wrapped in a one-element list rather than rejected."""
    coerced = si._coerce_projects("service")
    assert coerced == ["service"]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def test_coerce_projects_none_is_empty():
    """``None`` coerces to an empty list."""
    coerced = si._coerce_projects(None)
    assert coerced == []
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_coerce_projects_strips_empty_and_whitespace():
    """Empty and whitespace-only items are dropped; kept items are trimmed."""
    messy = ["service", "", " ", "widget "]
    coerced = si._coerce_projects(messy)
    assert coerced == ["service", "widget"]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_coerce_projects_ignores_non_strings():
    """Non-string list members are filtered out without raising."""
    # Only strings are emitted downstream, so a stray number or dict in the
    # list has to be dropped rather than crash the coercion.
    mixed = ["service", 42, {"x": 1}, "widget"]
    coerced = si._coerce_projects(mixed)
    assert coerced == ["service", "widget"]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_coerce_projects_unknown_type_is_empty():
    """A value that is neither a list, a string nor ``None`` coerces to
    an empty list."""
    for unsupported in (42, {"a": 1}):
        assert si._coerce_projects(unsupported) == []
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# --- end-to-end: main() emits the expected shape ---------------------------
|
|
131
|
+
|
|
132
|
+
def _write_skill(root: Path, sid: str, frontmatter: str) -> None:
|
|
133
|
+
d = root / sid
|
|
134
|
+
d.mkdir(parents=True)
|
|
135
|
+
(d / "SKILL.md").write_text(f"---\n{frontmatter}\n---\n\nBody.\n")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def test_inventory_emits_projects_field_when_present(
        tmp_path, monkeypatch, isolated_argv):
    """``main()`` emits a ``projects`` key for every skill: the declared
    list when frontmatter has one, an empty list otherwise."""
    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    _write_skill(
        skills_root, "deploy-staging",
        "description: Iterate on the avatar\nprojects: [service]")
    _write_skill(
        skills_root, "capability-loop",
        "description: Continuous improvement loop")
    monkeypatch.setattr(si, "SKILLS_DIR", skills_root)

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        si.main()
    entries = {entry["id"]: entry for entry in json.loads(captured.getvalue())}

    assert entries["deploy-staging"]["projects"] == ["service"]
    # The untagged skill still carries the key, with an empty list.
    assert entries["capability-loop"]["projects"] == []
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def test_inventory_skips_skill_without_description(
        tmp_path, monkeypatch, isolated_argv):
    """Pre-existing drop rule: a skill with no description is omitted.
    Supporting `projects` must not change that."""
    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    # Frontmatter deliberately has no description.
    _write_skill(skills_root, "mystery", "projects: [service]")
    monkeypatch.setattr(si, "SKILLS_DIR", skills_root)

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        si.main()

    emitted = json.loads(captured.getvalue())
    assert emitted == []
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# --- _infer_projects (rolling-history scope inference) ---------------------
|
|
175
|
+
|
|
176
|
+
def test_infer_projects_zero_history_returns_empty():
    """Cold start: with no invocation history at all, inference yields []
    so the hook falls through to the untagged rules."""
    no_history = {}
    inferred = si._infer_projects("deploy-staging", no_history)
    assert inferred == []
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def test_infer_projects_below_threshold_returns_empty():
    """One invocation in one project is noise (possibly an experiment from
    the wrong cwd), so nothing is tagged until ≥2 hits prove intent.
    Threshold default is INFER_THRESHOLD = 2."""
    history = {"service": {"deploy-staging": 1}}
    inferred = si._infer_projects("deploy-staging", history)
    assert inferred == []
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def test_infer_projects_single_project_at_threshold_tags():
    """Canonical success case: ≥2 invocations in exactly one project tag
    the skill with that project."""
    history = {"service": {"deploy-staging": 2}}
    inferred = si._infer_projects("deploy-staging", history)
    assert inferred == ["service"]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def test_infer_projects_one_project_above_threshold_other_below_tags():
    """Only the project with ≥2 hits counts as observed. The one-off hit
    elsewhere is ignored, so the skill stays scoped to a single project
    instead of graduating to global."""
    history = {
        "service": {"deploy-staging": 5},
        "widget": {"deploy-staging": 1},  # under the bar, so not observed
    }
    inferred = si._infer_projects("deploy-staging", history)
    assert inferred == ["service"]
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_infer_projects_two_projects_above_threshold_graduates():
    """Cross-cutting tool: ≥2 invocations in ≥2 projects returns [], which
    treats the skill as global. /design-style skills auto-graduate once the
    user demonstrably uses them across projects."""
    history = {
        "service": {"design": 3},
        "widget": {"design": 4},
    }
    inferred = si._infer_projects("design", history)
    assert inferred == []
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_infer_projects_only_other_skills_count():
    """A project whose history holds only a DIFFERENT skill must not enter
    this skill's observed set. Guards the dict shape against cross-talk
    between skills."""
    history = {
        "service": {"design": 5},  # a different skill, not deploy-staging
        "widget": {"deploy-staging": 3},
    }
    inferred = si._infer_projects("deploy-staging", history)
    assert inferred == ["widget"]
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def test_infer_projects_tolerates_bad_shapes():
    """Garbled history must not crash the cron path: non-dict project
    buckets and non-numeric counts are silently skipped."""
    history = {
        "service": {"deploy-staging": "lots"},  # count is not a number
        "broken": "not-a-dict",
        "widget": {"deploy-staging": 3},
    }
    inferred = si._infer_projects("deploy-staging", history)
    assert inferred == ["widget"]
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def test_infer_projects_threshold_param_actually_changes_behavior():
    """Pin the ``threshold`` parameter with data where different values
    give genuinely different outputs, not the same `[]` for every value.

    One project at count=1 is invisible at threshold=2, observed (and
    tagged) at threshold=1, and invisible again at threshold=3."""
    history = {"service": {"x": 1}}
    cases = (
        (2, []),           # below bar
        (1, ["service"]),  # meets bar
        (3, []),           # below bar
    )
    for bar, expected in cases:
        assert si._infer_projects("x", history, threshold=bar) == expected
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# --- end-to-end: inventory consumes by_project, frontmatter wins -----------
|
|
254
|
+
|
|
255
|
+
def test_inventory_uses_inference_when_frontmatter_missing(
        tmp_path, monkeypatch, isolated_argv):
    """A skill with no frontmatter `projects:` takes its scope from the
    invocation history passed via the --skills-by-project CLI flag."""
    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    # No `projects` key in this frontmatter.
    _write_skill(
        skills_root, "deploy-staging", "description: Iterate on the avatar")
    monkeypatch.setattr(si, "SKILLS_DIR", skills_root)

    history_file = tmp_path / "sbp.json"
    history_file.write_text(json.dumps({"service": {"deploy-staging": 5}}))
    cli_args = ["skill_inventory.py", "--skills-by-project", str(history_file)]
    monkeypatch.setattr(sys, "argv", cli_args)

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        si.main()
    entries = {entry["id"]: entry for entry in json.loads(captured.getvalue())}

    assert entries["deploy-staging"]["projects"] == ["service"]
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def test_inventory_frontmatter_supersedes_inference(
        tmp_path, monkeypatch, isolated_argv):
    """Explicit frontmatter `projects:` is authoritative even when rolling
    history points at a different (or graduated) scope, so users get
    deterministic control when they ask for it."""
    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    _write_skill(
        skills_root, "deploy-staging",
        "description: Iterate on the avatar\nprojects: [service]")
    monkeypatch.setattr(si, "SKILLS_DIR", skills_root)

    # This history alone would graduate the skill to global (≥2 hits in ≥2
    # projects); the frontmatter declaration has to take precedence.
    history = {
        "service": {"deploy-staging": 5},
        "widget": {"deploy-staging": 5},
    }
    history_file = tmp_path / "sbp.json"
    history_file.write_text(json.dumps(history))
    cli_args = ["skill_inventory.py", "--skills-by-project", str(history_file)]
    monkeypatch.setattr(sys, "argv", cli_args)

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        si.main()
    entries = {entry["id"]: entry for entry in json.loads(captured.getvalue())}

    assert entries["deploy-staging"]["projects"] == ["service"]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def test_inventory_no_history_file_falls_back_to_frontmatter_only(
        tmp_path, monkeypatch, isolated_argv):
    """With --skills-by-project omitted (e.g. a first run before any
    history exists) the inventory still works: inference yields [] for
    everything and only frontmatter-tagged skills get a scope."""
    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    _write_skill(skills_root, "untagged", "description: A skill")
    _write_skill(
        skills_root, "tagged",
        "description: A scoped skill\nprojects: [service]")
    monkeypatch.setattr(si, "SKILLS_DIR", skills_root)

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        si.main()
    entries = {entry["id"]: entry for entry in json.loads(captured.getvalue())}

    assert entries["untagged"]["projects"] == []
    assert entries["tagged"]["projects"] == ["service"]
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def test_inventory_explicit_empty_projects_blocks_inference(
        tmp_path, monkeypatch, isolated_argv):
    """`projects: []` in frontmatter is a deliberate "this skill is
    cross-project / global" declaration. History inference must NOT re-tag
    it to a single project: that would reverse the user's intent and
    quietly hard-block the skill outside the wrongly-inferred scope.

    The distinction pinned here: ``projects: []`` (key present, empty
    list) is authoritative; only a wholly absent ``projects:`` key falls
    through to inference. Regression for review-finding #1 (2026-04-24)."""
    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    _write_skill(
        skills_root, "global-by-declaration",
        "description: A cross-project skill\nprojects: []")
    monkeypatch.setattr(si, "SKILLS_DIR", skills_root)

    # History that WOULD produce scope `[service]` if inference were run.
    history_file = tmp_path / "sbp.json"
    history_file.write_text(
        json.dumps({"service": {"global-by-declaration": 5}}))
    cli_args = ["skill_inventory.py", "--skills-by-project", str(history_file)]
    monkeypatch.setattr(sys, "argv", cli_args)

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        si.main()
    entries = {entry["id"]: entry for entry in json.loads(captured.getvalue())}

    assert entries["global-by-declaration"]["projects"] == [], (
        "explicit `projects: []` in frontmatter must beat history "
        "inference — otherwise a user declaration silently reverses "
        "into a hard project-scoped filter"
    )
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""stats.py — hybrid ELO math.
|
|
2
|
+
|
|
3
|
+
Regression guard for the 'ELO stopped moving' symptom: level index +
|
|
4
|
+
level-up detection use integer `lifetime + session // 10` (no phantom
|
|
5
|
+
level-ups), while the ELO within-level slide uses float
|
|
6
|
+
`lifetime + session / 10` (rating nudges as raw XP accrues).
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
import stats
|
|
13
|
+
from stats import _compute_hybrid, compute_for_render
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture(autouse=True)
def _hermetic_stats_globals(monkeypatch):
    """Force ``stats`` module globals to the canonical baseline (craft
    ladder + 1000-2800 ELO range) for every test in this module.

    `stats.LEVELS`, `stats.ELO_MIN` and `stats.ELO_MAX` are filled in at
    import time from `~/.claude/coach/.user_config.json` (see
    `stats._load_runtime_config`). A user who has run
    `/config theme <other>` or `/config elo <m> <M>` would otherwise see
    these baseline-pinning tests fail under the installed-copy test
    workflow advertised in CLAUDE.md, because level names and ELO
    interpolation would follow their live preferences. The
    `_compute_hybrid` tests are about the math, not the chosen ladder,
    so defaults are forced here."""
    baseline = (
        ("LEVELS", stats._build_level_ladder()),
        ("ELO_MIN", 1000),
        ("ELO_MAX", 2800),
    )
    for attr_name, value in baseline:
        monkeypatch.setattr(stats, attr_name, value)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_level_stays_stable_under_session_xp():
    """With lifetime=4 (L2 Iterator), session XP from 0 through 15 must
    never change the level name."""
    seen_names = set()
    for session_xp in range(16):
        seen_names.add(_compute_hybrid(4, session_xp)["name"])
    assert seen_names == {"Iterator"}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_elo_increases_monotonically_across_session():
    """ELO never decreases as raw session XP accrues within a level."""
    prev = -1  # sentinel; the first rating only has to be >= -1
    for raw in range(16):
        elo = _compute_hybrid(4, raw)["elo"]
        assert elo >= prev, f"ELO regressed at raw={raw}: {elo} < {prev}"
        prev = elo
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_elo_moves_with_every_raw_xp_in_a_level():
    """The point of the hybrid: ELO is not frozen at its session=0 value
    and must actually rise between raw=0 and raw=15."""
    start_elo = _compute_hybrid(4, 0)["elo"]
    end_elo = _compute_hybrid(4, 15)["elo"]
    gain = end_elo - start_elo
    assert end_elo > start_elo
    assert gain >= 5  # a meaningful move, not a rounding artefact
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_level_up_fires_only_at_bank_boundary():
    """Lifetime=7 is L2, and the L3 threshold is 8. Raw session XP 0-9
    stays at L2 (bank_gain=0); raw 10+ crosses to L3 (bank_gain=1 →
    level_xp=8)."""
    for raw in range(16):
        expected_name = "Iterator" if raw < 10 else "Builder"
        assert _compute_hybrid(7, raw)["name"] == expected_name
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_no_phantom_level_up_at_sub_bank_session_xp():
    """At lifetime=7, raw=9 the float progress_xp is 7.9 while the integer
    level_xp stays at 7, below the L3 threshold of 8. The name must remain
    Iterator so the user never sees a level they would fall back from."""
    hybrid = _compute_hybrid(7, 9)
    assert hybrid["name"] == "Iterator"
    assert hybrid["level_xp"] == 7
    assert hybrid["progress_xp"] == pytest.approx(7.9)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_sigil_pct_glides_live_within_session():
    """Sigil pct follows the float progress_xp, not level_xp, so it moves
    on sub-bank raw XP changes even while level_xp is fixed."""
    early_pct = _compute_hybrid(4, 2)["sigil_pct"]
    later_pct = _compute_hybrid(4, 8)["sigil_pct"]
    assert later_pct > early_pct
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_zero_session_matches_pure_lifetime_baseline():
    """Sanity check: with session=0, level_xp and progress_xp both equal
    the lifetime value."""
    hybrid = _compute_hybrid(42, 0)
    for field in ("level_xp", "progress_xp"):
        assert hybrid[field] == 42
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_hybrid_renders_expected_elo_for_user_baseline():
    """Baseline lock: lifetime=4 with session=0 is L2 Iterator at ELO 1044
    (rendered as ◆ Ⅱ 1044 Iterator). Pins the formula against drift."""
    hybrid = _compute_hybrid(4, 0)
    assert hybrid["name"] == "Iterator"
    assert hybrid["elo"] == 1044
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# -----------------------------------------------------------------------------
|
|
102
|
+
# compute_for_render — public render-side helper consumed by the military
|
|
103
|
+
# theme's rank line and any future banner that needs idx + name + elo + roman
|
|
104
|
+
# + medal_count in one call. Medal count = min(5, idx // 4 + 1) — pins the
|
|
105
|
+
# rank-ribbon scaling.
|
|
106
|
+
|
|
107
|
+
@pytest.mark.parametrize(
    "lifetime,expected_idx,expected_name,expected_roman,expected_medals,expected_next",
    [
        (0, 0, "Drafter", "Ⅰ", 1, 3),         # L1 — first rung
        (15, 3, "Shipper", "Ⅳ", 1, 25),       # L4 — last 1-medal rank
        (25, 4, "Craftsman", "Ⅴ", 2, 40),     # L5 — first 2-medal rank
        (90, 7, "Sensei", "Ⅷ", 2, 125),       # L8 — last 2-medal rank
        (125, 8, "Luminary", "Ⅸ", 3, 165),    # L9 — first 3-medal rank
        (510, 15, "Prodigy", "ⅩⅥ", 4, 585),   # L16 — last 4-medal rank
        (585, 16, "Visionary", "ⅩⅦ", 5, 665), # L17 — first 5-medal rank (cap)
        (840, 19, "Paragon", "ⅩⅩ", 5, 935),   # L20 — still capped at 5
    ],
)
def test_compute_for_render_breakpoints(
    lifetime, expected_idx, expected_name, expected_roman, expected_medals,
    expected_next,
):
    """At each ladder breakpoint, ``compute_for_render`` reports the
    expected idx, name, roman numeral, medal count and next-level XP."""
    rendered = compute_for_render(lifetime, 0)
    expectations = (
        ("idx", expected_idx),
        ("name", expected_name),
        ("roman", expected_roman),
        ("medal_count", expected_medals),
        ("next_xp", expected_next),
    )
    for key, expected in expectations:
        assert rendered[key] == expected
    # compute_for_render is only a wrapper, so its ELO has to match what
    # _compute_hybrid gives for the same inputs (single source of truth).
    assert rendered["elo"] == _compute_hybrid(lifetime, 0)["elo"]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def test_compute_for_render_session_xp_affects_elo_not_medal():
    """Session XP inside a level slides ELO but leaves medal_count alone:
    medal_count depends only on level idx, not on float progress_xp."""
    without_session = compute_for_render(4, 0)
    with_session = compute_for_render(4, 15)
    # Both results are at the same level.
    assert without_session["medal_count"] == with_session["medal_count"]
    assert without_session["idx"] == with_session["idx"]
    # Session XP nudges the rating; it must not go down.
    assert with_session["elo"] >= without_session["elo"]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
import status
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_status_splits_lifetime_xp_buckets(tmp_path, monkeypatch, capsys):
    """``status.main()`` itemises lifetime XP into separate buckets.

    Points the ``status`` path globals at a throwaway profile, ledger,
    changelog and projects directory, runs ``main()``, and checks that the
    printed report names each bucket and no longer contains the
    "banked from past sessions" wording."""
    profile_path = tmp_path / "profile.yaml"
    ledger_path = tmp_path / "banked_sessions.json"
    changelog_path = tmp_path / "changelog.md"
    projects_dir = tmp_path / "projects"
    projects_dir.mkdir()

    ledger_path.write_text(json.dumps({"s1": {"xp": 12, "banked": 1}}))
    changelog_path.write_text("")
    profile_doc = {
        "entries": [{"id": "w1", "direction": "negative", "clean_streak_runs": 2}],
        "graduated": [{"id": "done"}],
        "archived": [{"id": "aged-out", "direction": "negative"}],
        "session_banked_xp": 4,
        "milestone_xp": 3,
        "manual_adjustments": 1,
    }
    profile_path.write_text(yaml.safe_dump(profile_doc))

    overrides = (
        ("PROFILE", profile_path),
        ("LEDGER", ledger_path),
        ("CHANGELOG", changelog_path),
        ("PROJECTS", projects_dir),
    )
    for attr_name, target in overrides:
        monkeypatch.setattr(status, attr_name, target)

    status.main()

    report = capsys.readouterr().out
    expected_fragments = (
        "Lifetime (15 xp)",
        "graduated patterns (1 × 5)",
        "completed sessions (1 sessions at 10:1)",
        "mid-streak milestones",
        "manual adjustments",
        "1 retired",
        "1 archived",
    )
    for fragment in expected_fragments:
        assert fragment in report
    assert "banked from past sessions" not in report
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Statusline render path is fail-soft.
|
|
2
|
+
|
|
3
|
+
CLAUDE.md treats hook fail-soft as non-negotiable; the statusline shares
|
|
4
|
+
the same property pragmatically — it runs on every Claude Code render,
|
|
5
|
+
and a traceback there both blanks the prefix and noises up the user's
|
|
6
|
+
terminal. These tests pin both `default_statusline.main()` and
|
|
7
|
+
`stats.main()` to swallow exceptions and exit 0, matching the hook
|
|
8
|
+
fail-soft contract in `hooks/coach-session-start.py`.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import io
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
import pytest
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_default_statusline_main_failsafe_on_render_exception(monkeypatch, capsys):
    """When render_segment raises, default_statusline.main still returns 0
    and no traceback reaches stderr."""
    import default_statusline as ds

    def _explode(*_args, **_kwargs):
        raise RuntimeError("simulated corrupt profile mid-write")

    monkeypatch.setattr(ds, "render_segment", _explode)

    stdin_payload = json.dumps({
        "model": {"display_name": "Claude Sonnet 4.6"},
        "context_window": {"used_percentage": 42},
    })
    monkeypatch.setattr("sys.stdin", io.StringIO(stdin_payload))

    exit_code = ds.main()
    assert exit_code == 0
    stderr_text = capsys.readouterr().err
    # Whether the prefix was flushed before the exception is irrelevant;
    # the contract is only that nothing traceback-like escaped to stderr.
    assert "Traceback" not in stderr_text
    assert "RuntimeError" not in stderr_text
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_stats_main_failsafe_on_render_exception(monkeypatch, capsys):
    """When render_segment raises, stats.main still returns 0 and no
    traceback reaches stderr."""
    import stats

    def _explode(*_args, **_kwargs):
        raise RuntimeError("simulated render failure")

    monkeypatch.setattr(stats, "render_segment", _explode)
    monkeypatch.setattr("sys.stdin", io.StringIO("{}"))

    exit_code = stats.main()
    assert exit_code == 0
    stderr_text = capsys.readouterr().err
    assert "Traceback" not in stderr_text
    assert "RuntimeError" not in stderr_text
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_default_statusline_main_failsafe_on_corrupt_stdin(monkeypatch, capsys):
    """A non-JSON stdin payload still exits 0. _read_stdin_payload's own
    try/except already covers this; it is pinned here as the user-visible
    contract."""
    import default_statusline as ds

    broken_stdin = io.StringIO("{ not valid json")
    monkeypatch.setattr("sys.stdin", broken_stdin)

    exit_code = ds.main()
    assert exit_code == 0
    stderr_text = capsys.readouterr().err
    assert "Traceback" not in stderr_text
|