@rm0nroe/coach-claw 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +311 -0
- package/coach/README.md +99 -0
- package/coach/bin/aggregate_facets.py +274 -0
- package/coach/bin/analyze.py +678 -0
- package/coach/bin/bank.py +247 -0
- package/coach/bin/banner_themes.py +645 -0
- package/coach/bin/coach_paths.py +33 -0
- package/coach/bin/coexistence_check.py +129 -0
- package/coach/bin/configure.py +245 -0
- package/coach/bin/cron_check.py +81 -0
- package/coach/bin/default_statusline.py +135 -0
- package/coach/bin/doctor.py +663 -0
- package/coach/bin/insights-llm.sh +264 -0
- package/coach/bin/insights.sh +163 -0
- package/coach/bin/insights_window.py +111 -0
- package/coach/bin/marker_io.py +154 -0
- package/coach/bin/merge.py +671 -0
- package/coach/bin/redact.py +86 -0
- package/coach/bin/render_env.py +148 -0
- package/coach/bin/reward_hints.py +87 -0
- package/coach/bin/run-insights.sh +20 -0
- package/coach/bin/run_with_lock.py +85 -0
- package/coach/bin/scoring.py +260 -0
- package/coach/bin/skill_inventory.py +215 -0
- package/coach/bin/stats.py +459 -0
- package/coach/bin/status.py +293 -0
- package/coach/bin/statusline_self_patch.py +205 -0
- package/coach/bin/statusline_variants.py +146 -0
- package/coach/bin/statusline_wrap.py +244 -0
- package/coach/bin/statusline_wrap_action.py +460 -0
- package/coach/bin/switch_to_plugin.py +256 -0
- package/coach/bin/themes.py +256 -0
- package/coach/bin/user_config.py +176 -0
- package/coach/bin/xp_accounting.py +98 -0
- package/coach/changelog.md +4 -0
- package/coach/default-statusline-command.sh +19 -0
- package/coach/default-statusline-wrap-command.sh +15 -0
- package/coach/profile.yaml +37 -0
- package/coach/tests/conftest.py +13 -0
- package/coach/tests/test_aggregate_facets.py +379 -0
- package/coach/tests/test_analyze_aggregate.py +153 -0
- package/coach/tests/test_analyze_redaction.py +105 -0
- package/coach/tests/test_analyze_strengths.py +165 -0
- package/coach/tests/test_bank_atomic_write.py +61 -0
- package/coach/tests/test_bank_concurrency.py +126 -0
- package/coach/tests/test_banner_themes.py +981 -0
- package/coach/tests/test_celebrate_dedup.py +409 -0
- package/coach/tests/test_coach_paths.py +50 -0
- package/coach/tests/test_coexistence_check.py +128 -0
- package/coach/tests/test_configure.py +258 -0
- package/coach/tests/test_cron_check.py +118 -0
- package/coach/tests/test_cron_nudge_hook.py +134 -0
- package/coach/tests/test_detection_parity.py +105 -0
- package/coach/tests/test_doctor.py +595 -0
- package/coach/tests/test_hook_bespoke_dispatch.py +288 -0
- package/coach/tests/test_hook_module_resolution.py +116 -0
- package/coach/tests/test_hook_relevance.py +996 -0
- package/coach/tests/test_hook_render_env.py +364 -0
- package/coach/tests/test_hook_session_id_guard.py +160 -0
- package/coach/tests/test_insights_llm.py +759 -0
- package/coach/tests/test_insights_llm_venv_path.py +109 -0
- package/coach/tests/test_insights_window.py +237 -0
- package/coach/tests/test_install.py +1150 -0
- package/coach/tests/test_install_pyyaml_fallback.py +142 -0
- package/coach/tests/test_marker_consumption.py +167 -0
- package/coach/tests/test_marker_writer_locking.py +305 -0
- package/coach/tests/test_merge.py +413 -0
- package/coach/tests/test_no_broken_mktemp.py +90 -0
- package/coach/tests/test_render_env.py +137 -0
- package/coach/tests/test_render_env_glyphs.py +119 -0
- package/coach/tests/test_reward_hints.py +59 -0
- package/coach/tests/test_scoring.py +147 -0
- package/coach/tests/test_session_start_weekly_trigger.py +92 -0
- package/coach/tests/test_skill_inventory.py +368 -0
- package/coach/tests/test_stats_hybrid.py +142 -0
- package/coach/tests/test_status_accounting.py +41 -0
- package/coach/tests/test_statusline_failsafe.py +70 -0
- package/coach/tests/test_statusline_self_patch.py +261 -0
- package/coach/tests/test_statusline_variants.py +110 -0
- package/coach/tests/test_statusline_wrap.py +196 -0
- package/coach/tests/test_statusline_wrap_action.py +408 -0
- package/coach/tests/test_switch_to_plugin.py +360 -0
- package/coach/tests/test_themes.py +104 -0
- package/coach/tests/test_user_config.py +160 -0
- package/coach/tests/test_wrap_announce_hook.py +130 -0
- package/coach/tests/test_xp_accounting.py +55 -0
- package/hooks/coach-session-start.py +536 -0
- package/hooks/coach-user-prompt.py +2288 -0
- package/install-launchd.sh +102 -0
- package/install.sh +597 -0
- package/launchd/com.local.claude-coach.plist.template +34 -0
- package/launchd/run-insights.sh +20 -0
- package/npm/coach-claw.js +259 -0
- package/package.json +52 -0
- package/requirements.txt +11 -0
- package/settings-snippet.json +31 -0
- package/skills/coach/SKILL.md +107 -0
- package/skills/coach-insights/SKILL.md +78 -0
- package/skills/config/SKILL.md +149 -0
|
@@ -0,0 +1,759 @@
|
|
|
1
|
+
"""Integration tests for coach/bin/insights-llm.sh.
|
|
2
|
+
|
|
3
|
+
Exercise the actual bash wrapper via subprocess (not just the Python helpers
|
|
4
|
+
it composes) so the shell→Python boundary is covered. Uses
|
|
5
|
+
COACH_INSIGHTS_LLM_SKIP_REFRESH=1 to bypass the `claude -p /insights`
|
|
6
|
+
subprocess and operate on a fixture facets directory.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import fcntl
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import shutil
|
|
14
|
+
import subprocess
|
|
15
|
+
import time
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import pytest
|
|
19
|
+
import yaml
|
|
20
|
+
|
|
21
|
+
SCRIPT = Path(__file__).resolve().parent.parent / "bin" / "insights-llm.sh"
|
|
22
|
+
BIN_DIR = SCRIPT.parent
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _seed_coach_dir(tmp_path: Path) -> Path:
    """Create a throwaway coach directory under ``tmp_path``.

    The directory receives an empty ``profile.yaml`` (schema version 1,
    no entries, no recent runs), an empty ``changelog.md``, and a fresh
    git repository holding a single empty ``init`` commit.

    Returns the path of the created ``coach`` directory.
    """
    root = tmp_path / "coach"
    root.mkdir()

    empty_profile = dict(
        schema_version=1,
        updated=None,
        entries=[],
        recent_runs=[],
    )
    (root / "profile.yaml").write_text(yaml.safe_dump(empty_profile))
    (root / "changelog.md").touch()

    # Identity is passed inline so the commit works on hosts without a
    # configured git user.
    identity = ["-c", "user.email=t@t", "-c", "user.name=t"]
    git_steps = (
        ["git", "init", "-q"],
        ["git", *identity, "commit", "--allow-empty", "-q", "-m", "init"],
    )
    for argv in git_steps:
        subprocess.run(argv, cwd=root, check=True)
    return root
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _seed_facets(tmp_path: Path, n: int = 5) -> Path:
    """Write ``n`` fixture facet files into a new ``tmp_path/facets`` dir.

    Files are named ``s0.json`` … ``s{n-1}.json``. Each one records a
    single ``misunderstood_request`` and a single ``wrong_approach``
    friction together with a ``good_debugging`` primary success.

    Returns the facets directory.
    """
    out_dir = tmp_path / "facets"
    out_dir.mkdir()
    for idx in range(n):
        session = f"s{idx}"
        payload = {
            "session_id": session,
            "friction_counts": {"misunderstood_request": 1, "wrong_approach": 1},
            "friction_detail": f"session {idx} mislabeled the work and went the wrong direction",
            "primary_success": "good_debugging",
            "brief_summary": f"session {idx} drove a bug to root cause",
        }
        (out_dir / f"{session}.json").write_text(json.dumps(payload))
    return out_dir
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _run(coach_dir: Path, facets: Path, *args: str) -> subprocess.CompletedProcess:
    """Run the real wrapper script with the LLM refresh step skipped.

    The child inherits the current environment, overlaid with the coach
    directory override, the fixture facets directory and
    ``COACH_INSIGHTS_LLM_SKIP_REFRESH=1``. Any ``args`` are forwarded to
    the script verbatim. Output is captured as text; the exit code is
    not checked here, so callers assert on it themselves.
    """
    overrides = {
        "COACH_DIR_OVERRIDE": str(coach_dir),
        "COACH_FACETS_DIR": str(facets),
        "COACH_INSIGHTS_LLM_SKIP_REFRESH": "1",
        # Blank out any GIT_* values inherited from the test runner so the
        # wrapper's commit step works against the throwaway coach_dir repo.
        "GIT_DIR": "",
        "GIT_WORK_TREE": "",
    }
    child_env = dict(os.environ)
    child_env.update(overrides)

    command = ["bash", str(SCRIPT)]
    command.extend(args)
    return subprocess.run(command, env=child_env, capture_output=True, text=True)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_run_id_prefix_distinguishes_weekly(tmp_path: Path) -> None:
    """A default run reports a weekly run id and tags merged entries with it."""
    coach_dir = _seed_coach_dir(tmp_path)
    outcome = _run(coach_dir, _seed_facets(tmp_path))

    assert outcome.returncode == 0, outcome.stderr
    assert "run_id=insights-weekly-" in outcome.stdout

    # Every source run recorded on a merged entry must carry the weekly prefix.
    merged = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    entries = merged.get("entries") or []
    assert entries, "expected merge to land at least one entry"
    recorded_runs = [
        run_id
        for entry in entries
        for run_id in (entry.get("source_runs") or [])
    ]
    for run_id in recorded_runs:
        assert run_id.startswith("insights-weekly-"), run_id
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_throttle_marker_set_on_success(tmp_path: Path) -> None:
    """A successful run creates the ``.last_weekly_insights`` throttle marker."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    throttle = coach_dir / ".last_weekly_insights"
    # Precondition: a freshly seeded coach dir has no marker yet.
    assert not throttle.exists()

    outcome = _run(coach_dir, facets)

    assert outcome.returncode == 0, outcome.stderr
    assert throttle.exists(), "throttle marker was not touched"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_throttle_skips_recent_run(tmp_path: Path) -> None:
    """A second run straight after a successful one is skipped by the throttle."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)

    # The first invocation lands the marker.
    first = _run(coach_dir, facets)
    assert first.returncode == 0

    # The immediate follow-up must report a skip ...
    second = _run(coach_dir, facets)
    assert second.returncode == 0
    assert "skipped" in second.stdout.lower()

    # ... and must NOT have merged again: still exactly one recent run.
    recent = yaml.safe_load((coach_dir / "profile.yaml").read_text()).get("recent_runs")
    assert len(recent or []) == 1
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_force_overrides_cooldown(tmp_path: Path) -> None:
    """``--force`` runs the merge again even though the cooldown marker is fresh."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)

    initial = _run(coach_dir, facets)
    assert initial.returncode == 0

    forced = _run(coach_dir, facets, "--force")
    assert forced.returncode == 0
    assert "skipped" not in forced.stdout.lower()

    # Two successful merges → two entries in recent_runs.
    recent = yaml.safe_load((coach_dir / "profile.yaml").read_text()).get("recent_runs")
    assert len(recent or []) == 2
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def test_dry_run_skips_merge(tmp_path: Path) -> None:
    """``--dry-run`` prints the detections but leaves profile and marker alone."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    throttle = coach_dir / ".last_weekly_insights"

    outcome = _run(coach_dir, facets, "--dry-run")
    assert outcome.returncode == 0, outcome.stderr

    # The profile is exactly as seeded.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    for key in ("entries", "recent_runs"):
        assert profile.get(key) == []

    # No throttle marker was written.
    assert not throttle.exists()

    # stdout carries both the dry-run notice and the aggregator's JSON list.
    assert "(dry-run; merge skipped" in outcome.stdout
    assert "misunderstood-request" in outcome.stdout
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_invalid_facets_dir_bails_on_no_evidence(tmp_path: Path) -> None:
    """A missing facets dir counts as "no current-window evidence".

    The wrapper must bail with exit 7 (the v0.5.1 evidence gate). Before
    v0.5.1 this case merged `[]` as a clean evidence pass; the
    n_sessions==0 gate closed that bug class.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    missing_dir = tmp_path / "no-facets-here"

    outcome = _run(coach_dir, missing_dir)

    assert outcome.returncode == 7, (
        f"expected exit 7 (no evidence), got {outcome.returncode}\n"
        f"stderr={outcome.stderr}"
    )
    assert "no current-window evidence" in outcome.stderr

    # No merge ran, so the profile MUST NOT have advanced.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert profile.get("entries") == []
    assert profile.get("recent_runs") in (None, [])
    assert not (coach_dir / ".last_weekly_insights").exists()
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def test_below_threshold_emits_zero_detections(tmp_path: Path) -> None:
    """Sparse friction (1 of 10 sessions) yields zero detections.

    The wrapper is still expected to exit 0 and merge, because an empty
    pass is itself a meaningful signal.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = tmp_path / "facets"
    facets.mkdir()

    # One session with friction, nine without.
    payloads = [{"session_id": "s0", "friction_counts": {"misunderstood_request": 1}}]
    payloads += [{"session_id": f"s{i}"} for i in range(1, 10)]
    for payload in payloads:
        (facets / f"{payload['session_id']}.json").write_text(json.dumps(payload))

    outcome = _run(coach_dir, facets)

    assert outcome.returncode == 0
    assert "detections=0" in outcome.stdout
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert profile.get("entries") == []
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _build_isolated_bin(
    tmp_path: Path,
    *,
    agg_body: str,
    claude_body: str | None = None,
) -> Path:
    """Assemble ``tmp_path/bin`` with the real wrapper and a fake aggregator.

    The real wrapper, lock helper and merge sidecars are copied from
    ``BIN_DIR``; ``aggregate_facets.py`` is replaced by ``agg_body`` so
    failure-mode tests can control the aggregator without touching the
    real bundle.

    When ``claude_body`` is given, an executable ``claude`` shim with
    that content is written into the same directory. Pair it with
    ``_sandbox_path_dir(..., extra_dir=fake_bin)`` so the wrapper finds
    the shim rather than the host's real ``claude`` CLI.

    Returns the created directory.
    """
    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()

    real_files = (
        "insights-llm.sh",
        "run_with_lock.py",
        "merge.py",
        "marker_io.py",
        "reward_hints.py",
        "xp_accounting.py",
    )
    for filename in real_files:
        (fake_bin / filename).write_text((BIN_DIR / filename).read_text())

    # Everything copied so far is a script; mark all of it executable.
    for script in [*fake_bin.glob("*.sh"), *fake_bin.glob("*.py")]:
        script.chmod(0o755)

    # Test-controlled executables: the aggregator always, `claude` on demand.
    controlled = {"aggregate_facets.py": agg_body}
    if claude_body is not None:
        controlled["claude"] = claude_body
    for filename, body in controlled.items():
        shim = fake_bin / filename
        shim.write_text(body)
        shim.chmod(0o755)

    return fake_bin
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _sandbox_path_dir(tmp_path: Path, *, extra_dir: Path | None = None) -> str:
    """Build a PATH string that offers system tools but not the host `claude`.

    A new ``tmp_path/sandbox-bin`` directory gets symlinks to the host's
    ``python3`` and ``bash``; the resulting PATH is that directory
    followed by ``/usr/bin`` and ``/bin`` (dirname, mkdir, mktemp, date,
    git, touch, kill, sleep, …).

    Rationale, per the original author's notes: the wrapper resolves
    python3 (insights-llm.sh:57) BEFORE checking `claude`, re-execs
    through `bash` (line 81) and shells out to coreutils throughout, so
    an empty PATH would fail at python3 resolution with exit 2 and a
    python3+bash-only PATH would fail at the next `dirname` call.
    /usr/bin:/bin is used as the base because `claude` is typically
    installed elsewhere (nvm, Homebrew, /usr/local/bin).

    Pass ``extra_dir`` to put a directory (e.g. a ``fake_bin`` holding a
    `claude` shim) at the front of the PATH; omit it for the
    missing-claude case, in which the function asserts that `claude`
    really is unresolvable.
    """
    pinned_dir = tmp_path / "sandbox-bin"
    pinned_dir.mkdir()

    python3_path, bash_path = shutil.which("python3"), shutil.which("bash")
    assert python3_path, "host has no python3 — cannot build sandbox PATH"
    assert bash_path, "host has no bash — cannot build sandbox PATH"
    for link_name, target in (("python3", python3_path), ("bash", bash_path)):
        os.symlink(target, pinned_dir / link_name)

    base = [str(pinned_dir), "/usr/bin", "/bin"]
    if extra_dir is not None:
        return ":".join([str(extra_dir), *base])

    # Guard against a regression where the host PATH leaks in by accident:
    # the missing-claude test would then pass silently.
    assert not shutil.which("claude", path=":".join(base)), (
        f"sandbox PATH unexpectedly resolves `claude`: {base}"
    )
    return ":".join(base)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _run_with_path(
    *,
    coach_dir: Path,
    facets: Path,
    fake_bin: Path,
    path: str,
    extra_env: dict | None = None,
    args: tuple = ("--force",),
    timeout: int = 30,
) -> subprocess.CompletedProcess:
    """Run ``fake_bin``'s copy of the wrapper under a hand-built environment.

    Unlike ``_run``, the child does not inherit the parent environment
    and ``COACH_INSIGHTS_LLM_SKIP_REFRESH`` is not set, so the wrapper's
    real LLM-step branch executes. ``path`` becomes the child's PATH;
    ``extra_env`` entries, if any, are applied last. Output is captured
    as text and the call is bounded by ``timeout`` seconds. Used by the
    LLM-fail-hard tests.
    """
    child_env = {
        # Deliberately NOT derived from os.environ: inheriting the parent
        # PATH would re-introduce the host's real `claude` binary.
        "HOME": os.environ.get("HOME", str(coach_dir.parent)),
        "PATH": path,
        "COACH_DIR_OVERRIDE": str(coach_dir),
        "COACH_FACETS_DIR": str(facets),
        "GIT_DIR": "",
        "GIT_WORK_TREE": "",
        **(extra_env or {}),
    }
    command = ["bash", str(fake_bin / "insights-llm.sh"), *args]
    return subprocess.run(
        command,
        env=child_env,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def test_aggregator_failure_bails_before_merge(tmp_path: Path) -> None:
    """A nonzero aggregator exit must abort the wrapper before merging.

    When aggregate_facets.py exits nonzero the wrapper MUST:
      - exit nonzero itself (not silently treat empty $DET as `[]`)
      - not run merge.py (profile + changelog unchanged)
      - not touch the throttle marker (so the next session can retry)

    Guards the shell→Python boundary documented in
    feedback_test_gap_shell_helper_boundary.md and the P1 caught in
    teammate review: the original `... > $DET` redirect lost the
    aggregator's nonzero exit code, and the inline
    `try: print(len(...)) except: print(0)` heredoc swallowed JSON parse
    errors, so a busted aggregator was committed as a clean evidence
    pass and consumed the weekly cadence.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    fake_bin = _build_isolated_bin(
        tmp_path, agg_body="#!/usr/bin/env python3\nimport sys\nsys.exit(7)\n"
    )

    profile_file = coach_dir / "profile.yaml"
    changelog_file = coach_dir / "changelog.md"
    throttle = coach_dir / ".last_weekly_insights"

    # Snapshot the state the wrapper must leave untouched.
    profile_before = profile_file.read_text()
    changelog_size_before = changelog_file.stat().st_size
    assert not throttle.exists()

    child_env = dict(os.environ)
    child_env.update(
        {
            "COACH_DIR_OVERRIDE": str(coach_dir),
            "COACH_FACETS_DIR": str(facets),
            "COACH_INSIGHTS_LLM_SKIP_REFRESH": "1",
            "GIT_DIR": "",
            "GIT_WORK_TREE": "",
        }
    )
    outcome = subprocess.run(
        ["bash", str(fake_bin / "insights-llm.sh"), "--force"],
        env=child_env,
        capture_output=True,
        text=True,
    )

    assert outcome.returncode != 0, (
        f"wrapper exited 0 despite aggregator failure:\n"
        f"stdout={outcome.stdout}\nstderr={outcome.stderr}"
    )
    assert "bailing before merge" in outcome.stderr, outcome.stderr
    # The marker MUST NOT exist: the next session start should retry
    # instead of waiting 7 more days on a failed run.
    assert not throttle.exists(), "throttle marker was touched despite aggregator failure"
    # Profile and changelog are UNCHANGED.
    assert profile_file.read_text() == profile_before, (
        "profile.yaml was mutated despite aggregator failure"
    )
    assert changelog_file.stat().st_size == changelog_size_before, (
        "changelog.md was appended to despite aggregator failure"
    )
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def test_aggregator_garbled_output_bails_before_merge(tmp_path: Path) -> None:
    """An aggregator that exits 0 with unparseable JSON must also abort.

    Merging an unreadable detections file as `[]` is the same failure
    mode as a nonzero aggregator exit, just one layer down.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    garbled_aggregator = (
        "#!/usr/bin/env python3\n"
        "import sys\n"
        "sys.stdout.write('not valid json {{{')\n"
        "sys.exit(0)\n"
    )
    fake_bin = _build_isolated_bin(tmp_path, agg_body=garbled_aggregator)

    profile_file = coach_dir / "profile.yaml"
    profile_before = profile_file.read_text()
    throttle = coach_dir / ".last_weekly_insights"

    child_env = dict(os.environ)
    child_env.update(
        {
            "COACH_DIR_OVERRIDE": str(coach_dir),
            "COACH_FACETS_DIR": str(facets),
            "COACH_INSIGHTS_LLM_SKIP_REFRESH": "1",
            "GIT_DIR": "",
            "GIT_WORK_TREE": "",
        }
    )
    outcome = subprocess.run(
        ["bash", str(fake_bin / "insights-llm.sh"), "--force"],
        env=child_env,
        capture_output=True,
        text=True,
    )

    assert outcome.returncode != 0
    assert "unparseable" in outcome.stderr or "bailing" in outcome.stderr
    assert not throttle.exists()
    assert profile_file.read_text() == profile_before
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def test_concurrent_run_skips_when_lock_held(tmp_path: Path) -> None:
    """A wrapper started while the weekly lock is held must skip with exit 10.

    If another process already holds .weekly_insights.lock, the wrapper
    must exit 10 (skipped) without running the LLM call, aggregator, or
    merge. Guards the P1 race where two SessionStart hooks fire within
    the slow `claude -p /insights` window — without this serialization,
    both wrappers would run the LLM call, both aggregate, both merge,
    prematurely advancing debounce/graduation streaks.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)

    lock_path = coach_dir / ".weekly_insights.lock"
    lock_path.touch()
    fd = os.open(str(lock_path), os.O_RDWR)
    try:
        # Acquire inside the try so the descriptor is closed even if the
        # non-blocking flock itself raises.
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        try:
            # Play the "other process": hold the lock for the whole run.
            result = _run(coach_dir, facets, "--force")
        finally:
            fcntl.flock(fd, fcntl.LOCK_UN)
    finally:
        os.close(fd)

    assert result.returncode == 10, (
        f"expected exit 10 (lock contention skip), got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    assert "concurrent" in result.stdout.lower()
    # No merge ran — recent_runs untouched, no marker.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert profile.get("recent_runs") in (None, [])
    assert profile.get("entries") in (None, [])
    assert not (coach_dir / ".last_weekly_insights").exists()
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def test_concurrent_wrappers_only_one_merges(tmp_path: Path) -> None:
    """Two wrappers racing for the lock: exactly one merges, the other skips.

    End-to-end concurrent run: launch two wrappers 0.2s apart against a
    fixture whose aggregator emits one detection and then sleeps 1.5s.
    Exactly one wrapper must win the lock and merge (rc=0); the other
    must exit 10. The profile ends up with one entry and recent_runs
    with one append.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    slow_agg = (
        "#!/usr/bin/env python3\n"
        "import json, sys, time\n"
        "sys.stdout.write(json.dumps([{\n"
        "    'id': 'misunderstood-request',\n"
        "    'name': 'misunderstood request',\n"
        "    'direction': 'negative',\n"
        "    'nudge': 'test',\n"
        "    'examples': [],\n"
        "    'priority': 2,\n"
        "}]))\n"
        "sys.stdout.flush()\n"
        "time.sleep(1.5)\n"
    )
    fake_bin = _build_isolated_bin(tmp_path, agg_body=slow_agg)

    env = {
        **os.environ,
        "COACH_DIR_OVERRIDE": str(coach_dir),
        "COACH_FACETS_DIR": str(facets),
        "COACH_INSIGHTS_LLM_SKIP_REFRESH": "1",
        "GIT_DIR": "",
        "GIT_WORK_TREE": "",
    }
    command = ["bash", str(fake_bin / "insights-llm.sh"), "--force"]

    procs: list[subprocess.Popen] = []
    try:
        procs.append(
            subprocess.Popen(
                command,
                env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
            )
        )
        # Give the first wrapper a head start so it is the one holding
        # the lock while the slow aggregator runs.
        time.sleep(0.2)
        procs.append(
            subprocess.Popen(
                command,
                env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
            )
        )
        outputs = [proc.communicate(timeout=15) for proc in procs]
    finally:
        # communicate() does not kill the child when its timeout expires;
        # reap stragglers and release their pipes so a hung wrapper
        # cannot outlive the test.
        for proc in procs:
            if proc.poll() is None:
                proc.kill()
                proc.wait()
            for stream in (proc.stdout, proc.stderr):
                if stream is not None:
                    stream.close()

    p_a, p_b = procs
    (out_a, err_a), (out_b, err_b) = outputs

    rcs = sorted([p_a.returncode, p_b.returncode])
    assert rcs == [0, 10], (
        f"expected one winner (rc=0) + one skipper (rc=10), got {rcs}\n"
        f"A: rc={p_a.returncode} stdout={out_a!r} stderr={err_a!r}\n"
        f"B: rc={p_b.returncode} stdout={out_b!r} stderr={err_b!r}"
    )
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert len(profile.get("recent_runs") or []) == 1, (
        f"expected exactly one recent_run after concurrent race, got {profile.get('recent_runs')}"
    )
    assert len(profile.get("entries") or []) == 1
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
# --- LLM-step fail-hard regression suite ----------------------------------
|
|
497
|
+
#
|
|
498
|
+
# Mirrors test_aggregator_failure_bails_before_merge. The bug class is the
|
|
499
|
+
# same — a refresh step that fails silently lets merge.py treat
|
|
500
|
+
# stale-or-empty facets as a clean evidence pass, advancing absence-based
|
|
501
|
+
# streaks on phantom data. The only difference is which step fails: these
|
|
502
|
+
# three cases cover the LLM refresh (insights-llm.sh:133–164) instead of
|
|
503
|
+
# the aggregator (insights-llm.sh:175–199).
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _aggregator_should_not_run_body() -> str:
    """Return source for a tripwire aggregator used by the LLM-fail-hard tests.

    The desired wrapper behavior is to exit 6 *before* the aggregator
    stage, in which case this script never runs. If the wrapper
    regresses to fail-soft and falls through, the script writes a
    distinctive sentinel to stderr (and an empty JSON list to stdout) so
    the calling test can detect that it was invoked.
    """
    script_lines = [
        "#!/usr/bin/env python3",
        "import sys",
        "sys.stderr.write('AGG_RAN_BUT_SHOULD_NOT_HAVE\\n')",
        "sys.stdout.write('[]\\n')",
        "sys.exit(0)",
    ]
    return "".join(f"{line}\n" for line in script_lines)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def test_missing_claude_bails_before_merge(tmp_path: Path) -> None:
    """With no `claude` on PATH the wrapper must exit 6 before doing anything.

    "Anything" means aggregating, merging, or touching the throttle
    marker. Reproduces the v0.5.1 P1 #1a teammate finding: a 4/5
    weakness graduated with +5 XP under fail-soft + missing claude +
    empty facets.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    fake_bin = _build_isolated_bin(
        tmp_path, agg_body=_aggregator_should_not_run_body()
    )

    profile_file = coach_dir / "profile.yaml"
    profile_before = profile_file.read_text()
    throttle = coach_dir / ".last_weekly_insights"
    assert not throttle.exists()

    # The sandbox PATH offers python3 + bash but NO claude.
    sandbox_path = _sandbox_path_dir(tmp_path)

    outcome = _run_with_path(
        coach_dir=coach_dir, facets=facets, fake_bin=fake_bin, path=sandbox_path
    )

    assert outcome.returncode == 6, (
        f"expected exit 6 (LLM refresh failed), got {outcome.returncode}\n"
        f"stdout={outcome.stdout}\nstderr={outcome.stderr}"
    )
    stderr = outcome.stderr
    assert "claude CLI not on PATH" in stderr
    assert "bailing before merge" in stderr
    assert "AGG_RAN_BUT_SHOULD_NOT_HAVE" not in stderr, (
        "wrapper fell through to aggregator despite missing claude"
    )
    assert not throttle.exists(), "throttle marker was touched despite missing claude"
    assert profile_file.read_text() == profile_before
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def test_claude_nonzero_exit_bails_before_merge(tmp_path: Path) -> None:
    """A failing `claude -p /insights` must make the wrapper exit 6.

    Covers nonzero exits such as a plan that does not grant access or a
    transient API failure. The wrapper must bail before merge/marker,
    for the same reason as the missing-claude case.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    # The `claude` shim fails unconditionally.
    fake_bin = _build_isolated_bin(
        tmp_path,
        agg_body=_aggregator_should_not_run_body(),
        claude_body="#!/bin/sh\nexit 1\n",
    )

    profile_file = coach_dir / "profile.yaml"
    profile_before = profile_file.read_text()
    throttle = coach_dir / ".last_weekly_insights"

    # fake_bin goes first on PATH so the shim is the `claude` that resolves.
    sandbox_path = _sandbox_path_dir(tmp_path, extra_dir=fake_bin)

    outcome = _run_with_path(
        coach_dir=coach_dir, facets=facets, fake_bin=fake_bin, path=sandbox_path
    )

    assert outcome.returncode == 6, (
        f"expected exit 6, got {outcome.returncode}\n"
        f"stdout={outcome.stdout}\nstderr={outcome.stderr}"
    )
    stderr = outcome.stderr
    assert "exited rc=" in stderr
    assert "bailing before merge" in stderr
    assert "AGG_RAN_BUT_SHOULD_NOT_HAVE" not in stderr
    assert not throttle.exists()
    assert profile_file.read_text() == profile_before
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def test_no_evidence_bails_before_merge(tmp_path: Path) -> None:
    """An empty evidence window must end in wrapper exit 7, not a merge.

    When the aggregator finds n_sessions == 0 in the window it exits
    EXIT_NO_EVIDENCE=3; the wrapper MUST translate that to its own
    exit 7 and bail before merge/marker. Reproduces v0.5.1 P1 #1b: a
    successful `claude -p` that writes zero current-window facets used
    to merge `detections=[]` as a clean evidence pass.

    The LLM step is skipped (COACH_INSIGHTS_LLM_SKIP_REFRESH=1) so the
    aggregator runs against the seeded empty facets dir directly.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    profile_path = coach_dir / "profile.yaml"
    changelog_path = coach_dir / "changelog.md"
    marker = coach_dir / ".last_weekly_insights"

    empty_facets = tmp_path / "empty-facets"
    empty_facets.mkdir()

    # Baseline of everything the wrapper must leave untouched.
    profile_before = profile_path.read_text()
    changelog_size_before = changelog_path.stat().st_size
    assert not marker.exists()

    result = _run(coach_dir, empty_facets, "--force")
    stderr = result.stderr

    assert result.returncode == 7, (
        f"expected exit 7 (no evidence), got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    assert "no current-window evidence" in stderr
    assert "bailing before merge" in stderr
    assert not marker.exists(), "throttle marker was touched despite no evidence"
    assert profile_path.read_text() == profile_before
    assert changelog_path.stat().st_size == changelog_size_before
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def test_claude_timeout_bails_before_merge(tmp_path: Path) -> None:
    """A `claude -p /insights` that overruns its timeout must yield exit 6.

    Once COACH_INSIGHTS_LLM_TIMEOUT is exceeded the wrapper kills the
    subprocess and MUST exit 6 instead of falling through to
    aggregation. NOTE: the timeout is configured through the env var,
    not a CLI flag (the wrapper exits 2 with 'unknown arg' if you pass
    --timeout).
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    # The stub sleeps far longer than the timeout used below. The
    # wrapper polls every 2s, so with a 4s timeout the kill fires at
    # the 4s tick.
    sleeping_claude = "#!/bin/sh\nsleep 60\n"
    fake_bin = _build_isolated_bin(
        tmp_path,
        agg_body=_aggregator_should_not_run_body(),
        claude_body=sleeping_claude,
    )

    profile_path = coach_dir / "profile.yaml"
    profile_before = profile_path.read_text()
    marker = coach_dir / ".last_weekly_insights"

    sandbox_path = _sandbox_path_dir(tmp_path, extra_dir=fake_bin)
    env_overrides = {"COACH_INSIGHTS_LLM_TIMEOUT": "4"}

    result = _run_with_path(
        coach_dir=coach_dir,
        facets=facets,
        fake_bin=fake_bin,
        path=sandbox_path,
        extra_env=env_overrides,
        timeout=30,
    )
    stderr = result.stderr

    assert result.returncode == 6, (
        f"expected exit 6 (timeout), got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    assert "timed out" in stderr
    assert "bailing before merge" in stderr
    assert "AGG_RAN_BUT_SHOULD_NOT_HAVE" not in stderr
    assert not marker.exists()
    assert profile_path.read_text() == profile_before
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
# --- merge.py marker path isolation (v0.5.1 P1 #2) ------------------------
|
|
662
|
+
# Pre-v0.5.1, merge.py hardcoded marker paths under
|
|
663
|
+
# Path.home() / ".claude/coach/", so a sandboxed run with
|
|
664
|
+
# COACH_DIR_OVERRIDE leaked .pending_* markers into the live install.
|
|
665
|
+
# Fix: main() reassigns the module globals to args.profile.parent
|
|
666
|
+
# before calling merge(). This test exercises the CLI path end-to-end
|
|
667
|
+
# and verifies live-install markers are byte-identical pre/post.
|
|
668
|
+
|
|
669
|
+
import hashlib # noqa: E402
|
|
670
|
+
|
|
671
|
+
# Script under test, plus the real (non-sandboxed) install directory
# whose marker files the isolation test snapshots before and after.
MERGE_PY = BIN_DIR / "merge.py"
LIVE_COACH_DIR = Path.home() / ".claude" / "coach"
LIVE_MARKERS = tuple(
    LIVE_COACH_DIR / marker_name
    for marker_name in (
        ".pending_graduation",
        ".pending_streak_rewards",
        ".pending_regression",
    )
)
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _snapshot_marker(p: Path) -> tuple:
    """Return ``(mtime_ns, sha256_hex)`` for a marker file.

    Returns ``(None, None)`` if the file doesn't exist. mtime_ns +
    content hash is strictly stronger than `exists()`: a write that
    produces identical bytes at the same-second mtime would still bump
    mtime_ns, and any byte change is hashed.

    The file is read directly instead of being probed with `exists()`
    first, so a marker that disappears between the check and the read
    is reported as absent rather than raising.
    """
    try:
        st = p.stat()
        digest = hashlib.sha256(p.read_bytes()).hexdigest()
    except (FileNotFoundError, NotADirectoryError):
        # Missing file, or a path component that is not a directory:
        # both are cases where `Path.exists()` reports False.
        return (None, None)
    return (st.st_mtime_ns, digest)
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
def test_merge_writes_markers_under_profile_parent(tmp_path: Path) -> None:
    """merge.py must write its markers next to the given profile.

    Running the merge.py CLI with --profile <tmp>/profile.yaml MUST
    write the .pending_* markers under <tmp>/ — never to the live
    install under ~/.claude/coach/. The live-install markers are
    snapshotted (mtime_ns + sha256) before and after the run, and the
    test asserts they are byte-identical.

    merge.py is launched with the interpreter that is running the test
    (`sys.executable`) so it sees the same installed packages; a bare
    "python3" from PATH may be a different environment.
    """
    import sys  # function-scope import keeps this block self-contained

    pre_live = {p: _snapshot_marker(p) for p in LIVE_MARKERS}

    coach_dir = tmp_path / "coach"
    coach_dir.mkdir()

    # Seed a profile with one entry at clean_streak_runs=4. The next
    # empty-detections merge ticks it to 5 → graduation → marker write.
    profile_yaml = coach_dir / "profile.yaml"
    profile_yaml.write_text(yaml.safe_dump({
        "schema_version": 1,
        "updated": None,
        "entries": [{
            "id": "test-weakness", "name": "test weakness",
            "tier": "active", "direction": "negative",
            "confidence": 0.8, "priority": 3,
            "nudge": "stop doing that", "examples": [],
            "first_seen": "2026-03-01",
            "last_seen_in_run": "2026-04-01",
            "clean_streak_runs": 4, "positive_run_streak": 0,
            "source_runs": ["old"], "total_occurrences": 10,
        }],
        "recent_runs": ["r-a", "r-b", "r-c"],
    }))
    (coach_dir / "changelog.md").touch()
    detections_json = tmp_path / "detections.json"
    detections_json.write_text("[]")

    result = subprocess.run(
        [
            sys.executable, str(MERGE_PY),
            "--profile", str(profile_yaml),
            "--changelog", str(coach_dir / "changelog.md"),
            "--lock", str(coach_dir / ".lock"),
            "--detections", str(detections_json),
            "--run-id", "insights-weekly-marker-isolation-test",
        ],
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0, (
        f"merge.py failed: rc={result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )

    # Markers MUST land under the tmp coach dir (the entry graduated,
    # so .pending_graduation must exist; others may be empty/absent).
    tmp_grad = coach_dir / ".pending_graduation"
    assert tmp_grad.exists(), (
        "expected .pending_graduation under tmp coach dir; "
        f"contents={list(coach_dir.iterdir())}"
    )
    grad_payload = json.loads(tmp_grad.read_text())
    grad_ids = [g.get("id") for g in grad_payload.get("graduations", [])]
    assert "test-weakness" in grad_ids, (
        f"graduation marker missing test-weakness: {grad_payload}"
    )

    # Live-install markers MUST be byte-identical to the pre-snapshot.
    post_live = {p: _snapshot_marker(p) for p in LIVE_MARKERS}
    for p in LIVE_MARKERS:
        assert pre_live[p] == post_live[p], (
            f"live-install marker mutated by sandboxed merge: {p}\n"
            f"pre={pre_live[p]} post={post_live[p]}"
        )
|