@rm0nroe/coach-claw 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +311 -0
  3. package/coach/README.md +99 -0
  4. package/coach/bin/aggregate_facets.py +274 -0
  5. package/coach/bin/analyze.py +678 -0
  6. package/coach/bin/bank.py +247 -0
  7. package/coach/bin/banner_themes.py +645 -0
  8. package/coach/bin/coach_paths.py +33 -0
  9. package/coach/bin/coexistence_check.py +129 -0
  10. package/coach/bin/configure.py +245 -0
  11. package/coach/bin/cron_check.py +81 -0
  12. package/coach/bin/default_statusline.py +135 -0
  13. package/coach/bin/doctor.py +663 -0
  14. package/coach/bin/insights-llm.sh +264 -0
  15. package/coach/bin/insights.sh +163 -0
  16. package/coach/bin/insights_window.py +111 -0
  17. package/coach/bin/marker_io.py +154 -0
  18. package/coach/bin/merge.py +671 -0
  19. package/coach/bin/redact.py +86 -0
  20. package/coach/bin/render_env.py +148 -0
  21. package/coach/bin/reward_hints.py +87 -0
  22. package/coach/bin/run-insights.sh +20 -0
  23. package/coach/bin/run_with_lock.py +85 -0
  24. package/coach/bin/scoring.py +260 -0
  25. package/coach/bin/skill_inventory.py +215 -0
  26. package/coach/bin/stats.py +459 -0
  27. package/coach/bin/status.py +293 -0
  28. package/coach/bin/statusline_self_patch.py +205 -0
  29. package/coach/bin/statusline_variants.py +146 -0
  30. package/coach/bin/statusline_wrap.py +244 -0
  31. package/coach/bin/statusline_wrap_action.py +460 -0
  32. package/coach/bin/switch_to_plugin.py +256 -0
  33. package/coach/bin/themes.py +256 -0
  34. package/coach/bin/user_config.py +176 -0
  35. package/coach/bin/xp_accounting.py +98 -0
  36. package/coach/changelog.md +4 -0
  37. package/coach/default-statusline-command.sh +19 -0
  38. package/coach/default-statusline-wrap-command.sh +15 -0
  39. package/coach/profile.yaml +37 -0
  40. package/coach/tests/conftest.py +13 -0
  41. package/coach/tests/test_aggregate_facets.py +379 -0
  42. package/coach/tests/test_analyze_aggregate.py +153 -0
  43. package/coach/tests/test_analyze_redaction.py +105 -0
  44. package/coach/tests/test_analyze_strengths.py +165 -0
  45. package/coach/tests/test_bank_atomic_write.py +61 -0
  46. package/coach/tests/test_bank_concurrency.py +126 -0
  47. package/coach/tests/test_banner_themes.py +981 -0
  48. package/coach/tests/test_celebrate_dedup.py +409 -0
  49. package/coach/tests/test_coach_paths.py +50 -0
  50. package/coach/tests/test_coexistence_check.py +128 -0
  51. package/coach/tests/test_configure.py +258 -0
  52. package/coach/tests/test_cron_check.py +118 -0
  53. package/coach/tests/test_cron_nudge_hook.py +134 -0
  54. package/coach/tests/test_detection_parity.py +105 -0
  55. package/coach/tests/test_doctor.py +595 -0
  56. package/coach/tests/test_hook_bespoke_dispatch.py +288 -0
  57. package/coach/tests/test_hook_module_resolution.py +116 -0
  58. package/coach/tests/test_hook_relevance.py +996 -0
  59. package/coach/tests/test_hook_render_env.py +364 -0
  60. package/coach/tests/test_hook_session_id_guard.py +160 -0
  61. package/coach/tests/test_insights_llm.py +759 -0
  62. package/coach/tests/test_insights_llm_venv_path.py +109 -0
  63. package/coach/tests/test_insights_window.py +237 -0
  64. package/coach/tests/test_install.py +1150 -0
  65. package/coach/tests/test_install_pyyaml_fallback.py +142 -0
  66. package/coach/tests/test_marker_consumption.py +167 -0
  67. package/coach/tests/test_marker_writer_locking.py +305 -0
  68. package/coach/tests/test_merge.py +413 -0
  69. package/coach/tests/test_no_broken_mktemp.py +90 -0
  70. package/coach/tests/test_render_env.py +137 -0
  71. package/coach/tests/test_render_env_glyphs.py +119 -0
  72. package/coach/tests/test_reward_hints.py +59 -0
  73. package/coach/tests/test_scoring.py +147 -0
  74. package/coach/tests/test_session_start_weekly_trigger.py +92 -0
  75. package/coach/tests/test_skill_inventory.py +368 -0
  76. package/coach/tests/test_stats_hybrid.py +142 -0
  77. package/coach/tests/test_status_accounting.py +41 -0
  78. package/coach/tests/test_statusline_failsafe.py +70 -0
  79. package/coach/tests/test_statusline_self_patch.py +261 -0
  80. package/coach/tests/test_statusline_variants.py +110 -0
  81. package/coach/tests/test_statusline_wrap.py +196 -0
  82. package/coach/tests/test_statusline_wrap_action.py +408 -0
  83. package/coach/tests/test_switch_to_plugin.py +360 -0
  84. package/coach/tests/test_themes.py +104 -0
  85. package/coach/tests/test_user_config.py +160 -0
  86. package/coach/tests/test_wrap_announce_hook.py +130 -0
  87. package/coach/tests/test_xp_accounting.py +55 -0
  88. package/hooks/coach-session-start.py +536 -0
  89. package/hooks/coach-user-prompt.py +2288 -0
  90. package/install-launchd.sh +102 -0
  91. package/install.sh +597 -0
  92. package/launchd/com.local.claude-coach.plist.template +34 -0
  93. package/launchd/run-insights.sh +20 -0
  94. package/npm/coach-claw.js +259 -0
  95. package/package.json +52 -0
  96. package/requirements.txt +11 -0
  97. package/settings-snippet.json +31 -0
  98. package/skills/coach/SKILL.md +107 -0
  99. package/skills/coach-insights/SKILL.md +78 -0
  100. package/skills/config/SKILL.md +149 -0
@@ -0,0 +1,759 @@
1
+ """Integration tests for coach/bin/insights-llm.sh.
2
+
3
+ Exercise the actual bash wrapper via subprocess (not just the Python helpers
4
+ it composes) so the shell→Python boundary is covered. Uses
5
+ COACH_INSIGHTS_LLM_SKIP_REFRESH=1 to bypass the `claude -p /insights`
6
+ subprocess and operate on a fixture facets directory.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import fcntl
11
+ import json
12
+ import os
13
+ import shutil
14
+ import subprocess
15
+ import time
16
+ from pathlib import Path
17
+
18
+ import pytest
19
+ import yaml
20
+
21
# Directory of shipped coach scripts: two levels up from this file, then bin/.
BIN_DIR = Path(__file__).resolve().parents[1] / "bin"
# The bash wrapper that every test in this module drives via subprocess.
SCRIPT = BIN_DIR / "insights-llm.sh"
23
+
24
+
25
def _seed_coach_dir(tmp_path: Path) -> Path:
    """Create a throwaway coach directory under ``tmp_path`` and return it.

    The directory receives an empty schema-1 ``profile.yaml``, an empty
    ``changelog.md``, and a freshly initialised git repository holding a
    single empty "init" commit.
    """
    root = tmp_path / "coach"
    root.mkdir()

    empty_profile = dict(
        schema_version=1,
        updated=None,
        entries=[],
        recent_runs=[],
    )
    (root / "profile.yaml").write_text(yaml.safe_dump(empty_profile))
    (root / "changelog.md").touch()

    # The committer identity is supplied inline with -c so the commit does
    # not depend on any git user configuration on the host.
    git_commands = (
        ["git", "init", "-q"],
        ["git", "-c", "user.email=t@t", "-c", "user.name=t",
         "commit", "--allow-empty", "-q", "-m", "init"],
    )
    for argv in git_commands:
        subprocess.run(argv, cwd=root, check=True)
    return root
44
+
45
+
46
def _seed_facets(tmp_path: Path, n: int = 5) -> Path:
    """Write ``n`` fixture facet files into ``tmp_path / "facets"``.

    Each file ``s<i>.json`` describes one session that reports the same two
    friction categories and the same success label, so every friction
    appears in all ``n`` sessions. Returns the facets directory.
    """
    facets_dir = tmp_path / "facets"
    facets_dir.mkdir()
    for idx in range(n):
        session = f"s{idx}"
        payload = {
            "session_id": session,
            "friction_counts": {"misunderstood_request": 1, "wrong_approach": 1},
            "friction_detail": f"session {idx} mislabeled the work and went the wrong direction",
            "primary_success": "good_debugging",
            "brief_summary": f"session {idx} drove a bug to root cause",
        }
        target = facets_dir / f"{session}.json"
        target.write_text(json.dumps(payload))
    return facets_dir
60
+
61
+
62
def _run(coach_dir: Path, facets: Path, *args: str) -> subprocess.CompletedProcess:
    """Run the real wrapper script against a fixture coach dir and facets dir.

    The child inherits the current environment with the coach and facets
    locations overridden and ``COACH_INSIGHTS_LLM_SKIP_REFRESH=1`` set, which
    bypasses the `claude -p /insights` refresh. Extra ``args`` are forwarded
    to the script; stdout and stderr are captured as text.
    """
    overrides = {
        "COACH_DIR_OVERRIDE": str(coach_dir),
        "COACH_FACETS_DIR": str(facets),
        "COACH_INSIGHTS_LLM_SKIP_REFRESH": "1",
        # Blank out GIT_* values the test runner may have exported; the
        # intent is that the wrapper's commit step operates on the throwaway
        # coach_dir repository rather than an inherited one.
        "GIT_DIR": "",
        "GIT_WORK_TREE": "",
    }
    child_env = dict(os.environ)
    child_env.update(overrides)

    command = ["bash", str(SCRIPT)]
    command.extend(args)
    return subprocess.run(command, env=child_env, capture_output=True, text=True)
79
+
80
+
81
def test_run_id_prefix_distinguishes_weekly(tmp_path: Path) -> None:
    """A normal run reports and records run ids prefixed ``insights-weekly-``."""
    coach_dir = _seed_coach_dir(tmp_path)
    outcome = _run(coach_dir, _seed_facets(tmp_path))
    assert outcome.returncode == 0, outcome.stderr
    assert "run_id=insights-weekly-" in outcome.stdout

    # Every source_runs value recorded on a merged entry must carry the
    # weekly prefix.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    merged_entries = profile.get("entries") or []
    assert merged_entries, "expected merge to land at least one entry"
    recorded_runs = [
        run_id
        for entry in merged_entries
        for run_id in (entry.get("source_runs") or [])
    ]
    for run_id in recorded_runs:
        assert run_id.startswith("insights-weekly-"), run_id
94
+
95
+
96
def test_throttle_marker_set_on_success(tmp_path: Path) -> None:
    """A successful run creates the ``.last_weekly_insights`` throttle marker."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    throttle_marker = coach_dir / ".last_weekly_insights"
    assert not throttle_marker.exists()

    outcome = _run(coach_dir, facets)

    assert outcome.returncode == 0, outcome.stderr
    assert throttle_marker.exists(), "throttle marker was not touched"
104
+
105
+
106
def test_throttle_skips_recent_run(tmp_path: Path) -> None:
    """A second run right after a successful one is skipped by the throttle."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)

    # The first invocation lands the marker ...
    first = _run(coach_dir, facets)
    assert first.returncode == 0
    # ... so the immediate follow-up must report a skip while still exiting 0.
    second = _run(coach_dir, facets)
    assert second.returncode == 0
    assert "skipped" in second.stdout.lower()

    # Merge must not have run a second time: recent_runs stays at length 1.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    recent = profile.get("recent_runs") or []
    assert len(recent) == 1
119
+
120
+
121
def test_force_overrides_cooldown(tmp_path: Path) -> None:
    """``--force`` runs the full pipeline even right after a successful run."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)

    initial = _run(coach_dir, facets)
    assert initial.returncode == 0
    forced = _run(coach_dir, facets, "--force")
    assert forced.returncode == 0
    assert "skipped" not in forced.stdout.lower()

    # Both invocations merged, so recent_runs has grown to two entries.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    recent = profile.get("recent_runs") or []
    assert len(recent) == 2
132
+
133
+
134
def test_dry_run_skips_merge(tmp_path: Path) -> None:
    """``--dry-run`` prints detections but leaves profile and marker alone."""
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    throttle_marker = coach_dir / ".last_weekly_insights"

    outcome = _run(coach_dir, facets, "--dry-run")
    assert outcome.returncode == 0, outcome.stderr

    # The seeded profile lists must still be empty: no merge happened.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    for key in ("entries", "recent_runs"):
        assert profile.get(key) == []
    # No throttle marker either.
    assert not throttle_marker.exists()

    # stdout carries the dry-run notice, then the aggregator's detections
    # (identified here by one of the seeded friction ids).
    for fragment in ("(dry-run; merge skipped", "misunderstood-request"):
        assert fragment in outcome.stdout
150
+
151
+
152
def test_invalid_facets_dir_bails_on_no_evidence(tmp_path: Path) -> None:
    """A missing facets directory counts as "no current-window evidence".

    The wrapper is expected to exit 7 without merging. Per the original
    note, releases before v0.5.1 merged ``[]`` as a clean evidence pass in
    this situation; the ``n_sessions == 0`` gate closed that bug class.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    missing_facets = tmp_path / "no-facets-here"

    outcome = _run(coach_dir, missing_facets)

    assert outcome.returncode == 7, (
        f"expected exit 7 (no evidence), got {outcome.returncode}\n"
        f"stderr={outcome.stderr}"
    )
    assert "no current-window evidence" in outcome.stderr

    # Nothing may have advanced: profile lists still empty, no marker.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert profile.get("entries") == []
    assert profile.get("recent_runs") in (None, [])
    throttle_marker = coach_dir / ".last_weekly_insights"
    assert not throttle_marker.exists()
170
+
171
+
172
def test_below_threshold_emits_zero_detections(tmp_path: Path) -> None:
    """Friction in only 1 of 10 sessions yields zero detections.

    The wrapper must still exit 0 and merge, because an empty pass is a
    meaningful signal.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = tmp_path / "facets"
    facets.mkdir()

    # One session with friction, nine sessions with none at all.
    sessions = [{"session_id": "s0", "friction_counts": {"misunderstood_request": 1}}]
    sessions += [{"session_id": f"s{i}"} for i in range(1, 10)]
    for session in sessions:
        facet_file = facets / f"{session['session_id']}.json"
        facet_file.write_text(json.dumps(session))

    outcome = _run(coach_dir, facets)

    assert outcome.returncode == 0
    assert "detections=0" in outcome.stdout
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert profile.get("entries") == []
190
+
191
+
192
def _build_isolated_bin(
    tmp_path: Path,
    *,
    agg_body: str,
    claude_body: str | None = None,
) -> Path:
    """Assemble ``tmp_path / "bin"`` with a test-controlled aggregator.

    The real wrapper, lock helper and merge sidecars are copied from
    ``BIN_DIR``; ``aggregate_facets.py`` is replaced by ``agg_body``. This
    lets failure-mode tests steer aggregator behaviour without touching the
    real bundle.

    When ``claude_body`` is given, an executable ``claude`` shim with that
    content is written alongside. Pair it with
    ``_sandbox_path_dir(..., extra_dir=fake_bin)`` so the wrapper finds the
    shim rather than a real ``claude`` CLI on the host.

    Returns the path of the assembled directory.
    """
    fake_bin = tmp_path / "bin"
    fake_bin.mkdir()

    real_files = (
        "insights-llm.sh",
        "run_with_lock.py",
        "merge.py",
        "marker_io.py",
        "reward_hints.py",
        "xp_accounting.py",
    )
    for filename in real_files:
        source_text = (BIN_DIR / filename).read_text()
        (fake_bin / filename).write_text(source_text)
    # Mark every copied shell and Python file executable.
    for script in [*fake_bin.glob("*.sh"), *fake_bin.glob("*.py")]:
        script.chmod(0o755)

    stub_aggregator = fake_bin / "aggregate_facets.py"
    stub_aggregator.write_text(agg_body)
    stub_aggregator.chmod(0o755)

    if claude_body is None:
        return fake_bin
    shim = fake_bin / "claude"
    shim.write_text(claude_body)
    shim.chmod(0o755)
    return fake_bin
231
+
232
+
233
def _sandbox_path_dir(tmp_path: Path, *, extra_dir: Path | None = None) -> str:
    """Build a PATH string that offers python3, bash and coreutils, not claude.

    A ``sandbox-bin`` directory is created under ``tmp_path`` holding
    symlinks to the host's ``python3`` and ``bash``. The returned PATH is
    that directory followed by ``/usr/bin`` and ``/bin``, which supply the
    standard tools the wrapper shells out to (dirname, mktemp, date, git, …).
    ``claude`` is normally installed elsewhere (nvm, Homebrew,
    /usr/local/bin), so this base excludes it by construction.

    A fully stripped PATH would not do: per the original notes, the wrapper
    resolves python3 before it looks for ``claude`` and re-executes through
    bash, so it would fail earlier for an unrelated reason.

    Pass ``extra_dir`` to put a directory (typically a ``fake_bin`` with a
    ``claude`` shim) at the front of the PATH. Without it, the function
    asserts that the resulting PATH really does not resolve ``claude``.
    """
    sandbox = tmp_path / "sandbox-bin"
    sandbox.mkdir()

    host_python3 = shutil.which("python3")
    host_bash = shutil.which("bash")
    assert host_python3, "host has no python3 — cannot build sandbox PATH"
    assert host_bash, "host has no bash — cannot build sandbox PATH"
    os.symlink(host_python3, sandbox / "python3")
    os.symlink(host_bash, sandbox / "bash")

    parts = [str(sandbox), "/usr/bin", "/bin"]
    if extra_dir is None:
        # Without this guard a stray `claude` on the base PATH would let the
        # missing-claude test pass silently.
        claude_found = shutil.which("claude", path=":".join(parts))
        assert not claude_found, f"sandbox PATH unexpectedly resolves `claude`: {parts}"
        return ":".join(parts)
    return ":".join([str(extra_dir), *parts])
270
+
271
+
272
def _run_with_path(
    *,
    coach_dir: Path,
    facets: Path,
    fake_bin: Path,
    path: str,
    extra_env: dict | None = None,
    args: tuple = ("--force",),
    timeout: int = 30,
) -> subprocess.CompletedProcess:
    """Run the wrapper copy in ``fake_bin`` under a caller-supplied PATH.

    ``COACH_INSIGHTS_LLM_SKIP_REFRESH`` is deliberately not set, so the
    wrapper's real LLM-refresh branch executes; the LLM-fail-hard tests rely
    on this. The environment is built from scratch rather than inherited,
    and ``extra_env`` entries are layered on top. stdout and stderr are
    captured as text, and ``timeout`` (seconds) bounds the whole run.
    """
    # Built from scratch: inheriting the parent PATH would bring back any
    # real `claude` binary on the host.
    child_env = dict(
        HOME=os.environ.get("HOME", str(coach_dir.parent)),
        PATH=path,
        COACH_DIR_OVERRIDE=str(coach_dir),
        COACH_FACETS_DIR=str(facets),
        GIT_DIR="",
        GIT_WORK_TREE="",
    )
    child_env.update(extra_env or {})

    wrapper = fake_bin / "insights-llm.sh"
    command = ["bash", str(wrapper), *args]
    return subprocess.run(
        command,
        env=child_env,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
304
+
305
+
306
def test_aggregator_failure_bails_before_merge(tmp_path: Path) -> None:
    """A nonzero aggregator exit must stop the wrapper before any merge.

    Expected wrapper behaviour when ``aggregate_facets.py`` fails:
      - it exits nonzero itself instead of treating empty output as ``[]``
      - merge.py does not run (profile and changelog stay unchanged)
      - the throttle marker is not touched, so the next session can retry

    Background from the original review notes: an earlier `... > $DET`
    redirect dropped the aggregator's exit code, and an inline
    try/except heredoc swallowed JSON parse errors, so a broken aggregator
    was committed as a clean evidence pass and consumed the weekly cadence.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    failing_aggregator = "#!/usr/bin/env python3\nimport sys\nsys.exit(7)\n"
    fake_bin = _build_isolated_bin(tmp_path, agg_body=failing_aggregator)

    profile_path = coach_dir / "profile.yaml"
    changelog_path = coach_dir / "changelog.md"
    marker = coach_dir / ".last_weekly_insights"
    profile_before = profile_path.read_text()
    changelog_size_before = changelog_path.stat().st_size
    assert not marker.exists()

    child_env = dict(
        os.environ,
        COACH_DIR_OVERRIDE=str(coach_dir),
        COACH_FACETS_DIR=str(facets),
        COACH_INSIGHTS_LLM_SKIP_REFRESH="1",
        GIT_DIR="",
        GIT_WORK_TREE="",
    )
    wrapper = fake_bin / "insights-llm.sh"
    result = subprocess.run(
        ["bash", str(wrapper), "--force"],
        env=child_env,
        capture_output=True,
        text=True,
    )

    assert result.returncode != 0, (
        f"wrapper exited 0 despite aggregator failure:\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    assert "bailing before merge" in result.stderr, result.stderr
    # A failed run must not start the cooldown: the next session start
    # should retry rather than wait out another throttle window.
    assert not marker.exists(), "throttle marker was touched despite aggregator failure"
    # Neither the profile nor the changelog may have been written.
    assert profile_path.read_text() == profile_before, (
        "profile.yaml was mutated despite aggregator failure"
    )
    assert changelog_path.stat().st_size == changelog_size_before, (
        "changelog.md was appended to despite aggregator failure"
    )
361
+
362
+
363
def test_aggregator_garbled_output_bails_before_merge(tmp_path: Path) -> None:
    """An aggregator that exits 0 with unparseable JSON must also block merge.

    Merging an unreadable detections file as ``[]`` is the same failure mode
    as ignoring a nonzero aggregator exit, one layer further down.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    garbling_aggregator = "\n".join(
        [
            "#!/usr/bin/env python3",
            "import sys",
            "sys.stdout.write('not valid json {{{')",
            "sys.exit(0)",
            "",
        ]
    )
    fake_bin = _build_isolated_bin(tmp_path, agg_body=garbling_aggregator)

    profile_path = coach_dir / "profile.yaml"
    profile_before = profile_path.read_text()
    marker = coach_dir / ".last_weekly_insights"

    child_env = dict(
        os.environ,
        COACH_DIR_OVERRIDE=str(coach_dir),
        COACH_FACETS_DIR=str(facets),
        COACH_INSIGHTS_LLM_SKIP_REFRESH="1",
        GIT_DIR="",
        GIT_WORK_TREE="",
    )
    wrapper = fake_bin / "insights-llm.sh"
    result = subprocess.run(
        ["bash", str(wrapper), "--force"],
        env=child_env,
        capture_output=True,
        text=True,
    )

    assert result.returncode != 0
    assert "unparseable" in result.stderr or "bailing" in result.stderr
    assert not marker.exists()
    assert profile_path.read_text() == profile_before
402
+
403
+
404
def test_concurrent_run_skips_when_lock_held(tmp_path: Path) -> None:
    """A wrapper started while the lock is held must skip with exit 10.

    The test itself takes an exclusive, non-blocking ``flock`` on
    ``.weekly_insights.lock`` and runs the wrapper with ``--force`` while
    holding it. The wrapper is expected to exit 10 and mention "concurrent"
    on stdout, without running the LLM call, aggregator or merge.

    This guards the race where two SessionStart hooks fire inside the slow
    `claude -p /insights` window: without serialization both wrappers would
    aggregate and merge, prematurely advancing debounce/graduation streaks.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)

    lock_path = coach_dir / ".weekly_insights.lock"
    lock_path.touch()
    fd = os.open(str(lock_path), os.O_RDWR)
    try:
        # Acquire inside the try so the descriptor is closed even when the
        # lock cannot be taken (flock raises instead of blocking here).
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        result = _run(coach_dir, facets, "--force")
    finally:
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        finally:
            os.close(fd)

    assert result.returncode == 10, (
        f"expected exit 10 (lock contention skip), got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    assert "concurrent" in result.stdout.lower()
    # No merge ran: recent_runs and entries untouched, no throttle marker.
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert profile.get("recent_runs") in (None, [])
    assert profile.get("entries") in (None, [])
    assert not (coach_dir / ".last_weekly_insights").exists()
438
+
439
+
440
def test_concurrent_wrappers_only_one_merges(tmp_path: Path) -> None:
    """Two wrappers racing on one coach dir: exactly one may merge.

    Both wrappers are launched 0.2s apart against a stub aggregator that
    emits one detection and then sleeps 1.5s, keeping the first wrapper
    busy while the second starts. One wrapper must win the lock and exit 0;
    the other must exit 10. Afterwards the profile holds exactly one entry
    and one ``recent_runs`` item.

    Children still running when the test bails out (for example because
    ``communicate`` timed out) are killed and reaped, so a hung wrapper
    cannot outlive the test.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    slow_agg = (
        "#!/usr/bin/env python3\n"
        "import json, sys, time\n"
        "sys.stdout.write(json.dumps([{\n"
        " 'id': 'misunderstood-request',\n"
        " 'name': 'misunderstood request',\n"
        " 'direction': 'negative',\n"
        " 'nudge': 'test',\n"
        " 'examples': [],\n"
        " 'priority': 2,\n"
        "}]))\n"
        "sys.stdout.flush()\n"
        "time.sleep(1.5)\n"
    )
    fake_bin = _build_isolated_bin(tmp_path, agg_body=slow_agg)

    env = {
        **os.environ,
        "COACH_DIR_OVERRIDE": str(coach_dir),
        "COACH_FACETS_DIR": str(facets),
        "COACH_INSIGHTS_LLM_SKIP_REFRESH": "1",
        "GIT_DIR": "",
        "GIT_WORK_TREE": "",
    }
    command = ["bash", str(fake_bin / "insights-llm.sh"), "--force"]

    def _spawn() -> subprocess.Popen:
        """Start one wrapper with captured text-mode stdout/stderr."""
        return subprocess.Popen(
            command,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

    started: list[subprocess.Popen] = []
    try:
        p_a = _spawn()
        started.append(p_a)
        # Give A a head start so it takes the lock before B begins.
        time.sleep(0.2)
        p_b = _spawn()
        started.append(p_b)
        out_a, err_a = p_a.communicate(timeout=15)
        out_b, err_b = p_b.communicate(timeout=15)
    finally:
        # communicate(timeout=...) does not kill the child on expiry, so
        # kill and reap anything still alive.
        for proc in started:
            if proc.poll() is None:
                proc.kill()
                proc.communicate()

    rcs = sorted([p_a.returncode, p_b.returncode])
    assert rcs == [0, 10], (
        f"expected one winner (rc=0) + one skipper (rc=10), got {rcs}\n"
        f"A: rc={p_a.returncode} stdout={out_a!r} stderr={err_a!r}\n"
        f"B: rc={p_b.returncode} stdout={out_b!r} stderr={err_b!r}"
    )
    profile = yaml.safe_load((coach_dir / "profile.yaml").read_text())
    assert len(profile.get("recent_runs") or []) == 1, (
        f"expected exactly one recent_run after concurrent race, got {profile.get('recent_runs')}"
    )
    assert len(profile.get("entries") or []) == 1
494
+
495
+
496
+ # --- LLM-step fail-hard regression suite ----------------------------------
497
+ #
498
+ # Mirrors test_aggregator_failure_bails_before_merge. The bug class is the
499
+ # same — a refresh step that fails silently lets merge.py treat
500
+ # stale-or-empty facets as a clean evidence pass, advancing absence-based
501
+ # streaks on phantom data. The only difference is which step fails: these
502
+ # three cases cover the LLM refresh (insights-llm.sh:133–164) instead of
503
+ # the aggregator (insights-llm.sh:175–199).
504
+
505
+
506
def _aggregator_should_not_run_body() -> str:
    """Return source for a stub aggregator that announces itself on stderr.

    The LLM-fail-hard tests install this stub as ``aggregate_facets.py``.
    When the wrapper correctly bails before the aggregator stage, the stub
    never executes. If the wrapper falls through, the stub writes the
    sentinel ``AGG_RAN_BUT_SHOULD_NOT_HAVE`` to stderr, prints ``[]`` and
    exits 0, and the tests detect the sentinel.
    """
    script_lines = [
        "#!/usr/bin/env python3",
        "import sys",
        "sys.stderr.write('AGG_RAN_BUT_SHOULD_NOT_HAVE\\n')",
        "sys.stdout.write('[]\\n')",
        "sys.exit(0)",
    ]
    return "\n".join(script_lines) + "\n"
522
+
523
+
524
def test_missing_claude_bails_before_merge(tmp_path: Path) -> None:
    """With no ``claude`` on PATH the wrapper must exit 6 and change nothing.

    Aggregation, merge and the throttle marker must all be skipped. Per the
    original notes this reproduces a v0.5.1 review finding in which a
    weakness graduated under fail-soft handling with claude missing and
    empty facets.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    fake_bin = _build_isolated_bin(
        tmp_path, agg_body=_aggregator_should_not_run_body()
    )

    profile_path = coach_dir / "profile.yaml"
    profile_before = profile_path.read_text()
    marker = coach_dir / ".last_weekly_insights"
    assert not marker.exists()

    # No extra_dir: the sandbox PATH offers python3 and bash but no claude.
    sandbox_path = _sandbox_path_dir(tmp_path)
    result = _run_with_path(
        coach_dir=coach_dir,
        facets=facets,
        fake_bin=fake_bin,
        path=sandbox_path,
    )

    assert result.returncode == 6, (
        f"expected exit 6 (LLM refresh failed), got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    for fragment in ("claude CLI not on PATH", "bailing before merge"):
        assert fragment in result.stderr
    assert "AGG_RAN_BUT_SHOULD_NOT_HAVE" not in result.stderr, (
        "wrapper fell through to aggregator despite missing claude"
    )
    assert not marker.exists(), "throttle marker was touched despite missing claude"
    assert profile_path.read_text() == profile_before
557
+
558
+
559
def test_claude_nonzero_exit_bails_before_merge(tmp_path: Path) -> None:
    """A ``claude`` shim that exits 1 must make the wrapper exit 6.

    This stands in for cases such as a plan without access or a transient
    API failure. As with a missing ``claude``, no aggregation, merge or
    marker update may follow.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    fake_bin = _build_isolated_bin(
        tmp_path,
        agg_body=_aggregator_should_not_run_body(),
        claude_body="#!/bin/sh\nexit 1\n",
    )

    profile_path = coach_dir / "profile.yaml"
    profile_before = profile_path.read_text()
    marker = coach_dir / ".last_weekly_insights"

    # fake_bin goes first on PATH so the failing shim is the `claude` found.
    sandbox_path = _sandbox_path_dir(tmp_path, extra_dir=fake_bin)
    result = _run_with_path(
        coach_dir=coach_dir,
        facets=facets,
        fake_bin=fake_bin,
        path=sandbox_path,
    )

    assert result.returncode == 6, (
        f"expected exit 6, got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    for fragment in ("exited rc=", "bailing before merge"):
        assert fragment in result.stderr
    assert "AGG_RAN_BUT_SHOULD_NOT_HAVE" not in result.stderr
    assert not marker.exists()
    assert profile_path.read_text() == profile_before
589
+
590
+
591
def test_no_evidence_bails_before_merge(tmp_path: Path) -> None:
    """An empty facets directory must end in wrapper exit 7 with no merge.

    Per the original notes, the aggregator signals ``n_sessions == 0`` with
    its own EXIT_NO_EVIDENCE=3, which the wrapper translates into exit 7
    before merge or marker. This reproduces a v0.5.1 finding where a
    successful `claude -p` that wrote no current-window facets was merged
    as ``detections=[]``.

    ``_run`` sets COACH_INSIGHTS_LLM_SKIP_REFRESH=1, so the LLM step is
    skipped and the aggregator sees the empty directory directly.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    empty_facets = tmp_path / "empty-facets"
    empty_facets.mkdir()

    profile_path = coach_dir / "profile.yaml"
    changelog_path = coach_dir / "changelog.md"
    marker = coach_dir / ".last_weekly_insights"
    profile_before = profile_path.read_text()
    changelog_size_before = changelog_path.stat().st_size
    assert not marker.exists()

    result = _run(coach_dir, empty_facets, "--force")

    assert result.returncode == 7, (
        f"expected exit 7 (no evidence), got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    for fragment in ("no current-window evidence", "bailing before merge"):
        assert fragment in result.stderr
    assert not marker.exists(), "throttle marker was touched despite no evidence"
    assert profile_path.read_text() == profile_before
    assert changelog_path.stat().st_size == changelog_size_before
619
+
620
+
621
def test_claude_timeout_bails_before_merge(tmp_path: Path) -> None:
    """A ``claude`` shim that outlasts the timeout must yield wrapper exit 6.

    The wrapper is expected to kill the subprocess and stop, not fall
    through to aggregation. The limit is set through the
    ``COACH_INSIGHTS_LLM_TIMEOUT`` environment variable; per the original
    note there is no CLI flag, and passing ``--timeout`` makes the wrapper
    exit 2 with 'unknown arg'.
    """
    coach_dir = _seed_coach_dir(tmp_path)
    facets = _seed_facets(tmp_path)
    # The shim sleeps far longer than the 4s limit set below. Per the
    # original note the wrapper polls every 2s, so the kill should land on
    # the 4s tick.
    fake_bin = _build_isolated_bin(
        tmp_path,
        agg_body=_aggregator_should_not_run_body(),
        claude_body="#!/bin/sh\nsleep 60\n",
    )

    profile_path = coach_dir / "profile.yaml"
    profile_before = profile_path.read_text()
    marker = coach_dir / ".last_weekly_insights"

    sandbox_path = _sandbox_path_dir(tmp_path, extra_dir=fake_bin)
    llm_timeout_env = {"COACH_INSIGHTS_LLM_TIMEOUT": "4"}
    result = _run_with_path(
        coach_dir=coach_dir,
        facets=facets,
        fake_bin=fake_bin,
        path=sandbox_path,
        extra_env=llm_timeout_env,
        timeout=30,
    )

    assert result.returncode == 6, (
        f"expected exit 6 (timeout), got {result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )
    for fragment in ("timed out", "bailing before merge"):
        assert fragment in result.stderr
    assert "AGG_RAN_BUT_SHOULD_NOT_HAVE" not in result.stderr
    assert not marker.exists()
    assert profile_path.read_text() == profile_before
659
+
660
+
661
+ # --- merge.py marker path isolation (v0.5.1 P1 #2) ------------------------
662
+ # Pre-v0.5.1, merge.py hardcoded marker paths under
663
+ # Path.home() / ".claude/coach/", so a sandboxed run with
664
+ # COACH_DIR_OVERRIDE leaked .pending_* markers into the live install.
665
+ # Fix: main() reassigns the module globals to args.profile.parent
666
+ # before calling merge(). This test exercises the CLI path end-to-end
667
+ # and verifies live-install markers are byte-identical pre/post.
668
+
669
+ import hashlib # noqa: E402
670
+
671
# The merge script under test, invoked directly through its CLI.
MERGE_PY = BIN_DIR / "merge.py"
# The user's real coach directory; the isolation test must leave it alone.
LIVE_COACH_DIR = Path.home() / ".claude" / "coach"
# Marker files that a sandboxed merge must never create or modify there.
LIVE_MARKERS = tuple(
    LIVE_COACH_DIR / marker_name
    for marker_name in (
        ".pending_graduation",
        ".pending_streak_rewards",
        ".pending_regression",
    )
)
678
+
679
+
680
def _snapshot_marker(p: Path) -> tuple[int | None, str | None]:
    """Return ``(mtime_ns, sha256 hex digest)`` for a marker file.

    Returns ``(None, None)`` when the file does not exist. Comparing both
    values is stronger than comparing ``exists()``: any byte change alters
    the digest, and a rewrite with identical bytes is still expected to
    move ``mtime_ns``.

    The file is read directly rather than checked with ``exists()`` first,
    so a marker removed between the check and the read cannot raise
    midway. ``NotADirectoryError`` (a parent path component is a regular
    file) is treated as "missing" too, matching what ``Path.exists()``
    reports in that case.
    """
    try:
        stat_result = p.stat()
        digest = hashlib.sha256(p.read_bytes()).hexdigest()
    except (FileNotFoundError, NotADirectoryError):
        return (None, None)
    return (stat_result.st_mtime_ns, digest)
689
+
690
+
691
def test_merge_writes_markers_under_profile_parent(tmp_path: Path) -> None:
    """merge.py must write ``.pending_*`` markers next to ``--profile``.

    The merge CLI is run against a profile in a temporary coach directory.
    The graduation marker must appear in that directory, and every marker
    under the live ``~/.claude/coach/`` must have the same
    ``(mtime_ns, sha256)`` snapshot before and after the run.
    """
    live_before = {marker: _snapshot_marker(marker) for marker in LIVE_MARKERS}

    coach_dir = tmp_path / "coach"
    coach_dir.mkdir()
    profile_path = coach_dir / "profile.yaml"
    changelog_path = coach_dir / "changelog.md"
    detections_path = tmp_path / "detections.json"

    # One entry sitting at clean_streak_runs=4: the empty-detections merge
    # below is expected to tick it to 5, which graduates it and triggers the
    # marker write.
    near_graduation = {
        "id": "test-weakness", "name": "test weakness",
        "tier": "active", "direction": "negative",
        "confidence": 0.8, "priority": 3,
        "nudge": "stop doing that", "examples": [],
        "first_seen": "2026-03-01",
        "last_seen_in_run": "2026-04-01",
        "clean_streak_runs": 4, "positive_run_streak": 0,
        "source_runs": ["old"], "total_occurrences": 10,
    }
    seeded_profile = {
        "schema_version": 1,
        "updated": None,
        "entries": [near_graduation],
        "recent_runs": ["r-a", "r-b", "r-c"],
    }
    profile_path.write_text(yaml.safe_dump(seeded_profile))
    changelog_path.touch()
    detections_path.write_text("[]")

    command = ["python3", str(MERGE_PY)]
    path_flags = (
        ("--profile", profile_path),
        ("--changelog", changelog_path),
        ("--lock", coach_dir / ".lock"),
        ("--detections", detections_path),
    )
    for flag, value in path_flags:
        command += [flag, str(value)]
    command += ["--run-id", "insights-weekly-marker-isolation-test"]

    result = subprocess.run(command, capture_output=True, text=True)
    assert result.returncode == 0, (
        f"merge.py failed: rc={result.returncode}\n"
        f"stdout={result.stdout}\nstderr={result.stderr}"
    )

    # The graduation marker must exist in the temporary coach dir; the other
    # two markers may be empty or absent.
    tmp_grad = coach_dir / ".pending_graduation"
    assert tmp_grad.exists(), (
        f"expected .pending_graduation under tmp coach dir; "
        f"contents={list(coach_dir.iterdir())}"
    )
    grad_payload = json.loads(tmp_grad.read_text())
    graduated_ids = [item.get("id") for item in grad_payload.get("graduations", [])]
    assert "test-weakness" in graduated_ids, (
        f"graduation marker missing test-weakness: {grad_payload}"
    )

    # The live-install markers must match their pre-run snapshots exactly.
    live_after = {marker: _snapshot_marker(marker) for marker in LIVE_MARKERS}
    for marker in LIVE_MARKERS:
        assert live_before[marker] == live_after[marker], (
            f"live-install marker mutated by sandboxed merge: {marker}\n"
            f"pre={live_before[marker]} post={live_after[marker]}"
        )