@rm0nroe/coach-claw 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +311 -0
  3. package/coach/README.md +99 -0
  4. package/coach/bin/aggregate_facets.py +274 -0
  5. package/coach/bin/analyze.py +678 -0
  6. package/coach/bin/bank.py +247 -0
  7. package/coach/bin/banner_themes.py +645 -0
  8. package/coach/bin/coach_paths.py +33 -0
  9. package/coach/bin/coexistence_check.py +129 -0
  10. package/coach/bin/configure.py +245 -0
  11. package/coach/bin/cron_check.py +81 -0
  12. package/coach/bin/default_statusline.py +135 -0
  13. package/coach/bin/doctor.py +663 -0
  14. package/coach/bin/insights-llm.sh +264 -0
  15. package/coach/bin/insights.sh +163 -0
  16. package/coach/bin/insights_window.py +111 -0
  17. package/coach/bin/marker_io.py +154 -0
  18. package/coach/bin/merge.py +671 -0
  19. package/coach/bin/redact.py +86 -0
  20. package/coach/bin/render_env.py +148 -0
  21. package/coach/bin/reward_hints.py +87 -0
  22. package/coach/bin/run-insights.sh +20 -0
  23. package/coach/bin/run_with_lock.py +85 -0
  24. package/coach/bin/scoring.py +260 -0
  25. package/coach/bin/skill_inventory.py +215 -0
  26. package/coach/bin/stats.py +459 -0
  27. package/coach/bin/status.py +293 -0
  28. package/coach/bin/statusline_self_patch.py +205 -0
  29. package/coach/bin/statusline_variants.py +146 -0
  30. package/coach/bin/statusline_wrap.py +244 -0
  31. package/coach/bin/statusline_wrap_action.py +460 -0
  32. package/coach/bin/switch_to_plugin.py +256 -0
  33. package/coach/bin/themes.py +256 -0
  34. package/coach/bin/user_config.py +176 -0
  35. package/coach/bin/xp_accounting.py +98 -0
  36. package/coach/changelog.md +4 -0
  37. package/coach/default-statusline-command.sh +19 -0
  38. package/coach/default-statusline-wrap-command.sh +15 -0
  39. package/coach/profile.yaml +37 -0
  40. package/coach/tests/conftest.py +13 -0
  41. package/coach/tests/test_aggregate_facets.py +379 -0
  42. package/coach/tests/test_analyze_aggregate.py +153 -0
  43. package/coach/tests/test_analyze_redaction.py +105 -0
  44. package/coach/tests/test_analyze_strengths.py +165 -0
  45. package/coach/tests/test_bank_atomic_write.py +61 -0
  46. package/coach/tests/test_bank_concurrency.py +126 -0
  47. package/coach/tests/test_banner_themes.py +981 -0
  48. package/coach/tests/test_celebrate_dedup.py +409 -0
  49. package/coach/tests/test_coach_paths.py +50 -0
  50. package/coach/tests/test_coexistence_check.py +128 -0
  51. package/coach/tests/test_configure.py +258 -0
  52. package/coach/tests/test_cron_check.py +118 -0
  53. package/coach/tests/test_cron_nudge_hook.py +134 -0
  54. package/coach/tests/test_detection_parity.py +105 -0
  55. package/coach/tests/test_doctor.py +595 -0
  56. package/coach/tests/test_hook_bespoke_dispatch.py +288 -0
  57. package/coach/tests/test_hook_module_resolution.py +116 -0
  58. package/coach/tests/test_hook_relevance.py +996 -0
  59. package/coach/tests/test_hook_render_env.py +364 -0
  60. package/coach/tests/test_hook_session_id_guard.py +160 -0
  61. package/coach/tests/test_insights_llm.py +759 -0
  62. package/coach/tests/test_insights_llm_venv_path.py +109 -0
  63. package/coach/tests/test_insights_window.py +237 -0
  64. package/coach/tests/test_install.py +1150 -0
  65. package/coach/tests/test_install_pyyaml_fallback.py +142 -0
  66. package/coach/tests/test_marker_consumption.py +167 -0
  67. package/coach/tests/test_marker_writer_locking.py +305 -0
  68. package/coach/tests/test_merge.py +413 -0
  69. package/coach/tests/test_no_broken_mktemp.py +90 -0
  70. package/coach/tests/test_render_env.py +137 -0
  71. package/coach/tests/test_render_env_glyphs.py +119 -0
  72. package/coach/tests/test_reward_hints.py +59 -0
  73. package/coach/tests/test_scoring.py +147 -0
  74. package/coach/tests/test_session_start_weekly_trigger.py +92 -0
  75. package/coach/tests/test_skill_inventory.py +368 -0
  76. package/coach/tests/test_stats_hybrid.py +142 -0
  77. package/coach/tests/test_status_accounting.py +41 -0
  78. package/coach/tests/test_statusline_failsafe.py +70 -0
  79. package/coach/tests/test_statusline_self_patch.py +261 -0
  80. package/coach/tests/test_statusline_variants.py +110 -0
  81. package/coach/tests/test_statusline_wrap.py +196 -0
  82. package/coach/tests/test_statusline_wrap_action.py +408 -0
  83. package/coach/tests/test_switch_to_plugin.py +360 -0
  84. package/coach/tests/test_themes.py +104 -0
  85. package/coach/tests/test_user_config.py +160 -0
  86. package/coach/tests/test_wrap_announce_hook.py +130 -0
  87. package/coach/tests/test_xp_accounting.py +55 -0
  88. package/hooks/coach-session-start.py +536 -0
  89. package/hooks/coach-user-prompt.py +2288 -0
  90. package/install-launchd.sh +102 -0
  91. package/install.sh +597 -0
  92. package/launchd/com.local.claude-coach.plist.template +34 -0
  93. package/launchd/run-insights.sh +20 -0
  94. package/npm/coach-claw.js +259 -0
  95. package/package.json +52 -0
  96. package/requirements.txt +11 -0
  97. package/settings-snippet.json +31 -0
  98. package/skills/coach/SKILL.md +107 -0
  99. package/skills/coach-insights/SKILL.md +78 -0
  100. package/skills/config/SKILL.md +149 -0
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Transcript redactor — stdin → stdout.
4
+
5
+ Runs BEFORE the deterministic cron analyzer (`analyze.py` invoked by
6
+ `insights.sh`) reads any transcript byte. P0 privacy gate: strips known
7
+ credential shapes out of transcript content so they cannot be echoed into
8
+ the coach profile and subsequently injected as additionalContext on every
9
+ SessionStart.
10
+
11
+ The on-demand `/coach-insights` skill does NOT call `redact.py` — that
12
+ path defers the analytical step to Claude Code's built-in `/insights`,
13
+ which is an Anthropic-side LLM step the user is already authorized for
14
+ by virtue of running Claude Code. Coach still never writes raw
15
+ transcript content to `profile.yaml` on either path.
16
+
17
+ Usage:
18
+ cat transcript.jsonl | redact.py > redacted.jsonl
19
+
20
+ This is deliberately conservative — it over-redacts rather than miss a
21
+ secret. False positives are fine; leaked keys are not.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import re
26
+ import sys
27
+
28
# Zero-width "token starts here" anchor for rules that must not fire in the
# middle of a word. The input is JSONL, where line breaks inside string
# values appear as the two characters backslash + n/r/t. The escape letter
# is a word character, so a plain `\b` fails right after it and a secret at
# the start of a line inside a JSON string would slip through.
_TOKEN_START = r"(?:\b|(?<=\\[nrt]))"

# Ordered (compiled pattern, replacement) rules. They are applied top to
# bottom, so provider-specific shapes come before the generic catch-alls.
PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Provider-specific API key shapes
    (re.compile(r"sk-ant-[A-Za-z0-9_\-]{20,}"), "[REDACTED:anthropic-key]"),
    # OpenAI project / service-account / admin keys. Their bodies contain
    # `-` and `_`, which the legacy alphanumeric-only rule below rejects.
    (re.compile(r"sk-(?:proj|svcacct|admin)-[A-Za-z0-9_\-]{20,}"),
     "[REDACTED:openai-key]"),
    (re.compile(r"sk-[A-Za-z0-9]{32,}"), "[REDACTED:openai-key]"),
    # Stripe live + test secret keys (underscore not hyphen — the openai
    # `sk-` rules above will not match these).
    (re.compile(r"sk_live_[A-Za-z0-9]{24,}"), "[REDACTED:stripe-live-key]"),
    (re.compile(r"sk_test_[A-Za-z0-9]{24,}"), "[REDACTED:stripe-test-key]"),
    (re.compile(r"AKIA[0-9A-Z]{16}"), "[REDACTED:aws-access-key]"),
    (re.compile(r"ASIA[0-9A-Z]{16}"), "[REDACTED:aws-sts-key]"),
    (re.compile(r"aws_secret_access_key\s*[:=]\s*[A-Za-z0-9/+=]{40}", re.IGNORECASE),
     "aws_secret_access_key=[REDACTED]"),
    (re.compile(r"gh[pousr]_[A-Za-z0-9]{30,}"), "[REDACTED:github-token]"),
    (re.compile(r"github_pat_[A-Za-z0-9_]{60,}"), "[REDACTED:github-pat]"),
    (re.compile(r"xox[baprs]-[A-Za-z0-9-]{10,}"), "[REDACTED:slack-token]"),
    (re.compile(r"AIza[0-9A-Za-z_\-]{35}"), "[REDACTED:google-api-key]"),
    (re.compile(r"ya29\.[0-9A-Za-z_\-]{50,}"), "[REDACTED:google-oauth]"),
    # Hugging Face user access tokens (start with `hf_`, ~37 chars).
    (re.compile(_TOKEN_START + r"hf_[A-Za-z0-9]{30,}"), "[REDACTED:huggingface-token]"),
    # npm automation/publish tokens.
    (re.compile(_TOKEN_START + r"npm_[A-Za-z0-9]{30,}"), "[REDACTED:npm-token]"),

    # Generic bearer tokens and authorization headers
    (re.compile(r"Bearer\s+[A-Za-z0-9\-._~+/]{20,}=*", re.IGNORECASE), "Bearer [REDACTED]"),
    (re.compile(r"Authorization:\s*[A-Za-z]+\s+[A-Za-z0-9\-._~+/]{20,}=*", re.IGNORECASE),
     "Authorization: [REDACTED]"),

    # Private keys (PEM blocks — collapse the whole block)
    (re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]+?-----END [A-Z ]*PRIVATE KEY-----"),
     "[REDACTED:private-key-block]"),

    # JWT-shaped tokens (three base64url segments separated by dots)
    (re.compile(r"eyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}"),
     "[REDACTED:jwt]"),

    # Hex-form secrets: a standalone run of 40+ hex characters
    (re.compile(_TOKEN_START + r"[a-fA-F0-9]{40,}\b"), "[REDACTED:hex-secret?]"),

    # .env-style assignments for suspicious key names. The separator is
    # normalized to `=` and the value (8+ non-space, non-quote characters)
    # is dropped together with any surrounding quotes.
    (re.compile(
        r"(?mi)^\s*(\w*(?:SECRET|TOKEN|PASSWORD|API[_-]?KEY|PRIVATE[_-]?KEY|ACCESS[_-]?KEY|CREDENTIAL)\w*)"
        r"\s*[:=]\s*[\"']?([^\"'\n\s]{8,})[\"']?"),
     r"\1=[REDACTED]"),
]
72
+
73
+
74
def redact(text: str) -> str:
    """Return *text* after running every PATTERNS rule over it, in list order.

    Each rule sees the output of the previous one, so earlier (more specific)
    rules take precedence over later generic ones.
    """
    scrubbed = text
    for rule in PATTERNS:
        regex, substitute = rule
        scrubbed = regex.sub(substitute, scrubbed)
    return scrubbed
78
+
79
+
80
def main() -> None:
    """Read all of stdin, redact it, and write the result to stdout."""
    sys.stdout.write(redact(sys.stdin.read()))
83
+
84
+
85
+ if __name__ == "__main__":
86
+ main()
@@ -0,0 +1,148 @@
1
+ """Render-environment detection for coach output.
2
+
3
+ Coach banners need different markdown shapes depending on whether they're
4
+ rendered by terminal Claude Code (which dims blockquotes via theme) or an
5
+ IDE chat panel (which uses a WebView markdown renderer with different
6
+ feature support — notably, no GFM admonitions and weak blockquote styling).
7
+
8
+ `detect_render_env()` returns "ide" or "terminal" based on Claude Code's
9
+ own `CLAUDE_CODE_ENTRYPOINT` env var. Allowlist semantics: known IDE
10
+ entrypoints get the IDE shape; everything else falls through to the
11
+ terminal shape, which renders correctly across all surfaces.
12
+
13
+ Single source of truth — imported by both hooks (which append coach/bin/
14
+ to sys.path before importing).
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ from typing import Literal, Mapping
20
+
21
+ # Entrypoints whose output is rendered by an IDE chat panel WebView.
22
+ # vscode, jetbrains, ide-onboarding: confirmed in Claude Code binary
23
+ # claude-vscode, claude-jetbrains: speculative — kept as defensive
24
+ # fallback for prefixed variants. If absent, detection falls through
25
+ # to the terminal shape, which renders correctly in IDE panels too
26
+ # (just less prominently than the HR-framed shape).
27
+ IDE_ENTRYPOINTS = frozenset({
28
+ "vscode",
29
+ "claude-vscode",
30
+ "jetbrains",
31
+ "claude-jetbrains",
32
+ "ide-onboarding",
33
+ })
34
+
35
+ RenderEnv = Literal["ide", "terminal"]
36
+
37
+
38
def detect_render_env(env: Mapping[str, str] | None = None) -> RenderEnv:
    """Classify the rendering surface as "ide" or "terminal".

    Resolution order:
      1. COACH_RENDER_ENV, when it is "ide" or "terminal" (whitespace and
         case are ignored), wins outright — a manual override for testing
         one branch from the other surface.
      2. CLAUDE_CODE_ENTRYPOINT, when it is a member of IDE_ENTRYPOINTS,
         selects "ide".
      3. Anything else — including unknown or future entrypoints — is
         "terminal", the shape built from universal markdown.

    Args:
        env: environment mapping to inspect. Defaults to os.environ; the
            explicit parameter lets tests pass a fake without
            monkeypatching.
    """
    source = os.environ if env is None else env

    forced = source.get("COACH_RENDER_ENV", "").strip().lower()
    if forced == "ide" or forced == "terminal":
        return forced  # type: ignore[return-value]

    surface = source.get("CLAUDE_CODE_ENTRYPOINT", "").strip().lower()
    return "ide" if surface in IDE_ENTRYPOINTS else "terminal"
65
+
66
+
67
+ # -----------------------------------------------------------------------------
68
+ # Glyph-capability probes — lets bespoke banner themes ask "can this terminal
69
+ # render U+2694 ⚔ as a single cell?" before committing to it. Falls back to a
70
+ # 1-cell fallback glyph (e.g., ✕ U+2715 — narrow, though not actually ASCII) when the answer is no.
71
+ #
72
+ # Memoized at module level — probing the env is idempotent within a process,
73
+ # and every UserPromptSubmit hook spawns a fresh interpreter, so a per-process
74
+ # cache is enough. Tests pass an explicit `env` mapping to bypass the cache.
75
+
76
+ _DUAL_BLADE_CACHE: bool | None = None
77
+
78
+
79
def _is_truthy(value: str) -> bool:
    """Return True when *value*, trimmed and lowercased, is 1/true/yes/on."""
    normalized = value.strip().lower()
    return normalized in {"1", "true", "yes", "on"}
81
+
82
+
83
def _is_falsy(value: str) -> bool:
    """Return True when *value*, trimmed and lowercased, is 0/false/no/off.

    An empty string is neither truthy nor falsy, which lets callers treat
    "unset" as a third state.
    """
    normalized = value.strip().lower()
    return normalized in {"0", "false", "no", "off"}
85
+
86
+
87
def supports_dual_blade(env: Mapping[str, str] | None = None) -> bool:
    """Report whether ⚔ (U+2694 CROSSED SWORDS) is expected to render as a
    single cell here, so themes can fall back to ✕ when it would not.

    The decision itself is made by `_probe_dual_blade`, in this order:
      1. COACH_FORCE_ASCII_GLYPHS truthy → False (generic kill-switch).
      2. COACH_SUPPORTS_DUAL_BLADE truthy/falsy → honored explicitly.
      3. Effective locale (LC_ALL, then LC_CTYPE, then LANG) set but not
         UTF-8 → False.
      4. TERM is "dumb" or "linux" → False.
      5. Otherwise → True.

    Args:
        env: environment mapping. When omitted, os.environ is probed once
            and the answer is memoized in the module-level cache. When
            provided, the cache is neither read nor written, so tests can
            pin any shape.
    """
    global _DUAL_BLADE_CACHE

    if env is not None:
        # Caller-supplied mapping: always probe fresh, never touch the cache.
        return _probe_dual_blade(env)

    if _DUAL_BLADE_CACHE is None:
        _DUAL_BLADE_CACHE = _probe_dual_blade(os.environ)
    return _DUAL_BLADE_CACHE
119
+
120
+
121
def _probe_dual_blade(env: Mapping[str, str]) -> bool:
    """Uncached glyph-capability probe over *env* (see `supports_dual_blade`)."""
    # Kill-switch beats everything, including an explicit opt-in.
    if _is_truthy(env.get("COACH_FORCE_ASCII_GLYPHS", "")):
        return False

    declared = env.get("COACH_SUPPORTS_DUAL_BLADE", "")
    if _is_truthy(declared):
        return True
    if _is_falsy(declared):
        return False

    # POSIX locale precedence is LC_ALL > LC_CTYPE > LANG: only the first
    # non-empty variable counts. Merging all three would let an unused
    # `LANG=en_US.UTF-8` override an explicit `LC_ALL=C`.
    candidates = (
        env.get(name, "").strip() for name in ("LC_ALL", "LC_CTYPE", "LANG")
    )
    locale_name = next((value for value in candidates if value), "").lower()
    if locale_name and not ("utf-8" in locale_name or "utf8" in locale_name):
        return False

    return env.get("TERM", "").strip().lower() not in ("dumb", "linux")
@@ -0,0 +1,87 @@
1
+ """
2
+ Shared reward-hint inference for the Coach system.
3
+
4
+ A `reward_hint` attached to a profile.yaml entry specifies what user action
5
+ completes a tip for that pattern, and how much XP each action earns. Shape:
6
+
7
+ reward_hint:
8
+ action: test_run | commit | skill_invoke # named action
9
+ xp: 2 # per-action XP
10
+ description: "test run (pytest / ...)" # human-readable for tip
11
+
12
+ This module infers reasonable defaults from an entry's id + nudge text when
13
+ no explicit hint is set. It's imported by:
14
+
15
+ - coach/bin/merge.py — to populate reward_hint at entry creation
16
+ - hooks/coach-user-prompt.py — as read-time fallback for existing entries
17
+
18
+ Single source of truth for the keyword heuristic + detector vocabulary.
19
+ Hooks can't simply `import reward_hints` because they run from ~/.claude/
20
+ with a different cwd; they must append coach/bin/ to sys.path first.
21
+ """
22
+ from __future__ import annotations
23
+
24
+ # (keyword, reward_hint payload). First match wins. Keywords are case-
25
+ # insensitive and checked against BOTH the entry id and the nudge text,
26
+ # so a pattern with id="over-mocks" whose nudge says "wrote code without
27
+ # running tests" still trips test_run via the nudge.
28
_HEURISTIC: list[tuple[str, dict]] = [
    # Test-run signals (broadest coverage: id-tokens + common nudge phrasings).
    # Only the first keyword carries the long, example-laden description.
    ("without-test", {"action": "test_run", "xp": 2,
                      "description": "test run (pytest / jest / cargo test / …)"}),
    *(
        (phrase, {"action": "test_run", "xp": 2, "description": "test run"})
        for phrase in (
            "without test",
            "no test",
            # NOTE: matching is by substring, so "no test" above already
            # covers every text containing "no tests"; kept for parity.
            "no tests",
            "untested",
            "skip-test",
            "skip test",
            "skipped test",
            "skipping test",
            "tests skipped",
            "tests were skipped",
            "running tests",
            "run tests",
            "test run",
            "test suite",
        )
    ),
    # Commit signals
    *(
        (phrase, {"action": "commit", "xp": 1, "description": "git commit"})
        for phrase in (
            "without-commit",
            "without committing",
            "not committing",
        )
    ),
]
51
+
52
+
53
def infer_reward_hint(entry: dict) -> dict | None:
    """Infer a reward_hint for an entry that has none of its own.

    The entry's `id` and `nudge` are lowercased and searched for each
    `_HEURISTIC` keyword in table order; the first keyword found decides.
    Works for both profile.yaml entries and analyze.py detection dicts,
    since both carry `id` and `nudge`.

    Returns:
        A fresh copy of the matching hint payload (so callers cannot mutate
        the shared defaults), or None when *entry* is not a dict or no
        keyword matches — i.e. a graduation-only pattern.
    """
    if not isinstance(entry, dict):
        return None

    parts = [str(entry.get(field) or "").lower() for field in ("id", "nudge")]
    searchable = " ".join(parts)

    match = next(
        (payload for needle, payload in _HEURISTIC if needle in searchable),
        None,
    )
    return None if match is None else dict(match)
72
+
73
+
74
def effective_reward_hint(entry: dict) -> dict | None:
    """Return the entry's explicit reward_hint if present and valid, else
    infer one. Centralizes the "explicit overrides inference" rule so every
    caller gets the same precedence.

    An explicit hint is valid when it is a dict with a truthy `action` and
    an `xp` that converts to a positive integer. A hint whose `xp` cannot be
    converted at all (e.g. "two", a list, infinity) is treated as invalid
    and falls through to inference instead of raising, so one malformed
    profile entry cannot crash the caller.

    Returns:
        The explicit hint dict itself (not a copy), the inferred hint, or
        None when *entry* is not a dict or nothing can be inferred.
    """
    if not isinstance(entry, dict):
        return None

    explicit = entry.get("reward_hint")
    if isinstance(explicit, dict) and explicit.get("action"):
        try:
            xp = int(explicit.get("xp", 0) or 0)
        except (TypeError, ValueError, OverflowError):
            xp = 0  # unparseable xp → not a valid explicit hint
        if xp > 0:
            return explicit

    return infer_reward_hint(entry)
@@ -0,0 +1,20 @@
1
#!/bin/bash
# Wrapper invoked by launchd to run the Coach insights pass once. Logs to
# /tmp. Runs the deterministic insights.sh — does NOT go through the
# claude CLI, so no cold-start cost and no slash-command routing issues.
# (The on-demand `/coach-insights` skill is the LLM-driven counterpart
# that runs from inside Claude Code; this wrapper is launchd-only.)

set -u
export PATH="/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin"
# Fall back to the current user's home directory when HOME is unset.
export HOME="${HOME:-$(eval echo ~$(whoami))}"

LOG="/tmp/claude-coach.log"

# Emit a fresh UTC timestamp for every log line. Capturing it once up front
# would stamp the exit line with the start time and hide the run duration.
ts() {
  date -u +%Y-%m-%dT%H:%M:%SZ
}

echo "[$(ts)] starting insights.sh 1d" >> "$LOG"

"$HOME/.claude/coach/bin/insights.sh" 1d >> "$LOG" 2>&1
EXIT=$?

echo "[$(ts)] insights.sh exited $EXIT" >> "$LOG"
# Propagate the analyzer's exit status to the caller.
exit "$EXIT"
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env python3
2
+ """Run a command while holding an exclusive flock on a sidecar file.
3
+
4
+ Used by insights-llm.sh to serialize concurrent weekly-insights runs.
5
+ Two Claude Code SessionStart hooks firing within the ~90s window of a
6
+ slow `claude -p "/insights"` call will both see `.last_weekly_insights`
7
+ as stale and try to spawn the wrapper. Without this lock, both run the
8
+ LLM call, both aggregate, both merge — wasting an LLM call and
9
+ prematurely advancing debounce/graduation streaks. Under the lock, the
10
+ second wrapper rechecks the throttle (which the first wrapper just
11
+ refreshed) and skips cleanly.
12
+
13
+ Usage:
14
+ run_with_lock.py <lock_path> <cmd> [args...]
15
+
16
+ Behavior:
17
+ - Acquires `fcntl.LOCK_EX | LOCK_NB` on the lock file (created if
18
+ absent). Failure to acquire (because another process holds it)
19
+ prints a one-line "skipped (concurrent ...)" notice to stdout and
20
+ exits SKIP_EXIT_CODE (10). The caller should treat this as
21
+ benign — coordination, not error.
22
+ - On success, runs `cmd` as a subprocess and returns its exit code.
23
+ Sets `COACH_LLM_LOCK_HELD=1` in the child env so the wrapped
24
+ script can detect it's already inside the lock and skip a
25
+ re-exec loop.
26
+ - Lock auto-releases when this process exits (per fcntl semantics);
27
+ we also explicitly unlock + close on the way out.
28
+ """
29
+ from __future__ import annotations
30
+
31
+ import fcntl
32
+ import os
33
+ import subprocess
34
+ import sys
35
+
36
+
37
+ SKIP_EXIT_CODE = 10
38
+
39
+
40
def main() -> int:
    """Run `<cmd>` from argv while holding an exclusive lock on `<lock_path>`.

    Returns:
        64  — usage error (fewer than two arguments).
        SKIP_EXIT_CODE — another process already holds the lock; a one-line
              notice is printed to stdout and nothing is run.
        127 — the command could not be found.
        126 — the command exists but could not be launched.
        otherwise the wrapped command's own exit code.

    The child receives COACH_LLM_LOCK_HELD=1 (unless the variable is
    already set) so the wrapped script can tell it is inside the lock.
    """
    if len(sys.argv) < 3:
        print(
            "usage: run_with_lock.py <lock_path> <cmd> [args...]",
            file=sys.stderr,
        )
        return 64

    lock_path = sys.argv[1]
    cmd = sys.argv[2:]

    # Ensure the lock file's parent dir exists (test fixtures sometimes
    # point at not-yet-created paths). Failure is ignored here because
    # os.open below reports the real problem.
    parent = os.path.dirname(lock_path) or "."
    try:
        os.makedirs(parent, exist_ok=True)
    except OSError:
        pass

    fd = os.open(lock_path, os.O_CREAT | os.O_RDWR, 0o644)
    try:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            print("skipped (concurrent weekly run in progress)")
            return SKIP_EXIT_CODE

        env = os.environ.copy()
        env.setdefault("COACH_LLM_LOCK_HELD", "1")

        try:
            return subprocess.run(cmd, env=env).returncode
        except FileNotFoundError as exc:
            # Shell convention: 127 = command not found.
            print(f"run_with_lock.py: cannot run {cmd[0]!r}: {exc}", file=sys.stderr)
            return 127
        except OSError as exc:
            # Shell convention: 126 = found but not executable.
            print(f"run_with_lock.py: cannot run {cmd[0]!r}: {exc}", file=sys.stderr)
            return 126
    finally:
        # The lock is dropped when the fd closes, but release explicitly
        # anyway. Unlocking an fd that never acquired the lock is harmless.
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        except OSError:
            pass
        try:
            os.close(fd)
        except OSError:
            pass
82
+
83
+
84
+ if __name__ == "__main__":
85
+ sys.exit(main())
@@ -0,0 +1,260 @@
1
+ """
2
+ Shared transcript-scoring primitives for the Coach system.
3
+
4
+ One source of truth for action detection (regexes, tool-type matchers) and
5
+ per-action XP so that `stats.py`, `bank.py`, and the `coach-user-prompt.py`
6
+ hook all agree on what counts as what.
7
+
8
+ Exports:
9
+ - TEST_RE, COMMIT_RE, COLLECT_ONLY_RE — position-anchored bash regexes
10
+ - BASELINE_ACTIONS — {name -> xp} baseline reward table
11
+ - matches_action(tool_use, action) — shared per-tool-use matcher
12
+ - score_transcript_with_breakdown() — explainable uncapped/capped score
13
+ - score_transcript(path, profile) — returns capped session XP
14
+ baseline XP for test_run / commit / skill_invoke, plus any
15
+ additional XP for reward_hint actions found on active profile
16
+ entries whose `action` is NOT already baseline (no double-counting).
17
+
18
+ Stats.py, bank.py, status.py, and the hook all call this module so their
19
+ definitions of "test run", "commit", "skill invoke", and dynamic actions do
20
+ not drift.
21
+
22
+ IMPORTANT: if you add a new action detector, extend ACTION_DETECTORS below
23
+ and also add a reward_hints.py heuristic entry so `/coach-insights` patterns can
24
+ auto-bind to it.
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import re
30
+ from pathlib import Path
31
+
32
+
33
+ # --- Position-anchored regexes --------------------------------------------------
34
+ # Start-of-line or after ; && || |, with optional env-var or cd-prefix.
35
+ # Prevents false positives on "pytest" / "git commit" inside commit-message
36
+ # bodies.
37
# Matches a test-runner invocation in command position within a Bash string.
TEST_RE = re.compile(
    r"(?:^|[;&|])\s*"          # start of string, or right after ; & | (so && and || too)
    r"(?:\w+=\S+\s+)*"         # any number of leading VAR=value assignments
    r"(?:cd\s+\S+\s*&&\s*)?"   # optional `cd <dir> &&` prefix
    r"(?:pytest|jest|vitest|mocha|rspec|phpunit|"
    r"cargo\s+test|go\s+test|pnpm\s+test|npm\s+test|bun\s+test|"
    r"yarn\s+test|mix\s+test)"
    r"\b"                      # runner name must end at a word boundary
)
# Matches `git commit` in command position; same prefix rules as TEST_RE.
# NOTE: `^` is not compiled with re.MULTILINE, so only the very start of the
# command string counts as "start of line" for both patterns.
COMMIT_RE = re.compile(
    r"(?:^|[;&|])\s*"          # start of string, or right after ; & |
    r"(?:\w+=\S+\s+)*"         # any number of leading VAR=value assignments
    r"(?:cd\s+\S+\s*&&\s*)?"   # optional `cd <dir> &&` prefix
    r"git\s+commit\b"
)
52
# `pytest --collect-only` (short alias `--co`) only lists tests, so counting
# it would farm XP without running anything. `--co-only` is not a real
# pytest flag but stays accepted for backward compatibility. The trailing
# lookahead keeps unrelated options that merely start with `--co`
# (`--cov`, `--color`, `--collect-in-virtualenv`) from matching.
COLLECT_ONLY_RE = re.compile(r"pytest\s+.*--co(?:llect-only|-only)?(?![\w-])")
54
+
55
+ # --- Baseline reward table -----------------------------------------------------
56
+ # Baseline actions always scored, regardless of profile contents. Matches
57
+ # what reward_hint can specify — if a pattern has `reward_hint: { action:
58
+ # test_run }`, that's the SAME +2 the baseline awards, not an additional one.
59
+ BASELINE_ACTIONS = {
60
+ "test_run": 2,
61
+ "commit": 1,
62
+ "skill_invoke": 1,
63
+ }
64
+
65
+ SESSION_XP_CAP = 15
66
+
67
+
68
+ # --- Per-action detectors ------------------------------------------------------
69
+ # Each detector takes a tool_use dict (`{type, name, input}`) and returns
70
+ # True if that tool_use counts as one occurrence of this action.
71
def _detect_test_run(tu: dict) -> bool:
    """Return True when *tu* is a Bash tool_use that runs a test command.

    Collect-only pytest invocations are rejected first, so they never count
    even though they also match TEST_RE.
    """
    if tu.get("name") == "Bash":
        command = (tu.get("input") or {}).get("command", "") or ""
        collect_only = COLLECT_ONLY_RE.search(command) is not None
        return (not collect_only) and TEST_RE.search(command) is not None
    return False
78
+
79
+
80
def _detect_commit(tu: dict) -> bool:
    """Return True when *tu* is a Bash tool_use whose command matches COMMIT_RE."""
    if tu.get("name") == "Bash":
        command = (tu.get("input") or {}).get("command", "") or ""
        return COMMIT_RE.search(command) is not None
    return False
85
+
86
+
87
+ # skill_invoke is counted by unique-skill-id set, not per-event, so it's
88
+ # handled specially in score_transcript — not in this dispatch.
89
+
90
def _detect_doc_write(tu: dict) -> bool:
    """Return True for a Write/Edit/MultiEdit whose `file_path` ends in ".md".

    Rewards documentation edits for doc-skipping patterns. The suffix check
    is case-sensitive, and a missing or non-string path counts as no match.
    """
    if tu.get("name", "") in ("Write", "Edit", "MultiEdit"):
        target = (tu.get("input") or {}).get("file_path") or ""
        return isinstance(target, str) and target.endswith(".md")
    return False
97
+
98
+
99
+ # action-name → (event-detector-or-None, per-event-xp). None means
100
+ # "this action is scored specially in score_transcript" (e.g. skill_invoke
101
+ # which tallies unique skill ids).
102
+ ACTION_DETECTORS = {
103
+ "test_run": (_detect_test_run, 2),
104
+ "commit": (_detect_commit, 1),
105
+ "skill_invoke": (None, 1), # special: unique-set tally
106
+ "doc_write": (_detect_doc_write, 1), # extension slot; /coach-insights can bind here
107
+ }
108
+
109
+
110
def matches_action(
    tool_use: dict,
    action: str,
    *,
    skill_id: str | None = None,
) -> bool:
    """Decide whether a single transcript tool_use counts as *action*.

    `skill_invoke` is handled inline: the tool must be SlashCommand or
    Skill and carry a non-empty `command`/`skill` input (leading slashes
    ignored). When `skill_id` is given, the invoked id must also equal it.
    Every other action is delegated to its ACTION_DETECTORS entry.

    Non-dict input, unknown actions, and actions without a detector all
    yield False rather than raising, so hooks can fail closed.
    """
    if not isinstance(tool_use, dict):
        return False

    if action != "skill_invoke":
        detector, _unused_xp = ACTION_DETECTORS.get(action, (None, 0))
        return False if detector is None else bool(detector(tool_use))

    if tool_use.get("name") not in ("SlashCommand", "Skill"):
        return False
    payload = tool_use.get("input") or {}
    invoked = (payload.get("command") or payload.get("skill") or "").lstrip("/")
    if not invoked:
        return False
    if not skill_id:
        return True
    return invoked == skill_id.lstrip("/")
138
+
139
+
140
def _iter_tool_uses(path: Path):
    """Yield every tool_use dict from a transcript JSONL, tolerating junk.

    A line is skipped — without aborting the scan — when it is not valid
    JSON, is not a JSON object, has a `message` that is not an object, or
    has a `message.content` that is not a list. Undecodable bytes are
    replaced rather than raised, so one bad line cannot hide the tool uses
    that follow it. If the file cannot be opened or read, the generator
    simply ends.

    Intentional: skips the redact() pre-pass that analyze.py uses. The only
    outputs that escape this function (via score_transcript_with_breakdown)
    are integer counts and skill_id slugs — no transcript bytes are returned
    or persisted. If the output shape ever expands to include user content,
    add the redact() pass to match analyze.py.
    """
    try:
        with path.open(encoding="utf-8", errors="replace") as fh:
            for line in fh:
                try:
                    obj = json.loads(line)
                except (ValueError, RecursionError):
                    continue
                # Valid JSON that is not an object (list, number, string)
                # carries no message; skip it instead of raising.
                if not isinstance(obj, dict):
                    continue
                msg = obj.get("message")
                if not isinstance(msg, dict):
                    continue
                content = msg.get("content")
                if not isinstance(content, list):
                    continue
                for item in content:
                    if isinstance(item, dict) and item.get("type") == "tool_use":
                        yield item
    except Exception:
        # Deliberate best-effort: an unreadable transcript scores as empty.
        return
165
+
166
+
167
def _dynamic_actions_from_profile(profile: dict) -> dict:
    """Return {action_name: xp} for reward_hint.action values present in
    profile that are NOT already baseline. Deduplicates — if two patterns
    reference the same action, it's still one scoring rule.

    Malformed data is skipped rather than raised: a non-dict profile,
    `entries` that is not a list/tuple, non-dict entries or hints, an
    `action` that is not a non-empty string, and an `xp` that cannot be
    converted to a positive integer all contribute nothing.
    """
    out: dict[str, int] = {}
    if not isinstance(profile, dict):
        return out

    entries = profile.get("entries")
    if not isinstance(entries, (list, tuple)):
        return out

    for e in entries:
        if not isinstance(e, dict):
            continue
        h = e.get("reward_hint")
        if not isinstance(h, dict):
            continue
        action = h.get("action")
        # Detector names are strings; anything else (including unhashable
        # values that would break the lookups below) can never match.
        if not isinstance(action, str) or not action:
            continue
        try:
            xp = int(h.get("xp", 0) or 0)
        except (TypeError, ValueError, OverflowError):
            continue  # unparseable xp → ignore this hint
        if xp <= 0:
            continue
        if action in BASELINE_ACTIONS:
            continue  # already scored via baseline
        if action not in ACTION_DETECTORS:
            continue  # no detector registered; stats.py can't score it
        # If multiple patterns reference the same non-baseline action with
        # different xp values, keep the max — conservative upper bound.
        out[action] = max(out.get(action, 0), xp)
    return out
192
+
193
+
194
def score_transcript(path: Path, profile: dict | None = None) -> int:
    """Return the capped session XP for one transcript.

    Thin wrapper over `score_transcript_with_breakdown` that keeps only its
    `capped_xp` field: baseline actions (test_run, commit, skill_invoke)
    plus, when `profile` is given, any additional reward_hint actions that
    have a registered detector — limited to SESSION_XP_CAP.
    """
    breakdown = score_transcript_with_breakdown(path, profile)
    return int(breakdown["capped_xp"])
202
+
203
+
204
def score_transcript_with_breakdown(
    path: Path,
    profile: dict | None = None,
) -> dict:
    """Score one transcript and explain where the XP came from.

    Test runs and commits are counted per tool_use; skills are counted once
    per distinct skill id; profile-defined dynamic actions are counted per
    tool_use at the XP their reward_hint specifies.

    Returns a flat dict meant for CLI renderers, with keys:
        tests, commits            — event counts
        skills_n, skills_list     — distinct skill ids (count, sorted list)
        dynamic_actions           — {action: {count, xp_each, xp}} for
                                    dynamic actions that actually occurred
        available_dynamic_actions — {action: xp} rules taken from profile
        raw_xp, capped_xp, capped — uncapped total, total limited to
                                    SESSION_XP_CAP, and whether the cap bit
    """
    rules = _dynamic_actions_from_profile(profile or {})

    n_tests = 0
    n_commits = 0
    seen_skills: set[str] = set()
    hits: dict[str, int] = {}

    for tool_use in _iter_tool_uses(path):
        if matches_action(tool_use, "test_run"):
            n_tests += 1
        if matches_action(tool_use, "commit"):
            n_commits += 1
        if matches_action(tool_use, "skill_invoke"):
            payload = tool_use.get("input") or {}
            slug = (payload.get("command") or payload.get("skill") or "").lstrip("/")
            if slug:
                seen_skills.add(slug)
        for name in rules:
            if matches_action(tool_use, name):
                hits[name] = hits.get(name, 0) + 1

    # Alphabetical by action name so renderers get a stable order.
    per_action = {
        name: {
            "count": hits[name],
            "xp_each": rules[name],
            "xp": hits[name] * rules[name],
        }
        for name in sorted(hits)
    }

    baseline_xp = (
        n_tests * BASELINE_ACTIONS["test_run"]
        + n_commits * BASELINE_ACTIONS["commit"]
        + len(seen_skills) * BASELINE_ACTIONS["skill_invoke"]
    )
    total = baseline_xp + sum(detail["xp"] for detail in per_action.values())

    return {
        "tests": n_tests,
        "commits": n_commits,
        "skills_n": len(seen_skills),
        "skills_list": sorted(seen_skills),
        "dynamic_actions": per_action,
        "available_dynamic_actions": dict(sorted(rules.items())),
        "raw_xp": total,
        "capped_xp": min(total, SESSION_XP_CAP),
        "capped": total > SESSION_XP_CAP,
    }