@rm0nroe/coach-claw 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +311 -0
  3. package/coach/README.md +99 -0
  4. package/coach/bin/aggregate_facets.py +274 -0
  5. package/coach/bin/analyze.py +678 -0
  6. package/coach/bin/bank.py +247 -0
  7. package/coach/bin/banner_themes.py +645 -0
  8. package/coach/bin/coach_paths.py +33 -0
  9. package/coach/bin/coexistence_check.py +129 -0
  10. package/coach/bin/configure.py +245 -0
  11. package/coach/bin/cron_check.py +81 -0
  12. package/coach/bin/default_statusline.py +135 -0
  13. package/coach/bin/doctor.py +663 -0
  14. package/coach/bin/insights-llm.sh +264 -0
  15. package/coach/bin/insights.sh +163 -0
  16. package/coach/bin/insights_window.py +111 -0
  17. package/coach/bin/marker_io.py +154 -0
  18. package/coach/bin/merge.py +671 -0
  19. package/coach/bin/redact.py +86 -0
  20. package/coach/bin/render_env.py +148 -0
  21. package/coach/bin/reward_hints.py +87 -0
  22. package/coach/bin/run-insights.sh +20 -0
  23. package/coach/bin/run_with_lock.py +85 -0
  24. package/coach/bin/scoring.py +260 -0
  25. package/coach/bin/skill_inventory.py +215 -0
  26. package/coach/bin/stats.py +459 -0
  27. package/coach/bin/status.py +293 -0
  28. package/coach/bin/statusline_self_patch.py +205 -0
  29. package/coach/bin/statusline_variants.py +146 -0
  30. package/coach/bin/statusline_wrap.py +244 -0
  31. package/coach/bin/statusline_wrap_action.py +460 -0
  32. package/coach/bin/switch_to_plugin.py +256 -0
  33. package/coach/bin/themes.py +256 -0
  34. package/coach/bin/user_config.py +176 -0
  35. package/coach/bin/xp_accounting.py +98 -0
  36. package/coach/changelog.md +4 -0
  37. package/coach/default-statusline-command.sh +19 -0
  38. package/coach/default-statusline-wrap-command.sh +15 -0
  39. package/coach/profile.yaml +37 -0
  40. package/coach/tests/conftest.py +13 -0
  41. package/coach/tests/test_aggregate_facets.py +379 -0
  42. package/coach/tests/test_analyze_aggregate.py +153 -0
  43. package/coach/tests/test_analyze_redaction.py +105 -0
  44. package/coach/tests/test_analyze_strengths.py +165 -0
  45. package/coach/tests/test_bank_atomic_write.py +61 -0
  46. package/coach/tests/test_bank_concurrency.py +126 -0
  47. package/coach/tests/test_banner_themes.py +981 -0
  48. package/coach/tests/test_celebrate_dedup.py +409 -0
  49. package/coach/tests/test_coach_paths.py +50 -0
  50. package/coach/tests/test_coexistence_check.py +128 -0
  51. package/coach/tests/test_configure.py +258 -0
  52. package/coach/tests/test_cron_check.py +118 -0
  53. package/coach/tests/test_cron_nudge_hook.py +134 -0
  54. package/coach/tests/test_detection_parity.py +105 -0
  55. package/coach/tests/test_doctor.py +595 -0
  56. package/coach/tests/test_hook_bespoke_dispatch.py +288 -0
  57. package/coach/tests/test_hook_module_resolution.py +116 -0
  58. package/coach/tests/test_hook_relevance.py +996 -0
  59. package/coach/tests/test_hook_render_env.py +364 -0
  60. package/coach/tests/test_hook_session_id_guard.py +160 -0
  61. package/coach/tests/test_insights_llm.py +759 -0
  62. package/coach/tests/test_insights_llm_venv_path.py +109 -0
  63. package/coach/tests/test_insights_window.py +237 -0
  64. package/coach/tests/test_install.py +1150 -0
  65. package/coach/tests/test_install_pyyaml_fallback.py +142 -0
  66. package/coach/tests/test_marker_consumption.py +167 -0
  67. package/coach/tests/test_marker_writer_locking.py +305 -0
  68. package/coach/tests/test_merge.py +413 -0
  69. package/coach/tests/test_no_broken_mktemp.py +90 -0
  70. package/coach/tests/test_render_env.py +137 -0
  71. package/coach/tests/test_render_env_glyphs.py +119 -0
  72. package/coach/tests/test_reward_hints.py +59 -0
  73. package/coach/tests/test_scoring.py +147 -0
  74. package/coach/tests/test_session_start_weekly_trigger.py +92 -0
  75. package/coach/tests/test_skill_inventory.py +368 -0
  76. package/coach/tests/test_stats_hybrid.py +142 -0
  77. package/coach/tests/test_status_accounting.py +41 -0
  78. package/coach/tests/test_statusline_failsafe.py +70 -0
  79. package/coach/tests/test_statusline_self_patch.py +261 -0
  80. package/coach/tests/test_statusline_variants.py +110 -0
  81. package/coach/tests/test_statusline_wrap.py +196 -0
  82. package/coach/tests/test_statusline_wrap_action.py +408 -0
  83. package/coach/tests/test_switch_to_plugin.py +360 -0
  84. package/coach/tests/test_themes.py +104 -0
  85. package/coach/tests/test_user_config.py +160 -0
  86. package/coach/tests/test_wrap_announce_hook.py +130 -0
  87. package/coach/tests/test_xp_accounting.py +55 -0
  88. package/hooks/coach-session-start.py +536 -0
  89. package/hooks/coach-user-prompt.py +2288 -0
  90. package/install-launchd.sh +102 -0
  91. package/install.sh +597 -0
  92. package/launchd/com.local.claude-coach.plist.template +34 -0
  93. package/launchd/run-insights.sh +20 -0
  94. package/npm/coach-claw.js +259 -0
  95. package/package.json +52 -0
  96. package/requirements.txt +11 -0
  97. package/settings-snippet.json +31 -0
  98. package/skills/coach/SKILL.md +107 -0
  99. package/skills/coach-insights/SKILL.md +78 -0
  100. package/skills/config/SKILL.md +149 -0
@@ -0,0 +1,379 @@
1
+ """Unit tests for coach/bin/aggregate_facets.py.
2
+
3
+ Mock facets/*.json sidecar fixtures, assert threshold-based emit shape
4
+ matches what merge.py expects on its --detections input.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import subprocess
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ import aggregate_facets
14
+
15
+
16
+ def _write_facet(dir_: Path, name: str, payload: dict) -> Path:
17
+ p = dir_ / f"{name}.json"
18
+ p.write_text(json.dumps(payload))
19
+ return p
20
+
21
+
22
+ def _make_session(
23
+ *,
24
+ friction: dict | None = None,
25
+ primary_success: str | None = None,
26
+ friction_detail: str = "",
27
+ brief_summary: str = "",
28
+ session_id: str = "test-session",
29
+ ) -> dict:
30
+ out: dict = {"session_id": session_id}
31
+ if friction is not None:
32
+ out["friction_counts"] = friction
33
+ if primary_success is not None:
34
+ out["primary_success"] = primary_success
35
+ if friction_detail:
36
+ out["friction_detail"] = friction_detail
37
+ if brief_summary:
38
+ out["brief_summary"] = brief_summary
39
+ return out
40
+
41
+
42
def test_friction_counts_emits_negative_detection(tmp_path: Path) -> None:
    """friction_counts.misunderstood_request in 4/5 sessions → emit negative."""
    for idx in range(4):
        _write_facet(
            tmp_path,
            f"s{idx}",
            _make_session(
                friction={"misunderstood_request": 2},
                friction_detail=f"session {idx} got off-track on the first attempt",
                session_id=f"s{idx}",
            ),
        )
    # Fifth session is clean, so the friction ratio lands at 4/5.
    _write_facet(tmp_path, "s4", _make_session(session_id="s4"))

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    by_id = {d["id"]: d for d in dets}
    assert "misunderstood-request" in by_id
    det = by_id["misunderstood-request"]
    assert det["direction"] == "negative"
    assert det["ratio"] == 0.8
    assert det["n_sessions"] == 5
63
+
64
+
65
def test_primary_success_emits_positive_detection(tmp_path: Path) -> None:
    """6/10 sessions with primary_success=good_debugging → emit positive."""
    for idx in range(6):
        _write_facet(
            tmp_path,
            f"s{idx}",
            _make_session(
                primary_success="good_debugging",
                brief_summary=f"session {idx}: drove the bug to root cause",
                session_id=f"s{idx}",
            ),
        )
    # The remaining four sessions are clean → 60% ratio overall.
    for idx in range(6, 10):
        _write_facet(tmp_path, f"s{idx}", _make_session(session_id=f"s{idx}"))

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    by_id = {d["id"]: d for d in dets}
    assert "good-debugging" in by_id
    det = by_id["good-debugging"]
    assert det["direction"] == "positive"
    assert det["ratio"] == 0.6
86
+
87
+
88
def test_below_threshold_drops_detection(tmp_path: Path) -> None:
    """Friction in 2/10 sessions (<25%) → not emitted."""
    for idx in range(10):
        payload = (
            _make_session(friction={"buggy_code": 1}, session_id=f"s{idx}")
            if idx < 2
            else _make_session(session_id=f"s{idx}")
        )
        _write_facet(tmp_path, f"s{idx}", payload)

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    assert "buggy-code" not in {d["id"] for d in dets}
101
+
102
+
103
def test_strength_threshold_higher_than_negative(tmp_path: Path) -> None:
    """5/10 (50%) primary_success does NOT emit; needs ≥60%."""
    for idx in range(10):
        payload = (
            _make_session(primary_success="multi_file_changes", session_id=f"s{idx}")
            if idx < 5
            else _make_session(session_id=f"s{idx}")
        )
        _write_facet(tmp_path, f"s{idx}", payload)

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    assert "multi-file-changes" not in {d["id"] for d in dets}
116
+
117
+
118
def test_strength_at_threshold_emits(tmp_path: Path) -> None:
    """Exactly 60% (6/10) primary_success → emits."""
    for idx in range(10):
        payload = (
            _make_session(primary_success="multi_file_changes", session_id=f"s{idx}")
            if idx < 6
            else _make_session(session_id=f"s{idx}")
        )
        _write_facet(tmp_path, f"s{idx}", payload)

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    assert "multi-file-changes" in {d["id"] for d in dets}
131
+
132
+
133
def test_id_kebab_normalization(tmp_path: Path) -> None:
    """friction_counts underscore keys emit kebab-case ids."""
    for idx in range(3):
        _write_facet(
            tmp_path,
            f"s{idx}",
            _make_session(
                friction={"misunderstood_request": 1, "wrong_approach": 1},
                session_id=f"s{idx}",
            ),
        )
    _write_facet(tmp_path, "s3", _make_session(session_id="s3"))

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    emitted = {d["id"] for d in dets}
    # Both keys hit 3/4 = 75% > 25% → both should emit.
    assert "misunderstood-request" in emitted
    assert "wrong-approach" in emitted
    # No underscores survive normalization in any id.
    assert all("_" not in d["id"] for d in dets)
154
+
155
+
156
def test_examples_capped_and_redacted(tmp_path: Path) -> None:
    """5 friction_detail strings → capped at 3, each ≤120 chars; file paths
    redacted."""
    raw_examples = [
        "Edited /Users/foo/project/src/main.py and broke the build for an hour",
        "The settings.py change cascaded into a migration regression",
        "Wrong approach in /tmp/bar/src/handler.go before we caught it on PR",
        "Went down the wrong rabbit hole on test_runner.ts for 30 minutes",
        "Misread the spec — the README.md said the opposite of what I assumed",
    ]
    for i, detail in enumerate(raw_examples):
        _write_facet(
            tmp_path,
            f"s{i}",
            _make_session(
                friction={"misunderstood_request": 1},
                friction_detail=detail,
                session_id=f"s{i}",
            ),
        )

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    det = {d["id"]: d for d in dets}["misunderstood-request"]
    assert len(det["examples"]) == 3
    leaks = (
        "/Users/foo/",    # file-path tokens
        "/tmp/bar/",
        "settings.py",    # file-extension tokens
        "handler.go",
        "test_runner.ts",
        "README.md",
    )
    for ex in det["examples"]:
        assert len(ex) <= 120
        for token in leaks:
            assert token not in ex
190
+
191
+
192
def test_window_filtering(tmp_path: Path) -> None:
    """Facets older than the window are dropped."""
    import os
    import time as _time

    # Stale: back-date the file's mtime to 14 days ago.
    stale_path = _write_facet(
        tmp_path,
        "stale",
        _make_session(friction={"misunderstood_request": 5}, session_id="stale"),
    )
    two_weeks_ago = _time.time() - 14 * 86400
    os.utime(stale_path, (two_weeks_ago, two_weeks_ago))

    # Fresh: written just now.
    for idx in range(3):
        _write_facet(
            tmp_path,
            f"fresh{idx}",
            _make_session(friction={"buggy_code": 1}, session_id=f"fresh{idx}"),
        )

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    by_id = {d["id"]: d for d in dets}
    # Only 3 sessions fall in the window, all with buggy_code → emit it.
    assert "buggy-code" in by_id
    # The stale session's misunderstood_request must NOT emit.
    assert "misunderstood-request" not in by_id
    assert by_id["buggy-code"]["n_sessions"] == 3
219
+
220
+
221
def test_missing_facets_dir_returns_empty(tmp_path: Path) -> None:
    """Nonexistent facets dir → empty list, no crash."""
    assert aggregate_facets.aggregate(tmp_path / "nope", window_days=7, cap=8) == []
226
+
227
+
228
def test_empty_facets_dir_returns_empty(tmp_path: Path) -> None:
    """Existing but empty facets dir → empty list."""
    assert aggregate_facets.aggregate(tmp_path, window_days=7, cap=8) == []
232
+
233
+
234
def test_malformed_json_skipped(tmp_path: Path) -> None:
    """Malformed JSON files are skipped silently."""
    (tmp_path / "broken.json").write_text("not valid json {{{")
    for idx in range(3):
        _write_facet(
            tmp_path,
            f"s{idx}",
            _make_session(friction={"buggy_code": 1}, session_id=f"s{idx}"),
        )

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    # The three valid sidecars are still aggregated.
    assert "buggy-code" in {d["id"] for d in dets}
246
+
247
+
248
def test_cap_enforced(tmp_path: Path) -> None:
    """cap=2 limits output to 2 detections, highest ratio first."""
    keys = ["misunderstood_request", "wrong_approach", "buggy_code", "edge_case"]
    # All four friction keys hit 100% across the five sessions.
    for idx in range(5):
        _write_facet(
            tmp_path,
            f"s{idx}",
            _make_session(friction=dict.fromkeys(keys, 1), session_id=f"s{idx}"),
        )

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=2)
    assert len(dets) == 2
264
+
265
+
266
def test_schema_shape_matches_merge_input(tmp_path: Path) -> None:
    """Detection objects must carry the fields merge.py reads."""
    for idx in range(3):
        _write_facet(
            tmp_path,
            f"s{idx}",
            _make_session(
                friction={"misunderstood_request": 1},
                friction_detail=f"detail {idx}",
                session_id=f"s{idx}",
            ),
        )

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    assert dets, "expected at least one detection"
    for det in dets:
        # Non-empty string id plus the merge-facing fields.
        assert "id" in det and isinstance(det["id"], str) and det["id"]
        assert det["direction"] in ("positive", "negative")
        assert "name" in det
        assert "nudge" in det
        assert "examples" in det and isinstance(det["examples"], list)
        assert det.get("source") == "insights-weekly"
288
+
289
+
290
def test_zero_count_friction_not_emitted(tmp_path: Path) -> None:
    """friction_counts entries with count=0 are not treated as present."""
    for idx in range(5):
        _write_facet(
            tmp_path,
            f"s{idx}",
            _make_session(friction={"misunderstood_request": 0}, session_id=f"s{idx}"),
        )

    dets = aggregate_facets.aggregate(tmp_path, window_days=7, cap=8)
    assert "misunderstood-request" not in {d["id"] for d in dets}
301
+
302
+
303
# --- CLI-level evidence gate (v0.5.1 P1 #1b) -------------------------------
# `aggregate()` continues to return [] for empty/missing dirs (the tests
# above stay green). The CLI `main()` adds an "evidence gate": when
# n_sessions == 0 in the requested window it exits 3 (EXIT_NO_EVIDENCE)
# and prints no JSON to stdout; the wrapper translates that to its own
# exit 7. Rationale: empty detections WITH n_sessions > 0 is a valid
# clean week and merges normally, but empty detections WITH
# n_sessions == 0 is no evidence at all and must NOT advance
# absence-based streaks.

# Path to the CLI under test, resolved relative to this test file.
SCRIPT = Path(__file__).resolve().parent.parent / "bin" / "aggregate_facets.py"
313
+
314
+
315
def test_no_sessions_in_window_returns_3(tmp_path: Path) -> None:
    """Empty facets dir → CLI exits 3, prints no JSON to stdout, prints
    a clear stderr message naming the window. Pinned by
    aggregate_facets.EXIT_NO_EVIDENCE."""
    empty = tmp_path / "empty-facets"
    empty.mkdir()

    cmd = [sys.executable, str(SCRIPT), "--facets-dir", str(empty), "--window-days", "7"]
    result = subprocess.run(cmd, capture_output=True, text=True)

    assert result.returncode == aggregate_facets.EXIT_NO_EVIDENCE == 3, (
        f"expected exit 3, got {result.returncode}\nstderr={result.stderr}"
    )
    assert "no sessions in last 7 days" in result.stderr
    assert "refusing to emit detections" in result.stderr
    # Stdout MUST be empty so a caller piping stdout into merge gets a
    # parse error instead of a silent `[]` merge.
    assert result.stdout.strip() == "", (
        f"stdout should be empty when bailing on no-evidence: {result.stdout!r}"
    )
338
+
339
+
340
def test_nonexistent_facets_dir_cli_returns_3(tmp_path: Path) -> None:
    """Same gate fires when --facets-dir doesn't exist (the function
    returns []; the CLI catches it via the n_sessions recount)."""
    missing_dir = tmp_path / "does-not-exist"

    proc = subprocess.run(
        [sys.executable, str(SCRIPT), "--facets-dir", str(missing_dir)],
        capture_output=True,
        text=True,
    )

    assert proc.returncode == 3
    assert "no sessions" in proc.stderr
    assert proc.stdout.strip() == ""
354
+
355
+
356
def test_session_with_no_detections_still_exits_0(tmp_path: Path) -> None:
    """The gate fires on n_sessions==0, NOT on detections==0. A clean
    session (no friction, no primary_success) with n_sessions=1 emits
    no detections but is a legitimate clean signal and must merge as
    `[]` — exit 0, NOT exit 3. This pins the asymmetry from
    `_session_with_no_detections_still_exits_0` vs the no-evidence
    gate."""
    _write_facet(tmp_path, "s0", _make_session(session_id="s0"))

    result = subprocess.run(
        [sys.executable, str(SCRIPT), "--facets-dir", str(tmp_path)],
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0, (
        f"single-session-no-friction should exit 0, got {result.returncode}\n"
        f"stderr={result.stderr}"
    )
    assert "n_sessions=1" in result.stderr
    assert "detections=0" in result.stderr
    # Empty detections list, NOT empty stdout — the wrapper merges this
    # as a clean week.
    assert json.loads(result.stdout) == []
@@ -0,0 +1,153 @@
1
+ """analyze.py aggregator: per-project skill invocation breakdown.
2
+
3
+ Locks in the data shape that flows through insights.sh → merge.py and
4
+ ultimately becomes the rolling accumulator the inventory inference
5
+ reads. Pre-2026-04-24 the aggregator collapsed all projects into a
6
+ flat `skills_used` Counter and lost the project association — this
7
+ file guards that the new `skills_by_project` field stays correct
8
+ across the canonical cases.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import analyze
13
+
14
+
15
+ # --- _project_name_from_slug -----------------------------------------------
16
+
17
def test_project_name_from_simple_slug():
    """The common case: `~/Desktop/dev/widget` becomes the slug
    `-Users-alice-Desktop-dev-widget`. Last segment wins."""
    slug = "-Users-alice-Desktop-dev-widget"
    assert analyze._project_name_from_slug(slug) == "widget"
22
+
23
+
24
def test_project_name_from_hyphenated_slug_collapses_to_last_segment():
    """Documented limitation: hyphens in original project names
    collide with the slash-to-dash separator. `acme-app` becomes
    `app`. The hook tokenizer compensates by splitting cwd anchors
    on dashes too, so the partial still matches at filter time."""
    slug = "-Users-r-Desktop-dev-acme-app"
    assert analyze._project_name_from_slug(slug) == "app"
31
+
32
+
33
def test_project_name_from_empty_or_garbage():
    """Degenerate inputs normalize to the empty string or drop junk."""
    assert analyze._project_name_from_slug("") == ""
    # NOTE(review): `None or ""` evaluates to "" before the call, so this
    # line re-checks the empty-string path, not an actual None argument.
    assert analyze._project_name_from_slug(None or "") == ""
    # Trailing dashes stripped before split.
    assert analyze._project_name_from_slug("-Users-x-foo--") == "foo"
38
+
39
+
40
def test_project_name_lowercases():
    """Anchor-token comparison is lowercase on the hook side; emit
    lowercase at source so they line up without the consumer needing
    to re-normalize."""
    slug = "-Users-r-Desktop-dev-MyProject"
    assert analyze._project_name_from_slug(slug) == "myproject"
46
+
47
+
48
+ # --- aggregate(): skills_by_project shape ----------------------------------
49
+
50
+ def _make_session(*, project: str, skills: dict[str, int],
51
+ assistant_turns: int = 5) -> dict:
52
+ """Minimal session shape sufficient for aggregate() to consume.
53
+ aggregate() only reads project, skills_invoked, and a few fields
54
+ from the detection branches; we provide the per-skill counts and
55
+ fill the rest with neutral defaults."""
56
+ return {
57
+ "project": project,
58
+ "skills_invoked": dict(skills),
59
+ "session_hash": "abcd1234",
60
+ "tool_counts": {},
61
+ "user_turns": 1,
62
+ "assistant_turns": assistant_turns,
63
+ "first_ts": None,
64
+ "last_ts": None,
65
+ "first_user_ts": None,
66
+ "first_edit_ts": None,
67
+ "first_plan_ts": None,
68
+ "task_create_count": 0,
69
+ "exit_plan_count": 0,
70
+ "edit_count": 0,
71
+ "write_count": 0,
72
+ "bash_count": 0,
73
+ "commit_count": 0,
74
+ "test_run_count": 0,
75
+ "has_any_commit": False,
76
+ "has_any_test_run": False,
77
+ "bash_rm_rf_count": 0,
78
+ "read_count": 0,
79
+ "grep_count": 0,
80
+ "glob_count": 0,
81
+ "agent_count": 0,
82
+ "skill_count": sum(skills.values()),
83
+ "sec_first_user_to_first_edit": None,
84
+ "plan_before_edit": False,
85
+ }
86
+
87
+
88
def test_aggregate_emits_skills_by_project():
    """The new emit. Single session, single project, single skill —
    smallest case that proves the pipe is open."""
    session = _make_session(
        project="-Users-r-Desktop-dev-service",
        skills={"deploy-staging": 3})
    _detections, summary = analyze.aggregate([session])
    assert summary["skills_by_project"] == {"service": {"deploy-staging": 3}}
96
+
97
+
98
def test_aggregate_sums_across_sessions_in_same_project():
    """Two sessions in one project accumulate per-skill counts."""
    sessions = [
        _make_session(project="-Users-r-Desktop-dev-service",
                      skills={"deploy-staging": 2}),
        _make_session(project="-Users-r-Desktop-dev-service",
                      skills={"deploy-staging": 1, "design": 1}),
    ]
    _detections, summary = analyze.aggregate(sessions)
    service = summary["skills_by_project"]["service"]
    assert service["deploy-staging"] == 3
    assert service["design"] == 1
108
+
109
+
110
def test_aggregate_separates_distinct_projects():
    """Sessions from different projects land under different keys."""
    sessions = [
        _make_session(project="-Users-r-Desktop-dev-service",
                      skills={"deploy-staging": 2}),
        _make_session(project="-Users-r-Desktop-dev-widget",
                      skills={"widget-build": 4}),
    ]
    _detections, summary = analyze.aggregate(sessions)
    sbp = summary["skills_by_project"]
    assert sbp["service"] == {"deploy-staging": 2}
    assert sbp["widget"] == {"widget-build": 4}
121
+
122
+
123
def test_aggregate_skips_sessions_with_no_project():
    """If a session somehow lacks a project (corrupted state, edge
    case from a transcript without a parent dir), it must not crash
    or attribute its invocations to an empty-string project key."""
    sessions = [
        _make_session(project="", skills={"deploy-staging": 1}),
        _make_session(project="-Users-r-Desktop-dev-service",
                      skills={"deploy-staging": 1}),
    ]
    _detections, summary = analyze.aggregate(sessions)
    assert "" not in summary["skills_by_project"]
    assert summary["skills_by_project"] == {"service": {"deploy-staging": 1}}
135
+
136
+
137
def test_aggregate_keeps_skills_used_in_sync_with_skills_by_project():
    """The flat skills_used Counter and the per-project breakdown are
    derived from the same source. Their totals must match — drift
    here would be a sign of a bookkeeping error in aggregate()."""
    sessions = [
        _make_session(project="-Users-r-Desktop-dev-service",
                      skills={"deploy-staging": 2, "design": 1}),
        _make_session(project="-Users-r-Desktop-dev-widget",
                      skills={"design": 3}),
    ]
    _detections, summary = analyze.aggregate(sessions)
    # Re-derive per-skill totals from the breakdown and compare.
    recomputed: dict[str, int] = {}
    for per_project in summary["skills_by_project"].values():
        for skill_name, count in per_project.items():
            recomputed[skill_name] = recomputed.get(skill_name, 0) + count
    assert summary["skills_used"] == recomputed
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ import analyze
7
+ import redact
8
+
9
+
10
+ def _assistant_record(*, command: str = "") -> dict:
11
+ content = []
12
+ if command:
13
+ content.append({
14
+ "type": "tool_use",
15
+ "name": "Bash",
16
+ "input": {"command": command},
17
+ })
18
+ return {
19
+ "type": "assistant",
20
+ "message": {"role": "assistant", "content": content},
21
+ }
22
+
23
+
24
def test_analyze_redacts_each_record_before_json_parsing(tmp_path, monkeypatch):
    """Secrets must be scrubbed from each transcript line before it is
    handed to json.loads, so raw key material never reaches the parser."""
    secret = "sk-" + ("A" * 40)
    transcript = tmp_path / "session.jsonl"
    transcript.write_text(
        json.dumps(_assistant_record(command=f"echo {secret}; pytest")) + "\n"
    )

    real_loads = analyze.json.loads
    parsed_inputs: list[str] = []

    def loads_spy(text, *args, **kwargs):
        # Fail fast if the raw secret ever reaches the parser.
        parsed_inputs.append(text)
        assert secret not in text
        return real_loads(text, *args, **kwargs)

    monkeypatch.setattr(analyze.json, "loads", loads_spy)

    sig = analyze.analyze_session(transcript)

    assert sig is not None
    assert sig["test_run_count"] == 1
    assert parsed_inputs
    assert "[REDACTED:openai-key]" in parsed_inputs[0]
47
+
48
+
49
def test_analyze_streams_transcript_without_reading_whole_file(tmp_path, monkeypatch):
    """analyze_session must iterate the transcript line by line rather
    than slurping the whole file via Path.read_text."""
    transcript = tmp_path / "large.jsonl"
    lines = (json.dumps(_assistant_record()) + "\n" for _ in range(5000))
    transcript.write_text("".join(lines))

    def fail_read_text(*args, **kwargs):
        raise AssertionError("analyze_session must not read whole transcripts")

    # Any whole-file read after this point blows up the test.
    monkeypatch.setattr(Path, "read_text", fail_read_text)

    sig = analyze.analyze_session(transcript)

    assert sig is not None
    assert sig["assistant_turns"] == 5000
64
+
65
+
66
+ # --- redact.py pattern coverage -------------------------------------------
67
+ # Each test passes a bare token in prose context (not a `KEY=value`
68
+ # assignment) so we know the token-shape pattern itself catches it, not the
69
+ # `.env`-style fallback.
70
+
71
+
72
def test_redact_stripe_live_key():
    """A bare live-mode Stripe secret in prose is replaced with its marker."""
    sample = "we use sk_live_" + "a1B2c3D4e5F6g7H8i9J0k1L2" + " for prod"
    redacted = redact.redact(sample)
    assert "sk_live_" not in redacted
    assert "[REDACTED:stripe-live-key]" in redacted
77
+
78
+
79
def test_redact_stripe_test_key():
    """Test-mode Stripe keys are redacted with their own marker."""
    sample = "test creds: sk_test_" + ("A" * 30)
    redacted = redact.redact(sample)
    assert "sk_test_" not in redacted
    assert "[REDACTED:stripe-test-key]" in redacted
84
+
85
+
86
def test_redact_huggingface_token():
    """Hugging Face access tokens in prose are redacted."""
    sample = "use hf_" + ("a" * 35) + " to download"
    redacted = redact.redact(sample)
    assert "hf_a" not in redacted
    assert "[REDACTED:huggingface-token]" in redacted
91
+
92
+
93
def test_redact_npm_publish_token():
    """npm publish tokens in prose are redacted."""
    sample = "npm_" + ("X" * 36) + " is the publish token"
    redacted = redact.redact(sample)
    assert "npm_X" not in redacted
    assert "[REDACTED:npm-token]" in redacted
98
+
99
+
100
def test_redact_does_not_collapse_short_lookalikes():
    """Don't redact short fragments that happen to start with these
    prefixes — minimum length thresholds matter."""
    sample = "sk_live_short hf_short npm_short"
    redacted = redact.redact(sample)
    assert "REDACTED" not in redacted