daimon-briefing 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,406 @@
1
+ """Env-driven configuration. DAIMON_* takes precedence; LLM vars fall back to LITELLM_*.
2
+
3
+ Each variable resolves process env first, then `~/.daimon/env` (override the
4
+ file location with DAIMON_ENV_FILE). The file exists because hooks run in
5
+ whatever environment the host process happened to inherit — a GUI-launched
6
+ Claude Code has no shell profile, so shell exports are not a reliable channel.
7
+ File format: KEY=VALUE lines; `export ` prefix, surrounding quotes, blank
8
+ lines, and `#` comments are tolerated. Keep it chmod 600 — it holds API keys.
9
+ """
10
+
11
+ import getpass
12
+ import os
13
+ import subprocess
14
+ from pathlib import Path
15
+
16
+
17
def _env_file_path() -> Path:
    """Locate the env file.

    DAIMON_ENV_FILE (with ``~`` expanded) wins when it is set and non-empty;
    otherwise the location is ``~/.daimon/env``.
    """
    override = os.environ.get("DAIMON_ENV_FILE")
    if not override:
        return Path.home() / ".daimon" / "env"
    return Path(override).expanduser()
20
+
21
+
22
def _file_values() -> dict:
    """Parse the env file into a ``{KEY: VALUE}`` dict.

    Re-read on every call — processes are short-lived, a cache would leak
    between tests, and the file is a handful of lines.

    Accepted syntax: ``KEY=VALUE`` lines, an optional ``export `` prefix, one
    layer of matching surrounding quotes, blank lines, and ``#`` comment lines.
    Lines without ``=`` and lines with an empty key are skipped.

    Returns ``{}`` when the file is missing, unreadable, or not valid UTF-8, so
    a damaged file can never crash a config lookup.
    """
    path = _env_file_path()
    try:
        # utf-8-sig decodes plain UTF-8 identically but drops a leading BOM,
        # which would otherwise become part of the first key and hide it.
        text = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # named explicitly to keep the "bad file -> no values" contract.
        return {}
    values = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, _, val = line.partition("=")
        key, val = key.strip(), val.strip()
        # Strip exactly one layer of matching quotes ('...' or "...").
        if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
            val = val[1:-1]
        if key:
            values[key] = val
    return values
44
+
45
+
46
+ def _get(name: str) -> str | None:
47
+ """One variable: process env wins; env file is the fallback."""
48
+ val = os.environ.get(name)
49
+ if val is not None:
50
+ return val
51
+ return _file_values().get(name)
52
+
53
+
54
def _flag(name: str) -> bool:
    """Read a boolean switch.

    True when the variable is one of ``1``/``true``/``yes``/``on``. Matching
    ignores surrounding whitespace and letter case, so ``True`` or ``YES``
    work as well. Unset, empty, or any other value is False.
    """
    return (_get(name) or "").strip().lower() in ("1", "true", "yes", "on")
56
+
57
+
58
def is_disabled() -> bool:
    """Kill switch: True when the DAIMON_DISABLE flag is set, in which case
    all hooks become no-ops."""
    disabled = _flag("DAIMON_DISABLE")
    return disabled
61
+
62
+
63
def checkpoint_dir() -> Path:
    """Checkpoint directory: DAIMON_CHECKPOINT_DIR (``~`` expanded) when set
    and non-empty, else ``~/.daimon/checkpoints``."""
    override = _get("DAIMON_CHECKPOINT_DIR")
    return Path(override).expanduser() if override else Path.home() / ".daimon" / "checkpoints"
68
+
69
+
70
def checkpoint_history() -> int:
    """Number of checkpoint pointers retained per directory: latest.json plus
    prev-1.json .. prev-(N-1).json.

    Read from DAIMON_CHECKPOINT_HISTORY; default 3, floor 1 (1 = latest only,
    no history). A non-integer value falls back to 3. Feeds #26 self-healing:
    a failed serialize can fall back to a prev pointer.
    """
    try:
        configured = int(_get("DAIMON_CHECKPOINT_HISTORY") or "3")
    except ValueError:
        return 3
    return configured if configured > 1 else 1
78
+
79
+
80
def carry_enabled() -> bool:
    """Deterministic cross-session carry (#33 Phase 2).

    Default ON — it fixes a measured defect (multicycle run-01: whole-item
    loss under LLM-mediated carry). ``DAIMON_CARRY=0`` is the kill switch;
    ``false``/``no``/``off`` are accepted as well. Matching ignores letter case
    and surrounding whitespace, mirroring the spellings the other boolean
    switches in this module understand. Any other value leaves carry enabled.
    """
    setting = (_get("DAIMON_CARRY") or "1").strip().lower()
    return setting not in ("0", "false", "no", "off")
85
+
86
+
87
def carry_floor() -> float:
    """Minimum #78 effective weight for a carried item to keep carrying.

    Read from DAIMON_CARRY_FLOOR. Default 0.05: decisions expire ~5-6 weeks
    (importance-graded), escalated open questions live ~3-4 months —
    calibrated against scoring.TYPE_RULES. A value that is not a number,
    including a literal ``nan``, falls back to the default.
    """
    import math

    try:
        floor = float(_get("DAIMON_CARRY_FLOOR") or "0.05")
    except ValueError:
        return 0.05
    # float("nan") parses fine but every comparison against it is False, so a
    # threshold of NaN cannot act as a floor; treat it like unparsable input.
    return 0.05 if math.isnan(floor) else floor
95
+
96
+
97
def carry_max() -> int:
    """Cap on CARRIED items per kind (native items never count or drop).

    Read from DAIMON_CARRY_MAX; default 8, floor 1. A non-integer value falls
    back to 8.
    """
    try:
        configured = int(_get("DAIMON_CARRY_MAX") or "8")
    except ValueError:
        return 8
    return configured if configured > 1 else 1
103
+
104
+
105
def checkpoint_keep() -> int:
    """Number of per-session checkpoint files (<session_id>.json) retained in
    the flat store dir.

    Newest-N by the #93 `created` stamp (file mtime fallback); older files are
    GC'd opportunistically after a successful write. Read from
    DAIMON_CHECKPOINT_KEEP; default 100, negatives clamp to 0, and 0 disables
    GC entirely (keep forever). A non-integer value falls back to 100.
    Deliberately generous so #33's merged checkpoint history keeps a deep well
    of per-session files to reconstruct from.
    """
    try:
        configured = int(_get("DAIMON_CHECKPOINT_KEEP") or "100")
    except ValueError:
        return 100
    return configured if configured > 0 else 0
115
+
116
+
117
def max_briefing_decisions() -> int:
    """Cap on decisions shown in the briefing (render-time view).

    Read from DAIMON_MAX_BRIEFING_DECISIONS; default 10, negatives clamp to 0,
    and 0 means unbounded. A non-integer value falls back to 10. The
    checkpoint keeps ALL decisions — this bounds only the injected briefing,
    whose sole unbounded-growth axis is the decisions list.
    """
    try:
        configured = int(_get("DAIMON_MAX_BRIEFING_DECISIONS") or "10")
    except ValueError:
        return 10
    return configured if configured > 0 else 0
125
+
126
+
127
+ # ---- team memory (#111): opt-in shared mirror + author identity ----
128
+
129
+
130
def team_enabled() -> bool:
    """Opt-in team mirror (DAIMON_TEAM=1, default OFF).

    When on, each checkpoint is mirrored into the shared team dir so
    `brief --team` can surface teammates. Gates WRITES only — reads of the
    team dir are always allowed.
    """
    mirror_on = _flag("DAIMON_TEAM")
    return mirror_on
135
+
136
+
137
def team_dir() -> Path:
    """Root of the shared team-memory mirror.

    DAIMON_TEAM_DIR (``~`` expanded) when set and non-empty; otherwise
    ``~/.daimon/team``, a sibling of the default checkpoint dir. Tests point
    the override under tmp so no test can touch the developer's real
    ~/.daimon/team.
    """
    override = _get("DAIMON_TEAM_DIR")
    return Path(override).expanduser() if override else Path.home() / ".daimon" / "team"
145
+
146
+
147
def recall_db() -> Path:
    """Location of the derived recall index (#112).

    NEVER source of truth — safe to delete at any time; recall rebuilds it by
    scanning the local flat store + team dir. The default ``~/.daimon/recall.db``
    lives BESIDE the checkpoint dir, not inside it: the flat store's GC /
    pointer scans own that namespace, and a foreign file there is one landmine
    nobody needs. DAIMON_RECALL_DB (``~`` expanded) overrides; tests point it
    under tmp so no test can clobber the real index.
    """
    override = _get("DAIMON_RECALL_DB")
    return Path(override).expanduser() if override else Path.home() / ".daimon" / "recall.db"
158
+
159
+
160
def brief_max_tokens() -> int:
    """Token budget for the injected plain briefing (#79), estimated at
    len(text)//4 — no tokenizer dependency.

    Read from DAIMON_BRIEF_MAX_TOKENS. Default 3000: a briefing that eats a
    fifth of a small context window stops being a briefing. Negatives clamp
    to 0, and 0 means unbounded. An empty or non-integer value falls back to
    3000.
    """
    configured = _get("DAIMON_BRIEF_MAX_TOKENS")
    if configured is None:
        return 3000
    try:
        budget = int(configured)
    except ValueError:
        return 3000
    return budget if budget > 0 else 0
171
+
172
+
173
def recall_seen_dir() -> Path:
    """Per-session suggestion-cooldown state for recall-inject (#125).

    One small JSON per session lists the checkpoints already suggested, so a
    repeated topic never re-injects. Disposable like the recall db — deleting
    it only resets cooldowns. DAIMON_RECALL_SEEN_DIR (``~`` expanded)
    overrides the default ``~/.daimon/recall_seen`` (tests -> tmp).
    """
    override = _get("DAIMON_RECALL_SEEN_DIR")
    return Path(override).expanduser() if override else Path.home() / ".daimon" / "recall_seen"
182
+
183
+
184
def team_retention_days() -> int:
    """Read-time age window for teammates' checkpoints (#113).

    read_team skips files older than this many days. Read from
    DAIMON_TEAM_RETENTION_DAYS; default 365, negatives clamp to 0, and 0 keeps
    everything. A non-integer value falls back to 365. Deliberately generous;
    retention NEVER physically deletes from the shared append-only branch
    (deletes race appends, the spike verdict).
    """
    try:
        configured = int(_get("DAIMON_TEAM_RETENTION_DAYS") or "365")
    except ValueError:
        return 365
    return configured if configured > 0 else 0
193
+
194
+
195
def _git_user_name() -> str:
    """Return `git config user.name` for the current dir, or "" on ANY failure.

    Failure covers: not a repo, git missing, timeout, value unset, or output
    that cannot be decoded as text. Same subprocess style as
    resolve_project_root — the git dependency lives HERE in the policy layer,
    never in store (pure file-ops).
    """
    try:
        result = subprocess.run(
            ["git", "config", "user.name"],
            capture_output=True,
            text=True,
            timeout=2,
        )
    except (OSError, subprocess.TimeoutExpired, ValueError):
        # OSError covers a missing git binary (FileNotFoundError). ValueError
        # covers UnicodeDecodeError, which text=True raises when git prints
        # bytes the locale encoding cannot decode.
        return ""
    if result.returncode != 0:
        return ""
    return result.stdout.strip()
211
+
212
+
213
def author() -> str:
    """Team author identity used for namespacing. Never raises.

    Resolution order: DAIMON_AUTHOR → `git config user.name` →
    getpass.getuser() → the literal "unknown".

    Not cached: a checkpoint write happens once per session-end, so the single
    git call per write is negligible, and a process-level cache would only
    leak stale identity between tests.
    """
    configured = (_get("DAIMON_AUTHOR") or "").strip()
    if configured:
        return configured
    from_git = _git_user_name()
    if from_git:
        return from_git
    try:
        login = getpass.getuser()
    except Exception:
        # getuser() can fail in several ways when no login name is
        # resolvable; identity lookup must never break a write.
        login = ""
    return login if login else "unknown"
229
+
230
+
231
def log_dir() -> Path:
    """Directory where the session-end hook writes serialize.log.

    The hook hardcodes ~/.daimon/logs; DAIMON_LOG_DIR (``~`` expanded) exists
    so the CLI (and tests) can point `status` somewhere else.
    """
    override = _get("DAIMON_LOG_DIR")
    return Path(override).expanduser() if override else Path.home() / ".daimon" / "logs"
239
+
240
+
241
def project_dir() -> str | None:
    """Working directory of the session being briefed/serialized (per-project
    checkpoint routing).

    Hooks pass the host payload's cwd through DAIMON_PROJECT_DIR. Unset or
    empty returns None = project unknown = pre-routing behavior.
    """
    configured = _get("DAIMON_PROJECT_DIR")
    return configured if configured else None
246
+
247
+
248
+ def resolve_project_root(raw: str | None) -> str | None:
249
+ """Normalize a project dir to its git toplevel so a subdir session maps to the
250
+ ONE repo bucket (#74).
251
+
252
+ Checkpoint identity is keyed on the (slugged) project dir. A session run from a
253
+ subdirectory of a repo — e.g. `daimon/plugin/`, which is not its own git repo —
254
+ would otherwise slug to a different bucket than the repo root and fork a separate
255
+ checkpoint history. Resolving to `git rev-parse --show-toplevel` at ingress keeps
256
+ every session in the repo pointing at the same bucket.
257
+
258
+ This lives in config (the resolution/policy layer) on purpose: store.py stays
259
+ pure file-ops with no git/subprocess dependency.
260
+
261
+ Falsy `raw` passes through unchanged (None must keep falling back to the global
262
+ pointer — an unknown project is not invented into a dir). On ANY git failure —
263
+ not a repo, git binary missing, timeout, OS error, dir gone — `raw` is returned
264
+ UNCHANGED, preserving exact pre-normalization behavior. Never raises.
265
+ """
266
+ if not raw:
267
+ return raw
268
+ try:
269
+ result = subprocess.run(
270
+ ["git", "-C", raw, "rev-parse", "--show-toplevel"],
271
+ capture_output=True,
272
+ text=True,
273
+ timeout=2,
274
+ )
275
+ except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
276
+ return raw
277
+ if result.returncode != 0:
278
+ return raw
279
+ top = result.stdout.strip()
280
+ return top or raw
281
+
282
+
283
def min_messages() -> int:
    """Integer value of DAIMON_MIN_MESSAGES; 10 when unset, empty, or not an
    integer."""
    try:
        configured = int(_get("DAIMON_MIN_MESSAGES") or "10")
    except ValueError:
        configured = 10
    return configured
288
+
289
+
290
def timeout_seconds() -> int:
    """Timeout in seconds, read from DAIMON_TIMEOUT.

    Defaults to 120 when the variable is unset, empty, or not an integer. A
    negative number is never a meaningful timeout, so it is treated like any
    other invalid value and also yields 120.
    """
    try:
        seconds = int(_get("DAIMON_TIMEOUT") or "120")
    except ValueError:
        return 120
    return seconds if seconds >= 0 else 120
295
+
296
+
297
def hung_after_seconds() -> int:
    """Age (seconds) past which a serialize spawn with NO result line is
    treated as hung/killed rather than still-running.

    Serialize runs 4-25 min in production, so the default (1800 = 30 min) sits
    safely beyond a slow run. Override with DAIMON_HUNG_AFTER; a non-integer
    value falls back to 1800.
    """
    try:
        configured = int(_get("DAIMON_HUNG_AFTER") or "1800")
    except ValueError:
        configured = 1800
    return configured
306
+
307
+
308
def chunk_lines() -> int:
    """Rendered-transcript line count above which serialization goes chunked
    (armC). 1200 matches the recall cliff measured in the D-007 probe.

    Read from DAIMON_CHUNK_LINES; a non-integer value falls back to 1200.
    Values below 1 are clamped to 1, matching the lower bounds the sibling
    chunk settings already enforce, so a zero or negative line count is never
    returned.
    """
    try:
        return max(1, int(_get("DAIMON_CHUNK_LINES") or "1200"))
    except ValueError:
        return 1200
315
+
316
+
317
def chunk_overlap() -> int:
    """Chunk overlap, read from DAIMON_CHUNK_OVERLAP.

    Defaults to 100 when the variable is unset, empty, or not an integer.
    Negative values are clamped to 0 (no overlap), since a negative overlap
    has no meaning.
    """
    try:
        return max(0, int(_get("DAIMON_CHUNK_OVERLAP") or "100"))
    except ValueError:
        return 100
322
+
323
+
324
def chunk_concurrency() -> int:
    """Parallel chunk-serialize calls.

    Gateway calls are generation-bound (~minutes each); sequential chunking
    makes long sessions unusable. Read from DAIMON_CHUNK_CONCURRENCY; default
    4, floor 1. A non-integer value falls back to 4.
    """
    try:
        configured = int(_get("DAIMON_CHUNK_CONCURRENCY") or "4")
    except ValueError:
        return 4
    return configured if configured > 1 else 1
331
+
332
+
333
def merge_group_size() -> int:
    """Max partials per hierarchical merge call.

    K=3 keeps every merge call at the proven 3-chunk size from issue #28 where
    6-chunk merges DNF at 900s. Read from DAIMON_MERGE_GROUP_SIZE; default 3,
    floor 2. A non-integer value falls back to 3.
    """
    try:
        configured = int(_get("DAIMON_MERGE_GROUP_SIZE") or "3")
    except ValueError:
        return 3
    return configured if configured > 2 else 2
340
+
341
+
342
def llm_briefing() -> bool:
    """Opt-in (DAIMON_LLM_BRIEFING): render the briefing via LLM instead of
    the deterministic template."""
    use_llm = _flag("DAIMON_LLM_BRIEFING")
    return use_llm
345
+
346
+
347
def scar_harvest_enabled() -> bool:
    """Opt-in (DAIMON_SCAR_HARVEST): draft scar candidates from the transcript
    at session-end (#76)."""
    harvest_on = _flag("DAIMON_SCAR_HARVEST")
    return harvest_on
350
+
351
+
352
def llm_no_cache() -> bool:
    """Per-request bypass of gateway response caching (LiteLLM `no-cache`),
    enabled by the DAIMON_LLM_NO_CACHE flag.

    Needed when a cached bad response pins a failure or when runs must be
    statistically independent.
    """
    bypass = _flag("DAIMON_LLM_NO_CACHE")
    return bypass
357
+
358
+
359
def llm_base_url() -> str:
    """Gateway base URL without trailing slashes.

    First non-empty of DAIMON_LLM_BASE_URL and LITELLM_BASE_URL; otherwise
    ``http://localhost:4000``.
    """
    for variable in ("DAIMON_LLM_BASE_URL", "LITELLM_BASE_URL"):
        configured = _get(variable)
        if configured:
            return configured.rstrip("/")
    return "http://localhost:4000"
365
+
366
+
367
def llm_api_key() -> str | None:
    """API key: DAIMON_LLM_API_KEY, falling back to LITELLM_API_KEY.

    Returns None when neither holds a non-empty value. An empty assignment is
    normalized to None — the same convention llm_command() uses — so callers
    testing ``is not None`` and callers testing truthiness agree.
    """
    return _get("DAIMON_LLM_API_KEY") or _get("LITELLM_API_KEY") or None
369
+
370
+
371
def llm_model() -> str | None:
    """Model name: DAIMON_LLM_MODEL, falling back to LITELLM_MODEL.

    Returns None when neither holds a non-empty value. An empty assignment is
    normalized to None — the same convention llm_command() uses — so callers
    testing ``is not None`` and callers testing truthiness agree.
    """
    return _get("DAIMON_LLM_MODEL") or _get("LITELLM_MODEL") or None
373
+
374
+
375
def llm_temperature() -> float:
    """Sampling temperature sent with every chat call.

    Read from DAIMON_LLM_TEMPERATURE. Default 0.0 for deterministic
    extraction; some upstreams (e.g. kimi-k2.6) reject anything but a fixed
    value — set this to whatever the model demands. A value that is not a
    finite number (unparsable text, ``nan``, ``inf``) falls back to 0.0.
    """
    import math

    try:
        temperature = float(_get("DAIMON_LLM_TEMPERATURE") or "0.0")
    except ValueError:
        return 0.0
    # float() happily parses "nan"/"inf", but those have no standard JSON
    # representation, so they must not reach a request payload.
    return temperature if math.isfinite(temperature) else 0.0
383
+
384
+
385
def llm_backend() -> str:
    """Which LLM transport: 'auto' (default — litellm if credentials exist,
    else a command CLI if one resolves), 'litellm', 'command', or 'claude-cli'.

    Read from DAIMON_LLM_BACKEND. The value is trimmed and lower-cased, and a
    blank or whitespace-only value means 'auto', so ``Auto`` or ``" "`` can
    never yield a backend name that matches nothing.
    """
    return (_get("DAIMON_LLM_BACKEND") or "").strip().lower() or "auto"
389
+
390
+
391
def llm_fallback() -> bool:
    """When the litellm backend fails, auto-fall-back to a command backend.

    Read from DAIMON_LLM_FALLBACK. Default ON — this is the gateway-failure
    resilience. Set 0 to disable. ``1``/``true``/``yes``/``on`` keep it
    enabled; matching ignores letter case and surrounding whitespace, so
    ``TRUE`` does not accidentally switch the fallback off. Any other value
    disables it.
    """
    setting = (_get("DAIMON_LLM_FALLBACK") or "1").strip().lower()
    return setting in ("1", "true", "yes", "on")
395
+
396
+
397
def llm_command() -> str | None:
    """Full CLI invocation for the command backend (binary + model + flags),
    from DAIMON_LLM_COMMAND; None when unset or empty.

    The prompt is piped via stdin, never argv.
    """
    command = _get("DAIMON_LLM_COMMAND")
    return command if command else None
401
+
402
+
403
def llm_command_output() -> str | None:
    """How to extract assistant text from the command's stdout, from
    DAIMON_LLM_COMMAND_OUTPUT; None when unset or empty.

    Values: 'text' (raw stdout) | 'json:<key>' (parse JSON, read <key>).
    """
    spec = _get("DAIMON_LLM_COMMAND_OUTPUT")
    return spec if spec else None
@@ -0,0 +1,81 @@
1
+ """Onboarding helper: detect the resolved LLM backend and fill config gaps by
2
+ writing ~/.daimon/env. Detection is NOT reimplemented — it reuses the real
3
+ resolver in llm.py so the doctor view can never disagree with what llm.chat()
4
+ would actually run (the single-source-of-truth requirement from #48).
5
+
6
+ Stdlib only, offline: no live LLM call is made here.
7
+ """
8
+
9
+ import os
10
+ import shutil
11
+ import tempfile
12
+ from pathlib import Path
13
+
14
+ from . import config, llm
15
+
16
+
17
def resolved_backend() -> str:
    """The backend llm.chat() would actually use.

    Mirrors its `auto` branch exactly — if these diverge, the doctor lies. An
    explicit (non-'auto') setting is returned as-is. Under 'auto': an API key
    selects 'litellm', otherwise a resolvable command selects 'command',
    otherwise 'litellm' again.
    """
    setting = config.llm_backend()
    if setting == "auto":
        if config.llm_api_key():
            return "litellm"
        if llm._resolve_command() is not None:
            return "command"
        # let _chat_litellm raise the helpful no-key error
        return "litellm"
    return setting
28
+
29
+
30
def status() -> dict:
    """Detection snapshot for the doctor view. No LLM call.

    Returns a dict with:
        resolved_backend: result of resolved_backend().
        ready: for 'command'/'claude-cli', whether a command resolves;
            otherwise whether BOTH an API key and a model are configured.
        claude_on_path: whether a `claude` executable is found on PATH.
        has_api_key / has_model: whether a non-empty value is configured.
            These use the same truthiness test as `ready`, so an empty
            assignment is never reported as present.
        command: the resolved command string, or None.
        command_source: "explicit" when DAIMON_LLM_COMMAND is set,
            "claude-cli" when a command resolved without it, else None.
        env_file / env_file_exists: env file location and whether it exists.
    """
    rb = resolved_backend()
    cmd = llm._resolve_command()  # (command_str, output_spec) | None
    # Each config getter may re-read the env file; look each value up once so
    # the snapshot is internally consistent.
    has_api_key = bool(config.llm_api_key())
    has_model = bool(config.llm_model())
    env_file = config._env_file_path()
    if rb in ("command", "claude-cli"):
        ready = cmd is not None
    else:  # litellm needs BOTH key and model (matches the serialize pre-flight)
        ready = has_api_key and has_model
    return {
        "resolved_backend": rb,
        "ready": ready,
        "claude_on_path": shutil.which("claude") is not None,
        "has_api_key": has_api_key,
        "has_model": has_model,
        "command": cmd[0] if cmd else None,
        "command_source": (
            "explicit" if config.llm_command()
            else ("claude-cli" if cmd else None)
        ),
        "env_file": str(env_file),
        "env_file_exists": env_file.exists(),
    }
52
+
53
+
54
def write_env(updates: dict) -> Path:
    """Merge `updates` into the env file (~/.daimon/env, or DAIMON_ENV_FILE)
    and rewrite it as sorted KEY=VALUE lines.

    Keys already in the file and not named in `updates` are kept. The file is
    machine-managed: comments and ordering are NOT preserved (normalized).
    The write is atomic (temp file in the same directory + os.replace) and the
    result is chmod 600 — it holds API keys. When the merge result is empty no
    file is created (the claude zero-config case writes nothing).

    Returns the target path in every case. Any error during the write is
    re-raised after the temp file has been removed.
    """
    target = config._env_file_path()
    merged = dict(config._file_values())
    merged.update(updates)
    if not merged:
        # nothing to persist -> never create an empty file
        return target
    target.parent.mkdir(parents=True, exist_ok=True)
    lines = [f"{key}={merged[key]}\n" for key in sorted(merged)]
    handle, tmp_name = tempfile.mkstemp(dir=str(target.parent), prefix=".env-")
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            out.write("".join(lines))
        os.replace(tmp_name, target)
    except BaseException:
        # Best-effort cleanup of the temp file, then surface the original error.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
    os.chmod(target, 0o600)
    return target
@@ -0,0 +1,189 @@
1
+ """Zero-LLM scar-candidate harvester (#76). Session-end, opt-in, never breaks a session.
2
+
3
+ Scans the transcript for anchorable negative knowledge and drafts scar *candidates*
4
+ into <project_root>/.scars/candidates/. Path-anchored only (Fork 2): a hit with no
5
+ real file/dir path in its own span is dropped — precision over recall, because a
6
+ scar system dies from noise, not a missed lesson. Emits candidates only; a human
7
+ reviewer promotes. Pure stdlib.
8
+ """
9
+
10
+ import datetime
11
+ import json
12
+ import logging
13
+ import re
14
+ from pathlib import Path
15
+ from typing import NamedTuple
16
+
17
+ from . import transcript
18
+
19
+ log = logging.getLogger("daimon_briefing")
20
+
21
+
22
# One marker match found by detect().
Hit = NamedTuple(
    "Hit",
    [
        ("kind", str),  # "intentional" or "avoidance"
        ("sentence", str),  # the sentence that matched a marker
        ("context", str),  # full text of the message containing the sentence
        ("msg_index", int),  # position of that message in the scanned list
    ],
)
27
+
28
+
29
# Avoidance markers, English band.
_AVOID_EN = (
    r"avoid|don't|do not|never|gotcha|pitfall|footgun|broke|breaks|mistake|dead[ -]?end"
)
# Spanish band mirrors the English markers (#4). Bare "no" is far more
# frequent than "don't", so only specific imperative constructions fire —
# never plain negation ("no devuelve" stays silent).
_AVOID_ES = (
    r"evit(?:a|á|ar|es|en)|nunca|jam[áa]s|trampa|romp(?:e|i[óo]|en)"
    r"|callej[óo]n sin salida|punto muerto|no (?:hagas|toques|uses|llames)"
)
_AVOID_RE = re.compile(rf"\b({_AVOID_EN}|{_AVOID_ES})\b", re.IGNORECASE)

# Intentional-design markers, English band then its Spanish mirror.
_INTENT_EN = (
    r"on purpose|intentional(?:ly)?|deliberately|looks wrong but|must stay|keep this"
)
_INTENT_ES = (
    r"a prop[óo]sito|intencional(?:mente)?|adrede|deliberadamente"
    r"|parece (?:mal|incorrecto) pero|debe quedar(?:se)?"
)
_INTENT_RE = re.compile(rf"\b({_INTENT_EN}|{_INTENT_ES})\b", re.IGNORECASE)
44
+
45
+
46
def _split_sentences(text):
    """Split text into trimmed, non-empty sentences.

    A boundary is whitespace following ``.``, ``!`` or ``?``, or any run of
    newlines.
    """
    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+|\n+", text))
    return [piece for piece in pieces if piece]
49
+
50
+
51
def detect(messages: list[dict]) -> list[Hit]:
    """Assistant-only marker scan. Returns Hits (no I/O, no anchoring yet).

    Each sentence of each assistant message yields at most one Hit: the
    intentional markers are checked first, the avoidance markers second.
    """
    found: list[Hit] = []
    for index, message in enumerate(messages):
        if message.get("role") == "assistant":
            body = transcript._text_of(message.get("content"))
            for sentence in _split_sentences(body):
                if _INTENT_RE.search(sentence):
                    kind = "intentional"
                elif _AVOID_RE.search(sentence):
                    kind = "avoidance"
                else:
                    continue
                found.append(Hit(kind, sentence, body, index))
    return found
64
+
65
+
66
# path-like token: a/b/c.ext (ext-whitelisted to keep prose out) OR a nested a/b/ dir.
_PATH_RE = re.compile(
    r"([\w.\-/]+\.(?:py|md|js|ts|tsx|go|rs|json|ya?ml|toml|sh|txt|cfg|ini)"
    r"|[\w.\-]+(?:/[\w.\-]+)+/?)"
)


def anchor_of(hit, project_root):
    """First path token in the hit's sentence that exists INSIDE project_root.

    Args:
        hit: Object exposing the matched text as ``hit.sentence``.
        project_root: Directory the anchor must live under.

    Returns:
        A repo-relative posix path str, or None → drop hit. Absolute tokens
        and ``..`` traversal that escape the root are rejected: the resolved
        path must stay under the resolved root. A token that resolves to the
        root itself (e.g. ``pkg/../``) is rejected too — "." names no specific
        file or dir. The existence + containment check is the precision gate —
        garbled, hallucinated, or escaping paths vanish.
    """
    root = Path(project_root).resolve()
    for m in _PATH_RE.finditer(hit.sentence):
        # Trailing sentence punctuation is not part of the path.
        cand = m.group(1).rstrip(":,.)")
        if not cand or Path(cand).is_absolute():
            continue
        try:
            resolved = (root / cand).resolve()
            if resolved == root or not resolved.is_relative_to(root):
                continue
            if resolved.exists():
                return resolved.relative_to(root).as_posix()
        except (OSError, ValueError):
            continue
    return None
93
+
94
+
95
# Phrases that mark an abandoned attempt.
_DEADEND_RE = re.compile(
    r"\b(tried|attempted|turned out|didn't work|doesn't work|gave up)\b", re.IGNORECASE
)


def _scar_type(hit):
    """Map a hit to a scar type.

    "fence" for an intentional hit; otherwise "deadend" when the sentence
    contains an abandoned-attempt phrase, else "landmine".
    """
    if hit.kind != "intentional":
        if _DEADEND_RE.search(hit.sentence):
            return "deadend"
        return "landmine"
    return "fence"
104
+
105
+
106
def _slug(title):
    """Turn a title into a filename-safe slug.

    Lower-cases the title, collapses every run of characters outside
    ``a-z0-9`` into a single ``-``, trims hyphens from both ends, and caps the
    result at 60 characters. A hyphen left dangling by the cut is removed, so
    a slug never ends in ``-``. Returns "harvested-scar" when nothing usable
    remains.
    """
    s = re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-")
    # Truncation can land right after a separator; trim it again.
    return s[:60].rstrip("-") or "harvested-scar"
109
+
110
+
111
def to_candidate(hit, anchor, session_id, today):
    """Build (slug, markdown). Lint-valid frontmatter, single path-only anchor.

    `title` is emitted via json.dumps → a valid double-quoted YAML scalar even
    when the sentence contains ':' or quotes (the #1 hand-written-scar YAML
    footgun).

    Args:
        hit: Detected hit; its sentence becomes the title (whitespace
            collapsed, capped at 80 characters) and the body text.
        anchor: Repo-relative path written as the single anchor.
        session_id: Identifier recorded in the evidence note. A missing or
            empty id is recorded as "unknown" and a non-string id is
            stringified, so building the note never raises.
        today: ISO date (YYYY-MM-DD) used for `created`; `review_after` is
            365 days later.

    Returns:
        Tuple of the candidate's slug and its full markdown text.
    """
    typ = _scar_type(hit)
    title = " ".join(hit.sentence.split())[:80].rstrip()
    slug = _slug(title)
    review = (
        datetime.date.fromisoformat(today) + datetime.timedelta(days=365)
    ).isoformat()
    # Plain `str + session_id` raises TypeError for None or non-str ids.
    session_label = str(session_id) if session_id else "unknown"
    md = (
        "---\n"
        "id: 0\n"
        f"type: {typ}\n"
        f"title: {json.dumps(title)}\n"
        "severity: medium\n"
        "confidence: 0.5\n"
        f"created: {today}\n"
        'authors: ["daimon-harvest"]\n'
        "anchors:\n"
        f"  - path: {anchor}\n"
        "evidence:\n"
        f"  - note: {json.dumps('auto-harvested from session ' + session_label)}\n"
        "expires:\n"
        '  condition: "the referenced code is removed or the constraint no longer holds"\n'
        f"  review_after: {review}\n"
        "status: candidate\n"
        "---\n\n"
        f"{hit.sentence.strip()}\n\n"
        "Auto-harvested from the session transcript — a human must verify the claim "
        "and confirm the anchor before promotion.\n"
    )
    return slug, md
146
+
147
+
148
# Upper bound on candidate files written by a single run().
_MAX_CANDIDATES = 5


def run(messages, project_root, session_id):
    """detect -> anchor-gate -> candidate -> dedup + cap -> write. Returns count.

    Skips silently when the repo hasn't opted into scars (no .scars/
    directory). Never overwrites an existing candidate (a human may have
    edited it), so re-running the same session writes nothing new. Files are
    created in exclusive mode, so that guarantee also holds if another process
    creates the same candidate between the existence check and the write.

    Args:
        messages: Transcript messages to scan.
        project_root: Repository root; a falsy value means nothing to do.
        session_id: Identifier recorded in each candidate's evidence note.

    Returns:
        Number of candidate files written by this call (at most
        _MAX_CANDIDATES).
    """
    if not project_root:
        return 0
    root = Path(project_root)
    # is_dir(), not exists(): a stray regular file named .scars is not an
    # opt-in, and creating candidates/ beneath it would raise.
    if not (root / ".scars").is_dir():
        return 0
    cand_dir = root / ".scars" / "candidates"
    cand_dir.mkdir(parents=True, exist_ok=True)
    today = datetime.date.today().isoformat()
    written, dropped = 0, 0
    seen = set()
    for hit in detect(messages):
        anchor = anchor_of(hit, project_root)
        if anchor is None:
            continue
        slug, md = to_candidate(hit, anchor, session_id, today)
        if slug in seen:
            continue
        seen.add(slug)
        path = cand_dir / f"{slug}.md"
        if path.exists():
            continue
        if written >= _MAX_CANDIDATES:
            dropped += 1
            continue
        try:
            # "x" fails instead of truncating when the file appeared after the
            # exists() check above.
            with path.open("x", encoding="utf-8") as fh:
                fh.write(md)
        except FileExistsError:
            continue
        written += 1
    if dropped:
        log.info(
            "daimon: scar harvest capped at %d, dropped %d candidate(s)",
            _MAX_CANDIDATES, dropped,
        )
    return written