daimon-briefing 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- daimon_briefing/__init__.py +25 -0
- daimon_briefing/anchor.py +103 -0
- daimon_briefing/briefing.py +274 -0
- daimon_briefing/carry.py +97 -0
- daimon_briefing/cli.py +1119 -0
- daimon_briefing/config.py +406 -0
- daimon_briefing/configure.py +81 -0
- daimon_briefing/harvest.py +189 -0
- daimon_briefing/hooks.py +78 -0
- daimon_briefing/llm.py +239 -0
- daimon_briefing/recall.py +588 -0
- daimon_briefing/render.py +389 -0
- daimon_briefing/scoring.py +79 -0
- daimon_briefing/serializer.py +550 -0
- daimon_briefing/store.py +506 -0
- daimon_briefing/teamsync.py +484 -0
- daimon_briefing/transcript.py +258 -0
- daimon_briefing-0.3.0.dist-info/METADATA +161 -0
- daimon_briefing-0.3.0.dist-info/RECORD +21 -0
- daimon_briefing-0.3.0.dist-info/WHEEL +4 -0
- daimon_briefing-0.3.0.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""Env-driven configuration. DAIMON_* takes precedence; LLM vars fall back to LITELLM_*.
|
|
2
|
+
|
|
3
|
+
Each variable resolves process env first, then `~/.daimon/env` (override the
|
|
4
|
+
file location with DAIMON_ENV_FILE). The file exists because hooks run in
|
|
5
|
+
whatever environment the host process happened to inherit — a GUI-launched
|
|
6
|
+
Claude Code has no shell profile, so shell exports are not a reliable channel.
|
|
7
|
+
File format: KEY=VALUE lines; `export ` prefix, surrounding quotes, blank
|
|
8
|
+
lines, and `#` comments are tolerated. Keep it chmod 600 — it holds API keys.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import getpass
|
|
12
|
+
import os
|
|
13
|
+
import subprocess
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _env_file_path() -> Path:
    """Return the env-file location.

    DAIMON_ENV_FILE wins when it is set to a non-empty value (``~`` is
    expanded); otherwise the file lives at ``~/.daimon/env``. Only the process
    environment is consulted here.
    """
    override = os.environ.get("DAIMON_ENV_FILE")
    if not override:
        return Path.home() / ".daimon" / "env"
    return Path(override).expanduser()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _file_values() -> dict:
    """Parse the env file into a ``{KEY: VALUE}`` dict.

    Re-read on every call — processes are short-lived and a cache would leak
    between tests; the file is a handful of lines.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped. A
    leading ``export `` is dropped, key and value are whitespace-trimmed, and
    one pair of matching surrounding quotes is removed from the value. A later
    duplicate key overwrites an earlier one.

    Returns ``{}`` when the file is missing, unreadable, or not valid UTF-8, so
    a damaged file degrades to "no file" instead of raising.
    """
    path = _env_file_path()
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError: without it a
        # single stray non-UTF-8 byte would make every lookup raise.
        return {}
    values = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, _, val = line.partition("=")
        key, val = key.strip(), val.strip()
        # Strip exactly one layer of matching quotes ('...' or "...").
        if len(val) >= 2 and val[0] == val[-1] and val[0] in ("'", '"'):
            val = val[1:-1]
        if key:
            values[key] = val
    return values
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _get(name: str) -> str | None:
|
|
47
|
+
"""One variable: process env wins; env file is the fallback."""
|
|
48
|
+
val = os.environ.get(name)
|
|
49
|
+
if val is not None:
|
|
50
|
+
return val
|
|
51
|
+
return _file_values().get(name)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _flag(name: str) -> bool:
    """Return True when variable *name* holds an affirmative value.

    Accepted spellings are ``1``, ``true``, ``yes`` and ``on``. Surrounding
    whitespace and letter case are ignored, so ``TRUE`` or ``On`` also count —
    a kill switch must not be defeated by capitalisation. Unset, empty, or any
    other value is False.
    """
    return (_get(name) or "").strip().lower() in ("1", "true", "yes", "on")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def is_disabled() -> bool:
    """Kill switch: True when the DAIMON_DISABLE flag is set, in which case all
    hooks are meant to become no-ops."""
    disabled = _flag("DAIMON_DISABLE")
    return disabled
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def checkpoint_dir() -> Path:
    """Checkpoint directory: DAIMON_CHECKPOINT_DIR (``~`` expanded) when set to
    a non-empty value, otherwise ``~/.daimon/checkpoints``."""
    override = _get("DAIMON_CHECKPOINT_DIR")
    if not override:
        return Path.home() / ".daimon" / "checkpoints"
    return Path(override).expanduser()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def checkpoint_history() -> int:
    """Number of checkpoint pointers retained per directory: latest.json plus
    prev-1.json .. prev-(N-1).json.

    Read from DAIMON_CHECKPOINT_HISTORY; defaults to 3 and is never below 1
    (1 = latest only, no history). An unparsable value falls back to 3. Feeds
    #26 self-healing: a failed serialize can fall back to a prev pointer.
    """
    try:
        configured = _get("DAIMON_CHECKPOINT_HISTORY") or "3"
        count = int(configured)
    except ValueError:
        return 3
    return count if count > 1 else 1
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def carry_enabled() -> bool:
    """Deterministic cross-session carry (#33 Phase 2). Default ON — it fixes a
    measured defect (multicycle run-01: whole-item loss under LLM-mediated
    carry).

    DAIMON_CARRY=0 is the kill switch. ``false``, ``no`` and ``off`` disable it
    too, and surrounding whitespace and letter case are ignored, so the switch
    accepts the same negative spellings as the module's other flags. Unset,
    empty, or any other value leaves carry enabled.
    """
    setting = (_get("DAIMON_CARRY") or "1").strip().lower()
    return setting not in ("0", "false", "no", "off")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def carry_floor() -> float:
    """Minimum #78 effective weight for a carried item to keep carrying.

    Read from DAIMON_CARRY_FLOOR. Default 0.05: decisions expire ~5-6 weeks
    (importance-graded), escalated open questions live ~3-4 months — calibrated
    against scoring.TYPE_RULES. An unparsable or non-finite value falls back to
    the default.
    """
    import math

    try:
        floor = float(_get("DAIMON_CARRY_FLOOR") or "0.05")
    except ValueError:
        return 0.05
    # float() happily parses "nan" and "inf"; neither is a usable threshold
    # (every comparison against NaN is False), so treat them as garbage too.
    return floor if math.isfinite(floor) else 0.05
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def carry_max() -> int:
    """Cap on CARRIED items per kind (native items never count or drop).

    Read from DAIMON_CARRY_MAX; defaults to 8, is clamped to at least 1, and
    falls back to 8 when the value is not an integer.
    """
    try:
        configured = _get("DAIMON_CARRY_MAX") or "8"
        cap = int(configured)
    except ValueError:
        return 8
    return cap if cap > 1 else 1
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def checkpoint_keep() -> int:
    """How many per-session checkpoint files (one JSON per session id) to keep
    in the flat store dir.

    Newest-N by the #93 `created` stamp (file mtime fallback); older files are
    GC'd opportunistically after a successful write. Read from
    DAIMON_CHECKPOINT_KEEP: default 100, 0 disables GC entirely (keep forever),
    negatives are clamped to 0, and a non-integer value falls back to 100.
    Deliberately generous so #33's merged checkpoint history keeps a deep well
    of per-session files to reconstruct from.
    """
    try:
        configured = _get("DAIMON_CHECKPOINT_KEEP") or "100"
        keep = int(configured)
    except ValueError:
        return 100
    return keep if keep > 0 else 0
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def max_briefing_decisions() -> int:
    """Cap on decisions shown in the briefing (render-time view).

    Read from DAIMON_MAX_BRIEFING_DECISIONS: default 10, 0 = unbounded,
    negatives are clamped to 0, non-integer values fall back to 10. The
    checkpoint keeps ALL decisions — this bounds only the injected briefing,
    whose sole unbounded-growth axis is the decisions list.
    """
    try:
        configured = _get("DAIMON_MAX_BRIEFING_DECISIONS") or "10"
        limit = int(configured)
    except ValueError:
        return 10
    return limit if limit > 0 else 0
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ---- team memory (#111): opt-in shared mirror + author identity ----
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def team_enabled() -> bool:
    """Opt-in team mirroring (DAIMON_TEAM=1, default OFF): mirror each
    checkpoint into the shared team dir so `brief --team` can surface
    teammates. Gates WRITES only — reads of the team dir are always allowed."""
    enabled = _flag("DAIMON_TEAM")
    return enabled
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def team_dir() -> Path:
    """Root of the shared team-memory mirror.

    Defaults to ``~/.daimon/team``, a sibling of the checkpoint dir.
    DAIMON_TEAM_DIR overrides it (tests point it under tmp so no test can
    touch the developer's real ~/.daimon/team).
    """
    override = _get("DAIMON_TEAM_DIR")
    if not override:
        return Path.home() / ".daimon" / "team"
    return Path(override).expanduser()
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def recall_db() -> Path:
    """Location of the derived recall index (#112).

    NEVER source of truth — safe to delete at any time; recall rebuilds it by
    scanning the local flat store + team dir. Defaults to
    ``~/.daimon/recall.db``, BESIDE the checkpoint dir rather than inside it:
    the flat store's GC / pointer scans own that namespace, and a foreign file
    there is one landmine nobody needs. DAIMON_RECALL_DB overrides (tests point
    it under tmp so no test can clobber the real index).
    """
    override = _get("DAIMON_RECALL_DB")
    if not override:
        return Path.home() / ".daimon" / "recall.db"
    return Path(override).expanduser()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def brief_max_tokens() -> int:
    """Token budget for the injected plain briefing (#79), estimated at
    len(text)//4 — no tokenizer dependency.

    DAIMON_BRIEF_MAX_TOKENS overrides. Default 3000 (a briefing that eats a
    fifth of a small context window stops being a briefing); 0 = unbounded;
    negatives are clamped to 0; a non-integer value, including an empty string,
    falls back to 3000.
    """
    raw = _get("DAIMON_BRIEF_MAX_TOKENS")
    if raw is None:
        return 3000
    try:
        budget = int(raw)
    except ValueError:
        return 3000
    return budget if budget > 0 else 0
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def recall_seen_dir() -> Path:
    """Per-session suggestion-cooldown state for recall-inject (#125).

    One small JSON per session lists the checkpoints already suggested, so a
    repeated topic never re-injects. Disposable like the recall db — deleting
    it only resets cooldowns. Defaults to ``~/.daimon/recall_seen``;
    DAIMON_RECALL_SEEN_DIR overrides (tests -> tmp).
    """
    override = _get("DAIMON_RECALL_SEEN_DIR")
    if not override:
        return Path.home() / ".daimon" / "recall_seen"
    return Path(override).expanduser()
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def team_retention_days() -> int:
    """Read-time age window for teammates' checkpoints (#113): read_team skips
    files older than this many days.

    Read from DAIMON_TEAM_RETENTION_DAYS: default 365 (deliberately generous),
    0 = keep all, negatives are clamped to 0, non-integer values fall back to
    365. Retention NEVER physically deletes from the shared append-only branch
    (deletes race appends, the spike verdict).
    """
    try:
        configured = _get("DAIMON_TEAM_RETENTION_DAYS") or "365"
        days = int(configured)
    except ValueError:
        return 365
    return days if days > 0 else 0
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _git_user_name() -> str:
    """Return `git config user.name` for the current directory, or "" on ANY
    failure (not a repo, git missing, timeout, unset, undecodable output).

    Same subprocess style as resolve_project_root — the git dependency lives
    HERE in the policy layer, never in store (pure file-ops).
    """
    try:
        result = subprocess.run(
            ["git", "config", "user.name"],
            capture_output=True,
            text=True,
            timeout=2,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError covers a missing/unrunnable git binary, SubprocessError
        # covers the timeout, and ValueError covers UnicodeDecodeError raised
        # while text=True decodes output that is not valid in the locale
        # encoding. All of them mean "no usable name".
        return ""
    if result.returncode != 0:
        return ""
    return result.stdout.strip()
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def author() -> str:
    """Team author identity used for namespacing.

    Resolution order: DAIMON_AUTHOR (whitespace-trimmed) → `git config
    user.name` → getpass.getuser() → the literal "unknown". Never raises.

    Not cached: a checkpoint write happens once per session-end, so the single
    git call per write is negligible, and a process-level cache would only leak
    stale identity between tests.
    """
    candidate = (_get("DAIMON_AUTHOR") or "").strip()
    if candidate:
        return candidate
    candidate = _git_user_name()
    if candidate:
        return candidate
    try:
        candidate = getpass.getuser()
    except Exception:
        # Deliberately broad: any getuser() failure just means "no login name".
        candidate = ""
    return candidate or "unknown"
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def log_dir() -> Path:
    """Directory where the session-end hook writes serialize.log.

    The hook hardcodes ~/.daimon/logs, which is also the default here; the
    DAIMON_LOG_DIR override exists so the CLI (and tests) can point `status`
    somewhere else.
    """
    override = _get("DAIMON_LOG_DIR")
    if not override:
        return Path.home() / ".daimon" / "logs"
    return Path(override).expanduser()
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def project_dir() -> str | None:
    """Working directory of the session being briefed/serialized (per-project
    checkpoint routing), read from DAIMON_PROJECT_DIR.

    Hooks pass the host payload's cwd through this variable. Unset or empty
    yields None = project unknown = pre-routing behavior.
    """
    value = _get("DAIMON_PROJECT_DIR")
    return value if value else None
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def resolve_project_root(raw: str | None) -> str | None:
|
|
249
|
+
"""Normalize a project dir to its git toplevel so a subdir session maps to the
|
|
250
|
+
ONE repo bucket (#74).
|
|
251
|
+
|
|
252
|
+
Checkpoint identity is keyed on the (slugged) project dir. A session run from a
|
|
253
|
+
subdirectory of a repo — e.g. `daimon/plugin/`, which is not its own git repo —
|
|
254
|
+
would otherwise slug to a different bucket than the repo root and fork a separate
|
|
255
|
+
checkpoint history. Resolving to `git rev-parse --show-toplevel` at ingress keeps
|
|
256
|
+
every session in the repo pointing at the same bucket.
|
|
257
|
+
|
|
258
|
+
This lives in config (the resolution/policy layer) on purpose: store.py stays
|
|
259
|
+
pure file-ops with no git/subprocess dependency.
|
|
260
|
+
|
|
261
|
+
Falsy `raw` passes through unchanged (None must keep falling back to the global
|
|
262
|
+
pointer — an unknown project is not invented into a dir). On ANY git failure —
|
|
263
|
+
not a repo, git binary missing, timeout, OS error, dir gone — `raw` is returned
|
|
264
|
+
UNCHANGED, preserving exact pre-normalization behavior. Never raises.
|
|
265
|
+
"""
|
|
266
|
+
if not raw:
|
|
267
|
+
return raw
|
|
268
|
+
try:
|
|
269
|
+
result = subprocess.run(
|
|
270
|
+
["git", "-C", raw, "rev-parse", "--show-toplevel"],
|
|
271
|
+
capture_output=True,
|
|
272
|
+
text=True,
|
|
273
|
+
timeout=2,
|
|
274
|
+
)
|
|
275
|
+
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
|
276
|
+
return raw
|
|
277
|
+
if result.returncode != 0:
|
|
278
|
+
return raw
|
|
279
|
+
top = result.stdout.strip()
|
|
280
|
+
return top or raw
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def min_messages() -> int:
    """Integer value of DAIMON_MIN_MESSAGES; 10 when unset, empty, or not an
    integer. No range clamping is applied."""
    try:
        configured = _get("DAIMON_MIN_MESSAGES") or "10"
        return int(configured)
    except ValueError:
        return 10
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def timeout_seconds() -> int:
    """Integer value of DAIMON_TIMEOUT; 120 when unset, empty, or not an
    integer. No range clamping is applied."""
    try:
        configured = _get("DAIMON_TIMEOUT") or "120"
        return int(configured)
    except ValueError:
        return 120
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def hung_after_seconds() -> int:
    """Age (seconds) past which a serialize spawn with NO result line is
    treated as hung/killed rather than still-running.

    Serialize runs 4-25 min in production, so the default (1800 = 30 min) sits
    safely beyond a slow run. Override with DAIMON_HUNG_AFTER; a non-integer
    value falls back to 1800.
    """
    try:
        configured = _get("DAIMON_HUNG_AFTER") or "1800"
        return int(configured)
    except ValueError:
        return 1800
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def chunk_lines() -> int:
    """Rendered-transcript line count above which serialization goes chunked
    (armC).

    Read from DAIMON_CHUNK_LINES; the default 1200 matches the recall cliff
    measured in the D-007 probe. A non-integer value falls back to 1200.
    """
    try:
        configured = _get("DAIMON_CHUNK_LINES") or "1200"
        return int(configured)
    except ValueError:
        return 1200
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def chunk_overlap() -> int:
    """Integer value of DAIMON_CHUNK_OVERLAP; 100 when unset, empty, or not an
    integer. No range clamping is applied."""
    try:
        configured = _get("DAIMON_CHUNK_OVERLAP") or "100"
        return int(configured)
    except ValueError:
        return 100
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def chunk_concurrency() -> int:
    """Number of parallel chunk-serialize calls.

    Gateway calls are generation-bound (~minutes each); sequential chunking
    makes long sessions unusable. Read from DAIMON_CHUNK_CONCURRENCY: default
    4, clamped to at least 1, non-integer values fall back to 4.
    """
    try:
        configured = _get("DAIMON_CHUNK_CONCURRENCY") or "4"
        workers = int(configured)
    except ValueError:
        return 4
    return workers if workers > 1 else 1
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def merge_group_size() -> int:
    """Max partials per hierarchical merge call.

    K=3 keeps every merge call at the proven 3-chunk size from issue #28 where
    6-chunk merges DNF at 900s. Read from DAIMON_MERGE_GROUP_SIZE: default 3,
    clamped to at least 2, non-integer values fall back to 3.
    """
    try:
        configured = _get("DAIMON_MERGE_GROUP_SIZE") or "3"
        size = int(configured)
    except ValueError:
        return 3
    return size if size > 2 else 2
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def llm_briefing() -> bool:
    """Opt-in (DAIMON_LLM_BRIEFING): render the briefing via LLM instead of the
    deterministic template."""
    enabled = _flag("DAIMON_LLM_BRIEFING")
    return enabled
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def scar_harvest_enabled() -> bool:
    """Opt-in (DAIMON_SCAR_HARVEST): draft scar candidates from the transcript
    at session-end (#76)."""
    enabled = _flag("DAIMON_SCAR_HARVEST")
    return enabled
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def llm_no_cache() -> bool:
    """Per-request bypass of gateway response caching (LiteLLM `no-cache`),
    controlled by DAIMON_LLM_NO_CACHE.

    Needed when a cached bad response pins a failure or when runs must be
    statistically independent.
    """
    bypass = _flag("DAIMON_LLM_NO_CACHE")
    return bypass
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def llm_base_url() -> str:
    """Gateway base URL without trailing slashes: DAIMON_LLM_BASE_URL, then
    LITELLM_BASE_URL, then ``http://localhost:4000``. Empty values are treated
    as unset."""
    url = _get("DAIMON_LLM_BASE_URL")
    if not url:
        url = _get("LITELLM_BASE_URL")
    if not url:
        url = "http://localhost:4000"
    return url.rstrip("/")
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def llm_api_key() -> str | None:
    """API key: DAIMON_LLM_API_KEY when non-empty, otherwise whatever
    LITELLM_API_KEY resolves to (possibly None or an empty string)."""
    primary = _get("DAIMON_LLM_API_KEY")
    if primary:
        return primary
    return _get("LITELLM_API_KEY")
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def llm_model() -> str | None:
    """Model name: DAIMON_LLM_MODEL when non-empty, otherwise whatever
    LITELLM_MODEL resolves to (possibly None or an empty string)."""
    primary = _get("DAIMON_LLM_MODEL")
    if primary:
        return primary
    return _get("LITELLM_MODEL")
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def llm_temperature() -> float:
    """Sampling temperature sent with every chat call.

    Read from DAIMON_LLM_TEMPERATURE. Default 0.0 for deterministic extraction;
    some upstreams (e.g. kimi-k2.6) reject anything but a fixed value — set
    this to whatever the model demands. An unparsable or non-finite value
    falls back to 0.0.
    """
    import math

    try:
        temperature = float(_get("DAIMON_LLM_TEMPERATURE") or "0.0")
    except ValueError:
        return 0.0
    # float() accepts "nan"/"inf"/"-inf"; none is a meaningful temperature and
    # none has a standard JSON representation, so treat them like garbage.
    return temperature if math.isfinite(temperature) else 0.0
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def llm_backend() -> str:
    """Which LLM transport to use, from DAIMON_LLM_BACKEND (whitespace-trimmed).

    'auto' (default — litellm if credentials exist, else a command CLI if one
    resolves), 'litellm', 'command', or 'claude-cli'. The value is returned as
    given; it is not validated against that list here.
    """
    choice = _get("DAIMON_LLM_BACKEND")
    if not choice:
        choice = "auto"
    return choice.strip()
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def llm_fallback() -> bool:
    """When the litellm backend fails, auto-fall-back to a command backend.

    Controlled by DAIMON_LLM_FALLBACK. Default ON — this is the gateway-failure
    resilience. Only ``1``, ``true``, ``yes`` and ``on`` (after trimming
    whitespace) keep it on; any other non-empty value, e.g. ``0``, disables it.
    """
    raw = _get("DAIMON_LLM_FALLBACK") or "1"
    affirmative = ("1", "true", "yes", "on")
    return raw.strip() in affirmative
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def llm_command() -> str | None:
    """Full CLI invocation for the command backend (binary + model + flags),
    from DAIMON_LLM_COMMAND; None when unset or empty.

    The prompt is piped via stdin, never argv.
    """
    command = _get("DAIMON_LLM_COMMAND")
    return command if command else None
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def llm_command_output() -> str | None:
    """How to extract assistant text from the command's stdout, from
    DAIMON_LLM_COMMAND_OUTPUT; None when unset or empty.

    'text' (raw stdout) | 'json:KEY' (parse JSON, read KEY).
    """
    spec = _get("DAIMON_LLM_COMMAND_OUTPUT")
    return spec if spec else None
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Onboarding helper: detect the resolved LLM backend and fill config gaps by
|
|
2
|
+
writing ~/.daimon/env. Detection is NOT reimplemented — it reuses the real
|
|
3
|
+
resolver in llm.py so the doctor view can never disagree with what llm.chat()
|
|
4
|
+
would actually run (the single-source-of-truth requirement from #48).
|
|
5
|
+
|
|
6
|
+
Stdlib only, offline: no live LLM call is made here.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import shutil
|
|
11
|
+
import tempfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from . import config, llm
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def resolved_backend() -> str:
    """The backend llm.chat() would actually use. Mirrors its `auto` branch
    exactly — if these diverge, the doctor lies.

    An explicit (non-'auto') setting is returned verbatim. Under 'auto': an API
    key selects 'litellm'; otherwise a resolvable command selects 'command';
    otherwise 'litellm' is returned anyway.
    """
    configured = config.llm_backend()
    if configured == "auto":
        if config.llm_api_key():
            return "litellm"
        if llm._resolve_command() is None:
            return "litellm"  # let _chat_litellm raise the helpful no-key error
        return "command"
    return configured
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def status() -> dict:
    """Detection snapshot for the doctor view. No LLM call.

    Returns a dict with:
      resolved_backend  backend name from resolved_backend()
      ready             command backends: a command resolved; otherwise BOTH a
                        non-empty API key and model are configured
      claude_on_path    whether a `claude` executable is on PATH
      has_api_key       a non-empty API key is configured
      has_model         a non-empty model is configured
      command           resolved command string, or None
      command_source    "explicit" when DAIMON_LLM_COMMAND is set, "claude-cli"
                        when a command resolved without it, else None
      env_file          env-file path as a string
      env_file_exists   whether that file exists
    """
    rb = resolved_backend()
    cmd = llm._resolve_command()  # (command_str, output_spec) | None
    # Read each setting once so every field describes the same snapshot.
    api_key = config.llm_api_key()
    model = config.llm_model()
    env_file = config._env_file_path()
    if rb in ("command", "claude-cli"):
        ready = cmd is not None
    else:  # litellm needs BOTH key and model (matches the serialize pre-flight)
        ready = bool(api_key and model)
    return {
        "resolved_backend": rb,
        "ready": ready,
        "claude_on_path": shutil.which("claude") is not None,
        # Truthiness, not `is not None`: an empty-string key/model (e.g. a
        # blank `LITELLM_API_KEY=` line) is treated as missing by `ready` and
        # by resolved_backend(), so it must be reported as missing here too.
        "has_api_key": bool(api_key),
        "has_model": bool(model),
        "command": cmd[0] if cmd else None,
        "command_source": (
            "explicit" if config.llm_command()
            else ("claude-cli" if cmd else None)
        ),
        "env_file": str(env_file),
        "env_file_exists": env_file.exists(),
    }
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def write_env(updates: dict) -> Path:
    """Merge `updates` into the env file (~/.daimon/env, or DAIMON_ENV_FILE)
    and rewrite it as sorted KEY=VALUE lines, keeping unrelated existing keys.

    The file is machine-managed: comments/order are NOT preserved (normalized).
    The new content goes to a temp file in the same directory, is moved into
    place with os.replace, and the result is chmod 600 — it holds API keys.
    When the merged mapping is empty no file is created (the claude zero-config
    case writes nothing).

    Returns the target path either way. On a write/replace failure the temp
    file is removed (best effort) and the original exception propagates.
    """
    target = config._env_file_path()
    combined = dict(config._file_values())
    combined.update(updates)
    if not combined:
        return target  # nothing to persist -> never create an empty file
    target.parent.mkdir(parents=True, exist_ok=True)
    lines = [f"{key}={combined[key]}\n" for key in sorted(combined)]
    content = "".join(lines)
    handle, tmp_name = tempfile.mkstemp(dir=str(target.parent), prefix=".env-")
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            out.write(content)
        os.replace(tmp_name, target)
    except BaseException:
        # Includes KeyboardInterrupt on purpose: never leave the temp file behind.
        try:
            os.unlink(tmp_name)
        except OSError:
            pass
        raise
    os.chmod(target, 0o600)
    return target
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Zero-LLM scar-candidate harvester (#76). Session-end, opt-in, never breaks a session.
|
|
2
|
+
|
|
3
|
+
Scans the transcript for anchorable negative knowledge and drafts scar *candidates*
|
|
4
|
+
into <project_root>/.scars/candidates/. Path-anchored only (Fork 2): a hit with no
|
|
5
|
+
real file/dir path in its own span is dropped — precision over recall, because a
|
|
6
|
+
scar system dies from noise, not a missed lesson. Emits candidates only; a human
|
|
7
|
+
reviewer promotes. Pure stdlib.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import datetime
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import NamedTuple
|
|
16
|
+
|
|
17
|
+
from . import transcript
|
|
18
|
+
|
|
19
|
+
# Module logger, registered under the package-wide "daimon_briefing" name.
log = logging.getLogger("daimon_briefing")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Hit(NamedTuple):
    """One marker match produced by detect(), before any path anchoring."""

    # "intentional" or "avoidance" — which marker regex fired.
    kind: str
    # The single sentence that contained the marker.
    sentence: str
    # Full text of the message the sentence came from.
    context: str
    # Index of that message within the scanned message list.
    msg_index: int
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Case-insensitive "avoidance" markers: whole words/phrases that flag negative
# knowledge (warnings, breakage, dead ends), English first, then Spanish.
_AVOID_RE = re.compile(
    r"\b(avoid|don't|do not|never|gotcha|pitfall|footgun|broke|breaks|mistake|dead[ -]?end"
    # Spanish band mirrors the English markers (#4). Bare "no" is far more
    # frequent than "don't", so only specific imperative constructions fire —
    # never plain negation ("no devuelve" stays silent).
    r"|evit(?:a|á|ar|es|en)|nunca|jam[áa]s|trampa|romp(?:e|i[óo]|en)"
    r"|callej[óo]n sin salida|punto muerto|no (?:hagas|toques|uses|llames))\b",
    re.IGNORECASE,
)
|
|
38
|
+
# Case-insensitive "intentional" markers: phrases saying something that looks
# wrong is deliberate and must stay. English first, then Spanish equivalents.
# detect() checks this pattern before _AVOID_RE.
_INTENT_RE = re.compile(
    r"\b(on purpose|intentional(?:ly)?|deliberately|looks wrong but|must stay|keep this"
    r"|a prop[óo]sito|intencional(?:mente)?|adrede|deliberadamente"
    r"|parece (?:mal|incorrecto) pero|debe quedar(?:se)?)\b",
    re.IGNORECASE,
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _split_sentences(text):
    """Break *text* into trimmed, non-empty pieces, splitting on whitespace
    that follows ``.``, ``!`` or ``?`` and on runs of newlines."""
    sentences = []
    for piece in re.split(r"(?<=[.!?])\s+|\n+", text):
        trimmed = piece.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def detect(messages: list[dict]) -> list[Hit]:
    """Scan assistant messages for marker sentences and return them as Hits.

    Messages whose "role" is not "assistant" are ignored. Each sentence yields
    at most one Hit: "intentional" when it matches _INTENT_RE, otherwise
    "avoidance" when it matches _AVOID_RE. No I/O and no anchoring happen here.
    """
    found: list[Hit] = []
    for index, message in enumerate(messages):
        if message.get("role") == "assistant":
            body = transcript._text_of(message.get("content"))
            for sentence in _split_sentences(body):
                if _INTENT_RE.search(sentence):
                    kind = "intentional"
                elif _AVOID_RE.search(sentence):
                    kind = "avoidance"
                else:
                    continue
                found.append(Hit(kind, sentence, body, index))
    return found
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# path-like token: a/b/c.ext (ext-whitelisted to keep prose out) OR a nested a/b/ dir.
# The (?!\w) after the extension is load-bearing: alternation is ordered and
# first-match-wins, so without it "config.json" matched as "config.js" and
# "App.tsx" as "App.ts" — names that then failed the existence check, so the
# hit was dropped. The lookahead makes the extension end at a word boundary.
_PATH_RE = re.compile(
    r"([\w.\-/]+\.(?:py|md|js|ts|tsx|go|rs|json|ya?ml|toml|sh|txt|cfg|ini)(?!\w)"
    r"|[\w.\-]+(?:/[\w.\-]+)+/?)"
)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def anchor_of(hit, project_root):
    """Return the first path token in the hit's sentence that exists INSIDE
    project_root, as a repo-relative posix string; None means "drop the hit".

    Absolute tokens are skipped, and anything whose resolved location escapes
    the resolved root (``..`` traversal) is rejected. The existence +
    containment check is the precision gate — garbled, hallucinated, or
    escaping paths vanish.
    """
    base = Path(project_root).resolve()
    for match in _PATH_RE.finditer(hit.sentence):
        token = match.group(1).rstrip(":,.)")
        if not token:
            continue
        if Path(token).is_absolute():
            continue
        try:
            target = (base / token).resolve()
            inside = target.is_relative_to(base)
            if inside and target.exists():
                return target.relative_to(base).as_posix()
        except (OSError, ValueError):
            # Unresolvable or otherwise invalid token: try the next one.
            continue
    return None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Case-insensitive phrases describing an attempt that failed; _scar_type uses
# a match to label an avoidance hit "deadend" instead of "landmine".
_DEADEND_RE = re.compile(
    r"\b(tried|attempted|turned out|didn't work|doesn't work|gave up)\b", re.IGNORECASE
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _scar_type(hit):
    """Map a hit to its scar type: "fence" for intentional hits, "deadend" when
    the sentence matches _DEADEND_RE, otherwise "landmine"."""
    if hit.kind == "intentional":
        return "fence"
    if _DEADEND_RE.search(hit.sentence):
        return "deadend"
    return "landmine"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _slug(title):
    """Turn *title* into a file-name slug: lowercase, every run of characters
    outside ``a-z0-9`` collapsed to one hyphen, edge hyphens removed, cut to 60
    characters. An empty result becomes "harvested-scar"."""
    lowered = title.lower()
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    trimmed = hyphenated.strip("-")[:60]
    if not trimmed:
        return "harvested-scar"
    return trimmed
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def to_candidate(hit, anchor, session_id, today):
    """Build ``(slug, markdown)`` for one anchored hit.

    The markdown is a front-matter block (type, title, fixed severity and
    confidence, creation date, a single path anchor, an evidence note naming
    the session, a review date 365 days after *today*, status "candidate")
    followed by the hit sentence and a human-review notice.

    `title` is the whitespace-collapsed sentence cut to 80 characters, emitted
    via json.dumps → a valid double-quoted YAML scalar even when the sentence
    contains ':' or quotes (the #1 hand-written-scar YAML footgun). *today*
    must be an ISO date string.
    """
    scar_kind = _scar_type(hit)
    collapsed = " ".join(hit.sentence.split())
    title = collapsed[:80].rstrip()
    slug = _slug(title)
    created = datetime.date.fromisoformat(today)
    review_after = (created + datetime.timedelta(days=365)).isoformat()
    note = json.dumps("auto-harvested from session " + session_id)
    front_matter = [
        "---\n",
        "id: 0\n",
        f"type: {scar_kind}\n",
        f"title: {json.dumps(title)}\n",
        "severity: medium\n",
        "confidence: 0.5\n",
        f"created: {today}\n",
        'authors: ["daimon-harvest"]\n',
        "anchors:\n",
        f" - path: {anchor}\n",
        "evidence:\n",
        f" - note: {note}\n",
        "expires:\n",
        ' condition: "the referenced code is removed or the constraint no longer holds"\n',
        f" review_after: {review_after}\n",
        "status: candidate\n",
        "---\n\n",
    ]
    prose = [
        f"{hit.sentence.strip()}\n\n",
        "Auto-harvested from the session transcript — a human must verify the claim ",
        "and confirm the anchor before promotion.\n",
    ]
    return slug, "".join(front_matter + prose)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Upper bound on candidate files a single run() call will write; further
# eligible candidates are counted as dropped and reported in the log.
_MAX_CANDIDATES = 5
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def run(messages, project_root, session_id):
    """detect -> anchor-gate -> candidate -> dedup + cap -> write. Returns the
    number of candidate files written.

    Skips silently (returns 0) when project_root is falsy or the repo hasn't
    opted into scars — i.e. there is no `.scars` DIRECTORY under the root; a
    stray regular file of that name counts as "not opted in" rather than
    crashing the mkdir below. Never overwrites an existing candidate (a human
    may have edited it), so re-running the same session writes nothing new. At
    most _MAX_CANDIDATES files are written per call; the overflow is logged.
    """
    if not project_root:
        return 0
    root = Path(project_root)
    if not (root / ".scars").is_dir():
        return 0
    cand_dir = root / ".scars" / "candidates"
    cand_dir.mkdir(parents=True, exist_ok=True)
    today = datetime.date.today().isoformat()
    written, dropped = 0, 0
    seen = set()  # slugs already handled in this run
    for hit in detect(messages):
        anchor = anchor_of(hit, project_root)
        if anchor is None:
            continue  # path-anchored only: no real path, no candidate
        slug, md = to_candidate(hit, anchor, session_id, today)
        if slug in seen:
            continue
        seen.add(slug)
        path = cand_dir / f"{slug}.md"
        if path.exists():
            continue  # existing files are left untouched and don't use the cap
        if written >= _MAX_CANDIDATES:
            dropped += 1
            continue
        path.write_text(md, encoding="utf-8")
        written += 1
    if dropped:
        log.info(
            "daimon: scar harvest capped at %d, dropped %d candidate(s)",
            _MAX_CANDIDATES, dropped,
        )
    return written
|