@jhizzard/termdeck 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,898 @@
1
+ /**
2
+ * TermDeck session-end memory hook (Mnestra-direct, no rag-system dependency).
3
+ *
4
+ * Vendored into ~/.claude/hooks/memory-session-end.js by @jhizzard/termdeck-stack.
5
+ * Wired into ~/.claude/settings.json under hooks.SessionEnd — fires once per
6
+ * Claude Code session close (`/exit`, Ctrl+D, terminal close, or process kill).
7
+ *
8
+ * History: this hook was originally registered under hooks.Stop, which fires
9
+ * after every assistant turn. That meant the same transcript got embedded and
10
+ * INSERTed dozens of times per session (and most fired with env-var-missing
11
+ * because Claude Code launched outside TermDeck doesn't have SUPABASE_URL in
12
+ * scope). Sprint 48 close-out moved registration to SessionEnd (one row per
13
+ * session, fires deterministically on /exit) AND added the secrets-env
14
+ * fallback below so a standalone-Claude-Code launch picks up the credentials
15
+ * without needing them in the parent shell.
16
+ *
17
+ * Behavior:
18
+ * 1. Reads {transcript_path, cwd, session_id, sessionType?, source_agent?}
19
+ * from stdin (Claude Code SessionEnd payload, or — Sprint 50 T1 — a
20
+ * server-driven invocation for non-Claude agents). source_agent
21
+ * defaults to 'claude' when absent (Claude Code's existing hook
22
+ * payload doesn't carry it; the TermDeck server's per-adapter
23
+ * onPanelClose interceptor sets it explicitly for codex/gemini/grok).
24
+ * 2. Loads ~/.termdeck/secrets.env into process.env if any required key is
25
+ * absent OR is a literal `${VAR}` placeholder (Sprint 47.5 hotfix
26
+ * discipline — Claude Code does not expand `${VAR}` in MCP env, and we
27
+ * can't trust the parent shell to have sourced secrets.env).
28
+ * 3. Skips small transcripts (< MIN_TRANSCRIPT_BYTES, default 5KB).
29
+ * 4. Validates env vars; logs and exits cleanly if any required key is still
30
+ * missing after the secrets.env fallback.
31
+ * 5. Detects project from cwd against PROJECT_MAP (else "global"). Extend the
32
+ * map by editing the array below — see assets/hooks/README.md for guidance.
33
+ * 6. Dispatches to a transcript parser by sessionType (Sprint 45 T4): Claude
34
+ * JSONL, Codex JSONL, Gemini single-JSON, or auto-detect when sessionType
35
+ * is absent. Builds a coarse summary from the resulting message list
36
+ * (last ~30 message excerpts).
37
+ * 7. Embeds the summary via OpenAI text-embedding-3-small.
38
+ * 8. POSTs ONE row to Supabase /rest/v1/memory_items with source_type='session_summary'.
39
+ * 9. (Sprint 51.6 T3) POSTs ONE row to Supabase /rest/v1/memory_sessions with
40
+ * Prefer: resolution=merge-duplicates so SessionEnd-fires-twice resolves
41
+ * to a single row. Requires Mnestra migration 017 on canonical installs;
42
+ * petvetbid already has the rich schema from rag-system bootstrap.
43
+ * 10. Logs every step to ~/.claude/hooks/memory-hook.log.
44
+ *
45
+ * Version stamp (Sprint 51.6 T3 — hook upgrade gap fix):
46
+ * The marker `@termdeck/stack-installer-hook v<N>` below is read by both
47
+ * stack-installer's installSessionEndHook (version-aware overwrite under
48
+ * --yes) and `termdeck init --mnestra` (refreshBundledHookIfNewer step).
49
+ * Bump the integer whenever a change to this file should overwrite an
50
+ * already-installed copy on the user's machine — e.g. a new write path,
51
+ * a new transcript parser, a default PROJECT_MAP change. Comment-only
52
+ * tweaks do not need a bump.
53
+ *
54
+ * v2 (Sprint 51.7 T2 — metadata completeness + wire-up insurance):
55
+ * - parseTranscriptMetadata() now populates memory_sessions.started_at /
56
+ * duration_minutes / facts_extracted from per-message timestamps and
57
+ * memory_remember tool_use counts, closing the v1 "minimum viable row"
58
+ * gap Codex flagged at Sprint 51.6 Phase B.
59
+ * - Stamp bump load-bearing as INSURANCE for the Sprint 51.6 wire-up bug
60
+ * (T1 fix landing in same v1.0.3 wave): an installed-v1 user upgrading
61
+ * to bundled-v2 always passes the `installed >= bundled` short-circuit
62
+ * at init-mnestra.js:550 and reaches the refresh path.
63
+ *
64
+ * @termdeck/stack-installer-hook v2
65
+ *
66
+ * Required env vars (validated at entry, after the secrets.env fallback):
67
+ * - SUPABASE_URL e.g. https://<project-ref>.supabase.co
68
+ * - SUPABASE_SERVICE_ROLE_KEY service-role key (NOT the anon key — needs INSERT on memory_items)
69
+ * - OPENAI_API_KEY sk-... for text-embedding-3-small
70
+ *
71
+ * Optional:
72
+ * - TERMDECK_HOOK_DEBUG=1 verbose logging
73
+ * - TERMDECK_HOOK_MIN_BYTES=5000 transcript size threshold
74
+ * - TERMDECK_SESSION_TYPE=... override sessionType when payload lacks it
75
+ *
76
+ * Fail-soft contract: any error (network, parse, env-var-missing, malformed transcript)
77
+ * logs and exits 0. Never blocks Claude Code session close.
78
+ *
79
+ * Co-existence with Joshua's personal rag-system hook: this bundled hook writes
80
+ * source_type='session_summary' (one row per session). Joshua's personal hook
81
+ * writes source_type='fact' (multiple rows from extractFacts pipeline). Different
82
+ * source_types coexist in memory_items without dedup collisions.
83
+ */
84
+
85
+ 'use strict';
86
+
87
+ const { existsSync, statSync, appendFileSync, readFileSync } = require('fs');
88
+ const { join } = require('path');
89
+ const os = require('os');
90
+
91
+ const LOG_FILE = join(os.homedir(), '.claude', 'hooks', 'memory-hook.log');
92
+
93
+ // Resolved per-call so tests can override via TERMDECK_HOOK_SECRETS_PATH
94
+ // (the const-at-load-time pattern would freeze the path before any test
95
+ // that mutates HOME or the override env var gets a chance to take effect).
96
/**
 * Resolve the location of the secrets file at call time (not at module load),
 * so a caller that changes TERMDECK_HOOK_SECRETS_PATH or the home directory
 * after this module is loaded still gets the up-to-date path.
 *
 * @returns {string} The TERMDECK_HOOK_SECRETS_PATH override when it is set to
 *   a non-empty value, otherwise `<home>/.termdeck/secrets.env`.
 */
function resolveSecretsPath() {
  const override = process.env.TERMDECK_HOOK_SECRETS_PATH;
  if (override) return override;
  return join(os.homedir(), '.termdeck', 'secrets.env');
}
100
+
101
+ // PROJECT_MAP — most-specific-first ordering (Sprint 41 design).
102
+ // Patterns match against the cwd reported by Claude Code at SessionEnd.
103
+ // First match wins; falls through to "global".
104
+ //
105
+ // Sprint 51.6 (T1 side finding b): a previous version shipped this array
106
+ // empty, which caused every session to tag as "global" — orphaning rows
107
+ // from project-scoped memory_recall queries. The default below restores
108
+ // the most-specific-first taxonomy from Sprint 41 T1, generalized for
109
+ // universal shipping. Users still extend in place by editing this array.
110
+ //
111
+ // Patterns NOT specific to Joshua's filesystem (e.g. /\/PVB\//i, /\/DOR\//i)
112
+ // are kept because they're benign on other machines — the regex simply
113
+ // doesn't fire on cwds that don't contain those segments. The chopin-
114
+ // nashville catch-all stays LAST (structural invariant) so a TermDeck cwd
115
+ // inside ChopinNashville/SideHustles/ resolves to "termdeck", not the
116
+ // catch-all.
117
// Ordered cwd-pattern → project-tag table. Rows are written as
// [pattern, project] pairs and expanded into { pattern, project } objects;
// the array order is the match priority (first hit wins), so more specific
// paths must stay above broader ones.
const PROJECT_MAP = [
  // ── Active code projects (most-specific FIRST) ──
  [/\/SideHustles\/TermDeck\/termdeck/i, 'termdeck'],
  [/\/Graciella\/engram(\/|$)/i, 'mnestra'],
  [/\/Graciella\/rumen(\/|$)/i, 'rumen'],
  [/\/Graciella\/rag-system(\/|$)/i, 'rag-system'],
  [/\/ChopinInBohemia\/podium(\/|$)/i, 'podium'],
  [/\/ChopinInBohemia(\/|$)/i, 'chopin-in-bohemia'],
  [/\/SideHustles\/SchedulingApp(\/|$)/i, 'chopin-scheduler'],
  [/\/ChopinNashville\/SchedulingApp(\/|$)/i, 'chopin-scheduler'],
  [/\/Graciella\/PVB(\/|$)|\/PVB\/pvb(\/|$)/i, 'pvb'],
  [/\/Unagi\/gorgias-ticket-monitor(\/|$)/i, 'claimguard'],
  [/\/ChopinNashville\/SideHustles\/ClaimGuard(\/|$)/i, 'claimguard'],
  [/\/Documents\/DOR(\/|$)/i, 'dor'],
  [/\/Graciella\/joshuaizzard-dev(\/|$)/i, 'portfolio'],
  [/\/Graciella\/imessage-reader(\/|$)/i, 'imessage-reader'],

  // ── chopin-nashville catch-all: MUST remain the LAST /ChopinNashville/
  // matcher. Any /ChopinNashville/ pattern placed below this row is shadowed
  // and its sessions would be tagged 'chopin-nashville' instead.
  [/\/ChopinNashville(\/|$)/i, 'chopin-nashville'],
].map(([pattern, project]) => ({ pattern, project }));
139
+
140
/**
 * Parse the TERMDECK_HOOK_MIN_BYTES override into a byte threshold.
 *
 * An unparseable value (e.g. "abc") used to yield NaN. Every `size < NaN`
 * comparison is false, so a typo in the env var would make any size check
 * against the threshold a silent no-op. Such values now fall back to the
 * documented 5000-byte default instead.
 *
 * @param {string|undefined} rawValue Raw env-var text (may be unset or empty).
 * @returns {number} The parsed integer, or 5000 when the value is unset,
 *   empty, or not parseable as a base-10 integer.
 */
function resolveMinTranscriptBytes(rawValue) {
  const fallbackBytes = 5000;
  const parsed = Number.parseInt(rawValue || String(fallbackBytes), 10);
  return Number.isNaN(parsed) ? fallbackBytes : parsed;
}

const MIN_TRANSCRIPT_BYTES = resolveMinTranscriptBytes(process.env.TERMDECK_HOOK_MIN_BYTES);
// Verbose logging is opt-in: only the exact string '1' enables it.
const DEBUG = process.env.TERMDECK_HOOK_DEBUG === '1';
142
+
143
/**
 * Append one line to LOG_FILE, prefixed with the current ISO-8601 timestamp
 * in square brackets. Any failure while formatting or writing is swallowed
 * so logging can never break the hook.
 *
 * @param {*} msg Message to record (interpolated into a template string).
 */
function log(msg) {
  try {
    const stamp = new Date().toISOString();
    appendFileSync(LOG_FILE, `[${stamp}] ${msg}\n`);
  } catch (_) {
    /* fail-soft */
  }
}

/**
 * Log a message tagged `[debug]`, but only when DEBUG is enabled.
 *
 * @param {*} msg Message to record.
 */
function debug(msg) {
  if (!DEBUG) return;
  log(`[debug] ${msg}`);
}
148
+
149
/**
 * Map a working directory to a project tag.
 *
 * Entries are tried in array order and the first pattern that matches wins,
 * so the map must list more specific paths before broader ones.
 *
 * A missing or non-string cwd returns 'global' directly. Previously such a
 * value was coerced by RegExp#test into the text "undefined"/"null" and
 * matched against the patterns.
 *
 * @param {string} cwd Working directory reported for the session.
 * @param {Array<{pattern: RegExp, project: string}>} [projectMap=PROJECT_MAP]
 *   Ordered pattern table; defaults to the module-level PROJECT_MAP.
 * @returns {string} The matching project tag, or 'global' when nothing matches.
 */
function detectProject(cwd, projectMap = PROJECT_MAP) {
  if (typeof cwd !== 'string') return 'global';
  const hit = projectMap.find(({ pattern }) => pattern.test(cwd));
  return hit ? hit.project : 'global';
}
155
+
156
+ // Treat values shaped like `${VAR}` as unset. Claude Code does not expand
157
+ // shell placeholders in MCP env or hook env, so a literal `${SUPABASE_URL}`
158
+ // is non-empty-but-invalid — the same trap that caused the Sprint 47.5
159
+ // hotfix on the stack-installer + mnestra MCP. Mirroring that discipline
160
+ // here keeps the hook resilient if any future tooling regresses to the
161
+ // placeholder pattern.
162
/**
 * Report whether a value looks like a literal, never-expanded shell
 * placeholder such as `${SUPABASE_URL}`.
 *
 * @param {*} v Candidate value (typically read from process.env).
 * @returns {boolean} True only for strings that begin with `${` and end
 *   with `}`; false for every non-string.
 */
function isUnexpandedPlaceholder(v) {
  if (typeof v !== 'string') return false;
  return v.slice(0, 2) === '${' && v.slice(-1) === '}';
}
165
+
166
+ // Load ~/.termdeck/secrets.env into process.env when keys are absent or
167
+ // hold an unexpanded `${VAR}` placeholder. Concrete values already in
168
+ // process.env always win — the fallback only fills gaps. Silent no-op if
169
+ // the file is missing. Mirrors mnestra's loadTermdeckSecretsFallback so
170
+ // the hook works in three launch contexts:
171
+ // 1. Inside TermDeck PTY (Sprint 48 T4 PTY env merge supplies the vars).
172
+ // 2. Standalone Claude Code launched from a shell with secrets.env sourced.
173
+ // 3. Standalone Claude Code launched from a vanilla shell (this fallback).
174
/**
 * Fill gaps in process.env from the secrets file returned by
 * resolveSecretsPath().
 *
 * Rules:
 *  - A missing file is a silent no-op; a read error is logged and ignored.
 *  - Blank lines and lines starting with `#` are skipped.
 *  - Accepted line shapes are `KEY=value` and `export KEY=value`, where KEY
 *    is upper-case letters, digits and underscores. The `export` form was
 *    previously ignored, so a file written to be `source`d from a shell
 *    loaded nothing.
 *  - A concrete value already in process.env always wins; only unset, empty
 *    or unexpanded-`${VAR}` entries are overwritten.
 *  - One pair of matching surrounding quotes (single or double) is stripped.
 *
 * @returns {void}
 */
function loadTermdeckSecretsFallback() {
  const secretsPath = resolveSecretsPath();
  if (!existsSync(secretsPath)) return;

  let raw;
  try {
    raw = readFileSync(secretsPath, 'utf8');
  } catch (err) {
    log(`secrets-env-read-failed: ${err && err.message ? err.message : String(err)}`);
    return;
  }

  let loaded = 0;
  for (const line of raw.split('\n')) {
    // trim() also removes the trailing \r of CRLF-encoded files.
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    const m = trimmed.match(/^(?:export\s+)?([A-Z_][A-Z0-9_]*)=(.*)$/);
    if (!m) continue;

    const key = m[1];
    const cur = process.env[key];
    // Keep real values supplied by the parent environment.
    if (cur && !isUnexpandedPlaceholder(cur)) continue;

    let v = m[2];
    const quoted = v.length >= 2 && (v[0] === '"' || v[0] === "'") && v[v.length - 1] === v[0];
    if (quoted) v = v.slice(1, -1);

    process.env[key] = v;
    loaded++;
  }
  if (loaded > 0) debug(`secrets-env-loaded: ${loaded} keys from ${secretsPath}`);
}
201
+
202
/**
 * Resolve the credentials the hook needs, after first giving the secrets
 * file a chance to fill any gaps in process.env.
 *
 * A key counts as missing when it is unset, empty, or still an unexpanded
 * `${VAR}` placeholder. When anything is missing, one log line naming the
 * keys is written and null is returned.
 *
 * @returns {{supabaseUrl: string, supabaseKey: string, openaiKey: string}|null}
 *   The credentials (SUPABASE_URL with one trailing slash removed), or null
 *   when a required key is missing.
 */
function readEnv() {
  loadTermdeckSecretsFallback();

  const missing = [];
  for (const key of ['SUPABASE_URL', 'SUPABASE_SERVICE_ROLE_KEY', 'OPENAI_API_KEY']) {
    const value = process.env[key];
    if (!value || isUnexpandedPlaceholder(value)) missing.push(key);
  }

  if (missing.length > 0) {
    log(`env-var-missing: ${missing.join(', ')} — set these in ~/.termdeck/secrets.env or your shell to enable Mnestra ingestion. Skipping.`);
    return null;
  }

  const { SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, OPENAI_API_KEY } = process.env;
  return {
    supabaseUrl: SUPABASE_URL.replace(/\/$/, ''),
    supabaseKey: SUPABASE_SERVICE_ROLE_KEY,
    openaiKey: OPENAI_API_KEY,
  };
}
219
+
220
+ // ──────────────────────────────────────────────────────────────────────────
221
+ // Sprint 45 T4 — adapter-pluggable transcript parsers.
222
+ //
223
+ // Each parser takes raw transcript file contents (string) and returns a
224
+ // `{ role: 'user'|'assistant', content: string }[]` array — the shape
225
+ // buildSummary() consumes. Adapters in packages/server/src/agent-adapters/
226
+ // own the canonical parser logic; this file inlines copies because the
227
+ // hook ships standalone to ~/.claude/hooks/ where it can't `require()`
228
+ // from the TermDeck server package. When new agents add adapters, mirror
229
+ // their parseTranscript function body here — keep the two in sync.
230
+ // (Sprint 46 candidate: a sync script that codegens this section from
231
+ // agent-adapters/*.js, analogous to scripts/sync-agent-instructions.js
232
+ // for CLAUDE.md / AGENTS.md / GEMINI.md mirroring.)
233
+ //
234
+ // When sessionType is absent or unknown, parseAutoDetect runs a per-line
235
+ // best-effort that handles Claude JSONL, Codex JSONL, AND Gemini's single
236
+ // JSON-object shape. This is the pre-T4 stop-gap T1+T2 landed inline —
237
+ // preserved as the fallback so existing hook payloads (Claude Code Stop,
238
+ // no sessionType field) continue working for any of the three agents.
239
+ // Once Sprint 46 wires sessionType into payloads, the auto path narrows
240
+ // to a legacy compatibility role.
241
+ // ──────────────────────────────────────────────────────────────────────────
242
+
243
/**
 * Parse a Claude Code JSONL transcript into a flat message list.
 *
 * Each non-empty line is parsed independently; lines that are not valid JSON
 * or whose `message.role` is not 'user'/'assistant' are skipped. Message
 * text is either the string `message.content` or the space-joined `text` of
 * its `type: 'text'` parts. Messages with no text are dropped and each kept
 * text is cut to 400 characters.
 *
 * @param {string} raw Transcript file contents.
 * @returns {Array<{role: string, content: string}>} Messages in file order;
 *   empty for non-string or empty input.
 */
function parseClaudeJsonl(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return [];

  const out = [];
  raw.split('\n').forEach((line) => {
    if (!line) return;

    let entry;
    try {
      entry = JSON.parse(line);
    } catch (_) {
      return;
    }

    const inner = entry && entry.message;
    const role = inner && inner.role;
    if (role !== 'user' && role !== 'assistant') return;

    const { content } = inner;
    let text = '';
    if (typeof content === 'string') {
      text = content;
    } else if (Array.isArray(content)) {
      const parts = [];
      for (const part of content) {
        if (part && part.type === 'text') parts.push(part.text || '');
      }
      text = parts.join(' ');
    }

    if (text) out.push({ role, content: text.slice(0, 400) });
  });
  return out;
}
266
+
267
/**
 * Parse a Codex JSONL transcript into a flat message list.
 *
 * Only lines shaped `{ type: 'response_item', payload: { type: 'message' } }`
 * with a payload role of 'user' or 'assistant' are kept (other roles, such
 * as `developer`, are skipped). Text comes from a string `payload.content`,
 * or from the space-joined `text` of its `input_text` / `output_text` /
 * `text` parts. Empty texts are dropped; kept texts are cut to 400 chars.
 *
 * @param {string} raw Transcript file contents.
 * @returns {Array<{role: string, content: string}>} Messages in file order;
 *   empty for non-string or empty input.
 */
function parseCodexJsonl(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return [];

  // Part types that carry conversational text.
  const textTypes = new Set(['input_text', 'output_text', 'text']);
  const collected = [];

  for (const line of raw.split('\n')) {
    if (line === '') continue;

    let record;
    try {
      record = JSON.parse(line);
    } catch (_) {
      continue;
    }

    const payload = record && record.type === 'response_item' ? record.payload : null;
    if (!payload || payload.type !== 'message') continue;

    const { role, content } = payload;
    if (role !== 'user' && role !== 'assistant') continue;

    let text = '';
    if (typeof content === 'string') {
      text = content;
    } else if (Array.isArray(content)) {
      const pieces = [];
      for (const part of content) {
        if (part && textTypes.has(part.type)) pieces.push(part.text || '');
      }
      text = pieces.join(' ');
    }

    if (text) collected.push({ role, content: text.slice(0, 400) });
  }
  return collected;
}
296
+
297
/**
 * Parse a Gemini CLI session file into a flat message list.
 *
 * The whole input is one JSON object (not JSONL) with a top-level `messages`
 * array. Entries of type 'user' map to role 'user'; 'gemini' and 'assistant'
 * map to role 'assistant'; anything else is skipped. Text is the string
 * `content`, or the space-joined `text` of array parts whose `text` is a
 * string. Empty texts are dropped; kept texts are cut to 400 characters.
 *
 * @param {string} raw Session file contents.
 * @returns {Array<{role: string, content: string}>} Messages in order; empty
 *   when the input is not a string, is not valid JSON, or has no `messages`
 *   array.
 */
function parseGeminiJson(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return [];

  let session;
  try {
    session = JSON.parse(raw);
  } catch (_) {
    return [];
  }
  if (!session || !Array.isArray(session.messages)) return [];

  const out = [];
  for (const entry of session.messages) {
    if (!entry || typeof entry !== 'object') continue;

    let role;
    switch (entry.type) {
      case 'user':
        role = 'user';
        break;
      case 'gemini':
      case 'assistant':
        role = 'assistant';
        break;
      default:
        continue;
    }

    const body = entry.content;
    let text = '';
    if (typeof body === 'string') {
      text = body;
    } else if (Array.isArray(body)) {
      const pieces = [];
      for (const part of body) {
        if (part && typeof part.text === 'string') pieces.push(part.text);
      }
      text = pieces.join(' ');
    }

    if (text) out.push({ role, content: text.slice(0, 400) });
  }
  return out;
}
328
+
329
+ // Sprint 50 T1 — Grok parser. Mirrors packages/server/src/agent-adapters/grok.js
330
+ // parseTranscript: accepts either a JSON array or JSONL of `{role, content}`
331
+ // objects, where content is a string OR an array of `{type, text, ...}` parts
332
+ // (AI SDK provider shape). Tool-call / tool-result / reasoning parts are
333
+ // skipped — only the `type:'text'` parts contribute to the summary.
334
+ //
335
+ // The JSON envelope is produced server-side by the Grok adapter's
336
+ // `resolveTranscriptPath` (which extracts from ~/.grok/grok.db SQLite via
337
+ // better-sqlite3 and writes a tempfile). The hook itself never opens grok.db
338
+ // — that would require better-sqlite3 to be reachable from ~/.claude/hooks/,
339
+ // which isn't part of the install contract. The transcript_path the server
340
+ // hands the hook is the tempfile, and the sessionType in the payload is
341
+ // 'grok' so this parser is the one selected.
342
/**
 * Parse a Grok transcript into a flat message list.
 *
 * The input may be a single JSON array of records or JSONL (one record per
 * line). The array form is tried first; if the input is not a JSON array,
 * each trimmed non-empty line is parsed on its own and non-object values are
 * ignored. Records need `role` 'user' or 'assistant'. Text is the string
 * `content`, or the space-joined `text` of parts with `type: 'text'` and a
 * string `text`. Empty texts are dropped; kept texts are cut to 400 chars.
 *
 * @param {string} raw Transcript contents.
 * @returns {Array<{role: string, content: string}>} Messages in order; empty
 *   for non-string or empty input.
 */
function parseGrokJson(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return [];

  const readWholeArray = () => {
    try {
      const doc = JSON.parse(raw);
      return Array.isArray(doc) ? doc : null;
    } catch (_) {
      return null; // not a single JSON document — caller falls back to JSONL
    }
  };

  const readJsonLines = () => {
    const rows = [];
    for (const line of raw.split('\n')) {
      const candidate = line.trim();
      if (candidate === '') continue;
      try {
        const row = JSON.parse(candidate);
        if (row && typeof row === 'object') rows.push(row);
      } catch (_) {
        /* skip malformed line */
      }
    }
    return rows;
  };

  const records = readWholeArray() || readJsonLines();

  const out = [];
  for (const record of records) {
    if (!record || typeof record !== 'object') continue;

    const { role, content } = record;
    if (role !== 'user' && role !== 'assistant') continue;

    let text = '';
    if (typeof content === 'string') {
      text = content;
    } else if (Array.isArray(content)) {
      const pieces = [];
      for (const part of content) {
        if (part && part.type === 'text' && typeof part.text === 'string') pieces.push(part.text);
      }
      text = pieces.join(' ');
    }

    if (text) out.push({ role, content: text.slice(0, 400) });
  }
  return out;
}
379
+
380
/**
 * Best-effort parser used when no known sessionType selects a specific one.
 *
 * Strategy:
 *  1. If the trimmed input starts with `{`, try parseGeminiJson on the whole
 *     input and return its result when it yields any message.
 *  2. Otherwise parse line by line, accepting two record shapes:
 *     - Claude: `message.role` is 'user'/'assistant'; only `type: 'text'`
 *       parts of an array content are used.
 *     - Codex: `type: 'response_item'` with `payload.type: 'message'` and a
 *       payload role of 'user'/'assistant'; `input_text`, `output_text` and
 *       `text` parts are used.
 *     The Claude shape is checked first; a record that fails it can still
 *     qualify through the Codex shape.
 *
 * Empty texts are dropped and kept texts are cut to 400 characters.
 *
 * @param {string} raw Transcript contents.
 * @returns {Array<{role: string, content: string}>} Messages in order; empty
 *   for non-string or empty input.
 */
function parseAutoDetect(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return [];

  if (raw.trim().startsWith('{')) {
    const fromGemini = parseGeminiJson(raw);
    if (fromGemini.length > 0) return fromGemini;
  }

  const isChatRole = (r) => r === 'user' || r === 'assistant';
  const claudePartOk = (type) => type === 'text';
  const codexPartOk = (type) => type === 'input_text' || type === 'output_text' || type === 'text';

  const out = [];
  for (const line of raw.split('\n')) {
    if (!line) continue;

    let record;
    try {
      record = JSON.parse(line);
    } catch (_) {
      continue;
    }
    if (!record) continue;

    let role;
    let content;
    let partOk;
    const claude = record.message;
    if (claude && isChatRole(claude.role)) {
      role = claude.role;
      content = claude.content;
      partOk = claudePartOk;
    } else if (record.type === 'response_item' && record.payload && record.payload.type === 'message') {
      if (!isChatRole(record.payload.role)) continue;
      role = record.payload.role;
      content = record.payload.content;
      partOk = codexPartOk;
    } else {
      continue;
    }

    let text = '';
    if (typeof content === 'string') {
      text = content;
    } else if (Array.isArray(content)) {
      const pieces = [];
      for (const part of content) {
        if (part && partOk(part.type)) pieces.push(part.text || '');
      }
      text = pieces.join(' ');
    }

    if (text) out.push({ role, content: text.slice(0, 400) });
  }
  return out;
}
434
+
435
// sessionType → transcript parser. Every parser takes the raw transcript
// text and returns the same `{ role, content }[]` message list.
const TRANSCRIPT_PARSERS = Object.fromEntries([
  ['claude-code', parseClaudeJsonl],
  ['codex', parseCodexJsonl],
  ['gemini', parseGeminiJson],
  // Sprint 50 T1 — the Grok transcript reaches this hook as a tempfile
  // written server-side; parseGrokJson reads that file's contents.
  ['grok', parseGrokJson],
]);
// Label for the auto-detect path, used when no explicit parser applies.
const DEFAULT_SESSION_TYPE = 'auto';
445
+
446
/**
 * Pick the transcript parser for a session type.
 *
 * Only keys defined directly on TRANSCRIPT_PARSERS count. A plain
 * `TRANSCRIPT_PARSERS[sessionType]` lookup would also resolve inherited keys
 * such as 'constructor' or 'toString' to Object.prototype members, which
 * would then be returned as the "parser". Anything that is not a registered
 * string key falls back to auto-detection.
 *
 * @param {string} [sessionType] Session type requested by the caller.
 * @returns {{parser: Function, sessionType: string}} The parser to run and
 *   the session type actually selected (DEFAULT_SESSION_TYPE on fallback).
 */
function selectTranscriptParser(sessionType) {
  const isRegistered =
    typeof sessionType === 'string' &&
    Object.hasOwn(TRANSCRIPT_PARSERS, sessionType) &&
    typeof TRANSCRIPT_PARSERS[sessionType] === 'function';

  if (isRegistered) {
    return { parser: TRANSCRIPT_PARSERS[sessionType], sessionType };
  }
  return { parser: parseAutoDetect, sessionType: DEFAULT_SESSION_TYPE };
}
452
+
453
+ // ──────────────────────────────────────────────────────────────────────────
454
+ // Sprint 51.7 T2 — transcript metadata extractor for memory_sessions.
455
+ //
456
+ // The v1 bundled hook (Sprint 51.6 T3) intentionally shipped the "minimum
457
+ // viable row" — postMemorySession set started_at, duration_minutes, and
458
+ // facts_extracted to NULL/0 because v1 omitted transcript parsing for
459
+ // per-message timestamps. The legacy rag-system writer
460
+ // (~/Documents/Graciella/rag-system/src/scripts/process-session.ts) populated
461
+ // those fields by parsing the transcript JSONL passed to it on stdin, and
462
+ // petvetbid's 289 baseline rows carried the rich shape from that writer.
463
+ // v2 closes the gap in pure Node so the bundled hook reaches parity without
464
+ // the rag-system dependency (Class E hidden-dependency rule).
465
+ //
466
+ // Heuristic for facts_extracted: count distinct `tool_use` blocks whose
467
+ // `name` matches a memory_remember MCP tool. Conservative by design — a
468
+ // regex like /Remember:/ inside summary text would over-match quoted user
469
+ // content (e.g., "the user typed 'Remember:' in their prompt"). Counting
470
+ // tool_use blocks instead measures what was actually written into the store
471
+ // during the session, which is the semantic the rag-system writer used.
472
+ //
473
+ // Tool name variants observed in real transcripts (T4-CODEX 11:09 ET pre-
474
+ // audit confirmed both prefixes are live in `~/.claude/projects/`):
475
+ // - `memory_remember` (bare; CC native + future-proofing)
476
+ // - `mcp__mnestra__memory_remember` (current Mnestra MCP, post-rename)
477
+ // - `mcp__memory__memory_remember` (legacy MCP server name from when
478
+ // the project was called "memory")
479
+ // Counting all three avoids undercounting on existing user transcripts.
480
+ // ──────────────────────────────────────────────────────────────────────────
481
+
482
// Tool names whose tool_use blocks count toward facts_extracted: the bare
// name plus the two MCP-prefixed spellings.
const FACT_TOOL_NAMES = new Set()
  .add('memory_remember')
  .add('mcp__mnestra__memory_remember')
  .add('mcp__memory__memory_remember');
487
+
488
+ // Sprint 51.7 T2 / T4-CODEX 11:13 ET catch: each adapter shipped by this
489
+ // hook stores message content under a different key shape, and we have to
490
+ // match all of them or facts_extracted under-counts whenever a non-Claude
491
+ // session writes to memory_sessions. Mirror the shapes already documented
492
+ // at the top of TRANSCRIPT_PARSERS:
493
+ //
494
+ // - Claude Code (current): msg.message.content[]
495
+ // - Grok (Sprint 50 T1): msg.content[] (flat, AI SDK provider shape)
496
+ // - Codex (response_item): msg.payload.content[] when msg.type === 'response_item'
497
+ //
498
+ // Gemini's single-JSON envelope doesn't apply per-line — its content lives
499
+ // inside a top-level messages array, and each entry's content is a flat
500
+ // array OR a string. extractContentBlocks() handles flat arrays; strings
501
+ // are skipped (no tool_use can hide inside a string).
502
/**
 * Locate the content-block array inside one transcript record.
 *
 * Shapes are probed in this order and the first array found is returned:
 *  1. `msg.message.content`
 *  2. `msg.content`
 *  3. `msg.payload.content`, only when `msg.type === 'response_item'`
 *
 * @param {*} msg One parsed transcript record.
 * @returns {Array|null} The content array, or null when `msg` is not an
 *   object or none of the shapes holds an array (e.g. string content).
 */
function extractContentBlocks(msg) {
  if (msg === null || typeof msg !== 'object') return null;

  const nested = msg.message?.content;
  if (Array.isArray(nested)) return nested;

  const flat = msg.content;
  if (Array.isArray(flat)) return flat;

  if (msg.type === 'response_item') {
    const fromPayload = msg.payload?.content;
    if (Array.isArray(fromPayload)) return fromPayload;
  }
  return null;
}
511
+
512
/**
 * Split a transcript into individual records.
 *
 * A transcript that is one JSON document — an array of records, or an object
 * with a top-level `messages` array — is unpacked into its entries. Anything
 * else is treated as JSONL; malformed lines are skipped.
 *
 * @param {string} raw Transcript contents (non-empty string).
 * @returns {Array<*>} Parsed records in order.
 */
function collectTranscriptRecords(raw) {
  const head = raw.trimStart()[0];
  if (head === '[' || head === '{') {
    try {
      const doc = JSON.parse(raw);
      if (Array.isArray(doc)) return doc;
      if (doc && Array.isArray(doc.messages)) return doc.messages;
    } catch (_) {
      /* multi-line JSONL is not one document — fall through to per-line parsing */
    }
  }

  const records = [];
  for (const line of raw.split('\n')) {
    if (!line) continue;
    try {
      records.push(JSON.parse(line));
    } catch (_) {
      /* skip malformed line */
    }
  }
  return records;
}

/**
 * Convert a record timestamp to epoch milliseconds.
 *
 * Strings go through Date.parse. Numbers are used directly rather than being
 * passed to Date.parse, which only understands date strings.
 * NOTE(review): numeric magnitudes below 1e11 are assumed to be epoch
 * seconds and everything else epoch milliseconds — confirm against any agent
 * that actually emits numeric timestamps.
 *
 * @param {*} ts Raw timestamp value.
 * @returns {number|null} Epoch milliseconds, or null when the value is
 *   missing, unparseable, or outside the range a Date can represent.
 */
function toEpochMs(ts) {
  let ms;
  if (typeof ts === 'number') {
    ms = Math.abs(ts) < 1e11 ? ts * 1000 : ts;
  } else if (typeof ts === 'string') {
    ms = Date.parse(ts);
  } else {
    return null;
  }
  // 8.64e15 ms is the Date limit; beyond it toISOString() throws a RangeError.
  return Number.isFinite(ms) && Math.abs(ms) <= 8.64e15 ? ms : null;
}

/**
 * Derive session metadata from a transcript: first/last timestamp, duration,
 * and the number of tool_use blocks whose name is in FACT_TOOL_NAMES.
 *
 * Works on JSONL transcripts and on single-document transcripts (JSON array
 * or `{ messages: [...] }` envelope). The latter were previously parsed line
 * by line, so their timestamps and tool_use blocks were never seen.
 *
 * @param {string} rawJsonl Transcript contents.
 * @returns {{startedAt: string|null, endedAt: string|null,
 *   durationMinutes: number|null, factsExtracted: number}} ISO-8601
 *   timestamps (null when no record carries a usable timestamp), the
 *   non-negative rounded duration in minutes, and the fact count.
 */
function parseTranscriptMetadata(rawJsonl) {
  if (typeof rawJsonl !== 'string' || rawJsonl.length === 0) {
    return { startedAt: null, endedAt: null, durationMinutes: null, factsExtracted: 0 };
  }

  let earliestTs = null;
  let latestTs = null;
  let factsExtracted = 0;

  for (const msg of collectTranscriptRecords(rawJsonl)) {
    if (!msg || typeof msg !== 'object') continue;

    // Prefer the top-level timestamp; fall back to one nested under `message`.
    const t = toEpochMs(msg.timestamp || (msg.message && msg.message.timestamp));
    if (t !== null) {
      if (earliestTs === null || t < earliestTs) earliestTs = t;
      if (latestTs === null || t > latestTs) latestTs = t;
    }

    const blocks = extractContentBlocks(msg);
    if (!blocks) continue;
    for (const b of blocks) {
      if (b && b.type === 'tool_use' && typeof b.name === 'string' && FACT_TOOL_NAMES.has(b.name)) {
        factsExtracted += 1;
      }
    }
  }

  const hasRange = earliestTs !== null && latestTs !== null;
  return {
    startedAt: earliestTs !== null ? new Date(earliestTs).toISOString() : null,
    endedAt: latestTs !== null ? new Date(latestTs).toISOString() : null,
    durationMinutes: hasRange ? Math.max(0, Math.round((latestTs - earliestTs) / 60000)) : null,
    factsExtracted,
  };
}
558
+
559
+ // Sprint 51.6 T3 → 51.7 T2: `buildSummary` now also returns parser-derived
560
+ // metadata (startedAt, endedAt, durationMinutes, factsExtracted) merged into
561
+ // the result object. parseTranscriptMetadata reuses the same raw string —
562
+ // no second readFileSync. Returns null when the transcript is unreadable or
563
+ // has fewer than 5 messages (skip semantics unchanged from v1).
564
/**
 * Replace every unpaired UTF-16 surrogate with U+FFFD.
 *
 * Cutting a string with slice() can split an astral character (most emoji)
 * in half. JSON.stringify then emits the orphan as a bare `\uD83C`-style
 * escape, which PostgreSQL's JSON input rejects.
 *
 * @param {string} text Possibly ill-formed string.
 * @returns {string} Same-length string with no lone surrogates.
 */
function replaceLoneSurrogates(text) {
  return text.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    '\uFFFD',
  );
}

/**
 * Read a transcript and build the coarse session summary plus metadata.
 *
 * The summary is a header line with the total message count followed by the
 * last 30 messages as `[role] content` lines, capped at 7000 characters.
 * Lone surrogates left by that cap, or by the parsers' own per-message
 * truncation, are replaced so the text stays valid for JSON transport.
 *
 * @param {string} transcriptPath Path of the transcript file.
 * @param {string} [sessionType] Requested parser; unknown or absent values
 *   fall back to whatever selectTranscriptParser returns.
 * @returns {{summary: string, messagesCount: number, startedAt: (string|null),
 *   endedAt: (string|null), durationMinutes: (number|null),
 *   factsExtracted: number}|null} Summary merged with the fields from
 *   parseTranscriptMetadata, or null when the file cannot be read or yields
 *   fewer than 5 messages.
 */
function buildSummary(transcriptPath, sessionType) {
  let raw;
  try {
    raw = readFileSync(transcriptPath, 'utf8');
  } catch (e) {
    log(`read-transcript-failed: ${e.message}`);
    return null;
  }

  const { parser, sessionType: resolvedType } = selectTranscriptParser(sessionType);
  if (sessionType && resolvedType !== sessionType) {
    debug(`unknown-session-type="${sessionType}", falling back to ${resolvedType}`);
  }

  const messages = parser(raw);
  if (messages.length < 5) {
    debug(`session-too-short: ${messages.length} messages (parser=${resolvedType}), skipping`);
    return null;
  }

  const tail = messages.slice(-30);
  const summary =
    `Session with ${messages.length} messages.\n\n` +
    tail.map((m) => `[${m.role}] ${m.content}`).join('\n');

  // Metadata is derived from the same raw string — no second file read.
  const metadata = parseTranscriptMetadata(raw);

  return {
    // OpenAI text-embedding-3-small accepts up to 8192 tokens (~32K chars);
    // 7000 chars leaves safe headroom. The replacement is one code unit for
    // one code unit, so sanitizing after the cut keeps the cap intact.
    summary: replaceLoneSurrogates(summary.slice(0, 7000)),
    messagesCount: messages.length,
    ...metadata,
  };
}
600
+
601
/**
 * Request an embedding for `text` from the OpenAI embeddings endpoint using
 * the text-embedding-3-small model.
 *
 * Never throws: a non-2xx response is logged with its status and the first
 * 200 characters of the body, and any exception is logged with its message.
 *
 * @param {string} text Text to embed.
 * @param {string} openaiKey API key sent as a Bearer token.
 * @returns {Promise<Array<number>|null>} `data[0].embedding` from the
 *   response, or null on any failure or when that field is absent.
 */
async function embedText(text, openaiKey) {
  try {
    const response = await fetch('https://api.openai.com/v1/embeddings', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${openaiKey}`,
      },
      body: JSON.stringify({ model: 'text-embedding-3-small', input: text }),
    });

    if (response.ok) {
      const payload = await response.json();
      return payload?.data?.[0]?.embedding || null;
    }

    // Error path: the body is best-effort context for the log line only.
    const detail = await response.text().catch(() => '');
    log(`openai-embed-failed: HTTP ${response.status} ${detail.slice(0, 200)}`);
    return null;
  } catch (e) {
    log(`openai-embed-exception: ${e.message}`);
    return null;
  }
}
623
+
624
+ // Sprint 50 T2: every row written by this hook carries an LLM-provenance
625
+ // tag (memory_items.source_agent). Defaults to 'claude' for backwards
626
+ // compat with Claude Code's existing SessionEnd payload, which doesn't
627
+ // supply the field; TermDeck server's per-adapter onPanelClose
628
+ // interceptor (Sprint 50 T1) sets it explicitly to 'codex'/'gemini'/'grok'
629
+ // for non-Claude panels. The set is open-ended on the server side; this
630
+ // constant gates only the spelling-mistake/empty-string case.
631
// Spellings accepted for the source_agent provenance tag.
const ALLOWED_SOURCE_AGENTS = new Set(['claude', 'codex', 'gemini', 'grok', 'orchestrator']);

/**
 * Normalize a caller-supplied agent name into an allowed provenance tag.
 *
 * @param {*} raw Agent name from the payload (any type).
 * @returns {string} The trimmed, lower-cased name when it is one of
 *   ALLOWED_SOURCE_AGENTS; otherwise 'claude' (non-strings, blank strings
 *   and unknown names included).
 */
function normalizeSourceAgent(raw) {
  const candidate = typeof raw === 'string' ? raw.trim().toLowerCase() : '';
  return ALLOWED_SOURCE_AGENTS.has(candidate) ? candidate : 'claude';
}
641
+
642
/**
 * Insert one session-summary row into `<supabaseUrl>/rest/v1/memory_items`.
 *
 * The row is written with source_type 'session_summary' and category
 * 'workflow'; the embedding is sent as a bracketed, comma-joined string; a
 * falsy sessionId is stored as null; sourceAgent goes through
 * normalizeSourceAgent. Never throws: HTTP failures and exceptions are
 * logged and reported as false.
 *
 * @param {object} args
 * @param {string} args.supabaseUrl Base URL of the Supabase project.
 * @param {string} args.supabaseKey Key sent as both `apikey` and Bearer token.
 * @param {string} args.content Summary text to store.
 * @param {Array<number>} args.embedding Embedding vector for the content.
 * @param {string} args.project Project tag for the row.
 * @param {string} [args.sessionId] Session identifier, if known.
 * @param {string} [args.sourceAgent] Agent that produced the session.
 * @returns {Promise<boolean>} True when the response status is 2xx.
 */
async function postMemoryItem({ supabaseUrl, supabaseKey, content, embedding, project, sessionId, sourceAgent }) {
  try {
    const row = {
      content,
      embedding: `[${embedding.join(',')}]`,
      source_type: 'session_summary',
      category: 'workflow',
      project,
      source_session_id: sessionId || null,
      source_agent: normalizeSourceAgent(sourceAgent),
    };

    const response = await fetch(`${supabaseUrl}/rest/v1/memory_items`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'apikey': supabaseKey,
        'Authorization': `Bearer ${supabaseKey}`,
        'Prefer': 'return=minimal',
      },
      body: JSON.stringify(row),
    });

    if (response.ok) return true;

    const detail = await response.text().catch(() => '');
    log(`supabase-insert-failed: HTTP ${response.status} ${detail.slice(0, 200)}`);
    return false;
  } catch (e) {
    log(`supabase-insert-exception: ${e.message}`);
    return false;
  }
}
673
+
674
+ // Sprint 51.6 T3 — companion write to memory_sessions.
675
+ //
676
+ // History: the bundled hook never wrote memory_sessions until v1.0.2. Joshua's
677
+ // PRIOR personal rag-system hook spawned process-session.ts which inserted
678
+ // memory_sessions rows; the Sprint 38 P0 rewrite replaced that hook with a
679
+ // Mnestra-direct hook that only wrote memory_items. Result: from 2026-05-02
680
+ // 13:24 ET (when bundled overwrote personal) until v1.0.2, no memory_sessions
681
+ // rows accumulated. Sprint 51.6 T1+T2+T3 documented the gap; this function
682
+ // closes it.
683
+ //
684
+ // Schema target: Mnestra migration 017 brings canonical engram in line with
685
+ // petvetbid's rag-system flavor (session_id, summary_embedding, started_at,
686
+ // ended_at, duration_minutes, messages_count, transcript_path, etc). The
687
+ // bundled hook writes the rich shape on every install — fresh-canonical
688
+ // (post-mig-017) and petvetbid alike.
689
+ //
690
+ // Idempotency: Prefer: resolution=merge-duplicates relies on the
691
+ // memory_sessions_session_id_key unique constraint. Mig 017 adds it where
692
+ // absent. SessionEnd-fires-twice (e.g. /exit then PTY close) resolves to a
693
+ // single row.
694
/**
 * Upsert one row into `memory_sessions`, keyed by `session_id`.
 *
 * Never throws: a missing sessionId, a non-OK response, or any exception is
 * logged and reported as `false`.
 *
 * @param {object} args
 * @param {string} args.supabaseUrl - Base URL; `/rest/v1/memory_sessions` is appended.
 * @param {string} args.supabaseKey - Sent as both `apikey` and Bearer token.
 * @param {string} args.summary - Session summary text.
 * @param {Array} [args.summaryEmbedding] - Serialized as '[a,b,...]'; non-arrays become null.
 * @param {string} args.project - Project tag.
 * @param {string} args.sessionId - Required; the call is skipped when falsy.
 * @param {string} [args.transcriptPath] - Falsy becomes null.
 * @param {number} [args.messagesCount] - Non-finite or non-number becomes 0.
 * @param {Date} [args.endedAt] - Anything other than a valid Date falls back to now.
 * @param {string|null} [args.startedAt=null] - Non-string becomes null.
 * @param {number|null} [args.durationMinutes=null] - Non-finite or non-number becomes null.
 * @param {number} [args.factsExtracted=0] - Non-finite or non-number becomes 0.
 * @returns {Promise<boolean>} true when the response is OK, false otherwise.
 */
async function postMemorySession({
  supabaseUrl, supabaseKey,
  summary, summaryEmbedding,
  project, sessionId,
  transcriptPath, messagesCount,
  endedAt,
  // Sprint 51.7 T2 — transcript-derived metadata. All optional; the
  // null/null/0 fallback preserves the minimum-viable-row shape when the
  // transcript carries no timestamps.
  startedAt = null,
  durationMinutes = null,
  factsExtracted = 0,
}) {
  if (!sessionId) {
    log('memory-sessions-skip: sessionId missing — cannot satisfy session_id NOT NULL/UNIQUE.');
    return false;
  }
  try {
    // An Invalid Date (e.g. `new Date('garbage')`) still passes
    // `instanceof Date`, but toISOString() throws RangeError on it. Without
    // this check the exception would be swallowed below and the whole row
    // dropped, so treat it like a missing value and use the current time.
    const endedAtIsValid = endedAt instanceof Date && !Number.isNaN(endedAt.getTime());
    const endedAtIso = (endedAtIsValid ? endedAt : new Date()).toISOString();

    // Sprint 51.6 T3 / T4-CODEX audit: PostgREST requires both
    // `Prefer: resolution=merge-duplicates` AND `?on_conflict=<column>`
    // on the URL to trigger an UPSERT. Without `on_conflict=session_id`
    // a duplicate fire would error against memory_sessions_session_id_key.
    const res = await fetch(`${supabaseUrl}/rest/v1/memory_sessions?on_conflict=session_id`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'apikey': supabaseKey,
        'Authorization': `Bearer ${supabaseKey}`,
        'Prefer': 'resolution=merge-duplicates,return=minimal',
      },
      body: JSON.stringify({
        session_id: sessionId,
        summary,
        summary_embedding: Array.isArray(summaryEmbedding)
          ? `[${summaryEmbedding.join(',')}]`
          : null,
        project,
        // files_changed and topics remain unpopulated (deferred).
        started_at: typeof startedAt === 'string' ? startedAt : null,
        ended_at: endedAtIso,
        // Number.isFinite rejects non-numbers as well as NaN/Infinity.
        // JSON.stringify would turn NaN/Infinity into null, silently
        // bypassing the intended 0 fallback for the two counters.
        duration_minutes: Number.isFinite(durationMinutes) ? durationMinutes : null,
        messages_count: Number.isFinite(messagesCount) ? messagesCount : 0,
        facts_extracted: Number.isFinite(factsExtracted) ? factsExtracted : 0,
        transcript_path: transcriptPath || null,
      }),
    });
    if (!res.ok) {
      // Best-effort read of the error body; only the first 200 chars are logged.
      const body = await res.text().catch(() => '');
      log(`memory-sessions-insert-failed: HTTP ${res.status} ${body.slice(0, 200)}`);
      return false;
    }
    return true;
  } catch (e) {
    log(`memory-sessions-insert-exception: ${e.message}`);
    return false;
  }
}
756
+
757
/**
 * Process one session-end payload: summarize the transcript, embed the
 * summary, and write it to memory_items and memory_sessions.
 *
 * Every failure path logs (or debug-logs) and returns without throwing:
 * unparseable or non-object stdin, missing transcript_path, a transcript
 * that cannot be stat'ed or is smaller than MIN_TRANSCRIPT_BYTES, missing
 * env, no summary, or no embedding.
 *
 * @param {string} input - Raw JSON text read from stdin. Fields read:
 *   transcript_path, cwd, session_id, sessionType/session_type,
 *   source_agent/sourceAgent.
 * @returns {Promise<void>}
 */
async function processStdinPayload(input) {
  let data;
  try { data = JSON.parse(input); }
  catch (e) { log(`parse-stdin-failed: ${e.message}`); return; }

  // JSON.parse accepts bare literals: the text `null` parses successfully
  // to null, and reading a property off null throws a TypeError. Reject
  // non-object payloads explicitly so this stays a logged, fail-soft exit
  // like every other early return in this function.
  if (data === null || typeof data !== 'object') {
    log('parse-stdin-failed: payload is not a JSON object');
    return;
  }

  const transcriptPath = data.transcript_path;
  const cwd = data.cwd || '';
  // Fall back to the transcript's file name (minus `.jsonl`) when the
  // payload carries no session_id.
  const sessionId =
    data.session_id ||
    (transcriptPath ? transcriptPath.split('/').pop().replace('.jsonl', '') : null);

  // Sprint 45 T4: sessionType drives buildSummary's parser dispatch.
  // Read order: payload → TERMDECK_SESSION_TYPE env var → default.
  const sessionType =
    data.sessionType ||
    data.session_type ||
    process.env.TERMDECK_SESSION_TYPE ||
    DEFAULT_SESSION_TYPE;

  // Sprint 50 T2: provenance tag for the row. Read order: payload →
  // TERMDECK_SOURCE_AGENT env var → 'claude'.
  const sourceAgent =
    data.source_agent ||
    data.sourceAgent ||
    process.env.TERMDECK_SOURCE_AGENT ||
    'claude';

  if (!transcriptPath) { log('no-transcript-path: skipping'); return; }

  let stat;
  try { stat = statSync(transcriptPath); }
  catch (e) { log(`cannot-stat-transcript: ${transcriptPath} — ${e.message}`); return; }

  if (stat.size < MIN_TRANSCRIPT_BYTES) {
    debug(`small-transcript: ${stat.size} bytes < ${MIN_TRANSCRIPT_BYTES}, skipping`);
    return;
  }

  const env = readEnv();
  if (!env) return;

  const project = detectProject(cwd);
  debug(`project="${project}", session=${sessionId}, sessionType=${sessionType}`);

  const built = buildSummary(transcriptPath, sessionType);
  if (!built) return;
  const {
    summary,
    messagesCount,
    startedAt: parsedStartedAt,
    endedAt: parsedEndedAt,
    durationMinutes,
    factsExtracted,
  } = built;

  const embedding = await embedText(summary, env.openaiKey);
  if (!embedding) return;

  const itemOk = await postMemoryItem({
    supabaseUrl: env.supabaseUrl,
    supabaseKey: env.supabaseKey,
    content: summary,
    embedding,
    project,
    sessionId,
    sourceAgent,
  });

  // Sprint 51.6 T3: companion memory_sessions write. It is attempted
  // regardless of the memory_items outcome; both writes fail soft.
  //
  // Sprint 51.7 T2: prefer the parser-derived `endedAt` over hook-fire
  // time; fall back to `new Date()` when the parser supplied none.
  const sessionOk = await postMemorySession({
    supabaseUrl: env.supabaseUrl,
    supabaseKey: env.supabaseKey,
    summary,
    summaryEmbedding: embedding,
    project,
    sessionId,
    transcriptPath,
    messagesCount,
    endedAt: parsedEndedAt ? new Date(parsedEndedAt) : new Date(),
    startedAt: parsedStartedAt,
    durationMinutes,
    factsExtracted,
  });

  // One summary line as long as at least one of the two writes landed.
  if (itemOk || sessionOk) {
    log(`ingested: project="${project}" session=${sessionId} bytes=${summary.length} messages=${messagesCount} sessionType=${sessionType} sourceAgent=${normalizeSourceAgent(sourceAgent)} startedAt=${parsedStartedAt || 'null'} durationMin=${durationMinutes === null ? 'null' : durationMinutes} factsExtracted=${factsExtracted} memory_items=${itemOk ? 'ok' : 'fail'} memory_sessions=${sessionOk ? 'ok' : 'fail'}`);
  }
}
859
+
860
+ // Module-export contract for testability. When run as a script (require.main === module),
861
+ // read stdin and process. When require()d (tests), expose helpers.
862
+ if (require.main === module) {
863
+ let input = '';
864
+ process.stdin.setEncoding('utf8');
865
+ process.stdin.on('data', (chunk) => { input += chunk; });
866
+ process.stdin.on('end', () => {
867
+ processStdinPayload(input).catch((e) => log(`hook-error: ${e.message}`));
868
+ });
869
+ } else {
870
+ module.exports = {
871
+ PROJECT_MAP,
872
+ detectProject,
873
+ readEnv,
874
+ buildSummary,
875
+ embedText,
876
+ postMemoryItem,
877
+ // Sprint 51.6 T3 — memory_sessions write companion.
878
+ postMemorySession,
879
+ processStdinPayload,
880
+ LOG_FILE,
881
+ // Sprint 45 T4 — adapter-pluggable transcript-parser surface.
882
+ TRANSCRIPT_PARSERS,
883
+ DEFAULT_SESSION_TYPE,
884
+ parseClaudeJsonl,
885
+ parseCodexJsonl,
886
+ parseGeminiJson,
887
+ parseGrokJson,
888
+ parseAutoDetect,
889
+ selectTranscriptParser,
890
+ // Sprint 50 T2 — source_agent provenance plumbing.
891
+ normalizeSourceAgent,
892
+ ALLOWED_SOURCE_AGENTS,
893
+ // Sprint 51.7 T2 — transcript-metadata extractor for memory_sessions.
894
+ parseTranscriptMetadata,
895
+ FACT_TOOL_NAMES,
896
+ extractContentBlocks,
897
+ };
898
+ }