@jhizzard/termdeck-stack 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -51,7 +51,17 @@
51
51
  * a new transcript parser, a default PROJECT_MAP change. Comment-only
52
52
  * tweaks do not need a bump.
53
53
  *
54
- * @termdeck/stack-installer-hook v1
54
+ * v2 (Sprint 51.7 T2 — metadata completeness + wire-up insurance):
55
+ * - parseTranscriptMetadata() now populates memory_sessions.started_at /
56
+ * duration_minutes / facts_extracted from per-message timestamps and
57
+ * memory_remember tool_use counts, closing the v1 "minimum viable row"
58
+ * gap Codex flagged at Sprint 51.6 Phase B.
59
+ * - Stamp bump load-bearing as INSURANCE for the Sprint 51.6 wire-up bug
60
+ * (T1 fix landing in same v1.0.3 wave): an installed-v1 user upgrading
61
+ * to bundled-v2 always passes the `installed >= bundled` short-circuit
62
+ * at init-mnestra.js:550 and reaches the refresh path.
63
+ *
64
+ * @termdeck/stack-installer-hook v2
55
65
  *
56
66
  * Required env vars (validated at entry, after the secrets.env fallback):
57
67
  * - SUPABASE_URL e.g. https://<project-ref>.supabase.co
@@ -440,11 +450,117 @@ function selectTranscriptParser(sessionType) {
440
450
  return { parser: parseAutoDetect, sessionType: 'auto' };
441
451
  }
442
452
 
443
- // Sprint 51.6 T3: returns `{ summary, messagesCount }` instead of just the
444
- // summary string. messagesCount feeds the new memory_sessions write path
445
- // (postMemorySession), which needs the parser-derived count without
446
- // reparsing the transcript. Returns null when the transcript is unreadable
447
- // or has fewer than 5 messages — same skip semantics as before.
453
+ // ──────────────────────────────────────────────────────────────────────────
454
+ // Sprint 51.7 T2 — transcript metadata extractor for memory_sessions.
455
+ //
456
+ // The v1 bundled hook (Sprint 51.6 T3) intentionally shipped the "minimum
457
+ // viable row" — postMemorySession set started_at, duration_minutes, and
458
+ // facts_extracted to NULL/0 because v1 omitted transcript parsing for
459
+ // per-message timestamps. The legacy rag-system writer
460
+ // (~/Documents/Graciella/rag-system/src/scripts/process-session.ts) populated
461
+ // those fields by parsing the transcript JSONL passed to it on stdin, and
462
+ // petvetbid's 289 baseline rows carried the rich shape from that writer.
463
+ // v2 closes the gap in pure Node so the bundled hook reaches parity without
464
+ // the rag-system dependency (Class E hidden-dependency rule).
465
+ //
466
+ // Heuristic for facts_extracted: count distinct `tool_use` blocks whose
467
+ // `name` matches a memory_remember MCP tool. Conservative by design — a
468
+ // regex like /Remember:/ inside summary text would over-match quoted user
469
+ // content (e.g., "the user typed 'Remember:' in their prompt"). Counting
470
+ // tool_use blocks instead measures what was actually written into the store
471
+ // during the session, which is the semantic the rag-system writer used.
472
+ //
473
+ // Tool name variants observed in real transcripts (T4-CODEX 11:09 ET pre-
474
+ // audit confirmed both prefixes are live in `~/.claude/projects/`):
475
+ // - `memory_remember` (bare; CC native + future-proofing)
476
+ // - `mcp__mnestra__memory_remember` (current Mnestra MCP, post-rename)
477
+ // - `mcp__memory__memory_remember` (legacy MCP server name from when
478
+ // the project was called "memory")
479
+ // Counting all three avoids undercounting on existing user transcripts.
480
+ // ──────────────────────────────────────────────────────────────────────────
481
+
482
// Tool names whose `tool_use` blocks count toward facts_extracted: the bare
// `memory_remember` name plus the `mcp__mnestra__` and `mcp__memory__`
// prefixed variants.
const FACT_TOOL_NAMES = new Set([
  'memory_remember',
  'mcp__mnestra__memory_remember',
  'mcp__memory__memory_remember',
]);

// Sprint 51.7 T2 / T4-CODEX 11:13 ET catch: each adapter shipped by this
// hook stores message content under a different key shape, and we have to
// match all of them or facts_extracted under-counts whenever a non-Claude
// session writes to memory_sessions. Mirror the shapes already documented
// at the top of TRANSCRIPT_PARSERS:
//
//   - Claude Code (current):  msg.message.content[]
//   - Grok (Sprint 50 T1):    msg.content[] (flat, AI SDK provider shape)
//   - Codex (response_item):  msg.payload.content[] when msg.type === 'response_item'
//
// Gemini's single-JSON envelope doesn't apply per-line — its content lives
// inside a top-level messages array, and each entry's content is a flat
// array OR a string. extractContentBlocks() handles flat arrays; strings
// are skipped (no tool_use can hide inside a string).
//
// NOTE(review): parseTranscriptMetadata() below parses one JSON value per
// line and never descends into a top-level `messages` array, so a Gemini
// envelope is only covered if its entries reach this function one at a
// time — confirm against the Gemini adapter.

/**
 * Return the array of content blocks carried by one parsed transcript entry.
 *
 * Shapes are probed in a fixed order: `msg.message.content`, then
 * `msg.content`, then `msg.payload.content` (only for `response_item`
 * entries). The first one that is an array wins.
 *
 * @param {*} msg - One parsed transcript entry.
 * @returns {Array|null} The content-block array, or null when `msg` is not an
 *   object or none of the known shapes holds an array.
 */
function extractContentBlocks(msg) {
  if (!msg || typeof msg !== 'object') return null;
  if (msg.message && Array.isArray(msg.message.content)) return msg.message.content;
  if (Array.isArray(msg.content)) return msg.content;
  if (msg.type === 'response_item' && msg.payload && Array.isArray(msg.payload.content)) {
    return msg.payload.content;
  }
  return null;
}

/**
 * Convert a raw transcript timestamp to epoch milliseconds.
 *
 * Strings go through `Date.parse`. Numbers are treated as epoch milliseconds
 * (the `Date` constructor's own convention) — `Date.parse` must NOT be used
 * for them because it stringifies its argument, which turns a value such as
 * 1704067200000 into an unparseable date string. `new Date(n).getTime()`
 * also yields NaN for values outside the representable Date range, which
 * keeps the later `toISOString()` calls from throwing.
 *
 * @param {*} ts - Candidate timestamp (string, number, or anything else).
 * @returns {number|null} Epoch milliseconds, or null when unusable.
 */
function toEpochMs(ts) {
  let ms;
  if (typeof ts === 'number') {
    ms = new Date(ts).getTime();
  } else if (typeof ts === 'string') {
    ms = Date.parse(ts);
  } else {
    return null;
  }
  return Number.isNaN(ms) ? null : ms;
}

/**
 * Derive memory_sessions metadata from a raw JSONL transcript.
 *
 * Each non-empty line is parsed independently; lines that are not valid JSON
 * or do not parse to an object are skipped. The earliest and latest usable
 * timestamps bound the session, and every `tool_use` block whose name is in
 * FACT_TOOL_NAMES adds one to the fact count.
 *
 * @param {string} rawJsonl - Transcript text, one JSON value per line.
 * @returns {{startedAt: (string|null), endedAt: (string|null),
 *   durationMinutes: (number|null), factsExtracted: number}}
 *   ISO-8601 start/end, whole-minute duration (rounded, never negative), and
 *   the fact count. The three time fields are null when no usable timestamp
 *   was found; a non-string or empty input yields all-null times and 0 facts.
 */
function parseTranscriptMetadata(rawJsonl) {
  if (typeof rawJsonl !== 'string' || rawJsonl.length === 0) {
    return { startedAt: null, endedAt: null, durationMinutes: null, factsExtracted: 0 };
  }
  const lines = rawJsonl.split('\n').filter(Boolean);
  let earliestTs = null;
  let latestTs = null;
  let factsExtracted = 0;

  for (const line of lines) {
    let msg;
    // Best-effort by design: a malformed line must not abort the whole
    // transcript, so it is skipped rather than reported.
    try { msg = JSON.parse(line); } catch (_) { continue; }
    if (!msg || typeof msg !== 'object') continue;

    // Timestamp: top-level `timestamp` is the canonical Claude Code shape.
    // Fall back to `msg.message.timestamp` for any future / alt-shape that
    // nests it, or when the top-level value cannot be interpreted.
    let t = toEpochMs(msg.timestamp);
    if (t === null && msg.message) t = toEpochMs(msg.message.timestamp);
    if (t !== null) {
      if (earliestTs === null || t < earliestTs) earliestTs = t;
      if (latestTs === null || t > latestTs) latestTs = t;
    }

    // facts_extracted: count tool_use blocks matching a memory_remember
    // tool name, whichever content shape the entry uses.
    const blocks = extractContentBlocks(msg);
    if (blocks) {
      for (const b of blocks) {
        if (b && b.type === 'tool_use' && typeof b.name === 'string' && FACT_TOOL_NAMES.has(b.name)) {
          factsExtracted += 1;
        }
      }
    }
  }

  const startedAt = earliestTs !== null ? new Date(earliestTs).toISOString() : null;
  const endedAt = latestTs !== null ? new Date(latestTs).toISOString() : null;
  const durationMinutes = (earliestTs !== null && latestTs !== null)
    ? Math.max(0, Math.round((latestTs - earliestTs) / 60000))
    : null;
  return { startedAt, endedAt, durationMinutes, factsExtracted };
}
558
+
559
+ // Sprint 51.6 T3 → 51.7 T2: `buildSummary` now also returns parser-derived
560
+ // metadata (startedAt, endedAt, durationMinutes, factsExtracted) merged into
561
+ // the result object. parseTranscriptMetadata reuses the same raw string —
562
+ // no second readFileSync. Returns null when the transcript is unreadable or
563
+ // has fewer than 5 messages (skip semantics unchanged from v1).
448
564
  function buildSummary(transcriptPath, sessionType) {
449
565
  let raw;
450
566
  try { raw = readFileSync(transcriptPath, 'utf8'); }
@@ -468,7 +584,18 @@ function buildSummary(transcriptPath, sessionType) {
468
584
  tail.map((m) => `[${m.role}] ${m.content}`).join('\n');
469
585
  // OpenAI text-embedding-3-small accepts up to 8192 tokens (~32K chars).
470
586
  // 7000 chars is a safe headroom that survives multibyte expansion.
471
- return { summary: summary.slice(0, 7000), messagesCount: messages.length };
587
+
588
+ // Sprint 51.7 T2: merge transcript-derived metadata so the caller (
589
+ // processStdinPayload → postMemorySession) can populate the
590
+ // memory_sessions.started_at/duration_minutes/facts_extracted fields the
591
+ // v1 hook left NULL/0.
592
+ const metadata = parseTranscriptMetadata(raw);
593
+
594
+ return {
595
+ summary: summary.slice(0, 7000),
596
+ messagesCount: messages.length,
597
+ ...metadata,
598
+ };
472
599
  }
473
600
 
474
601
  async function embedText(text, openaiKey) {
@@ -569,7 +696,15 @@ async function postMemorySession({
569
696
  summary, summaryEmbedding,
570
697
  project, sessionId,
571
698
  transcriptPath, messagesCount,
572
- endedAt
699
+ endedAt,
700
+ // Sprint 51.7 T2 — transcript-derived metadata (closes Sprint 51.6's
701
+ // started_at/duration_minutes/facts_extracted=NULL gap). All optional;
702
+ // null/null/0 fallback preserves the v1 minimum-viable-row shape when the
703
+ // transcript carries no timestamps (e.g. legacy fixtures, pre-CC-2.x
704
+ // payloads, or hand-fed test inputs).
705
+ startedAt = null,
706
+ durationMinutes = null,
707
+ factsExtracted = 0,
573
708
  }) {
574
709
  if (!sessionId) {
575
710
  log('memory-sessions-skip: sessionId missing — cannot satisfy session_id NOT NULL/UNIQUE.');
@@ -595,14 +730,16 @@ async function postMemorySession({
595
730
  ? `[${summaryEmbedding.join(',')}]`
596
731
  : null,
597
732
  project,
733
+ // Sprint 51.7 T2: started_at + duration_minutes + facts_extracted now
734
+ // populated from parseTranscriptMetadata when transcript timestamps
735
+ // are present. files_changed and topics remain unpopulated (would
736
+ // require diff parsing the bundled hook doesn't have; deferred).
737
+ started_at: typeof startedAt === 'string' ? startedAt : null,
598
738
  ended_at: (endedAt instanceof Date ? endedAt : new Date()).toISOString(),
739
+ duration_minutes: typeof durationMinutes === 'number' ? durationMinutes : null,
599
740
  messages_count: typeof messagesCount === 'number' ? messagesCount : 0,
741
+ facts_extracted: typeof factsExtracted === 'number' ? factsExtracted : 0,
600
742
  transcript_path: transcriptPath || null,
601
- // started_at, duration_minutes, facts_extracted, files_changed, topics
602
- // intentionally omitted — column defaults apply on petvetbid; nullable
603
- // on canonical (post-mig-017). Future sprint may parse per-message
604
- // timestamps to derive started_at + duration; v1.0.2 ships the
605
- // minimum viable row.
606
743
  }),
607
744
  });
608
745
  if (!res.ok) {
@@ -668,7 +805,14 @@ async function processStdinPayload(input) {
668
805
 
669
806
  const built = buildSummary(transcriptPath, sessionType);
670
807
  if (!built) return;
671
- const { summary, messagesCount } = built;
808
+ const {
809
+ summary,
810
+ messagesCount,
811
+ startedAt: parsedStartedAt,
812
+ endedAt: parsedEndedAt,
813
+ durationMinutes,
814
+ factsExtracted,
815
+ } = built;
672
816
 
673
817
  const embedding = await embedText(summary, env.openaiKey);
674
818
  if (!embedding) return;
@@ -686,6 +830,13 @@ async function processStdinPayload(input) {
686
830
  // Sprint 51.6 T3: companion memory_sessions write. Independent of the
687
831
  // memory_items write — a memory_items failure shouldn't suppress the
688
832
  // memory_sessions row, and vice versa. Both errors fail-soft.
833
+ //
834
+ // Sprint 51.7 T2: prefer parser-derived `endedAt` (last-message
835
+ // timestamp) over hook-fire-time when the transcript carried timestamps.
836
+ // Matches the rag-system writer's semantics — `ended_at` is "when the
837
+ // conversation last had activity," not "when the SessionEnd hook
838
+ // happened to fire." Falls back to `new Date()` when the parser found
839
+ // no timestamps, preserving v1 behavior.
689
840
  const sessionOk = await postMemorySession({
690
841
  supabaseUrl: env.supabaseUrl,
691
842
  supabaseKey: env.supabaseKey,
@@ -695,11 +846,14 @@ async function processStdinPayload(input) {
695
846
  sessionId,
696
847
  transcriptPath,
697
848
  messagesCount,
698
- endedAt: new Date(),
849
+ endedAt: parsedEndedAt ? new Date(parsedEndedAt) : new Date(),
850
+ startedAt: parsedStartedAt,
851
+ durationMinutes,
852
+ factsExtracted,
699
853
  });
700
854
 
701
855
  if (itemOk || sessionOk) {
702
- log(`ingested: project="${project}" session=${sessionId} bytes=${summary.length} messages=${messagesCount} sessionType=${sessionType} sourceAgent=${normalizeSourceAgent(sourceAgent)} memory_items=${itemOk ? 'ok' : 'fail'} memory_sessions=${sessionOk ? 'ok' : 'fail'}`);
856
+ log(`ingested: project="${project}" session=${sessionId} bytes=${summary.length} messages=${messagesCount} sessionType=${sessionType} sourceAgent=${normalizeSourceAgent(sourceAgent)} startedAt=${parsedStartedAt || 'null'} durationMin=${durationMinutes === null ? 'null' : durationMinutes} factsExtracted=${factsExtracted} memory_items=${itemOk ? 'ok' : 'fail'} memory_sessions=${sessionOk ? 'ok' : 'fail'}`);
703
857
  }
704
858
  }
705
859
 
@@ -736,5 +890,9 @@ if (require.main === module) {
736
890
  // Sprint 50 T2 — source_agent provenance plumbing.
737
891
  normalizeSourceAgent,
738
892
  ALLOWED_SOURCE_AGENTS,
893
+ // Sprint 51.7 T2 — transcript-metadata extractor for memory_sessions.
894
+ parseTranscriptMetadata,
895
+ FACT_TOOL_NAMES,
896
+ extractContentBlocks,
739
897
  };
740
898
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jhizzard/termdeck-stack",
3
- "version": "0.6.2",
3
+ "version": "0.6.4",
4
4
  "description": "One-command installer for the TermDeck developer memory stack: TermDeck + Mnestra + Rumen + Supabase MCP",
5
5
  "bin": {
6
6
  "termdeck-stack": "./src/index.js"