npm - @lh8ppl/claude-memory-kit - Versions diffs - 0.2.4 → 0.3.0 - Mend

@lh8ppl/claude-memory-kit 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +13 -10
package/bin/cmk-capture-prompt.mjs +21 -1
package/package.json +2 -1
package/src/auto-extract.mjs +68 -11
package/src/capture-prompt.mjs +33 -1
package/src/capture-turn.mjs +64 -6
package/src/conflict-queue.mjs +20 -3
package/src/forget.mjs +13 -0
package/src/frontmatter.mjs +4 -1
package/src/import-anthropic-memory.mjs +25 -1
package/src/index-db.mjs +39 -0
package/src/index-rebuild.mjs +42 -2
package/src/inject-context.mjs +49 -6
package/src/install.mjs +107 -1
package/src/mcp-server.mjs +57 -7
package/src/merge-facts.mjs +12 -0
package/src/provenance.mjs +4 -0
package/src/result-shapes.mjs +1 -1
package/src/scratchpad.mjs +5 -3
package/src/search.mjs +96 -9
package/src/semantic-backend.mjs +485 -0
package/src/settings-hooks.mjs +4 -1
package/src/subcommands.mjs +92 -16
package/src/transcript-index.mjs +162 -0
package/src/turn-tools.mjs +179 -0
package/template/.claude/skills/memory-search/SKILL.md +86 -0
package/template/CLAUDE.md.template +2 -0

package/src/index-rebuild.mjs CHANGED Viewed

@@ -47,6 +47,7 @@ import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
 import { basename, join, relative } from 'node:path';
 import chokidar from 'chokidar';
 import { INDEX_DB_SCHEMA } from './index-db.mjs';
+import { syncTranscriptChunks } from './transcript-index.mjs';
 import { readBullet, parseBulletProvenance } from './provenance.mjs';
 import { parse as parseFrontmatter } from './frontmatter.mjs';
 import {
@@ -145,7 +146,10 @@ export function parseObservationsFromScratchpad({
   projectRoot,
   userDir,
 }) {
-  const lines = content.split('\n');
+  // Task 139 (D-126): CRLF-tolerant read — autocrlf clones rewrite the
+  // committed memory files; a strict-\n split left \r on every line and
+  // the bullet/provenance regexes went blind.
+  const lines = content.split(/\r?\n/);
   const sha1 = sha1OfContent(content);
   const source_file = relativeSource(path, { projectRoot, userDir });
   const baseName = basename(path);
@@ -435,6 +439,12 @@ export function reindexBoot({ projectRoot, userDir, db, now }) {
     });
     const knownPaths = db.prepare('SELECT path FROM files').all();
     for (const { path: relPath } of knownPaths) {
+      // Task 104.2 composition guard: 'transcript:'-prefixed checkpoints
+      // belong to the transcript scope (transcript-index.mjs) — they are
+      // never in the observation live-set and pruning them here would
+      // defeat that scope's checkpoint on every boot. Its own sync prunes
+      // its own orphans.
+      if (relPath.startsWith('transcript:')) continue;
       if (liveRelPaths.has(relPath)) continue;
       const obsCount = db
         .prepare('SELECT COUNT(*) AS n FROM observations WHERE source_file = ?')
@@ -443,12 +453,24 @@ export function reindexBoot({ projectRoot, userDir, db, now }) {
     }
   }
+  // Task 104.2 — sync the transcript scope (the L3 raw tier) in the same
+  // boot pass. Cheap: per-file sha1 checkpoint; best-effort — a transcript
+  // sync hiccup must not fail the observation reindex.
+  let transcripts = { files: 0, chunks: 0 };
+  try {
+    transcripts = syncTranscriptChunks({ db, projectRoot, now: ts });
+  } catch {
+    // best-effort; the next boot retries
+  }
   return {
     filesScanned,
     filesReindexed,
     observationsAffected,
     filesPruned,
     observationsPruned,
+    transcriptFiles: transcripts.files,
+    transcriptChunks: transcripts.chunks,
     durationMs: Date.now() - t0,
     skipped,
   };
@@ -464,13 +486,20 @@ export function reindexBoot({ projectRoot, userDir, db, now }) {
 export function reindexFull({ projectRoot, userDir, db, now }) {
   const t0 = Date.now();
   const ts = now ?? t0;
-  // Drop + recreate (faster than per-row DELETE).
+  // Drop + recreate (faster than per-row DELETE). Task 104.2: the transcript
+  // scope drops + rebuilds with everything else — `files` carries its
+  // checkpoints, so a full reindex must re-chunk from scratch too.
   db.exec(`
     DROP TABLE IF EXISTS observations_fts;
     DROP TRIGGER IF EXISTS obs_after_insert;
     DROP TRIGGER IF EXISTS obs_after_update;
     DROP TRIGGER IF EXISTS obs_after_delete;
     DROP TABLE IF EXISTS observations;
+    DROP TABLE IF EXISTS transcript_chunks_fts;
+    DROP TRIGGER IF EXISTS tch_after_insert;
+    DROP TRIGGER IF EXISTS tch_after_update;
+    DROP TRIGGER IF EXISTS tch_after_delete;
+    DROP TABLE IF EXISTS transcript_chunks;
     DROP TABLE IF EXISTS files;
   `);
   db.exec(INDEX_DB_SCHEMA);
@@ -514,9 +543,20 @@ export function reindexFull({ projectRoot, userDir, db, now }) {
     observationsAffected += txn(source, sha1);
   }
+  // Task 104.2 — rebuild the transcript scope from scratch (its tables were
+  // dropped above). Best-effort, same contract as the boot-path sync.
+  let transcripts = { files: 0, chunks: 0 };
+  try {
+    transcripts = syncTranscriptChunks({ db, projectRoot, now: ts });
+  } catch {
+    // best-effort; the next reindex retries
+  }
   return {
     filesScanned,
     observationsAffected,
+    transcriptFiles: transcripts.files,
+    transcriptChunks: transcripts.chunks,
     durationMs: Date.now() - t0,
     skipped,
   };

package/src/inject-context.mjs CHANGED Viewed

@@ -61,6 +61,33 @@ function trustLabel(rank) {
 const DEFAULT_CAP_BYTES = 13_000;
 const HOOK_EVENT_NAME = 'SessionStart';
+// Task 75.0 (D-64 / memory-os Layer-07 "Ground Truth", D-73 near-verbatim):
+// injecting memory is insufficient — the agent must be TOLD the injected
+// context is authoritative, or it re-derives from code what the snapshot
+// already answers (the D-40 cold-open failure). This preamble leads every
+// non-empty snapshot. It is code-generated (not template-scaffolded) on
+// purpose: always present, never consolidated/evicted/graduated, and
+// existing installs pick it up on upgrade with no re-scaffold (avoids the
+// Task-73 stale-template class).
+//
+// §7.1 composition: the preamble + its 2 joining newlines must fit the
+// 725-byte slack between Σ TIER_BUDGETS (12,275) and DEFAULT_CAP_BYTES
+// (13,000) — worst case 12,275 + len + 2 ≤ 13,000, i.e. len ≤ 723. The
+// boundary test pins len ≤ 700. injectContext also subtracts the reserve
+// from the cap handed to enforceCap, so custom capBytes stay honored.
+export const AUTHORITATIVE_MEMORY_PREAMBLE = [
+  '# Injected memory — AUTHORITATIVE (claude-memory-kit)',
+  '',
+  'Ground-truth ranking: (1) terminal/tool output → live system state;',
+  '(2) THIS snapshot + `cmk search` → documented knowledge & prior decisions;',
+  '(3) official docs → version-specifics; (4) training knowledge → verify against 1-3.',
+  '',
+  'When injected memory contradicts your assumptions, injected memory wins.',
+  'Lead with memory — never re-derive from code what it already answers, and',
+  'never treat a question as novel when the answer is already in your prompt.',
+  'This snapshot is a bounded hot index; `cmk search "<topic>"` reaches the facts not shown here.',
+].join('\n'); // holds multi-byte characters (em dash, arrow): size it in UTF-8 bytes (Buffer.byteLength), not in .length
 // Match any line containing a `(P-XXXXXXXX)`-shaped citation id. Looser
 // than ID_PATTERN on purpose — alphabet-validation is the writer's job;
 // here we just want to recognize "any line that LOOKS like it carries a
@@ -520,7 +547,12 @@ function truncateTierToBudget(blockText, budget, valueById = new Map()) {
 // lowest-priority tier wholesale, logged as a dropped_tiers event.
 // This shouldn't fire under the documented budget table (1500+4500+
 // 4000 = 10000 ≤ 10240 default cap), but the safety net is cheap.
-function enforceCap(orderedBlocks, capBytes, ts) {
+// `reportCapBytes` (Task 75.0): the CALLER-facing cap for Door-4 events.
+// injectContext hands enforceCap a cap reduced by the preamble reserve;
+// truncation.log must still report the capBytes the user configured, not
+// the internal effective value, or the log reads as nonsense (411 when
+// the user set 1024).
+function enforceCap(orderedBlocks, capBytes, ts, reportCapBytes = capBytes) {
   const tierEvents = [];
   // Step 1: per-tier budget enforcement (section-granular).
   for (const block of orderedBlocks) {
@@ -559,7 +591,7 @@ function enforceCap(orderedBlocks, capBytes, ts) {
     bytes -= Buffer.byteLength(dropped.text, 'utf8');
     let event = dropEvents[dropEvents.length - 1];
     if (!event) {
-      event = { ts, capBytes, dropped_tiers: [] };
+      event = { ts, capBytes: reportCapBytes, dropped_tiers: [] };
       dropEvents.push(event);
     }
     event.dropped_tiers.push(dropped.tier);
@@ -707,15 +739,26 @@ export function injectContext({
   }
   // 3. Cap enforcement: drop whole tier blocks from the tail until within
-  // capBytes. Each drop emits one truncation event.
+  // capBytes. Each drop emits one truncation event. The authoritative-memory
+  // preamble (Task 75.0) is reserved out of the cap up front so the final
+  // snapshot (preamble + blocks) still honors capBytes exactly.
+  const preambleReserve =
+    rawBlocks.length > 0
+      ? Buffer.byteLength(AUTHORITATIVE_MEMORY_PREAMBLE, 'utf8') + 2
+      : 0;
   const { blocks: keptBlocks, truncationEvents } = enforceCap(
     rawBlocks,
-    cap,
+    Math.max(0, cap - preambleReserve),
     ts,
+    cap,
   );
-  // 4. Concatenate.
-  const snapshot = keptBlocks.map((b) => b.text).join('\n');
+  // 4. Concatenate. The preamble leads every non-empty snapshot; an empty
+  // snapshot stays empty (don't claim authoritative memory with nothing
+  // behind it).
+  const body = keptBlocks.map((b) => b.text).join('\n');
+  const snapshot =
+    body === '' ? '' : `${AUTHORITATIVE_MEMORY_PREAMBLE}\n\n${body}`;
   // 5. Persist side-effect logs under <projectRoot>/context/.locks/. We
   // only write the project-tier .locks file (which is the well-known

package/src/install.mjs CHANGED Viewed

@@ -39,6 +39,7 @@ import {
   writeFileSync,
 } from 'node:fs';
 import { homedir } from 'node:os';
+import { spawnSync } from 'node:child_process';
 import { basename, dirname, join, relative, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { injectClaudeMdBlock } from './claude-md.mjs';
@@ -411,7 +412,112 @@ export async function install(options = {}) {
     }
   }
-  return { projectRoot, userTier, created, skipped, gitignore, claudeMd, hooks, mcpServer, errors };
+  // Task 46 — semantic-recall opt-in/out. `--with-semantic`: install the
+  // optional embedder (~260 MB once, fully local), flip the project's
+  // default search mode to hybrid, and pre-warm the model so the one-time
+  // download happens NOW, not as a surprise on the first search.
+  // `--no-semantic`: pin keyword explicitly. Neither flag → settings
+  // untouched (keyword by absence). The npm spawn is injectable
+  // (options.spawnNpm) so tests assert the argv without touching the host.
+  // Both flags together → withSemantic wins (the affirmative opt-in beats
+  // the pin-off; checked first below).
+  let semantic = { action: 'skipped' };
+  if (options.withSemantic) {
+    semantic = await enableSemantic({ projectRoot, spawnNpm: options.spawnNpm, warm: options.warmEmbedder });
+    if (semantic.action === 'error') errors.push({ path: 'semantic', error: semantic.error });
+  } else if (options.noSemantic) {
+    const r = mergeProjectSettings(projectRoot, { search: { default_mode: 'keyword' } });
+    semantic = r.ok
+      ? { action: 'disabled', path: r.path }
+      : { action: 'error', error: r.error };
+    if (!r.ok) errors.push({ path: r.path, error: r.error });
+  }
+  return { projectRoot, userTier, created, skipped, gitignore, claudeMd, hooks, mcpServer, semantic, errors };
+}
+/**
+ * Read-merge-write <projectRoot>/context/settings.json, preserving every
+ * key the user already has (over-mutation-safe; deep-merges one level).
+ *
+ * Merge rule per top-level key of `patch`: a plain-object value is spread
+ * over the existing value for that key (patch fields win, sibling fields
+ * survive); any other value (primitive, array, null) replaces the existing
+ * one outright. Keys absent from `patch` are copied through unchanged.
+ *
+ * A missing settings file is treated as `{}` and the containing directory
+ * is created on demand. Output is 2-space-indented JSON plus a trailing
+ * newline.
+ *
+ * NOTE(review): when the file already holds a non-object under a patched
+ * key (e.g. a string or an array), the object spread copies its indexed
+ * entries into the merged object. Presumably never the case for
+ * kit-written settings; confirm or guard.
+ *
+ * @param {string} projectRoot - Directory that contains `context/`.
+ * @param {object} patch - Top-level settings keys to merge in.
+ * @returns {{ok: true, path: string} | {ok: false, path: string, error: string}}
+ *   Read, parse, mkdir and write failures are caught and reported as
+ *   `ok: false`; a settings file that fails to parse is left untouched
+ *   because the write is never reached.
+ */
+export function mergeProjectSettings(projectRoot, patch) {
+  const path = join(projectRoot, 'context', 'settings.json');
+  try {
+    let current = {};
+    if (existsSync(path)) {
+      current = JSON.parse(readFileSync(path, 'utf8'));
+    }
+    const next = { ...current };
+    for (const [key, value] of Object.entries(patch)) {
+      next[key] =
+        value && typeof value === 'object' && !Array.isArray(value)
+          ? { ...(current[key] ?? {}), ...value }
+          : value;
+    }
+    mkdirSync(dirname(path), { recursive: true });
+    writeFileSync(path, JSON.stringify(next, null, 2) + '\n', 'utf8');
+    return { ok: true, path };
+  } catch (err) {
+    return { ok: false, path, error: err?.message ?? String(err) };
+  }
+}
+/**
+ * The production npm-spawn closure, as an injectable-seam factory
+ * (Task 125.4) so its argv/shell/timeout contract is testable without
+ * running a real `npm install -g` (which stays a machine-level step
+ * tests must never take).
+ *
+ * @param {object} [deps]
+ * @param {Function} [deps.spawnSyncImpl] - Stand-in for
+ *   `child_process.spawnSync`; it is called with the command string and
+ *   an options object. Defaults to the real `spawnSync`.
+ * @returns {Function} Zero-argument runner. Each call performs the
+ *   install synchronously and returns `{ status, error }`, where `status`
+ *   is passed through from the spawn result and `error` is the message of
+ *   the spawn result's `error`, or undefined when none was reported.
+ *
+ * NOTE(review): Node documents a null `status` for a child killed by a
+ * signal, which is what the timeout does. Callers should therefore treat
+ * anything other than 0 as failure; confirm against the call sites.
+ */
+export function buildDefaultNpmRunner({ spawnSyncImpl = spawnSync } = {}) {
+  return () => {
+    // One constant command string under shell:true (no user input — and
+    // an args array + shell:true trips Node's DEP0190). npm is npm.cmd
+    // on Windows; the shell resolves it cross-platform.
+    const r = spawnSyncImpl('npm install -g @huggingface/transformers', {
+      encoding: 'utf8',
+      stdio: 'inherit',
+      shell: true,
+      // spawn-discipline (design §8.5): a hung registry shouldn't hang
+      // install forever; 10 min covers the ~46 MB package on slow links.
+      timeout: 600_000,
+    });
+    return { status: r.status, error: r.error?.message };
+  };
+}
+async function enableSemantic({ projectRoot, spawnNpm, warm }) {
+  // 1. Install the optional embedder globally (it resolves as a sibling of
+  // the globally-installed kit). Injectable for tests.
+  const runNpm = spawnNpm ?? buildDefaultNpmRunner();
+  const npm = runNpm();
+  if (npm.status !== 0) { // any status other than 0 (including null) aborts before settings are touched
+    return {
+      action: 'error',
+      error: `npm install -g @huggingface/transformers failed (${npm.error ?? `exit ${npm.status}`}) — semantic recall NOT enabled; keyword search is unaffected`,
+    };
+  }
+  // 2. Flip the project default to hybrid ONLY after the dependency landed
+  // (no half-state: a hybrid default without an embedder would degrade
+  // every search to a fallback warning).
+  const settings = mergeProjectSettings(projectRoot, { search: { default_mode: 'hybrid' } });
+  if (!settings.ok) {
+    return { action: 'error', error: settings.error }; // the npm step already succeeded; only the settings write failed
+  }
+  // 3. Pre-warm (best-effort): the one-time model download happens during
+  // install, not on the first search. Injectable for tests.
+  let warmed = { ok: false, reason: 'skipped' }; // placeholder; reassigned by the try or the catch below
+  try {
+    const warmFn =
+      warm ??
+      (async () => {
+        const { warmEmbedder } = await import('./semantic-backend.mjs');
+        return warmEmbedder();
+      });
+    warmed = await warmFn();
+  } catch (err) {
+    warmed = { ok: false, reason: err?.message ?? String(err) };
+  }
+  return { action: 'enabled', path: settings.path, defaultMode: 'hybrid', warmed }; // a failed warm-up is reported in `warmed`, never as action 'error'
 }
 /**

package/src/mcp-server.mjs CHANGED Viewed

@@ -106,16 +106,60 @@ export function validatePath(p, { projectRoot, userDir }) {
 // --- Tool handlers ----------------------------------------------------
-function makeMkSearch({ db, semanticBackend }) {
-  return async ({ query, mode, tier, since, limit, min_trust }) => {
+function makeMkSearch({ db, semanticBackend, projectRoot }) {
+  return async ({ query, mode, scope, tier, since, limit, min_trust }) => {
+    // Task 46: explicit mode wins; otherwise the project's configured
+    // default (search.default_mode — set by `cmk install --with-semantic`).
+    const { prepareSemanticBackend, resolveDefaultSearchMode } = await import(
+      './semantic-backend.mjs'
+    );
+    let wantMode =
+      mode ??
+      (projectRoot ? resolveDefaultSearchMode({ projectRoot }) : SEARCH_MODES.KEYWORD);
+    // Task 65: when the caller asks for semantic/hybrid and no test seam is
+    // injected, prepare the REAL embedded backend (lazy-optional — an absent
+    // embedder degrades to the actionable error below; keyword unaffected).
+    let backend = semanticBackend;
+    let degradedNote = null;
+    if (
+      backend === undefined &&
+      (wantMode === SEARCH_MODES.SEMANTIC || wantMode === SEARCH_MODES.HYBRID)
+    ) {
+      const prep = await prepareSemanticBackend({ db, query, scope: scope ?? 'facts' });
+      if (!prep.ok && mode) {
+        // Explicitly requested — surface the actionable error.
+        return {
+          content: [
+            {
+              type: 'text',
+              text: `error: semantic backend unavailable (${prep.reason}). ${prep.hint ?? 'Use mode "keyword".'}`,
+            },
+          ],
+          isError: true,
+        };
+      }
+      if (!prep.ok) {
+        // Configured default can't run — degrade gracefully to keyword,
+        // but NOT silently (Task 125.1, the user's call reversing the
+        // Task-46 review skip): the note below tells the model what it
+        // got, so it can relay the fix to the user.
+        wantMode = SEARCH_MODES.KEYWORD;
+        degradedNote =
+          `note: this project's configured default search is semantic (hybrid), but the embedder is unavailable (${prep.reason}) — these are keyword-only results. ` +
+          'Suggest the user run `cmk install --with-semantic` to restore semantic recall.';
+      } else {
+        backend = prep.backend;
+      }
+    }
     const r = search({
       db, query,
-      mode: mode ?? SEARCH_MODES.KEYWORD,
+      mode: wantMode,
+      scope,
       tier,
       since,
       limit,
       minTrust: min_trust,
-      semanticBackend,
+      semanticBackend: backend,
     });
     if (r.action === 'error') {
       return {
@@ -124,7 +168,12 @@ function makeMkSearch({ db, semanticBackend }) {
       };
     }
     return {
-      content: [{ type: 'text', text: JSON.stringify(r.results, null, 2) }],
+      content: [
+        { type: 'text', text: JSON.stringify(r.results, null, 2) },
+        // Results stay content[0] (shape-compatible); the degradation note,
+        // when present, rides as a second block.
+        ...(degradedNote ? [{ type: 'text', text: degradedNote }] : []),
+      ],
     };
   };
 }
@@ -505,17 +554,18 @@ export function buildMcpServer({ projectRoot, userDir, db, semanticBackend }) {
   server.registerTool(
     'mk_search',
     {
-      description: 'Search kit memory (FTS5 keyword by default; semantic + hybrid require the Layer-5b semantic backend, not yet shipped).',
+      description: 'Search kit memory. FTS5 keyword by default; semantic + hybrid use the embedded Layer-5b backend (sqlite-vec + a local ONNX embedder — needs the optional @huggingface/transformers install).',
       inputSchema: {
         query: z.string().min(1).describe('search query'),
         mode: z.enum(['keyword', 'semantic', 'hybrid']).optional(),
+        scope: z.enum(['facts', 'transcripts']).optional().describe("'facts' (default) = curated memory; 'transcripts' = the raw session record — the LAST-RESORT recall tier, search it only when curated memory has no answer"),
         tier: z.enum(['U', 'P', 'L']).optional(),
         since: z.string().optional().describe('ISO 8601 timestamp'),
         limit: z.number().int().positive().max(1000).optional(),
         min_trust: z.enum(['low', 'medium', 'high']).optional(),
       },
     },
-    makeMkSearch({ db, semanticBackend }),
+    makeMkSearch({ db, semanticBackend, projectRoot }),
   );
   // mk_get

package/src/merge-facts.mjs CHANGED Viewed

@@ -26,6 +26,7 @@ import { parse, format } from './frontmatter.mjs';
 import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
 import { ERROR_CATEGORIES, errorResult, notFoundResult } from './result-shapes.mjs';
 import { writeFact } from './write-fact.mjs';
+import { reindex } from './reindex.mjs';
 function listLiveFactFiles(factDir) {
   if (!existsSync(factDir)) return [];
@@ -193,6 +194,17 @@ export function mergeFacts(opts = {}) {
   const supersededA = moveToSuperseded(matchA, writeResult.id);
   const supersededB = moveToSuperseded(matchB, writeResult.id);
+  // Task 124 (the D-112 class): writeFact refreshed INDEX.md when C was
+  // created — but A and B left the fact dir AFTER that, so the index kept
+  // both as dangling lines until a manual `cmk reindex`. The writer owns
+  // the derived view on the removal side too. Best-effort, same contract
+  // as writeFact's: the merge is already durable on disk.
+  try {
+    reindex({ tier, projectRoot, userDir, warn: () => {} });
+  } catch {
+    // index rebuild is best-effort; the merge already succeeded
+  }
   const ts = now ?? nowIso();
   appendAuditEntry(tierRoot, {
     ts,

package/src/provenance.mjs CHANGED Viewed

@@ -44,6 +44,10 @@ const VALID_WRITE_SOURCES = new Set([
   'compressor',
   'manual-edit',
   'imported',
+  // Task 138 review finding: the conflict-queue merge-both action writes a
+  // merged bullet to the scratchpad; its provenance needs a valid write key
+  // (the old hand-rolled comment had none and broke reindex - D-125 class).
+  'merged',
 ]);
 const REQUIRED_PROVENANCE_FIELDS = [
   'source',

package/src/result-shapes.mjs CHANGED Viewed

@@ -104,7 +104,7 @@ export const ERROR_CATEGORIES = Object.freeze({
   POISON_GUARD: 'poison_guard',
   // `cmk search` requested --mode=semantic or --mode=hybrid but the
-  // Layer-5b semantic backend is not yet shipped (Task 30, design
+  // Layer-5b semantic backend's optional embedder is not installed (Task 30/65, design
   // §9.3). Pairs with `process.exitCode = 2` in subcommands.mjs per
   // tasks.md 30.2's explicit "exit 2 when unavailable" contract.
   // NO silent fallback to keyword — the user asked for semantic,

package/src/scratchpad.mjs CHANGED Viewed

@@ -180,7 +180,9 @@ function findSectionRange(lines, sectionTitle) {
 }
 function insertIntoSection(text, sectionTitle, bullet) {
-  const lines = text.split('\n');
+  // Task 139 (D-126): CRLF-tolerant read; the join below re-emits \n,
+  // so a CRLF-converted scratchpad self-heals on the next write.
+  const lines = text.split(/\r?\n/);
   const range = findSectionRange(lines, sectionTitle);
   if (!range) return null;
   // Insert before the next `## ` heading; skip trailing blank lines so the
@@ -208,7 +210,7 @@ function insertIntoSection(text, sectionTitle, bullet) {
 export function ensureSectionExists(scratchpadPath, sectionTitle) {
   if (!existsSync(scratchpadPath)) return { created: false, error: 'no-file' };
   const text = readFileSync(scratchpadPath, 'utf8');
-  if (findSectionRange(text.split('\n'), sectionTitle)) return { created: false };
+  if (findSectionRange(text.split(/\r?\n/), sectionTitle)) return { created: false }; // Task 139: CRLF-tolerant
   const body = text.trimEnd(); // drop trailing whitespace/blank lines (no `\s+$` regex — trips ReDoS heuristics)
   // No leading blank lines for an empty/whitespace-only file (the scaffolded
   // scratchpads are never empty, but keep the output clean if one ever is).
@@ -220,7 +222,7 @@ export function ensureSectionExists(scratchpadPath, sectionTitle) {
 const EVICTED_ID_RE = /^- \(([PUL]-[A-Za-z0-9]+)\)/;
 function consolidate(text, { nowDate }) {
-  const lines = text.split('\n');
+  const lines = text.split(/\r?\n/); // Task 139: CRLF-tolerant
   const removeIdx = new Set();
   const evicted = [];
   const staleCutoff = new Date(nowDate.getTime() - STALE_AFTER_DAYS * 24 * 60 * 60 * 1000);

package/src/search.mjs CHANGED Viewed

@@ -11,7 +11,7 @@
 //             ~100ms for 10k bullets. Always available — the keyword
 //             backend ships in v0.1.0 with no extra install.
 //
-//   semantic  the Layer-5b semantic backend (not yet shipped — the embedded
+//   semantic  the Layer-5b semantic backend (Task 65: sqlite-vec + local ONNX embedder; the embedded
 //             vector backend is a future release; the DI seam below is the
 //             drop-in point). Until then this mode errors with
 //             ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE when the caller
@@ -54,6 +54,15 @@ export const SEARCH_MODES = Object.freeze({
 export const DEFAULT_LIMIT = 20;
 const MAX_LIMIT = 1000;
+// Task 104.2 (D-117) — search scopes. 'facts' = the curated observation
+// index (L1, the default). 'transcripts' = the SEPARATE raw-transcript
+// chunk index (the L3 last-resort tier) — reached ONLY when explicitly
+// asked, so raw history never pollutes curated results.
+export const SEARCH_SCOPES = Object.freeze({
+  FACTS: 'facts', // used by validateInput when opts.scope is undefined
+  TRANSCRIPTS: 'transcripts', // validateInput rejects tier / minTrust / since under this scope
+});
 const TRUST_ORDINAL = Object.freeze({
   low: 1,
   medium: 2,
@@ -107,7 +116,24 @@ function validateInput(opts) {
       errors.push(`limit: must be a positive integer ≤ ${MAX_LIMIT}`);
     }
   }
-  return { errors, mode };
+  const scope = opts.scope ?? SEARCH_SCOPES.FACTS;
+  if (scope !== SEARCH_SCOPES.FACTS && scope !== SEARCH_SCOPES.TRANSCRIPTS) {
+    errors.push(`scope: must be one of facts/transcripts (got ${JSON.stringify(scope)})`);
+  }
+  if (scope === SEARCH_SCOPES.TRANSCRIPTS) {
+    // Chunks carry no tier/trust/created_at — rejecting these is more honest
+    // than silently ignoring them (the explicit-vs-configured asymmetry rule).
+    for (const [key, label] of [
+      ['tier', 'tier'],
+      ['minTrust', 'minTrust'],
+      ['since', 'since'],
+    ]) {
+      if (opts[key] !== undefined) {
+        errors.push(`${label}: not supported under the transcripts scope (raw chunks carry no ${label})`);
+      }
+    }
+  }
+  return { errors, mode, scope };
 }
 // --- Keyword (FTS5 BM25) backend --------------------------------------
@@ -211,6 +237,60 @@ function runKeywordSearch(db, opts) {
   }));
 }
+// --- Transcript-scope keyword backend (Task 104.2, the L3 raw tier) ----
+const TRANSCRIPT_KEYWORD_SQL = `
+SELECT
+  t.source_file AS source_file,
+  t.source_line AS source_line,
+  t.heading AS heading,
+  transcript_chunks_fts.rank AS score,
+  snippet(transcript_chunks_fts, 0, '<b>', '</b>', '...', 16) AS snippet
+FROM transcript_chunks_fts
+JOIN transcript_chunks t ON t.rowid = transcript_chunks_fts.rowid
+WHERE transcript_chunks_fts MATCH @query
+ORDER BY transcript_chunks_fts.rank
+LIMIT @limit
+`; // named parameters @query and @limit are bound by runTranscriptKeywordSearch; rows come back in ascending rank order
+// Synthetic, readable id for a raw chunk (chunks are locations, not curated
+// facts — no [PUL]-XXXXXXXX identity). Also the RRF fusion key in hybrid
+// mode and the drill-back handle the memory-search skill surfaces.
+function transcriptHitId(row) { // row must carry source_file and source_line
+  return `T:${row.source_file}:${row.source_line}`; // shape: T:<source_file>:<source_line>
+}
+function runTranscriptKeywordSearch(db, opts) { // db: handle exposing prepare(sql).all(params); opts: reads query and limit only
+  let rows;
+  try {
+    rows = db
+      .prepare(TRANSCRIPT_KEYWORD_SQL)
+      .all({ query: opts.query, limit: opts.limit ?? DEFAULT_LIMIT }); // limit falls back to DEFAULT_LIMIT when the caller gave none
+  } catch (err) {
+    if (err?.code === 'SQLITE_ERROR' || /fts5:|no such column:/i.test(err?.message ?? '')) { // NOTE(review): SQLITE_ERROR is SQLite's generic code, so presumably a missing table is also reported as a parse error; confirm
+      throw new FTS5ParseError(err, opts.query);
+    }
+    throw err; // anything else propagates unchanged
+  }
+  return rows.map((r) => ({
+    id: transcriptHitId(r),
+    // Raw turns contain newlines (dialogue + Tools blocks) — flatten so the
+    // one-line-per-hit output contract holds across scopes.
+    snippet: flattenSnippet(r.snippet),
+    source_file: r.source_file,
+    source_line: r.source_line,
+    heading: r.heading,
+    score: r.score, // the FTS5 rank column selected AS score in TRANSCRIPT_KEYWORD_SQL
+  }));
+}
+const TRANSCRIPT_SNIPPET_MAX = 240; // longest flattened snippet, in UTF-16 code units, before the ellipsis is appended
+function flattenSnippet(s) {
+  const flat = String(s ?? '').replace(/\s+/g, ' ').trim(); // null/undefined become ''; every whitespace run (newlines included) becomes one space
+  return flat.length > TRANSCRIPT_SNIPPET_MAX ? flat.slice(0, TRANSCRIPT_SNIPPET_MAX) + '…' : flat; // NOTE(review): slice() cuts by UTF-16 unit and can split a surrogate pair or a <b>...</b> highlight tag; confirm acceptable
+}
 // --- Reciprocal-rank fusion (hybrid mode) -----------------------------
 /**
@@ -255,10 +335,15 @@ export function reciprocalRankFusion({
 // --- Public boundary --------------------------------------------------
 export function search(opts = {}) {
-  const { errors, mode } = validateInput(opts);
+  const { errors, mode, scope } = validateInput(opts);
   if (errors.length > 0) {
     return errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });
   }
+  // Scope dispatch (Task 104.2): the transcripts scope swaps the keyword
+  // backend; semantic/hybrid use the caller-prepared backend exactly like
+  // the facts scope (prepareSemanticBackend({scope}) embeds the right table).
+  const keywordBackend =
+    scope === SEARCH_SCOPES.TRANSCRIPTS ? runTranscriptKeywordSearch : runKeywordSearch;
   // Semantic + hybrid require an injected backend. Production v0.1.0
   // passes undefined → error with the not-yet-shipped hint. A future
@@ -268,7 +353,8 @@ export function search(opts = {}) {
       return errorResult({
         category: ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE,
         errors: [
-          'the Layer-5b semantic backend is not yet shipped — semantic/hybrid search will land in a future release. ' +
+          'no semantic backend provided — semantic/hybrid need the embedded Layer-5b backend prepared by the caller ' +
+            '(the CLI/MCP do this automatically when the optional @huggingface/transformers embedder is installed). ' +
             'Use --mode=keyword for the always-available FTS5 search.',
         ],
       });
@@ -278,15 +364,16 @@ export function search(opts = {}) {
   let results;
   try {
     if (mode === SEARCH_MODES.KEYWORD) {
-      results = runKeywordSearch(opts.db, opts);
+      results = keywordBackend(opts.db, opts);
     } else if (mode === SEARCH_MODES.SEMANTIC) {
       // The semantic backend is an injected callable returning the same
-      // shape as runKeywordSearch (array of {id, snippet, source_file,
-      // source_line, tier, trust, score}).
+      // shape as the scope's keyword backend (facts: {id, snippet,
+      // source_file, source_line, tier, trust, score}; transcripts: the
+      // synthetic-T:-id shape without tier/trust).
       results = opts.semanticBackend(opts);
     } else {
       // hybrid: run both backends + fuse.
-      const keywordResults = runKeywordSearch(opts.db, opts);
+      const keywordResults = keywordBackend(opts.db, opts);
       const semanticResults = opts.semanticBackend(opts);
       const fused = reciprocalRankFusion({
         keywordResults,
@@ -308,5 +395,5 @@ export function search(opts = {}) {
     throw err;
   }
-  return { action: 'found', mode, results };
+  return { action: 'found', mode, scope, results };
 }