npm - @loreai/core - Versions diffs - 0.0.1 → 0.10.0 - Mend

@loreai/core 0.0.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (147) hide show

package/LICENSE +21 -0
package/README.md +26 -5
package/dist/bun/agents-file.d.ts +59 -0
package/dist/bun/agents-file.d.ts.map +1 -0
package/dist/bun/config.d.ts +58 -0
package/dist/bun/config.d.ts.map +1 -0
package/dist/bun/curator.d.ts +35 -0
package/dist/bun/curator.d.ts.map +1 -0
package/dist/bun/db/driver.bun.d.ts +5 -0
package/dist/bun/db/driver.bun.d.ts.map +1 -0
package/dist/bun/db/driver.node.d.ts +15 -0
package/dist/bun/db/driver.node.d.ts.map +1 -0
package/dist/bun/db.d.ts +22 -0
package/dist/bun/db.d.ts.map +1 -0
package/dist/bun/distillation.d.ts +32 -0
package/dist/bun/distillation.d.ts.map +1 -0
package/dist/bun/embedding.d.ts +90 -0
package/dist/bun/embedding.d.ts.map +1 -0
package/dist/bun/gradient.d.ts +73 -0
package/dist/bun/gradient.d.ts.map +1 -0
package/dist/bun/index.d.ts +19 -0
package/dist/bun/index.d.ts.map +1 -0
package/dist/bun/index.js +28236 -0
package/dist/bun/index.js.map +7 -0
package/dist/bun/lat-reader.d.ts +69 -0
package/dist/bun/lat-reader.d.ts.map +1 -0
package/dist/bun/log.d.ts +17 -0
package/dist/bun/log.d.ts.map +1 -0
package/dist/bun/ltm.d.ts +138 -0
package/dist/bun/ltm.d.ts.map +1 -0
package/dist/bun/markdown.d.ts +37 -0
package/dist/bun/markdown.d.ts.map +1 -0
package/dist/bun/prompt.d.ts +47 -0
package/dist/bun/prompt.d.ts.map +1 -0
package/dist/bun/recall.d.ts +41 -0
package/dist/bun/recall.d.ts.map +1 -0
package/dist/bun/search.d.ts +113 -0
package/dist/bun/search.d.ts.map +1 -0
package/dist/bun/temporal.d.ts +66 -0
package/dist/bun/temporal.d.ts.map +1 -0
package/dist/bun/types.d.ts +180 -0
package/dist/bun/types.d.ts.map +1 -0
package/dist/bun/worker.d.ts +6 -0
package/dist/bun/worker.d.ts.map +1 -0
package/dist/node/agents-file.d.ts +59 -0
package/dist/node/agents-file.d.ts.map +1 -0
package/dist/node/config.d.ts +58 -0
package/dist/node/config.d.ts.map +1 -0
package/dist/node/curator.d.ts +35 -0
package/dist/node/curator.d.ts.map +1 -0
package/dist/node/db/driver.bun.d.ts +5 -0
package/dist/node/db/driver.bun.d.ts.map +1 -0
package/dist/node/db/driver.node.d.ts +15 -0
package/dist/node/db/driver.node.d.ts.map +1 -0
package/dist/node/db.d.ts +22 -0
package/dist/node/db.d.ts.map +1 -0
package/dist/node/distillation.d.ts +32 -0
package/dist/node/distillation.d.ts.map +1 -0
package/dist/node/embedding.d.ts +90 -0
package/dist/node/embedding.d.ts.map +1 -0
package/dist/node/gradient.d.ts +73 -0
package/dist/node/gradient.d.ts.map +1 -0
package/dist/node/index.d.ts +19 -0
package/dist/node/index.d.ts.map +1 -0
package/dist/node/index.js +28253 -0
package/dist/node/index.js.map +7 -0
package/dist/node/lat-reader.d.ts +69 -0
package/dist/node/lat-reader.d.ts.map +1 -0
package/dist/node/log.d.ts +17 -0
package/dist/node/log.d.ts.map +1 -0
package/dist/node/ltm.d.ts +138 -0
package/dist/node/ltm.d.ts.map +1 -0
package/dist/node/markdown.d.ts +37 -0
package/dist/node/markdown.d.ts.map +1 -0
package/dist/node/prompt.d.ts +47 -0
package/dist/node/prompt.d.ts.map +1 -0
package/dist/node/recall.d.ts +41 -0
package/dist/node/recall.d.ts.map +1 -0
package/dist/node/search.d.ts +113 -0
package/dist/node/search.d.ts.map +1 -0
package/dist/node/temporal.d.ts +66 -0
package/dist/node/temporal.d.ts.map +1 -0
package/dist/node/types.d.ts +180 -0
package/dist/node/types.d.ts.map +1 -0
package/dist/node/worker.d.ts +6 -0
package/dist/node/worker.d.ts.map +1 -0
package/dist/types/agents-file.d.ts +59 -0
package/dist/types/agents-file.d.ts.map +1 -0
package/dist/types/config.d.ts +58 -0
package/dist/types/config.d.ts.map +1 -0
package/dist/types/curator.d.ts +35 -0
package/dist/types/curator.d.ts.map +1 -0
package/dist/types/db/driver.bun.d.ts +5 -0
package/dist/types/db/driver.bun.d.ts.map +1 -0
package/dist/types/db/driver.node.d.ts +15 -0
package/dist/types/db/driver.node.d.ts.map +1 -0
package/dist/types/db.d.ts +22 -0
package/dist/types/db.d.ts.map +1 -0
package/dist/types/distillation.d.ts +32 -0
package/dist/types/distillation.d.ts.map +1 -0
package/dist/types/embedding.d.ts +90 -0
package/dist/types/embedding.d.ts.map +1 -0
package/dist/types/gradient.d.ts +73 -0
package/dist/types/gradient.d.ts.map +1 -0
package/dist/types/index.d.ts +19 -0
package/dist/types/index.d.ts.map +1 -0
package/dist/types/lat-reader.d.ts +69 -0
package/dist/types/lat-reader.d.ts.map +1 -0
package/dist/types/log.d.ts +17 -0
package/dist/types/log.d.ts.map +1 -0
package/dist/types/ltm.d.ts +138 -0
package/dist/types/ltm.d.ts.map +1 -0
package/dist/types/markdown.d.ts +37 -0
package/dist/types/markdown.d.ts.map +1 -0
package/dist/types/prompt.d.ts +47 -0
package/dist/types/prompt.d.ts.map +1 -0
package/dist/types/recall.d.ts +41 -0
package/dist/types/recall.d.ts.map +1 -0
package/dist/types/search.d.ts +113 -0
package/dist/types/search.d.ts.map +1 -0
package/dist/types/temporal.d.ts +66 -0
package/dist/types/temporal.d.ts.map +1 -0
package/dist/types/types.d.ts +180 -0
package/dist/types/types.d.ts.map +1 -0
package/dist/types/worker.d.ts +6 -0
package/dist/types/worker.d.ts.map +1 -0
package/package.json +48 -5
package/src/agents-file.ts +406 -0
package/src/config.ts +132 -0
package/src/curator.ts +220 -0
package/src/db/driver.bun.ts +18 -0
package/src/db/driver.node.ts +54 -0
package/src/db.ts +433 -0
package/src/distillation.ts +433 -0
package/src/embedding.ts +528 -0
package/src/gradient.ts +1387 -0
package/src/index.ts +109 -0
package/src/lat-reader.ts +374 -0
package/src/log.ts +27 -0
package/src/ltm.ts +861 -0
package/src/markdown.ts +129 -0
package/src/prompt.ts +454 -0
package/src/recall.ts +446 -0
package/src/search.ts +330 -0
package/src/temporal.ts +379 -0
package/src/types.ts +199 -0
package/src/worker.ts +26 -0

package/src/search.ts ADDED Viewed

@@ -0,0 +1,330 @@
+/**
+ * Centralized FTS5 search utilities for Lore.
+ *
+ * Provides query building, stopword filtering, term extraction, score
+ * normalization / rank fusion (RRF), and LLM-based query expansion.
+ * All FTS5 search callers (ltm, temporal, reflect) import from here.
+ */
+/**
+ * Curated stopword set for FTS5 queries. These are common English words that
+ * match broadly and dilute search precision when used with OR semantics.
+ *
+ * CRITICAL: OR without stopword filtering is catastrophic — "the OR for OR and"
+ * matches every document in the corpus. Stopwords MUST be filtered before
+ * building OR queries.
+ *
+ * "and", "or" and "not" are listed for a second reason: their uppercase forms
+ * are FTS5 operator keywords, so letting them through as prefix terms
+ * (e.g. `AND*`) produces a malformed MATCH expression. Lookups are done on the
+ * lowercased token, so every casing is filtered.
+ *
+ * This list is intentionally conservative: only includes words that are
+ * genuinely content-free. Domain terms like "handle", "state", "type" are
+ * NOT stopwords — they carry meaning in code/technical contexts.
+ * Single-character words ("a", "I") are not listed because the term filter
+ * drops every one-character token anyway.
+ */
+export const STOPWORDS: ReadonlySet<string> = new Set([
+  // Articles & determiners
+  "an", "the", "this", "that", "these", "those", "some", "each", "every",
+  "all", "any", "few", "such", "own",
+  // Pronouns & possessives
+  "he", "it", "me", "my", "we", "us", "our", "its", "they", "them", "their",
+  "what", "which", "who", "whom",
+  // Auxiliary & common verbs (content-free)
+  "am", "is", "be", "do", "are", "was", "has", "had", "can", "did", "got",
+  "let", "may", "have", "been", "were", "will", "would", "could", "should",
+  "does", "being",
+  // Prepositions
+  "as", "at", "by", "in", "of", "on", "to", "up", "for", "out", "with",
+  "from", "into", "about", "than", "over", "after", "before", "between",
+  // Conjunctions & negation
+  "and", "or", "nor", "but", "yet", "so", "if", "no", "not",
+  // Adverbs & question words (content-free)
+  "there", "here", "where", "when", "how", "why", "too", "also", "just",
+  "only", "very", "more", "most", "really", "already",
+]);
+/**
+ * Sentinel FTS5 expression (an empty quoted string) returned by ftsQuery()
+ * and ftsQueryOr() when a query contains no meaningful terms after
+ * filtering. Callers should compare against this constant and return a
+ * "query too vague" message instead of executing an FTS5 MATCH against it.
+ */
+export const EMPTY_QUERY = '""';
+/**
+ * Filter raw query text into meaningful FTS5 tokens.
+ *
+ * Filtering (in order):
+ * 1. Replace every character that is not a letter, combining mark, digit,
+ *    underscore or whitespace with a space (punctuation, operators —
+ *    prevents FTS5 injection). Unicode property classes are used so that
+ *    non-ASCII words ("café", "naïve", CJK text) are kept intact rather than
+ *    being chopped at the first non-ASCII character; FTS5 barewords accept
+ *    any non-ASCII code point.
+ * 2. Remove single-character tokens (contraction artifacts like "s", "t")
+ * 3. Remove stopwords (case-insensitive)
+ *
+ * If ALL words are filtered, returns an empty array. The caller decides
+ * what to do (typically returns a "query too vague" message).
+ *
+ * No general length filter — short but meaningful tokens like "DB", "CI",
+ * "IO", "PR" are preserved. Only single chars are dropped. Original casing
+ * of the surviving tokens is preserved.
+ *
+ * @param raw  Free-form user query text
+ * @returns    Surviving tokens in their original order
+ */
+export function filterTerms(raw: string): string[] {
+  return raw
+    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
+    .split(/\s+/)
+    // The length check also discards the empty strings split() yields for
+    // leading/trailing whitespace.
+    .filter((w) => w.length > 1 && !STOPWORDS.has(w.toLowerCase()));
+}
+/**
+ * Turn free-form text into an FTS5 MATCH expression with AND semantics:
+ * every surviving term becomes a prefix term (`term*`) and the terms are
+ * separated by a space, which FTS5 treats as an implicit AND.
+ *
+ * When filtering leaves no terms, the `""` sentinel is returned; callers
+ * should test `q === EMPTY_QUERY` before running a MATCH.
+ */
+export function ftsQuery(raw: string): string {
+  const prefixed = filterTerms(raw).map((term) => term + "*");
+  return prefixed.length === 0 ? EMPTY_QUERY : prefixed.join(" ");
+}
+/**
+ * OR-flavoured counterpart of ftsQuery(): identical term filtering and
+ * prefix expansion, but the terms are joined with the FTS5 `OR` operator.
+ * Intended as the fallback when the AND form yields no rows. Returns
+ * EMPTY_QUERY when no meaningful terms remain.
+ */
+export function ftsQueryOr(raw: string): string {
+  const prefixed = filterTerms(raw).map((term) => term + "*");
+  return prefixed.length === 0 ? EMPTY_QUERY : prefixed.join(" OR ");
+}
+// ---------------------------------------------------------------------------
+// Term extraction (Phase 3)
+// ---------------------------------------------------------------------------
+/**
+ * Extract the top meaningful terms from text, sorted by frequency.
+ *
+ * Same filtering as ftsQuery: drops single chars + stopwords.
+ * No general length threshold — preserves short meaningful tokens like "DB", "CI".
+ * Terms are lowercased before counting, so "Cache" and "cache" are one term.
+ * Letters, combining marks and digits from any script are kept (not just
+ * ASCII), so non-English words are counted whole instead of being split at
+ * the first non-ASCII character.
+ *
+ * Terms with equal frequency keep their order of first appearance (the sort
+ * is stable and the counting map preserves insertion order).
+ *
+ * Used by forSession() to build session context queries for FTS5 scoring.
+ *
+ * @param text   Raw text to extract terms from
+ * @param limit  Max number of terms to return (default 40). A zero, negative
+ *               or NaN limit yields an empty array.
+ * @returns      Up to `limit` lowercased terms, most frequent first
+ */
+export function extractTopTerms(text: string, limit = 40): string[] {
+  // Guard first: slice(0, negative) would otherwise mean "all but the last n".
+  if (!(limit > 0)) return [];
+  const freq = new Map<string, number>();
+  const words = text
+    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
+    .toLowerCase()
+    .split(/\s+/);
+  for (const w of words) {
+    if (w.length <= 1 || STOPWORDS.has(w)) continue;
+    freq.set(w, (freq.get(w) ?? 0) + 1);
+  }
+  return [...freq.entries()]
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, limit)
+    .map(([w]) => w);
+}
+// ---------------------------------------------------------------------------
+// Score normalization & fusion (Phase 2)
+// ---------------------------------------------------------------------------
+/**
+ * Normalize a raw FTS5 BM25 rank to a 0–1 range using min-max normalization.
+ *
+ * FTS5 rank/bm25() values are negative (more negative = better match).
+ * This converts them to 0–1 where 1 = best match in the result set.
+ *
+ * The result is clamped to [0, 1], so a `rank` that falls outside
+ * [minRank, maxRank] (for example when the bounds were computed from a
+ * different subset of rows) still yields a valid display score.
+ *
+ * Used for display scores only — RRF fusion uses rank positions, not scores.
+ *
+ * @param rank     Raw rank of the row being scored
+ * @param minRank  Most negative (best) rank in the result set
+ * @param maxRank  Least negative (worst) rank in the result set
+ * @returns        Score in [0, 1]; 1 when all ranks are identical
+ */
+export function normalizeRank(
+  rank: number,
+  minRank: number,
+  maxRank: number,
+): number {
+  // All same rank → everything is equally relevant (also avoids 0/0).
+  if (minRank === maxRank) return 1;
+  // Invert: best match (minRank) → 1.0, worst (maxRank) → 0.0
+  const score = (maxRank - rank) / (maxRank - minRank);
+  return Math.min(1, Math.max(0, score));
+}
+/**
+ * Reciprocal Rank Fusion: merge multiple ranked lists into a single ranked list.
+ *
+ * RRF score = Σ(1 / (k + rank_i)) for each list where the item appears.
+ * k = 60 is standard (from Cormack et al., 2009; also used by QMD).
+ *
+ * RRF is rank-based, not score-based — raw score magnitude differences across
+ * different FTS5 tables don't matter. Only relative ordering within each list.
+ *
+ * A key that occurs more than once inside a single list contributes only
+ * once for that list, at the position of its first (best) occurrence, so a
+ * list with accidental duplicates cannot inflate an item's score.
+ *
+ * @param lists  Each list provides items (in ranked order) and a key function
+ *               for deduplication. Items at the front of the array are rank 0.
+ * @param k      Smoothing constant. Default 60.
+ * @returns      Fused list sorted by RRF score descending. When items appear
+ *               in multiple lists, the first occurrence's item is kept. Items
+ *               with equal scores keep their order of first appearance.
+ */
+export function reciprocalRankFusion<T>(
+  lists: Array<{ items: T[]; key: (item: T) => string }>,
+  k = 60,
+): Array<{ item: T; score: number }> {
+  const fused = new Map<string, { item: T; score: number }>();
+  for (const list of lists) {
+    // Keys already credited for this particular list.
+    const seen = new Set<string>();
+    list.items.forEach((item, rank) => {
+      const id = list.key(item);
+      if (seen.has(id)) return;
+      seen.add(id);
+      const contribution = 1 / (k + rank);
+      const entry = fused.get(id);
+      if (entry) {
+        entry.score += contribution;
+      } else {
+        fused.set(id, { item, score: contribution });
+      }
+    });
+  }
+  return [...fused.values()].sort((a, b) => b.score - a.score);
+}
+// ---------------------------------------------------------------------------
+// LLM query expansion (Phase 4)
+// ---------------------------------------------------------------------------
+import { QUERY_EXPANSION_SYSTEM } from "./prompt";
+import * as log from "./log";
+import type { LLMClient } from "./types";
+/**
+ * Expand a user query into multiple search variants using the configured LLM.
+ * Returns `[original, ...expanded]`. The original is always first, followed by
+ * at most three non-empty string expansions.
+ *
+ * Uses a 3-second timeout — if the LLM is slow, returns only the original query.
+ * The timer is cleared as soon as the race settles so a fast LLM reply does
+ * not leave a pending timer behind.
+ * Errors (including unparseable LLM output) are caught, logged, and the
+ * original query is returned; this function does not throw.
+ *
+ * The LLM is expected to answer with a JSON array of strings, optionally
+ * wrapped in a Markdown code fence (bare or tagged `json`).
+ *
+ * @param llm       LLM client for prompt calls
+ * @param query     The original user query
+ * @param model     Optional model override
+ * @returns         The original query plus up to three expansions
+ */
+export async function expandQuery(
+  llm: LLMClient,
+  query: string,
+  model?: { providerID: string; modelID: string },
+): Promise<string[]> {
+  const TIMEOUT_MS = 3000;
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  try {
+    // Race the LLM call against a timeout
+    const responseText = await Promise.race([
+      llm.prompt(
+        QUERY_EXPANSION_SYSTEM,
+        `Input: "${query}"`,
+        { model, workerID: "lore-query-expand" },
+      ),
+      new Promise<null>((resolve) => {
+        timer = setTimeout(() => resolve(null), TIMEOUT_MS);
+      }),
+    ]);
+    if (!responseText) {
+      log.info("query expansion timed out or failed, using original query");
+      return [query];
+    }
+    // Strip an optional Markdown fence. The language tag is optional as a
+    // whole ("```" or "```json"), not just its last letter.
+    const cleaned = responseText
+      .trim()
+      .replace(/^```(?:json)?\s*/i, "")
+      .replace(/\s*```$/i, "");
+    const parsed: unknown = JSON.parse(cleaned);
+    if (!Array.isArray(parsed)) return [query];
+    const expanded = parsed.filter(
+      (q): q is string => typeof q === "string" && q.trim().length > 0,
+    );
+    if (!expanded.length) return [query];
+    return [query, ...expanded.slice(0, 3)]; // cap at 3 expansions
+  } catch (err) {
+    log.info("query expansion failed, using original query:", err);
+    return [query];
+  } finally {
+    // Release the timeout whichever side of the race won.
+    if (timer !== undefined) clearTimeout(timer);
+  }
+}

package/src/temporal.ts ADDED Viewed

@@ -0,0 +1,379 @@
+import { db, ensureProject } from "./db";
+import { ftsQuery, ftsQueryOr, EMPTY_QUERY } from "./search";
+import { sanitizeSurrogates } from "./markdown";
+import type { LoreMessage, LorePart } from "./types";
+import { isTextPart, isReasoningPart, isToolPart } from "./types";
+// Rough token count for a piece of text, assuming ~3 characters per token
+// (validated as the best heuristic against real API data). Rounds up.
+function estimate(text: string): number {
+  const CHARS_PER_TOKEN = 3;
+  return Math.ceil(text.length / CHARS_PER_TOKEN);
+}
+// Flatten message parts into one newline-separated string:
+//   - text parts contribute their text as-is,
+//   - reasoning parts with non-empty text are prefixed with "[reasoning] ",
+//   - completed tool parts become "[tool:<name>] <output>",
+//   - every other part is skipped.
+function partsToText(parts: LorePart[]): string {
+  const lines = parts.flatMap((p): string[] => {
+    if (isTextPart(p)) return [p.text];
+    if (isReasoningPart(p) && p.text) return [`[reasoning] ${p.text}`];
+    if (isToolPart(p) && p.state.status === "completed") {
+      return [`[tool:${p.tool}] ${p.state.output}`];
+    }
+    return [];
+  });
+  // Tool outputs and other raw text may carry unpaired surrogates. Left in,
+  // they reach the DB and later break JSON serialization of recall tool
+  // responses, so scrub them before returning.
+  return sanitizeSurrogates(lines.join("\n"));
+}
+// Serialize per-message metadata to JSON. User messages record agent and
+// model; all other roles record modelID, providerID and mode. When the
+// message contains tool parts, their tool names are added under `tools`.
+function messageMetadata(info: LoreMessage, parts: LorePart[]): string {
+  const meta: Record<string, unknown> =
+    info.role === "user"
+      ? { agent: info.agent, model: info.model }
+      : {
+          modelID: info.modelID,
+          providerID: info.providerID,
+          mode: info.mode,
+        };
+  const toolNames = parts.filter(isToolPart).map((part) => part.tool);
+  if (toolNames.length > 0) meta.tools = toolNames;
+  return JSON.stringify(meta);
+}
+/**
+ * Persist a message in `temporal_messages`.
+ *
+ * The parts are flattened to text first; messages whose text is empty or
+ * whitespace-only are not stored. If a row with the same id already exists
+ * its content, token estimate and metadata are overwritten; otherwise a new
+ * row is inserted with `distilled = 0`.
+ */
+export function store(input: {
+  projectPath: string;
+  info: LoreMessage;
+  parts: LorePart[];
+}) {
+  const { projectPath, info, parts } = input;
+  const pid = ensureProject(projectPath);
+  const content = partsToText(parts);
+  if (content.trim() === "") return;
+  const tokens = estimate(content);
+  const metadata = messageMetadata(info, parts);
+  const alreadyStored = db()
+    .query("SELECT id FROM temporal_messages WHERE id = ?")
+    .get(info.id);
+  if (!alreadyStored) {
+    db()
+      .query(
+        `INSERT INTO temporal_messages (id, project_id, session_id, role, content, tokens, distilled, created_at, metadata)
+       VALUES (?, ?, ?, ?, ?, ?, 0, ?, ?)`,
+      )
+      .run(
+        info.id,
+        pid,
+        info.sessionID,
+        info.role,
+        content,
+        tokens,
+        info.time.created,
+        metadata,
+      );
+    return;
+  }
+  db()
+    .query(
+      "UPDATE temporal_messages SET content = ?, tokens = ?, metadata = ? WHERE id = ?",
+    )
+    .run(content, tokens, metadata, info.id);
+}
+/**
+ * Row shape of the `temporal_messages` table as read back by the query
+ * helpers in this module (the results of `SELECT *` are cast to this type).
+ */
+export type TemporalMessage = {
+  /** Message id; store() uses it to decide between INSERT and UPDATE. */
+  id: string;
+  /** Project id as returned by ensureProject(). */
+  project_id: string;
+  session_id: string;
+  /** Message role, copied from `info.role` at insert time. */
+  role: string;
+  /** Flattened text of the message parts (see partsToText). */
+  content: string;
+  /** Estimated token count of `content` (see estimate). */
+  tokens: number;
+  /** 0 when inserted; set to 1 by markDistilled(). */
+  distilled: number;
+  /**
+   * Copied from `info.time.created`. Presumably epoch milliseconds, since
+   * prune() compares it against a Date.now()-based cutoff — TODO confirm.
+   */
+  created_at: number;
+  /** JSON string produced by messageMetadata(). */
+  metadata: string;
+};
+/**
+ * List messages of a project that have not been distilled yet
+ * (`distilled = 0`), oldest first. Pass `sessionID` to restrict the result
+ * to a single session.
+ */
+export function undistilled(
+  projectPath: string,
+  sessionID?: string,
+): TemporalMessage[] {
+  const pid = ensureProject(projectPath);
+  if (sessionID) {
+    return db()
+      .query(
+        "SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0 ORDER BY created_at ASC",
+      )
+      .all(pid, sessionID) as TemporalMessage[];
+  }
+  return db()
+    .query(
+      "SELECT * FROM temporal_messages WHERE project_id = ? AND distilled = 0 ORDER BY created_at ASC",
+    )
+    .all(pid) as TemporalMessage[];
+}
+/**
+ * Return every stored message of one session within a project, ordered by
+ * creation time ascending (distilled and undistilled alike).
+ */
+export function bySession(
+  projectPath: string,
+  sessionID: string,
+): TemporalMessage[] {
+  const pid = ensureProject(projectPath);
+  const statement = db().query(
+    "SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC",
+  );
+  const rows = statement.all(pid, sessionID);
+  return rows as TemporalMessage[];
+}
+/**
+ * Flag the given messages as distilled (`distilled = 1`).
+ *
+ * The ids are processed in batches so that the generated `IN (?, ?, …)` list
+ * never exceeds SQLite's limit on bound parameters per statement (999 in
+ * older builds), regardless of how many ids the caller passes. An empty
+ * array is a no-op.
+ *
+ * @param ids  Ids of the `temporal_messages` rows to mark
+ */
+export function markDistilled(ids: string[]) {
+  const BATCH_SIZE = 500;
+  for (let start = 0; start < ids.length; start += BATCH_SIZE) {
+    const batch = ids.slice(start, start + BATCH_SIZE);
+    const placeholders = batch.map(() => "?").join(",");
+    db()
+      .query(
+        `UPDATE temporal_messages SET distilled = 1 WHERE id IN (${placeholders})`,
+      )
+      .run(...batch);
+  }
+}
+// LIKE-based fallback for when FTS5 fails unexpectedly.
+//
+// The query is lowercased and split on whitespace; terms of two characters
+// or fewer are ignored. A row matches when its lowercased content contains
+// EVERY remaining term as a substring. Results are newest-first and capped
+// at `limit`. Returns [] when no usable term remains.
+//
+// LIKE metacharacters in the user's terms (`%`, `_`) and the escape
+// character itself are escaped, so a term such as "snake_case" or "100%" is
+// matched literally instead of acting as a wildcard.
+function searchLike(input: {
+  pid: string;
+  query: string;
+  sessionID?: string;
+  limit: number;
+}): TemporalMessage[] {
+  const terms = input.query
+    .toLowerCase()
+    .split(/\s+/)
+    .filter((t) => t.length > 2);
+  if (!terms.length) return [];
+  // The SQL sees ESCAPE '\' — a single backslash as the escape character.
+  const conditions = terms
+    .map(() => "LOWER(content) LIKE ? ESCAPE '\\'")
+    .join(" AND ");
+  const likeParams = terms.map(
+    (t) => `%${t.replace(/[\\%_]/g, "\\$&")}%`,
+  );
+  const query = input.sessionID
+    ? `SELECT * FROM temporal_messages WHERE project_id = ? AND session_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`
+    : `SELECT * FROM temporal_messages WHERE project_id = ? AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
+  const params = input.sessionID
+    ? [input.pid, input.sessionID, ...likeParams, input.limit]
+    : [input.pid, ...likeParams, input.limit];
+  return db()
+    .query(query)
+    .all(...params) as TemporalMessage[];
+}
+/**
+ * Full-text search over a project's stored messages.
+ *
+ * Strategy: run the AND form of the query first; if it yields no rows, retry
+ * with the OR form. If the FTS5 statement throws, fall back to the
+ * LIKE-based substring search. A query with no meaningful terms returns []
+ * without touching the FTS index.
+ *
+ * @param input.projectPath  Project whose messages are searched
+ * @param input.query        Free-form query text
+ * @param input.sessionID    Optional: restrict to one session
+ * @param input.limit        Max rows (default 20)
+ */
+export function search(input: {
+  projectPath: string;
+  query: string;
+  sessionID?: string;
+  limit?: number;
+}): TemporalMessage[] {
+  const pid = ensureProject(input.projectPath);
+  const limit = input.limit ?? 20;
+  const andExpr = ftsQuery(input.query);
+  if (andExpr === EMPTY_QUERY) return [];
+  const ftsSQL = input.sessionID
+    ? `SELECT m.* FROM temporal_messages m
+       JOIN temporal_fts f ON m.rowid = f.rowid
+       WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
+       ORDER BY rank LIMIT ?`
+    : `SELECT m.* FROM temporal_messages m
+       JOIN temporal_fts f ON m.rowid = f.rowid
+       WHERE f.content MATCH ? AND m.project_id = ?
+       ORDER BY rank LIMIT ?`;
+  // Execute the statement above for one MATCH expression.
+  const runMatch = (matchExpr: string): TemporalMessage[] => {
+    const bindings = input.sessionID
+      ? [matchExpr, pid, input.sessionID, limit]
+      : [matchExpr, pid, limit];
+    return db()
+      .query(ftsSQL)
+      .all(...bindings) as TemporalMessage[];
+  };
+  try {
+    const strictHits = runMatch(andExpr);
+    if (strictHits.length > 0) return strictHits;
+    // AND returned nothing — try OR fallback for broader recall
+    const orExpr = ftsQueryOr(input.query);
+    return orExpr === EMPTY_QUERY ? [] : runMatch(orExpr);
+  } catch {
+    // FTS5 still choked (edge case) — fall back to LIKE search
+    return searchLike({
+      pid,
+      query: input.query,
+      sessionID: input.sessionID,
+      limit,
+    });
+  }
+}
+/** A stored message together with the raw FTS5 `rank` of its match. */
+export type ScoredTemporalMessage = TemporalMessage & { rank: number };
+/**
+ * Variant of search() that also returns each row's raw FTS5 rank, for use in
+ * cross-source score fusion (RRF).
+ *
+ * Tries the AND form of the query first and the OR form when AND finds
+ * nothing. Unlike search(), there is no LIKE fallback: if the FTS5 statement
+ * throws, an empty array is returned.
+ */
+export function searchScored(input: {
+  projectPath: string;
+  query: string;
+  sessionID?: string;
+  limit?: number;
+}): ScoredTemporalMessage[] {
+  const pid = ensureProject(input.projectPath);
+  const limit = input.limit ?? 20;
+  const andExpr = ftsQuery(input.query);
+  if (andExpr === EMPTY_QUERY) return [];
+  const ftsSQL = input.sessionID
+    ? `SELECT m.*, rank FROM temporal_messages m
+       JOIN temporal_fts f ON m.rowid = f.rowid
+       WHERE f.content MATCH ? AND m.project_id = ? AND m.session_id = ?
+       ORDER BY rank LIMIT ?`
+    : `SELECT m.*, rank FROM temporal_messages m
+       JOIN temporal_fts f ON m.rowid = f.rowid
+       WHERE f.content MATCH ? AND m.project_id = ?
+       ORDER BY rank LIMIT ?`;
+  // Execute the statement above for one MATCH expression.
+  const runMatch = (matchExpr: string): ScoredTemporalMessage[] => {
+    const bindings = input.sessionID
+      ? [matchExpr, pid, input.sessionID, limit]
+      : [matchExpr, pid, limit];
+    return db().query(ftsSQL).all(...bindings) as ScoredTemporalMessage[];
+  };
+  try {
+    const strictHits = runMatch(andExpr);
+    if (strictHits.length > 0) return strictHits;
+    const orExpr = ftsQueryOr(input.query);
+    return orExpr === EMPTY_QUERY ? [] : runMatch(orExpr);
+  } catch {
+    return [];
+  }
+}
+/**
+ * Number of stored messages for a project, optionally limited to one
+ * session.
+ */
+export function count(projectPath: string, sessionID?: string): number {
+  const pid = ensureProject(projectPath);
+  type CountRow = { count: number };
+  if (sessionID) {
+    const row = db()
+      .query(
+        "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ?",
+      )
+      .get(pid, sessionID) as CountRow;
+    return row.count;
+  }
+  const row = db()
+    .query(
+      "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ?",
+    )
+    .get(pid) as CountRow;
+  return row.count;
+}
+/**
+ * Number of messages with `distilled = 0` for a project, optionally limited
+ * to one session.
+ */
+export function undistilledCount(
+  projectPath: string,
+  sessionID?: string,
+): number {
+  const pid = ensureProject(projectPath);
+  type CountRow = { count: number };
+  if (sessionID) {
+    const row = db()
+      .query(
+        "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ? AND distilled = 0",
+      )
+      .get(pid, sessionID) as CountRow;
+    return row.count;
+  }
+  const row = db()
+    .query(
+      "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND distilled = 0",
+    )
+    .get(pid) as CountRow;
+  return row.count;
+}
+/**
+ * Deletion counts reported by prune(). Only deletions from
+ * `temporal_messages` are counted; the archived-distillation cleanup that
+ * prune() also performs is not reflected here.
+ */
+export type PruneResult = {
+  /** Rows deleted by the TTL pass (distilled=1 AND older than retention period). */
+  ttlDeleted: number;
+  /** Rows deleted by the size-cap pass (distilled=1, oldest-first, to get under maxStorage). */
+  capDeleted: number;
+};
+/**
+ * Prune stored data for a project in three passes:
+ *
+ * Pass 1 — TTL: delete messages where distilled=1 AND created_at is older than
+ * retentionDays. This covers normal operation — both distillation and curation
+ * have had ample time to process anything that old.
+ *
+ * Pass 2 — Size cap: if total temporal storage for the project still exceeds
+ * maxStorageMB, delete the oldest distilled=1 messages (regardless of age)
+ * until under the cap. Sizes are measured in bytes of stored content
+ * (`LENGTH(CAST(content AS BLOB))`), not in characters, so multi-byte text is
+ * accounted for correctly. Deletions are issued in batches to stay below
+ * SQLite's per-statement bound-parameter limit.
+ *
+ * Pass 3 — Archived distillations: delete rows in `distillations` with
+ * archived=1 that are older than the same retention cutoff. These deletions
+ * are not included in the returned counts.
+ *
+ * Invariant: undistilled messages (distilled=0) are NEVER deleted by any pass.
+ * If no distilled messages are left to evict, storage may remain above the cap.
+ *
+ * @param input.projectPath    Project to prune
+ * @param input.retentionDays  Age threshold for passes 1 and 3
+ * @param input.maxStorageMB   Storage cap for pass 2
+ * @returns                    Number of messages removed by pass 1 and pass 2
+ */
+export function prune(input: {
+  projectPath: string;
+  retentionDays: number;
+  maxStorageMB: number;
+}): PruneResult {
+  const DELETE_BATCH_SIZE = 500;
+  const database = db();
+  const pid = ensureProject(input.projectPath);
+  const cutoff = Date.now() - input.retentionDays * 24 * 60 * 60 * 1000;
+  // Pass 1: TTL — delete distilled messages older than the retention window.
+  // Note: result.changes is inflated by FTS trigger side-effects, so we count
+  // eligible rows before deletion to get the accurate number deleted.
+  const ttlEligible = (
+    database
+      .query(
+        "SELECT COUNT(*) as c FROM temporal_messages WHERE project_id = ? AND distilled = 1 AND created_at < ?",
+      )
+      .get(pid, cutoff) as { c: number }
+  ).c;
+  if (ttlEligible > 0) {
+    database
+      .query(
+        "DELETE FROM temporal_messages WHERE project_id = ? AND distilled = 1 AND created_at < ?",
+      )
+      .run(pid, cutoff);
+  }
+  const ttlDeleted = ttlEligible;
+  // Pass 2: Size cap — check if total storage for this project exceeds the
+  // limit and if so, evict the oldest distilled messages until under the cap.
+  // LENGTH() on TEXT counts characters; casting to BLOB makes it count bytes.
+  const maxBytes = input.maxStorageMB * 1024 * 1024;
+  const totalBytes = (
+    database
+      .query(
+        "SELECT SUM(LENGTH(CAST(content AS BLOB))) as b FROM temporal_messages WHERE project_id = ?",
+      )
+      .get(pid) as { b: number | null }
+  ).b ?? 0;
+  let capDeleted = 0;
+  if (totalBytes > maxBytes) {
+    // Collect oldest distilled messages until we've accounted for enough bytes
+    // to drop below the cap.
+    const candidates = database
+      .query(
+        "SELECT id, LENGTH(CAST(content AS BLOB)) as size FROM temporal_messages WHERE project_id = ? AND distilled = 1 ORDER BY created_at ASC",
+      )
+      .all(pid) as { id: string; size: number }[];
+    const toDelete: string[] = [];
+    let freed = 0;
+    const excess = totalBytes - maxBytes;
+    for (const row of candidates) {
+      if (freed >= excess) break;
+      toDelete.push(row.id);
+      freed += row.size;
+    }
+    // Delete in bounded batches: one huge IN (...) list could exceed the
+    // maximum number of bound parameters SQLite accepts per statement.
+    for (let start = 0; start < toDelete.length; start += DELETE_BATCH_SIZE) {
+      const batch = toDelete.slice(start, start + DELETE_BATCH_SIZE);
+      const placeholders = batch.map(() => "?").join(",");
+      database
+        .query(
+          `DELETE FROM temporal_messages WHERE id IN (${placeholders})`,
+        )
+        .run(...batch);
+    }
+    // toDelete.length is the accurate count — result.changes is inflated by FTS triggers.
+    capDeleted = toDelete.length;
+  }
+  // Pass 3: Prune archived distillations older than the retention window.
+  // Archived gen-0 distillations are kept for recall search but don't need
+  // to live forever — they follow the same retention policy as temporal messages.
+  database
+    .query(
+      "DELETE FROM distillations WHERE project_id = ? AND archived = 1 AND created_at < ?",
+    )
+    .run(pid, cutoff);
+  return { ttlDeleted, capDeleted };
+}