npm - clementine-agent - Versions diffs - 1.18.29 → 1.18.30 - Mend

clementine-agent 1.18.29 → 1.18.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/gateway/context-policy.d.ts +10 -0
package/dist/gateway/context-policy.js +25 -1
package/dist/gateway/entity-registry.d.ts +47 -0
package/dist/gateway/entity-registry.js +92 -0
package/dist/gateway/router.js +18 -1
package/dist/memory/store.d.ts +19 -0
package/dist/memory/store.js +87 -0
package/package.json +1 -1

package/dist/gateway/context-policy.d.ts CHANGED Viewed

@@ -7,6 +7,7 @@
  * replies into status dumps.
  */
 import type { ActiveContextItem, ActiveContextSnapshot } from './active-context.js';
+import type { EntityMatch } from './entity-registry.js';
 export type ContextTurnIntent = 'greeting' | 'ack' | 'status' | 'repair_request' | 'followup' | 'memory_correction' | 'work_request' | 'general_chat';
 export type RequiredRetrieval = 'none' | 'event' | 'transcript';
 export interface ContextPolicyDecision {
@@ -17,10 +18,19 @@ export interface ContextPolicyDecision {
     requiredRetrieval: RequiredRetrieval;
     retrievalQueries: string[];
     debugReasons: string[];
+    triggeredEntities: EntityMatch[];
 }
 export interface ContextPolicyInput {
     text: string; // raw user turn; decideContextPolicy classifies it with classifyContextTurn
     activeContext?: ActiveContextSnapshot | null; // optional; decideContextPolicy treats a missing value as null
+    /**
+     * Pre-computed entity matches against the registry. Caller looks these
+     * up via entity-registry.findEntitiesInText so the policy module stays
+     * free of the store dependency. When matches arrive, the policy elevates
+     * recall to 'transcript' on non-trivial intents and seeds entity names
+     * into the retrieval queries.
+     */
+    entityMatches?: EntityMatch[];
 }
 export declare function looksLikeVagueContextReference(text: string): boolean;
 export declare function classifyContextTurn(text: string): ContextTurnIntent;

package/dist/gateway/context-policy.js CHANGED Viewed

@@ -88,6 +88,7 @@ function buildRetrievalQueries(intent, text, activeContext) {
 export function decideContextPolicy(input) {
     const intent = classifyContextTurn(input.text);
     const activeContext = input.activeContext ?? null;
+    const entityMatches = input.entityMatches ?? [];
     const debugReasons = [`intent:${intent}`];
     const proactiveSurface = (activeContext?.items ?? [])
         .filter((item) => item.greetingEligible && !item.alreadySurfaced && !item.resolved)
@@ -98,8 +99,21 @@ export function decideContextPolicy(input) {
     if (intent === 'repair_request' || intent === 'followup' || intent === 'memory_correction') {
         requiredRetrieval = 'transcript';
     }
+    // Entity-driven proactive recall: a known topic in the user's turn is a
+    // strong enough signal to pre-fetch related history without waiting for
+    // a vague-repair phrase. Skip on greeting/ack so a passing entity mention
+    // ("hey, how's the dashboard?") doesn't pull a wall of context into a
+    // friendly hello.
+    const elevatedByEntity = entityMatches.length > 0 && intent !== 'greeting' && intent !== 'ack';
+    if (elevatedByEntity && requiredRetrieval !== 'transcript') {
+        requiredRetrieval = 'transcript';
+        debugReasons.push('entity:elevated-retrieval');
+    }
     if (requiredRetrieval !== 'none')
         debugReasons.push(`retrieval:${requiredRetrieval}`);
+    if (entityMatches.length > 0) {
+        debugReasons.push(`entities:${entityMatches.map(e => e.name).join(',')}`);
+    }
     const silentContextBlocks = [];
     if (activeContext?.promptBlock
         && intent !== 'greeting'
@@ -112,14 +126,24 @@ export function decideContextPolicy(input) {
             ? activeContext?.greetingLine ?? 'Hey. I am here.'
             : 'Hey. I am here.'
         : null;
+    const baseQueries = buildRetrievalQueries(intent, input.text, activeContext);
+    // Prepend entity display names so the recall search prioritizes them.
+    // Dedup against the base lexical queries so we don't pay twice for the
+    // same term.
+    const entityQueries = entityMatches.map(e => e.display);
+    const merged = [...new Set([...entityQueries, ...baseQueries])]
+        .map(q => q.trim())
+        .filter(Boolean)
+        .slice(0, 6);
     return {
         turnIntent: intent,
         silentContextBlocks,
         visibleOpening,
         proactiveSurface,
         requiredRetrieval,
-        retrievalQueries: buildRetrievalQueries(intent, input.text, activeContext),
+        retrievalQueries: merged,
         debugReasons,
+        triggeredEntities: entityMatches,
     };
 }
 //# sourceMappingURL=context-policy.js.map

package/dist/gateway/entity-registry.d.ts ADDED Viewed

@@ -0,0 +1,47 @@
+/**
+ * Entity registry — detects when a user turn mentions a topic / entity
+ * Clementine already has context on, so recall can fire without waiting
+ * for vague-repair phrases like "what did we decide?".
+ *
+ * The registry is a flattened, mention-frequency-ranked snapshot of:
+ *   - chunks.topic (curated knowledge)
+ *   - episodes.topics + episodes.entities (consolidated session memory)
+ *
+ * Cached per store dbPath with a 5-minute TTL — the registry only changes
+ * when episodes consolidate or new chunks land, both of which are minutes-
+ * scale events. Invalidating less often keeps the chat path fast.
+ */
+import type { MemoryStore } from '../memory/store.js';
+export interface RegistryEntity { // one entry of the registry snapshot
+    name: string; // dedup/lookup key: the trimmed name, lowercased
+    display: string; // trimmed spelling as first recorded
+    kind: 'topic' | 'entity'; // 'topic' once any topic source names the entry
+    count: number; // recorded mentions; drives ranking and tie-breaking
+}
+export interface EntityMatch { // a registry entry found in a user turn (no count)
+    name: string; // copied from the matched RegistryEntity.name
+    display: string; // copied from the matched RegistryEntity.display
+    kind: 'topic' | 'entity'; // copied from the matched RegistryEntity.kind
+}
+/** Read the registry from cache, refreshing if stale or missing. Tests can
+ *  call invalidateEntityRegistry() between cases to bypass the cache. */
+export declare function getEntityRegistry(store: MemoryStore, opts?: {
+    now?: number;
+    key?: string;
+}): RegistryEntity[];
+/** Drop cached registry entries — used by tests and by code paths that
+ *  know they just mutated the registry source (e.g. after a fresh episode
+ *  consolidation pass). */
+export declare function invalidateEntityRegistry(key?: string): void;
+/**
+ * Find registry entities mentioned in the input text, with word-boundary
+ * matching so "auth" doesn't match "author". Multi-word entities are
+ * matched as contiguous word sequences. Longer matches are preferred
+ * (more specific), with mention-count as the tiebreaker.
+ *
+ * Returns at most `maxMatches` (default 5) entities, deduplicated.
+ */
+export declare function findEntitiesInText(text: string, registry: RegistryEntity[], opts?: {
+    maxMatches?: number;
+}): EntityMatch[];
+//# sourceMappingURL=entity-registry.d.ts.map

package/dist/gateway/entity-registry.js ADDED Viewed

@@ -0,0 +1,92 @@
+/** Lifetime of a cached registry snapshot: five minutes, in milliseconds. */
+const REGISTRY_TTL_MS = 1000 * 60 * 5;
+/** Cached snapshots by cache key; each value is `{ entries, loadedAt }`. */
+const cache = new Map();
+/** Single-word names too generic to treat as an entity mention. Names under
+ *  three characters are rejected before this set is consulted. */
+const ENTITY_STOPWORDS = new Set([
+    ...'the and but for are was has had have this'.split(' '),
+    ...'that with from they them their these those'.split(' '),
+    ...'about into over just than then when what where'.split(' '),
+    ...'while will would could should'.split(' '),
+]);
+/**
+ * Return the registry snapshot for `store`, reusing a cached copy younger than
+ * REGISTRY_TTL_MS. `opts.key` overrides the cache key (store.dbPath, else
+ * 'default'); `opts.now` overrides the clock. Failed probes are cached as [].
+ */
+export function getEntityRegistry(store, opts = {}) {
+    const cacheKey = opts.key ?? store.dbPath ?? 'default';
+    const timestamp = opts.now ?? Date.now();
+    const hit = cache.get(cacheKey);
+    if (hit && timestamp - hit.loadedAt < REGISTRY_TTL_MS)
+        return hit.entries;
+    const fresh = { entries: [], loadedAt: timestamp };
+    try {
+        if (typeof store.getEntityRegistrySnapshot === 'function')
+            fresh.entries = store.getEntityRegistrySnapshot({ minCount: 1, maxItems: 500 });
+    } catch { /* registry probe is best-effort */ }
+    cache.set(cacheKey, fresh);
+    return fresh.entries;
+}
+/** Forget cached registry data: only the entry for `key` when a truthy key is
+ *  given, otherwise every entry. For tests and callers that changed the source. */
+export function invalidateEntityRegistry(key) {
+    if (!key) {
+        cache.clear();
+        return;
+    }
+    cache.delete(key);
+}
+/** Lowercase `text`, turn every run of non-letter, non-digit characters into a
+ *  single space, and trim the ends so words can be compared space-delimited. */
+function normalizeForMatch(text) {
+    const lowered = text.toLowerCase();
+    const spaced = lowered.replace(/[^\p{L}\p{N}]+/gu, ' ');
+    return spaced.replace(/\s+/g, ' ').trim();
+}
+/**
+ * Find registry entities mentioned in `text`, matching whole words only so
+ * "auth" does not match "author". Entity names are normalized exactly like the
+ * text, so punctuated names ("node.js") match too. Multi-word names match as
+ * contiguous word sequences. Longer names win; mention count breaks ties.
+ *
+ * Returns at most `opts.maxMatches` (default 5, minimum 1) `{ name, display,
+ * kind }` objects, skipping names whose words sit inside an accepted longer one.
+ */
+export function findEntitiesInText(text, registry, opts = {}) {
+    const requested = Number(opts.maxMatches ?? 5); // NaN would disable the cap
+    const max = Math.max(1, Number.isNaN(requested) ? 5 : requested);
+    if (!text || registry.length === 0)
+        return [];
+    const haystack = ` ${normalizeForMatch(text)} `;
+    if (haystack.trim().length < 3)
+        return [];
+    const candidates = [];
+    for (const entry of registry) {
+        // Normalize the name like the haystack; raw punctuation never matches.
+        const words = normalizeForMatch(entry.name);
+        if (words.length < 3)
+            continue;
+        if (!words.includes(' ') && ENTITY_STOPWORDS.has(words))
+            continue;
+        const needle = ` ${words} `;
+        if (haystack.includes(needle))
+            candidates.push({ entry, needle });
+    }
+    // Longest first, then most-mentioned: multi-word matches win.
+    candidates.sort((a, b) => b.needle.length - a.needle.length || b.entry.count - a.entry.count);
+    const accepted = [];
+    const acceptedNeedles = [];
+    for (const { entry, needle } of candidates) {
+        // Needles are space-padded, so containment is checked on word boundaries.
+        if (acceptedNeedles.some(n => n.includes(needle)))
+            continue;
+        accepted.push({ name: entry.name, display: entry.display, kind: entry.kind });
+        acceptedNeedles.push(needle);
+        if (accepted.length >= max)
+            break;
+    }
+    return accepted;
+}
+//# sourceMappingURL=entity-registry.js.map

package/dist/gateway/router.js CHANGED Viewed

@@ -29,6 +29,7 @@ import { isInternalSyntheticPrompt, resolveRecentOperationalContext } from './re
 import { decideContextPolicy } from './context-policy.js';
 import { persistConversationLearning } from './conversation-learning.js';
 import { detectCommitmentInTurn, recordDetectedCommitment } from './commitments.js';
+import { findEntitiesInText, getEntityRegistry } from './entity-registry.js';
 import { getBackgroundCreditBlock, isCreditBalanceError, markBackgroundCreditBlocked } from './credit-guard.js';
 import { appendTurnLedger, estimateTokensApprox, formatLastTurnLedger, readRecentTurnLedger } from './turn-ledger.js';
 import { assessGatewayContextHygiene, formatGatewayHygieneAnnotation } from './context-hygiene.js';
@@ -1548,7 +1549,23 @@ export class Gateway {
         const activeContext = this.isTrustedPersonalSession(sessionKey)
             ? buildActiveContextSnapshot(sessionKey, { baseDir: BASE_DIR, transcriptCoverage, openCommitments })
             : null;
-        const contextDecision = decideContextPolicy({ text, activeContext });
+        // Entity recall: if the user mentions something we already have context
+        // on (a chunk topic or an episode entity), elevate retrieval so the
+        // model gets the relevant history without waiting for a repair phrase.
+        let entityMatches = [];
+        if (this.isTrustedPersonalSession(sessionKey)) {
+            try {
+                const store = this.assistant.getMemoryStore?.();
+                if (store) {
+                    const registry = getEntityRegistry(store);
+                    if (registry.length > 0) {
+                        entityMatches = findEntitiesInText(text, registry);
+                    }
+                }
+            }
+            catch { /* entity registry probe is best-effort */ }
+        }
+        const contextDecision = decideContextPolicy({ text, activeContext, entityMatches });
         if (this.isTrustedPersonalSession(sessionKey)) {
             const learning = persistConversationLearning(sessionKey, text, this.assistant.getMemoryStore?.());
             if (learning?.corrections.length || learning?.preferences.length) {

package/dist/memory/store.d.ts CHANGED Viewed

@@ -703,6 +703,25 @@ export declare class MemoryStore {
         chunkId: number | null;
         createdAt: string;
     }>;
+    /**
+     * Pull a flattened, deduplicated snapshot of named topics + entities the
+     * agent already knows about, ranked by mention frequency. Sources:
+     *   - chunks.topic         (curated knowledge — the strongest signal)
+     *   - episodes.topics      (LLM-extracted topic phrases per session)
+     *   - episodes.entities    (LLM-extracted named things)
+     *
+     * Used by the entity-registry module to detect when a user turn mentions
+     * something we have prior context on, so recall can fire proactively.
+     */
+    getEntityRegistrySnapshot(opts?: {
+        minCount?: number;
+        maxItems?: number;
+    }): Array<{
+        name: string;
+        display: string;
+        kind: 'topic' | 'entity';
+        count: number;
+    }>;
     /**
      * Insert a commitment, deduping on the fingerprint. If a row with the
      * same fingerprint already exists, the existing id is returned and no

package/dist/memory/store.js CHANGED Viewed

@@ -3182,6 +3182,93 @@ export class MemoryStore {
             createdAt: row.created_at,
         }));
     }
+    // ── Entity registry ───────────────────────────────────────────────
+    /**
+     * Pull a flattened, deduplicated snapshot of named topics + entities the
+     * agent already knows about, ranked by mention frequency. Sources:
+     *   - chunks.topic         (weighted by the number of chunks per topic)
+     *   - episodes.topics      (JSON array of strings, one mention each)
+     *   - episodes.entities    (JSON array of strings, one mention each)
+     *
+     * Names are trimmed and lowercased for dedup; the first spelling seen is
+     * kept as `display`. Names shorter than 3 or longer than 80 characters are
+     * ignored. `kind` is 'topic' as soon as any topic source names the entry.
+     * Each source is read best-effort: a query that throws is skipped, and
+     * malformed JSON in an episode column is ignored.
+     *
+     * @param opts.minCount - Drop entries with fewer mentions (floor 1).
+     * @param opts.maxItems - Cap on returned entries (default 500, max 5000).
+     * @returns Entries sorted by count descending, then shorter name first.
+     */
+    getEntityRegistrySnapshot(opts = {}) {
+        const minCount = Math.max(1, opts.minCount ?? 1);
+        const maxItems = Math.max(1, Math.min(opts.maxItems ?? 500, 5000));
+        const counts = new Map();
+        // Record `weight` mentions of `raw`; non-strings and out-of-range names are skipped.
+        const accept = (raw, kind, weight = 1) => {
+            if (typeof raw !== 'string')
+                return;
+            const display = raw.trim();
+            if (display.length < 3 || display.length > 80)
+                return;
+            const name = display.toLowerCase();
+            const existing = counts.get(name);
+            if (existing) {
+                existing.count += weight;
+                // A 'topic' sighting upgrades an entry first seen as 'entity'.
+                if (kind === 'topic')
+                    existing.kind = 'topic';
+            }
+            else {
+                counts.set(name, { display, kind, count: weight });
+            }
+        };
+        try {
+            const topicRows = this.conn
+                .prepare(`SELECT topic, COUNT(*) as cnt FROM chunks
+           WHERE topic IS NOT NULL AND length(trim(topic)) > 0
+           GROUP BY topic`)
+                .all();
+            // Weight every topic by its chunk count, including the first time a
+            // name is seen, so heavily used topics actually rank first.
+            for (const r of topicRows)
+                accept(r.topic, 'topic', Math.max(1, Number(r.cnt) || 1));
+        }
+        catch { /* chunks.topic column missing or query fails */ }
+        try {
+            const epRows = this.conn
+                .prepare(`SELECT topics, entities FROM episodes`)
+                .all();
+            for (const row of epRows) {
+                if (row.topics) {
+                    try {
+                        const arr = JSON.parse(row.topics);
+                        if (Array.isArray(arr))
+                            for (const t of arr)
+                                if (typeof t === 'string')
+                                    accept(t, 'topic');
+                    }
+                    catch { /* skip malformed JSON */ }
+                }
+                if (row.entities) {
+                    try {
+                        const arr = JSON.parse(row.entities);
+                        if (Array.isArray(arr))
+                            for (const e of arr)
+                                if (typeof e === 'string')
+                                    accept(e, 'entity');
+                    }
+                    catch { /* skip malformed JSON */ }
+                }
+            }
+        }
+        catch { /* episodes table missing */ }
+        const all = [...counts.entries()]
+            .map(([name, v]) => ({ name, display: v.display, kind: v.kind, count: v.count }))
+            .filter(e => e.count >= minCount);
+        all.sort((a, b) => b.count - a.count || a.name.length - b.name.length);
+        return all.slice(0, maxItems);
+    }
     // ── Commitments ───────────────────────────────────────────────────
     /**
      * Insert a commitment, deduping on the fingerprint. If a row with the

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.29",
+  "version": "1.18.30",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",