clementine-agent 1.18.29 → 1.18.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@
7
7
  * replies into status dumps.
8
8
  */
9
9
  import type { ActiveContextItem, ActiveContextSnapshot } from './active-context.js';
10
+ import type { EntityMatch } from './entity-registry.js';
10
11
  export type ContextTurnIntent = 'greeting' | 'ack' | 'status' | 'repair_request' | 'followup' | 'memory_correction' | 'work_request' | 'general_chat';
11
12
  export type RequiredRetrieval = 'none' | 'event' | 'transcript';
12
13
  export interface ContextPolicyDecision {
@@ -17,10 +18,19 @@ export interface ContextPolicyDecision {
17
18
  requiredRetrieval: RequiredRetrieval;
18
19
  retrievalQueries: string[];
19
20
  debugReasons: string[];
21
+ triggeredEntities: EntityMatch[];
20
22
  }
21
23
  export interface ContextPolicyInput {
22
24
  text: string;
23
25
  activeContext?: ActiveContextSnapshot | null;
26
+ /**
27
+ * Pre-computed entity matches against the registry. Caller looks these
28
+ * up via entity-registry.findEntitiesInText so the policy module stays
29
+ * free of the store dependency. When matches arrive, the policy elevates
30
+ * recall to 'transcript' on non-trivial intents and seeds entity names
31
+ * into the retrieval queries.
32
+ */
33
+ entityMatches?: EntityMatch[];
24
34
  }
25
35
  export declare function looksLikeVagueContextReference(text: string): boolean;
26
36
  export declare function classifyContextTurn(text: string): ContextTurnIntent;
@@ -88,6 +88,7 @@ function buildRetrievalQueries(intent, text, activeContext) {
88
88
  export function decideContextPolicy(input) {
89
89
  const intent = classifyContextTurn(input.text);
90
90
  const activeContext = input.activeContext ?? null;
91
+ const entityMatches = input.entityMatches ?? [];
91
92
  const debugReasons = [`intent:${intent}`];
92
93
  const proactiveSurface = (activeContext?.items ?? [])
93
94
  .filter((item) => item.greetingEligible && !item.alreadySurfaced && !item.resolved)
@@ -98,8 +99,21 @@ export function decideContextPolicy(input) {
98
99
  if (intent === 'repair_request' || intent === 'followup' || intent === 'memory_correction') {
99
100
  requiredRetrieval = 'transcript';
100
101
  }
102
+ // Entity-driven proactive recall: a known topic in the user's turn is a
103
+ // strong enough signal to pre-fetch related history without waiting for
104
+ // a vague-repair phrase. Skip on greeting/ack so a passing entity mention
105
+ // ("hey, how's the dashboard?") doesn't pull a wall of context into a
106
+ // friendly hello.
107
+ const elevatedByEntity = entityMatches.length > 0 && intent !== 'greeting' && intent !== 'ack';
108
+ if (elevatedByEntity && requiredRetrieval !== 'transcript') {
109
+ requiredRetrieval = 'transcript';
110
+ debugReasons.push('entity:elevated-retrieval');
111
+ }
101
112
  if (requiredRetrieval !== 'none')
102
113
  debugReasons.push(`retrieval:${requiredRetrieval}`);
114
+ if (entityMatches.length > 0) {
115
+ debugReasons.push(`entities:${entityMatches.map(e => e.name).join(',')}`);
116
+ }
103
117
  const silentContextBlocks = [];
104
118
  if (activeContext?.promptBlock
105
119
  && intent !== 'greeting'
@@ -112,14 +126,24 @@ export function decideContextPolicy(input) {
112
126
  ? activeContext?.greetingLine ?? 'Hey. I am here.'
113
127
  : 'Hey. I am here.'
114
128
  : null;
129
+ const baseQueries = buildRetrievalQueries(intent, input.text, activeContext);
130
+ // Prepend entity display names so the recall search prioritizes them.
131
+ // Dedup against the base lexical queries so we don't pay twice for the
132
+ // same term.
133
+ const entityQueries = entityMatches.map(e => e.display);
134
+ const merged = [...new Set([...entityQueries, ...baseQueries])]
135
+ .map(q => q.trim())
136
+ .filter(Boolean)
137
+ .slice(0, 6);
115
138
  return {
116
139
  turnIntent: intent,
117
140
  silentContextBlocks,
118
141
  visibleOpening,
119
142
  proactiveSurface,
120
143
  requiredRetrieval,
121
- retrievalQueries: buildRetrievalQueries(intent, input.text, activeContext),
144
+ retrievalQueries: merged,
122
145
  debugReasons,
146
+ triggeredEntities: entityMatches,
123
147
  };
124
148
  }
125
149
  //# sourceMappingURL=context-policy.js.map
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Entity registry — detects when a user turn mentions a topic / entity
3
+ * Clementine already has context on, so recall can fire without waiting
4
+ * for vague-repair phrases like "what did we decide?".
5
+ *
6
+ * The registry is a flattened, mention-frequency-ranked snapshot of:
7
+ * - chunks.topic (curated knowledge)
8
+ * - episodes.topics + episodes.entities (consolidated session memory)
9
+ *
10
+ * Cached per store dbPath with a 5-minute TTL — the registry only changes
11
+ * when episodes consolidate or new chunks land, both of which are minutes-
12
+ * scale events. Invalidating less often keeps the chat path fast.
13
+ */
14
+ import type { MemoryStore } from '../memory/store.js';
15
/**
 * Identity of a registry entity as surfaced to callers once it has been
 * found in a user turn.
 */
export interface EntityMatch {
    /** Lookup key for the entity. */
    name: string;
    /** Human-readable form of the entity name. */
    display: string;
    /** Whether the entry is classified as a topic or as a named entity. */
    kind: 'topic' | 'entity';
}
/**
 * A registry row: the same identity fields as an {@link EntityMatch} plus
 * the mention count used for ranking.
 */
export interface RegistryEntity extends EntityMatch {
    /** How many times the entity was seen across the registry sources. */
    count: number;
}
26
/**
 * Read the registry from cache, refreshing if stale or missing. Tests can
 * call invalidateEntityRegistry() between cases to bypass the cache.
 *
 * Reading the store is best-effort: errors raised while loading a fresh
 * snapshot are swallowed rather than propagated to the caller.
 *
 * @param store - Memory store the registry snapshot is loaded from.
 * @param opts.now - Clock override in epoch milliseconds; defaults to
 *   `Date.now()`. Cached entries are reused while younger than 5 minutes.
 * @param opts.key - Cache key override; defaults to the store's `dbPath`,
 *   falling back to `'default'`.
 * @returns The registry entries for the resolved cache key.
 */
export declare function getEntityRegistry(store: MemoryStore, opts?: {
    now?: number;
    key?: string;
}): RegistryEntity[];
32
/**
 * Drop cached registry entries — used by tests and by code paths that
 * know they just mutated the registry source (e.g. after a fresh episode
 * consolidation pass).
 *
 * @param key - Cache key to drop. When omitted (or empty), every cached
 *   registry is cleared.
 */
export declare function invalidateEntityRegistry(key?: string): void;
36
/**
 * Find registry entities mentioned in the input text, with word-boundary
 * matching so "auth" doesn't match "author". Multi-word entities are
 * matched as contiguous word sequences. Longer matches are preferred
 * (more specific), with mention-count as the tiebreaker.
 *
 * Entries whose name is shorter than 3 characters, and single-word entries
 * that are generic stopwords, are never matched.
 *
 * @param text - The user turn to scan. Empty text yields no matches.
 * @param registry - Candidate entities; an empty registry yields no matches.
 * @param opts.maxMatches - Upper bound on returned matches (default 5,
 *   never less than 1).
 * @returns At most `maxMatches` entities, deduplicated.
 */
export declare function findEntitiesInText(text: string, registry: RegistryEntity[], opts?: {
    maxMatches?: number;
}): EntityMatch[];
47
+ //# sourceMappingURL=entity-registry.d.ts.map
@@ -0,0 +1,92 @@
1
+ const REGISTRY_TTL_MS = 5 * 60 * 1000;
2
+ const cache = new Map();
3
+ /** Words that trigger pointless matches when allowed (too generic). Augments
4
+ * the per-entity length filter — "we" or "do" would never make it past the
5
+ * 3-char floor anyway, but common-but-bare nouns sometimes do, and they
6
+ * cause false positives across unrelated turns. */
7
+ const ENTITY_STOPWORDS = new Set([
8
+ 'the', 'and', 'but', 'for', 'are', 'was', 'has', 'had', 'have', 'this',
9
+ 'that', 'with', 'from', 'they', 'them', 'their', 'these', 'those',
10
+ 'about', 'into', 'over', 'just', 'than', 'then', 'when', 'what', 'where',
11
+ 'while', 'will', 'would', 'could', 'should',
12
+ ]);
13
+ /** Read the registry from cache, refreshing if stale or missing. Tests can
14
+ * call invalidateEntityRegistry() between cases to bypass the cache. */
15
+ export function getEntityRegistry(store, opts = {}) {
16
+ const key = opts.key ?? store.dbPath ?? 'default';
17
+ const now = opts.now ?? Date.now();
18
+ const cached = cache.get(key);
19
+ if (cached && now - cached.loadedAt < REGISTRY_TTL_MS) {
20
+ return cached.entries;
21
+ }
22
+ let entries = [];
23
+ try {
24
+ if (typeof store.getEntityRegistrySnapshot === 'function') {
25
+ entries = store.getEntityRegistrySnapshot({ minCount: 1, maxItems: 500 });
26
+ }
27
+ }
28
+ catch { /* registry probe is best-effort */ }
29
+ cache.set(key, { entries, loadedAt: now });
30
+ return entries;
31
+ }
32
+ /** Drop cached registry entries — used by tests and by code paths that
33
+ * know they just mutated the registry source (e.g. after a fresh episode
34
+ * consolidation pass). */
35
+ export function invalidateEntityRegistry(key) {
36
+ if (key)
37
+ cache.delete(key);
38
+ else
39
+ cache.clear();
40
+ }
41
+ function normalizeForMatch(text) {
42
+ return text
43
+ .toLowerCase()
44
+ .replace(/[^\p{L}\p{N}]+/gu, ' ')
45
+ .replace(/\s+/g, ' ')
46
+ .trim();
47
+ }
48
+ /**
49
+ * Find registry entities mentioned in the input text, with word-boundary
50
+ * matching so "auth" doesn't match "author". Multi-word entities are
51
+ * matched as contiguous word sequences. Longer matches are preferred
52
+ * (more specific), with mention-count as the tiebreaker.
53
+ *
54
+ * Returns at most `maxMatches` (default 5) entities, deduplicated.
55
+ */
56
+ export function findEntitiesInText(text, registry, opts = {}) {
57
+ const max = Math.max(1, opts.maxMatches ?? 5);
58
+ if (!text || registry.length === 0)
59
+ return [];
60
+ const haystack = ` ${normalizeForMatch(text)} `;
61
+ if (haystack.trim().length < 3)
62
+ return [];
63
+ const candidates = [];
64
+ for (const entry of registry) {
65
+ if (entry.name.length < 3)
66
+ continue;
67
+ if (entry.name.split(' ').length === 1 && ENTITY_STOPWORDS.has(entry.name))
68
+ continue;
69
+ const needle = ` ${entry.name} `;
70
+ if (haystack.includes(needle)) {
71
+ candidates.push({ entry, specificity: entry.name.length });
72
+ }
73
+ }
74
+ // Specificity desc, then count desc — multi-word matches win, frequency
75
+ // breaks ties between equally-specific candidates.
76
+ candidates.sort((a, b) => b.specificity - a.specificity || b.entry.count - a.entry.count);
77
+ // Dedup: skip a candidate if a longer already-accepted match fully
78
+ // contains its name (e.g. don't surface "dashboard" if "dashboard
79
+ // refactor" already matched).
80
+ const accepted = [];
81
+ const acceptedNames = [];
82
+ for (const { entry } of candidates) {
83
+ if (acceptedNames.some(n => n.includes(entry.name)))
84
+ continue;
85
+ accepted.push({ name: entry.name, display: entry.display, kind: entry.kind });
86
+ acceptedNames.push(entry.name);
87
+ if (accepted.length >= max)
88
+ break;
89
+ }
90
+ return accepted;
91
+ }
92
+ //# sourceMappingURL=entity-registry.js.map
@@ -29,6 +29,7 @@ import { isInternalSyntheticPrompt, resolveRecentOperationalContext } from './re
29
29
  import { decideContextPolicy } from './context-policy.js';
30
30
  import { persistConversationLearning } from './conversation-learning.js';
31
31
  import { detectCommitmentInTurn, recordDetectedCommitment } from './commitments.js';
32
+ import { findEntitiesInText, getEntityRegistry } from './entity-registry.js';
32
33
  import { getBackgroundCreditBlock, isCreditBalanceError, markBackgroundCreditBlocked } from './credit-guard.js';
33
34
  import { appendTurnLedger, estimateTokensApprox, formatLastTurnLedger, readRecentTurnLedger } from './turn-ledger.js';
34
35
  import { assessGatewayContextHygiene, formatGatewayHygieneAnnotation } from './context-hygiene.js';
@@ -1548,7 +1549,23 @@ export class Gateway {
1548
1549
  const activeContext = this.isTrustedPersonalSession(sessionKey)
1549
1550
  ? buildActiveContextSnapshot(sessionKey, { baseDir: BASE_DIR, transcriptCoverage, openCommitments })
1550
1551
  : null;
1551
- const contextDecision = decideContextPolicy({ text, activeContext });
1552
+ // Entity recall: if the user mentions something we already have context
1553
+ // on (a chunk topic or an episode entity), elevate retrieval so the
1554
+ // model gets the relevant history without waiting for a repair phrase.
1555
+ let entityMatches = [];
1556
+ if (this.isTrustedPersonalSession(sessionKey)) {
1557
+ try {
1558
+ const store = this.assistant.getMemoryStore?.();
1559
+ if (store) {
1560
+ const registry = getEntityRegistry(store);
1561
+ if (registry.length > 0) {
1562
+ entityMatches = findEntitiesInText(text, registry);
1563
+ }
1564
+ }
1565
+ }
1566
+ catch { /* entity registry probe is best-effort */ }
1567
+ }
1568
+ const contextDecision = decideContextPolicy({ text, activeContext, entityMatches });
1552
1569
  if (this.isTrustedPersonalSession(sessionKey)) {
1553
1570
  const learning = persistConversationLearning(sessionKey, text, this.assistant.getMemoryStore?.());
1554
1571
  if (learning?.corrections.length || learning?.preferences.length) {
@@ -703,6 +703,25 @@ export declare class MemoryStore {
703
703
  chunkId: number | null;
704
704
  createdAt: string;
705
705
  }>;
706
+ /**
707
+ * Pull a flattened, deduplicated snapshot of named topics + entities the
708
+ * agent already knows about, ranked by mention frequency. Sources:
709
+ * - chunks.topic (curated knowledge — the strongest signal)
710
+ * - episodes.topics (LLM-extracted topic phrases per session)
711
+ * - episodes.entities (LLM-extracted named things)
712
+ *
713
+ * Used by the entity-registry module to detect when a user turn mentions
714
+ * something we have prior context on, so recall can fire proactively.
715
+ */
716
+ getEntityRegistrySnapshot(opts?: {
717
+ minCount?: number;
718
+ maxItems?: number;
719
+ }): Array<{
720
+ name: string;
721
+ display: string;
722
+ kind: 'topic' | 'entity';
723
+ count: number;
724
+ }>;
706
725
  /**
707
726
  * Insert a commitment, deduping on the fingerprint. If a row with the
708
727
  * same fingerprint already exists, the existing id is returned and no
@@ -3182,6 +3182,93 @@ export class MemoryStore {
3182
3182
  createdAt: row.created_at,
3183
3183
  }));
3184
3184
  }
3185
+ // ── Entity registry ───────────────────────────────────────────────
3186
+ /**
3187
+ * Pull a flattened, deduplicated snapshot of named topics + entities the
3188
+ * agent already knows about, ranked by mention frequency. Sources:
3189
+ * - chunks.topic (curated knowledge — the strongest signal)
3190
+ * - episodes.topics (LLM-extracted topic phrases per session)
3191
+ * - episodes.entities (LLM-extracted named things)
3192
+ *
3193
+ * Used by the entity-registry module to detect when a user turn mentions
3194
+ * something we have prior context on, so recall can fire proactively.
3195
+ */
3196
+ getEntityRegistrySnapshot(opts = {}) {
3197
+ const minCount = Math.max(1, opts.minCount ?? 1);
3198
+ const maxItems = Math.max(1, Math.min(opts.maxItems ?? 500, 5000));
3199
+ const counts = new Map();
3200
+ const accept = (raw, kind) => {
3201
+ if (!raw)
3202
+ return;
3203
+ const display = raw.trim();
3204
+ if (display.length < 3 || display.length > 80)
3205
+ return;
3206
+ const name = display.toLowerCase();
3207
+ const existing = counts.get(name);
3208
+ if (existing) {
3209
+ existing.count++;
3210
+ // Topics from chunks outrank LLM-derived ones for kind classification.
3211
+ if (kind === 'topic')
3212
+ existing.kind = 'topic';
3213
+ }
3214
+ else {
3215
+ counts.set(name, { display, kind, count: 1 });
3216
+ }
3217
+ };
3218
+ try {
3219
+ const topicRows = this.conn
3220
+ .prepare(`SELECT topic, COUNT(*) as cnt FROM chunks
3221
+ WHERE topic IS NOT NULL AND length(trim(topic)) > 0
3222
+ GROUP BY topic`)
3223
+ .all();
3224
+ for (const r of topicRows) {
3225
+ const existing = counts.get(r.topic.trim().toLowerCase());
3226
+ if (existing)
3227
+ existing.count += r.cnt - 1; // already added 1 above
3228
+ accept(r.topic, 'topic');
3229
+ if (existing) {
3230
+ // Increment with the SQL-derived count (offset by the 1 accept added).
3231
+ const e = counts.get(r.topic.trim().toLowerCase());
3232
+ if (e)
3233
+ e.count = Math.max(e.count, r.cnt);
3234
+ }
3235
+ }
3236
+ }
3237
+ catch { /* chunks.topic column missing or query fails */ }
3238
+ try {
3239
+ const epRows = this.conn
3240
+ .prepare(`SELECT topics, entities FROM episodes`)
3241
+ .all();
3242
+ for (const row of epRows) {
3243
+ if (row.topics) {
3244
+ try {
3245
+ const arr = JSON.parse(row.topics);
3246
+ if (Array.isArray(arr))
3247
+ for (const t of arr)
3248
+ if (typeof t === 'string')
3249
+ accept(t, 'topic');
3250
+ }
3251
+ catch { /* skip malformed JSON */ }
3252
+ }
3253
+ if (row.entities) {
3254
+ try {
3255
+ const arr = JSON.parse(row.entities);
3256
+ if (Array.isArray(arr))
3257
+ for (const e of arr)
3258
+ if (typeof e === 'string')
3259
+ accept(e, 'entity');
3260
+ }
3261
+ catch { /* skip malformed JSON */ }
3262
+ }
3263
+ }
3264
+ }
3265
+ catch { /* episodes table missing */ }
3266
+ const all = [...counts.entries()]
3267
+ .map(([name, v]) => ({ name, display: v.display, kind: v.kind, count: v.count }))
3268
+ .filter(e => e.count >= minCount);
3269
+ all.sort((a, b) => b.count - a.count || a.name.length - b.name.length);
3270
+ return all.slice(0, maxItems);
3271
+ }
3185
3272
  // ── Commitments ───────────────────────────────────────────────────
3186
3273
  /**
3187
3274
  * Insert a commitment, deduping on the fingerprint. If a row with the
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.29",
3
+ "version": "1.18.30",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",