clementine-agent 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.d.ts CHANGED
@@ -59,6 +59,15 @@ export declare const BUDGET: {
59
59
  summarization: undefined;
60
60
  reflection: undefined;
61
61
  };
62
+ export declare const MEMORY_JANITOR: {
63
+ consolidatedExpireDays: number;
64
+ consolidatedSalienceFloor: number;
65
+ softDeleteGraceDays: number;
66
+ auxRetentionDays: number;
67
+ extractionsMaxRows: number;
68
+ vacuumIntervalDays: number;
69
+ vacuumIdleSeconds: number;
70
+ };
62
71
  export declare const TASK_BUDGET_TOKENS: {
63
72
  heartbeat: number | undefined;
64
73
  cronT1: number | undefined;
@@ -159,6 +168,8 @@ export declare const VAULT_MIGRATIONS_STATE: string;
159
168
  export declare const PLANS_DIR: string;
160
169
  export declare const ADVISOR_LOG_PATH: string;
161
170
  export declare const REMOTE_ACCESS_CONFIG: string;
171
+ /** Persistent session store for the dashboard /auth flow (mode 0600 enforced on write). */
172
+ export declare const SESSIONS_FILE: string;
162
173
  export declare const STAGING_DIR: string;
163
174
  export declare const ALLOW_SOURCE_EDITS: boolean;
164
175
  export declare const ADVISOR_RULES_LOADER: 'off' | 'shadow' | 'primary';
package/dist/config.js CHANGED
@@ -195,6 +195,20 @@ export const BUDGET = {
195
195
  summarization: undefined,
196
196
  reflection: undefined,
197
197
  };
198
+ // ── Memory janitor (bounded-growth maintenance) ─────────────────────
199
+ // Two-phase delete: consolidated chunks with low salience and no recent
200
+ // access get soft-deleted, then physically deleted after a grace period.
201
+ // Aux tables (recall_traces, access_log, outcomes) cap at a rolling window.
202
+ // VACUUM runs at most once per N days, only when daemon is idle.
203
+ export const MEMORY_JANITOR = {
204
+ consolidatedExpireDays: getEnvOrJsonNumber('MEMORY_CONSOLIDATED_EXPIRE_DAYS', undefined, 60),
205
+ consolidatedSalienceFloor: getEnvOrJsonNumber('MEMORY_CONSOLIDATED_SALIENCE_FLOOR', undefined, 0.2),
206
+ softDeleteGraceDays: getEnvOrJsonNumber('MEMORY_SOFT_DELETE_GRACE_DAYS', undefined, 14),
207
+ auxRetentionDays: getEnvOrJsonNumber('MEMORY_AUX_RETENTION_DAYS', undefined, 30),
208
+ extractionsMaxRows: getEnvOrJsonNumber('MEMORY_EXTRACTIONS_MAX_ROWS', undefined, 50000),
209
+ vacuumIntervalDays: getEnvOrJsonNumber('MEMORY_VACUUM_INTERVAL_DAYS', undefined, 7),
210
+ vacuumIdleSeconds: getEnvOrJsonNumber('MEMORY_VACUUM_IDLE_SECONDS', undefined, 300),
211
+ };
198
212
  // ── Task budget caps (tokens per query) ──────────────────────────────
199
213
  // Passed to the Claude Agent SDK as `taskBudget: { total }`. The model is
200
214
  // told its remaining token budget so it can pace tool use and wrap up
@@ -383,6 +397,8 @@ export const PLANS_DIR = path.join(BASE_DIR, 'plans');
383
397
  export const ADVISOR_LOG_PATH = path.join(BASE_DIR, 'cron', 'advisor-decisions.jsonl');
384
398
  // ── Remote Access ──────────────────────────────────────────────────
385
399
  export const REMOTE_ACCESS_CONFIG = path.join(BASE_DIR, 'remote-access.json');
400
+ /** Persistent session store for the dashboard /auth flow (mode 0600 enforced on write). */
401
+ export const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
386
402
  // ── Source Self-Edit Staging ─────────────────────────────────────────
387
403
  export const STAGING_DIR = path.join(BASE_DIR, 'staging');
388
404
  // Source self-editing is deprecated. The data-driven path (advisor rules,
package/dist/index.js CHANGED
@@ -608,6 +608,15 @@ async function asyncMain() {
608
608
  {
609
609
  const memStore = assistant.getMemoryStore();
610
610
  if (memStore) {
611
+ // Async write queue: route transcript saves, recall traces, outcomes,
612
+ // and access-log inserts off the request thread. ~250ms flush window;
613
+ // drained on shutdown below. Idempotent — safe if called twice.
614
+ try {
615
+ memStore.enableWriteQueue();
616
+ }
617
+ catch (err) {
618
+ logger.warn({ err }, 'Failed to enable memory write queue — falling back to sync writes');
619
+ }
611
620
  const { runStartupMaintenance, startPeriodicMaintenance } = await import('./memory/maintenance.js');
612
621
  // Fire-and-forget startup maintenance
613
622
  runStartupMaintenance(memStore).catch(() => { });
@@ -999,6 +1008,17 @@ async function asyncMain() {
999
1008
  catch (err) {
1000
1009
  logger.warn({ err }, 'Session flush on shutdown failed');
1001
1010
  }
1011
+ // Drain the memory write queue so transcripts/recall traces/outcomes/access
1012
+ // logs that were enqueued in the last <250ms make it to SQLite.
1013
+ try {
1014
+ const memStore = assistant.getMemoryStore();
1015
+ if (memStore && typeof memStore.flushWrites === 'function') {
1016
+ await memStore.flushWrites();
1017
+ }
1018
+ }
1019
+ catch (err) {
1020
+ logger.warn({ err }, 'Memory write queue drain failed');
1021
+ }
1002
1022
  // Now safe to tear down remaining infrastructure
1003
1023
  heartbeat.stop();
1004
1024
  cronScheduler.stop();
@@ -22,6 +22,13 @@ const DIR_CATEGORY_MAP = {
22
22
  '05-Tasks': 'advice',
23
23
  '07-Inbox': 'events',
24
24
  };
25
+ /**
26
+ * Procedural memory: learned workflows live in 00-System/procedures/.
27
+ * Frontmatter `triggers: [verb-phrases]` is parsed separately by the store
28
+ * and used at retrieval time to boost the chunk when a query mentions one
29
+ * of the trigger verbs. Pattern adopted from Mem0's v1.0.0 procedural tier.
30
+ */
31
+ const PROCEDURE_DIR = '00-System/procedures';
25
32
  /** Content keyword patterns for category detection (used as fallback). */
26
33
  const CATEGORY_KEYWORDS = [
27
34
  [/\b(prefer|always use|never use|i like|i don'?t like|i hate)\b/i, 'preferences'],
@@ -37,11 +44,15 @@ function detectCategoryAndTopic(relPath, frontmatter, content) {
37
44
  // 1. Explicit frontmatter category
38
45
  if (frontmatter.category) {
39
46
  const fm = String(frontmatter.category).toLowerCase();
40
- if (['facts', 'events', 'discoveries', 'preferences', 'advice'].includes(fm)) {
47
+ if (['facts', 'events', 'discoveries', 'preferences', 'advice', 'procedure'].includes(fm)) {
41
48
  category = fm;
42
49
  }
43
50
  }
44
- // 2. Directory-based
51
+ // 2. Procedure directory (overrides directory map below).
52
+ if (!category && relPath.startsWith(PROCEDURE_DIR)) {
53
+ category = 'procedure';
54
+ }
55
+ // 3. Directory-based
45
56
  if (!category) {
46
57
  const topDir = relPath.split('/')[0];
47
58
  category = DIR_CATEGORY_MAP[topDir] ?? null;
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Tiny in-process LRU for hot chunk-row reads.
3
+ *
4
+ * Use case: searchContext + recall-trace expansion + dashboard chunk view all
5
+ * funnel through getChunksByIds, which often touches the same hot rows
6
+ * many times within a session. SQLite reads are already fast (microseconds),
7
+ * but the LRU eliminates the per-query overhead and lets us amortize the
8
+ * row-shape unpacking that getChunksByIds does.
9
+ *
10
+ * Bounded: capacity ~1000 by default (~1MB at 1KB/chunk). Map preserves
11
+ * insertion order, so we delete-then-set on access to keep most-recent at
12
+ * the tail and evict from the head.
13
+ *
14
+ * Concurrency: single-process daemon, single thread — no locking needed.
15
+ */
16
+ export declare class HotCache<K, V> {
17
+ private map;
18
+ private capacity;
19
+ private hits;
20
+ private misses;
21
+ private evictions;
22
+ constructor(capacity?: number);
23
+ get(key: K): V | undefined;
24
+ set(key: K, value: V): void;
25
+ delete(key: K): boolean;
26
+ /** Drop all entries — call when bulk-rebuilding the underlying store. */
27
+ clear(): void;
28
+ size(): number;
29
+ stats(): {
30
+ hits: number;
31
+ misses: number;
32
+ evictions: number;
33
+ size: number;
34
+ capacity: number;
35
+ hitRate: number;
36
+ };
37
+ }
38
+ //# sourceMappingURL=hot-cache.d.ts.map
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Tiny in-process LRU for hot chunk-row reads.
3
+ *
4
+ * Use case: searchContext + recall-trace expansion + dashboard chunk view all
5
+ * funnel through getChunksByIds, which often touches the same hot rows
6
+ * many times within a session. SQLite reads are already fast (microseconds),
7
+ * but the LRU eliminates the per-query overhead and lets us amortize the
8
+ * row-shape unpacking that getChunksByIds does.
9
+ *
10
+ * Bounded: capacity ~1000 by default (~1MB at 1KB/chunk). Map preserves
11
+ * insertion order, so we delete-then-set on access to keep most-recent at
12
+ * the tail and evict from the head.
13
+ *
14
+ * Concurrency: single-process daemon, single thread — no locking needed.
15
+ */
16
+ export class HotCache {
17
+ map = new Map();
18
+ capacity;
19
+ hits = 0;
20
+ misses = 0;
21
+ evictions = 0;
22
+ constructor(capacity = 1000) {
23
+ this.capacity = capacity;
24
+ }
25
+ get(key) {
26
+ const v = this.map.get(key);
27
+ if (v === undefined) {
28
+ this.misses++;
29
+ return undefined;
30
+ }
31
+ // Bump to most-recent.
32
+ this.map.delete(key);
33
+ this.map.set(key, v);
34
+ this.hits++;
35
+ return v;
36
+ }
37
+ set(key, value) {
38
+ if (this.map.has(key)) {
39
+ this.map.delete(key);
40
+ }
41
+ else if (this.map.size >= this.capacity) {
42
+ // Evict oldest (first inserted).
43
+ const oldestKey = this.map.keys().next().value;
44
+ if (oldestKey !== undefined) {
45
+ this.map.delete(oldestKey);
46
+ this.evictions++;
47
+ }
48
+ }
49
+ this.map.set(key, value);
50
+ }
51
+ delete(key) {
52
+ return this.map.delete(key);
53
+ }
54
+ /** Drop all entries — call when bulk-rebuilding the underlying store. */
55
+ clear() {
56
+ this.map.clear();
57
+ }
58
+ size() {
59
+ return this.map.size;
60
+ }
61
+ stats() {
62
+ const total = this.hits + this.misses;
63
+ return {
64
+ hits: this.hits,
65
+ misses: this.misses,
66
+ evictions: this.evictions,
67
+ size: this.map.size,
68
+ capacity: this.capacity,
69
+ hitRate: total > 0 ? this.hits / total : 0,
70
+ };
71
+ }
72
+ }
73
+ //# sourceMappingURL=hot-cache.js.map
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Memory store integrity probes — self-healing checks that run on the
3
+ * janitor's periodic cycle. Each probe is independent and conservative:
4
+ * - reports what it found,
5
+ * - repairs only when the fix is non-destructive,
6
+ * - never throws (logs and continues).
7
+ *
8
+ * Three checks today (the cheap, high-value ones):
9
+ * 1. FTS5 contentless-table integrity → auto-rebuild on failure
10
+ * 2. derived_from references to deleted chunks → nullify the dangling refs
11
+ * 3. chunks with content but no embedding → return count for backfill
12
+ *
13
+ * Graph reachability is intentionally NOT probed here — it lives in
14
+ * graph-store.ts's own health probe, which auto-restarts FalkorDB.
15
+ */
16
+ export interface IntegrityReport {
17
+ ftsOk: boolean;
18
+ ftsRebuilt: boolean;
19
+ orphanRefsNulled: number;
20
+ missingEmbeddings: number;
21
+ }
22
+ /**
23
+ * Run all probes and apply safe repairs. Returns a report; never throws.
24
+ * The store argument is loosely typed so this module can be called from
25
+ * maintenance.ts without an import cycle.
26
+ */
27
+ export declare function runIntegrityProbes(store: any): IntegrityReport;
28
+ //# sourceMappingURL=integrity.d.ts.map
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Memory store integrity probes — self-healing checks that run on the
3
+ * janitor's periodic cycle. Each probe is independent and conservative:
4
+ * - reports what it found,
5
+ * - repairs only when the fix is non-destructive,
6
+ * - never throws (logs and continues).
7
+ *
8
+ * Three checks today (the cheap, high-value ones):
9
+ * 1. FTS5 contentless-table integrity → auto-rebuild on failure
10
+ * 2. derived_from references to deleted chunks → nullify the dangling refs
11
+ * 3. chunks with content but no embedding → return count for backfill
12
+ *
13
+ * Graph reachability is intentionally NOT probed here — it lives in
14
+ * graph-store.ts's own health probe, which auto-restarts FalkorDB.
15
+ */
16
+ import pino from 'pino';
17
+ const logger = pino({ name: 'clementine.integrity' });
18
+ /**
19
+ * Run all probes and apply safe repairs. Returns a report; never throws.
20
+ * The store argument is loosely typed so this module can be called from
21
+ * maintenance.ts without an import cycle.
22
+ */
23
+ export function runIntegrityProbes(store) {
24
+ const report = {
25
+ ftsOk: true,
26
+ ftsRebuilt: false,
27
+ orphanRefsNulled: 0,
28
+ missingEmbeddings: 0,
29
+ };
30
+ // 1. FTS5 integrity. Contentless tables can corrupt under specific failure
31
+ // modes (process kill mid-trigger, manual SQL on chunks_fts, etc.).
32
+ // integrity-check returns 'ok' on success; rebuild is the standard fix.
33
+ try {
34
+ const conn = store.conn;
35
+ if (conn) {
36
+ try {
37
+ const row = conn.prepare(`INSERT INTO chunks_fts(chunks_fts) VALUES('integrity-check') RETURNING ''`).get();
38
+ // 'integrity-check' is a no-op insert that throws on failure. If we
39
+ // got a row back, FTS is fine. (Some SQLite builds don't support the
40
+ // RETURNING form on virtual tables — fall back to plain run().)
41
+ void row;
42
+ }
43
+ catch (innerErr) {
44
+ // Try the plain form before declaring failure.
45
+ try {
46
+ conn.prepare(`INSERT INTO chunks_fts(chunks_fts) VALUES('integrity-check')`).run();
47
+ }
48
+ catch {
49
+ report.ftsOk = false;
50
+ logger.warn({ err: innerErr }, 'FTS5 integrity check failed — rebuilding');
51
+ try {
52
+ conn.prepare(`INSERT INTO chunks_fts(chunks_fts) VALUES('rebuild')`).run();
53
+ report.ftsRebuilt = true;
54
+ }
55
+ catch (rebuildErr) {
56
+ logger.warn({ err: rebuildErr }, 'FTS5 rebuild failed');
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ catch (err) {
63
+ logger.warn({ err }, 'FTS integrity probe error');
64
+ }
65
+ // 2. derived_from dangling references. Phase-2 janitor deletes a chunk
66
+ // that was a source for a summary; we keep the summary but the JSON
67
+ // array of source ids may now contain ids that no longer exist. Walk
68
+ // summary chunks, prune missing ids; fully empty array → null.
69
+ try {
70
+ const conn = store.conn;
71
+ if (conn) {
72
+ const summaries = conn.prepare(`SELECT id, derived_from FROM chunks
73
+ WHERE derived_from IS NOT NULL AND derived_from != ''`).all();
74
+ const liveCheck = conn.prepare('SELECT 1 FROM chunks WHERE id = ?');
75
+ const updateStmt = conn.prepare('UPDATE chunks SET derived_from = ? WHERE id = ?');
76
+ for (const s of summaries) {
77
+ let ids;
78
+ try {
79
+ ids = JSON.parse(s.derived_from);
80
+ }
81
+ catch {
82
+ continue;
83
+ }
84
+ if (!Array.isArray(ids))
85
+ continue;
86
+ const live = ids.filter((id) => {
87
+ if (typeof id !== 'number')
88
+ return false;
89
+ return !!liveCheck.get(id);
90
+ });
91
+ if (live.length !== ids.length) {
92
+ updateStmt.run(live.length === 0 ? null : JSON.stringify(live), s.id);
93
+ report.orphanRefsNulled++;
94
+ }
95
+ }
96
+ }
97
+ }
98
+ catch (err) {
99
+ logger.warn({ err }, 'derived_from orphan probe failed');
100
+ }
101
+ // 3. Missing dense embeddings — a counter for the dashboard / next backfill
102
+ // cycle. Doesn't repair (backfill is async + heavy); just surfaces.
103
+ try {
104
+ const conn = store.conn;
105
+ if (conn) {
106
+ const row = conn.prepare(`SELECT COUNT(*) AS c FROM chunks c
107
+ LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
108
+ WHERE sd.chunk_id IS NULL
109
+ AND c.embedding_dense IS NULL
110
+ AND length(c.content) > 0`).get();
111
+ report.missingEmbeddings = row.c;
112
+ }
113
+ }
114
+ catch (err) {
115
+ logger.warn({ err }, 'Missing-embedding probe failed');
116
+ }
117
+ return report;
118
+ }
119
+ //# sourceMappingURL=integrity.js.map
@@ -4,9 +4,30 @@
4
4
  * Runs startup and periodic maintenance so the memory store stays healthy
5
5
  * without manual intervention. New users get this out of the box.
6
6
  *
7
- * Startup: decay salience, prune stale data, backfill embeddings
8
- * Periodic (every 6h): full consolidation cycle + embedding rebuild
7
+ * Startup: decay salience, prune stale data, backfill embeddings, run janitor
8
+ * Periodic (every 6h): full consolidation cycle + embedding rebuild + janitor
9
+ * + idle-gated VACUUM at most once per vacuumIntervalDays (default: 7 days)
9
10
  */
11
+ /**
12
+ * Janitor pass — keeps the store bounded. Safe to call repeatedly.
13
+ * Idempotent within a single run; surfaces totals for logging.
14
+ */
15
+ export declare function runJanitor(store: any): {
16
+ softDeleted: number;
17
+ physicallyDeleted: number;
18
+ outcomesPruned: number;
19
+ extractionsCapped: number;
20
+ };
21
+ /**
22
+ * Run VACUUM if (a) it's been more than vacuumIntervalDays since the last
23
+ * one and (b) the store has been idle for at least vacuumIdleSeconds.
24
+ * Returns null when skipped, otherwise the before/after sizes in bytes and the duration.
25
+ */
26
+ export declare function maybeVacuum(store: any): {
27
+ sizeBeforeBytes: number;
28
+ sizeAfterBytes: number;
29
+ durationMs: number;
30
+ } | null;
10
31
  /**
11
32
  * Run one-time maintenance at daemon startup.
12
33
  * Non-blocking — errors are logged but never thrown.
@@ -4,12 +4,82 @@
4
4
  * Runs startup and periodic maintenance so the memory store stays healthy
5
5
  * without manual intervention. New users get this out of the box.
6
6
  *
7
- * Startup: decay salience, prune stale data, backfill embeddings
8
- * Periodic (every 6h): full consolidation cycle + embedding rebuild
7
+ * Startup: decay salience, prune stale data, backfill embeddings, run janitor
8
+ * Periodic (every 6h): full consolidation cycle + embedding rebuild + janitor
9
+ * + idle-gated VACUUM at most once per vacuumIntervalDays (default: 7 days)
9
10
  */
10
11
  import pino from 'pino';
12
+ import { MEMORY_JANITOR } from '../config.js';
13
+ import { runIntegrityProbes } from './integrity.js';
11
14
  const logger = pino({ name: 'clementine.maintenance' });
12
15
  const PERIODIC_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6 hours
16
+ const VACUUM_META_KEY = 'last_vacuum_at';
17
+ /**
18
+ * Janitor pass — keeps the store bounded. Safe to call repeatedly.
19
+ * Idempotent within a single run; surfaces totals for logging.
20
+ */
21
+ export function runJanitor(store) {
22
+ let softDeleted = 0;
23
+ let physicallyDeleted = 0;
24
+ try {
25
+ const result = store.expireConsolidated?.({
26
+ expireDays: MEMORY_JANITOR.consolidatedExpireDays,
27
+ salienceFloor: MEMORY_JANITOR.consolidatedSalienceFloor,
28
+ graceDays: MEMORY_JANITOR.softDeleteGraceDays,
29
+ });
30
+ if (result) {
31
+ softDeleted = result.softDeleted;
32
+ physicallyDeleted = result.physicallyDeleted;
33
+ }
34
+ }
35
+ catch (err) {
36
+ logger.warn({ err }, 'expireConsolidated failed');
37
+ }
38
+ let outcomesPruned = 0;
39
+ try {
40
+ outcomesPruned = store.pruneOutcomes?.(MEMORY_JANITOR.auxRetentionDays) ?? 0;
41
+ }
42
+ catch (err) {
43
+ logger.warn({ err }, 'pruneOutcomes failed');
44
+ }
45
+ let extractionsCapped = 0;
46
+ try {
47
+ extractionsCapped = store.capExtractions?.(MEMORY_JANITOR.extractionsMaxRows) ?? 0;
48
+ }
49
+ catch (err) {
50
+ logger.warn({ err }, 'capExtractions failed');
51
+ }
52
+ return { softDeleted, physicallyDeleted, outcomesPruned, extractionsCapped };
53
+ }
54
+ /**
55
+ * Run VACUUM if (a) it's been more than vacuumIntervalDays since the last
56
+ * one and (b) the store has been idle for at least vacuumIdleSeconds.
57
+ * Returns null when skipped, otherwise the size delta.
58
+ */
59
+ export function maybeVacuum(store) {
60
+ try {
61
+ const lastIso = store.getMaintenanceMeta?.(VACUUM_META_KEY);
62
+ if (lastIso) {
63
+ const last = new Date(lastIso).getTime();
64
+ const ageMs = Date.now() - last;
65
+ if (ageMs < MEMORY_JANITOR.vacuumIntervalDays * 86_400_000)
66
+ return null;
67
+ }
68
+ const lastActivity = store.lastActivityAt?.();
69
+ if (lastActivity !== null && lastActivity !== undefined) {
70
+ const idleMs = Date.now() - lastActivity;
71
+ if (idleMs < MEMORY_JANITOR.vacuumIdleSeconds * 1000)
72
+ return null;
73
+ }
74
+ const result = store.vacuum?.();
75
+ store.setMaintenanceMeta?.(VACUUM_META_KEY, new Date().toISOString());
76
+ return result ?? null;
77
+ }
78
+ catch (err) {
79
+ logger.warn({ err }, 'VACUUM failed');
80
+ return null;
81
+ }
82
+ }
13
83
  /**
14
84
  * Run one-time maintenance at daemon startup.
15
85
  * Non-blocking — errors are logged but never thrown.
@@ -56,6 +126,32 @@ export async function runStartupMaintenance(store) {
56
126
  catch {
57
127
  // Table may not exist yet — non-fatal
58
128
  }
129
+ // Janitor — bounded growth pass.
130
+ try {
131
+ const result = runJanitor(store);
132
+ if (result.softDeleted || result.physicallyDeleted || result.outcomesPruned || result.extractionsCapped) {
133
+ logger.info(result, 'Janitor pass complete');
134
+ }
135
+ }
136
+ catch (err) {
137
+ logger.warn({ err }, 'Startup janitor failed');
138
+ }
139
+ // Embedding warm-up — pre-embed the most-cited chunks in the background so
140
+ // the first retrievals after startup don't pay cold-start latency. Fire
141
+ // and forget; never blocks startup.
142
+ if (typeof store.warmDenseEmbeddings === 'function') {
143
+ void (async () => {
144
+ try {
145
+ const result = await store.warmDenseEmbeddings(200);
146
+ if (result.warmed > 0) {
147
+ logger.info(result, 'Embedding warm-up complete');
148
+ }
149
+ }
150
+ catch (err) {
151
+ logger.warn({ err }, 'Embedding warm-up failed');
152
+ }
153
+ })();
154
+ }
59
155
  logger.info({ durationMs: Date.now() - start }, 'Startup maintenance complete');
60
156
  }
61
157
  /**
@@ -104,7 +200,7 @@ export function startPeriodicMaintenance(store, llmCall) {
104
200
  logger.warn({ err }, 'Post-consolidation embedding build failed');
105
201
  }
106
202
  }
107
- // 5. Extraction log pruning
203
+ // 5. Extraction log pruning (legacy 90-day rule retained alongside cap)
108
204
  try {
109
205
  const conn = store.conn;
110
206
  if (conn) {
@@ -114,6 +210,47 @@ export function startPeriodicMaintenance(store, llmCall) {
114
210
  }
115
211
  }
116
212
  catch { /* non-fatal */ }
213
+ // 6. Janitor — bounded growth.
214
+ try {
215
+ const result = runJanitor(store);
216
+ if (result.softDeleted || result.physicallyDeleted || result.outcomesPruned || result.extractionsCapped) {
217
+ logger.info(result, 'Janitor pass complete');
218
+ }
219
+ }
220
+ catch (err) {
221
+ logger.warn({ err }, 'Periodic janitor failed');
222
+ }
223
+ // 6b. Integrity probes — FTS health, orphan derived_from, embedding gaps.
224
+ try {
225
+ const report = runIntegrityProbes(store);
226
+ // Persist for the dashboard so the "last integrity check" surface
227
+ // doesn't depend on log scraping.
228
+ try {
229
+ store.setMaintenanceMeta?.('last_integrity_report', JSON.stringify({ ...report, ranAt: new Date().toISOString() }));
230
+ }
231
+ catch { /* meta write is best-effort */ }
232
+ if (!report.ftsOk || report.ftsRebuilt || report.orphanRefsNulled > 0 || report.missingEmbeddings > 0) {
233
+ logger.info(report, 'Integrity probes complete');
234
+ }
235
+ }
236
+ catch (err) {
237
+ logger.warn({ err }, 'Integrity probes failed');
238
+ }
239
+ // 7. VACUUM — idle-gated, at most once per vacuumIntervalDays.
240
+ try {
241
+ const vac = maybeVacuum(store);
242
+ if (vac) {
243
+ logger.info({
244
+ sizeBeforeBytes: vac.sizeBeforeBytes,
245
+ sizeAfterBytes: vac.sizeAfterBytes,
246
+ reclaimedBytes: vac.sizeBeforeBytes - vac.sizeAfterBytes,
247
+ durationMs: vac.durationMs,
248
+ }, 'VACUUM complete');
249
+ }
250
+ }
251
+ catch (err) {
252
+ logger.warn({ err }, 'Periodic VACUUM failed');
253
+ }
117
254
  logger.info({ durationMs: Date.now() - start }, 'Periodic maintenance complete');
118
255
  };
119
256
  return setInterval(runCycle, PERIODIC_INTERVAL_MS);
@@ -33,6 +33,8 @@ export interface UserModelProposals {
33
33
  /** Raw model output, for debugging. */
34
34
  rawResponse?: string;
35
35
  }
36
- export declare function seedUserModelFromMemory(store: SeedSourceStore, llmCall: (prompt: string) => Promise<string>): Promise<UserModelProposals>;
36
+ export declare function seedUserModelFromMemory(store: SeedSourceStore, llmCall: (prompt: string) => Promise<string>, opts?: {
37
+ memoryFilePath?: string;
38
+ }): Promise<UserModelProposals>;
37
39
  export {};
38
40
  //# sourceMappingURL=seed-user-model.d.ts.map
@@ -18,13 +18,13 @@ const logger = pino({ name: 'clementine.seed-user-model' });
18
18
  const MAX_MEMORY_MD_CHARS = 4000;
19
19
  const MAX_CHUNK_CHARS = 4000;
20
20
  const MAX_SUMMARIES_CHARS = 1500;
21
- function gatherCorpus(store) {
21
+ function gatherCorpus(store, memoryFilePath) {
22
22
  const parts = [];
23
23
  let sourceCount = 0;
24
24
  // 1. MEMORY.md — highest-signal source, the agent's curated profile note
25
- if (existsSync(MEMORY_FILE)) {
25
+ if (existsSync(memoryFilePath)) {
26
26
  try {
27
- const md = readFileSync(MEMORY_FILE, 'utf-8').slice(0, MAX_MEMORY_MD_CHARS);
27
+ const md = readFileSync(memoryFilePath, 'utf-8').slice(0, MAX_MEMORY_MD_CHARS);
28
28
  if (md.trim()) {
29
29
  parts.push(`## MEMORY.md\n${md}`);
30
30
  sourceCount++;
@@ -147,8 +147,9 @@ function parseProposals(raw) {
147
147
  }
148
148
  return out;
149
149
  }
150
- export async function seedUserModelFromMemory(store, llmCall) {
151
- const { corpus, sourceCount } = gatherCorpus(store);
150
+ export async function seedUserModelFromMemory(store, llmCall, opts = {}) {
151
+ const memFile = opts.memoryFilePath ?? MEMORY_FILE;
152
+ const { corpus, sourceCount } = gatherCorpus(store, memFile);
152
153
  if (!corpus.trim() || sourceCount === 0) {
153
154
  return {
154
155
  user_facts: '', goals: '', relationships: '', agent_persona: '',