clementine-agent 1.0.32 → 1.0.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,14 +33,15 @@ export async function assembleContext(options) {
33
33
  priority: 0,
34
34
  maxChars: 500,
35
35
  minRemainingBudget: 0,
36
- resolve: () => {
36
+ resolve: (budget) => {
37
37
  if (!fs.existsSync(idPath))
38
38
  return '';
39
39
  try {
40
40
  const content = fs.readFileSync(idPath, 'utf-8').trim();
41
41
  if (!content)
42
42
  return '';
43
- return `## Identity\n\n${content}`;
43
+ const block = `## Identity\n\n${content}`;
44
+ return block.length > budget ? block.slice(0, budget) : block;
44
45
  }
45
46
  catch {
46
47
  return '';
@@ -56,14 +57,15 @@ export async function assembleContext(options) {
56
57
  priority: 1,
57
58
  maxChars: isAutonomous ? 1000 : 2000,
58
59
  minRemainingBudget: 0,
59
- resolve: () => {
60
+ resolve: (budget) => {
60
61
  if (!fs.existsSync(wmPath))
61
62
  return '';
62
63
  try {
63
64
  const content = fs.readFileSync(wmPath, 'utf-8').trim();
64
65
  if (!content)
65
66
  return '';
66
- return `## Working Memory (scratchpad)\n\n${content}`;
67
+ const block = `## Working Memory (scratchpad)\n\n${content}`;
68
+ return block.length > budget ? block.slice(0, budget) : block;
67
69
  }
68
70
  catch {
69
71
  return '';
@@ -79,10 +81,15 @@ export async function assembleContext(options) {
79
81
  priority: 2,
80
82
  maxChars: isAutonomous ? 1000 : 2000,
81
83
  minRemainingBudget: 500,
82
- resolve: () => skillCtx,
84
+ resolve: (budget) => skillCtx.length > budget ? skillCtx.slice(0, budget) : skillCtx,
83
85
  });
84
86
  }
85
87
  // Slot 3: Memory search results (core recall)
88
+ // formatResultsForPrompt respects the effective budget and breaks on
89
+ // entry boundaries (not mid-string), so we don't need the outer
90
+ // slice-truncation to kick in here. Previously this slot was double-
91
+ // truncated: formatter used its own 8000 cap, then the outer loop cut
92
+ // further by Math.min(maxChars, remaining), chopping entries in half.
86
93
  if (options.memoryResults && options.memoryResults.length > 0) {
87
94
  const results = options.memoryResults;
88
95
  slots.push({
@@ -90,10 +97,7 @@ export async function assembleContext(options) {
90
97
  priority: 3,
91
98
  maxChars: isAutonomous ? 2000 : 8000,
92
99
  minRemainingBudget: 200,
93
- resolve: () => {
94
- // formatResultsForPrompt already handles truncation within its own budget
95
- return formatResultsForPrompt(results, isAutonomous ? 2000 : 8000);
96
- },
100
+ resolve: (budget) => formatResultsForPrompt(results, budget),
97
101
  });
98
102
  }
99
103
  // Slot 4: Graph relationships (supplementary)
@@ -104,7 +108,7 @@ export async function assembleContext(options) {
104
108
  priority: 4,
105
109
  maxChars: 2000,
106
110
  minRemainingBudget: 500,
107
- resolve: () => graphCtx,
111
+ resolve: (budget) => graphCtx.length > budget ? graphCtx.slice(0, budget) : graphCtx,
108
112
  });
109
113
  }
110
114
  // Sort by priority (lower number = higher priority)
@@ -121,18 +125,27 @@ export async function assembleContext(options) {
121
125
  continue;
122
126
  }
123
127
  try {
124
- let content = await slot.resolve();
128
+ // The slot's effective budget is the smaller of its own maxChars and
129
+ // what's actually remaining across all slots. Passed into resolve so
130
+ // the slot produces right-sized content up front, not a mid-entry
131
+ // truncation after the fact.
132
+ const effectiveBudget = Math.min(slot.maxChars, remaining);
133
+ const content = await slot.resolve(effectiveBudget);
125
134
  if (!content) {
126
135
  skipped.push(slot.name);
127
136
  continue;
128
137
  }
129
- // Truncate to the smaller of slot max and remaining budget
130
- const limit = Math.min(slot.maxChars, remaining);
131
- if (content.length > limit) {
132
- content = content.slice(0, limit) + '\n...(truncated)';
138
+ // Safety net: if resolve() ignored the budget and returned too much,
139
+ // clip at a line boundary rather than a character boundary so we don't
140
+ // leave a malformed half-block in the prompt.
141
+ let finalContent = content;
142
+ if (content.length > effectiveBudget) {
143
+ const trimmed = content.slice(0, effectiveBudget);
144
+ const lastNewline = trimmed.lastIndexOf('\n');
145
+ finalContent = (lastNewline > 0 ? trimmed.slice(0, lastNewline) : trimmed) + '\n...(truncated)';
133
146
  }
134
- parts.push(content);
135
- remaining -= content.length;
147
+ parts.push(finalContent);
148
+ remaining -= finalContent.length;
136
149
  included.push(slot.name);
137
150
  }
138
151
  catch {
@@ -35,4 +35,11 @@ export declare function deserializeEmbedding(buf: Buffer): Float32Array;
35
35
  * Check if the embedding system is ready (vocabulary loaded with sufficient words).
36
36
  */
37
37
  export declare function isReady(): boolean;
38
+ /**
39
+ * Stable hash of the current vocabulary's word→dimension mapping. When this
40
+ * changes, previously-stored embedding vectors become silently incorrect
41
+ * because dimension N now represents a different word. Callers (MemoryStore
42
+ * backfill) use this hash to detect staleness and invalidate stored vectors.
43
+ */
44
+ export declare function getVocabHash(): string;
38
45
  //# sourceMappingURL=embeddings.d.ts.map
@@ -9,6 +9,7 @@
9
9
  * Query-time: embed the query, compute cosine similarity against stored vectors.
10
10
  */
11
11
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
12
+ import { createHash } from 'node:crypto';
12
13
  import path from 'node:path';
13
14
  import pino from 'pino';
14
15
  import { BASE_DIR } from '../config.js';
@@ -163,6 +164,19 @@ export function isReady() {
163
164
  loadVocab();
164
165
  return vocabWords.length >= 50; // need at least 50 vocab words
165
166
  }
167
+ /**
168
+ * Stable hash of the current vocabulary's word→dimension mapping. When this
169
+ * changes, previously-stored embedding vectors become silently incorrect
170
+ * because dimension N now represents a different word. Callers (MemoryStore
171
+ * backfill) use this hash to detect staleness and invalidate stored vectors.
172
+ */
173
+ export function getVocabHash() {
174
+ loadVocab();
175
+ if (vocabWords.length === 0)
176
+ return '';
177
+ // Order-sensitive: dimension assignment depends on insertion order.
178
+ return createHash('sha1').update(vocabWords.join('|')).digest('hex').slice(0, 16);
179
+ }
166
180
  const STOP_WORDS = new Set([
167
181
  'the', 'be', 'to', 'of', 'and', 'in', 'that', 'have', 'it', 'for',
168
182
  'not', 'on', 'with', 'he', 'as', 'you', 'do', 'at', 'this', 'but',
@@ -61,6 +61,22 @@ export declare class GraphStore {
61
61
  syncFromVault(vaultDir: string, agentsDir: string): Promise<GraphSyncStats>;
62
62
  extractAndStoreRelationships(triplets: RelationshipTriplet[]): Promise<void>;
63
63
  enrichWithGraphContext(entityIds: string[], _maxHops?: number): Promise<string>;
64
+ /**
65
+ * Drop Note nodes whose slug isn't in the caller-provided set of valid IDs.
66
+ * Wikilinks into deleted vault files leave dangling Note nodes with
67
+ * MENTIONS edges pointing at them — this cleans those up.
68
+ *
69
+ * Deliberately NOT auto-scheduled: blast radius is significant, and the
70
+ * caller (dashboard action, MCP tool, manual script) should supply the
71
+ * authoritative valid-IDs set. Runs DETACH DELETE so incoming edges go
72
+ * with the node.
73
+ *
74
+ * Returns counts of what was removed.
75
+ */
76
+ invalidateOrphanedNotes(validIds: Set<string>): Promise<{
77
+ scanned: number;
78
+ deleted: number;
79
+ }>;
64
80
  }
65
81
  export declare function getSharedGraphStore(persistenceDir: string): Promise<GraphStore | null>;
66
82
  //# sourceMappingURL=graph-store.d.ts.map
@@ -580,6 +580,53 @@ export class GraphStore {
580
580
  return '';
581
581
  return '\n## Relationship Context\n' + lines.join('\n');
582
582
  }
583
+ /**
584
+ * Drop Note nodes whose slug isn't in the caller-provided set of valid IDs.
585
+ * Wikilinks into deleted vault files leave dangling Note nodes with
586
+ * MENTIONS edges pointing at them — this cleans those up.
587
+ *
588
+ * Deliberately NOT auto-scheduled: blast radius is significant, and the
589
+ * caller (dashboard action, MCP tool, manual script) should supply the
590
+ * authoritative valid-IDs set. Runs DETACH DELETE so incoming edges go
591
+ * with the node.
592
+ *
593
+ * Returns counts of what was removed.
594
+ */
595
+ async invalidateOrphanedNotes(validIds) {
596
+ if (!this.available)
597
+ return { scanned: 0, deleted: 0 };
598
+ if (validIds.size === 0) {
599
+ // Defense: refuse to run with an empty set — would delete every Note.
600
+ logger.warn('invalidateOrphanedNotes called with empty validIds — refusing to run');
601
+ return { scanned: 0, deleted: 0 };
602
+ }
603
+ let scanned = 0;
604
+ let deleted = 0;
605
+ try {
606
+ const res = await this.graph.query('MATCH (n:Note) RETURN n.id AS id');
607
+ const rows = (res.data ?? []);
608
+ scanned = rows.length;
609
+ for (const row of rows) {
610
+ const id = row.id;
611
+ if (!id || validIds.has(id))
612
+ continue;
613
+ try {
614
+ await this.graph.query('MATCH (n:Note {id: $id}) DETACH DELETE n', { params: { id } });
615
+ deleted++;
616
+ }
617
+ catch (err) {
618
+ logger.debug({ err, id }, 'Orphan Note deletion failed');
619
+ }
620
+ }
621
+ }
622
+ catch (err) {
623
+ logger.warn({ err }, 'invalidateOrphanedNotes query failed');
624
+ }
625
+ if (deleted > 0) {
626
+ logger.info({ scanned, deleted, validIdsSize: validIds.size }, 'Invalidated orphan Note nodes');
627
+ }
628
+ return { scanned, deleted };
629
+ }
583
630
  }
584
631
  // ── Shared Client Helper ───────────────────────────────────────────────
585
632
  /**
@@ -174,10 +174,15 @@ export declare class MemoryStore {
174
174
  salienceThreshold?: number;
175
175
  accessLogRetentionDays?: number;
176
176
  transcriptRetentionDays?: number;
177
+ behavioralRetentionDays?: number;
177
178
  }): {
178
179
  episodicPruned: number;
179
180
  accessLogPruned: number;
180
181
  transcriptsPruned: number;
182
+ skillUsagePruned: number;
183
+ feedbackPruned: number;
184
+ reflectionsPruned: number;
185
+ usageLogPruned: number;
181
186
  };
182
187
  /**
183
188
  * Get chunks within a date range, ordered chronologically.
@@ -533,6 +538,7 @@ export declare class MemoryStore {
533
538
  buildEmbeddings(): {
534
539
  vocabSize: number;
535
540
  backfilled: number;
541
+ invalidated: number;
536
542
  };
537
543
  /**
538
544
  * Delete all chunks, wikilinks, file hash, and access log for a given file.
@@ -10,9 +10,11 @@
10
10
  * (single-user, one MCP subprocess handles all writes).
11
11
  */
12
12
  import { createHash } from 'node:crypto';
13
- import { appendFileSync, existsSync, mkdirSync, readFileSync, readdirSync, statSync } from 'node:fs';
13
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
14
14
  import path from 'node:path';
15
15
  import Database from 'better-sqlite3';
16
+ import { BASE_DIR } from '../config.js';
17
+ import { temporalDecay } from './search.js';
16
18
  import * as embeddingsModule from './embeddings.js';
17
19
  import { chunkFile } from './chunker.js';
18
20
  import { mmrRerank } from './mmr.js';
@@ -184,6 +186,24 @@ export class MemoryStore {
184
186
  catch {
185
187
  // Index already exists
186
188
  }
189
+ // Hot-path indices: every chat turn sorts/filters chunks by updated_at
190
+ // (recency) and by (agent_slug, updated_at) for agent-scoped recent
191
+ // context. Without these the queries do full table scans.
192
+ try {
193
+ this.conn.exec('CREATE INDEX idx_chunks_updated_at ON chunks(updated_at DESC)');
194
+ }
195
+ catch { /* already exists */ }
196
+ try {
197
+ this.conn.exec('CREATE INDEX idx_chunks_agent_updated ON chunks(agent_slug, updated_at DESC)');
198
+ }
199
+ catch { /* already exists */ }
200
+ // Embedding filter — searchByEmbedding's base predicate is
201
+ // `embedding IS NOT NULL`; a partial index turns that into an
202
+ // index-only scan for the candidate set.
203
+ try {
204
+ this.conn.exec('CREATE INDEX idx_chunks_has_embedding ON chunks(id) WHERE embedding IS NOT NULL');
205
+ }
206
+ catch { /* already exists */ }
187
207
  // Access log table for salience tracking
188
208
  this.conn.exec(`
189
209
  CREATE TABLE IF NOT EXISTS access_log (
@@ -581,32 +601,35 @@ export class MemoryStore {
581
601
  stats.filesDeleted++;
582
602
  }
583
603
  }
584
- // Process changed/new files
585
- for (const filePath of filesToUpdate) {
604
+ // Process changed/new files inside a single transaction so a 1000-file
605
+ // sync produces one WAL commit instead of 1000+. Prepared statements are
606
+ // hoisted out of the loop — better-sqlite3 caches by SQL text anyway, but
607
+ // the explicit handle avoids re-parsing and makes the intent clear.
608
+ const insertStmt = this.conn.prepare(`INSERT INTO chunks
609
+ (source_file, section, content, chunk_type, frontmatter_json, content_hash, category, topic)
610
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
611
+ const upsertHashStmt = this.conn.prepare(`INSERT OR REPLACE INTO file_hashes (rel_path, content_hash, last_synced)
612
+ VALUES (?, ?, datetime('now'))`);
613
+ const processFile = (filePath) => {
586
614
  const rel = path.relative(this.vaultDir, filePath);
587
615
  const chunks = chunkFile(filePath, this.vaultDir);
588
616
  if (chunks.length === 0)
589
- continue;
590
- // Delete old chunks for this file
617
+ return;
591
618
  this.deleteFileChunks(rel);
592
- // Insert new chunks
593
- const insertStmt = this.conn.prepare(`INSERT INTO chunks
594
- (source_file, section, content, chunk_type, frontmatter_json, content_hash, category, topic)
595
- VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
596
619
  for (const chunk of chunks) {
597
620
  insertStmt.run(chunk.sourceFile, chunk.section, chunk.content, chunk.chunkType, chunk.frontmatterJson, chunk.contentHash, chunk.category ?? null, chunk.topic ?? null);
598
621
  }
599
- // Parse and index wikilinks
600
622
  this.indexWikilinks(rel, filePath);
601
- // Update file hash
602
623
  const bytes = readFileSync(filePath);
603
624
  const fileHash = createHash('sha256').update(bytes).digest('hex').slice(0, 16);
604
- this.conn
605
- .prepare(`INSERT OR REPLACE INTO file_hashes (rel_path, content_hash, last_synced)
606
- VALUES (?, ?, datetime('now'))`)
607
- .run(rel, fileHash);
625
+ upsertHashStmt.run(rel, fileHash);
608
626
  stats.filesUpdated++;
609
- }
627
+ };
628
+ const processAll = this.conn.transaction((files) => {
629
+ for (const f of files)
630
+ processFile(f);
631
+ });
632
+ processAll(filesToUpdate);
610
633
  // Count total chunks
611
634
  const countRow = this.conn
612
635
  .prepare('SELECT COUNT(*) as cnt FROM chunks')
@@ -711,20 +734,30 @@ export class MemoryStore {
711
734
  * Get the most recently updated chunks.
712
735
  */
713
736
  getRecentChunks(limit = 5, agentSlug, filters, strict = false) {
714
- const mapRow = (row) => ({
715
- sourceFile: row.source_file,
716
- section: row.section,
717
- content: row.content,
718
- score: 0,
719
- chunkType: row.chunk_type,
720
- matchType: 'recency',
721
- lastUpdated: row.updated_at ?? '',
722
- chunkId: row.id,
723
- salience: row.salience ?? 0,
724
- agentSlug: row.agent_slug ?? null,
725
- category: row.category,
726
- topic: row.topic,
727
- });
737
+ const now = Date.now();
738
+ const mapRow = (row) => {
739
+ // Score recency by exponential decay (half-life 30 days). Previously
740
+ // every recent row got score=0, which meant MMR's min-max normalization
741
+ // ranked them at the floor — a two-day-old chunk and a six-month-old
742
+ // chunk were indistinguishable. Decay lets recent results actually
743
+ // compete with FTS and vector matches during rerank.
744
+ const daysOld = row.updated_at ? (now - Date.parse(row.updated_at)) / 86_400_000 : 0;
745
+ const decayed = temporalDecay(daysOld);
746
+ return {
747
+ sourceFile: row.source_file,
748
+ section: row.section,
749
+ content: row.content,
750
+ score: decayed,
751
+ chunkType: row.chunk_type,
752
+ matchType: 'recency',
753
+ lastUpdated: row.updated_at ?? '',
754
+ chunkId: row.id,
755
+ salience: row.salience ?? 0,
756
+ agentSlug: row.agent_slug ?? null,
757
+ category: row.category,
758
+ topic: row.topic,
759
+ };
760
+ };
728
761
  // Build optional WHERE clauses for category/topic
729
762
  let filterSql = '';
730
763
  const filterParams = [];
@@ -838,17 +871,20 @@ export class MemoryStore {
838
871
  * Scans chunks that have stored embeddings and returns top matches.
839
872
  */
840
873
  searchByEmbedding(queryVec, limit, agentSlug, strict = false) {
841
- const rows = this.conn
842
- .prepare(`SELECT id, source_file, section, content, chunk_type, embedding, salience, agent_slug, updated_at, category, topic
843
- FROM chunks
844
- WHERE embedding IS NOT NULL`)
845
- .all();
874
+ // Push agent-isolation into SQL so we don't deserialize embeddings for
875
+ // rows we'd immediately reject. Soft isolation (non-strict) still loads
876
+ // all embeddings because the boost is applied post-scoring, but at
877
+ // least strict mode no longer scans foreign-agent chunks.
878
+ let sql = 'SELECT id, source_file, section, content, chunk_type, embedding, salience, agent_slug, updated_at, category, topic FROM chunks WHERE embedding IS NOT NULL';
879
+ const params = [];
880
+ if (strict && agentSlug) {
881
+ sql += ' AND (agent_slug IS NULL OR agent_slug = ?)';
882
+ params.push(agentSlug);
883
+ }
884
+ const rows = this.conn.prepare(sql).all(...params);
846
885
  const scored = [];
847
886
  for (const row of rows) {
848
887
  try {
849
- // Hard isolation: skip chunks from other agents (allow own + global)
850
- if (strict && agentSlug && row.agent_slug !== null && row.agent_slug !== agentSlug)
851
- continue;
852
888
  const vec = embeddingsModule.deserializeEmbedding(row.embedding);
853
889
  const sim = embeddingsModule.cosineSimilarity(queryVec, vec);
854
890
  if (sim < 0.15)
@@ -1148,6 +1184,10 @@ export class MemoryStore {
1148
1184
  const threshold = opts.salienceThreshold ?? 0.01;
1149
1185
  const accessRetention = opts.accessLogRetentionDays ?? 60;
1150
1186
  const transcriptRetention = opts.transcriptRetentionDays ?? 90;
1187
+ // Behavioral telemetry kept longer than transcripts so the feedback loop
1188
+ // (getFeedbackStats, getBehavioralPatterns, getSkillsToSuppress) has a
1189
+ // wide enough window to aggregate meaningful signal.
1190
+ const behavioralRetention = opts.behavioralRetentionDays ?? 180;
1151
1191
  // Prune stale episodic chunks (not vault-sourced content)
1152
1192
  const episodicResult = this.conn
1153
1193
  .prepare(`DELETE FROM chunks
@@ -1167,10 +1207,30 @@ export class MemoryStore {
1167
1207
  .prepare(`DELETE FROM transcripts
1168
1208
  WHERE created_at < datetime('now', ?)`)
1169
1209
  .run(`-${transcriptRetention} days`);
1210
+ // Behavioral telemetry pruning — these tables were previously unbounded.
1211
+ // Each is append-only, so a rolling window is safe; aggregate stats
1212
+ // consume the window directly rather than historical totals.
1213
+ const skillUsageResult = this.conn
1214
+ .prepare(`DELETE FROM skill_usage WHERE retrieved_at < datetime('now', ?)`)
1215
+ .run(`-${behavioralRetention} days`);
1216
+ const feedbackResult = this.conn
1217
+ .prepare(`DELETE FROM feedback WHERE created_at < datetime('now', ?)`)
1218
+ .run(`-${behavioralRetention} days`);
1219
+ const reflectionsResult = this.conn
1220
+ .prepare(`DELETE FROM session_reflections WHERE created_at < datetime('now', ?)`)
1221
+ .run(`-${behavioralRetention} days`);
1222
+ // Usage log is denser (per-exchange) — keep a shorter window.
1223
+ const usageResult = this.conn
1224
+ .prepare(`DELETE FROM usage_log WHERE created_at < datetime('now', ?)`)
1225
+ .run(`-${Math.min(behavioralRetention, 90)} days`);
1170
1226
  return {
1171
1227
  episodicPruned: episodicResult.changes,
1172
1228
  accessLogPruned: accessResult.changes,
1173
1229
  transcriptsPruned: transcriptResult.changes,
1230
+ skillUsagePruned: skillUsageResult.changes,
1231
+ feedbackPruned: feedbackResult.changes,
1232
+ reflectionsPruned: reflectionsResult.changes,
1233
+ usageLogPruned: usageResult.changes,
1174
1234
  };
1175
1235
  }
1176
1236
  // ── Timeline Query ─────────────────────────────────────────────
@@ -2045,25 +2105,55 @@ export class MemoryStore {
2045
2105
  .prepare('SELECT id, content FROM chunks')
2046
2106
  .all();
2047
2107
  if (rows.length === 0)
2048
- return { vocabSize: 0, backfilled: 0 };
2108
+ return { vocabSize: 0, backfilled: 0, invalidated: 0 };
2109
+ // Capture prior vocab hash BEFORE rebuild. If buildVocab produces a
2110
+ // different word→dimension mapping, previously-stored embedding vectors
2111
+ // become silently wrong (dimension N now represents a different word).
2112
+ const hashFile = path.join(BASE_DIR, '.embedding-vocab.hash');
2113
+ let priorHash = '';
2114
+ try {
2115
+ if (existsSync(hashFile))
2116
+ priorHash = readFileSync(hashFile, 'utf-8').trim();
2117
+ }
2118
+ catch { /* first run */ }
2049
2119
  // Build vocabulary from entire corpus (including consolidated summaries)
2050
2120
  embeddingsModule.buildVocab(rows.map((r) => r.content));
2051
2121
  if (!embeddingsModule.isReady())
2052
- return { vocabSize: 0, backfilled: 0 };
2122
+ return { vocabSize: 0, backfilled: 0, invalidated: 0 };
2123
+ // If the vocab shifted, invalidate every stored vector so they re-embed
2124
+ // against the new word→dim mapping. Without this, old vectors silently
2125
+ // mismatch query vectors and cosine similarity returns nonsense.
2126
+ const newHash = embeddingsModule.getVocabHash();
2127
+ let invalidated = 0;
2128
+ if (priorHash && priorHash !== newHash) {
2129
+ const res = this.conn.prepare('UPDATE chunks SET embedding = NULL WHERE embedding IS NOT NULL').run();
2130
+ invalidated = res.changes;
2131
+ // Count is returned in the result object — callers (maintenance cycle)
2132
+ // log it there. No local logger in this file to avoid the import.
2133
+ }
2134
+ try {
2135
+ writeFileSync(hashFile, newHash);
2136
+ }
2137
+ catch { /* non-fatal */ }
2053
2138
  // Backfill embeddings for all chunks that don't have one
2054
2139
  const missing = this.conn
2055
2140
  .prepare('SELECT id, content FROM chunks WHERE embedding IS NULL')
2056
2141
  .all();
2057
2142
  const updateStmt = this.conn.prepare('UPDATE chunks SET embedding = ? WHERE id = ?');
2058
2143
  let backfilled = 0;
2059
- for (const row of missing) {
2060
- const vec = embeddingsModule.embed(row.content);
2061
- if (vec) {
2062
- updateStmt.run(embeddingsModule.serializeEmbedding(vec), row.id);
2063
- backfilled++;
2144
+ // Wrap backfill in a transaction — potentially thousands of UPDATEs
2145
+ // per vocab shift, and a single WAL commit is dramatically faster.
2146
+ const backfillAll = this.conn.transaction((items) => {
2147
+ for (const row of items) {
2148
+ const vec = embeddingsModule.embed(row.content);
2149
+ if (vec) {
2150
+ updateStmt.run(embeddingsModule.serializeEmbedding(vec), row.id);
2151
+ backfilled++;
2152
+ }
2064
2153
  }
2065
- }
2066
- return { vocabSize: rows.length, backfilled };
2154
+ });
2155
+ backfillAll(missing);
2156
+ return { vocabSize: rows.length, backfilled, invalidated };
2067
2157
  }
2068
2158
  // ── Helpers ───────────────────────────────────────────────────────
2069
2159
  /**
@@ -86,10 +86,15 @@ export type MemoryStoreType = {
86
86
  salienceThreshold?: number;
87
87
  accessLogRetentionDays?: number;
88
88
  transcriptRetentionDays?: number;
89
+ behavioralRetentionDays?: number;
89
90
  }): {
90
91
  episodicPruned: number;
91
92
  accessLogPruned: number;
92
93
  transcriptsPruned: number;
94
+ skillUsagePruned: number;
95
+ feedbackPruned: number;
96
+ reflectionsPruned: number;
97
+ usageLogPruned: number;
93
98
  };
94
99
  checkDuplicate(content: string, sourceFile?: string): {
95
100
  isDuplicate: boolean;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.32",
3
+ "version": "1.0.34",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",