clementine-agent 1.1.20 → 1.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -2257,6 +2257,223 @@ agentCmd
2257
2257
  const memoryCmd = program
2258
2258
  .command('memory')
2259
2259
  .description('Search and manage memory');
2260
// `clementine memory status` — prints a summary of the memory store, either as
// raw JSON (--json) or as a colourised human-readable report.
memoryCmd
    .command('status')
    .description('Show memory store stats — chunk count, embeddings coverage, agent/category breakdown, salience')
    .option('--json', 'Emit machine-readable JSON')
    .action(async (opts) => {
        // ANSI escape sequences used to style the report.
        const bold = '\x1b[1m';
        const dim = '\x1b[0;90m';
        const cyan = '\x1b[0;36m';
        const reset = '\x1b[0m';
        // Only the first ROW_CAP per-agent / per-category rows are printed.
        const ROW_CAP = 10;
        // One breakdown row: label padded to 28 columns, count right-aligned in 8.
        const printRow = (label, count) => {
            console.log(` ${cyan}${label.padEnd(28)}${reset}${count.toLocaleString().padStart(8)}`);
        };
        try {
            const { MemoryStore } = await import('../memory/store.js');
            const vaultDir = path.join(BASE_DIR, 'vault');
            const dbPath = path.join(vaultDir, '.memory.db');
            const stats = new MemoryStore(dbPath, vaultDir).getMemoryStats();
            if (opts.json) {
                console.log(JSON.stringify(stats, null, 2));
                return;
            }
            // Embedding coverage as a percentage; an empty store reports 0.0.
            let pct = '0.0';
            if (stats.totalChunks > 0) {
                pct = ((stats.chunksWithEmbeddings / stats.totalChunks) * 100).toFixed(1);
            }
            console.log();
            console.log(` ${bold}Memory store${reset} ${dim}${dbPath}${reset}`);
            console.log();
            console.log(` Total chunks: ${bold}${stats.totalChunks.toLocaleString()}${reset}`);
            console.log(` With embeddings: ${stats.chunksWithEmbeddings.toLocaleString()} ${dim}(${pct}%, TF-IDF 512-dim)${reset}`);
            console.log(` Pinned (manual): ${stats.pinnedChunks}`);
            console.log(` Avg salience: ${stats.avgSalience.toFixed(3)} ${dim}(0 = no access boost; >1 = strong reinforcement)${reset}`);
            if (stats.oldestUpdated) {
                // Timestamps are trimmed to their first 10 characters (the date part).
                console.log(` Date range: ${stats.oldestUpdated.slice(0, 10)} → ${stats.newestUpdated?.slice(0, 10)}`);
            }
            console.log();
            console.log(` ${bold}Per agent${reset}`);
            stats.perAgent.slice(0, ROW_CAP).forEach((row) => printRow(row.agentSlug, row.count));
            const hiddenAgents = stats.perAgent.length - ROW_CAP;
            if (hiddenAgents > 0) {
                console.log(` ${dim}…and ${hiddenAgents} more${reset}`);
            }
            console.log();
            console.log(` ${bold}Per category${reset}`);
            stats.perCategory.slice(0, ROW_CAP).forEach((row) => printRow(row.category, row.count));
            console.log();
        }
        catch (err) {
            console.error(` Error reading memory stats: ${err}`);
            process.exit(1);
        }
    });
2311
// `clementine memory pin <chunkId>` — sets the manual pin flag on one chunk.
memoryCmd
    .command('pin <chunkId>')
    .description('Pin a chunk — gives its score a 2x boost in recall (use chunk IDs from `memory search`)')
    .action(async (chunkIdStr) => {
        const GREEN = '\x1b[0;32m';
        const RED = '\x1b[0;31m';
        const RESET = '\x1b[0m';
        // parseInt alone accepts trailing garbage ("12abc" → 12, "3.9" → 3), which
        // would silently pin a chunk the user never named. Require plain digits.
        const trimmedId = String(chunkIdStr).trim();
        const chunkId = /^\d+$/.test(trimmedId) ? Number.parseInt(trimmedId, 10) : Number.NaN;
        if (!Number.isSafeInteger(chunkId) || chunkId <= 0) {
            console.error(` ${RED}Invalid chunk id${RESET}: "${chunkIdStr}". Use IDs from \`clementine memory search\`.`);
            process.exit(1);
        }
        try {
            const { MemoryStore } = await import('../memory/store.js');
            const VAULT_DIR = path.join(BASE_DIR, 'vault');
            const DB_PATH = path.join(VAULT_DIR, '.memory.db');
            const store = new MemoryStore(DB_PATH, VAULT_DIR);
            // setPinned returns false when no row was updated.
            const ok = store.setPinned(chunkId, true);
            if (!ok) {
                console.error(` ${RED}Chunk ${chunkId} not found.${RESET}`);
                process.exit(1);
            }
            console.log(` ${GREEN}✓${RESET} Pinned chunk ${chunkId}. It now gets a 2× boost in memory_recall.`);
        }
        catch (err) {
            console.error(` Error pinning chunk: ${err}`);
            process.exit(1);
        }
    });
2340
// `clementine memory unpin <chunkId>` — clears the manual pin flag on one chunk.
memoryCmd
    .command('unpin <chunkId>')
    .description('Unpin a chunk — removes the manual 2x boost, leaves automatic salience untouched')
    .action(async (chunkIdStr) => {
        const GREEN = '\x1b[0;32m';
        const RED = '\x1b[0;31m';
        const RESET = '\x1b[0m';
        // parseInt alone accepts trailing garbage ("12abc" → 12, "3.9" → 3), which
        // would silently unpin a chunk the user never named. Require plain digits.
        const trimmedId = String(chunkIdStr).trim();
        const chunkId = /^\d+$/.test(trimmedId) ? Number.parseInt(trimmedId, 10) : Number.NaN;
        if (!Number.isSafeInteger(chunkId) || chunkId <= 0) {
            console.error(` ${RED}Invalid chunk id${RESET}: "${chunkIdStr}".`);
            process.exit(1);
        }
        try {
            const { MemoryStore } = await import('../memory/store.js');
            const VAULT_DIR = path.join(BASE_DIR, 'vault');
            const DB_PATH = path.join(VAULT_DIR, '.memory.db');
            const store = new MemoryStore(DB_PATH, VAULT_DIR);
            // setPinned returns false when no row was updated.
            const ok = store.setPinned(chunkId, false);
            if (!ok) {
                console.error(` ${RED}Chunk ${chunkId} not found.${RESET}`);
                process.exit(1);
            }
            console.log(` ${GREEN}✓${RESET} Unpinned chunk ${chunkId}.`);
        }
        catch (err) {
            console.error(` Error unpinning chunk: ${err}`);
            process.exit(1);
        }
    });
2369
// `clementine memory dedup` — lists clusters of near-duplicate chunks and, with
// --apply, deletes every chunk listed as "drop". Every cluster that --apply
// would touch is printed, so the preview and the deletion always cover the
// same set of chunks.
memoryCmd
    .command('dedup')
    .description('Find near-duplicate chunks via embedding cosine similarity. Dry-run by default.')
    .option('--threshold <n>', 'Cosine similarity threshold (0-1)', '0.95')
    .option('--apply', 'Actually delete duplicates (default: dry-run preview only)')
    .option('--limit <n>', 'Max clusters to report', '50')
    .action(async (opts) => {
        const BOLD = '\x1b[1m';
        const DIM = '\x1b[0;90m';
        const GREEN = '\x1b[0;32m';
        const YELLOW = '\x1b[0;33m';
        const RED = '\x1b[0;31m';
        const RESET = '\x1b[0m';
        const threshold = Number.parseFloat(opts.threshold);
        const limit = Number.parseInt(opts.limit, 10);
        // Reject unusable numbers up front: a NaN threshold matches nothing, and a
        // NaN limit never satisfies the store's `clusters.length >= limit` cap.
        if (!Number.isFinite(threshold) || threshold < 0 || threshold > 1) {
            console.error(` ${RED}Invalid --threshold${RESET}: "${opts.threshold}". Expected a number between 0 and 1.`);
            process.exit(1);
        }
        if (!Number.isInteger(limit) || limit <= 0) {
            console.error(` ${RED}Invalid --limit${RESET}: "${opts.limit}". Expected a positive integer.`);
            process.exit(1);
        }
        try {
            const { MemoryStore } = await import('../memory/store.js');
            const VAULT_DIR = path.join(BASE_DIR, 'vault');
            const DB_PATH = path.join(VAULT_DIR, '.memory.db');
            const store = new MemoryStore(DB_PATH, VAULT_DIR);
            const clusters = store.findNearDuplicates({ threshold, limit });
            if (clusters.length === 0) {
                console.log(` ${GREEN}No near-duplicates found above threshold ${threshold}.${RESET}`);
                return;
            }
            const totalDupes = clusters.reduce((sum, c) => sum + c.duplicates.length, 0);
            console.log();
            console.log(` ${BOLD}Found ${clusters.length} cluster${clusters.length === 1 ? '' : 's'} (${totalDupes} duplicate chunk${totalDupes === 1 ? '' : 's'})${RESET}`);
            console.log(` ${DIM}Keeping the most-recent chunk per cluster; older copies will be removed if --apply is passed.${RESET}`);
            console.log();
            // Print every returned cluster. --apply deletes the duplicates of all of
            // them, so hiding any would delete chunks the user was never shown.
            for (const cluster of clusters) {
                const keepLabel = `${cluster.keep.sourceFile} > ${cluster.keep.section}`;
                const agent = cluster.keep.agentSlug ?? 'global';
                console.log(` ${BOLD}KEEP${RESET} #${cluster.keep.chunkId} ${DIM}[${agent}]${RESET} ${keepLabel}`);
                for (const dup of cluster.duplicates) {
                    const dupLabel = `${dup.sourceFile} > ${dup.section}`;
                    console.log(` ${YELLOW}drop${RESET} #${dup.chunkId} sim=${dup.similarity.toFixed(3)} ${DIM}${dupLabel}${RESET}`);
                }
            }
            // The store stops scanning once it has `limit` clusters, so hitting the
            // cap means more duplicates may exist beyond what is listed here.
            if (clusters.length >= limit) {
                console.log(` ${DIM}Stopped at ${limit} clusters; more may exist (raise --limit to scan further).${RESET}`);
            }
            console.log();
            if (opts.apply) {
                const allDupeIds = clusters.flatMap(c => c.duplicates.map(d => d.chunkId));
                const removed = store.deleteChunks(allDupeIds);
                console.log(` ${GREEN}✓${RESET} Deleted ${removed} duplicate chunk${removed === 1 ? '' : 's'}.`);
            }
            else {
                console.log(` ${DIM}This was a preview. Re-run with ${BOLD}--apply${RESET}${DIM} to delete the duplicates.${RESET}`);
            }
            console.log();
        }
        catch (err) {
            console.error(` Error during dedup: ${err}`);
            process.exit(1);
        }
    });
2426
// `clementine memory cross-agent` — read-only report of chunk clusters whose
// content recurs across several agents (candidates for promotion to global).
memoryCmd
    .command('cross-agent')
    .description('Surface chunks that recur across 3+ agents — candidates for promotion to global memory')
    .option('--threshold <n>', 'Cosine similarity threshold for "same idea" (0-1)', '0.88')
    .option('--min-agents <n>', 'Minimum distinct agents touched by a cluster', '3')
    .option('--limit <n>', 'Max clusters to report', '30')
    .action(async (opts) => {
        const BOLD = '\x1b[1m';
        const DIM = '\x1b[0;90m';
        const CYAN = '\x1b[0;36m';
        const GREEN = '\x1b[0;32m';
        const RED = '\x1b[0;31m';
        const RESET = '\x1b[0m';
        const threshold = Number.parseFloat(opts.threshold);
        const minAgents = Number.parseInt(opts.minAgents, 10);
        const limit = Number.parseInt(opts.limit, 10);
        // Validate before touching the store: NaN is not nullish, so it bypasses the
        // store's `??` defaults, and a NaN limit never trips the cluster cap.
        if (!Number.isFinite(threshold) || threshold < 0 || threshold > 1) {
            console.error(` ${RED}Invalid --threshold${RESET}: "${opts.threshold}". Expected a number between 0 and 1.`);
            process.exit(1);
        }
        if (!Number.isInteger(minAgents) || minAgents < 1) {
            console.error(` ${RED}Invalid --min-agents${RESET}: "${opts.minAgents}". Expected a positive integer.`);
            process.exit(1);
        }
        if (!Number.isInteger(limit) || limit <= 0) {
            console.error(` ${RED}Invalid --limit${RESET}: "${opts.limit}". Expected a positive integer.`);
            process.exit(1);
        }
        try {
            const { MemoryStore } = await import('../memory/store.js');
            const VAULT_DIR = path.join(BASE_DIR, 'vault');
            const DB_PATH = path.join(VAULT_DIR, '.memory.db');
            const store = new MemoryStore(DB_PATH, VAULT_DIR);
            const clusters = store.findCrossAgentRecurrence({ threshold, minAgents, limit });
            if (clusters.length === 0) {
                console.log(` ${GREEN}No cross-agent recurrence found above threshold ${opts.threshold} touching ${opts.minAgents}+ agents.${RESET}`);
                return;
            }
            console.log();
            console.log(` ${BOLD}Found ${clusters.length} cluster${clusters.length === 1 ? '' : 's'} recurring across ${opts.minAgents}+ agents${RESET}`);
            console.log(` ${DIM}These are candidates for promotion to global memory — facts the team has independently arrived at.${RESET}`);
            console.log();
            for (const c of clusters) {
                // Single-line preview of the representative chunk, capped at 140 chars.
                const preview = c.representative.content.replace(/\n/g, ' ').slice(0, 140);
                console.log(` ${BOLD}Cluster (${c.agents.length} agents)${RESET} ${CYAN}${c.agents.join(', ')}${RESET}`);
                console.log(` representative #${c.representative.chunkId} ${DIM}${c.representative.sourceFile} > ${c.representative.section}${RESET}`);
                console.log(` ${DIM}${preview}${preview.length >= 140 ? '…' : ''}${RESET}`);
                // members[0] is the representative itself, so list the next three.
                for (const m of c.members.slice(1, 4)) {
                    console.log(` ${DIM}└─ #${m.chunkId} [${m.agentSlug}] sim=${m.similarity.toFixed(3)}${RESET}`);
                }
                if (c.members.length > 4) {
                    console.log(` ${DIM}└─ +${c.members.length - 4} more${RESET}`);
                }
                console.log();
            }
            console.log(` ${DIM}To promote a chunk to global, use the agent-side ${BOLD}memory_promote${RESET}${DIM} tool with the chunk id, or pin it with ${BOLD}clementine memory pin <id>${RESET}${DIM} for now.${RESET}`);
            console.log();
        }
        catch (err) {
            console.error(` Error finding cross-agent recurrence: ${err}`);
            process.exit(1);
        }
    });
2260
2477
  memoryCmd
2261
2478
  .command('search <query>')
2262
2479
  .description('Search memory (full-text)')
@@ -2281,10 +2498,13 @@ memoryCmd
2281
2498
  const source = r.sourceFile ? path.basename(r.sourceFile) : 'unknown';
2282
2499
  const section = r.section || '';
2283
2500
  const snippet = r.content.replace(/\n/g, ' ').slice(0, 120);
2284
- console.log(` ${BOLD}${source}${RESET}${section ? ` ${CYAN}${section}${RESET}` : ''}`);
2501
+ const pinned = r.pinned ? ' 📌' : '';
2502
+ console.log(` ${DIM}#${r.chunkId}${RESET} ${BOLD}${source}${RESET}${section ? ` › ${CYAN}${section}${RESET}` : ''}${pinned}`);
2285
2503
  console.log(` ${DIM}${snippet}${snippet.length >= 120 ? '…' : ''}${RESET}`);
2286
2504
  console.log();
2287
2505
  }
2506
+ console.log(` ${DIM}Tip: pin a chunk to boost its score in recall — ${BOLD}clementine memory pin <id>${RESET}`);
2507
+ console.log();
2288
2508
  }
2289
2509
  catch (err) {
2290
2510
  console.error(` Error searching memory: ${err}`);
@@ -50,6 +50,94 @@ export declare class MemoryStore {
50
50
  private get conn();
51
51
  /** Return the total number of indexed chunks. */
52
52
  getChunkCount(): number;
53
+ /** Toggle the manual pin flag on a chunk. Pinned chunks get a 2x score boost in recall. */
54
+ setPinned(chunkId: number, pinned: boolean): boolean;
55
+ /**
56
+ * Aggregate stats for the memory store — used by `clementine memory status`.
57
+ * Single-pass scans so it stays fast even on large chunk tables.
58
+ */
59
+ getMemoryStats(): {
60
+ totalChunks: number;
61
+ chunksWithEmbeddings: number;
62
+ pinnedChunks: number;
63
+ perAgent: Array<{
64
+ agentSlug: string;
65
+ count: number;
66
+ }>;
67
+ perCategory: Array<{
68
+ category: string;
69
+ count: number;
70
+ }>;
71
+ avgSalience: number;
72
+ oldestUpdated: string | null;
73
+ newestUpdated: string | null;
74
+ };
75
+ /**
76
+ * Find clusters of near-duplicate chunks using embedding cosine similarity.
77
+ * Returns clusters where at least 2 chunks score above the threshold.
78
+ *
79
+ * Caller decides what to do — typical use is `clementine memory dedup` to
80
+ * preview / merge / mark-superseded. Per-pair O(n²) within agent scope to
81
+ * keep the search space tractable; cross-agent dupes are surfaced separately
82
+ * by the auto-promote flow.
83
+ */
84
+ findNearDuplicates(opts?: {
85
+ threshold?: number;
86
+ minLen?: number;
87
+ limit?: number;
88
+ }): Array<{
89
+ keep: {
90
+ chunkId: number;
91
+ sourceFile: string;
92
+ section: string;
93
+ content: string;
94
+ agentSlug: string | null;
95
+ updatedAt: string | null;
96
+ };
97
+ duplicates: Array<{
98
+ chunkId: number;
99
+ sourceFile: string;
100
+ section: string;
101
+ content: string;
102
+ agentSlug: string | null;
103
+ updatedAt: string | null;
104
+ similarity: number;
105
+ }>;
106
+ }>;
107
+ /** Delete chunks by id. Used by dedup --apply. */
108
+ deleteChunks(chunkIds: number[]): number;
109
+ /**
110
+ * Find chunks whose semantic content recurs across 3+ different agents —
111
+ * candidates for promotion to global memory. Detection-only; surfacing.
112
+ * The user (or a future cron) decides whether to actually promote.
113
+ *
114
+ * Approach: scan agent-scoped chunks with embeddings, cluster cross-agent
115
+ * pairs above the similarity threshold, return clusters touching >= minAgents
116
+ * distinct agents. Limits keep the O(n²) scan tractable on large stores.
117
+ */
118
+ findCrossAgentRecurrence(opts?: {
119
+ threshold?: number;
120
+ minAgents?: number;
121
+ minLen?: number;
122
+ limit?: number;
123
+ }): Array<{
124
+ representative: {
125
+ chunkId: number;
126
+ sourceFile: string;
127
+ section: string;
128
+ content: string;
129
+ agentSlug: string;
130
+ };
131
+ members: Array<{
132
+ chunkId: number;
133
+ sourceFile: string;
134
+ section: string;
135
+ agentSlug: string;
136
+ similarity: number;
137
+ updatedAt: string | null;
138
+ }>;
139
+ agents: string[];
140
+ }>;
53
141
  /**
54
142
  * Scan the entire vault, hash-compare, and re-index changed files.
55
143
  */
@@ -173,6 +173,15 @@ export class MemoryStore {
173
173
  catch {
174
174
  // Column already exists
175
175
  }
176
+ // Add pinned flag — manual salience reinforcement. When true, recall
177
+ // applies an extra score boost on top of the access-pattern salience.
178
+ // Toggled by `clementine memory pin/unpin <chunkId>` (or the dashboard).
179
+ try {
180
+ this.conn.exec('ALTER TABLE chunks ADD COLUMN pinned INTEGER DEFAULT 0');
181
+ }
182
+ catch {
183
+ // Column already exists
184
+ }
176
185
  // Indexes for category/topic filtering
177
186
  try {
178
187
  this.conn.exec('CREATE INDEX idx_chunks_category ON chunks(category)');
@@ -737,6 +746,213 @@ export class MemoryStore {
737
746
  return 0;
738
747
  }
739
748
  }
749
+ /** Toggle the manual pin flag on a chunk. Pinned chunks get a 2x score boost in recall. */
750
+ setPinned(chunkId, pinned) {
751
+ try {
752
+ const result = this.conn.prepare('UPDATE chunks SET pinned = ? WHERE id = ?')
753
+ .run(pinned ? 1 : 0, chunkId);
754
+ return result.changes > 0;
755
+ }
756
+ catch {
757
+ return false;
758
+ }
759
+ }
760
+ /**
761
+ * Aggregate stats for the memory store — used by `clementine memory status`.
762
+ * Single-pass scans so it stays fast even on large chunk tables.
763
+ */
764
+ getMemoryStats() {
765
+ const totalChunks = this.getChunkCount();
766
+ const chunksWithEmbeddings = this.conn
767
+ .prepare('SELECT COUNT(*) as cnt FROM chunks WHERE embedding IS NOT NULL')
768
+ .get()?.cnt ?? 0;
769
+ const pinnedChunks = this.conn
770
+ .prepare('SELECT COUNT(*) as cnt FROM chunks WHERE pinned = 1')
771
+ .get()?.cnt ?? 0;
772
+ const perAgent = this.conn
773
+ .prepare(`SELECT COALESCE(agent_slug, 'global') as agentSlug, COUNT(*) as count
774
+ FROM chunks GROUP BY agent_slug ORDER BY count DESC`)
775
+ .all();
776
+ const perCategory = this.conn
777
+ .prepare(`SELECT COALESCE(category, '(none)') as category, COUNT(*) as count
778
+ FROM chunks GROUP BY category ORDER BY count DESC`)
779
+ .all();
780
+ const avgRow = this.conn
781
+ .prepare('SELECT AVG(salience) as avg FROM chunks WHERE salience > 0')
782
+ .get();
783
+ const dateRow = this.conn
784
+ .prepare('SELECT MIN(updated_at) as oldest, MAX(updated_at) as newest FROM chunks WHERE updated_at IS NOT NULL')
785
+ .get();
786
+ return {
787
+ totalChunks,
788
+ chunksWithEmbeddings,
789
+ pinnedChunks,
790
+ perAgent,
791
+ perCategory,
792
+ avgSalience: avgRow?.avg ?? 0,
793
+ oldestUpdated: dateRow?.oldest ?? null,
794
+ newestUpdated: dateRow?.newest ?? null,
795
+ };
796
+ }
797
+ /**
798
+ * Find clusters of near-duplicate chunks using embedding cosine similarity.
799
+ * Returns clusters where at least 2 chunks score above the threshold.
800
+ *
801
+ * Caller decides what to do — typical use is `clementine memory dedup` to
802
+ * preview / merge / mark-superseded. Per-pair O(n²) within agent scope to
803
+ * keep the search space tractable; cross-agent dupes are surfaced separately
804
+ * by the auto-promote flow.
805
+ */
806
+ findNearDuplicates(opts = {}) {
807
+ const threshold = opts.threshold ?? 0.95;
808
+ const minLen = opts.minLen ?? 80; // skip very short chunks — too easily collide
809
+ const limitClusters = opts.limit ?? 50; // cap results so the CLI stays readable
810
+ if (!embeddingsModule.isReady())
811
+ return [];
812
+ const rows = this.conn.prepare(`SELECT id, source_file, section, content, embedding, agent_slug, updated_at
813
+ FROM chunks
814
+ WHERE embedding IS NOT NULL AND length(content) >= ?
815
+ ORDER BY agent_slug, updated_at DESC`).all(minLen);
816
+ // Group by agent first — only compare within the same scope to bound the
817
+ // O(n²) blow-up. Cross-agent dedup is the auto-promote flow's job.
818
+ const buckets = new Map();
819
+ for (const r of rows) {
820
+ const key = r.agent_slug ?? '__global__';
821
+ if (!buckets.has(key))
822
+ buckets.set(key, []);
823
+ buckets.get(key).push(r);
824
+ }
825
+ const clusters = [];
826
+ const consumed = new Set();
827
+ for (const bucket of buckets.values()) {
828
+ // Decode embeddings once per row.
829
+ const decoded = bucket.map(r => ({
830
+ ...r,
831
+ vec: embeddingsModule.deserializeEmbedding(r.embedding),
832
+ }));
833
+ for (let i = 0; i < decoded.length; i++) {
834
+ if (consumed.has(decoded[i].id))
835
+ continue;
836
+ const head = decoded[i];
837
+ const dupes = [];
838
+ for (let j = i + 1; j < decoded.length; j++) {
839
+ if (consumed.has(decoded[j].id))
840
+ continue;
841
+ const sim = embeddingsModule.cosineSimilarity(head.vec, decoded[j].vec);
842
+ if (sim >= threshold) {
843
+ dupes.push({
844
+ chunkId: decoded[j].id,
845
+ sourceFile: decoded[j].source_file,
846
+ section: decoded[j].section,
847
+ content: decoded[j].content,
848
+ agentSlug: decoded[j].agent_slug,
849
+ updatedAt: decoded[j].updated_at,
850
+ similarity: sim,
851
+ });
852
+ consumed.add(decoded[j].id);
853
+ }
854
+ }
855
+ if (dupes.length > 0) {
856
+ consumed.add(head.id);
857
+ clusters.push({
858
+ keep: {
859
+ chunkId: head.id,
860
+ sourceFile: head.source_file,
861
+ section: head.section,
862
+ content: head.content,
863
+ agentSlug: head.agent_slug,
864
+ updatedAt: head.updated_at,
865
+ },
866
+ duplicates: dupes,
867
+ });
868
+ if (clusters.length >= limitClusters)
869
+ return clusters;
870
+ }
871
+ }
872
+ }
873
+ return clusters;
874
+ }
875
+ /** Delete chunks by id. Used by dedup --apply. */
876
+ deleteChunks(chunkIds) {
877
+ if (!chunkIds.length)
878
+ return 0;
879
+ const placeholders = chunkIds.map(() => '?').join(',');
880
+ const result = this.conn.prepare(`DELETE FROM chunks WHERE id IN (${placeholders})`).run(...chunkIds);
881
+ return result.changes;
882
+ }
883
+ /**
884
+ * Find chunks whose semantic content recurs across 3+ different agents —
885
+ * candidates for promotion to global memory. Detection-only; surfacing.
886
+ * The user (or a future cron) decides whether to actually promote.
887
+ *
888
+ * Approach: scan agent-scoped chunks with embeddings, cluster cross-agent
889
+ * pairs above the similarity threshold, return clusters touching >= minAgents
890
+ * distinct agents. Limits keep the O(n²) scan tractable on large stores.
891
+ */
892
+ findCrossAgentRecurrence(opts = {}) {
893
+ const threshold = opts.threshold ?? 0.88; // looser than dedup — paraphrases count
894
+ const minAgents = opts.minAgents ?? 3;
895
+ const minLen = opts.minLen ?? 100;
896
+ const limitClusters = opts.limit ?? 30;
897
+ if (!embeddingsModule.isReady())
898
+ return [];
899
+ // Only consider chunks that ARE agent-scoped (NULL = already global).
900
+ const rows = this.conn.prepare(`SELECT id, source_file, section, content, embedding, agent_slug, updated_at
901
+ FROM chunks
902
+ WHERE embedding IS NOT NULL
903
+ AND agent_slug IS NOT NULL
904
+ AND length(content) >= ?
905
+ ORDER BY updated_at DESC`).all(minLen);
906
+ if (rows.length < minAgents)
907
+ return [];
908
+ const decoded = rows.map(r => ({ ...r, vec: embeddingsModule.deserializeEmbedding(r.embedding) }));
909
+ const clusters = [];
910
+ const consumed = new Set();
911
+ for (let i = 0; i < decoded.length; i++) {
912
+ if (consumed.has(decoded[i].id))
913
+ continue;
914
+ const head = decoded[i];
915
+ const members = [
916
+ { chunkId: head.id, sourceFile: head.source_file, section: head.section, agentSlug: head.agent_slug, similarity: 1.0, updatedAt: head.updated_at },
917
+ ];
918
+ const agentsTouched = new Set([head.agent_slug]);
919
+ for (let j = i + 1; j < decoded.length; j++) {
920
+ if (consumed.has(decoded[j].id))
921
+ continue;
922
+ const sim = embeddingsModule.cosineSimilarity(head.vec, decoded[j].vec);
923
+ if (sim >= threshold) {
924
+ members.push({
925
+ chunkId: decoded[j].id,
926
+ sourceFile: decoded[j].source_file,
927
+ section: decoded[j].section,
928
+ agentSlug: decoded[j].agent_slug,
929
+ similarity: sim,
930
+ updatedAt: decoded[j].updated_at,
931
+ });
932
+ agentsTouched.add(decoded[j].agent_slug);
933
+ }
934
+ }
935
+ if (agentsTouched.size >= minAgents) {
936
+ // Mark all in this cluster consumed so we don't re-cluster around them.
937
+ for (const m of members)
938
+ consumed.add(m.chunkId);
939
+ clusters.push({
940
+ representative: {
941
+ chunkId: head.id,
942
+ sourceFile: head.source_file,
943
+ section: head.section,
944
+ content: head.content,
945
+ agentSlug: head.agent_slug,
946
+ },
947
+ members,
948
+ agents: Array.from(agentsTouched).sort(),
949
+ });
950
+ if (clusters.length >= limitClusters)
951
+ break;
952
+ }
953
+ }
954
+ return clusters;
955
+ }
740
956
  // ── Full Sync ──────────────────────────────────────────────────────
741
957
  /**
742
958
  * Scan the entire vault, hash-compare, and re-index changed files.
@@ -878,7 +1094,7 @@ export class MemoryStore {
878
1094
  try {
879
1095
  let sql = `SELECT c.id, c.source_file, c.section, c.content, c.chunk_type,
880
1096
  c.updated_at, c.salience, c.last_outcome_score, c.agent_slug, c.category, c.topic,
881
- bm25(chunks_fts) as score
1097
+ c.pinned, bm25(chunks_fts) as score
882
1098
  FROM chunks_fts f
883
1099
  JOIN chunks c ON c.id = f.rowid
884
1100
  WHERE chunks_fts MATCH ?`;
@@ -912,6 +1128,7 @@ export class MemoryStore {
912
1128
  agentSlug: row.agent_slug ?? null,
913
1129
  category: row.category,
914
1130
  topic: row.topic,
1131
+ pinned: row.pinned === 1,
915
1132
  }));
916
1133
  }
917
1134
  catch {
@@ -1030,6 +1247,12 @@ export class MemoryStore {
1030
1247
  if (r.salience > 0) {
1031
1248
  r.score *= 1.0 + r.salience;
1032
1249
  }
1250
+ // Manual pin: stronger boost than access-pattern salience. Toggled via
1251
+ // `clementine memory pin <chunkId>`. Doubles the relevance score so
1252
+ // pinned chunks consistently rank near the top within their relevance band.
1253
+ if (r.pinned) {
1254
+ r.score *= 2.0;
1255
+ }
1033
1256
  // Outcome-driven adjustment: chunks that recently got cited in
1034
1257
  // responses get a small boost; chunks that were pulled in and
1035
1258
  // ignored get a small penalty. Bounded to ±30% so outcome noise
@@ -108,6 +108,51 @@ export function registerMemoryTools(server) {
108
108
  }
109
109
  }
110
110
  });
111
// ── 0b. team_scratchpad ────────────────────────────────────────────────
//
// Cross-agent shared scratchpad. Unlike working_memory (per-agent), this
// is a single shared markdown file every agent can read and append to.
// Use cases: live coordination ("Sasha is drafting the brief, Ross hold
// outbound for 30m"), cross-agent context drops, async hand-offs that
// don't warrant a full goal_create or task_add. Append tags every entry
// with the author's agent slug + ISO timestamp so the trail stays clear.
const TEAM_SCRATCHPAD_FILE = path.join(BASE_DIR, 'team-scratchpad.md');
server.tool('team_scratchpad', getToolDescription('team_scratchpad') ?? 'Cross-agent shared scratchpad for live team coordination. All agents read/write the same file. Use for hand-offs, "I am working on X", short-term context drops. For durable facts, use memory_write/MEMORY.md instead.', {
    action: z.enum(['read', 'append', 'replace', 'clear']).describe('What to do with the team scratchpad'),
    content: z.string().optional().describe('Text to append or replace with (required for append/replace)'),
}, async ({ action, content }) => {
    // Entries are attributed to the active agent, falling back to 'clementine'.
    const author = ACTIVE_AGENT_SLUG ?? 'clementine';
    switch (action) {
        case 'read': {
            if (!existsSync(TEAM_SCRATCHPAD_FILE)) {
                return textResult('Team scratchpad is empty.');
            }
            return textResult(readFileSync(TEAM_SCRATCHPAD_FILE, 'utf-8'));
        }
        case 'append': {
            if (!content)
                return textResult('Error: content is required for append.');
            const stamp = new Date().toISOString();
            const entry = `\n- **[${author}@${stamp}]** ${content}\n`;
            // A brand-new file gets the standard header ahead of the first entry.
            const header = existsSync(TEAM_SCRATCHPAD_FILE) ? '' : '# Team Scratchpad\n\nShared across all agents. Append tags entries with author + timestamp.\n';
            // Write in append mode instead of read-then-rewrite: the file is shared,
            // and rewriting a stale snapshot would drop an entry another agent added
            // in between.
            writeFileSync(TEAM_SCRATCHPAD_FILE, header + entry, { flag: 'a' });
            return textResult(`Appended to team scratchpad as ${author}.`);
        }
        case 'replace': {
            if (!content)
                return textResult('Error: content is required for replace.');
            const stamp = new Date().toISOString();
            // The replacement records who overwrote the scratchpad and when.
            const header = `# Team Scratchpad\n\n_Replaced by ${author} at ${stamp}._\n\n`;
            writeFileSync(TEAM_SCRATCHPAD_FILE, header + content + '\n');
            return textResult(`Team scratchpad replaced by ${author}.`);
        }
        case 'clear': {
            // Clearing deletes the file; a later read then reports it as empty.
            if (existsSync(TEAM_SCRATCHPAD_FILE))
                unlinkSync(TEAM_SCRATCHPAD_FILE);
            return textResult('Team scratchpad cleared.');
        }
    }
});
111
156
  // ── 1. memory_read ─────────────────────────────────────────────────────
112
157
  server.tool('memory_read', getToolDescription('memory_read') ?? "Read a note from the Obsidian vault. Shortcuts: 'today', 'yesterday', 'memory', 'tasks', 'heartbeat', 'cron', 'soul'. Or pass a relative path or note name.", {
113
158
  name: z.string().describe('Note name, path, or shortcut'),
@@ -11,10 +11,15 @@
11
11
  const TOOL_META = {
12
12
  // ── Memory & Vault ────────────────────────────────────────────────
13
13
  working_memory: {
14
- description: 'Persistent scratchpad that survives across conversations. Use to jot down current project context, TODOs, reminders, or anything you need to remember for next time. Actions: read, append, replace, clear. ALWAYS read before replacing to avoid overwriting useful notes.',
14
+ description: 'Per-agent persistent scratchpad only YOU see it. Survives across conversations. Use for current project context, TODOs, reminders, or anything you need to remember for next time. Actions: read, append, replace, clear. ALWAYS read before replacing. For cross-agent coordination, use team_scratchpad instead.',
15
15
  exampleUsage: 'Before starting complex work, read working_memory to check for context from prior sessions.',
16
16
  returnHint: 'Full working memory contents (markdown text).',
17
17
  },
18
+ team_scratchpad: {
19
+ description: 'Cross-agent shared scratchpad — every agent on the team reads and writes the same file. Use for live coordination, hand-offs, "I am working on X — back off until Y", short-lived context drops. For durable facts that should outlive coordination noise, use memory_write to MEMORY.md instead. Append tags entries with author slug + timestamp.',
20
+ exampleUsage: 'Before starting outbound work, read team_scratchpad to see if another agent has already claimed a prospect or paused outreach.',
21
+ returnHint: 'Full scratchpad contents with per-entry author + ISO timestamp.',
22
+ },
18
23
  memory_search: {
19
24
  description: 'Full-text search across all vault notes. Best for finding specific keywords or phrases. For broader semantic matching, use memory_recall instead. Results include file path, section heading, and relevance score.',
20
25
  exampleUsage: 'Use when the user asks "what did we discuss about X" or you need to find a specific note.',
package/dist/types.d.ts CHANGED
@@ -15,6 +15,7 @@ export interface SearchResult {
15
15
  agentSlug?: string | null;
16
16
  category?: string | null;
17
17
  topic?: string | null;
18
+ pinned?: boolean;
18
19
  }
19
20
  export type ChunkCategory = 'facts' | 'events' | 'discoveries' | 'preferences' | 'advice';
20
21
  export interface Chunk {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.1.20",
3
+ "version": "1.1.21",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",