@cognistore/mcp-server 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +393 -23
  2. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -94,7 +94,8 @@ var searchOptionsSchema = z.object({
94
94
  scope: scopeSchema.optional(),
95
95
  limit: z.number().int().min(1).max(100).optional().default(DEFAULT_SEARCH_LIMIT),
96
96
  threshold: z.number().min(0).max(1).optional().default(DEFAULT_SIMILARITY_THRESHOLD),
97
- includePlanContext: z.boolean().optional().default(false)
97
+ includePlanContext: z.boolean().optional().default(false),
98
+ queryText: z.string().optional()
98
99
  });
99
100
  var createPlanSchema = z.object({
100
101
  title: z.string().min(1, "Title is required"),
@@ -140,6 +141,7 @@ __export(schema_exports, {
140
141
  createEmbeddingsTable: () => createEmbeddingsTable,
141
142
  deleteEmbedding: () => deleteEmbedding,
142
143
  deletePlanEmbedding: () => deletePlanEmbedding,
144
+ getEmbeddingById: () => getEmbeddingById,
143
145
  insertEmbedding: () => insertEmbedding,
144
146
  insertPlanEmbedding: () => insertPlanEmbedding,
145
147
  knowledgeEntries: () => knowledgeEntries,
@@ -195,6 +197,14 @@ function deleteEmbedding(sqlite, id) {
195
197
  const stmt = sqlite.prepare(`DELETE FROM ${VIRTUAL_TABLE_NAME} WHERE id = ?`);
196
198
  stmt.run(id);
197
199
  }
200
/**
 * Read the `embedding` column for one id from the vector table and decode it
 * as 32-bit floats.
 *
 * @param {object} sqlite - Database handle exposing `prepare(sql).get(...)`.
 * @param {*} id - Value bound to the `id = ?` placeholder.
 * @returns {number[] | null} The decoded vector, or `null` when there is no
 *   row or the row's embedding is null/undefined.
 */
function getEmbeddingById(sqlite, id) {
  const row = sqlite.prepare(`SELECT embedding FROM ${VIRTUAL_TABLE_NAME} WHERE id = ?`).get(id);
  if (!row || row.embedding == null) {
    return null;
  }
  const blob = row.embedding;
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
  // Trailing bytes that do not form a whole float are ignored.
  const floatCount = Math.floor(blob.byteLength / bytesPerFloat);
  // A Float32Array view must start on a 4-byte boundary. A blob that is a slice
  // of a larger buffer may not, so copy its bytes into a fresh buffer first
  // instead of letting the constructor throw a RangeError.
  if (blob.byteOffset % bytesPerFloat !== 0) {
    const copy = new Uint8Array(blob.buffer, blob.byteOffset, floatCount * bytesPerFloat).slice();
    return Array.from(new Float32Array(copy.buffer, 0, floatCount));
  }
  return Array.from(new Float32Array(blob.buffer, blob.byteOffset, floatCount));
}
198
208
  function searchKnn(sqlite, queryEmbedding, k) {
199
209
  const stmt = sqlite.prepare(`
200
210
  SELECT id, distance
@@ -224,6 +234,49 @@ function searchPlansKnn(sqlite, queryEmbedding, k) {
224
234
  return stmt.all(Buffer.from(new Float32Array(queryEmbedding).buffer), k);
225
235
  }
226
236
 
237
+ // ../../packages/core/dist/db/schema/fts.js
238
+ var FTS_TABLE_NAME = "knowledge_fts";
239
/**
 * Create the FTS5 virtual table used for keyword search, if it does not
 * already exist. `id` is stored UNINDEXED; `title`, `content` and `tags` are
 * the searchable columns.
 *
 * @param {object} sqlite - Database handle exposing `exec(sql)`.
 */
function createKnowledgeFtsTable(sqlite) {
  const ddl = `
CREATE VIRTUAL TABLE IF NOT EXISTS ${FTS_TABLE_NAME} USING fts5(
id UNINDEXED,
title,
content,
tags
)
`;
  sqlite.exec(ddl);
}
249
/**
 * Insert one row into the FTS table. Null/undefined `title`, `content` or
 * `tags` are stored as empty strings.
 *
 * @param {object} sqlite - Database handle exposing `prepare(sql).run(...)`.
 * @param {{id: *, title?: string, content?: string, tags?: string}} row
 */
function insertFts(sqlite, row) {
  const insert = sqlite.prepare(`INSERT INTO ${FTS_TABLE_NAME}(id, title, content, tags) VALUES (?, ?, ?, ?)`);
  const title = row.title ?? "";
  const content = row.content ?? "";
  const tags = row.tags ?? "";
  insert.run(row.id, title, content, tags);
}
252
/**
 * Replace the FTS row for `row.id`: the existing row is deleted, then the new
 * values are inserted.
 *
 * @param {object} sqlite - Database handle passed through to the helpers.
 * @param {{id: *, title?: string, content?: string, tags?: string}} row
 */
function updateFts(sqlite, row) {
  const { id } = row;
  deleteFts(sqlite, id);
  insertFts(sqlite, row);
}
256
/**
 * Delete every FTS row whose `id` equals the given id.
 *
 * @param {object} sqlite - Database handle exposing `prepare(sql).run(...)`.
 * @param {*} id - Value bound to the `id = ?` placeholder.
 */
function deleteFts(sqlite, id) {
  const removal = sqlite.prepare(`DELETE FROM ${FTS_TABLE_NAME} WHERE id = ?`);
  removal.run(id);
}
259
/**
 * Count the rows currently in the FTS table.
 *
 * @param {object} sqlite - Database handle exposing `prepare(sql).get()`.
 * @returns {number} The `count(*)` value, or 0 when no row or no count comes back.
 */
function ftsCount(sqlite) {
  const countSql = `SELECT count(*) AS c FROM ${FTS_TABLE_NAME}`;
  const result = sqlite.prepare(countSql).get();
  if (result == null) {
    return 0;
  }
  return result.c ?? 0;
}
263
/**
 * Turn free text into a safe FTS5 MATCH expression. Each whitespace-separated
 * term becomes a double-quoted string (embedded quotes doubled), and the terms
 * are joined with single spaces.
 *
 * @param {*} query - Raw query text. `null`/`undefined` yield an empty
 *   expression; other non-string values are converted with `String()`.
 * @returns {string} The MATCH expression, or "" when there are no terms.
 */
function sanitizeFtsQuery(query) {
  // Guard against nullish input instead of throwing on `.split`.
  if (query == null) {
    return "";
  }
  return String(query)
    .split(/\s+/)
    .filter(Boolean)
    .map((term) => `"${term.replace(/"/g, '""')}"`)
    .join(" ");
}
266
/**
 * Run a keyword search against the FTS table, ordered by the `bm25` column.
 *
 * @param {object} sqlite - Database handle exposing `prepare(sql).all(...)`.
 * @param {string} query - Raw user query; sanitized before being bound to MATCH.
 * @param {number} k - Value bound to LIMIT.
 * @returns {Array<{id: *, bm25: number}>} Matching rows, or `[]` when the
 *   sanitized query is empty (no statement is prepared in that case).
 */
function searchFts(sqlite, query, k) {
  const matchExpr = sanitizeFtsQuery(query);
  if (matchExpr) {
    const rankedSql = `
SELECT id, bm25(${FTS_TABLE_NAME}) AS bm25
FROM ${FTS_TABLE_NAME}
WHERE ${FTS_TABLE_NAME} MATCH ?
ORDER BY bm25
LIMIT ?
`;
    return sqlite.prepare(rankedSql).all(matchExpr, k);
  }
  return [];
}
279
+
227
280
  // ../../packages/core/dist/db/migrate.js
228
281
  import { readdirSync, readFileSync, existsSync } from "fs";
229
282
  import { resolve } from "path";
@@ -357,6 +410,9 @@ CREATE TABLE IF NOT EXISTS scan_state (
357
410
  );
358
411
 
359
412
  ALTER TABLE plans ADD COLUMN plan_file_path TEXT;
413
+ `,
414
+ "2.1.0": `
415
+ CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_fts USING fts5(id UNINDEXED, title, content, tags);
360
416
  `
361
417
  };
362
418
  function runMigrations(sqlite, migrationsDir) {
@@ -459,9 +515,34 @@ function createDbClient(dbPath) {
459
515
  const dims = Number(process.env.EMBEDDING_DIMENSIONS) || DEFAULT_EMBEDDING_DIMENSIONS;
460
516
  createEmbeddingsTable(sqlite, dims);
461
517
  createPlansEmbeddingsTable(sqlite, dims);
518
+ try {
519
+ createKnowledgeFtsTable(sqlite);
520
+ backfillFtsIfEmpty(sqlite);
521
+ sqlite.exec("CREATE INDEX IF NOT EXISTS idx_knowledge_created_at ON knowledge_entries(created_at)");
522
+ sqlite.exec("CREATE INDEX IF NOT EXISTS idx_plans_created_at ON plans(created_at)");
523
+ } catch {
524
+ }
462
525
  const db = drizzle(sqlite, { schema: schema_exports });
463
526
  return { db, sqlite };
464
527
  }
528
/**
 * Populate the FTS index from `knowledge_entries` (excluding type 'system')
 * when the index currently holds no rows. Does nothing when it already has
 * rows. Per-row insert failures are deliberately ignored so one bad row does
 * not stop the backfill.
 *
 * @param {object} sqlite - Database handle exposing `prepare(sql).all()`.
 */
function backfillFtsIfEmpty(sqlite) {
  if (ftsCount(sqlite) > 0) {
    return;
  }
  // Flatten the stored JSON tag array into a space-separated string; anything
  // unparsable or not an array indexes as no tags.
  const flattenTags = (stored) => {
    try {
      const decoded = JSON.parse(stored ?? "[]");
      return Array.isArray(decoded) ? decoded.filter(Boolean).join(" ") : "";
    } catch {
      return "";
    }
  };
  const entries = sqlite.prepare("SELECT id, title, content, tags FROM knowledge_entries WHERE type != 'system'").all();
  for (const entry of entries) {
    const ftsRow = {
      id: entry.id,
      title: entry.title ?? "",
      content: entry.content ?? "",
      tags: flattenTags(entry.tags),
    };
    try {
      insertFts(sqlite, ftsRow);
    } catch {
      // Best-effort: skip rows that cannot be indexed.
    }
  }
}
465
546
  function createPlansEmbeddingsTable(sqlite, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) {
466
547
  try {
467
548
  sqlite.exec(`
@@ -503,6 +584,10 @@ var KnowledgeRepository = class {
503
584
  updatedAt: now
504
585
  }).returning();
505
586
  insertEmbedding(this.sqlite, id, input.embedding);
587
+ try {
588
+ insertFts(this.sqlite, { id, title: input.title, content: input.content, tags: ftsTags(input.tags) });
589
+ } catch {
590
+ }
506
591
  return entry;
507
592
  }
508
593
  async findById(id) {
@@ -522,6 +607,17 @@ var KnowledgeRepository = class {
522
607
  ...values,
523
608
  version: sql`${knowledgeEntries.version} + 1`
524
609
  }).where(eq(knowledgeEntries.id, id)).returning();
610
+ if (entry) {
611
+ try {
612
+ updateFts(this.sqlite, {
613
+ id,
614
+ title: entry.title ?? "",
615
+ content: entry.content ?? "",
616
+ tags: ftsTags(entry.tags)
617
+ });
618
+ } catch {
619
+ }
620
+ }
525
621
  if (embedding) {
526
622
  updateEmbedding(this.sqlite, id, embedding);
527
623
  }
@@ -531,6 +627,10 @@ var KnowledgeRepository = class {
531
627
  const [entry] = await this.db.delete(knowledgeEntries).where(eq(knowledgeEntries.id, id)).returning();
532
628
  if (entry) {
533
629
  deleteEmbedding(this.sqlite, id);
630
+ try {
631
+ deleteFts(this.sqlite, id);
632
+ } catch {
633
+ }
534
634
  }
535
635
  return entry ?? null;
536
636
  }
@@ -544,11 +644,20 @@ var KnowledgeRepository = class {
544
644
  const threshold = options?.threshold ?? DEFAULT_SIMILARITY_THRESHOLD;
545
645
  const candidateLimit = limit * 5;
546
646
  const knnResults = searchKnn(this.sqlite, queryEmbedding, candidateLimit);
547
- if (knnResults.length === 0) {
647
+ const distanceMap = new Map(knnResults.map((r) => [r.id, r.distance]));
648
+ let ftsResults = [];
649
+ if (options?.queryText) {
650
+ try {
651
+ ftsResults = searchFts(this.sqlite, options.queryText, candidateLimit);
652
+ } catch {
653
+ ftsResults = [];
654
+ }
655
+ }
656
+ const bm25Map = new Map(ftsResults.map((r) => [r.id, r.bm25]));
657
+ if (knnResults.length === 0 && ftsResults.length === 0) {
548
658
  return [];
549
659
  }
550
- const candidateIds = knnResults.map((r) => r.id);
551
- const distanceMap = new Map(knnResults.map((r) => [r.id, r.distance]));
660
+ const candidateIds = [.../* @__PURE__ */ new Set([...knnResults.map((r) => r.id), ...ftsResults.map((r) => r.id)])];
552
661
  const conditions = [];
553
662
  conditions.push(sql`${knowledgeEntries.id} IN (${sql.join(candidateIds.map((id) => sql`${id}`), sql`, `)})`);
554
663
  if (options?.scope) {
@@ -565,18 +674,25 @@ var KnowledgeRepository = class {
565
674
  conditions.push(or(isNull(knowledgeEntries.expiresAt), sql`${knowledgeEntries.expiresAt} > ${(/* @__PURE__ */ new Date()).toISOString()}`));
566
675
  const whereClause = and(...conditions);
567
676
  const entries = await this.db.select().from(knowledgeEntries).where(whereClause);
568
- return entries.map((entry) => ({
569
- entry,
570
- similarity: 1 - (distanceMap.get(entry.id) ?? 1)
571
- })).filter((r) => r.similarity >= threshold).sort((a, b) => b.similarity - a.similarity).slice(0, limit);
572
- }
573
- async listRecent(limit = 20, filters) {
677
+ return entries.map((entry) => {
678
+ const semantic = 1 - (distanceMap.get(entry.id) ?? 1);
679
+ const hasFts = bm25Map.has(entry.id);
680
+ const bm25Norm = hasFts ? 1 / (1 + Math.exp(bm25Map.get(entry.id))) : 0;
681
+ const combined = 0.7 * semantic + 0.3 * bm25Norm;
682
+ return { entry, similarity: combined, semantic, hasFts };
683
+ }).filter((r) => r.semantic >= threshold || r.hasFts).sort((a, b) => b.similarity - a.similarity).slice(0, limit).map(({ entry, similarity }) => ({ entry, similarity }));
684
+ }
685
+ async listRecent(limit = 20, filters, offset = 0) {
574
686
  const conditions = [ne(knowledgeEntries.type, "system")];
575
687
  if (filters?.type)
576
688
  conditions.push(sql`${knowledgeEntries.type} = ${filters.type}`);
577
689
  if (filters?.scope)
578
690
  conditions.push(sql`${knowledgeEntries.scope} = ${filters.scope}`);
579
- return this.db.select().from(knowledgeEntries).where(and(...conditions)).orderBy(sql`${knowledgeEntries.createdAt} DESC`).limit(limit);
691
+ if (filters?.tags && filters.tags.length > 0) {
692
+ const tagConditions = filters.tags.map((tag) => sql`EXISTS (SELECT 1 FROM json_each(${knowledgeEntries.tags}) WHERE value = ${tag})`);
693
+ conditions.push(or(...tagConditions));
694
+ }
695
+ return this.db.select().from(knowledgeEntries).where(and(...conditions)).orderBy(sql`${knowledgeEntries.createdAt} DESC`).limit(limit).offset(offset);
580
696
  }
581
697
  async listTags(opts = {}) {
582
698
  const { from, to } = opts;
@@ -603,6 +719,110 @@ var KnowledgeRepository = class {
603
719
  const result = await this.db.all(sql`SELECT value as tag, COUNT(*) as count FROM knowledge_entries, json_each(knowledge_entries.tags) WHERE knowledge_entries.type != 'system' GROUP BY value ORDER BY count DESC LIMIT ${limit}`);
604
720
  return result;
605
721
  }
722
+ /**
723
+ * Rename/merge a tag across all entries. Per-row correlated rebuild of the JSON
724
+ * tags array: replace `from`→`to`, and `json_group_array(DISTINCT ...)` collapses
725
+ * a pre-existing `to`. The `WHERE EXISTS` guard limits both the updated_at bump
726
+ * and the affected-id list to rows that actually contain `from`. Returns the IDs
727
+ * of the rows that changed (callers re-embed + resync FTS for these).
728
+ */
729
+ renameTag(from, to) {
730
+ const now = (/* @__PURE__ */ new Date()).toISOString();
731
+ const affected = this.sqlite.prepare(`SELECT id FROM knowledge_entries WHERE EXISTS (SELECT 1 FROM json_each(knowledge_entries.tags) WHERE value = ?)`).all(from);
732
+ if (affected.length === 0)
733
+ return [];
734
+ this.sqlite.prepare(`UPDATE knowledge_entries
735
+ SET tags = (
736
+ SELECT json_group_array(DISTINCT CASE WHEN value = @from THEN @to ELSE value END)
737
+ FROM json_each(knowledge_entries.tags)
738
+ ),
739
+ updated_at = @now
740
+ WHERE EXISTS (SELECT 1 FROM json_each(knowledge_entries.tags) WHERE value = @from)`).run({ from, to, now });
741
+ return affected.map((r) => r.id);
742
+ }
743
+ /**
744
+ * Flag stale knowledge: not updated since `cutoff` (days), OR already expired,
745
+ * OR confidence below `minConfidence`. Lightweight metadata-only query (no new
746
+ * column / no per-entry access tracking).
747
+ */
748
+ async findStaleEntries(opts = {}) {
749
+ const days = opts.days ?? 90;
750
+ const minConfidence = opts.minConfidence ?? 0.5;
751
+ const limit = opts.limit ?? 100;
752
+ const cutoff = new Date(Date.now() - days * 24 * 60 * 60 * 1e3).toISOString();
753
+ const now = (/* @__PURE__ */ new Date()).toISOString();
754
+ const rows = this.sqlite.prepare(`SELECT id, title, type, scope, confidence_score AS confidenceScore, updated_at AS updatedAt, expires_at AS expiresAt
755
+ FROM knowledge_entries
756
+ WHERE type != 'system'
757
+ AND (updated_at < @cutoff OR (expires_at IS NOT NULL AND expires_at < @now) OR confidence_score < @minConfidence)
758
+ ORDER BY updated_at ASC
759
+ LIMIT @limit`).all({ cutoff, now, minConfidence, limit });
760
+ return rows;
761
+ }
762
+ /**
763
+ * Find near-duplicate entry pairs via per-entry KNN (avoids an O(n²) full scan).
764
+ * The embedding MUST be read from the vec table (listAll() blanks it). Canonical
765
+ * `id < neighborId` filter dedups pairs and drops the self-match. Returns [] when
766
+ * there are no embeddings yet.
767
+ */
768
+ async findDuplicatePairs(opts = {}) {
769
+ const threshold = opts.threshold ?? 0.9;
770
+ const limit = opts.limit ?? 100;
771
+ const entries = await this.listAll();
772
+ const meta = new Map(entries.map((e) => [e.id, e]));
773
+ const pairs = [];
774
+ for (const entry of entries) {
775
+ const emb = getEmbeddingById(this.sqlite, entry.id);
776
+ if (!emb)
777
+ continue;
778
+ const neighbors = searchKnn(this.sqlite, emb, 5);
779
+ for (const n of neighbors) {
780
+ if (entry.id >= n.id)
781
+ continue;
782
+ if (!meta.has(n.id))
783
+ continue;
784
+ const similarity = 1 - n.distance;
785
+ if (similarity >= threshold) {
786
+ const other = meta.get(n.id);
787
+ pairs.push({
788
+ a: { id: entry.id, title: entry.title },
789
+ b: { id: other.id, title: other.title },
790
+ similarity
791
+ });
792
+ }
793
+ }
794
+ if (pairs.length >= limit)
795
+ break;
796
+ }
797
+ return pairs.sort((x, y) => y.similarity - x.similarity).slice(0, limit);
798
+ }
799
+ /**
800
+ * (Re)populate the FTS5 index from knowledge_entries. Used at startup when the
801
+ * index is empty but entries exist (e.g. after the FTS migration on an existing
802
+ * DB). Best-effort and idempotent (clears then re-inserts).
803
+ */
804
+ backfillFts() {
805
+ const rows = this.sqlite.prepare(`SELECT id, title, content, tags FROM knowledge_entries WHERE type != 'system'`).all();
806
+ let count = 0;
807
+ for (const r of rows) {
808
+ try {
809
+ insertFts(this.sqlite, { id: r.id, title: r.title ?? "", content: r.content ?? "", tags: ftsTags(r.tags) });
810
+ count++;
811
+ } catch {
812
+ }
813
+ }
814
+ return count;
815
+ }
816
+ /** Backfill the FTS index only when it's empty but entries exist (startup path). */
817
+ backfillFtsIfNeeded() {
818
+ try {
819
+ if (ftsCount(this.sqlite) > 0)
820
+ return 0;
821
+ return this.backfillFts();
822
+ } catch {
823
+ return 0;
824
+ }
825
+ }
606
826
  async count() {
607
827
  const [result] = await this.db.select({ count: sql`count(*)` }).from(knowledgeEntries).where(ne(knowledgeEntries.type, "system"));
608
828
  return Number(result.count);
@@ -690,7 +910,7 @@ var KnowledgeRepository = class {
690
910
  listAllPlans() {
691
911
  return this.sqlite.prepare("SELECT * FROM plans ORDER BY created_at DESC").all();
692
912
  }
693
- listPlans(limit = 20, status, scope) {
913
+ listPlans(limit = 20, status, scope, offset = 0) {
694
914
  const conditions = [];
695
915
  const params = [];
696
916
  if (status) {
@@ -702,8 +922,8 @@ var KnowledgeRepository = class {
702
922
  params.push(scope);
703
923
  }
704
924
  const where = conditions.length ? "WHERE " + conditions.join(" AND ") : "";
705
- params.push(limit);
706
- return this.sqlite.prepare(`SELECT * FROM plans ${where} ORDER BY created_at DESC LIMIT ?`).all(...params);
925
+ params.push(limit, offset);
926
+ return this.sqlite.prepare(`SELECT * FROM plans ${where} ORDER BY created_at DESC LIMIT ? OFFSET ?`).all(...params);
707
927
  }
708
928
  findSimilarActivePlans(embedding, scope, threshold = 0.5) {
709
929
  try {
@@ -945,6 +1165,17 @@ var KnowledgeRepository = class {
945
1165
  return removed;
946
1166
  }
947
1167
  };
1168
/**
 * Flatten an entry's tags into the space-separated string stored in the FTS
 * `tags` column.
 *
 * @param {string[] | string | null | undefined} tags - A tag array, or its
 *   JSON-encoded form.
 * @returns {string} Truthy tags joined by single spaces. A string that is not
 *   valid JSON is returned unchanged; anything else that is not an array
 *   yields "".
 */
function ftsTags(tags) {
  let candidate;
  if (typeof tags !== "string") {
    candidate = tags;
  } else {
    try {
      candidate = JSON.parse(tags);
    } catch {
      return tags;
    }
  }
  if (!Array.isArray(candidate)) {
    return "";
  }
  const kept = [];
  for (const tag of candidate) {
    if (tag) {
      kept.push(tag);
    }
  }
  return kept.join(" ");
}
948
1179
  function cosineSimilarity(a, b) {
949
1180
  let dot = 0, magA = 0, magB = 0;
950
1181
  for (let i = 0; i < a.length; i++) {
@@ -957,6 +1188,25 @@ function cosineSimilarity(a, b) {
957
1188
  }
958
1189
 
959
1190
  // ../../packages/core/dist/services/knowledge.service.js
1191
/**
 * Levenshtein edit distance between two strings, computed with two rolling
 * rows. Characters are compared by index.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions that turn `a` into `b`.
 */
function levenshtein(a, b) {
  if (a === b) {
    return 0;
  }
  const rows = a.length;
  const cols = b.length;
  if (!rows) {
    return cols;
  }
  if (!cols) {
    return rows;
  }
  // `above` is the finished previous row; `row` is the one being filled.
  let above = [];
  for (let col = 0; col <= cols; col++) {
    above.push(col);
  }
  let row = new Array(cols + 1);
  for (let r = 0; r < rows; r++) {
    row[0] = r + 1;
    const ch = a[r];
    for (let c = 0; c < cols; c++) {
      const insertion = row[c] + 1;
      const deletion = above[c + 1] + 1;
      const substitution = above[c] + (ch === b[c] ? 0 : 1);
      row[c + 1] = Math.min(insertion, deletion, substitution);
    }
    const recycled = above;
    above = row;
    row = recycled;
  }
  return above[cols];
}
960
1210
  var KnowledgeService = class {
961
1211
  repository;
962
1212
  embeddingProvider;
@@ -977,8 +1227,26 @@ var KnowledgeService = class {
977
1227
  buildEmbeddingText(title, content, tags) {
978
1228
  return `${title} ${content} ${tags.join(" ")}`;
979
1229
  }
1230
+ /**
1231
+ * Conservative tag normalization: trim + lowercase + dedup (order-preserving).
1232
+ * Deliberately does NOT rewrite tokens (e.g. nest.js→nestjs) — that's left to
1233
+ * the explicit merge flow so meaning is never silently changed.
1234
+ */
1235
+ normalizeTags(tags = []) {
1236
+ const seen = /* @__PURE__ */ new Set();
1237
+ const out = [];
1238
+ for (const raw of tags) {
1239
+ const t = (raw ?? "").trim().toLowerCase();
1240
+ if (t && !seen.has(t)) {
1241
+ seen.add(t);
1242
+ out.push(t);
1243
+ }
1244
+ }
1245
+ return out;
1246
+ }
980
1247
  async add(input) {
981
1248
  const { skipDedup, ...rest } = input;
1249
+ rest.tags = this.normalizeTags(rest.tags);
982
1250
  const embeddingText = this.buildEmbeddingText(rest.title, rest.content, rest.tags);
983
1251
  const embedding = await this.embeddingProvider.embed(embeddingText);
984
1252
  if (!skipDedup) {
@@ -1004,7 +1272,7 @@ var KnowledgeService = class {
1004
1272
  }
1005
1273
  async search(query, options) {
1006
1274
  const queryEmbedding = await this.embeddingProvider.embed(query);
1007
- const results = await this.repository.searchBySimilarity(queryEmbedding, options);
1275
+ const results = await this.repository.searchBySimilarity(queryEmbedding, { ...options, queryText: query });
1008
1276
  this.logOp("read", results.length);
1009
1277
  const direct = results.map((r) => ({
1010
1278
  entry: this.toKnowledgeEntry(r.entry),
@@ -1073,6 +1341,8 @@ var KnowledgeService = class {
1073
1341
  return entry ? this.toKnowledgeEntry(entry) : null;
1074
1342
  }
1075
1343
  async update(id, updates) {
1344
+ if (updates.tags)
1345
+ updates.tags = this.normalizeTags(updates.tags);
1076
1346
  let embedding;
1077
1347
  if (updates.content || updates.title || updates.tags) {
1078
1348
  const current = await this.repository.findById(id);
@@ -1195,8 +1465,8 @@ var KnowledgeService = class {
1195
1465
  }
1196
1466
  return hash.toString(36);
1197
1467
  }
1198
- async listRecent(limit = 20, filters) {
1199
- const entries = await this.repository.listRecent(limit, filters);
1468
+ async listRecent(limit = 20, filters, offset = 0) {
1469
+ const entries = await this.repository.listRecent(limit, filters, offset);
1200
1470
  return entries.map((e) => this.toKnowledgeEntry(e));
1201
1471
  }
1202
1472
  async topTags(limit = 10, opts = {}) {
@@ -1205,6 +1475,74 @@ var KnowledgeService = class {
1205
1475
  async listTags(opts = {}) {
1206
1476
  return this.repository.listTags(opts);
1207
1477
  }
1478
+ /**
1479
+ * Suggest near-duplicate tag pairs to merge (e.g. nest.js ↔ nestjs, redis ↔ Redis).
1480
+ * Compares lowercased forms via max(normalized Levenshtein, token-set Jaccard).
1481
+ * O(n²) over the small DISTINCT tag set.
1482
+ */
1483
+ async suggestTagMerges(threshold = 0.82) {
1484
+ const tags = await this.repository.listTags();
1485
+ const out = [];
1486
+ for (let i = 0; i < tags.length; i++) {
1487
+ for (let j = i + 1; j < tags.length; j++) {
1488
+ const sim = this.tagSimilarity(tags[i], tags[j]);
1489
+ if (sim >= threshold)
1490
+ out.push({ a: tags[i], b: tags[j], similarity: Math.round(sim * 100) / 100 });
1491
+ }
1492
+ }
1493
+ return out.sort((x, y) => y.similarity - x.similarity);
1494
+ }
1495
+ tagSimilarity(a, b) {
1496
+ const la = a.trim().toLowerCase();
1497
+ const lb = b.trim().toLowerCase();
1498
+ if (!la || !lb)
1499
+ return 0;
1500
+ if (la === lb)
1501
+ return 1;
1502
+ const lev = 1 - levenshtein(la, lb) / Math.max(la.length, lb.length);
1503
+ const ta = new Set(la.split(/[^a-z0-9]+/).filter(Boolean));
1504
+ const tb = new Set(lb.split(/[^a-z0-9]+/).filter(Boolean));
1505
+ let inter = 0;
1506
+ for (const t of ta)
1507
+ if (tb.has(t))
1508
+ inter++;
1509
+ const union = (/* @__PURE__ */ new Set([...ta, ...tb])).size;
1510
+ const jaccard = union ? inter / union : 0;
1511
+ return Math.max(lev, jaccard);
1512
+ }
1513
+ /**
1514
+ * Merge tag `from` into `to` across all entries, then re-embed + resync FTS for
1515
+ * affected rows. Re-embeds via repository.update (the UPDATE path), NOT the
1516
+ * insert path — and update() also resyncs the FTS row.
1517
+ */
1518
+ async mergeTag(from, to) {
1519
+ const f = (from ?? "").trim();
1520
+ const t = (to ?? "").trim();
1521
+ if (!f || !t || f === t)
1522
+ return { merged: 0 };
1523
+ const affected = this.repository.renameTag(f, t);
1524
+ for (const id of affected) {
1525
+ const entry = await this.repository.findById(id);
1526
+ if (!entry)
1527
+ continue;
1528
+ const tags = Array.isArray(entry.tags) ? entry.tags : JSON.parse(entry.tags ?? "[]");
1529
+ const embedding = await this.embeddingProvider.embed(this.buildEmbeddingText(entry.title, entry.content, tags));
1530
+ await this.repository.update(id, { embedding });
1531
+ }
1532
+ if (affected.length)
1533
+ this.logOp("write", affected.length);
1534
+ return { merged: affected.length };
1535
+ }
1536
+ async findStaleEntries(opts = {}) {
1537
+ return this.repository.findStaleEntries(opts);
1538
+ }
1539
+ async findDuplicatePairs(opts = {}) {
1540
+ return this.repository.findDuplicatePairs(opts);
1541
+ }
1542
+ /** (Re)populate the FTS5 index if it's empty but entries exist. Returns rows indexed. */
1543
+ backfillFtsIfNeeded() {
1544
+ return this.repository.backfillFtsIfNeeded();
1545
+ }
1208
1546
  async getStats() {
1209
1547
  const [count, byType, byScope, lastUpdatedAt] = await Promise.all([
1210
1548
  this.repository.count(),
@@ -1323,8 +1661,8 @@ var KnowledgeService = class {
1323
1661
  const rows = this.repository.listAllPlans();
1324
1662
  return rows.map((r) => this.toPlan(r));
1325
1663
  }
1326
- listPlans(limit = 20, status, scope) {
1327
- const rows = this.repository.listPlans(limit, status, scope);
1664
+ listPlans(limit = 20, status, scope, offset = 0) {
1665
+ const rows = this.repository.listPlans(limit, status, scope, offset);
1328
1666
  return rows.map((r) => this.toPlan(r));
1329
1667
  }
1330
1668
  archiveStaleDrafts(maxAgeHours = 24) {
@@ -2932,10 +3270,10 @@ var KnowledgeSDK = class {
2932
3270
  throw this.wrapError(error, "Failed to delete knowledge");
2933
3271
  }
2934
3272
  }
2935
- async listRecent(limit = 20, filters) {
3273
+ async listRecent(limit = 20, filters, offset = 0) {
2936
3274
  this.ensureInitialized();
2937
3275
  try {
2938
- return await this.service.listRecent(limit, filters);
3276
+ return await this.service.listRecent(limit, filters, offset);
2939
3277
  } catch (error) {
2940
3278
  throw this.wrapError(error, "Failed to list recent knowledge");
2941
3279
  }
@@ -2956,6 +3294,38 @@ var KnowledgeSDK = class {
2956
3294
  throw this.wrapError(error, "Failed to list tags");
2957
3295
  }
2958
3296
  }
3297
+ async suggestTagMerges(threshold = 0.82) {
3298
+ this.ensureInitialized();
3299
+ try {
3300
+ return await this.service.suggestTagMerges(threshold);
3301
+ } catch (error) {
3302
+ throw this.wrapError(error, "Failed to suggest tag merges");
3303
+ }
3304
+ }
3305
+ async mergeTags(from, to) {
3306
+ this.ensureInitialized();
3307
+ try {
3308
+ return await this.service.mergeTag(from, to);
3309
+ } catch (error) {
3310
+ throw this.wrapError(error, "Failed to merge tags");
3311
+ }
3312
+ }
3313
+ async findStaleEntries(opts = {}) {
3314
+ this.ensureInitialized();
3315
+ try {
3316
+ return await this.service.findStaleEntries(opts);
3317
+ } catch (error) {
3318
+ throw this.wrapError(error, "Failed to find stale entries");
3319
+ }
3320
+ }
3321
+ async findDuplicatePairs(opts = {}) {
3322
+ this.ensureInitialized();
3323
+ try {
3324
+ return await this.service.findDuplicatePairs(opts);
3325
+ } catch (error) {
3326
+ throw this.wrapError(error, "Failed to find duplicate pairs");
3327
+ }
3328
+ }
2959
3329
  async countByType(opts = {}) {
2960
3330
  this.ensureInitialized();
2961
3331
  try {
@@ -3059,9 +3429,9 @@ var KnowledgeSDK = class {
3059
3429
  this.ensureInitialized();
3060
3430
  return this.service.deletePlan(id);
3061
3431
  }
3062
- listPlans(limit = 20, status, scope) {
3432
+ listPlans(limit = 20, status, scope, offset = 0) {
3063
3433
  this.ensureInitialized();
3064
- return this.service.listPlans(limit, status, scope);
3434
+ return this.service.listPlans(limit, status, scope, offset);
3065
3435
  }
3066
3436
  async addPlanRelation(planId, knowledgeId, relationType) {
3067
3437
  this.ensureInitialized();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cognistore/mcp-server",
3
- "version": "2.0.2",
3
+ "version": "2.1.0",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "MCP server for CogniStore — integrates with Claude Code and GitHub Copilot",
@@ -12,7 +12,7 @@
12
12
  },
13
13
  "main": "./dist/index.js",
14
14
  "bin": {
15
- "cognistore-mcp": "./dist/index.js"
15
+ "cognistore-mcp": "dist/index.js"
16
16
  },
17
17
  "files": [
18
18
  "dist"