@cognistore/mcp-server 2.1.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +226 -35
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -134,6 +134,12 @@ var updatePlanTaskSchema = z.object({
134
134
  notes: z.string().nullable().optional(),
135
135
  position: z.number().int().min(0).optional()
136
136
  });
137
+ var mergeTagsBatchSchema = z.object({
138
+ merges: z.array(z.object({
139
+ from: z.string().min(1, "from is required"),
140
+ to: z.string().min(1, "to is required")
141
+ })).min(1, "at least one merge is required").max(50, "at most 50 merges per batch")
142
+ });
137
143
 
138
144
  // ../../packages/core/dist/db/schema/index.js
139
145
  var schema_exports = {};
@@ -558,37 +564,37 @@ function createPlansEmbeddingsTable(sqlite, dimensions = DEFAULT_EMBEDDING_DIMEN
558
564
  // ../../packages/core/dist/repositories/knowledge.repository.js
559
565
  import { eq, ne, sql, and, or, isNull } from "drizzle-orm";
560
566
  var OPERATIONS_RETENTION_DAYS = 30;
561
- var KnowledgeRepository = class {
567
+ var KnowledgeRepository = class _KnowledgeRepository {
562
568
  db;
563
569
  sqlite;
564
570
  constructor(db, sqlite) {
565
571
  this.db = db;
566
572
  this.sqlite = sqlite;
567
573
  }
574
+ /**
575
+ * Entry row + embedding are committed atomically: a failed embedding insert
576
+ * rolls the entry back instead of leaving an orphan row that semantic search
577
+ * can never find. Uses a RAW prepared insert (createPlan style) inside
578
+ * better-sqlite3's .transaction() — the body must be fully synchronous; an
579
+ * awaited drizzle insert opens an interleaving window where a concurrent
580
+ * create() issues BEGIN on the same connection ("cannot start a transaction
581
+ * within a transaction"). FTS stays best-effort inside the txn.
582
+ */
568
583
  async create(input) {
569
584
  const id = crypto.randomUUID();
570
585
  const now = (/* @__PURE__ */ new Date()).toISOString();
571
- const [entry] = await this.db.insert(knowledgeEntries).values({
572
- id,
573
- title: input.title,
574
- content: input.content,
575
- tags: input.tags,
576
- type: input.type,
577
- scope: input.scope,
578
- source: input.source,
579
- confidenceScore: input.confidenceScore ?? 1,
580
- expiresAt: input.expiresAt ? input.expiresAt.toISOString() : null,
581
- relatedIds: input.relatedIds ?? null,
582
- agentId: input.agentId ?? null,
583
- createdAt: now,
584
- updatedAt: now
585
- }).returning();
586
- insertEmbedding(this.sqlite, id, input.embedding);
587
- try {
588
- insertFts(this.sqlite, { id, title: input.title, content: input.content, tags: ftsTags(input.tags) });
589
- } catch {
590
- }
591
- return entry;
586
+ const insertTxn = this.sqlite.transaction(() => {
587
+ this.sqlite.prepare(`INSERT INTO knowledge_entries
588
+ (id, title, content, tags, type, scope, source, version, expires_at, confidence_score, related_ids, agent_id, created_at, updated_at)
589
+ VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?, ?, ?, ?, ?)`).run(id, input.title, input.content, JSON.stringify(input.tags ?? []), input.type, input.scope, input.source, input.expiresAt ? input.expiresAt.toISOString() : null, input.confidenceScore ?? 1, input.relatedIds ? JSON.stringify(input.relatedIds) : null, input.agentId ?? null, now, now);
590
+ insertEmbedding(this.sqlite, id, input.embedding);
591
+ try {
592
+ insertFts(this.sqlite, { id, title: input.title, content: input.content, tags: ftsTags(input.tags) });
593
+ } catch {
594
+ }
595
+ });
596
+ insertTxn();
597
+ return await this.findById(id);
592
598
  }
593
599
  async findById(id) {
594
600
  const [entry] = await this.db.select().from(knowledgeEntries).where(eq(knowledgeEntries.id, id));
@@ -719,6 +725,11 @@ var KnowledgeRepository = class {
719
725
  const result = await this.db.all(sql`SELECT value as tag, COUNT(*) as count FROM knowledge_entries, json_each(knowledge_entries.tags) WHERE knowledge_entries.type != 'system' GROUP BY value ORDER BY count DESC LIMIT ${limit}`);
720
726
  return result;
721
727
  }
728
+ /** Usage count for every tag (no limit/range) — feeds merge-keeper defaults. */
729
+ async tagCounts() {
730
+ const result = await this.db.all(sql`SELECT value as tag, COUNT(*) as count FROM knowledge_entries, json_each(knowledge_entries.tags) WHERE knowledge_entries.type != 'system' GROUP BY value`);
731
+ return result;
732
+ }
722
733
  /**
723
734
  * Rename/merge a tag across all entries. Per-row correlated rebuild of the JSON
724
735
  * tags array: replace `from`→`to`, and `json_group_array(DISTINCT ...)` collapses
@@ -768,14 +779,30 @@ var KnowledgeRepository = class {
768
779
  async findDuplicatePairs(opts = {}) {
769
780
  const threshold = opts.threshold ?? 0.9;
770
781
  const limit = opts.limit ?? 100;
782
+ const { pairs } = await this.collectDuplicatePairs(threshold, 5, limit);
783
+ return pairs.map((p) => ({ a: { id: p.a.id, title: p.a.title }, b: { id: p.b.id, title: p.b.title }, similarity: p.similarity })).sort((x, y) => y.similarity - x.similarity).slice(0, limit);
784
+ }
785
+ /**
786
+ * Shared KNN pair collection for findDuplicatePairs/findDuplicateGroups.
787
+ * `pairCap` exists only as a runaway safety net — group callers pass a large cap
788
+ * (a truncated pair graph would split/lose cluster members in union-find).
789
+ */
790
+ async collectDuplicatePairs(threshold, k, pairCap) {
771
791
  const entries = await this.listAll();
772
- const meta = new Map(entries.map((e) => [e.id, e]));
792
+ const meta = new Map(entries.map((e) => [e.id, {
793
+ id: e.id,
794
+ title: e.title,
795
+ scope: e.scope,
796
+ type: e.type,
797
+ version: e.version ?? 1,
798
+ updatedAt: e.updatedAt
799
+ }]));
773
800
  const pairs = [];
774
801
  for (const entry of entries) {
775
802
  const emb = getEmbeddingById(this.sqlite, entry.id);
776
803
  if (!emb)
777
804
  continue;
778
- const neighbors = searchKnn(this.sqlite, emb, 5);
805
+ const neighbors = searchKnn(this.sqlite, emb, k);
779
806
  for (const n of neighbors) {
780
807
  if (entry.id >= n.id)
781
808
  continue;
@@ -783,18 +810,78 @@ var KnowledgeRepository = class {
783
810
  continue;
784
811
  const similarity = 1 - n.distance;
785
812
  if (similarity >= threshold) {
786
- const other = meta.get(n.id);
787
- pairs.push({
788
- a: { id: entry.id, title: entry.title },
789
- b: { id: other.id, title: other.title },
790
- similarity
791
- });
813
+ pairs.push({ a: meta.get(entry.id), b: meta.get(n.id), similarity });
792
814
  }
793
815
  }
794
- if (pairs.length >= limit)
816
+ if (pairs.length >= pairCap)
795
817
  break;
796
818
  }
797
- return pairs.sort((x, y) => y.similarity - x.similarity).slice(0, limit);
819
+ return { pairs, meta };
820
+ }
821
+ // KNN width for duplicate GROUPING. Per-entry KNN keeps a cluster of up to
822
+ // ~DUP_KNN_K identical members fully connected; raise if users report a giant
823
+ // duplicate cluster rendering as two cards.
824
+ static DUP_KNN_K = 20;
825
+ // Safety net only — never intended to truncate a real pair graph (105 pairs for
826
+ // a 15-member cluster; 5000 covers pathological DBs without unbounded memory).
827
+ static DUP_PAIR_CAP = 5e3;
828
+ /**
829
+ * Cluster near-duplicate pairs into connected components (union-find) so N
830
+ * copies of one entry render as ONE group, not N(N-1)/2 repeated pair rows.
831
+ * `limit` applies to the GROUP list (size DESC, then similarity DESC) — never
832
+ * to the underlying pair collection.
833
+ */
834
+ async findDuplicateGroups(opts = {}) {
835
+ const threshold = opts.threshold ?? 0.9;
836
+ const limit = opts.limit ?? 100;
837
+ const { pairs } = await this.collectDuplicatePairs(threshold, _KnowledgeRepository.DUP_KNN_K, _KnowledgeRepository.DUP_PAIR_CAP);
838
+ const parent = /* @__PURE__ */ new Map();
839
+ const find = (x) => {
840
+ let root = x;
841
+ while (parent.get(root) !== void 0 && parent.get(root) !== root)
842
+ root = parent.get(root);
843
+ let cur = x;
844
+ while (cur !== root) {
845
+ const next = parent.get(cur);
846
+ parent.set(cur, root);
847
+ cur = next;
848
+ }
849
+ return root;
850
+ };
851
+ const union = (a, b) => {
852
+ if (!parent.has(a))
853
+ parent.set(a, a);
854
+ if (!parent.has(b))
855
+ parent.set(b, b);
856
+ const ra = find(a);
857
+ const rb = find(b);
858
+ if (ra !== rb)
859
+ parent.set(rb, ra);
860
+ };
861
+ const memberMeta = /* @__PURE__ */ new Map();
862
+ const maxSim = /* @__PURE__ */ new Map();
863
+ for (const p of pairs) {
864
+ union(p.a.id, p.b.id);
865
+ memberMeta.set(p.a.id, p.a);
866
+ memberMeta.set(p.b.id, p.b);
867
+ }
868
+ const grouped = /* @__PURE__ */ new Map();
869
+ for (const m of memberMeta.values()) {
870
+ const root = find(m.id);
871
+ const arr = grouped.get(root) ?? [];
872
+ arr.push(m);
873
+ grouped.set(root, arr);
874
+ }
875
+ for (const p of pairs) {
876
+ const root = find(p.a.id);
877
+ maxSim.set(root, Math.max(maxSim.get(root) ?? 0, p.similarity));
878
+ }
879
+ const groups = Array.from(grouped.entries()).map(([root, members]) => {
880
+ members.sort((a, b) => b.version - a.version || b.updatedAt.localeCompare(a.updatedAt));
881
+ const groupId = members.reduce((min, m) => m.id < min ? m.id : min, members[0].id);
882
+ return { groupId, maxSimilarity: maxSim.get(root) ?? 0, members };
883
+ });
884
+ return groups.sort((x, y) => y.members.length - x.members.length || y.maxSimilarity - x.maxSimilarity).slice(0, limit);
798
885
  }
799
886
  /**
800
887
  * (Re)populate the FTS5 index from knowledge_entries. Used at startup when the
@@ -1481,13 +1568,21 @@ var KnowledgeService = class {
1481
1568
  * O(n²) over the small DISTINCT tag set.
1482
1569
  */
1483
1570
  async suggestTagMerges(threshold = 0.82) {
1484
- const tags = await this.repository.listTags();
1571
+ const counts = new Map((await this.repository.tagCounts()).map((r) => [r.tag, r.count]));
1572
+ const tags = Array.from(counts.keys());
1485
1573
  const out = [];
1486
1574
  for (let i = 0; i < tags.length; i++) {
1487
1575
  for (let j = i + 1; j < tags.length; j++) {
1488
1576
  const sim = this.tagSimilarity(tags[i], tags[j]);
1489
- if (sim >= threshold)
1490
- out.push({ a: tags[i], b: tags[j], similarity: Math.round(sim * 100) / 100 });
1577
+ if (sim >= threshold) {
1578
+ out.push({
1579
+ a: tags[i],
1580
+ b: tags[j],
1581
+ similarity: Math.round(sim * 100) / 100,
1582
+ countA: counts.get(tags[i]) ?? 0,
1583
+ countB: counts.get(tags[j]) ?? 0
1584
+ });
1585
+ }
1491
1586
  }
1492
1587
  }
1493
1588
  return out.sort((x, y) => y.similarity - x.similarity);
@@ -1533,12 +1628,90 @@ var KnowledgeService = class {
1533
1628
  this.logOp("write", affected.length);
1534
1629
  return { merged: affected.length };
1535
1630
  }
1631
+ /**
1632
+ * Apply several tag merges in ONE pass. All conflict detection happens BEFORE
1633
+ * the first renameTag SQL executes, so a CONFLICT never leaves a partially
1634
+ * merged DB:
1635
+ * - duplicate `from` mapped to different targets → CONFLICT error
1636
+ * - cycles (a→b, b→a) → CONFLICT error
1637
+ * - chains collapse to their terminal target (a→b, b→c ⇒ a→c, b→c), which is
1638
+ * equivalent to sequential application: renameTag is idempotent per terminal
1639
+ * target and json_group_array(DISTINCT …) collapses a pre-existing target.
1640
+ * Affected entry ids are UNIONED so an entry touched by two merges is re-embedded
1641
+ * exactly once, AFTER all renames (buildEmbeddingText reads the final tags).
1642
+ * Re-embeds run through repository.update (UPDATE path: FTS resync + version
1643
+ * bump — never the embedding-insert path) with bounded concurrency; one failed
1644
+ * re-embed does not abort the rest.
1645
+ */
1646
+ async mergeTagsBatch(merges) {
1647
+ const cleaned = merges.map((m) => ({ from: (m.from ?? "").trim(), to: (m.to ?? "").trim() })).filter((m) => m.from && m.to && m.from !== m.to);
1648
+ if (cleaned.length === 0)
1649
+ return { applied: [], entriesReembedded: 0 };
1650
+ const target = /* @__PURE__ */ new Map();
1651
+ for (const m of cleaned) {
1652
+ const existing = target.get(m.from);
1653
+ if (existing !== void 0 && existing !== m.to) {
1654
+ throw new Error(`CONFLICT: tag "${m.from}" is merged into multiple targets ("${existing}" and "${m.to}")`);
1655
+ }
1656
+ target.set(m.from, m.to);
1657
+ }
1658
+ const terminal = /* @__PURE__ */ new Map();
1659
+ for (const from of target.keys()) {
1660
+ const visited = /* @__PURE__ */ new Set([from]);
1661
+ let to = target.get(from);
1662
+ while (target.has(to)) {
1663
+ if (visited.has(to)) {
1664
+ throw new Error(`CONFLICT: circular merge chain involving tag "${to}"`);
1665
+ }
1666
+ visited.add(to);
1667
+ to = target.get(to);
1668
+ }
1669
+ terminal.set(from, to);
1670
+ }
1671
+ const applied = [];
1672
+ const affectedIds = /* @__PURE__ */ new Set();
1673
+ for (const [from, to] of terminal) {
1674
+ const ids2 = this.repository.renameTag(from, to);
1675
+ for (const id of ids2)
1676
+ affectedIds.add(id);
1677
+ applied.push({ from, to, count: ids2.length });
1678
+ }
1679
+ const ids = Array.from(affectedIds);
1680
+ await this.mapWithConcurrency(ids, 4, async (id) => {
1681
+ try {
1682
+ const entry = await this.repository.findById(id);
1683
+ if (!entry)
1684
+ return;
1685
+ const tags = Array.isArray(entry.tags) ? entry.tags : JSON.parse(entry.tags ?? "[]");
1686
+ const embedding = await this.embeddingProvider.embed(this.buildEmbeddingText(entry.title, entry.content, tags));
1687
+ await this.repository.update(id, { embedding });
1688
+ } catch {
1689
+ }
1690
+ });
1691
+ if (affectedIds.size)
1692
+ this.logOp("write", affectedIds.size);
1693
+ return { applied, entriesReembedded: affectedIds.size };
1694
+ }
1695
+ /** Minimal worker-pool: run `fn` over `items` with at most `limit` in flight. */
1696
+ async mapWithConcurrency(items, limit, fn) {
1697
+ let next = 0;
1698
+ const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
1699
+ while (next < items.length) {
1700
+ const i = next++;
1701
+ await fn(items[i]);
1702
+ }
1703
+ });
1704
+ await Promise.all(workers);
1705
+ }
1536
1706
  async findStaleEntries(opts = {}) {
1537
1707
  return this.repository.findStaleEntries(opts);
1538
1708
  }
1539
1709
  async findDuplicatePairs(opts = {}) {
1540
1710
  return this.repository.findDuplicatePairs(opts);
1541
1711
  }
1712
+ async findDuplicateGroups(opts = {}) {
1713
+ return this.repository.findDuplicateGroups(opts);
1714
+ }
1542
1715
  /** (Re)populate the FTS5 index if it's empty but entries exist. Returns rows indexed. */
1543
1716
  backfillFtsIfNeeded() {
1544
1717
  return this.repository.backfillFtsIfNeeded();
@@ -3310,6 +3483,16 @@ var KnowledgeSDK = class {
3310
3483
  throw this.wrapError(error, "Failed to merge tags");
3311
3484
  }
3312
3485
  }
3486
+ async mergeTagsBatch(merges) {
3487
+ this.ensureInitialized();
3488
+ try {
3489
+ return await this.service.mergeTagsBatch(merges);
3490
+ } catch (error) {
3491
+ if (error instanceof Error && error.message.startsWith("CONFLICT:"))
3492
+ throw error;
3493
+ throw this.wrapError(error, "Failed to merge tags");
3494
+ }
3495
+ }
3313
3496
  async findStaleEntries(opts = {}) {
3314
3497
  this.ensureInitialized();
3315
3498
  try {
@@ -3326,6 +3509,14 @@ var KnowledgeSDK = class {
3326
3509
  throw this.wrapError(error, "Failed to find duplicate pairs");
3327
3510
  }
3328
3511
  }
3512
+ async findDuplicateGroups(opts = {}) {
3513
+ this.ensureInitialized();
3514
+ try {
3515
+ return await this.service.findDuplicateGroups(opts);
3516
+ } catch (error) {
3517
+ throw this.wrapError(error, "Failed to find duplicate groups");
3518
+ }
3519
+ }
3329
3520
  async countByType(opts = {}) {
3330
3521
  this.ensureInitialized();
3331
3522
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cognistore/mcp-server",
3
- "version": "2.1.3",
3
+ "version": "2.2.0",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "MCP server for CogniStore — integrates with Claude Code and GitHub Copilot",