@cognistore/mcp-server 2.1.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +226 -35
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -134,6 +134,12 @@ var updatePlanTaskSchema = z.object({
|
|
|
134
134
|
notes: z.string().nullable().optional(),
|
|
135
135
|
position: z.number().int().min(0).optional()
|
|
136
136
|
});
|
|
137
|
+
// Input schema for a batch tag merge: a list of 1..50 { from, to } pairs,
// each side a non-empty string.
var mergeTagsBatchSchema = z.object({
  merges: z
    .array(
      z.object({
        from: z.string().min(1, "from is required"),
        to: z.string().min(1, "to is required")
      })
    )
    .min(1, "at least one merge is required")
    .max(50, "at most 50 merges per batch")
});
|
|
137
143
|
|
|
138
144
|
// ../../packages/core/dist/db/schema/index.js
|
|
139
145
|
var schema_exports = {};
|
|
@@ -558,37 +564,37 @@ function createPlansEmbeddingsTable(sqlite, dimensions = DEFAULT_EMBEDDING_DIMEN
|
|
|
558
564
|
// ../../packages/core/dist/repositories/knowledge.repository.js
|
|
559
565
|
import { eq, ne, sql, and, or, isNull } from "drizzle-orm";
|
|
560
566
|
var OPERATIONS_RETENTION_DAYS = 30;
|
|
561
|
-
var KnowledgeRepository = class {
|
|
567
|
+
var KnowledgeRepository = class _KnowledgeRepository {
|
|
562
568
|
db;
|
|
563
569
|
sqlite;
|
|
564
570
|
constructor(db, sqlite) {
|
|
565
571
|
this.db = db;
|
|
566
572
|
this.sqlite = sqlite;
|
|
567
573
|
}
|
|
574
|
+
/**
|
|
575
|
+
* Entry row + embedding are committed atomically: a failed embedding insert
|
|
576
|
+
* rolls the entry back instead of leaving an orphan row that semantic search
|
|
577
|
+
* can never find. Uses a RAW prepared insert (createPlan style) inside
|
|
578
|
+
* better-sqlite3's .transaction() — the body must be fully synchronous; an
|
|
579
|
+
* awaited drizzle insert opens an interleaving window where a concurrent
|
|
580
|
+
* create() issues BEGIN on the same connection ("cannot start a transaction
|
|
581
|
+
* within a transaction"). FTS stays best-effort inside the txn.
|
|
582
|
+
*/
|
|
568
583
|
async create(input) {
|
|
569
584
|
const id = crypto.randomUUID();
|
|
570
585
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
571
|
-
const
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
createdAt: now,
|
|
584
|
-
updatedAt: now
|
|
585
|
-
}).returning();
|
|
586
|
-
insertEmbedding(this.sqlite, id, input.embedding);
|
|
587
|
-
try {
|
|
588
|
-
insertFts(this.sqlite, { id, title: input.title, content: input.content, tags: ftsTags(input.tags) });
|
|
589
|
-
} catch {
|
|
590
|
-
}
|
|
591
|
-
return entry;
|
|
586
|
+
const insertTxn = this.sqlite.transaction(() => {
|
|
587
|
+
this.sqlite.prepare(`INSERT INTO knowledge_entries
|
|
588
|
+
(id, title, content, tags, type, scope, source, version, expires_at, confidence_score, related_ids, agent_id, created_at, updated_at)
|
|
589
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?, ?, ?, ?, ?)`).run(id, input.title, input.content, JSON.stringify(input.tags ?? []), input.type, input.scope, input.source, input.expiresAt ? input.expiresAt.toISOString() : null, input.confidenceScore ?? 1, input.relatedIds ? JSON.stringify(input.relatedIds) : null, input.agentId ?? null, now, now);
|
|
590
|
+
insertEmbedding(this.sqlite, id, input.embedding);
|
|
591
|
+
try {
|
|
592
|
+
insertFts(this.sqlite, { id, title: input.title, content: input.content, tags: ftsTags(input.tags) });
|
|
593
|
+
} catch {
|
|
594
|
+
}
|
|
595
|
+
});
|
|
596
|
+
insertTxn();
|
|
597
|
+
return await this.findById(id);
|
|
592
598
|
}
|
|
593
599
|
async findById(id) {
|
|
594
600
|
const [entry] = await this.db.select().from(knowledgeEntries).where(eq(knowledgeEntries.id, id));
|
|
@@ -719,6 +725,11 @@ var KnowledgeRepository = class {
|
|
|
719
725
|
const result = await this.db.all(sql`SELECT value as tag, COUNT(*) as count FROM knowledge_entries, json_each(knowledge_entries.tags) WHERE knowledge_entries.type != 'system' GROUP BY value ORDER BY count DESC LIMIT ${limit}`);
|
|
720
726
|
return result;
|
|
721
727
|
}
|
|
728
|
+
/** Usage count for every tag (no limit/range) — feeds merge-keeper defaults. */
|
|
729
|
+
async tagCounts() {
|
|
730
|
+
const result = await this.db.all(sql`SELECT value as tag, COUNT(*) as count FROM knowledge_entries, json_each(knowledge_entries.tags) WHERE knowledge_entries.type != 'system' GROUP BY value`);
|
|
731
|
+
return result;
|
|
732
|
+
}
|
|
722
733
|
/**
|
|
723
734
|
* Rename/merge a tag across all entries. Per-row correlated rebuild of the JSON
|
|
724
735
|
* tags array: replace `from`→`to`, and `json_group_array(DISTINCT ...)` collapses
|
|
@@ -768,14 +779,30 @@ var KnowledgeRepository = class {
|
|
|
768
779
|
async findDuplicatePairs(opts = {}) {
|
|
769
780
|
const threshold = opts.threshold ?? 0.9;
|
|
770
781
|
const limit = opts.limit ?? 100;
|
|
782
|
+
const { pairs } = await this.collectDuplicatePairs(threshold, 5, limit);
|
|
783
|
+
return pairs.map((p) => ({ a: { id: p.a.id, title: p.a.title }, b: { id: p.b.id, title: p.b.title }, similarity: p.similarity })).sort((x, y) => y.similarity - x.similarity).slice(0, limit);
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Shared KNN pair collection for findDuplicatePairs/findDuplicateGroups.
|
|
787
|
+
* `pairCap` exists only as a runaway safety net — group callers pass a large cap
|
|
788
|
+
* (a truncated pair graph would split/lose cluster members in union-find).
|
|
789
|
+
*/
|
|
790
|
+
async collectDuplicatePairs(threshold, k, pairCap) {
|
|
771
791
|
const entries = await this.listAll();
|
|
772
|
-
const meta = new Map(entries.map((e) => [e.id,
|
|
792
|
+
const meta = new Map(entries.map((e) => [e.id, {
|
|
793
|
+
id: e.id,
|
|
794
|
+
title: e.title,
|
|
795
|
+
scope: e.scope,
|
|
796
|
+
type: e.type,
|
|
797
|
+
version: e.version ?? 1,
|
|
798
|
+
updatedAt: e.updatedAt
|
|
799
|
+
}]));
|
|
773
800
|
const pairs = [];
|
|
774
801
|
for (const entry of entries) {
|
|
775
802
|
const emb = getEmbeddingById(this.sqlite, entry.id);
|
|
776
803
|
if (!emb)
|
|
777
804
|
continue;
|
|
778
|
-
const neighbors = searchKnn(this.sqlite, emb,
|
|
805
|
+
const neighbors = searchKnn(this.sqlite, emb, k);
|
|
779
806
|
for (const n of neighbors) {
|
|
780
807
|
if (entry.id >= n.id)
|
|
781
808
|
continue;
|
|
@@ -783,18 +810,78 @@ var KnowledgeRepository = class {
|
|
|
783
810
|
continue;
|
|
784
811
|
const similarity = 1 - n.distance;
|
|
785
812
|
if (similarity >= threshold) {
|
|
786
|
-
|
|
787
|
-
pairs.push({
|
|
788
|
-
a: { id: entry.id, title: entry.title },
|
|
789
|
-
b: { id: other.id, title: other.title },
|
|
790
|
-
similarity
|
|
791
|
-
});
|
|
813
|
+
pairs.push({ a: meta.get(entry.id), b: meta.get(n.id), similarity });
|
|
792
814
|
}
|
|
793
815
|
}
|
|
794
|
-
if (pairs.length >=
|
|
816
|
+
if (pairs.length >= pairCap)
|
|
795
817
|
break;
|
|
796
818
|
}
|
|
797
|
-
return pairs
|
|
819
|
+
return { pairs, meta };
|
|
820
|
+
}
|
|
821
|
+
// KNN width for duplicate GROUPING. Per-entry KNN keeps a cluster of up to
|
|
822
|
+
// ~DUP_KNN_K identical members fully connected; raise if users report a giant
|
|
823
|
+
// duplicate cluster rendering as two cards.
|
|
824
|
+
static DUP_KNN_K = 20;
|
|
825
|
+
// Safety net only — never intended to truncate a real pair graph (105 pairs for
|
|
826
|
+
// a 15-member cluster; 5000 covers pathological DBs without unbounded memory).
|
|
827
|
+
static DUP_PAIR_CAP = 5e3;
|
|
828
|
+
/**
|
|
829
|
+
* Cluster near-duplicate pairs into connected components (union-find) so N
|
|
830
|
+
* copies of one entry render as ONE group, not N(N-1)/2 repeated pair rows.
|
|
831
|
+
* `limit` applies to the GROUP list (size DESC, then similarity DESC) — never
|
|
832
|
+
* to the underlying pair collection.
|
|
833
|
+
*/
|
|
834
|
+
async findDuplicateGroups(opts = {}) {
|
|
835
|
+
const threshold = opts.threshold ?? 0.9;
|
|
836
|
+
const limit = opts.limit ?? 100;
|
|
837
|
+
const { pairs } = await this.collectDuplicatePairs(threshold, _KnowledgeRepository.DUP_KNN_K, _KnowledgeRepository.DUP_PAIR_CAP);
|
|
838
|
+
const parent = /* @__PURE__ */ new Map();
|
|
839
|
+
const find = (x) => {
|
|
840
|
+
let root = x;
|
|
841
|
+
while (parent.get(root) !== void 0 && parent.get(root) !== root)
|
|
842
|
+
root = parent.get(root);
|
|
843
|
+
let cur = x;
|
|
844
|
+
while (cur !== root) {
|
|
845
|
+
const next = parent.get(cur);
|
|
846
|
+
parent.set(cur, root);
|
|
847
|
+
cur = next;
|
|
848
|
+
}
|
|
849
|
+
return root;
|
|
850
|
+
};
|
|
851
|
+
const union = (a, b) => {
|
|
852
|
+
if (!parent.has(a))
|
|
853
|
+
parent.set(a, a);
|
|
854
|
+
if (!parent.has(b))
|
|
855
|
+
parent.set(b, b);
|
|
856
|
+
const ra = find(a);
|
|
857
|
+
const rb = find(b);
|
|
858
|
+
if (ra !== rb)
|
|
859
|
+
parent.set(rb, ra);
|
|
860
|
+
};
|
|
861
|
+
const memberMeta = /* @__PURE__ */ new Map();
|
|
862
|
+
const maxSim = /* @__PURE__ */ new Map();
|
|
863
|
+
for (const p of pairs) {
|
|
864
|
+
union(p.a.id, p.b.id);
|
|
865
|
+
memberMeta.set(p.a.id, p.a);
|
|
866
|
+
memberMeta.set(p.b.id, p.b);
|
|
867
|
+
}
|
|
868
|
+
const grouped = /* @__PURE__ */ new Map();
|
|
869
|
+
for (const m of memberMeta.values()) {
|
|
870
|
+
const root = find(m.id);
|
|
871
|
+
const arr = grouped.get(root) ?? [];
|
|
872
|
+
arr.push(m);
|
|
873
|
+
grouped.set(root, arr);
|
|
874
|
+
}
|
|
875
|
+
for (const p of pairs) {
|
|
876
|
+
const root = find(p.a.id);
|
|
877
|
+
maxSim.set(root, Math.max(maxSim.get(root) ?? 0, p.similarity));
|
|
878
|
+
}
|
|
879
|
+
const groups = Array.from(grouped.entries()).map(([root, members]) => {
|
|
880
|
+
members.sort((a, b) => b.version - a.version || b.updatedAt.localeCompare(a.updatedAt));
|
|
881
|
+
const groupId = members.reduce((min, m) => m.id < min ? m.id : min, members[0].id);
|
|
882
|
+
return { groupId, maxSimilarity: maxSim.get(root) ?? 0, members };
|
|
883
|
+
});
|
|
884
|
+
return groups.sort((x, y) => y.members.length - x.members.length || y.maxSimilarity - x.maxSimilarity).slice(0, limit);
|
|
798
885
|
}
|
|
799
886
|
/**
|
|
800
887
|
* (Re)populate the FTS5 index from knowledge_entries. Used at startup when the
|
|
@@ -1481,13 +1568,21 @@ var KnowledgeService = class {
|
|
|
1481
1568
|
* O(n²) over the small DISTINCT tag set.
|
|
1482
1569
|
*/
|
|
1483
1570
|
async suggestTagMerges(threshold = 0.82) {
|
|
1484
|
-
const
|
|
1571
|
+
const counts = new Map((await this.repository.tagCounts()).map((r) => [r.tag, r.count]));
|
|
1572
|
+
const tags = Array.from(counts.keys());
|
|
1485
1573
|
const out = [];
|
|
1486
1574
|
for (let i = 0; i < tags.length; i++) {
|
|
1487
1575
|
for (let j = i + 1; j < tags.length; j++) {
|
|
1488
1576
|
const sim = this.tagSimilarity(tags[i], tags[j]);
|
|
1489
|
-
if (sim >= threshold)
|
|
1490
|
-
out.push({
|
|
1577
|
+
if (sim >= threshold) {
|
|
1578
|
+
out.push({
|
|
1579
|
+
a: tags[i],
|
|
1580
|
+
b: tags[j],
|
|
1581
|
+
similarity: Math.round(sim * 100) / 100,
|
|
1582
|
+
countA: counts.get(tags[i]) ?? 0,
|
|
1583
|
+
countB: counts.get(tags[j]) ?? 0
|
|
1584
|
+
});
|
|
1585
|
+
}
|
|
1491
1586
|
}
|
|
1492
1587
|
}
|
|
1493
1588
|
return out.sort((x, y) => y.similarity - x.similarity);
|
|
@@ -1533,12 +1628,90 @@ var KnowledgeService = class {
|
|
|
1533
1628
|
this.logOp("write", affected.length);
|
|
1534
1629
|
return { merged: affected.length };
|
|
1535
1630
|
}
|
|
1631
|
+
/**
|
|
1632
|
+
* Apply several tag merges in ONE pass. All conflict detection happens BEFORE
|
|
1633
|
+
* the first renameTag SQL executes, so a CONFLICT never leaves a partially
|
|
1634
|
+
* merged DB:
|
|
1635
|
+
* - duplicate `from` mapped to different targets → CONFLICT error
|
|
1636
|
+
* - cycles (a→b, b→a) → CONFLICT error
|
|
1637
|
+
* - chains collapse to their terminal target (a→b, b→c ⇒ a→c, b→c), which is
|
|
1638
|
+
* equivalent to sequential application: renameTag is idempotent per terminal
|
|
1639
|
+
* target and json_group_array(DISTINCT …) collapses a pre-existing target.
|
|
1640
|
+
* Affected entry ids are UNIONED so an entry touched by two merges is re-embedded
|
|
1641
|
+
* exactly once, AFTER all renames (buildEmbeddingText reads the final tags).
|
|
1642
|
+
* Re-embeds run through repository.update (UPDATE path: FTS resync + version
|
|
1643
|
+
* bump — never the embedding-insert path) with bounded concurrency; one failed
|
|
1644
|
+
* re-embed does not abort the rest.
|
|
1645
|
+
*/
|
|
1646
|
+
async mergeTagsBatch(merges) {
|
|
1647
|
+
const cleaned = merges.map((m) => ({ from: (m.from ?? "").trim(), to: (m.to ?? "").trim() })).filter((m) => m.from && m.to && m.from !== m.to);
|
|
1648
|
+
if (cleaned.length === 0)
|
|
1649
|
+
return { applied: [], entriesReembedded: 0 };
|
|
1650
|
+
const target = /* @__PURE__ */ new Map();
|
|
1651
|
+
for (const m of cleaned) {
|
|
1652
|
+
const existing = target.get(m.from);
|
|
1653
|
+
if (existing !== void 0 && existing !== m.to) {
|
|
1654
|
+
throw new Error(`CONFLICT: tag "${m.from}" is merged into multiple targets ("${existing}" and "${m.to}")`);
|
|
1655
|
+
}
|
|
1656
|
+
target.set(m.from, m.to);
|
|
1657
|
+
}
|
|
1658
|
+
const terminal = /* @__PURE__ */ new Map();
|
|
1659
|
+
for (const from of target.keys()) {
|
|
1660
|
+
const visited = /* @__PURE__ */ new Set([from]);
|
|
1661
|
+
let to = target.get(from);
|
|
1662
|
+
while (target.has(to)) {
|
|
1663
|
+
if (visited.has(to)) {
|
|
1664
|
+
throw new Error(`CONFLICT: circular merge chain involving tag "${to}"`);
|
|
1665
|
+
}
|
|
1666
|
+
visited.add(to);
|
|
1667
|
+
to = target.get(to);
|
|
1668
|
+
}
|
|
1669
|
+
terminal.set(from, to);
|
|
1670
|
+
}
|
|
1671
|
+
const applied = [];
|
|
1672
|
+
const affectedIds = /* @__PURE__ */ new Set();
|
|
1673
|
+
for (const [from, to] of terminal) {
|
|
1674
|
+
const ids2 = this.repository.renameTag(from, to);
|
|
1675
|
+
for (const id of ids2)
|
|
1676
|
+
affectedIds.add(id);
|
|
1677
|
+
applied.push({ from, to, count: ids2.length });
|
|
1678
|
+
}
|
|
1679
|
+
const ids = Array.from(affectedIds);
|
|
1680
|
+
await this.mapWithConcurrency(ids, 4, async (id) => {
|
|
1681
|
+
try {
|
|
1682
|
+
const entry = await this.repository.findById(id);
|
|
1683
|
+
if (!entry)
|
|
1684
|
+
return;
|
|
1685
|
+
const tags = Array.isArray(entry.tags) ? entry.tags : JSON.parse(entry.tags ?? "[]");
|
|
1686
|
+
const embedding = await this.embeddingProvider.embed(this.buildEmbeddingText(entry.title, entry.content, tags));
|
|
1687
|
+
await this.repository.update(id, { embedding });
|
|
1688
|
+
} catch {
|
|
1689
|
+
}
|
|
1690
|
+
});
|
|
1691
|
+
if (affectedIds.size)
|
|
1692
|
+
this.logOp("write", affectedIds.size);
|
|
1693
|
+
return { applied, entriesReembedded: affectedIds.size };
|
|
1694
|
+
}
|
|
1695
|
+
/** Minimal worker-pool: run `fn` over `items` with at most `limit` in flight. */
|
|
1696
|
+
async mapWithConcurrency(items, limit, fn) {
|
|
1697
|
+
let next = 0;
|
|
1698
|
+
const workers = Array.from({ length: Math.min(limit, items.length) }, async () => {
|
|
1699
|
+
while (next < items.length) {
|
|
1700
|
+
const i = next++;
|
|
1701
|
+
await fn(items[i]);
|
|
1702
|
+
}
|
|
1703
|
+
});
|
|
1704
|
+
await Promise.all(workers);
|
|
1705
|
+
}
|
|
1536
1706
|
async findStaleEntries(opts = {}) {
|
|
1537
1707
|
return this.repository.findStaleEntries(opts);
|
|
1538
1708
|
}
|
|
1539
1709
|
async findDuplicatePairs(opts = {}) {
|
|
1540
1710
|
return this.repository.findDuplicatePairs(opts);
|
|
1541
1711
|
}
|
|
1712
|
+
async findDuplicateGroups(opts = {}) {
|
|
1713
|
+
return this.repository.findDuplicateGroups(opts);
|
|
1714
|
+
}
|
|
1542
1715
|
/** (Re)populate the FTS5 index if it's empty but entries exist. Returns rows indexed. */
|
|
1543
1716
|
backfillFtsIfNeeded() {
|
|
1544
1717
|
return this.repository.backfillFtsIfNeeded();
|
|
@@ -3310,6 +3483,16 @@ var KnowledgeSDK = class {
|
|
|
3310
3483
|
throw this.wrapError(error, "Failed to merge tags");
|
|
3311
3484
|
}
|
|
3312
3485
|
}
|
|
3486
|
+
async mergeTagsBatch(merges) {
|
|
3487
|
+
this.ensureInitialized();
|
|
3488
|
+
try {
|
|
3489
|
+
return await this.service.mergeTagsBatch(merges);
|
|
3490
|
+
} catch (error) {
|
|
3491
|
+
if (error instanceof Error && error.message.startsWith("CONFLICT:"))
|
|
3492
|
+
throw error;
|
|
3493
|
+
throw this.wrapError(error, "Failed to merge tags");
|
|
3494
|
+
}
|
|
3495
|
+
}
|
|
3313
3496
|
async findStaleEntries(opts = {}) {
|
|
3314
3497
|
this.ensureInitialized();
|
|
3315
3498
|
try {
|
|
@@ -3326,6 +3509,14 @@ var KnowledgeSDK = class {
|
|
|
3326
3509
|
throw this.wrapError(error, "Failed to find duplicate pairs");
|
|
3327
3510
|
}
|
|
3328
3511
|
}
|
|
3512
|
+
async findDuplicateGroups(opts = {}) {
|
|
3513
|
+
this.ensureInitialized();
|
|
3514
|
+
try {
|
|
3515
|
+
return await this.service.findDuplicateGroups(opts);
|
|
3516
|
+
} catch (error) {
|
|
3517
|
+
throw this.wrapError(error, "Failed to find duplicate groups");
|
|
3518
|
+
}
|
|
3519
|
+
}
|
|
3329
3520
|
async countByType(opts = {}) {
|
|
3330
3521
|
this.ensureInitialized();
|
|
3331
3522
|
try {
|