@cognistore/mcp-server 2.0.3 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +393 -23
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -94,7 +94,8 @@ var searchOptionsSchema = z.object({
|
|
|
94
94
|
scope: scopeSchema.optional(),
|
|
95
95
|
limit: z.number().int().min(1).max(100).optional().default(DEFAULT_SEARCH_LIMIT),
|
|
96
96
|
threshold: z.number().min(0).max(1).optional().default(DEFAULT_SIMILARITY_THRESHOLD),
|
|
97
|
-
includePlanContext: z.boolean().optional().default(false)
|
|
97
|
+
includePlanContext: z.boolean().optional().default(false),
|
|
98
|
+
queryText: z.string().optional()
|
|
98
99
|
});
|
|
99
100
|
var createPlanSchema = z.object({
|
|
100
101
|
title: z.string().min(1, "Title is required"),
|
|
@@ -140,6 +141,7 @@ __export(schema_exports, {
|
|
|
140
141
|
createEmbeddingsTable: () => createEmbeddingsTable,
|
|
141
142
|
deleteEmbedding: () => deleteEmbedding,
|
|
142
143
|
deletePlanEmbedding: () => deletePlanEmbedding,
|
|
144
|
+
getEmbeddingById: () => getEmbeddingById,
|
|
143
145
|
insertEmbedding: () => insertEmbedding,
|
|
144
146
|
insertPlanEmbedding: () => insertPlanEmbedding,
|
|
145
147
|
knowledgeEntries: () => knowledgeEntries,
|
|
@@ -195,6 +197,14 @@ function deleteEmbedding(sqlite, id) {
|
|
|
195
197
|
const stmt = sqlite.prepare(`DELETE FROM ${VIRTUAL_TABLE_NAME} WHERE id = ?`);
|
|
196
198
|
stmt.run(id);
|
|
197
199
|
}
|
|
200
|
+
/**
 * Load the stored embedding vector for one row of the vector table.
 *
 * @param {object} sqlite - SQLite handle exposing `prepare(sql).get(...)`.
 * @param {*} id - Value bound to the `id = ?` placeholder.
 * @returns {number[] | null} The embedding as plain numbers, or `null` when no
 *   row matches or its `embedding` column is NULL.
 */
function getEmbeddingById(sqlite, id) {
  const row = sqlite.prepare(`SELECT embedding FROM ${VIRTUAL_TABLE_NAME} WHERE id = ?`).get(id);
  if (!row || row.embedding == null)
    return null;
  const buf = row.embedding;
  // Trailing bytes that do not form a whole 32-bit float are ignored.
  const count = Math.floor(buf.byteLength / 4);
  if (buf.byteOffset % 4 === 0) {
    return Array.from(new Float32Array(buf.buffer, buf.byteOffset, count));
  }
  // A Float32Array view requires a 4-byte-aligned offset and throws a
  // RangeError otherwise. A Buffer that is a view into a larger ArrayBuffer
  // can start at any byte, so copy the payload into a fresh buffer first.
  const aligned = new Uint8Array(count * 4);
  aligned.set(new Uint8Array(buf.buffer, buf.byteOffset, count * 4));
  return Array.from(new Float32Array(aligned.buffer));
}
|
|
198
208
|
function searchKnn(sqlite, queryEmbedding, k) {
|
|
199
209
|
const stmt = sqlite.prepare(`
|
|
200
210
|
SELECT id, distance
|
|
@@ -224,6 +234,49 @@ function searchPlansKnn(sqlite, queryEmbedding, k) {
|
|
|
224
234
|
return stmt.all(Buffer.from(new Float32Array(queryEmbedding).buffer), k);
|
|
225
235
|
}
|
|
226
236
|
|
|
237
|
+
// ../../packages/core/dist/db/schema/fts.js
|
|
238
|
+
var FTS_TABLE_NAME = "knowledge_fts";
|
|
239
|
+
/**
 * Create the FTS5 virtual table used for keyword search if it does not exist.
 * The `id` column is stored but UNINDEXED; `title`, `content` and `tags` are
 * the searchable columns.
 *
 * @param {object} sqlite - SQLite handle exposing `exec(sql)`.
 */
function createKnowledgeFtsTable(sqlite) {
  const ddl = `
    CREATE VIRTUAL TABLE IF NOT EXISTS ${FTS_TABLE_NAME} USING fts5(
      id UNINDEXED,
      title,
      content,
      tags
    )
  `;
  sqlite.exec(ddl);
}
|
|
249
|
+
/**
 * Insert one row into the FTS index. Nullish `title`, `content` or `tags` are
 * stored as empty strings.
 *
 * @param {object} sqlite - SQLite handle exposing `prepare(sql).run(...)`.
 * @param {{id: *, title?: string, content?: string, tags?: string}} row
 */
function insertFts(sqlite, row) {
  const statement = sqlite.prepare(`INSERT INTO ${FTS_TABLE_NAME}(id, title, content, tags) VALUES (?, ?, ?, ?)`);
  const title = row.title ?? "";
  const content = row.content ?? "";
  const tags = row.tags ?? "";
  statement.run(row.id, title, content, tags);
}
|
|
252
|
+
/**
 * Replace the FTS row for `row.id`: the existing row is deleted, then the new
 * values are inserted.
 *
 * @param {object} sqlite - SQLite handle passed through to the helpers.
 * @param {{id: *, title?: string, content?: string, tags?: string}} row
 */
function updateFts(sqlite, row) {
  const staleId = row.id;
  deleteFts(sqlite, staleId);
  insertFts(sqlite, row);
}
|
|
256
|
+
/**
 * Remove the FTS row(s) whose `id` matches.
 *
 * @param {object} sqlite - SQLite handle exposing `prepare(sql).run(...)`.
 * @param {*} id - Value bound to the `id = ?` placeholder.
 */
function deleteFts(sqlite, id) {
  const removal = sqlite.prepare(`DELETE FROM ${FTS_TABLE_NAME} WHERE id = ?`);
  removal.run(id);
}
|
|
259
|
+
/**
 * Count the rows currently in the FTS index.
 *
 * @param {object} sqlite - SQLite handle exposing `prepare(sql).get()`.
 * @returns {number} The `count(*)` value, or 0 when no row or count is returned.
 */
function ftsCount(sqlite) {
  const result = sqlite.prepare(`SELECT count(*) AS c FROM ${FTS_TABLE_NAME}`).get();
  if (result == null) {
    return 0;
  }
  return result.c ?? 0;
}
|
|
263
|
+
/**
 * Turn free text into an FTS5 MATCH expression made only of quoted phrases.
 * The text is split on whitespace, each token is wrapped in double quotes with
 * embedded double quotes doubled, and the phrases are joined with spaces.
 *
 * @param {string} query - Raw user text.
 * @returns {string} The MATCH expression; empty when the text has no tokens.
 */
function sanitizeFtsQuery(query) {
  const phrases = [];
  for (const token of query.split(/\s+/)) {
    if (!token) {
      continue;
    }
    const escaped = token.replace(/"/g, '""');
    phrases.push(`"${escaped}"`);
  }
  return phrases.join(" ");
}
|
|
266
|
+
/**
 * Run a keyword search against the FTS index.
 *
 * @param {object} sqlite - SQLite handle exposing `prepare(sql).all(...)`.
 * @param {string} query - Raw user text; sanitized before being bound to MATCH.
 * @param {number} k - Value bound to LIMIT.
 * @returns {Array<{id: *, bm25: number}>} Rows ordered by the `bm25` column
 *   ascending; `[]` without touching the database when the sanitized query is empty.
 */
function searchFts(sqlite, query, k) {
  const match = sanitizeFtsQuery(query);
  if (match) {
    const lookup = sqlite.prepare(`
      SELECT id, bm25(${FTS_TABLE_NAME}) AS bm25
      FROM ${FTS_TABLE_NAME}
      WHERE ${FTS_TABLE_NAME} MATCH ?
      ORDER BY bm25
      LIMIT ?
    `);
    return lookup.all(match, k);
  }
  return [];
}
|
|
279
|
+
|
|
227
280
|
// ../../packages/core/dist/db/migrate.js
|
|
228
281
|
import { readdirSync, readFileSync, existsSync } from "fs";
|
|
229
282
|
import { resolve } from "path";
|
|
@@ -357,6 +410,9 @@ CREATE TABLE IF NOT EXISTS scan_state (
|
|
|
357
410
|
);
|
|
358
411
|
|
|
359
412
|
ALTER TABLE plans ADD COLUMN plan_file_path TEXT;
|
|
413
|
+
`,
|
|
414
|
+
"2.1.0": `
|
|
415
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_fts USING fts5(id UNINDEXED, title, content, tags);
|
|
360
416
|
`
|
|
361
417
|
};
|
|
362
418
|
function runMigrations(sqlite, migrationsDir) {
|
|
@@ -459,9 +515,34 @@ function createDbClient(dbPath) {
|
|
|
459
515
|
const dims = Number(process.env.EMBEDDING_DIMENSIONS) || DEFAULT_EMBEDDING_DIMENSIONS;
|
|
460
516
|
createEmbeddingsTable(sqlite, dims);
|
|
461
517
|
createPlansEmbeddingsTable(sqlite, dims);
|
|
518
|
+
try {
|
|
519
|
+
createKnowledgeFtsTable(sqlite);
|
|
520
|
+
backfillFtsIfEmpty(sqlite);
|
|
521
|
+
sqlite.exec("CREATE INDEX IF NOT EXISTS idx_knowledge_created_at ON knowledge_entries(created_at)");
|
|
522
|
+
sqlite.exec("CREATE INDEX IF NOT EXISTS idx_plans_created_at ON plans(created_at)");
|
|
523
|
+
} catch {
|
|
524
|
+
}
|
|
462
525
|
const db = drizzle(sqlite, { schema: schema_exports });
|
|
463
526
|
return { db, sqlite };
|
|
464
527
|
}
|
|
528
|
+
/**
 * Populate the FTS index from `knowledge_entries` when the index is empty.
 * Rows of type 'system' are excluded. Tags are read as a JSON array and
 * flattened to a space-separated string; unparsable or non-array tags become
 * "". A row whose insert fails is skipped (best-effort).
 *
 * @param {object} sqlite - SQLite handle exposing `prepare(sql).all()`.
 */
function backfillFtsIfEmpty(sqlite) {
  if (ftsCount(sqlite) > 0)
    return;
  // Flatten a record's JSON tag array; any failure yields an empty string.
  const flattenTags = (record) => {
    try {
      const decoded = JSON.parse(record.tags ?? "[]");
      return Array.isArray(decoded) ? decoded.filter(Boolean).join(" ") : "";
    } catch {
      return "";
    }
  };
  const records = sqlite.prepare("SELECT id, title, content, tags FROM knowledge_entries WHERE type != 'system'").all();
  for (const record of records) {
    const tags = flattenTags(record);
    try {
      insertFts(sqlite, {
        id: record.id,
        title: record.title ?? "",
        content: record.content ?? "",
        tags,
      });
    } catch {
    }
  }
}
|
|
465
546
|
function createPlansEmbeddingsTable(sqlite, dimensions = DEFAULT_EMBEDDING_DIMENSIONS) {
|
|
466
547
|
try {
|
|
467
548
|
sqlite.exec(`
|
|
@@ -503,6 +584,10 @@ var KnowledgeRepository = class {
|
|
|
503
584
|
updatedAt: now
|
|
504
585
|
}).returning();
|
|
505
586
|
insertEmbedding(this.sqlite, id, input.embedding);
|
|
587
|
+
try {
|
|
588
|
+
insertFts(this.sqlite, { id, title: input.title, content: input.content, tags: ftsTags(input.tags) });
|
|
589
|
+
} catch {
|
|
590
|
+
}
|
|
506
591
|
return entry;
|
|
507
592
|
}
|
|
508
593
|
async findById(id) {
|
|
@@ -522,6 +607,17 @@ var KnowledgeRepository = class {
|
|
|
522
607
|
...values,
|
|
523
608
|
version: sql`${knowledgeEntries.version} + 1`
|
|
524
609
|
}).where(eq(knowledgeEntries.id, id)).returning();
|
|
610
|
+
if (entry) {
|
|
611
|
+
try {
|
|
612
|
+
updateFts(this.sqlite, {
|
|
613
|
+
id,
|
|
614
|
+
title: entry.title ?? "",
|
|
615
|
+
content: entry.content ?? "",
|
|
616
|
+
tags: ftsTags(entry.tags)
|
|
617
|
+
});
|
|
618
|
+
} catch {
|
|
619
|
+
}
|
|
620
|
+
}
|
|
525
621
|
if (embedding) {
|
|
526
622
|
updateEmbedding(this.sqlite, id, embedding);
|
|
527
623
|
}
|
|
@@ -531,6 +627,10 @@ var KnowledgeRepository = class {
|
|
|
531
627
|
const [entry] = await this.db.delete(knowledgeEntries).where(eq(knowledgeEntries.id, id)).returning();
|
|
532
628
|
if (entry) {
|
|
533
629
|
deleteEmbedding(this.sqlite, id);
|
|
630
|
+
try {
|
|
631
|
+
deleteFts(this.sqlite, id);
|
|
632
|
+
} catch {
|
|
633
|
+
}
|
|
534
634
|
}
|
|
535
635
|
return entry ?? null;
|
|
536
636
|
}
|
|
@@ -544,11 +644,20 @@ var KnowledgeRepository = class {
|
|
|
544
644
|
const threshold = options?.threshold ?? DEFAULT_SIMILARITY_THRESHOLD;
|
|
545
645
|
const candidateLimit = limit * 5;
|
|
546
646
|
const knnResults = searchKnn(this.sqlite, queryEmbedding, candidateLimit);
|
|
547
|
-
|
|
647
|
+
const distanceMap = new Map(knnResults.map((r) => [r.id, r.distance]));
|
|
648
|
+
let ftsResults = [];
|
|
649
|
+
if (options?.queryText) {
|
|
650
|
+
try {
|
|
651
|
+
ftsResults = searchFts(this.sqlite, options.queryText, candidateLimit);
|
|
652
|
+
} catch {
|
|
653
|
+
ftsResults = [];
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
const bm25Map = new Map(ftsResults.map((r) => [r.id, r.bm25]));
|
|
657
|
+
if (knnResults.length === 0 && ftsResults.length === 0) {
|
|
548
658
|
return [];
|
|
549
659
|
}
|
|
550
|
-
const candidateIds = knnResults.map((r) => r.id);
|
|
551
|
-
const distanceMap = new Map(knnResults.map((r) => [r.id, r.distance]));
|
|
660
|
+
const candidateIds = [.../* @__PURE__ */ new Set([...knnResults.map((r) => r.id), ...ftsResults.map((r) => r.id)])];
|
|
552
661
|
const conditions = [];
|
|
553
662
|
conditions.push(sql`${knowledgeEntries.id} IN (${sql.join(candidateIds.map((id) => sql`${id}`), sql`, `)})`);
|
|
554
663
|
if (options?.scope) {
|
|
@@ -565,18 +674,25 @@ var KnowledgeRepository = class {
|
|
|
565
674
|
conditions.push(or(isNull(knowledgeEntries.expiresAt), sql`${knowledgeEntries.expiresAt} > ${(/* @__PURE__ */ new Date()).toISOString()}`));
|
|
566
675
|
const whereClause = and(...conditions);
|
|
567
676
|
const entries = await this.db.select().from(knowledgeEntries).where(whereClause);
|
|
568
|
-
return entries.map((entry) =>
|
|
569
|
-
entry
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
677
|
+
return entries.map((entry) => {
|
|
678
|
+
const semantic = 1 - (distanceMap.get(entry.id) ?? 1);
|
|
679
|
+
const hasFts = bm25Map.has(entry.id);
|
|
680
|
+
const bm25Norm = hasFts ? 1 / (1 + Math.exp(bm25Map.get(entry.id))) : 0;
|
|
681
|
+
const combined = 0.7 * semantic + 0.3 * bm25Norm;
|
|
682
|
+
return { entry, similarity: combined, semantic, hasFts };
|
|
683
|
+
}).filter((r) => r.semantic >= threshold || r.hasFts).sort((a, b) => b.similarity - a.similarity).slice(0, limit).map(({ entry, similarity }) => ({ entry, similarity }));
|
|
684
|
+
}
|
|
685
|
+
async listRecent(limit = 20, filters, offset = 0) {
|
|
574
686
|
const conditions = [ne(knowledgeEntries.type, "system")];
|
|
575
687
|
if (filters?.type)
|
|
576
688
|
conditions.push(sql`${knowledgeEntries.type} = ${filters.type}`);
|
|
577
689
|
if (filters?.scope)
|
|
578
690
|
conditions.push(sql`${knowledgeEntries.scope} = ${filters.scope}`);
|
|
579
|
-
|
|
691
|
+
if (filters?.tags && filters.tags.length > 0) {
|
|
692
|
+
const tagConditions = filters.tags.map((tag) => sql`EXISTS (SELECT 1 FROM json_each(${knowledgeEntries.tags}) WHERE value = ${tag})`);
|
|
693
|
+
conditions.push(or(...tagConditions));
|
|
694
|
+
}
|
|
695
|
+
return this.db.select().from(knowledgeEntries).where(and(...conditions)).orderBy(sql`${knowledgeEntries.createdAt} DESC`).limit(limit).offset(offset);
|
|
580
696
|
}
|
|
581
697
|
async listTags(opts = {}) {
|
|
582
698
|
const { from, to } = opts;
|
|
@@ -603,6 +719,110 @@ var KnowledgeRepository = class {
|
|
|
603
719
|
const result = await this.db.all(sql`SELECT value as tag, COUNT(*) as count FROM knowledge_entries, json_each(knowledge_entries.tags) WHERE knowledge_entries.type != 'system' GROUP BY value ORDER BY count DESC LIMIT ${limit}`);
|
|
604
720
|
return result;
|
|
605
721
|
}
|
|
722
|
+
/**
|
|
723
|
+
* Rename/merge a tag across all entries. Per-row correlated rebuild of the JSON
|
|
724
|
+
* tags array: replace `from`→`to`, and `json_group_array(DISTINCT ...)` collapses
|
|
725
|
+
* a pre-existing `to`. The `WHERE EXISTS` guard limits both the updated_at bump
|
|
726
|
+
* and the affected-id list to rows that actually contain `from`. Returns the IDs
|
|
727
|
+
* of the rows that changed (callers re-embed + resync FTS for these).
|
|
728
|
+
*/
|
|
729
|
+
renameTag(from, to) {
|
|
730
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
731
|
+
const affected = this.sqlite.prepare(`SELECT id FROM knowledge_entries WHERE EXISTS (SELECT 1 FROM json_each(knowledge_entries.tags) WHERE value = ?)`).all(from);
|
|
732
|
+
if (affected.length === 0)
|
|
733
|
+
return [];
|
|
734
|
+
this.sqlite.prepare(`UPDATE knowledge_entries
|
|
735
|
+
SET tags = (
|
|
736
|
+
SELECT json_group_array(DISTINCT CASE WHEN value = @from THEN @to ELSE value END)
|
|
737
|
+
FROM json_each(knowledge_entries.tags)
|
|
738
|
+
),
|
|
739
|
+
updated_at = @now
|
|
740
|
+
WHERE EXISTS (SELECT 1 FROM json_each(knowledge_entries.tags) WHERE value = @from)`).run({ from, to, now });
|
|
741
|
+
return affected.map((r) => r.id);
|
|
742
|
+
}
|
|
743
|
+
/**
|
|
744
|
+
* Flag stale knowledge: not updated since `cutoff` (days), OR already expired,
|
|
745
|
+
* OR confidence below `minConfidence`. Lightweight metadata-only query (no new
|
|
746
|
+
* column / no per-entry access tracking).
|
|
747
|
+
*/
|
|
748
|
+
async findStaleEntries(opts = {}) {
|
|
749
|
+
const days = opts.days ?? 90;
|
|
750
|
+
const minConfidence = opts.minConfidence ?? 0.5;
|
|
751
|
+
const limit = opts.limit ?? 100;
|
|
752
|
+
const cutoff = new Date(Date.now() - days * 24 * 60 * 60 * 1e3).toISOString();
|
|
753
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
754
|
+
const rows = this.sqlite.prepare(`SELECT id, title, type, scope, confidence_score AS confidenceScore, updated_at AS updatedAt, expires_at AS expiresAt
|
|
755
|
+
FROM knowledge_entries
|
|
756
|
+
WHERE type != 'system'
|
|
757
|
+
AND (updated_at < @cutoff OR (expires_at IS NOT NULL AND expires_at < @now) OR confidence_score < @minConfidence)
|
|
758
|
+
ORDER BY updated_at ASC
|
|
759
|
+
LIMIT @limit`).all({ cutoff, now, minConfidence, limit });
|
|
760
|
+
return rows;
|
|
761
|
+
}
|
|
762
|
+
/**
|
|
763
|
+
* Find near-duplicate entry pairs via per-entry KNN (avoids an O(n²) full scan).
|
|
764
|
+
* The embedding MUST be read from the vec table (listAll() blanks it). Canonical
|
|
765
|
+
* `id < neighborId` filter dedups pairs and drops the self-match. Returns [] when
|
|
766
|
+
* there are no embeddings yet.
|
|
767
|
+
*/
|
|
768
|
+
async findDuplicatePairs(opts = {}) {
|
|
769
|
+
const threshold = opts.threshold ?? 0.9;
|
|
770
|
+
const limit = opts.limit ?? 100;
|
|
771
|
+
const entries = await this.listAll();
|
|
772
|
+
const meta = new Map(entries.map((e) => [e.id, e]));
|
|
773
|
+
const pairs = [];
|
|
774
|
+
for (const entry of entries) {
|
|
775
|
+
const emb = getEmbeddingById(this.sqlite, entry.id);
|
|
776
|
+
if (!emb)
|
|
777
|
+
continue;
|
|
778
|
+
const neighbors = searchKnn(this.sqlite, emb, 5);
|
|
779
|
+
for (const n of neighbors) {
|
|
780
|
+
if (entry.id >= n.id)
|
|
781
|
+
continue;
|
|
782
|
+
if (!meta.has(n.id))
|
|
783
|
+
continue;
|
|
784
|
+
const similarity = 1 - n.distance;
|
|
785
|
+
if (similarity >= threshold) {
|
|
786
|
+
const other = meta.get(n.id);
|
|
787
|
+
pairs.push({
|
|
788
|
+
a: { id: entry.id, title: entry.title },
|
|
789
|
+
b: { id: other.id, title: other.title },
|
|
790
|
+
similarity
|
|
791
|
+
});
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
if (pairs.length >= limit)
|
|
795
|
+
break;
|
|
796
|
+
}
|
|
797
|
+
return pairs.sort((x, y) => y.similarity - x.similarity).slice(0, limit);
|
|
798
|
+
}
|
|
799
|
+
/**
|
|
800
|
+
* (Re)populate the FTS5 index from knowledge_entries. Used at startup when the
|
|
801
|
+
* index is empty but entries exist (e.g. after the FTS migration on an existing
|
|
802
|
+
* DB). Best-effort and idempotent (clears then re-inserts).
|
|
803
|
+
*/
|
|
804
|
+
backfillFts() {
|
|
805
|
+
const rows = this.sqlite.prepare(`SELECT id, title, content, tags FROM knowledge_entries WHERE type != 'system'`).all();
|
|
806
|
+
let count = 0;
|
|
807
|
+
for (const r of rows) {
|
|
808
|
+
try {
|
|
809
|
+
insertFts(this.sqlite, { id: r.id, title: r.title ?? "", content: r.content ?? "", tags: ftsTags(r.tags) });
|
|
810
|
+
count++;
|
|
811
|
+
} catch {
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
return count;
|
|
815
|
+
}
|
|
816
|
+
/** Backfill the FTS index only when it's empty but entries exist (startup path). */
|
|
817
|
+
backfillFtsIfNeeded() {
|
|
818
|
+
try {
|
|
819
|
+
if (ftsCount(this.sqlite) > 0)
|
|
820
|
+
return 0;
|
|
821
|
+
return this.backfillFts();
|
|
822
|
+
} catch {
|
|
823
|
+
return 0;
|
|
824
|
+
}
|
|
825
|
+
}
|
|
606
826
|
async count() {
|
|
607
827
|
const [result] = await this.db.select({ count: sql`count(*)` }).from(knowledgeEntries).where(ne(knowledgeEntries.type, "system"));
|
|
608
828
|
return Number(result.count);
|
|
@@ -690,7 +910,7 @@ var KnowledgeRepository = class {
|
|
|
690
910
|
listAllPlans() {
|
|
691
911
|
return this.sqlite.prepare("SELECT * FROM plans ORDER BY created_at DESC").all();
|
|
692
912
|
}
|
|
693
|
-
listPlans(limit = 20, status, scope) {
|
|
913
|
+
listPlans(limit = 20, status, scope, offset = 0) {
|
|
694
914
|
const conditions = [];
|
|
695
915
|
const params = [];
|
|
696
916
|
if (status) {
|
|
@@ -702,8 +922,8 @@ var KnowledgeRepository = class {
|
|
|
702
922
|
params.push(scope);
|
|
703
923
|
}
|
|
704
924
|
const where = conditions.length ? "WHERE " + conditions.join(" AND ") : "";
|
|
705
|
-
params.push(limit);
|
|
706
|
-
return this.sqlite.prepare(`SELECT * FROM plans ${where} ORDER BY created_at DESC LIMIT ?`).all(...params);
|
|
925
|
+
params.push(limit, offset);
|
|
926
|
+
return this.sqlite.prepare(`SELECT * FROM plans ${where} ORDER BY created_at DESC LIMIT ? OFFSET ?`).all(...params);
|
|
707
927
|
}
|
|
708
928
|
findSimilarActivePlans(embedding, scope, threshold = 0.5) {
|
|
709
929
|
try {
|
|
@@ -945,6 +1165,17 @@ var KnowledgeRepository = class {
|
|
|
945
1165
|
return removed;
|
|
946
1166
|
}
|
|
947
1167
|
};
|
|
1168
|
+
/**
 * Flatten a tag list into the space-separated string stored in the FTS index.
 *
 * @param {string[] | string | null | undefined} tags - An array of tags, or a
 *   string holding a JSON array.
 * @returns {string} Truthy tags joined by single spaces. A string that is not
 *   valid JSON is returned unchanged; any other non-array value yields "".
 */
function ftsTags(tags) {
  let candidate;
  if (typeof tags !== "string") {
    candidate = tags;
  } else {
    try {
      candidate = JSON.parse(tags);
    } catch {
      return tags;
    }
  }
  if (!Array.isArray(candidate)) {
    return "";
  }
  return candidate.filter((tag) => Boolean(tag)).join(" ");
}
|
|
948
1179
|
function cosineSimilarity(a, b) {
|
|
949
1180
|
let dot = 0, magA = 0, magB = 0;
|
|
950
1181
|
for (let i = 0; i < a.length; i++) {
|
|
@@ -957,6 +1188,25 @@ function cosineSimilarity(a, b) {
|
|
|
957
1188
|
}
|
|
958
1189
|
|
|
959
1190
|
// ../../packages/core/dist/services/knowledge.service.js
|
|
1191
|
+
/**
 * Levenshtein edit distance between two strings, computed with two rolling
 * rows. Characters are compared by index, i.e. per UTF-16 code unit.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions that turn `a` into `b`.
 */
function levenshtein(a, b) {
  if (a === b) {
    return 0;
  }
  const rows = a.length;
  const cols = b.length;
  if (!rows) {
    return cols;
  }
  if (!cols) {
    return rows;
  }
  // `above` holds the distances for the previous prefix of `a`.
  let above = [];
  for (let c = 0; c <= cols; c += 1) {
    above.push(c);
  }
  let current = new Array(cols + 1);
  for (let r = 0; r < rows; r += 1) {
    current[0] = r + 1;
    for (let c = 0; c < cols; c += 1) {
      const insertion = current[c] + 1;
      const deletion = above[c + 1] + 1;
      const substitution = above[c] + (a[r] === b[c] ? 0 : 1);
      current[c + 1] = Math.min(insertion, deletion, substitution);
    }
    const spare = above;
    above = current;
    current = spare;
  }
  return above[cols];
}
|
|
960
1210
|
var KnowledgeService = class {
|
|
961
1211
|
repository;
|
|
962
1212
|
embeddingProvider;
|
|
@@ -977,8 +1227,26 @@ var KnowledgeService = class {
|
|
|
977
1227
|
buildEmbeddingText(title, content, tags) {
|
|
978
1228
|
return `${title} ${content} ${tags.join(" ")}`;
|
|
979
1229
|
}
|
|
1230
|
+
/**
|
|
1231
|
+
* Conservative tag normalization: trim + lowercase + dedup (order-preserving).
|
|
1232
|
+
* Deliberately does NOT rewrite tokens (e.g. nest.js→nestjs) — that's left to
|
|
1233
|
+
* the explicit merge flow so meaning is never silently changed.
|
|
1234
|
+
*/
|
|
1235
|
+
normalizeTags(tags = []) {
|
|
1236
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1237
|
+
const out = [];
|
|
1238
|
+
for (const raw of tags) {
|
|
1239
|
+
const t = (raw ?? "").trim().toLowerCase();
|
|
1240
|
+
if (t && !seen.has(t)) {
|
|
1241
|
+
seen.add(t);
|
|
1242
|
+
out.push(t);
|
|
1243
|
+
}
|
|
1244
|
+
}
|
|
1245
|
+
return out;
|
|
1246
|
+
}
|
|
980
1247
|
async add(input) {
|
|
981
1248
|
const { skipDedup, ...rest } = input;
|
|
1249
|
+
rest.tags = this.normalizeTags(rest.tags);
|
|
982
1250
|
const embeddingText = this.buildEmbeddingText(rest.title, rest.content, rest.tags);
|
|
983
1251
|
const embedding = await this.embeddingProvider.embed(embeddingText);
|
|
984
1252
|
if (!skipDedup) {
|
|
@@ -1004,7 +1272,7 @@ var KnowledgeService = class {
|
|
|
1004
1272
|
}
|
|
1005
1273
|
async search(query, options) {
|
|
1006
1274
|
const queryEmbedding = await this.embeddingProvider.embed(query);
|
|
1007
|
-
const results = await this.repository.searchBySimilarity(queryEmbedding, options);
|
|
1275
|
+
const results = await this.repository.searchBySimilarity(queryEmbedding, { ...options, queryText: query });
|
|
1008
1276
|
this.logOp("read", results.length);
|
|
1009
1277
|
const direct = results.map((r) => ({
|
|
1010
1278
|
entry: this.toKnowledgeEntry(r.entry),
|
|
@@ -1073,6 +1341,8 @@ var KnowledgeService = class {
|
|
|
1073
1341
|
return entry ? this.toKnowledgeEntry(entry) : null;
|
|
1074
1342
|
}
|
|
1075
1343
|
async update(id, updates) {
|
|
1344
|
+
if (updates.tags)
|
|
1345
|
+
updates.tags = this.normalizeTags(updates.tags);
|
|
1076
1346
|
let embedding;
|
|
1077
1347
|
if (updates.content || updates.title || updates.tags) {
|
|
1078
1348
|
const current = await this.repository.findById(id);
|
|
@@ -1195,8 +1465,8 @@ var KnowledgeService = class {
|
|
|
1195
1465
|
}
|
|
1196
1466
|
return hash.toString(36);
|
|
1197
1467
|
}
|
|
1198
|
-
async listRecent(limit = 20, filters) {
|
|
1199
|
-
const entries = await this.repository.listRecent(limit, filters);
|
|
1468
|
+
async listRecent(limit = 20, filters, offset = 0) {
|
|
1469
|
+
const entries = await this.repository.listRecent(limit, filters, offset);
|
|
1200
1470
|
return entries.map((e) => this.toKnowledgeEntry(e));
|
|
1201
1471
|
}
|
|
1202
1472
|
async topTags(limit = 10, opts = {}) {
|
|
@@ -1205,6 +1475,74 @@ var KnowledgeService = class {
|
|
|
1205
1475
|
async listTags(opts = {}) {
|
|
1206
1476
|
return this.repository.listTags(opts);
|
|
1207
1477
|
}
|
|
1478
|
+
/**
|
|
1479
|
+
* Suggest near-duplicate tag pairs to merge (e.g. nest.js ↔ nestjs, redis ↔ Redis).
|
|
1480
|
+
* Compares lowercased forms via max(normalized Levenshtein, token-set Jaccard).
|
|
1481
|
+
* O(n²) over the small DISTINCT tag set.
|
|
1482
|
+
*/
|
|
1483
|
+
async suggestTagMerges(threshold = 0.82) {
|
|
1484
|
+
const tags = await this.repository.listTags();
|
|
1485
|
+
const out = [];
|
|
1486
|
+
for (let i = 0; i < tags.length; i++) {
|
|
1487
|
+
for (let j = i + 1; j < tags.length; j++) {
|
|
1488
|
+
const sim = this.tagSimilarity(tags[i], tags[j]);
|
|
1489
|
+
if (sim >= threshold)
|
|
1490
|
+
out.push({ a: tags[i], b: tags[j], similarity: Math.round(sim * 100) / 100 });
|
|
1491
|
+
}
|
|
1492
|
+
}
|
|
1493
|
+
return out.sort((x, y) => y.similarity - x.similarity);
|
|
1494
|
+
}
|
|
1495
|
+
tagSimilarity(a, b) {
|
|
1496
|
+
const la = a.trim().toLowerCase();
|
|
1497
|
+
const lb = b.trim().toLowerCase();
|
|
1498
|
+
if (!la || !lb)
|
|
1499
|
+
return 0;
|
|
1500
|
+
if (la === lb)
|
|
1501
|
+
return 1;
|
|
1502
|
+
const lev = 1 - levenshtein(la, lb) / Math.max(la.length, lb.length);
|
|
1503
|
+
const ta = new Set(la.split(/[^a-z0-9]+/).filter(Boolean));
|
|
1504
|
+
const tb = new Set(lb.split(/[^a-z0-9]+/).filter(Boolean));
|
|
1505
|
+
let inter = 0;
|
|
1506
|
+
for (const t of ta)
|
|
1507
|
+
if (tb.has(t))
|
|
1508
|
+
inter++;
|
|
1509
|
+
const union = (/* @__PURE__ */ new Set([...ta, ...tb])).size;
|
|
1510
|
+
const jaccard = union ? inter / union : 0;
|
|
1511
|
+
return Math.max(lev, jaccard);
|
|
1512
|
+
}
|
|
1513
|
+
/**
|
|
1514
|
+
* Merge tag `from` into `to` across all entries, then re-embed + resync FTS for
|
|
1515
|
+
* affected rows. Re-embeds via repository.update (the UPDATE path), NOT the
|
|
1516
|
+
* insert path — and update() also resyncs the FTS row.
|
|
1517
|
+
*/
|
|
1518
|
+
async mergeTag(from, to) {
|
|
1519
|
+
const f = (from ?? "").trim();
|
|
1520
|
+
const t = (to ?? "").trim();
|
|
1521
|
+
if (!f || !t || f === t)
|
|
1522
|
+
return { merged: 0 };
|
|
1523
|
+
const affected = this.repository.renameTag(f, t);
|
|
1524
|
+
for (const id of affected) {
|
|
1525
|
+
const entry = await this.repository.findById(id);
|
|
1526
|
+
if (!entry)
|
|
1527
|
+
continue;
|
|
1528
|
+
const tags = Array.isArray(entry.tags) ? entry.tags : JSON.parse(entry.tags ?? "[]");
|
|
1529
|
+
const embedding = await this.embeddingProvider.embed(this.buildEmbeddingText(entry.title, entry.content, tags));
|
|
1530
|
+
await this.repository.update(id, { embedding });
|
|
1531
|
+
}
|
|
1532
|
+
if (affected.length)
|
|
1533
|
+
this.logOp("write", affected.length);
|
|
1534
|
+
return { merged: affected.length };
|
|
1535
|
+
}
|
|
1536
|
+
async findStaleEntries(opts = {}) {
|
|
1537
|
+
return this.repository.findStaleEntries(opts);
|
|
1538
|
+
}
|
|
1539
|
+
async findDuplicatePairs(opts = {}) {
|
|
1540
|
+
return this.repository.findDuplicatePairs(opts);
|
|
1541
|
+
}
|
|
1542
|
+
/** (Re)populate the FTS5 index if it's empty but entries exist. Returns rows indexed. */
|
|
1543
|
+
backfillFtsIfNeeded() {
|
|
1544
|
+
return this.repository.backfillFtsIfNeeded();
|
|
1545
|
+
}
|
|
1208
1546
|
async getStats() {
|
|
1209
1547
|
const [count, byType, byScope, lastUpdatedAt] = await Promise.all([
|
|
1210
1548
|
this.repository.count(),
|
|
@@ -1323,8 +1661,8 @@ var KnowledgeService = class {
|
|
|
1323
1661
|
const rows = this.repository.listAllPlans();
|
|
1324
1662
|
return rows.map((r) => this.toPlan(r));
|
|
1325
1663
|
}
|
|
1326
|
-
listPlans(limit = 20, status, scope) {
|
|
1327
|
-
const rows = this.repository.listPlans(limit, status, scope);
|
|
1664
|
+
listPlans(limit = 20, status, scope, offset = 0) {
|
|
1665
|
+
const rows = this.repository.listPlans(limit, status, scope, offset);
|
|
1328
1666
|
return rows.map((r) => this.toPlan(r));
|
|
1329
1667
|
}
|
|
1330
1668
|
archiveStaleDrafts(maxAgeHours = 24) {
|
|
@@ -2932,10 +3270,10 @@ var KnowledgeSDK = class {
|
|
|
2932
3270
|
throw this.wrapError(error, "Failed to delete knowledge");
|
|
2933
3271
|
}
|
|
2934
3272
|
}
|
|
2935
|
-
async listRecent(limit = 20, filters) {
|
|
3273
|
+
async listRecent(limit = 20, filters, offset = 0) {
|
|
2936
3274
|
this.ensureInitialized();
|
|
2937
3275
|
try {
|
|
2938
|
-
return await this.service.listRecent(limit, filters);
|
|
3276
|
+
return await this.service.listRecent(limit, filters, offset);
|
|
2939
3277
|
} catch (error) {
|
|
2940
3278
|
throw this.wrapError(error, "Failed to list recent knowledge");
|
|
2941
3279
|
}
|
|
@@ -2956,6 +3294,38 @@ var KnowledgeSDK = class {
|
|
|
2956
3294
|
throw this.wrapError(error, "Failed to list tags");
|
|
2957
3295
|
}
|
|
2958
3296
|
}
|
|
3297
|
+
async suggestTagMerges(threshold = 0.82) {
|
|
3298
|
+
this.ensureInitialized();
|
|
3299
|
+
try {
|
|
3300
|
+
return await this.service.suggestTagMerges(threshold);
|
|
3301
|
+
} catch (error) {
|
|
3302
|
+
throw this.wrapError(error, "Failed to suggest tag merges");
|
|
3303
|
+
}
|
|
3304
|
+
}
|
|
3305
|
+
async mergeTags(from, to) {
|
|
3306
|
+
this.ensureInitialized();
|
|
3307
|
+
try {
|
|
3308
|
+
return await this.service.mergeTag(from, to);
|
|
3309
|
+
} catch (error) {
|
|
3310
|
+
throw this.wrapError(error, "Failed to merge tags");
|
|
3311
|
+
}
|
|
3312
|
+
}
|
|
3313
|
+
async findStaleEntries(opts = {}) {
|
|
3314
|
+
this.ensureInitialized();
|
|
3315
|
+
try {
|
|
3316
|
+
return await this.service.findStaleEntries(opts);
|
|
3317
|
+
} catch (error) {
|
|
3318
|
+
throw this.wrapError(error, "Failed to find stale entries");
|
|
3319
|
+
}
|
|
3320
|
+
}
|
|
3321
|
+
async findDuplicatePairs(opts = {}) {
|
|
3322
|
+
this.ensureInitialized();
|
|
3323
|
+
try {
|
|
3324
|
+
return await this.service.findDuplicatePairs(opts);
|
|
3325
|
+
} catch (error) {
|
|
3326
|
+
throw this.wrapError(error, "Failed to find duplicate pairs");
|
|
3327
|
+
}
|
|
3328
|
+
}
|
|
2959
3329
|
async countByType(opts = {}) {
|
|
2960
3330
|
this.ensureInitialized();
|
|
2961
3331
|
try {
|
|
@@ -3059,9 +3429,9 @@ var KnowledgeSDK = class {
|
|
|
3059
3429
|
this.ensureInitialized();
|
|
3060
3430
|
return this.service.deletePlan(id);
|
|
3061
3431
|
}
|
|
3062
|
-
listPlans(limit = 20, status, scope) {
|
|
3432
|
+
listPlans(limit = 20, status, scope, offset = 0) {
|
|
3063
3433
|
this.ensureInitialized();
|
|
3064
|
-
return this.service.listPlans(limit, status, scope);
|
|
3434
|
+
return this.service.listPlans(limit, status, scope, offset);
|
|
3065
3435
|
}
|
|
3066
3436
|
async addPlanRelation(planId, knowledgeId, relationType) {
|
|
3067
3437
|
this.ensureInitialized();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cognistore/mcp-server",
|
|
3
|
-
"version": "2.0.3",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"type": "module",
|
|
6
6
|
"description": "MCP server for CogniStore — integrates with Claude Code and GitHub Copilot",
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
},
|
|
13
13
|
"main": "./dist/index.js",
|
|
14
14
|
"bin": {
|
|
15
|
-
"cognistore-mcp": "
|
|
15
|
+
"cognistore-mcp": "dist/index.js"
|
|
16
16
|
},
|
|
17
17
|
"files": [
|
|
18
18
|
"dist"
|