@sfdxy/sf-documentation-knowledge 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Retrieval Quality Tests — validate that known-good queries
3
+ * return expected documents in the top results.
4
+ *
5
+ * These tests load the real graph.json and exercise the full
6
+ * search pipeline (stemming, trigram matching, TF-IDF scoring).
7
+ *
8
+ * Run: npm test
9
+ */
10
+ import { describe, it, expect, beforeAll } from "vitest";
11
+ import { GraphQuery, stem } from "../utils/graph-query.js";
12
+ import { CodeIndex } from "../mcp/code-index.js";
13
+ const KNOWLEDGE_DIR = "knowledge/current";
14
+ // ─── Unit tests for the stemmer ─────────────────────────────────
15
+ describe("stem()", () => {
16
+ it("strips -ing suffix", () => {
17
+ expect(stem("configuring")).toBe("configur");
18
+ });
19
+ it("strips -tion suffix", () => {
20
+ expect(stem("configuration")).toBe("configurat");
21
+ });
22
+ it("strips -ed suffix", () => {
23
+ expect(stem("queried")).toBe("query");
24
+ });
25
+ it("strips -es suffix", () => {
26
+ expect(stem("queries")).toBe("query");
27
+ });
28
+ it("strips -s suffix", () => {
29
+ expect(stem("objects")).toBe("object");
30
+ });
31
+ it("preserves short words", () => {
32
+ expect(stem("api")).toBe("api");
33
+ expect(stem("dml")).toBe("dml");
34
+ });
35
+ it("strips -ment suffix", () => {
36
+ expect(stem("deployment")).toBe("deploy");
37
+ });
38
+ it("strips -able suffix", () => {
39
+ expect(stem("batchable")).toBe("batch");
40
+ });
41
+ });
42
+ // ─── Integration tests: graph search retrieval quality ──────────
43
+ describe("GraphQuery retrieval quality", () => {
44
+ let gq;
45
+ beforeAll(async () => {
46
+ gq = new GraphQuery(KNOWLEDGE_DIR);
47
+ await gq.load();
48
+ }, 60_000); // 60s timeout for loading 77MB graph
49
+ /**
50
+ * Test helper: asserts that at least one result's nodeId
51
+ * matches the provided pattern.
52
+ */
53
+ function expectResultMatching(results, pattern, query) {
54
+ const match = results.find((r) => pattern.test(r.nodeId));
55
+ expect(match, `Expected query "${query}" to return a result matching ${pattern}. Got: ${results.slice(0, 5).map((r) => r.nodeId).join(", ")}`).toBeDefined();
56
+ }
57
+ it("finds Batch Apex documentation", () => {
58
+ const results = gq.searchNodes("batch apex", { type: "document", limit: 10 });
59
+ expect(results.length).toBeGreaterThan(0);
60
+ expectResultMatching(results, /doc:apex-guide:.*batch/i, "batch apex");
61
+ });
62
+ it("finds SOQL documentation", () => {
63
+ const results = gq.searchNodes("SOQL queries", { type: "document", limit: 10 });
64
+ expect(results.length).toBeGreaterThan(0);
65
+ expectResultMatching(results, /doc:(soql-sosl|apex-guide):/, "SOQL queries");
66
+ });
67
+ it("finds REST API docs", () => {
68
+ const results = gq.searchNodes("REST API sobject describe", { type: "document", limit: 15, domain: "rest-api" });
69
+ expect(results.length).toBeGreaterThan(0);
70
+ // All results must be from rest-api since we pre-filter
71
+ for (const r of results) {
72
+ expect(r.nodeId).toMatch(/^doc:rest-api:/);
73
+ }
74
+ });
75
+ it("finds Platform Events docs", () => {
76
+ const results = gq.searchNodes("platform events publish", { type: "document", limit: 10 });
77
+ expect(results.length).toBeGreaterThan(0);
78
+ expectResultMatching(results, /doc:platform-events:/, "platform events publish");
79
+ });
80
+ it("domain pre-filter works", () => {
81
+ const results = gq.searchNodes("trigger", { type: "document", limit: 10, domain: "apex-guide" });
82
+ expect(results.length).toBeGreaterThan(0);
83
+ // All results must be in the apex-guide domain
84
+ for (const r of results) {
85
+ expect(r.nodeId).toMatch(/^doc:apex-guide:/);
86
+ }
87
+ });
88
+ it("finds LWC documentation", () => {
89
+ const results = gq.searchNodes("lightning web components", { type: "document", limit: 10 });
90
+ expect(results.length).toBeGreaterThan(0);
91
+ expectResultMatching(results, /doc:lwc:/, "lightning web components");
92
+ });
93
+ it("finds Metadata API docs", () => {
94
+ const results = gq.searchNodes("metadata api deploy", { type: "document", limit: 10 });
95
+ expect(results.length).toBeGreaterThan(0);
96
+ expectResultMatching(results, /doc:metadata-api:/, "metadata api deploy");
97
+ });
98
+ it("keyword search works", () => {
99
+ const results = gq.findDocsByKeyword("Apex", 10);
100
+ expect(results.length).toBeGreaterThan(0);
101
+ });
102
+ it("returns empty array for nonsense query", () => {
103
+ const results = gq.searchNodes("xyzzy_foobar_gibberish", { type: "document", limit: 5 });
104
+ expect(results.length).toBe(0);
105
+ });
106
+ it("stemming improves recall: 'configuring' matches 'configuration' docs", () => {
107
+ const stemmedResults = gq.searchNodes("configuring triggers", { type: "document", limit: 15 });
108
+ // Should find trigger-related docs even though query used "configuring" not exact match
109
+ expect(stemmedResults.length).toBeGreaterThan(0);
110
+ });
111
+ it("graph context returns valid doc context", () => {
112
+ // Find a real doc nodeId first
113
+ const results = gq.searchNodes("batch apex", { type: "document", limit: 1 });
114
+ expect(results.length).toBeGreaterThan(0);
115
+ const ctx = gq.getDocContext(results[0].nodeId);
116
+ expect(ctx).not.toBeNull();
117
+ expect(ctx.label).toBeTruthy();
118
+ expect(ctx.keywords).toBeInstanceOf(Array);
119
+ });
120
+ it("namespace listing returns Apex namespaces", () => {
121
+ const ns = gq.listNamespaces();
122
+ expect(ns.length).toBeGreaterThan(0);
123
+ const systemNs = ns.find((n) => n.namespace === "System");
124
+ expect(systemNs).toBeDefined();
125
+ });
126
+ it("domain listing returns 100+ domains", () => {
127
+ const domains = gq.listDomains();
128
+ expect(domains.length).toBeGreaterThanOrEqual(100);
129
+ });
130
+ });
131
+ // ─── CodeIndex retrieval quality ────────────────────────────────
132
+ describe("CodeIndex retrieval quality", () => {
133
+ let codeIndex;
134
+ beforeAll(async () => {
135
+ codeIndex = new CodeIndex(KNOWLEDGE_DIR);
136
+ await codeIndex.load();
137
+ }, 120_000); // 2 min — reads all 33k files
138
+ it("finds Apex code examples", () => {
139
+ const snippets = codeIndex.search("batch apex", { limit: 5 });
140
+ expect(snippets.length).toBeGreaterThan(0);
141
+ expect(snippets[0].code.length).toBeGreaterThan(10);
142
+ });
143
+ it("language filter works", () => {
144
+ const snippets = codeIndex.search("query", { language: "soql", limit: 5 });
145
+ for (const s of snippets) {
146
+ expect(s.language).toBe("soql");
147
+ }
148
+ });
149
+ it("domain filter works", () => {
150
+ const snippets = codeIndex.search("apex", { domain: "apex-guide", limit: 5 });
151
+ for (const s of snippets) {
152
+ expect(s.domain).toBe("apex-guide");
153
+ }
154
+ });
155
+ it("returns stats with language breakdown", () => {
156
+ const stats = codeIndex.getStats();
157
+ expect(stats.totalSnippets).toBeGreaterThan(100);
158
+ expect(Object.keys(stats.languages).length).toBeGreaterThanOrEqual(2);
159
+ });
160
+ });
161
+ //# sourceMappingURL=retrieval-quality.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"retrieval-quality.test.js","sourceRoot":"","sources":["../../src/tests/retrieval-quality.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEjD,MAAM,aAAa,GAAG,mBAAmB,CAAC;AAE1C,mEAAmE;AACnE,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAC1B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,mEAAmE;AACnE,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;IAC5C,IAAI,EAAc,CAAC;IAEnB,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,EAAE,GAAG,IAAI,UAAU,CAAC,aAAa,CAAC,CAAC;QACnC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,qCAAqC;IAEjD;;;OAGG;IACH,SAAS,oBAAoB,CAC3B,OAAkC,EAClC,OAAe,EACf,KAAa;QAEb,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC1D,MAAM,CACJ,KAAK,EACL,mBAAmB,KAAK,iCAAiC,OAAO,UAAU,OAAO,C
AAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAChI,CAAC,WAAW,EAAE,CAAC;IAClB,CAAC;IAED,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC9E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,yBAAyB,EAAE,YAAY,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,cAAc,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAChF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,6BAA6B,EAAE,cAAc,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,2BAA2B,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QACjH,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,wDAAwD;QACxD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,yBAAyB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC3F,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,sBAAsB,EAAE,yBAAyB,CAAC,CAAC;IACnF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC;QACjG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,+CAA+C;QAC/C,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,0BAA0B,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC5F,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC
1C,oBAAoB,CAAC,OAAO,EAAE,UAAU,EAAE,0BAA0B,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,qBAAqB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAC9B,MAAM,OAAO,GAAG,EAAE,CAAC,iBAAiB,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,wBAAwB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACzF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sEAAsE,EAAE,GAAG,EAAE;QAC9E,MAAM,cAAc,GAAG,EAAE,CAAC,WAAW,CAAC,sBAAsB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC/F,wFAAwF;QACxF,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,+BAA+B;QAC/B,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC7E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAI,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;QAChC,MAAM,CAAC,GAAI,CAAC,QAAQ,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,EAAE,GAAG,EAAE,CAAC,cAAc,EAAE,CAAC;QAC/B,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;QACjC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC
;IACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,mEAAmE;AACnE,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;IAC3C,IAAI,SAAoB,CAAC;IAEzB,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,SAAS,GAAG,IAAI,SAAS,CAAC,aAAa,CAAC,CAAC;QACzC,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,8BAA8B;IAE3C,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC3E,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9E,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACtC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,QAAQ,EAAE,CAAC;QACnC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -17,9 +17,26 @@ export interface DocContext {
17
17
  references: GraphSearchResult[];
18
18
  referencedBy: GraphSearchResult[];
19
19
  }
20
+ export interface SearchOptions {
21
+ type?: string;
22
+ limit?: number;
23
+ /** Pre-filter by domain prefix (e.g. "apex-reference") */
24
+ domain?: string;
25
+ /** Pre-filter by docType */
26
+ docType?: string;
27
+ }
28
+ /** Simple, dependency-free stemmer optimised for technical documentation. */
29
+ export declare function stem(word: string): string;
20
30
  /**
21
31
  * GraphQuery provides fast, in-memory traversal of the SF Knowledge Graph.
22
32
  * Load once, query many times.
33
+ *
34
+ * Indexing strategy (v2):
35
+ * 1. Stemmed label index — stem(word) → Set<nodeId>
36
+ * 2. Trigram index — 3-char substring → Set<stemmed word> (for fuzzy recall)
37
+ * 3. Keyword index — keyword → Set<nodeId> (from tagged_with edges)
38
+ * 4. IDF table — stem → log(N / df) (for TF-IDF scoring)
39
+ * 5. LRU query cache — query key → results
23
40
  */
24
41
  export declare class GraphQuery {
25
42
  private graph;
@@ -27,8 +44,18 @@ export declare class GraphQuery {
27
44
  private graphPath;
28
45
  /** Inverted keyword index: lowercase keyword → Set of doc node IDs */
29
46
  private keywordIndex;
30
- /** Inverted label index: lowercase word → Set of node IDs */
47
+ /** Stemmed label index: stemmed word → Set of node IDs */
31
48
  private labelIndex;
49
+ /** Trigram index: trigram → Set of stemmed words (for fuzzy matching) */
50
+ private trigramIndex;
51
+ /** IDF table: stemmed word → inverse-document-frequency weight */
52
+ private idfTable;
53
+ /** Total number of document nodes (for IDF calculation) */
54
+ private totalDocs;
55
+ /** LRU query cache: cache key → { results, timestamp } */
56
+ private queryCache;
57
+ private readonly CACHE_TTL_MS;
58
+ private readonly CACHE_MAX_SIZE;
32
59
  constructor(knowledgeDir?: string);
33
60
  /**
34
61
  * Load the graph into memory and build lookup indices.
@@ -36,14 +63,15 @@ export declare class GraphQuery {
36
63
  load(): Promise<void>;
37
64
  private ensureLoaded;
38
65
  private buildIndices;
66
+ private getCached;
67
+ private setCache;
39
68
  /**
40
69
  * Search across all node labels for a query string.
41
- * Returns nodes whose labels contain the query terms.
70
+ *
71
+ * Uses stemming + trigram fuzzy matching + TF-IDF scoring.
72
+ * Supports optional pre-filters for domain and docType.
42
73
  */
43
- searchNodes(query: string, options?: {
44
- type?: string;
45
- limit?: number;
46
- }): GraphSearchResult[];
74
+ searchNodes(query: string, options?: SearchOptions): GraphSearchResult[];
47
75
  /**
48
76
  * Find documents tagged with a specific keyword.
49
77
  */
@@ -1 +1 @@
1
- {"version":3,"file":"graph-query.d.ts","sourceRoot":"","sources":["../../src/utils/graph-query.ts"],"names":[],"mappings":"AAkBA,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,iBAAiB,EAAE,CAAC;IAChC,YAAY,EAAE,iBAAiB,EAAE,CAAC;CACnC;AAED;;;GAGG;AACH,qBAAa,UAAU;IAErB,OAAO,CAAC,KAAK,CAA4B;IACzC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAE1B,sEAAsE;IACtE,OAAO,CAAC,YAAY,CAAkC;IACtD,6DAA6D;IAC7D,OAAO,CAAC,UAAU,CAAkC;gBAExC,YAAY,SAAsB;IAI9C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB3B,OAAO,CAAC,YAAY;IAIpB,OAAO,CAAC,YAAY;IAgCpB;;;OAGG;IACH,WAAW,CACT,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;QAAE,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAO,GAC9C,iBAAiB,EAAE;IAmDtB;;OAEG;IACH,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAkBnE;;OAEG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,SAAI,GAAG,iBAAiB,EAAE;IAgD9D;;OAEG;IACH,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAsBvD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAoBnD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAsB/D;;OAEG;IACH,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAiEnD;;OAEG;IACH,WAAW,IAAI,iBAAiB,EAAE;IAiBlC;;OAEG;IACH,cAAc,IAAI,KAAK,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAiBhE;;OAEG;IACH,YAAY,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAiB/D,6BAA6B;IAC7B,QAAQ,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;CAI7C"}
1
+ {"version":3,"file":"graph-query.d.ts","sourceRoot":"","sources":["../../src/utils/graph-query.ts"],"names":[],"mappings":"AAkBA,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,iBAAiB,EAAE,CAAC;IAChC,YAAY,EAAE,iBAAiB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0DAA0D;IAC1D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AA2CD,6EAA6E;AAC7E,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAWzC;AAYD;;;;;;;;;;GAUG;AACH,qBAAa,UAAU;IAErB,OAAO,CAAC,KAAK,CAA4B;IACzC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAE1B,sEAAsE;IACtE,OAAO,CAAC,YAAY,CAAkC;IACtD,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAkC;IACpD,yEAAyE;IACzE,OAAO,CAAC,YAAY,CAAkC;IACtD,kEAAkE;IAClE,OAAO,CAAC,QAAQ,CAA6B;IAC7C,2DAA2D;IAC3D,OAAO,CAAC,SAAS,CAAK;IAEtB,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAmE;IACrF,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAiB;IAC9C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAO;gBAE1B,YAAY,SAAsB;IAI9C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB3B,OAAO,CAAC,YAAY;IAIpB,OAAO,CAAC,YAAY;IA8EpB,OAAO,CAAC,SAAS;IAUjB,OAAO,CAAC,QAAQ;IAWhB;;;;;OAKG;IACH,WAAW,CACT,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,aAAkB,GAC1B,iBAAiB,EAAE;IA0GtB;;OAEG;IACH,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAkBnE;;OAEG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,SAAI,GAAG,iBAAiB,EAAE;IAkD9D;;OAEG;IACH,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAuBvD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAqBnD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAuB/D;;OAEG;IACH,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAmEnD;;OAEG;IACH,WAAW,IAAI,iBAAiB,EAAE;IAkBlC;;OAEG;IACH,cAAc,IAAI,KAAK,CAAC;QAAE,SAAS
,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAmBhE;;OAEG;IACH,YAAY,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAmB/D,6BAA6B;IAC7B,QAAQ,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;CAI7C"}
@@ -6,9 +6,81 @@ import pkg from "graphology";
6
6
  const DirectedGraph = pkg.DirectedGraph || pkg;
7
7
  import { createChildLogger } from "./logger.js";
8
8
  const log = createChildLogger("utils:graph-query");
9
+ // ─── Lightweight suffix-stripping stemmer ─────────────────────────
10
/**
 * Ordered suffix rules as [pattern, replacement] pairs.
 *
 * Order matters: stem() applies the first rule that matches AND leaves a
 * stem of at least 3 characters, so longer, more specific suffixes are
 * listed before the shorter suffixes they end with.
 */
const SUFFIX_RULES = [
    [/ational$/, "ate"],
    [/tional$/, "tion"],
    [/ization$/, "ize"],
    [/fulness$/, "ful"],
    [/ousness$/, "ous"],
    [/iveness$/, "ive"],
    [/ibilities$/, "ible"],
    [/ically$/, "ic"],
    [/ating$/, "ate"],
    [/ising$/, "ise"],
    [/izing$/, "ize"],
    [/abling$/, "able"],
    [/ement$/, ""],
    [/ment$/, ""],
    [/tion$/, "t"],
    [/sion$/, "s"],
    [/ness$/, ""],
    [/able$/, ""],
    [/ible$/, ""],
    [/ious$/, ""],
    [/eous$/, ""],
    [/ous$/, ""],
    [/ive$/, ""],
    [/ful$/, ""],
    [/ing$/, ""],
    [/ies$/, "y"],
    [/ied$/, "y"],
    [/ion$/, ""],
    [/ers$/, ""],
    [/est$/, ""],
    [/ely$/, ""],
    [/ed$/, ""],
    [/er$/, ""],
    [/ly$/, ""],
    [/es$/, ""],
    [/s$/, ""],
];
/**
 * Simple, dependency-free stemmer optimised for technical documentation.
 *
 * The result is always lower-cased, including for short words that are not
 * stemmed, so "API" and "api" map to the same index key.
 *
 * @param {string} word - A single token (no whitespace).
 * @returns {string} The lower-cased word with the first applicable suffix
 *   rule applied, or the lower-cased word itself when it is shorter than
 *   4 characters or no rule leaves a stem of at least 3 characters.
 */
export function stem(word) {
    const lower = word.toLowerCase();
    // Words this short carry too little signal to strip safely.
    if (lower.length < 4)
        return lower;
    for (const [re, replacement] of SUFFIX_RULES) {
        if (!re.test(lower))
            continue;
        const stemmed = lower.replace(re, replacement);
        // Only keep stems of 3+ chars to avoid over-stripping; otherwise
        // fall through and try the next (shorter) suffix rule.
        if (stemmed.length >= 3)
            return stemmed;
    }
    return lower;
}
64
+ // ─── Trigram helpers ──────────────────────────────────────────────
65
/**
 * Split a word into its overlapping 3-character substrings.
 *
 * @param {string} word - The (already stemmed) word to split.
 * @returns {string[]} Every 3-character window of `word`, left to right.
 *   A 1- or 2-character word is returned as its own single entry; an empty
 *   string yields an empty array so no empty "trigram" is ever produced.
 */
function trigrams(word) {
    if (word.length === 0)
        return [];
    if (word.length < 3)
        return [word];
    // A word of length n has n - 2 windows of width 3.
    return Array.from({ length: word.length - 2 }, (_, i) => word.slice(i, i + 3));
}
9
74
  /**
10
75
  * GraphQuery provides fast, in-memory traversal of the SF Knowledge Graph.
11
76
  * Load once, query many times.
77
+ *
78
+ * Indexing strategy (v2):
79
+ * 1. Stemmed label index — stem(word) → Set<nodeId>
80
+ * 2. Trigram index — 3-char substring → Set<stemmed word> (for fuzzy recall)
81
+ * 3. Keyword index — keyword → Set<nodeId> (from tagged_with edges)
82
+ * 4. IDF table — stem → log(N / df) (for TF-IDF scoring)
83
+ * 5. LRU query cache — query key → results
12
84
  */
13
85
  export class GraphQuery {
14
86
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -17,8 +89,18 @@ export class GraphQuery {
17
89
  graphPath;
18
90
  /** Inverted keyword index: lowercase keyword → Set of doc node IDs */
19
91
  keywordIndex = new Map();
20
- /** Inverted label index: lowercase word → Set of node IDs */
92
+ /** Stemmed label index: stemmed word → Set of node IDs */
21
93
  labelIndex = new Map();
94
+ /** Trigram index: trigram → Set of stemmed words (for fuzzy matching) */
95
+ trigramIndex = new Map();
96
+ /** IDF table: stemmed word → inverse-document-frequency weight */
97
+ idfTable = new Map();
98
+ /** Total number of document nodes (for IDF calculation) */
99
+ totalDocs = 0;
100
+ /** LRU query cache: cache key → { results, timestamp } */
101
+ queryCache = new Map();
102
+ CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
103
+ CACHE_MAX_SIZE = 200;
22
104
  constructor(knowledgeDir = "knowledge/current") {
23
105
  this.graphPath = path.join(knowledgeDir, "graph.json");
24
106
  }
@@ -42,7 +124,8 @@ export class GraphQuery {
42
124
  throw new Error("Graph not loaded. Call load() first.");
43
125
  }
44
126
  buildIndices() {
45
- // Build keyword → docs index
127
+ // Build keyword → docs index from tagged_with edges
128
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
46
129
  this.graph.forEachEdge((_, attrs, source, target) => {
47
130
  if (attrs.type === "tagged_with") {
48
131
  const targetAttrs = this.graph.getNodeAttributes(target);
@@ -55,62 +138,164 @@ export class GraphQuery {
55
138
  }
56
139
  }
57
140
  });
58
- // Build label word node index
141
+ // Build stemmed label index + trigram index + IDF table
142
+ // Track document frequency (how many DOCUMENT nodes each stem appears in)
143
+ const docFrequency = new Map();
144
+ let docCount = 0;
145
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
59
146
  this.graph.forEachNode((nodeId, attrs) => {
60
147
  const label = attrs.label?.toLowerCase();
61
148
  if (!label)
62
149
  return;
63
- const words = label.split(/\s+/);
150
+ const isDoc = attrs.type === "document";
151
+ if (isDoc)
152
+ docCount++;
153
+ const words = label.split(/[\s_\-.]+/);
154
+ const seenStems = new Set();
64
155
  for (const word of words) {
65
156
  if (word.length < 3)
66
157
  continue;
67
- if (!this.labelIndex.has(word)) {
68
- this.labelIndex.set(word, new Set());
158
+ const stemmed = stem(word);
159
+ seenStems.add(stemmed);
160
+ // Stemmed label index
161
+ if (!this.labelIndex.has(stemmed)) {
162
+ this.labelIndex.set(stemmed, new Set());
163
+ }
164
+ this.labelIndex.get(stemmed).add(nodeId);
165
+ // Trigram index: map each trigram to the stemmed word
166
+ for (const tri of trigrams(stemmed)) {
167
+ if (!this.trigramIndex.has(tri)) {
168
+ this.trigramIndex.set(tri, new Set());
169
+ }
170
+ this.trigramIndex.get(tri).add(stemmed);
171
+ }
172
+ }
173
+ // Track DF for document nodes only
174
+ if (isDoc) {
175
+ for (const s of seenStems) {
176
+ docFrequency.set(s, (docFrequency.get(s) || 0) + 1);
69
177
  }
70
- this.labelIndex.get(word).add(nodeId);
71
178
  }
72
179
  });
180
+ // Compute IDF: log(N / df) for each stem
181
+ this.totalDocs = docCount;
182
+ for (const [stemmedWord, df] of docFrequency) {
183
+ this.idfTable.set(stemmedWord, Math.log(docCount / df));
184
+ }
185
+ log.info({
186
+ labelTerms: this.labelIndex.size,
187
+ trigrams: this.trigramIndex.size,
188
+ keywords: this.keywordIndex.size,
189
+ docs: docCount,
190
+ }, "Search indices built (stemmed + trigram + IDF)");
191
+ }
192
+ // ─── Cache helpers ──────────────────────────────────────────────
193
+ getCached(key) {
194
+ const entry = this.queryCache.get(key);
195
+ if (!entry)
196
+ return null;
197
+ if (Date.now() - entry.ts > this.CACHE_TTL_MS) {
198
+ this.queryCache.delete(key);
199
+ return null;
200
+ }
201
+ return entry.results;
202
+ }
203
+ setCache(key, results) {
204
+ // Evict oldest if at capacity
205
+ if (this.queryCache.size >= this.CACHE_MAX_SIZE) {
206
+ const oldest = this.queryCache.keys().next().value;
207
+ if (oldest !== undefined)
208
+ this.queryCache.delete(oldest);
209
+ }
210
+ this.queryCache.set(key, { results, ts: Date.now() });
73
211
  }
74
212
  // ─── Query Methods ─────────────────────────────────────────────
75
213
  /**
76
214
  * Search across all node labels for a query string.
77
- * Returns nodes whose labels contain the query terms.
215
+ *
216
+ * Uses stemming + trigram fuzzy matching + TF-IDF scoring.
217
+ * Supports optional pre-filters for domain and docType.
78
218
  */
79
219
  searchNodes(query, options = {}) {
80
220
  this.ensureLoaded();
81
- const { type, limit = 25 } = options;
82
- const terms = query.toLowerCase().split(/\s+/).filter((t) => t.length >= 3);
83
- if (terms.length === 0)
221
+ const { type, limit = 25, domain, docType } = options;
222
+ // Check cache
223
+ const cacheKey = `search:${query}|${type || ""}|${domain || ""}|${docType || ""}|${limit}`;
224
+ const cached = this.getCached(cacheKey);
225
+ if (cached)
226
+ return cached;
227
+ // Tokenise and stem the query
228
+ const rawTerms = query.toLowerCase()
229
+ .replace(/[._]/g, " ")
230
+ .replace(/([a-z])([A-Z])/g, "$1 $2")
231
+ .split(/\s+/)
232
+ .filter((t) => t.length >= 3);
233
+ if (rawTerms.length === 0)
84
234
  return [];
85
- // Find candidate nodes from label index
235
+ const stemmedTerms = rawTerms.map(stem);
236
+ // Collect candidate nodes via stemmed label index
237
+ // For each stemmed term, find exact stem matches + trigram-expanded fuzzy matches
86
238
  const candidates = new Map();
87
- for (const term of terms) {
88
- for (const [word, nodeIds] of this.labelIndex) {
89
- // Require meaningful overlap: shorter string must be ≥60% of longer
90
- const shorter = Math.min(word.length, term.length);
91
- const longer = Math.max(word.length, term.length);
92
- if (shorter / longer < 0.6)
93
- continue;
94
- if (word.includes(term) || term.includes(word)) {
95
- for (const nodeId of nodeIds) {
96
- candidates.set(nodeId, (candidates.get(nodeId) || 0) + 1);
239
+ for (const st of stemmedTerms) {
240
+ const matchedStems = new Set();
241
+ // Exact stem match
242
+ if (this.labelIndex.has(st)) {
243
+ matchedStems.add(st);
244
+ }
245
+ // Trigram fuzzy expansion: find other stems that share trigrams
246
+ const queryTrigrams = trigrams(st);
247
+ const trigramHits = new Map();
248
+ for (const tri of queryTrigrams) {
249
+ const stems = this.trigramIndex.get(tri);
250
+ if (stems) {
251
+ for (const s of stems) {
252
+ trigramHits.set(s, (trigramHits.get(s) || 0) + 1);
97
253
  }
98
254
  }
99
255
  }
256
+ // Accept stems with ≥60% trigram overlap
257
+ const threshold = Math.max(1, Math.floor(queryTrigrams.length * 0.6));
258
+ for (const [s, count] of trigramHits) {
259
+ if (count >= threshold)
260
+ matchedStems.add(s);
261
+ }
262
+ // Collect node IDs from matched stems
263
+ for (const ms of matchedStems) {
264
+ const nodeIds = this.labelIndex.get(ms);
265
+ if (!nodeIds)
266
+ continue;
267
+ // Weight by IDF of the matched stem
268
+ const idf = this.idfTable.get(ms) || 1.0;
269
+ for (const nodeId of nodeIds) {
270
+ candidates.set(nodeId, (candidates.get(nodeId) || 0) + idf);
271
+ }
272
+ }
100
273
  }
101
- // Score and filter
274
+ // Score, filter, and rank
102
275
  const results = [];
103
- for (const [nodeId, matchCount] of candidates) {
276
+ const queryLower = query.toLowerCase();
277
+ for (const [nodeId, tfidfScore] of candidates) {
104
278
  const attrs = this.graph.getNodeAttributes(nodeId);
279
+ // Pre-filter by type
105
280
  if (type && attrs.type !== type)
106
281
  continue;
282
+ // Pre-filter by domain (node IDs are doc:<domain>:<topic>)
283
+ if (domain && !nodeId.startsWith(`doc:${domain}:`))
284
+ continue;
285
+ // Pre-filter by docType
286
+ if (docType && attrs.docType !== docType)
287
+ continue;
107
288
  const label = attrs.label?.toLowerCase() || "";
108
- // Exact match gets highest score
109
- let score = matchCount / terms.length;
110
- if (label === query.toLowerCase())
111
- score = 10;
112
- else if (label.startsWith(query.toLowerCase()))
113
- score += 2;
289
+ // Compute final score: TF-IDF base + bonuses
290
+ let score = tfidfScore / stemmedTerms.length; // normalize by query length
291
+ // Exact match bonus
292
+ if (label === queryLower)
293
+ score += 10;
294
+ else if (label.startsWith(queryLower))
295
+ score += 3;
296
+ // Contains full query bonus
297
+ else if (label.includes(queryLower))
298
+ score += 1.5;
114
299
  results.push({
115
300
  nodeId,
116
301
  type: attrs.type,
@@ -120,10 +305,12 @@ export class GraphQuery {
120
305
  score,
121
306
  });
122
307
  }
123
- return results
124
- .filter((r) => (r.score || 0) >= 0.3) // Filter weak matches
308
+ const sorted = results
309
+ .filter((r) => (r.score || 0) >= 0.3)
125
310
  .sort((a, b) => (b.score || 0) - (a.score || 0))
126
311
  .slice(0, limit);
312
+ this.setCache(cacheKey, sorted);
313
+ return sorted;
127
314
  }
128
315
  /**
129
316
  * Find documents tagged with a specific keyword.
@@ -159,6 +346,7 @@ export class GraphQuery {
159
346
  const nextFrontier = [];
160
347
  for (const nodeId of frontier) {
161
348
  // Outgoing reference edges
349
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
162
350
  this.graph.forEachOutEdge(nodeId, (_, attrs, _src, target) => {
163
351
  if (attrs.type === "references" && !visited.has(target)) {
164
352
  visited.add(target);
@@ -174,6 +362,7 @@ export class GraphQuery {
174
362
  }
175
363
  });
176
364
  // Incoming reference edges
365
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
177
366
  this.graph.forEachInEdge(nodeId, (_, attrs, source) => {
178
367
  if (attrs.type === "references" && !visited.has(source)) {
179
368
  visited.add(source);
@@ -202,6 +391,7 @@ export class GraphQuery {
202
391
  if (!this.graph.hasNode(nsNodeId))
203
392
  return [];
204
393
  const results = [];
394
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
205
395
  this.graph.forEachInEdge(nsNodeId, (_, attrs, source) => {
206
396
  if (attrs.type === "belongs_to_namespace") {
207
397
  const sourceAttrs = this.graph.getNodeAttributes(source);
@@ -225,6 +415,7 @@ export class GraphQuery {
225
415
  if (!this.graph.hasNode(serviceNodeId))
226
416
  return [];
227
417
  const results = [];
418
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
228
419
  this.graph.forEachInEdge(serviceNodeId, (_, attrs, source) => {
229
420
  if (attrs.type === "belongs_to_service") {
230
421
  const sourceAttrs = this.graph.getNodeAttributes(source);
@@ -246,6 +437,7 @@ export class GraphQuery {
246
437
  if (!this.graph.hasNode(dtNodeId))
247
438
  return [];
248
439
  const results = [];
440
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
249
441
  this.graph.forEachInEdge(dtNodeId, (_, attrs, source) => {
250
442
  if (attrs.type === "is_type" && results.length < limit) {
251
443
  const sourceAttrs = this.graph.getNodeAttributes(source);
@@ -274,6 +466,7 @@ export class GraphQuery {
274
466
  const references = [];
275
467
  const referencedBy = [];
276
468
  // Traverse outgoing edges
469
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
277
470
  this.graph.forEachOutEdge(docNodeId, (_, edgeAttrs, _src, target) => {
278
471
  const targetAttrs = this.graph.getNodeAttributes(target);
279
472
  switch (edgeAttrs.type) {
@@ -295,6 +488,7 @@ export class GraphQuery {
295
488
  }
296
489
  });
297
490
  // Traverse incoming edges
491
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
298
492
  this.graph.forEachInEdge(docNodeId, (_, edgeAttrs, source) => {
299
493
  const sourceAttrs = this.graph.getNodeAttributes(source);
300
494
  switch (edgeAttrs.type) {
@@ -330,6 +524,7 @@ export class GraphQuery {
330
524
  listDomains() {
331
525
  this.ensureLoaded();
332
526
  const results = [];
527
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
333
528
  this.graph.forEachNode((nodeId, attrs) => {
334
529
  if (attrs.type === "domain") {
335
530
  results.push({
@@ -347,9 +542,11 @@ export class GraphQuery {
347
542
  listNamespaces() {
348
543
  this.ensureLoaded();
349
544
  const results = [];
545
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
350
546
  this.graph.forEachNode((nodeId, attrs) => {
351
547
  if (attrs.type === "namespace") {
352
548
  let docCount = 0;
549
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
353
550
  this.graph.forEachInEdge(nodeId, (_, edgeAttrs) => {
354
551
  if (edgeAttrs.type === "belongs_to_namespace")
355
552
  docCount++;
@@ -365,9 +562,11 @@ export class GraphQuery {
365
562
  listServices() {
366
563
  this.ensureLoaded();
367
564
  const results = [];
565
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
368
566
  this.graph.forEachNode((nodeId, attrs) => {
369
567
  if (attrs.type === "service") {
370
568
  let domainCount = 0;
569
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
371
570
  this.graph.forEachInEdge(nodeId, (_, edgeAttrs) => {
372
571
  if (edgeAttrs.type === "belongs_to_service")
373
572
  domainCount++;