@sfdxy/sf-documentation-knowledge 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -3
- package/dist/cli/collect.js +5 -1
- package/dist/cli/collect.js.map +1 -1
- package/dist/cli/data-quality.d.ts +3 -0
- package/dist/cli/data-quality.d.ts.map +1 -0
- package/dist/cli/data-quality.js +213 -0
- package/dist/cli/data-quality.js.map +1 -0
- package/dist/cli/discover.js +5 -1
- package/dist/cli/discover.js.map +1 -1
- package/dist/mcp/server.js +72 -16
- package/dist/mcp/server.js.map +1 -1
- package/dist/tests/retrieval-quality.test.d.ts +2 -0
- package/dist/tests/retrieval-quality.test.d.ts.map +1 -0
- package/dist/tests/retrieval-quality.test.js +161 -0
- package/dist/tests/retrieval-quality.test.js.map +1 -0
- package/dist/utils/graph-query.d.ts +34 -6
- package/dist/utils/graph-query.d.ts.map +1 -1
- package/dist/utils/graph-query.js +229 -35
- package/dist/utils/graph-query.js.map +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval Quality Tests — validate that known-good queries
|
|
3
|
+
* return expected documents in the top results.
|
|
4
|
+
*
|
|
5
|
+
* These tests load the real graph.json and exercise the full
|
|
6
|
+
* search pipeline (stemming, trigram matching, TF-IDF scoring).
|
|
7
|
+
*
|
|
8
|
+
* Run: npm test
|
|
9
|
+
*/
|
|
10
|
+
import { describe, it, expect, beforeAll } from "vitest";
|
|
11
|
+
import { GraphQuery, stem } from "../utils/graph-query.js";
|
|
12
|
+
import { CodeIndex } from "../mcp/code-index.js";
|
|
13
|
+
const KNOWLEDGE_DIR = "knowledge/current";
|
|
14
|
+
// ─── Unit tests for the stemmer ─────────────────────────────────
|
|
15
|
+
// Unit tests for the suffix-stripping stemmer. Each case names the rule it
// is meant to exercise; the sample word is chosen so that no earlier rule in
// the stemmer's ordered rule table captures it first.
describe("stem()", () => {
    it("strips -ing suffix", () => {
        expect(stem("configuring")).toBe("configur");
    });
    it("strips -tion suffix", () => {
        // The -tion rule rewrites the suffix to "t" rather than dropping it.
        expect(stem("configuration")).toBe("configurat");
    });
    it("strips -ed suffix", () => {
        // A plain -ed word: "queried" would be caught by the -ied rule instead.
        expect(stem("deployed")).toBe("deploy");
    });
    it("rewrites -ied suffix to -y", () => {
        expect(stem("queried")).toBe("query");
    });
    it("strips -es suffix", () => {
        // A plain -es word: "queries" would be caught by the -ies rule instead.
        expect(stem("batches")).toBe("batch");
    });
    it("rewrites -ies suffix to -y", () => {
        expect(stem("queries")).toBe("query");
    });
    it("strips -s suffix", () => {
        expect(stem("objects")).toBe("object");
    });
    it("preserves short words", () => {
        // Words under four characters are returned without any stripping.
        expect(stem("api")).toBe("api");
        expect(stem("dml")).toBe("dml");
    });
    it("strips -ment suffix", () => {
        expect(stem("deployment")).toBe("deploy");
    });
    it("strips -able suffix", () => {
        expect(stem("batchable")).toBe("batch");
    });
});
|
|
42
|
+
// ─── Integration tests: graph search retrieval quality ──────────
|
|
43
|
+
describe("GraphQuery retrieval quality", () => {
|
|
44
|
+
let gq;
|
|
45
|
+
beforeAll(async () => {
|
|
46
|
+
gq = new GraphQuery(KNOWLEDGE_DIR);
|
|
47
|
+
await gq.load();
|
|
48
|
+
}, 60_000); // 60s timeout for loading 77MB graph
|
|
49
|
+
/**
|
|
50
|
+
* Test helper: asserts that at least one result's nodeId
|
|
51
|
+
* matches the provided pattern.
|
|
52
|
+
*/
|
|
53
|
+
function expectResultMatching(results, pattern, query) {
    // First result whose nodeId satisfies the pattern, or undefined if none does.
    const hit = results.find(({ nodeId }) => pattern.test(nodeId));
    // Only the first five node IDs are listed to keep the failure message short.
    const leadingIds = results
        .slice(0, 5)
        .map(({ nodeId }) => nodeId)
        .join(", ");
    const failureMessage = `Expected query "${query}" to return a result matching ${pattern}. Got: ${leadingIds}`;
    expect(hit, failureMessage).toBeDefined();
}
|
|
57
|
+
it("finds Batch Apex documentation", () => {
|
|
58
|
+
const results = gq.searchNodes("batch apex", { type: "document", limit: 10 });
|
|
59
|
+
expect(results.length).toBeGreaterThan(0);
|
|
60
|
+
expectResultMatching(results, /doc:apex-guide:.*batch/i, "batch apex");
|
|
61
|
+
});
|
|
62
|
+
it("finds SOQL documentation", () => {
|
|
63
|
+
const results = gq.searchNodes("SOQL queries", { type: "document", limit: 10 });
|
|
64
|
+
expect(results.length).toBeGreaterThan(0);
|
|
65
|
+
expectResultMatching(results, /doc:(soql-sosl|apex-guide):/, "SOQL queries");
|
|
66
|
+
});
|
|
67
|
+
it("finds REST API docs", () => {
|
|
68
|
+
const results = gq.searchNodes("REST API sobject describe", { type: "document", limit: 15, domain: "rest-api" });
|
|
69
|
+
expect(results.length).toBeGreaterThan(0);
|
|
70
|
+
// All results must be from rest-api since we pre-filter
|
|
71
|
+
for (const r of results) {
|
|
72
|
+
expect(r.nodeId).toMatch(/^doc:rest-api:/);
|
|
73
|
+
}
|
|
74
|
+
});
|
|
75
|
+
it("finds Platform Events docs", () => {
|
|
76
|
+
const results = gq.searchNodes("platform events publish", { type: "document", limit: 10 });
|
|
77
|
+
expect(results.length).toBeGreaterThan(0);
|
|
78
|
+
expectResultMatching(results, /doc:platform-events:/, "platform events publish");
|
|
79
|
+
});
|
|
80
|
+
it("domain pre-filter works", () => {
|
|
81
|
+
const results = gq.searchNodes("trigger", { type: "document", limit: 10, domain: "apex-guide" });
|
|
82
|
+
expect(results.length).toBeGreaterThan(0);
|
|
83
|
+
// All results must be in the apex-guide domain
|
|
84
|
+
for (const r of results) {
|
|
85
|
+
expect(r.nodeId).toMatch(/^doc:apex-guide:/);
|
|
86
|
+
}
|
|
87
|
+
});
|
|
88
|
+
it("finds LWC documentation", () => {
|
|
89
|
+
const results = gq.searchNodes("lightning web components", { type: "document", limit: 10 });
|
|
90
|
+
expect(results.length).toBeGreaterThan(0);
|
|
91
|
+
expectResultMatching(results, /doc:lwc:/, "lightning web components");
|
|
92
|
+
});
|
|
93
|
+
it("finds Metadata API docs", () => {
|
|
94
|
+
const results = gq.searchNodes("metadata api deploy", { type: "document", limit: 10 });
|
|
95
|
+
expect(results.length).toBeGreaterThan(0);
|
|
96
|
+
expectResultMatching(results, /doc:metadata-api:/, "metadata api deploy");
|
|
97
|
+
});
|
|
98
|
+
it("keyword search works", () => {
|
|
99
|
+
const results = gq.findDocsByKeyword("Apex", 10);
|
|
100
|
+
expect(results.length).toBeGreaterThan(0);
|
|
101
|
+
});
|
|
102
|
+
it("returns empty array for nonsense query", () => {
|
|
103
|
+
const results = gq.searchNodes("xyzzy_foobar_gibberish", { type: "document", limit: 5 });
|
|
104
|
+
expect(results.length).toBe(0);
|
|
105
|
+
});
|
|
106
|
+
it("stemming improves recall: 'configuring' matches 'configuration' docs", () => {
|
|
107
|
+
const stemmedResults = gq.searchNodes("configuring triggers", { type: "document", limit: 15 });
|
|
108
|
+
// Should find trigger-related docs even though query used "configuring" not exact match
|
|
109
|
+
expect(stemmedResults.length).toBeGreaterThan(0);
|
|
110
|
+
});
|
|
111
|
+
it("graph context returns valid doc context", () => {
|
|
112
|
+
// Find a real doc nodeId first
|
|
113
|
+
const results = gq.searchNodes("batch apex", { type: "document", limit: 1 });
|
|
114
|
+
expect(results.length).toBeGreaterThan(0);
|
|
115
|
+
const ctx = gq.getDocContext(results[0].nodeId);
|
|
116
|
+
expect(ctx).not.toBeNull();
|
|
117
|
+
expect(ctx.label).toBeTruthy();
|
|
118
|
+
expect(ctx.keywords).toBeInstanceOf(Array);
|
|
119
|
+
});
|
|
120
|
+
it("namespace listing returns Apex namespaces", () => {
|
|
121
|
+
const ns = gq.listNamespaces();
|
|
122
|
+
expect(ns.length).toBeGreaterThan(0);
|
|
123
|
+
const systemNs = ns.find((n) => n.namespace === "System");
|
|
124
|
+
expect(systemNs).toBeDefined();
|
|
125
|
+
});
|
|
126
|
+
it("domain listing returns 100+ domains", () => {
|
|
127
|
+
const domains = gq.listDomains();
|
|
128
|
+
expect(domains.length).toBeGreaterThanOrEqual(100);
|
|
129
|
+
});
|
|
130
|
+
});
|
|
131
|
+
// ─── CodeIndex retrieval quality ────────────────────────────────
|
|
132
|
+
describe("CodeIndex retrieval quality", () => {
|
|
133
|
+
let codeIndex;
|
|
134
|
+
beforeAll(async () => {
|
|
135
|
+
codeIndex = new CodeIndex(KNOWLEDGE_DIR);
|
|
136
|
+
await codeIndex.load();
|
|
137
|
+
}, 120_000); // 2 min — reads all 33k files
|
|
138
|
+
it("finds Apex code examples", () => {
|
|
139
|
+
const snippets = codeIndex.search("batch apex", { limit: 5 });
|
|
140
|
+
expect(snippets.length).toBeGreaterThan(0);
|
|
141
|
+
expect(snippets[0].code.length).toBeGreaterThan(10);
|
|
142
|
+
});
|
|
143
|
+
it("language filter works", () => {
|
|
144
|
+
const snippets = codeIndex.search("query", { language: "soql", limit: 5 });
|
|
145
|
+
for (const s of snippets) {
|
|
146
|
+
expect(s.language).toBe("soql");
|
|
147
|
+
}
|
|
148
|
+
});
|
|
149
|
+
it("domain filter works", () => {
|
|
150
|
+
const snippets = codeIndex.search("apex", { domain: "apex-guide", limit: 5 });
|
|
151
|
+
for (const s of snippets) {
|
|
152
|
+
expect(s.domain).toBe("apex-guide");
|
|
153
|
+
}
|
|
154
|
+
});
|
|
155
|
+
it("returns stats with language breakdown", () => {
|
|
156
|
+
const stats = codeIndex.getStats();
|
|
157
|
+
expect(stats.totalSnippets).toBeGreaterThan(100);
|
|
158
|
+
expect(Object.keys(stats.languages).length).toBeGreaterThanOrEqual(2);
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
//# sourceMappingURL=retrieval-quality.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-quality.test.js","sourceRoot":"","sources":["../../src/tests/retrieval-quality.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEjD,MAAM,aAAa,GAAG,mBAAmB,CAAC;AAE1C,mEAAmE;AACnE,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAC1B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,mEAAmE;AACnE,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;IAC5C,IAAI,EAAc,CAAC;IAEnB,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,EAAE,GAAG,IAAI,UAAU,CAAC,aAAa,CAAC,CAAC;QACnC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,qCAAqC;IAEjD;;;OAGG;IACH,SAAS,oBAAoB,CAC3B,OAAkC,EAClC,OAAe,EACf,KAAa;QAEb,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC1D,MAAM,CACJ,KAAK,EACL,mBAAmB,KAAK,iCAAiC,OAAO,UAAU,OAAO,CAA
C,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAChI,CAAC,WAAW,EAAE,CAAC;IAClB,CAAC;IAED,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC9E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,yBAAyB,EAAE,YAAY,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,cAAc,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAChF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,6BAA6B,EAAE,cAAc,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,2BAA2B,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QACjH,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,wDAAwD;QACxD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,yBAAyB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC3F,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,sBAAsB,EAAE,yBAAyB,CAAC,CAAC;IACnF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC;QACjG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,+CAA+C;QAC/C,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,0BAA0B,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC5F,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C
,oBAAoB,CAAC,OAAO,EAAE,UAAU,EAAE,0BAA0B,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,qBAAqB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAC9B,MAAM,OAAO,GAAG,EAAE,CAAC,iBAAiB,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,wBAAwB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACzF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sEAAsE,EAAE,GAAG,EAAE;QAC9E,MAAM,cAAc,GAAG,EAAE,CAAC,WAAW,CAAC,sBAAsB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC/F,wFAAwF;QACxF,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,+BAA+B;QAC/B,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC7E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAI,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;QAChC,MAAM,CAAC,GAAI,CAAC,QAAQ,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,EAAE,GAAG,EAAE,CAAC,cAAc,EAAE,CAAC;QAC/B,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;QACjC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;I
ACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,mEAAmE;AACnE,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;IAC3C,IAAI,SAAoB,CAAC;IAEzB,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,SAAS,GAAG,IAAI,SAAS,CAAC,aAAa,CAAC,CAAC;QACzC,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,8BAA8B;IAE3C,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC3E,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9E,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACtC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,QAAQ,EAAE,CAAC;QACnC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -17,9 +17,26 @@ export interface DocContext {
|
|
|
17
17
|
references: GraphSearchResult[];
|
|
18
18
|
referencedBy: GraphSearchResult[];
|
|
19
19
|
}
|
|
20
|
+
export interface SearchOptions {
|
|
21
|
+
type?: string;
|
|
22
|
+
limit?: number;
|
|
23
|
+
/** Pre-filter by domain: only node IDs starting with "doc:<domain>:" are kept (e.g. "apex-reference") */
|
|
24
|
+
domain?: string;
|
|
25
|
+
/** Pre-filter by docType */
|
|
26
|
+
docType?: string;
|
|
27
|
+
}
|
|
28
|
+
/** Simple, dependency-free stemmer optimised for technical documentation. */
|
|
29
|
+
export declare function stem(word: string): string;
|
|
20
30
|
/**
|
|
21
31
|
* GraphQuery provides fast, in-memory traversal of the SF Knowledge Graph.
|
|
22
32
|
* Load once, query many times.
|
|
33
|
+
*
|
|
34
|
+
* Indexing strategy (v2):
|
|
35
|
+
* 1. Stemmed label index — stem(word) → Set<nodeId>
|
|
36
|
+
* 2. Trigram index — 3-char substring → Set<stemmed word> (for fuzzy recall)
|
|
37
|
+
* 3. Keyword index — keyword → Set<nodeId> (from tagged_with edges)
|
|
38
|
+
* 4. IDF table — stem → log(N / df) (for TF-IDF scoring)
|
|
39
|
+
* 5. LRU query cache — query key → results
|
|
23
40
|
*/
|
|
24
41
|
export declare class GraphQuery {
|
|
25
42
|
private graph;
|
|
@@ -27,8 +44,18 @@ export declare class GraphQuery {
|
|
|
27
44
|
private graphPath;
|
|
28
45
|
/** Inverted keyword index: lowercase keyword → Set of doc node IDs */
|
|
29
46
|
private keywordIndex;
|
|
30
|
-
/**
|
|
47
|
+
/** Stemmed label index: stemmed word → Set of node IDs */
|
|
31
48
|
private labelIndex;
|
|
49
|
+
/** Trigram index: trigram → Set of stemmed words (for fuzzy matching) */
|
|
50
|
+
private trigramIndex;
|
|
51
|
+
/** IDF table: stemmed word → inverse-document-frequency weight */
|
|
52
|
+
private idfTable;
|
|
53
|
+
/** Total number of document nodes (for IDF calculation) */
|
|
54
|
+
private totalDocs;
|
|
55
|
+
/** LRU query cache: cache key → { results, timestamp } */
|
|
56
|
+
private queryCache;
|
|
57
|
+
private readonly CACHE_TTL_MS;
|
|
58
|
+
private readonly CACHE_MAX_SIZE;
|
|
32
59
|
constructor(knowledgeDir?: string);
|
|
33
60
|
/**
|
|
34
61
|
* Load the graph into memory and build lookup indices.
|
|
@@ -36,14 +63,15 @@ export declare class GraphQuery {
|
|
|
36
63
|
load(): Promise<void>;
|
|
37
64
|
private ensureLoaded;
|
|
38
65
|
private buildIndices;
|
|
66
|
+
private getCached;
|
|
67
|
+
private setCache;
|
|
39
68
|
/**
|
|
40
69
|
* Search across all node labels for a query string.
|
|
41
|
-
*
|
|
70
|
+
*
|
|
71
|
+
* Uses stemming + trigram fuzzy matching + TF-IDF scoring.
|
|
72
|
+
* Supports optional pre-filters for domain and docType.
|
|
42
73
|
*/
|
|
43
|
-
searchNodes(query: string, options?:
|
|
44
|
-
type?: string;
|
|
45
|
-
limit?: number;
|
|
46
|
-
}): GraphSearchResult[];
|
|
74
|
+
searchNodes(query: string, options?: SearchOptions): GraphSearchResult[];
|
|
47
75
|
/**
|
|
48
76
|
* Find documents tagged with a specific keyword.
|
|
49
77
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"graph-query.d.ts","sourceRoot":"","sources":["../../src/utils/graph-query.ts"],"names":[],"mappings":"AAkBA,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,iBAAiB,EAAE,CAAC;IAChC,YAAY,EAAE,iBAAiB,EAAE,CAAC;CACnC;AAED
|
|
1
|
+
{"version":3,"file":"graph-query.d.ts","sourceRoot":"","sources":["../../src/utils/graph-query.ts"],"names":[],"mappings":"AAkBA,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,iBAAiB,EAAE,CAAC;IAChC,YAAY,EAAE,iBAAiB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0DAA0D;IAC1D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AA2CD,6EAA6E;AAC7E,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAWzC;AAYD;;;;;;;;;;GAUG;AACH,qBAAa,UAAU;IAErB,OAAO,CAAC,KAAK,CAA4B;IACzC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAE1B,sEAAsE;IACtE,OAAO,CAAC,YAAY,CAAkC;IACtD,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAkC;IACpD,yEAAyE;IACzE,OAAO,CAAC,YAAY,CAAkC;IACtD,kEAAkE;IAClE,OAAO,CAAC,QAAQ,CAA6B;IAC7C,2DAA2D;IAC3D,OAAO,CAAC,SAAS,CAAK;IAEtB,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAmE;IACrF,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAiB;IAC9C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAO;gBAE1B,YAAY,SAAsB;IAI9C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB3B,OAAO,CAAC,YAAY;IAIpB,OAAO,CAAC,YAAY;IA8EpB,OAAO,CAAC,SAAS;IAUjB,OAAO,CAAC,QAAQ;IAWhB;;;;;OAKG;IACH,WAAW,CACT,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,aAAkB,GAC1B,iBAAiB,EAAE;IA0GtB;;OAEG;IACH,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAkBnE;;OAEG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,SAAI,GAAG,iBAAiB,EAAE;IAkD9D;;OAEG;IACH,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAuBvD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAqBnD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAuB/D;;OAEG;IACH,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAmEnD;;OAEG;IACH,WAAW,IAAI,iBAAiB,EAAE;IAkBlC;;OAEG;IACH,cAAc,IAAI,KAAK,CAAC;QAAE,SAAS,E
AAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAmBhE;;OAEG;IACH,YAAY,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAmB/D,6BAA6B;IAC7B,QAAQ,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;CAI7C"}
|
|
@@ -6,9 +6,81 @@ import pkg from "graphology";
|
|
|
6
6
|
const DirectedGraph = pkg.DirectedGraph || pkg;
|
|
7
7
|
import { createChildLogger } from "./logger.js";
|
|
8
8
|
const log = createChildLogger("utils:graph-query");
|
|
9
|
+
// ─── Lightweight suffix-stripping stemmer ─────────────────────────
|
|
10
|
+
/**
 * Ordered suffix-rewrite rules used by stem().
 *
 * Each entry is [pattern, replacement]. Rules are tried from top to bottom,
 * so longer, more specific suffixes are listed before the shorter suffixes
 * they end with (e.g. "ational" before "tion" before "ion").
 */
const SUFFIX_RULES = [
    [/ational$/, "ate"],
    [/tional$/, "tion"],
    [/ization$/, "ize"],
    [/fulness$/, "ful"],
    [/ousness$/, "ous"],
    [/iveness$/, "ive"],
    [/ibilities$/, "ible"],
    [/ically$/, "ic"],
    [/ating$/, "ate"],
    [/ising$/, "ise"],
    [/izing$/, "ize"],
    [/abling$/, "able"],
    [/ement$/, ""],
    [/ment$/, ""],
    [/tion$/, "t"],
    [/sion$/, "s"],
    [/ness$/, ""],
    [/able$/, ""],
    [/ible$/, ""],
    [/ious$/, ""],
    [/eous$/, ""],
    [/ous$/, ""],
    [/ive$/, ""],
    [/ful$/, ""],
    [/ing$/, ""],
    [/ies$/, "y"],
    [/ied$/, "y"],
    [/ion$/, ""],
    [/ers$/, ""],
    [/est$/, ""],
    [/ely$/, ""],
    [/ed$/, ""],
    [/er$/, ""],
    [/ly$/, ""],
    [/es$/, ""],
    [/s$/, ""],
];
/**
 * Simple, dependency-free stemmer optimised for technical documentation.
 *
 * The word is lower-cased first, so the result is always lower-case
 * regardless of length. Words shorter than four characters are returned
 * without any suffix stripping. Otherwise the first rule in SUFFIX_RULES
 * that matches AND leaves a stem of at least three characters wins; a rule
 * whose result would be too short is skipped and later rules are still tried.
 *
 * @param {string} word - The word to stem.
 * @returns {string} The lower-cased stem, or the lower-cased word if no rule applies.
 */
export function stem(word) {
    const lower = word.toLowerCase();
    if (lower.length < 4) {
        return lower;
    }
    for (const [re, replacement] of SUFFIX_RULES) {
        if (!re.test(lower)) {
            continue;
        }
        const stemmed = lower.replace(re, replacement);
        // Only keep stems ≥ 3 chars to avoid over-stripping
        if (stemmed.length >= 3) {
            return stemmed;
        }
    }
    return lower;
}
|
|
64
|
+
// ─── Trigram helpers ──────────────────────────────────────────────
|
|
65
|
+
/**
 * Split a word into its overlapping 3-character substrings.
 *
 * A word shorter than three characters has no trigram, so it is returned
 * as the sole element of the array.
 */
function trigrams(word) {
    const windowCount = word.length - 2;
    if (windowCount < 1) {
        return [word];
    }
    return Array.from({ length: windowCount }, (_, start) => word.slice(start, start + 3));
}
|
|
9
74
|
/**
|
|
10
75
|
* GraphQuery provides fast, in-memory traversal of the SF Knowledge Graph.
|
|
11
76
|
* Load once, query many times.
|
|
77
|
+
*
|
|
78
|
+
* Indexing strategy (v2):
|
|
79
|
+
* 1. Stemmed label index — stem(word) → Set<nodeId>
|
|
80
|
+
* 2. Trigram index — 3-char substring → Set<stemmed word> (for fuzzy recall)
|
|
81
|
+
* 3. Keyword index — keyword → Set<nodeId> (from tagged_with edges)
|
|
82
|
+
* 4. IDF table — stem → log(N / df) (for TF-IDF scoring)
|
|
83
|
+
* 5. LRU query cache — query key → results
|
|
12
84
|
*/
|
|
13
85
|
export class GraphQuery {
|
|
14
86
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
@@ -17,8 +89,18 @@ export class GraphQuery {
|
|
|
17
89
|
graphPath;
|
|
18
90
|
/** Inverted keyword index: lowercase keyword → Set of doc node IDs */
|
|
19
91
|
keywordIndex = new Map();
|
|
20
|
-
/**
|
|
92
|
+
/** Stemmed label index: stemmed word → Set of node IDs */
|
|
21
93
|
labelIndex = new Map();
|
|
94
|
+
/** Trigram index: trigram → Set of stemmed words (for fuzzy matching) */
|
|
95
|
+
trigramIndex = new Map();
|
|
96
|
+
/** IDF table: stemmed word → inverse-document-frequency weight */
|
|
97
|
+
idfTable = new Map();
|
|
98
|
+
/** Total number of document nodes (for IDF calculation) */
|
|
99
|
+
totalDocs = 0;
|
|
100
|
+
/** LRU query cache: cache key → { results, timestamp } */
|
|
101
|
+
queryCache = new Map();
|
|
102
|
+
CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
|
|
103
|
+
CACHE_MAX_SIZE = 200;
|
|
22
104
|
constructor(knowledgeDir = "knowledge/current") {
|
|
23
105
|
this.graphPath = path.join(knowledgeDir, "graph.json");
|
|
24
106
|
}
|
|
@@ -42,7 +124,8 @@ export class GraphQuery {
|
|
|
42
124
|
throw new Error("Graph not loaded. Call load() first.");
|
|
43
125
|
}
|
|
44
126
|
buildIndices() {
|
|
45
|
-
// Build keyword → docs index
|
|
127
|
+
// Build keyword → docs index from tagged_with edges
|
|
128
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
46
129
|
this.graph.forEachEdge((_, attrs, source, target) => {
|
|
47
130
|
if (attrs.type === "tagged_with") {
|
|
48
131
|
const targetAttrs = this.graph.getNodeAttributes(target);
|
|
@@ -55,67 +138,164 @@ export class GraphQuery {
|
|
|
55
138
|
}
|
|
56
139
|
}
|
|
57
140
|
});
|
|
58
|
-
// Build label
|
|
141
|
+
// Build stemmed label index + trigram index + IDF table
|
|
142
|
+
// Track document frequency (how many DOCUMENT nodes each stem appears in)
|
|
143
|
+
const docFrequency = new Map();
|
|
144
|
+
let docCount = 0;
|
|
145
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
59
146
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
60
147
|
const label = attrs.label?.toLowerCase();
|
|
61
148
|
if (!label)
|
|
62
149
|
return;
|
|
63
|
-
const
|
|
150
|
+
const isDoc = attrs.type === "document";
|
|
151
|
+
if (isDoc)
|
|
152
|
+
docCount++;
|
|
153
|
+
const words = label.split(/[\s_\-.]+/);
|
|
154
|
+
const seenStems = new Set();
|
|
64
155
|
for (const word of words) {
|
|
65
156
|
if (word.length < 3)
|
|
66
157
|
continue;
|
|
67
|
-
|
|
68
|
-
|
|
158
|
+
const stemmed = stem(word);
|
|
159
|
+
seenStems.add(stemmed);
|
|
160
|
+
// Stemmed label index
|
|
161
|
+
if (!this.labelIndex.has(stemmed)) {
|
|
162
|
+
this.labelIndex.set(stemmed, new Set());
|
|
163
|
+
}
|
|
164
|
+
this.labelIndex.get(stemmed).add(nodeId);
|
|
165
|
+
// Trigram index: map each trigram to the stemmed word
|
|
166
|
+
for (const tri of trigrams(stemmed)) {
|
|
167
|
+
if (!this.trigramIndex.has(tri)) {
|
|
168
|
+
this.trigramIndex.set(tri, new Set());
|
|
169
|
+
}
|
|
170
|
+
this.trigramIndex.get(tri).add(stemmed);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
// Track DF for document nodes only
|
|
174
|
+
if (isDoc) {
|
|
175
|
+
for (const s of seenStems) {
|
|
176
|
+
docFrequency.set(s, (docFrequency.get(s) || 0) + 1);
|
|
69
177
|
}
|
|
70
|
-
this.labelIndex.get(word).add(nodeId);
|
|
71
178
|
}
|
|
72
179
|
});
|
|
180
|
+
// Compute IDF: log(N / df) for each stem
|
|
181
|
+
this.totalDocs = docCount;
|
|
182
|
+
for (const [stemmedWord, df] of docFrequency) {
|
|
183
|
+
this.idfTable.set(stemmedWord, Math.log(docCount / df));
|
|
184
|
+
}
|
|
185
|
+
log.info({
|
|
186
|
+
labelTerms: this.labelIndex.size,
|
|
187
|
+
trigrams: this.trigramIndex.size,
|
|
188
|
+
keywords: this.keywordIndex.size,
|
|
189
|
+
docs: docCount,
|
|
190
|
+
}, "Search indices built (stemmed + trigram + IDF)");
|
|
191
|
+
}
|
|
192
|
+
// ─── Cache helpers ──────────────────────────────────────────────
|
|
193
|
+
getCached(key) {
|
|
194
|
+
const entry = this.queryCache.get(key);
|
|
195
|
+
if (!entry)
|
|
196
|
+
return null;
|
|
197
|
+
if (Date.now() - entry.ts > this.CACHE_TTL_MS) {
|
|
198
|
+
this.queryCache.delete(key);
|
|
199
|
+
return null;
|
|
200
|
+
}
|
|
201
|
+
return entry.results;
|
|
202
|
+
}
|
|
203
|
+
setCache(key, results) {
|
|
204
|
+
// Evict oldest if at capacity
|
|
205
|
+
if (this.queryCache.size >= this.CACHE_MAX_SIZE) {
|
|
206
|
+
const oldest = this.queryCache.keys().next().value;
|
|
207
|
+
if (oldest !== undefined)
|
|
208
|
+
this.queryCache.delete(oldest);
|
|
209
|
+
}
|
|
210
|
+
this.queryCache.set(key, { results, ts: Date.now() });
|
|
73
211
|
}
|
|
74
212
|
// ─── Query Methods ─────────────────────────────────────────────
|
|
75
213
|
/**
|
|
76
214
|
* Search across all node labels for a query string.
|
|
77
|
-
*
|
|
215
|
+
*
|
|
216
|
+
* Uses stemming + trigram fuzzy matching + TF-IDF scoring.
|
|
217
|
+
* Supports optional pre-filters for domain and docType.
|
|
78
218
|
*/
|
|
79
219
|
searchNodes(query, options = {}) {
|
|
80
220
|
this.ensureLoaded();
|
|
81
|
-
const { type, limit = 25 } = options;
|
|
82
|
-
//
|
|
83
|
-
const
|
|
84
|
-
|
|
85
|
-
|
|
221
|
+
const { type, limit = 25, domain, docType } = options;
|
|
222
|
+
// Check cache
|
|
223
|
+
const cacheKey = `search:${query}|${type || ""}|${domain || ""}|${docType || ""}|${limit}`;
|
|
224
|
+
const cached = this.getCached(cacheKey);
|
|
225
|
+
if (cached)
|
|
226
|
+
return cached;
|
|
227
|
+
// Tokenise and stem the query
|
|
228
|
+
const rawTerms = query.toLowerCase()
|
|
229
|
+
.replace(/[._]/g, " ")
|
|
230
|
+
.replace(/([a-z])([A-Z])/g, "$1 $2")
|
|
86
231
|
.split(/\s+/)
|
|
87
232
|
.filter((t) => t.length >= 3);
|
|
88
|
-
if (
|
|
233
|
+
if (rawTerms.length === 0)
|
|
89
234
|
return [];
|
|
90
|
-
|
|
235
|
+
const stemmedTerms = rawTerms.map(stem);
|
|
236
|
+
// Collect candidate nodes via stemmed label index
|
|
237
|
+
// For each stemmed term, find exact stem matches + trigram-expanded fuzzy matches
|
|
91
238
|
const candidates = new Map();
|
|
92
|
-
for (const
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
239
|
+
for (const st of stemmedTerms) {
|
|
240
|
+
const matchedStems = new Set();
|
|
241
|
+
// Exact stem match
|
|
242
|
+
if (this.labelIndex.has(st)) {
|
|
243
|
+
matchedStems.add(st);
|
|
244
|
+
}
|
|
245
|
+
// Trigram fuzzy expansion: find other stems that share trigrams
|
|
246
|
+
const queryTrigrams = trigrams(st);
|
|
247
|
+
const trigramHits = new Map();
|
|
248
|
+
for (const tri of queryTrigrams) {
|
|
249
|
+
const stems = this.trigramIndex.get(tri);
|
|
250
|
+
if (stems) {
|
|
251
|
+
for (const s of stems) {
|
|
252
|
+
trigramHits.set(s, (trigramHits.get(s) || 0) + 1);
|
|
102
253
|
}
|
|
103
254
|
}
|
|
104
255
|
}
|
|
256
|
+
// Accept stems sharing at least floor(60%) of the query's trigrams (minimum 1 shared trigram)
|
|
257
|
+
const threshold = Math.max(1, Math.floor(queryTrigrams.length * 0.6));
|
|
258
|
+
for (const [s, count] of trigramHits) {
|
|
259
|
+
if (count >= threshold)
|
|
260
|
+
matchedStems.add(s);
|
|
261
|
+
}
|
|
262
|
+
// Collect node IDs from matched stems
|
|
263
|
+
for (const ms of matchedStems) {
|
|
264
|
+
const nodeIds = this.labelIndex.get(ms);
|
|
265
|
+
if (!nodeIds)
|
|
266
|
+
continue;
|
|
267
|
+
// Weight by IDF of the matched stem
|
|
268
|
+
const idf = this.idfTable.get(ms) || 1.0;
|
|
269
|
+
for (const nodeId of nodeIds) {
|
|
270
|
+
candidates.set(nodeId, (candidates.get(nodeId) || 0) + idf);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
105
273
|
}
|
|
106
|
-
// Score and
|
|
274
|
+
// Score, filter, and rank
|
|
107
275
|
const results = [];
|
|
108
|
-
|
|
276
|
+
const queryLower = query.toLowerCase();
|
|
277
|
+
for (const [nodeId, tfidfScore] of candidates) {
|
|
109
278
|
const attrs = this.graph.getNodeAttributes(nodeId);
|
|
279
|
+
// Pre-filter by type
|
|
110
280
|
if (type && attrs.type !== type)
|
|
111
281
|
continue;
|
|
282
|
+
// Pre-filter by domain (node IDs are doc:<domain>:<topic>)
|
|
283
|
+
if (domain && !nodeId.startsWith(`doc:${domain}:`))
|
|
284
|
+
continue;
|
|
285
|
+
// Pre-filter by docType
|
|
286
|
+
if (docType && attrs.docType !== docType)
|
|
287
|
+
continue;
|
|
112
288
|
const label = attrs.label?.toLowerCase() || "";
|
|
113
|
-
//
|
|
114
|
-
let score =
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
289
|
+
// Compute final score: TF-IDF base + bonuses
|
|
290
|
+
let score = tfidfScore / stemmedTerms.length; // normalize by query length
|
|
291
|
+
// Exact match bonus
|
|
292
|
+
if (label === queryLower)
|
|
293
|
+
score += 10;
|
|
294
|
+
else if (label.startsWith(queryLower))
|
|
295
|
+
score += 3;
|
|
296
|
+
// Contains full query bonus
|
|
297
|
+
else if (label.includes(queryLower))
|
|
298
|
+
score += 1.5;
|
|
119
299
|
results.push({
|
|
120
300
|
nodeId,
|
|
121
301
|
type: attrs.type,
|
|
@@ -125,10 +305,12 @@ export class GraphQuery {
|
|
|
125
305
|
score,
|
|
126
306
|
});
|
|
127
307
|
}
|
|
128
|
-
|
|
129
|
-
.filter((r) => (r.score || 0) >= 0.3)
|
|
308
|
+
const sorted = results
|
|
309
|
+
.filter((r) => (r.score || 0) >= 0.3)
|
|
130
310
|
.sort((a, b) => (b.score || 0) - (a.score || 0))
|
|
131
311
|
.slice(0, limit);
|
|
312
|
+
this.setCache(cacheKey, sorted);
|
|
313
|
+
return sorted;
|
|
132
314
|
}
|
|
133
315
|
/**
|
|
134
316
|
* Find documents tagged with a specific keyword.
|
|
@@ -164,6 +346,7 @@ export class GraphQuery {
|
|
|
164
346
|
const nextFrontier = [];
|
|
165
347
|
for (const nodeId of frontier) {
|
|
166
348
|
// Outgoing reference edges
|
|
349
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
167
350
|
this.graph.forEachOutEdge(nodeId, (_, attrs, _src, target) => {
|
|
168
351
|
if (attrs.type === "references" && !visited.has(target)) {
|
|
169
352
|
visited.add(target);
|
|
@@ -179,6 +362,7 @@ export class GraphQuery {
|
|
|
179
362
|
}
|
|
180
363
|
});
|
|
181
364
|
// Incoming reference edges
|
|
365
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
182
366
|
this.graph.forEachInEdge(nodeId, (_, attrs, source) => {
|
|
183
367
|
if (attrs.type === "references" && !visited.has(source)) {
|
|
184
368
|
visited.add(source);
|
|
@@ -207,6 +391,7 @@ export class GraphQuery {
|
|
|
207
391
|
if (!this.graph.hasNode(nsNodeId))
|
|
208
392
|
return [];
|
|
209
393
|
const results = [];
|
|
394
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
210
395
|
this.graph.forEachInEdge(nsNodeId, (_, attrs, source) => {
|
|
211
396
|
if (attrs.type === "belongs_to_namespace") {
|
|
212
397
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
@@ -230,6 +415,7 @@ export class GraphQuery {
|
|
|
230
415
|
if (!this.graph.hasNode(serviceNodeId))
|
|
231
416
|
return [];
|
|
232
417
|
const results = [];
|
|
418
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
233
419
|
this.graph.forEachInEdge(serviceNodeId, (_, attrs, source) => {
|
|
234
420
|
if (attrs.type === "belongs_to_service") {
|
|
235
421
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
@@ -251,6 +437,7 @@ export class GraphQuery {
|
|
|
251
437
|
if (!this.graph.hasNode(dtNodeId))
|
|
252
438
|
return [];
|
|
253
439
|
const results = [];
|
|
440
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
254
441
|
this.graph.forEachInEdge(dtNodeId, (_, attrs, source) => {
|
|
255
442
|
if (attrs.type === "is_type" && results.length < limit) {
|
|
256
443
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
@@ -279,6 +466,7 @@ export class GraphQuery {
|
|
|
279
466
|
const references = [];
|
|
280
467
|
const referencedBy = [];
|
|
281
468
|
// Traverse outgoing edges
|
|
469
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
282
470
|
this.graph.forEachOutEdge(docNodeId, (_, edgeAttrs, _src, target) => {
|
|
283
471
|
const targetAttrs = this.graph.getNodeAttributes(target);
|
|
284
472
|
switch (edgeAttrs.type) {
|
|
@@ -300,6 +488,7 @@ export class GraphQuery {
|
|
|
300
488
|
}
|
|
301
489
|
});
|
|
302
490
|
// Traverse incoming edges
|
|
491
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
303
492
|
this.graph.forEachInEdge(docNodeId, (_, edgeAttrs, source) => {
|
|
304
493
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
305
494
|
switch (edgeAttrs.type) {
|
|
@@ -335,6 +524,7 @@ export class GraphQuery {
|
|
|
335
524
|
listDomains() {
|
|
336
525
|
this.ensureLoaded();
|
|
337
526
|
const results = [];
|
|
527
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
338
528
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
339
529
|
if (attrs.type === "domain") {
|
|
340
530
|
results.push({
|
|
@@ -352,9 +542,11 @@ export class GraphQuery {
|
|
|
352
542
|
listNamespaces() {
|
|
353
543
|
this.ensureLoaded();
|
|
354
544
|
const results = [];
|
|
545
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
355
546
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
356
547
|
if (attrs.type === "namespace") {
|
|
357
548
|
let docCount = 0;
|
|
549
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
358
550
|
this.graph.forEachInEdge(nodeId, (_, edgeAttrs) => {
|
|
359
551
|
if (edgeAttrs.type === "belongs_to_namespace")
|
|
360
552
|
docCount++;
|
|
@@ -370,9 +562,11 @@ export class GraphQuery {
|
|
|
370
562
|
listServices() {
|
|
371
563
|
this.ensureLoaded();
|
|
372
564
|
const results = [];
|
|
565
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
373
566
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
374
567
|
if (attrs.type === "service") {
|
|
375
568
|
let domainCount = 0;
|
|
569
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
376
570
|
this.graph.forEachInEdge(nodeId, (_, edgeAttrs) => {
|
|
377
571
|
if (edgeAttrs.type === "belongs_to_service")
|
|
378
572
|
domainCount++;
|