@sfdxy/sf-documentation-knowledge 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -3
- package/dist/cli/collect.js +5 -1
- package/dist/cli/collect.js.map +1 -1
- package/dist/cli/data-quality.d.ts +3 -0
- package/dist/cli/data-quality.d.ts.map +1 -0
- package/dist/cli/data-quality.js +213 -0
- package/dist/cli/data-quality.js.map +1 -0
- package/dist/cli/discover.js +5 -1
- package/dist/cli/discover.js.map +1 -1
- package/dist/mcp/server.js +317 -21
- package/dist/mcp/server.js.map +1 -1
- package/dist/tests/retrieval-quality.test.d.ts +2 -0
- package/dist/tests/retrieval-quality.test.d.ts.map +1 -0
- package/dist/tests/retrieval-quality.test.js +161 -0
- package/dist/tests/retrieval-quality.test.js.map +1 -0
- package/dist/utils/graph-query.d.ts +34 -6
- package/dist/utils/graph-query.d.ts.map +1 -1
- package/dist/utils/graph-query.js +231 -32
- package/dist/utils/graph-query.js.map +1 -1
- package/package.json +2 -1
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retrieval Quality Tests — validate that known-good queries
|
|
3
|
+
* return expected documents in the top results.
|
|
4
|
+
*
|
|
5
|
+
* These tests load the real graph.json and exercise the full
|
|
6
|
+
* search pipeline (stemming, trigram matching, TF-IDF scoring).
|
|
7
|
+
*
|
|
8
|
+
* Run: npm test
|
|
9
|
+
*/
|
|
10
|
+
import { describe, it, expect, beforeAll } from "vitest";
|
|
11
|
+
import { GraphQuery, stem } from "../utils/graph-query.js";
|
|
12
|
+
import { CodeIndex } from "../mcp/code-index.js";
|
|
13
|
+
const KNOWLEDGE_DIR = "knowledge/current";
|
|
14
|
+
// ─── Unit tests for the stemmer ─────────────────────────────────
|
|
15
|
+
// Unit tests for the suffix-stripping stemmer exported by graph-query.js.
// Each case pins the exact stem produced for one representative word.
describe("stem()", () => {
    it("strips -ing suffix", () => {
        expect(stem("configuring")).toBe("configur");
    });
    it("strips -tion suffix", () => {
        expect(stem("configuration")).toBe("configurat");
    });
    it("strips -ed suffix", () => {
        // "queried" is rewritten by the -ied → y rule; "deployed" is the input
        // that actually reaches the plain -ed rule this test is named for.
        expect(stem("queried")).toBe("query");
        expect(stem("deployed")).toBe("deploy");
    });
    it("strips -es suffix", () => {
        // "queries" is rewritten by the -ies → y rule; "batches" is the input
        // that actually reaches the plain -es rule this test is named for.
        expect(stem("queries")).toBe("query");
        expect(stem("batches")).toBe("batch");
    });
    it("strips -s suffix", () => {
        expect(stem("objects")).toBe("object");
    });
    it("preserves short words", () => {
        // Words shorter than 4 characters are never stemmed.
        expect(stem("api")).toBe("api");
        expect(stem("dml")).toBe("dml");
    });
    it("strips -ment suffix", () => {
        expect(stem("deployment")).toBe("deploy");
    });
    it("strips -able suffix", () => {
        expect(stem("batchable")).toBe("batch");
    });
});
|
|
42
|
+
// ─── Integration tests: graph search retrieval quality ──────────
|
|
43
|
+
// Integration suite: loads the real graph from KNOWLEDGE_DIR once and checks
// that known-good queries surface documents from the expected domains.
describe("GraphQuery retrieval quality", () => {
    /** GraphQuery instance shared by every test in this suite. */
    let graph;
    beforeAll(async () => {
        graph = new GraphQuery(KNOWLEDGE_DIR);
        await graph.load();
    }, 60_000); // 60s timeout for loading 77MB graph

    /** Runs a document-only search; `extra` carries limit / domain options. */
    const searchDocs = (query, extra) => graph.searchNodes(query, { type: "document", ...extra });

    /**
     * Test helper: asserts that at least one result's nodeId matches
     * `pattern`. On failure the message lists the first five nodeIds returned.
     */
    const expectResultMatching = (results, pattern, query) => {
        const hit = results.find((entry) => pattern.test(entry.nodeId));
        const topNodeIds = results.slice(0, 5).map((entry) => entry.nodeId).join(", ");
        expect(hit, `Expected query "${query}" to return a result matching ${pattern}. Got: ${topNodeIds}`).toBeDefined();
    };

    /** Asserts that every result's nodeId matches `pattern`. */
    const expectEveryNodeId = (results, pattern) => {
        results.forEach((entry) => {
            expect(entry.nodeId).toMatch(pattern);
        });
    };

    it("finds Batch Apex documentation", () => {
        const found = searchDocs("batch apex", { limit: 10 });
        expect(found.length).toBeGreaterThan(0);
        expectResultMatching(found, /doc:apex-guide:.*batch/i, "batch apex");
    });
    it("finds SOQL documentation", () => {
        const found = searchDocs("SOQL queries", { limit: 10 });
        expect(found.length).toBeGreaterThan(0);
        expectResultMatching(found, /doc:(soql-sosl|apex-guide):/, "SOQL queries");
    });
    it("finds REST API docs", () => {
        const found = searchDocs("REST API sobject describe", { limit: 15, domain: "rest-api" });
        expect(found.length).toBeGreaterThan(0);
        // All results must be from rest-api since we pre-filter
        expectEveryNodeId(found, /^doc:rest-api:/);
    });
    it("finds Platform Events docs", () => {
        const found = searchDocs("platform events publish", { limit: 10 });
        expect(found.length).toBeGreaterThan(0);
        expectResultMatching(found, /doc:platform-events:/, "platform events publish");
    });
    it("domain pre-filter works", () => {
        const found = searchDocs("trigger", { limit: 10, domain: "apex-guide" });
        expect(found.length).toBeGreaterThan(0);
        // All results must be in the apex-guide domain
        expectEveryNodeId(found, /^doc:apex-guide:/);
    });
    it("finds LWC documentation", () => {
        const found = searchDocs("lightning web components", { limit: 10 });
        expect(found.length).toBeGreaterThan(0);
        expectResultMatching(found, /doc:lwc:/, "lightning web components");
    });
    it("finds Metadata API docs", () => {
        const found = searchDocs("metadata api deploy", { limit: 10 });
        expect(found.length).toBeGreaterThan(0);
        expectResultMatching(found, /doc:metadata-api:/, "metadata api deploy");
    });
    it("keyword search works", () => {
        const tagged = graph.findDocsByKeyword("Apex", 10);
        expect(tagged.length).toBeGreaterThan(0);
    });
    it("returns empty array for nonsense query", () => {
        const found = searchDocs("xyzzy_foobar_gibberish", { limit: 5 });
        expect(found.length).toBe(0);
    });
    it("stemming improves recall: 'configuring' matches 'configuration' docs", () => {
        const stemmedResults = searchDocs("configuring triggers", { limit: 15 });
        // Should find trigger-related docs even though query used "configuring" not exact match
        expect(stemmedResults.length).toBeGreaterThan(0);
    });
    it("graph context returns valid doc context", () => {
        // Look up a real doc nodeId first, then ask for its context.
        const found = searchDocs("batch apex", { limit: 1 });
        expect(found.length).toBeGreaterThan(0);
        const [topHit] = found;
        const context = graph.getDocContext(topHit.nodeId);
        expect(context).not.toBeNull();
        expect(context.label).toBeTruthy();
        expect(context.keywords).toBeInstanceOf(Array);
    });
    it("namespace listing returns Apex namespaces", () => {
        const namespaces = graph.listNamespaces();
        expect(namespaces.length).toBeGreaterThan(0);
        const systemEntry = namespaces.find((entry) => entry.namespace === "System");
        expect(systemEntry).toBeDefined();
    });
    it("domain listing returns 100+ domains", () => {
        const allDomains = graph.listDomains();
        expect(allDomains.length).toBeGreaterThanOrEqual(100);
    });
});
|
|
131
|
+
// ─── CodeIndex retrieval quality ────────────────────────────────
|
|
132
|
+
// Integration suite for the code-snippet index built from KNOWLEDGE_DIR.
describe("CodeIndex retrieval quality", () => {
    /** CodeIndex instance shared by every test in this suite. */
    let index;
    beforeAll(async () => {
        index = new CodeIndex(KNOWLEDGE_DIR);
        await index.load();
    }, 120_000); // 2 min — reads all 33k files

    it("finds Apex code examples", () => {
        const found = index.search("batch apex", { limit: 5 });
        expect(found.length).toBeGreaterThan(0);
        const [best] = found;
        expect(best.code.length).toBeGreaterThan(10);
    });
    it("language filter works", () => {
        const found = index.search("query", { language: "soql", limit: 5 });
        // Note: makes no assertion when the search returns nothing.
        found.forEach((snippet) => {
            expect(snippet.language).toBe("soql");
        });
    });
    it("domain filter works", () => {
        const found = index.search("apex", { domain: "apex-guide", limit: 5 });
        // Note: makes no assertion when the search returns nothing.
        found.forEach((snippet) => {
            expect(snippet.domain).toBe("apex-guide");
        });
    });
    it("returns stats with language breakdown", () => {
        const summary = index.getStats();
        expect(summary.totalSnippets).toBeGreaterThan(100);
        const languageNames = Object.keys(summary.languages);
        expect(languageNames.length).toBeGreaterThanOrEqual(2);
    });
});
|
|
161
|
+
//# sourceMappingURL=retrieval-quality.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retrieval-quality.test.js","sourceRoot":"","sources":["../../src/tests/retrieval-quality.test.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAEjD,MAAM,aAAa,GAAG,mBAAmB,CAAC;AAE1C,mEAAmE;AACnE,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC3B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAC1B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,mEAAmE;AACnE,QAAQ,CAAC,8BAA8B,EAAE,GAAG,EAAE;IAC5C,IAAI,EAAc,CAAC;IAEnB,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,EAAE,GAAG,IAAI,UAAU,CAAC,aAAa,CAAC,CAAC;QACnC,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;IAClB,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,qCAAqC;IAEjD;;;OAGG;IACH,SAAS,oBAAoB,CAC3B,OAAkC,EAClC,OAAe,EACf,KAAa;QAEb,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC1D,MAAM,CACJ,KAAK,EACL,mBAAmB,KAAK,iCAAiC,OAAO,UAAU,OAAO,CAA
C,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAChI,CAAC,WAAW,EAAE,CAAC;IAClB,CAAC;IAED,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC9E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,yBAAyB,EAAE,YAAY,CAAC,CAAC;IACzE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,cAAc,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAChF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,6BAA6B,EAAE,cAAc,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,2BAA2B,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QACjH,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,wDAAwD;QACxD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,yBAAyB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC3F,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,sBAAsB,EAAE,yBAAyB,CAAC,CAAC;IACnF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,CAAC;QACjG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,+CAA+C;QAC/C,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,0BAA0B,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC5F,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C
,oBAAoB,CAAC,OAAO,EAAE,UAAU,EAAE,0BAA0B,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,qBAAqB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,oBAAoB,CAAC,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAC9B,MAAM,OAAO,GAAG,EAAE,CAAC,iBAAiB,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QACjD,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,wBAAwB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACzF,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sEAAsE,EAAE,GAAG,EAAE;QAC9E,MAAM,cAAc,GAAG,EAAE,CAAC,WAAW,CAAC,sBAAsB,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC/F,wFAAwF;QACxF,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,+BAA+B;QAC/B,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC7E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,GAAG,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAI,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;QAChC,MAAM,CAAC,GAAI,CAAC,QAAQ,CAAC,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,EAAE,GAAG,EAAE,CAAC,cAAc,EAAE,CAAC;QAC/B,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,QAAQ,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC;QAC1D,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;QACjC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC;I
ACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,mEAAmE;AACnE,QAAQ,CAAC,6BAA6B,EAAE,GAAG,EAAE;IAC3C,IAAI,SAAoB,CAAC;IAEzB,SAAS,CAAC,KAAK,IAAI,EAAE;QACnB,SAAS,GAAG,IAAI,SAAS,CAAC,aAAa,CAAC,CAAC;QACzC,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,8BAA8B;IAE3C,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;QAClC,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;QAC/B,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC3E,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC7B,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QAC9E,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACtC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,QAAQ,EAAE,CAAC;QACnC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -17,9 +17,26 @@ export interface DocContext {
|
|
|
17
17
|
references: GraphSearchResult[];
|
|
18
18
|
referencedBy: GraphSearchResult[];
|
|
19
19
|
}
|
|
20
|
+
/** Options accepted by `GraphQuery.searchNodes()`. Every field is optional. */
export interface SearchOptions {
    /** Keep only nodes whose `type` attribute equals this value (e.g. "document"). */
    type?: string;
    /** Result limit; `searchNodes()` defaults it to 25 when omitted. */
    limit?: number;
    /**
     * Pre-filter by domain (e.g. "apex-reference"). The value must be the
     * complete domain segment: a node is kept only when its ID starts with
     * `doc:<domain>:`, so a partial name such as "apex" does not match
     * "apex-reference".
     */
    domain?: string;
    /** Pre-filter by docType: keep only nodes whose `docType` attribute equals this value. */
    docType?: string;
}
|
|
28
|
+
/** Simple, dependency-free stemmer optimised for technical documentation. */
|
|
29
|
+
export declare function stem(word: string): string;
|
|
20
30
|
/**
|
|
21
31
|
* GraphQuery provides fast, in-memory traversal of the SF Knowledge Graph.
|
|
22
32
|
* Load once, query many times.
|
|
33
|
+
*
|
|
34
|
+
* Indexing strategy (v2):
|
|
35
|
+
* 1. Stemmed label index — stem(word) → Set<nodeId>
|
|
36
|
+
* 2. Trigram index — 3-char substring → Set<stemmed word> (for fuzzy recall)
|
|
37
|
+
* 3. Keyword index — keyword → Set<nodeId> (from tagged_with edges)
|
|
38
|
+
* 4. IDF table — stem → log(N / df) (for TF-IDF scoring)
|
|
39
|
+
* 5. LRU query cache — query key → results
|
|
23
40
|
*/
|
|
24
41
|
export declare class GraphQuery {
|
|
25
42
|
private graph;
|
|
@@ -27,8 +44,18 @@ export declare class GraphQuery {
|
|
|
27
44
|
private graphPath;
|
|
28
45
|
/** Inverted keyword index: lowercase keyword → Set of doc node IDs */
|
|
29
46
|
private keywordIndex;
|
|
30
|
-
/**
|
|
47
|
+
/** Stemmed label index: stemmed word → Set of node IDs */
|
|
31
48
|
private labelIndex;
|
|
49
|
+
/** Trigram index: trigram → Set of stemmed words (for fuzzy matching) */
|
|
50
|
+
private trigramIndex;
|
|
51
|
+
/** IDF table: stemmed word → inverse-document-frequency weight */
|
|
52
|
+
private idfTable;
|
|
53
|
+
/** Total number of document nodes (for IDF calculation) */
|
|
54
|
+
private totalDocs;
|
|
55
|
+
/** LRU query cache: cache key → { results, timestamp } */
|
|
56
|
+
private queryCache;
|
|
57
|
+
private readonly CACHE_TTL_MS;
|
|
58
|
+
private readonly CACHE_MAX_SIZE;
|
|
32
59
|
constructor(knowledgeDir?: string);
|
|
33
60
|
/**
|
|
34
61
|
* Load the graph into memory and build lookup indices.
|
|
@@ -36,14 +63,15 @@ export declare class GraphQuery {
|
|
|
36
63
|
load(): Promise<void>;
|
|
37
64
|
private ensureLoaded;
|
|
38
65
|
private buildIndices;
|
|
66
|
+
private getCached;
|
|
67
|
+
private setCache;
|
|
39
68
|
/**
|
|
40
69
|
* Search across all node labels for a query string.
|
|
41
|
-
*
|
|
70
|
+
*
|
|
71
|
+
* Uses stemming + trigram fuzzy matching + TF-IDF scoring.
|
|
72
|
+
* Supports optional pre-filters for domain and docType.
|
|
42
73
|
*/
|
|
43
|
-
searchNodes(query: string, options?:
|
|
44
|
-
type?: string;
|
|
45
|
-
limit?: number;
|
|
46
|
-
}): GraphSearchResult[];
|
|
74
|
+
searchNodes(query: string, options?: SearchOptions): GraphSearchResult[];
|
|
47
75
|
/**
|
|
48
76
|
* Find documents tagged with a specific keyword.
|
|
49
77
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"graph-query.d.ts","sourceRoot":"","sources":["../../src/utils/graph-query.ts"],"names":[],"mappings":"AAkBA,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,iBAAiB,EAAE,CAAC;IAChC,YAAY,EAAE,iBAAiB,EAAE,CAAC;CACnC;AAED
|
|
1
|
+
{"version":3,"file":"graph-query.d.ts","sourceRoot":"","sources":["../../src/utils/graph-query.ts"],"names":[],"mappings":"AAkBA,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,iBAAiB,EAAE,CAAC;IAChC,YAAY,EAAE,iBAAiB,EAAE,CAAC;CACnC;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,0DAA0D;IAC1D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,4BAA4B;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AA2CD,6EAA6E;AAC7E,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAWzC;AAYD;;;;;;;;;;GAUG;AACH,qBAAa,UAAU;IAErB,OAAO,CAAC,KAAK,CAA4B;IACzC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAS;IAE1B,sEAAsE;IACtE,OAAO,CAAC,YAAY,CAAkC;IACtD,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAkC;IACpD,yEAAyE;IACzE,OAAO,CAAC,YAAY,CAAkC;IACtD,kEAAkE;IAClE,OAAO,CAAC,QAAQ,CAA6B;IAC7C,2DAA2D;IAC3D,OAAO,CAAC,SAAS,CAAK;IAEtB,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAmE;IACrF,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAiB;IAC9C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAO;gBAE1B,YAAY,SAAsB;IAI9C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB3B,OAAO,CAAC,YAAY;IAIpB,OAAO,CAAC,YAAY;IA8EpB,OAAO,CAAC,SAAS;IAUjB,OAAO,CAAC,QAAQ;IAWhB;;;;;OAKG;IACH,WAAW,CACT,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,aAAkB,GAC1B,iBAAiB,EAAE;IA0GtB;;OAEG;IACH,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAkBnE;;OAEG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,SAAI,GAAG,iBAAiB,EAAE;IAkD9D;;OAEG;IACH,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAuBvD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,EAAE;IAqBnD;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,iBAAiB,EAAE;IAuB/D;;OAEG;IACH,aAAa,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,GAAG,IAAI;IAmEnD;;OAEG;IACH,WAAW,IAAI,iBAAiB,EAAE;IAkBlC;;OAEG;IACH,cAAc,IAAI,KAAK,CAAC;QAAE,SAAS,E
AAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC;IAmBhE;;OAEG;IACH,YAAY,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAmB/D,6BAA6B;IAC7B,QAAQ,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;CAI7C"}
|
|
@@ -6,9 +6,81 @@ import pkg from "graphology";
|
|
|
6
6
|
const DirectedGraph = pkg.DirectedGraph || pkg;
|
|
7
7
|
import { createChildLogger } from "./logger.js";
|
|
8
8
|
const log = createChildLogger("utils:graph-query");
|
|
9
|
+
// ─── Lightweight suffix-stripping stemmer ─────────────────────────
|
|
10
|
+
/**
 * Ordered suffix-rewrite rules used by `stem()`.
 *
 * Each entry is `[pattern, replacement]`. Rules are tried top to bottom and the
 * first rule that matches AND leaves a stem of at least 3 characters wins, so a
 * longer suffix must stay ahead of any shorter suffix it ends with
 * (e.g. `ational` → `tional` → `tion` → `ion`).
 */
const SUFFIX_RULES = [
    [/ational$/, "ate"],
    [/tional$/, "tion"],
    [/ization$/, "ize"],
    [/fulness$/, "ful"],
    [/ousness$/, "ous"],
    [/iveness$/, "ive"],
    [/ibilities$/, "ible"],
    [/ically$/, "ic"],
    [/ating$/, "ate"],
    [/ising$/, "ise"],
    [/izing$/, "ize"],
    [/abling$/, "able"],
    [/ement$/, ""],
    [/ment$/, ""],
    [/tion$/, "t"],
    [/sion$/, "s"],
    [/ness$/, ""],
    [/able$/, ""],
    [/ible$/, ""],
    // (a second, unreachable copy of the `ment` rule used to sit here)
    [/ious$/, ""],
    [/eous$/, ""],
    [/ous$/, ""],
    [/ive$/, ""],
    [/ful$/, ""],
    [/ing$/, ""],
    [/ies$/, "y"],
    [/ied$/, "y"],
    [/ion$/, ""],
    [/ers$/, ""],
    [/est$/, ""],
    [/ely$/, ""],
    [/ed$/, ""],
    [/er$/, ""],
    [/ly$/, ""],
    [/es$/, ""],
    [/s$/, ""],
];
/**
 * Simple, dependency-free stemmer optimised for technical documentation.
 *
 * The result is always lower-cased. Words shorter than 4 characters are
 * returned without any suffix stripping, so short acronyms such as "api" or
 * "dml" survive intact.
 *
 * @param {string} word - A single token (no whitespace).
 * @returns {string} The lower-cased stem, or the lower-cased word itself when
 *   no rule yields a stem of at least 3 characters.
 */
export function stem(word) {
    // Lower-case before the length guard so short and long words are
    // normalised the same way ("API" → "api", just like "APIS" → "api").
    const lower = word.toLowerCase();
    if (lower.length < 4)
        return lower;
    for (const [suffix, replacement] of SUFFIX_RULES) {
        if (!suffix.test(lower))
            continue;
        const stemmed = lower.replace(suffix, replacement);
        // Only keep stems ≥ 3 chars to avoid over-stripping; otherwise fall
        // through and let a later (shorter) suffix rule try.
        if (stemmed.length >= 3)
            return stemmed;
    }
    return lower;
}
|
|
64
|
+
// ─── Trigram helpers ──────────────────────────────────────────────
|
|
65
|
+
/**
 * Split a word into its overlapping 3-character windows, left to right.
 *
 * @param {string} word - Token to split.
 * @returns {string[]} Every 3-character substring of `word`; a word shorter
 *   than 3 characters is returned as its own single "trigram".
 */
function trigrams(word) {
    const windowCount = word.length - 2;
    if (windowCount < 1) {
        return [word];
    }
    return Array.from({ length: windowCount }, (_, start) => word.slice(start, start + 3));
}
|
|
9
74
|
/**
|
|
10
75
|
* GraphQuery provides fast, in-memory traversal of the SF Knowledge Graph.
|
|
11
76
|
* Load once, query many times.
|
|
77
|
+
*
|
|
78
|
+
* Indexing strategy (v2):
|
|
79
|
+
* 1. Stemmed label index — stem(word) → Set<nodeId>
|
|
80
|
+
* 2. Trigram index — 3-char substring → Set<stemmed word> (for fuzzy recall)
|
|
81
|
+
* 3. Keyword index — keyword → Set<nodeId> (from tagged_with edges)
|
|
82
|
+
* 4. IDF table — stem → log(N / df) (for TF-IDF scoring)
|
|
83
|
+
* 5. LRU query cache — query key → results
|
|
12
84
|
*/
|
|
13
85
|
export class GraphQuery {
|
|
14
86
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
@@ -17,8 +89,18 @@ export class GraphQuery {
|
|
|
17
89
|
graphPath;
|
|
18
90
|
/** Inverted keyword index: lowercase keyword → Set of doc node IDs */
|
|
19
91
|
keywordIndex = new Map();
|
|
20
|
-
/**
|
|
92
|
+
/** Stemmed label index: stemmed word → Set of node IDs */
|
|
21
93
|
labelIndex = new Map();
|
|
94
|
+
/** Trigram index: trigram → Set of stemmed words (for fuzzy matching) */
|
|
95
|
+
trigramIndex = new Map();
|
|
96
|
+
/** IDF table: stemmed word → inverse-document-frequency weight */
|
|
97
|
+
idfTable = new Map();
|
|
98
|
+
/** Total number of document nodes (for IDF calculation) */
|
|
99
|
+
totalDocs = 0;
|
|
100
|
+
/** LRU query cache: cache key → { results, timestamp } */
|
|
101
|
+
queryCache = new Map();
|
|
102
|
+
CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
|
|
103
|
+
CACHE_MAX_SIZE = 200;
|
|
22
104
|
constructor(knowledgeDir = "knowledge/current") {
|
|
23
105
|
this.graphPath = path.join(knowledgeDir, "graph.json");
|
|
24
106
|
}
|
|
@@ -42,7 +124,8 @@ export class GraphQuery {
|
|
|
42
124
|
throw new Error("Graph not loaded. Call load() first.");
|
|
43
125
|
}
|
|
44
126
|
buildIndices() {
|
|
45
|
-
// Build keyword → docs index
|
|
127
|
+
// Build keyword → docs index from tagged_with edges
|
|
128
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
46
129
|
this.graph.forEachEdge((_, attrs, source, target) => {
|
|
47
130
|
if (attrs.type === "tagged_with") {
|
|
48
131
|
const targetAttrs = this.graph.getNodeAttributes(target);
|
|
@@ -55,62 +138,164 @@ export class GraphQuery {
|
|
|
55
138
|
}
|
|
56
139
|
}
|
|
57
140
|
});
|
|
58
|
-
// Build label
|
|
141
|
+
// Build stemmed label index + trigram index + IDF table
|
|
142
|
+
// Track document frequency (how many DOCUMENT nodes each stem appears in)
|
|
143
|
+
const docFrequency = new Map();
|
|
144
|
+
let docCount = 0;
|
|
145
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
59
146
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
60
147
|
const label = attrs.label?.toLowerCase();
|
|
61
148
|
if (!label)
|
|
62
149
|
return;
|
|
63
|
-
const
|
|
150
|
+
const isDoc = attrs.type === "document";
|
|
151
|
+
if (isDoc)
|
|
152
|
+
docCount++;
|
|
153
|
+
const words = label.split(/[\s_\-.]+/);
|
|
154
|
+
const seenStems = new Set();
|
|
64
155
|
for (const word of words) {
|
|
65
156
|
if (word.length < 3)
|
|
66
157
|
continue;
|
|
67
|
-
|
|
68
|
-
|
|
158
|
+
const stemmed = stem(word);
|
|
159
|
+
seenStems.add(stemmed);
|
|
160
|
+
// Stemmed label index
|
|
161
|
+
if (!this.labelIndex.has(stemmed)) {
|
|
162
|
+
this.labelIndex.set(stemmed, new Set());
|
|
163
|
+
}
|
|
164
|
+
this.labelIndex.get(stemmed).add(nodeId);
|
|
165
|
+
// Trigram index: map each trigram to the stemmed word
|
|
166
|
+
for (const tri of trigrams(stemmed)) {
|
|
167
|
+
if (!this.trigramIndex.has(tri)) {
|
|
168
|
+
this.trigramIndex.set(tri, new Set());
|
|
169
|
+
}
|
|
170
|
+
this.trigramIndex.get(tri).add(stemmed);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
// Track DF for document nodes only
|
|
174
|
+
if (isDoc) {
|
|
175
|
+
for (const s of seenStems) {
|
|
176
|
+
docFrequency.set(s, (docFrequency.get(s) || 0) + 1);
|
|
69
177
|
}
|
|
70
|
-
this.labelIndex.get(word).add(nodeId);
|
|
71
178
|
}
|
|
72
179
|
});
|
|
180
|
+
// Compute IDF: log(N / df) for each stem
|
|
181
|
+
this.totalDocs = docCount;
|
|
182
|
+
for (const [stemmedWord, df] of docFrequency) {
|
|
183
|
+
this.idfTable.set(stemmedWord, Math.log(docCount / df));
|
|
184
|
+
}
|
|
185
|
+
log.info({
|
|
186
|
+
labelTerms: this.labelIndex.size,
|
|
187
|
+
trigrams: this.trigramIndex.size,
|
|
188
|
+
keywords: this.keywordIndex.size,
|
|
189
|
+
docs: docCount,
|
|
190
|
+
}, "Search indices built (stemmed + trigram + IDF)");
|
|
191
|
+
}
|
|
192
|
+
// ─── Cache helpers ──────────────────────────────────────────────
|
|
193
|
+
getCached(key) {
|
|
194
|
+
const entry = this.queryCache.get(key);
|
|
195
|
+
if (!entry)
|
|
196
|
+
return null;
|
|
197
|
+
if (Date.now() - entry.ts > this.CACHE_TTL_MS) {
|
|
198
|
+
this.queryCache.delete(key);
|
|
199
|
+
return null;
|
|
200
|
+
}
|
|
201
|
+
return entry.results;
|
|
202
|
+
}
|
|
203
|
+
setCache(key, results) {
|
|
204
|
+
// Evict oldest if at capacity
|
|
205
|
+
if (this.queryCache.size >= this.CACHE_MAX_SIZE) {
|
|
206
|
+
const oldest = this.queryCache.keys().next().value;
|
|
207
|
+
if (oldest !== undefined)
|
|
208
|
+
this.queryCache.delete(oldest);
|
|
209
|
+
}
|
|
210
|
+
this.queryCache.set(key, { results, ts: Date.now() });
|
|
73
211
|
}
|
|
74
212
|
// ─── Query Methods ─────────────────────────────────────────────
|
|
75
213
|
/**
|
|
76
214
|
* Search across all node labels for a query string.
|
|
77
|
-
*
|
|
215
|
+
*
|
|
216
|
+
* Uses stemming + trigram fuzzy matching + TF-IDF scoring.
|
|
217
|
+
* Supports optional pre-filters for domain and docType.
|
|
78
218
|
*/
|
|
79
219
|
searchNodes(query, options = {}) {
|
|
80
220
|
this.ensureLoaded();
|
|
81
|
-
const { type, limit = 25 } = options;
|
|
82
|
-
|
|
83
|
-
|
|
221
|
+
const { type, limit = 25, domain, docType } = options;
|
|
222
|
+
// Check cache
|
|
223
|
+
const cacheKey = `search:${query}|${type || ""}|${domain || ""}|${docType || ""}|${limit}`;
|
|
224
|
+
const cached = this.getCached(cacheKey);
|
|
225
|
+
if (cached)
|
|
226
|
+
return cached;
|
|
227
|
+
// Tokenise and stem the query
|
|
228
|
+
const rawTerms = query.toLowerCase()
|
|
229
|
+
.replace(/[._]/g, " ")
|
|
230
|
+
.replace(/([a-z])([A-Z])/g, "$1 $2")
|
|
231
|
+
.split(/\s+/)
|
|
232
|
+
.filter((t) => t.length >= 3);
|
|
233
|
+
if (rawTerms.length === 0)
|
|
84
234
|
return [];
|
|
85
|
-
|
|
235
|
+
const stemmedTerms = rawTerms.map(stem);
|
|
236
|
+
// Collect candidate nodes via stemmed label index
|
|
237
|
+
// For each stemmed term, find exact stem matches + trigram-expanded fuzzy matches
|
|
86
238
|
const candidates = new Map();
|
|
87
|
-
for (const
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
239
|
+
for (const st of stemmedTerms) {
|
|
240
|
+
const matchedStems = new Set();
|
|
241
|
+
// Exact stem match
|
|
242
|
+
if (this.labelIndex.has(st)) {
|
|
243
|
+
matchedStems.add(st);
|
|
244
|
+
}
|
|
245
|
+
// Trigram fuzzy expansion: find other stems that share trigrams
|
|
246
|
+
const queryTrigrams = trigrams(st);
|
|
247
|
+
const trigramHits = new Map();
|
|
248
|
+
for (const tri of queryTrigrams) {
|
|
249
|
+
const stems = this.trigramIndex.get(tri);
|
|
250
|
+
if (stems) {
|
|
251
|
+
for (const s of stems) {
|
|
252
|
+
trigramHits.set(s, (trigramHits.get(s) || 0) + 1);
|
|
97
253
|
}
|
|
98
254
|
}
|
|
99
255
|
}
|
|
256
|
+
// Accept stems sharing at least floor(60%) of the query stem's trigrams (minimum 1 hit)
|
|
257
|
+
const threshold = Math.max(1, Math.floor(queryTrigrams.length * 0.6));
|
|
258
|
+
for (const [s, count] of trigramHits) {
|
|
259
|
+
if (count >= threshold)
|
|
260
|
+
matchedStems.add(s);
|
|
261
|
+
}
|
|
262
|
+
// Collect node IDs from matched stems
|
|
263
|
+
for (const ms of matchedStems) {
|
|
264
|
+
const nodeIds = this.labelIndex.get(ms);
|
|
265
|
+
if (!nodeIds)
|
|
266
|
+
continue;
|
|
267
|
+
// Weight by IDF of the matched stem
|
|
268
|
+
const idf = this.idfTable.get(ms) || 1.0;
|
|
269
|
+
for (const nodeId of nodeIds) {
|
|
270
|
+
candidates.set(nodeId, (candidates.get(nodeId) || 0) + idf);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
100
273
|
}
|
|
101
|
-
// Score and
|
|
274
|
+
// Score, filter, and rank
|
|
102
275
|
const results = [];
|
|
103
|
-
|
|
276
|
+
const queryLower = query.toLowerCase();
|
|
277
|
+
for (const [nodeId, tfidfScore] of candidates) {
|
|
104
278
|
const attrs = this.graph.getNodeAttributes(nodeId);
|
|
279
|
+
// Pre-filter by type
|
|
105
280
|
if (type && attrs.type !== type)
|
|
106
281
|
continue;
|
|
282
|
+
// Pre-filter by domain (node IDs are doc:<domain>:<topic>)
|
|
283
|
+
if (domain && !nodeId.startsWith(`doc:${domain}:`))
|
|
284
|
+
continue;
|
|
285
|
+
// Pre-filter by docType
|
|
286
|
+
if (docType && attrs.docType !== docType)
|
|
287
|
+
continue;
|
|
107
288
|
const label = attrs.label?.toLowerCase() || "";
|
|
108
|
-
//
|
|
109
|
-
let score =
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
289
|
+
// Compute final score: TF-IDF base + bonuses
|
|
290
|
+
let score = tfidfScore / stemmedTerms.length; // normalize by query length
|
|
291
|
+
// Exact match bonus
|
|
292
|
+
if (label === queryLower)
|
|
293
|
+
score += 10;
|
|
294
|
+
else if (label.startsWith(queryLower))
|
|
295
|
+
score += 3;
|
|
296
|
+
// Contains full query bonus
|
|
297
|
+
else if (label.includes(queryLower))
|
|
298
|
+
score += 1.5;
|
|
114
299
|
results.push({
|
|
115
300
|
nodeId,
|
|
116
301
|
type: attrs.type,
|
|
@@ -120,10 +305,12 @@ export class GraphQuery {
|
|
|
120
305
|
score,
|
|
121
306
|
});
|
|
122
307
|
}
|
|
123
|
-
|
|
124
|
-
.filter((r) => (r.score || 0) >= 0.3)
|
|
308
|
+
const sorted = results
|
|
309
|
+
.filter((r) => (r.score || 0) >= 0.3)
|
|
125
310
|
.sort((a, b) => (b.score || 0) - (a.score || 0))
|
|
126
311
|
.slice(0, limit);
|
|
312
|
+
this.setCache(cacheKey, sorted);
|
|
313
|
+
return sorted;
|
|
127
314
|
}
|
|
128
315
|
/**
|
|
129
316
|
* Find documents tagged with a specific keyword.
|
|
@@ -159,6 +346,7 @@ export class GraphQuery {
|
|
|
159
346
|
const nextFrontier = [];
|
|
160
347
|
for (const nodeId of frontier) {
|
|
161
348
|
// Outgoing reference edges
|
|
349
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
162
350
|
this.graph.forEachOutEdge(nodeId, (_, attrs, _src, target) => {
|
|
163
351
|
if (attrs.type === "references" && !visited.has(target)) {
|
|
164
352
|
visited.add(target);
|
|
@@ -174,6 +362,7 @@ export class GraphQuery {
|
|
|
174
362
|
}
|
|
175
363
|
});
|
|
176
364
|
// Incoming reference edges
|
|
365
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
177
366
|
this.graph.forEachInEdge(nodeId, (_, attrs, source) => {
|
|
178
367
|
if (attrs.type === "references" && !visited.has(source)) {
|
|
179
368
|
visited.add(source);
|
|
@@ -202,6 +391,7 @@ export class GraphQuery {
|
|
|
202
391
|
if (!this.graph.hasNode(nsNodeId))
|
|
203
392
|
return [];
|
|
204
393
|
const results = [];
|
|
394
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
205
395
|
this.graph.forEachInEdge(nsNodeId, (_, attrs, source) => {
|
|
206
396
|
if (attrs.type === "belongs_to_namespace") {
|
|
207
397
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
@@ -225,6 +415,7 @@ export class GraphQuery {
|
|
|
225
415
|
if (!this.graph.hasNode(serviceNodeId))
|
|
226
416
|
return [];
|
|
227
417
|
const results = [];
|
|
418
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
228
419
|
this.graph.forEachInEdge(serviceNodeId, (_, attrs, source) => {
|
|
229
420
|
if (attrs.type === "belongs_to_service") {
|
|
230
421
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
@@ -246,6 +437,7 @@ export class GraphQuery {
|
|
|
246
437
|
if (!this.graph.hasNode(dtNodeId))
|
|
247
438
|
return [];
|
|
248
439
|
const results = [];
|
|
440
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
249
441
|
this.graph.forEachInEdge(dtNodeId, (_, attrs, source) => {
|
|
250
442
|
if (attrs.type === "is_type" && results.length < limit) {
|
|
251
443
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
@@ -274,6 +466,7 @@ export class GraphQuery {
|
|
|
274
466
|
const references = [];
|
|
275
467
|
const referencedBy = [];
|
|
276
468
|
// Traverse outgoing edges
|
|
469
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
277
470
|
this.graph.forEachOutEdge(docNodeId, (_, edgeAttrs, _src, target) => {
|
|
278
471
|
const targetAttrs = this.graph.getNodeAttributes(target);
|
|
279
472
|
switch (edgeAttrs.type) {
|
|
@@ -295,6 +488,7 @@ export class GraphQuery {
|
|
|
295
488
|
}
|
|
296
489
|
});
|
|
297
490
|
// Traverse incoming edges
|
|
491
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
298
492
|
this.graph.forEachInEdge(docNodeId, (_, edgeAttrs, source) => {
|
|
299
493
|
const sourceAttrs = this.graph.getNodeAttributes(source);
|
|
300
494
|
switch (edgeAttrs.type) {
|
|
@@ -330,6 +524,7 @@ export class GraphQuery {
|
|
|
330
524
|
listDomains() {
|
|
331
525
|
this.ensureLoaded();
|
|
332
526
|
const results = [];
|
|
527
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
333
528
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
334
529
|
if (attrs.type === "domain") {
|
|
335
530
|
results.push({
|
|
@@ -347,9 +542,11 @@ export class GraphQuery {
|
|
|
347
542
|
listNamespaces() {
|
|
348
543
|
this.ensureLoaded();
|
|
349
544
|
const results = [];
|
|
545
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
350
546
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
351
547
|
if (attrs.type === "namespace") {
|
|
352
548
|
let docCount = 0;
|
|
549
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
353
550
|
this.graph.forEachInEdge(nodeId, (_, edgeAttrs) => {
|
|
354
551
|
if (edgeAttrs.type === "belongs_to_namespace")
|
|
355
552
|
docCount++;
|
|
@@ -365,9 +562,11 @@ export class GraphQuery {
|
|
|
365
562
|
listServices() {
|
|
366
563
|
this.ensureLoaded();
|
|
367
564
|
const results = [];
|
|
565
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
368
566
|
this.graph.forEachNode((nodeId, attrs) => {
|
|
369
567
|
if (attrs.type === "service") {
|
|
370
568
|
let domainCount = 0;
|
|
569
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
371
570
|
this.graph.forEachInEdge(nodeId, (_, edgeAttrs) => {
|
|
372
571
|
if (edgeAttrs.type === "belongs_to_service")
|
|
373
572
|
domainCount++;
|