@rarusoft/dendrite-wiki 0.1.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -0
- package/dist/api-extractor/extract.js +269 -0
- package/dist/api-extractor/language-extractor.js +15 -0
- package/dist/api-extractor/python-extractor.js +358 -0
- package/dist/api-extractor/render.js +195 -0
- package/dist/api-extractor/tree-sitter-extractor.js +1079 -0
- package/dist/api-extractor/types.js +11 -0
- package/dist/api-extractor/typescript-extractor.js +50 -0
- package/dist/api-extractor/walk.js +178 -0
- package/dist/api-reference.js +438 -0
- package/dist/benchmark-events.js +129 -0
- package/dist/benchmark.js +270 -0
- package/dist/binder-export.js +381 -0
- package/dist/canonical-target.js +168 -0
- package/dist/chart-insert.js +377 -0
- package/dist/chart-prompts.js +414 -0
- package/dist/context-cache.js +98 -0
- package/dist/contradicts-shipped-memory.js +232 -0
- package/dist/diff-context.js +142 -0
- package/dist/doctor.js +220 -0
- package/dist/generated-docs.js +219 -0
- package/dist/i18n.js +71 -0
- package/dist/index.js +49 -0
- package/dist/librarian.js +255 -0
- package/dist/maintenance-actions.js +244 -0
- package/dist/maintenance-inbox.js +842 -0
- package/dist/maintenance-runner.js +62 -0
- package/dist/page-drift.js +225 -0
- package/dist/page-inbox.js +168 -0
- package/dist/report-export.js +339 -0
- package/dist/review-bridge.js +1386 -0
- package/dist/search-index.js +199 -0
- package/dist/store.js +1617 -0
- package/dist/telemetry-defaults.js +44 -0
- package/dist/telemetry-report.js +263 -0
- package/dist/telemetry.js +544 -0
- package/dist/wiki-synthesis.js +901 -0
- package/package.json +35 -0
- package/src/api-extractor/extract.ts +333 -0
- package/src/api-extractor/language-extractor.ts +37 -0
- package/src/api-extractor/python-extractor.ts +380 -0
- package/src/api-extractor/render.ts +267 -0
- package/src/api-extractor/tree-sitter-extractor.ts +1210 -0
- package/src/api-extractor/types.ts +41 -0
- package/src/api-extractor/typescript-extractor.ts +56 -0
- package/src/api-extractor/walk.ts +209 -0
- package/src/api-reference.ts +552 -0
- package/src/benchmark-events.ts +216 -0
- package/src/benchmark.ts +376 -0
- package/src/binder-export.ts +437 -0
- package/src/canonical-target.ts +192 -0
- package/src/chart-insert.ts +478 -0
- package/src/chart-prompts.ts +417 -0
- package/src/context-cache.ts +129 -0
- package/src/contradicts-shipped-memory.ts +311 -0
- package/src/diff-context.ts +187 -0
- package/src/doctor.ts +260 -0
- package/src/generated-docs.ts +316 -0
- package/src/i18n.ts +106 -0
- package/src/index.ts +59 -0
- package/src/librarian.ts +331 -0
- package/src/maintenance-actions.ts +314 -0
- package/src/maintenance-inbox.ts +1132 -0
- package/src/maintenance-runner.ts +85 -0
- package/src/page-drift.ts +292 -0
- package/src/page-inbox.ts +254 -0
- package/src/report-export.ts +392 -0
- package/src/review-bridge.ts +1729 -0
- package/src/search-index.ts +266 -0
- package/src/store.ts +2171 -0
- package/src/telemetry-defaults.ts +50 -0
- package/src/telemetry-report.ts +365 -0
- package/src/telemetry.ts +757 -0
- package/src/wiki-synthesis.ts +1307 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wiki search index — keyword + graph ranking with explainable reasons.
|
|
3
|
+
*
|
|
4
|
+
* Builds a token index over page titles, slugs, content, and source-backed claims, plus a
|
|
5
|
+
* companion graph layer (inbound link counts, outbound links, related pages) so search
|
|
6
|
+
* results carry both lexical and structural signal. Query results include a `reasons` array
|
|
7
|
+
* explaining why each page surfaced ("title matches X", "content mentions Y", "linked from
|
|
8
|
+
* the wiki graph", "N inbound links"), which is what makes the recall surface auditable
|
|
9
|
+
* — operators can read why a page ranked where it did instead of trusting an opaque
|
|
10
|
+
* vector score.
|
|
11
|
+
*
|
|
12
|
+
* Used by `wiki_search`, by `wiki_context` for assembling the briefing's "ranked pages"
|
|
13
|
+
* section, and by the `Memory Trails` recall path in `memory-edges.ts` for query-edge
|
|
14
|
+
* reinforcement. The Jaccard tokenizer here is the same one used to compute Memory Trails
|
|
15
|
+
* query-fingerprint similarity, so search ranking and trail bonuses agree on what counts
|
|
16
|
+
* as a "similar" query.
|
|
17
|
+
*/
|
|
18
|
+
import path from 'node:path';
|
|
19
|
+
import { tokenizeSearchQuery } from '@rarusoft/dendrite-memory';
|
|
20
|
+
/**
 * Builds the wiki search index from the page set: the pages themselves plus a
 * link-graph layer (inbound link counts, sorted outbound links, and a short
 * "related pages" list) derived from markdown links and claim sources.
 *
 * @param {object} input - Expects `pages` ({ page, content, claims }[]) and
 *   `indexContent` (markdown of docs/index.md).
 * @returns {{ pages: Array, graph: Map }} pages unchanged, graph keyed by slug.
 */
export function buildWikiSearchIndex(input) {
  const slugByPath = new Map();
  const outgoing = new Map();
  const inbound = new Map();
  for (const { page } of input.pages) {
    slugByPath.set(page.path, page.slug);
    outgoing.set(page.slug, new Set());
    inbound.set(page.slug, 0);
  }
  // The index page contributes edges but is not itself an indexed page.
  collectGraphEdges('docs/index.md', input.indexContent, slugByPath, outgoing, inbound);
  for (const document of input.pages) {
    const { page, content, claims } = document;
    collectGraphEdges(page.path, content, slugByPath, outgoing, inbound);
    // A claim citing another page also counts as a graph edge: the cited
    // page gains an inbound link and this page records the outbound edge.
    for (const claim of claims) {
      for (const source of claim.sources) {
        if (!outgoing.has(source.slug)) {
          continue;
        }
        outgoing.get(page.slug)?.add(source.slug);
        inbound.set(source.slug, (inbound.get(source.slug) ?? 0) + 1);
      }
    }
  }
  const graph = new Map();
  for (const { page } of input.pages) {
    const links = [...(outgoing.get(page.slug) ?? [])];
    links.sort((a, b) => a.localeCompare(b));
    graph.set(page.slug, {
      slug: page.slug,
      inboundLinks: inbound.get(page.slug) ?? 0,
      outgoingLinks: links,
      relatedPages: links.slice(0, 5)
    });
  }
  return { pages: input.pages, graph };
}
/**
 * Runs a keyword query against the index: tokenizes the query, scores every
 * page, drops non-matches, and orders by score (ties broken by slug so the
 * ranking is deterministic).
 *
 * @param {{ pages: Array, graph: Map }} index - Output of buildWikiSearchIndex.
 * @param {string} query - Free-text search query.
 * @returns {Array} scored search results with score > 0, best first.
 */
export function searchWikiIndex(index, query) {
  const terms = tokenizeSearchQuery(query);
  const matches = [];
  for (const document of index.pages) {
    const result = scoreSearchDocument(document, terms, index.graph);
    if (result.score > 0) {
      matches.push(result);
    }
  }
  matches.sort((a, b) => b.score - a.score || a.slug.localeCompare(b.slug));
  return matches;
}
/**
 * Produces a ranking when there is no usable query: graph inbound links
 * (capped at 3) plus fixed bonuses for the default briefing pages
 * ('architecture', 'project-log'). Every page is returned, best first.
 *
 * @param {{ pages: Array, graph: Map }} index - Output of buildWikiSearchIndex.
 * @returns {Array} search-result-shaped entries with empty match fields.
 */
export function fallbackSearchResults(index) {
  const results = [];
  for (const document of index.pages) {
    const { page, content } = document;
    const graph = index.graph.get(page.slug) ?? emptyGraphNode(page.slug);
    let score = Math.min(graph.inboundLinks, 3);
    if (page.slug === 'architecture') {
      score += 4;
    }
    if (page.slug === 'project-log') {
      score += 3;
    }
    const reason = score > 0
      ? 'fallback ranking from graph and default briefing pages'
      : 'fallback page for broad project briefing';
    results.push({
      ...page,
      score,
      summary: extractSummaryParagraph(content) || page.title,
      reasons: [reason],
      matchedTerms: [],
      claimMatches: [],
      graph
    });
  }
  results.sort((a, b) => b.score - a.score || a.slug.localeCompare(b.slug));
  return results;
}
/**
 * Projects a full search result into the compact "context page" shape used by
 * briefings: top-3 reasons joined into one string plus a small evidence block.
 *
 * @param {object} result - A search result (page fields, score, summary,
 *   reasons, matchedTerms, graph node).
 * @returns {object} context page with `reason` string and `evidence` details.
 */
export function searchResultToContextPage(result) {
  const { slug, title, path: pagePath, score, summary, reasons, matchedTerms, graph } = result;
  // Keep at most three reasons; fall back to a generic label when none exist.
  const reason = reasons.slice(0, 3).join('; ') || 'selected by deterministic search index';
  return {
    slug,
    title,
    path: pagePath,
    score,
    summary,
    reason,
    evidence: {
      matchedTerms,
      inboundLinks: graph.inboundLinks,
      relatedPages: graph.relatedPages.slice(0, 3)
    }
  };
}
// Re-exported from @rarusoft/dendrite-memory so the brain owns the canonical tokenizer and
|
|
89
|
+
// the wiki indexer shares the same tokenization rules. Phase 4 slice B wave 2 of
|
|
90
|
+
// the Library Extraction Roadmap inverted this dependency (was: memory-store /
|
|
91
|
+
// memory-edges importing from search-index just to get the tokenizer).
|
|
92
|
+
export { tokenizeSearchQuery };
|
|
93
|
+
/**
 * Scores one page against the query terms and explains the score.
 * Weights: title hit 8, slug hit 6, content mentions 2 each (capped at 4
 * occurrences), claim-text hit 4 per claim, plus a graph bonus (inbound links
 * capped at 3) only for pages that already matched lexically.
 *
 * @param {object} document - { page, content, claims } entry from the index.
 * @param {string[]} terms - Tokenized query terms (lowercase).
 * @param {Map} graph - Slug-keyed graph nodes from buildWikiSearchIndex.
 * @returns {object} page fields plus score, summary, reasons, matchedTerms,
 *   deduped claimMatches, and the page's graph node.
 */
function scoreSearchDocument(document, terms, graph) {
  const lowerTitle = document.page.title.toLowerCase();
  const lowerSlug = document.page.slug.toLowerCase();
  const lowerContent = document.content.toLowerCase();
  const reasons = new Set();
  const matchedTerms = new Set();
  const claimMatches = [];
  let score = 0;
  for (const term of terms) {
    if (lowerTitle.includes(term)) {
      score += 8;
      reasons.add(`title matches "${term}"`);
      matchedTerms.add(term);
    }
    if (lowerSlug.includes(term)) {
      score += 6;
      reasons.add(`slug matches "${term}"`);
      matchedTerms.add(term);
    }
    const hits = countOccurrences(lowerContent, term);
    if (hits > 0) {
      // Cap repetition so one page can't dominate on raw term frequency.
      score += Math.min(hits, 4) * 2;
      reasons.add(`content mentions "${term}"`);
      matchedTerms.add(term);
    }
    const hitClaims = document.claims.filter((claim) => claim.text.toLowerCase().includes(term));
    if (hitClaims.length > 0) {
      score += hitClaims.length * 4;
      reasons.add(`claim text matches "${term}"`);
      matchedTerms.add(term);
      for (const claim of hitClaims) {
        claimMatches.push({ text: claim.text, status: claim.status, sources: claim.sources });
      }
    }
  }
  const graphNode = graph.get(document.page.slug) ?? emptyGraphNode(document.page.slug);
  // Structural bonus only reinforces lexical matches; it never surfaces a
  // page on its own (score must already be positive).
  if (score > 0 && graphNode.inboundLinks > 0) {
    score += Math.min(graphNode.inboundLinks, 3);
    reasons.add(graphNode.inboundLinks > 1
      ? `${graphNode.inboundLinks} inbound links`
      : 'linked from the wiki graph');
  }
  return {
    ...document.page,
    score,
    summary: extractSummaryParagraph(document.content) || document.page.title,
    reasons: [...reasons],
    matchedTerms: [...matchedTerms],
    claimMatches: dedupeClaimMatches(claimMatches),
    graph: graphNode
  };
}
/**
 * Mutates `outgoing`/`inbound` with the link edges found in one page's
 * markdown content. Links that leave the wiki (external URLs, absolute paths,
 * unknown targets) are ignored.
 *
 * @param {string} sourcePath - Repo-relative path of the page being scanned.
 * @param {string} content - The page's markdown.
 * @param {Map} pageByPath - path -> slug for all indexed pages.
 * @param {Map} outgoing - slug -> Set of outbound target slugs (mutated).
 * @param {Map} inbound - slug -> inbound link count (mutated).
 */
function collectGraphEdges(sourcePath, content, pageByPath, outgoing, inbound) {
  const fromSlug = pageByPath.get(sourcePath);
  const baseDir = path.posix.dirname(sourcePath);
  for (const rawLink of extractMarkdownLinks(content)) {
    const resolved = resolveMarkdownLinkPath(rawLink, baseDir);
    const toSlug = resolved ? pageByPath.get(resolved) : undefined;
    if (!toSlug) {
      continue;
    }
    // Every resolvable link counts toward the target's inbound total
    // (repeats included); outgoing is a Set, so it dedupes and skips
    // self-links explicitly.
    inbound.set(toSlug, (inbound.get(toSlug) ?? 0) + 1);
    if (fromSlug && fromSlug !== toSlug) {
      outgoing.get(fromSlug)?.add(toSlug);
    }
  }
}
/**
 * Extracts link targets from markdown `[text](target)` syntax, stripping any
 * `#fragment` suffix and dropping empty targets (pure-fragment links).
 *
 * @param {string} content - Markdown source.
 * @returns {string[]} raw link targets in document order.
 */
function extractMarkdownLinks(content) {
  const links = [];
  const pattern = /\[[^\]]+\]\(([^)]+)\)/g;
  let match;
  while ((match = pattern.exec(content)) !== null) {
    const target = match[1].split('#')[0].trim();
    if (target) {
      links.push(target);
    }
  }
  return links;
}
/**
 * Resolves a relative markdown link against its page's directory, using POSIX
 * path semantics. Returns undefined for links that cannot point at a wiki
 * page: URLs with a scheme (`https:`, `mailto:`, ...) or absolute paths.
 *
 * @param {string} link - Raw link target from the markdown.
 * @param {string} sourceDir - POSIX directory of the linking page.
 * @returns {string | undefined} normalized repo-relative path, or undefined.
 */
function resolveMarkdownLinkPath(link, sourceDir) {
  const hasScheme = /^[a-z]+:/i.test(link);
  if (hasScheme || path.isAbsolute(link)) {
    return undefined;
  }
  // Normalize Windows-style separators before joining with POSIX rules.
  const posixLink = link.replace(/\\/g, '/');
  return path.posix.normalize(path.posix.join(sourceDir, posixLink));
}
/**
 * Returns the first plain prose line after the page's H1 (or from the top of
 * the document when no H1 exists) for use as a summary. Markdown structure
 * that isn't summary prose is skipped: headings, table rows, list items,
 * blockquotes, and fenced code blocks. Returns '' when no prose line exists.
 *
 * Fix vs. previous version: only `- ` bullets and `1. ` ordered items were
 * skipped, so `* `/`+ ` bullets, `>` blockquotes, and code-fence contents
 * could be returned as the "summary paragraph".
 *
 * @param {string} content - Markdown source of the page.
 * @returns {string} first prose line, trimmed, or ''.
 */
function extractSummaryParagraph(content) {
  const lines = content.split(/\r?\n/);
  const h1Index = lines.findIndex((line) => /^#\s+\S+/.test(line));
  const bodyLines = lines.slice(h1Index === -1 ? 0 : h1Index + 1);
  let inFence = false;
  for (const line of bodyLines) {
    const trimmed = line.trim();
    // Toggle on fence delimiters so code contents are never used as summary.
    if (trimmed.startsWith('```') || trimmed.startsWith('~~~')) {
      inFence = !inFence;
      continue;
    }
    if (inFence || !trimmed) {
      continue;
    }
    if (
      trimmed.startsWith('#') ||
      trimmed.startsWith('|') ||
      trimmed.startsWith('>') ||
      /^[-*+]\s/.test(trimmed) ||
      /^\d+\.\s/.test(trimmed)
    ) {
      continue;
    }
    return trimmed;
  }
  return '';
}
/**
 * Counts non-overlapping occurrences of `term` inside `value`.
 * An empty term yields 0 (it would otherwise match everywhere).
 *
 * @param {string} value - Haystack to scan.
 * @param {string} term - Needle to count.
 * @returns {number} occurrence count.
 */
function countOccurrences(value, term) {
  if (!term) {
    return 0;
  }
  let count = 0;
  let cursor = value.indexOf(term);
  while (cursor !== -1) {
    count += 1;
    // Advance past the full match so occurrences never overlap.
    cursor = value.indexOf(term, cursor + term.length);
  }
  return count;
}
/**
 * Removes duplicate claim matches, keyed on status + text. When duplicates
 * exist, the LAST occurrence wins (matching `new Map(entries)` semantics, so
 * its `sources` array is the one retained).
 *
 * @param {Array<{text: string, status: string, sources: Array}>} claims
 * @returns {Array} deduped claims in last-seen order per key.
 */
function dedupeClaimMatches(claims) {
  const byKey = new Map();
  for (const claim of claims) {
    byKey.set(`${claim.status}:${claim.text}`, claim);
  }
  return [...byKey.values()];
}
/**
 * Neutral graph node for a page that never appeared in the link graph:
 * zero inbound links and no outbound or related pages.
 *
 * @param {string} slug - Page slug the node stands in for.
 * @returns {{slug: string, inboundLinks: number, outgoingLinks: Array, relatedPages: Array}}
 */
function emptyGraphNode(slug) {
  return { slug, inboundLinks: 0, outgoingLinks: [], relatedPages: [] };
}