@firstpick/pi-utils 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/local-wiki.ts +78 -24
package/package.json
CHANGED
package/src/local-wiki.ts
CHANGED
|
@@ -59,6 +59,8 @@ export interface LocalWikiEngineConfig {
|
|
|
59
59
|
fileExtensions: RegExp;
|
|
60
60
|
format: LocalWikiFormat;
|
|
61
61
|
queryExpansions?: Record<string, string[]>;
|
|
62
|
+
searchStopwords?: Iterable<string>;
|
|
63
|
+
termWeights?: Record<string, number>;
|
|
62
64
|
missingDocsMessage?: string;
|
|
63
65
|
ignoredDirs?: string[];
|
|
64
66
|
sourceName?: (filePath: string, docsPath: string) => string | undefined;
|
|
@@ -74,6 +76,7 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
74
76
|
const metadataCache = path.join(config.cacheDir, "metadata.json");
|
|
75
77
|
const ignoredDirs = new Set([".git", "node_modules", "result", ...(config.ignoredDirs ?? [])]);
|
|
76
78
|
const missingDocsMessage = config.missingDocsMessage ?? `Local ${config.displayName} docs are not available at ${config.docsPath}.`;
|
|
79
|
+
const searchStopwords = new Set([...(config.searchStopwords ?? [])].map((word) => normalizeQuery(word)).filter(Boolean));
|
|
77
80
|
|
|
78
81
|
/**
 * Report whether `filePath` can be accessed.
 *
 * @param filePath Path handed to `fsp.access`.
 * @returns `true` when `fsp.access` resolves, `false` when it throws or rejects.
 */
async function localExists(filePath: string): Promise<boolean> {
  let reachable = true;
  try {
    await fsp.access(filePath);
  } catch {
    // Any failure is reported as "does not exist"; the error itself is discarded.
    reachable = false;
  }
  return reachable;
}
|
|
@@ -119,7 +122,36 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
119
122
|
}
|
|
120
123
|
|
|
121
124
|
/**
 * Reduce a Markdown heading or title fragment to plain display text.
 *
 * Applied in order: leading `#` heading markers are dropped, a trailing
 * `{#custom-anchor}` attribute is dropped, every `*`, `_`, backtick and `~`
 * character is removed (including ones inside words), and finally images and
 * inline links are collapsed to their alt text / label.
 *
 * @param input Raw heading line or title text.
 * @returns The trimmed plain-text form of `input`.
 */
function stripMarkdownDecorators(input: string): string {
  return input
    .replace(/^#+\s*/, "")
    .replace(/\s*\{#[^}]+\}\s*$/, "")
    .replace(/[*_`~]/g, "")
    // Images go first so `![alt](src)` collapses to `alt` instead of leaving a
    // stray `!` behind (or the whole markup when the alt text is empty).
    .replace(/!\[([^\]]*)\]\([^\)]+\)/g, "$1")
    .replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1")
    .trim();
}
|
|
132
|
+
|
|
133
|
+
/**
 * Remove a leading YAML frontmatter block (`---` … `---`) from a Markdown document.
 *
 * Compared with a plain "first `---` to next `---`" match this also:
 * - tolerates a UTF-8 byte-order mark before the opening delimiter,
 * - strips an empty frontmatter block (`---` directly followed by `---`),
 * - only accepts a closing delimiter that is a line consisting of `---` alone,
 *   so a frontmatter line such as `---more` does not end the block early.
 *
 * Whitespace following the closing delimiter is consumed as well.
 *
 * @param markdown Full Markdown source.
 * @returns `markdown` without its frontmatter, or unchanged when it has none.
 */
function stripYamlFrontmatter(markdown: string): string {
  // `(?=\s|$)` requires the closing `---` to be followed by whitespace or end of input.
  return markdown.replace(/^\uFEFF?---\s*\n(?:[\s\S]*?\n)?---(?=\s|$)\s*/, "");
}
|
|
136
|
+
|
|
137
|
+
/**
 * Read the `title:` entry from a document's leading YAML frontmatter.
 *
 * Only a top-level `title:` key at the start of a line is considered; one pair
 * of surrounding quotes is dropped and the value is passed through
 * `stripMarkdownDecorators`.
 *
 * @param markdown Full Markdown source, possibly starting with a byte-order mark.
 * @returns The cleaned title, or `undefined` when there is no frontmatter, no
 *          `title:` key, or the key has no value on its own line.
 */
function yamlFrontmatterTitle(markdown: string): string | undefined {
  const frontmatter = markdown.match(/^\uFEFF?---\s*\n([\s\S]*?)\n---\s*\n?/);
  // `[ \t]*` instead of `\s*` after the colon: `\s` also matches newlines, so an
  // empty `title:` key would otherwise capture the following line as the title.
  const raw = frontmatter?.[1]?.match(/^title:[ \t]*["']?(.+?)["']?\s*$/m)?.[1];
  return raw ? stripMarkdownDecorators(raw) : undefined;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Find the text of the first level-1 (`# …`) heading outside fenced code blocks.
 *
 * Frontmatter is removed first via `stripYamlFrontmatter`. Fences are tracked by
 * their marker: a block opened with N backticks (or tildes) is only closed by a
 * line of at least N of the same character and nothing else, so nested or
 * differently marked fences inside an example do not end the block early.
 *
 * @param markdown Full Markdown source.
 * @returns The trimmed heading text, or `undefined` when no such heading exists.
 */
function firstMarkdownHeading(markdown: string): string | undefined {
  // Marker (e.g. "```" or "~~~~") that opened the fenced block we are inside, if any.
  let openFence: string | undefined;
  // Split on CRLF as well as LF: `.` never matches "\r", so a trailing "\r"
  // would make the `$`-anchored heading pattern below fail on every line.
  for (const line of stripYamlFrontmatter(markdown).split(/\r?\n/)) {
    const fence = line.match(/^\s*(`{3,}|~{3,})(.*)$/);
    if (fence) {
      const marker = fence[1];
      if (openFence === undefined) {
        openFence = marker;
      } else if (marker[0] === openFence[0] && marker.length >= openFence.length && fence[2].trim() === "") {
        openFence = undefined;
      }
      continue;
    }
    if (openFence !== undefined) continue;
    const match = line.match(/^#\s+(.+)$/);
    if (match) return match[1].trim();
  }
  return undefined;
}
|
|
124
156
|
|
|
125
157
|
function decodeEntities(input: string): string {
|
|
@@ -137,10 +169,17 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
137
169
|
}
|
|
138
170
|
|
|
139
171
|
function markdownSections(markdown: string, fallbackTitle: string): LocalWikiSection[] {
|
|
172
|
+
const body = stripYamlFrontmatter(markdown);
|
|
140
173
|
const sections: LocalWikiSection[] = [];
|
|
141
174
|
let current: LocalWikiSection | undefined;
|
|
142
|
-
|
|
143
|
-
|
|
175
|
+
let inFence = false;
|
|
176
|
+
for (const line of body.split(/\n/)) {
|
|
177
|
+
if (/^\s*(```|~~~)/.test(line)) {
|
|
178
|
+
inFence = !inFence;
|
|
179
|
+
if (current) current.text += `${line}\n`;
|
|
180
|
+
continue;
|
|
181
|
+
}
|
|
182
|
+
const match = !inFence ? line.match(/^(#{1,6})\s+(.+)$/) : undefined;
|
|
144
183
|
if (match) {
|
|
145
184
|
const title = stripMarkdownDecorators(match[2]);
|
|
146
185
|
if (title.toLowerCase() === "contents") continue;
|
|
@@ -151,7 +190,7 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
151
190
|
}
|
|
152
191
|
if (current) current.text += `${line}\n`;
|
|
153
192
|
}
|
|
154
|
-
if (!current) sections.push({ title: fallbackTitle, level: 1, anchor: anchorFromHeading(fallbackTitle), text:
|
|
193
|
+
if (!current) sections.push({ title: fallbackTitle, level: 1, anchor: anchorFromHeading(fallbackTitle), text: body.trim() });
|
|
155
194
|
else current.text = current.text.trim();
|
|
156
195
|
return sections;
|
|
157
196
|
}
|
|
@@ -168,7 +207,7 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
168
207
|
}
|
|
169
208
|
|
|
170
209
|
/**
 * Choose the display title for a Markdown page.
 *
 * Candidates are tried in order and the first non-empty one wins: the
 * frontmatter `title`, the first level-1 heading, then a title derived from
 * the file path. The winner is passed through `stripMarkdownDecorators`.
 *
 * @param markdown Full Markdown source.
 * @param filePath Path of the file, used for the last-resort title.
 * @returns The cleaned title.
 */
function markdownTitle(markdown: string, filePath: string): string {
  const fromFrontmatter = yamlFrontmatterTitle(markdown);
  // `||` rather than `??` so an empty-string candidate also falls through.
  const chosen = fromFrontmatter || firstMarkdownHeading(markdown) || titleFromPath(filePath);
  return stripMarkdownDecorators(chosen);
}
|
|
173
212
|
|
|
174
213
|
function htmlTitle(html: string, filePath: string): string {
|
|
@@ -211,10 +250,11 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
211
250
|
|
|
212
251
|
/**
 * Build the `LocalWikiPage` record for one documentation file.
 *
 * For HTML sources the title, text and links come from the HTML helpers; for
 * Markdown the YAML frontmatter is stripped before the text is normalized and
 * links are collected. `config.transformText`, when provided and returning a
 * non-nullish value, replaces the base text before sections are computed.
 *
 * @param raw File contents.
 * @param filePath Path of the file; the slug is this path relative to
 *                 `config.docsPath` with `config.fileExtensions` removed.
 * @param mtimeMs Modification time stored on the page as-is.
 * @returns The assembled page record.
 */
function parsePage(raw: string, filePath: string, mtimeMs: number): LocalWikiPage {
  const isHtml = config.format === "html";
  const title = isHtml ? htmlTitle(raw, filePath) : markdownTitle(raw, filePath);
  const markdownBody = isHtml ? raw : stripYamlFrontmatter(raw);
  const baseText = isHtml ? htmlToText(raw) : normalizeWhitespace(markdownBody);
  const text = config.transformText?.(baseText, title, filePath) ?? baseText;
  const sections = markdownSections(text, title);

  const slug = path.relative(config.docsPath, filePath).replace(config.fileExtensions, "");
  const source = config.sourceName?.(filePath, config.docsPath);
  const headings = sections.map((entry) => entry.title);
  const links = isHtml ? htmlLinks(raw, filePath) : markdownLinks(markdownBody, filePath);

  return { title, slug, path: filePath, source, headings, sections, links, text, mtimeMs };
}
|
|
219
259
|
|
|
220
260
|
function limitText(text: string, maxChars = 12000): { text: string; truncated: boolean } {
|
|
@@ -257,12 +297,21 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
257
297
|
}
|
|
258
298
|
|
|
259
299
|
/**
 * Turn a free-text query into the list of search tokens.
 *
 * The query is normalized, split on whitespace, and stripped of stopwords.
 * Each remaining token then contributes its configured `queryExpansions`
 * (normalized, stopwords excluded). Duplicates are removed; original tokens
 * come first, in query order.
 *
 * @param query Raw user query.
 * @returns Unique, non-empty tokens to score against.
 */
function expandQuery(query: string): string[] {
  const tokens = normalizeQuery(query).split(/\s+/).filter((token) => token && !searchStopwords.has(token));
  const expanded = new Set(tokens);
  const expansions = config.queryExpansions;
  for (const token of tokens) {
    // Own-property lookup only: a query word such as "constructor" or "toString"
    // would otherwise resolve to an inherited Object.prototype member, which is
    // not iterable and would make the loop below throw a TypeError.
    const extras = expansions && Object.prototype.hasOwnProperty.call(expansions, token) ? expansions[token] : undefined;
    if (!Array.isArray(extras)) continue;
    for (const extra of extras) {
      const normalized = normalizeQuery(extra);
      if (normalized && !searchStopwords.has(normalized)) expanded.add(normalized);
    }
  }
  return [...expanded].filter(Boolean);
}
|
|
265
310
|
|
|
311
|
+
/**
 * Look up the scoring multiplier for a search token.
 *
 * @param token Normalized search token.
 * @returns The weight configured in `config.termWeights` for `token`, or `1`
 *          when none is configured or the configured value is not a finite number.
 */
function tokenWeight(token: string): number {
  const weights = config.termWeights;
  // Own-property lookup only: tokens like "constructor" would otherwise pick up
  // an inherited function from Object.prototype and turn every score into NaN.
  if (!weights || !Object.prototype.hasOwnProperty.call(weights, token)) return 1;
  const weight = weights[token];
  return typeof weight === "number" && Number.isFinite(weight) ? weight : 1;
}
|
|
314
|
+
|
|
266
315
|
function makeSnippet(text: string, tokens: string[], max = 280): string | undefined {
|
|
267
316
|
const lower = text.toLowerCase();
|
|
268
317
|
const index = tokens.map((t) => lower.indexOf(t.toLowerCase())).filter((i) => i >= 0).sort((a, b) => a - b)[0];
|
|
@@ -282,14 +331,15 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
282
331
|
const matchedFields = new Set<string>();
|
|
283
332
|
const scoreExplanation: string[] = [];
|
|
284
333
|
for (const token of tokens) {
|
|
285
|
-
|
|
286
|
-
if (
|
|
287
|
-
if (
|
|
288
|
-
if (
|
|
334
|
+
const weight = tokenWeight(token);
|
|
335
|
+
if (title.includes(token)) { score += 25 * weight; matchedFields.add("title"); scoreExplanation.push(`title matched '${token}'`); }
|
|
336
|
+
if (slug.includes(token)) { score += 12 * weight; matchedFields.add("slug"); }
|
|
337
|
+
if (source.includes(token)) { score += 8 * weight; matchedFields.add("source"); }
|
|
338
|
+
if (headings.includes(token)) { score += 10 * weight; matchedFields.add("headings"); }
|
|
289
339
|
const textMatches = text.split(token).length - 1;
|
|
290
|
-
if (textMatches > 0) { score += Math.min(15, textMatches); matchedFields.add("text"); }
|
|
340
|
+
if (textMatches > 0) { score += Math.min(15, textMatches) * weight; matchedFields.add("text"); }
|
|
291
341
|
}
|
|
292
|
-
return score > 0 ? { title: page.title, path: page.path, source: page.source, score, matchedFields: [...matchedFields], scoreExplanation, snippet: makeSnippet(page.text, tokens) } : undefined;
|
|
342
|
+
return score > 0 ? { title: page.title, path: page.path, source: page.source, score: Number(score.toFixed(2)), matchedFields: [...matchedFields], scoreExplanation, snippet: makeSnippet(page.text, tokens) } : undefined;
|
|
293
343
|
}
|
|
294
344
|
|
|
295
345
|
function findPage(pages: LocalWikiPage[], pageRef: string): LocalWikiPage | undefined {
|
|
@@ -308,9 +358,9 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
308
358
|
/**
 * Rank cached pages against a query.
 *
 * @param params.query Raw query; expanded through `expandQuery`.
 * @param params.limit Maximum number of results; defaults to 8 and is clamped to 1–50.
 * @param params.includeSnippets Snippets are kept only when this is exactly `true`.
 * @returns The original query, the expanded tokens, and the results sorted by
 *          descending score.
 */
async function search(params: { query: string; limit?: number; includeSnippets?: boolean }) {
  const { pages } = await loadCache();
  const tokens = expandQuery(params.query);
  const limit = Math.max(1, Math.min(params.limit ?? 8, 50));

  // Keep only pages that produced a score.
  const hits: LocalWikiSearchResult[] = [];
  for (const page of pages) {
    const hit = scorePage(page, tokens);
    if (hit) hits.push(hit);
  }
  hits.sort((a, b) => b.score - a.score);
  const results = hits.slice(0, limit);

  return {
    query: params.query,
    expandedTokens: tokens,
    results: params.includeSnippets === true ? results : results.map(({ snippet, ...rest }) => rest),
  };
}
|
|
315
365
|
|
|
316
366
|
async function loadPage(pageRef: string): Promise<LocalWikiPage> {
|
|
@@ -326,23 +376,27 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
|
|
|
326
376
|
return { title: page.title, source: page.source, path: page.path, citation: `${page.path} — ${page.title}`, truncated: limited.truncated, text: limited.text };
|
|
327
377
|
}
|
|
328
378
|
|
|
329
|
-
/**
 * List a page's section outline (title, level, anchor) without section text.
 *
 * @param params.page Page reference passed to `loadPage`.
 * @param params.maxSections Maximum entries to return; defaults to 80 and is clamped to 1–300.
 * @returns Page identity, the total section count, how many sections were
 *          left out by the cap, and the outline entries.
 */
async function sections(params: { page: string; maxSections?: number }) {
  const page = await loadPage(params.page);
  const requested = params.maxSections ?? 80;
  const maxSections = Math.max(1, Math.min(requested, 300));
  const selected = page.sections.slice(0, maxSections);
  const outline = selected.map((entry) => ({ title: entry.title, level: entry.level, anchor: entry.anchor }));
  const omittedSectionCount = Math.max(0, page.sections.length - selected.length);
  return {
    title: page.title,
    source: page.source,
    path: page.path,
    sectionCount: page.sections.length,
    omittedSectionCount,
    sections: outline,
  };
}
|
|
333
385
|
|
|
334
|
-
/**
 * Extract the text of selected sections from a page.
 *
 * Sections are optionally filtered by a title substring (`section`) and/or
 * ranked by a query: each expanded query token found in a section's normalized
 * title+text adds that token's weight, and sections scoring 0 are dropped.
 * The survivors are capped at `maxSections`, rendered as Markdown headings
 * followed by their text, and truncated through `limitText`.
 *
 * When no section survives, the whole page text is returned instead and
 * `matchedSections` is empty.
 *
 * @param params.page Page reference passed to `loadPage`.
 * @param params.section Substring matched against normalized section titles.
 * @param params.query Query used to score and order sections.
 * @param params.maxChars Character budget for the returned text; defaults to 10000.
 * @param params.maxSections Section cap; defaults to 6 when `section` or `query`
 *                           is given, otherwise 5, and is clamped to 1–50.
 * @returns Page identity, a citation string, the sections used, match/omission
 *          counts, the truncation flag and the text.
 */
async function extract(params: { page: string; section?: string; query?: string; maxChars?: number; maxSections?: number }) {
  const page = await loadPage(params.page);
  let matchedSections = page.sections;
  if (params.section) {
    const needle = normalizeQuery(params.section);
    matchedSections = matchedSections.filter((s) => normalizeQuery(s.title).includes(needle));
  }
  if (params.query) {
    const tokens = expandQuery(params.query);
    matchedSections = matchedSections
      .map((section) => {
        // Normalize once per section rather than once per token.
        const haystack = normalizeQuery(`${section.title} ${section.text}`);
        const score = tokens.reduce((sum, token) => sum + (haystack.includes(token) ? tokenWeight(token) : 0), 0);
        return { section, score };
      })
      .filter((i) => i.score > 0)
      .sort((a, b) => b.score - a.score)
      .map((i) => i.section);
  }
  const maxSections = Math.max(1, Math.min(params.maxSections ?? (params.section || params.query ? 6 : 5), 50));
  const totalMatchedSections = matchedSections.length;
  matchedSections = matchedSections.slice(0, maxSections);
  const joined = matchedSections.map((s) => `${"#".repeat(Math.min(s.level, 6))} ${s.title}\n\n${s.text}`).join("\n\n");
  // An empty `joined` (nothing matched) falls back to the full page text.
  const limited = limitText(joined || page.text, params.maxChars ?? 10000);
  return {
    title: page.title,
    source: page.source,
    path: page.path,
    citation: `${page.path} — ${matchedSections.map((s) => s.title).join(", ") || page.title}`,
    matchedSections: matchedSections.map((s) => ({ title: s.title, level: s.level, anchor: s.anchor })),
    totalMatchedSections,
    omittedSectionCount: Math.max(0, totalMatchedSections - matchedSections.length),
    truncated: limited.truncated,
    text: limited.text,
  };
}
|
|
347
401
|
|
|
348
402
|
async function related(params: { page: string; limit?: number }) {
|