@firstpick/pi-utils 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/index.ts +470 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,9 +5,26 @@ Shared helper utilities used by `@firstpick/pi-extension-*` packages.
|
|
|
5
5
|
## Exports
|
|
6
6
|
|
|
7
7
|
- `getAgentDir()`
|
|
8
|
+
- `getPiDir()`
|
|
9
|
+
- `getAgentEnvPath()`
|
|
10
|
+
- `getAgentSettingsPath()`
|
|
11
|
+
- `getWorkspaceEnvPath(cwd?)`
|
|
8
12
|
- `envFlag(name, fallback?)`
|
|
9
13
|
- `resolvePathFromAgentDir(configuredPath)`
|
|
14
|
+
- `parseEnvFile(filePath)`
|
|
15
|
+
- `readEnvValue(filePath, key)`
|
|
16
|
+
- `resolveEnvValue(key, options?)`
|
|
17
|
+
- `quoteEnvValue(value)`
|
|
18
|
+
- `upsertEnvValue(filePath, key, value)`
|
|
19
|
+
- `slugify(input, options?)`
|
|
20
|
+
- `formatTokens(count)`
|
|
21
|
+
- `estimateTokensFromCharCount(charCount)`
|
|
22
|
+
- `estimatePromptInjectionTokens(systemPrompt)`
|
|
23
|
+
- `delay(ms)`
|
|
10
24
|
- `createExtensionWorkingIndicator(ctx, initialMessage, options?)`
|
|
11
25
|
- `withExtensionWorkingIndicator(ctx, initialMessage, run, options?)`
|
|
26
|
+
- `createLocalWikiEngine(config)`
|
|
12
27
|
|
|
13
28
|
`createExtensionWorkingIndicator` renders a reusable extension-owned spinner using `ctx.ui.setWidget` plus footer `setStatus`, so it works inside slash-command handlers where Pi's built-in model-streaming working row is not shown.
|
|
29
|
+
|
|
30
|
+
`createLocalWikiEngine` centralizes local documentation corpus handling for wiki-style extensions: file discovery, Markdown/HTML parsing, section/link extraction, cache freshness, query expansion, search ranking, snippets, page reads, focused extracts, related links, and status payloads.
|
package/index.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
1
2
|
import os from "node:os";
|
|
2
3
|
import path from "node:path";
|
|
3
4
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
@@ -15,6 +16,17 @@ export type ExtensionWorkingIndicatorOptions = {
|
|
|
15
16
|
frames?: string[];
|
|
16
17
|
};
|
|
17
18
|
|
|
19
|
+
/**
 * Outcome of looking a key up in the process environment and `.env` files.
 * Every field is absent when the key was not found in any location.
 */
export type EnvResolution = {
  /** The resolved value. */
  value?: string;
  /** Which lookup location supplied `value`. */
  source?: "environment" | "workspace .env" | "Pi global .env";
  /** The `.env` file the value came from; not set for process-environment hits. */
  path?: string;
};
|
|
24
|
+
|
|
25
|
+
/** Options accepted by `slugify`. */
export type SlugifyOptions = {
  /** Maximum slug length in characters; `slugify` uses 80 when omitted. */
  maxLength?: number;
  /** Value returned when the input yields an empty slug. */
  fallback?: string;
};
|
|
29
|
+
|
|
18
30
|
export function getAgentDir(): string {
|
|
19
31
|
const env = process.env.PI_CODING_AGENT_DIR?.trim();
|
|
20
32
|
if (env) return path.resolve(env);
|
|
@@ -31,6 +43,108 @@ export function resolvePathFromAgentDir(configuredPath: string): string {
|
|
|
31
43
|
return path.isAbsolute(configuredPath) ? path.normalize(configuredPath) : path.resolve(getAgentDir(), configuredPath);
|
|
32
44
|
}
|
|
33
45
|
|
|
46
|
+
/** Returns the parent directory of the directory reported by `getAgentDir()`. */
export function getPiDir(): string {
  const agentDir = getAgentDir();
  return path.dirname(agentDir);
}
|
|
49
|
+
|
|
50
|
+
/** Returns the path of the `.env` file located directly inside the agent directory. */
export function getAgentEnvPath(): string {
  const agentDir = getAgentDir();
  return path.join(agentDir, ".env");
}
|
|
53
|
+
|
|
54
|
+
/** Returns the path of `settings.json` located directly inside the agent directory. */
export function getAgentSettingsPath(): string {
  const agentDir = getAgentDir();
  return path.join(agentDir, "settings.json");
}
|
|
57
|
+
|
|
58
|
+
/**
 * Returns the path of the `.env` file inside a workspace directory.
 *
 * @param cwd Workspace directory; defaults to the current working directory.
 */
export function getWorkspaceEnvPath(cwd = process.cwd()): string {
  const envFileName = ".env";
  return path.join(cwd, envFileName);
}
|
|
61
|
+
|
|
62
|
+
/**
 * Parses a dotenv-style file into a key/value map.
 *
 * Recognised lines look like `KEY=value` or `export KEY=value`; blank lines,
 * `#` comment lines and lines that do not match are skipped. When a key occurs
 * more than once the last occurrence wins.
 *
 * Value handling:
 * - A cleanly quoted value (`"…"` or `'…'`, optionally followed by a `# comment`)
 *   keeps everything between the quotes, including `#` characters.
 * - Double-quoted values are decoded as JSON string literals when possible, so
 *   values written by `quoteEnvValue` / `upsertEnvValue` round-trip. If the
 *   content is not valid JSON only `\n` escapes are expanded.
 * - Anything else is parsed as before: an inline ` #comment` is stripped, the
 *   value is trimmed, one pair of matching surrounding quotes is removed and
 *   `\n` escapes become newlines.
 *
 * @param filePath Path of the file to read.
 * @returns The parsed map; `{}` when the file does not exist.
 * @throws Re-throws the read error when the file exists but cannot be read.
 */
export function parseEnvFile(filePath: string): Record<string, string> {
  let content: string;
  try {
    // Read directly instead of exists-then-read so a concurrent delete cannot
    // slip in between the two calls.
    content = fs.readFileSync(filePath, "utf8");
  } catch (error: unknown) {
    if (!fs.existsSync(filePath)) return {};
    throw error;
  }

  const values: Record<string, string> = {};
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line || line.startsWith("#")) continue;
    const match = line.match(/^(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)$/);
    if (!match) continue;
    const rawValue = match[2] ?? "";
    values[match[1] ?? ""] = parseQuotedEnvValue(rawValue) ?? parseBareEnvValue(rawValue);
  }
  return values;
}

/**
 * Parses a value that starts with a quote and has a matching closing quote
 * followed only by whitespace or a comment. Returns `undefined` for any other
 * shape so the caller can fall back to the lenient parser.
 */
function parseQuotedEnvValue(rawValue: string): string | undefined {
  const quote = rawValue[0];
  if (quote !== '"' && quote !== "'") return undefined;

  let end = -1;
  for (let i = 1; i < rawValue.length; i++) {
    const ch = rawValue[i];
    // Inside double quotes a backslash escapes the next character.
    if (quote === '"' && ch === "\\") { i++; continue; }
    if (ch === quote) { end = i; break; }
  }
  if (end < 0) return undefined;

  const trailing = rawValue.slice(end + 1).trim();
  if (trailing && !trailing.startsWith("#")) return undefined;

  if (quote === '"') {
    try {
      const decoded: unknown = JSON.parse(rawValue.slice(0, end + 1));
      if (typeof decoded === "string") return decoded;
    } catch {
      // Not a valid JSON string literal (e.g. a raw Windows path): use the
      // lenient `\n`-only expansion below.
    }
  }
  return rawValue.slice(1, end).replace(/\\n/g, "\n");
}

/** Lenient parser for unquoted (or irregularly quoted) values. */
function parseBareEnvValue(rawValue: string): string {
  let value = rawValue;
  const commentStart = value.search(/\s#/);
  if (commentStart >= 0) value = value.slice(0, commentStart);
  value = value.trim();
  if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
    value = value.slice(1, -1);
  }
  return value.replace(/\\n/g, "\n");
}
|
|
81
|
+
|
|
82
|
+
/**
 * Reads a single key from a dotenv-style file.
 *
 * Only keys actually present in the file are returned. Inherited object
 * members such as `toString` or `constructor` are valid key names for the
 * parser's regex and would otherwise leak through as non-string values.
 *
 * @param filePath Path of the `.env` file, parsed with `parseEnvFile`.
 * @param key Variable name to look up.
 * @returns The parsed value, or `undefined` when the key is not in the file.
 */
export function readEnvValue(filePath: string, key: string): string | undefined {
  const values = parseEnvFile(filePath);
  return Object.prototype.hasOwnProperty.call(values, key) ? values[key] : undefined;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Resolves a variable by checking, in order: the process environment, the
 * workspace `.env` (only when `options.includeWorkspace` is set) and the `.env`
 * in the agent directory. Values are trimmed; empty values count as missing.
 *
 * @param key Variable name to resolve.
 * @param options `includeWorkspace` enables the workspace lookup; `cwd` is the
 *   workspace directory passed to `getWorkspaceEnvPath`.
 * @returns The first non-empty hit with its source (and file path for `.env`
 *   hits), or `{}` when nothing was found.
 */
export function resolveEnvValue(key: string, options: { includeWorkspace?: boolean; cwd?: string } = {}): EnvResolution {
  const fromProcess = process.env[key]?.trim();
  if (fromProcess) return { value: fromProcess, source: "environment" };

  // `.env` lookups in priority order; paths are computed lazily, right before
  // the corresponding file is read.
  const lookups: Array<{ source: "workspace .env" | "Pi global .env"; locate: () => string }> = [];
  if (options.includeWorkspace) {
    lookups.push({ source: "workspace .env", locate: () => getWorkspaceEnvPath(options.cwd) });
  }
  lookups.push({ source: "Pi global .env", locate: () => getAgentEnvPath() });

  for (const { source, locate } of lookups) {
    const envPath = locate();
    const value = readEnvValue(envPath, key)?.trim();
    if (value) return { value, source, path: envPath };
  }

  return {};
}
|
|
102
|
+
|
|
103
|
+
/**
 * Quotes a value for writing into a `.env` file by encoding it as a JSON
 * string literal (double quotes, with JSON escaping).
 */
export function quoteEnvValue(value: string): string {
  const quoted = JSON.stringify(value);
  return quoted;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Sets `key` to `value` in a dotenv-style file, creating the file (and its
 * parent directories) when needed.
 *
 * - Every existing assignment of `key` is rewritten in place, so the result
 *   does not depend on which duplicate a reader picks. Leading indentation
 *   and an `export` prefix are kept.
 * - When the key is absent, a new `KEY="value"` line is appended, preceded by
 *   a newline if the file does not already end with one.
 * - Only the matched line is touched: surrounding blank lines and the file's
 *   line endings are left alone.
 *
 * The file is written with mode `0o600` requested; Node applies `mode` only
 * when the write creates the file.
 *
 * @param filePath Path of the `.env` file.
 * @param key Variable name; matched literally.
 * @param value Raw value; it is quoted with `quoteEnvValue`.
 */
export function upsertEnvValue(filePath: string, key: string, value: string): void {
  const existing = fs.existsSync(filePath) ? fs.readFileSync(filePath, "utf8") : "";
  const assignment = `${key}=${quoteEnvValue(value)}`;

  // Escape regex metacharacters so the key is matched literally.
  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // `[ \t]` instead of `\s` keeps the match on one line; `[^\r\n]*` stops
  // before a CR so CRLF line endings survive.
  const pattern = new RegExp(`^([ \\t]*(?:export[ \\t]+)?)${escapedKey}[ \\t]*=[^\\r\\n]*`, "gm");

  let content: string;
  if (pattern.test(existing)) {
    pattern.lastIndex = 0;
    // A replacer function is required: with a replacement *string*, sequences
    // such as `$&` or `$$` inside the value would be interpreted.
    content = existing.replace(pattern, (_match, prefix: string) => `${prefix}${assignment}`);
  } else {
    const separator = existing && !existing.endsWith("\n") ? "\n" : "";
    content = `${existing}${separator}${assignment}\n`;
  }

  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, content, { mode: 0o600 });
}
|
|
115
|
+
|
|
116
|
+
/**
 * Converts arbitrary text into a lower-case, hyphen-separated slug made of
 * `a-z` and `0-9`.
 *
 * Runs of any other characters collapse into a single `-`, and leading and
 * trailing hyphens are removed, both before and after truncation, so a slug
 * cut at `maxLength` never ends in `-`.
 *
 * @param input Text to slugify.
 * @param options `maxLength` (default 80; negative values are treated as 0)
 *   and `fallback`, returned when the slug would be empty.
 * @returns The slug, the fallback, or `""` when neither is available.
 */
export function slugify(input: string, options: SlugifyOptions = {}): string {
  // Clamp so a negative length cannot make `slice` count from the end.
  const maxLength = Math.max(0, options.maxLength ?? 80);
  const slug = input
    .toLowerCase()
    .trim()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, maxLength)
    // Truncation may land right after a separator.
    .replace(/-+$/, "");
  return slug || options.fallback || "";
}
|
|
126
|
+
|
|
127
|
+
/**
 * Formats a token count compactly: plain digits below 1000, one decimal with
 * a `k`/`M` suffix while the scaled value is below 10 (`1.2k`, `3.4M`), and a
 * rounded integer with the suffix otherwise (`12k`, `25M`).
 *
 * Values that round up across a range boundary are promoted to the next
 * format, so 9999 renders as `10k` and 999999 as `1.0M`. Non-finite input is
 * returned as its plain string form.
 *
 * @param count Number of tokens.
 * @returns The compact display string.
 */
export function formatTokens(count: number): string {
  if (!Number.isFinite(count) || count < 1000) return count.toString();

  const withSuffix = (scaled: number, suffix: string): string => {
    const oneDecimal = scaled.toFixed(1);
    // `toFixed` may round up to "10.0"; switch to the integer form then.
    return Number(oneDecimal) < 10 ? `${oneDecimal}${suffix}` : `${Math.round(scaled)}${suffix}`;
  };

  const thousands = count / 1000;
  // Promote to millions as soon as the rounded thousands value would read "1000k".
  if (Math.round(thousands) < 1000) return withSuffix(thousands, "k");
  return withSuffix(count / 1000000, "M");
}
|
|
134
|
+
|
|
135
|
+
/**
 * Estimates a token count from a character count: one token per four
 * characters, rounded to the nearest integer and never below zero.
 */
export function estimateTokensFromCharCount(charCount: number): number {
  const quarter = charCount / 4;
  const rounded = Math.round(quarter);
  return Math.max(0, rounded);
}
|
|
138
|
+
|
|
139
|
+
/**
 * Estimates the token count of a system prompt by passing its string length
 * to `estimateTokensFromCharCount`.
 */
export function estimatePromptInjectionTokens(systemPrompt: string): number {
  const promptChars = systemPrompt.length;
  return estimateTokensFromCharCount(promptChars);
}
|
|
142
|
+
|
|
143
|
+
/**
 * Returns a promise that resolves after `ms` milliseconds. For `ms <= 0` an
 * already-resolved promise is returned and no timer is scheduled.
 */
export function delay(ms: number): Promise<void> {
  const immediate = ms <= 0;
  return immediate
    ? Promise.resolve()
    : new Promise((done) => setTimeout(done, ms));
}
|
|
147
|
+
|
|
34
148
|
export function createExtensionWorkingIndicator(ctx: any, initialMessage: string, options: ExtensionWorkingIndicatorOptions = {}): ExtensionWorkingIndicator {
|
|
35
149
|
const id = options.id ?? "extension-working";
|
|
36
150
|
const title = options.title ?? "Working";
|
|
@@ -79,3 +193,359 @@ export async function withExtensionWorkingIndicator<T>(ctx: any, initialMessage:
|
|
|
79
193
|
/**
 * Default export with the extension entry-point signature. This package only
 * ships helpers, so the function deliberately does nothing with the API.
 */
export default function piUtilsExtension(_pi: ExtensionAPI): void {
  // Intentionally empty: utility package, no runtime behavior.
  return;
}
|
|
196
|
+
|
|
197
|
+
// ---- Local wiki engine ----
|
|
198
|
+
import fsp from "node:fs/promises";
|
|
199
|
+
|
|
200
|
+
/** Source format of the documentation files handled by a wiki engine. */
export type LocalWikiFormat = "markdown" | "html";

/** One heading-delimited section of a page. */
export interface LocalWikiSection {
  /** Heading text with Markdown decorators removed. */
  title: string;
  /** Heading depth, i.e. the number of `#` characters (1-6). */
  level: number;
  /** Lower-cased, hyphen-separated form of the title. */
  anchor: string;
  /** Trimmed body text that follows the heading. */
  text: string;
}

/** A link from one page to another local document. */
export interface LocalWikiLink {
  /** Link text, or a title derived from the target's file name. */
  title: string;
  /** Resolved file-system path of the link target. */
  path: string;
}

/** A parsed documentation page as stored in the page cache. */
export interface LocalWikiPage {
  /** Page title. */
  title: string;
  /** Path relative to the docs directory with the `fileExtensions` match removed. */
  slug: string;
  /** File-system path of the source file. */
  path: string;
  /** Optional label produced by `LocalWikiEngineConfig.sourceName`. */
  source?: string;
  /** Titles of all sections, in document order. */
  headings: string[];
  /** Sections extracted from the page text. */
  sections: LocalWikiSection[];
  /** Local links found in the page. */
  links: LocalWikiLink[];
  /** Page text (HTML converted to Markdown-like text, then `transformText` applied). */
  text: string;
  /** Modification time of the source file, in milliseconds. */
  mtimeMs: number;
}

/** One ranked hit returned by the engine's `search`. */
export interface LocalWikiSearchResult {
  /** Title of the matching page. */
  title: string;
  /** File-system path of the matching page. */
  path: string;
  /** Source label of the matching page, when it has one. */
  source?: string;
  /** Accumulated relevance score; higher ranks first. */
  score: number;
  /** Page fields that matched (`title`, `slug`, `source`, `headings`, `text`). */
  matchedFields: string[];
  /** Human-readable notes on how the score was reached. */
  scoreExplanation: string[];
  /** Excerpt around the first matching token, when one was found in the text. */
  snippet?: string;
}

/** Metadata written next to the page cache and used to decide cache freshness. */
export interface LocalWikiCacheMetadata {
  /** Schema version the cache was built with. */
  schemaVersion: number;
  /** Docs directory the cache was built from. */
  docsPath: string;
  /** ISO timestamp of the cache build. */
  generatedAt: string;
  /** Number of pages in the cache. */
  pageCount: number;
  /** Newest file modification time seen during the build, in milliseconds. */
  newestMtimeMs: number;
  /** Extra data supplied by `LocalWikiEngineConfig.metadataExtra`. */
  extra?: Record<string, unknown>;
}

/** Pages plus the metadata describing the cache they came from. */
export interface LocalWikiLoadedCache {
  pages: LocalWikiPage[];
  metadata: LocalWikiCacheMetadata;
}

/** Configuration for `createLocalWikiEngine`. */
export interface LocalWikiEngineConfig {
  /** Name used in user-facing messages and status output. */
  displayName: string;
  /** Root directory of the documentation files. */
  docsPath: string;
  /** Directory where `pages.json` and `metadata.json` are written. */
  cacheDir: string;
  /** Cache schema version; the engine uses 1 when omitted. */
  schemaVersion?: number;
  /** Tested against file names to select documents; also removed from paths to form slugs. */
  fileExtensions: RegExp;
  /** How document files are parsed. */
  format: LocalWikiFormat;
  /** Extra search terms to add for a given query token. */
  queryExpansions?: Record<string, string[]>;
  /** Error message used when no documents are available. */
  missingDocsMessage?: string;
  /** Entry names to skip, in addition to `.git`, `node_modules` and `result`. */
  ignoredDirs?: string[];
  /** Derives a page's `source` label from its path. */
  sourceName?: (filePath: string, docsPath: string) => string | undefined;
  /** Supplies `extra` for the cache metadata. */
  metadataExtra?: () => Promise<Record<string, unknown>>;
  /** Supplies additional fields merged into the status payload. */
  statusExtra?: () => Promise<Record<string, unknown>>;
  /** Post-processes page text before sections and Markdown links are extracted. */
  transformText?: (text: string, title: string, filePath: string) => string;
  /** Overrides HTML title extraction; `fallback` is the title derived from the file name. */
  titleFromHtml?: (html: string, filePath: string, fallback: string) => string;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Creates an engine for searching and reading a local documentation tree.
 *
 * The engine discovers files under `config.docsPath`, parses them (Markdown
 * as-is, HTML converted to Markdown-like text), and caches the parsed pages
 * as JSON in `config.cacheDir`. The cache is rebuilt whenever the schema
 * version, docs path, page count or newest modification time changes.
 *
 * @param config Engine configuration; see `LocalWikiEngineConfig`.
 * @returns An object with `available`, `stats`, `buildCache`, `loadCache`,
 *   `status`, `search`, `read`, `sections`, `extract`, `related`,
 *   `expandQuery` and `listDocFiles`.
 */
export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
  const schemaVersion = config.schemaVersion ?? 1;
  const pagesCache = path.join(config.cacheDir, "pages.json");
  const metadataCache = path.join(config.cacheDir, "metadata.json");
  const ignoredDirs = new Set([".git", "node_modules", "result", ...(config.ignoredDirs ?? [])]);
  const missingDocsMessage = config.missingDocsMessage ?? `Local ${config.displayName} docs are not available at ${config.docsPath}.`;

  /** Resolves to true when `filePath` is accessible. */
  async function localExists(filePath: string): Promise<boolean> {
    try { await fsp.access(filePath); return true; } catch { return false; }
  }

  /** Tests a file name against `config.fileExtensions`. */
  function hasDocExtension(fileName: string): boolean {
    // A regex with the `g` or `y` flag keeps state in `lastIndex` between
    // `test` calls; reset it so every name is matched from the start.
    config.fileExtensions.lastIndex = 0;
    return config.fileExtensions.test(fileName);
  }

  /** Recursively lists document files below `dir`, sorted, skipping ignored names. */
  async function listDocFiles(dir: string): Promise<string[]> {
    const entries = await fsp.readdir(dir, { withFileTypes: true });
    const files: string[] = [];
    for (const entry of entries) {
      if (ignoredDirs.has(entry.name)) continue;
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        for (const nested of await listDocFiles(full)) files.push(nested);
      } else if (entry.isFile() && hasDocExtension(entry.name)) {
        files.push(full);
      }
    }
    return files.sort();
  }

  /** True when the docs path is a directory containing at least one document. */
  async function available(): Promise<boolean> {
    try {
      const stat = await fsp.stat(config.docsPath);
      return stat.isDirectory() && (await listDocFiles(config.docsPath)).length > 0;
    } catch { return false; }
  }

  /** Counts documents and finds the newest modification time. */
  async function stats(): Promise<{ pageCount: number; newestMtimeMs: number }> {
    if (!await localExists(config.docsPath)) return { pageCount: 0, newestMtimeMs: 0 };
    const files = await listDocFiles(config.docsPath);
    let newestMtimeMs = 0;
    for (const file of files) newestMtimeMs = Math.max(newestMtimeMs, (await fsp.stat(file)).mtimeMs);
    return { pageCount: files.length, newestMtimeMs };
  }

  /** Derives a title from a file name: extension removed, `-`/`_` runs become spaces. */
  function titleFromPath(filePath: string): string {
    return path.basename(filePath, path.extname(filePath)).replace(/[-_]+/g, " ").trim();
  }

  /** Lower-cases a heading and joins its alphanumeric runs with hyphens. */
  function anchorFromHeading(raw: string): string {
    return raw.toLowerCase().replace(/`/g, "").replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "");
  }

  /** Removes trailing spaces, collapses 3+ newlines to 2 and runs of spaces/tabs to one space. */
  function normalizeWhitespace(input: string): string {
    return input.replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n").replace(/[ \t]{2,}/g, " ").trim();
  }

  /** Strips leading `#`s, emphasis/code markers, and reduces `[text](url)` to `text`. */
  function stripMarkdownDecorators(input: string): string {
    return input.replace(/^#+\s*/, "").replace(/[*_`~]/g, "").replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1").trim();
  }

  /** Decodes numeric character references and a small set of named entities. */
  function decodeEntities(input: string): string {
    const entityMap: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };
    return input
      .replace(/&#(x[0-9a-f]+|\d+);/gi, (m, value: string) => {
        const code = value.toLowerCase().startsWith("x") ? Number.parseInt(value.slice(1), 16) : Number.parseInt(value, 10);
        // `String.fromCodePoint` throws a RangeError outside 0..0x10FFFF, so
        // malformed references are left untouched instead of aborting the parse.
        return Number.isInteger(code) && code >= 0 && code <= 0x10ffff ? String.fromCodePoint(code) : m;
      })
      .replace(/&([a-z]+);/gi, (m, name: string) => entityMap[name.toLowerCase()] ?? m);
  }

  /** Replaces tags with spaces, decodes entities and collapses whitespace. */
  function stripTags(input: string): string {
    return decodeEntities(input.replace(/<[^>]+>/g, " ")).replace(/\s+/g, " ").trim();
  }

  /**
   * Splits Markdown text into sections at ATX headings (`#` … `######`).
   * Headings titled "contents" are skipped. Lines inside fenced code blocks
   * are never treated as headings. When no heading is found, the whole text
   * becomes a single section titled `fallbackTitle`.
   */
  function markdownSections(markdown: string, fallbackTitle: string): LocalWikiSection[] {
    const sections: LocalWikiSection[] = [];
    let current: LocalWikiSection | undefined;
    let openFence = "";
    // Split on CRLF as well: a trailing `\r` would stop the heading regex from
    // matching, because `.` does not match `\r` and `$` needs the end of input.
    for (const line of markdown.split(/\r?\n/)) {
      const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
      if (fenceMatch) {
        const marker = fenceMatch[1] ?? "";
        const rest = fenceMatch[2] ?? "";
        if (!openFence) openFence = marker;
        // A closing fence uses the same character, is at least as long as the
        // opening fence, and carries no info string.
        else if (marker[0] === openFence[0] && marker.length >= openFence.length && !rest.trim()) openFence = "";
      }
      const match = openFence || fenceMatch ? null : line.match(/^(#{1,6})\s+(.+)$/);
      if (match) {
        const title = stripMarkdownDecorators(match[2] ?? "");
        if (title.toLowerCase() === "contents") continue;
        if (current) current.text = current.text.trim();
        current = { title, level: (match[1] ?? "").length, anchor: anchorFromHeading(title), text: "" };
        sections.push(current);
        continue;
      }
      if (current) current.text += `${line}\n`;
    }
    if (!current) sections.push({ title: fallbackTitle, level: 1, anchor: anchorFromHeading(fallbackTitle), text: markdown.trim() });
    else current.text = current.text.trim();
    return sections;
  }

  /** Converts an HTML document into Markdown-like plain text. */
  function htmlToText(html: string): string {
    let body = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i)?.[1] ?? html;
    body = body.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ");
    body = body.replace(/<(h[1-6])[^>]*>([\s\S]*?)<\/\1>/gi, (_m, tag: string, inner: string) => `\n\n${"#".repeat(Number(tag.slice(1)))} ${stripTags(inner)}\n\n`);
    body = body.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, (_m, inner: string) => `\n\n\`\`\`\n${stripTags(inner)}\n\`\`\`\n\n`);
    body = body.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, (_m, inner: string) => `\`${stripTags(inner)}\``);
    body = body.replace(/<li[^>]*>/gi, "\n- ").replace(/<br\s*\/?>/gi, "\n");
    body = body.replace(/<\/(p|div|section|article|table|tr|ul|ol|dl)>/gi, "\n");
    return normalizeWhitespace(decodeEntities(body.replace(/<[^>]+>/g, " ")));
  }

  /** Title from the first level-1 heading, falling back to the file name. */
  function markdownTitle(markdown: string, filePath: string): string {
    return stripMarkdownDecorators(markdown.match(/^#\s+(.+)$/m)?.[1]?.trim() || titleFromPath(filePath));
  }

  /** Title from `config.titleFromHtml`, else `<title>`, else the file name. */
  function htmlTitle(html: string, filePath: string): string {
    const fallback = titleFromPath(filePath);
    return (config.titleFromHtml?.(html, filePath, fallback) ?? stripTags(html.match(/<title[^>]*>([\s\S]*?)<\/title>/i)?.[1] ?? "")) || fallback;
  }

  /** True when `candidate` is the docs root or lies below it. */
  function isInsideDocs(candidate: string): boolean {
    // Compare via `path.relative` rather than a string prefix: a prefix test
    // would accept sibling directories such as `<docs>-private` and would
    // never match when `docsPath` is relative or not normalized.
    const rel = path.relative(path.resolve(config.docsPath), candidate);
    return rel === "" || (!path.isAbsolute(rel) && rel !== ".." && !rel.startsWith(`..${path.sep}`));
  }

  /**
   * Resolves a link target relative to `currentFile`. Returns `undefined` for
   * external URLs, pure fragments, and targets outside the docs directory.
   * The target is not checked for existence.
   */
  function resolveLocalPath(currentFile: string, href: string): string | undefined {
    // Any URI scheme (two or more characters, so Windows drive letters are not
    // caught), protocol-relative URLs and fragments are not local files.
    if (/^([a-z][a-z0-9+.-]+:|\/\/|#)/i.test(href)) return undefined;
    const cleanHref = (decodeEntities(href).split("#")[0] ?? "").split("?")[0] ?? "";
    if (!cleanHref) return undefined;
    const ext = config.format === "html" ? ".html" : ".md";
    const candidates = path.extname(cleanHref) ? [cleanHref] : [cleanHref + ext, `${cleanHref}.mdx`, `${cleanHref}.rst`, path.join(cleanHref, "index.md")];
    for (const candidate of candidates) {
      const resolved = path.normalize(path.resolve(path.dirname(currentFile), candidate));
      if (isInsideDocs(resolved)) return resolved;
    }
    return undefined;
  }

  /** Collects unique local links from Markdown `[text](href)` syntax. */
  function markdownLinks(markdown: string, currentFile: string): LocalWikiLink[] {
    const links = new Map<string, LocalWikiLink>();
    for (const match of markdown.matchAll(/\[([^\]]+)\]\(([^\)]+)\)/g)) {
      const resolved = resolveLocalPath(currentFile, (match[2] ?? "").trim());
      if (!resolved) continue;
      links.set(resolved, { title: stripMarkdownDecorators(match[1] ?? "") || titleFromPath(resolved), path: resolved });
    }
    return [...links.values()];
  }

  /** Collects unique local links from `<a href="…">` elements. */
  function htmlLinks(html: string, currentFile: string): LocalWikiLink[] {
    const links = new Map<string, LocalWikiLink>();
    for (const match of html.matchAll(/<a\s+[^>]*href=["']([^"'#?]+)(?:#[^"']*)?["'][^>]*>([\s\S]*?)<\/a>/gi)) {
      const resolved = resolveLocalPath(currentFile, match[1] ?? "");
      if (!resolved) continue;
      links.set(resolved, { title: stripTags(match[2] ?? "") || titleFromPath(resolved), path: resolved });
    }
    return [...links.values()];
  }

  /** Parses one raw document into a `LocalWikiPage`. */
  function parsePage(raw: string, filePath: string, mtimeMs: number): LocalWikiPage {
    const isHtml = config.format === "html";
    const title = isHtml ? htmlTitle(raw, filePath) : markdownTitle(raw, filePath);
    const baseText = isHtml ? htmlToText(raw) : normalizeWhitespace(raw);
    const text = config.transformText?.(baseText, title, filePath) ?? baseText;
    const sections = markdownSections(text, title);
    return {
      title,
      slug: path.relative(config.docsPath, filePath).replace(config.fileExtensions, ""),
      path: filePath,
      source: config.sourceName?.(filePath, config.docsPath),
      headings: sections.map((s) => s.title),
      sections,
      // HTML links come from the raw markup; Markdown links from the processed text.
      links: isHtml ? htmlLinks(raw, filePath) : markdownLinks(text, filePath),
      text,
      mtimeMs,
    };
  }

  /** Truncates `text` to `maxChars`, appending a truncation marker when cut. */
  function limitText(text: string, maxChars = 12000): { text: string; truncated: boolean } {
    return text.length <= maxChars ? { text, truncated: false } : { text: `${text.slice(0, maxChars)}\n\n[truncated at ${maxChars} characters]`, truncated: true };
  }

  /**
   * Parses every document and writes `pages.json` and `metadata.json`.
   * Throws an `Error` with the missing-docs message when no documents exist.
   */
  async function buildCache(): Promise<LocalWikiLoadedCache> {
    if (!await available()) throw new Error(missingDocsMessage);
    await fsp.mkdir(config.cacheDir, { recursive: true });
    const files = await listDocFiles(config.docsPath);
    const pages: LocalWikiPage[] = [];
    let newestMtimeMs = 0;
    for (const file of files) {
      const stat = await fsp.stat(file);
      newestMtimeMs = Math.max(newestMtimeMs, stat.mtimeMs);
      pages.push(parsePage(await fsp.readFile(file, "utf8"), file, stat.mtimeMs));
    }
    const metadata: LocalWikiCacheMetadata = { schemaVersion, docsPath: config.docsPath, generatedAt: new Date().toISOString(), pageCount: pages.length, newestMtimeMs, extra: await config.metadataExtra?.() };
    await fsp.writeFile(pagesCache, JSON.stringify(pages, null, 2));
    await fsp.writeFile(metadataCache, JSON.stringify(metadata, null, 2));
    return { pages, metadata };
  }

  /** True when cached metadata still matches the current docs tree. */
  async function cacheFresh(metadata: LocalWikiCacheMetadata): Promise<boolean> {
    const current = await stats();
    return metadata.schemaVersion === schemaVersion && metadata.docsPath === config.docsPath && metadata.pageCount === current.pageCount && metadata.newestMtimeMs === current.newestMtimeMs;
  }

  /** Returns the cached pages when fresh, otherwise rebuilds the cache. */
  async function loadCache(): Promise<LocalWikiLoadedCache> {
    try {
      const [pagesRaw, metadataRaw] = await Promise.all([fsp.readFile(pagesCache, "utf8"), fsp.readFile(metadataCache, "utf8")]);
      const metadata = JSON.parse(metadataRaw) as LocalWikiCacheMetadata;
      if (await cacheFresh(metadata)) return { pages: JSON.parse(pagesRaw) as LocalWikiPage[], metadata };
    } catch {
      // Best effort: a missing or unreadable cache simply triggers a rebuild.
    }
    return buildCache();
  }

  /** Lower-cases and replaces every run of characters outside `a-z0-9_./+-` with a space. */
  function normalizeQuery(input: string): string {
    return input.toLowerCase().replace(/[^a-z0-9_./+-]+/g, " ").trim();
  }

  /** Tokenizes a query and adds any configured expansions for each token. */
  function expandQuery(query: string): string[] {
    const tokens = normalizeQuery(query).split(/\s+/).filter(Boolean);
    const expanded = new Set(tokens);
    for (const token of tokens) for (const extra of config.queryExpansions?.[token] ?? []) expanded.add(normalizeQuery(extra));
    return [...expanded].filter(Boolean);
  }

  /** Builds an excerpt of about `max` characters around the earliest token match. */
  function makeSnippet(text: string, tokens: string[], max = 280): string | undefined {
    const lower = text.toLowerCase();
    const index = tokens.map((t) => lower.indexOf(t.toLowerCase())).filter((i) => i >= 0).sort((a, b) => a - b)[0];
    if (index === undefined) return undefined;
    const start = Math.max(0, index - Math.floor(max / 2));
    const snippet = text.slice(start, start + max).replace(/\s+/g, " ").trim();
    return `${start > 0 ? "…" : ""}${snippet}${start + max < text.length ? "…" : ""}`;
  }

  /** Scores a page against the tokens; returns `undefined` when nothing matched. */
  function scorePage(page: LocalWikiPage, tokens: string[]): LocalWikiSearchResult | undefined {
    const title = normalizeQuery(page.title);
    const slug = normalizeQuery(page.slug);
    const source = normalizeQuery(page.source ?? "");
    const headings = normalizeQuery(page.headings.join(" "));
    const text = normalizeQuery(page.text);
    let score = 0;
    const matchedFields = new Set<string>();
    const scoreExplanation: string[] = [];
    for (const token of tokens) {
      if (title.includes(token)) { score += 25; matchedFields.add("title"); scoreExplanation.push(`title matched '${token}'`); }
      if (slug.includes(token)) { score += 12; matchedFields.add("slug"); }
      if (source.includes(token)) { score += 8; matchedFields.add("source"); }
      if (headings.includes(token)) { score += 10; matchedFields.add("headings"); }
      // Occurrence count in the body, capped so long pages cannot dominate.
      const textMatches = text.split(token).length - 1;
      if (textMatches > 0) { score += Math.min(15, textMatches); matchedFields.add("text"); }
    }
    return score > 0 ? { title: page.title, path: page.path, source: page.source, score, matchedFields: [...matchedFields], scoreExplanation, snippet: makeSnippet(page.text, tokens) } : undefined;
  }

  /** Finds a page by exact path, exact slug, exact title, then partial slug/title. */
  function findPage(pages: LocalWikiPage[], pageRef: string): LocalWikiPage | undefined {
    const normalized = normalizeQuery(pageRef);
    return pages.find((p) => p.path === pageRef)
      ?? pages.find((p) => normalizeQuery(p.slug) === normalized)
      ?? pages.find((p) => normalizeQuery(p.title) === normalized)
      ?? pages.find((p) => normalizeQuery(p.slug).includes(normalized) || normalizeQuery(p.title).includes(normalized));
  }

  /** Reports availability, page count and last cache build time, plus `statusExtra` fields. */
  async function status() {
    const currentAvailable = await available();
    const currentStats = await stats();
    let cacheGeneratedAt: string | undefined;
    try { cacheGeneratedAt = (JSON.parse(await fsp.readFile(metadataCache, "utf8")) as LocalWikiCacheMetadata).generatedAt; } catch {}
    return { displayName: config.displayName, docsPath: config.docsPath, available: currentAvailable, pageCount: currentStats.pageCount, cacheGeneratedAt, ...(await config.statusExtra?.()) };
  }

  /** Ranks pages against the query; `limit` is clamped to 1..50 (default 10). */
  async function search(params: { query: string; limit?: number; includeSnippets?: boolean }) {
    const { pages } = await loadCache();
    const tokens = expandQuery(params.query);
    const limit = Math.max(1, Math.min(params.limit ?? 10, 50));
    const results = pages.map((p) => scorePage(p, tokens)).filter((x): x is LocalWikiSearchResult => Boolean(x)).sort((a, b) => b.score - a.score).slice(0, limit);
    return { query: params.query, expandedTokens: tokens, results: params.includeSnippets === false ? results.map(({ snippet, ...rest }) => rest) : results };
  }

  /** Loads the page matching `pageRef`, throwing an `Error` when none matches. */
  async function loadPage(pageRef: string): Promise<LocalWikiPage> {
    const cache = await loadCache();
    const page = findPage(cache.pages, pageRef);
    if (!page) throw new Error(`No ${config.displayName} page matched '${pageRef}'. Try a local wiki search first.`);
    return page;
  }

  /** Returns the page text, truncated to `maxChars` (default 20000). */
  async function read(params: { page: string; maxChars?: number }) {
    const page = await loadPage(params.page);
    const limited = limitText(page.text, params.maxChars ?? 20000);
    return { title: page.title, source: page.source, path: page.path, citation: `${page.path} — ${page.title}`, truncated: limited.truncated, text: limited.text };
  }

  /** Returns the section outline (title, level, anchor) of a page. */
  async function sections(params: { page: string }) {
    const page = await loadPage(params.page);
    return { title: page.title, source: page.source, path: page.path, sections: page.sections.map((s) => ({ title: s.title, level: s.level, anchor: s.anchor })) };
  }

  /**
   * Returns the sections selected by a title filter and/or a query, or the
   * first five sections when neither is given. Falls back to the full page
   * text when nothing matched. Output is truncated to `maxChars` (default 12000).
   */
  async function extract(params: { page: string; section?: string; query?: string; maxChars?: number }) {
    const page = await loadPage(params.page);
    let matchedSections = page.sections;
    if (params.section) {
      const needle = normalizeQuery(params.section);
      matchedSections = matchedSections.filter((s) => normalizeQuery(s.title).includes(needle));
    }
    if (params.query) {
      const tokens = expandQuery(params.query);
      matchedSections = matchedSections
        .map((section) => {
          // Normalize once per section instead of once per token.
          const haystack = normalizeQuery(`${section.title} ${section.text}`);
          return { section, score: tokens.reduce((sum, token) => sum + (haystack.includes(token) ? 1 : 0), 0) };
        })
        .filter((i) => i.score > 0)
        .sort((a, b) => b.score - a.score)
        .map((i) => i.section);
    }
    if (!params.section && !params.query) matchedSections = matchedSections.slice(0, 5);
    const joined = matchedSections.map((s) => `${"#".repeat(Math.min(s.level, 6))} ${s.title}\n\n${s.text}`).join("\n\n");
    const limited = limitText(joined || page.text, params.maxChars ?? 12000);
    return { title: page.title, source: page.source, path: page.path, citation: `${page.path} — ${matchedSections.map((s) => s.title).join(", ") || page.title}`, matchedSections: matchedSections.map((s) => ({ title: s.title, level: s.level, anchor: s.anchor })), truncated: limited.truncated, text: limited.text };
  }

  /** Returns up to `limit` (clamped to 1..50, default 10) local links of a page. */
  async function related(params: { page: string; limit?: number }) {
    const page = await loadPage(params.page);
    const limit = Math.max(1, Math.min(params.limit ?? 10, 50));
    return { title: page.title, source: page.source, path: page.path, links: page.links.slice(0, limit) };
  }

  return { available, stats, buildCache, loadCache, status, search, read, sections, extract, related, expandQuery, listDocFiles };
}
|