@firstpick/pi-utils 0.1.6 → 0.1.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/local-wiki.ts +78 -24
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@firstpick/pi-utils",
3
- "version": "0.1.6",
3
+ "version": "0.1.7",
4
4
  "description": "Shared utilities for Firstpick Pi extension packages.",
5
5
  "main": "index.ts",
6
6
  "exports": {
package/src/local-wiki.ts CHANGED
@@ -59,6 +59,8 @@ export interface LocalWikiEngineConfig {
59
59
  fileExtensions: RegExp;
60
60
  format: LocalWikiFormat;
61
61
  queryExpansions?: Record<string, string[]>;
62
+ searchStopwords?: Iterable<string>;
63
+ termWeights?: Record<string, number>;
62
64
  missingDocsMessage?: string;
63
65
  ignoredDirs?: string[];
64
66
  sourceName?: (filePath: string, docsPath: string) => string | undefined;
@@ -74,6 +76,7 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
74
76
  const metadataCache = path.join(config.cacheDir, "metadata.json");
75
77
  const ignoredDirs = new Set([".git", "node_modules", "result", ...(config.ignoredDirs ?? [])]);
76
78
  const missingDocsMessage = config.missingDocsMessage ?? `Local ${config.displayName} docs are not available at ${config.docsPath}.`;
79
+ const searchStopwords = new Set([...(config.searchStopwords ?? [])].map((word) => normalizeQuery(word)).filter(Boolean));
77
80
 
78
81
  async function localExists(filePath: string): Promise<boolean> {
79
82
  try { await fsp.access(filePath); return true; } catch { return false; }
@@ -119,7 +122,36 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
119
122
  }
120
123
 
121
124
  function stripMarkdownDecorators(input: string): string {
122
- return input.replace(/^#+\s*/, "").replace(/[*_`~]/g, "").replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1").trim();
125
+ return input
126
+ .replace(/^#+\s*/, "")
127
+ .replace(/\s*\{#[^}]+\}\s*$/g, "")
128
+ .replace(/[*_`~]/g, "")
129
+ .replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1")
130
+ .trim();
131
+ }
132
+
133
+ function stripYamlFrontmatter(markdown: string): string {
134
+ return markdown.replace(/^---\s*\n[\s\S]*?\n---\s*\n?/, "");
135
+ }
136
+
137
+ function yamlFrontmatterTitle(markdown: string): string | undefined {
138
+ const frontmatter = markdown.match(/^---\s*\n([\s\S]*?)\n---\s*\n?/);
139
+ const raw = frontmatter?.[1]?.match(/^title:\s*["']?(.+?)["']?\s*$/m)?.[1];
140
+ return raw ? stripMarkdownDecorators(raw) : undefined;
141
+ }
142
+
143
+ function firstMarkdownHeading(markdown: string): string | undefined {
144
+ let inFence = false;
145
+ for (const line of stripYamlFrontmatter(markdown).split(/\n/)) {
146
+ if (/^\s*(```|~~~)/.test(line)) {
147
+ inFence = !inFence;
148
+ continue;
149
+ }
150
+ if (inFence) continue;
151
+ const match = line.match(/^#\s+(.+)$/);
152
+ if (match) return match[1].trim();
153
+ }
154
+ return undefined;
123
155
  }
124
156
 
125
157
  function decodeEntities(input: string): string {
@@ -137,10 +169,17 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
137
169
  }
138
170
 
139
171
  function markdownSections(markdown: string, fallbackTitle: string): LocalWikiSection[] {
172
+ const body = stripYamlFrontmatter(markdown);
140
173
  const sections: LocalWikiSection[] = [];
141
174
  let current: LocalWikiSection | undefined;
142
- for (const line of markdown.split(/\n/)) {
143
- const match = line.match(/^(#{1,6})\s+(.+)$/);
175
+ let inFence = false;
176
+ for (const line of body.split(/\n/)) {
177
+ if (/^\s*(```|~~~)/.test(line)) {
178
+ inFence = !inFence;
179
+ if (current) current.text += `${line}\n`;
180
+ continue;
181
+ }
182
+ const match = !inFence ? line.match(/^(#{1,6})\s+(.+)$/) : undefined;
144
183
  if (match) {
145
184
  const title = stripMarkdownDecorators(match[2]);
146
185
  if (title.toLowerCase() === "contents") continue;
@@ -151,7 +190,7 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
151
190
  }
152
191
  if (current) current.text += `${line}\n`;
153
192
  }
154
- if (!current) sections.push({ title: fallbackTitle, level: 1, anchor: anchorFromHeading(fallbackTitle), text: markdown.trim() });
193
+ if (!current) sections.push({ title: fallbackTitle, level: 1, anchor: anchorFromHeading(fallbackTitle), text: body.trim() });
155
194
  else current.text = current.text.trim();
156
195
  return sections;
157
196
  }
@@ -168,7 +207,7 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
168
207
  }
169
208
 
170
209
  function markdownTitle(markdown: string, filePath: string): string {
171
- return stripMarkdownDecorators(markdown.match(/^#\s+(.+)$/m)?.[1]?.trim() || titleFromPath(filePath));
210
+ return stripMarkdownDecorators(yamlFrontmatterTitle(markdown) || firstMarkdownHeading(markdown) || titleFromPath(filePath));
172
211
  }
173
212
 
174
213
  function htmlTitle(html: string, filePath: string): string {
@@ -211,10 +250,11 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
211
250
 
212
251
  function parsePage(raw: string, filePath: string, mtimeMs: number): LocalWikiPage {
213
252
  const title = config.format === "html" ? htmlTitle(raw, filePath) : markdownTitle(raw, filePath);
214
- const baseText = config.format === "html" ? htmlToText(raw) : normalizeWhitespace(raw);
253
+ const markdownBody = config.format === "html" ? raw : stripYamlFrontmatter(raw);
254
+ const baseText = config.format === "html" ? htmlToText(raw) : normalizeWhitespace(markdownBody);
215
255
  const text = config.transformText?.(baseText, title, filePath) ?? baseText;
216
256
  const sections = markdownSections(text, title);
217
- return { title, slug: path.relative(config.docsPath, filePath).replace(config.fileExtensions, ""), path: filePath, source: config.sourceName?.(filePath, config.docsPath), headings: sections.map((s) => s.title), sections, links: config.format === "html" ? htmlLinks(raw, filePath) : markdownLinks(text, filePath), text, mtimeMs };
257
+ return { title, slug: path.relative(config.docsPath, filePath).replace(config.fileExtensions, ""), path: filePath, source: config.sourceName?.(filePath, config.docsPath), headings: sections.map((s) => s.title), sections, links: config.format === "html" ? htmlLinks(raw, filePath) : markdownLinks(markdownBody, filePath), text, mtimeMs };
218
258
  }
219
259
 
220
260
  function limitText(text: string, maxChars = 12000): { text: string; truncated: boolean } {
@@ -257,12 +297,21 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
257
297
  }
258
298
 
259
299
  function expandQuery(query: string): string[] {
260
- const tokens = normalizeQuery(query).split(/\s+/).filter(Boolean);
300
+ const tokens = normalizeQuery(query).split(/\s+/).filter((token) => token && !searchStopwords.has(token));
261
301
  const expanded = new Set(tokens);
262
- for (const token of tokens) for (const extra of config.queryExpansions?.[token] ?? []) expanded.add(normalizeQuery(extra));
302
+ for (const token of tokens) {
303
+ for (const extra of config.queryExpansions?.[token] ?? []) {
304
+ const normalized = normalizeQuery(extra);
305
+ if (normalized && !searchStopwords.has(normalized)) expanded.add(normalized);
306
+ }
307
+ }
263
308
  return [...expanded].filter(Boolean);
264
309
  }
265
310
 
311
+ function tokenWeight(token: string): number {
312
+ return config.termWeights?.[token] ?? 1;
313
+ }
314
+
266
315
  function makeSnippet(text: string, tokens: string[], max = 280): string | undefined {
267
316
  const lower = text.toLowerCase();
268
317
  const index = tokens.map((t) => lower.indexOf(t.toLowerCase())).filter((i) => i >= 0).sort((a, b) => a - b)[0];
@@ -282,14 +331,15 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
282
331
  const matchedFields = new Set<string>();
283
332
  const scoreExplanation: string[] = [];
284
333
  for (const token of tokens) {
285
- if (title.includes(token)) { score += 25; matchedFields.add("title"); scoreExplanation.push(`title matched '${token}'`); }
286
- if (slug.includes(token)) { score += 12; matchedFields.add("slug"); }
287
- if (source.includes(token)) { score += 8; matchedFields.add("source"); }
288
- if (headings.includes(token)) { score += 10; matchedFields.add("headings"); }
334
+ const weight = tokenWeight(token);
335
+ if (title.includes(token)) { score += 25 * weight; matchedFields.add("title"); scoreExplanation.push(`title matched '${token}'`); }
336
+ if (slug.includes(token)) { score += 12 * weight; matchedFields.add("slug"); }
337
+ if (source.includes(token)) { score += 8 * weight; matchedFields.add("source"); }
338
+ if (headings.includes(token)) { score += 10 * weight; matchedFields.add("headings"); }
289
339
  const textMatches = text.split(token).length - 1;
290
- if (textMatches > 0) { score += Math.min(15, textMatches); matchedFields.add("text"); }
340
+ if (textMatches > 0) { score += Math.min(15, textMatches) * weight; matchedFields.add("text"); }
291
341
  }
292
- return score > 0 ? { title: page.title, path: page.path, source: page.source, score, matchedFields: [...matchedFields], scoreExplanation, snippet: makeSnippet(page.text, tokens) } : undefined;
342
+ return score > 0 ? { title: page.title, path: page.path, source: page.source, score: Number(score.toFixed(2)), matchedFields: [...matchedFields], scoreExplanation, snippet: makeSnippet(page.text, tokens) } : undefined;
293
343
  }
294
344
 
295
345
  function findPage(pages: LocalWikiPage[], pageRef: string): LocalWikiPage | undefined {
@@ -308,9 +358,9 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
308
358
  async function search(params: { query: string; limit?: number; includeSnippets?: boolean }) {
309
359
  const { pages } = await loadCache();
310
360
  const tokens = expandQuery(params.query);
311
- const limit = Math.max(1, Math.min(params.limit ?? 10, 50));
361
+ const limit = Math.max(1, Math.min(params.limit ?? 8, 50));
312
362
  const results = pages.map((p) => scorePage(p, tokens)).filter((x): x is LocalWikiSearchResult => Boolean(x)).sort((a, b) => b.score - a.score).slice(0, limit);
313
- return { query: params.query, expandedTokens: tokens, results: params.includeSnippets === false ? results.map(({ snippet, ...rest }) => rest) : results };
363
+ return { query: params.query, expandedTokens: tokens, results: params.includeSnippets === true ? results : results.map(({ snippet, ...rest }) => rest) };
314
364
  }
315
365
 
316
366
  async function loadPage(pageRef: string): Promise<LocalWikiPage> {
@@ -326,23 +376,27 @@ export function createLocalWikiEngine(config: LocalWikiEngineConfig) {
326
376
  return { title: page.title, source: page.source, path: page.path, citation: `${page.path} — ${page.title}`, truncated: limited.truncated, text: limited.text };
327
377
  }
328
378
 
329
- async function sections(params: { page: string }) {
379
+ async function sections(params: { page: string; maxSections?: number }) {
330
380
  const page = await loadPage(params.page);
331
- return { title: page.title, source: page.source, path: page.path, sections: page.sections.map((s) => ({ title: s.title, level: s.level, anchor: s.anchor })) };
381
+ const maxSections = Math.max(1, Math.min(params.maxSections ?? 80, 300));
382
+ const selected = page.sections.slice(0, maxSections);
383
+ return { title: page.title, source: page.source, path: page.path, sectionCount: page.sections.length, omittedSectionCount: Math.max(0, page.sections.length - selected.length), sections: selected.map((s) => ({ title: s.title, level: s.level, anchor: s.anchor })) };
332
384
  }
333
385
 
334
- async function extract(params: { page: string; section?: string; query?: string; maxChars?: number }) {
386
+ async function extract(params: { page: string; section?: string; query?: string; maxChars?: number; maxSections?: number }) {
335
387
  const page = await loadPage(params.page);
336
388
  let matchedSections = page.sections;
337
389
  if (params.section) { const needle = normalizeQuery(params.section); matchedSections = matchedSections.filter((s) => normalizeQuery(s.title).includes(needle)); }
338
390
  if (params.query) {
339
391
  const tokens = expandQuery(params.query);
340
- matchedSections = matchedSections.map((section) => ({ section, score: tokens.reduce((sum, token) => sum + (normalizeQuery(`${section.title} ${section.text}`).includes(token) ? 1 : 0), 0) })).filter((i) => i.score > 0).sort((a, b) => b.score - a.score).map((i) => i.section);
392
+ matchedSections = matchedSections.map((section) => ({ section, score: tokens.reduce((sum, token) => sum + (normalizeQuery(`${section.title} ${section.text}`).includes(token) ? tokenWeight(token) : 0), 0) })).filter((i) => i.score > 0).sort((a, b) => b.score - a.score).map((i) => i.section);
341
393
  }
342
- if (!params.section && !params.query) matchedSections = matchedSections.slice(0, 5);
394
+ const maxSections = Math.max(1, Math.min(params.maxSections ?? (params.section || params.query ? 6 : 5), 50));
395
+ const totalMatchedSections = matchedSections.length;
396
+ matchedSections = matchedSections.slice(0, maxSections);
343
397
  const joined = matchedSections.map((s) => `${"#".repeat(Math.min(s.level, 6))} ${s.title}\n\n${s.text}`).join("\n\n");
344
- const limited = limitText(joined || page.text, params.maxChars ?? 12000);
345
- return { title: page.title, source: page.source, path: page.path, citation: `${page.path} — ${matchedSections.map((s) => s.title).join(", ") || page.title}`, matchedSections: matchedSections.map((s) => ({ title: s.title, level: s.level, anchor: s.anchor })), truncated: limited.truncated, text: limited.text };
398
+ const limited = limitText(joined || page.text, params.maxChars ?? 10000);
399
+ return { title: page.title, source: page.source, path: page.path, citation: `${page.path} — ${matchedSections.map((s) => s.title).join(", ") || page.title}`, matchedSections: matchedSections.map((s) => ({ title: s.title, level: s.level, anchor: s.anchor })), totalMatchedSections, omittedSectionCount: Math.max(0, totalMatchedSections - matchedSections.length), truncated: limited.truncated, text: limited.text };
346
400
  }
347
401
 
348
402
  async function related(params: { page: string; limit?: number }) {