mkdnsite 0.0.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -3
- package/src/adapters/cloudflare.ts +202 -15
- package/src/adapters/local.ts +38 -17
- package/src/analytics/classify.ts +65 -0
- package/src/analytics/console.ts +39 -0
- package/src/analytics/noop.ts +15 -0
- package/src/analytics/types.ts +49 -0
- package/src/cache/kv.ts +81 -0
- package/src/cache/memory.ts +46 -0
- package/src/cache/response.ts +24 -0
- package/src/cli.ts +301 -51
- package/src/client/scripts.ts +379 -3
- package/src/config/defaults.ts +66 -5
- package/src/config/schema.ts +200 -2
- package/src/content/assets.ts +202 -0
- package/src/content/cache.ts +232 -0
- package/src/content/filesystem.ts +17 -1
- package/src/content/github.ts +169 -102
- package/src/content/nav-builder.ts +120 -0
- package/src/content/r2.ts +214 -0
- package/src/handler.ts +341 -21
- package/src/index.ts +49 -1
- package/src/mcp/server.ts +164 -0
- package/src/mcp/stdio.ts +29 -0
- package/src/mcp/transport.ts +29 -0
- package/src/negotiate/headers.ts +37 -9
- package/src/render/page-shell.ts +249 -8
- package/src/search/index.ts +342 -0
- package/src/security/csp.ts +92 -0
- package/src/theme/{prose-css.ts → base-css.ts} +251 -11
- package/src/theme/build-css.ts +74 -0
package/src/render/page-shell.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { MkdnSiteConfig } from '../config/schema.ts'
|
|
2
2
|
import type { MarkdownMeta, NavNode } from '../content/types.ts'
|
|
3
|
-
import {
|
|
3
|
+
import { buildThemeCss } from '../theme/build-css.ts'
|
|
4
4
|
import { CLIENT_SCRIPTS } from '../client/scripts.ts'
|
|
5
5
|
|
|
6
6
|
interface PageShellProps {
|
|
@@ -9,6 +9,8 @@ interface PageShellProps {
|
|
|
9
9
|
config: MkdnSiteConfig
|
|
10
10
|
nav?: NavNode
|
|
11
11
|
currentSlug: string
|
|
12
|
+
/** Raw markdown body — used for reading time calculation */
|
|
13
|
+
body?: string
|
|
12
14
|
}
|
|
13
15
|
|
|
14
16
|
/**
|
|
@@ -16,7 +18,7 @@ interface PageShellProps {
|
|
|
16
18
|
* This is pure SSR — no client-side React hydration required.
|
|
17
19
|
*/
|
|
18
20
|
export function renderPage (props: PageShellProps): string {
|
|
19
|
-
const { renderedContent, meta, config, nav, currentSlug } = props
|
|
21
|
+
const { renderedContent, meta, config, nav, currentSlug, body } = props
|
|
20
22
|
|
|
21
23
|
const title = meta.title != null
|
|
22
24
|
? `${meta.title} — ${config.site.title}`
|
|
@@ -25,9 +27,25 @@ export function renderPage (props: PageShellProps): string {
|
|
|
25
27
|
const lang = config.site.lang ?? 'en'
|
|
26
28
|
|
|
27
29
|
const navHtml = (config.theme.showNav && nav != null)
|
|
28
|
-
? renderNav(nav, currentSlug)
|
|
30
|
+
? renderNav(nav, currentSlug, config)
|
|
29
31
|
: ''
|
|
30
32
|
|
|
33
|
+
const pageTitleHtml = (config.theme.pageTitle === true && meta.title != null)
|
|
34
|
+
? `<h1 class="mkdn-page-title">${esc(meta.title)}</h1>`
|
|
35
|
+
: ''
|
|
36
|
+
|
|
37
|
+
const pageMetaHtml = buildPageMetaHtml(meta, config, body)
|
|
38
|
+
|
|
39
|
+
const tocHtml = config.theme.showToc
|
|
40
|
+
? buildTocHtml(renderedContent)
|
|
41
|
+
: ''
|
|
42
|
+
|
|
43
|
+
const prevNextHtml = (config.theme.prevNext === true && nav != null)
|
|
44
|
+
? buildPrevNextHtml(nav, currentSlug)
|
|
45
|
+
: ''
|
|
46
|
+
|
|
47
|
+
const hasToc = tocHtml !== ''
|
|
48
|
+
|
|
31
49
|
const clientScripts = config.client.enabled
|
|
32
50
|
? CLIENT_SCRIPTS(config.client)
|
|
33
51
|
: ''
|
|
@@ -52,6 +70,12 @@ export function renderPage (props: PageShellProps): string {
|
|
|
52
70
|
</button>`
|
|
53
71
|
: ''
|
|
54
72
|
|
|
73
|
+
const searchTriggerHtml = config.client.enabled && config.client.search
|
|
74
|
+
? `<button class="mkdn-search-trigger" type="button" aria-label="Search" title="Search (⌘K)">
|
|
75
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="11" cy="11" r="8"/><path d="m21 21-4.3-4.3"/></svg>
|
|
76
|
+
</button>`
|
|
77
|
+
: ''
|
|
78
|
+
|
|
55
79
|
return `<!DOCTYPE html>
|
|
56
80
|
<html lang="${esc(lang)}">
|
|
57
81
|
<head>
|
|
@@ -59,22 +83,33 @@ export function renderPage (props: PageShellProps): string {
|
|
|
59
83
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
60
84
|
<title>${esc(title)}</title>
|
|
61
85
|
${description !== '' ? `<meta name="description" content="${esc(description)}">` : ''}
|
|
86
|
+
${buildOgTags(props)}
|
|
62
87
|
<meta name="generator" content="mkdnsite">
|
|
88
|
+
${buildFaviconTags(config)}
|
|
89
|
+
${buildAnalyticsTags(config)}
|
|
63
90
|
${config.client.math ? '<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16/dist/katex.min.css" crossorigin="anonymous">' : ''}
|
|
64
|
-
<style>${
|
|
91
|
+
<style>${buildThemeCss(config)}</style>
|
|
92
|
+
${config.theme.customCssUrl != null ? `<link rel="stylesheet" href="${esc(config.theme.customCssUrl)}">` : ''}
|
|
65
93
|
${themeInitScript}
|
|
66
94
|
</head>
|
|
67
95
|
<body>
|
|
96
|
+
${searchTriggerHtml}
|
|
68
97
|
${themeToggleHtml}
|
|
69
98
|
<div class="mkdn-layout">
|
|
70
99
|
${navHtml !== '' ? `<nav class="mkdn-nav" aria-label="Site navigation">${navHtml}</nav>` : ''}
|
|
71
100
|
<main class="mkdn-main">
|
|
101
|
+
${hasToc ? '<div class="mkdn-content-area">' : ''}
|
|
72
102
|
<article class="mkdn-article mkdn-prose">
|
|
103
|
+
${pageTitleHtml}
|
|
104
|
+
${pageMetaHtml}
|
|
73
105
|
${renderedContent}
|
|
74
106
|
</article>
|
|
107
|
+
${prevNextHtml}
|
|
75
108
|
<footer class="mkdn-footer">
|
|
76
109
|
<p>Powered by <a href="https://mkdn.site">mkdnsite</a></p>
|
|
77
110
|
</footer>
|
|
111
|
+
${hasToc ? '</div>' : ''}
|
|
112
|
+
${tocHtml}
|
|
78
113
|
</main>
|
|
79
114
|
</div>
|
|
80
115
|
${clientScripts}
|
|
@@ -91,17 +126,32 @@ export function render404 (config: MkdnSiteConfig): string {
|
|
|
91
126
|
})
|
|
92
127
|
}
|
|
93
128
|
|
|
94
|
-
function
|
|
129
|
+
/**
 * Render the header link shown at the top of the site navigation.
 *
 * The header contains the configured logo image (if any) followed by the
 * configured logo text (if non-empty), wrapped in a link to "/".
 *
 * @param config Site configuration; only `theme.logo` and `theme.logoText` are read.
 * @returns The header HTML, or '' when neither a logo nor a non-empty text is configured.
 */
function renderNavHeader (config: MkdnSiteConfig): string {
  const logo = config.theme.logo
  const label = config.theme.logoText
  const hasLabel = label != null && label !== ''
  if (logo == null && !hasLabel) return ''

  const pieces: string[] = []
  if (logo != null) {
    // Logo dimensions default to 32x32 when not configured
    const width = logo.width ?? 32
    const height = logo.height ?? 32
    pieces.push(`<span class="mkdn-nav-logo"><img src="${esc(logo.src)}" alt="${esc(logo.alt ?? '')}" width="${width}" height="${height}"></span>`)
  }
  if (label != null && label !== '') {
    pieces.push(`<span class="mkdn-nav-title">${esc(label)}</span>`)
  }

  return `<a href="/" class="mkdn-nav-header">${pieces.join('')}</a>`
}
|
|
142
|
+
|
|
143
|
+
function renderNav (node: NavNode, currentSlug: string, config: MkdnSiteConfig, depth = 0): string {
|
|
95
144
|
if (depth === 0) {
|
|
96
|
-
const
|
|
97
|
-
|
|
145
|
+
const header = renderNavHeader(config)
|
|
146
|
+
const items = node.children.map(c => renderNav(c, currentSlug, config, 1)).join('\n')
|
|
147
|
+
return `<div class="mkdn-nav-inner">${header}<ul class="mkdn-nav-list">${items}</ul></div>`
|
|
98
148
|
}
|
|
99
149
|
|
|
100
150
|
const isActive = currentSlug === node.slug
|
|
101
151
|
|
|
102
152
|
if (node.isSection && node.children.length > 0) {
|
|
103
153
|
const childItems = node.children
|
|
104
|
-
.map(c => renderNav(c, currentSlug, depth + 1))
|
|
154
|
+
.map(c => renderNav(c, currentSlug, config, depth + 1))
|
|
105
155
|
.join('\n')
|
|
106
156
|
return `<li class="mkdn-nav-section">
|
|
107
157
|
<span class="mkdn-nav-section-title">${esc(node.title)}</span>
|
|
@@ -112,6 +162,197 @@ function renderNav (node: NavNode, currentSlug: string, depth = 0): string {
|
|
|
112
162
|
return `<li${isActive ? ' class="active"' : ''}><a href="${node.slug}"${isActive ? ' aria-current="page"' : ''}>${esc(node.title)}</a></li>`
|
|
113
163
|
}
|
|
114
164
|
|
|
165
|
+
/**
 * Build the metadata line rendered under the page title: the publication
 * date (plus an "Updated" date when present) and the estimated reading time.
 *
 * @param meta   Page frontmatter; `date` and `updated` are read when `config.theme.pageDate` is true.
 * @param config Site configuration; `theme.pageDate`, `theme.readingTime` and `site.lang` are consulted.
 * @param body   Raw markdown body; used for the reading time when `theme.readingTime` is true.
 * @returns A `<div class="mkdn-page-meta">` element, or '' when there is nothing to show.
 */
function buildPageMetaHtml (meta: MarkdownMeta, config: MkdnSiteConfig, body?: string): string {
  const parts: string[] = []
  const showDate = config.theme.pageDate === true
  const showReading = config.theme.readingTime === true

  if (showDate && meta.date != null) {
    const lang = config.site.lang ?? 'en'
    const options: Intl.DateTimeFormatOptions = { year: 'numeric', month: 'long', day: 'numeric' }
    const localFormatter = new Intl.DateTimeFormat(lang, options)
    // Date-only ISO strings ("2024-01-15") are parsed as UTC midnight, so they
    // must be formatted in UTC too; otherwise a server west of UTC prints the
    // previous calendar day.
    const utcFormatter = new Intl.DateTimeFormat(lang, { ...options, timeZone: 'UTC' })

    const renderTime = (raw: unknown): string => {
      const value = coerceDateToString(raw)
      const parsed = new Date(value)
      // format() throws a RangeError for an invalid Date; show the raw
      // frontmatter text instead of failing the whole page render.
      if (Number.isNaN(parsed.getTime())) return esc(value)
      const formatter = /^\d{4}-\d{2}-\d{2}$/.test(value) ? utcFormatter : localFormatter
      return `<time datetime="${esc(value)}">${esc(formatter.format(parsed))}</time>`
    }

    let datePart = renderTime(meta.date)
    if (meta.updated != null) {
      datePart += ` · Updated ${renderTime(meta.updated)}`
    }
    parts.push(datePart)
  }

  if (showReading && body != null) {
    parts.push(buildReadingTimeHtml(body))
  }

  if (parts.length === 0) return ''
  return `<div class="mkdn-page-meta">${parts.join(' · ')}</div>`
}
|
|
195
|
+
|
|
196
|
+
/**
 * Estimate how long the page takes to read and render it as a badge.
 *
 * Words are whitespace-separated runs of the trimmed body; the estimate
 * assumes 238 words per minute, rounds up, and never reports less than
 * one minute (an empty body therefore reads "1 min read").
 *
 * @param body Raw markdown body of the page.
 * @returns A `<span class="mkdn-reading-time">` element.
 */
function buildReadingTimeHtml (body: string): string {
  const wordsPerMinute = 238
  const text = body.trim()

  let words = 0
  if (text !== '') {
    words = text.split(/\s+/).length
  }

  const estimate = Math.ceil(words / wordsPerMinute)
  const minutes = estimate < 1 ? 1 : estimate
  return `<span class="mkdn-reading-time">${minutes} min read</span>`
}
|
|
202
|
+
|
|
203
|
+
/**
 * Build the "On this page" table of contents from already-rendered HTML.
 *
 * Every `<h2>`–`<h4>` element that carries an `id` attribute becomes one
 * entry linking to that id. Nested markup inside the heading is stripped.
 *
 * @param renderedContent HTML produced for the page body.
 * @returns A `<nav class="mkdn-toc">` element, or '' when no heading qualifies.
 */
function buildTocHtml (renderedContent: string): string {
  // \1 ties the closing tag to the level of the opening tag
  const headingRegex = /<h([2-4])\s[^>]*?id="([^"]+)"[^>]*>([\s\S]+?)<\/h\1>/g

  // The id and text come out of rendered HTML and are therefore already
  // entity-encoded. Decode them once (single pass, so "&amp;lt;" stays
  // "&lt;") before esc() re-encodes them; otherwise "Q &amp; A" would be
  // emitted as "Q &amp;amp; A".
  const named: Record<string, string> = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" }
  const decodeEntities = (s: string): string =>
    s.replace(/&(?:#(\d+)|#x([0-9a-f]+)|(amp|lt|gt|quot|apos));/gi, (whole: string, dec?: string, hex?: string, name?: string) => {
      if (name != null) return named[name.toLowerCase()] ?? whole
      const point = dec != null ? parseInt(dec, 10) : parseInt(hex ?? '', 16)
      return point <= 0x10FFFF ? String.fromCodePoint(point) : whole
    })

  const items: string[] = []
  for (const match of renderedContent.matchAll(headingRegex)) {
    const [, level, rawId, inner] = match
    // Strip tags first so a decoded "<" is never mistaken for markup
    const text = decodeEntities(inner.replace(/<[^>]+>/g, '').trim())
    const id = decodeEntities(rawId)
    items.push(`<li class="mkdn-toc-${parseInt(level, 10)}"><a href="#${esc(id)}">${esc(text)}</a></li>`)
  }

  if (items.length === 0) return ''

  return `<nav class="mkdn-toc" aria-label="Table of contents"><p class="mkdn-toc-title">On this page</p><ul>${items.join('')}</ul></nav>`
}
|
|
222
|
+
|
|
223
|
+
/**
 * Flatten the navigation tree into the ordered list of pages.
 *
 * Section nodes contribute only their descendants (the section itself is
 * not listed); every other child contributes its own title and slug.
 *
 * @param node Root (or sub-tree root) whose children are walked depth-first.
 * @returns Pages in navigation order.
 */
function flattenNav (node: NavNode): Array<{ title: string, slug: string }> {
  return node.children.flatMap(child =>
    child.isSection
      ? flattenNav(child)
      : [{ title: child.title, slug: child.slug }]
  )
}
|
|
234
|
+
|
|
235
|
+
/**
 * Render the previous/next page links for the current page.
 *
 * The neighbours are taken from the flattened navigation order. A missing
 * neighbour is rendered as an empty `<span>` placeholder.
 *
 * @param nav         Navigation tree of the site.
 * @param currentSlug Slug of the page being rendered.
 * @returns A `<nav class="mkdn-prev-next">` element, or '' when the page is
 *          not in the navigation or has no neighbour at all.
 */
function buildPrevNextHtml (nav: NavNode, currentSlug: string): string {
  const pages = flattenNav(nav)
  const here = pages.findIndex(page => page.slug === currentSlug)
  if (here < 0) return ''

  // Out-of-range lookups yield undefined, which marks "no neighbour"
  const before = pages[here - 1]
  const after = pages[here + 1]
  if (before == null && after == null) return ''

  const renderLink = (page: { title: string, slug: string } | undefined, cssClass: string, label: string): string => {
    if (page == null) return '<span></span>'
    return `<a href="${esc(page.slug)}" class="${cssClass}"><span class="mkdn-pn-label">${label}</span><span class="mkdn-pn-title">${esc(page.title)}</span></a>`
  }

  const links = renderLink(before, 'mkdn-prev', '← Previous') + renderLink(after, 'mkdn-next', 'Next →')
  return `<nav class="mkdn-prev-next" aria-label="Page navigation">${links}</nav>`
}
|
|
253
|
+
|
|
254
|
+
/**
 * Guess the MIME type of a favicon from the file extension of its URL.
 *
 * The query string and the fragment are ignored, and matching is
 * case-insensitive. Unknown extensions fall back to 'image/x-icon'.
 *
 * @param src Favicon URL or path.
 * @returns The MIME type to put in the `<link rel="icon" type="…">` attribute.
 */
function faviconMimeType (src: string): string {
  // Cut at the first "?" or "#" so "/icon.svg?v=2" and "/icon.svg#dark" both resolve
  const path = src.toLowerCase().split(/[?#]/)[0] ?? ''

  const knownTypes: Array<[string, string]> = [
    ['.svg', 'image/svg+xml'],
    ['.png', 'image/png'],
    ['.ico', 'image/x-icon'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.gif', 'image/gif'],
    ['.webp', 'image/webp']
  ]
  for (const [extension, mimeType] of knownTypes) {
    if (path.endsWith(extension)) return mimeType
  }

  // Default fallback
  return 'image/x-icon'
}
|
|
262
|
+
|
|
263
|
+
/**
 * Build the `<link>` tags that declare the site favicon.
 *
 * An explicit `site.favicon.src` always wins. Otherwise the theme logo is
 * reused, but only when its path (query string ignored) ends in .svg or .png.
 * PNG icons additionally get an `apple-touch-icon` link.
 *
 * @param config Site configuration.
 * @returns One or two `<link>` tags, or '' when no usable icon is configured.
 */
function buildFaviconTags (config: MkdnSiteConfig): string {
  const explicitSrc = config.site.favicon?.src
  const logoSrc = config.theme.logo?.src

  let src: string | undefined
  if (explicitSrc != null) {
    src = explicitSrc
  } else if (logoSrc != null) {
    const logoPath = logoSrc.toLowerCase().split('?')[0]
    const usableAsIcon = ['.svg', '.png'].some(ext => logoPath.endsWith(ext))
    if (usableAsIcon) src = logoSrc
  }
  if (src == null) return ''

  const href = esc(src)
  const mimeType = faviconMimeType(src)
  const iconTag = `<link rel="icon" href="${href}" type="${mimeType}">`

  // apple-touch-icon requires a raster image, so it is emitted for PNG only
  return mimeType === 'image/png'
    ? iconTag + '\n ' + `<link rel="apple-touch-icon" href="${href}">`
    : iconTag
}
|
|
287
|
+
|
|
288
|
+
/**
 * Build the Google Analytics (gtag.js) snippet for the page head.
 *
 * @param config Site configuration; `analytics.googleAnalytics.measurementId` is read.
 * @returns The loader and bootstrap `<script>` tags, or '' when the id is
 *          missing, empty, or not of the form `G-` followed by alphanumerics.
 */
function buildAnalyticsTags (config: MkdnSiteConfig): string {
  const id = config.analytics?.googleAnalytics?.measurementId
  // GA4 measurement IDs are always G- followed by alphanumerics
  const looksValid = id != null && id !== '' && /^G-[A-Z0-9]+$/i.test(id)
  if (id == null || !looksValid) return ''

  // Belt-and-suspenders: escape for both HTML and JS string context
  const htmlSafe = esc(id)
  const safeId = htmlSafe
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/<\//g, '<\\/')

  const snippet = [
    `<script async src="https://www.googletagmanager.com/gtag/js?id=${safeId}"></script>`,
    '<script>',
    'window.dataLayer = window.dataLayer || [];',
    'function gtag(){dataLayer.push(arguments);}',
    "gtag('js', new Date());",
    `gtag('config', '${safeId}');`,
    '</script>'
  ]
  return snippet.join('\n')
}
|
|
303
|
+
|
|
304
|
+
/**
 * Build the Open Graph and Twitter Card `<meta>` tags for a page.
 *
 * Page frontmatter (`title`, `description`, `og_type`, `og_image`) overrides
 * the site-wide defaults in `config.site` / `config.site.og`. When no type is
 * configured, the home page ('' or '/') is a 'website' and every other page
 * an 'article'. `og:url` is only emitted when `config.site.url` is set.
 *
 * @param props Page shell props; `meta`, `config` and `currentSlug` are read.
 * @returns The tags joined by a newline followed by one space.
 */
function buildOgTags (props: PageShellProps): string {
  const { meta, config, currentSlug } = props
  const og = config.site.og

  // Frontmatter is untyped YAML: a value such as `og_type: 42` must not
  // reach esc(), which only accepts strings. Non-string values are ignored
  // and the site-level default applies instead.
  const frontmatter = meta as Record<string, unknown>
  const stringField = (key: string): string | undefined => {
    const value = frontmatter[key]
    return typeof value === 'string' ? value : undefined
  }

  const isHome = currentSlug === '' || currentSlug === '/'
  const ogTitle = meta.title ?? config.site.title
  const ogDescription = meta.description ?? config.site.description ?? ''
  const ogType = stringField('og_type') ?? og?.type ?? (isHome ? 'website' : 'article')
  const ogImage = stringField('og_image') ?? og?.image
  const twitterCard = og?.twitterCard ?? 'summary'
  const twitterSite = og?.twitterSite

  const hasDescription = ogDescription !== ''
  const hasImage = ogImage != null && ogImage !== ''

  const tags: string[] = []
  const add = (attr: 'property' | 'name', key: string, content: string): void => {
    tags.push(`<meta ${attr}="${key}" content="${esc(content)}">`)
  }

  add('property', 'og:title', ogTitle)
  if (hasDescription) add('property', 'og:description', ogDescription)
  add('property', 'og:type', ogType)
  if (config.site.url != null && config.site.url !== '') {
    const baseUrl = config.site.url.replace(/\/$/, '')
    // Guarantee exactly one "/" between the site URL and the page slug
    const path = isHome ? '' : (currentSlug.startsWith('/') ? currentSlug : '/' + currentSlug)
    add('property', 'og:url', baseUrl + path)
  }
  add('property', 'og:site_name', config.site.title)
  if (hasImage && ogImage != null) add('property', 'og:image', ogImage)
  add('name', 'twitter:card', twitterCard)
  add('name', 'twitter:title', ogTitle)
  if (hasDescription) add('name', 'twitter:description', ogDescription)
  if (hasImage && ogImage != null) add('name', 'twitter:image', ogImage)
  if (twitterSite != null && twitterSite !== '') add('name', 'twitter:site', twitterSite)

  return tags.join('\n ')
}
|
|
346
|
+
|
|
347
|
+
/**
 * Turn a frontmatter date value into a string.
 *
 * YAML parsers often hand back Date objects for bare dates; those are
 * reduced to the date part of their ISO representation (the text before
 * the "T"). Every other value is passed through `String()`.
 */
function coerceDateToString (val: unknown): string {
  if (!(val instanceof Date)) return String(val)

  const [datePart] = val.toISOString().split('T')
  return datePart
}
|
|
355
|
+
|
|
115
356
|
function esc (str: string): string {
|
|
116
357
|
return str
|
|
117
358
|
.replace(/&/g, '&')
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import type { ContentPage, ContentSource } from '../content/types.ts'
|
|
2
|
+
|
|
3
|
+
/** One hit returned by `SearchIndex.search`. */
export interface SearchResult {
  /** Slug of the matching page */
  slug: string
  /** Page title as stored in the index */
  title: string
  /** Page description, when the page has one */
  description?: string
  /** Short plain-text snippet taken from the page body */
  excerpt: string
  /** Relevance score; higher means a better match */
  score: number
}
|
|
10
|
+
|
|
11
|
+
/** Operations offered by a full-text search index over site pages. */
export interface SearchIndex {
  /** Insert a page, replacing any entry already stored under the same slug */
  index: (page: ContentPage) => void

  /** Drop the page stored under `slug` from the index */
  remove: (slug: string) => void

  /** Return the pages matching `query`, optionally capped at `limit` results */
  search: (query: string, limit?: number) => SearchResult[]

  /** Discard the current contents and re-index every page of `source` */
  rebuild: (source: ContentSource) => Promise<void>

  /** Dump the internal state as a JSON string suitable for storage */
  serialize: () => string

  /** Load state from a JSON string previously produced by `serialize` */
  deserialize: (data: string) => void
}
|
|
25
|
+
|
|
26
|
+
/** JSON shape of a search index as written to a cache or to disk. */
export interface SerializedSearchIndex {
  /** Format version, kept so newer readers can recognise older payloads */
  v: number
  /** Indexed documents, keyed by slug */
  docs: Record<string, SerializedDocEntry>
  /** Inverted index: each token maps to the slugs of the documents containing it */
  posting: Record<string, string[]>
}
|
|
35
|
+
|
|
36
|
+
/** JSON-friendly form of one indexed document (term frequencies as a plain object). */
interface SerializedDocEntry {
  /** Page slug */
  slug: string
  /** Page title */
  title: string
  /** Page description, when present */
  description?: string
  /** Page tags as strings */
  tags: string[]

  /** Tokens extracted from the title */
  titleTokens: string[]
  /** Tokens extracted from the description */
  descTokens: string[]
  /** Tokens extracted from the tags */
  tagTokens: string[]
  /** Tokens extracted from the body */
  bodyTokens: string[]

  /** Page body as stored for excerpt building */
  body: string
  /** Occurrence count per token */
  termFreqs: Record<string, number>
  /** Total number of counted tokens */
  totalTokens: number
}
|
|
49
|
+
|
|
50
|
+
/** In-memory form of one indexed document (term frequencies held in a Map). */
interface DocEntry {
  /** Page slug */
  slug: string
  /** Page title */
  title: string
  /** Page description, when present */
  description?: string
  /** Page tags as strings */
  tags: string[]

  /** Tokens extracted from the title */
  titleTokens: string[]
  /** Tokens extracted from the description */
  descTokens: string[]
  /** Tokens extracted from the tags */
  tagTokens: string[]
  /** Tokens extracted from the body */
  bodyTokens: string[]

  /** Page body as stored for excerpt building */
  body: string
  /** Occurrence count per token */
  termFreqs: Map<string, number>
  /** Total number of counted tokens */
  totalTokens: number
}
|
|
63
|
+
|
|
64
|
+
/**
 * Create an in-memory full-text search index.
 *
 * Pages are tokenized (title weighted 3x, description and tags 2x, body 1x)
 * and ranked with TF-IDF at query time. All state lives in two maps owned by
 * the returned object, so every call yields an independent index.
 *
 * @returns A `SearchIndex` with index/remove/search/rebuild/serialize/deserialize.
 */
export function createSearchIndex (): SearchIndex {
  // Inverted index: token → slugs of the documents containing it
  const posting = new Map<string, Set<string>>()
  // Indexed documents keyed by slug
  const docs = new Map<string, DocEntry>()

  function addToPosting (token: string, slug: string): void {
    const slugs = posting.get(token)
    if (slugs == null) {
      posting.set(token, new Set([slug]))
    } else {
      slugs.add(slug)
    }
  }

  /**
   * Remove `slug` from the posting list of every token the entry contributed.
   * Only the entry's own tokens are visited, and posting lists that become
   * empty are deleted so the map does not accumulate dead tokens.
   */
  function removeFromPosting (slug: string, entry: DocEntry): void {
    for (const token of entry.termFreqs.keys()) {
      const slugs = posting.get(token)
      if (slugs == null) continue
      slugs.delete(slug)
      if (slugs.size === 0) posting.delete(token)
    }
  }

  /** Add a page to the index, replacing any entry stored under the same slug. */
  function index (page: ContentPage): void {
    const slug = page.slug

    // Remove any existing entry first
    const previous = docs.get(slug)
    if (previous != null) removeFromPosting(slug, previous)

    const title = String(page.meta.title ?? '')
    const description = page.meta.description != null ? String(page.meta.description) : undefined
    const tags: string[] = Array.isArray(page.meta.tags)
      ? (page.meta.tags as unknown[]).map(t => String(t))
      : []

    const titleTokens = tokenize(title)
    const descTokens = description != null ? tokenize(description) : []
    const tagTokens = tags.flatMap(t => tokenize(t))
    const bodyTokens = tokenize(stripMarkdown(page.body))

    // Boost by repetition: title 3x, description 2x, tags 2x
    const allTokens = [
      ...titleTokens, ...titleTokens, ...titleTokens,
      ...descTokens, ...descTokens,
      ...tagTokens, ...tagTokens,
      ...bodyTokens
    ]

    const termFreqs = new Map<string, number>()
    for (const t of allTokens) {
      termFreqs.set(t, (termFreqs.get(t) ?? 0) + 1)
    }

    docs.set(slug, {
      slug,
      title,
      description,
      tags,
      titleTokens,
      descTokens,
      tagTokens,
      bodyTokens,
      body: page.body,
      termFreqs,
      totalTokens: allTokens.length
    })

    for (const token of termFreqs.keys()) {
      addToPosting(token, slug)
    }
  }

  /** Remove the page stored under `slug`; unknown slugs are ignored. */
  function remove (slug: string): void {
    const entry = docs.get(slug)
    if (entry == null) return
    removeFromPosting(slug, entry)
    docs.delete(slug)
  }

  /**
   * Rank the indexed pages against `query`.
   *
   * @param query Free-text query; tokenized the same way as page content.
   * @param limit Maximum number of results, clamped to the range 0–50.
   * @returns Matches sorted by descending score.
   */
  function search (query: string, limit = 10): SearchResult[] {
    // Clamp below as well: a negative limit would make slice() drop results
    // from the end of the list instead of returning none.
    const cappedLimit = Math.max(0, Math.min(limit, 50))
    const trimmed = query.trim()
    if (trimmed === '') return []

    const queryTokens = tokenize(trimmed)
    if (queryTokens.length === 0) return []

    const totalDocs = docs.size
    if (totalDocs === 0) return []

    // Gather candidate slugs (any posting list hit)
    const candidates = new Set<string>()
    for (const token of queryTokens) {
      const slugs = posting.get(token)
      if (slugs != null) {
        for (const slug of slugs) candidates.add(slug)
      }
    }

    const results: SearchResult[] = []

    for (const slug of candidates) {
      const entry = docs.get(slug)
      if (entry == null) continue

      let score = 0
      for (const token of queryTokens) {
        const tf = (entry.termFreqs.get(token) ?? 0) / (entry.totalTokens === 0 ? 1 : entry.totalTokens)
        const docsWithTerm = posting.get(token)?.size ?? 0
        if (docsWithTerm === 0) continue
        // Smoothed IDF: log((N+1) / df) — avoids zero when N == df
        const idf = Math.log((totalDocs + 1) / docsWithTerm)
        score += tf * idf
      }

      if (score <= 0) continue

      results.push({
        slug,
        title: entry.title,
        description: entry.description,
        excerpt: buildExcerpt(entry.body, queryTokens),
        score
      })
    }

    return results
      .sort((a, b) => b.score - a.score)
      .slice(0, cappedLimit)
  }

  /**
   * Replace the index contents with every non-draft page of `source`.
   * The existing contents are only discarded once the page list has loaded.
   */
  async function rebuild (source: ContentSource): Promise<void> {
    const pages = await source.listPages()
    docs.clear()
    posting.clear()
    for (const page of pages) {
      if (page.meta.draft !== true) {
        index(page)
      }
    }
  }

  /** Serialize the index state to a JSON string (format version 1). */
  function serialize (): string {
    const docsObj: Record<string, SerializedDocEntry> = {}
    for (const [slug, entry] of docs) {
      docsObj[slug] = { ...entry, termFreqs: Object.fromEntries(entry.termFreqs) }
    }

    const postingObj: Record<string, string[]> = {}
    for (const [token, slugs] of posting) {
      postingObj[token] = Array.from(slugs)
    }

    const serialized: SerializedSearchIndex = { v: 1, docs: docsObj, posting: postingObj }
    return JSON.stringify(serialized)
  }

  /**
   * Replace the index state with a payload produced by `serialize`.
   *
   * The payload is fully converted before the live maps are touched, so a
   * malformed payload throws and leaves the current index intact.
   *
   * @throws Error when the payload is not a version-1 serialized index.
   */
  function deserialize (data: string): void {
    const parsed = JSON.parse(data) as Partial<SerializedSearchIndex> | null
    if (parsed == null || typeof parsed !== 'object') {
      throw new Error('SearchIndex: serialized data is not an object')
    }
    if (parsed.v !== 1) {
      throw new Error('SearchIndex: unsupported serialization version ' + String(parsed.v))
    }
    const { docs: rawDocs, posting: rawPosting } = parsed
    if (rawDocs == null || typeof rawDocs !== 'object' || rawPosting == null || typeof rawPosting !== 'object') {
      throw new Error('SearchIndex: serialized data is missing docs or posting')
    }

    const nextDocs = new Map<string, DocEntry>()
    for (const [slug, entry] of Object.entries(rawDocs)) {
      nextDocs.set(slug, { ...entry, termFreqs: new Map(Object.entries(entry.termFreqs)) })
    }

    const nextPosting = new Map<string, Set<string>>()
    for (const [token, slugs] of Object.entries(rawPosting)) {
      nextPosting.set(token, new Set(slugs))
    }

    // Everything converted cleanly: swap the new state in
    docs.clear()
    posting.clear()
    for (const [slug, entry] of nextDocs) docs.set(slug, entry)
    for (const [token, slugs] of nextPosting) posting.set(token, slugs)
  }

  return { index, remove, search, rebuild, serialize, deserialize }
}
|
|
275
|
+
|
|
276
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
277
|
+
|
|
278
|
+
/**
 * Split text into normalized search tokens.
 *
 * The text is NFC-normalized and lower-cased; every character that is not a
 * letter, combining mark, digit, whitespace, apostrophe or hyphen becomes a
 * separator. Leading and trailing apostrophes/hyphens are trimmed from each
 * token, and tokens shorter than two characters or listed in STOP_WORDS are
 * dropped.
 *
 * Letters and digits are matched with Unicode property escapes, so accented
 * and non-Latin words survive ("café" stays "café" rather than "caf").
 *
 * @param text Arbitrary plain text.
 * @returns Tokens in order of appearance (duplicates preserved).
 */
function tokenize (text: string): string[] {
  return text
    .normalize('NFC')
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s'-]/gu, ' ')
    .split(/\s+/)
    .map(t => t.replace(/^['-]+|['-]+$/g, ''))
    .filter(t => t.length >= 2 && !STOP_WORDS.has(t))
}
|
|
286
|
+
|
|
287
|
+
/**
 * Reduce markdown to approximate plain text for indexing and excerpts.
 *
 * The rules run in the listed order; each one rewrites the output of the
 * previous rule. The result has all whitespace runs collapsed to a single
 * space and is trimmed.
 *
 * @param md Markdown source.
 * @returns Plain text on a single line.
 */
function stripMarkdown (md: string): string {
  const rules: Array<[RegExp, string]> = [
    [/```[\s\S]*?```/g, ' '], // fenced code blocks are dropped
    [/`[^`]+`/g, ' '], // inline code is dropped
    [/^#{1,6}\s+/gm, ''], // heading markers (text is kept)
    [/!\[[^\]]*\]\([^)]*\)/g, ' '], // images are dropped
    [/\[([^\]]*)\]\([^)]*\)/g, '$1'], // links keep their text
    [/[*_]{1,3}([^*_]+)[*_]{1,3}/g, '$1'], // bold/italic markers
    [/^>\s*/gm, ''], // blockquote markers
    [/^[-*_]{3,}\s*$/gm, ''], // horizontal rules
    [/<[^>]+>/g, ' '], // HTML tags
    [/\s+/g, ' '] // collapse whitespace
  ]

  let text = md
  for (const [pattern, replacement] of rules) {
    text = text.replace(pattern, replacement)
  }
  return text.trim()
}
|
|
311
|
+
|
|
312
|
+
/**
 * Cut a short plain-text snippet out of a page body for a search result.
 *
 * The snippet is up to 150 characters long. It is positioned around the
 * first occurrence of the first query token (in query order) found in the
 * text, starting up to 50 characters before it. When no token occurs, the
 * beginning of the text is used. An ellipsis marks each truncated side.
 *
 * @param body        Markdown body of the page.
 * @param queryTokens Lower-case query tokens.
 * @returns The snippet.
 */
function buildExcerpt (body: string, queryTokens: string[]): string {
  const windowSize = 150
  const leadIn = 50
  const ellipsis = '…'

  const plain = stripMarkdown(body)
  const haystack = plain.toLowerCase()

  const firstHit = queryTokens.find(token => haystack.includes(token))
  if (firstHit === undefined) {
    const head = plain.slice(0, windowSize).trim()
    return plain.length > windowSize ? head + ellipsis : head
  }

  const from = Math.max(0, haystack.indexOf(firstHit) - leadIn)
  const to = Math.min(plain.length, from + windowSize)
  const prefix = from > 0 ? ellipsis : ''
  const suffix = to < plain.length ? ellipsis : ''
  return prefix + plain.slice(from, to).trim() + suffix
}
|
|
334
|
+
|
|
335
|
+
/** Common English words that tokenization drops because they carry no search value. */
const STOP_WORDS = new Set(
  [
    'a an the and or but in on at to for',
    'of with by from is it its this that be',
    'as was are were been has have had do does',
    'did not no so if up can will you we he',
    'she they their all any also more into than',
    'then when how what which who use used using'
  ].join(' ').split(' ')
)
|