mkdnsite 0.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -3
- package/src/adapters/cloudflare.ts +276 -15
- package/src/adapters/local.ts +48 -18
- package/src/analytics/classify.ts +65 -0
- package/src/analytics/console.ts +39 -0
- package/src/analytics/noop.ts +15 -0
- package/src/analytics/types.ts +49 -0
- package/src/cache/kv.ts +81 -0
- package/src/cache/memory.ts +46 -0
- package/src/cache/response.ts +24 -0
- package/src/cli.ts +311 -51
- package/src/client/scripts.ts +405 -3
- package/src/config/defaults.ts +68 -5
- package/src/config/schema.ts +214 -2
- package/src/content/assets.ts +202 -0
- package/src/content/cache.ts +232 -0
- package/src/content/filesystem.ts +53 -2
- package/src/content/github.ts +194 -103
- package/src/content/nav-builder.ts +120 -0
- package/src/content/r2.ts +214 -0
- package/src/content/types.ts +10 -0
- package/src/handler.ts +357 -22
- package/src/index.ts +49 -1
- package/src/mcp/server.ts +164 -0
- package/src/mcp/stdio.ts +29 -0
- package/src/mcp/transport.ts +29 -0
- package/src/negotiate/headers.ts +37 -9
- package/src/render/page-shell.ts +250 -8
- package/src/search/index.ts +342 -0
- package/src/security/csp.ts +92 -0
- package/src/theme/{prose-css.ts → base-css.ts} +325 -15
- package/src/theme/build-css.ts +74 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import type { ContentPage, ContentSource } from '../content/types.ts'
|
|
2
|
+
|
|
3
|
+
/**
 * One ranked hit produced by `SearchIndex.search`.
 */
export interface SearchResult {
  /** Slug of the page that matched */
  slug: string
  /** Title captured when the page was indexed (empty string if the page had none) */
  title: string
  /** Description captured when the page was indexed, if the page had one */
  description?: string
  /** Short plain-text snippet of the page body, centred on a query term when one is found */
  excerpt: string
  /** Summed TF-IDF relevance for the query tokens; larger means more relevant */
  score: number
}
|
|
10
|
+
|
|
11
|
+
/**
 * Contract of a full-text search index over content pages.
 */
export interface SearchIndex {
  /** Insert a page, replacing any entry already stored under the same slug */
  index: (page: ContentPage) => void
  /** Drop the page stored under `slug`, if any */
  remove: (slug: string) => void
  /** Return the best-matching pages for `query`, highest score first, at most `limit` of them */
  search: (query: string, limit?: number) => SearchResult[]
  /** Discard the current contents and re-index every page listed by `source` */
  rebuild: (source: ContentSource) => Promise<void>
  /** Dump the internal index state as a JSON string suitable for storage */
  serialize: () => string
  /** Replace the internal state with one previously produced by `serialize` */
  deserialize: (data: string) => void
}
|
|
25
|
+
|
|
26
|
+
/**
 * JSON-friendly snapshot of a search index, as written to cache or disk.
 */
export interface SerializedSearchIndex {
  /** Format version tag, checked on load so incompatible snapshots are rejected */
  v: number
  /** Indexed documents keyed by slug */
  docs: Record<string, SerializedDocEntry>
  /** Inverted index: each token mapped to the slugs of the documents containing it */
  posting: Record<string, string[]>
}
|
|
35
|
+
|
|
36
|
+
/**
 * JSON-friendly form of one indexed document. Mirrors `DocEntry`, except that
 * term frequencies are a plain object rather than a `Map`.
 */
interface SerializedDocEntry {
  /** Slug the document is stored under */
  slug: string
  /** Page title as indexed */
  title: string
  /** Page description as indexed, when present */
  description?: string
  /** Page tags as indexed */
  tags: string[]
  /** Tokens extracted from the title */
  titleTokens: string[]
  /** Tokens extracted from the description */
  descTokens: string[]
  /** Tokens extracted from the tags */
  tagTokens: string[]
  /** Tokens extracted from the markdown-stripped body */
  bodyTokens: string[]
  /** Original (unstripped) page body, kept for excerpt generation */
  body: string
  /** Occurrence count per token over the boosted token list */
  termFreqs: Record<string, number>
  /** Length of the boosted token list; the denominator for term frequency */
  totalTokens: number
}
|
|
49
|
+
|
|
50
|
+
/**
 * In-memory record for one indexed document.
 */
interface DocEntry {
  /** Slug the document is stored under */
  slug: string
  /** Page title as indexed */
  title: string
  /** Page description as indexed, when present */
  description?: string
  /** Page tags as indexed */
  tags: string[]
  /** Tokens extracted from the title */
  titleTokens: string[]
  /** Tokens extracted from the description */
  descTokens: string[]
  /** Tokens extracted from the tags */
  tagTokens: string[]
  /** Tokens extracted from the markdown-stripped body */
  bodyTokens: string[]
  /** Original (unstripped) page body, kept for excerpt generation */
  body: string
  /** Occurrence count per token over the boosted token list */
  termFreqs: Map<string, number>
  /** Length of the boosted token list; the denominator for term frequency */
  totalTokens: number
}
|
|
63
|
+
|
|
64
|
+
/**
 * Create an empty in-memory TF-IDF search index.
 *
 * The index keeps one entry per slug plus an inverted index (token → slugs).
 * Title tokens count three times and description/tag tokens twice when term
 * frequencies are computed, so matches in those fields rank higher than
 * matches in the body.
 *
 * @returns A `SearchIndex` whose state lives in the returned closure
 */
export function createSearchIndex (): SearchIndex {
  // Inverted index: token → set of slugs whose token list contains it
  const posting = new Map<string, Set<string>>()
  // Indexed documents keyed by slug
  const docs = new Map<string, DocEntry>()

  /** Record that `slug` contains `token`. */
  function addToPosting (token: string, slug: string): void {
    let slugs = posting.get(token)
    if (slugs == null) {
      slugs = new Set<string>()
      posting.set(token, slugs)
    }
    slugs.add(slug)
  }

  /** Remove `slug` from every posting list, dropping lists that become empty. */
  function removeFromPosting (slug: string): void {
    for (const [token, slugs] of posting) {
      // Pruning empty lists keeps the vocabulary (and the serialized payload)
      // from growing forever as pages are updated or removed.
      if (slugs.delete(slug) && slugs.size === 0) posting.delete(token)
    }
  }

  /** Add `page` to the index, replacing any entry with the same slug. */
  function index (page: ContentPage): void {
    const slug = page.slug

    // Remove any existing entry first so stale tokens do not linger
    if (docs.has(slug)) removeFromPosting(slug)

    const title = String(page.meta.title ?? '')
    const description = page.meta.description != null ? String(page.meta.description) : undefined
    const tags: string[] = Array.isArray(page.meta.tags)
      ? (page.meta.tags as unknown[]).map(t => String(t))
      : []

    const titleTokens = tokenize(title)
    const descTokens = description != null ? tokenize(description) : []
    const tagTokens = tags.flatMap(t => tokenize(t))
    const bodyTokens = tokenize(stripMarkdown(page.body))

    // Boost: title 3x, description 2x, tags 2x
    const allTokens = [
      ...titleTokens, ...titleTokens, ...titleTokens,
      ...descTokens, ...descTokens,
      ...tagTokens, ...tagTokens,
      ...bodyTokens
    ]

    const termFreqs = new Map<string, number>()
    for (const t of allTokens) {
      termFreqs.set(t, (termFreqs.get(t) ?? 0) + 1)
    }

    const entry: DocEntry = {
      slug,
      title,
      description,
      tags,
      titleTokens,
      descTokens,
      tagTokens,
      bodyTokens,
      body: page.body,
      termFreqs,
      totalTokens: allTokens.length
    }
    docs.set(slug, entry)

    // Update posting lists
    for (const token of termFreqs.keys()) {
      addToPosting(token, slug)
    }
  }

  /** Remove the page stored under `slug`; a no-op when it is not indexed. */
  function remove (slug: string): void {
    if (!docs.has(slug)) return
    removeFromPosting(slug)
    docs.delete(slug)
  }

  /**
   * Rank indexed pages against `query`.
   *
   * @param query - Free-text query; tokenized the same way as page content
   * @param limit - Maximum number of results (default 10, clamped to 0..50)
   * @returns Matching pages sorted by descending score
   */
  function search (query: string, limit = 10): SearchResult[] {
    // Clamp on both sides: a negative limit would otherwise reach
    // slice(0, limit) and return "all but the last N" results.
    const cappedLimit = Math.max(0, Math.min(limit, 50))
    const trimmed = query.trim()
    if (trimmed === '') return []

    const queryTokens = tokenize(trimmed)
    if (queryTokens.length === 0) return []

    const totalDocs = docs.size
    if (totalDocs === 0) return []

    // One pass over the query: gather candidate slugs (any posting list hit)
    // and compute each token's IDF once instead of once per candidate.
    const candidates = new Set<string>()
    const tokenWeights: Array<[string, number]> = []
    for (const token of queryTokens) {
      const slugs = posting.get(token)
      if (slugs == null || slugs.size === 0) continue
      for (const slug of slugs) candidates.add(slug)
      // Smoothed IDF: log((N+1) / df) — avoids zero when N == df
      tokenWeights.push([token, Math.log((totalDocs + 1) / slugs.size)])
    }

    const results: SearchResult[] = []

    for (const slug of candidates) {
      const entry = docs.get(slug)
      if (entry == null) continue

      const denominator = entry.totalTokens === 0 ? 1 : entry.totalTokens
      let score = 0
      for (const [token, idf] of tokenWeights) {
        const tf = (entry.termFreqs.get(token) ?? 0) / denominator
        score += tf * idf
      }

      if (score <= 0) continue

      results.push({
        slug,
        title: entry.title,
        description: entry.description,
        excerpt: buildExcerpt(entry.body, queryTokens),
        score
      })
    }

    return results
      .sort((a, b) => b.score - a.score)
      .slice(0, cappedLimit)
  }

  /**
   * Replace the index contents with the non-draft pages of `source`.
   * The page list is fetched before anything is cleared, so a failing
   * `listPages()` leaves the existing index untouched.
   */
  async function rebuild (source: ContentSource): Promise<void> {
    const pages = await source.listPages()
    docs.clear()
    posting.clear()
    for (const page of pages) {
      if (page.meta.draft !== true) {
        index(page)
      }
    }
  }

  /** Serialize the current state as a version-1 `SerializedSearchIndex` JSON string. */
  function serialize (): string {
    const docsObj: Record<string, SerializedDocEntry> = {}
    for (const [slug, entry] of docs) {
      const termFreqsObj: Record<string, number> = {}
      for (const [token, freq] of entry.termFreqs) {
        termFreqsObj[token] = freq
      }
      docsObj[slug] = {
        slug: entry.slug,
        title: entry.title,
        description: entry.description,
        tags: entry.tags,
        titleTokens: entry.titleTokens,
        descTokens: entry.descTokens,
        tagTokens: entry.tagTokens,
        bodyTokens: entry.bodyTokens,
        body: entry.body,
        termFreqs: termFreqsObj,
        totalTokens: entry.totalTokens
      }
    }

    const postingObj: Record<string, string[]> = {}
    for (const [token, slugs] of posting) {
      postingObj[token] = Array.from(slugs)
    }

    const serialized: SerializedSearchIndex = { v: 1, docs: docsObj, posting: postingObj }
    return JSON.stringify(serialized)
  }

  /**
   * Restore state from a string produced by `serialize`.
   *
   * The payload is decoded into staging maps first and only swapped in once
   * it has been read completely, so a malformed payload never leaves the
   * live index half-restored.
   *
   * @throws SyntaxError when `data` is not valid JSON
   * @throws Error when the payload is not an object, has an unsupported
   *   version, or lacks the `docs` / `posting` tables
   */
  function deserialize (data: string): void {
    const raw: unknown = JSON.parse(data)
    if (typeof raw !== 'object' || raw === null) {
      throw new Error('SearchIndex: serialized data is not an object')
    }
    const parsed = raw as Partial<SerializedSearchIndex>
    if (parsed.v !== 1) {
      throw new Error('SearchIndex: unsupported serialization version ' + String(parsed.v))
    }
    const rawDocs = parsed.docs
    const rawPosting = parsed.posting
    if (rawDocs == null || typeof rawDocs !== 'object' || rawPosting == null || typeof rawPosting !== 'object') {
      throw new Error('SearchIndex: serialized data is missing docs or posting')
    }

    // Decode docs into a staging map
    const nextDocs = new Map<string, DocEntry>()
    for (const [slug, entry] of Object.entries(rawDocs)) {
      nextDocs.set(slug, {
        slug: entry.slug,
        title: entry.title,
        description: entry.description,
        tags: entry.tags,
        titleTokens: entry.titleTokens,
        descTokens: entry.descTokens,
        tagTokens: entry.tagTokens,
        bodyTokens: entry.bodyTokens,
        body: entry.body,
        termFreqs: new Map<string, number>(Object.entries(entry.termFreqs)),
        totalTokens: entry.totalTokens
      })
    }

    // Decode posting lists into a staging map
    const nextPosting = new Map<string, Set<string>>()
    for (const [token, slugs] of Object.entries(rawPosting)) {
      nextPosting.set(token, new Set(slugs))
    }

    // Everything decoded — now replace the live state
    docs.clear()
    posting.clear()
    for (const [slug, entry] of nextDocs) docs.set(slug, entry)
    for (const [token, slugs] of nextPosting) posting.set(token, slugs)
  }

  return { index, remove, search, rebuild, serialize, deserialize }
}
|
|
275
|
+
|
|
276
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
277
|
+
|
|
278
|
+
/**
 * Split text into normalized search tokens.
 *
 * The text is lower-cased, everything except letters, digits, combining
 * marks, whitespace, apostrophes and hyphens is turned into a separator,
 * leading/trailing apostrophes and hyphens are trimmed from each word, and
 * words shorter than two characters or listed in `STOP_WORDS` are dropped.
 *
 * Letters and digits are matched with Unicode property escapes, so non-ASCII
 * words ("café", "über", Cyrillic, …) survive as tokens instead of being
 * truncated or discarded. Pure-ASCII input tokenizes as it did with an
 * `a-z0-9` character class.
 *
 * @param text - Raw text to tokenize
 * @returns Tokens in order of appearance (duplicates preserved)
 */
function tokenize (text: string): string[] {
  return text
    .toLowerCase()
    // \p{M} keeps combining marks attached to their base letter
    .replace(/[^\p{L}\p{M}\p{N}\s'-]/gu, ' ')
    .split(/\s+/)
    // Apostrophes/hyphens are only meaningful inside a word ("don't", "built-in")
    .map(t => t.replace(/^['-]+|['-]+$/g, ''))
    .filter(t => t.length >= 2 && !STOP_WORDS.has(t))
}
|
|
286
|
+
|
|
287
|
+
/**
 * Reduce Markdown source to approximate plain text.
 *
 * Applies a fixed sequence of regex substitutions; order matters (code is
 * removed before inline markers are touched, images before links), then
 * whitespace is collapsed and the result trimmed.
 *
 * @param md - Markdown source
 * @returns Plain text with runs of whitespace collapsed to single spaces
 */
function stripMarkdown (md: string): string {
  const rules: Array<[RegExp, string]> = [
    [/```[\s\S]*?```/g, ' '], // fenced code blocks
    [/`[^`]+`/g, ' '], // inline code
    [/^#{1,6}\s+/gm, ''], // heading markers (text is kept)
    [/!\[[^\]]*\]\([^)]*\)/g, ' '], // images
    [/\[([^\]]*)\]\([^)]*\)/g, '$1'], // links (text is kept)
    [/[*_]{1,3}([^*_]+)[*_]{1,3}/g, '$1'], // bold / italic markers
    [/^>\s*/gm, ''], // blockquote markers
    [/^[-*_]{3,}\s*$/gm, ''], // horizontal rules
    [/<[^>]+>/g, ' '], // HTML tags
    [/\s+/g, ' '] // collapse whitespace
  ]

  let text = md
  for (const [pattern, replacement] of rules) {
    text = text.replace(pattern, replacement)
  }
  return text.trim()
}
|
|
311
|
+
|
|
312
|
+
/**
 * Produce a short plain-text snippet of `body` for display in search results.
 *
 * The snippet is anchored on the first query token (in query order) that
 * occurs anywhere in the markdown-stripped, lower-cased body: it starts up to
 * 50 characters before that occurrence and spans at most 150 characters.
 * When no token occurs, the first 150 characters are used. An ellipsis marks
 * each side on which text was cut off.
 *
 * @param body - Markdown body of the page
 * @param queryTokens - Tokenized query terms
 * @returns The excerpt
 */
function buildExcerpt (body: string, queryTokens: string[]): string {
  const text = stripMarkdown(body)
  const haystack = text.toLowerCase()

  // Substring match, not whole-word: the first token found wins
  const firstHit = queryTokens.find(token => haystack.includes(token))

  if (firstHit === undefined) {
    const head = text.slice(0, 150).trim()
    return text.length > 150 ? head + '…' : head
  }

  const from = Math.max(0, haystack.indexOf(firstHit) - 50)
  const to = Math.min(text.length, from + 150)
  const leading = from > 0 ? '…' : ''
  const trailing = to < text.length ? '…' : ''
  return leading + text.slice(from, to).trim() + trailing
}
|
|
334
|
+
|
|
335
|
+
/**
 * Common English words that carry no search value. `tokenize` drops any
 * token found in this set.
 */
const STOP_WORDS: Set<string> = new Set<string>([
  'a', 'an', 'the', 'and', 'or', 'but',
  'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from',
  'is', 'it', 'its', 'this', 'that', 'be', 'as',
  'was', 'are', 'were', 'been', 'has', 'have', 'had',
  'do', 'does', 'did', 'not', 'no', 'so', 'if', 'up',
  'can', 'will', 'you', 'we', 'he', 'she', 'they', 'their',
  'all', 'any', 'also', 'more', 'into', 'than', 'then',
  'when', 'how', 'what', 'which', 'who',
  'use', 'used', 'using'
])
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import type { MkdnSiteConfig, CspConfig } from '../config/schema.ts'
|
|
2
|
+
|
|
3
|
+
/**
 * Sanitize a user-supplied CSP value to prevent directive or policy injection.
 *
 * - `;` separates directives and `,` separates whole policies in a
 *   `Content-Security-Policy` header, so both are stripped.
 * - Control characters (CR, LF, tab, NUL, …) are collapsed to a single space:
 *   a raw line break would make the header value invalid, while a space still
 *   keeps neighbouring tokens apart.
 *
 * @param val - Raw source expression or report URI from configuration
 * @returns The value without separators or control characters, trimmed
 */
function sanitizeCspValue (val: string): string {
  return val
    .replace(/[;,]/g, '')
    .replace(/[\u0000-\u001f\u007f]+/g, ' ')
    .trim()
}
|
|
10
|
+
|
|
11
|
+
/**
 * Assemble the `Content-Security-Policy` header value for a site config.
 *
 * Third-party origins are allow-listed only when the feature needing them is
 * switched on: jsDelivr for scripts (mermaid / charts) and for styles and
 * fonts (math), Google hosts when a GA measurement id is set, and the origin
 * of an absolute http(s) `theme.customCssUrl`. User-supplied `extra*Src`
 * entries and `reportUri` go through `sanitizeCspValue` before being added.
 *
 * @param config - Site configuration the policy is derived from
 * @returns The policy's directives joined with `'; '`
 */
export function buildCsp (config: MkdnSiteConfig): string {
  const { client, analytics, csp, theme } = config
  const overrides: CspConfig = csp ?? { enabled: true }
  const jsDelivr = 'https://cdn.jsdelivr.net'
  const hasGoogleAnalytics = (analytics?.googleAnalytics?.measurementId ?? '') !== ''

  // Sanitized user-supplied sources for one directive (empty when none are configured)
  const userSources = (values: readonly string[] | null | undefined): string[] =>
    values != null ? values.map(sanitizeCspValue) : []

  // Origin of an absolute custom stylesheet, if any
  const customCssOrigins: string[] = []
  if (theme.customCssUrl != null) {
    try {
      const { protocol, origin } = new URL(theme.customCssUrl)
      if (protocol === 'https:' || protocol === 'http:') {
        customCssOrigins.push(origin)
      }
    } catch {
      // Not an absolute URL — a relative stylesheet is already covered by 'self'
    }
  }

  const scriptSrc = [
    "'self'",
    "'unsafe-inline'",
    ...((client.mermaid || client.charts) ? [jsDelivr] : []),
    ...(hasGoogleAnalytics
      ? ['https://www.googletagmanager.com', 'https://www.google-analytics.com']
      : []),
    ...userSources(overrides.extraScriptSrc)
  ]

  const styleSrc = [
    "'self'",
    "'unsafe-inline'",
    ...(client.math ? [jsDelivr] : []),
    ...customCssOrigins,
    ...userSources(overrides.extraStyleSrc)
  ]

  const imgSrc = [
    "'self'",
    'data:',
    'https:',
    ...(client.mermaid ? ['blob:'] : []),
    ...userSources(overrides.extraImgSrc)
  ]

  const fontSrc = [
    "'self'",
    'https://fonts.gstatic.com',
    ...(client.math ? [jsDelivr] : []),
    ...userSources(overrides.extraFontSrc)
  ]

  const connectSrc = [
    "'self'",
    ...(hasGoogleAnalytics
      ? [
          'https://www.google-analytics.com',
          'https://analytics.google.com',
          'https://region1.google-analytics.com'
        ]
      : []),
    ...userSources(overrides.extraConnectSrc)
  ]

  const directives: string[] = [
    "default-src 'self'",
    `script-src ${scriptSrc.join(' ')}`,
    `style-src ${styleSrc.join(' ')}`,
    `img-src ${imgSrc.join(' ')}`,
    `font-src ${fontSrc.join(' ')}`,
    `connect-src ${connectSrc.join(' ')}`,
    "frame-src 'none'",
    "object-src 'none'",
    "base-uri 'self'",
    "form-action 'self'"
  ]

  if (overrides.reportUri != null && overrides.reportUri !== '') {
    directives.push(`report-uri ${sanitizeCspValue(overrides.reportUri)}`)
  }

  return directives.join('; ')
}
|