mkdnsite 0.0.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -3
- package/src/adapters/cloudflare.ts +202 -15
- package/src/adapters/local.ts +38 -17
- package/src/analytics/classify.ts +65 -0
- package/src/analytics/console.ts +39 -0
- package/src/analytics/noop.ts +15 -0
- package/src/analytics/types.ts +49 -0
- package/src/cache/kv.ts +81 -0
- package/src/cache/memory.ts +46 -0
- package/src/cache/response.ts +24 -0
- package/src/cli.ts +301 -51
- package/src/client/scripts.ts +379 -3
- package/src/config/defaults.ts +66 -5
- package/src/config/schema.ts +200 -2
- package/src/content/assets.ts +202 -0
- package/src/content/cache.ts +232 -0
- package/src/content/filesystem.ts +17 -1
- package/src/content/github.ts +169 -102
- package/src/content/nav-builder.ts +120 -0
- package/src/content/r2.ts +214 -0
- package/src/handler.ts +341 -21
- package/src/index.ts +49 -1
- package/src/mcp/server.ts +164 -0
- package/src/mcp/stdio.ts +29 -0
- package/src/mcp/transport.ts +29 -0
- package/src/negotiate/headers.ts +37 -9
- package/src/render/page-shell.ts +249 -8
- package/src/search/index.ts +342 -0
- package/src/security/csp.ts +92 -0
- package/src/theme/{prose-css.ts → base-css.ts} +251 -11
- package/src/theme/build-css.ts +74 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mkdnsite",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "Markdown for the web. HTML for humans, Markdown for agents.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.ts",
|
|
@@ -15,7 +15,10 @@
|
|
|
15
15
|
"./adapters/fly": "./src/adapters/fly.ts"
|
|
16
16
|
},
|
|
17
17
|
"scripts": {
|
|
18
|
-
"dev": "bun run --watch src/cli.ts ./content",
|
|
18
|
+
"dev": "bun run --watch src/cli.ts ./content --static ./static --logo /mkdnsite-logo.png --logo-text mkdnsite",
|
|
19
|
+
"dev:themed": "bun run --watch src/cli.ts --config themed.config.ts",
|
|
20
|
+
"dev:light": "bun run --watch src/cli.ts --config themed.config.ts --color-scheme light",
|
|
21
|
+
"dev:dark": "bun run --watch src/cli.ts --config themed.config.ts --color-scheme dark",
|
|
19
22
|
"start": "bun run src/cli.ts",
|
|
20
23
|
"test": "bun test",
|
|
21
24
|
"lint": "ts-standard src/ test/",
|
|
@@ -44,6 +47,7 @@
|
|
|
44
47
|
"src"
|
|
45
48
|
],
|
|
46
49
|
"dependencies": {
|
|
50
|
+
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
47
51
|
"gray-matter": "^4.0.3",
|
|
48
52
|
"katex": "^0.16.38",
|
|
49
53
|
"lucide-react": "^0.577.0",
|
|
@@ -57,7 +61,8 @@
|
|
|
57
61
|
"remark-gfm": "^4.0.0",
|
|
58
62
|
"remark-github-blockquote-alert": "^2.0.1",
|
|
59
63
|
"remark-math": "^6.0.0",
|
|
60
|
-
"shiki": "^3.0.0"
|
|
64
|
+
"shiki": "^3.0.0",
|
|
65
|
+
"zod": "^4.3.6"
|
|
61
66
|
},
|
|
62
67
|
"ts-standard": {
|
|
63
68
|
"project": "./tsconfig.json"
|
|
@@ -3,12 +3,23 @@ import type { MkdnSiteConfig } from '../config/schema.ts'
|
|
|
3
3
|
import type { ContentSource } from '../content/types.ts'
|
|
4
4
|
import type { MarkdownRenderer } from '../render/types.ts'
|
|
5
5
|
import { createRenderer } from '../render/types.ts'
|
|
6
|
+
import { GitHubSource } from '../content/github.ts'
|
|
7
|
+
import { R2ContentSource } from '../content/r2.ts'
|
|
8
|
+
import { AssetsSource } from '../content/assets.ts'
|
|
9
|
+
import type { ContentCache } from '../content/cache.ts'
|
|
10
|
+
import { KVContentCache } from '../content/cache.ts'
|
|
11
|
+
import type { ResponseCache } from '../cache/response.ts'
|
|
12
|
+
import { KVResponseCache } from '../cache/kv.ts'
|
|
13
|
+
import type { TrafficAnalytics, TrafficEvent } from '../analytics/types.ts'
|
|
6
14
|
|
|
7
15
|
/**
|
|
8
16
|
* Cloudflare Workers deployment adapter.
|
|
9
17
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
18
|
+
* Auto-detects content source from env bindings:
|
|
19
|
+
* - CONTENT_SOURCE=github or config.github set → GitHubSource
|
|
20
|
+
* - CONTENT_SOURCE=r2 or CONTENT_BUCKET present → R2ContentSource
|
|
21
|
+
* - CONTENT_SOURCE=assets or ASSETS binding present → AssetsSource
|
|
22
|
+
* - Explicit CONTENT_SOURCE env var overrides auto-detection
|
|
12
23
|
*
|
|
13
24
|
* Usage in a Worker:
|
|
14
25
|
*
|
|
@@ -38,48 +49,224 @@ export class CloudflareAdapter implements DeploymentAdapter {
|
|
|
38
49
|
this.env = env
|
|
39
50
|
}
|
|
40
51
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
52
|
+
private createCache (prefix?: string): ContentCache | undefined {
|
|
53
|
+
if (this.env.CACHE_KV == null) return undefined
|
|
54
|
+
return new KVContentCache(this.env.CACHE_KV, { prefix })
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
createContentSource (config: MkdnSiteConfig): ContentSource {
|
|
58
|
+
const sourceType = this.env.CONTENT_SOURCE
|
|
59
|
+
|
|
60
|
+
// GitHub source: explicit CONTENT_SOURCE=github or config.github set
|
|
61
|
+
if (sourceType === 'github' || (sourceType == null && config.github != null)) {
|
|
62
|
+
const ghConfig = config.github ?? {
|
|
63
|
+
owner: this.env.GITHUB_OWNER ?? '',
|
|
64
|
+
repo: this.env.GITHUB_REPO ?? '',
|
|
65
|
+
ref: this.env.GITHUB_REF,
|
|
66
|
+
token: this.env.GITHUB_TOKEN
|
|
67
|
+
}
|
|
68
|
+
return new GitHubSource(ghConfig)
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// R2 source: explicit CONTENT_SOURCE=r2 or CONTENT_BUCKET binding present
|
|
72
|
+
if (sourceType === 'r2' || (sourceType == null && this.env.CONTENT_BUCKET != null)) {
|
|
73
|
+
if (this.env.CONTENT_BUCKET == null) {
|
|
74
|
+
throw new Error(
|
|
75
|
+
'CloudflareAdapter: CONTENT_SOURCE=r2 requires a CONTENT_BUCKET binding in wrangler.toml.'
|
|
76
|
+
)
|
|
77
|
+
}
|
|
78
|
+
return new R2ContentSource({
|
|
79
|
+
bucket: this.env.CONTENT_BUCKET,
|
|
80
|
+
basePath: this.env.CONTENT_BASE_PATH,
|
|
81
|
+
cache: this.createCache(this.env.CONTENT_BASE_PATH)
|
|
82
|
+
})
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Assets source: explicit CONTENT_SOURCE=assets or ASSETS binding present
|
|
86
|
+
if (sourceType === 'assets' || (sourceType == null && this.env.ASSETS != null)) {
|
|
87
|
+
if (this.env.ASSETS == null) {
|
|
88
|
+
throw new Error(
|
|
89
|
+
'CloudflareAdapter: CONTENT_SOURCE=assets requires an ASSETS binding in wrangler.toml.'
|
|
90
|
+
)
|
|
91
|
+
}
|
|
92
|
+
const manifest = this.env.CONTENT_MANIFEST != null
|
|
93
|
+
? JSON.parse(this.env.CONTENT_MANIFEST) as string[]
|
|
94
|
+
: undefined
|
|
95
|
+
return new AssetsSource({
|
|
96
|
+
assets: this.env.ASSETS,
|
|
97
|
+
manifest,
|
|
98
|
+
cache: this.createCache('assets:')
|
|
99
|
+
})
|
|
100
|
+
}
|
|
101
|
+
|
|
44
102
|
throw new Error(
|
|
45
|
-
'CloudflareAdapter
|
|
46
|
-
'
|
|
103
|
+
'CloudflareAdapter: No content source configured. ' +
|
|
104
|
+
'Set CONTENT_SOURCE=github|r2|assets, provide CONTENT_BUCKET (R2), ASSETS binding, or set config.github.'
|
|
47
105
|
)
|
|
48
106
|
}
|
|
49
107
|
|
|
50
108
|
async createRenderer (_config: MkdnSiteConfig): Promise<MarkdownRenderer> {
|
|
51
|
-
// CF Workers don't have Bun.markdown
|
|
109
|
+
// CF Workers don't have Bun.markdown — always use portable renderer
|
|
52
110
|
return await createRenderer('portable')
|
|
53
111
|
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Create a TrafficAnalytics instance if the ANALYTICS binding is present.
|
|
115
|
+
*
|
|
116
|
+
* Returns `undefined` when the binding is absent so callers can skip
|
|
117
|
+
* passing analytics to createHandler without any change in behaviour.
|
|
118
|
+
*
|
|
119
|
+
* Usage:
|
|
120
|
+
* ```ts
|
|
121
|
+
* const handler = createHandler({
|
|
122
|
+
* source: adapter.createContentSource(config),
|
|
123
|
+
* renderer: await adapter.createRenderer(config),
|
|
124
|
+
* config,
|
|
125
|
+
* analytics: adapter.createTrafficAnalytics()
|
|
126
|
+
* })
|
|
127
|
+
* ```
|
|
128
|
+
*/
|
|
129
|
+
/**
|
|
130
|
+
* Create a ResponseCache backed by CACHE_KV when available.
|
|
131
|
+
* Returns undefined when no KV binding is present.
|
|
132
|
+
*/
|
|
133
|
+
createResponseCache (): ResponseCache | undefined {
|
|
134
|
+
if (this.env.CACHE_KV == null) return undefined
|
|
135
|
+
return new KVResponseCache(this.env.CACHE_KV, { prefix: 'resp:' })
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
createTrafficAnalytics (): TrafficAnalytics | undefined {
|
|
139
|
+
if (this.env.ANALYTICS == null) return undefined
|
|
140
|
+
return new WorkersAnalyticsEngineAnalytics(this.env.ANALYTICS)
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Cloudflare Workers Analytics Engine implementation of TrafficAnalytics.
|
|
146
|
+
*
|
|
147
|
+
* Writes a data point to a CF Analytics Engine dataset binding (`ANALYTICS`).
|
|
148
|
+
* Each event field is written as an index, a blob (string value), or a double (numeric value).
|
|
149
|
+
*
|
|
150
|
+
* Usage: automatically created by `CloudflareAdapter.createTrafficAnalytics()`
|
|
151
|
+
* when the `ANALYTICS` binding is present.
|
|
152
|
+
*/
|
|
153
|
+
export class WorkersAnalyticsEngineAnalytics implements TrafficAnalytics {
|
|
154
|
+
private readonly dataset: AnalyticsEngineDataset
|
|
155
|
+
|
|
156
|
+
constructor (dataset: AnalyticsEngineDataset) {
|
|
157
|
+
this.dataset = dataset
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
logRequest (event: TrafficEvent): void {
|
|
161
|
+
// Field ordering is significant — CF Analytics Engine queries reference
|
|
162
|
+
// fields by index (blob1, blob2, ..., double1, double2, ...).
|
|
163
|
+
// Do NOT reorder without updating all downstream queries.
|
|
164
|
+
this.dataset.writeDataPoint({
|
|
165
|
+
indexes: [
|
|
166
|
+
event.siteId ?? '' // index1: site isolation key (empty for single-site)
|
|
167
|
+
],
|
|
168
|
+
blobs: [
|
|
169
|
+
event.path, // blob1: URL pathname
|
|
170
|
+
event.method, // blob2: HTTP method
|
|
171
|
+
event.format, // blob3: response format (html|markdown|mcp|api|other)
|
|
172
|
+
event.trafficType, // blob4: traffic classification (human|ai_agent|bot|mcp)
|
|
173
|
+
event.userAgent // blob5: raw User-Agent string
|
|
174
|
+
],
|
|
175
|
+
doubles: [
|
|
176
|
+
event.statusCode, // double1: HTTP status code
|
|
177
|
+
event.latencyMs, // double2: handler latency in ms
|
|
178
|
+
event.contentLength, // double3: response body size in bytes
|
|
179
|
+
event.cacheHit ? 1 : 0, // double4: cache hit (1) or miss (0)
|
|
180
|
+
event.timestamp // double5: request timestamp (epoch ms)
|
|
181
|
+
]
|
|
182
|
+
})
|
|
183
|
+
}
|
|
54
184
|
}
|
|
55
185
|
|
|
56
186
|
/**
|
|
57
187
|
* Expected Cloudflare Worker environment bindings.
|
|
58
188
|
*/
|
|
59
189
|
export interface CloudflareEnv {
|
|
60
|
-
/**
|
|
190
|
+
/** Explicit content source selection */
|
|
191
|
+
CONTENT_SOURCE?: 'github' | 'r2' | 'assets'
|
|
192
|
+
|
|
193
|
+
/** R2 bucket binding for markdown content */
|
|
61
194
|
CONTENT_BUCKET?: R2Bucket
|
|
62
|
-
/**
|
|
195
|
+
/** Key prefix within the R2 bucket (e.g. 'sites/abc123/') */
|
|
196
|
+
CONTENT_BASE_PATH?: string
|
|
197
|
+
|
|
198
|
+
/** Workers Static Assets binding */
|
|
199
|
+
ASSETS?: AssetsFetcher
|
|
200
|
+
/** JSON array of .md file paths (alternative to _manifest.json in assets) */
|
|
201
|
+
CONTENT_MANIFEST?: string
|
|
202
|
+
|
|
203
|
+
/** KV namespace backing the content cache and the response cache */
|
|
63
204
|
CACHE_KV?: KVNamespace
|
|
64
|
-
|
|
205
|
+
|
|
206
|
+
/** GitHub owner (used if config.github not set) */
|
|
207
|
+
GITHUB_OWNER?: string
|
|
208
|
+
/** GitHub repo (used if config.github not set) */
|
|
209
|
+
GITHUB_REPO?: string
|
|
210
|
+
/** GitHub branch/tag (default: main) */
|
|
211
|
+
GITHUB_REF?: string
|
|
212
|
+
/** GitHub token for private repos / higher rate limits */
|
|
213
|
+
GITHUB_TOKEN?: string
|
|
214
|
+
|
|
215
|
+
/** Site title (can override config.site.title) */
|
|
65
216
|
SITE_TITLE?: string
|
|
66
|
-
/** Site URL
|
|
217
|
+
/** Site URL */
|
|
67
218
|
SITE_URL?: string
|
|
219
|
+
|
|
220
|
+
/** Secret token for authenticating POST /_refresh requests */
|
|
221
|
+
REFRESH_TOKEN?: string
|
|
222
|
+
|
|
223
|
+
/** Workers Analytics Engine dataset binding for traffic analytics */
|
|
224
|
+
ANALYTICS?: AnalyticsEngineDataset
|
|
68
225
|
}
|
|
69
226
|
|
|
70
|
-
//
|
|
227
|
+
// ─── Cloudflare binding type stubs (R2, Assets, KV, Analytics Engine) ─────────
|
|
228
|
+
// These types are provided by the CF Workers runtime; stubs here for type-checking
|
|
229
|
+
// in non-CF environments.
|
|
230
|
+
|
|
71
231
|
interface R2Bucket {
|
|
72
232
|
get: (key: string) => Promise<R2Object | null>
|
|
73
|
-
list: (options?:
|
|
233
|
+
list: (options?: R2ListOptions) => Promise<R2ObjectList>
|
|
74
234
|
}
|
|
75
235
|
|
|
76
236
|
interface R2Object {
|
|
77
237
|
key: string
|
|
78
238
|
uploaded: Date
|
|
239
|
+
size: number
|
|
79
240
|
text: () => Promise<string>
|
|
80
241
|
}
|
|
81
242
|
|
|
243
|
+
interface R2ObjectList {
|
|
244
|
+
objects: R2Object[]
|
|
245
|
+
truncated: boolean
|
|
246
|
+
cursor?: string
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
interface R2ListOptions {
|
|
250
|
+
prefix?: string
|
|
251
|
+
cursor?: string
|
|
252
|
+
limit?: number
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
interface AssetsFetcher {
|
|
256
|
+
fetch: (input: Request | string) => Promise<Response>
|
|
257
|
+
}
|
|
258
|
+
|
|
82
259
|
interface KVNamespace {
|
|
83
260
|
get: (key: string) => Promise<string | null>
|
|
84
|
-
put: (key: string, value: string, options?:
|
|
261
|
+
put: (key: string, value: string, options?: { expirationTtl?: number }) => Promise<void>
|
|
262
|
+
delete: (key: string) => Promise<void>
|
|
263
|
+
list: (options?: { prefix?: string }) => Promise<{ keys: Array<{ name: string }> }>
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
interface AnalyticsEngineDataset {
|
|
267
|
+
writeDataPoint: (data: {
|
|
268
|
+
blobs?: string[]
|
|
269
|
+
doubles?: number[]
|
|
270
|
+
indexes?: string[]
|
|
271
|
+
}) => void
|
|
85
272
|
}
|
package/src/adapters/local.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer'
|
|
1
2
|
import type { DeploymentAdapter } from './types.ts'
|
|
2
3
|
import { detectRuntime } from './types.ts'
|
|
3
4
|
import type { MkdnSiteConfig } from '../config/schema.ts'
|
|
4
5
|
import type { ContentSource } from '../content/types.ts'
|
|
5
6
|
import type { MarkdownRenderer } from '../render/types.ts'
|
|
6
7
|
import { FilesystemSource } from '../content/filesystem.ts'
|
|
8
|
+
import { GitHubSource } from '../content/github.ts'
|
|
7
9
|
import { createRenderer } from '../render/types.ts'
|
|
8
10
|
|
|
9
11
|
export class LocalAdapter implements DeploymentAdapter {
|
|
@@ -15,6 +17,9 @@ export class LocalAdapter implements DeploymentAdapter {
|
|
|
15
17
|
}
|
|
16
18
|
|
|
17
19
|
createContentSource (config: MkdnSiteConfig): ContentSource {
|
|
20
|
+
if (config.github != null) {
|
|
21
|
+
return new GitHubSource(config.github)
|
|
22
|
+
}
|
|
18
23
|
return new FilesystemSource(config.contentDir)
|
|
19
24
|
}
|
|
20
25
|
|
|
@@ -129,25 +134,41 @@ export class LocalAdapter implements DeploymentAdapter {
|
|
|
129
134
|
|
|
130
135
|
private printStartup (config: MkdnSiteConfig, port: number): void {
|
|
131
136
|
const url = `http://localhost:${String(port)}`
|
|
137
|
+
const DIM_CYAN = '\x1b[2;36m'
|
|
138
|
+
const BOLD_GREEN = '\x1b[1;32m'
|
|
139
|
+
const DIM = '\x1b[2m'
|
|
140
|
+
const RESET = '\x1b[0m'
|
|
141
|
+
|
|
142
|
+
// ASCII art header
|
|
143
|
+
console.log('')
|
|
144
|
+
console.log(`${DIM_CYAN} ▌ ▌ ▘▗ `)
|
|
145
|
+
console.log('▛▛▌▙▘▛▌▛▌▛▘▌▜▘█▌')
|
|
146
|
+
console.log(`▌▌▌▛▖▙▌▌▌▄▌▌▐▖▙▖${RESET}`)
|
|
132
147
|
console.log('')
|
|
133
|
-
console.log(
|
|
134
|
-
console.log('
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
148
|
+
console.log(` ${BOLD_GREEN}\u2192 ${url}${RESET}`)
|
|
149
|
+
console.log('')
|
|
150
|
+
|
|
151
|
+
const row = (label: string, value: string): void => {
|
|
152
|
+
console.log(` ${DIM}${label.padEnd(12)}${RESET}${value}`)
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
row('Runtime', `local (${this.name})`)
|
|
156
|
+
if (config.github != null) {
|
|
157
|
+
const ref = config.github.ref ?? 'main'
|
|
158
|
+
row('GitHub', `${config.github.owner}/${config.github.repo}@${ref}`)
|
|
159
|
+
} else {
|
|
160
|
+
row('Content', config.contentDir)
|
|
161
|
+
}
|
|
162
|
+
row('Renderer', this.rendererEngine)
|
|
163
|
+
if (config.mcp.enabled) {
|
|
164
|
+
row('MCP', config.mcp.endpoint ?? '/mcp')
|
|
165
|
+
}
|
|
166
|
+
if (config.client.search) {
|
|
167
|
+
row('Search', '/api/search')
|
|
168
|
+
}
|
|
169
|
+
|
|
146
170
|
console.log('')
|
|
147
|
-
console.log(
|
|
148
|
-
console.log(` curl ${url}`)
|
|
149
|
-
console.log(` curl -H "Accept: text/markdown" ${url}`)
|
|
150
|
-
console.log(` curl ${url}/llms.txt`)
|
|
171
|
+
console.log(` ${DIM}Ctrl+C to stop${RESET}`)
|
|
151
172
|
console.log('')
|
|
152
173
|
}
|
|
153
174
|
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import type { TrafficType, AnalyticsResponseFormat } from './types.ts'
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Known crawler / bot User-Agent patterns.
|
|
5
|
+
* Checked case-insensitively.
|
|
6
|
+
*
|
|
7
|
+
* This list is intentionally extensible — add entries as new crawlers appear.
|
|
8
|
+
*/
|
|
9
|
+
export const BOT_PATTERNS: RegExp[] = [
|
|
10
|
+
/googlebot/i,
|
|
11
|
+
/bingbot/i,
|
|
12
|
+
/slurp/i, // Yahoo
|
|
13
|
+
/duckduckbot/i,
|
|
14
|
+
/baiduspider/i,
|
|
15
|
+
/yandexbot/i,
|
|
16
|
+
/sogou/i,
|
|
17
|
+
/exabot/i,
|
|
18
|
+
/facebot/i,
|
|
19
|
+
/ia_archiver/i, // Alexa / Internet Archive
|
|
20
|
+
/semrushbot/i,
|
|
21
|
+
/ahrefsbot/i,
|
|
22
|
+
/mj12bot/i,
|
|
23
|
+
/dotbot/i,
|
|
24
|
+
/rogerbot/i,
|
|
25
|
+
/archive\.org_bot/i,
|
|
26
|
+
/petalbot/i,
|
|
27
|
+
/bytespider/i, // TikTok
|
|
28
|
+
/applebot/i,
|
|
29
|
+
/linkedinbot/i,
|
|
30
|
+
/twitterbot/i,
|
|
31
|
+
/facebookexternalhit/i,
|
|
32
|
+
/whatsapp/i,
|
|
33
|
+
/telegrambot/i,
|
|
34
|
+
/discordbot/i,
|
|
35
|
+
/slackbot/i
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Classify a request as human, ai_agent, bot, or mcp traffic.
|
|
40
|
+
*
|
|
41
|
+
* Rules (evaluated in order):
|
|
42
|
+
* 1. MCP format → 'mcp'
|
|
43
|
+
* 2. markdown format (already resolved by the handler from Accept header / .md URL) → 'ai_agent'
|
|
44
|
+
* 3. User-Agent matches a known bot pattern → 'bot'
|
|
45
|
+
* 4. Otherwise → 'human'
|
|
46
|
+
*
|
|
47
|
+
* The `format` parameter is pre-resolved by the handler's `resolveAnalyticsFormat()`,
|
|
48
|
+
* which already checks Content-Type, Accept headers, and .md URL suffix — so we
|
|
49
|
+
* avoid duplicating that logic here.
|
|
50
|
+
*/
|
|
51
|
+
export function classifyTraffic (request: Request, format: AnalyticsResponseFormat): TrafficType {
|
|
52
|
+
// MCP traffic
|
|
53
|
+
if (format === 'mcp') return 'mcp'
|
|
54
|
+
|
|
55
|
+
// AI agent: served raw markdown (format resolved from Accept header / .md URL / Content-Type)
|
|
56
|
+
if (format === 'markdown') return 'ai_agent'
|
|
57
|
+
|
|
58
|
+
// Known bot by User-Agent
|
|
59
|
+
const ua = request.headers.get('User-Agent') ?? ''
|
|
60
|
+
if (ua !== '' && BOT_PATTERNS.some(pattern => pattern.test(ua))) {
|
|
61
|
+
return 'bot'
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
return 'human'
|
|
65
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { TrafficAnalytics, TrafficEvent } from './types.ts'
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Console analytics implementation — writes a structured log line to stdout.
|
|
5
|
+
*
|
|
6
|
+
* Useful during development and debugging. Output format is a single JSON line
|
|
7
|
+
* per request so it can be piped to `jq` or similar tools.
|
|
8
|
+
*
|
|
9
|
+
* Example output:
|
|
10
|
+
* {"ts":1710000000000,"method":"GET","path":"/docs","format":"html","type":"human","status":200,"ms":12,"bytes":4321,"cache":false,"ua":"Mozilla/5.0"}
|
|
11
|
+
*/
|
|
12
|
+
export class ConsoleAnalytics implements TrafficAnalytics {
|
|
13
|
+
private readonly output: (line: string) => void
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* @param output - Write function (defaults to console.log). Injectable for
|
|
17
|
+
* testing without polluting test output.
|
|
18
|
+
*/
|
|
19
|
+
constructor (output?: (line: string) => void) {
|
|
20
|
+
this.output = output ?? console.log
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
logRequest (event: TrafficEvent): void {
|
|
24
|
+
const obj: Record<string, unknown> = {
|
|
25
|
+
ts: event.timestamp,
|
|
26
|
+
method: event.method,
|
|
27
|
+
path: event.path,
|
|
28
|
+
format: event.format,
|
|
29
|
+
type: event.trafficType,
|
|
30
|
+
status: event.statusCode,
|
|
31
|
+
ms: event.latencyMs,
|
|
32
|
+
bytes: event.contentLength,
|
|
33
|
+
cache: event.cacheHit,
|
|
34
|
+
ua: event.userAgent
|
|
35
|
+
}
|
|
36
|
+
if (event.siteId != null) obj.site = event.siteId
|
|
37
|
+
this.output(JSON.stringify(obj))
|
|
38
|
+
}
|
|
39
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { TrafficAnalytics, TrafficEvent } from './types.ts'
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* No-op analytics implementation.
|
|
5
|
+
*
|
|
6
|
+
* The default when no analytics backend is configured. logRequest() is a
|
|
7
|
+
* genuine no-op — zero allocations, zero overhead beyond the null check in
|
|
8
|
+
* the handler.
|
|
9
|
+
*/
|
|
10
|
+
export class NoopAnalytics implements TrafficAnalytics {
|
|
11
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
12
|
+
logRequest (_event: TrafficEvent): void {
|
|
13
|
+
// intentionally empty
|
|
14
|
+
}
|
|
15
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Traffic analytics types for mkdnsite.
|
|
3
|
+
*
|
|
4
|
+
* The TrafficAnalytics interface is the core extension point — implement it
|
|
5
|
+
* to route request events to any analytics backend (console, CF Analytics Engine,
|
|
6
|
+
* ClickHouse, Plausible, etc.).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/** Classification of who sent the request */
|
|
10
|
+
export type TrafficType = 'human' | 'ai_agent' | 'bot' | 'mcp'
|
|
11
|
+
|
|
12
|
+
/** What format was served in the response */
|
|
13
|
+
export type AnalyticsResponseFormat = 'html' | 'markdown' | 'mcp' | 'api' | 'other'
|
|
14
|
+
|
|
15
|
+
/** A single request event captured by the analytics hook */
|
|
16
|
+
export interface TrafficEvent {
|
|
17
|
+
/** Unix timestamp (ms) when the request started — Date.now() */
|
|
18
|
+
timestamp: number
|
|
19
|
+
/** URL pathname */
|
|
20
|
+
path: string
|
|
21
|
+
/** HTTP method (GET, POST, etc.) */
|
|
22
|
+
method: string
|
|
23
|
+
/** What was served */
|
|
24
|
+
format: AnalyticsResponseFormat
|
|
25
|
+
/** Classified caller type */
|
|
26
|
+
trafficType: TrafficType
|
|
27
|
+
/** HTTP status code of the response */
|
|
28
|
+
statusCode: number
|
|
29
|
+
/** End-to-end handler latency in milliseconds */
|
|
30
|
+
latencyMs: number
|
|
31
|
+
/** Raw User-Agent string */
|
|
32
|
+
userAgent: string
|
|
33
|
+
/** Response body size in bytes */
|
|
34
|
+
contentLength: number
|
|
35
|
+
/** Whether the response was served from cache */
|
|
36
|
+
cacheHit: boolean
|
|
37
|
+
/** Site identifier for multi-tenant deployments (e.g. mkdn.io). Undefined for single-site. */
|
|
38
|
+
siteId?: string
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Pluggable traffic analytics backend.
|
|
43
|
+
*
|
|
44
|
+
* `logRequest` is fire-and-forget and must be synchronous (or fire async work
|
|
45
|
+
* without blocking the response). Implementations must never throw.
|
|
46
|
+
*/
|
|
47
|
+
export interface TrafficAnalytics {
|
|
48
|
+
logRequest: (event: TrafficEvent) => void
|
|
49
|
+
}
|
package/src/cache/kv.ts
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import type { CachedResponse, ResponseCache } from './response.ts'
|
|
2
|
+
import { MemoryResponseCache } from './memory.ts'
|
|
3
|
+
|
|
4
|
+
const DEFAULT_TTL_SECONDS = 300
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Cloudflare KV type stubs (runtime types not available outside CF Workers).
|
|
8
|
+
* Matches the KVNamespace interface from @cloudflare/workers-types.
|
|
9
|
+
*/
|
|
10
|
+
export interface KVNamespace {
|
|
11
|
+
get: (key: string) => Promise<string | null>
|
|
12
|
+
put: (key: string, value: string, options?: { expirationTtl?: number }) => Promise<void>
|
|
13
|
+
delete: (key: string) => Promise<void>
|
|
14
|
+
list: (options?: { prefix?: string }) => Promise<{ keys: Array<{ name: string }> }>
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Cloudflare KV-backed response cache with L1 in-memory hot path.
|
|
19
|
+
*
|
|
20
|
+
* L1 (MemoryResponseCache): fast path for repeated requests within same isolate.
|
|
21
|
+
* L2 (KV): shared durable storage across isolates / cold starts.
|
|
22
|
+
*
|
|
23
|
+
* KV key format: `{prefix}{cacheKey}` (prefix defaults to `resp:`)
|
|
24
|
+
*/
|
|
25
|
+
export class KVResponseCache implements ResponseCache {
|
|
26
|
+
private readonly kv: KVNamespace
|
|
27
|
+
private readonly prefix: string
|
|
28
|
+
private readonly ttlSeconds: number
|
|
29
|
+
private readonly memory: MemoryResponseCache
|
|
30
|
+
|
|
31
|
+
constructor (kv: KVNamespace, options?: { prefix?: string, ttlSeconds?: number }) {
|
|
32
|
+
this.kv = kv
|
|
33
|
+
this.prefix = options?.prefix ?? 'resp:'
|
|
34
|
+
this.ttlSeconds = options?.ttlSeconds ?? DEFAULT_TTL_SECONDS
|
|
35
|
+
this.memory = new MemoryResponseCache(this.ttlSeconds)
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
async get (key: string): Promise<CachedResponse | null> {
|
|
39
|
+
// L1: in-memory
|
|
40
|
+
const memResult = await this.memory.get(key)
|
|
41
|
+
if (memResult != null) return memResult
|
|
42
|
+
|
|
43
|
+
// L2: KV
|
|
44
|
+
const raw = await this.kv.get(this.kvKey(key))
|
|
45
|
+
if (raw != null) {
|
|
46
|
+
try {
|
|
47
|
+
const cached = JSON.parse(raw) as CachedResponse
|
|
48
|
+
await this.memory.set(key, cached, this.ttlSeconds)
|
|
49
|
+
return cached
|
|
50
|
+
} catch {
|
|
51
|
+
return null
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
return null
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
async set (key: string, response: CachedResponse, ttlSeconds?: number): Promise<void> {
|
|
58
|
+
const ttl = ttlSeconds ?? this.ttlSeconds
|
|
59
|
+
await this.memory.set(key, response, ttl)
|
|
60
|
+
await this.kv.put(this.kvKey(key), JSON.stringify(response), { expirationTtl: ttl })
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
async delete (key: string): Promise<void> {
|
|
64
|
+
await this.memory.delete(key)
|
|
65
|
+
await this.kv.delete(this.kvKey(key))
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
async clear (): Promise<void> {
|
|
69
|
+
await this.memory.clear()
|
|
70
|
+
try {
|
|
71
|
+
const result = await this.kv.list({ prefix: this.prefix })
|
|
72
|
+
await Promise.all(result.keys.map(async k => await this.kv.delete(k.name)))
|
|
73
|
+
} catch {
|
|
74
|
+
// Best-effort — KV TTL handles expiry
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
private kvKey (key: string): string {
|
|
79
|
+
return this.prefix + key
|
|
80
|
+
}
|
|
81
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { CachedResponse, ResponseCache } from './response.ts'
|
|
2
|
+
|
|
3
|
+
const DEFAULT_TTL_SECONDS = 300
|
|
4
|
+
|
|
5
|
+
interface Entry {
|
|
6
|
+
response: CachedResponse
|
|
7
|
+
expiresAt: number
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* In-memory response cache with lazy TTL expiry (an expired entry is dropped when it is next read).
|
|
12
|
+
*
|
|
13
|
+
* Suitable for single-process deployments (local dev, Node/Deno/Bun servers).
|
|
14
|
+
* Does not share state across Worker isolates — use KVResponseCache for that.
|
|
15
|
+
*/
|
|
16
|
+
export class MemoryResponseCache implements ResponseCache {
|
|
17
|
+
private readonly store = new Map<string, Entry>()
|
|
18
|
+
private readonly defaultTtlSeconds: number
|
|
19
|
+
|
|
20
|
+
constructor (defaultTtlSeconds?: number) {
|
|
21
|
+
this.defaultTtlSeconds = defaultTtlSeconds ?? DEFAULT_TTL_SECONDS
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
async get (key: string): Promise<CachedResponse | null> {
|
|
25
|
+
const entry = this.store.get(key)
|
|
26
|
+
if (entry == null) return null
|
|
27
|
+
if (Date.now() > entry.expiresAt) {
|
|
28
|
+
this.store.delete(key)
|
|
29
|
+
return null
|
|
30
|
+
}
|
|
31
|
+
return entry.response
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
async set (key: string, response: CachedResponse, ttlSeconds?: number): Promise<void> {
|
|
35
|
+
const ttl = (ttlSeconds ?? this.defaultTtlSeconds) * 1000
|
|
36
|
+
this.store.set(key, { response, expiresAt: Date.now() + ttl })
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
async delete (key: string): Promise<void> {
|
|
40
|
+
this.store.delete(key)
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
async clear (): Promise<void> {
|
|
44
|
+
this.store.clear()
|
|
45
|
+
}
|
|
46
|
+
}
|