nebula-ai-plugin-system 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/vision.ts ADDED
@@ -0,0 +1,242 @@
1
import { readFile, stat } from 'node:fs/promises'
import { homedir } from 'node:os'
import { isAbsolute } from 'node:path'
import { PathGuard, type ToolDef, type VisionInferFn } from 'nebula-ai-core'
import { z } from 'zod'
import { collectUpToBytes, hostIsPrivate } from './web-fetch'
7
+
8
+ /**
9
+ * `vision.analyze` accepts EITHER an absolute file path OR an http(s) URL.
10
+ * URL fetches stream + abort at maxBytes (same SSRF guard as web.fetch); a
11
+ * misleading URL pointing at a multi-GB asset cancels the reader instead
12
+ * of pulling the whole thing before the size check.
13
+ */
14
+
15
/**
 * Extension → MIME lookup used when magic-byte sniffing is inconclusive.
 *
 * Built on a null-prototype object so that an extension which happens to
 * name an `Object.prototype` member (e.g. `constructor`) resolves to
 * `undefined` instead of an inherited function.
 */
const KNOWN_MIME_BY_EXT: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    png: 'image/png',
    jpg: 'image/jpeg',
    jpeg: 'image/jpeg',
    gif: 'image/gif',
    webp: 'image/webp',
    bmp: 'image/bmp',
  },
)

// Leading signatures, hoisted so they are not re-allocated on every call.
const PNG_SIGNATURE: readonly number[] = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]
const JPEG_SIGNATURE: readonly number[] = [0xff, 0xd8, 0xff]
const GIF_PREFIX: readonly number[] = [0x47, 0x49, 0x46, 0x38] // "GIF8"
const RIFF_TAG: readonly number[] = [0x52, 0x49, 0x46, 0x46] // "RIFF"
const WEBP_TAG: readonly number[] = [0x57, 0x45, 0x42, 0x50] // "WEBP"
const BMP_SIGNATURE: readonly number[] = [0x42, 0x4d] // "BM"

/** True when `bytes` contains exactly `expected` starting at `offset`. */
function matchesAt(bytes: Uint8Array, offset: number, expected: readonly number[]): boolean {
  if (bytes.length < offset + expected.length) return false
  return expected.every((byte, i) => bytes[offset + i] === byte)
}

/**
 * Sniff an image MIME type from leading magic bytes, falling back to the
 * file extension when no signature matches.
 *
 * Per the original note, this exists because the vision provider rejects
 * requests whose data: URL carries an incorrect mediaType.
 *
 * @param bytes - Raw file contents (only the first 12 bytes are inspected).
 * @param fallbackExt - Lower-cased extension without the dot, or null.
 * @returns The detected MIME type, or null when neither the bytes nor the
 *   extension identify a known image format.
 */
function sniffMimeFromBytes(bytes: Uint8Array, fallbackExt: string | null): string | null {
  if (matchesAt(bytes, 0, PNG_SIGNATURE)) return 'image/png'
  if (matchesAt(bytes, 0, JPEG_SIGNATURE)) return 'image/jpeg'
  // GIF87a / GIF89a: "GIF8", then '7' or '9', then 'a'.
  if (
    matchesAt(bytes, 0, GIF_PREFIX) &&
    bytes.length >= 6 &&
    (bytes[4] === 0x37 || bytes[4] === 0x39) &&
    bytes[5] === 0x61
  ) {
    return 'image/gif'
  }
  // RIFF container: bytes 4..7 hold the chunk size, the form type sits at 8.
  if (matchesAt(bytes, 0, RIFF_TAG) && matchesAt(bytes, 8, WEBP_TAG)) return 'image/webp'
  if (matchesAt(bytes, 0, BMP_SIGNATURE)) return 'image/bmp'

  const fromExt = fallbackExt ? KNOWN_MIME_BY_EXT[fallbackExt] : undefined
  return fromExt ? fromExt : null
}
80
+
81
/** Optional non-empty string naming a local image file. */
const imagePathField = z
  .string()
  .min(1)
  .optional()
  .describe('Absolute path on disk to the image to analyze. Provide this OR image_url, not both.')

/** Optional URL-shaped string naming a remote image. */
const imageUrlField = z
  .string()
  .url()
  .optional()
  .describe(
    'http(s) URL pointing to the image. Private/loopback IPs blocked; same guard as web.fetch.',
  )

/** Required non-empty instruction forwarded to the vision model. */
const promptField = z
  .string()
  .min(1)
  .describe('Question or instruction for the vision model (e.g. "describe this image").')

/**
 * Argument schema for `vision.analyze`. Both image fields are optional at
 * the schema level; the handler enforces that exactly one is supplied.
 */
const VisionSchema = z.object({
  image_path: imagePathField,
  image_url: imageUrlField,
  prompt: promptField,
})

/** Parsed argument shape derived from {@link VisionSchema}. */
type VisionArgs = z.infer<typeof VisionSchema>
103
+
104
/** Largest raw image accepted: 10 MiB (base64 encoding adds roughly a third on the wire). */
const MAX_IMAGE_BYTES = 10 * 1024 ** 2
/** Milliseconds allowed for an image download before it is aborted. */
const FETCH_TIMEOUT_MS = 15 * 1000
106
+
107
/** Dependencies injected into {@link makeVisionAnalyze}. */
export interface VisionAnalyzeDeps {
  /**
   * Agent state directory, handed to the `PathGuard` that vets `image_path`.
   * Per the original note the guard refuses reads under this directory and
   * under credential directories.
   */
  agentDir: string
  /**
   * Inference callback for the configured vision provider, or `null` when
   * none is configured (the original note cites testnet and opt-out).
   */
  visionInfer: VisionInferFn | null
}
113
+
114
/**
 * Build the `vision.analyze` tool definition.
 *
 * The handler never rejects for expected failures: a missing provider,
 * invalid arguments, an image that cannot be loaded, and a failed inference
 * call are all reported as `{ ok: false, error }`.
 *
 * @param deps - Vision inference callback (or null) and the agent directory
 *   used to construct the path guard.
 * @returns The tool definition consumed by the tool registry.
 */
export function makeVisionAnalyze(deps: VisionAnalyzeDeps): ToolDef<VisionArgs> {
  const guard = new PathGuard({ agentDir: deps.agentDir })
  return {
    name: 'vision.analyze',
    description:
      "Describe / answer questions about an image. Pass image_path (absolute path on disk) OR image_url (http/https). Routes to a multimodal model on Mantle Compute (qwen3-vl-30b on mainnet). Refuses paths under credential dirs (.ssh, .aws, .nebula/). ALWAYS call this tool when the operator references an image by path or URL — do NOT pre-check existence with shell.run and do NOT skip the call by replying 'the file doesn't exist'. The tool returns a structured error if the file is missing or invalid; let the tool be the source of truth, never your guess.",
    searchHint: 'vision image analyze describe ocr photo screenshot multimodal',
    schema: VisionSchema,
    handler: async args => {
      const infer = deps.visionInfer
      if (!infer) {
        return {
          ok: false,
          error:
            'vision provider not configured. Set `vision.provider` in ~/.nebula/config.ts to a Mantle Compute multimodal provider, or unset to use the network default.',
        }
      }
      // Exactly one source must be given: both-set and neither-set are rejected.
      if (Boolean(args.image_path) === Boolean(args.image_url)) {
        return { ok: false, error: 'exactly one of image_path or image_url is required' }
      }

      let loaded: { bytes: Uint8Array; mediaType: string }
      try {
        loaded = await loadImage(args, guard)
      } catch (e: unknown) {
        // A thrown value is not guaranteed to be an Error; narrow before
        // reading `.message` so the handler itself can never throw here.
        return { ok: false, error: e instanceof Error ? e.message : String(e) }
      }

      try {
        const result = await infer({
          images: [{ bytes: loaded.bytes, mediaType: loaded.mediaType }],
          prompt: args.prompt,
          maxOutputTokens: 1024,
        })
        return {
          ok: true,
          data: {
            content: result.content,
            model: result.model ?? null,
            usage: result.usage,
            finishReason: result.finishReason,
          },
        }
      } catch (e: unknown) {
        const detail = e instanceof Error ? e.message : String(e)
        // Provider errors can be very long; cap what is echoed back.
        return { ok: false, error: `vision call failed: ${detail.slice(0, 240)}` }
      }
    },
  }
}
163
+
164
/** Upper bound on redirect hops followed for an `image_url`. */
const MAX_IMAGE_REDIRECTS = 5

/** HTTP statuses whose `Location` header is followed. */
const IMAGE_REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308])

/**
 * Lower-cased trailing extension of `pathLike` when it is a known image
 * extension, otherwise null. Uses an own-property check so inherited keys
 * such as `constructor` are never treated as extensions.
 */
function knownImageExt(pathLike: string): string | null {
  const ext = (pathLike.match(/\.([a-zA-Z0-9]+)$/)?.[1] ?? '').toLowerCase()
  if (ext === '') return null
  return Object.prototype.hasOwnProperty.call(KNOWN_MIME_BY_EXT, ext) ? ext : null
}

/** Throws unless `url` is http(s) and its host passes the private-host guard. */
function assertPublicHttpUrl(url: URL): void {
  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    throw new Error(`unsupported protocol: ${url.protocol}`)
  }
  if (hostIsPrivate(url.hostname)) {
    throw new Error(`host blocked (private/loopback/metadata): ${url.hostname}`)
  }
}

/**
 * GET `start`, following redirects manually so that every hop is
 * re-validated. With `redirect: 'follow'` a public URL could bounce the
 * request to a loopback or metadata address after the initial check.
 *
 * NOTE(review): this validates host names only; a public name that resolves
 * to a private address is not caught at this layer.
 */
async function fetchImageWithGuardedRedirects(start: URL, signal: AbortSignal): Promise<Response> {
  let current = start
  for (let hop = 0; hop <= MAX_IMAGE_REDIRECTS; hop++) {
    assertPublicHttpUrl(current)
    const res = await fetch(current, {
      method: 'GET',
      redirect: 'manual',
      signal,
      headers: { 'user-agent': 'nebula/vision.analyze' },
    })
    const location = IMAGE_REDIRECT_STATUSES.has(res.status) ? res.headers.get('location') : null
    if (location === null) return res
    try {
      // The redirect body is never used; release the connection.
      await res.body?.cancel()
    } catch {}
    current = new URL(location, current)
  }
  throw new Error(`too many redirects (limit ${MAX_IMAGE_REDIRECTS})`)
}

/**
 * Load the image named by `args` from disk or over http(s).
 *
 * @param args - Parsed tool arguments; exactly one of `image_path` /
 *   `image_url` is expected to be set (enforced by the caller).
 * @param guard - Path guard consulted before any local read.
 * @returns The raw bytes and the detected MIME type.
 * @throws Error when the path is relative or refused by the guard, the file
 *   is not a regular file, the image exceeds `MAX_IMAGE_BYTES`, the URL is
 *   invalid/blocked/unreachable, the fetch times out, or the format cannot
 *   be identified.
 */
async function loadImage(
  args: VisionArgs,
  guard: PathGuard,
): Promise<{ bytes: Uint8Array; mediaType: string }> {
  if (args.image_path) {
    const given = args.image_path
    // Expand only a bare `~` or a `~/` prefix. `~user/...` names another
    // user's home and must not be rewritten to the current user's.
    const expanded =
      given === '~' || given.startsWith('~/') || given.startsWith('~\\')
        ? homedir() + given.slice(1)
        : given
    if (!isAbsolute(expanded)) {
      throw new Error(`image_path must be absolute, got: ${args.image_path}`)
    }
    const verdict = guard.check(expanded)
    if (!verdict.allowed) {
      throw new Error(verdict.reason ?? 'protected path')
    }
    // Check type and size BEFORE reading: readFile would otherwise pull a
    // multi-GB file into memory, or never finish on a device/FIFO.
    const info = await stat(expanded)
    if (!info.isFile()) {
      throw new Error(`image_path is not a regular file: ${expanded}`)
    }
    if (info.size > MAX_IMAGE_BYTES) {
      throw new Error(
        `image too large: ${info.size} bytes (limit ${MAX_IMAGE_BYTES}). Resize and retry.`,
      )
    }
    const buffer = await readFile(expanded)
    // Re-check: the file may have grown between stat and read.
    if (buffer.byteLength > MAX_IMAGE_BYTES) {
      throw new Error(
        `image too large: ${buffer.byteLength} bytes (limit ${MAX_IMAGE_BYTES}). Resize and retry.`,
      )
    }
    const bytes = new Uint8Array(buffer)
    const mediaType = sniffMimeFromBytes(bytes, knownImageExt(expanded))
    if (!mediaType) {
      throw new Error(`unrecognized image format at ${expanded}`)
    }
    return { bytes, mediaType }
  }

  // image_url path
  const raw = args.image_url
  let url: URL
  try {
    if (raw === undefined) throw new TypeError('missing image_url')
    url = new URL(raw)
  } catch {
    throw new Error('invalid image_url')
  }
  assertPublicHttpUrl(url)

  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS)
  try {
    const res = await fetchImageWithGuardedRedirects(url, controller.signal)
    if (!res.ok) {
      try {
        await res.body?.cancel()
      } catch {}
      throw new Error(`fetch http ${res.status}`)
    }
    // Ask for one byte past the cap so "exactly at the limit" and "over the
    // limit" can be told apart.
    const { bytes, truncated } = await collectUpToBytes(res.body, MAX_IMAGE_BYTES + 1)
    if (truncated || bytes.byteLength > MAX_IMAGE_BYTES) {
      throw new Error(`image too large: exceeds ${MAX_IMAGE_BYTES} bytes. Resize and retry.`)
    }
    const headerType =
      (res.headers.get('content-type') ?? '').split(';')[0]?.trim().toLowerCase() ?? ''
    const ext = knownImageExt(url.pathname)
    // Precedence: magic bytes, then an image/* Content-Type, then the URL
    // extension (the least trustworthy signal).
    const mediaType =
      sniffMimeFromBytes(bytes, null) ??
      (headerType.startsWith('image/') ? headerType : null) ??
      (ext !== null ? (KNOWN_MIME_BY_EXT[ext] ?? null) : null)
    if (!mediaType) {
      throw new Error(`unrecognized image format from ${url.hostname}`)
    }
    return { bytes, mediaType }
  } catch (e: unknown) {
    const aborted =
      typeof e === 'object' && e !== null && (e as { name?: unknown }).name === 'AbortError'
    if (aborted) throw new Error(`fetch timeout after ${FETCH_TIMEOUT_MS}ms`)
    throw e
  } finally {
    clearTimeout(timer)
  }
}
241
+
242
+ export { sniffMimeFromBytes }
@@ -0,0 +1,310 @@
1
+ import type { ToolDef } from 'nebula-ai-core'
2
+ import { z } from 'zod'
3
+
4
+ /**
5
+ * `web.fetch` — GET a URL and return its body as text/markdown/json.
6
+ *
7
+ * Mirrors Claude Code's `WebFetch` capability: GET-only, follows redirects
8
+ * via the platform fetch, decodes content-type into the most useful shape
9
+ * for the brain to consume. POST/PUT/DELETE are intentionally NOT supported
10
+ * — those have side effects that should route through `shell.run curl`
11
+ * with the approval modal in play.
12
+ *
13
+ * Permission scope vs `shell.run curl`:
14
+ * - shell.run hits the approval modal every time + redactEnv strips
15
+ * wallet/API-key env vars (correct for safety, but blocks legitimate
16
+ * auth headers if the brain wanted to construct them).
17
+ * - web.fetch is read-only by construction. Refuses non-GET, refuses
18
+ * non-http(s), refuses private/loopback/metadata IPs. No subprocess
19
+ * spawn, no redactEnv. Lower-risk surface, no modal needed.
20
+ *
21
+ * The HTML→markdown conversion is intentionally minimal (~80 LOC inline,
22
+ * no new deps). It strips script/style, converts headings/links/lists
23
+ * to markdown, drops everything else. Good enough for "let me read this
24
+ * doc page" workflows; not suitable for fully-rendered SPA scraping
25
+ * (use browser.* tools for that).
26
+ */
27
+
28
/** Required URL-shaped string naming the resource to GET. */
const urlField = z.string().url().describe('http(s) URL to GET. Private/loopback IPs are blocked.')

/** Optional positive integer timeout, capped at 30 000 ms. */
const timeoutMsField = z
  .number()
  .int()
  .positive()
  .max(30_000)
  .optional()
  .describe('Abort the request after N ms. Default 15000.')

/** Optional positive integer body cap, at most 500 000 bytes. */
const maxBytesField = z
  .number()
  .int()
  .positive()
  .max(500_000)
  .optional()
  .describe('Truncate response body to N bytes. Default 50000.')

/** Argument schema for the `web.fetch` tool. */
const FetchSchema = z.object({
  url: urlField,
  timeout_ms: timeoutMsField,
  max_bytes: maxBytesField,
})
45
+
46
/** Payload describing the HTTP response that `web.fetch` received. */
interface FetchResultData {
  /** HTTP status code of the response. */
  status: number
  /** Raw `Content-Type` header, or null when the server sent none. */
  content_type: string | null
  /** Rendered body (markdown, pretty-printed JSON, or plain text). */
  body: string
  /** True when the body was cut off at the byte cap. */
  truncated: boolean
  /** URL reported by the response. */
  final_url: string
  /**
   * Set when the body looks like a bot-block, captcha, rate-limit, or other
   * anti-scrape interstitial, even if the HTTP status was 2xx. Per the
   * original v0.20.2 note, the consumer is expected to escalate to
   * `browser.navigate` rather than read the markdown body.
   */
  blocked?: boolean
  /** Short label naming which block heuristic fired. */
  block_reason?: string
}

/** Result envelope returned by the `web.fetch` handler. */
interface FetchResult {
  ok: boolean
  data?: FetchResultData
  error?: string
}
66
+
67
/**
 * Body heuristics for anti-bot interstitials. Checked in order; the first
 * pattern that matches decides the reported reason.
 */
const BLOCK_PATTERNS: Array<{ reason: string; re: RegExp }> = [
  // Cloudflare anti-bot interstitial
  {
    reason: 'cloudflare',
    re: /just a moment\.\.\.|attention required.*cloudflare|cf-browser-verification|challenges\.cloudflare\.com/i,
  },
  // Google search bot block
  {
    reason: 'google-bot-block',
    re: /unusual traffic from your computer network|sending automated queries|enablejs\?sei=|please show you're not a robot/i,
  },
  // DuckDuckGo captcha / anomaly page
  {
    reason: 'ddg-anomaly',
    re: /anomaly detected|please complete the captcha|duckassist.*captcha/i,
  },
  // Bing / Microsoft account verify
  { reason: 'bing-verify', re: /verify you are not a robot|verify-bing|blockedreason=botnet/i },
  // Wikipedia rate-limit / API throttle
  { reason: 'rate-limit', re: /rate[- ]?limit|too many requests|hit our rate limit|throttled/i },
  // Generic captcha / hCaptcha / reCAPTCHA gates
  { reason: 'captcha', re: /g-recaptcha|h-captcha|recaptcha\/api\.js|hcaptcha\.com\/captcha/i },
  // Akamai / Imperva / Datadome / PerimeterX bot interstitials
  {
    reason: 'bot-block',
    re: /access denied.*reference #|datadome-captcha|perimeterx|bot detection|imperva incident id/i,
  },
]

/** Matches a hostname that is, or is a subdomain of, a listed search/wiki site. */
const SEARCH_HOST_RE = /(^|\.)(google\.com|bing\.com|duckduckgo\.com|wikipedia\.org)$/i

/** Substring form, used only when `finalUrl` cannot be parsed as a URL. */
const SEARCH_HOST_SUBSTRING_RE = /google\.com|bing\.com|duckduckgo\.com|wikipedia\.org/i

/** True when `finalUrl` points at one of the listed search/wiki hosts. */
function isSearchEngineUrl(finalUrl: string): boolean {
  let host: string
  try {
    host = new URL(finalUrl).hostname
  } catch {
    // Not an absolute URL: keep the historical substring behaviour.
    return SEARCH_HOST_SUBSTRING_RE.test(finalUrl)
  }
  // Match on the host only, so a path or query that merely mentions
  // "google.com" (or a look-alike such as notgoogle.com) does not count.
  return SEARCH_HOST_RE.test(host.endsWith('.') ? host.slice(0, -1) : host)
}

/**
 * Decide whether a response looks like an anti-bot / rate-limit page.
 *
 * @param rawHtml - Decoded response body; only the first 4096 characters
 *   are inspected.
 * @param status - HTTP status code of the response.
 * @param finalUrl - URL the response was served from.
 * @returns `{ reason }` naming the heuristic that fired, or null.
 */
export function detectBlock(
  rawHtml: string,
  status: number,
  finalUrl: string,
): { reason: string } | null {
  // Status-based: 429 and 451 always count; 403 counts only when served by
  // one of the listed search/wiki hosts.
  if (status === 429 || status === 451) return { reason: 'rate-limit' }
  if (status === 403 && isSearchEngineUrl(finalUrl)) return { reason: 'bot-block' }

  // Body-based: look only at the head of the document to bound the work.
  const head = rawHtml.slice(0, 4096)
  const hit = BLOCK_PATTERNS.find(p => p.re.test(head))
  return hit ? { reason: hit.reason } : null
}
114
+
115
/**
 * Patterns for loopback, private, link-local and similar non-public
 * addresses. Applied to a normalized host: lower-case, no IPv6 brackets,
 * no trailing dot.
 */
const PRIVATE_IP_PATTERNS: RegExp[] = [
  /^0\./, // 0.0.0.0/8 "this network"
  /^127\./, // loopback
  /^10\./, // RFC 1918
  /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./, // 100.64.0.0/10 shared address space
  /^172\.(1[6-9]|2[0-9]|3[0-1])\./, // RFC 1918
  /^192\.168\./, // RFC 1918
  /^169\.254\./, // link-local, includes cloud metadata
  /^::1?$/, // IPv6 unspecified (::) and loopback (::1)
  /^fe[89ab][0-9a-f]:/i, // fe80::/10 link-local
  /^f[cd][0-9a-f]{2}:/i, // fc00::/7 unique local
]

/** Host names that are blocked by exact match. */
const PRIVATE_HOST_LITERALS = new Set([
  'localhost',
  '0.0.0.0',
  '169.254.169.254',
  'metadata.google.internal',
])

/**
 * If `h` is an IPv4-mapped IPv6 address (`::ffff:a.b.c.d`, or the
 * `::ffff:hhhh:hhhh` form that the URL parser serializes it to), return the
 * embedded dotted-quad; otherwise null.
 */
function embeddedIpv4(h: string): string | null {
  const dotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(h)
  if (dotted) return dotted[1] ?? null
  const hex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(h)
  if (!hex) return null
  const hi = Number.parseInt(hex[1] ?? '0', 16)
  const lo = Number.parseInt(hex[2] ?? '0', 16)
  return `${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`
}

/**
 * Report whether `hostname` names a loopback, private, link-local, or
 * metadata endpoint that outbound fetches must not reach.
 *
 * Accepts the value of `URL.hostname` directly, including bracketed IPv6
 * literals and a trailing root dot.
 *
 * NOTE(review): this is a name/literal check only; a public DNS name that
 * resolves to a private address is not detected here.
 *
 * @param hostname - Host portion of a URL.
 * @returns True when the host must be blocked.
 */
export function hostIsPrivate(hostname: string): boolean {
  let h = hostname.trim().toLowerCase()
  // URL.hostname keeps the brackets around IPv6 literals ("[::1]").
  if (h.startsWith('[') && h.endsWith(']')) h = h.slice(1, -1)
  // "localhost." is the same host as "localhost".
  if (h.endsWith('.')) h = h.slice(0, -1)
  // Judge IPv4-mapped IPv6 by the IPv4 address it carries.
  h = embeddedIpv4(h) ?? h

  if (PRIVATE_HOST_LITERALS.has(h)) return true
  if (h.endsWith('.local') || h.endsWith('.internal') || h.endsWith('.localhost')) return true
  return PRIVATE_IP_PATTERNS.some(re => re.test(h))
}
141
+
142
/**
 * Build the `web.fetch` tool definition. The handler forwards to `fetchUrl`,
 * substituting a 15 000 ms timeout and a 50 000 byte cap when the caller
 * leaves those arguments out.
 */
export function makeWebFetch(): ToolDef<z.infer<typeof FetchSchema>> {
  const defaultTimeoutMs = 15_000
  const defaultMaxBytes = 50_000
  const tool: ToolDef<z.infer<typeof FetchSchema>> = {
    name: 'web.fetch',
    description:
      'GET an http(s) URL and return its body as markdown (HTML), JSON-pretty (application/json), or plain text. Read-only; no POST/PUT/DELETE. Refuses private/loopback/metadata IPs. For interactive SPAs or pages requiring login, use the browser.* tools instead.',
    searchHint: 'web fetch http https url get download read article docs',
    schema: FetchSchema,
    handler: async ({ url, timeout_ms, max_bytes }) =>
      fetchUrl(url, timeout_ms ?? defaultTimeoutMs, max_bytes ?? defaultMaxBytes),
  }
  return tool
}
152
+
153
/** Upper bound on redirect hops followed by `web.fetch`. */
const MAX_FETCH_REDIRECTS = 5

/** HTTP statuses whose `Location` header is followed. */
const FETCH_REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308])

/** Why `url` may not be fetched, or null when it is acceptable. */
function fetchRefusalReason(url: URL): string | null {
  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    return `unsupported protocol: ${url.protocol}`
  }
  if (hostIsPrivate(url.hostname)) {
    return `host blocked (private/loopback/metadata): ${url.hostname}`
  }
  return null
}

/**
 * GET `start`, following redirects manually so every hop is re-validated.
 * With `redirect: 'follow'` a public URL could redirect the request to a
 * loopback or metadata address after the initial host check.
 *
 * @returns The final response and the URL it was requested from.
 * @throws Error when a redirect target is refused or the hop limit is hit.
 */
async function fetchWithGuardedRedirects(
  start: URL,
  signal: AbortSignal,
): Promise<{ res: Response; requested: URL }> {
  let current = start
  for (let hop = 0; hop <= MAX_FETCH_REDIRECTS; hop++) {
    const res = await fetch(current, {
      method: 'GET',
      redirect: 'manual',
      signal,
      headers: { 'user-agent': 'nebula/web.fetch' },
    })
    const location = FETCH_REDIRECT_STATUSES.has(res.status) ? res.headers.get('location') : null
    if (location === null) return { res, requested: current }
    try {
      // The redirect body is never used; release the connection.
      await res.body?.cancel()
    } catch {}
    const next = new URL(location, current)
    const refusal = fetchRefusalReason(next)
    if (refusal !== null) throw new Error(refusal)
    current = next
  }
  throw new Error(`too many redirects (limit ${MAX_FETCH_REDIRECTS})`)
}

/**
 * GET `rawUrl` and package the response for the tool caller.
 *
 * Never rejects: invalid or refused URLs, timeouts, and network failures are
 * reported as `{ ok: false, error }`. A non-2xx response that is recognized
 * as an anti-bot page is reported with `ok: true` and `blocked: true`.
 *
 * @param rawUrl - URL string supplied by the caller.
 * @param timeoutMs - Milliseconds before the whole request is aborted.
 * @param maxBytes - Maximum number of body bytes to download.
 */
async function fetchUrl(rawUrl: string, timeoutMs: number, maxBytes: number): Promise<FetchResult> {
  let url: URL
  try {
    url = new URL(rawUrl)
  } catch {
    return { ok: false, error: 'invalid URL' }
  }
  const refusal = fetchRefusalReason(url)
  if (refusal !== null) return { ok: false, error: refusal }

  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const { res, requested } = await fetchWithGuardedRedirects(url, controller.signal)
    const finalUrl = res.url || requested.href
    const contentType = res.headers.get('content-type')
    // Stream until `maxBytes`, then cancel, so the remainder of an oversized
    // body never crosses the wire.
    const { bytes, truncated } = await collectUpToBytes(res.body, maxBytes)
    const text = new TextDecoder('utf-8', { fatal: false }).decode(bytes)
    const block = detectBlock(text, res.status, finalUrl)
    const body = renderBody(text, contentType)
    return {
      ok: res.ok || block != null,
      data: {
        status: res.status,
        content_type: contentType,
        body,
        truncated,
        final_url: finalUrl,
        ...(block ? { blocked: true, block_reason: block.reason } : {}),
      },
      ...(res.ok || block ? {} : { error: `http ${res.status}` }),
    }
  } catch (e: unknown) {
    const aborted =
      typeof e === 'object' && e !== null && (e as { name?: unknown }).name === 'AbortError'
    if (aborted) return { ok: false, error: `timeout after ${timeoutMs}ms` }
    // A thrown value is not guaranteed to be an Error; narrow before use.
    return { ok: false, error: e instanceof Error ? e.message : String(e) }
  } finally {
    clearTimeout(timer)
  }
}
205
+
206
/**
 * After the cap has been filled exactly, find out whether the stream still
 * has data. A read failure is reported as "more" because completeness can
 * no longer be confirmed.
 */
async function streamHasMore(reader: ReadableStreamDefaultReader<Uint8Array>): Promise<boolean> {
  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) return false
      if (value && value.length > 0) return true
    }
  } catch {
    return true
  }
}

/**
 * Read at most `maxBytes` from `body`, then cancel the stream so the rest of
 * the payload is never downloaded.
 *
 * @param body - Response body stream, or null for a body-less response.
 * @param maxBytes - Maximum number of bytes to keep.
 * @returns The collected bytes and `truncated`, which is true whenever the
 *   stream held more than `maxBytes`. This includes the case where the cap
 *   falls exactly on a chunk boundary and further data follows.
 */
export async function collectUpToBytes(
  body: ReadableStream<Uint8Array> | null,
  maxBytes: number,
): Promise<{ bytes: Uint8Array; truncated: boolean }> {
  if (!body) return { bytes: new Uint8Array(), truncated: false }
  const reader = body.getReader()
  const chunks: Uint8Array[] = []
  let received = 0
  let truncated = false
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    if (!value || value.length === 0) continue
    const room = Math.max(maxBytes - received, 0)
    if (value.length < room) {
      chunks.push(value)
      received += value.length
      continue
    }
    // This chunk reaches or crosses the cap: keep only what fits.
    chunks.push(value.length > room ? value.subarray(0, room) : value)
    // An exact fit tells us nothing about what follows, so peek once more.
    truncated = value.length > room || (await streamHasMore(reader))
    try {
      // Best effort: cancelling an already-errored stream may throw.
      await reader.cancel()
    } catch {}
    break
  }
  const total = chunks.reduce((n, c) => n + c.length, 0)
  const out = new Uint8Array(total)
  let offset = 0
  for (const c of chunks) {
    out.set(c, offset)
    offset += c.length
  }
  return { bytes: out, truncated }
}
240
+
241
/**
 * Shape a decoded response body according to its content type: JSON is
 * pretty-printed (or returned untouched when it does not parse), HTML/XHTML
 * is converted to markdown, and anything else is passed through.
 */
function renderBody(text: string, contentType: string | null): string {
  const mime = (contentType ?? '').toLowerCase()
  const mentions = (...needles: string[]): boolean => needles.some(n => mime.includes(n))

  if (mentions('application/json', '+json')) {
    try {
      const parsed: unknown = JSON.parse(text)
      return JSON.stringify(parsed, null, 2)
    } catch {
      return text
    }
  }
  return mentions('text/html', 'application/xhtml') ? htmlToMarkdown(text) : text
}
255
+
256
/**
 * Minimal HTML→markdown conversion, with no parser dependency. The stages
 * run in a fixed order:
 *
 *   1. Drop comments and whole <script>/<style>/<noscript> elements.
 *   2. Rewrite <h1>..<h6> as `#`-prefixed lines.
 *   3. Rewrite <a href="x">text</a> as `[text](x)`.
 *   4. Rewrite <li> as `- ` bullets.
 *   5. Turn <br>, </p>, </tr>, </td> into line breaks / cell separators.
 *   6. Strip every remaining tag, then decode entities.
 *   7. Normalize horizontal whitespace and collapse blank-line runs.
 */
export function htmlToMarkdown(html: string): string {
  const dropEntirely: RegExp[] = [
    /<!--[\s\S]*?-->/g,
    /<script\b[^>]*>[\s\S]*?<\/script\s*>/gi,
    /<style\b[^>]*>[\s\S]*?<\/style\s*>/gi,
    /<noscript\b[^>]*>[\s\S]*?<\/noscript\s*>/gi,
  ]
  let out = dropEntirely.reduce((acc, re) => acc.replace(re, ''), html)

  out = out.replace(
    /<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1\s*>/gi,
    (_whole: string, depth: string, content: string) => {
      const hashes = '#'.repeat(Number(depth))
      return `\n\n${hashes} ${stripTags(content)}\n\n`
    },
  )
  out = out.replace(
    /<a\b[^>]*?href\s*=\s*['"]([^'"]+)['"][^>]*>([\s\S]*?)<\/a\s*>/gi,
    (_whole: string, target: string, label: string) => {
      const linkText = stripTags(label).trim()
      return `[${linkText}](${target})`
    },
  )
  out = out.replace(
    /<li\b[^>]*>([\s\S]*?)<\/li\s*>/gi,
    (_whole: string, item: string) => `\n- ${stripTags(item)}`,
  )

  // Structural tags that become plain separators.
  const separators: Array<[RegExp, string]> = [
    [/<br\s*\/?\s*>/gi, '\n'],
    [/<\/p\s*>/gi, '\n\n'],
    [/<\/tr\s*>/gi, '\n'],
    [/<\/td\s*>/gi, ' | '],
  ]
  for (const [re, sep] of separators) {
    out = out.replace(re, sep)
  }

  out = decodeEntities(stripTags(out))

  // Whitespace clean-up; the order matters (spaces, then indents, then blanks).
  const tidy: Array<[RegExp, string]> = [
    [/[ \t]+/g, ' '],
    [/\n[ \t]+/g, '\n'],
    [/\n{3,}/g, '\n\n'],
  ]
  for (const [re, replacement] of tidy) {
    out = out.replace(re, replacement)
  }
  return out.trim()
}
294
+
295
/** Remove every `<...>` run (at least one character between the brackets). */
function stripTags(s: string): string {
  const tagPattern = /<[^>]+>/g
  // Splitting on the tag pattern and re-joining drops each match.
  return s.split(tagPattern).join('')
}
298
+
299
/** Named entities recognized by {@link decodeEntities} (names are case-sensitive). */
const NAMED_ENTITY_TEXT: Readonly<Record<string, string>> = {
  nbsp: ' ',
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
}

/**
 * Decode the common named entities plus decimal (`&#65;`) and hexadecimal
 * (`&#x41;`) character references.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again: `&amp;lt;` yields the literal text `&lt;`, not `<`.
 * Numeric references are decoded as full Unicode code points. A reference
 * outside the Unicode range is left in the output unchanged.
 */
function decodeEntities(s: string): string {
  return s.replace(
    /&(#\d+|#[xX][0-9a-fA-F]+|nbsp|amp|lt|gt|quot|apos);/g,
    (whole: string, body: string) => {
      if (body[0] !== '#') return NAMED_ENTITY_TEXT[body] ?? whole
      const isHex = body[1] === 'x' || body[1] === 'X'
      const code = isHex
        ? Number.parseInt(body.slice(2), 16)
        : Number.parseInt(body.slice(1), 10)
      // String.fromCodePoint throws for values past U+10FFFF.
      if (!Number.isInteger(code) || code > 0x10ffff) return whole
      return String.fromCodePoint(code)
    },
  )
}