@xsynaptic/unified-tools 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ import{createHash as e}from"node:crypto";function t(e){return typeof e==`string`?`'${e}'`:new n().serialize(e)}const n=function(){class e{#e=new Map;compare(e,t){let n=typeof e,r=typeof t;return n===`string`&&r===`string`?e.localeCompare(t):n===`number`&&r===`number`?e-t:String.prototype.localeCompare.call(this.serialize(e,!0),this.serialize(t,!0))}serialize(e,t){if(e===null)return`null`;switch(typeof e){case`string`:return t?e:`'${e}'`;case`bigint`:return`${e}n`;case`object`:return this.$object(e);case`function`:return this.$function(e)}return String(e)}serializeObject(e){let t=Object.prototype.toString.call(e);if(t!==`[object Object]`)return this.serializeBuiltInType(t.length<10?`unknown:${t}`:t.slice(8,-1),e);let n=e.constructor,r=n===Object||n===void 0?``:n.name;if(r!==``&&globalThis[r]===n)return this.serializeBuiltInType(r,e);if(typeof e.toJSON==`function`){let t=e.toJSON();return r+(typeof t==`object`&&t?this.$object(t):`(${this.serialize(t)})`)}return this.serializeObjectEntries(r,Object.entries(e))}serializeBuiltInType(e,t){let n=this[`$`+e];if(n)return n.call(this,t);if(typeof t?.entries==`function`)return this.serializeObjectEntries(e,t.entries());throw Error(`Cannot serialize ${e}`)}serializeObjectEntries(e,t){let n=Array.from(t).sort((e,t)=>this.compare(e[0],t[0])),r=`${e}{`;for(let e=0;e<n.length;e++){let[t,i]=n[e];r+=`${this.serialize(t,!0)}:${this.serialize(i)}`,e<n.length-1&&(r+=`,`)}return r+`}`}$object(e){let t=this.#e.get(e);return t===void 0&&(this.#e.set(e,`#${this.#e.size}`),t=this.serializeObject(e),this.#e.set(e,t)),t}$function(e){let t=Function.prototype.toString.call(e);return t.slice(-15)===`[native code] }`?`${e.name||``}()[native]`:`${e.name}(${e.length})${t.replace(/\s*\n\s*/g,``)}`}$Array(e){let t=`[`;for(let n=0;n<e.length;n++)t+=this.serialize(e[n]),n<e.length-1&&(t+=`,`);return t+`]`}$Date(e){try{return`Date(${e.toISOString()})`}catch{return`Date(null)`}}$ArrayBuffer(e){return`ArrayBuffer[${new Uint8Array(e).join(`,`)}]`}$Set(e){return`Set${this.$Array(Array.from(e).sort((e,t)=>this.compare(e,t)))}`}$Map(e){return this.serializeObjectEntries(`Map`,e.entries())}}for(let t of[`Error`,`RegExp`,`URL`])e.prototype[`$`+t]=function(e){return`${t}(${e})`};for(let t of[`Int8Array`,`Uint8Array`,`Uint8ClampedArray`,`Int16Array`,`Uint16Array`,`Int32Array`,`Uint32Array`,`Float32Array`,`Float64Array`])e.prototype[`$`+t]=function(e){return`${t}[${e.join(`,`)}]`};for(let t of[`BigInt64Array`,`BigUint64Array`])e.prototype[`$`+t]=function(e){return`${t}[${e.join(`n,`)}${e.length>0?`n`:``}]`};return e}(),r=globalThis.process?.getBuiltinModule?.(`crypto`)?.hash,i=`sha256`,a=`base64url`;function o(t){if(r)return r(i,t,a);let n=e(i).update(t);return globalThis.process?.versions?.webcontainer?n.digest().toString(a):n.digest(a)}function s(e){return o(t(e))}export{s as t};
package/dist/html-cjk.mjs CHANGED
@@ -1 +1 @@
1
- import e from"rehype-parse";import t from"rehype-sanitize";import n from"rehype-stringify";import{rehypeWrapCjk as r}from"rehype-wrap-cjk";import{unified as i}from"unified";function a(a){return i().use(e,{fragment:!0}).use(r,{langCode:`zh`}).use(t).use(n).processSync(a).toString()}function o(a){return i().use(e,{fragment:!0}).use(r,{langCode:`ja`}).use(t).use(n).processSync(a).toString()}function s(a){return i().use(e,{fragment:!0}).use(r,{langCode:`ko`}).use(t).use(n).processSync(a).toString()}function c({input:a,wrapCjkOptions:o}){let s=i().use(e,{fragment:!0});return o&&s.use(r,o),s.use(t).use(n),s.processSync(a).toString()}export{a as wrapChinese,c as wrapCjk,o as wrapJapanese,s as wrapKorean};
1
+ import{t as e}from"./dist-3QjHu5Qa.mjs";import t from"rehype-parse";import n from"rehype-sanitize";import r from"rehype-stringify";import{rehypeWrapCjk as i}from"rehype-wrap-cjk";import{unified as a}from"unified";const o=new Map;function s(e){return a().use(t,{fragment:!0}).use(i,e).use(n).use(r).freeze()}function c(){return a().use(t,{fragment:!0}).use(n).use(r).freeze()}function l(t){let n=t?e(t):``,r=o.get(n);return r||(r=t?s(t):c(),o.set(n,r)),r}const u={langCode:`zh`},d={langCode:`ja`},f={langCode:`ko`};function p(e){return l(u).processSync(e).toString()}function m(e){return l(d).processSync(e).toString()}function h(e){return l(f).processSync(e).toString()}function g({input:e,wrapCjkOptions:t}){return l(t).processSync(e).toString()}export{p as wrapChinese,g as wrapCjk,m as wrapJapanese,h as wrapKorean};
package/dist/html.d.mts CHANGED
@@ -2,10 +2,6 @@ import { Options } from "rehype-sanitize";
2
2
 
3
3
  //#region src/html.d.ts
4
4
  declare function sanitizeHtml(input: string, options?: Options): string;
5
- declare const stripTags: (input: string, options?: Options) => string;
6
- /**
7
- * Strips GFM-style footnotes from HTML content using a simple regex approach
8
- */
9
- declare function stripFootnotes(input: string): string;
5
+ declare function stripTags(input: string, options?: Options): string;
10
6
  //#endregion
11
- export { sanitizeHtml, stripFootnotes, stripTags };
7
+ export { sanitizeHtml, stripTags };
package/dist/html.mjs CHANGED
@@ -1 +1 @@
1
- import e from"rehype-parse";import t from"rehype-sanitize";import n from"rehype-stringify";import{unified as r}from"unified";function i(i,a){let o=r().use(e,{fragment:!0}).use(t,a).use(n).processSync(i);return String(o)}const a=(e,t)=>i(e,{...t,tagNames:[]});function o(e){let t=e.replaceAll(/<sup><a[^>]*data-footnote-ref[^>]*>.*?<\/a><\/sup>/gi,``);return t=t.replaceAll(/<section[^>]*data-footnotes[^>]*>.*?<\/section>/gis,``),t}export{i as sanitizeHtml,o as stripFootnotes,a as stripTags};
1
+ import{t as e}from"./dist-3QjHu5Qa.mjs";import t from"rehype-parse";import n from"rehype-sanitize";import r from"rehype-stringify";import{unified as i}from"unified";const a=new Map;function o(e){return i().use(t,{fragment:!0}).use(n,e).use(r).freeze()}function s(t){let n=t?e(t):``,r=a.get(n);return r||(r=o(t),a.set(n,r)),r}function c(e,t){let n=s(t);return String(n.processSync(e))}function l(e,t){return c(e,{...t,tagNames:[]})}export{c as sanitizeHtml,l as stripTags};
package/dist/index.d.mts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { wrapChinese, wrapCjk, wrapJapanese, wrapKorean } from "./html-cjk.mjs";
2
- import { sanitizeHtml, stripFootnotes, stripTags } from "./html.mjs";
2
+ import { sanitizeHtml, stripTags } from "./html.mjs";
3
3
  import { transformMarkdown } from "./markdown.mjs";
4
4
  import { sanitizeMdx } from "./mdx.mjs";
5
5
  import { stylizeText } from "./text.mjs";
6
6
  import { defaultSchema } from "rehype-sanitize";
7
- export { defaultSchema, sanitizeHtml, sanitizeMdx, stripFootnotes, stripTags, stylizeText, transformMarkdown, wrapChinese, wrapCjk, wrapJapanese, wrapKorean };
7
+ export { defaultSchema, sanitizeHtml, sanitizeMdx, stripTags, stylizeText, transformMarkdown, wrapChinese, wrapCjk, wrapJapanese, wrapKorean };
package/dist/index.mjs CHANGED
@@ -1 +1 @@
1
- import{wrapChinese as e,wrapCjk as t,wrapJapanese as n,wrapKorean as r}from"./html-cjk.mjs";import{sanitizeHtml as i,stripFootnotes as a,stripTags as o}from"./html.mjs";import{transformMarkdown as s}from"./markdown.mjs";import{sanitizeMdx as c}from"./mdx.mjs";import{stylizeText as l}from"./text.mjs";import{defaultSchema as u}from"rehype-sanitize";export{u as defaultSchema,i as sanitizeHtml,c as sanitizeMdx,a as stripFootnotes,o as stripTags,l as stylizeText,s as transformMarkdown,e as wrapChinese,t as wrapCjk,n as wrapJapanese,r as wrapKorean};
1
+ import{wrapChinese as e,wrapCjk as t,wrapJapanese as n,wrapKorean as r}from"./html-cjk.mjs";import{sanitizeHtml as i,stripTags as a}from"./html.mjs";import{transformMarkdown as o}from"./markdown.mjs";import{sanitizeMdx as s}from"./mdx.mjs";import{stylizeText as c}from"./text.mjs";import{defaultSchema as l}from"rehype-sanitize";export{l as defaultSchema,i as sanitizeHtml,s as sanitizeMdx,a as stripTags,c as stylizeText,o as transformMarkdown,e as wrapChinese,t as wrapCjk,n as wrapJapanese,r as wrapKorean};
@@ -1,12 +1,13 @@
1
1
  import { RehypeWrapCjkOptions } from "rehype-wrap-cjk";
2
2
 
3
3
  //#region src/markdown.d.ts
4
+ interface TransformMarkdownOptions {
5
+ input: string;
6
+ wrapCjkOptions?: Partial<RehypeWrapCjkOptions> | undefined;
7
+ }
4
8
  declare function transformMarkdown({
5
9
  input,
6
10
  wrapCjkOptions
7
- }: {
8
- input: string;
9
- wrapCjkOptions?: Partial<RehypeWrapCjkOptions>;
10
- }): string;
11
+ }: TransformMarkdownOptions): string;
11
12
  //#endregion
12
13
  export { transformMarkdown };
package/dist/markdown.mjs CHANGED
@@ -1 +1 @@
1
- import e from"rehype-sanitize";import t from"rehype-stringify";import{rehypeWrapCjk as n}from"rehype-wrap-cjk";import{unified as r}from"unified";import i from"remark-parse";import a from"remark-rehype";import o from"remark-smartypants";function s({input:s,wrapCjkOptions:c}){let l=r().use(i).use(o).use(a);return c&&l.use(n,c),l.use(e).use(t),l.processSync(s).toString().trim()}export{s as transformMarkdown};
1
+ import{t as e}from"./dist-3QjHu5Qa.mjs";import t from"rehype-sanitize";import n from"rehype-stringify";import{rehypeWrapCjk as r}from"rehype-wrap-cjk";import{unified as i}from"unified";import a from"remark-parse";import o from"remark-rehype";import s from"remark-smartypants";const c=new Map;function l(e){return i().use(a).use(s).use(o).use(r,e).use(t).use(n).freeze()}function u(){return i().use(a).use(s).use(o).use(t).use(n).freeze()}function d(t){let n=t?e(t):``,r=c.get(n);return r||(r=t?l(t):u(),c.set(n,r)),r}function f({input:e,wrapCjkOptions:t}){return d(t).processSync(e).toString().trim()}export{f as transformMarkdown};
package/dist/mdx.mjs CHANGED
@@ -1 +1 @@
1
- import e from"rehype-sanitize";import t from"rehype-stringify";import{unified as n}from"unified";import r from"remark-parse";import i from"remark-rehype";import a from"remark-mdx";function o(o,s){let c=n().use(r).use(a).use(i).use(e,s??{tagNames:[]}).use(t).processSync(o);return String(c).replaceAll(/\s+/g,` `).trim()}export{o as sanitizeMdx};
1
+ import{t as e}from"./dist-3QjHu5Qa.mjs";import t from"rehype-sanitize";import n from"rehype-stringify";import{unified as r}from"unified";import i from"remark-parse";import a from"remark-rehype";import o from"remark-mdx";const s=new Map,c={tagNames:[]};function l(e){return r().use(i).use(o).use(a).use(t,e).use(n).freeze()}function u(t){let n=t??c,r=e(n),i=s.get(r);return i||(i=l(n),s.set(r,i)),i}function d(e,t){return String(u(t).processSync(e)).replaceAll(/\s+/g,` `).trim()}export{d as sanitizeMdx};
package/dist/text.mjs CHANGED
@@ -1 +1 @@
1
- import{retext as e}from"retext";import t from"retext-smartypants";function n(n,r){let i=e().use(t,r).processSync(n);return String(i).trim()}export{n as stylizeText};
1
+ import{t as e}from"./dist-3QjHu5Qa.mjs";import{retext as t}from"retext";import n from"retext-smartypants";const r=new Map;function i(e){return t().use(n,e).freeze()}function a(t){let n=t?e(t):``,a=r.get(n);return a||(a=i(t),r.set(n,a)),a}function o(e,t){return String(a(t).processSync(e)).trim()}export{o as stylizeText};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xsynaptic/unified-tools",
3
- "version": "2.0.0",
3
+ "version": "3.0.0",
4
4
  "description": "A common set of tools for transforming and manipulating markup and text",
5
5
  "exports": {
6
6
  ".": {
@@ -45,7 +45,7 @@
45
45
  "rehype-parse": "^9.0.1",
46
46
  "rehype-sanitize": "^6.0.0",
47
47
  "rehype-stringify": "^10.0.1",
48
- "rehype-wrap-cjk": "^1.0.5",
48
+ "rehype-wrap-cjk": "^1.0.9",
49
49
  "remark-mdx": "^3.1.1",
50
50
  "remark-parse": "^11.0.0",
51
51
  "remark-rehype": "^11.1.2",
@@ -58,14 +58,15 @@
58
58
  "@eslint/js": "^9.39.2",
59
59
  "@types/jest": "^30.0.0",
60
60
  "eslint": "^9.39.2",
61
- "eslint-plugin-perfectionist": "^5.1.0",
61
+ "eslint-plugin-perfectionist": "^5.4.0",
62
62
  "eslint-plugin-unicorn": "^62.0.0",
63
- "globals": "^16.5.0",
64
- "prettier": "^3.7.4",
65
- "tsdown": "^0.18.3",
63
+ "globals": "^17.2.0",
64
+ "ohash": "^2.0.11",
65
+ "prettier": "^3.8.1",
66
+ "tsdown": "^0.19.0",
66
67
  "typescript": "^5.9.3",
67
- "typescript-eslint": "^8.51.0",
68
- "vitest": "^4.0.16"
68
+ "typescript-eslint": "^8.54.0",
69
+ "vitest": "^4.0.18"
69
70
  },
70
71
  "packageManager": "pnpm@10.26.2+sha512.0e308ff2005fc7410366f154f625f6631ab2b16b1d2e70238444dd6ae9d630a8482d92a451144debc492416896ed16f7b114a86ec68b8404b2443869e68ffda6"
71
72
  }
@@ -0,0 +1,9 @@
1
+ import { transformMarkdown } from '../markdown.js';
2
+
3
+ describe('transformMarkdown', () => {
4
+ test('converts markdown to sanitized HTML with smartypants', () => {
5
+ expect(transformMarkdown({ input: '"Hello" -- world' })).toBe(
6
+ '<p>\u201CHello\u201D \u2014 world</p>'
7
+ );
8
+ });
9
+ });
@@ -0,0 +1,9 @@
1
+ import { sanitizeMdx } from '../mdx.js';
2
+
3
+ describe('sanitizeMdx', () => {
4
+ test('strips MDX components from content', () => {
5
+ expect(sanitizeMdx('Text with <Component prop="value" /> inside')).toBe(
6
+ 'Text with inside'
7
+ );
8
+ });
9
+ });
@@ -1,22 +1,22 @@
1
1
  import { stripTags } from '../html.js';
2
2
 
3
3
  const sampleText = [
4
- [`No change to plain text`, `No change to plain text`],
5
- [`Emphasis <em>should</em> be removed`, `Emphasis should be removed`],
6
- [
7
- `This <a href="https://example.com">link</a> should be stripped`,
8
- `This link should be stripped`,
9
- ],
10
- [
11
- `The following MDX component should not appear at all: <Img src="./test1.jpg" />`,
12
- `The following MDX component should not appear at all: `,
13
- ],
4
+ [`No change to plain text`, `No change to plain text`],
5
+ [`Emphasis <em>should</em> be removed`, `Emphasis should be removed`],
6
+ [
7
+ `This <a href="https://example.com">link</a> should be stripped`,
8
+ `This link should be stripped`,
9
+ ],
10
+ [
11
+ `The following MDX component should not appear at all: <Img src="./test1.jpg" />`,
12
+ `The following MDX component should not appear at all: `,
13
+ ],
14
14
  ] as const;
15
15
 
16
16
  describe('html tags should be stripped', () => {
17
- for (const [input, output] of sampleText) {
18
- test(input, () => {
19
- expect(stripTags(input)).toEqual(output);
20
- });
21
- }
17
+ for (const [input, output] of sampleText) {
18
+ test(input, () => {
19
+ expect(stripTags(input)).toEqual(output);
20
+ });
21
+ }
22
22
  });
@@ -0,0 +1,9 @@
1
+ import { stylizeText } from '../text.js';
2
+
3
+ describe('stylizeText', () => {
4
+ test('converts straight quotes to curly quotes', () => {
5
+ expect(stylizeText('"Hello" -- world')).toBe(
6
+ '\u201CHello\u201D \u2014 world'
7
+ );
8
+ });
9
+ });
package/src/html-cjk.ts CHANGED
@@ -1,39 +1,64 @@
1
1
  import type { RehypeWrapCjkOptions } from 'rehype-wrap-cjk';
2
2
 
3
+ import { hash } from 'ohash';
3
4
  import rehypeParse from 'rehype-parse';
4
5
  import rehypeSanitize from 'rehype-sanitize';
5
6
  import rehypeStringify from 'rehype-stringify';
6
7
  import { rehypeWrapCjk } from 'rehype-wrap-cjk';
7
8
  import { unified } from 'unified';
8
9
 
9
- export function wrapChinese(input: string): string {
10
- const processor = unified()
10
+ // Cache frozen processors by options hash
11
+ const processorCache = new Map<string, unknown>();
12
+
13
+ // Processor with CJK wrapping
14
+ function createProcessorWithCjk(wrapCjkOptions: Partial<RehypeWrapCjkOptions>) {
15
+ return unified()
11
16
  .use(rehypeParse, { fragment: true })
12
- .use(rehypeWrapCjk, { langCode: 'zh' })
17
+ .use(rehypeWrapCjk, wrapCjkOptions)
13
18
  .use(rehypeSanitize)
14
- .use(rehypeStringify);
15
-
16
- return processor.processSync(input).toString();
19
+ .use(rehypeStringify)
20
+ .freeze();
17
21
  }
18
22
 
19
- export function wrapJapanese(input: string): string {
20
- const processor = unified()
23
+ // Processor without CJK wrapping
24
+ function createProcessorWithoutCjk() {
25
+ return unified()
21
26
  .use(rehypeParse, { fragment: true })
22
- .use(rehypeWrapCjk, { langCode: 'ja' })
23
27
  .use(rehypeSanitize)
24
- .use(rehypeStringify);
28
+ .use(rehypeStringify)
29
+ .freeze();
30
+ }
31
+
32
+ function getProcessor(wrapCjkOptions?: Partial<RehypeWrapCjkOptions>) {
33
+ const cacheKey = wrapCjkOptions ? hash(wrapCjkOptions) : '';
25
34
 
26
- return processor.processSync(input).toString();
35
+ let processor = processorCache.get(cacheKey);
36
+
37
+ if (!processor) {
38
+ processor = wrapCjkOptions
39
+ ? createProcessorWithCjk(wrapCjkOptions)
40
+ : createProcessorWithoutCjk();
41
+ processorCache.set(cacheKey, processor);
42
+ }
43
+
44
+ return processor as ReturnType<typeof createProcessorWithoutCjk>;
27
45
  }
28
46
 
29
- export function wrapKorean(input: string): string {
30
- const processor = unified()
31
- .use(rehypeParse, { fragment: true })
32
- .use(rehypeWrapCjk, { langCode: 'ko' })
33
- .use(rehypeSanitize)
34
- .use(rehypeStringify);
47
+ // Pre-defined options for common language codes
48
+ const zhOptions: Partial<RehypeWrapCjkOptions> = { langCode: 'zh' };
49
+ const jaOptions: Partial<RehypeWrapCjkOptions> = { langCode: 'ja' };
50
+ const koOptions: Partial<RehypeWrapCjkOptions> = { langCode: 'ko' };
35
51
 
36
- return processor.processSync(input).toString();
52
+ export function wrapChinese(input: string): string {
53
+ return getProcessor(zhOptions).processSync(input).toString();
54
+ }
55
+
56
+ export function wrapJapanese(input: string): string {
57
+ return getProcessor(jaOptions).processSync(input).toString();
58
+ }
59
+
60
+ export function wrapKorean(input: string): string {
61
+ return getProcessor(koOptions).processSync(input).toString();
37
62
  }
38
63
 
39
64
  export function wrapCjk({
@@ -43,11 +68,5 @@ export function wrapCjk({
43
68
  input: string;
44
69
  wrapCjkOptions?: Partial<RehypeWrapCjkOptions>;
45
70
  }): string {
46
- const processor = unified().use(rehypeParse, { fragment: true });
47
-
48
- if (wrapCjkOptions) processor.use(rehypeWrapCjk, wrapCjkOptions);
49
-
50
- processor.use(rehypeSanitize).use(rehypeStringify);
51
-
52
- return processor.processSync(input).toString();
71
+ return getProcessor(wrapCjkOptions).processSync(input).toString();
53
72
  }
package/src/html.ts CHANGED
@@ -1,33 +1,48 @@
1
1
  import type { Options as RehypeSanitizeOptions } from 'rehype-sanitize';
2
2
 
3
+ import { hash } from 'ohash';
3
4
  import rehypeParse from 'rehype-parse';
4
5
  import rehypeSanitize from 'rehype-sanitize';
5
6
  import rehypeStringify from 'rehype-stringify';
6
7
  import { unified } from 'unified';
7
8
 
8
- export function sanitizeHtml(input: string, options?: RehypeSanitizeOptions): string {
9
- const processor = unified()
10
- .use(rehypeParse, { fragment: true })
11
- .use(rehypeSanitize, options)
12
- .use(rehypeStringify)
13
- .processSync(input);
9
+ // Cache frozen processors by options hash
10
+ const processorCache = new Map<string, unknown>();
14
11
 
15
- return String(processor);
12
+ function createProcessor(options?: RehypeSanitizeOptions) {
13
+ return unified()
14
+ .use(rehypeParse, { fragment: true })
15
+ .use(rehypeSanitize, options)
16
+ .use(rehypeStringify)
17
+ .freeze();
16
18
  }
17
19
 
18
- // Handy shortcut for when you just want to strip tags from text
19
- export const stripTags = (input: string, options?: RehypeSanitizeOptions): string =>
20
- sanitizeHtml(input, { ...options, tagNames: [] });
20
+ function getProcessor(options?: RehypeSanitizeOptions) {
21
+ const cacheKey = options ? hash(options) : '';
22
+
23
+ let processor = processorCache.get(cacheKey);
21
24
 
22
- /**
23
- * Strips GFM-style footnotes from HTML content using a simple regex approach
24
- */
25
- export function stripFootnotes(input: string): string {
26
- // Remove footnote references (`sup` elements with footnote links)
27
- let result = input.replaceAll(/<sup><a[^>]*data-footnote-ref[^>]*>.*?<\/a><\/sup>/gi, '');
25
+ if (!processor) {
26
+ processor = createProcessor(options);
27
+ processorCache.set(cacheKey, processor);
28
+ }
29
+
30
+ return processor as ReturnType<typeof createProcessor>;
31
+ }
28
32
 
29
- // Remove the entire footnotes section
30
- result = result.replaceAll(/<section[^>]*data-footnotes[^>]*>.*?<\/section>/gis, '');
33
+ export function sanitizeHtml(
34
+ input: string,
35
+ options?: RehypeSanitizeOptions
36
+ ): string {
37
+ const processor = getProcessor(options);
31
38
 
32
- return result;
39
+ return String(processor.processSync(input));
40
+ }
41
+
42
+ // Handy shortcut for when you just want to strip tags from text
43
+ export function stripTags(
44
+ input: string,
45
+ options?: RehypeSanitizeOptions
46
+ ): string {
47
+ return sanitizeHtml(input, { ...options, tagNames: [] });
33
48
  }
package/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  export { wrapChinese, wrapCjk, wrapJapanese, wrapKorean } from './html-cjk.js';
2
- export { sanitizeHtml, stripFootnotes, stripTags } from './html.js';
2
+ export { sanitizeHtml, stripTags } from './html.js';
3
3
  export { transformMarkdown } from './markdown.js';
4
4
  export { sanitizeMdx } from './mdx.js';
5
5
  export { stylizeText } from './text.js';
package/src/markdown.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import type { RehypeWrapCjkOptions } from 'rehype-wrap-cjk';
2
2
 
3
+ import { hash } from 'ohash';
3
4
  import rehypeSanitize from 'rehype-sanitize';
4
5
  import rehypeStringify from 'rehype-stringify';
5
6
  import { rehypeWrapCjk } from 'rehype-wrap-cjk';
@@ -8,21 +9,57 @@ import remarkRehype from 'remark-rehype';
8
9
  import remarkSmartyPants from 'remark-smartypants';
9
10
  import { unified } from 'unified';
10
11
 
11
- export function transformMarkdown({
12
- input,
13
- wrapCjkOptions,
14
- }: {
12
+ interface TransformMarkdownOptions {
15
13
  input: string;
16
- wrapCjkOptions?: Partial<RehypeWrapCjkOptions>;
17
- }): string {
18
- const processor = unified()
14
+ wrapCjkOptions?: Partial<RehypeWrapCjkOptions> | undefined;
15
+ }
16
+
17
+ // Cache frozen processors by options hash
18
+ // Using unknown to avoid unified's complex generic types
19
+ const processorCache = new Map<string, unknown>();
20
+
21
+ // Processor with CJK wrapping
22
+ function createProcessorWithCjk(wrapCjkOptions: Partial<RehypeWrapCjkOptions>) {
23
+ return unified()
19
24
  .use(remarkParse)
20
25
  .use(remarkSmartyPants)
21
- .use(remarkRehype);
26
+ .use(remarkRehype)
27
+ .use(rehypeWrapCjk, wrapCjkOptions)
28
+ .use(rehypeSanitize)
29
+ .use(rehypeStringify)
30
+ .freeze();
31
+ }
22
32
 
23
- if (wrapCjkOptions) processor.use(rehypeWrapCjk, wrapCjkOptions);
33
+ // Processor without CJK wrapping
34
+ function createProcessorWithoutCjk() {
35
+ return unified()
36
+ .use(remarkParse)
37
+ .use(remarkSmartyPants)
38
+ .use(remarkRehype)
39
+ .use(rehypeSanitize)
40
+ .use(rehypeStringify)
41
+ .freeze();
42
+ }
43
+
44
+ function getProcessor(wrapCjkOptions?: Partial<RehypeWrapCjkOptions>) {
45
+ // Generate stable cache key from options (empty string for no options)
46
+ const cacheKey = wrapCjkOptions ? hash(wrapCjkOptions) : '';
47
+
48
+ let processor = processorCache.get(cacheKey);
49
+
50
+ if (!processor) {
51
+ processor = wrapCjkOptions
52
+ ? createProcessorWithCjk(wrapCjkOptions)
53
+ : createProcessorWithoutCjk();
54
+ processorCache.set(cacheKey, processor);
55
+ }
24
56
 
25
- processor.use(rehypeSanitize).use(rehypeStringify);
57
+ return processor as ReturnType<typeof createProcessorWithoutCjk>;
58
+ }
26
59
 
27
- return processor.processSync(input).toString().trim();
60
+ export function transformMarkdown({
61
+ input,
62
+ wrapCjkOptions,
63
+ }: TransformMarkdownOptions): string {
64
+ return getProcessor(wrapCjkOptions).processSync(input).toString().trim();
28
65
  }
package/src/mdx.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import type { Options as RehypeSanitizeOptions } from 'rehype-sanitize';
2
2
 
3
+ import { hash } from 'ohash';
3
4
  import rehypeSanitize from 'rehype-sanitize';
4
5
  import rehypeStringify from 'rehype-stringify';
5
6
  import remarkMdx from 'remark-mdx';
@@ -7,16 +8,41 @@ import remarkParse from 'remark-parse';
7
8
  import remarkRehype from 'remark-rehype';
8
9
  import { unified } from 'unified';
9
10
 
10
- export function sanitizeMdx(input: string, options?: RehypeSanitizeOptions): string {
11
- const processor = unified()
12
- .use(remarkParse)
13
- .use(remarkMdx)
14
- .use(remarkRehype)
15
- .use(rehypeSanitize, options ?? { tagNames: [] }) // Strip all tags
16
- .use(rehypeStringify)
17
- .processSync(input);
18
-
19
- return String(processor)
20
- .replaceAll(/\s+/g, ' ') // Normalize whitespace
21
- .trim();
11
+ // Cache frozen processors by options hash
12
+ const processorCache = new Map<string, unknown>();
13
+
14
+ // Default options for stripping all tags
15
+ const defaultOptions: RehypeSanitizeOptions = { tagNames: [] };
16
+
17
+ function createProcessor(options: RehypeSanitizeOptions) {
18
+ return unified()
19
+ .use(remarkParse)
20
+ .use(remarkMdx)
21
+ .use(remarkRehype)
22
+ .use(rehypeSanitize, options)
23
+ .use(rehypeStringify)
24
+ .freeze();
25
+ }
26
+
27
+ function getProcessor(options?: RehypeSanitizeOptions) {
28
+ const effectiveOptions = options ?? defaultOptions;
29
+ const cacheKey = hash(effectiveOptions);
30
+
31
+ let processor = processorCache.get(cacheKey);
32
+
33
+ if (!processor) {
34
+ processor = createProcessor(effectiveOptions);
35
+ processorCache.set(cacheKey, processor);
36
+ }
37
+
38
+ return processor as ReturnType<typeof createProcessor>;
39
+ }
40
+
41
+ export function sanitizeMdx(
42
+ input: string,
43
+ options?: RehypeSanitizeOptions
44
+ ): string {
45
+ return String(getProcessor(options).processSync(input))
46
+ .replaceAll(/\s+/g, ' ') // Normalize whitespace
47
+ .trim();
22
48
  }
package/src/text.ts CHANGED
@@ -1,10 +1,32 @@
1
1
  import type { Options as RetextSmartypantsOptions } from 'retext-smartypants';
2
2
 
3
+ import { hash } from 'ohash';
3
4
  import { retext } from 'retext';
4
5
  import retextSmartypants from 'retext-smartypants';
5
6
 
6
- export function stylizeText(input: string, options?: RetextSmartypantsOptions): string {
7
- const processor = retext().use(retextSmartypants, options).processSync(input);
7
+ // Cache frozen processors by options hash
8
+ const processorCache = new Map<string, unknown>();
8
9
 
9
- return String(processor).trim();
10
+ function createProcessor(options?: RetextSmartypantsOptions) {
11
+ return retext().use(retextSmartypants, options).freeze();
12
+ }
13
+
14
+ function getProcessor(options?: RetextSmartypantsOptions) {
15
+ const cacheKey = options ? hash(options) : '';
16
+
17
+ let processor = processorCache.get(cacheKey);
18
+
19
+ if (!processor) {
20
+ processor = createProcessor(options);
21
+ processorCache.set(cacheKey, processor);
22
+ }
23
+
24
+ return processor as ReturnType<typeof createProcessor>;
25
+ }
26
+
27
+ export function stylizeText(
28
+ input: string,
29
+ options?: RetextSmartypantsOptions
30
+ ): string {
31
+ return String(getProcessor(options).processSync(input)).trim();
10
32
  }