@isdk/mdast-plus 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.cn.md +21 -5
  2. package/README.md +21 -5
  3. package/dist/index.d.mts +41 -3
  4. package/dist/index.d.ts +41 -3
  5. package/dist/index.js +1 -1
  6. package/dist/index.mjs +1 -1
  7. package/docs/README.md +21 -5
  8. package/docs/_media/CONTRIBUTING.md +10 -0
  9. package/docs/_media/README.cn.md +21 -5
  10. package/docs/classes/MdastBasePipeline.md +20 -20
  11. package/docs/classes/MdastPipeline.md +27 -27
  12. package/docs/enumerations/PipelineStage.md +6 -6
  13. package/docs/functions/astCompiler.md +1 -1
  14. package/docs/functions/checkHtmlUrlExists.md +25 -0
  15. package/docs/functions/checkUrlExists.md +25 -0
  16. package/docs/functions/jsonParser.md +1 -1
  17. package/docs/functions/mdast.md +1 -1
  18. package/docs/globals.md +3 -0
  19. package/docs/interfaces/MdastDataOrigin.md +4 -4
  20. package/docs/interfaces/MdastFormat.md +7 -7
  21. package/docs/interfaces/MdastMark.md +3 -3
  22. package/docs/interfaces/MdastPlugin.md +9 -9
  23. package/docs/interfaces/MdastSub.md +3 -3
  24. package/docs/interfaces/MdastSup.md +3 -3
  25. package/docs/interfaces/PipelineRunOptions.md +4 -4
  26. package/docs/interfaces/ReadabilityOptions.md +67 -6
  27. package/docs/interfaces/SmartExcerptOptions.md +43 -0
  28. package/docs/type-aliases/PipelineStageName.md +1 -1
  29. package/docs/variables/DefaultPipelineStage.md +1 -1
  30. package/docs/variables/astFormat.md +1 -1
  31. package/docs/variables/htmlFormat.md +1 -1
  32. package/docs/variables/htmlReadability.md +1 -1
  33. package/docs/variables/htmlReadabilityPlugin.md +1 -1
  34. package/docs/variables/htmlReadabilityPlugins.md +2 -2
  35. package/docs/variables/markdownFormat.md +1 -1
  36. package/docs/variables/restoreReadabilityMetaPlugin.md +8 -2
  37. package/package.json +1 -1
package/README.cn.md CHANGED
@@ -102,12 +102,16 @@ const vfile = await mdast(myInput)
102
102
  .data({ myGlobal: 'value' })
103
103
  // 以数组形式在 'compile' 阶段添加多个插件
104
104
  .use([pluginA, pluginB])
105
- // 或在特定阶段添加一组插件
106
- .useAt('parse', htmlReadabilityPlugins)
105
+ // 或在特定阶段添加一组插件并配置选项
106
+ .useAt('parse', htmlReadabilityPlugins, {
107
+ url: 'https://example.com/article',
108
+ frontmatter: true, // 将元数据注入为 YAML Frontmatter
109
+ sourceLink: true // 在底部添加原文链接
110
+ })
107
111
  .priority(10) // 比默认插件更晚执行
108
- .to('html');
112
+ .to('markdown');
109
113
 
110
- console.log(vfile.value); // 序列化后的 HTML 字符串
114
+ console.log(vfile.value); // 包含 Frontmatter 的序列化 Markdown 字符串
111
115
  ```
112
116
 
113
117
  ### 插件行为
@@ -181,7 +185,19 @@ const result = await mdast('Hello').to('reverse');
181
185
  | `extract-code-meta` | normalize | 从代码块元数据中解析 `title="foo"`。 |
182
186
  | `image-size` | normalize | 从图片 URL 中解析 `#=WxH`。 |
183
187
  | `normalize-inline-styles` | normalize | 标准化 `==mark==`、`~sub~` 和 `^sup^`。 |
184
- | `html-readability` | parse | 使用 Mozilla 的 Readability 从 HTML 中提取主体内容。使用 `htmlReadabilityPlugins` 数组可以简化配置。 |
188
+ | `html-readability` | parse | 使用 Mozilla 的 Readability 从 HTML 中提取主体内容。支持 `frontmatter` 注入、`sourceLink` (原文链接) 页脚以及 `smartExcerpt` 智能摘要管理。使用 `htmlReadabilityPlugins` 数组可以简化配置。 |
189
+
190
+ ### html-readability 选项
191
+
192
+ - `url`: (string) HTML 文档的 URL。
193
+ - `frontmatter`: (boolean | 'yaml' | 'toml') 是否将元数据注入为 Frontmatter。默认值: `false`。
194
+ - `sourceLink`: (boolean) 是否在底部添加原文链接。即使在 `fields` 选项中过滤或重命名了 title/url 字段,该链接仍将根据原始信息正确生成。默认值: `false`。
195
+ - `fields`: (string[] | object) 控制保留哪些元数据字段或如何重命名它们。
196
+ - 如果是数组:作为白名单(例如 `['title', 'excerpt']`)。
197
+ - 如果是对象:将原始键映射到新名称(例如 `{ title: 'headline' }`)。只有映射中存在的键才会被保留(投影)。
198
+ - `smartExcerpt`: (boolean | object) 是否在摘要与正文内容重复或近乎重复时移除摘要。默认值: `true`。
199
+ - `threshold`: (number) 摘要长度与正文长度的比率阈值 (0.0 到 1.0)。默认值: `0.6`。
200
+ - `minContentLength`: (number) 保留摘要所需的正文最小长度。默认值: `300`。
185
201
 
186
202
  ## 贡献
187
203
 
package/README.md CHANGED
@@ -102,12 +102,16 @@ const vfile = await mdast(myInput)
102
102
  .data({ myGlobal: 'value' })
103
103
  // Add multiple plugins as an array at the 'compile' stage
104
104
  .use([pluginA, pluginB])
105
- // Or add a set of plugins at a specific stage
106
- .useAt('parse', htmlReadabilityPlugins)
105
+ // Or add a set of plugins at a specific stage with options
106
+ .useAt('parse', htmlReadabilityPlugins, {
107
+ url: 'https://example.com/article',
108
+ frontmatter: true, // Inject metadata as YAML frontmatter
109
+ sourceLink: true // Append source link at the bottom
110
+ })
107
111
  .priority(10) // Run later than default plugins
108
- .to('html');
112
+ .to('markdown');
109
113
 
110
- console.log(vfile.value); // The serialized HTML string
114
+ console.log(vfile.value); // The serialized Markdown with frontmatter
111
115
  ```
112
116
 
113
117
  ### Plugin Behavior
@@ -181,7 +185,19 @@ Each stage can have one "main" plugin. If a plugin is marked with `main: true`,
181
185
  | `extract-code-meta` | normalize | Parses `title="foo"` from code block meta. |
182
186
  | `image-size` | normalize | Parses `#=WxH` from image URLs. |
183
187
  | `normalize-inline-styles` | normalize | Standardizes `==mark==`, `~sub~`, and `^sup^`. |
184
- | `html-readability` | parse | Uses Mozilla's Readability to extract main content from HTML. Use `htmlReadabilityPlugins` array for easier setup. |
188
+ | `html-readability` | parse | Uses Mozilla's Readability to extract main content from HTML. Supports `frontmatter` injection, `sourceLink` (source link) footer, and `smartExcerpt` for intelligent summary management. Use `htmlReadabilityPlugins` array for easier setup. |
189
+
190
+ ### html-readability Options
191
+
192
+ - `url`: (string) The URL of the HTML document.
193
+ - `frontmatter`: (boolean | 'yaml' | 'toml') Whether to inject metadata as frontmatter. Default: `false`.
194
+ - `sourceLink`: (boolean) Whether to append source link at the bottom. The link will be generated based on the original title/URL even if they are filtered or renamed in the `fields` option. Default: `false`.
195
+ - `fields`: (string[] | object) Control which metadata fields are kept or how they are renamed.
196
+ - If an array: acts as an allowlist (e.g., `['title', 'excerpt']`).
197
+ - If an object: maps original keys to new names (e.g., `{ title: 'headline' }`). Only keys in the map are kept (Projection).
198
+ - `smartExcerpt`: (boolean | object) Whether to remove the excerpt if it is a duplicate or near-duplicate of the main content. Default: `true`.
199
+ - `threshold`: (number) The ratio of excerpt length to content length (0.0 to 1.0). Default: `0.6`.
200
+ - `minContentLength`: (number) Minimum length of the main content required to keep the excerpt. Default: `300`.
185
201
 
186
202
  ## Contributing
187
203
 
package/dist/index.d.mts CHANGED
@@ -1741,12 +1741,48 @@ declare function jsonParser(this: any): void;
1741
1741
  */
1742
1742
  declare const astFormat: MdastFormat;
1743
1743
 
1744
+ interface SmartExcerptOptions {
1745
+ /**
1746
+ * The threshold ratio of excerpt length to content length.
1747
+ * If (excerptLength / contentLength) > threshold, the excerpt is considered redundant.
1748
+ * @default 0.6
1749
+ */
1750
+ threshold?: number;
1751
+ /**
1752
+ * The minimum length of the main content required to keep the excerpt.
1753
+ * If content length is less than this value, the excerpt is considered redundant (if it is contained in the content).
1754
+ * @default 300
1755
+ */
1756
+ minContentLength?: number;
1757
+ }
1744
1758
  interface ReadabilityOptions {
1745
1759
  url?: string;
1746
1760
  readability?: Record<string, any> | false;
1747
1761
  jsdom?: Record<string, any>;
1748
1762
  hast?: Record<string, any>;
1749
1763
  'rehype-parse'?: Record<string, any>;
1764
+ /**
1765
+ * Whether to inject metadata as frontmatter.
1766
+ * @default false
1767
+ */
1768
+ frontmatter?: boolean | 'yaml' | 'toml';
1769
+ /**
1770
+ * Whether to append source link at the bottom.
1771
+ * @default false
1772
+ */
1773
+ sourceLink?: boolean;
1774
+ /**
1775
+ * Whether to remove the excerpt if it is a duplicate or near-duplicate of the main content.
1776
+ * Useful when the content is short or the excerpt is just a subset of the content.
1777
+ * @default true
1778
+ */
1779
+ smartExcerpt?: boolean | SmartExcerptOptions;
1780
+ /**
1781
+ * Control the fields and names in metadata.
1782
+ * - If an array of strings, it acts as an allowlist (only these fields are kept).
1783
+ * - If an object, it maps original field names to new names. Only the keys present in the map are kept (Projection).
1784
+ */
1785
+ fields?: string[] | Record<string, string>;
1750
1786
  }
1751
1787
  /**
1752
1788
  * A unified/rehype plugin that uses Mozilla's Readability to parse the input HTML.
@@ -1766,7 +1802,7 @@ declare const htmlReadabilityPlugin: {
1766
1802
  */
1767
1803
  declare const restoreReadabilityMetaPlugin: {
1768
1804
  name: string;
1769
- plugin: () => (tree: any, file: any) => void;
1805
+ plugin: (options?: ReadabilityOptions) => (tree: any, file: any) => void;
1770
1806
  stage: PipelineStage;
1771
1807
  after: string;
1772
1808
  };
@@ -1780,9 +1816,11 @@ declare const htmlReadabilityPlugins: ({
1780
1816
  main: boolean;
1781
1817
  } | {
1782
1818
  name: string;
1783
- plugin: () => (tree: any, file: any) => void;
1819
+ plugin: (options?: ReadabilityOptions) => (tree: any, file: any) => void;
1784
1820
  stage: PipelineStage;
1785
1821
  after: string;
1786
1822
  })[];
1823
+ declare function checkUrlExists(tree: any, url: string): boolean;
1824
+ declare function checkHtmlUrlExists(tree: any, url: string): boolean;
1787
1825
 
1788
- export { DefaultPipelineStage, MdastBasePipeline, type MdastDataOrigin, type MdastFormat, type MdastMark, MdastPipeline, type MdastPlugin, type MdastSub, type MdastSup, type PipelineRunOptions, PipelineStage, type PipelineStageName, type ReadabilityOptions, astCompiler, astFormat, htmlFormat, htmlReadability, htmlReadabilityPlugin, htmlReadabilityPlugins, jsonParser, markdownFormat, mdast, restoreReadabilityMetaPlugin };
1826
+ export { DefaultPipelineStage, MdastBasePipeline, type MdastDataOrigin, type MdastFormat, type MdastMark, MdastPipeline, type MdastPlugin, type MdastSub, type MdastSup, type PipelineRunOptions, PipelineStage, type PipelineStageName, type ReadabilityOptions, type SmartExcerptOptions, astCompiler, astFormat, checkHtmlUrlExists, checkUrlExists, htmlFormat, htmlReadability, htmlReadabilityPlugin, htmlReadabilityPlugins, jsonParser, markdownFormat, mdast, restoreReadabilityMetaPlugin };
package/dist/index.d.ts CHANGED
@@ -1741,12 +1741,48 @@ declare function jsonParser(this: any): void;
1741
1741
  */
1742
1742
  declare const astFormat: MdastFormat;
1743
1743
 
1744
+ interface SmartExcerptOptions {
1745
+ /**
1746
+ * The threshold ratio of excerpt length to content length.
1747
+ * If (excerptLength / contentLength) > threshold, the excerpt is considered redundant.
1748
+ * @default 0.6
1749
+ */
1750
+ threshold?: number;
1751
+ /**
1752
+ * The minimum length of the main content required to keep the excerpt.
1753
+ * If content length is less than this value, the excerpt is considered redundant (if it is contained in the content).
1754
+ * @default 300
1755
+ */
1756
+ minContentLength?: number;
1757
+ }
1744
1758
  interface ReadabilityOptions {
1745
1759
  url?: string;
1746
1760
  readability?: Record<string, any> | false;
1747
1761
  jsdom?: Record<string, any>;
1748
1762
  hast?: Record<string, any>;
1749
1763
  'rehype-parse'?: Record<string, any>;
1764
+ /**
1765
+ * Whether to inject metadata as frontmatter.
1766
+ * @default false
1767
+ */
1768
+ frontmatter?: boolean | 'yaml' | 'toml';
1769
+ /**
1770
+ * Whether to append source link at the bottom.
1771
+ * @default false
1772
+ */
1773
+ sourceLink?: boolean;
1774
+ /**
1775
+ * Whether to remove the excerpt if it is a duplicate or near-duplicate of the main content.
1776
+ * Useful when the content is short or the excerpt is just a subset of the content.
1777
+ * @default true
1778
+ */
1779
+ smartExcerpt?: boolean | SmartExcerptOptions;
1780
+ /**
1781
+ * Control the fields and names in metadata.
1782
+ * - If an array of strings, it acts as an allowlist (only these fields are kept).
1783
+ * - If an object, it maps original field names to new names. Only the keys present in the map are kept (Projection).
1784
+ */
1785
+ fields?: string[] | Record<string, string>;
1750
1786
  }
1751
1787
  /**
1752
1788
  * A unified/rehype plugin that uses Mozilla's Readability to parse the input HTML.
@@ -1766,7 +1802,7 @@ declare const htmlReadabilityPlugin: {
1766
1802
  */
1767
1803
  declare const restoreReadabilityMetaPlugin: {
1768
1804
  name: string;
1769
- plugin: () => (tree: any, file: any) => void;
1805
+ plugin: (options?: ReadabilityOptions) => (tree: any, file: any) => void;
1770
1806
  stage: PipelineStage;
1771
1807
  after: string;
1772
1808
  };
@@ -1780,9 +1816,11 @@ declare const htmlReadabilityPlugins: ({
1780
1816
  main: boolean;
1781
1817
  } | {
1782
1818
  name: string;
1783
- plugin: () => (tree: any, file: any) => void;
1819
+ plugin: (options?: ReadabilityOptions) => (tree: any, file: any) => void;
1784
1820
  stage: PipelineStage;
1785
1821
  after: string;
1786
1822
  })[];
1823
+ declare function checkUrlExists(tree: any, url: string): boolean;
1824
+ declare function checkHtmlUrlExists(tree: any, url: string): boolean;
1787
1825
 
1788
- export { DefaultPipelineStage, MdastBasePipeline, type MdastDataOrigin, type MdastFormat, type MdastMark, MdastPipeline, type MdastPlugin, type MdastSub, type MdastSup, type PipelineRunOptions, PipelineStage, type PipelineStageName, type ReadabilityOptions, astCompiler, astFormat, htmlFormat, htmlReadability, htmlReadabilityPlugin, htmlReadabilityPlugins, jsonParser, markdownFormat, mdast, restoreReadabilityMetaPlugin };
1826
+ export { DefaultPipelineStage, MdastBasePipeline, type MdastDataOrigin, type MdastFormat, type MdastMark, MdastPipeline, type MdastPlugin, type MdastSub, type MdastSup, type PipelineRunOptions, PipelineStage, type PipelineStageName, type ReadabilityOptions, type SmartExcerptOptions, astCompiler, astFormat, checkHtmlUrlExists, checkUrlExists, htmlFormat, htmlReadability, htmlReadabilityPlugin, htmlReadabilityPlugins, jsonParser, markdownFormat, mdast, restoreReadabilityMetaPlugin };
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- "use strict";var t,e=Object.create,r=Object.defineProperty,i=Object.getOwnPropertyDescriptor,n=Object.getOwnPropertyNames,s=Object.getPrototypeOf,o=Object.prototype.hasOwnProperty,a=(t,e,s,a)=>{if(e&&"object"==typeof e||"function"==typeof e)for(let l of n(e))o.call(t,l)||l===s||r(t,l,{get:()=>e[l],enumerable:!(a=i(e,l))||a.enumerable});return t},l=(t,i,n)=>(n=null!=t?e(s(t)):{},a(!i&&t&&t.__esModule?n:r(n,"default",{value:t,enumerable:!0}),t)),u={};((t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})})(u,{DefaultPipelineStage:()=>p,MdastBasePipeline:()=>K,MdastPipeline:()=>U,PipelineStage:()=>c,astCompiler:()=>V,astFormat:()=>G,htmlFormat:()=>L,htmlReadability:()=>Z,htmlReadabilityPlugin:()=>tt,htmlReadabilityPlugins:()=>rt,jsonParser:()=>_,markdownFormat:()=>R,mdast:()=>W,restoreReadabilityMetaPlugin:()=>et}),module.exports=(t=u,a(r({},"__esModule",{value:!0}),t));var c=(t=>(t[t.parse=0]="parse",t[t.normalize=100]="normalize",t[t.compile=200]="compile",t[t.finalize=300]="finalize",t[t.stringify=400]="stringify",t))(c||{}),p=200,f=require("unified"),m=require("vfile"),g=l(require("remark-parse")),h=l(require("remark-stringify")),d=l(require("remark-gfm")),y=l(require("remark-directive")),b=l(require("remark-math")),v=l(require("remark-frontmatter")),w=require("unist-util-visit");function k(t,e){return{type:t,children:e,data:{hName:t}}}var q={mark:(t,e,r)=>"=="+r.containerPhrasing(t,{before:"==",after:"=="})+"==",sub:(t,e,r)=>"~"+r.containerPhrasing(t,{before:"~",after:"~"})+"~",sup:(t,e,r)=>"^"+r.containerPhrasing(t,{before:"^",after:"^"})+"^"},j={plugin:()=>t=>{!function(t){(0,w.visit)(t,"text",(t,e,r)=>{if(!r||void 0===e)return;const i=t.value;let n=0;const s=[];let o=!1;const a=/(==[^=]+==|~[^~]+~|\^[^^]+\^)/g;let l;for(;null!==(l=a.exec(i));){o=!0;const t=l[0],e=l.index;e>n&&s.push({type:"text",value:i.slice(n,e)});let r="mark",u="";t.startsWith("==")?(r="mark",u=t.slice(2,-2)):t.startsWith("~")?(r="sub",u=t.slice(1,-1)):t.startsWith("^")&&(r="sup",u=t.slice(1,-1)),s.push(k(r,[{type:"text",value:u}])),n=a.lastIndex}return o?(n<i.length&&s.push({type:"text",value:i.slice(n)}),r.children.splice(e,1,...s),e+s.length):void 0})}(t)},stage:100},P=require("unist-util-visit"),M={error:"danger",warn:"warning",success:"tip",important:"important",caution:"caution",note:"note"},x={plugin:()=>async t=>{(0,P.visit)(t,["containerDirective","leafDirective","textDirective"],t=>{const e=t,r=e.name.toLowerCase();if(e.name=M[r]||r,e.children&&e.children.length>0){const t=e.children[0];if(t.data?.directiveLabel||"directiveLabel"===t.type){const r=t;let i="";(0,P.visit)(r,"text",t=>{i+=t.value}),i&&!e.attributes?.title&&(e.attributes=e.attributes||{},e.attributes.title=i.trim()),e.children.shift()}}e.attributes?.title&&(e.attributes.title=String(e.attributes.title).trim()),e.data=e.data||{},e.data.hName=e.data.hName||("containerDirective"===e.type?"div":"span"),e.data.hProperties={...e.data.hProperties||{},...e.attributes,className:[e.name,e.data.hProperties?.className].filter(Boolean).join(" ")}})},stage:100,order:10},S=require("unist-util-visit"),T={plugin:()=>async t=>{(0,S.visit)(t,"tableCell",t=>{if(t.data){const{rowspan:e,colspan:r}=t.data;t.data.hProperties=t.data.hProperties||{},void 0!==e&&(t.data.hProperties.rowSpan=e,delete t.data.rowspan),void 0!==r&&(t.data.hProperties.colSpan=r,delete t.data.colspan)}})},stage:100,order:20},F=require("unist-util-visit"),N=require("shell-quote");var O={plugin:()=>async t=>{(0,F.visit)(t,"code",t=>{if(t.meta){const e=function(t){const e={},r=(0,N.parse)(t);for(const t of r)if("string"==typeof t){const r=t.split("=",2);2===r.length?e[r[0]]=r[1]:e[t]="true"}return e}(t.meta),r=t.data=t.data||{};e.title&&(r.title=e.title),e.filename&&(r.filename=e.filename),r.kv={...r.kv||{},...e}}})},stage:100,order:30},A=require("unist-util-visit"),D={plugin:()=>async t=>{(0,A.visit)(t,"image",t=>{const e=t.data=t.data||{},r=e.hProperties=e.hProperties||{},i=/[#?&](?:width=([0-9]+))?(?:&?height=([0-9]+))?(?:=([0-9]+)x([0-9]+))?$/,n=t.url.match(i);if(n){const e=n[1]||n[3],s=n[2]||n[4];e&&!r.width&&(r.width=parseInt(e,10)),s&&!r.height&&(r.height=parseInt(s,10)),t.url=t.url.replace(i,"")}e.width&&!r.width&&(r.width=e.width),e.height&&!r.height&&(r.height=e.height)})},stage:100,order:40},z=[{plugin:g.default,stage:0},{plugin:d.default,options:[{singleTilde:!1}],stage:0},{plugin:y.default,stage:0},{plugin:b.default,stage:0},{plugin:v.default,options:[["yaml","toml"]],stage:0},x,T,O,D,j],E=[{plugin:h.default,options:[{handlers:q}],stage:400},{plugin:d.default,options:[{singleTilde:!1}],stage:400},{plugin:y.default,stage:400},{plugin:b.default,stage:400},{plugin:v.default,options:[["yaml","toml"]],stage:400}];E.forEach(t=>{t.plugin===h.default?t.order=100:t.order=10});var R={id:"markdown",title:"Markdown (GFM + Directives)",extensions:["md","markdown","mdown","mkdn"],mediaTypes:["text/markdown"],input:z,output:E},$=l(require("rehype-parse")),B=l(require("rehype-remark")),C=l(require("remark-rehype")),H=l(require("rehype-sanitize")),I=l(require("rehype-stringify")),L={id:"html",title:"HTML",extensions:["html","htm"],mediaTypes:["text/html"],input:[{name:"rehype-parse",plugin:$.default,stage:0},{name:"rehype-remark",plugin:B.default,options:[{handlers:{mark:(t,e)=>{const r={type:"mark",children:t.all(e)};return t.patch(e,r),r},sub:(t,e)=>{const r={type:"sub",children:t.all(e)};return t.patch(e,r),r},sup:(t,e)=>{const r={type:"sup",children:t.all(e)};return t.patch(e,r),r}}}],stage:0}],output:[{plugin:C.default,stage:300,order:10},{plugin:H.default,options:[{...H.defaultSchema,tagNames:[...H.defaultSchema.tagNames||[],"mark","sub","sup"],attributes:{...H.defaultSchema.attributes,"*":[...H.defaultSchema.attributes?.["*"]||[],"className","id","style"],td:[...H.defaultSchema.attributes?.td||[],"rowSpan","colSpan","rowspan","colspan"],th:[...H.defaultSchema.attributes?.th||[],"rowSpan","colSpan","rowspan","colspan"],img:[...H.defaultSchema.attributes?.img||[],"width","height"]}}],stage:300,order:20},{plugin:I.default,stage:400}]};function V(){this.Compiler=t=>t}function _(){this.Parser=t=>JSON.parse(t)}var G={id:"ast",title:"MDAST",input:[{plugin:_,stage:0},x,T,O,D,j],output:[{plugin:V,options:[],stage:400}]};function J(t){return"object"==typeof t&&null!==t&&"string"==typeof t.type}var Q=class t{constructor(t){this.queue=[],this._data={},this.input=t}static register(t){this.registry.set(t.id,t)}static getFormat(t){return this.registry.get(t)}data(t,e){return"string"==typeof t?this._data[t]=e:Object.assign(this._data,t),this}getFormat(t){return this.constructor.getFormat(t)}resolveFormat(t){if("string"==typeof t){const e=this.getFormat(t);if(!e)throw new Error(`[MdastPlus] Format '${t}' is not registered.`);return e}return t}toRuntimeEntry(t,e,r){let i=e;void 0!==t.stage&&(i="string"==typeof t.stage?c[t.stage]??e:t.stage);let n=t.options||[];const s=t.name||t.plugin.name;if(r&&s&&s in r){const e=r[s];"object"!=typeof e||null===e||Array.isArray(e)?n=Array.isArray(e)?e:[e]:("main"in e&&(t.main=!!e.main),"before"in e&&(t.before=e.before),"after"in e&&(t.after=e.after),n=[e])}return{name:s,plugin:t.plugin,options:n,stage:i,order:t.order||0,main:t.main,before:t.before,after:t.after}}ensureInputPlugins(e,r,i=400){const n=e.some(t=>0===(t.stage??p)),s=J(this.input);if(!n){let n=[];if(s){const e=t.getFormat("ast");e&&e.input&&(n=e.input)}else{const e=t.getFormat("markdown");e&&e.input&&(n=e.input)}for(const t of n){const n=this.toRuntimeEntry(t,0,r);(n.stage??p)<=i&&e.push(n)}}}from(t,e){const r=this.resolveFormat(t);if(!r.input||0===r.input.length)throw new Error(`[MdastPlus] Format '${r.id}' does not support input.`);for(const t of r.input)this.queue.push(this.toRuntimeEntry(t,0,e));return this}resolveRunQueue(t,e,r,i){let n=[];if(n=e?this.queue.map(t=>this.toRuntimeEntry(t,t.stage??p,e)):[...this.queue],void 0!==r){const t=r;n=n.filter(e=>(e.stage??p)<=t);const s=n.findIndex(e=>(e.stage??p)===t),o=-1!==s?s+(i??0):n.length-1,a=n.slice(0,o+1),l=n.filter(t=>t.main&&!a.includes(t));n=a.concat(l),this.ensureInputPlugins(n,e,t),n.push({plugin:V,options:[],stage:400,order:0})}else if(this.ensureInputPlugins(n,e,400),t.output)for(const r of t.output)n.push(this.toRuntimeEntry(r,300,e));return n}async to(t,e){const r=this.resolveFormat(t);if(!r.output)throw new Error(`[MdastPlus] Format '${r.id}' does not support output.`);let i,n,s;if(e){const t=e;"overrides"in t||"stage"in t||"stopAtIndex"in t?(i=t.overrides,n="string"==typeof t.stage?c[t.stage]:t.stage,s=t.stopAtIndex):i=e}const o=this.resolveRunQueue(r,i,n,s),a=this.assembleProcessor(o);if(J(this.input)){const t=await a.run(this.input),e=a.stringify(t),r=new m.VFile;return"string"==typeof e||Buffer.isBuffer(e)?r.value=e:r.result=e,r}return a.process(this.input)}use(t,...e){return this.useAt("compile",t,...e)}normalizePluggable(t,e=[]){const r=[];if(Array.isArray(t)){let i=!1;if(t.length>0&&"function"==typeof t[0]){t.slice(1).some(t=>!function(t){if("function"==typeof t)return!0;if(Array.isArray(t))return!0;if("object"==typeof t&&null!==t){if("plugin"in t&&"function"==typeof t.plugin)return!0;if("plugins"in t)return!0}return!1}(t))&&(i=!0)}if(i){const[e,...i]=t;r.push({plugin:e,options:i})}else for(const i of t)r.push(...this.normalizePluggable(i,e))}else if("function"==typeof t)r.push({plugin:t,options:e});else if("object"==typeof t&&null!==t)if("plugin"in t&&"function"==typeof t.plugin){const i=t,n=e.length>0?e:i.options||[];r.push({...i,options:n})}else if("plugins"in t){const e=t;e.plugins&&r.push(...this.normalizePluggable(e.plugins))}return r}useAt(t,e,...r){let i,n,s;"string"==typeof t||"number"==typeof t?(i=("string"==typeof t?c[t]:t)??p,n=e,s=r):(i=void 0,n=t,s=[e,...r].filter(t=>void 0!==t));const o=this.normalizePluggable(n,s);for(const t of o){const e=i??p;this.queue.push(this.toRuntimeEntry(t,e))}return this}priority(t){const e=this.queue[this.queue.length-1];return e&&(e.order=t),this}configure(t,...e){for(let r=this.queue.length-1;r>=0;r--){const i=this.queue[r];if((i.name||i.plugin.name)===t){i.options=e;break}}return this}assembleProcessor(t){const e={};for(const r of t){const t=r.stage??p;e[t]||(e[t]=[]),e[t].push(r)}const r=[],i=Object.keys(e).map(Number).sort((t,e)=>t-e);for(const t of i){const i=e[t].sort((t,e)=>(t.order||0)-(e.order||0)),n=i.findIndex(t=>t.main);if(-1!==n){const t=i[n];!1===t.options?.[0]?console.warn(`Main Plugin "${t.name}" is disabled. Skipping.`):0!==n&&(i.splice(n,1),i[0]=t)}let s=!0,o=0;for(;s&&o<i.length;){s=!1,o++;for(let t=0;t<i.length;t++){const e=i[t];if(e.after){const r=i.findIndex(t=>t.name===e.after);if(-1!==r&&r>t){i.splice(t,1),i.splice(r,0,e),s=!0;break}}if(e.before){const r=i.findIndex(t=>t.name===e.before);if(-1!==r&&r<t){i.splice(t,1),i.splice(r,0,e),s=!0;break}}}}r.push(...i)}const n=(0,f.unified)();Object.keys(this._data).length>0&&n.data(this._data);for(const t of r)n.use(t.plugin,...t.options||[]);return n}};Q.registry=new Map;var K=Q,U=class extends K{async toMarkdown(){const t=await this.to("markdown");return String(t)}toMarkdownVFile(){return this.to("markdown")}async toHtml(){const t=await this.to("html");return String(t)}toHtmlVFile(){return this.to("html")}async toAst(t){return(await this.to("ast",t)).result}toHTML(){return this.toHtml()}toAST(t){return this.toAst(t)}};function W(t){return new U(t)}U.register(R),U.register(L),U.register(G);var X=require("hast-util-from-html"),Y=require("hast-util-from-dom"),Z=function(t){const{readability:e,jsdom:r,hast:i,url:n}=t||{};this.parser=function(t,s){if(!1===e)return(0,X.fromHtml)(t,{fragment:!0,...i});let o,a;try{o=require("jsdom").JSDOM;a=require("@mozilla/readability").Readability}catch(t){throw new Error("[html-readability] Dependency missing. Please install 'jsdom' and '@mozilla/readability'.")}const l=new a(new o(t,{url:n,pretendToBeVisual:!0,...r,includeNodeLocations:!0}).window.document,{maxElemsToParse:1e5,nbTopCandidates:5,charThreshold:500,keepClasses:!0,...e,serializer:t=>t}).parse();if(!l||!l.content)return(0,X.fromHtml)(t,{fragment:!0,...i});const u=l.content;let c=(0,Y.fromDom)(u,{afterTransform:i?.afterTransform});const p=!1!==i?.fragment,f={...l};return delete f.content,delete f.textContent,s&&(s.data=s.data||{},s.data.readability=f),c&&(c=p?{type:"root",children:[c]}:{type:"root",children:[{type:"element",tagName:"html",properties:{},children:[{type:"element",tagName:"head",properties:{},children:[]},{type:"element",tagName:"body",properties:{},children:[c]}]}]},c.data=c.data||{},c.data.readability=f),c}},tt={name:"readability",plugin:Z,stage:0,main:!0},et={name:"restore-readability-meta",plugin:()=>(t,e)=>{e.data?.readability&&(t.data=t.data||{},t.data.readability=e.data.readability)},stage:0,after:"rehype-remark"},rt=[tt,et];
1
+ "use strict";var t,e=Object.create,r=Object.defineProperty,i=Object.getOwnPropertyDescriptor,n=Object.getOwnPropertyNames,s=Object.getPrototypeOf,o=Object.prototype.hasOwnProperty,a=(t,e,s,a)=>{if(e&&"object"==typeof e||"function"==typeof e)for(let l of n(e))o.call(t,l)||l===s||r(t,l,{get:()=>e[l],enumerable:!(a=i(e,l))||a.enumerable});return t},l=(t,i,n)=>(n=null!=t?e(s(t)):{},a(!i&&t&&t.__esModule?n:r(n,"default",{value:t,enumerable:!0}),t)),u={};((t,e)=>{for(var i in e)r(t,i,{get:e[i],enumerable:!0})})(u,{DefaultPipelineStage:()=>p,MdastBasePipeline:()=>Q,MdastPipeline:()=>K,PipelineStage:()=>c,astCompiler:()=>U,astFormat:()=>_,checkHtmlUrlExists:()=>lt,checkUrlExists:()=>at,htmlFormat:()=>I,htmlReadability:()=>rt,htmlReadabilityPlugin:()=>it,htmlReadabilityPlugins:()=>st,jsonParser:()=>V,markdownFormat:()=>R,mdast:()=>W,restoreReadabilityMetaPlugin:()=>nt}),module.exports=(t=u,a(r({},"__esModule",{value:!0}),t));var c=(t=>(t[t.parse=0]="parse",t[t.normalize=100]="normalize",t[t.compile=200]="compile",t[t.finalize=300]="finalize",t[t.stringify=400]="stringify",t))(c||{}),p=200,f=require("unified"),m=require("vfile"),h=l(require("remark-parse")),d=l(require("remark-stringify")),g=l(require("remark-gfm")),y=l(require("remark-directive")),b=l(require("remark-math")),v=l(require("remark-frontmatter")),k=require("unist-util-visit");function w(t,e){return{type:t,children:e,data:{hName:t}}}var q={mark:(t,e,r)=>"=="+r.containerPhrasing(t,{before:"==",after:"=="})+"==",sub:(t,e,r)=>"~"+r.containerPhrasing(t,{before:"~",after:"~"})+"~",sup:(t,e,r)=>"^"+r.containerPhrasing(t,{before:"^",after:"^"})+"^"},x={plugin:()=>t=>{!function(t){(0,k.visit)(t,"text",(t,e,r)=>{if(!r||void 0===e)return;const i=t.value;let n=0;const s=[];let o=!1;const a=/(==[^=]+==|~[^~]+~|\^[^^]+\^)/g;let l;for(;null!==(l=a.exec(i));){o=!0;const t=l[0],e=l.index;e>n&&s.push({type:"text",value:i.slice(n,e)});let r="mark",u="";t.startsWith("==")?(r="mark",u=t.slice(2,-2)):t.startsWith("~")?(r="sub",u=t.slice(1,-1)):t.startsWith("^")&&(r="sup",u=t.slice(1,-1)),s.push(w(r,[{type:"text",value:u}])),n=a.lastIndex}return o?(n<i.length&&s.push({type:"text",value:i.slice(n)}),r.children.splice(e,1,...s),e+s.length):void 0})}(t)},stage:100},j=require("unist-util-visit"),P={error:"danger",warn:"warning",success:"tip",important:"important",caution:"caution",note:"note"},M={plugin:()=>async t=>{(0,j.visit)(t,["containerDirective","leafDirective","textDirective"],t=>{const e=t,r=e.name.toLowerCase();if(e.name=P[r]||r,e.children&&e.children.length>0){const t=e.children[0];if(t.data?.directiveLabel||"directiveLabel"===t.type){const r=t;let i="";(0,j.visit)(r,"text",t=>{i+=t.value}),i&&!e.attributes?.title&&(e.attributes=e.attributes||{},e.attributes.title=i.trim()),e.children.shift()}}e.attributes?.title&&(e.attributes.title=String(e.attributes.title).trim()),e.data=e.data||{},e.data.hName=e.data.hName||("containerDirective"===e.type?"div":"span"),e.data.hProperties={...e.data.hProperties||{},...e.attributes,className:[e.name,e.data.hProperties?.className].filter(Boolean).join(" ")}})},stage:100,order:10},S=require("unist-util-visit"),N={plugin:()=>async t=>{(0,S.visit)(t,"tableCell",t=>{if(t.data){const{rowspan:e,colspan:r}=t.data;t.data.hProperties=t.data.hProperties||{},void 0!==e&&(t.data.hProperties.rowSpan=e,delete t.data.rowspan),void 0!==r&&(t.data.hProperties.colSpan=r,delete t.data.colspan)}})},stage:100,order:20},T=require("unist-util-visit"),F=require("shell-quote");var O={plugin:()=>async t=>{(0,T.visit)(t,"code",t=>{if(t.meta){const e=function(t){const e={},r=(0,F.parse)(t);for(const t of r)if("string"==typeof t){const r=t.split("=",2);2===r.length?e[r[0]]=r[1]:e[t]="true"}return e}(t.meta),r=t.data=t.data||{};e.title&&(r.title=e.title),e.filename&&(r.filename=e.filename),r.kv={...r.kv||{},...e}}})},stage:100,order:30},A=require("unist-util-visit"),E={plugin:()=>async t=>{(0,A.visit)(t,"image",t=>{const e=t.data=t.data||{},r=e.hProperties=e.hProperties||{},i=/[#?&](?:width=([0-9]+))?(?:&?height=([0-9]+))?(?:=([0-9]+)x([0-9]+))?$/,n=t.url.match(i);if(n){const e=n[1]||n[3],s=n[2]||n[4];e&&!r.width&&(r.width=parseInt(e,10)),s&&!r.height&&(r.height=parseInt(s,10)),t.url=t.url.replace(i,"")}e.width&&!r.width&&(r.width=e.width),e.height&&!r.height&&(r.height=e.height)})},stage:100,order:40},L=[{plugin:h.default,stage:0},{plugin:g.default,options:[{singleTilde:!1}],stage:0},{plugin:y.default,stage:0},{plugin:b.default,stage:0},{plugin:v.default,options:[["yaml","toml"]],stage:0},M,N,O,E,x],D=[{plugin:d.default,options:[{handlers:q}],stage:400},{plugin:g.default,options:[{singleTilde:!1}],stage:400},{plugin:y.default,stage:400},{plugin:b.default,stage:400},{plugin:v.default,options:[["yaml","toml"]],stage:400}];D.forEach(t=>{t.plugin===d.default?t.order=100:t.order=10});var R={id:"markdown",title:"Markdown (GFM + Directives)",extensions:["md","markdown","mdown","mkdn"],mediaTypes:["text/markdown"],input:L,output:D},z=l(require("rehype-parse")),$=l(require("rehype-remark")),C=l(require("remark-rehype")),H=l(require("rehype-sanitize")),B=l(require("rehype-stringify")),I={id:"html",title:"HTML",extensions:["html","htm"],mediaTypes:["text/html"],input:[{name:"rehype-parse",plugin:z.default,stage:0},{name:"rehype-remark",plugin:$.default,options:[{handlers:{mark:(t,e)=>{const r={type:"mark",children:t.all(e)};return t.patch(e,r),r},sub:(t,e)=>{const r={type:"sub",children:t.all(e)};return t.patch(e,r),r},sup:(t,e)=>{const r={type:"sup",children:t.all(e)};return t.patch(e,r),r}}}],stage:0}],output:[{plugin:C.default,stage:300,order:10},{plugin:H.default,options:[{...H.defaultSchema,tagNames:[...H.defaultSchema.tagNames||[],"mark","sub","sup"],attributes:{...H.defaultSchema.attributes,"*":[...H.defaultSchema.attributes?.["*"]||[],"className","id","style"],td:[...H.defaultSchema.attributes?.td||[],"rowSpan","colSpan","rowspan","colspan"],th:[...H.defaultSchema.attributes?.th||[],"rowSpan","colSpan","rowspan","colspan"],img:[...H.defaultSchema.attributes?.img||[],"width","height"]}}],stage:300,order:20},{plugin:B.default,stage:400}]};function U(){this.Compiler=t=>t}function V(){this.Parser=t=>JSON.parse(t)}var _={id:"ast",title:"MDAST",input:[{plugin:V,stage:0},M,N,O,E,x],output:[{plugin:U,options:[],stage:400}]};function G(t){return"object"==typeof t&&null!==t&&"string"==typeof t.type}var J=class t{constructor(t){this.queue=[],this._data={},this.input=t}static register(t){this.registry.set(t.id,t)}static getFormat(t){return this.registry.get(t)}data(t,e){return"string"==typeof t?this._data[t]=e:Object.assign(this._data,t),this}getFormat(t){return this.constructor.getFormat(t)}resolveFormat(t){if("string"==typeof t){const e=this.getFormat(t);if(!e)throw new Error(`[MdastPlus] Format '${t}' is not registered.`);return e}return t}toRuntimeEntry(t,e,r){let i=e;void 0!==t.stage&&(i="string"==typeof t.stage?c[t.stage]??e:t.stage);let n=t.options||[];const s=t.name||t.plugin.name;if(r&&s&&s in r){const e=r[s];"object"!=typeof e||null===e||Array.isArray(e)?n=Array.isArray(e)?e:[e]:("main"in e&&(t.main=!!e.main),"before"in e&&(t.before=e.before),"after"in e&&(t.after=e.after),n=[e])}return{name:s,plugin:t.plugin,options:n,stage:i,order:t.order||0,main:t.main,before:t.before,after:t.after}}ensureInputPlugins(e,r,i=400){const n=e.some(t=>0===(t.stage??p)),s=G(this.input);if(!n){let n=[];if(s){const e=t.getFormat("ast");e&&e.input&&(n=e.input)}else{const e=t.getFormat("markdown");e&&e.input&&(n=e.input)}for(const t of n){const n=this.toRuntimeEntry(t,0,r);(n.stage??p)<=i&&e.push(n)}}}from(t,e){const r=this.resolveFormat(t);if(!r.input||0===r.input.length)throw new Error(`[MdastPlus] Format '${r.id}' does not support input.`);for(const t of r.input)this.queue.push(this.toRuntimeEntry(t,0,e));return this}resolveRunQueue(t,e,r,i){let n=[];if(n=e?this.queue.map(t=>this.toRuntimeEntry(t,t.stage??p,e)):[...this.queue],void 0!==r){const t=r;n=n.filter(e=>(e.stage??p)<=t);const s=n.findIndex(e=>(e.stage??p)===t),o=-1!==s?s+(i??0):n.length-1,a=n.slice(0,o+1),l=n.filter(t=>t.main&&!a.includes(t));n=a.concat(l),this.ensureInputPlugins(n,e,t),n.push({plugin:U,options:[],stage:400,order:0})}else if(this.ensureInputPlugins(n,e,400),t.output)for(const r of t.output)n.push(this.toRuntimeEntry(r,300,e));return n}async to(t,e){const r=this.resolveFormat(t);if(!r.output)throw new Error(`[MdastPlus] Format '${r.id}' does not support output.`);let i,n,s;if(e){const t=e;"overrides"in t||"stage"in t||"stopAtIndex"in t?(i=t.overrides,n="string"==typeof t.stage?c[t.stage]:t.stage,s=t.stopAtIndex):i=e}const o=this.resolveRunQueue(r,i,n,s),a=this.assembleProcessor(o);if(G(this.input)){const t=await a.run(this.input),e=a.stringify(t),r=new m.VFile;return"string"==typeof e||Buffer.isBuffer(e)?r.value=e:r.result=e,r}return a.process(this.input)}use(t,...e){return this.useAt("compile",t,...e)}normalizePluggable(t,e=[]){const r=[];if(Array.isArray(t)){let i=!1;if(t.length>0&&"function"==typeof t[0]){t.slice(1).some(t=>!function(t){if("function"==typeof t)return!0;if(Array.isArray(t))return!0;if("object"==typeof t&&null!==t){if("plugin"in t&&"function"==typeof t.plugin)return!0;if("plugins"in t)return!0}return!1}(t))&&(i=!0)}if(i){const[e,...i]=t;r.push({plugin:e,options:i})}else for(const i of t)r.push(...this.normalizePluggable(i,e))}else if("function"==typeof t)r.push({plugin:t,options:e});else if("object"==typeof t&&null!==t)if("plugin"in t&&"function"==typeof t.plugin){const i=t,n=e.length>0?e:i.options||[];r.push({...i,options:n})}else if("plugins"in t){const e=t;e.plugins&&r.push(...this.normalizePluggable(e.plugins))}return r}useAt(t,e,...r){let i,n,s;"string"==typeof t||"number"==typeof t?(i=("string"==typeof t?c[t]:t)??p,n=e,s=r):(i=void 0,n=t,s=[e,...r].filter(t=>void 0!==t));const o=this.normalizePluggable(n,s);for(const t of o){const e=i??p;this.queue.push(this.toRuntimeEntry(t,e))}return this}priority(t){const e=this.queue[this.queue.length-1];return e&&(e.order=t),this}configure(t,...e){for(let r=this.queue.length-1;r>=0;r--){const i=this.queue[r];if((i.name||i.plugin.name)===t){i.options=e;break}}return this}assembleProcessor(t){const e={};for(const r of t){const t=r.stage??p;e[t]||(e[t]=[]),e[t].push(r)}const r=[],i=Object.keys(e).map(Number).sort((t,e)=>t-e);for(const t of i){const i=e[t].sort((t,e)=>(t.order||0)-(e.order||0)),n=i.findIndex(t=>t.main);if(-1!==n){const t=i[n];!1===t.options?.[0]?console.warn(`Main Plugin "${t.name}" is disabled. Skipping.`):0!==n&&(i.splice(n,1),i[0]=t)}let s=!0,o=0;for(;s&&o<i.length;){s=!1,o++;for(let t=0;t<i.length;t++){const e=i[t];if(e.after){const r=i.findIndex(t=>t.name===e.after);if(-1!==r&&r>t){i.splice(t,1),i.splice(r,0,e),s=!0;break}}if(e.before){const r=i.findIndex(t=>t.name===e.before);if(-1!==r&&r<t){i.splice(t,1),i.splice(r,0,e),s=!0;break}}}}r.push(...i)}const n=(0,f.unified)();Object.keys(this._data).length>0&&n.data(this._data);for(const t of r)n.use(t.plugin,...t.options||[]);return n}};J.registry=new Map;var Q=J,K=class extends Q{async toMarkdown(){const t=await this.to("markdown");return String(t)}toMarkdownVFile(){return this.to("markdown")}async toHtml(){const t=await this.to("html");return String(t)}toHtmlVFile(){return this.to("html")}async toAst(t){return(await this.to("ast",t)).result}toHTML(){return this.toHtml()}toAST(t){return this.toAst(t)}};function W(t){return new K(t)}K.register(R),K.register(I),K.register(_);var X=require("unist-util-visit"),Y=require("hast-util-from-html"),Z=require("hast-util-from-dom"),tt=require("yaml"),et=require("lodash-es"),rt=function(t){const{readability:e,jsdom:r,hast:i,url:n,fields:s}=t||{};this.parser=function(o,a){if(!1===e)return(0,Y.fromHtml)(o,{fragment:!0,...i});let l,u;try{l=require("jsdom").JSDOM;u=require("@mozilla/readability").Readability}catch(t){throw new Error("[html-readability] Dependency missing. Please install 'jsdom' and '@mozilla/readability'.")}const c=new u(new l(o,{url:n,pretendToBeVisual:!0,...r,includeNodeLocations:!0}).window.document,{maxElemsToParse:1e5,nbTopCandidates:5,charThreshold:500,keepClasses:!0,...e,serializer:t=>t}).parse();if(!c||!c.content)return(0,Y.fromHtml)(o,{fragment:!0,...i});const p=c.content;let f=(0,Z.fromDom)(p,{afterTransform:i?.afterTransform});const m=!1!==i?.fragment,{smartExcerpt:h=!0,sourceLink:d}=t||{};let g=(0,et.omitBy)(c,t=>null==t||""===t);const y=n||g.url,b=g.title;if(h&&g.excerpt&&g.textContent){const{threshold:t=.6,minContentLength:e=300}=!0===h?{}:h,r=g.excerpt.trim().toLowerCase().replace(/\s+/g," "),i=g.textContent.trim().toLowerCase().replace(/\s+/g," ");if(i.includes(r)){(r.length/i.length>t||i.length<e)&&delete g.excerpt}}if(n&&(g.url=n),delete g.content,delete g.textContent,s)if(Array.isArray(s))g=(0,et.pick)(g,s);else{const t={};for(const[e,r]of Object.entries(s))void 0!==g[e]&&(t[r]=g[e]);g=t}if(a&&(a.data=a.data||{},a.data.readability=g),f){const t=[f];d&&y&&!lt(f,y)&&t.push({type:"element",tagName:"blockquote",properties:{},children:[{type:"element",tagName:"p",properties:{},children:[{type:"text",value:"Source: "},{type:"element",tagName:"a",properties:{href:y},children:[{type:"text",value:b||y}]}]}]}),f=m?{type:"root",children:t}:{type:"root",children:[{type:"element",tagName:"html",properties:{},children:[{type:"element",tagName:"head",properties:{},children:[]},{type:"element",tagName:"body",properties:{},children:t}]}]},f.data=f.data||{},f.data.readability=g}return f}},it={name:"readability",plugin:rt,stage:0,main:!0},nt={name:"restore-readability-meta",plugin:t=>(e,r)=>{if(r.data?.readability){e.data=e.data||{},e.data.readability=r.data.readability;const{frontmatter:i,sourceLink:n}=t||{};if(i){const t="toml"===i?"toml":"yaml",n=(0,tt.stringify)(r.data.readability).trim();e.children.unshift({type:t,value:n})}if(n&&r.data.readability.url){const{url:t,title:i}=r.data.readability;at(e,t)||e.children.push({type:"blockquote",children:[{type:"paragraph",children:[{type:"text",value:"Source: "},{type:"link",url:t,children:[{type:"text",value:i||t}]}]}]})}}},stage:0,after:"rehype-remark"},st=[it,nt];function ot(t,e){if(!t||!e)return!1;if(t===e)return!0;try{const r=new URL(t),i=new URL(e);return r.href===i.href}catch{return t.replace(/\/$/,"")===e.replace(/\/$/,"")}}function at(t,e){if(!e)return!1;let r=!1;return(0,X.visit)(t,"link",t=>{if(ot(t.url,e))return r=!0,!1}),r}function lt(t,e){if(!e)return!1;let r=!1;return(0,X.visit)(t,"element",t=>{if("a"===t.tagName&&t.properties&&ot(t.properties.href,e))return r=!0,!1}),r}
package/dist/index.mjs CHANGED
@@ -1 +1 @@
1
- var t=(t=>"undefined"!=typeof require?require:"undefined"!=typeof Proxy?new Proxy(t,{get:(t,e)=>("undefined"!=typeof require?require:t)[e]}):t)(function(t){if("undefined"!=typeof require)return require.apply(this,arguments);throw Error('Dynamic require of "'+t+'" is not supported')}),e=(t=>(t[t.parse=0]="parse",t[t.normalize=100]="normalize",t[t.compile=200]="compile",t[t.finalize=300]="finalize",t[t.stringify=400]="stringify",t))(e||{}),r=200;import{unified as i}from"unified";import{VFile as n}from"vfile";import o from"remark-parse";import s from"remark-stringify";import a from"remark-gfm";import u from"remark-directive";import l from"remark-math";import p from"remark-frontmatter";import{visit as f}from"unist-util-visit";function c(t,e){return{type:t,children:e,data:{hName:t}}}var m={plugin:()=>t=>{!function(t){f(t,"text",(t,e,r)=>{if(!r||void 0===e)return;const i=t.value;let n=0;const o=[];let s=!1;const a=/(==[^=]+==|~[^~]+~|\^[^^]+\^)/g;let u;for(;null!==(u=a.exec(i));){s=!0;const t=u[0],e=u.index;e>n&&o.push({type:"text",value:i.slice(n,e)});let r="mark",l="";t.startsWith("==")?(r="mark",l=t.slice(2,-2)):t.startsWith("~")?(r="sub",l=t.slice(1,-1)):t.startsWith("^")&&(r="sup",l=t.slice(1,-1)),o.push(c(r,[{type:"text",value:l}])),n=a.lastIndex}return s?(n<i.length&&o.push({type:"text",value:i.slice(n)}),r.children.splice(e,1,...o),e+o.length):void 0})}(t)},stage:100};import{visit as h}from"unist-util-visit";var d={error:"danger",warn:"warning",success:"tip",important:"important",caution:"caution",note:"note"},g={plugin:()=>async t=>{h(t,["containerDirective","leafDirective","textDirective"],t=>{const e=t,r=e.name.toLowerCase();if(e.name=d[r]||r,e.children&&e.children.length>0){const t=e.children[0];if(t.data?.directiveLabel||"directiveLabel"===t.type){let r="";h(t,"text",t=>{r+=t.value}),r&&!e.attributes?.title&&(e.attributes=e.attributes||{},e.attributes.title=r.trim()),e.children.shift()}}e.attributes?.title&&(e.attributes.title=String(e.attributes.title).trim()),e.data=e.data||{},e.data.hName=e.data.hName||("containerDirective"===e.type?"div":"span"),e.data.hProperties={...e.data.hProperties||{},...e.attributes,className:[e.name,e.data.hProperties?.className].filter(Boolean).join(" ")}})},stage:100,order:10};import{visit as y}from"unist-util-visit";var v={plugin:()=>async t=>{y(t,"tableCell",t=>{if(t.data){const{rowspan:e,colspan:r}=t.data;t.data.hProperties=t.data.hProperties||{},void 0!==e&&(t.data.hProperties.rowSpan=e,delete t.data.rowspan),void 0!==r&&(t.data.hProperties.colSpan=r,delete t.data.colspan)}})},stage:100,order:20};import{visit as w}from"unist-util-visit";import{parse as b}from"shell-quote";var k={plugin:()=>async t=>{w(t,"code",t=>{if(t.meta){const e=function(t){const e={},r=b(t);for(const t of r)if("string"==typeof t){const r=t.split("=",2);2===r.length?e[r[0]]=r[1]:e[t]="true"}return e}(t.meta),r=t.data=t.data||{};e.title&&(r.title=e.title),e.filename&&(r.filename=e.filename),r.kv={...r.kv||{},...e}}})},stage:100,order:30};import{visit as x}from"unist-util-visit";var T={plugin:()=>async t=>{x(t,"image",t=>{const e=t.data=t.data||{},r=e.hProperties=e.hProperties||{},i=/[#?&](?:width=([0-9]+))?(?:&?height=([0-9]+))?(?:=([0-9]+)x([0-9]+))?$/,n=t.url.match(i);if(n){const e=n[1]||n[3],o=n[2]||n[4];e&&!r.width&&(r.width=parseInt(e,10)),o&&!r.height&&(r.height=parseInt(o,10)),t.url=t.url.replace(i,"")}e.width&&!r.width&&(r.width=e.width),e.height&&!r.height&&(r.height=e.height)})},stage:100,order:40},M=[{plugin:o,stage:0},{plugin:a,options:[{singleTilde:!1}],stage:0},{plugin:u,stage:0},{plugin:l,stage:0},{plugin:p,options:[["yaml","toml"]],stage:0},g,v,k,T,m],P=[{plugin:s,options:[{handlers:{mark:(t,e,r)=>"=="+r.containerPhrasing(t,{before:"==",after:"=="})+"==",sub:(t,e,r)=>"~"+r.containerPhrasing(t,{before:"~",after:"~"})+"~",sup:(t,e,r)=>"^"+r.containerPhrasing(t,{before:"^",after:"^"})+"^"}}],stage:400},{plugin:a,options:[{singleTilde:!1}],stage:400},{plugin:u,stage:400},{plugin:l,stage:400},{plugin:p,options:[["yaml","toml"]],stage:400}];P.forEach(t=>{t.plugin===s?t.order=100:t.order=10});var S={id:"markdown",title:"Markdown (GFM + Directives)",extensions:["md","markdown","mdown","mkdn"],mediaTypes:["text/markdown"],input:M,output:P};import j from"rehype-parse";import N from"rehype-remark";import A from"remark-rehype";import F,{defaultSchema as q}from"rehype-sanitize";import D from"rehype-stringify";var z={id:"html",title:"HTML",extensions:["html","htm"],mediaTypes:["text/html"],input:[{name:"rehype-parse",plugin:j,stage:0},{name:"rehype-remark",plugin:N,options:[{handlers:{mark:(t,e)=>{const r={type:"mark",children:t.all(e)};return t.patch(e,r),r},sub:(t,e)=>{const r={type:"sub",children:t.all(e)};return t.patch(e,r),r},sup:(t,e)=>{const r={type:"sup",children:t.all(e)};return t.patch(e,r),r}}}],stage:0}],output:[{plugin:A,stage:300,order:10},{plugin:F,options:[{...q,tagNames:[...q.tagNames||[],"mark","sub","sup"],attributes:{...q.attributes,"*":[...q.attributes?.["*"]||[],"className","id","style"],td:[...q.attributes?.td||[],"rowSpan","colSpan","rowspan","colspan"],th:[...q.attributes?.th||[],"rowSpan","colSpan","rowspan","colspan"],img:[...q.attributes?.img||[],"width","height"]}}],stage:300,order:20},{plugin:D,stage:400}]};function E(){this.Compiler=t=>t}function $(){this.Parser=t=>JSON.parse(t)}var H={id:"ast",title:"MDAST",input:[{plugin:$,stage:0},g,v,k,T,m],output:[{plugin:E,options:[],stage:400}]};function I(t){return"object"==typeof t&&null!==t&&"string"==typeof t.type}var L=class t{constructor(t){this.queue=[],this._data={},this.input=t}static register(t){this.registry.set(t.id,t)}static getFormat(t){return this.registry.get(t)}data(t,e){return"string"==typeof t?this._data[t]=e:Object.assign(this._data,t),this}getFormat(t){return this.constructor.getFormat(t)}resolveFormat(t){if("string"==typeof t){const e=this.getFormat(t);if(!e)throw new Error(`[MdastPlus] Format '${t}' is not registered.`);return e}return t}toRuntimeEntry(t,r,i){let n=r;void 0!==t.stage&&(n="string"==typeof t.stage?e[t.stage]??r:t.stage);let o=t.options||[];const s=t.name||t.plugin.name;if(i&&s&&s in i){const e=i[s];"object"!=typeof e||null===e||Array.isArray(e)?o=Array.isArray(e)?e:[e]:("main"in e&&(t.main=!!e.main),"before"in e&&(t.before=e.before),"after"in e&&(t.after=e.after),o=[e])}return{name:s,plugin:t.plugin,options:o,stage:n,order:t.order||0,main:t.main,before:t.before,after:t.after}}ensureInputPlugins(e,i,n=400){const o=e.some(t=>0===(t.stage??r)),s=I(this.input);if(!o){let o=[];if(s){const e=t.getFormat("ast");e&&e.input&&(o=e.input)}else{const e=t.getFormat("markdown");e&&e.input&&(o=e.input)}for(const t of o){const o=this.toRuntimeEntry(t,0,i);(o.stage??r)<=n&&e.push(o)}}}from(t,e){const r=this.resolveFormat(t);if(!r.input||0===r.input.length)throw new Error(`[MdastPlus] Format '${r.id}' does not support input.`);for(const t of r.input)this.queue.push(this.toRuntimeEntry(t,0,e));return this}resolveRunQueue(t,e,i,n){let o=[];if(o=e?this.queue.map(t=>this.toRuntimeEntry(t,t.stage??r,e)):[...this.queue],void 0!==i){const t=i;o=o.filter(e=>(e.stage??r)<=t);const s=o.findIndex(e=>(e.stage??r)===t),a=-1!==s?s+(n??0):o.length-1,u=o.slice(0,a+1),l=o.filter(t=>t.main&&!u.includes(t));o=u.concat(l),this.ensureInputPlugins(o,e,t),o.push({plugin:E,options:[],stage:400,order:0})}else if(this.ensureInputPlugins(o,e,400),t.output)for(const r of t.output)o.push(this.toRuntimeEntry(r,300,e));return o}async to(t,r){const i=this.resolveFormat(t);if(!i.output)throw new Error(`[MdastPlus] Format '${i.id}' does not support output.`);let o,s,a;if(r){const t=r;"overrides"in t||"stage"in t||"stopAtIndex"in t?(o=t.overrides,s="string"==typeof t.stage?e[t.stage]:t.stage,a=t.stopAtIndex):o=r}const u=this.resolveRunQueue(i,o,s,a),l=this.assembleProcessor(u);if(I(this.input)){const t=await l.run(this.input),e=l.stringify(t),r=new n;return"string"==typeof e||Buffer.isBuffer(e)?r.value=e:r.result=e,r}return l.process(this.input)}use(t,...e){return this.useAt("compile",t,...e)}normalizePluggable(t,e=[]){const r=[];if(Array.isArray(t)){let i=!1;if(t.length>0&&"function"==typeof t[0]){t.slice(1).some(t=>!function(t){if("function"==typeof t)return!0;if(Array.isArray(t))return!0;if("object"==typeof t&&null!==t){if("plugin"in t&&"function"==typeof t.plugin)return!0;if("plugins"in t)return!0}return!1}(t))&&(i=!0)}if(i){const[e,...i]=t;r.push({plugin:e,options:i})}else for(const i of t)r.push(...this.normalizePluggable(i,e))}else if("function"==typeof t)r.push({plugin:t,options:e});else if("object"==typeof t&&null!==t)if("plugin"in t&&"function"==typeof t.plugin){const i=t,n=e.length>0?e:i.options||[];r.push({...i,options:n})}else if("plugins"in t){const e=t;e.plugins&&r.push(...this.normalizePluggable(e.plugins))}return r}useAt(t,i,...n){let o,s,a;"string"==typeof t||"number"==typeof t?(o=("string"==typeof t?e[t]:t)??r,s=i,a=n):(o=void 0,s=t,a=[i,...n].filter(t=>void 0!==t));const u=this.normalizePluggable(s,a);for(const t of u){const e=o??r;this.queue.push(this.toRuntimeEntry(t,e))}return this}priority(t){const e=this.queue[this.queue.length-1];return e&&(e.order=t),this}configure(t,...e){for(let r=this.queue.length-1;r>=0;r--){const i=this.queue[r];if((i.name||i.plugin.name)===t){i.options=e;break}}return this}assembleProcessor(t){const e={};for(const i of t){const t=i.stage??r;e[t]||(e[t]=[]),e[t].push(i)}const n=[],o=Object.keys(e).map(Number).sort((t,e)=>t-e);for(const t of o){const r=e[t].sort((t,e)=>(t.order||0)-(e.order||0)),i=r.findIndex(t=>t.main);if(-1!==i){const t=r[i];!1===t.options?.[0]?console.warn(`Main Plugin "${t.name}" is disabled. Skipping.`):0!==i&&(r.splice(i,1),r[0]=t)}let o=!0,s=0;for(;o&&s<r.length;){o=!1,s++;for(let t=0;t<r.length;t++){const e=r[t];if(e.after){const i=r.findIndex(t=>t.name===e.after);if(-1!==i&&i>t){r.splice(t,1),r.splice(i,0,e),o=!0;break}}if(e.before){const i=r.findIndex(t=>t.name===e.before);if(-1!==i&&i<t){r.splice(t,1),r.splice(i,0,e),o=!0;break}}}}n.push(...r)}const s=i();Object.keys(this._data).length>0&&s.data(this._data);for(const t of n)s.use(t.plugin,...t.options||[]);return s}};L.registry=new Map;var O=L,B=class extends O{async toMarkdown(){const t=await this.to("markdown");return String(t)}toMarkdownVFile(){return this.to("markdown")}async toHtml(){const t=await this.to("html");return String(t)}toHtmlVFile(){return this.to("html")}async toAst(t){return(await this.to("ast",t)).result}toHTML(){return this.toHtml()}toAST(t){return this.toAst(t)}};function C(t){return new B(t)}B.register(S),B.register(z),B.register(H);import{fromHtml as V}from"hast-util-from-html";import{fromDom as R}from"hast-util-from-dom";var G=function(e){const{readability:r,jsdom:i,hast:n,url:o}=e||{};this.parser=function(e,s){if(!1===r)return V(e,{fragment:!0,...n});let a,u;try{a=t("jsdom").JSDOM;u=t("@mozilla/readability").Readability}catch(t){throw new Error("[html-readability] Dependency missing. Please install 'jsdom' and '@mozilla/readability'.")}const l=new u(new a(e,{url:o,pretendToBeVisual:!0,...i,includeNodeLocations:!0}).window.document,{maxElemsToParse:1e5,nbTopCandidates:5,charThreshold:500,keepClasses:!0,...r,serializer:t=>t}).parse();if(!l||!l.content)return V(e,{fragment:!0,...n});const p=l.content;let f=R(p,{afterTransform:n?.afterTransform});const c=!1!==n?.fragment,m={...l};return delete m.content,delete m.textContent,s&&(s.data=s.data||{},s.data.readability=m),f&&(f=c?{type:"root",children:[f]}:{type:"root",children:[{type:"element",tagName:"html",properties:{},children:[{type:"element",tagName:"head",properties:{},children:[]},{type:"element",tagName:"body",properties:{},children:[f]}]}]},f.data=f.data||{},f.data.readability=m),f}},J={name:"readability",plugin:G,stage:0,main:!0},Q={name:"restore-readability-meta",plugin:()=>(t,e)=>{e.data?.readability&&(t.data=t.data||{},t.data.readability=e.data.readability)},stage:0,after:"rehype-remark"},K=[J,Q];export{r as DefaultPipelineStage,O as MdastBasePipeline,B as MdastPipeline,e as PipelineStage,E as astCompiler,H as astFormat,z as htmlFormat,G as htmlReadability,J as htmlReadabilityPlugin,K as htmlReadabilityPlugins,$ as jsonParser,S as markdownFormat,C as mdast,Q as restoreReadabilityMetaPlugin};
1
+ var t=(t=>"undefined"!=typeof require?require:"undefined"!=typeof Proxy?new Proxy(t,{get:(t,e)=>("undefined"!=typeof require?require:t)[e]}):t)(function(t){if("undefined"!=typeof require)return require.apply(this,arguments);throw Error('Dynamic require of "'+t+'" is not supported')}),e=(t=>(t[t.parse=0]="parse",t[t.normalize=100]="normalize",t[t.compile=200]="compile",t[t.finalize=300]="finalize",t[t.stringify=400]="stringify",t))(e||{}),r=200;import{unified as i}from"unified";import{VFile as n}from"vfile";import o from"remark-parse";import s from"remark-stringify";import a from"remark-gfm";import l from"remark-directive";import u from"remark-math";import p from"remark-frontmatter";import{visit as f}from"unist-util-visit";function c(t,e){return{type:t,children:e,data:{hName:t}}}var m={plugin:()=>t=>{!function(t){f(t,"text",(t,e,r)=>{if(!r||void 0===e)return;const i=t.value;let n=0;const o=[];let s=!1;const a=/(==[^=]+==|~[^~]+~|\^[^^]+\^)/g;let l;for(;null!==(l=a.exec(i));){s=!0;const t=l[0],e=l.index;e>n&&o.push({type:"text",value:i.slice(n,e)});let r="mark",u="";t.startsWith("==")?(r="mark",u=t.slice(2,-2)):t.startsWith("~")?(r="sub",u=t.slice(1,-1)):t.startsWith("^")&&(r="sup",u=t.slice(1,-1)),o.push(c(r,[{type:"text",value:u}])),n=a.lastIndex}return s?(n<i.length&&o.push({type:"text",value:i.slice(n)}),r.children.splice(e,1,...o),e+o.length):void 0})}(t)},stage:100};import{visit as h}from"unist-util-visit";var d={error:"danger",warn:"warning",success:"tip",important:"important",caution:"caution",note:"note"},g={plugin:()=>async t=>{h(t,["containerDirective","leafDirective","textDirective"],t=>{const e=t,r=e.name.toLowerCase();if(e.name=d[r]||r,e.children&&e.children.length>0){const t=e.children[0];if(t.data?.directiveLabel||"directiveLabel"===t.type){let r="";h(t,"text",t=>{r+=t.value}),r&&!e.attributes?.title&&(e.attributes=e.attributes||{},e.attributes.title=r.trim()),e.children.shift()}}e.attributes?.title&&(e.attributes.title=String(e.attributes.title).trim()),e.data=e.data||{},e.data.hName=e.data.hName||("containerDirective"===e.type?"div":"span"),e.data.hProperties={...e.data.hProperties||{},...e.attributes,className:[e.name,e.data.hProperties?.className].filter(Boolean).join(" ")}})},stage:100,order:10};import{visit as y}from"unist-util-visit";var v={plugin:()=>async t=>{y(t,"tableCell",t=>{if(t.data){const{rowspan:e,colspan:r}=t.data;t.data.hProperties=t.data.hProperties||{},void 0!==e&&(t.data.hProperties.rowSpan=e,delete t.data.rowspan),void 0!==r&&(t.data.hProperties.colSpan=r,delete t.data.colspan)}})},stage:100,order:20};import{visit as w}from"unist-util-visit";import{parse as b}from"shell-quote";var k={plugin:()=>async t=>{w(t,"code",t=>{if(t.meta){const e=function(t){const e={},r=b(t);for(const t of r)if("string"==typeof t){const r=t.split("=",2);2===r.length?e[r[0]]=r[1]:e[t]="true"}return e}(t.meta),r=t.data=t.data||{};e.title&&(r.title=e.title),e.filename&&(r.filename=e.filename),r.kv={...r.kv||{},...e}}})},stage:100,order:30};import{visit as x}from"unist-util-visit";var N={plugin:()=>async t=>{x(t,"image",t=>{const e=t.data=t.data||{},r=e.hProperties=e.hProperties||{},i=/[#?&](?:width=([0-9]+))?(?:&?height=([0-9]+))?(?:=([0-9]+)x([0-9]+))?$/,n=t.url.match(i);if(n){const e=n[1]||n[3],o=n[2]||n[4];e&&!r.width&&(r.width=parseInt(e,10)),o&&!r.height&&(r.height=parseInt(o,10)),t.url=t.url.replace(i,"")}e.width&&!r.width&&(r.width=e.width),e.height&&!r.height&&(r.height=e.height)})},stage:100,order:40},S=[{plugin:o,stage:0},{plugin:a,options:[{singleTilde:!1}],stage:0},{plugin:l,stage:0},{plugin:u,stage:0},{plugin:p,options:[["yaml","toml"]],stage:0},g,v,k,N,m],T=[{plugin:s,options:[{handlers:{mark:(t,e,r)=>"=="+r.containerPhrasing(t,{before:"==",after:"=="})+"==",sub:(t,e,r)=>"~"+r.containerPhrasing(t,{before:"~",after:"~"})+"~",sup:(t,e,r)=>"^"+r.containerPhrasing(t,{before:"^",after:"^"})+"^"}}],stage:400},{plugin:a,options:[{singleTilde:!1}],stage:400},{plugin:l,stage:400},{plugin:u,stage:400},{plugin:p,options:[["yaml","toml"]],stage:400}];T.forEach(t=>{t.plugin===s?t.order=100:t.order=10});var M={id:"markdown",title:"Markdown (GFM + Directives)",extensions:["md","markdown","mdown","mkdn"],mediaTypes:["text/markdown"],input:S,output:T};import j from"rehype-parse";import P from"rehype-remark";import q from"remark-rehype";import A,{defaultSchema as F}from"rehype-sanitize";import L from"rehype-stringify";var D={id:"html",title:"HTML",extensions:["html","htm"],mediaTypes:["text/html"],input:[{name:"rehype-parse",plugin:j,stage:0},{name:"rehype-remark",plugin:P,options:[{handlers:{mark:(t,e)=>{const r={type:"mark",children:t.all(e)};return t.patch(e,r),r},sub:(t,e)=>{const r={type:"sub",children:t.all(e)};return t.patch(e,r),r},sup:(t,e)=>{const r={type:"sup",children:t.all(e)};return t.patch(e,r),r}}}],stage:0}],output:[{plugin:q,stage:300,order:10},{plugin:A,options:[{...F,tagNames:[...F.tagNames||[],"mark","sub","sup"],attributes:{...F.attributes,"*":[...F.attributes?.["*"]||[],"className","id","style"],td:[...F.attributes?.td||[],"rowSpan","colSpan","rowspan","colspan"],th:[...F.attributes?.th||[],"rowSpan","colSpan","rowspan","colspan"],img:[...F.attributes?.img||[],"width","height"]}}],stage:300,order:20},{plugin:L,stage:400}]};function E(){this.Compiler=t=>t}function z(){this.Parser=t=>JSON.parse(t)}var $={id:"ast",title:"MDAST",input:[{plugin:z,stage:0},g,v,k,N,m],output:[{plugin:E,options:[],stage:400}]};function O(t){return"object"==typeof t&&null!==t&&"string"==typeof t.type}var C=class t{constructor(t){this.queue=[],this._data={},this.input=t}static register(t){this.registry.set(t.id,t)}static getFormat(t){return this.registry.get(t)}data(t,e){return"string"==typeof t?this._data[t]=e:Object.assign(this._data,t),this}getFormat(t){return this.constructor.getFormat(t)}resolveFormat(t){if("string"==typeof t){const e=this.getFormat(t);if(!e)throw new Error(`[MdastPlus] Format '${t}' is not registered.`);return e}return t}toRuntimeEntry(t,r,i){let n=r;void 0!==t.stage&&(n="string"==typeof t.stage?e[t.stage]??r:t.stage);let o=t.options||[];const s=t.name||t.plugin.name;if(i&&s&&s in i){const e=i[s];"object"!=typeof e||null===e||Array.isArray(e)?o=Array.isArray(e)?e:[e]:("main"in e&&(t.main=!!e.main),"before"in e&&(t.before=e.before),"after"in e&&(t.after=e.after),o=[e])}return{name:s,plugin:t.plugin,options:o,stage:n,order:t.order||0,main:t.main,before:t.before,after:t.after}}ensureInputPlugins(e,i,n=400){const o=e.some(t=>0===(t.stage??r)),s=O(this.input);if(!o){let o=[];if(s){const e=t.getFormat("ast");e&&e.input&&(o=e.input)}else{const e=t.getFormat("markdown");e&&e.input&&(o=e.input)}for(const t of o){const o=this.toRuntimeEntry(t,0,i);(o.stage??r)<=n&&e.push(o)}}}from(t,e){const r=this.resolveFormat(t);if(!r.input||0===r.input.length)throw new Error(`[MdastPlus] Format '${r.id}' does not support input.`);for(const t of r.input)this.queue.push(this.toRuntimeEntry(t,0,e));return this}resolveRunQueue(t,e,i,n){let o=[];if(o=e?this.queue.map(t=>this.toRuntimeEntry(t,t.stage??r,e)):[...this.queue],void 0!==i){const t=i;o=o.filter(e=>(e.stage??r)<=t);const s=o.findIndex(e=>(e.stage??r)===t),a=-1!==s?s+(n??0):o.length-1,l=o.slice(0,a+1),u=o.filter(t=>t.main&&!l.includes(t));o=l.concat(u),this.ensureInputPlugins(o,e,t),o.push({plugin:E,options:[],stage:400,order:0})}else if(this.ensureInputPlugins(o,e,400),t.output)for(const r of t.output)o.push(this.toRuntimeEntry(r,300,e));return o}async to(t,r){const i=this.resolveFormat(t);if(!i.output)throw new Error(`[MdastPlus] Format '${i.id}' does not support output.`);let o,s,a;if(r){const t=r;"overrides"in t||"stage"in t||"stopAtIndex"in t?(o=t.overrides,s="string"==typeof t.stage?e[t.stage]:t.stage,a=t.stopAtIndex):o=r}const l=this.resolveRunQueue(i,o,s,a),u=this.assembleProcessor(l);if(O(this.input)){const t=await u.run(this.input),e=u.stringify(t),r=new n;return"string"==typeof e||Buffer.isBuffer(e)?r.value=e:r.result=e,r}return u.process(this.input)}use(t,...e){return this.useAt("compile",t,...e)}normalizePluggable(t,e=[]){const r=[];if(Array.isArray(t)){let i=!1;if(t.length>0&&"function"==typeof t[0]){t.slice(1).some(t=>!function(t){if("function"==typeof t)return!0;if(Array.isArray(t))return!0;if("object"==typeof t&&null!==t){if("plugin"in t&&"function"==typeof t.plugin)return!0;if("plugins"in t)return!0}return!1}(t))&&(i=!0)}if(i){const[e,...i]=t;r.push({plugin:e,options:i})}else for(const i of t)r.push(...this.normalizePluggable(i,e))}else if("function"==typeof t)r.push({plugin:t,options:e});else if("object"==typeof t&&null!==t)if("plugin"in t&&"function"==typeof t.plugin){const i=t,n=e.length>0?e:i.options||[];r.push({...i,options:n})}else if("plugins"in t){const e=t;e.plugins&&r.push(...this.normalizePluggable(e.plugins))}return r}useAt(t,i,...n){let o,s,a;"string"==typeof t||"number"==typeof t?(o=("string"==typeof t?e[t]:t)??r,s=i,a=n):(o=void 0,s=t,a=[i,...n].filter(t=>void 0!==t));const l=this.normalizePluggable(s,a);for(const t of l){const e=o??r;this.queue.push(this.toRuntimeEntry(t,e))}return this}priority(t){const e=this.queue[this.queue.length-1];return e&&(e.order=t),this}configure(t,...e){for(let r=this.queue.length-1;r>=0;r--){const i=this.queue[r];if((i.name||i.plugin.name)===t){i.options=e;break}}return this}assembleProcessor(t){const e={};for(const i of t){const t=i.stage??r;e[t]||(e[t]=[]),e[t].push(i)}const n=[],o=Object.keys(e).map(Number).sort((t,e)=>t-e);for(const t of o){const r=e[t].sort((t,e)=>(t.order||0)-(e.order||0)),i=r.findIndex(t=>t.main);if(-1!==i){const t=r[i];!1===t.options?.[0]?console.warn(`Main Plugin "${t.name}" is disabled. Skipping.`):0!==i&&(r.splice(i,1),r[0]=t)}let o=!0,s=0;for(;o&&s<r.length;){o=!1,s++;for(let t=0;t<r.length;t++){const e=r[t];if(e.after){const i=r.findIndex(t=>t.name===e.after);if(-1!==i&&i>t){r.splice(t,1),r.splice(i,0,e),o=!0;break}}if(e.before){const i=r.findIndex(t=>t.name===e.before);if(-1!==i&&i<t){r.splice(t,1),r.splice(i,0,e),o=!0;break}}}}n.push(...r)}const s=i();Object.keys(this._data).length>0&&s.data(this._data);for(const t of n)s.use(t.plugin,...t.options||[]);return s}};C.registry=new Map;var H=C,I=class extends H{async toMarkdown(){const t=await this.to("markdown");return String(t)}toMarkdownVFile(){return this.to("markdown")}async toHtml(){const t=await this.to("html");return String(t)}toHtmlVFile(){return this.to("html")}async toAst(t){return(await this.to("ast",t)).result}toHTML(){return this.toHtml()}toAST(t){return this.toAst(t)}};function R(t){return new I(t)}I.register(M),I.register(D),I.register($);import{visit as B}from"unist-util-visit";import{fromHtml as V}from"hast-util-from-html";import{fromDom as U}from"hast-util-from-dom";import{stringify as G}from"yaml";import{omitBy as J,pick as Q}from"lodash-es";var K=function(e){const{readability:r,jsdom:i,hast:n,url:o,fields:s}=e||{};this.parser=function(a,l){if(!1===r)return V(a,{fragment:!0,...n});let u,p;try{u=t("jsdom").JSDOM;p=t("@mozilla/readability").Readability}catch(t){throw new Error("[html-readability] Dependency missing. Please install 'jsdom' and '@mozilla/readability'.")}const f=new p(new u(a,{url:o,pretendToBeVisual:!0,...i,includeNodeLocations:!0}).window.document,{maxElemsToParse:1e5,nbTopCandidates:5,charThreshold:500,keepClasses:!0,...r,serializer:t=>t}).parse();if(!f||!f.content)return V(a,{fragment:!0,...n});const c=f.content;let m=U(c,{afterTransform:n?.afterTransform});const h=!1!==n?.fragment,{smartExcerpt:d=!0,sourceLink:g}=e||{};let y=J(f,t=>null==t||""===t);const v=o||y.url,w=y.title;if(d&&y.excerpt&&y.textContent){const{threshold:t=.6,minContentLength:e=300}=!0===d?{}:d,r=y.excerpt.trim().toLowerCase().replace(/\s+/g," "),i=y.textContent.trim().toLowerCase().replace(/\s+/g," ");if(i.includes(r)){(r.length/i.length>t||i.length<e)&&delete y.excerpt}}if(o&&(y.url=o),delete y.content,delete y.textContent,s)if(Array.isArray(s))y=Q(y,s);else{const t={};for(const[e,r]of Object.entries(s))void 0!==y[e]&&(t[r]=y[e]);y=t}if(l&&(l.data=l.data||{},l.data.readability=y),m){const t=[m];g&&v&&!tt(m,v)&&t.push({type:"element",tagName:"blockquote",properties:{},children:[{type:"element",tagName:"p",properties:{},children:[{type:"text",value:"Source: "},{type:"element",tagName:"a",properties:{href:v},children:[{type:"text",value:w||v}]}]}]}),m=h?{type:"root",children:t}:{type:"root",children:[{type:"element",tagName:"html",properties:{},children:[{type:"element",tagName:"head",properties:{},children:[]},{type:"element",tagName:"body",properties:{},children:t}]}]},m.data=m.data||{},m.data.readability=y}return m}},W={name:"readability",plugin:K,stage:0,main:!0},X={name:"restore-readability-meta",plugin:t=>(e,r)=>{if(r.data?.readability){e.data=e.data||{},e.data.readability=r.data.readability;const{frontmatter:i,sourceLink:n}=t||{};if(i){const t="toml"===i?"toml":"yaml",n=G(r.data.readability).trim();e.children.unshift({type:t,value:n})}if(n&&r.data.readability.url){const{url:t,title:i}=r.data.readability;_(e,t)||e.children.push({type:"blockquote",children:[{type:"paragraph",children:[{type:"text",value:"Source: "},{type:"link",url:t,children:[{type:"text",value:i||t}]}]}]})}}},stage:0,after:"rehype-remark"},Y=[W,X];function Z(t,e){if(!t||!e)return!1;if(t===e)return!0;try{const r=new URL(t),i=new URL(e);return r.href===i.href}catch{return t.replace(/\/$/,"")===e.replace(/\/$/,"")}}function _(t,e){if(!e)return!1;let r=!1;return B(t,"link",t=>{if(Z(t.url,e))return r=!0,!1}),r}function tt(t,e){if(!e)return!1;let r=!1;return B(t,"element",t=>{if("a"===t.tagName&&t.properties&&Z(t.properties.href,e))return r=!0,!1}),r}export{r as DefaultPipelineStage,H as MdastBasePipeline,I as MdastPipeline,e as PipelineStage,E as astCompiler,$ as astFormat,tt as checkHtmlUrlExists,_ as checkUrlExists,D as htmlFormat,K as htmlReadability,W as htmlReadabilityPlugin,Y as htmlReadabilityPlugins,z as jsonParser,M as markdownFormat,R as mdast,X as restoreReadabilityMetaPlugin};
package/docs/README.md CHANGED
@@ -106,12 +106,16 @@ const vfile = await mdast(myInput)
106
106
  .data({ myGlobal: 'value' })
107
107
  // Add multiple plugins as an array at the 'compile' stage
108
108
  .use([pluginA, pluginB])
109
- // Or add a set of plugins at a specific stage
110
- .useAt('parse', htmlReadabilityPlugins)
109
+ // Or add a set of plugins at a specific stage with options
110
+ .useAt('parse', htmlReadabilityPlugins, {
111
+ url: 'https://example.com/article',
112
+ frontmatter: true, // Inject metadata as YAML frontmatter
113
+ sourceLink: true // Append source link at the bottom
114
+ })
111
115
  .priority(10) // Run later than default plugins
112
- .to('html');
116
+ .to('markdown');
113
117
 
114
- console.log(vfile.value); // The serialized HTML string
118
+ console.log(vfile.value); // The serialized Markdown with frontmatter
115
119
  ```
116
120
 
117
121
  ### Plugin Behavior
@@ -185,7 +189,19 @@ Each stage can have one "main" plugin. If a plugin is marked with `main: true`,
185
189
  | `extract-code-meta` | normalize | Parses `title="foo"` from code block meta. |
186
190
  | `image-size` | normalize | Parses `#=WxH` from image URLs. |
187
191
  | `normalize-inline-styles` | normalize | Standardizes `==mark==`, `~sub~`, and `^sup^`. |
188
- | `html-readability` | parse | Uses Mozilla's Readability to extract main content from HTML. Use `htmlReadabilityPlugins` array for easier setup. |
192
+ | `html-readability` | parse | Uses Mozilla's Readability to extract main content from HTML. Supports `frontmatter` injection, `sourceLink` (source link) footer, and `smartExcerpt` for intelligent summary management. Use `htmlReadabilityPlugins` array for easier setup. |
193
+
194
+ ### html-readability Options
195
+
196
+ - `url`: (string) The URL of the HTML document.
197
+ - `frontmatter`: (boolean | 'yaml' | 'toml') Whether to inject metadata as frontmatter. Default: `false`.
198
+ - `sourceLink`: (boolean) Whether to append source link at the bottom. The link will be generated based on the original title/URL even if they are filtered or renamed in the `fields` option. Default: `false`.
199
+ - `fields`: (string[] | object) Control which metadata fields are kept or how they are renamed.
200
+ - If an array: acts as an allowlist (e.g., `['title', 'excerpt']`).
201
+ - If an object: maps original keys to new names (e.g., `{ title: 'headline' }`). Only keys in the map are kept (Projection).
202
+ - `smartExcerpt`: (boolean | object) Whether to remove the excerpt if it is a duplicate or near-duplicate of the main content. Default: `true`.
203
+ - `threshold`: (number) The ratio of excerpt length to content length (0.0 to 1.0). Default: `0.6`.
204
+ - `minContentLength`: (number) Minimum length of the main content required to keep the excerpt. Default: `300`.
189
205
 
190
206
  ## Contributing
191
207
 
@@ -145,6 +145,16 @@ const myPlugin = function(options) {
145
145
  const myPlugin = function(options, context) { ... }
146
146
  ```
147
147
 
148
+ #### 4. Implementation Considerations: Tree Wrapping
149
+
150
+ When a plugin needs to wrap the AST tree (e.g., adding `<html>`/`<body>` tags or appending footers), follow this pattern:
151
+
152
+ 1. **Prepare Content First**: Collect all content nodes (main content, footers, etc.) into an array.
153
+ 2. **Check Existence**: Always check if a node (like a source link) already exists before appending to avoid duplicates.
154
+ 3. **Wrap Once**: Perform the final wrapping logic (e.g., based on `isFragment` option) using the prepared array.
155
+
156
+ This approach ensures robustness across different output configurations and avoids redundant logic.
157
+
148
158
  ### Main Plugins
149
159
 
150
160
  Each stage supports a **single** main plugin. When a plugin is marked with `main: true`, it will replace the first plugin that was originally registered for that stage. This is primarily used by formats to allow users to override the default parser or stringifier by injecting a different one at the same stage.
@@ -102,12 +102,16 @@ const vfile = await mdast(myInput)
102
102
  .data({ myGlobal: 'value' })
103
103
  // 以数组形式在 'compile' 阶段添加多个插件
104
104
  .use([pluginA, pluginB])
105
- // 或在特定阶段添加一组插件
106
- .useAt('parse', htmlReadabilityPlugins)
105
+ // 或在特定阶段添加一组插件并配置选项
106
+ .useAt('parse', htmlReadabilityPlugins, {
107
+ url: 'https://example.com/article',
108
+ frontmatter: true, // 将元数据注入为 YAML Frontmatter
109
+ sourceLink: true // 在底部添加原文链接
110
+ })
107
111
  .priority(10) // 比默认插件更晚执行
108
- .to('html');
112
+ .to('markdown');
109
113
 
110
- console.log(vfile.value); // 序列化后的 HTML 字符串
114
+ console.log(vfile.value); // 包含 Frontmatter 的序列化 Markdown 字符串
111
115
  ```
112
116
 
113
117
  ### 插件行为
@@ -181,7 +185,19 @@ const result = await mdast('Hello').to('reverse');
181
185
  | `extract-code-meta` | normalize | 从代码块元数据中解析 `title="foo"`。 |
182
186
  | `image-size` | normalize | 从图片 URL 中解析 `#=WxH`。 |
183
187
  | `normalize-inline-styles` | normalize | 标准化 `==mark==`、`~sub~` 和 `^sup^`。 |
184
- | `html-readability` | parse | 使用 Mozilla 的 Readability 从 HTML 中提取主体内容。使用 `htmlReadabilityPlugins` 数组可以简化配置。 |
188
+ | `html-readability` | parse | 使用 Mozilla 的 Readability 从 HTML 中提取主体内容。支持 `frontmatter` 注入、`sourceLink` (原文链接) 页脚以及 `smartExcerpt` 智能摘要管理。使用 `htmlReadabilityPlugins` 数组可以简化配置。 |
189
+
190
+ ### html-readability 选项
191
+
192
+ - `url`: (string) HTML 文档的 URL。
193
+ - `frontmatter`: (boolean | 'yaml' | 'toml') 是否将元数据注入为 Frontmatter。默认值: `false`。
194
+ - `sourceLink`: (boolean) 是否在底部添加原文链接。即使在 `fields` 选项中过滤或重命名了 title/url 字段,该链接仍将根据原始信息正确生成。默认值: `false`。
195
+ - `fields`: (string[] | object) 控制保留哪些元数据字段或如何重命名它们。
196
+ - 如果是数组:作为白名单(例如 `['title', 'excerpt']`)。
197
+ - 如果是对象:将原始键映射到新名称(例如 `{ title: 'headline' }`)。只有映射中存在的键才会被保留(投影)。
198
+ - `smartExcerpt`: (boolean | object) 是否在摘要与正文内容重复或近乎重复时移除摘要。默认值: `true`。
199
+ - `threshold`: (number) 摘要长度与正文长度的比率阈值 (0.0 到 1.0)。默认值: `0.6`。
200
+ - `minContentLength`: (number) 保留摘要所需的正文最小长度。默认值: `300`。
185
201
 
186
202
  ## 贡献
187
203