@aiello/wechat-to-markdown 1.2.15 → 1.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -117,7 +117,7 @@ function getTurnDownService(params) {
117
117
  }).addRule("getImage", {
118
118
  filter: ["img"],
119
119
  replacement(content, node) {
120
- const src = node.getAttribute("data-src") || "";
120
+ const src = node.getAttribute("data-src") || node.getAttribute("src") || "";
121
121
  return src ? `
122
122
 
123
123
  ![](${src})
@@ -144,12 +144,6 @@ function getTurnDownService(params) {
144
144
  }).addRule("lineBreaks", {
145
145
  filter: "br",
146
146
  replacement: () => "\n"
147
- }).addRule("img2Code", {
148
- filter: ["figure"],
149
- replacement(content, node) {
150
- const res = figure2markdown(node.innerHTML);
151
- return res || "";
152
- }
153
147
  });
154
148
  return turndownService;
155
149
  }
@@ -170,7 +164,15 @@ async function parseWeChatPage(htmlRaw, meta) {
170
164
  const htmlEl = $("#js_content");
171
165
  const html = htmlEl.html();
172
166
  if (html == null ? void 0 : html.length) {
173
- let res = getTurnDownService(meta).turndown(html);
167
+ const service = getTurnDownService(meta);
168
+ service.addRule("img2Code", {
169
+ filter: ["figure"],
170
+ replacement(content, node) {
171
+ const res2 = figure2markdown(node.innerHTML);
172
+ return res2 || "";
173
+ }
174
+ });
175
+ let res = service.turndown(html);
174
176
  res = `## ${title}
175
177
 
176
178
  ## \u4F5C\u8005 ${author}
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/parsers/wechat.ts","../src/turndownCode.ts","../src/formatHtml.ts","../src/parsers/general.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { parseWeChatPage } from './parsers/wechat'\nimport { parseGeneralHTML } from './parsers/general'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport function isWechatPage(html: string) {\n return html?.includes('res.wx.qq.com')\n}\n\nexport async function parseHTML(\n html: string,\n meta: { url: string }\n): Promise<TurnDownResult> {\n let result: TurnDownResult | null = null\n\n if (isWechatPage(html)) {\n result = await parseWeChatPage(html, meta)\n }\n if (!result) {\n // 兜底处理\n result = await parseGeneralHTML(html, meta)\n }\n\n if (result) {\n return result\n }\n\n return getError(Status.Fail)\n}\n\nexport async function transformHtml2Markdown(\n html: string,\n /**\n * 这里的 url 是原始的 url,主要是用来映射内部跳转链接\n */\n url: string\n): Promise<TurnDownResult> {\n try {\n return parseHTML(html, { url })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport async function transformUrl2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return transformHtml2Markdown(res.data, url)\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\n\nexport async function parseWeChatPage(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n const title = ($('#activity-name').text() || '').trim()\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html?.length) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\n\nexport async function parseGeneralHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n $('script').remove()\n $('[style]')\n .filter((_, el) =>\n /display\\s*:\\s*none/i.test($(el).attr('style') || '')\n )\n .remove()\n\n const title = ($('title').text() || '').trim()\n const author = ($('meta[name=\"author\"]')?.attr('content') || '').trim()\n\n const html = $('body').html()\n if (html?.length) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,mBAA0C;;;ACAnC,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWA,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACXlB,IAAAC,kBAAqB;;;ACGrB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,eAAAC,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAAC,QAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,6BAAAC,QAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AD9EA,eAAsB,gBAAgB,SAAiB,MAAuB;AAJ9E;AAKI,QAAM,QAAI,sBAAK,OAAO;AAEtB,QAAM,SAAS,EAAE,gBAAgB,EAAE,KAAK,KAAK,IAAI,KAAK;AACtD,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,6BAAM,QAAQ;AACd,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AGvCA,IAAAC,kBAAqB;AAIrB,eAAsB,iBAAiB,SAAiB,MAAuB;AAJ/E;AAKI,QAAM,QAAI,sBAAK,OAAO;AACtB,IAAE,QAAQ,EAAE,OAAO;AACnB,IAAE,SAAS,EACN;AAAA,IAAO,CAAC,GAAG,OACR,sBAAsB,KAAK,EAAE,EAAE,EAAE,KAAK,OAAO,KAAK,EAAE;AAAA,EACxD,EACC,OAAO;AAEZ,QAAM,SAAS,EAAE,OAAO,EAAE,KAAK,KAAK,IAAI,KAAK;AAC7C,QAAM,YAAU,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK,eAAc,IAAI,KAAK;AAEtE,QAAM,OAAO,EAAE,MAAM,EAAE,KAAK;AAC5B,MAAI,6BAAM,QAAQ;AACd,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AN3BA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIO,SAAS,aAAa,MAAc;AACvC,SAAO,6BAAM,SAAS;AAC1B;AAEA,eAAsB,UAClB,MACA,MACuB;AACvB,MAAI,SAAgC;AAEpC,MAAI,aAAa,IAAI,GAAG;AACpB,aAAS,MAAM,gBAAgB,MAAM,IAAI;AAAA,EAC7C;AACA,MAAI,CAAC,QAAQ;AAET,aAAS,MAAM,iBAAiB,MAAM,IAAI;AAAA,EAC9C;AAEA,MAAI,QAAQ;AACR,WAAO;AAAA,EACX;AAEA,SAAO,uBAAoB;AAC/B;AAEA,eAAsB,uBAClB,MAIA,KACuB;AACvB,MAAI;AACA,WAAO,UAAU,MAAM,EAAE,IAAI,CAAC;AAAA,EAClC,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;AASA,eAAsB,sBAClB,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,aAAAC,QAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,uBAAuB,IAAI,MAAM,GAAG;AAAA,EAC/C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["Status","import_cheerio","cheerio","turnDownService","TurndownPluginGfm","import_cheerio","axios"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/parsers/wechat.ts","../src/turndownCode.ts","../src/formatHtml.ts","../src/parsers/general.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { parseWeChatPage } from './parsers/wechat'\nimport { parseGeneralHTML } from './parsers/general'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport function isWechatPage(html: string) {\n return html?.includes('res.wx.qq.com')\n}\n\nexport async function parseHTML(\n html: string,\n meta: { url: string }\n): Promise<TurnDownResult> {\n let result: TurnDownResult | null = null\n\n if (isWechatPage(html)) {\n result = await parseWeChatPage(html, meta)\n }\n if (!result) {\n // 兜底处理\n result = await parseGeneralHTML(html, meta)\n }\n\n if (result) {\n return result\n }\n\n return getError(Status.Fail)\n}\n\nexport async function transformHtml2Markdown(\n html: string,\n /**\n * 这里的 url 是原始的 url,主要是用来映射内部跳转链接\n */\n url: string\n): Promise<TurnDownResult> {\n try {\n return parseHTML(html, { url })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport async function transformUrl2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return transformHtml2Markdown(res.data, url)\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\nimport { figure2markdown } from '../formatHtml'\n\nexport async function parseWeChatPage(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n const title = ($('#activity-name').text() || '').trim()\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html?.length) {\n const service = getTurnDownService(meta)\n service.addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n let res = service.turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n // Prefer WeChat's data-src, but fall back to normal src (e.g. base64)\n const src =\n node.getAttribute('data-src') ||\n node.getAttribute('src') ||\n ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\n\nexport async function parseGeneralHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n $('script').remove()\n $('[style]')\n .filter((_, el) =>\n /display\\s*:\\s*none/i.test($(el).attr('style') || '')\n )\n .remove()\n\n const title = ($('title').text() || '').trim()\n const author = ($('meta[name=\"author\"]')?.attr('content') || '').trim()\n\n const html = $('body').html()\n if (html?.length) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,mBAA0C;;;ACAnC,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWA,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACXlB,IAAAC,kBAAqB;;;ACGrB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,eAAAC,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAAC,QAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,6BAAAC,QAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAE5B,YAAM,MACF,KAAK,aAAa,UAAU,KAC5B,KAAK,aAAa,KAAK,KACvB;AAEJ,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC;AAEL,SAAO;AACX;;;AD1EA,eAAsB,gBAAgB,SAAiB,MAAuB;AAL9E;AAMI,QAAM,QAAI,sBAAK,OAAO;AAEtB,QAAM,SAAS,EAAE,gBAAgB,EAAE,KAAK,KAAK,IAAI,KAAK;AACtD,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,6BAAM,QAAQ;AACd,UAAM,UAAU,mBAAmB,IAAI;AACvC,YAAQ,QAAQ,YAAY;AAAA,MACxB,QAAQ,CAAC,QAAQ;AAAA,MACjB,YAAY,SAAS,MAAW;AAC5B,cAAMC,OAAM,gBAAgB,KAAK,SAAS;AAC1C,eAAOA,QAAO;AAAA,MAClB;AAAA,IACJ,CAAC;AAED,QAAI,MAAM,QAAQ,SAAS,IAAI;AAE/B,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AGjDA,IAAAC,kBAAqB;AAIrB,eAAsB,iBAAiB,SAAiB,MAAuB;AAJ/E;AAKI,QAAM,QAAI,sBAAK,OAAO;AACtB,IAAE,QAAQ,EAAE,OAAO;AACnB,IAAE,SAAS,EACN;AAAA,IAAO,CAAC,GAAG,OACR,sBAAsB,KAAK,EAAE,EAAE,EAAE,KAAK,OAAO,KAAK,EAAE;AAAA,EACxD,EACC,OAAO;AAEZ,QAAM,SAAS,EAAE,OAAO,EAAE,KAAK,KAAK,IAAI,KAAK;AAC7C,QAAM,YAAU,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK,eAAc,IAAI,KAAK;AAEtE,QAAM,OAAO,EAAE,MAAM,EAAE,KAAK;AAC5B,MAAI,6BAAM,QAAQ;AACd,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AN3BA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIO,SAAS,aAAa,MAAc;AACvC,SAAO,6BAAM,SAAS;AAC1B;AAEA,eAAsB,UAClB,MACA,MACuB;AACvB,MAAI,SAAgC;AAEpC,MAAI,aAAa,IAAI,GAAG;AACpB,aAAS,MAAM,gBAAgB,MAAM,IAAI;AAAA,EAC7C;AACA,MAAI,CAAC,QAAQ;AAET,aAAS,MAAM,iBAAiB,MAAM,IAAI;AAAA,EAC9C;AAEA,MAAI,QAAQ;AACR,WAAO;AAAA,EACX;AAEA,SAAO,uBAAoB;AAC/B;AAEA,eAAsB,uBAClB,MAIA,KACuB;AACvB,MAAI;AACA,WAAO,UAAU,MAAM,EAAE,IAAI,CAAC;AAAA,EAClC,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;AASA,eAAsB,sBAClB,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,aAAAC,QAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,uBAAuB,IAAI,MAAM,GAAG;AAAA,EAC/C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["Status","import_cheerio","cheerio","turnDownService","TurndownPluginGfm","res","import_cheerio","axios"]}
package/dist/index.js CHANGED
@@ -82,7 +82,7 @@ function getTurnDownService(params) {
82
82
  }).addRule("getImage", {
83
83
  filter: ["img"],
84
84
  replacement(content, node) {
85
- const src = node.getAttribute("data-src") || "";
85
+ const src = node.getAttribute("data-src") || node.getAttribute("src") || "";
86
86
  return src ? `
87
87
 
88
88
  ![](${src})
@@ -109,12 +109,6 @@ function getTurnDownService(params) {
109
109
  }).addRule("lineBreaks", {
110
110
  filter: "br",
111
111
  replacement: () => "\n"
112
- }).addRule("img2Code", {
113
- filter: ["figure"],
114
- replacement(content, node) {
115
- const res = figure2markdown(node.innerHTML);
116
- return res || "";
117
- }
118
112
  });
119
113
  return turndownService;
120
114
  }
@@ -135,7 +129,15 @@ async function parseWeChatPage(htmlRaw, meta) {
135
129
  const htmlEl = $("#js_content");
136
130
  const html = htmlEl.html();
137
131
  if (html == null ? void 0 : html.length) {
138
- let res = getTurnDownService(meta).turndown(html);
132
+ const service = getTurnDownService(meta);
133
+ service.addRule("img2Code", {
134
+ filter: ["figure"],
135
+ replacement(content, node) {
136
+ const res2 = figure2markdown(node.innerHTML);
137
+ return res2 || "";
138
+ }
139
+ });
140
+ let res = service.turndown(html);
139
141
  res = `## ${title}
140
142
 
141
143
  ## \u4F5C\u8005 ${author}
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/parsers/wechat.ts","../src/turndownCode.ts","../src/formatHtml.ts","../src/parsers/general.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { parseWeChatPage } from './parsers/wechat'\nimport { parseGeneralHTML } from './parsers/general'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport function isWechatPage(html: string) {\n return html?.includes('res.wx.qq.com')\n}\n\nexport async function parseHTML(\n html: string,\n meta: { url: string }\n): Promise<TurnDownResult> {\n let result: TurnDownResult | null = null\n\n if (isWechatPage(html)) {\n result = await parseWeChatPage(html, meta)\n }\n if (!result) {\n // 兜底处理\n result = await parseGeneralHTML(html, meta)\n }\n\n if (result) {\n return result\n }\n\n return getError(Status.Fail)\n}\n\nexport async function transformHtml2Markdown(\n html: string,\n /**\n * 这里的 url 是原始的 url,主要是用来映射内部跳转链接\n */\n url: string\n): Promise<TurnDownResult> {\n try {\n return parseHTML(html, { url })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport async function transformUrl2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return transformHtml2Markdown(res.data, url)\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\n\nexport async function parseWeChatPage(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n const title = ($('#activity-name').text() || '').trim()\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html?.length) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\n\nexport async function parseGeneralHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n $('script').remove()\n $('[style]')\n .filter((_, el) =>\n /display\\s*:\\s*none/i.test($(el).attr('style') || '')\n )\n .remove()\n\n const title = ($('title').text() || '').trim()\n const author = ($('meta[name=\"author\"]')?.attr('content') || '').trim()\n\n const html = $('body').html()\n if (html?.length) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n"],"mappings":";AAAA,OAAO,WAAmC;;;ACAnC,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWA,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACXlB,SAAS,YAAY;;;ACGrB,OAAO,qBAAqB;AAC5B,OAAO,uBAAuB;;;ACJ9B,OAAO,aAAa;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,oBAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,UAAU,KAAK;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,QAAQ;AAAA,IACjB,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK,SAAS;AAC1C,aAAO,OAAO;AAAA,IAClB;AAAA,EACJ,CAAC;AAEL,SAAO;AACX;;;AD9EA,eAAsB,gBAAgB,SAAiB,MAAuB;AAJ9E;AAKI,QAAM,IAAI,KAAK,OAAO;AAEtB,QAAM,SAAS,EAAE,gBAAgB,EAAE,KAAK,KAAK,IAAI,KAAK;AACtD,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,6BAAM,QAAQ;AACd,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AGvCA,SAAS,QAAAC,aAAY;AAIrB,eAAsB,iBAAiB,SAAiB,MAAuB;AAJ/E;AAKI,QAAM,IAAIC,MAAK,OAAO;AACtB,IAAE,QAAQ,EAAE,OAAO;AACnB,IAAE,SAAS,EACN;AAAA,IAAO,CAAC,GAAG,OACR,sBAAsB,KAAK,EAAE,EAAE,EAAE,KAAK,OAAO,KAAK,EAAE;AAAA,EACxD,EACC,OAAO;AAEZ,QAAM,SAAS,EAAE,OAAO,EAAE,KAAK,KAAK,IAAI,KAAK;AAC7C,QAAM,YAAU,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK,eAAc,IAAI,KAAK;AAEtE,QAAM,OAAO,EAAE,MAAM,EAAE,KAAK;AAC5B,MAAI,6BAAM,QAAQ;AACd,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AN3BA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIO,SAAS,aAAa,MAAc;AACvC,SAAO,6BAAM,SAAS;AAC1B;AAEA,eAAsB,UAClB,MACA,MACuB;AACvB,MAAI,SAAgC;AAEpC,MAAI,aAAa,IAAI,GAAG;AACpB,aAAS,MAAM,gBAAgB,MAAM,IAAI;AAAA,EAC7C;AACA,MAAI,CAAC,QAAQ;AAET,aAAS,MAAM,iBAAiB,MAAM,IAAI;AAAA,EAC9C;AAEA,MAAI,QAAQ;AACR,WAAO;AAAA,EACX;AAEA,SAAO,uBAAoB;AAC/B;AAEA,eAAsB,uBAClB,MAIA,KACuB;AACvB,MAAI;AACA,WAAO,UAAU,MAAM,EAAE,IAAI,CAAC;AAAA,EAClC,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;AASA,eAAsB,sBAClB,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,uBAAuB,IAAI,MAAM,GAAG;AAAA,EAC/C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["Status","load","load"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/error.ts","../src/type.ts","../src/parsers/wechat.ts","../src/turndownCode.ts","../src/formatHtml.ts","../src/parsers/general.ts"],"sourcesContent":["import axios, { AxiosRequestConfig } from 'axios'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { parseWeChatPage } from './parsers/wechat'\nimport { parseGeneralHTML } from './parsers/general'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport function isWechatPage(html: string) {\n return html?.includes('res.wx.qq.com')\n}\n\nexport async function parseHTML(\n html: string,\n meta: { url: string }\n): Promise<TurnDownResult> {\n let result: TurnDownResult | null = null\n\n if (isWechatPage(html)) {\n result = await parseWeChatPage(html, meta)\n }\n if (!result) {\n // 兜底处理\n result = await parseGeneralHTML(html, meta)\n }\n\n if (result) {\n return result\n }\n\n return getError(Status.Fail)\n}\n\nexport async function transformHtml2Markdown(\n html: string,\n /**\n * 这里的 url 是原始的 url,主要是用来映射内部跳转链接\n */\n url: string\n): Promise<TurnDownResult> {\n try {\n return parseHTML(html, { url })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\n/**\n * 支持添加代理服务器\n */\ninterface TransformHtml2MarkdownOptions {\n axiosConfig?: AxiosRequestConfig\n}\n\nexport async function transformUrl2Markdown(\n url: string,\n options: TransformHtml2MarkdownOptions = {}\n): Promise<TurnDownResult> {\n const { axiosConfig = {} } = options\n const { headers = {}, ...restConfig } = axiosConfig\n\n const u = new URL(url)\n // 移除该参数\n // 避免出现 302 跳转\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n ...headers,\n },\n ...restConfig,\n })\n\n return transformHtml2Markdown(res.data, url)\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n","export const errObj: {\n [key: number]: string\n} = {\n '400': '内容解析失败',\n}\n","export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\nimport { figure2markdown } from '../formatHtml'\n\nexport async function parseWeChatPage(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n const title = ($('#activity-name').text() || '').trim()\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html?.length) {\n const service = getTurnDownService(meta)\n service.addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n let res = service.turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n","/**\n * html 转换 markdown 格式\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // 自定义配置\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // 微信文章获取到的 content, 会出现首尾都有 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n // Prefer WeChat's data-src, but fall back to normal src (e.g. base64)\n const src =\n node.getAttribute('data-src') ||\n node.getAttribute('src') ||\n ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n","import cheerio from 'cheerio'\n\n/**\n * 微信不同代码风格\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown 不解析 code 下的 br 标签,需要使用正则替换 br 标签为 \\n 才可以继续解析\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '‘')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * 解决如下格式\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img 可能没有图片说明\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n","import { load } from 'cheerio'\nimport { Status } from '../type'\nimport { getTurnDownService } from '../turndownCode'\n\nexport async function parseGeneralHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n $('script').remove()\n $('[style]')\n .filter((_, el) =>\n /display\\s*:\\s*none/i.test($(el).attr('style') || '')\n )\n .remove()\n\n const title = ($('title').text() || '').trim()\n const author = ($('meta[name=\"author\"]')?.attr('content') || '').trim()\n\n const html = $('body').html()\n if (html?.length) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## 作者 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return null\n}\n"],"mappings":";AAAA,OAAO,WAAmC;;;ACAnC,IAAM,SAET;AAAA,EACA,OAAO;AACX;;;ACOO,IAAW,SAAX,kBAAWA,YAAX;AACH,EAAAA,gBAAA,aAAU,OAAV;AACA,EAAAA,gBAAA,UAAO,OAAP;AAFc,SAAAA;AAAA,GAAA;;;ACXlB,SAAS,YAAY;;;ACGrB,OAAO,qBAAqB;AAC5B,OAAO,uBAAuB;;;ACJ9B,OAAO,aAAa;AAWb,SAAS,WAAW,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU,IAAI;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,UAAU,GAAG;AAEjC,SAAO,KAAK,QAAQ,WAAW,GAAG;AAElC,SAAO,KAAK,QAAQ,YAAY,GAAG;AAEnC,SAAO,KAAK,QAAQ,YAAY,QAAG;AAEnC,SAAO,KAAK,QAAQ,aAAa,GAAG;AAEpC,SAAO,KAAK,QAAQ,cAAc,GAAG;AAErC,QAAM,IAAI,QAAQ,KAAK,IAAI;AAE3B,SAAO,EAAE,KAAK;AAClB;AAQO,SAAS,gBAAgB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM,QAAQ;AAExC,QAAM,UAAU,WAAW,MAAM,SAAS;AAE1C,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,MAAM,GAAG;AACvB,aAAS,OAAO;AAAA,EACpB;AAEA,MAAI,MAAM,QAAQ,OAAO,GAAG;AACxB,WAAO,QAAQ;AAAA,EACnB;AAGA,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA,EAC9B;AAEA;AACJ;;;AD3DA,SAAS,mBAAmB,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA,EACR,CAAC;AAED,oBAAkB,IAAI,eAAe;AAErC,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK,SAAS;AAAA,MAC5C;AAEA,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA,IAC3B;AAAA,EACJ,CAAC,EACA,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC,KAAK;AAAA,IACd,YAAY,SAAS,MAAW;AAE5B,YAAM,MACF,KAAK,aAAa,UAAU,KAC5B,KAAK,aAAa,KAAK,KACvB;AAEJ,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA,IAC1C;AAAA,EACJ,CAAC,EACA,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,YAAY,MAAM,YAC/B,KAAK,UAAU,SAAS,cAAc;AAAA,IAE9C;AAAA,IACA,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ;AAAA,QACV,KAAK,aAAa,YAAY,KAAK;AAAA,MACvC;AAEA,YAAM,IAAI,IAAI,IAAI,OAAO,GAAG;AAC5B,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA,IAC3D;AAAA,EACJ,CAAC,EACA,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,EACvB,CAAC;AAEL,SAAO;AACX;;;AD1EA,eAAsB,gBAAgB,SAAiB,MAAuB;AAL9E;AAMI,QAAM,IAAI,KAAK,OAAO;AAEtB,QAAM,SAAS,EAAE,gBAAgB,EAAE,KAAK,KAAK,IAAI,KAAK;AACtD,QAAM,SAAS,MAAM;AAAA,IACjB,IAAI;AAAA,MACA;AAAA,SACI,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK;AAAA,QAC/B,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,IAAI;AAAA,MACtC,EACK,IAAI,CAAC,SAAU,OAAO,KAAK,KAAK,IAAI,EAAG,EACvC,OAAO,OAAO;AAAA,IACvB;AAAA,EACJ,EAAE,KAAK,IAAI;AAEX,QAAM,SAAS,EAAE,aAAa;AAC9B,QAAM,OAAO,OAAO,KAAK;AAEzB,MAAI,6BAAM,QAAQ;AACd,UAAM,UAAU,mBAAmB,IAAI;AACvC,YAAQ,QAAQ,YAAY;AAAA,MACxB,QAAQ,CAAC,QAAQ;AAAA,MACjB,YAAY,SAAS,MAAW;AAC5B,cAAMC,OAAM,gBAAgB,KAAK,SAAS;AAC1C,eAAOA,QAAO;AAAA,MAClB;AAAA,IACJ,CAAC;AAED,QAAI,MAAM,QAAQ,SAAS,IAAI;AAE/B,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AGjDA,SAAS,QAAAC,aAAY;AAIrB,eAAsB,iBAAiB,SAAiB,MAAuB;AAJ/E;AAKI,QAAM,IAAIC,MAAK,OAAO;AACtB,IAAE,QAAQ,EAAE,OAAO;AACnB,IAAE,SAAS,EACN;AAAA,IAAO,CAAC,GAAG,OACR,sBAAsB,KAAK,EAAE,EAAE,EAAE,KAAK,OAAO,KAAK,EAAE;AAAA,EACxD,EACC,OAAO;AAEZ,QAAM,SAAS,EAAE,OAAO,EAAE,KAAK,KAAK,IAAI,KAAK;AAC7C,QAAM,YAAU,OAAE,qBAAqB,MAAvB,mBAA0B,KAAK,eAAc,IAAI,KAAK;AAEtE,QAAM,OAAO,EAAE,MAAM,EAAE,KAAK;AAC5B,MAAI,6BAAM,QAAQ;AACd,QAAI,MAAM,mBAAmB,IAAI,EAAE,SAAS,IAAI;AAEhD,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT;AAAA,MACA,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACb;AAAA,IACJ;AAAA,EACJ;AAEA,SAAO;AACX;;;AN3BA,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA,EAChB;AACJ;AAIO,SAAS,aAAa,MAAc;AACvC,SAAO,6BAAM,SAAS;AAC1B;AAEA,eAAsB,UAClB,MACA,MACuB;AACvB,MAAI,SAAgC;AAEpC,MAAI,aAAa,IAAI,GAAG;AACpB,aAAS,MAAM,gBAAgB,MAAM,IAAI;AAAA,EAC7C;AACA,MAAI,CAAC,QAAQ;AAET,aAAS,MAAM,iBAAiB,MAAM,IAAI;AAAA,EAC9C;AAEA,MAAI,QAAQ;AACR,WAAO;AAAA,EACX;AAEA,SAAO,uBAAoB;AAC/B;AAEA,eAAsB,uBAClB,MAIA,KACuB;AACvB,MAAI;AACA,WAAO,UAAU,MAAM,EAAE,IAAI,CAAC;AAAA,EAClC,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;AASA,eAAsB,sBAClB,KACA,UAAyC,CAAC,GACnB;AACvB,QAAM,EAAE,cAAc,CAAC,EAAE,IAAI;AAC7B,QAAM,EAAE,UAAU,CAAC,MAAM,WAAW,IAAI;AAExC,QAAM,IAAI,IAAI,IAAI,GAAG;AAGrB,IAAE,aAAa,OAAO,WAAW;AAEjC,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA,QACJ,GAAG;AAAA,MACP;AAAA,MACA,GAAG;AAAA,IACP,CAAC;AAED,WAAO,uBAAuB,IAAI,MAAM,GAAG;AAAA,EAC/C,SAAS,KAAP;AACE,YAAQ,IAAI,GAAG;AACf,WAAO,uBAAoB;AAAA,EAC/B;AACJ;","names":["Status","res","load","load"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiello/wechat-to-markdown",
3
- "version": "1.2.15",
3
+ "version": "1.2.16",
4
4
  "description": "解析微信文章 URL 为 markdown",
5
5
  "author": "Aiello Chan<aiello.chan@gmail.com>",
6
6
  "keywords": [