@aiello/wechat-to-markdown 1.2.9 → 1.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +5 -5
- package/dist/index.cjs.map +2 -2
- package/dist/index.js +5 -5
- package/dist/index.js.map +2 -2
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -192,13 +192,13 @@ async function transformHtml2Markdown(url) {
|
|
|
192
192
|
const u = new URL(url);
|
|
193
193
|
u.searchParams.delete("poc_token");
|
|
194
194
|
try {
|
|
195
|
-
const res = await import_axios.default.
|
|
196
|
-
url: u.href,
|
|
197
|
-
method: "get",
|
|
195
|
+
const res = await import_axios.default.get(u.href, {
|
|
198
196
|
timeout: 3e4,
|
|
199
197
|
maxRedirects: 5,
|
|
200
|
-
|
|
201
|
-
|
|
198
|
+
headers: {
|
|
199
|
+
DNT: "1",
|
|
200
|
+
"Upgrade-Insecure-Requests": "1",
|
|
201
|
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
|
|
202
202
|
}
|
|
203
203
|
});
|
|
204
204
|
return parseHTML(res.data, { url: u.href });
|
package/dist/index.cjs.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../src/index.ts", "../node_modules/tsup/assets/cjs_shims.js", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
|
|
4
|
-
"sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult
|
|
5
|
-
"mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,qCAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;
|
|
4
|
+
"sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n },\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/ /gi, ' ')\n\n code = code.replace(/</gi, '<')\n\n code = code.replace(/>/gi, '>')\n\n code = code.replace(/&/gi, '&')\n\n code = code.replace(/"/gi, '\"')\n\n code = code.replace(/'/gi, '\u2018')\n\n code = code.replace(/×/gi, '*')\n\n code = code.replace(/÷/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n  \\n\\n`\n }\n\n return\n}\n"],
|
|
5
|
+
"mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,qCAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AJ1EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,IAAI,0BAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,qBAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA;AAAA;AAIZ,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/index.js
CHANGED
|
@@ -157,13 +157,13 @@ async function transformHtml2Markdown(url) {
|
|
|
157
157
|
const u = new URL(url);
|
|
158
158
|
u.searchParams.delete("poc_token");
|
|
159
159
|
try {
|
|
160
|
-
const res = await axios.
|
|
161
|
-
url: u.href,
|
|
162
|
-
method: "get",
|
|
160
|
+
const res = await axios.get(u.href, {
|
|
163
161
|
timeout: 3e4,
|
|
164
162
|
maxRedirects: 5,
|
|
165
|
-
|
|
166
|
-
|
|
163
|
+
headers: {
|
|
164
|
+
DNT: "1",
|
|
165
|
+
"Upgrade-Insecure-Requests": "1",
|
|
166
|
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
|
|
167
167
|
}
|
|
168
168
|
});
|
|
169
169
|
return parseHTML(res.data, { url: u.href });
|
package/dist/index.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../src/index.ts", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
|
|
4
|
-
"sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult
|
|
5
|
-
"mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,oBAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;
|
|
4
|
+
"sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport type { TurnDownResult } from './type'\nimport { Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.get(u.href, {\n timeout: 30000,\n maxRedirects: 5,\n headers: {\n DNT: '1',\n 'Upgrade-Insecure-Requests': '1',\n 'User-Agent':\n 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',\n },\n })\n\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/ /gi, ' ')\n\n code = code.replace(/</gi, '<')\n\n code = code.replace(/>/gi, '>')\n\n code = code.replace(/&/gi, '&')\n\n code = code.replace(/"/gi, '\"')\n\n code = code.replace(/'/gi, '\u2018')\n\n code = code.replace(/×/gi, '*')\n\n code = code.replace(/÷/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n  \\n\\n`\n }\n\n return\n}\n"],
|
|
5
|
+
"mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,oBAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AH1EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAjBxE;AAkBI,QAAM,IAAI,KAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,IAAI,EAAE,MAAM;AAAA,MAChC,SAAS;AAAA,MACT,cAAc;AAAA,MACd,SAAS;AAAA,QACL,KAAK;AAAA,QACL,6BAA6B;AAAA,QAC7B,cACI;AAAA;AAAA;AAIZ,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|