@aiello/wechat-to-markdown 1.2.8 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -26,7 +26,8 @@ var __toModule = (module2) => {
26
26
  __export(exports, {
27
27
  Status: () => Status,
28
28
  default: () => transformHtml2Markdown,
29
- getTurnDownService: () => getTurnDownService
29
+ getTurnDownService: () => getTurnDownService,
30
+ parseHTML: () => parseHTML
30
31
  });
31
32
 
32
33
  // node_modules/tsup/assets/cjs_shims.js
@@ -157,54 +158,59 @@ var getError = (code) => {
157
158
  msg: errObj[code]
158
159
  };
159
160
  };
160
- async function transformHtml2Markdown(url) {
161
- const u = new URL(url);
162
- u.searchParams.delete("poc_token");
163
- let json = await import_axios.default.request({
164
- url: u.href,
165
- method: "get",
166
- timeout: 3e4,
167
- transformResponse(res) {
168
- return res;
169
- }
170
- }).then((res) => {
171
- var _a;
172
- const $ = (0, import_cheerio2.load)(res["data"]);
173
- let title = $("#activity-name").text();
174
- title = title.trim() || "";
175
- const author = Array.from(new Set([
176
- (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
177
- ...$("#js_name").text().split("\n")
178
- ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
179
- const htmlEl = $("#js_content");
180
- const html = htmlEl.html();
181
- if (html && html.length > 0) {
182
- let res2 = getTurnDownService({ url: u.href }).turndown(html);
183
- res2 = `## ${title}
161
+ async function parseHTML(htmlRaw, meta) {
162
+ var _a;
163
+ const $ = (0, import_cheerio2.load)(htmlRaw);
164
+ let title = $("#activity-name").text();
165
+ title = title.trim() || "";
166
+ const author = Array.from(new Set([
167
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
168
+ ...$("#js_name").text().split("\n")
169
+ ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
170
+ const htmlEl = $("#js_content");
171
+ const html = htmlEl.html();
172
+ if (html && html.length > 0) {
173
+ let res = getTurnDownService(meta).turndown(html);
174
+ res = `## ${title}
184
175
 
185
176
  ## \u4F5C\u8005 ${author}
186
177
 
187
- ` + res2;
188
- return {
189
- success: true,
190
- code: Status.Success,
191
- data: {
192
- title,
193
- author,
194
- content: res2
195
- }
196
- };
197
- }
198
- return getError(Status.Fail);
199
- }).catch((err) => {
178
+ ` + res;
179
+ return {
180
+ success: true,
181
+ code: Status.Success,
182
+ data: {
183
+ title,
184
+ author,
185
+ content: res
186
+ }
187
+ };
188
+ }
189
+ return getError(Status.Fail);
190
+ }
191
+ async function transformHtml2Markdown(url) {
192
+ const u = new URL(url);
193
+ u.searchParams.delete("poc_token");
194
+ try {
195
+ const res = await import_axios.default.request({
196
+ url: u.href,
197
+ method: "get",
198
+ timeout: 3e4,
199
+ maxRedirects: 5,
200
+ transformResponse(res2) {
201
+ return res2;
202
+ }
203
+ });
204
+ return parseHTML(res.data, { url: u.href });
205
+ } catch (err) {
200
206
  console.log(err);
201
- return err;
202
- });
203
- return json;
207
+ return getError(Status.Fail);
208
+ }
204
209
  }
205
210
  // Annotate the CommonJS export names for ESM import in node:
206
211
  0 && (module.exports = {
207
212
  Status,
208
- getTurnDownService
213
+ getTurnDownService,
214
+ parseHTML
209
215
  });
210
216
  //# sourceMappingURL=index.cjs.map
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/index.ts", "../node_modules/tsup/assets/cjs_shims.js", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n let json: TurnDownResult = await axios\n .request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService({ url: u.href }).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,qCAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AJ3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI,OAAuB,MAAM,qBAC5B,QAAQ;AAAA,IACL,KAAK,EAAE;AAAA,IACP,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AAjCvB;AAkCY,UAAM,IAAI,0BAAK,IAAI;AAEnB,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AACxB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,MACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,MAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,MAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,UAAM,SAAS,EAAE;AACjB,UAAM,OAAO,OAAO;AAEpB,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,mBAAmB,EAAE,KAAK,EAAE,QAAQ,SAAS;AAEvD,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
4
+ "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n maxRedirects: 5,\n transformResponse(res) {\n return res\n },\n })\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,qCAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AJ3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAhBxE;AAiBI,QAAM,IAAI,0BAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,qBAAM,QAAQ;AAAA,MAC5B,KAAK,EAAE;AAAA,MACP,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,cAAc;AAAA,MACd,kBAAkB,MAAK;AACnB,eAAO;AAAA;AAAA;AAGf,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
6
6
  "names": []
7
7
  }
package/dist/index.d.ts CHANGED
@@ -24,6 +24,21 @@ interface Params {
24
24
  }
25
25
  declare function getTurnDownService(params: Params): turnDownService;
26
26
 
27
+ declare function parseHTML(htmlRaw: string, meta: {
28
+ url: string;
29
+ }): Promise<{
30
+ code: number;
31
+ success: boolean;
32
+ msg: string;
33
+ } | {
34
+ success: boolean;
35
+ code: Status;
36
+ data: {
37
+ title: string;
38
+ author: string;
39
+ content: string;
40
+ };
41
+ }>;
27
42
  declare function transformHtml2Markdown(url: string): Promise<TurnDownResult>;
28
43
 
29
- export { Status, TurnDownResult, transformHtml2Markdown as default, getTurnDownService };
44
+ export { Status, TurnDownResult, transformHtml2Markdown as default, getTurnDownService, parseHTML };
package/dist/index.js CHANGED
@@ -123,54 +123,59 @@ var getError = (code) => {
123
123
  msg: errObj[code]
124
124
  };
125
125
  };
126
- async function transformHtml2Markdown(url) {
127
- const u = new URL(url);
128
- u.searchParams.delete("poc_token");
129
- let json = await axios.request({
130
- url: u.href,
131
- method: "get",
132
- timeout: 3e4,
133
- transformResponse(res) {
134
- return res;
135
- }
136
- }).then((res) => {
137
- var _a;
138
- const $ = load(res["data"]);
139
- let title = $("#activity-name").text();
140
- title = title.trim() || "";
141
- const author = Array.from(new Set([
142
- (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
143
- ...$("#js_name").text().split("\n")
144
- ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
145
- const htmlEl = $("#js_content");
146
- const html = htmlEl.html();
147
- if (html && html.length > 0) {
148
- let res2 = getTurnDownService({ url: u.href }).turndown(html);
149
- res2 = `## ${title}
126
+ async function parseHTML(htmlRaw, meta) {
127
+ var _a;
128
+ const $ = load(htmlRaw);
129
+ let title = $("#activity-name").text();
130
+ title = title.trim() || "";
131
+ const author = Array.from(new Set([
132
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
133
+ ...$("#js_name").text().split("\n")
134
+ ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
135
+ const htmlEl = $("#js_content");
136
+ const html = htmlEl.html();
137
+ if (html && html.length > 0) {
138
+ let res = getTurnDownService(meta).turndown(html);
139
+ res = `## ${title}
150
140
 
151
141
  ## \u4F5C\u8005 ${author}
152
142
 
153
- ` + res2;
154
- return {
155
- success: true,
156
- code: Status.Success,
157
- data: {
158
- title,
159
- author,
160
- content: res2
161
- }
162
- };
163
- }
164
- return getError(Status.Fail);
165
- }).catch((err) => {
143
+ ` + res;
144
+ return {
145
+ success: true,
146
+ code: Status.Success,
147
+ data: {
148
+ title,
149
+ author,
150
+ content: res
151
+ }
152
+ };
153
+ }
154
+ return getError(Status.Fail);
155
+ }
156
+ async function transformHtml2Markdown(url) {
157
+ const u = new URL(url);
158
+ u.searchParams.delete("poc_token");
159
+ try {
160
+ const res = await axios.request({
161
+ url: u.href,
162
+ method: "get",
163
+ timeout: 3e4,
164
+ maxRedirects: 5,
165
+ transformResponse(res2) {
166
+ return res2;
167
+ }
168
+ });
169
+ return parseHTML(res.data, { url: u.href });
170
+ } catch (err) {
166
171
  console.log(err);
167
- return err;
168
- });
169
- return json;
172
+ return getError(Status.Fail);
173
+ }
170
174
  }
171
175
  export {
172
176
  Status,
173
177
  transformHtml2Markdown as default,
174
- getTurnDownService
178
+ getTurnDownService,
179
+ parseHTML
175
180
  };
176
181
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/index.ts", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n let json: TurnDownResult = await axios\n .request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService({ url: u.href }).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,oBAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AH3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI,OAAuB,MAAM,MAC5B,QAAQ;AAAA,IACL,KAAK,EAAE;AAAA,IACP,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AAjCvB;AAkCY,UAAM,IAAI,KAAK,IAAI;AAEnB,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AACxB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,MACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,MAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,MAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,UAAM,SAAS,EAAE;AACjB,UAAM,OAAO,OAAO;AAEpB,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,mBAAmB,EAAE,KAAK,EAAE,QAAQ,SAAS;AAEvD,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
4
+ "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n maxRedirects: 5,\n transformResponse(res) {\n return res\n },\n })\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,oBAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AH3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAhBxE;AAiBI,QAAM,IAAI,KAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,QAAQ;AAAA,MAC5B,KAAK,EAAE;AAAA,MACP,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,cAAc;AAAA,MACd,kBAAkB,MAAK;AACnB,eAAO;AAAA;AAAA;AAGf,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
6
6
  "names": []
7
7
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiello/wechat-to-markdown",
3
- "version": "1.2.8",
3
+ "version": "1.2.9",
4
4
  "description": "解析微信文章 URL 为 markdown",
5
5
  "author": "Aiello Chan<aiello.chan@gmail.com>",
6
6
  "keywords": [